mirror of
https://codeberg.org/privacy1st/de-p1st-monitor
synced 2024-11-20 19:28:05 +01:00
feat: log drive temperature with psutil
This commit is contained in:
parent
be10017165
commit
23aa370cbf
@ -8,6 +8,7 @@ from de.p1st.monitor.cfg.singleton import get_cfg
|
||||
from de.p1st.monitor.logger_ex import LoggerArgEx
|
||||
from de.p1st.monitor.loggers.cpu import CPULogger1, CPULogger5, CPULogger15
|
||||
from de.p1st.monitor.loggers.drive import DriveLogger
|
||||
from de.p1st.monitor.loggers.drive_temp import DriveTempLogger
|
||||
from de.p1st.monitor.loggers.filesystem import FilesystemLogger
|
||||
from de.p1st.monitor.loggers.memory import MemoryLogger
|
||||
from de.p1st.monitor.loggers.network import NetworkLogger
|
||||
@ -86,6 +87,13 @@ def get_loggers() -> tuple[list[Logger], list[LoggerArgEx]]:
|
||||
warn_data_range = int(cfg_.get('warn_data_range', '1'))
|
||||
return DriveLogger(uuid, id_, device, warn_if_above, warn_threshold, warn_data_range)
|
||||
|
||||
def drive_temp(cfg_: configparser.SectionProxy) -> Logger:
    """
    Create a `DriveTempLogger` from one `[drive_temp.*]` config section.

    Keys read from the section:

    - `type`: passed through unchanged; `None` if the key is absent.
    - `warn_if_above`: converted to `int`; `None` if the key is absent.
    - `warn_threshold`: converted to `int`, defaults to `1`.
    - `warn_data_range`: converted to `int`, defaults to `1`.

    :param cfg_: The config section to read.
    :return: The configured logger.
    """
    sensor_type = cfg_.get('type', None)

    # The warn limit is optional: absent key -> None.
    if 'warn_if_above' in cfg_:
        limit = int(cfg_['warn_if_above'])
    else:
        limit = None

    threshold = int(cfg_.get('warn_threshold', '1'))
    data_range = int(cfg_.get('warn_data_range', '1'))

    return DriveTempLogger(sensor_type, limit, threshold, data_range)
|
||||
|
||||
def memory(cfg_: configparser.SectionProxy) -> Logger:
|
||||
warn_if_above = float(cfg_.get('warn_if_above', '1.0'))
|
||||
warn_threshold = int(cfg_.get('warn_threshold', '1'))
|
||||
@ -107,6 +115,7 @@ def get_loggers() -> tuple[list[Logger], list[LoggerArgEx]]:
|
||||
'network': net,
|
||||
'filesystem': filesystem,
|
||||
'drive': drive,
|
||||
'drive_temp': drive_temp,
|
||||
'memory': memory,
|
||||
'swap': swap,
|
||||
}
|
||||
|
@ -53,27 +53,15 @@ warn_if_above = 60
|
||||
[network.1]
|
||||
network_interface = enp0s31f6
|
||||
|
||||
[drive.1]
|
||||
; NVMe 256GB2
|
||||
; TODO NVMe 49 warn, 55 limit
|
||||
id = nvme-WDC_PC_SN520_SDAPNUW-256G-1002_183873801941
|
||||
[drive_temp.1]
|
||||
; NVMe drives
|
||||
; TODO: 49 warn, 55 limit
|
||||
type = nvme
|
||||
warn_if_above = 50
|
||||
[drive.2]
|
||||
; HDD 12TB1
|
||||
; TODO HDD 39 warn, 45 limit
|
||||
id = ata-TOSHIBA_MG07ACA12TE_X1E0A0WKF95G
|
||||
warn_if_above = 40
|
||||
[drive.3]
|
||||
; HDD 3TB1
|
||||
id = ata-WDC_WD30EFRX-68EUZN0_WD-WCC4N1173157
|
||||
warn_if_above = 40
|
||||
[drive.4]
|
||||
; HDD 3TB2
|
||||
id = ata-WDC_WD30EFRX-68EUZN0_WD-WMC4N0564095
|
||||
warn_if_above = 40
|
||||
[drive.5]
|
||||
; HDD 4TB1
|
||||
id = ata-WDC_WD40EFRX-68N32N0_WD-WCC7K0CPF0N1
|
||||
[drive_temp.2]
|
||||
; HDD drives
|
||||
; TODO: 39 warn, 45 limit
|
||||
type = drivetemp
|
||||
warn_if_above = 40
|
||||
|
||||
; [sensor_script.1]
|
||||
|
@ -69,20 +69,14 @@ unmounted_ok = true
|
||||
warn_if_above = 0.1
|
||||
|
||||
|
||||
[drive.1]
|
||||
; Either `uuid`, `id` or `device` must be given.
|
||||
;
|
||||
; `uuid` as in /dev/disk/by-uuid/*
|
||||
;uuid = ea7099e3-320d-4eb3-a4c3-9910a9af817b
|
||||
; `id` as in /dev/disk/by-id/*
|
||||
; id = nvme-XPG_GAMMIX_S50_Lite_2K462L2JN9KG
|
||||
; `device` as in /dev/*
|
||||
device = /dev/nvme0n1
|
||||
|
||||
[drive_temp.1]
|
||||
; Either `nvme` (for NVMe drives) or `drivetemp` (for HDDs)
|
||||
type = nvme
|
||||
; Warn if temperature is above this value.
|
||||
; Unit: °C
|
||||
warn_if_above = 25
|
||||
|
||||
|
||||
[sensor_script.1]
|
||||
; The command will be executed.
|
||||
; It has to return a float (or int) and exit code 0 on success.
|
||||
|
@ -135,17 +135,24 @@ class DriveLogger(Logger):
|
||||
@classmethod
|
||||
def get_temp_from_device(cls, device: Path) -> int:
|
||||
"""
|
||||
Use `smartctl` to get HDD/SSD temperature.
|
||||
|
||||
As reading SMART data wakes up standby HDD drives, we skip them.
|
||||
|
||||
:param device: Partition path, e.g. `/dev/sda`
|
||||
:return: Temperature in celsius
|
||||
:return: Temperature in Celsius
|
||||
"""
|
||||
|
||||
# -n standby: Don't spin-up an HDD if it is in standby mode.
|
||||
# -j: JSON output.
|
||||
# -a: Print all SMART device information.
|
||||
# For NVMe, this is equivalent to: '-H -i -c -A -l error -l selftest'.
|
||||
# -H: Print health status.
|
||||
# -A: Prints only the vendor specific SMART Attributes.
|
||||
returncode, stdout, stderr = execute_capture(['smartctl', '-j', '-A', f'{device}'])
|
||||
returncode, stdout, stderr = execute_capture(['smartctl', '-n', 'standby', '-j', '-A', f'{device}'])
|
||||
|
||||
if returncode == 2 and 'Device is in STANDBY mode' in stdout:
|
||||
raise LoggerReadEx(f'Could not read drive temperature as it is in standby mode: {device}')
|
||||
if returncode != 0:
|
||||
raise LoggerReadEx(f'smartctl failed with returncode {returncode}\nstdout: {stdout}\nstderr: {stderr}')
|
||||
j = json.loads(stdout)
|
||||
|
98
src/de/p1st/monitor/loggers/drive_temp.py
Normal file
98
src/de/p1st/monitor/loggers/drive_temp.py
Normal file
@ -0,0 +1,98 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
import math
|
||||
from typing import Literal
|
||||
from pathlib import Path
|
||||
|
||||
import psutil
|
||||
|
||||
from de.p1st.monitor import datetime_util
|
||||
from de.p1st.monitor.logger import Logger
|
||||
from de.p1st.monitor.logger_ex import LoggerReadEx
|
||||
from de.p1st.monitor.warn_data import WarnData
|
||||
|
||||
|
||||
class DriveTempLogger(Logger):
    """
    Logs the lowest and highest current temperature among all sensors
    that `psutil` reports for one sensor group (`drivetemp` or `nvme`).
    """

    def __init__(self,
                 type_: Literal['drivetemp', 'nvme'],
                 warn_if_above: int = None,
                 warn_threshold: int = 1,
                 warn_data_range: int = 1,
                 ):
        """
        :param type_: Sensor group to read. HDD -> drivetemp, NVMe -> nvme
        :param warn_if_above: Warn limit in °C, may be `None`.
            The critical limit is derived from it as `warn_if_above + 10`.
        :param warn_threshold: Forwarded unchanged to `Logger.__init__`.
        :param warn_data_range: Forwarded unchanged to `Logger.__init__`.
        """

        # `warn_if_above` defaults to None; adding 10 to None would raise a TypeError.
        # NOTE(review): assumes the base `Logger` accepts `None` for the critical limit
        # just like for the warn limit - confirm against `Logger.__init__`.
        critical_if_above = None if warn_if_above is None else warn_if_above + 10
        super().__init__(warn_threshold,
                         warn_data_range,
                         warn_if_above,
                         critical_if_above
                         )

        self.type = type_

    def get_warn_data(self, data: list[any]) -> WarnData:
        """
        :param data: One row as produced by `read_data()`: `[datetime, min_temp, max_temp]`
        :return: Warn data whose value is the maximum temperature of the row
        """
        min_temp = data[1]
        max_temp = data[2]
        message = f'Temperature of drive type {self.type} is in range {min_temp}:{max_temp}'
        return WarnData(data[0], max_temp, message)

    def read_data(self) -> list[any]:
        """
        :return: `[current datetime, min temperature, max temperature]`
        """
        min_temp, max_temp = self.get_drive_temp()
        return [
            datetime_util.now(),
            min_temp,
            max_temp
        ]

    def data_schema(self) -> list[str]:
        """
        :return: Column descriptions matching the order of `read_data()`
        """
        return ['datetime#Date', 'float#MinTemperature', 'float#MaxTemperature']

    def get_log_file(self) -> Path:
        """
        :return: Path of the CSV log file; one file per sensor type
        """
        return self.get_log_dir() / f'drive-temp_{self.type}.csv'

    #
    # HELPERS
    #

    def get_drive_temp(self) -> tuple[float, float]:
        """
        Use `psutil` Python library to get HDD/SSD temperature.
        https://psutil.readthedocs.io/en/latest/index.html#psutil.sensors_temperatures

        Loading the kernel module may be required for `drivetemp` entries (unconfirmed):
        sudo modprobe drivetemp

        Example output:
        {
            'nvme': [
                shwtemp(label='Composite', current=37.85, high=81.85, critical=85.85)
            ],
            'pch_skylake': [...],
            'coretemp': [...],
            'drivetemp': [
                shwtemp(label='', current=23.0, high=65.0, critical=85.0),
                shwtemp(label='', current=25.0, high=55.0, critical=70.0),
                shwtemp(label='', current=24.0, high=60.0, critical=85.0),
                shwtemp(label='', current=22.0, high=60.0, critical=85.0)]
        }

        Problem: If one has multiple drives attached, they can't be distinguished.
        https://github.com/giampaolo/psutil/issues/1902

        Therefore, we currently accumulate the maximum and minimum values of all drives of the same type.

        :return: `(min_temp, max_temp)` in Celsius over all sensors of `self.type`
        :raises LoggerReadEx: If the sensor group is missing or has no entries
        """
        data = psutil.sensors_temperatures(fahrenheit=False)
        if self.type not in data:
            raise LoggerReadEx(f'Sensor {self.type} not found')

        sensors = data[self.type]
        if not sensors:
            raise LoggerReadEx(f'Sensor {self.type} has no entries')

        # `sensors` is non-empty here, so min()/max() cannot fail on an empty sequence.
        temps = [sensor.current for sensor in sensors]
        return min(temps), max(temps)
|
@ -54,7 +54,7 @@ class TempLogger(Logger):
|
||||
|
||||
def get_temp(self) -> float:
|
||||
"""
|
||||
:return: Temperature in celsius
|
||||
:return: Temperature in Celsius
|
||||
"""
|
||||
data = psutil.sensors_temperatures(fahrenheit=False)
|
||||
if self.name not in data:
|
||||
|
Loading…
Reference in New Issue
Block a user