Mercurial > repos > other > usr-local-bin
view load-record @ 38:737061eac1d0 default tip
Skip more large videos and some permission issues
author | IBBoard <dev@ibboard.co.uk> |
---|---|
date | Wed, 18 Aug 2021 20:40:17 +0100 |
parents | ccc8f0903d2e |
children |
line wrap: on
line source
#! /usr/bin/env python3
"""Record system load, CPU, GPU and memory statistics into an RRD file.

Ten interim samples are taken per second and collapsed into one RRD update
per second. GPU utilisation is read from a long-running ``nvidia-smi dmon``
process on a background thread.
"""

import psutil
import os
import os.path
import rrdtool
import sched
import subprocess
import threading
from pathlib import Path

home = str(Path.home())
DB = os.path.join(home, ".load.rrd")
cpus = psutil.cpu_count()

# One entry per data source: [name, type, heartbeat, min, max].
# The order matters: it defines the index of each value in the RRD update.
config = [
    ['load_1', 'GAUGE', 2, 0, 100],
    ['load_5', 'GAUGE', 2, 0, 100],
    ['load_15', 'GAUGE', 2, 0, 100],
    *[[f'core{i+1}', 'GAUGE', 2, 0, 100] for i in range(cpus)],
    ['core_avg', 'GAUGE', 2, 0, 100],
    ['GPU', 'GAUGE', 2, 0, 100],
    ['user', 'GAUGE', 2, 0, 100],
    ['system', 'GAUGE', 2, 0, 100],
    ['iowait', 'GAUGE', 2, 0, 100],
    ['mem_used', 'GAUGE', 2, 0, 100],
    ['mem_buffers', 'GAUGE', 2, 0, 100],
]
fields = len(config)


def needs_creating():
    """Return True when the RRD file is missing or does not match ``config``.

    For each data source the index, type and minimal heartbeat reported by
    ``rrdtool.info`` are compared against ``config``.
    """
    if not os.path.exists(DB):
        return True

    cur_config = rrdtool.info(DB)
    for i, (key, datatype, heartbeat, _minval, _maxval) in enumerate(config):
        # A missing or re-ordered data source means the layout has changed
        if cur_config.get(f"ds[{key}].index") != i:
            return True
        # We don't appear to be able to check min/max from info
        if cur_config[f"ds[{key}].type"] != datatype or \
                cur_config[f"ds[{key}].minimal_heartbeat"] != heartbeat:
            return True
    # TODO: Check RRA definitions based on rra[i].cf, rra[i].pdp_per_row and rra[i].rows
    return False


# TODO: Add "pressure" support - relies on "psi=1" in kernel and /proc/pressure/… existing
if needs_creating():
    rrdtool.create(
        DB, '--step', '1',
        *[f'DS:{key}:{datatype}:{heartbeat}:{minval}:{maxval}'
          for key, datatype, heartbeat, minval, maxval in config],
        'RRA:AVERAGE:0.5:1:3600',  # 1hr of 1s interval (averaged)
        'RRA:MAX:0.5:60:360',  # 6hrs of 1 minute
        'RRA:MAX:0.5:300:8640')  # and 1mo of 5m resolution

samples = 10
gpu_idx = 3 + cpus + 1
last_avg_idx = 3 + cpus + 2  # load + CPUs + CPU average + GPU
total_mem = psutil.virtual_memory().total

# Note: We use some global variables on the assumption that:
# 1) We just need "the latest" gpu_val value
# 2) Because gpu_val is numeric then it can't be "inconsistent"
# 3) The use of the scheduler and its priorities ensures that
#    the record_interims and record_record functions happen in sequence
# 4) The record_interims function takes under 1/samples seconds to run
#
# If this ever fails, we need to look at multiprocessing.Value and .Array
gpu_val = 0
interims = [[0] * samples for _ in range(fields)]
pos = 0


def parse_nvidia_output():
    """Keep ``gpu_val`` updated from the output of ``nvidia-smi dmon -s u``.

    Intended to run on its own thread: it blocks on the child's stdout and
    returns once the child closes it, instead of spinning on empty reads.
    """
    global gpu_val
    # The context manager closes the pipe and reaps the child when we finish
    with subprocess.Popen(['nvidia-smi', 'dmon', '-s', 'u'],
                          stdout=subprocess.PIPE,
                          universal_newlines=True) as nv_smi:
        # readline blocks, so this thread will update as and when new values
        # are available; it only returns '' at EOF, which ends the loop
        for line in iter(nv_smi.stdout.readline, ''):
            if line.startswith('#'):
                # Header lines
                continue
            columns = line.split()
            # Presumably the second column is the GPU utilisation percentage
            # (the value the fixed slice line[8:11] used to pick out) - verify
            # against the local nvidia-smi output. Splitting on whitespace
            # avoids depending on exact column widths.
            try:
                gpu_val = int(columns[1])
            except (IndexError, ValueError):
                # Blank or non-numeric row: keep the last good value rather
                # than letting the exception kill this thread
                continue


def record_interims():
    """Store one interim sample of every metric and re-schedule itself.

    Runs ``samples`` times per second; each run fills one column (``pos``)
    of ``interims``.
    """
    global pos
    scheduler.enter(1.0/samples, 1, record_interims)
    cur_pos = pos
    pos = (pos + 1) % samples

    cpu_pcs = psutil.cpu_percent(percpu=True)
    cpu_pc = sum(cpu_pcs) / cpus
    #TODO: If cpu_pc > 25% (?) log top processes
    cpu_states_pc = psutil.cpu_times_percent()
    loads = os.getloadavg()
    mem = psutil.virtual_memory()

    # Same order as the data sources in config
    values = [
        loads[0],
        loads[1],
        loads[2],
        *cpu_pcs,
        cpu_pc,
        0,  # Placeholder for GPU
        cpu_states_pc.user,
        cpu_states_pc.system,
        cpu_states_pc.iowait,
        (mem.used / total_mem) * 100,
        ((mem.buffers + mem.cached) / total_mem) * 100,
    ]
    for row, value in zip(interims, values):
        row[cur_pos] = value


def record_record():
    """Collapse the interim samples into one RRD update and re-schedule."""
    scheduler.enter(1, 2, record_record)
    vals = [
        # Average most values, but take the max CPU state value and memory usage
        sum(interim_vals) / samples if i < last_avg_idx else max(interim_vals)
        for i, interim_vals in enumerate(interims)
    ]
    # The GPU slot only holds placeholders, so use the latest reading instead
    vals[gpu_idx] = gpu_val
    rrdtool.update(DB, "N:{}".format(':'.join(str(val) for val in vals)))


nv_thread = threading.Thread(target=parse_nvidia_output)
nv_thread.start()

scheduler = sched.scheduler()
# Let record_interims run and schedule itself
record_interims()
# Schedule record recording for 1s later so we've got a set of values, and then it'll schedule future calls
scheduler.enter(1, 2, record_record)
scheduler.run()