# HG changeset patch
# User IBBoard
# Date 1538423546 -3600
# Node ID e245a271fc445343665cf8e014aa8222b29335e8
# Parent b8c7a89c4ebdd581af07250e43a4e39d2a651f4a
Add scripts for recording/displaying CPU and GPU activity

Graphing is based on sensors-graph, but recording is a Python script
because more processing is required than logging temperatures.

CPU/GPU recording will hopefully be useful to work out what causes
occasional lags in games - is it just I/O Wait (despite the SSD)?

diff -r b8c7a89c4ebd -r e245a271fc44 load-graph
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/load-graph Mon Oct 01 20:52:26 2018 +0100
@@ -0,0 +1,48 @@
+#! /bin/bash
+#
+# Graph the CPU/GPU load recorded by load-record and show the images in eog.
+#
+# Usage: load-graph [START]
+#   START is an rrdtool time specification for the start of the graphs
+#   (default: -6h).
+
+start="-6h"
+
+if [ $# -eq 1 ]
+then
+    start=$1
+fi
+
+db="$HOME/.load.rrd"
+
+# Render into a private temporary directory so that the clean-up can only
+# ever remove files created by this run.
+outdir=$(mktemp -d) || exit 1
+trap 'rm -rf "$outdir"' EXIT
+
+# NOTE(review): every DEF reads the AVERAGE consolidation, but load-record
+# creates only a one hour AVERAGE archive (the longer archives are MAX), so
+# ranges beyond an hour may come out empty - confirm against the RRA setup.
+rrdtool graph "$outdir/load-percent.png" -w 1280 -h 1024 -a PNG --start "$start" \
+    --vertical-label "Usage (%)" \
+    DEF:core1="$db":core1:AVERAGE \
+    DEF:core2="$db":core2:AVERAGE \
+    DEF:core3="$db":core3:AVERAGE \
+    DEF:core4="$db":core4:AVERAGE \
+    DEF:CPU="$db":core_avg:AVERAGE \
+    DEF:GPU="$db":GPU:AVERAGE \
+    LINE1:core1#99000030:"Core 1" \
+    LINE1:core2#99000030:"Core 2" \
+    LINE1:core3#99000030:"Core 3" \
+    LINE1:core4#99000030:"Core 4" \
+    LINE2:CPU#990000:"CPU" \
+    LINE2:GPU#009900:"GPU" \
+&& rrdtool graph "$outdir/load-type.png" -w 1280 -h 1024 -a PNG --start "$start" \
+    --vertical-label "Usage (%)" \
+    DEF:user="$db":user:AVERAGE \
+    DEF:system="$db":system:AVERAGE \
+    DEF:iowait="$db":iowait:AVERAGE \
+    AREA:iowait#999999:"I/O Wait" \
+    STACK:system#000099:"System" \
+    STACK:user#999900:"User" \
+&& eog "$outdir"/load-*.png
diff -r b8c7a89c4ebd -r e245a271fc44 load-record
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/load-record Mon Oct 01 20:52:26 2018 +0100
@@ -0,0 +1,138 @@
+#! /usr/bin/env python3
+"""Record CPU and GPU load in an RRD database (~/.load.rrd).
+
+Each database update is built from SAMPLES readings: the load averages, the
+per-core and overall CPU usage and the GPU usage are averaged, while the
+user/system/iowait CPU state percentages record their maximum.
+"""
+
+import psutil
+import os
+import os.path
+import rrdtool
+import time
+import subprocess
+
+from pathlib import Path
+
+home = str(Path.home())
+DB = os.path.join(home, ".load.rrd")
+
+# Number of per-core data sources (core1..core4) in the database
+CORES = 4
+# Readings taken for each database update and the pause after each reading
+SAMPLES = 10
+SAMPLE_DELAY = 0.1
+# Number of trailing fields (user, system, iowait) that record their maximum
+MAX_FIELDS = 3
+
+
+def create_db():
+    """Create the RRD database unless it already exists."""
+    if os.path.exists(DB):
+        return
+    rrdtool.create(DB, '--step', '1',
+                   'DS:load_1:GAUGE:2:0.0:100.0',
+                   'DS:load_5:GAUGE:2:0.0:100.0',
+                   'DS:load_15:GAUGE:2:0.0:100.0',
+                   'DS:core1:GAUGE:2:0.0:100.0',
+                   'DS:core2:GAUGE:2:0.0:100.0',
+                   'DS:core3:GAUGE:2:0.0:100.0',
+                   'DS:core4:GAUGE:2:0.0:100.0',
+                   'DS:core_avg:GAUGE:2:0.0:100.0',
+                   'DS:GPU:GAUGE:2:0.0:100.0',
+                   'DS:user:GAUGE:2:0.0:100.0',
+                   'DS:system:GAUGE:2:0.0:100.0',
+                   'DS:iowait:GAUGE:2:0.0:100.0',
+                   'RRA:AVERAGE:0.5:1:3600',  # 1hr of 1s interval (averaged)
+                   'RRA:MAX:0.5:60:360',  # 6hrs of 1 minute
+                   'RRA:MAX:0.5:300:8640')  # and 1mo of 5m resolution
+
+
+def parse_gpu_percent(output):
+    """Return the mean GPU utilisation (%) found in nvidia-smi stats output.
+
+    A reading is the fourth field of a four-field comma-separated line; any
+    other line is ignored. Returns 0.0 when there are no readings, so that
+    missing GPU data cannot cause a division by zero.
+    """
+    readings = []
+    for line in output.split('\n'):
+        line_parts = line.split(',')
+        if len(line_parts) != 4:
+            continue
+        try:
+            readings.append(int(line_parts[3]))
+        except ValueError:
+            # Not a number, so not a usable reading
+            continue
+    if not readings:
+        return 0.0
+    return sum(readings) / len(readings)
+
+
+def read_gpu_percent():
+    """Return the current GPU utilisation (%), or 0.0 if it is unavailable."""
+    try:
+        nv_smi = subprocess.run(
+            ["nvidia-smi", "stats", "-d", "gpuUtil", "-c", "1"],
+            stdout=subprocess.PIPE, universal_newlines=True)
+    except OSError:
+        # nvidia-smi is not installed or could not be started
+        return 0.0
+    return parse_gpu_percent(nv_smi.stdout)
+
+
+def take_sample():
+    """Return one reading of every field, in data source order."""
+    cpu_pcs = psutil.cpu_percent(percpu=True)
+    cpu_pc = sum(cpu_pcs) / len(cpu_pcs)
+    #TODO: If cpu_pc > 25% (?) log top processes
+    cpu_states_pc = psutil.cpu_times_percent()
+    loads = os.getloadavg()
+    gpu = read_gpu_percent()
+    # Fit the per-core readings to the fixed core1..core4 data sources: extra
+    # cores are dropped and missing cores are left as None (no data)
+    cores = list(cpu_pcs[:CORES])
+    cores += [None] * (CORES - len(cores))
+    return (list(loads) + cores +
+            [cpu_pc, gpu, cpu_states_pc.user, cpu_states_pc.system,
+             cpu_states_pc.iowait])
+
+
+def summarise(samples):
+    """Reduce a list of readings to the values for one database update.
+
+    Fields are averaged, except that the last MAX_FIELDS take their maximum
+    and fields without data become 'U' (rrdtool's unknown value).
+    """
+    fields = list(zip(*samples))
+    averaged = len(fields) - MAX_FIELDS
+    vals = []
+    for i, field in enumerate(fields):
+        if None in field:
+            vals.append('U')
+        elif i < averaged:
+            vals.append(sum(field) / len(field))
+        else:
+            vals.append(max(field))
+    return vals
+
+
+def main():
+    """Create the database if needed, then record load until interrupted."""
+    create_db()
+    # The first psutil percentage readings are meaningless, so discard them
+    psutil.cpu_percent(percpu=True)
+    psutil.cpu_times_percent()
+    while True:
+        samples = []
+        for _ in range(SAMPLES):
+            samples.append(take_sample())
+            time.sleep(SAMPLE_DELAY)
+        vals = summarise(samples)
+        rrdtool.update(DB, "N:{}".format(':'.join(str(val) for val in vals)))
+
+
+if __name__ == "__main__":
+    main()