WIP support nvidia power management
This commit is contained in:
parent
564796b81d
commit
b5920374a7
|
@ -3,7 +3,7 @@ local i_o = require 'i_o'
|
|||
|
||||
return function(update_freq, config, common, width, point)
|
||||
local NA = 'N/A'
|
||||
local NVIDIA_EXE = 'nvidia-settings'
|
||||
local NVIDIA_EXE = 'nvidia-smi'
|
||||
|
||||
local geo = config.geometry
|
||||
local sep_spacing = geo.sep_spacing
|
||||
|
@ -22,29 +22,17 @@ return function(update_freq, config, common, width, point)
|
|||
|
||||
-- vars to process the nv settings glob
|
||||
--
|
||||
-- glob will be of the form:
|
||||
-- <used_mem>
|
||||
-- <total_mem>
|
||||
-- <temp>
|
||||
-- <gpu_freq>,<mem_freq>
|
||||
-- graphics=<gpu_util>, memory=<mem_util>, video=<vid_util>, PCIe=<pci_util>
|
||||
local NV_QUERY = NVIDIA_EXE..
|
||||
' -t'..
|
||||
' -q UsedDedicatedGPUmemory'..
|
||||
' -q TotalDedicatedGPUmemory'..
|
||||
' -q ThermalSensorReading'..
|
||||
' -q [gpu:0]/GPUCurrentClockFreqs'..
|
||||
' -q [gpu:0]/GPUutilization'..
|
||||
' 2>/dev/null'
|
||||
|
||||
local NV_REGEX = '(%d+)\n'..
|
||||
'(%d+)\n'..
|
||||
'(%d+)\n'..
|
||||
'(%d+),(%d+)\n'..
|
||||
'graphics=(%d+), memory=%d+, video=(%d+), PCIe=%d+\n'
|
||||
local NV_QUERY = NVIDIA_EXE..
|
||||
' --query-gpu=memory.used,memory.total,temperature.gpu,clocks.gr,clocks.mem,utilization.gpu,utilization.decoder'..
|
||||
' --format=csv,noheader,nounits'
|
||||
|
||||
local NV_REGEX = '(%d+), (%d+), (%d+), (%d+), (%d+), (%d+), (%d+)'
|
||||
|
||||
local mod_state = {
|
||||
error = false,
|
||||
gpu_frequency = 0,
|
||||
memory_frequency = 0,
|
||||
used_memory = 0,
|
||||
total_memory = 0,
|
||||
temp_reading = 0,
|
||||
|
@ -52,6 +40,11 @@ return function(update_freq, config, common, width, point)
|
|||
vid_utilization = 0
|
||||
}
|
||||
|
||||
local sleep_token = 0
|
||||
local sleep_limit = 10
|
||||
local gpu_idle_freq_limit = 250
|
||||
|
||||
-- TODO ensure this file exists
|
||||
local runtime_status_file = config.dev_power..'/runtime_status'
|
||||
|
||||
local want_nvidia_query = config.show_temp or config.show_clock
|
||||
|
@ -59,7 +52,28 @@ return function(update_freq, config, common, width, point)
|
|||
|
||||
local update_state = function()
|
||||
local is_active = i_o.read_file(runtime_status_file, nil, '*l') == 'active'
|
||||
if is_active and want_nvidia_query then
|
||||
-- this will make the nvidia-smi query fire only so often when the clock
|
||||
-- is below a certain threshold. This is necessary to get the GPU to
|
||||
-- suspend when nothing is 'using' it, at the cost of lowering the
|
||||
-- response time for when it eventually is used again. Maybe won't
|
||||
-- matter that much since the jobs that use the GPU tend to be long
|
||||
-- anyways, so a few seconds won't hurt. Furthermore, there are ways to
|
||||
-- wake this up manually by detecting certain processes that will likely
|
||||
-- use the GPU (ffmpeg and friends) or detecting processes that are
|
||||
-- holding /dev/nvidia* files (which isn't foolproof but it will capture
|
||||
-- most events)
|
||||
if is_active and
|
||||
mod_state.gpu_frequency > 0 and
|
||||
mod_state.gpu_frequency < gpu_idle_freq_limit then
|
||||
if sleep_token < sleep_limit - 1 then
|
||||
sleep_token = sleep_token + 1
|
||||
else
|
||||
sleep_token = 0
|
||||
end
|
||||
else
|
||||
sleep_token = 0
|
||||
end
|
||||
if is_active and want_nvidia_query and sleep_token == 0 then
|
||||
local nvidia_settings_glob = i_o.execute_cmd(NV_QUERY)
|
||||
if nvidia_settings_glob == nil then
|
||||
mod_state.error = 'Error'
|
||||
|
@ -72,11 +86,13 @@ return function(update_freq, config, common, width, point)
|
|||
mod_state.gpu_utilization,
|
||||
mod_state.vid_utilization
|
||||
= __string_match(nvidia_settings_glob, NV_REGEX)
|
||||
mod_state.gpu_frequency = tonumber(mod_state.gpu_frequency)
|
||||
mod_state.error = false
|
||||
end
|
||||
elseif is_active then
|
||||
mod_state.error = false
|
||||
else
|
||||
mod_state.gpu_frequency = 0
|
||||
mod_state.error = 'Off'
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue