WIP support nvidia power management
This commit is contained in:
parent
564796b81d
commit
b5920374a7
|
@ -3,7 +3,7 @@ local i_o = require 'i_o'
|
||||||
|
|
||||||
return function(update_freq, config, common, width, point)
|
return function(update_freq, config, common, width, point)
|
||||||
local NA = 'N/A'
|
local NA = 'N/A'
|
||||||
local NVIDIA_EXE = 'nvidia-settings'
|
local NVIDIA_EXE = 'nvidia-smi'
|
||||||
|
|
||||||
local geo = config.geometry
|
local geo = config.geometry
|
||||||
local sep_spacing = geo.sep_spacing
|
local sep_spacing = geo.sep_spacing
|
||||||
|
@ -22,29 +22,17 @@ return function(update_freq, config, common, width, point)
|
||||||
|
|
||||||
-- vars to process the nv settings glob
|
-- vars to process the nv settings glob
|
||||||
--
|
--
|
||||||
-- glob will be of the form:
|
|
||||||
-- <used_mem>
|
|
||||||
-- <total_mem>
|
|
||||||
-- <temp>
|
|
||||||
-- <gpu_freq>,<mem_freq>
|
|
||||||
-- graphics=<gpu_util>, memory=<mem_util>, video=<vid_util>, PCIe=<pci_util>
|
|
||||||
local NV_QUERY = NVIDIA_EXE..
|
|
||||||
' -t'..
|
|
||||||
' -q UsedDedicatedGPUmemory'..
|
|
||||||
' -q TotalDedicatedGPUmemory'..
|
|
||||||
' -q ThermalSensorReading'..
|
|
||||||
' -q [gpu:0]/GPUCurrentClockFreqs'..
|
|
||||||
' -q [gpu:0]/GPUutilization'..
|
|
||||||
' 2>/dev/null'
|
|
||||||
|
|
||||||
local NV_REGEX = '(%d+)\n'..
|
local NV_QUERY = NVIDIA_EXE..
|
||||||
'(%d+)\n'..
|
' --query-gpu=memory.used,memory.total,temperature.gpu,clocks.gr,clocks.mem,utilization.gpu,utilization.decoder'..
|
||||||
'(%d+)\n'..
|
' --format=csv,noheader,nounits'
|
||||||
'(%d+),(%d+)\n'..
|
|
||||||
'graphics=(%d+), memory=%d+, video=(%d+), PCIe=%d+\n'
|
local NV_REGEX = '(%d+), (%d+), (%d+), (%d+), (%d+), (%d+), (%d+)'
|
||||||
|
|
||||||
local mod_state = {
|
local mod_state = {
|
||||||
error = false,
|
error = false,
|
||||||
|
gpu_frequency = 0,
|
||||||
|
memory_frequency = 0,
|
||||||
used_memory = 0,
|
used_memory = 0,
|
||||||
total_memory = 0,
|
total_memory = 0,
|
||||||
temp_reading = 0,
|
temp_reading = 0,
|
||||||
|
@ -52,6 +40,11 @@ return function(update_freq, config, common, width, point)
|
||||||
vid_utilization = 0
|
vid_utilization = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
local sleep_token = 0
|
||||||
|
local sleep_limit = 10
|
||||||
|
local gpu_idle_freq_limit = 250
|
||||||
|
|
||||||
|
-- TODO ensure this file exists
|
||||||
local runtime_status_file = config.dev_power..'/runtime_status'
|
local runtime_status_file = config.dev_power..'/runtime_status'
|
||||||
|
|
||||||
local want_nvidia_query = config.show_temp or config.show_clock
|
local want_nvidia_query = config.show_temp or config.show_clock
|
||||||
|
@ -59,7 +52,28 @@ return function(update_freq, config, common, width, point)
|
||||||
|
|
||||||
local update_state = function()
|
local update_state = function()
|
||||||
local is_active = i_o.read_file(runtime_status_file, nil, '*l') == 'active'
|
local is_active = i_o.read_file(runtime_status_file, nil, '*l') == 'active'
|
||||||
if is_active and want_nvidia_query then
|
-- this will make the nvidia-smi query fire only so often when the clock
|
||||||
|
-- is below a certain threshold. This is necessary to get the GPU to
|
||||||
|
-- suspend when nothing is 'using' it, at the cost of lowering the
|
||||||
|
-- response time for when it eventually is used again. Maybe won't
|
||||||
|
-- matter that much since the jobs that use the GPU tend to be long
|
||||||
|
-- anyways, so a few seconds won't hurt. Furthermore, there are ways to
|
||||||
|
-- wake this up manually by detecting certain processes that will likely
|
||||||
|
-- use the GPU (ffmpeg and friends) or detecting processes that are
|
||||||
|
-- holding /dev/nvidia* files (which isn't foolproof but it will capture
|
||||||
|
-- most events)
|
||||||
|
if is_active and
|
||||||
|
mod_state.gpu_frequency > 0 and
|
||||||
|
mod_state.gpu_frequency < gpu_idle_freq_limit then
|
||||||
|
if sleep_token < sleep_limit - 1 then
|
||||||
|
sleep_token = sleep_token + 1
|
||||||
|
else
|
||||||
|
sleep_token = 0
|
||||||
|
end
|
||||||
|
else
|
||||||
|
sleep_token = 0
|
||||||
|
end
|
||||||
|
if is_active and want_nvidia_query and sleep_token == 0 then
|
||||||
local nvidia_settings_glob = i_o.execute_cmd(NV_QUERY)
|
local nvidia_settings_glob = i_o.execute_cmd(NV_QUERY)
|
||||||
if nvidia_settings_glob == nil then
|
if nvidia_settings_glob == nil then
|
||||||
mod_state.error = 'Error'
|
mod_state.error = 'Error'
|
||||||
|
@ -72,11 +86,13 @@ return function(update_freq, config, common, width, point)
|
||||||
mod_state.gpu_utilization,
|
mod_state.gpu_utilization,
|
||||||
mod_state.vid_utilization
|
mod_state.vid_utilization
|
||||||
= __string_match(nvidia_settings_glob, NV_REGEX)
|
= __string_match(nvidia_settings_glob, NV_REGEX)
|
||||||
|
mod_state.gpu_frequency = tonumber(mod_state.gpu_frequency)
|
||||||
mod_state.error = false
|
mod_state.error = false
|
||||||
end
|
end
|
||||||
elseif is_active then
|
elseif is_active then
|
||||||
mod_state.error = false
|
mod_state.error = false
|
||||||
else
|
else
|
||||||
|
mod_state.gpu_frequency = 0
|
||||||
mod_state.error = 'Off'
|
mod_state.error = 'Off'
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue