diff --git a/src/modules/graphics.lua b/src/modules/graphics.lua index 27cfe7c..1a5786a 100644 --- a/src/modules/graphics.lua +++ b/src/modules/graphics.lua @@ -3,7 +3,7 @@ local i_o = require 'i_o' return function(update_freq, config, common, width, point) local NA = 'N/A' - local NVIDIA_EXE = 'nvidia-settings' + local NVIDIA_EXE = 'nvidia-smi' local geo = config.geometry local sep_spacing = geo.sep_spacing @@ -22,29 +22,17 @@ return function(update_freq, config, common, width, point) -- vars to process the nv settings glob -- - -- glob will be of the form: - -- - -- - -- - -- , - -- graphics=, memory=, video=, PCIe= - local NV_QUERY = NVIDIA_EXE.. - ' -t'.. - ' -q UsedDedicatedGPUmemory'.. - ' -q TotalDedicatedGPUmemory'.. - ' -q ThermalSensorReading'.. - ' -q [gpu:0]/GPUCurrentClockFreqs'.. - ' -q [gpu:0]/GPUutilization'.. - ' 2>/dev/null' - local NV_REGEX = '(%d+)\n'.. - '(%d+)\n'.. - '(%d+)\n'.. - '(%d+),(%d+)\n'.. - 'graphics=(%d+), memory=%d+, video=(%d+), PCIe=%d+\n' + local NV_QUERY = NVIDIA_EXE.. + ' --query-gpu=memory.used,memory.total,temperature.gpu,clocks.gr,clocks.mem,utilization.gpu,utilization.decoder'.. 
+ ' --format=csv,noheader,nounits' + + local NV_REGEX = '(%d+), (%d+), (%d+), (%d+), (%d+), (%d+), (%d+)' local mod_state = { error = false, + gpu_frequency = 0, + memory_frequency = 0, used_memory = 0, total_memory = 0, temp_reading = 0, @@ -52,6 +40,11 @@ return function(update_freq, config, common, width, point) vid_utilization = 0 } + local sleep_token = 0 + local sleep_limit = 10 + local gpu_idle_freq_limit = 250 + + -- TODO ensure this file exists local runtime_status_file = config.dev_power..'/runtime_status' local want_nvidia_query = config.show_temp or config.show_clock @@ -59,7 +52,28 @@ return function(update_freq, config, common, width, point) local update_state = function() local is_active = i_o.read_file(runtime_status_file, nil, '*l') == 'active' - if is_active and want_nvidia_query then + -- this will make the nvidia-smi query fire only so often when the clock + -- is below a certain threshold. This is necessary to get the GPU to + -- suspend when nothing is 'using' it, at the cost of lowering the + -- response time for when it eventually is used again. Maybe won't + -- matter that much since the jobs that use the GPU tend to be long + -- anyways, so a few seconds won't hurt. 
Furthermore, there are ways to + -- wake this up manually by detecting certain processes that likely will + -- use the GPU (ffmpeg and friends) or detecting processes that are + -- holding /dev/nvidia* files (which isn't foolproof but it will capture + -- most events) + if is_active and + mod_state.gpu_frequency > 0 and + mod_state.gpu_frequency < gpu_idle_freq_limit then + if sleep_token < sleep_limit - 1 then + sleep_token = sleep_token + 1 + else + sleep_token = 0 + end + else + sleep_token = 0 + end + if is_active and want_nvidia_query and sleep_token == 0 then local nvidia_settings_glob = i_o.execute_cmd(NV_QUERY) if nvidia_settings_glob == nil then mod_state.error = 'Error' @@ -72,11 +86,13 @@ return function(update_freq, config, common, width, point) mod_state.gpu_utilization, mod_state.vid_utilization = __string_match(nvidia_settings_glob, NV_REGEX) + mod_state.gpu_frequency = tonumber(mod_state.gpu_frequency) or 0 mod_state.error = false end elseif is_active then mod_state.error = false else + mod_state.gpu_frequency = 0 mod_state.error = 'Off' end end