Skip to content

Commit f04577f

Browse files
retrixe17314642
authored and committed
gpu_fdinfo: read GPU throttling status on Xe/i915
1 parent 2674cf3 commit f04577f

File tree

2 files changed

+115
-19
lines changed

2 files changed

+115
-19
lines changed

src/gpu_fdinfo.cpp

+96-19
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,11 @@ int GPU_fdinfo::get_gpu_load()
285285
return result;
286286
}
287287

288+
// File-name suffixes of the per-reason throttle files, grouped by the
// category they are reported under. Each name is appended to a
// driver-specific path prefix by load_xe_i915_throttle_reasons().
//
// These are read-only lookup tables, so they are declared `static const`:
// they cannot be modified at runtime and do not leak into other translation
// units as mutable globals.
static const std::vector<std::string> intel_throttle_power = {"reason_pl1", "reason_pl2"};
static const std::vector<std::string> intel_throttle_current = {"reason_pl4", "reason_vr_tdc"};
static const std::vector<std::string> intel_throttle_temp = {
    "reason_prochot", "reason_ratl", "reason_thermal", "reason_vr_thermalert"};
292+
288293
void GPU_fdinfo::find_i915_gt_dir()
289294
{
290295
std::string device = "/sys/bus/pci/devices/" + pci_dev + "/drm";
@@ -299,20 +304,24 @@ void GPU_fdinfo::find_i915_gt_dir()
299304
}
300305
}
301306

302-
device += "/gt_act_freq_mhz";
303-
304-
if (!fs::exists(device)) {
305-
SPDLOG_WARN(
306-
"Intel gt file ({}) not found. GPU clock will not be available.",
307-
device
308-
);
309-
return;
310-
}
311-
312-
gpu_clock_stream.open(device);
307+
auto gpu_clock_path = device + "/gt_act_freq_mhz";
308+
gpu_clock_stream.open(gpu_clock_path);
313309

314310
if (!gpu_clock_stream.good())
315311
SPDLOG_WARN("Intel i915 gt dir: failed to open {}", device);
312+
313+
// Assuming gt0 since all recent GPUs have the RCS engine on gt0, and latest GPUs need Xe anyway
314+
auto throttle_folder = device + "/gt/gt0/throttle_";
315+
auto throttle_status_path = throttle_folder + "reason_status";
316+
317+
throttle_status_stream.open(throttle_status_path);
318+
if (!throttle_status_stream.good()) {
319+
SPDLOG_WARN("Intel i915 gt dir: failed to open {}", throttle_status_path);
320+
} else {
321+
load_xe_i915_throttle_reasons(throttle_folder, intel_throttle_power, throttle_power_streams);
322+
load_xe_i915_throttle_reasons(throttle_folder, intel_throttle_current, throttle_current_streams);
323+
load_xe_i915_throttle_reasons(throttle_folder, intel_throttle_temp, throttle_temp_streams);
324+
}
316325
}
317326

318327
void GPU_fdinfo::find_xe_gt_dir()
@@ -351,19 +360,47 @@ void GPU_fdinfo::find_xe_gt_dir()
351360
}
352361

353362
if (!has_rcs) {
354-
SPDLOG_WARN(
355-
"rcs not found inside \"{}\". GPU clock will not be available.",
356-
device
357-
);
363+
SPDLOG_WARN("rcs not found inside \"{}\". GPU clock will not be available.", device);
358364
return;
359365
}
360366

361-
device += "/freq0/act_freq";
362-
363-
gpu_clock_stream.open(device);
367+
auto gpu_clock_path = device + "/freq0/act_freq";
368+
gpu_clock_stream.open(gpu_clock_path);
364369

365370
if (!gpu_clock_stream.good())
366-
SPDLOG_WARN("Intel xe gt dir: failed to open {}", device);
371+
SPDLOG_WARN("Intel xe gt dir: failed to open {}", gpu_clock_path);
372+
373+
auto throttle_folder = device + "/freq0/throttle/";
374+
auto throttle_status_path = throttle_folder + "status";
375+
376+
throttle_status_stream.open(throttle_status_path);
377+
if (!throttle_status_stream.good()) {
378+
SPDLOG_WARN("Intel xe gt dir: failed to open {}", throttle_status_path);
379+
} else {
380+
load_xe_i915_throttle_reasons(throttle_folder, intel_throttle_power, throttle_power_streams);
381+
load_xe_i915_throttle_reasons(throttle_folder, intel_throttle_current, throttle_current_streams);
382+
load_xe_i915_throttle_reasons(throttle_folder, intel_throttle_temp, throttle_temp_streams);
383+
}
384+
}
385+
386+
void GPU_fdinfo::load_xe_i915_throttle_reasons(
387+
std::string throttle_folder,
388+
std::vector<std::string> throttle_reasons,
389+
std::vector<std::ifstream>& throttle_reason_streams
390+
) {
391+
for (const auto& throttle_reason : throttle_reasons) {
392+
std::string throttle_path = throttle_folder + throttle_reason;
393+
if (!fs::exists(throttle_path)) {
394+
SPDLOG_WARN("Intel xe/i915 gt dir: Throttle file {} not found", throttle_path);
395+
continue;
396+
}
397+
auto throttle_stream = std::ifstream(throttle_path);
398+
if (!throttle_stream.good()) {
399+
SPDLOG_WARN("Intel xe/i915 gt dir: failed to open {}", throttle_path);
400+
continue;
401+
}
402+
throttle_reason_streams.push_back(std::move(throttle_stream));
403+
}
367404
}
368405

369406
int GPU_fdinfo::get_gpu_clock()
@@ -383,6 +420,41 @@ int GPU_fdinfo::get_gpu_clock()
383420
return std::stoi(clock_str);
384421
}
385422

423+
// Re-reads the first line of every stream in `throttle_reason_streams` and
// returns true as soon as one of them reads exactly "1". Returns false when
// none does, including when the vector is empty.
bool GPU_fdinfo::check_throttle_reasons(std::vector<std::ifstream>& throttle_reason_streams)
{
    for (auto& throttle_reason_stream : throttle_reason_streams) {
        std::string throttle_reason_str;

        // Drop any error state left by an earlier read. seekg() does nothing
        // while failbit is set, so without this a single failed read would
        // make this stream report "not throttled" on every later call.
        throttle_reason_stream.clear();
        throttle_reason_stream.seekg(0);
        std::getline(throttle_reason_stream, throttle_reason_str);

        if (throttle_reason_str == "1")
            return true;
    }

    return false;
}
436+
437+
// Reports the current throttling state.
//
// Returns NONE when the status stream was never opened or when its first
// line is anything other than "1". Otherwise the reason groups are checked in
// a fixed order - power, then current, then temperature - and the first group
// containing an active reason determines the result. If the status reads "1"
// but no known reason is active, OTHER is returned.
GPU_throttle_status GPU_fdinfo::get_throttling_status()
{
    if (!throttle_status_stream.is_open())
        return GPU_throttle_status::NONE;

    std::string throttle_status_str;

    // Drop any error state left by an earlier read. seekg() does nothing
    // while failbit is set, so without this a single failed read would make
    // every later call report NONE.
    throttle_status_stream.clear();
    throttle_status_stream.seekg(0);
    std::getline(throttle_status_stream, throttle_status_str);

    if (throttle_status_str != "1")
        return GPU_throttle_status::NONE;

    if (check_throttle_reasons(throttle_power_streams))
        return GPU_throttle_status::POWER;

    if (check_throttle_reasons(throttle_current_streams))
        return GPU_throttle_status::CURRENT;

    if (check_throttle_reasons(throttle_temp_streams))
        return GPU_throttle_status::TEMP;

    return GPU_throttle_status::OTHER;
}
457+
386458
void GPU_fdinfo::main_thread()
387459
{
388460
while (!stop_thread) {
@@ -396,6 +468,11 @@ void GPU_fdinfo::main_thread()
396468
metrics.memoryUsed = get_memory_used();
397469
metrics.powerUsage = get_power_usage();
398470
metrics.CoreClock = get_gpu_clock();
471+
auto throttling = get_throttling_status();
472+
metrics.is_power_throttled = throttling == GPU_throttle_status::POWER;
473+
metrics.is_current_throttled = throttling == GPU_throttle_status::CURRENT;
474+
metrics.is_temp_throttled = throttling == GPU_throttle_status::TEMP;
475+
metrics.is_other_throttled = throttling == GPU_throttle_status::OTHER;
399476
metrics.temp = hwmon_sensors["temp"].val / 1000;
400477
metrics.fan_speed = hwmon_sensors["fan_speed"].val;
401478
metrics.voltage = hwmon_sensors["voltage"].val;

src/gpu_fdinfo.h

+19
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@ struct hwmon_sensor {
2828
uint64_t val = 0;
2929
};
3030

31+
// Throttling state as classified by GPU_fdinfo::get_throttling_status().
enum class GPU_throttle_status {
    NONE,     // status file not open, or it does not read "1"
    POWER,    // a power-related throttle reason is active
    CURRENT,  // a current-related throttle reason is active
    TEMP,     // a temperature-related throttle reason is active
    OTHER,    // status reads "1" but no known reason file is active
};
38+
3139
class GPU_fdinfo {
3240
private:
3341
bool init = false;
@@ -81,6 +89,17 @@ class GPU_fdinfo {
8189
void find_xe_gt_dir();
8290
int get_gpu_clock();
8391

92+
std::ifstream throttle_status_stream;
93+
std::vector<std::ifstream> throttle_power_streams;
94+
std::vector<std::ifstream> throttle_current_streams;
95+
std::vector<std::ifstream> throttle_temp_streams;
96+
void load_xe_i915_throttle_reasons(
97+
std::string throttle_folder,
98+
std::vector<std::string> throttle_reasons,
99+
std::vector<std::ifstream> &throttle_reason_streams);
100+
bool check_throttle_reasons(std::vector<std::ifstream> &throttle_reason_streams);
101+
GPU_throttle_status get_throttling_status();
102+
84103
public:
85104
GPU_fdinfo(const std::string module, const std::string pci_dev)
86105
: module(module)

0 commit comments

Comments
 (0)