@@ -285,6 +285,11 @@ int GPU_fdinfo::get_gpu_load()
285
285
return result;
286
286
}
287
287
288
// Throttle-reason file names shared by the i915 and xe code paths, grouped by
// the throttle category they are reported under. Each name is appended
// verbatim to the driver-specific throttle folder/prefix when the reason files
// are opened, so it must not carry any leading or trailing whitespace.

// Reasons reported as power throttling.
std::vector<std::string> intel_throttle_power = {"reason_pl1", "reason_pl2"};

// Reasons reported as current throttling.
std::vector<std::string> intel_throttle_current = {"reason_pl4", "reason_vr_tdc"};

// Reasons reported as temperature throttling.
std::vector<std::string> intel_throttle_temp = {
    "reason_prochot", "reason_ratl", "reason_thermal", "reason_vr_thermalert"};
292
+
288
293
void GPU_fdinfo::find_i915_gt_dir ()
289
294
{
290
295
std::string device = " /sys/bus/pci/devices/" + pci_dev + " /drm" ;
@@ -299,20 +304,24 @@ void GPU_fdinfo::find_i915_gt_dir()
299
304
}
300
305
}
301
306
302
- device += " /gt_act_freq_mhz" ;
303
-
304
- if (!fs::exists (device)) {
305
- SPDLOG_WARN (
306
- " Intel gt file ({}) not found. GPU clock will not be available." ,
307
- device
308
- );
309
- return ;
310
- }
311
-
312
- gpu_clock_stream.open (device);
307
+ auto gpu_clock_path = device + " /gt_act_freq_mhz" ;
308
+ gpu_clock_stream.open (gpu_clock_path);
313
309
314
310
if (!gpu_clock_stream.good ())
315
311
SPDLOG_WARN (" Intel i915 gt dir: failed to open {}" , device);
312
+
313
+ // Assuming gt0 since all recent GPUs have the RCS engine on gt0, and latest GPUs need Xe anyway
314
+ auto throttle_folder = device + " /gt/gt0/throttle_" ;
315
+ auto throttle_status_path = throttle_folder + " reason_status" ;
316
+
317
+ throttle_status_stream.open (throttle_status_path);
318
+ if (!throttle_status_stream.good ()) {
319
+ SPDLOG_WARN (" Intel i915 gt dir: failed to open {}" , throttle_status_path);
320
+ } else {
321
+ load_xe_i915_throttle_reasons (throttle_folder, intel_throttle_power, throttle_power_streams);
322
+ load_xe_i915_throttle_reasons (throttle_folder, intel_throttle_current, throttle_current_streams);
323
+ load_xe_i915_throttle_reasons (throttle_folder, intel_throttle_temp, throttle_temp_streams);
324
+ }
316
325
}
317
326
318
327
void GPU_fdinfo::find_xe_gt_dir ()
@@ -351,19 +360,47 @@ void GPU_fdinfo::find_xe_gt_dir()
351
360
}
352
361
353
362
if (!has_rcs) {
354
- SPDLOG_WARN (
355
- " rcs not found inside \" {}\" . GPU clock will not be available." ,
356
- device
357
- );
363
+ SPDLOG_WARN (" rcs not found inside \" {}\" . GPU clock will not be available." , device);
358
364
return ;
359
365
}
360
366
361
- device += " /freq0/act_freq" ;
362
-
363
- gpu_clock_stream.open (device);
367
+ auto gpu_clock_path = device + " /freq0/act_freq" ;
368
+ gpu_clock_stream.open (gpu_clock_path);
364
369
365
370
if (!gpu_clock_stream.good ())
366
- SPDLOG_WARN (" Intel xe gt dir: failed to open {}" , device);
371
+ SPDLOG_WARN (" Intel xe gt dir: failed to open {}" , gpu_clock_path);
372
+
373
+ auto throttle_folder = device + " /freq0/throttle/" ;
374
+ auto throttle_status_path = throttle_folder + " status" ;
375
+
376
+ throttle_status_stream.open (throttle_status_path);
377
+ if (!throttle_status_stream.good ()) {
378
+ SPDLOG_WARN (" Intel xe gt dir: failed to open {}" , throttle_status_path);
379
+ } else {
380
+ load_xe_i915_throttle_reasons (throttle_folder, intel_throttle_power, throttle_power_streams);
381
+ load_xe_i915_throttle_reasons (throttle_folder, intel_throttle_current, throttle_current_streams);
382
+ load_xe_i915_throttle_reasons (throttle_folder, intel_throttle_temp, throttle_temp_streams);
383
+ }
384
+ }
385
+
386
+ void GPU_fdinfo::load_xe_i915_throttle_reasons (
387
+ std::string throttle_folder,
388
+ std::vector<std::string> throttle_reasons,
389
+ std::vector<std::ifstream>& throttle_reason_streams
390
+ ) {
391
+ for (const auto & throttle_reason : throttle_reasons) {
392
+ std::string throttle_path = throttle_folder + throttle_reason;
393
+ if (!fs::exists (throttle_path)) {
394
+ SPDLOG_WARN (" Intel xe/i915 gt dir: Throttle file {} not found" , throttle_path);
395
+ continue ;
396
+ }
397
+ auto throttle_stream = std::ifstream (throttle_path);
398
+ if (!throttle_stream.good ()) {
399
+ SPDLOG_WARN (" Intel xe/i915 gt dir: failed to open {}" , throttle_path);
400
+ continue ;
401
+ }
402
+ throttle_reason_streams.push_back (std::move (throttle_stream));
403
+ }
367
404
}
368
405
369
406
int GPU_fdinfo::get_gpu_clock ()
@@ -383,6 +420,41 @@ int GPU_fdinfo::get_gpu_clock()
383
420
return std::stoi (clock_str);
384
421
}
385
422
423
+ bool GPU_fdinfo::check_throttle_reasons (std::vector<std::ifstream>& throttle_reason_streams)
424
+ {
425
+ for (auto & throttle_reason_stream : throttle_reason_streams) {
426
+ std::string throttle_reason_str;
427
+ throttle_reason_stream.seekg (0 );
428
+ std::getline (throttle_reason_stream, throttle_reason_str);
429
+
430
+ if (throttle_reason_str == " 1" )
431
+ return true ;
432
+ }
433
+
434
+ return false ;
435
+ }
436
+
437
+ GPU_throttle_status GPU_fdinfo::get_throttling_status ()
438
+ {
439
+ if (!throttle_status_stream.is_open ())
440
+ return GPU_throttle_status::NONE;
441
+
442
+ std::string throttle_status_str;
443
+ throttle_status_stream.seekg (0 );
444
+ std::getline (throttle_status_stream, throttle_status_str);
445
+
446
+ if (throttle_status_str != " 1" )
447
+ return GPU_throttle_status::NONE;
448
+ else if (check_throttle_reasons (throttle_power_streams))
449
+ return GPU_throttle_status::POWER;
450
+ else if (check_throttle_reasons (throttle_current_streams))
451
+ return GPU_throttle_status::CURRENT;
452
+ else if (check_throttle_reasons (throttle_temp_streams))
453
+ return GPU_throttle_status::TEMP;
454
+
455
+ return GPU_throttle_status::OTHER;
456
+ }
457
+
386
458
void GPU_fdinfo::main_thread ()
387
459
{
388
460
while (!stop_thread) {
@@ -396,6 +468,11 @@ void GPU_fdinfo::main_thread()
396
468
metrics.memoryUsed = get_memory_used ();
397
469
metrics.powerUsage = get_power_usage ();
398
470
metrics.CoreClock = get_gpu_clock ();
471
+ auto throttling = get_throttling_status ();
472
+ metrics.is_power_throttled = throttling == GPU_throttle_status::POWER;
473
+ metrics.is_current_throttled = throttling == GPU_throttle_status::CURRENT;
474
+ metrics.is_temp_throttled = throttling == GPU_throttle_status::TEMP;
475
+ metrics.is_other_throttled = throttling == GPU_throttle_status::OTHER;
399
476
metrics.temp = hwmon_sensors[" temp" ].val / 1000 ;
400
477
metrics.fan_speed = hwmon_sensors[" fan_speed" ].val ;
401
478
metrics.voltage = hwmon_sensors[" voltage" ].val ;
0 commit comments