From 46eb89187f0c180d12733daa1921792474b311bf Mon Sep 17 00:00:00 2001
From: Hanno Schwalm
Date: Mon, 30 Dec 2024 18:47:23 +0100
Subject: [PATCH] Improved OpenCL error logs

In all cases of kernel / cl_mem access errors the device name is also logged.
---
 src/common/opencl.c | 139 ++++++++++++++++++++++----------------------
 1 file changed, 68 insertions(+), 71 deletions(-)

diff --git a/src/common/opencl.c b/src/common/opencl.c
index 8fda4d489164..d5dc10ca4a2d 100644
--- a/src/common/opencl.c
+++ b/src/common/opencl.c
@@ -1481,22 +1481,20 @@ void dt_opencl_init(
     _opencl_apply_scheduling_profile(profile);
 
     // let's keep track on unified memory devices
-    size_t unified_sysmem = 0;
+    dt_sys_resources_t *res = &darktable.dtresources;
     for(int i = 0; i < cl->num_devs; i++)
     {
       if(cl->dev[i].unified_memory)
       {
-        const size_t reserved = MIN(cl->dev[i].max_global_mem,
-                                    darktable.dtresources.total_memory * cl->dev[i].unified_fraction);
+        const size_t reserved = MIN(cl->dev[i].max_global_mem, res->total_memory * cl->dev[i].unified_fraction);
         cl->dev[i].max_global_mem = reserved;
+        cl->dev[i].max_mem_alloc = MIN(cl->dev[i].max_mem_alloc, reserved);
         dt_print_nts(DT_DEBUG_OPENCL,
-                     " UNIFIED MEM SIZE: %.0f MB reserved for '%s'",
-                     (double)reserved / 1024.0 / 1024.0,
-                     cl->dev[i].cname);
-        unified_sysmem = MAX(unified_sysmem, reserved);
+                     " UNIFIED MEM SIZE: %.0f MB reserved for '%s' id=%d",
+                     (double)reserved / 1024.0 / 1024.0, cl->dev[i].cname, i);
+        res->total_memory -= reserved;
       }
     }
-    darktable.dtresources.total_memory -= unified_sysmem;
   }
   else // initialization failed
   {
@@ -1566,9 +1564,9 @@ void dt_opencl_cleanup(dt_opencl_t *cl)
       if(cl->print_statistics && (darktable.unmuted & DT_DEBUG_MEMORY))
       {
         dt_print_nts(DT_DEBUG_OPENCL,
-                     " [opencl_summary_statistics] device '%s' (%d):"
-                     " peak memory usage %zu bytes (%.1f MB)%s\n",
-                     cl->dev[i].fullname, i, cl->dev[i].peak_memory,
+                     " [opencl_summary_statistics] device '%s' id=%d:"
+                     " peak memory usage %.1f MB%s\n",
+                     cl->dev[i].fullname, i, (float)cl->dev[i].peak_memory/(1024*1024),
                      cl->dev[i].clmem_error
                      ? ", clmem runtime problem"
@@ -1580,7 +1578,7 @@ void dt_opencl_cleanup(dt_opencl_t *cl)
       if(cl->dev[i].totalevents)
       {
         dt_print_nts(DT_DEBUG_OPENCL,
-                     " [opencl_summary_statistics] device '%s' (%d): %d"
+                     " [opencl_summary_statistics] device '%s' id=%d: %d"
                      " out of %d events were "
                      "successful and %d events lost. max event=%d%s%s\n",
                      cl->dev[i].fullname, i, cl->dev[i].totalsuccess,
@@ -1596,7 +1594,7 @@ void dt_opencl_cleanup(dt_opencl_t *cl)
       else
       {
         dt_print_nts(DT_DEBUG_OPENCL,
-                     " [opencl_summary_statistics] device '%s' (%d): NOT utilized\n",
+                     " [opencl_summary_statistics] device '%s' id=%d: NOT utilized\n",
                      cl->dev[i].fullname, i);
       }
     }
@@ -2550,8 +2548,8 @@ static gboolean _check_kernel(const int dev,
     if(err != CL_SUCCESS)
     {
       dt_print(DT_DEBUG_OPENCL,
-               "[opencl_create_kernel] could not create kernel `%s'! (%s)",
-               cl->name_saved[kernel], cl_errstr(err));
+               "[opencl_create_kernel] could not create kernel `%s' for '%s' id=%d: (%s)",
+               cl->name_saved[kernel], cl->dev[dev].fullname, dev, cl_errstr(err));
       cl->dev[dev].kernel_used[kernel] = 0;
       cl->name_saved[kernel] = NULL; // don't try again
       dt_pthread_mutex_unlock(&cl->lock);
@@ -2720,9 +2718,9 @@ int dt_opencl_enqueue_kernel_ndim_with_local(const int dev,
 
   if(err != CL_SUCCESS)
     dt_print(DT_DEBUG_OPENCL,
-             "[dt_opencl_enqueue_kernel_%id%s] kernel `%s' (%i) on device %d: %s",
+             "[dt_opencl_enqueue_kernel_%id%s] kernel `%s' (%i) on device '%s' id=%d: %s",
              dimensions, local ? "_with_local" : "",
-             darktable.opencl->name_saved[kernel], kernel, dev, cl_errstr(err));
+             cl->name_saved[kernel], kernel, cl->dev[dev].fullname, dev, cl_errstr(err));
   _check_clmem_err(dev, err);
   return err;
 }
@@ -2746,9 +2744,10 @@ int dt_opencl_enqueue_kernel_2d_args_internal(const int dev,
   va_end(ap);
   if(err != CL_SUCCESS)
   {
+    dt_opencl_t *cl = darktable.opencl;
     dt_print(DT_DEBUG_OPENCL,
-             "[dt_opencl_enqueue_kernel_2d_args_internal] kernel `%s' (%i) on device %d: %s",
-             darktable.opencl->name_saved[kernel], kernel, dev, cl_errstr(err));
+             "[dt_opencl_enqueue_kernel_2d_args_internal] kernel `%s' (%i) on device '%s' id=%d: %s",
+             cl->name_saved[kernel], kernel, cl->dev[dev].fullname, dev, cl_errstr(err));
     return err;
   }
   const size_t sizes[] = { ROUNDUPDWD(w, dev), ROUNDUPDHT(h, dev), 1 };
@@ -2767,9 +2766,10 @@ int dt_opencl_enqueue_kernel_1d_args_internal(const int dev,
   va_end(ap);
   if(err != CL_SUCCESS)
   {
+    dt_opencl_t *cl = darktable.opencl;
     dt_print(DT_DEBUG_OPENCL,
-             "[dt_opencl_enqueue_kernel_1d_args_internal] kernel `%s' (%i) on device %d: %s",
-             darktable.opencl->name_saved[kernel], kernel, dev, cl_errstr(err));
+             "[dt_opencl_enqueue_kernel_1d_args_internal] kernel `%s' (%i) on device '%s' id=%d: %s",
+             cl->name_saved[kernel], kernel, cl->dev[dev].fullname, dev, cl_errstr(err));
     return err;
   }
   const size_t sizes[] = { ROUNDUPDWD(x, dev), 1, 1 };
@@ -2962,8 +2962,8 @@ int dt_opencl_enqueue_copy_image(const int devid,
 
   if(err != CL_SUCCESS)
     dt_print(DT_DEBUG_OPENCL,
-             "[opencl copy_image] could not copy on device %d: %s",
-             devid, cl_errstr(err));
+             "[opencl copy_image] could not copy on device '%s' id=%d: %s",
+             darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
   _check_clmem_err(devid, err);
   return err;
 }
@@ -2985,8 +2985,8 @@ int dt_opencl_enqueue_copy_image_to_buffer(const int devid,
 
   if(err != CL_SUCCESS)
     dt_print(DT_DEBUG_OPENCL,
-             "[opencl copy_image_to_buffer] could not copy on device %d: %s",
-             devid, cl_errstr(err));
+             "[opencl copy_image_to_buffer] could not copy on device '%s' id=%d: %s",
+             darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
   _check_clmem_err(devid, err);
   return err;
 }
@@ -3008,8 +3008,8 @@ int dt_opencl_enqueue_copy_buffer_to_image(const int devid,
 
   if(err != CL_SUCCESS)
     dt_print(DT_DEBUG_OPENCL,
-             "[opencl copy_buffer_to_image] could not copy on device %d: %s",
-             devid, cl_errstr(err));
+             "[opencl copy_buffer_to_image] could not copy on device '%s' id=%d: %s",
+             darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
   _check_clmem_err(devid, err);
   return err;
 }
@@ -3031,8 +3031,8 @@ int dt_opencl_enqueue_copy_buffer_to_buffer(const int devid,
                                     dstoffset, size, 0, NULL, eventp);
   if(err != CL_SUCCESS)
     dt_print(DT_DEBUG_OPENCL,
-             "[opencl copy_buffer_to_buffer] could not copy on device %d: %s",
-             devid, cl_errstr(err));
+             "[opencl copy_buffer_to_buffer] could not copy on device '%s' id=%d: %s",
+             darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
   _check_clmem_err(devid, err);
   return err;
 }
@@ -3056,8 +3056,8 @@ int dt_opencl_read_buffer_from_device(const int devid,
                                     offset, size, host, 0, NULL, eventp);
   if(err != CL_SUCCESS)
     dt_print(DT_DEBUG_OPENCL,
-             "[opencl read_buffer_from_device] could not read from device %d: %s",
-             devid, cl_errstr(err));
+             "[opencl read_buffer_from_device] could not read from device '%s' id=%d: %s",
+             darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
   return err;
 }
@@ -3081,8 +3081,8 @@ int dt_opencl_write_buffer_to_device(const int devid,
 
   if(err != CL_SUCCESS)
     dt_print(DT_DEBUG_OPENCL,
-             "[opencl write_buffer_to_device] could not write to device %d: %s",
-             devid, cl_errstr(err));
+             "[opencl write_buffer_to_device] could not write to device '%s' id=%d: %s",
+             darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
   return err;
 }
@@ -3102,8 +3102,8 @@ void *dt_opencl_copy_host_to_device_constant(const int devid,
 
   if(err != CL_SUCCESS)
     dt_print(DT_DEBUG_OPENCL,
              "[opencl copy_host_to_device_constant]"
-             " could not alloc buffer on device %d: %s",
-             devid, cl_errstr(err));
+             " could not alloc buffer on device '%s' id=%d: %s",
+             darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
 
   dt_opencl_memory_statistics(devid, dev, OPENCL_MEMORY_ADD);
@@ -3151,8 +3151,8 @@ void *dt_opencl_copy_host_to_device_rowpitch(const int devid,
   if(err != CL_SUCCESS)
     dt_print(DT_DEBUG_OPENCL,
              "[opencl copy_host_to_device]"
-             " could not alloc/copy img buffer on device %d: %s",
-             devid, cl_errstr(err));
+             " could not alloc/copy img buffer on device '%s' id=%d: %s",
+             darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
   _check_clmem_err(devid, err);
 
   dt_opencl_memory_statistics(devid, dev, OPENCL_MEMORY_ADD);
@@ -3197,8 +3197,8 @@ void *dt_opencl_map_buffer(const int devid,
 
   if(err != CL_SUCCESS)
     dt_print(DT_DEBUG_OPENCL,
-             "[opencl map buffer] could not map buffer on device %d: %s",
-             devid, cl_errstr(err));
+             "[opencl map buffer] could not map buffer on device '%s' id=%d: %s",
+             darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
   _check_clmem_err(devid, err);
   return ptr;
 }
@@ -3216,8 +3216,8 @@ int dt_opencl_unmap_mem_object(const int devid,
 
   if(err != CL_SUCCESS)
     dt_print(DT_DEBUG_OPENCL,
-             "[opencl unmap mem object] could not unmap mem object on device %d: %s",
-             devid, cl_errstr(err));
+             "[opencl unmap mem object] could not unmap mem object on device '%s' id=%d: %s",
+             darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
   return err;
 }
@@ -3257,8 +3257,8 @@ void *dt_opencl_alloc_device(const int devid,
 
   if(err != CL_SUCCESS)
     dt_print(DT_DEBUG_OPENCL,
-             "[opencl alloc_device] could not alloc img buffer on device %d: %s",
-             devid, cl_errstr(err));
+             "[opencl alloc_device] could not alloc img buffer on device '%s' id=%d: %s",
+             cl->dev[devid].fullname, devid, cl_errstr(err));
   _check_clmem_err(devid, err);
 
   dt_opencl_memory_statistics(devid, dev, OPENCL_MEMORY_ADD);
@@ -3306,8 +3306,8 @@ void *dt_opencl_alloc_device_use_host_pointer(const int devid,
   if(err != CL_SUCCESS || dev == NULL)
     dt_print(DT_DEBUG_OPENCL,
              "[opencl alloc_device_use_host_pointer]"
-             " could not allocate cl image on device %d: %s",
-             devid, cl_errstr(err));
+             " could not allocate cl image on device '%s' id=%d: %s",
+             cl->dev[devid].fullname, devid, cl_errstr(err));
   _check_clmem_err(devid, err);
 
   dt_opencl_memory_statistics(devid, dev, OPENCL_MEMORY_ADD);
@@ -3331,8 +3331,8 @@ void *dt_opencl_alloc_device_buffer(const int devid,
                                 CL_MEM_READ_WRITE, size, NULL, &err);
   if(err != CL_SUCCESS || buf == NULL)
     dt_print(DT_DEBUG_OPENCL,
-             "[opencl alloc_device_buffer] could not allocate cl buffer on device %d: %s",
-             devid, cl_errstr(err));
+             "[opencl alloc_device_buffer] could not allocate cl buffer on device '%s' id=%d: %s",
+             cl->dev[devid].fullname, devid, cl_errstr(err));
   _check_clmem_err(devid, err);
 
   dt_opencl_memory_statistics(devid, buf, OPENCL_MEMORY_ADD);
@@ -3357,8 +3357,8 @@ void *dt_opencl_alloc_device_buffer_with_flags(const int devid,
                                 flags, size, NULL, &err);
   if(err != CL_SUCCESS || buf == NULL)
     dt_print(DT_DEBUG_OPENCL,
-             "[opencl alloc_device_buffer_with_flags] could not allocate cl buffer on device %d: %s",
-             devid, cl_errstr(err));
+             "[opencl alloc_device_buffer_with_flags] could not allocate cl buffer on device '%s' id=%d: %s",
+             cl->dev[devid].fullname, devid, cl_errstr(err));
   _check_clmem_err(devid, err);
 
   dt_opencl_memory_statistics(devid, buf, OPENCL_MEMORY_ADD);
@@ -3492,28 +3492,27 @@ void dt_opencl_memory_statistics(int devid,
   if(devid < 0)
     return;
 
+  dt_opencl_t *cl = darktable.opencl;
+
   if(action == OPENCL_MEMORY_ADD)
-    darktable.opencl->dev[devid].memory_in_use += dt_opencl_get_mem_object_size(mem);
+    cl->dev[devid].memory_in_use += dt_opencl_get_mem_object_size(mem);
   else
-    darktable.opencl->dev[devid].memory_in_use -= dt_opencl_get_mem_object_size(mem);
+    cl->dev[devid].memory_in_use -= dt_opencl_get_mem_object_size(mem);
 
-  darktable.opencl->dev[devid].peak_memory =
-    MAX(darktable.opencl->dev[devid].peak_memory,
-        darktable.opencl->dev[devid].memory_in_use);
+  cl->dev[devid].peak_memory = MAX(cl->dev[devid].peak_memory, cl->dev[devid].memory_in_use);
 
   if(darktable.unmuted & DT_DEBUG_MEMORY)
   {
-    dt_print(DT_DEBUG_OPENCL,"[opencl memory] device %d: %zu bytes (%.1f MB) in use, %.1f MB available GPU memory, %.1f MB global GPU mem size",
-             devid,
-             darktable.opencl->dev[devid].memory_in_use,
-             (float)darktable.opencl->dev[devid].memory_in_use/(1024*1024),
-             (float)darktable.opencl->dev[devid].used_available/(1024*1024),
-             (float)darktable.opencl->dev[devid].max_global_mem/(1024*1024));
-    if(darktable.opencl->dev[devid].memory_in_use > darktable.opencl->dev[devid].used_available)
+    dt_print(DT_DEBUG_OPENCL,"[opencl memory] device '%s' id=%d: %.1fMB in use, %.1fMB available GPU mem of %.1fMB",
+             cl->dev[devid].fullname, devid,
+             (float)cl->dev[devid].memory_in_use/(1024*1024),
+             (float)cl->dev[devid].used_available/(1024*1024),
+             (float)cl->dev[devid].max_global_mem/(1024*1024));
+    if(cl->dev[devid].memory_in_use > cl->dev[devid].used_available)
     {
       dt_print(DT_DEBUG_OPENCL,
-               "[opencl memory] Warning, device %d used more GPU memory than available",
-               devid);
+               "[opencl memory] Warning, device '%s' id=%d used more GPU memory than available",
+               cl->dev[devid].fullname, devid);
     }
   }
 }
@@ -3553,9 +3552,9 @@ void dt_opencl_check_tuning(const int devid)
   if(info)
     dt_print(DT_DEBUG_OPENCL | DT_DEBUG_MEMORY,
              "[dt_opencl_check_tuning] reference mode %i,"
-             " use %luMB (pinning=%s) on device `%s' id=%i",
+             " use %.1fMB (pinning=%s) on device `%s' id=%i",
              level,
-             cl->dev[devid].used_available / 1024lu / 1024lu,
+             (float)cl->dev[devid].used_available/(1024*1024),
              cl->dev[devid].pinned_memory ? "ON" : "OFF",
              cl->dev[devid].fullname, devid);
   return;
@@ -3571,8 +3570,7 @@ void dt_opencl_check_tuning(const int devid)
     const int reserved_mb = MAX(1, headroom)
       + (cl->dev[devid].clmem_error ? DT_OPENCL_DEFAULT_HEADROOM : 0);
     const int global_mb = cl->dev[devid].max_global_mem / 1024lu / 1024lu;
-    cl->dev[devid].used_available = (size_t)
-      (MAX(0, global_mb - reserved_mb)) * 1024ul * 1024ul;
+    cl->dev[devid].used_available = (size_t)MAX(0, global_mb - reserved_mb) * 1024ul * 1024ul;
   }
   else
   {
@@ -3580,15 +3578,14 @@ void dt_opencl_check_tuning(const int devid)
     const size_t default_headroom = DT_OPENCL_DEFAULT_HEADROOM * 1024ul * 1024ul;
     const size_t disposable = allmem > default_headroom ? allmem - default_headroom : 0;
     const int fraction = MIN(1024lu, MAX(0, res->fractions[res->group + 3]));
-    cl->dev[devid].used_available =
-      MAX(256ul * 1024ul * 1024ul, disposable / 1024ul * fraction);
+    cl->dev[devid].used_available = (size_t)MAX(256ul * 1024ul * 1024ul, disposable / 1024ul * fraction);
   }
 
   if(info)
     dt_print(DT_DEBUG_OPENCL | DT_DEBUG_MEMORY,
-             "[dt_opencl_check_tuning] use %luMB (headroom=%s, pinning=%s)"
+             "[dt_opencl_check_tuning] use %.1fMB (headroom=%s, pinning=%s)"
              " on device `%s' id=%i",
-             cl->dev[devid].used_available / 1024lu / 1024lu,
+             (float)cl->dev[devid].used_available/(1024*1024),
              cl->dev[devid].tunehead ? "ON" : "OFF",
              cl->dev[devid].pinned_memory ? "ON" : "OFF",
             cl->dev[devid].fullname, devid);
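
Illustration of the new log format (the device name and error code below are
invented examples for this note, not output produced by the patch itself): a
failed buffer allocation on device 0 previously logged

    [opencl alloc_device_buffer] could not allocate cl buffer on device 0: CL_MEM_OBJECT_ALLOCATION_FAILURE

and with this change identifies the device by name as well as by id:

    [opencl alloc_device_buffer] could not allocate cl buffer on device 'NVIDIA GeForce RTX 4070' id=0: CL_MEM_OBJECT_ALLOCATION_FAILURE

The same pattern applies to the kernel creation/enqueue and cl_mem copy, map
and read/write error paths touched above.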