A few OpenCL fixes #18065

Open
wants to merge 2 commits into master
4 changes: 2 additions & 2 deletions src/common/nlmeans_core.c
@@ -709,7 +709,7 @@ int nlmeans_denoise_cl(
dt_opencl_finish_sync_pipe(devid, params->pipetype);

// indirectly give gpu some air to breathe (and to do display related stuff)
dt_iop_nap(dt_opencl_micro_nap(devid));
dt_opencl_micro_nap(devid);
}

error:
@@ -796,7 +796,7 @@ int nlmeans_denoiseprofile_cl(
dt_opencl_finish_sync_pipe(devid, params->pipetype);

// indirectly give gpu some air to breathe (and to do display related stuff)
dt_iop_nap(dt_opencl_micro_nap(devid));
dt_opencl_micro_nap(devid);
}

error:
130 changes: 65 additions & 65 deletions src/common/opencl.c
@@ -311,10 +311,11 @@ gboolean dt_opencl_avoid_atomics(const int devid)
return (!_cldev_running(devid)) ? FALSE : cl->dev[devid].avoid_atomics;
}

int dt_opencl_micro_nap(const int devid)
void dt_opencl_micro_nap(const int devid)
{
dt_opencl_t *cl = darktable.opencl;
return (!_cldev_running(devid)) ? 0 : cl->dev[devid].micro_nap;
if(_cldev_running(devid))
dt_iop_nap(cl->dev[devid].micro_nap);
}

gboolean dt_opencl_use_pinned_memory(const int devid)
@@ -1480,22 +1481,20 @@ void dt_opencl_init(
_opencl_apply_scheduling_profile(profile);

// let's keep track on unified memory devices
size_t unified_sysmem = 0;
dt_sys_resources_t *res = &darktable.dtresources;
for(int i = 0; i < cl->num_devs; i++)
{
if(cl->dev[i].unified_memory)
{
const size_t reserved = MIN(cl->dev[i].max_global_mem,
darktable.dtresources.total_memory * cl->dev[i].unified_fraction);
const size_t reserved = MIN(cl->dev[i].max_global_mem, res->total_memory * cl->dev[i].unified_fraction);
cl->dev[i].max_global_mem = reserved;
cl->dev[i].max_mem_alloc = MIN(cl->dev[i].max_mem_alloc, reserved);
dt_print_nts(DT_DEBUG_OPENCL,
" UNIFIED MEM SIZE: %.0f MB reserved for '%s'",
(double)reserved / 1024.0 / 1024.0,
cl->dev[i].cname);
unified_sysmem = MAX(unified_sysmem, reserved);
" UNIFIED MEM SIZE: %.0f MB reserved for '%s' id=%d",
(double)reserved / 1024.0 / 1024.0, cl->dev[i].cname, i);
res->total_memory -= reserved;
}
}
darktable.dtresources.total_memory -= unified_sysmem;
}
else // initialization failed
{
@@ -1565,9 +1564,9 @@ void dt_opencl_cleanup(dt_opencl_t *cl)
if(cl->print_statistics && (darktable.unmuted & DT_DEBUG_MEMORY))
{
dt_print_nts(DT_DEBUG_OPENCL,
" [opencl_summary_statistics] device '%s' (%d):"
" peak memory usage %zu bytes (%.1f MB)%s\n",
cl->dev[i].fullname, i, cl->dev[i].peak_memory,
" [opencl_summary_statistics] device '%s' id=%d:"
" peak memory usage %.1f MB%s\n",
cl->dev[i].fullname, i,
(float)cl->dev[i].peak_memory/(1024*1024),
cl->dev[i].clmem_error
? ", clmem runtime problem"
@@ -1579,7 +1578,7 @@ void dt_opencl_cleanup(dt_opencl_t *cl)
if(cl->dev[i].totalevents)
{
dt_print_nts(DT_DEBUG_OPENCL,
" [opencl_summary_statistics] device '%s' (%d): %d"
" [opencl_summary_statistics] device '%s' id=%d: %d"
" out of %d events were "
"successful and %d events lost. max event=%d%s%s\n",
cl->dev[i].fullname, i, cl->dev[i].totalsuccess,
@@ -1595,7 +1594,7 @@
else
{
dt_print_nts(DT_DEBUG_OPENCL,
" [opencl_summary_statistics] device '%s' (%d): NOT utilized\n",
" [opencl_summary_statistics] device '%s' id=%d: NOT utilized\n",
cl->dev[i].fullname, i);
}
}
@@ -2549,8 +2548,8 @@ static gboolean _check_kernel(const int dev,
if(err != CL_SUCCESS)
{
dt_print(DT_DEBUG_OPENCL,
"[opencl_create_kernel] could not create kernel `%s'! (%s)",
cl->name_saved[kernel], cl_errstr(err));
"[opencl_create_kernel] could not create kernel `%s' for '%s' id=%d: (%s)",
cl->name_saved[kernel], cl->dev[dev].fullname, dev, cl_errstr(err));
cl->dev[dev].kernel_used[kernel] = 0;
cl->name_saved[kernel] = NULL; // don't try again
dt_pthread_mutex_unlock(&cl->lock);
@@ -2719,9 +2718,9 @@ int dt_opencl_enqueue_kernel_ndim_with_local(const int dev,

if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[dt_opencl_enqueue_kernel_%id%s] kernel `%s' (%i) on device %d: %s",
"[dt_opencl_enqueue_kernel_%id%s] kernel `%s' (%i) on device '%s' id=%d: %s",
dimensions, local ? "_with_local" : "",
darktable.opencl->name_saved[kernel], kernel, dev, cl_errstr(err));
cl->name_saved[kernel], kernel, cl->dev[dev].fullname, dev, cl_errstr(err));
_check_clmem_err(dev, err);
return err;
}
@@ -2745,9 +2744,10 @@ int dt_opencl_enqueue_kernel_2d_args_internal(const int dev,
va_end(ap);
if(err != CL_SUCCESS)
{
dt_opencl_t *cl = darktable.opencl;
dt_print(DT_DEBUG_OPENCL,
"[dt_opencl_enqueue_kernel_2d_args_internal] kernel `%s' (%i) on device %d: %s",
darktable.opencl->name_saved[kernel], kernel, dev, cl_errstr(err));
"[dt_opencl_enqueue_kernel_2d_args_internal] kernel `%s' (%i) on device '%s' id=%d: %s",
cl->name_saved[kernel], kernel, cl->dev[dev].fullname, dev, cl_errstr(err));
return err;
}
const size_t sizes[] = { ROUNDUPDWD(w, dev), ROUNDUPDHT(h, dev), 1 };
@@ -2766,9 +2766,10 @@ int dt_opencl_enqueue_kernel_1d_args_internal(const int dev,
va_end(ap);
if(err != CL_SUCCESS)
{
dt_opencl_t *cl = darktable.opencl;
dt_print(DT_DEBUG_OPENCL,
"[dt_opencl_enqueue_kernel_1d_args_internal] kernel `%s' (%i) on device %d: %s",
darktable.opencl->name_saved[kernel], kernel, dev, cl_errstr(err));
"[dt_opencl_enqueue_kernel_1d_args_internal] kernel `%s' (%i) on device '%s' id=%d: %s",
cl->name_saved[kernel], kernel, cl->dev[dev].fullname, dev, cl_errstr(err));
return err;
}
const size_t sizes[] = { ROUNDUPDWD(x, dev), 1, 1 };
@@ -2961,8 +2962,8 @@ int dt_opencl_enqueue_copy_image(const int devid,

if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl copy_image] could not copy on device %d: %s",
devid, cl_errstr(err));
"[opencl copy_image] could not copy on device '%s' id=%d: %s",
darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
_check_clmem_err(devid, err);
return err;
}
@@ -2984,8 +2985,8 @@ int dt_opencl_enqueue_copy_image_to_buffer(const int devid,

if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl copy_image_to_buffer] could not copy on device %d: %s",
devid, cl_errstr(err));
"[opencl copy_image_to_buffer] could not copy on device '%s' id=%d: %s",
darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
_check_clmem_err(devid, err);
return err;
}
@@ -3007,8 +3008,8 @@ int dt_opencl_enqueue_copy_buffer_to_image(const int devid,

if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl copy_buffer_to_image] could not copy on device %d: %s",
devid, cl_errstr(err));
"[opencl copy_buffer_to_image] could not copy on device '%s' id=%d: %s",
darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
_check_clmem_err(devid, err);
return err;
}
@@ -3030,8 +3031,8 @@ int dt_opencl_enqueue_copy_buffer_to_buffer(const int devid,
dstoffset, size, 0, NULL, eventp);
if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl copy_buffer_to_buffer] could not copy on device %d: %s",
devid, cl_errstr(err));
"[opencl copy_buffer_to_buffer] could not copy on device '%s' id=%d: %s",
darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
_check_clmem_err(devid, err);
return err;
}
@@ -3055,8 +3056,8 @@ int dt_opencl_read_buffer_from_device(const int devid,
offset, size, host, 0, NULL, eventp);
if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl read_buffer_from_device] could not read from device %d: %s",
devid, cl_errstr(err));
"[opencl read_buffer_from_device] could not read from device '%s' id=%d: %s",
darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
return err;
}

@@ -3080,8 +3081,8 @@ int dt_opencl_write_buffer_to_device(const int devid,

if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl write_buffer_to_device] could not write to device %d: %s",
devid, cl_errstr(err));
"[opencl write_buffer_to_device] could not write to device '%s' id=%d: %s",
darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
return err;
}

@@ -3101,8 +3102,8 @@ void *dt_opencl_copy_host_to_device_constant(const int devid,
if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl copy_host_to_device_constant]"
" could not alloc buffer on device %d: %s",
devid, cl_errstr(err));
" could not alloc buffer on device '%s' id=%d: %s",
darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));

dt_opencl_memory_statistics(devid, dev, OPENCL_MEMORY_ADD);

@@ -3150,8 +3151,8 @@ void *dt_opencl_copy_host_to_device_rowpitch(const int devid,
if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl copy_host_to_device]"
" could not alloc/copy img buffer on device %d: %s",
devid, cl_errstr(err));
" could not alloc/copy img buffer on device '%s' id=%d: %s",
darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));

_check_clmem_err(devid, err);
dt_opencl_memory_statistics(devid, dev, OPENCL_MEMORY_ADD);
@@ -3196,8 +3197,8 @@ void *dt_opencl_map_buffer(const int devid,

if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl map buffer] could not map buffer on device %d: %s",
devid, cl_errstr(err));
"[opencl map buffer] could not map buffer on device '%s' id=%d: %s",
darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
_check_clmem_err(devid, err);
return ptr;
}
@@ -3215,8 +3216,8 @@ int dt_opencl_unmap_mem_object(const int devid,

if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl unmap mem object] could not unmap mem object on device %d: %s",
devid, cl_errstr(err));
"[opencl unmap mem object] could not unmap mem object on device '%s' id=%d: %s",
darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
return err;
}

@@ -3256,8 +3257,8 @@ void *dt_opencl_alloc_device(const int devid,

if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[opencl alloc_device] could not alloc img buffer on device %d: %s",
devid, cl_errstr(err));
"[opencl alloc_device] could not alloc img buffer on device '%s' id=%d: %s",
cl->dev[devid].fullname, devid, cl_errstr(err));

_check_clmem_err(devid, err);
dt_opencl_memory_statistics(devid, dev, OPENCL_MEMORY_ADD);
@@ -3305,8 +3306,8 @@ void *dt_opencl_alloc_device_use_host_pointer(const int devid,
if(err != CL_SUCCESS || dev == NULL)
dt_print(DT_DEBUG_OPENCL,
"[opencl alloc_device_use_host_pointer]"
" could not allocate cl image on device %d: %s",
devid, cl_errstr(err));
" could not allocate cl image on device '%s' id=%d: %s",
cl->dev[devid].fullname, devid, cl_errstr(err));

_check_clmem_err(devid, err);
dt_opencl_memory_statistics(devid, dev, OPENCL_MEMORY_ADD);
@@ -3330,8 +3331,8 @@ void *dt_opencl_alloc_device_buffer(const int devid,
CL_MEM_READ_WRITE, size, NULL, &err);
if(err != CL_SUCCESS || buf == NULL)
dt_print(DT_DEBUG_OPENCL,
"[opencl alloc_device_buffer] could not allocate cl buffer on device %d: %s",
devid, cl_errstr(err));
"[opencl alloc_device_buffer] could not allocate cl buffer on device '%s' id=%d: %s",
cl->dev[devid].fullname, devid, cl_errstr(err));

_check_clmem_err(devid, err);
dt_opencl_memory_statistics(devid, buf, OPENCL_MEMORY_ADD);
@@ -3356,8 +3357,8 @@ void *dt_opencl_alloc_device_buffer_with_flags(const int devid,
flags, size, NULL, &err);
if(err != CL_SUCCESS || buf == NULL)
dt_print(DT_DEBUG_OPENCL,
"[opencl alloc_device_buffer_with_flags] could not allocate cl buffer on device %d: %s",
devid, cl_errstr(err));
"[opencl alloc_device_buffer_with_flags] could not allocate cl buffer on device '%s' id=%d: %s",
cl->dev[devid].fullname, devid, cl_errstr(err));

_check_clmem_err(devid, err);
dt_opencl_memory_statistics(devid, buf, OPENCL_MEMORY_ADD);
@@ -3491,28 +3492,27 @@ void dt_opencl_memory_statistics(int devid,
if(devid < 0)
return;

dt_opencl_t *cl = darktable.opencl;

if(action == OPENCL_MEMORY_ADD)
darktable.opencl->dev[devid].memory_in_use += dt_opencl_get_mem_object_size(mem);
cl->dev[devid].memory_in_use += dt_opencl_get_mem_object_size(mem);
else
darktable.opencl->dev[devid].memory_in_use -= dt_opencl_get_mem_object_size(mem);
cl->dev[devid].memory_in_use -= dt_opencl_get_mem_object_size(mem);

darktable.opencl->dev[devid].peak_memory =
MAX(darktable.opencl->dev[devid].peak_memory,
darktable.opencl->dev[devid].memory_in_use);
cl->dev[devid].peak_memory = MAX(cl->dev[devid].peak_memory, cl->dev[devid].memory_in_use);

if(darktable.unmuted & DT_DEBUG_MEMORY)
{
dt_print(DT_DEBUG_OPENCL,"[opencl memory] device %d: %zu bytes (%.1f MB) in use, %.1f MB available GPU memory, %.1f MB global GPU mem size",
devid,
darktable.opencl->dev[devid].memory_in_use,
(float)darktable.opencl->dev[devid].memory_in_use/(1024*1024),
(float)darktable.opencl->dev[devid].used_available/(1024*1024),
(float)darktable.opencl->dev[devid].max_global_mem/(1024*1024));
if(darktable.opencl->dev[devid].memory_in_use > darktable.opencl->dev[devid].used_available)
dt_print(DT_DEBUG_OPENCL,"[opencl memory] device '%s' id=%d: %.1fMB in use, %.1fMB available GPU mem of %.1fMB",
cl->dev[devid].fullname, devid,
(float)cl->dev[devid].memory_in_use/(1024*1024),
(float)cl->dev[devid].used_available/(1024*1024),
(float)cl->dev[devid].max_global_mem/(1024*1024));
if(cl->dev[devid].memory_in_use > darktable.opencl->dev[devid].used_available)
{
dt_print(DT_DEBUG_OPENCL,
"[opencl memory] Warning, device %d used more GPU memory than available",
devid);
"[opencl memory] Warning, device '%s' id=%d used more GPU memory than available",
cl->dev[devid].fullname, devid);
}
}
}
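The bulk of the opencl.c changes above standardize the debug messages so that each one carries both the device name and its numeric id ('%s' id=%d) instead of the bare device number. Purely as an illustration, with a hypothetical device named 'Example GPU' registered as id 0, a failed buffer allocation would now be reported roughly as follows (device name and error string are made up, and any dt_print prefix is omitted):

[opencl alloc_device_buffer] could not allocate cl buffer on device 'Example GPU' id=0: CL_MEM_OBJECT_ALLOCATION_FAILURE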
2 changes: 1 addition & 1 deletion src/common/opencl.h
@@ -634,7 +634,7 @@ int dt_opencl_local_buffer_opt(const int devid,
void dt_opencl_write_device_config(const int devid);
gboolean dt_opencl_read_device_config(const int devid);
gboolean dt_opencl_avoid_atomics(const int devid);
int dt_opencl_micro_nap(const int devid);
void dt_opencl_micro_nap(const int devid);
gboolean dt_opencl_use_pinned_memory(const int devid);

G_END_DECLS
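With the prototype change above, dt_opencl_micro_nap() no longer returns the device's micro_nap value for the caller to hand to dt_iop_nap(); it performs the nap itself and does nothing when the device is not running. A minimal sketch of a call site after this PR (the surrounding function is hypothetical; only the final call reflects the pattern used by the updated callers in this diff):

// hypothetical tiled OpenCL processing path, sketch only
static void _process_tile_cl(const int devid)
{
  // ... enqueue OpenCL work for one tile here ...

  // indirectly give the GPU some air to breathe between tiles;
  // before this PR this read: dt_iop_nap(dt_opencl_micro_nap(devid));
  dt_opencl_micro_nap(devid);
}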
4 changes: 2 additions & 2 deletions src/develop/pixelpipe_hb.c
@@ -1790,7 +1790,7 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
}

// indirectly give gpu some air to breathe (and to do display related stuff)
dt_iop_nap(dt_opencl_micro_nap(pipe->devid));
dt_opencl_micro_nap(pipe->devid);

// transform to input colorspace
if(success_opencl)
@@ -2084,7 +2084,7 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
return TRUE;

// indirectly give gpu some air to breathe (and to do display related stuff)
dt_iop_nap(dt_opencl_micro_nap(pipe->devid));
dt_opencl_micro_nap(pipe->devid);

// transform to module input colorspace
if(success_opencl)
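For the unified-memory hunk in dt_opencl_init() above, the behavioral difference lies in the host-memory bookkeeping: the old code computed every device's reservation against the untouched total and then subtracted only the largest one (unified_sysmem), while the new code subtracts each device's reservation from the running total as it goes. A self-contained sketch with hypothetical numbers (two unified-memory devices, 32 GB of system RAM, unified_fraction of 0.25 for both):

// standalone illustration of the bookkeeping change; all figures hypothetical
#include <stdio.h>
#include <stddef.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
  const size_t GB = 1024UL * 1024UL * 1024UL;
  const size_t max_global_mem[2] = { 24 * GB, 4 * GB };  // hypothetical devices
  const double unified_fraction = 0.25;

  // old scheme: reservations measured against the untouched total,
  // only the largest reservation is subtracted afterwards
  size_t old_total = 32 * GB, unified_sysmem = 0;
  for(int i = 0; i < 2; i++)
  {
    const size_t reserved = MIN(max_global_mem[i],
                                (size_t)(old_total * unified_fraction));
    unified_sysmem = MAX(unified_sysmem, reserved);
  }
  old_total -= unified_sysmem;                   // 32 - 8 = 24 GB left for the host

  // new scheme: each device's reservation shrinks the running total immediately
  size_t new_total = 32 * GB;
  for(int i = 0; i < 2; i++)
  {
    const size_t reserved = MIN(max_global_mem[i],
                                (size_t)(new_total * unified_fraction));
    new_total -= reserved;                       // 8 GB, then 4 GB
  }                                              // 32 - 8 - 4 = 20 GB left

  printf("old host budget: %zu GB, new host budget: %zu GB\n",
         old_total / GB, new_total / GB);
  return 0;
}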