Skip to content

Commit

Permalink
wip: avoid copy
Browse files Browse the repository at this point in the history
  • Loading branch information
rsachetto committed Nov 24, 2023
1 parent a20c4fd commit 2b44c8e
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 2 deletions.
15 changes: 15 additions & 0 deletions src/gpu_utils/gpu_utils.cu
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,18 @@ extern "C" void gpu_update_monodomain(real *vm, float *b, real alpha, size_t n)
kernel_update_monodomain<<<GRID, BLOCK_SIZE>>>(vm, b, alpha, n);
cudaDeviceSynchronize();
}

/**
 * Element-wise copy of a float vector into a `real` vector.
 *
 * For every index i in [0, n) the value src[i] is converted to `real` by
 * plain assignment and stored in dst[i].
 *
 * Launch layout: 1D grid of 1D blocks. The body is a grid-stride loop, so
 * every element is covered for any grid size >= 1; with a grid of
 * ceil(n / blockDim.x) blocks each thread handles at most one element.
 * No shared memory is used.
 *
 * @param dst destination vector; must hold at least n elements and be
 *            addressable from device code.
 * @param src source vector; must hold at least n elements and be
 *            addressable from device code.
 * @param n   number of elements to copy.
 */
__global__ void kernel_copy_vectors(real *dst, float *src, size_t n) {

    // All index arithmetic is done in size_t: n is a size_t, and the 32-bit
    // product blockIdx.x*blockDim.x would wrap around for very large vectors.
    const size_t stride = (size_t)gridDim.x * blockDim.x;

    for(size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride) {
        dst[i] = src[i];
    }
}

/**
 * Host entry point that copies n elements from a float vector into a `real`
 * vector by launching kernel_copy_vectors, then waits for the device.
 *
 * The launch uses BLOCK_SIZE threads per block and ceil(n / BLOCK_SIZE)
 * blocks. The function always ends with cudaDeviceSynchronize(), so it
 * returns only after previously issued device work has completed.
 *
 * @param dst destination vector passed straight to the kernel (must be
 *            addressable from device code), at least n elements.
 * @param src source vector passed straight to the kernel (must be
 *            addressable from device code), at least n elements.
 * @param n   number of elements to copy; 0 is allowed and copies nothing.
 */
extern "C" void gpu_copy_vectors(real *dst, float *src, size_t n) {
    // A grid with zero blocks is an invalid launch configuration and would
    // leave a pending CUDA error behind, so skip the launch for empty input.
    if(n > 0) {
        // Ceil-division; kept unsigned so a large block count is never
        // reinterpreted as a negative grid dimension.
        const size_t num_blocks = (n + BLOCK_SIZE - 1) / BLOCK_SIZE;
        kernel_copy_vectors<<<(unsigned int)num_blocks, BLOCK_SIZE>>>(dst, src, n);
    }

    // Preserve the blocking behaviour callers get from this wrapper.
    cudaDeviceSynchronize();
}
1 change: 1 addition & 0 deletions src/gpu_utils/gpu_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ extern "C" {
void gpu_vec_div_vec(real *vec1, real *vec2, real* res, size_t n);
real gpu_ecg_integral(real *beta_im, real *distances, real *volumes, size_t vec_size);
void gpu_update_monodomain(real *vm, float *b, real alpha, size_t n);
void gpu_copy_vectors(real *dst, float *src, size_t n);
void cuda_assert(int code, char const *const func, const char *const file, int const line, const char *api);
#ifdef __cplusplus
}
Expand Down
7 changes: 5 additions & 2 deletions src/monodomain/monodomain_solver.c
Original file line number Diff line number Diff line change
Expand Up @@ -675,7 +675,8 @@ int solve_monodomain(struct monodomain_solver *the_monodomain_solver, struct ode
if(save_to_file && (count % print_rate == 0) && (cur_time >= start_saving_after_dt)) {
start_stop_watch(&write_time);

if(edp_gpu) {
//TODO: this is not needed if the ODE solver is on the GPU
if(edp_gpu && count > 0) {

#ifdef COMPILE_CUDA
cudaMemcpy(gpu_rhs, linear_system_solver_result, original_num_cells * sizeof(float), cudaMemcpyDeviceToHost);
Expand Down Expand Up @@ -1084,7 +1085,9 @@ bool update_ode_state_vector_and_check_for_activity(real_cpu vm_threshold, struc
check_cuda_error(cudaMemcpy(sv, vms, mem_size, cudaMemcpyHostToDevice));
free(vms);
} else {
check_cuda_error(cudaMemcpy(sv, linear_solver_result, mem_size, cudaMemcpyDeviceToDevice));
//TODO: make the GPU linear system solver use the same size as the ODE solver
//check_cuda_error(cudaMemcpy(sv, linear_solver_result, mem_size, cudaMemcpyDeviceToDevice));
gpu_copy_vectors(sv, linear_solver_result, max_number_of_cells);
//TODO: make a kernel to check for activity
act = true;
}
Expand Down

0 comments on commit 2b44c8e

Please sign in to comment.