diff --git a/src/gpu_utils/gpu_utils.cu b/src/gpu_utils/gpu_utils.cu
index 08ae31ca..0270b648 100644
--- a/src/gpu_utils/gpu_utils.cu
+++ b/src/gpu_utils/gpu_utils.cu
@@ -30,3 +30,30 @@ extern "C" void gpu_update_monodomain(real *vm, float *b, real alpha, size_t n)
     kernel_update_monodomain<<<GRID, BLOCK_SIZE>>>(vm, b, alpha, n);
     cudaDeviceSynchronize();
 }
+
+// Element-wise copy of a float vector into a real vector (dst[i] = src[i]).
+// Expects a 1D launch with one thread per element; threads with i >= n do nothing.
+__global__ void kernel_copy_vectors(real *dst, float *src, size_t n) {
+
+    // Widen before multiplying so the index is computed in size_t, like n.
+    size_t i = (size_t)blockIdx.x*blockDim.x + threadIdx.x;
+
+    if(i < n) {
+        dst[i] = src[i];
+    }
+}
+
+// Host wrapper: launches kernel_copy_vectors over n elements with BLOCK_SIZE
+// threads per block and waits for the device before returning.
+extern "C" void gpu_copy_vectors(real *dst, float *src, size_t n) {
+
+    // (n + BLOCK_SIZE - 1)/BLOCK_SIZE is 0 when n == 0, and a grid with zero
+    // blocks is an invalid launch configuration, so there is nothing to do.
+    if(n == 0) {
+        return;
+    }
+
+    const int GRID = (n + BLOCK_SIZE - 1)/BLOCK_SIZE;
+    kernel_copy_vectors<<<GRID, BLOCK_SIZE>>>(dst, src, n);
+    cudaDeviceSynchronize();
+}
diff --git a/src/gpu_utils/gpu_utils.h b/src/gpu_utils/gpu_utils.h
index 60068c35..ea8f3418 100644
--- a/src/gpu_utils/gpu_utils.h
+++ b/src/gpu_utils/gpu_utils.h
@@ -36,6 +36,7 @@ extern "C" {
 void gpu_vec_div_vec(real *vec1, real *vec2, real* res, size_t n);
 real gpu_ecg_integral(real *beta_im, real *distances, real *volumes, size_t vec_size);
 void gpu_update_monodomain(real *vm, float *b, real alpha, size_t n);
+void gpu_copy_vectors(real *dst, float *src, size_t n);
 void cuda_assert(int code, char const *const func, const char *const file, int const line, const char *api);
 #ifdef __cplusplus
 }
diff --git a/src/monodomain/monodomain_solver.c b/src/monodomain/monodomain_solver.c
index 1db06551..2d77dbc1 100644
--- a/src/monodomain/monodomain_solver.c
+++ b/src/monodomain/monodomain_solver.c
@@ -675,7 +675,8 @@ int solve_monodomain(struct monodomain_solver *the_monodomain_solver, struct ode
         if(save_to_file && (count % print_rate == 0) && (cur_time >= start_saving_after_dt)) {
             start_stop_watch(&write_time);
 
-            if(edp_gpu) {
+            //TODO: this is not needed if the ODE is in the GPU
+            if(edp_gpu && count > 0) {
 #ifdef COMPILE_CUDA
 
                 cudaMemcpy(gpu_rhs, linear_system_solver_result, original_num_cells * sizeof(float), cudaMemcpyDeviceToHost);
@@ -1084,7 +1085,9 @@ bool update_ode_state_vector_and_check_for_activity(real_cpu vm_threshold, struc
             check_cuda_error(cudaMemcpy(sv, vms, mem_size, cudaMemcpyHostToDevice));
             free(vms);
         } else {
-            check_cuda_error(cudaMemcpy(sv, linear_solver_result, mem_size, cudaMemcpyDeviceToDevice));
+            //TODO: make the gpu linear system solver use the same size as the ODE solver
+            //check_cuda_error(cudaMemcpy(sv, linear_solver_result, mem_size, cudaMemcpyDeviceToDevice));
+            gpu_copy_vectors(sv, linear_solver_result, max_number_of_cells);
             //TODO: make a kernel to check for activity
             act = true;
         }