Skip to content

Commit

Permalink
Starting SYCL wrapper
Browse files Browse the repository at this point in the history
  • Loading branch information
rsachetto committed Dec 3, 2024
1 parent 5fc8906 commit 0b34b9c
Show file tree
Hide file tree
Showing 7 changed files with 209 additions and 89 deletions.
147 changes: 74 additions & 73 deletions src/ecg_library/ecg.c

Large diffs are not rendered by default.

23 changes: 17 additions & 6 deletions src/ecg_library/ecg.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,30 +16,41 @@ struct pseudo_bidomain_persistent_data {
uint32_t diff_curr_rate;
real_cpu diff_curr_max_time;

#if defined(COMPILE_CUDA) || defined(COMPILE_SYCL)
#ifdef COMPILE_CUDA
cusparseHandle_t cusparseHandle;
cublasHandle_t cublasHandle;
cusparseHandle_t sparseHandle;
cublasHandle_t blasHandle;
#else
dpct::sparse::descriptor_ptr sparseHandle;
dpct::blas::descriptor_ptr blasHandle;
#endif
int *d_col, *d_row, nz;
real *d_distances;
real *d_volumes;
real *volumes;
real *tmp_data;
real *d_val;
real *beta_im_cpu;

size_t bufferSize;
void *buffer;

#if CUBLAS_VER_MAJOR <= 10
#if defined(COMPILE_CUDA) && CUBLAS_VER_MAJOR <= 10
cusparseMatDescr_t descr;
real *local_sv;
#else
#elif defined(COMPILE_CUDA)
cusparseSpMatDescr_t matA;
cusparseDnVecDescr_t vec_vm;
cusparseDnVecDescr_t vec_beta_im;
#endif

#ifdef COMPILE_SYCL
dpct::sparse::sparse_matrix_desc_t matA;
std::shared_ptr<dpct::sparse::dense_vector_desc> vec_vm;
std::shared_ptr<dpct::sparse::dense_vector_desc> vec_beta_im;
#endif

#endif


};

// Convenience accessor: casts config->persistent_data to a
// struct pseudo_bidomain_persistent_data pointer. It expands to an expression
// that uses an identifier named `config`, so it can only be used where such a
// variable is in scope.
#define PSEUDO_BIDOMAIN_DATA ((struct pseudo_bidomain_persistent_data *)config->persistent_data)
84 changes: 84 additions & 0 deletions src/gpu_utils/accel_utils.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
//
// Created by sachetto on 28/11/24.
//

#include "accel_utils.h"
#include "gpu_utils.h"

#ifdef COMPILE_CUDA
#include <cublas_v2.h>
#include <cusparse_v2.h>
#endif

/**
 * Allocates n bytes of device memory using the backend selected at compile time.
 *
 * @param ptr Out-parameter; on success *ptr receives the device pointer.
 * @param n   Number of bytes to allocate.
 *
 * CUDA builds call cudaMalloc; SYCL builds allocate USM device memory on the
 * dpct in-order queue. When neither COMPILE_CUDA nor COMPILE_SYCL is defined
 * the function does nothing and *ptr is left untouched.
 */
extern "C" void malloc_device(void **ptr, size_t n) {
#ifdef COMPILE_CUDA
    check_cuda_error(cudaMalloc(ptr, n));
#elif defined(COMPILE_SYCL)
    // Store the allocation through the out-parameter. Assigning to `ptr`
    // itself would only modify the local copy of the argument and the caller
    // would never see the allocated pointer.
    DPCT_CHECK_ERROR(*ptr = sycl::malloc_device(n, dpct::get_in_order_queue()));
#else
    (void)ptr;
    (void)n;
#endif
}

/**
 * Releases device memory previously obtained from malloc_device.
 *
 * @param ptr Device pointer to free.
 *
 * CUDA builds call cudaFree; SYCL builds call dpct::dpct_free on the dpct
 * in-order queue. When neither backend is compiled in, this is a no-op.
 */
extern "C" void free_device(void *ptr) {
#ifdef COMPILE_CUDA
    check_cuda_error(cudaFree(ptr));
#elif defined(COMPILE_SYCL)
    // Free the pointer that was passed in; no other object is visible in this
    // scope, so the argument is the only thing this function can release.
    DPCT_CHECK_ERROR(dpct::dpct_free(ptr, dpct::get_in_order_queue()));
#else
    (void)ptr;
#endif
}

/**
 * Blocking copy of n bytes between host and device memory.
 *
 * @param dest Destination buffer (device pointer for HOST_TO_DEVICE, host
 *             pointer for DEVICE_TO_HOST).
 * @param src  Source buffer.
 * @param n    Number of bytes to copy.
 * @param kind Copy direction. Any value other than HOST_TO_DEVICE or
 *             DEVICE_TO_HOST is ignored and nothing is copied.
 *
 * The SYCL path always uses the dpct in-order queue, the same queue
 * malloc_device allocates on, and waits for the copy to finish before
 * returning. When neither backend is compiled in, this is a no-op.
 */
extern "C" void memcpy_device(void *dest, const void *src, size_t n, copy_direction kind) {

    if(kind != HOST_TO_DEVICE && kind != DEVICE_TO_HOST) {
        return;
    }

#ifdef COMPILE_CUDA
    const cudaMemcpyKind cuda_kind = (kind == HOST_TO_DEVICE) ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost;
    check_cuda_error(cudaMemcpy(dest, src, n, cuda_kind));
#elif defined(COMPILE_SYCL)
    // Use one queue for both directions. Building a fresh sycl::device and
    // sycl::queue per call is costly and may select a device/context other
    // than the one that owns the allocation.
    dpct::get_in_order_queue().memcpy(dest, src, n).wait();
#else
    (void)dest;
    (void)src;
    (void)n;
#endif
}

/**
 * Creates a sparse-library handle and stores it in the object `handle` points to.
 *
 * @param handle Address of the handle variable to initialise:
 *               a cusparseHandle_t in CUDA builds, a
 *               dpct::sparse::descriptor_ptr in SYCL builds.
 *
 * When neither backend is compiled in, this is a no-op.
 */
extern "C" void create_sparse_handle(void *handle) {
#ifdef COMPILE_CUDA
    // NOTE(review): cusparseCreate returns a cusparseStatus_t but is checked
    // with the cuBLAS helper here - confirm check_cublas_error accepts it.
    check_cublas_error(cusparseCreate((cusparseHandle_t *)handle));
#elif defined(COMPILE_SYCL)
    // Mirror the CUDA path: write the new descriptor through the pointer the
    // caller supplied. Assigning to `handle` itself would only change the
    // local argument and leak the descriptor.
    DPCT_CHECK_ERROR(*(dpct::sparse::descriptor_ptr *)handle = new dpct::sparse::descriptor());
#else
    (void)handle;
#endif
}

/**
 * Creates a BLAS-library handle and stores it in the object `handle` points to.
 *
 * @param handle Address of the handle variable to initialise:
 *               a cublasHandle_t in CUDA builds, a
 *               dpct::blas::descriptor_ptr in SYCL builds.
 *
 * When neither backend is compiled in, this is a no-op.
 */
extern "C" void create_blas_handle(void *handle) {
#ifdef COMPILE_CUDA
    check_cublas_error(cublasCreate((cublasHandle_t *)handle));
#elif defined(COMPILE_SYCL)
    // Mirror the CUDA path: write the new descriptor through the pointer the
    // caller supplied. Assigning to `handle` itself would only change the
    // local argument and leak the descriptor.
    DPCT_CHECK_ERROR(*(dpct::blas::descriptor_ptr *)handle = new dpct::blas::descriptor());
#else
    (void)handle;
#endif
}

/**
 * Creates a CSR sparse-matrix descriptor from caller-supplied arrays and
 * stores it in the object `mat` points to.
 *
 * @param mat               Address of the descriptor variable to initialise:
 *                          a cusparseSpMatDescr_t in CUDA builds, a
 *                          dpct::sparse::sparse_matrix_desc_t in SYCL builds.
 * @param rows              Number of matrix rows.
 * @param cols              Number of matrix columns.
 * @param nnz               Number of stored non-zero entries.
 * @param csrRowOffsets     CSR row-offsets array.
 * @param csrColInd         CSR column-indices array.
 * @param csrValues         CSR values array.
 * @param csrRowOffsetsType Index type of csrRowOffsets (CUDA path only).
 * @param csrColIndType     Index type of csrColInd (CUDA path only).
 * @param idxBase           Index base (CUDA path only).
 * @param valueType         Element type of csrValues (CUDA path only).
 *
 * NOTE(review): the signature uses cuSPARSE/CUDA enum types unconditionally, so
 * a SYCL-only build needs those names supplied elsewhere - confirm. The SYCL
 * path keeps the fixed 32-bit index types, zero index base and CUBLAS_SIZE
 * value type and does not read the four type arguments.
 */
extern "C" void sparse_create_scr(void *mat, int64_t rows, int64_t cols, int64_t nnz,
                                  void* csrRowOffsets,
                                  void* csrColInd,
                                  void* csrValues,
                                  cusparseIndexType_t csrRowOffsetsType,
                                  cusparseIndexType_t csrColIndType,
                                  cusparseIndexBase_t idxBase,
                                  cudaDataType valueType) {
#ifdef COMPILE_CUDA
    // Build the descriptor from the arguments. This translation unit has no
    // `config`, `N` or `nz`, so the caller's persistent data cannot be used here.
    // NOTE(review): cusparseCreateCsr returns a cusparseStatus_t - confirm
    // check_cuda_error is the intended checker for it.
    check_cuda_error(cusparseCreateCsr((cusparseSpMatDescr_t *)mat, rows, cols, nnz, csrRowOffsets, csrColInd, csrValues,
                                       csrRowOffsetsType, csrColIndType, idxBase, valueType));
#elif defined(COMPILE_SYCL)
    // Write the descriptor through the caller's pointer; assigning to `mat`
    // itself would only change the local argument.
    DPCT_CHECK_ERROR(*(dpct::sparse::sparse_matrix_desc_t *)mat = dpct::sparse::sparse_matrix_desc_t(
        new dpct::sparse::sparse_matrix_desc(rows, cols, nnz, csrRowOffsets, csrColInd, csrValues,
                                             dpct::library_data_t::real_int32, dpct::library_data_t::real_int32,
                                             oneapi::mkl::index_base::zero, CUBLAS_SIZE,
                                             dpct::sparse::matrix_format::csr)));
    (void)csrRowOffsetsType;
    (void)csrColIndType;
    (void)idxBase;
    (void)valueType;
#else
    (void)mat; (void)rows; (void)cols; (void)nnz;
    (void)csrRowOffsets; (void)csrColInd; (void)csrValues;
    (void)csrRowOffsetsType; (void)csrColIndType; (void)idxBase; (void)valueType;
#endif
}
26 changes: 26 additions & 0 deletions src/gpu_utils/accel_utils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
//
// Created by sachetto on 28/11/24.
//

#ifndef ACCEL_UTILS_H
#define ACCEL_UTILS_H

#include <stddef.h>

/* Direction of a memcpy_device transfer. */
typedef enum {
    HOST_TO_DEVICE = 0, /* source is host memory, destination is device memory */
    DEVICE_TO_HOST = 1  /* source is device memory, destination is host memory */
} copy_direction;

/* Backend-neutral device helpers. The functions are given C linkage so that
 * both C and C++ translation units can call them. */
#ifdef __cplusplus
extern "C" {
#endif
/* Allocates n bytes of device memory; the device pointer is returned through ptr. */
void malloc_device(void **ptr, size_t n);
/* Releases device memory referenced by ptr. */
void free_device(void *ptr);
/* Copies n bytes from src to dest in the direction given by kind. */
void memcpy_device(void *dest, const void *src, size_t n, copy_direction kind);
/* Creates a sparse-library handle; in CUDA builds handle is used as a cusparseHandle_t *. */
void create_sparse_handle(void *handle);
/* Creates a BLAS-library handle; in CUDA builds handle is used as a cublasHandle_t *. */
void create_blas_handle(void *handle);
#ifdef __cplusplus
}
#endif
#endif //ACCEL_UTILS_H
9 changes: 4 additions & 5 deletions src/gpu_utils/build.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
GPU_UTILS_SOURCE_FILES="gpu_utils.c"
GPU_UTILS_HEADER_FILES="gpu_utils.h"
GPU_UTILS_SOURCE_FILES="gpu_utils.c accel_utils.cpp"
GPU_UTILS_HEADER_FILES="gpu_utils.h accel_utils.h"

if [ -n "$CUDA_FOUND" ]; then
GPU_UTILS_EXTRA_LIB_PATH=$CUDA_LIBRARY_PATH
GPU_UTILS_DYNAMIC_LIBS="c cudart"
GPU_UTILS_DYNAMIC_LIBS="c cudart cublas cusparse"
GPU_UTILS_SOURCE_FILES="$GPU_UTILS_SOURCE_FILES gpu_utils.cu"
fi


COMPILE_SHARED_LIB "gpu_utils" "$GPU_UTILS_SOURCE_FILES" "$GPU_UTILS_HEADER_FILES" "" "$GPU_UTILS_DYNAMIC_LIBS" "$GPU_UTILS_EXTRA_LIB_PATH" "" "$CUDA_FOUND"
#COMPILE_STATIC_LIB "gpu_utils" "$GPU_UTILS_SOURCE_FILES" "$GPU_UTILS_HEADER_FILES"
COMPILE_SHARED_LIB "gpu_utils" "$GPU_UTILS_SOURCE_FILES" "$GPU_UTILS_HEADER_FILES" "" "$GPU_UTILS_DYNAMIC_LIBS" "$GPU_UTILS_EXTRA_LIB_PATH" "" "$CUDA_FOUND"
3 changes: 2 additions & 1 deletion src/gpu_utils/gpu_utils.cu
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
__global__ void gpu_ecg_integral_kernel(const real *beta_im, const real* distances, const real *volumes, int n, real *result);
__global__ void kernel_gpu_vec_div_vec(real *vec1, real *vec2, real *vec3, size_t n);

extern "C" void gpu_vec_div_vec(real *vec1, real *vec2, real *res, size_t n) {

extern "C" void gpu_vec_div_vec(real *vec1, real *vec2, real *res, size_t n) {
const int GRID = (n + BLOCK_SIZE - 1)/BLOCK_SIZE;
kernel_gpu_vec_div_vec<<<GRID, BLOCK_SIZE>>>(vec1, vec2, res, n);
cudaDeviceSynchronize();
Expand Down
6 changes: 2 additions & 4 deletions src/save_mesh_library/save_mesh.c
Original file line number Diff line number Diff line change
Expand Up @@ -344,8 +344,7 @@ SAVE_MESH(save_as_text_or_binary) {
float value;
if(ode_solver->gpu) {
value = (float) sv_cpu[i*ode_solver->original_num_cells];
}
else {
} else {
value = sv_cpu[i];
}

Expand All @@ -357,8 +356,7 @@ SAVE_MESH(save_as_text_or_binary) {
}

fprintf(output_file, "\n");
}
else {
} else {
fprintf(output_file, "%g,%g,%g,%g,%g,%g,%g\n", center_x, center_y, center_z, dx, dy, dz, v);
}
}
Expand Down

0 comments on commit 0b34b9c

Please sign in to comment.