Skip to content

Commit 0b34b9c

Browse files
committed
Starting SYCL wrapper
1 parent 5fc8906 commit 0b34b9c

File tree

7 files changed

+209
-89
lines changed

7 files changed

+209
-89
lines changed

src/ecg_library/ecg.c

+74-73
Large diffs are not rendered by default.

src/ecg_library/ecg.h

+17-6
Original file line numberDiff line numberDiff line change
@@ -16,30 +16,41 @@ struct pseudo_bidomain_persistent_data {
1616
uint32_t diff_curr_rate;
1717
real_cpu diff_curr_max_time;
1818

19+
#if defined(COMPILE_CUDA) || defined(COMPILE_SYCL)
1920
#ifdef COMPILE_CUDA
20-
cusparseHandle_t cusparseHandle;
21-
cublasHandle_t cublasHandle;
21+
cusparseHandle_t sparseHandle;
22+
cublasHandle_t blasHandle;
23+
#else
24+
dpct::sparse::descriptor_ptr sparseHandle;
25+
dpct::blas::descriptor_ptr blasHandle;
26+
#endif
2227
int *d_col, *d_row, nz;
2328
real *d_distances;
2429
real *d_volumes;
2530
real *volumes;
2631
real *tmp_data;
2732
real *d_val;
2833
real *beta_im_cpu;
29-
3034
size_t bufferSize;
3135
void *buffer;
32-
33-
#if CUBLAS_VER_MAJOR <= 10
36+
#if defined(COMPILE_CUDA) && CUBLAS_VER_MAJOR <= 10
3437
cusparseMatDescr_t descr;
3538
real *local_sv;
36-
#else
39+
#elif defined(COMPILE_CUDA)
3740
cusparseSpMatDescr_t matA;
3841
cusparseDnVecDescr_t vec_vm;
3942
cusparseDnVecDescr_t vec_beta_im;
4043
#endif
4144

45+
#ifdef COMPILE_SYCL
46+
dpct::sparse::sparse_matrix_desc_t matA;
47+
std::shared_ptr<dpct::sparse::dense_vector_desc> vec_vm;
48+
std::shared_ptr<dpct::sparse::dense_vector_desc> vec_beta_im;
4249
#endif
50+
51+
#endif
52+
53+
4354
};
4455

4556
#define PSEUDO_BIDOMAIN_DATA ((struct pseudo_bidomain_persistent_data *)config->persistent_data)

src/gpu_utils/accel_utils.cpp

+84
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
//
2+
// Created by sachetto on 28/11/24.
3+
//
4+
5+
#include "accel_utils.h"
6+
#include "gpu_utils.h"
7+
8+
#ifdef COMPILE_CUDA
9+
#include <cublas_v2.h>
10+
#include <cusparse_v2.h>
11+
#endif
12+
13+
extern "C" void malloc_device(void **ptr, size_t n) {
14+
15+
#ifdef COMPILE_CUDA
16+
check_cuda_error(cudaMalloc(ptr, n));
17+
#elif defined(COMPILE_SYCL)
18+
DPCT_CHECK_ERROR(ptr = sycl::malloc_device(n, dpct::get_in_order_queue()));
19+
#endif
20+
21+
}
22+
23+
extern "C" void free_device(void *ptr) {
24+
#ifdef COMPILE_CUDA
25+
check_cuda_error(cudaFree(ptr));
26+
#elif defined(COMPILE_SYCL)
27+
DPCT_CHECK_ERROR(dpct::dpct_free(persistent_data->d_col, dpct::get_in_order_queue()));
28+
#endif
29+
}
30+
31+
extern "C" void memcpy_device(void *dest, const void *src, size_t n, copy_direction kind) {
32+
33+
if(kind == HOST_TO_DEVICE) {
34+
#ifdef COMPILE_CUDA
35+
check_cuda_error(cudaMemcpy(dest, src, n, cudaMemcpyHostToDevice));
36+
#elif defined(COMPILE_SYCL)
37+
sycl::device dev_ct1;
38+
sycl::queue q_ct1(dev_ct1, sycl::property_list{sycl::property::queue::in_order()});
39+
q_ct1.memcpy(dest, src, n).wait();
40+
#endif
41+
} else if(kind == DEVICE_TO_HOST) {
42+
#ifdef COMPILE_CUDA
43+
check_cuda_error(cudaMemcpy(dest, src, n, cudaMemcpyDeviceToHost));
44+
#elif defined(COMPILE_SYCL)
45+
dpct::device_ext &dev_ct1 = dpct::get_current_device();
46+
sycl::queue &q_ct1 = dev_ct1.default_queue();
47+
q_ct1.memcpy(dest, src, n).wait();
48+
#endif
49+
}
50+
}
51+
52+
extern "C" void create_sparse_handle(void *handle) {
53+
#ifdef COMPILE_CUDA
54+
check_cublas_error(cusparseCreate((cusparseHandle_t *)handle));
55+
#elif defined(COMPILE_SYCL)
56+
DPCT_CHECK_ERROR(handle = new dpct::sparse::descriptor();
57+
#endif
58+
}
59+
60+
extern "C" void create_blas_handle(void *handle) {
61+
#ifdef COMPILE_CUDA
62+
check_cublas_error(cublasCreate((cublasHandle_t *)handle));
63+
#elif defined(COMPILE_SYCL)
64+
DPCT_CHECK_ERROR(handle = new dpct::blas::descriptor();
65+
#endif
66+
}
67+
68+
extern "C" void sparse_create_scr(void *mat, int64_t rows, int64_t cols, int64_t nnz,
69+
void* csrRowOffsets,
70+
void* csrColInd,
71+
void* csrValues,
72+
cusparseIndexType_t csrRowOffsetsType,
73+
cusparseIndexType_t csrColIndType,
74+
cusparseIndexBase_t idxBase,
75+
cudaDataType valueType) {
76+
#ifdef COMPILE_CUDA
77+
check_cuda_error(cusparseCreateCsr(&(PSEUDO_BIDOMAIN_DATA->matA), N, N, nz, PSEUDO_BIDOMAIN_DATA->d_row, PSEUDO_BIDOMAIN_DATA->d_col,
78+
PSEUDO_BIDOMAIN_DATA->d_val, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUBLAS_SIZE));
79+
#elif defined(COMPILE_SYCL)
80+
DPCT_CHECK_ERROR(mat = new dpct::sparse::sparse_matrix_desc(
81+
rows, cols, nnz, csrRowOffsets,csrColInd, csrValues, dpct::library_data_t::real_int32,
82+
dpct::library_data_t::real_int32, oneapi::mkl::index_base::zero, CUBLAS_SIZE, dpct::sparse::matrix_format::csr));
83+
#endif
84+
}

src/gpu_utils/accel_utils.h

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
//
2+
// Created by sachetto on 28/11/24.
3+
//
4+
5+
#ifndef ACCEL_UTILS_H
6+
#define ACCEL_UTILS_H
7+
8+
#include <stddef.h>
9+
10+
typedef enum {
11+
HOST_TO_DEVICE,
12+
DEVICE_TO_HOST,
13+
} copy_direction;
14+
15+
#ifdef __cplusplus
16+
extern "C" {
17+
#endif
18+
void malloc_device(void **ptr, size_t n);
19+
void free_device(void *ptr);
20+
void memcpy_device(void *dest, const void *src, size_t n, copy_direction kind);
21+
void create_sparse_handle(void *handle);
22+
void create_blas_handle(void *handle);
23+
#ifdef __cplusplus
24+
}
25+
#endif
26+
#endif //ACCEL_UTILS_H

src/gpu_utils/build.sh

+4-5
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
1-
GPU_UTILS_SOURCE_FILES="gpu_utils.c"
2-
GPU_UTILS_HEADER_FILES="gpu_utils.h"
1+
GPU_UTILS_SOURCE_FILES="gpu_utils.c accel_utils.cpp"
2+
GPU_UTILS_HEADER_FILES="gpu_utils.h accel_utils.h"
33

44
if [ -n "$CUDA_FOUND" ]; then
55
GPU_UTILS_EXTRA_LIB_PATH=$CUDA_LIBRARY_PATH
6-
GPU_UTILS_DYNAMIC_LIBS="c cudart"
6+
GPU_UTILS_DYNAMIC_LIBS="c cudart cublas cusparse"
77
GPU_UTILS_SOURCE_FILES="$GPU_UTILS_SOURCE_FILES gpu_utils.cu"
88
fi
99

1010

11-
COMPILE_SHARED_LIB "gpu_utils" "$GPU_UTILS_SOURCE_FILES" "$GPU_UTILS_HEADER_FILES" "" "$GPU_UTILS_DYNAMIC_LIBS" "$GPU_UTILS_EXTRA_LIB_PATH" "" "$CUDA_FOUND"
12-
#COMPILE_STATIC_LIB "gpu_utils" "$GPU_UTILS_SOURCE_FILES" "$GPU_UTILS_HEADER_FILES"
11+
COMPILE_SHARED_LIB "gpu_utils" "$GPU_UTILS_SOURCE_FILES" "$GPU_UTILS_HEADER_FILES" "" "$GPU_UTILS_DYNAMIC_LIBS" "$GPU_UTILS_EXTRA_LIB_PATH" "" "$CUDA_FOUND"

src/gpu_utils/gpu_utils.cu

+2-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
__global__ void gpu_ecg_integral_kernel(const real *beta_im, const real* distances, const real *volumes, int n, real *result);
44
__global__ void kernel_gpu_vec_div_vec(real *vec1, real *vec2, real *vec3, size_t n);
55

6-
extern "C" void gpu_vec_div_vec(real *vec1, real *vec2, real *res, size_t n) {
6+
7+
extern "C" void gpu_vec_div_vec(real *vec1, real *vec2, real *res, size_t n) {
78
const int GRID = (n + BLOCK_SIZE - 1)/BLOCK_SIZE;
89
kernel_gpu_vec_div_vec<<<GRID, BLOCK_SIZE>>>(vec1, vec2, res, n);
910
cudaDeviceSynchronize();

src/save_mesh_library/save_mesh.c

+2-4
Original file line numberDiff line numberDiff line change
@@ -344,8 +344,7 @@ SAVE_MESH(save_as_text_or_binary) {
344344
float value;
345345
if(ode_solver->gpu) {
346346
value = (float) sv_cpu[i*ode_solver->original_num_cells];
347-
}
348-
else {
347+
} else {
349348
value = sv_cpu[i];
350349
}
351350

@@ -357,8 +356,7 @@ SAVE_MESH(save_as_text_or_binary) {
357356
}
358357

359358
fprintf(output_file, "\n");
360-
}
361-
else {
359+
} else {
362360
fprintf(output_file, "%g,%g,%g,%g,%g,%g,%g\n", center_x, center_y, center_z, dx, dy, dz, v);
363361
}
364362
}

0 commit comments

Comments
 (0)