Skip to content

Commit 986c137

Browse files
author
Cielo
committed
Added SYCL2020 compatibility
1 parent bb71dac commit 986c137

18 files changed

+232
-151
lines changed

.gitignore

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
*.swp
2+
*.swo
3+
/build/
4+
/.ccls-cache/

CMakeLists.txt

+72-32
Original file line numberDiff line numberDiff line change
@@ -29,28 +29,59 @@ set(MYBUILDTYPE Release Debug RelWithDebInfo MinSizeRel)
2929
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build")
3030
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS ${MYBUILDTYPE})
3131

32-
# Unfortunately, there is no proper support of SyCL in CMake as of now.
32+
# Unfortunately, there is no proper support of SYCL in CMake as of now.
3333
# Compilation without passing any environment variables was given as a
3434
# requirement, hence, we set the compiler here directly (CMake discourages this).
3535
set(SYCL oneAPI CACHE STRING "Select the SYCL target architecture")
36-
set(MYSYCL oneAPI oneAPIold LLVM hipSYCL)
36+
set(MYSYCL oneAPI oneAPIold LLVM OpenSYCL)
3737
set_property(CACHE SYCL PROPERTY STRINGS ${MYSYCL})
3838
target_compile_definitions(echo PRIVATE SYCL=${SYCL})
39-
# TODO: So far the only target for LLVM is NVIDIA, and for hipSYCL omp/cpu. Add the others!
40-
if (SYCL STREQUAL LLVM)
41-
set(CMAKE_CXX_COMPILER "clang++")
42-
message(STATUS "Configuring Echo with LLVM (NVIDIA) Toolchain")
43-
target_compile_options(echo PUBLIC -fsycl -fsycl-targets=nvptx64-nvidia-cuda)
44-
target_link_options (echo PUBLIC -fsycl -fsycl-targets=nvptx64-nvidia-cuda)
45-
message(STATUS "Set device target to NVIDIA PTX64")
46-
elseif (SYCL STREQUAL hipSYCL )
39+
target_compile_options(echo PUBLIC -fsycl)
40+
target_link_options (echo PUBLIC -fsycl)
41+
42+
# A bit cumbersome to support all!
43+
if (SYCL STREQUAL OpenSYCL )
4744
set(CMAKE_CXX_COMPILER "syclcc-clang")
48-
message(STATUS "Configuring Echo with hipSycl Toolchain")
49-
else()
50-
set(CMAKE_CXX_COMPILER "dpcpp")
51-
message(STATUS "Configuring Echo with LLVM (Intel) Toolchain")
52-
target_compile_options(echo PUBLIC -fsycl -fsycl-unnamed-lambda -xHost)
53-
target_link_options (echo PUBLIC -fsycl)
45+
message(STATUS "Configuring Echo with OpenSycl Toolchain")
46+
elseif (SYCL STREQUAL LLVM)
47+
set(CMAKE_CXX_COMPILER "clang++")
48+
set(SYCL_LLVM_CHAIN Intel CACHE STRING "Intel/NVidia/AMD")
49+
set(MYCHAIN AMD Intel NVidia)
50+
set_property(CACHE SYCL_LLVM_CHAIN PROPERTY STRINGS ${MYCHAIN})
51+
message(STATUS "Configuring Echo with LLVM (${SYCL_LLVM_CHAIN}) Toolchain")
52+
if (SYCL_LLVM_CHAIN STREQUAL AMD)
53+
target_compile_options(echo PUBLIC -fsycl-targets=amdgcn-amd-amdhsa)
54+
target_link_options (echo PUBLIC -fsycl-targets=amdgcn-amd-amdhsa)
55+
set(SYCL_LLVM_ARCH gfx90a CACHE STRING "Run sycl-ls when in doubt.")
56+
set(MYOFFLOAD_ARCH gfx90a gfx908 gfx906)
57+
set_property(CACHE SYCL_LLVM_ARCH PROPERTY STRINGS ${MYOFFLOAD_ARCH})
58+
target_compile_options(echo PUBLIC -Xsycl-target-backend --offload-arch=${SYCL_LLVM_ARCH})
59+
target_link_options (echo PUBLIC -Xsycl-target-backend --offload-arch=${SYCL_LLVM_ARCH})
60+
message(STATUS "Set device target to AMDGCN ${SYCL_LLVM_ARCH}")
61+
elseif (SYCL_LLVM_CHAIN STREQUAL NVidia)
62+
# Atm only AMD needs the arch. specified. Once needed for NVIDIA as well, do it here.
63+
unset(SYCL_LLVM_ARCH CACHE)
64+
target_compile_options(echo PUBLIC -fsycl-targets=nvptx64-nvidia-cuda)
65+
target_link_options (echo PUBLIC -fsycl-targets=nvptx64-nvidia-cuda)
66+
include(CheckLanguage)
67+
check_language(CUDA)
68+
if (CMAKE_CUDA_COMPILER)
69+
enable_language(CUDA)
70+
target_compile_options(echo PUBLIC --cuda-path=${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}/../../..)
71+
target_link_options (echo PUBLIC --cuda-path=${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}/../../..)
72+
#target_compile_options(echo PUBLIC --cuda-path=${CMAKE_CUDA_TOOLKIT_LIBRARY_ROOT})
73+
#target_link_options (echo PUBLIC --cuda-path=${CMAKE_CUDA_TOOLKIT_LIBRARY_ROOT})
74+
message(STATUS "Added explicit CUDA language.")
75+
else ( )
76+
message(WARNING "Explicit CUDA not found. With Codeplay NVidia plugin you should be fine. Otherwise...")
77+
endif ( )
78+
message(STATUS "Set device target to NVIDIA PTX64.")
79+
endif ( )
80+
else ( )
81+
unset(SYCL_LLVM_CHAIN CACHE)
82+
unset(SYCL_LLVM_ARCH CACHE)
83+
set(CMAKE_CXX_COMPILER "icpx")
84+
message(STATUS "Configuring Echo with oneAPI (Intel) Toolchain")
5485
message(STATUS "Set device target to SPIR-V Just-in-Time Compilation")
5586
#-- Stuff specific for oneAPI (for the moment)
5687
option(ENABLE_MPI "Compile with MPI besides DPC++" OFF)
@@ -96,16 +127,26 @@ else()
96127
endif()
97128

98129
# -- DPC++ and Runtime
99-
option(GPU "Uses device selector rather than host" OFF)
100-
if(GPU)
101-
set(SUFFIX "${SUFFIX}_gpu")
102-
target_compile_definitions(echo PRIVATE GPU)
103-
endif(GPU)
130+
set(SYCL_DEVICE CPU CACHE STRING "Select the primary SYCL device (default/CPU/GPU/etc.). Overridden by forceDevice in echo.par")
131+
set(MYDEVICE DEF CPU GPU ACC FPGA)
132+
set_property(CACHE SYCL_DEVICE PROPERTY STRINGS ${MYDEVICE})
133+
target_compile_definitions(echo PRIVATE DEVICE=DEV_${SYCL_DEVICE})
134+
if(NOT SYCL_DEVICE STREQUAL DEF )
135+
string(TOLOWER ${SYCL_DEVICE} DEVSUFFIX)
136+
set(SUFFIX "${SUFFIX}_${DEVSUFFIX}")
137+
endif()
138+
# TODO: Should be done way better than this! Also a10sx and s10sx were options...
139+
if(SYCL_DEVICE STREQUAL FPGA )
140+
message(WARNING "VERY experimental Intel FPGA workflow (probably not yet supported)!")
141+
target_compile_options(echo PUBLIC -fsycl-link -fintelfpga -Xshardware -Xsboard-package=/glob/development-tools/oneapi/oneapi/intel_s10sx_pac -Xsboard=pac_s10_usm)
142+
target_link_options (echo PUBLIC -fsycl-link -fintelfpga -Xshardware -Xsboard-package=/glob/development-tools/oneapi/oneapi/intel_s10sx_pac -Xsboard=pac_s10_usm)
143+
endif()
104144

145+
# -- Compiling
105146
try_compile(COMPILE_SUCCEEDED
106-
${CMAKE_BINARY_DIR}/check
107-
SOURCES ${CMAKE_SOURCE_DIR}/cmake/checks/fscheck.cpp
108-
CXX_STANDARD 17
147+
${CMAKE_BINARY_DIR}/check
148+
SOURCES ${CMAKE_SOURCE_DIR}/cmake/checks/fscheck.cpp
149+
CXX_STANDARD 17
109150
)
110151
if(COMPILE_SUCCEEDED)
111152
message(STATUS "C++ filesystem API available. Using that.")
@@ -116,15 +157,14 @@ endif()
116157
CHECK_INCLUDE_FILE_CXX("filesystem.h" FSH_FOUND)
117158
CHECK_INCLUDE_FILE_CXX(filesystem FS_FOUND)
118159

119-
120160
option(ENABLE_MEMORY_SANITATION "Enable memory sanitizer to find and track invalid memory accesses. Currently disabled." OFF)
121-
#if (ENABLE_MEMORY_SANITATION AND NOT GPU)
122-
# message(STATUS "Memory sanitation enabled. Linking appropriate libraries.")
123-
# target_compile_options(echo PUBLIC -fno-omit-frame-pointer -fsanitize=address -fsanitize=undefined -fsanitize-address-use-after-scope -Wuninitialized -g)
124-
# target_link_libraries(echo PUBLIC -fno-omit-frame-pointer -fsanitize=address -fsanitize=undefined -g)
125-
#elseif(ENABLE_MEMORY_SANITATION)
126-
# message(WARNING "Memory sanitation is only available with disabled GPU for now.")
127-
#endif()
161+
if (ENABLE_MEMORY_SANITATION) # AND NOT GPU)
162+
message(STATUS "Memory sanitation enabled. Linking appropriate libraries.")
163+
target_compile_options(echo PUBLIC -fno-omit-frame-pointer -Wuninitialized -g) # -fsanitize=address -fsanitize=undefined -fsanitize-address-use-after-scope
164+
target_link_libraries (echo PUBLIC -fno-omit-frame-pointer -g) # -fsanitize=address -fsanitize=undefined
165+
elseif(ENABLE_MEMORY_SANITATION)
166+
message(WARNING "Memory sanitation is only available with disabled GPU for now.")
167+
endif()
128168

129169
option(SINGLE_PRECISION "Change field from double to float" OFF)
130170
if(SINGLE_PRECISION)

DeviceConfig.cpp

+49-14
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,54 @@
1313
#include <sstream>
1414

1515
DeviceConfig::DeviceConfig() {
16-
for (auto const &p : mysycl::platform::get_platforms()) {
17-
for (auto dev : p.get_devices()) {
16+
for (auto const &p : mysycl::platform::get_platforms())
17+
for ( auto dev : p.get_devices() )
1818
devices.push_back(dev);
19-
}
19+
}
20+
21+
device DeviceConfig::deviceWith(int id){
22+
Logger *log = Logger::getInstance(); log->setPar(true);
23+
device temp;
24+
if( (id>=0) && (id<devices.size()) ){
25+
((*log)+2)<<TAG <<"Looking at device #"<<id; log->fl();
26+
temp = devices[id];
27+
}else{
28+
((*log)+2)<<TAG <<"Looking at DPEcho default device."; log->fl();
29+
listDevices();
30+
#if SYCL==oneAPI || SYCL==LLVM
31+
#if DEVICE==DEV_CPU
32+
temp = mysycl::device(mysycl::cpu_selector_v); // mysycl::cpu_selector_v fallBackSel;
33+
#elif DEVICE==DEV_GPU
34+
temp = mysycl::device(mysycl::gpu_selector_v); // mysycl::gpu_selector_v fallBackSel;
35+
#elif DEVICE==DEV_ACC
36+
temp = mysycl::device(mysycl::accelerator_selector_v); // mysycl::accelerator_selector_v fallBackSel;
37+
#elif DEVICE==DEV_FPGA
38+
temp = mysycl::device(mysycl::accelerator_selector_v); // mysycl::accelerator_selector_v fallBackSel;
39+
#else // host is deprecated in SYCL2020
40+
temp = mysycl::device(mysycl::default_selector_v); // mysycl::default_selector_v fallBackSel;
41+
#endif
42+
#else
43+
#if DEVICE==DEV_CPU
44+
mysycl::cpu_selector fallBackSel;
45+
#elif DEVICE==DEV_GPU
46+
mysycl::gpu_selector fallBackSel;
47+
#elif DEVICE==DEV_ACC
48+
mysycl::accelerator_selector fallBackSel;
49+
#elif DEVICE==DEV_FPGA
50+
mysycl::accelerator_selector fallBackSel;
51+
#else // host is deprecated in SYCL2020
52+
mysycl::default_selector fallBackSel;
53+
#endif
54+
temp = mysycl::device(fallBackSel);
55+
#endif
2056
}
57+
printTargetInfo(temp);
58+
return temp;
2159
}
2260

2361
void DeviceConfig::listDevices() {
24-
Logger *log = Logger::getInstance();
25-
log->setPar(true);
26-
((*log) + 2) << TAG <<"\n\t# SYCL devices:\t" << devices.size() ;
62+
Logger *log = Logger::getInstance(); log->setPar(false);
63+
((*log) + 2) << TAG <<"\n\t# Available SYCL devices:\t" << devices.size() ;
2764
for (size_t i = 0; i < devices.size(); i++) {
2865
bool hasDpSupport = devices[i].has(aspect::fp64);
2966
(*log) <<"\n\t- Device #" << i << ":\t"
@@ -64,19 +101,17 @@ device DeviceConfig::debugDevice() {
64101
throw std::runtime_error("No debug device is available on this machine!");
65102
}
66103

67-
void DeviceConfig::printTargetInfo ( mysycl::queue q) {
68-
Logger *Log = Logger::getInstance(); Log->setPar(false);
69-
auto dev = q.get_device();
104+
void DeviceConfig::printTargetInfo(device dev) {
105+
Logger *Log = Logger::getInstance(); Log->setPar(true);
70106
*Log+0<<TAG
71-
<< "\n\tHardware " << dev.get_info<info::device::name>() << " is " << (dev.is_host()? "HOST ":"")
107+
<< "\n\tHardware " << dev.get_info<info::device::name>() // << " is " << (dev.is_host()? "HOST ":"")
72108
<< (dev.is_cpu()? "CPU ":"") << (dev.is_gpu()? "GPU ":"") << (dev.is_accelerator()? " ACCELERATOR ":"")
73-
<< "\n\tMax Compute Units : " << dev.get_info<info::device::max_compute_units> ();
74-
#if SYCL <= ONEAPI
109+
<< "\n\tMax Compute Units : " << dev.get_info<info::device::max_compute_units> (); Log->fl();
110+
#if SYCL<=oneAPI
75111
*Log+0<< "\n\tMax Work Group Size: " << dev.get_info<info::device::max_work_group_size>()
76112
<< "\n\tGlobal Memory / GB : " << dev.get_info<info::device::global_mem_size> ()/pow(1024.0, 3)
77-
<< "\n\tLocal Memory / kB : " << dev.get_info<info::device::local_mem_size> ()/1024.0 ;
113+
<< "\n\tLocal Memory / kB : " << dev.get_info<info::device::local_mem_size> ()/1024.0 ; Log->fl();
78114
#else
79-
Log->fl();
80115
*Log+18<<"\n\tMax Work Group Size, Global and Local Memory queries are handled differently outside oneAPI.";
81116
#endif
82117
Log->fl();

DeviceConfig.hpp

+8-8
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,16 @@
1717

1818
struct DeviceConfig {
1919
private:
20-
std::vector<device> devices;
20+
std::vector<device> devices;
2121

2222
public:
23-
DeviceConfig();
24-
void printTargetInfo(mysycl::queue);
25-
void listDevices();
26-
device deviceWith(int id);
27-
device debugDevice();
28-
std::vector<device> gpus();
29-
std::vector<device> cpus();
23+
DeviceConfig();
24+
void printTargetInfo(mysycl::device);
25+
void listDevices();
26+
device deviceWith(int imd);
27+
device debugDevice();
28+
std::vector<device> gpus();
29+
std::vector<device> cpus();
3030
};
3131

3232
#endif

Domain.hpp

-2
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@ class Domain {
2121
private:
2222
Logger *Log;
2323
mysycl::queue qq; field *bufL, *bufR; // Tools for the BCex
24-
// Most of the following could be public const and save us the functions below
25-
// IF we pass values from constructor instead of reading echo.par in there.
2624
int cartDims_[3], cartPeriodic_[3], cartCoords_[3], bcType_[3];
2725
field boxMin_[3], boxMax_[3], boxSize_[3]; // Global info, physical
2826
field locMin_[3], locMax_[3], locSize_[3]; // This rank info, physical

Physics.cpp

-5
Original file line numberDiff line numberDiff line change
@@ -117,11 +117,6 @@ SYCL_EXTERNAL void cons2prim(id<1> myId, unsigned n, field_array u, field_array
117117
//-- Fluxes and characteristic velocities.
118118
// IMPORTANT: all local quantities, they have been sampled --> access simply by eg. f[VX]
119119
void physicalFlux(int dir, Metric &g, field vD[FLD_TOT], field uD[FLD_TOT], field f[FLD_TOT], field vf[2], field vt[2] ){
120-
121-
static const CONSTANT char FMTA[] = "%s: %lf %lf %lf %lf %lf %lf %lf %lf %lf \n";
122-
static const CONSTANT char FMTV[] = "%s: %lf %lf %lf\n";
123-
static const CONSTANT char FMTF[] = "%s: %lf \n";
124-
125120
field alpha = g.alpha(), betai[3], gCov[9], gCon[9];
126121
g.beta(betai);
127122
g.g3DCov(gCov);

Problem.cpp

+6-6
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@
1414

1515
using namespace std;
1616

17-
Problem::Problem(mysycl::queue qx, Grid *grid, Domain *D, field_array &fld ){
17+
Problem::Problem(mysycl::queue qx, string &confFile, Grid *grid, Domain *D, field_array &fld ){
1818
Log = Logger::getInstance(); grid_ = grid; D_ = D; N_ = grid_->nht;
1919
iOut_ = 0; iStep_ = 0; nStep_ = 0; dumpHalos = false; locSize = 1;
2020
tMax_ = 1.0; dt_ = 0.0; t_ = 0.0, tOut_=0.025, cfl_ = 0.8/3.0; // Divide by 3 as it's 3D
2121
qq = qx;
2222
stepTime_.init();
23-
std::ifstream inFile("echo.par"); std::string key, val;
23+
std::ifstream inFile(confFile); std::string key, val;
2424
Log->setPar(false); *Log+3<<TAG<<"Reading input: ";
2525
while (std::getline(inFile, key, ' ') && std::getline(inFile, val)){
2626
if(!key.compare("tMax" )){ tMax_ = static_cast<field>(stod(val)); *Log<<"\n\ttMax "<<tMax_ ; continue;}
@@ -63,7 +63,7 @@ void Problem::dump(field_array &v){ // Asynchronous output
6363
} else { // Manual indexing necessary
6464
field *vt[FLD_TOT]; for (int i = 0; i < FLD_TOT; i++) vt[i] = v[i];
6565
field *outt[FLD_TOT]; for (int i = 0; i < FLD_TOT; i++) outt[i] = out[i];
66-
qq.parallel_for(range(gr.n[0], gr.n[1], gr.n[2]), [=](item<3> it) {
66+
qq.parallel_for<class parForDump>(range(gr.n[0], gr.n[1], gr.n[2]), [=](item<3> it) {
6767
auto iOut= it.get_linear_id(); // Output array has NH halo scope here
6868
auto iV = globLinId(it.get_id(), gr.nh, gr.h); // v has WH indexing; offset by halos
6969
for(int iVar=0; iVar<FLD_TOT; ++iVar)
@@ -83,7 +83,7 @@ void Problem::dump(field_array &v){ // Asynchronous output
8383

8484
void Problem::InitConstWH(field *v, field val) { // HOST CODE: kernel for initialization.
8585
if(!v){ Log->Error("%s Array was not initialized.", TAG); return; }
86-
qq.parallel_for(range<3>(grid_->nh[0], grid_->nh[1], grid_->nh[2]), [=, gr = *(this->grid_)](item<3> it) {
86+
qq.parallel_for<class parForInitConstWH>(range<3>(grid_->nh[0], grid_->nh[1], grid_->nh[2]), [=, gr = *(this->grid_)](item<3> it) {
8787
int offset[3] = {0,0,0};
8888
auto iV = globLinId(it, gr.nh, offset); // v has WH indexing; offset by halos
8989
v[iV] = val;
@@ -92,7 +92,7 @@ void Problem::InitConstWH(field *v, field val) { // HOST CODE: kernel for initia
9292

9393
void Problem::InitConstNH(field *v, field val) { // HOST CODE: kernel for initialization.
9494
if(!v){ Log->Error("%s Array was not initialized.", TAG); return; }
95-
qq.parallel_for(range<3>(grid_->n[0], grid_->n[1], grid_->n[2]), [=, gr = *(this->grid_)](item<3> it) {
95+
qq.parallel_for<class parForInitConstNH>(range<3>(grid_->n[0], grid_->n[1], grid_->n[2]), [=, gr = *(this->grid_)](item<3> it) {
9696
auto iV = globLinId(it, gr.nh, gr.h); // v has WH indexing; offset by halos
9797
v[iV] = val;
9898
});
@@ -147,7 +147,7 @@ void Problem::Alfven(field_array &v, field_array &u){ // HOST CODE: Initializing
147147
//-- Device code
148148
field bS[]={D_->boxSize(0), D_->boxSize(1), D_->boxSize(2)};
149149
Grid gr = *grid_; // For ease of lambda capture
150-
qq.parallel_for(range(gr.n[0], gr.n[1], gr.n[2]), [=, NN=N_](item<3> it) {
150+
qq.parallel_for<class parForProblemAlfven>(range(gr.n[0], gr.n[1], gr.n[2]), [=, NN=N_](item<3> it) {
151151
field phi = 0.0, bx, by, bz, vx, vy, vz;
152152
auto i = globLinId(it, gr.nh, gr.h); // Addressing fld: WH indexing
153153

Problem.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class Problem {
3131
bool dumpHalos;
3232
field *out[FLD_TOT]; // Just to print
3333

34-
Problem(mysycl::queue q, Grid *g, Domain *f, field_array &out);
34+
Problem(mysycl::queue q, std::string &confFile, Grid *g, Domain *f, field_array &out);
3535
void InitRampWH (field *);
3636
void InitRampNH (field *);
3737
void InitConstWH(field *, field );

0 commit comments

Comments
 (0)