-
Notifications
You must be signed in to change notification settings - Fork 102
/
Copy pathKokkosBlas1_dot_tpl_spec_avail.hpp
113 lines (101 loc) · 5.2 KB
/
KokkosBlas1_dot_tpl_spec_avail.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
#ifndef KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL_HPP_
#define KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL_HPP_
namespace KokkosBlas {
namespace Impl {
// Compile-time trait: is a TPL specialization of dot available for this
// combination of template arguments?
//
// This primary template is the fallback and always reports `value == false`.
// Specializations of this struct report `true` for the exact type
// combinations they cover.
//
// XRank / YRank default to the `rank` member of the third and fourth type
// arguments, so callers normally pass only the four type arguments.
template <class ExecSpace, class AV, class XV, class YV,
          int XRank = XV::rank, int YRank = YV::rank>
struct dot_tpl_spec_avail {
  enum : bool { value = false };
};
} // namespace Impl
} // namespace KokkosBlas
namespace KokkosBlas {
namespace Impl {
// Generic Host side BLAS (could be MKL or whatever)
#ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS
// Emits a partial specialization of dot_tpl_spec_avail with `value == true`
// for the host BLAS TPL. The specialization stays generic over the execution
// space (ExecSpace) and matches only when the remaining arguments are exactly:
//   - an unmanaged View<SCALAR, LAYOUT, Kokkos::HostSpace> as second argument,
//   - two unmanaged View<const SCALAR*, LAYOUT,
//     Kokkos::Device<ExecSpace, MEMSPACE>> as third and fourth arguments,
//   - both ranks equal to 1.
#define KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, MEMSPACE) \
  template <class ExecSpace>                                          \
  struct dot_tpl_spec_avail<                                          \
      ExecSpace,                                                      \
      Kokkos::View<SCALAR, LAYOUT, Kokkos::HostSpace,                 \
                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,          \
      Kokkos::View<const SCALAR*, LAYOUT,                             \
                   Kokkos::Device<ExecSpace, MEMSPACE>,               \
                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,          \
      Kokkos::View<const SCALAR*, LAYOUT,                             \
                   Kokkos::Device<ExecSpace, MEMSPACE>,               \
                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,          \
      1, 1> {                                                         \
    enum : bool { value = true };                                     \
  };
// Real scalar types, LayoutLeft vectors in HostSpace.
KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::HostSpace)
KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::HostSpace)

// Complex scalar types are only enabled when KOKKOSKERNELS_ENABLE_TPL_MKL is
// not defined.
// TODO: we met difficulties in FindTPLMKL.cmake setting the BLAS library
// properly, so the test in CheckHostBlasReturnComplex.cmake could not be
// compiled and run to give a correct answer for KK_BLAS_RESULT_AS_POINTER_ARG.
// This resulted in a segfault in dot() with MKL and complex scalars, so these
// specializations are temporarily disabled until FindTPLMKL.cmake is fixed.
#ifndef KOKKOSKERNELS_ENABLE_TPL_MKL
KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL_BLAS(Kokkos::complex<double>, Kokkos::LayoutLeft, Kokkos::HostSpace)
KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL_BLAS(Kokkos::complex<float>, Kokkos::LayoutLeft, Kokkos::HostSpace)
#endif  // !KOKKOSKERNELS_ENABLE_TPL_MKL
#endif
// Emits a full specialization of dot_tpl_spec_avail with `value == true` for
// one concrete (SCALAR, LAYOUT, EXECSPACE, MEMSPACE) combination. It matches
// only when the arguments are exactly:
//   - EXECSPACE as the execution space,
//   - an unmanaged View<SCALAR, LAYOUT, Kokkos::HostSpace> as second argument,
//   - two unmanaged View<const SCALAR*, LAYOUT,
//     Kokkos::Device<EXECSPACE, MEMSPACE>> as third and fourth arguments,
//   - both ranks equal to 1.
#define KOKKOSBLAS1_DOT_TPL_SPEC(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \
  template <>                                                         \
  struct dot_tpl_spec_avail<                                          \
      EXECSPACE,                                                      \
      Kokkos::View<SCALAR, LAYOUT, Kokkos::HostSpace,                 \
                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,          \
      Kokkos::View<const SCALAR*, LAYOUT,                             \
                   Kokkos::Device<EXECSPACE, MEMSPACE>,               \
                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,          \
      Kokkos::View<const SCALAR*, LAYOUT,                             \
                   Kokkos::Device<EXECSPACE, MEMSPACE>,               \
                   Kokkos::MemoryTraits<Kokkos::Unmanaged>>,          \
      1, 1> {                                                         \
    enum : bool { value = true };                                     \
  };
// Convenience fan-out: invokes KOKKOSBLAS1_DOT_TPL_SPEC once per supported
// scalar type (float, double, Kokkos::complex<float>, Kokkos::complex<double>)
// for the given LAYOUT / EXECSPACE / MEMSPACE.
#define KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL(LAYOUT, EXECSPACE, MEMSPACE)              \
  KOKKOSBLAS1_DOT_TPL_SPEC(float, LAYOUT, EXECSPACE, MEMSPACE)                   \
  KOKKOSBLAS1_DOT_TPL_SPEC(double, LAYOUT, EXECSPACE, MEMSPACE)                  \
  KOKKOSBLAS1_DOT_TPL_SPEC(Kokkos::complex<float>, LAYOUT, EXECSPACE, MEMSPACE)  \
  KOKKOSBLAS1_DOT_TPL_SPEC(Kokkos::complex<double>, LAYOUT, EXECSPACE, MEMSPACE)
#if defined(KOKKOSKERNELS_ENABLE_TPL_CUBLAS)
// Note BMK: cuBLAS dot is consistently slower than our native dot
// (measured 11.2, 11.8, 12.0 using the perf test; all give similar results).
// The specializations below are therefore compiled out. If a future version
// improves performance, re-enable them here and in the tpl_spec_decl file.
#if 0
KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace)
#endif
#endif  // KOKKOSKERNELS_ENABLE_TPL_CUBLAS

// rocBLAS: LayoutLeft vectors on Kokkos::HIP / Kokkos::HIPSpace.
#if defined(KOKKOSKERNELS_ENABLE_TPL_ROCBLAS)
KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace)
#endif  // KOKKOSKERNELS_ENABLE_TPL_ROCBLAS

// MKL with the SYCL backend enabled: LayoutLeft vectors in SYCL device USM.
#if defined(KOKKOSKERNELS_ENABLE_TPL_MKL) && defined(KOKKOS_ENABLE_SYCL)
KOKKOSBLAS1_DOT_TPL_SPEC_AVAIL(Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLDeviceUSMSpace)
#endif  // KOKKOSKERNELS_ENABLE_TPL_MKL && KOKKOS_ENABLE_SYCL
} // namespace Impl
} // namespace KokkosBlas
#endif