Skip to content

Commit bab7bf4

Browse files
committed
include, doc: update and fix brgemm texts and labels
f
1 parent 431631a commit bab7bf4

File tree

10 files changed

+68
-70
lines changed

10 files changed

+68
-70
lines changed

doc/Doxyfile.in

+1-1
Original file line numberDiff line numberDiff line change
@@ -1962,7 +1962,7 @@ INCLUDE_FILE_PATTERNS =
19621962
# recursively expanded use the := operator instead of the = operator.
19631963
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
19641964

1965-
PREDEFINED = DOXYGEN_SHOULD_SKIP_THIS DNNL_GPU_RUNTIME=DNNL_RUNTIME_OCL DNNL_WITH_SYCL DNNL_USE_SYCL_BUFFERS
1965+
PREDEFINED = DOXYGEN_SHOULD_SKIP_THIS DNNL_GPU_RUNTIME=DNNL_RUNTIME_OCL DNNL_WITH_SYCL DNNL_USE_SYCL_BUFFERS DNNL_EXPERIMENTAL_UKERNEL
19661966

19671967
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
19681968
# tag can be used to specify a list of macro names that should be expanded. The

doc/build/link.md

+13-12
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,19 @@ on how oneDNN was built.
77

88
## Header Files
99

10-
| File | Description |
11-
|:-------------------------------------------|:----------------------------------|
12-
| ``include/oneapi/dnnl/dnnl.h`` | C header |
13-
| ``include/oneapi/dnnl/dnnl.hpp`` | C++ header |
14-
| ``include/oneapi/dnnl/dnnl_types.h`` | Auxiliary C header |
15-
| ``include/oneapi/dnnl/dnnl_config.h`` | Auxiliary C header |
16-
| ``include/oneapi/dnnl/dnnl_version.h`` | C header with version information |
17-
| ``include/oneapi/dnnl/dnnl_graph.h`` | C header for graph API |
18-
| ``include/oneapi/dnnl/dnnl_graph.hpp`` | C++ header for graph API |
19-
| ``include/oneapi/dnnl/dnnl_graph_types.h`` | Auxiliary C header for graph API |
20-
| ``include/oneapi/dnnl/dnnl_ukernel.h`` | C header with ukernel API |
21-
| ``include/oneapi/dnnl/dnnl_ukernel.hpp`` | C++ header with ukernel API |
10+
| File | Description |
11+
|:---------------------------------------------|:-----------------------------------|
12+
| ``include/oneapi/dnnl/dnnl.h`` | C header |
13+
| ``include/oneapi/dnnl/dnnl.hpp`` | C++ header |
14+
| ``include/oneapi/dnnl/dnnl_types.h`` | Auxiliary C header |
15+
| ``include/oneapi/dnnl/dnnl_config.h`` | Auxiliary C header |
16+
| ``include/oneapi/dnnl/dnnl_version.h`` | C header with version information |
17+
| ``include/oneapi/dnnl/dnnl_graph.h`` | C header for graph API |
18+
| ``include/oneapi/dnnl/dnnl_graph.hpp`` | C++ header for graph API |
19+
| ``include/oneapi/dnnl/dnnl_graph_types.h`` | Auxiliary C header for graph API |
20+
| ``include/oneapi/dnnl/dnnl_ukernel.h`` | C header for ukernel API |
21+
| ``include/oneapi/dnnl/dnnl_ukernel.hpp`` | C++ header for ukernel API |
22+
| ``include/oneapi/dnnl/dnnl_ukernel_types.h`` | Auxiliary C header for ukernel API |
2223

2324
## Libraries
2425

doc/rst/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ oneAPI Deep Neural Network Library Developer Guide and Reference
1111
dev_guide_examples
1212
performance_profiling_and_inspection
1313
advanced_topics
14+
ukernels
1415
group_dnnl_api.rst
1516

1617
oneAPI Deep Neural Network Library (oneDNN) is an open-source cross-platform performance library of basic building blocks for deep learning applications. The library is optimized for Intel Architecture Processors, Intel Processor Graphics and Xe Architecture graphics. Support for other architectures such as Arm* 64-bit Architecture (AArch64) and OpenPOWER* Power ISA (PPC64) is experimental.

doc/rst/ukernels.rst

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
Ukernels
2+
#####################
3+
4+
.. toctree::
5+
:maxdepth: 1
6+
7+
dev_guide_ukernel_basic_concepts.rst
8+
dev_guide_ukernel_brgemm.rst
9+
dev_guide_ukernel_transform.rst
10+
page_cpu_brgemm_example_cpp.rst

doc/sphinx/conf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ def setup(app):
187187

188188
def fixFileNameRefs(app, env, docnames):
189189

190-
replacements = {"page_dev_guide": "dev_guide", "group_Dnnl":"group_dnnl"}
190+
replacements = {"page_dev_guide":"dev_guide", "group_Dnnl":"group_dnnl", "brgemm_pack_B":"brgemm_pack_b"}
191191
targetDir = "rst"
192192

193193
fileExtension = ".rst"

doc/ukernel/operations/brgemm.md

+25-37
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Batch-reduce General Matrix Multiplication {#dev_guide_ukernel_brgemm}
1+
Batch-Reduce General Matrix Multiplication {#dev_guide_ukernel_brgemm}
22
=======================================
33

44
>
@@ -8,33 +8,28 @@ Batch-reduce General Matrix Multiplication {#dev_guide_ukernel_brgemm}
88

99
## General
1010

11-
The batch-reduce General Matrix Multiplication ukernel (BRGeMM) is an
12-
operation that allows to compute a batch of small matrix
13-
multiplication and accumulate their results in the same destination.
11+
The batch-reduce General Matrix Multiplication ukernel (BRGeMM) is an operation
12+
that computes a batch of small matrix multiplications and accumulates their results
13+
in the same destination.
1414

15-
```math
16-
C = \sum_i A_i \cdot B_i
17-
```
15+
\f$C = \sum_i A_i \cdot B_i\f$
1816

1917
with
2018
- \f$A_i\f$ a set of matrices of dimension \f$M \times K\f$
2119
- \f$B_i\f$ a set of matrices of dimension \f$K \times N\f$
22-
- C matrix of dimension \f$M \times N\f$.
20+
- \f$C\f$ matrix of dimension \f$M \times N\f$.
2321

24-
The BRGeMM ukernel also supports accumulation with values already
25-
present in \f$C\f$, as well as post-operation and down-conversion to
26-
another \f$D\f$ matrix:
22+
The BRGeMM ukernel also supports accumulation with values already present in
23+
\f$C\f$, as well as post-operation and down-conversion to another \f$D\f$
24+
matrix:
2725

28-
```math
29-
D = \operatorname{convert}( \operatorname{post\_ops}(C + \sum_i A_i \cdot B_i, post_ops_args)).
30-
```
26+
\f$D = \operatorname{convert}( \operatorname{post\_ops}(C + \sum_i A_i \cdot B_i, post\_ops\_args))\f$
3127

3228
## Data Types
3329

34-
In general, C represents an accumulation buffer. Hence when
35-
computations are carried in floating-point arithmetic, C shall be of
36-
type f32, and when computation is carried in integer arithmetic, C
37-
should be of type s32.
30+
In general, C represents an accumulation buffer. Hence, when computations are
31+
carried out in floating-point arithmetic, C shall be of type f32; when computation
32+
is carried out in integer arithmetic, C should be of type s32.
3833

3934
The BRGeMM ukernel supports the following combinations of data types.
4035

@@ -47,21 +42,14 @@ The BRGeMM ukernel supports the following combinations of data-types.
4742

4843
## Data Representation
4944

50-
Because of hardware restrictions, the BRGeMM ukernel requires specific
51-
data layout.
45+
Because of hardware restrictions, the BRGeMM ukernel requires a specific data
46+
layout.
5247

53-
<!-- TODO: update with proper query documentation when updated --> The
54-
@ref dnnl::ukernel::brgemm_pack_B::need_pack() method can be called to determine
55-
if packing is necessary. If so,
48+
The @ref dnnl_brgemm_pack_B_need_pack function can be called to
49+
determine if packing is necessary. If so,
5650
a [packB ukernel](@ref dev_guide_ukernel_transform) shall be created to do the
5751
actual packing.
5852

59-
<!-- Which pack_type is required can be queried through #ref
60-
dnnl::ukernel::brgemm::get_pack_type(). Using the pack_type, user is
61-
responsible to pack the data appropriately before calling @ref
62-
brgemm::execute, either with custom code, or using the [transform
63-
ukernel](@ref dev_guide_ukernel_transform) -->
64-
6553
## Attributes
6654

6755
The following ukernel attributes can be set through dedicated setters.
@@ -73,19 +61,19 @@ The following ukernel attributes can be set through dedicated setters.
7361
| Post-op | [Binary](@ref dnnl::post_ops::append_binary) | Applies a @ref dnnl_api_binary operation to the result | General binary post-op restrictions |
7462

7563

76-
@note if zero-points are passed for A/B, fpmath_mode should be set for
77-
the computation to happen over floating-point format (so up-conversion
78-
to floating-point format would happen before computation). If
79-
computation in integer format is needed, BRGeMM ukernel should be
80-
configured without zero-point, and the user should prepare a
81-
compensation term that will be passed to the binary post-op.
64+
@note If zero-points are passed for A/B, fpmath_mode should be set for the
65+
computation to happen over floating-point format (so up-conversion to
66+
floating-point format would happen before computation). If computation in
67+
integer format is needed, BRGeMM ukernel should be configured without
68+
zero-point, and the user should prepare a compensation term that will be passed
69+
to the binary post-op.
8270

8371
## Implementation limitations
8472

8573
BRGeMM ukernel has no known limitations.
8674

8775
## Examples
8876

89-
[BRGeMM ukernel example](@ref brgemm_example_cpp)
77+
[BRGeMM ukernel example](@ref cpu_brgemm_example_cpp)
9078

91-
@copydetails brgemm_example_cpp_short
79+
@copydetails cpu_brgemm_example_cpp

doc/ukernel/operations/transform.md

+5-5
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ Data transformation {#dev_guide_ukernel_transform}
77
88
## General
99

10-
The packB ukernel allows to pack BRGeMM B matrices in optimal layout
11-
before executing the [BRGeMM ukernel](@ref dev_guide_ukernel_brgemm).
12-
This is an out-of-place operation.
10+
The packB ukernel allows users to pack BRGeMM B matrices in an optimal layout
11+
before executing the [BRGeMM ukernel](@ref dev_guide_ukernel_brgemm). This is an
12+
out-of-place operation.
1313

1414
## Data Types
1515

@@ -37,6 +37,6 @@ No attribute is supported for packB ukernel.
3737

3838
## Examples
3939

40-
[BRGeMM ukernel example](@ref brgemm_example_cpp)
40+
[BRGeMM ukernel example](@ref cpu_brgemm_example_cpp)
4141

42-
@copydetails brgemm_example_cpp_short
42+
@copydetails cpu_brgemm_example_cpp

examples/ukernels/cpu_brgemm.cpp

+4-8
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,14 @@
1414
* limitations under the License.
1515
*******************************************************************************/
1616

17-
/// @example brgemm.cpp
18-
/// > Annotated version: @ref brgemm_example_cpp
19-
///
20-
/// @page brgemm_example_cpp_short
17+
/// @example cpu_brgemm.cpp
18+
/// > Annotated version: @ref cpu_brgemm_example_cpp
2119
///
20+
/// @page cpu_brgemm_example_cpp BRGeMM ukernel example
2221
/// This C++ API example demonstrates how to create and execute a BRGeMM
2322
/// ukernel.
2423
///
25-
/// @page brgemm_example_cpp Example of using BRGeMM ukernel to implement Matmul
26-
/// @copydetails brgemm_example_cpp_short
27-
///
28-
/// @include brgemm.cpp
24+
/// @include cpu_brgemm.cpp
2925

3026
#include <algorithm>
3127
#include <cmath>

include/oneapi/dnnl/dnnl_ukernel.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,8 @@ dnnl_status_t DNNL_API dnnl_brgemm_execute(const_dnnl_brgemm_t brgemm,
124124
/// Executes a BRGeMM ukernel object with post operations.
125125
///
126126
/// @param brgemm BRGeMM ukernel object.
127-
/// @param A_ptr Base pointer to a tensor A.
128-
/// @param B_ptr Base pointer to a tensor B.
127+
/// @param A Base pointer to a tensor A.
128+
/// @param B Base pointer to a tensor B.
129129
/// @param A_B_offsets Pointer to a set of tensor A and tensor B offsets for
130130
/// each batch. A set must be contiguous in memory. A single batch should
131131
/// supply offsets for both tensors A and B simultaneously. The number of
@@ -177,7 +177,7 @@ dnnl_status_t DNNL_API dnnl_brgemm_pack_B_need_pack(
177177
const_dnnl_brgemm_pack_B_t brgemm_pack_B, int *need_pack);
178178

179179
/// Generates an executable part of BRGeMM ukernel packing B object.
180-
/// @param brgemm BRGeMM ukernel packing B object.
180+
/// @param brgemm_pack_B BRGeMM ukernel packing B object.
181181
/// @returns #dnnl_success on success and a status describing the error
182182
/// otherwise.
183183
dnnl_status_t DNNL_API dnnl_brgemm_pack_B_generate(

include/oneapi/dnnl/dnnl_ukernel.hpp

+5-3
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ struct handle_traits<dnnl_brgemm_pack_B_t> {
5252

5353
/// @} dnnl_api_utils
5454

55-
/// @addtogroup dnnl_api_ukernel
55+
/// @addtogroup dnnl_api_ukernel Ukernels
56+
/// Collection of ukernels
5657
/// @{
5758

5859
/// ukernel namespace
@@ -61,6 +62,7 @@ namespace ukernel {
6162
#ifdef DNNL_EXPERIMENTAL_UKERNEL
6263

6364
/// @addtogroup dnnl_api_ukernel_brgemm BRGeMM ukernel
65+
/// BRGeMM ukernel routines
6466
/// @{
6567

6668
struct brgemm : public handle<dnnl_brgemm_t> {
@@ -282,8 +284,8 @@ struct brgemm_pack_B : public handle<dnnl_brgemm_pack_B_t> {
282284

283285
/// Executes a BRGeMM ukernel packing tensor B object.
284286
///
285-
/// @param in_ptr Pointer to an input buffer.
286-
/// @param out_ptr Pointer to an output buffer.
287+
/// @param in Pointer to an input buffer.
288+
/// @param out Pointer to an output buffer.
287289
void execute(const void *in, void *out) const {
288290
dnnl_status_t status = dnnl_brgemm_pack_B_execute(get(), in, out);
289291
if (status != dnnl_success)

0 commit comments

Comments
 (0)