Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[cpu] riscv64: update intrinsics #2929

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions cmake/platform.cmake
Original file line number Diff line number Diff line change
@@ -464,10 +464,17 @@ endif()
  if (DNNL_TARGET_ARCH STREQUAL "RV64")
  # Check if the RVV Intrinsics can be compiled with the current toolchain and flags
  include(CheckCXXSourceCompiles)
- check_cxx_source_compiles("#include <riscv_vector.h>
+ check_cxx_source_compiles("#if !defined(__riscv) || !defined(__riscv_v)
+ #error \"RISC-V or vector extension(RVV) is not supported by the compiler\"
+ #endif
+
+ #if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic < 12000
+ #error \"RISC-V intrinsics v0.12 or higher is required\"
+ #endif
+
+ #include <riscv_vector.h>
  int main() {
- size_t size = 64;
- return vsetvl_e32m2(size);
+ return 0;
  };"
  CAN_COMPILE_RVV_INTRINSICS
  )
27 changes: 14 additions & 13 deletions src/cpu/rv64/rvv_nchw_pooling.cpp
Original file line number Diff line number Diff line change
@@ -57,9 +57,9 @@ void MaxPooling(const float *src, float *dst, const dim_t batch,
  int ow_offset = ow * strideW - padLeft;
  size_t size = std::min(ow_offset + kerW, inW)
  - std::max(ow_offset, 0);
- size_t cycleLength = vsetvl_e32m8(size);
- vfloat32m8_t vmax
- = vle32_v_f32m8(&arr_flt_min[0], cycleLength);
+ size_t cycleLength = __riscv_vsetvl_e32m8(size);
+ vfloat32m8_t vmax = __riscv_vle32_v_f32m8(
+ &arr_flt_min[0], cycleLength);

  for (int id = std::max(od_offset, 0);
  id < std::min(od_offset + kerD, inD); id++)
@@ -73,34 +73,35 @@ void MaxPooling(const float *src, float *dst, const dim_t batch,
  size_t iw = 0;
  for (; iw < size - cycleLength;
  iw += cycleLength) {
- vfloat32m8_t vsrc = vle32_v_f32m8(
+ vfloat32m8_t vsrc = __riscv_vle32_v_f32m8(
  &local_src[local_src_offset + iw],
  cycleLength);
- vmax = vfmax_vv_f32m8(
+ vmax = __riscv_vfmax_vv_f32m8(
  vsrc, vmax, cycleLength);
  }

- size_t tailLength = vsetvl_e32m8(size - iw);
+ size_t tailLength
+ = __riscv_vsetvl_e32m8(size - iw);
  {
- vfloat32m8_t vsrc = vle32_v_f32m8(
+ vfloat32m8_t vsrc = __riscv_vle32_v_f32m8(
  &local_src[local_src_offset + iw],
  tailLength);
- vmax = vfmax_vv_f32m8(
+ vmax = __riscv_vfmax_vv_f32m8(
  vsrc, vmax, tailLength);
  }
  }

  vfloat32m1_t min_scalar;
  float min = -__FLT_MAX__;
- min_scalar = vle32_v_f32m1(&min, 1);
+ min_scalar = __riscv_vle32_v_f32m1(&min, 1);

- cycleLength = vsetvl_e32m8(size);
+ cycleLength = __riscv_vsetvl_e32m8(size);
  vfloat32m1_t vred_res;
- vred_res = vfredmax_vs_f32m8_f32m1(
- vred_res, vmax, min_scalar, cycleLength);
+ vred_res = __riscv_vfredmax_vs_f32m8_f32m1(
+ vmax, min_scalar, cycleLength);

  float red_res;
- vse32_v_f32m1(&red_res, vred_res, 1);
+ __riscv_vse32_v_f32m1(&red_res, vred_res, 1);
  dst[dst_offset] = red_res;
  }
  }
Loading