Skip to content

Commit 11abf3f

Browse files
Extend support to BF16 in npu plugin (#26469)
### Details: Extend BF16 logic in the NPU plugin. For what it's worth, functional tests on the NPU side are running with these changes, although there are some open issues in the NPU compiler which cause accuracy issues. Until those are resolved, this PR should be merged to ease integration on our side. ### Tickets: [ - *ticket-id*](https://jira.devtools.intel.com/browse/EISW-140090)
1 parent 288c5f9 commit 11abf3f

File tree

4 files changed

+30
-7
lines changed

4 files changed

+30
-7
lines changed

src/plugins/intel_npu/src/backend/include/zero_device.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ class ZeroDevice : public IDevice {
6969

7070
std::map<ov::element::Type, float> device_gops = {{ov::element::f32, 0.f},
7171
{ov::element::f16, 0.f},
72+
{ov::element::bf16, 0.f},
7273
{ov::element::u8, 0.f},
7374
{ov::element::i8, 0.f}};
7475

src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,8 @@ void ZeroInferRequest::check_network_precision(const ov::element::Type_t precisi
551551
break;
552552
case ov::element::Type_t::f16:
553553
break;
554+
case ov::element::Type_t::bf16:
555+
break;
554556
case ov::element::Type_t::u4:
555557
break;
556558
case ov::element::Type_t::i4:
@@ -575,7 +577,7 @@ void ZeroInferRequest::check_network_precision(const ov::element::Type_t precisi
575577
break;
576578
default:
577579
OPENVINO_THROW("Unsupported tensor precision: " + ov::element::Type(precision).get_type_name() +
578-
"! Supported precisions: FP32, FP16, U4, I4, U8, I8, U16, I16, U32, I32, U64, I64, FP64");
580+
"! Supported precisions: FP32, FP16, BF16, U4, I4, U8, I8, U16, I16, U32, I32, U64, I64, FP64");
579581
}
580582
}
581583

src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,6 @@ std::vector<std::string> disabledTestPatterns() {
266266
_skipRegistry.addPatterns(
267267
"Tests with unsupported precision", {
268268
".*InferRequestCheckTensorPrecision.*type=boolean.*",
269-
".*InferRequestCheckTensorPrecision.*type=bf16.*",
270269
".*InferRequestCheckTensorPrecision.*type=f64.*",
271270
".*InferRequestCheckTensorPrecision.*type=u1\\D.*",
272271
// [Track number: E#97469]

src/plugins/intel_npu/tools/single-image-test/main.cpp

+26-5
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,8 @@ std::vector<cv::Mat> ovToCV(const ov::Tensor& tensor, const ov::Shape& shape, co
287287
"Unsupported layout: ", layout.to_string());
288288

289289
OPENVINO_ASSERT(precision == ov::element::Type_t::u8 || precision == ov::element::Type_t::f32 ||
290-
precision == ov::element::Type_t::f16 || precision == ov::element::Type_t::i32,
290+
precision == ov::element::Type_t::f16 || precision == ov::element::Type_t::bf16 ||
291+
precision == ov::element::Type_t::i32,
291292
"Unsupported precision: ", precision.get_type_name());
292293

293294
int cvType = 0;
@@ -302,6 +303,9 @@ std::vector<cv::Mat> ovToCV(const ov::Tensor& tensor, const ov::Shape& shape, co
302303
} else if (precision == ov::element::Type_t::f16) {
303304
cvType = CV_16SC1;
304305
elemSize = sizeof(ov::float16);
306+
} else if (precision == ov::element::Type_t::bf16) {
307+
cvType = CV_16SC1;
308+
elemSize = sizeof(ov::bfloat16);
305309
} else if (precision == ov::element::Type_t::i32) {
306310
cvType = CV_32SC1;
307311
elemSize = sizeof(int32_t);
@@ -392,11 +396,14 @@ void cvToOV(const cv::Mat& cvImg, const ov::Tensor& tensor, const ov::Shape& sha
392396
cvType = static_cast<int>(CV_32FC(C));
393397
} else if (precision == ov::element::Type_t::f16) {
394398
cvType = static_cast<int>(CV_16SC(C));
399+
} else if (precision == ov::element::Type_t::bf16) {
400+
cvType = static_cast<int>(CV_16SC(C));
395401
} else if (precision == ov::element::Type_t::i32) {
396402
cvType = static_cast<int>(CV_32SC(C));
397403
} else {
398404
OPENVINO_ASSERT(precision == ov::element::Type_t::u8 || precision == ov::element::Type_t::f32 ||
399-
precision == ov::element::Type_t::f16 || precision == ov::element::Type_t::i32,
405+
precision == ov::element::Type_t::f16 || precision == ov::element::Type_t::bf16 ||
406+
precision == ov::element::Type_t::i32,
400407
"Unsupported precision ", precision.get_type_name());
401408
}
402409

@@ -437,6 +444,10 @@ void cvToOV(const cv::Mat& cvImg, const ov::Tensor& tensor, const ov::Shape& sha
437444
const auto inPtr = in.ptr<float>();
438445
const auto outPtr = out.ptr<ov::float16>();
439446
convertBufferType(outPtr, inPtr, out.size().area() * C);
447+
} else if (precision == ov::element::Type_t::bf16) {
448+
const auto inPtr = in.ptr<float>();
449+
const auto outPtr = out.ptr<ov::bfloat16>();
450+
convertBufferType(outPtr, inPtr, out.size().area() * C);
440451
} else if (precision == ov::element::Type_t::i32) {
441452
in.convertTo(out, CV_32S);
442453
} else {
@@ -451,7 +462,8 @@ void cvToOV(const cv::Mat& cvImg, const ov::Tensor& tensor, const ov::Shape& sha
451462
} else if (layout == ov::Layout("NCHW")) {
452463
auto tensorPlanes = ovToCV(tensor, shape, layout, 0);
453464

454-
if (precision != ov::element::Type_t::f16) {
465+
if (!(precision == ov::element::Type_t::f16 ||
466+
precision == ov::element::Type_t::bf16)) {
455467
cv::split(in, tensorPlanes);
456468
} else {
457469
std::vector<cv::Mat> inPlanes;
@@ -461,8 +473,13 @@ void cvToOV(const cv::Mat& cvImg, const ov::Tensor& tensor, const ov::Shape& sha
461473

462474
for (size_t i = 0; i < tensorPlanes.size(); ++i) {
463475
const auto inPtr = inPlanes[i].ptr<float>();
464-
const auto outPtr = tensorPlanes[i].ptr<ov::float16>();
465-
convertBufferType(outPtr, inPtr, inPlanes[i].size().area());
476+
if (precision == ov::element::Type_t::f16) {
477+
const auto outPtr = tensorPlanes[i].ptr<ov::float16>();
478+
convertBufferType(outPtr, inPtr, inPlanes[i].size().area());
479+
} else if (precision == ov::element::Type_t::bf16) {
480+
const auto outPtr = tensorPlanes[i].ptr<ov::bfloat16>();
481+
convertBufferType(outPtr, inPtr, inPlanes[i].size().area());
482+
}
466483
}
467484
}
468485

@@ -1761,6 +1778,8 @@ static int runSingleImageTest() {
17611778
inputBinPrecisionForOneInfer[inferIdx][precisionIdx] = ov::element::f32;
17621779
} else if (strEq(precision, "FP16")) {
17631780
inputBinPrecisionForOneInfer[inferIdx][precisionIdx] = ov::element::f16;
1781+
} else if (strEq(precision, "BF16")) {
1782+
inputBinPrecisionForOneInfer[inferIdx][precisionIdx] = ov::element::bf16;
17641783
} else if (strEq(precision, "I32")) {
17651784
inputBinPrecisionForOneInfer[inferIdx][precisionIdx] = ov::element::i32;
17661785
} else if (strEq(precision, "I64")) {
@@ -1808,6 +1827,8 @@ static int runSingleImageTest() {
18081827
ov::element::Type prc_in = ov::element::u8;
18091828
if (FLAGS_ip == "FP16")
18101829
prc_in = ov::element::f16;
1830+
else if (FLAGS_ip == "BF16")
1831+
prc_in = ov::element::bf16;
18111832
else if (FLAGS_ip == "FP32")
18121833
prc_in = ov::element::f32;
18131834
else if (FLAGS_ip == "I32")

0 commit comments

Comments
 (0)