Skip to content

Commit 327f8e2

Browse files
[GPU] Support different input and output data type in convolution ref (openvinotoolkit#26501)
### Details: - In a floating-point model, some convolutions may not be compressed to fp16, depending on the transformation policy, and such a convolution may have a fused node whose data type is fp16. In that case the convolution node's input data type is fp32 while its output data type is fp16, so convolution needs to support this case. ### Tickets: - 147689
1 parent 4ed5ed0 commit 327f8e2

File tree

2 files changed

+44
-1
lines changed

2 files changed

+44
-1
lines changed

src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_ref.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,8 @@ bool ConvolutionKernel_Ref::Validate(const Params& params) const {
123123

124124
// int8/uint8 inputs (quantization case)
125125
// require some additional checks.
126-
if (input_type == output_type && input_type != Datatype::UINT8 && input_type != Datatype::INT8)
126+
if (input_type != Datatype::UINT8 && input_type != Datatype::INT8 &&
127+
output_type != Datatype::UINT8 && output_type != Datatype::INT8)
127128
return true;
128129

129130
// (u)int8 input + fp weights

src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp

+42
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <intel_gpu/primitives/crop.hpp>
1313
#include <intel_gpu/primitives/reorder.hpp>
1414
#include <intel_gpu/primitives/reshape.hpp>
15+
#include <intel_gpu/primitives/permute.hpp>
1516

1617
#include <algorithm>
1718
#include <array>
@@ -1639,6 +1640,47 @@ TEST(convolution_f32_fw_gpu, basic_convolution) {
16391640
}
16401641
}
16411642

1643+
TEST(convolution_f32_fw_gpu, input_f32_output_f16_dynamic_ref_kernel) {
1644+
auto& engine = get_test_engine();
1645+
1646+
auto in_layout = layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx};
1647+
auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 5, 4 } });
1648+
auto weights = engine.allocate_memory({ data_types::i8, format::bfyx, { 1, 1, 3, 2 } });
1649+
auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
1650+
auto eltwise_data = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
1651+
1652+
set_values(input, {
1653+
1.0f, 2.0f, 3.0f, 4.0f,
1654+
5.0f, 2.0f, 2.0f, 3.0f,
1655+
4.0f, 6.0f, 3.0f, 3.0f,
1656+
3.0f, 5.0f, 1.0f, 1.0f,
1657+
1.0f, 1.0f, 1.0f, 1.0f }
1658+
);
1659+
1660+
topology topology(
1661+
input_layout("input", in_layout),
1662+
data("weights", weights),
1663+
data("biases", biases),
1664+
data("eltwise_data", eltwise_data),
1665+
convolution( "conv", input_info("input"), "weights", "biases", 1, {2, 1}, {1, 1}, {0, 0}, {0, 0}, false),
1666+
eltwise("eltwise", { input_info("conv"), input_info("eltwise_data") }, eltwise_mode::prod, data_types::f16),
1667+
permute("permute", input_info("eltwise"), {0, 1, 2, 3}));
1668+
1669+
ExecutionConfig config = get_test_default_config(engine);
1670+
ov::intel_gpu::ImplementationDesc conv_impl_ref = { format::bfyx, "convolution_gpu_ref", impl_types::ocl };
1671+
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl_ref } }));
1672+
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
1673+
1674+
network network(engine, topology, config);
1675+
network.set_input_data("input", input);
1676+
1677+
auto outputs = network.execute();
1678+
1679+
ASSERT_EQ(outputs.size(), size_t(1));
1680+
ASSERT_FALSE(has_node(*network.get_program(), "eltwise"));
1681+
ASSERT_EQ(outputs.at("permute").get_layout().data_type, data_types::f16);
1682+
}
1683+
16421684
TEST(convolution_f32_fw_gpu, convolution_big_size_weights) {
16431685
auto& engine = get_test_engine();
16441686

0 commit comments

Comments
 (0)