openvinotoolkit · alexey-varyzgin · Apr 1, 2022 · EgorDuplensky · Apr 1, 2022
@@ -1361,7 +1361,7 @@ void Graph::EnforceBF16() {
         if (nodesToSkip.count(node) && !node->enforceBF16evenForGraphTail)
             continue;
 
-        if (node->getType() != Type::Input && node->getType() != Type::Output) {
+        if (!ov::intel_cpu::one_of(node->getType(), Type::Input, Type::Output, Type::MemoryInput, Type::MemoryOutput)) {
             for (size_t i = 0; i < node->getOriginalInputsNumber(); i++) {
                 const auto &parent = node->getParentEdgesAtPort(i)[0]->getParent();
                 /* Skip BF16 enforcement for nodes after Constant Inputs for maintaining precision for fusing.

@@ -289,6 +289,20 @@ void GraphOptimizer::FuseConvolutionMatMulAndBias(Graph &graph) {
     }
 }
 
+/**
+ * @brief Tells whether fusing a FakeQuantize childNode into parentNode has to be skipped:
+ *        true if childNode is FakeQuantize and either node's original output precision at port 0 is BF16.
+ * @todo FQ fusing was disabled for BF16 output since oneDNN primitives lacked support for bf16 depthwise
+ *       postops. This is not the case anymore, because after migration to oneDNN 2.3 FQ will be fused as
+ *       multiple binary post ops. This check can already be removed for FC fusing, but should be kept for
+ *       Convolution, which still uses legacy depthwise postops for performance reasons.
+ */
+static bool BF16QuantizeNodeFusing(const NodePtr& parentNode, const NodePtr& childNode) {
+    return childNode->getType() == Type::FakeQuantize &&
+        one_of(Precision::BF16,
+            parentNode->getOriginalOutputPrecisionAtPort(0),
+            childNode->getOriginalOutputPrecisionAtPort(0));
+}
 void GraphOptimizer::FuseDeconvolutionAndSimpleOperation(Graph &graph) {
     auto& graphNodes = graph.GetNodes();
 
@@ -328,6 +342,12 @@ void GraphOptimizer::FuseDeconvolutionAndSimpleOperation(Graph &graph) {
             continue;
         }
 
+        //  BF16 Quantize Layer Fusing Disabling
+        if (BF16QuantizeNodeFusing(parentNode, childNode)) {
+            parent++;
+            continue;
+        }
+
         childNode->fuseInto(parentNode);
 
         auto parentEdges = childNode->parentEdges;
@@ -715,21 +735,6 @@ void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph &graph) {
     }
 }
 
-/**
- * @todo FQ fusing was disabled for BF16 output since oneDNN primitives lack support
- *       for bf16 depthwise postops.
- *       This is not the case anymore, because after migration to oneDNN 2.3 FQ will be fused as
- *       multiple binary post ops.
- *       This check can already be removed for FC fusing, but should be kept for Convolution,
- *       which still uses legacy depthwise postops for performance reasons.
- */
-static bool BF16QuantizeNodeFusing(const NodePtr& parentNode, const NodePtr& childNode) {
-    return childNode->getType() == Type::FakeQuantize &&
-        one_of(Precision::BF16,
-            parentNode->getOriginalOutputPrecisionAtPort(0),
-            childNode->getOriginalOutputPrecisionAtPort(0));
-}
-
 void GraphOptimizer::FuseFullyConnectedAndSimpleOperation(Graph &graph) {
     auto& graphNodes = graph.GetNodes();
 

@@ -295,6 +295,9 @@ void Deconvolution::getSupportedDescriptors() {
        inputDataType = outputDataType = memory::data_type::bf16;
     if (!fusedWith.empty()) {
         outputDataType = DnnlExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0));
+        // TODO: We have to extend jit_avx512_core_x8s8s32x_deconv_fwd_kernel from oneDNN to support BF16 output data type
+        if (isInt8 && outputDataType == memory::data_type::bf16)
+            outputDataType = memory::data_type::f32;
     }
 
     if (getParentEdges().size() != 2 && getParentEdges().size() != 3)