@@ -465,7 +465,7 @@ void primitive_inst::update_shape() {
465
465
auto desc = get_node ().as <kv_cache>().get_primitive ();
466
466
auto var_mem_size = get_network ().get_variable (desc->variable_info .variable_id ).get_actual_mem_size ();
467
467
// Need to trigger realloc_if_needed
468
- if (var_mem_size < _impl_params->get_output_layout (0 ).get_buffer_size (). count ())
468
+ if (var_mem_size < _impl_params->get_output_layout (0 ).get_linear_size ())
469
469
set_shape_change ();
470
470
}
471
471
}
@@ -684,13 +684,13 @@ event::ptr primitive_inst::realloc_if_needed() {
684
684
prealloc_shape[seq_axis] += tmp_prealloc_count;
685
685
required_buffer_size = std::accumulate (prealloc_shape.begin (), prealloc_shape.end (), size_t (1 ), std::multiplies<size_t >());
686
686
} else {
687
- required_buffer_size = (updated_layouts[i].get_buffer_size (). count ());
687
+ required_buffer_size = (updated_layouts[i].get_linear_size ());
688
688
}
689
689
if (required_buffer_size * 10 < _max_output_layout_count[i]) {
690
690
reclaim = true ;
691
691
}
692
692
if (reclaim) {
693
- GPU_DEBUG_TRACE_DETAIL << id () << " : Updated output[" << i << " ] size " << updated_layouts[i].get_buffer_size (). count ()
693
+ GPU_DEBUG_TRACE_DETAIL << id () << " : Updated output[" << i << " ] size " << updated_layouts[i].get_linear_size ()
694
694
<< " is much smaller than current memory size! " << _max_output_layout_count[i]
695
695
<< " Reset memory of output " << i << std::endl;
696
696
_max_output_layout_count[i] = 0 ;
@@ -705,7 +705,7 @@ event::ptr primitive_inst::realloc_if_needed() {
705
705
}
706
706
707
707
for (size_t i = 0 ; i < actual_layouts.size (); ++i) {
708
- bool can_reuse_buffer = (_outputs[i] && updated_layouts[i].get_buffer_size (). count () <= _max_output_layout_count[i]);
708
+ bool can_reuse_buffer = (_outputs[i] && updated_layouts[i].get_linear_size () <= _max_output_layout_count[i]);
709
709
std::pair<bool , ov::Shape> prealloc_info;
710
710
if (_node->is_type <kv_cache>() && i == 0 ) {
711
711
const auto & desc = _node->as <kv_cache>().get_primitive ();
@@ -717,17 +717,15 @@ event::ptr primitive_inst::realloc_if_needed() {
717
717
prealloc_info = sp.predict_preallocation_shape (id (), updated_layouts[i], can_reuse_buffer, i, tmp_prealloc_count);
718
718
}
719
719
if (prealloc_info.first && sp.can_preallocate (ov::shape_size (prealloc_info.second ) * (dt_sizes_in_B[i]))) {
720
- auto new_layout = updated_layouts[i];
721
- new_layout.set_partial_shape (prealloc_info.second );
722
- updated_params.output_layouts [i] = new_layout;
720
+ updated_params.output_layouts [i] = updated_layouts[i].clone_with_other_shape (prealloc_info.second );
723
721
}
724
- if (updated_params.output_layouts [i].get_buffer_size (). count () < updated_layouts[i].get_buffer_size (). count ()) {
722
+ if (updated_params.output_layouts [i].get_linear_size () < updated_layouts[i].get_linear_size ()) {
725
723
updated_params.output_layouts [i] = updated_layouts[i];
726
724
}
727
725
728
726
if (can_reuse_buffer) {
729
727
GPU_DEBUG_TRACE_DETAIL << id () << " : reuse previously allocated output buffer[" << i << " ] - "
730
- << actual_layouts[i].get_buffer_size (). count () << " /" << _max_output_layout_count[i]
728
+ << actual_layouts[i].get_linear_size () << " /" << _max_output_layout_count[i]
731
729
<< std::endl;
732
730
if (_node->is_type <kv_cache>() && (i == 0 )) {
733
731
// kv_cache has already assigned memory.
@@ -759,7 +757,7 @@ event::ptr primitive_inst::realloc_if_needed() {
759
757
GPU_DEBUG_TRACE_DETAIL << id () << " : realloc output memory. " << std::endl;
760
758
GPU_DEBUG_TRACE_DETAIL << " outputs[" << i << " ] "
761
759
<< " Current buffer_size=" << _max_output_layout_count[i]
762
- << " Requested buffer_size=" << updated_layouts[i].get_buffer_size (). count ()
760
+ << " Requested buffer_size=" << updated_layouts[i].get_linear_size ()
763
761
<< std::endl;
764
762
_outputs[i] = allocate_output (_network.get_engine (),
765
763
_network.get_memory_pool (),
@@ -773,7 +771,7 @@ event::ptr primitive_inst::realloc_if_needed() {
773
771
is_output_buffer (this , true ),
774
772
output_memory_ptr (i).get (),
775
773
true );
776
- _max_output_layout_count[i] = updated_params.output_layouts [i].get_buffer_size (). count ();
774
+ _max_output_layout_count[i] = updated_params.output_layouts [i].get_linear_size ();
777
775
GPU_DEBUG_CODE (std::string memalloc_info = " " );
778
776
GPU_DEBUG_CODE (memalloc_info += (((_outputs.size () > 1 ) ? (" o" + to_string (i) + " :" ) : " " ) +
779
777
(_outputs[i]->from_memory_pool ? " from_pool" : " new_alloc" ));)
@@ -1852,7 +1850,7 @@ primitive_inst::primitive_inst(network & network, program_node const& node, bool
1852
1850
_impl_params->strm = _network.get_stream_ptr ();
1853
1851
for (size_t i = 0 ; i < get_node ().get_output_layouts ().size (); ++i) {
1854
1852
if (_outputs.size () > i) {
1855
- _max_output_layout_count.push_back (_outputs[i] ? _outputs[i]->get_layout ().get_buffer_size (). count () : 0 );
1853
+ _max_output_layout_count.push_back (_outputs[i] ? _outputs[i]->get_layout ().get_linear_size () : 0 );
1856
1854
} else {
1857
1855
_outputs.push_back (nullptr );
1858
1856
_max_output_layout_count.push_back (0 );
@@ -1985,9 +1983,9 @@ event::ptr primitive_inst::update_weights() {
1985
1983
GPU_DEBUG_TRACE_DETAIL << id () << " : add original weights memory " << original_layout.to_short_string () << " to weights cache; "
1986
1984
<< " cache_size=" << _reordered_weights_cache.size () << " /" << _reordered_weights_cache.capacity () << std::endl;
1987
1985
} else {
1988
- auto expected_layout = reorder_kernel_params->get_output_layout ();
1989
1986
// Set original partial shape, because it may be lost during kernel_selector::weights_tensor -> layout conversion
1990
- expected_layout.set_partial_shape (original_layout.get_partial_shape ());
1987
+ auto expected_layout =
1988
+ reorder_kernel_params->get_output_layout ().clone_with_other_shape (original_layout.get_partial_shape ());
1991
1989
_impl_params->weights_layout = optional_layout (expected_layout);
1992
1990
1993
1991
if (_reordered_weights_cache.has (expected_layout)) {
0 commit comments