@@ -244,15 +244,9 @@ void emit_reorder_1d_tile(ngen::HW hw, GeneratorT *host,
244
244
// Replace (float -> float) by (int -> int) as word/dword moves have less
245
245
// restrictions.
246
246
if (src_type == dst_type && to_ir (src_type).is_fp ()) {
247
- int factor = (src_type == ngen::DataType::df ? 2 : 1 );
248
- if (factor == 1 || (src_stride == 1 && dst_stride == 1 )) {
249
- src_type
250
- = to_ngen (type_t::u (ngen::getBytes (src_type) / factor * 8 ));
251
- dst_type = src_type;
252
- width *= factor;
253
- src = src.reinterpret (src_type);
254
- dst = dst.reinterpret (dst_type);
255
- }
247
+ src_type = dst_type = to_ngen (type_t::u (ngen::getBits (src_type)));
248
+ src = src.reinterpret (src_type);
249
+ dst = dst.reinterpret (dst_type);
256
250
}
257
251
258
252
const int grf_size = ngen::GRF::bytes (hw);
@@ -297,7 +291,7 @@ void emit_reorder_1d_tile(ngen::HW hw, GeneratorT *host,
297
291
|| !dst.check_bounds (0 , 64 , /* is_dense=*/ true )))
298
292
step = 8 ;
299
293
300
- if (src_df || dst_df) step = 8 ;
294
+ if (src_df || src_q || dst_df || dst_q ) step = 8 ;
301
295
302
296
// Max supported stride is 4.
303
297
if (src_stride > 4 || dst_stride > 4 ) step = 1 ;
@@ -310,9 +304,6 @@ void emit_reorder_1d_tile(ngen::HW hw, GeneratorT *host,
310
304
// Non-power-of-2 strides must be handled element-by-element
311
305
if (!math::is_pow2 (src_stride) || !math::is_pow2 (dst_stride)) step = 1 ;
312
306
313
- // Qword does not appear to support swizzling.
314
- if (src_q && dst_q && src_stride != dst_stride) step = 1 ;
315
-
316
307
return step;
317
308
};
318
309
@@ -1893,8 +1884,6 @@ class reorder_impl_t {
1893
1884
const int grf_size = ngen::GRF::bytes (hw_);
1894
1885
1895
1886
if (src_layout_.type () != dst_layout_.type ()) return false ;
1896
- // long / f64 swizzle emits scalar instructions
1897
- if (src_layout_.type ().scalar ().size () >= 8 ) return false ;
1898
1887
if (!src_layout_.is_dense ()) return false ;
1899
1888
if (!dst_layout_.is_dense ()) return false ;
1900
1889
0 commit comments