@@ -88,7 +88,6 @@ struct Functor_BatchedSerialGetrf {
88
88
// / \brief Implementation details of batched gbtrf analytical test
89
89
// /
90
90
// / \param Nb [in] Batch size of matrices
91
- // / \param BlkSize [in] Block size of matrix A
92
91
// / 4x4 matrix
93
92
// / which satisfies PA = LU
94
93
// / P = [[0, 0, 1, 0],
@@ -265,9 +264,9 @@ void impl_test_batched_gbtrf_analytical(const int Nb) {
265
264
Kokkos::deep_copy (h_ipiv1_ref, h_ipiv2_m3);
266
265
267
266
// Convert into banded storage
268
- full_to_banded (A0, AB0, kl, ku);
269
- full_to_banded (A1, AB1, kl, ku);
270
- full_to_banded (A2, AB2, kl, ku);
267
+ dense_to_banded (A0, AB0, kl, ku);
268
+ dense_to_banded (A1, AB1, kl, ku);
269
+ dense_to_banded (A2, AB2, kl, ku);
271
270
272
271
// gbtrf to factorize matrix A = P * L * U
273
272
auto info0 =
@@ -285,10 +284,10 @@ void impl_test_batched_gbtrf_analytical(const int Nb) {
285
284
EXPECT_EQ (info2, 0 );
286
285
287
286
// Extract matrix U and L from AB
288
- // first convert it to the full matrix (stored in A)
289
- banded_to_full <View3DType, View3DType>(AB0, A0, kl, ku);
290
- banded_to_full <View3DType, View3DType>(AB1, A1, kl, ku);
291
- banded_to_full <View3DType, View3DType>(AB2, A2, kl, ku);
287
+ // first convert it to the dense matrix (stored in A)
288
+ banded_to_dense <View3DType, View3DType>(AB0, A0, kl, ku);
289
+ banded_to_dense <View3DType, View3DType>(AB1, A1, kl, ku);
290
+ banded_to_dense <View3DType, View3DType>(AB2, A2, kl, ku);
292
291
293
292
// Copy upper triangular components to U
294
293
create_triangular_matrix<View3DType, View3DType, KokkosBatched::Uplo::Upper, KokkosBatched::Diag::NonUnit>(A0, U0);
@@ -348,16 +347,16 @@ void impl_test_batched_gbtrf_analytical(const int Nb) {
348
347
EXPECT_EQ (h_ipiv0 (ib, j), h_ipiv0_ref (ib, j));
349
348
EXPECT_EQ (h_ipiv2 (ib, j), h_ipiv2_ref (ib, j));
350
349
for (int i = 0 ; i < BlkSize; i++) {
351
- EXPECT_NEAR_KK (h_U0 (ib, i, j), h_U0_ref (ib, i, j), eps, " h_U0 " );
352
- EXPECT_NEAR_KK (h_NL0 (ib, i, j), h_NL0_ref (ib, i, j), eps, " h_NL0 " );
350
+ EXPECT_NEAR_KK (h_U0 (ib, i, j), h_U0_ref (ib, i, j), eps);
351
+ EXPECT_NEAR_KK (h_NL0 (ib, i, j), h_NL0_ref (ib, i, j), eps);
353
352
}
354
353
for (int i = 0 ; i < BlkSize - 1 ; i++) {
355
- EXPECT_NEAR_KK (h_U1 (ib, i, j), h_U1_ref (ib, i, j), eps, " h_U1 " );
356
- EXPECT_NEAR_KK (h_NL1 (ib, i, j), h_NL1_ref (ib, i, j), eps, " h_NL1 " );
354
+ EXPECT_NEAR_KK (h_U1 (ib, i, j), h_U1_ref (ib, i, j), eps);
355
+ EXPECT_NEAR_KK (h_NL1 (ib, i, j), h_NL1_ref (ib, i, j), eps);
357
356
}
358
357
for (int i = 0 ; i < BlkSize + 1 ; i++) {
359
- EXPECT_NEAR_KK (h_U2 (ib, i, j), h_U2_ref (ib, i, j), eps, " h_U2 " );
360
- EXPECT_NEAR_KK (h_NL2 (ib, i, j), h_NL2_ref (ib, i, j), eps, " h_NL2 " );
358
+ EXPECT_NEAR_KK (h_U2 (ib, i, j), h_U2_ref (ib, i, j), eps);
359
+ EXPECT_NEAR_KK (h_NL2 (ib, i, j), h_NL2_ref (ib, i, j), eps);
361
360
}
362
361
}
363
362
for (int j = 0 ; j < BlkSize - 1 ; j++) {
@@ -368,8 +367,7 @@ void impl_test_batched_gbtrf_analytical(const int Nb) {
368
367
369
368
// / \brief Implementation details of batched gbtrf test
370
369
// /
371
- // / \param N [in] Batch size of RHS (banded matrix can also be batched matrix)
372
- // / \param k [in] Number of superdiagonals or subdiagonals of matrix A
370
+ // / \param Nb [in] Batch size of matrix A
373
371
// / \param BlkSize [in] Block size of matrix A
374
372
template <typename DeviceType, typename ScalarType, typename LayoutType, typename AlgoTagType>
375
373
void impl_test_batched_gbtrf (const int Nb, const int BlkSize) {
@@ -395,17 +393,17 @@ void impl_test_batched_gbtrf(const int Nb, const int BlkSize) {
395
393
KokkosKernels::Impl::getRandomBounds (1.0 , randStart, randEnd);
396
394
Kokkos::fill_random (LU, rand_pool, randStart, randEnd);
397
395
398
- full_to_banded (LU, AB, kl, ku); // In banded storage
399
- banded_to_full (AB, A, kl, ku); // In full storage
396
+ dense_to_banded (LU, AB, kl, ku); // In banded storage
397
+ banded_to_dense (AB, A, kl, ku); // In conventional storage
400
398
401
399
Kokkos::deep_copy (LU, A); // for getrf
402
400
403
401
// gbtrf to factorize matrix A = P * L * U
404
402
Functor_BatchedSerialGbtrf<DeviceType, View3DType, PivView2DType, AlgoTagType>(AB, ipiv, kl, ku, BlkSize).run ();
405
403
406
404
// Extract matrix U and L from AB
407
- // first convert it to the full matrix (stored in A)
408
- banded_to_full <View3DType, View3DType>(AB, A, kl, ku);
405
+ // first convert it to the dense matrix (stored in A)
406
+ banded_to_dense <View3DType, View3DType>(AB, A, kl, ku);
409
407
410
408
// Copy upper triangular components to U
411
409
create_triangular_matrix<View3DType, View3DType, KokkosBatched::Uplo::Upper, KokkosBatched::Diag::NonUnit>(A, U);
0 commit comments