diff --git a/frontends/numpy-scipy/cometpy/MLIRGen/lowering.py b/frontends/numpy-scipy/cometpy/MLIRGen/lowering.py index d7931c3c..84ac9a83 100644 --- a/frontends/numpy-scipy/cometpy/MLIRGen/lowering.py +++ b/frontends/numpy-scipy/cometpy/MLIRGen/lowering.py @@ -401,6 +401,7 @@ def translate_and_exec_llvm_with_jit(llvm_in,scf_lower_flags, func_name, inputs, # 2. Call mlir-translate to convert llvm to llvmir # 3. Call clang to generate library # p = subprocess.run(to_llvm_command, input=llvm_in.encode('utf-8'), stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + print(llvm_in) p = subprocess.run(to_llvm_command +' 2>&1 | '+ translate_mlir_command +' | ' + gcc_command , input=llvm_in.encode('utf-8'), stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) if(p.returncode != 0): cleanup() @@ -561,8 +562,8 @@ def lower_dialect_with_jit(ta_dialect_rep, target: str, out_dims, compile_with_f mlir_lower_flags += "--opt-fusion" compile_with_flags = compile_with_flags.replace("--opt-fusion","") compile_with_flags = compile_with_flags.replace("--opt-comp-workspace","") - if "-opt-matmul-tiling" not in compile_with_flags: - mlir_lower_flags += " --convert-to-loops " + # if "-opt-matmul-tiling" not in compile_with_flags: + mlir_lower_flags += " --convert-to-loops " mlir_lower_flags =" "+compile_with_flags + mlir_lower_flags else: mlir_lower_flags = " --convert-ta-to-it --convert-to-loops " diff --git a/lib/Dialect/TensorAlgebra/Transforms/LinalgTransforms.cpp b/lib/Dialect/TensorAlgebra/Transforms/LinalgTransforms.cpp index 5e41ae84..b233d2d1 100644 --- a/lib/Dialect/TensorAlgebra/Transforms/LinalgTransforms.cpp +++ b/lib/Dialect/TensorAlgebra/Transforms/LinalgTransforms.cpp @@ -31,21 +31,27 @@ #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/LoopUtils.h" +#include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/Dialect/Linalg/Transforms/Transforms.h" #include "mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h" +#include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/SCF/Transforms/TileUsingInterface.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinTypeInterfaces.h" #include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/OpDefinition.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/Value.h" +#include "mlir/IR/ValueRange.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "llvm/ADT/SmallVector.h" +#include // suppress all warnings coming from inclusion of blis.h in source tree #ifdef __clang__ @@ -299,7 +305,7 @@ namespace get_level3_blocksizes(&mc, &kc, &nc, &mr, &nr, sizeof(double)); addPatternForTiling(ctx, tilingPatterns, "__with_tiling__", "__L2__with_tiling__", {mc, nc, kc}, false, {1, 2, 0}); - addPatternForTiling(ctx, tilingPatterns, "__L2__with_tiling__", "__micro_kernel__", {mr, nr, kc}, false, {1, 0, 2}); + addPatternForTiling(ctx, tilingPatterns, "__L2__with_tiling__", "__micro_kernel__", {mr, nr, kc}, true, {1, 0, 2}); if (failed(applyPatternsAndFoldGreedily(getOperation(), std::move(tilingPatterns)))) @@ -739,40 +745,32 @@ struct OptDenseTranspose : public ConversionPattern currentOrder.push_back(i); } - SmallVector in_ivs; - SmallVector out_ivs; + SmallVector in_ivs; + SmallVector out_ivs; in_ivs.resize(optimalOrder.size()); out_ivs.resize(outputIndices[0].size()); - mlir::Value carried_val = output; - SmallVector loops; - + OpFoldResult one = rewriter.createOrFold(loc, 1); + SmallVector ubs; for (unsigned i = 0; i < optimalOrder.size(); i++) { - int64_t upperBound = inputType.getDimSize(optimalOrder[i]); - if (upperBound == ShapedType::kDynamic) - { - assert(false && "TODO: This dimension is a dynamic size"); - } - - /// create for loops - auto loop = rewriter.create(loc, 0, upperBound, 1, carried_val); - loops.push_back(loop); - rewriter.setInsertionPointToStart(loop.getBody()); - in_ivs[optimalOrder[i]] = loop.getInductionVar(); - out_ivs[optimalOrder[outputIndices[0][i]]] = loop.getInductionVar(); - carried_val = loop.getRegionIterArgs().front(); + Value upperBound = rewriter.create(loc, input, optimalOrder[i]); + ubs.push_back(upperBound); } - - auto load_rhs = rewriter.create(loc, input, in_ivs); - auto store_lhs = rewriter.create(loc, load_rhs, carried_val, out_ivs); - rewriter.create(loc, store_lhs.getResult()); - for(int64_t i = loops.size()-2; i>=0 ; i--) + + SmallVector ones(optimalOrder.size(), one); + auto forAll = rewriter.create(loc, ubs, output, std::nullopt); + rewriter.setInsertionPointToStart(forAll.getBody()); + auto ivs = forAll.getLoopInductionVars(); + for(size_t i = 0; i< forAll.getLoopInductionVars()->size(); i++) { - rewriter.setInsertionPointToEnd(loops[i].getBody()); - rewriter.create(loc, loops[i+1].getResult(0)); + in_ivs[optimalOrder[i]] = forAll.getLoopInductionVars()->data()[i]; + out_ivs[optimalOrder[outputIndices[0][i]]] = forAll.getLoopInductionVars()->data()[i]; } + auto extracts = rewriter.create(loc, input, in_ivs, ones, ones); + rewriter.setInsertionPointToEnd(forAll.getTerminator().getBody()); + rewriter.create(loc, extracts, forAll.getRegionIterArgs().front(), out_ivs, ones, ones); - rewriter.replaceAllUsesWith(op->getResult(0), loops[0].getResult(0)); + rewriter.replaceAllUsesWith(op->getResult(0), forAll->getResult(0)); rewriter.eraseOp(op); //module.dump(); @@ -796,7 +794,7 @@ namespace comet_debug() << "OptDenseTransposePass : public PassWrapper\n"; func::FuncOp func = getOperation(); ConversionTarget target(getContext()); - target.addLegalDialect(); + target.addLegalDialect(); RewritePatternSet patterns(&getContext()); patterns.insert(&getContext(), tile_size, seperate_tiles); diff --git a/test/integration/opts/ccsd_t1_21_ttgt_all_opts.ta b/test/integration/opts/ccsd_t1_21_ttgt_all_opts.ta index 602631b0..2008e7ee 100644 --- a/test/integration/opts/ccsd_t1_21_ttgt_all_opts.ta +++ b/test/integration/opts/ccsd_t1_21_ttgt_all_opts.ta @@ -1,5 +1,5 @@ # RUN: comet-opt -opt-matmul-tiling -opt-matmul-mkernel -opt-dense-transpose --convert-tc-to-ttgt --convert-to-llvm %s &> ccsd_t1_21_ttgt_all.llvm -# RUN: mlir-cpu-runner ccsd_t1_21_ttgt_all.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext | FileCheck %s +# RUN: mlir-cpu-runner ccsd_t1_21_ttgt_all.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext,%mlir_utility_library_dir/libomp%shlibext | FileCheck %s def main() { #IndexLabel Declarations diff --git a/test/integration/opts/ccsd_t1_21_ttgt_tiling.ta b/test/integration/opts/ccsd_t1_21_ttgt_tiling.ta index 4d558506..f9b2166c 100644 --- a/test/integration/opts/ccsd_t1_21_ttgt_tiling.ta +++ b/test/integration/opts/ccsd_t1_21_ttgt_tiling.ta @@ -1,10 +1,10 @@ # RUN: comet-opt --opt-matmul-tiling --convert-tc-to-ttgt --convert-to-llvm %s &> ccsd_t1_21_ttgt_tiling.llvm -# RUN: mlir-cpu-runner ccsd_t1_21_ttgt_tiling.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext | FileCheck %s +# RUN: mlir-cpu-runner ccsd_t1_21_ttgt_tiling.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext,%mlir_utility_library_dir/libomp%shlibext | FileCheck %s def main() { #IndexLabel Declarations - IndexLabel [i, c] = [2]; - IndexLabel [m, n, a] = [4]; + IndexLabel [i, c] = [16]; + IndexLabel [m, n, a] = [32]; Tensor v([i, c, m, n], {Dense}); Tensor t2([m, n, c, a], {Dense}); @@ -21,4 +21,4 @@ def main() { # Print the result for verification. # CHECK: data = -# CHECK-NEXT: 250.24,250.24,250.24,250.24,250.24,250.24,250.24,250.24, \ No newline at end of file +# CHECK-NEXT: 128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123,128123, \ No newline at end of file diff --git a/test/integration/opts/ccsd_t1_4_ttgt_bestperm.ta b/test/integration/opts/ccsd_t1_4_ttgt_bestperm.ta index 64f6897c..5570422b 100644 --- a/test/integration/opts/ccsd_t1_4_ttgt_bestperm.ta +++ b/test/integration/opts/ccsd_t1_4_ttgt_bestperm.ta @@ -1,5 +1,5 @@ # RUN: comet-opt --convert-tc-to-ttgt --convert-to-llvm %s &> ccsd_t1_4_ttgt_bestperm.llvm -# RUN: mlir-cpu-runner ccsd_t1_4_ttgt_bestperm.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext | FileCheck %s +# RUN: mlir-cpu-runner ccsd_t1_4_ttgt_bestperm.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext,%mlir_utility_library_dir/libomp%shlibext | FileCheck %s def main() { #IndexLabel Declarations diff --git a/test/integration/opts/opt_dense_transpose.ta b/test/integration/opts/opt_dense_transpose.ta index 6533fafe..01850fc3 100644 --- a/test/integration/opts/opt_dense_transpose.ta +++ b/test/integration/opts/opt_dense_transpose.ta @@ -1,5 +1,5 @@ # RUN: comet-opt -opt-dense-transpose --convert-ta-to-it --convert-to-loops --convert-to-llvm %s &> opt_dense_transpose.llvm -# RUN: mlir-cpu-runner opt_dense_transpose.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext | FileCheck %s +# RUN: mlir-cpu-runner opt_dense_transpose.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext,%mlir_utility_library_dir/libomp%shlibext | FileCheck %s #TODO(gkestor): read dense input from file