We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
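Extracting the point loop bands (see the `-annotate-point-loops` and `-extract-point-loops` passes in the command below) will be useful for further optimisation that concentrates on those bands.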
```sh
cd build
./bin/polymer-opt ../example/polybench/EXTRALARGE/gemm/gemm.mlir \
  -reg2mem \
  -extract-scop-stmt \
  -pluto-opt \
  -canonicalize \
  -annotate-point-loops \
  -extract-point-loops
```
#map0 = affine_map<()[s0] -> ((s0 - 1) floordiv 32 + 1)> #map1 = affine_map<(d0) -> (d0 * 32)> #map2 = affine_map<(d0)[s0] -> (s0, d0 * 32 + 32)> #set0 = affine_set<()[s0, s1] : (s0 - 1 >= 0, s1 - 1 >= 0)> #set1 = affine_set<()[s0] : (s0 - 1 >= 0)> module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-linux-gnu"} { llvm.mlir.global internal constant @str7("==END DUMP_ARRAYS==\0A\00") llvm.mlir.global internal constant @str6("\0Aend dump: %s\0A\00") llvm.mlir.global internal constant @str5("%0.2lf \00") llvm.mlir.global internal constant @str4("\0A\00") llvm.mlir.global internal constant @str3("C\00") llvm.mlir.global internal constant @str2("begin dump: %s\00") llvm.mlir.global internal constant @str1("==BEGIN DUMP_ARRAYS==\0A\00") llvm.mlir.global external @stderr() : !llvm.ptr<struct<"struct._IO_FILE", (i32, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<struct<"struct._IO_marker", opaque>>, ptr<struct<"struct._IO_FILE">>, i32, i32, i64, i16, i8, array<1 x i8>, ptr<i8>, i64, ptr<struct<"struct._IO_codecvt", opaque>>, ptr<struct<"struct._IO_wide_data", opaque>>, ptr<struct<"struct._IO_FILE">>, ptr<i8>, i64, i32, array<20 x i8>)>> llvm.func @fprintf(!llvm.ptr<struct<"struct._IO_FILE", (i32, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<struct<"struct._IO_marker", opaque>>, ptr<struct<"struct._IO_FILE">>, i32, i32, i64, i16, i8, array<1 x i8>, ptr<i8>, i64, ptr<struct<"struct._IO_codecvt", opaque>>, ptr<struct<"struct._IO_wide_data", opaque>>, ptr<struct<"struct._IO_FILE">>, ptr<i8>, i64, i32, array<20 x i8>)>>, !llvm.ptr<i8>, ...) 
-> !llvm.i32 llvm.mlir.global internal constant @str0("\00") llvm.func @strcmp(!llvm.ptr<i8>, !llvm.ptr<i8>) -> !llvm.i32 func @main(%arg0: i32, %arg1: !llvm.ptr<ptr<i8>>) -> i32 { %c0 = constant 0 : index %c2000_i32 = constant 2000 : i32 %c2300_i32 = constant 2300 : i32 %c2600_i32 = constant 2600 : i32 %c42_i32 = constant 42 : i32 %true = constant true %false = constant false %c0_i32 = constant 0 : i32 %0 = alloca() : memref<1xf64> %1 = alloca() : memref<1xf64> %2 = alloc() : memref<2000x2300xf64> %3 = alloc() : memref<2000x2600xf64> %4 = alloc() : memref<2600x2300xf64> %5 = memref_cast %0 : memref<1xf64> to memref<?xf64> %6 = memref_cast %1 : memref<1xf64> to memref<?xf64> call @init_array(%c2000_i32, %c2300_i32, %c2600_i32, %5, %6, %2, %3, %4) : (i32, i32, i32, memref<?xf64>, memref<?xf64>, memref<2000x2300xf64>, memref<2000x2600xf64>, memref<2600x2300xf64>) -> () call @polybench_timer_start() : () -> () %7 = load %0[%c0] : memref<1xf64> %8 = load %1[%c0] : memref<1xf64> call @kernel_gemm_new(%c2000_i32, %c2300_i32, %c2600_i32, %7, %8, %2, %3, %4) : (i32, i32, i32, f64, f64, memref<2000x2300xf64>, memref<2000x2600xf64>, memref<2600x2300xf64>) -> () call @polybench_timer_stop() : () -> () call @polybench_timer_print() : () -> () %9 = cmpi "sgt", %arg0, %c42_i32 : i32 %10 = scf.if %9 -> (i1) { %11 = llvm.load %arg1 : !llvm.ptr<ptr<i8>> %12 = llvm.mlir.addressof @str0 : !llvm.ptr<array<1 x i8>> %13 = llvm.mlir.constant(0 : index) : !llvm.i64 %14 = llvm.getelementptr %12[%13, %13] : (!llvm.ptr<array<1 x i8>>, !llvm.i64, !llvm.i64) -> !llvm.ptr<i8> %15 = llvm.call @strcmp(%11, %14) : (!llvm.ptr<i8>, !llvm.ptr<i8>) -> !llvm.i32 %16 = llvm.mlir.cast %15 : !llvm.i32 to i32 %17 = trunci %16 : i32 to i1 %18 = xor %17, %true : i1 scf.yield %18 : i1 } else { scf.yield %false : i1 } scf.if %10 { call @print_array(%c2000_i32, %c2300_i32, %2) : (i32, i32, memref<2000x2300xf64>) -> () } return %c0_i32 : i32 } func private @init_array(%arg0: i32, %arg1: i32, %arg2: i32, %arg3: 
memref<?xf64>, %arg4: memref<?xf64>, %arg5: memref<2000x2300xf64>, %arg6: memref<2000x2600xf64>, %arg7: memref<2600x2300xf64>) { %c0 = constant 0 : index %cst = constant 1.500000e+00 : f64 %cst_0 = constant 1.200000e+00 : f64 %c0_i32 = constant 0 : i32 %c2_i32 = constant 2 : i32 %c1_i32 = constant 1 : i32 store %cst, %arg3[%c0] : memref<?xf64> store %cst_0, %arg4[%c0] : memref<?xf64> br ^bb1(%c0_i32 : i32) ^bb1(%0: i32): // 2 preds: ^bb0, ^bb4 %1 = cmpi "slt", %0, %arg0 : i32 %2 = index_cast %0 : i32 to index cond_br %1, ^bb2(%c0_i32 : i32), ^bb5(%c0_i32 : i32) ^bb2(%3: i32): // 2 preds: ^bb1, ^bb3 %4 = cmpi "slt", %3, %arg1 : i32 %5 = index_cast %3 : i32 to index cond_br %4, ^bb3, ^bb4 ^bb3: // pred: ^bb2 %6 = muli %0, %3 : i32 %7 = addi %6, %c1_i32 : i32 %8 = remi_signed %7, %arg0 : i32 %9 = sitofp %8 : i32 to f64 %10 = sitofp %arg0 : i32 to f64 %11 = divf %9, %10 : f64 store %11, %arg5[%2, %5] : memref<2000x2300xf64> %12 = addi %3, %c1_i32 : i32 br ^bb2(%12 : i32) ^bb4: // pred: ^bb2 %13 = addi %0, %c1_i32 : i32 br ^bb1(%13 : i32) ^bb5(%14: i32): // 2 preds: ^bb1, ^bb8 %15 = cmpi "slt", %14, %arg0 : i32 %16 = index_cast %14 : i32 to index cond_br %15, ^bb6(%c0_i32 : i32), ^bb9(%c0_i32 : i32) ^bb6(%17: i32): // 2 preds: ^bb5, ^bb7 %18 = cmpi "slt", %17, %arg2 : i32 %19 = index_cast %17 : i32 to index cond_br %18, ^bb7, ^bb8 ^bb7: // pred: ^bb6 %20 = addi %17, %c1_i32 : i32 %21 = muli %14, %20 : i32 %22 = remi_signed %21, %arg2 : i32 %23 = sitofp %22 : i32 to f64 %24 = sitofp %arg2 : i32 to f64 %25 = divf %23, %24 : f64 store %25, %arg6[%16, %19] : memref<2000x2600xf64> br ^bb6(%20 : i32) ^bb8: // pred: ^bb6 %26 = addi %14, %c1_i32 : i32 br ^bb5(%26 : i32) ^bb9(%27: i32): // 2 preds: ^bb5, ^bb13 %28 = cmpi "slt", %27, %arg2 : i32 %29 = index_cast %27 : i32 to index cond_br %28, ^bb11(%c0_i32 : i32), ^bb10 ^bb10: // pred: ^bb9 return ^bb11(%30: i32): // 2 preds: ^bb9, ^bb12 %31 = cmpi "slt", %30, %arg1 : i32 %32 = index_cast %30 : i32 to index cond_br %31, ^bb12, 
^bb13 ^bb12: // pred: ^bb11 %33 = addi %30, %c2_i32 : i32 %34 = muli %27, %33 : i32 %35 = remi_signed %34, %arg1 : i32 %36 = sitofp %35 : i32 to f64 %37 = sitofp %arg1 : i32 to f64 %38 = divf %36, %37 : f64 store %38, %arg7[%29, %32] : memref<2600x2300xf64> %39 = addi %30, %c1_i32 : i32 br ^bb11(%39 : i32) ^bb13: // pred: ^bb11 %40 = addi %27, %c1_i32 : i32 br ^bb9(%40 : i32) } func private @polybench_timer_start() func private @kernel_gemm(%arg0: i32, %arg1: i32, %arg2: i32, %arg3: f64, %arg4: f64, %arg5: memref<2000x2300xf64>, %arg6: memref<2000x2600xf64>, %arg7: memref<2600x2300xf64>) { %0 = index_cast %arg0 : i32 to index %1 = index_cast %arg1 : i32 to index %2 = index_cast %arg2 : i32 to index affine.for %arg8 = 0 to %0 { affine.for %arg9 = 0 to %1 { call @S0(%arg5, %arg8, %arg9, %arg4) : (memref<2000x2300xf64>, index, index, f64) -> () } {scop.point_loop} affine.for %arg9 = 0 to %2 { affine.for %arg10 = 0 to %1 { call @S1(%arg5, %arg8, %arg10, %arg7, %arg9, %arg3, %arg6) : (memref<2000x2300xf64>, index, index, memref<2600x2300xf64>, index, f64, memref<2000x2600xf64>) -> () } {scop.point_loop} } {scop.point_loop} } {scop.point_loop} return } func private @polybench_timer_stop() func private @polybench_timer_print() func private @print_array(%arg0: i32, %arg1: i32, %arg2: memref<2000x2300xf64>) { %c0_i32 = constant 0 : i32 %c20_i32 = constant 20 : i32 %c1_i32 = constant 1 : i32 %0 = llvm.mlir.addressof @stderr : !llvm.ptr<ptr<struct<"struct._IO_FILE", (i32, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<struct<"struct._IO_marker", opaque>>, ptr<struct<"struct._IO_FILE">>, i32, i32, i64, i16, i8, array<1 x i8>, ptr<i8>, i64, ptr<struct<"struct._IO_codecvt", opaque>>, ptr<struct<"struct._IO_wide_data", opaque>>, ptr<struct<"struct._IO_FILE">>, ptr<i8>, i64, i32, array<20 x i8>)>>> %1 = llvm.load %0 : !llvm.ptr<ptr<struct<"struct._IO_FILE", (i32, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, 
ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<struct<"struct._IO_marker", opaque>>, ptr<struct<"struct._IO_FILE">>, i32, i32, i64, i16, i8, array<1 x i8>, ptr<i8>, i64, ptr<struct<"struct._IO_codecvt", opaque>>, ptr<struct<"struct._IO_wide_data", opaque>>, ptr<struct<"struct._IO_FILE">>, ptr<i8>, i64, i32, array<20 x i8>)>>> %2 = llvm.mlir.addressof @str1 : !llvm.ptr<array<23 x i8>> %3 = llvm.mlir.constant(0 : index) : !llvm.i64 %4 = llvm.getelementptr %2[%3, %3] : (!llvm.ptr<array<23 x i8>>, !llvm.i64, !llvm.i64) -> !llvm.ptr<i8> %5 = llvm.call @fprintf(%1, %4) : (!llvm.ptr<struct<"struct._IO_FILE", (i32, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<struct<"struct._IO_marker", opaque>>, ptr<struct<"struct._IO_FILE">>, i32, i32, i64, i16, i8, array<1 x i8>, ptr<i8>, i64, ptr<struct<"struct._IO_codecvt", opaque>>, ptr<struct<"struct._IO_wide_data", opaque>>, ptr<struct<"struct._IO_FILE">>, ptr<i8>, i64, i32, array<20 x i8>)>>, !llvm.ptr<i8>) -> !llvm.i32 %6 = llvm.mlir.addressof @stderr : !llvm.ptr<ptr<struct<"struct._IO_FILE", (i32, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<struct<"struct._IO_marker", opaque>>, ptr<struct<"struct._IO_FILE">>, i32, i32, i64, i16, i8, array<1 x i8>, ptr<i8>, i64, ptr<struct<"struct._IO_codecvt", opaque>>, ptr<struct<"struct._IO_wide_data", opaque>>, ptr<struct<"struct._IO_FILE">>, ptr<i8>, i64, i32, array<20 x i8>)>>> %7 = llvm.load %6 : !llvm.ptr<ptr<struct<"struct._IO_FILE", (i32, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<struct<"struct._IO_marker", opaque>>, ptr<struct<"struct._IO_FILE">>, i32, i32, i64, i16, i8, array<1 x i8>, ptr<i8>, i64, ptr<struct<"struct._IO_codecvt", opaque>>, ptr<struct<"struct._IO_wide_data", opaque>>, ptr<struct<"struct._IO_FILE">>, ptr<i8>, i64, i32, array<20 x i8>)>>> %8 = llvm.mlir.addressof @str2 : !llvm.ptr<array<15 x i8>> %9 = 
llvm.getelementptr %8[%3, %3] : (!llvm.ptr<array<15 x i8>>, !llvm.i64, !llvm.i64) -> !llvm.ptr<i8> %10 = llvm.mlir.addressof @str3 : !llvm.ptr<array<2 x i8>> %11 = llvm.getelementptr %10[%3, %3] : (!llvm.ptr<array<2 x i8>>, !llvm.i64, !llvm.i64) -> !llvm.ptr<i8> %12 = llvm.call @fprintf(%7, %9, %11) : (!llvm.ptr<struct<"struct._IO_FILE", (i32, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<struct<"struct._IO_marker", opaque>>, ptr<struct<"struct._IO_FILE">>, i32, i32, i64, i16, i8, array<1 x i8>, ptr<i8>, i64, ptr<struct<"struct._IO_codecvt", opaque>>, ptr<struct<"struct._IO_wide_data", opaque>>, ptr<struct<"struct._IO_FILE">>, ptr<i8>, i64, i32, array<20 x i8>)>>, !llvm.ptr<i8>, !llvm.ptr<i8>) -> !llvm.i32 br ^bb1(%c0_i32 : i32) ^bb1(%13: i32): // 2 preds: ^bb0, ^bb5 %14 = cmpi "slt", %13, %arg0 : i32 %15 = index_cast %13 : i32 to index cond_br %14, ^bb3(%c0_i32 : i32), ^bb2 ^bb2: // pred: ^bb1 %16 = llvm.mlir.addressof @stderr : !llvm.ptr<ptr<struct<"struct._IO_FILE", (i32, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<struct<"struct._IO_marker", opaque>>, ptr<struct<"struct._IO_FILE">>, i32, i32, i64, i16, i8, array<1 x i8>, ptr<i8>, i64, ptr<struct<"struct._IO_codecvt", opaque>>, ptr<struct<"struct._IO_wide_data", opaque>>, ptr<struct<"struct._IO_FILE">>, ptr<i8>, i64, i32, array<20 x i8>)>>> %17 = llvm.load %16 : !llvm.ptr<ptr<struct<"struct._IO_FILE", (i32, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<struct<"struct._IO_marker", opaque>>, ptr<struct<"struct._IO_FILE">>, i32, i32, i64, i16, i8, array<1 x i8>, ptr<i8>, i64, ptr<struct<"struct._IO_codecvt", opaque>>, ptr<struct<"struct._IO_wide_data", opaque>>, ptr<struct<"struct._IO_FILE">>, ptr<i8>, i64, i32, array<20 x i8>)>>> %18 = llvm.mlir.addressof @str6 : !llvm.ptr<array<17 x i8>> %19 = llvm.getelementptr %18[%3, %3] : 
(!llvm.ptr<array<17 x i8>>, !llvm.i64, !llvm.i64) -> !llvm.ptr<i8> %20 = llvm.mlir.addressof @str3 : !llvm.ptr<array<2 x i8>> %21 = llvm.getelementptr %20[%3, %3] : (!llvm.ptr<array<2 x i8>>, !llvm.i64, !llvm.i64) -> !llvm.ptr<i8> %22 = llvm.call @fprintf(%17, %19, %21) : (!llvm.ptr<struct<"struct._IO_FILE", (i32, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<struct<"struct._IO_marker", opaque>>, ptr<struct<"struct._IO_FILE">>, i32, i32, i64, i16, i8, array<1 x i8>, ptr<i8>, i64, ptr<struct<"struct._IO_codecvt", opaque>>, ptr<struct<"struct._IO_wide_data", opaque>>, ptr<struct<"struct._IO_FILE">>, ptr<i8>, i64, i32, array<20 x i8>)>>, !llvm.ptr<i8>, !llvm.ptr<i8>) -> !llvm.i32 %23 = llvm.mlir.addressof @stderr : !llvm.ptr<ptr<struct<"struct._IO_FILE", (i32, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<struct<"struct._IO_marker", opaque>>, ptr<struct<"struct._IO_FILE">>, i32, i32, i64, i16, i8, array<1 x i8>, ptr<i8>, i64, ptr<struct<"struct._IO_codecvt", opaque>>, ptr<struct<"struct._IO_wide_data", opaque>>, ptr<struct<"struct._IO_FILE">>, ptr<i8>, i64, i32, array<20 x i8>)>>> %24 = llvm.load %23 : !llvm.ptr<ptr<struct<"struct._IO_FILE", (i32, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<struct<"struct._IO_marker", opaque>>, ptr<struct<"struct._IO_FILE">>, i32, i32, i64, i16, i8, array<1 x i8>, ptr<i8>, i64, ptr<struct<"struct._IO_codecvt", opaque>>, ptr<struct<"struct._IO_wide_data", opaque>>, ptr<struct<"struct._IO_FILE">>, ptr<i8>, i64, i32, array<20 x i8>)>>> %25 = llvm.mlir.addressof @str7 : !llvm.ptr<array<23 x i8>> %26 = llvm.getelementptr %25[%3, %3] : (!llvm.ptr<array<23 x i8>>, !llvm.i64, !llvm.i64) -> !llvm.ptr<i8> %27 = llvm.call @fprintf(%24, %26) : (!llvm.ptr<struct<"struct._IO_FILE", (i32, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, 
ptr<i8>, ptr<struct<"struct._IO_marker", opaque>>, ptr<struct<"struct._IO_FILE">>, i32, i32, i64, i16, i8, array<1 x i8>, ptr<i8>, i64, ptr<struct<"struct._IO_codecvt", opaque>>, ptr<struct<"struct._IO_wide_data", opaque>>, ptr<struct<"struct._IO_FILE">>, ptr<i8>, i64, i32, array<20 x i8>)>>, !llvm.ptr<i8>) -> !llvm.i32 return ^bb3(%28: i32): // 2 preds: ^bb1, ^bb4 %29 = cmpi "slt", %28, %arg1 : i32 %30 = index_cast %28 : i32 to index cond_br %29, ^bb4, ^bb5 ^bb4: // pred: ^bb3 %31 = muli %13, %arg0 : i32 %32 = addi %31, %28 : i32 %33 = remi_signed %32, %c20_i32 : i32 %34 = cmpi "eq", %33, %c0_i32 : i32 scf.if %34 { %44 = llvm.mlir.addressof @stderr : !llvm.ptr<ptr<struct<"struct._IO_FILE", (i32, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<struct<"struct._IO_marker", opaque>>, ptr<struct<"struct._IO_FILE">>, i32, i32, i64, i16, i8, array<1 x i8>, ptr<i8>, i64, ptr<struct<"struct._IO_codecvt", opaque>>, ptr<struct<"struct._IO_wide_data", opaque>>, ptr<struct<"struct._IO_FILE">>, ptr<i8>, i64, i32, array<20 x i8>)>>> %45 = llvm.load %44 : !llvm.ptr<ptr<struct<"struct._IO_FILE", (i32, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<struct<"struct._IO_marker", opaque>>, ptr<struct<"struct._IO_FILE">>, i32, i32, i64, i16, i8, array<1 x i8>, ptr<i8>, i64, ptr<struct<"struct._IO_codecvt", opaque>>, ptr<struct<"struct._IO_wide_data", opaque>>, ptr<struct<"struct._IO_FILE">>, ptr<i8>, i64, i32, array<20 x i8>)>>> %46 = llvm.mlir.addressof @str4 : !llvm.ptr<array<2 x i8>> %47 = llvm.getelementptr %46[%3, %3] : (!llvm.ptr<array<2 x i8>>, !llvm.i64, !llvm.i64) -> !llvm.ptr<i8> %48 = llvm.call @fprintf(%45, %47) : (!llvm.ptr<struct<"struct._IO_FILE", (i32, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<struct<"struct._IO_marker", opaque>>, ptr<struct<"struct._IO_FILE">>, i32, i32, i64, i16, i8, array<1 x i8>, 
ptr<i8>, i64, ptr<struct<"struct._IO_codecvt", opaque>>, ptr<struct<"struct._IO_wide_data", opaque>>, ptr<struct<"struct._IO_FILE">>, ptr<i8>, i64, i32, array<20 x i8>)>>, !llvm.ptr<i8>) -> !llvm.i32 } %35 = llvm.mlir.addressof @stderr : !llvm.ptr<ptr<struct<"struct._IO_FILE", (i32, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<struct<"struct._IO_marker", opaque>>, ptr<struct<"struct._IO_FILE">>, i32, i32, i64, i16, i8, array<1 x i8>, ptr<i8>, i64, ptr<struct<"struct._IO_codecvt", opaque>>, ptr<struct<"struct._IO_wide_data", opaque>>, ptr<struct<"struct._IO_FILE">>, ptr<i8>, i64, i32, array<20 x i8>)>>> %36 = llvm.load %35 : !llvm.ptr<ptr<struct<"struct._IO_FILE", (i32, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<struct<"struct._IO_marker", opaque>>, ptr<struct<"struct._IO_FILE">>, i32, i32, i64, i16, i8, array<1 x i8>, ptr<i8>, i64, ptr<struct<"struct._IO_codecvt", opaque>>, ptr<struct<"struct._IO_wide_data", opaque>>, ptr<struct<"struct._IO_FILE">>, ptr<i8>, i64, i32, array<20 x i8>)>>> %37 = llvm.mlir.addressof @str5 : !llvm.ptr<array<8 x i8>> %38 = llvm.getelementptr %37[%3, %3] : (!llvm.ptr<array<8 x i8>>, !llvm.i64, !llvm.i64) -> !llvm.ptr<i8> %39 = load %arg2[%15, %30] : memref<2000x2300xf64> %40 = llvm.mlir.cast %39 : f64 to !llvm.double %41 = llvm.call @fprintf(%36, %38, %40) : (!llvm.ptr<struct<"struct._IO_FILE", (i32, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<i8>, ptr<struct<"struct._IO_marker", opaque>>, ptr<struct<"struct._IO_FILE">>, i32, i32, i64, i16, i8, array<1 x i8>, ptr<i8>, i64, ptr<struct<"struct._IO_codecvt", opaque>>, ptr<struct<"struct._IO_wide_data", opaque>>, ptr<struct<"struct._IO_FILE">>, ptr<i8>, i64, i32, array<20 x i8>)>>, !llvm.ptr<i8>, !llvm.double) -> !llvm.i32 %42 = addi %28, %c1_i32 : i32 br ^bb3(%42 : i32) ^bb5: // pred: ^bb3 %43 = addi %13, %c1_i32 : i32 br 
^bb1(%43 : i32) } func private @S0(%arg0: memref<2000x2300xf64>, %arg1: index, %arg2: index, %arg3: f64) attributes {scop.stmt} { %0 = affine.load %arg0[symbol(%arg1), symbol(%arg2)] : memref<2000x2300xf64> %1 = mulf %0, %arg3 : f64 affine.store %1, %arg0[symbol(%arg1), symbol(%arg2)] : memref<2000x2300xf64> return } func private @S1(%arg0: memref<2000x2300xf64>, %arg1: index, %arg2: index, %arg3: memref<2600x2300xf64>, %arg4: index, %arg5: f64, %arg6: memref<2000x2600xf64>) attributes {scop.stmt} { %0 = affine.load %arg0[symbol(%arg1), symbol(%arg2)] : memref<2000x2300xf64> %1 = affine.load %arg6[symbol(%arg1), symbol(%arg4)] : memref<2000x2600xf64> %2 = mulf %arg5, %1 : f64 %3 = affine.load %arg3[symbol(%arg4), symbol(%arg2)] : memref<2600x2300xf64> %4 = mulf %2, %3 : f64 %5 = addf %0, %4 : f64 affine.store %5, %arg0[symbol(%arg1), symbol(%arg2)] : memref<2000x2300xf64> return } func private @kernel_gemm_new(%arg0: i32, %arg1: i32, %arg2: i32, %arg3: f64, %arg4: f64, %arg5: memref<2000x2300xf64>, %arg6: memref<2000x2600xf64>, %arg7: memref<2600x2300xf64>) { %0 = index_cast %arg0 : i32 to index %1 = index_cast %arg1 : i32 to index %2 = index_cast %arg2 : i32 to index affine.if #set0()[%0, %1] { affine.for %arg8 = 0 to #map0()[%0] { affine.for %arg9 = 0 to #map0()[%1] { affine.for %arg10 = #map1(%arg8) to min #map2(%arg8)[%0] { affine.for %arg11 = #map1(%arg9) to min #map2(%arg9)[%1] { call @S0(%arg5, %arg10, %arg11, %arg4) : (memref<2000x2300xf64>, index, index, f64) -> () } {scop.point_loop} } {scop.point_loop} } } affine.if #set1()[%2] { affine.for %arg8 = 0 to #map0()[%0] { affine.for %arg9 = 0 to #map0()[%1] { affine.for %arg10 = 0 to #map0()[%2] { affine.for %arg11 = #map1(%arg8) to min #map2(%arg8)[%0] { affine.for %arg12 = #map1(%arg10) to min #map2(%arg10)[%2] { affine.for %arg13 = #map1(%arg9) to min #map2(%arg9)[%1] { call @S1(%arg5, %arg11, %arg13, %arg7, %arg12, %arg3, %arg6) : (memref<2000x2300xf64>, index, index, memref<2600x2300xf64>, index, f64, 
memref<2000x2600xf64>) -> () } {scop.point_loop} } {scop.point_loop} } {scop.point_loop} } } } } } return } func private @kernel_gemm__PE0(%arg0: memref<2000x2300xf64>, %arg1: f64, %arg2: index, %arg3: memref<2600x2300xf64>, %arg4: f64, %arg5: memref<2000x2600xf64>, %arg6: index, %arg7: index) { affine.for %arg8 = 0 to %arg7 { affine.for %arg9 = 0 to %arg2 { call @S0(%arg0, %arg8, %arg9, %arg1) : (memref<2000x2300xf64>, index, index, f64) -> () } {scop.point_loop} affine.for %arg9 = 0 to %arg6 { affine.for %arg10 = 0 to %arg2 { call @S1(%arg0, %arg8, %arg10, %arg3, %arg9, %arg4, %arg5) : (memref<2000x2300xf64>, index, index, memref<2600x2300xf64>, index, f64, memref<2000x2600xf64>) -> () } {scop.point_loop} } {scop.point_loop} } {scop.point_loop} return } func private @kernel_gemm_new__PE1(%arg0: memref<2000x2300xf64>, %arg1: f64, %arg2: index, %arg3: index, %arg4: index, %arg5: index) { affine.for %arg6 = #map1(%arg4) to min #map2(%arg4)[%arg5] { affine.for %arg7 = #map1(%arg2) to min #map2(%arg2)[%arg3] { call @S0(%arg0, %arg6, %arg7, %arg1) : (memref<2000x2300xf64>, index, index, f64) -> () } {scop.point_loop} } {scop.point_loop} return } func private @kernel_gemm_new__PE2(%arg0: memref<2000x2300xf64>, %arg1: memref<2600x2300xf64>, %arg2: f64, %arg3: memref<2000x2600xf64>, %arg4: index, %arg5: index, %arg6: index, %arg7: index, %arg8: index, %arg9: index) { affine.for %arg10 = #map1(%arg8) to min #map2(%arg8)[%arg9] { affine.for %arg11 = #map1(%arg6) to min #map2(%arg6)[%arg7] { affine.for %arg12 = #map1(%arg4) to min #map2(%arg4)[%arg5] { call @S1(%arg0, %arg10, %arg12, %arg1, %arg11, %arg2, %arg3) : (memref<2000x2300xf64>, index, index, memref<2600x2300xf64>, index, f64, memref<2000x2600xf64>) -> () } {scop.point_loop} } {scop.point_loop} } {scop.point_loop} return } }
The text was updated successfully, but these errors were encountered:
No branches or pull requests
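Extracting the point loop bands (see the `-annotate-point-loops` and `-extract-point-loops` passes in the command below) will be useful for further optimisation that concentrates on those bands.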
```sh
cd build
./bin/polymer-opt ../example/polybench/EXTRALARGE/gemm/gemm.mlir \
  -reg2mem \
  -extract-scop-stmt \
  -pluto-opt \
  -canonicalize \
  -annotate-point-loops \
  -extract-point-loops
```
The text was updated successfully, but these errors were encountered: