diff --git a/CMakeLists.txt b/CMakeLists.txt index 6f39c52c..4d5d7af5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,7 +29,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules) if (MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHs-c- /GR-") else () - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions -fno-rtti") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions -fno-rtti -g") endif () #------------------------------------------------------------------------------- diff --git a/first.mlir b/first.mlir index 341dd434..514c26d7 100644 --- a/first.mlir +++ b/first.mlir @@ -1,544 +1,1076 @@ -// -----// IR Dump After {anonymous}::FuncOpLoweringPass () //----- // -#map = affine_map<(d0, d1) -> (d0, d1)> -#map1 = affine_map<(d0, d1) -> (d1, d0)> -module { - func.func @main() { - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %0 = "ta.dynamic_index_label"(%c0, %c1) : (index, index) -> !ta.range - %c0_0 = arith.constant 0 : index - %c1_1 = arith.constant 1 : index - %1 = "ta.dynamic_index_label"(%c0_0, %c1_1) : (index, index) -> !ta.range - %2 = "ta.sparse_tensor_decl"(%0, %1) {format = "CSR", temporal_tensor = false} : (!ta.range, !ta.range) -> tensor - %3 = "ta.dense_tensor_decl"(%1, %0) {format = "Dense"} : (!ta.range, !ta.range) -> tensor - %4 = "ta.dense_tensor_decl"(%1, %0) {format = "Dense"} : (!ta.range, !ta.range) -> tensor - "ta.fill_from_file"(%2) {filename = "SPARSE_FILE_NAME0", readMode = 1 : i32} : (tensor) -> () - "ta.fill"(%3) {value = 1.000000e+00 : f64} : (tensor) -> () - %5 = "ta.mul"(%2, %3, %1, %0) {MaskType = "none", __alpha__ = 1.000000e+00 : f64, __beta__ = 0.000000e+00 : f64, formats = ["CSR", "Dense", "Dense"], indexing_maps = [#map, #map1, #map1], operand_segment_sizes = array, semiring = "plusxy_times"} : (tensor, tensor, !ta.range, !ta.range) -> tensor - "ta.set_op"(%5, %4) {__beta__ = 0.000000e+00 : f64} : (tensor, tensor) -> () - "ta.print"(%2) : (tensor) -> () - "ta.print"(%3) : (tensor) -> () - "ta.print"(%4) : (tensor) -> () - return +module attributes {llvm.data_layout = ""} { + llvm.func @free(!llvm.ptr) + llvm.func @malloc(i64) -> !llvm.ptr + llvm.func @main() { + %0 = llvm.mlir.constant(8 : index) : i64 + %1 = llvm.mlir.constant(32 : index) : i64 + %2 = llvm.mlir.constant(0 : index) : i64 + %3 = llvm.mlir.constant(1 : index) : i64 + %4 = llvm.mlir.constant(13 : index) : i64 + %5 = llvm.mlir.constant(1 : index) : i64 + %6 = llvm.mlir.constant(0 : index) : i64 + %7 = llvm.mlir.constant(-1 : index) : i64 + %8 = llvm.mlir.constant(2 : index) : i64 + %9 = llvm.mlir.constant(3 : index) : i64 + %10 = llvm.mlir.constant(0 : i32) : i32 + %11 = llvm.mlir.constant(1 : i32) : i32 + %12 = llvm.mlir.constant(4 : index) : i64 + %13 = llvm.mlir.constant(5 : index) : i64 + %14 = llvm.mlir.constant(6 : index) : i64 + %15 = llvm.mlir.constant(7 : index) : i64 + %16 = llvm.mlir.constant(8 : index) : i64 + %17 = llvm.mlir.constant(9 : index) : i64 + %18 = llvm.mlir.constant(true) : i1 + %19 = llvm.mlir.constant(false) : i1 + %20 = llvm.mlir.constant(0.000000e+00 : f64) : f64 + %21 = llvm.mlir.constant(10 : index) : i64 + %22 = llvm.mlir.null : !llvm.ptr + %23 = llvm.getelementptr %22[13] : (!llvm.ptr) -> !llvm.ptr + %24 = llvm.ptrtoint %23 : !llvm.ptr to i64 + %25 = llvm.call @malloc(%24) : (i64) -> !llvm.ptr + %26 = llvm.bitcast %25 : !llvm.ptr to !llvm.ptr + %27 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %28 = llvm.insertvalue %26, %27[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %29 = llvm.insertvalue %26, %28[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %30 = llvm.insertvalue %2, %29[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %31 = llvm.insertvalue %4, %30[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %32 = llvm.insertvalue %3, %31[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %33 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %32, %33 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %34 = llvm.bitcast %33 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %35 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %36 = llvm.insertvalue %3, %35[0] : !llvm.struct<(i64, ptr)> + %37 = llvm.insertvalue %34, %36[1] : !llvm.struct<(i64, ptr)> + llvm.call @read_input_sizes_2D_f64(%10, %6, %7, %5, %7, %3, %34, %11) {filename = "SPARSE_FILE_NAME0"} : (i32, i64, i64, i64, i64, i64, !llvm.ptr, i32) -> () + %38 = llvm.load %26 : !llvm.ptr + %39 = llvm.getelementptr %26[1] : (!llvm.ptr) -> !llvm.ptr + %40 = llvm.load %39 : !llvm.ptr + %41 = llvm.getelementptr %26[2] : (!llvm.ptr) -> !llvm.ptr + %42 = llvm.load %41 : !llvm.ptr + %43 = llvm.getelementptr %26[3] : (!llvm.ptr) -> !llvm.ptr + %44 = llvm.load %43 : !llvm.ptr + %45 = llvm.getelementptr %26[4] : (!llvm.ptr) -> !llvm.ptr + %46 = llvm.load %45 : !llvm.ptr + %47 = llvm.getelementptr %26[5] : (!llvm.ptr) -> !llvm.ptr + %48 = llvm.load %47 : !llvm.ptr + %49 = llvm.getelementptr %26[6] : (!llvm.ptr) -> !llvm.ptr + %50 = llvm.load %49 : !llvm.ptr + %51 = llvm.getelementptr %26[7] : (!llvm.ptr) -> !llvm.ptr + %52 = llvm.load %51 : !llvm.ptr + %53 = llvm.getelementptr %26[8] : (!llvm.ptr) -> !llvm.ptr + %54 = llvm.load %53 : !llvm.ptr + %55 = llvm.getelementptr %26[9] : (!llvm.ptr) -> !llvm.ptr + %56 = llvm.load %55 : !llvm.ptr + %57 = llvm.getelementptr %26[10] : (!llvm.ptr) -> !llvm.ptr + %58 = llvm.load %57 : !llvm.ptr + %59 = llvm.mlir.null : !llvm.ptr + %60 = llvm.getelementptr %59[%38] : (!llvm.ptr, i64) -> !llvm.ptr + %61 = llvm.ptrtoint %60 : !llvm.ptr to i64 + %62 = llvm.call @malloc(%61) : (i64) -> !llvm.ptr + %63 = llvm.bitcast %62 : !llvm.ptr to !llvm.ptr + %64 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %65 = llvm.insertvalue %63, %64[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %66 = llvm.insertvalue %63, %65[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %67 = llvm.insertvalue %2, %66[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %68 = llvm.insertvalue %38, %67[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %69 = llvm.insertvalue %3, %68[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb1(%6 : i64) + ^bb1(%70: i64): // 2 preds: ^bb0, ^bb2 + %71 = llvm.icmp "slt" %70, %38 : i64 + llvm.cond_br %71, ^bb2, ^bb3 + ^bb2: // pred: ^bb1 + %72 = llvm.getelementptr %63[%70] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %6, %72 : !llvm.ptr + %73 = llvm.add %70, %5 : i64 + llvm.br ^bb1(%73 : i64) + ^bb3: // pred: ^bb1 + %74 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %69, %74 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %75 = llvm.bitcast %74 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %76 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %77 = llvm.insertvalue %3, %76[0] : !llvm.struct<(i64, ptr)> + %78 = llvm.insertvalue %75, %77[1] : !llvm.struct<(i64, ptr)> + %79 = llvm.mlir.null : !llvm.ptr + %80 = llvm.getelementptr %79[%40] : (!llvm.ptr, i64) -> !llvm.ptr + %81 = llvm.ptrtoint %80 : !llvm.ptr to i64 + %82 = llvm.call @malloc(%81) : (i64) -> !llvm.ptr + %83 = llvm.bitcast %82 : !llvm.ptr to !llvm.ptr + %84 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %85 = llvm.insertvalue %83, %84[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %86 = llvm.insertvalue %83, %85[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %87 = llvm.insertvalue %2, %86[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %88 = llvm.insertvalue %40, %87[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %89 = llvm.insertvalue %3, %88[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb4(%6 : i64) + ^bb4(%90: i64): // 2 preds: ^bb3, ^bb5 + %91 = llvm.icmp "slt" %90, %40 : i64 + llvm.cond_br %91, ^bb5, ^bb6 + ^bb5: // pred: ^bb4 + %92 = llvm.getelementptr %83[%90] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %6, %92 : !llvm.ptr + %93 = llvm.add %90, %5 : i64 + llvm.br ^bb4(%93 : i64) + ^bb6: // pred: ^bb4 + %94 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %89, %94 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %95 = llvm.bitcast %94 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %96 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %97 = llvm.insertvalue %3, %96[0] : !llvm.struct<(i64, ptr)> + %98 = llvm.insertvalue %95, %97[1] : !llvm.struct<(i64, ptr)> + %99 = llvm.mlir.null : !llvm.ptr + %100 = llvm.getelementptr %99[%42] : (!llvm.ptr, i64) -> !llvm.ptr + %101 = llvm.ptrtoint %100 : !llvm.ptr to i64 + %102 = llvm.call @malloc(%101) : (i64) -> !llvm.ptr + %103 = llvm.bitcast %102 : !llvm.ptr to !llvm.ptr + %104 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %105 = llvm.insertvalue %103, %104[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %106 = llvm.insertvalue %103, %105[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %107 = llvm.insertvalue %2, %106[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %108 = llvm.insertvalue %42, %107[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %109 = llvm.insertvalue %3, %108[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb7(%6 : i64) + ^bb7(%110: i64): // 2 preds: ^bb6, ^bb8 + %111 = llvm.icmp "slt" %110, %42 : i64 + llvm.cond_br %111, ^bb8, ^bb9 + ^bb8: // pred: ^bb7 + %112 = llvm.getelementptr %103[%110] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %6, %112 : !llvm.ptr + %113 = llvm.add %110, %5 : i64 + llvm.br ^bb7(%113 : i64) + ^bb9: // pred: ^bb7 + %114 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %109, %114 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %115 = llvm.bitcast %114 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %116 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %117 = llvm.insertvalue %3, %116[0] : !llvm.struct<(i64, ptr)> + %118 = llvm.insertvalue %115, %117[1] : !llvm.struct<(i64, ptr)> + %119 = llvm.mlir.null : !llvm.ptr + %120 = llvm.getelementptr %119[%44] : (!llvm.ptr, i64) -> !llvm.ptr + %121 = llvm.ptrtoint %120 : !llvm.ptr to i64 + %122 = llvm.call @malloc(%121) : (i64) -> !llvm.ptr + %123 = llvm.bitcast %122 : !llvm.ptr to !llvm.ptr + %124 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %125 = llvm.insertvalue %123, %124[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %126 = llvm.insertvalue %123, %125[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %127 = llvm.insertvalue %2, %126[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %128 = llvm.insertvalue %44, %127[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %129 = llvm.insertvalue %3, %128[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb10(%6 : i64) + ^bb10(%130: i64): // 2 preds: ^bb9, ^bb11 + %131 = llvm.icmp "slt" %130, %44 : i64 + llvm.cond_br %131, ^bb11, ^bb12 + ^bb11: // pred: ^bb10 + %132 = llvm.getelementptr %123[%130] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %6, %132 : !llvm.ptr + %133 = llvm.add %130, %5 : i64 + llvm.br ^bb10(%133 : i64) + ^bb12: // pred: ^bb10 + %134 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %129, %134 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %135 = llvm.bitcast %134 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %136 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %137 = llvm.insertvalue %3, %136[0] : !llvm.struct<(i64, ptr)> + %138 = llvm.insertvalue %135, %137[1] : !llvm.struct<(i64, ptr)> + %139 = llvm.mlir.null : !llvm.ptr + %140 = llvm.getelementptr %139[%46] : (!llvm.ptr, i64) -> !llvm.ptr + %141 = llvm.ptrtoint %140 : !llvm.ptr to i64 + %142 = llvm.call @malloc(%141) : (i64) -> !llvm.ptr + %143 = llvm.bitcast %142 : !llvm.ptr to !llvm.ptr + %144 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %145 = llvm.insertvalue %143, %144[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %146 = llvm.insertvalue %143, %145[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %147 = llvm.insertvalue %2, %146[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %148 = llvm.insertvalue %46, %147[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %149 = llvm.insertvalue %3, %148[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb13(%6 : i64) + ^bb13(%150: i64): // 2 preds: ^bb12, ^bb14 + %151 = llvm.icmp "slt" %150, %46 : i64 + llvm.cond_br %151, ^bb14, ^bb15 + ^bb14: // pred: ^bb13 + %152 = llvm.getelementptr %143[%150] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %6, %152 : !llvm.ptr + %153 = llvm.add %150, %5 : i64 + llvm.br ^bb13(%153 : i64) + ^bb15: // pred: ^bb13 + %154 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %149, %154 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %155 = llvm.bitcast %154 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %156 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %157 = llvm.insertvalue %3, %156[0] : !llvm.struct<(i64, ptr)> + %158 = llvm.insertvalue %155, %157[1] : !llvm.struct<(i64, ptr)> + %159 = llvm.mlir.null : !llvm.ptr + %160 = llvm.getelementptr %159[%48] : (!llvm.ptr, i64) -> !llvm.ptr + %161 = llvm.ptrtoint %160 : !llvm.ptr to i64 + %162 = llvm.call @malloc(%161) : (i64) -> !llvm.ptr + %163 = llvm.bitcast %162 : !llvm.ptr to !llvm.ptr + %164 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %165 = llvm.insertvalue %163, %164[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %166 = llvm.insertvalue %163, %165[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %167 = llvm.insertvalue %2, %166[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %168 = llvm.insertvalue %48, %167[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %169 = llvm.insertvalue %3, %168[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb16(%6 : i64) + ^bb16(%170: i64): // 2 preds: ^bb15, ^bb17 + %171 = llvm.icmp "slt" %170, %48 : i64 + llvm.cond_br %171, ^bb17, ^bb18 + ^bb17: // pred: ^bb16 + %172 = llvm.getelementptr %163[%170] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %6, %172 : !llvm.ptr + %173 = llvm.add %170, %5 : i64 + llvm.br ^bb16(%173 : i64) + ^bb18: // pred: ^bb16 + %174 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %169, %174 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %175 = llvm.bitcast %174 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %176 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %177 = llvm.insertvalue %3, %176[0] : !llvm.struct<(i64, ptr)> + %178 = llvm.insertvalue %175, %177[1] : !llvm.struct<(i64, ptr)> + %179 = llvm.mlir.null : !llvm.ptr + %180 = llvm.getelementptr %179[%50] : (!llvm.ptr, i64) -> !llvm.ptr + %181 = llvm.ptrtoint %180 : !llvm.ptr to i64 + %182 = llvm.call @malloc(%181) : (i64) -> !llvm.ptr + %183 = llvm.bitcast %182 : !llvm.ptr to !llvm.ptr + %184 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %185 = llvm.insertvalue %183, %184[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %186 = llvm.insertvalue %183, %185[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %187 = llvm.insertvalue %2, %186[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %188 = llvm.insertvalue %50, %187[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %189 = llvm.insertvalue %3, %188[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb19(%6 : i64) + ^bb19(%190: i64): // 2 preds: ^bb18, ^bb20 + %191 = llvm.icmp "slt" %190, %50 : i64 + llvm.cond_br %191, ^bb20, ^bb21 + ^bb20: // pred: ^bb19 + %192 = llvm.getelementptr %183[%190] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %6, %192 : !llvm.ptr + %193 = llvm.add %190, %5 : i64 + llvm.br ^bb19(%193 : i64) + ^bb21: // pred: ^bb19 + %194 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %189, %194 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %195 = llvm.bitcast %194 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %196 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %197 = llvm.insertvalue %3, %196[0] : !llvm.struct<(i64, ptr)> + %198 = llvm.insertvalue %195, %197[1] : !llvm.struct<(i64, ptr)> + %199 = llvm.mlir.null : !llvm.ptr + %200 = llvm.getelementptr %199[%52] : (!llvm.ptr, i64) -> !llvm.ptr + %201 = llvm.ptrtoint %200 : !llvm.ptr to i64 + %202 = llvm.call @malloc(%201) : (i64) -> !llvm.ptr + %203 = llvm.bitcast %202 : !llvm.ptr to !llvm.ptr + %204 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %205 = llvm.insertvalue %203, %204[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %206 = llvm.insertvalue %203, %205[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %207 = llvm.insertvalue %2, %206[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %208 = llvm.insertvalue %52, %207[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %209 = llvm.insertvalue %3, %208[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb22(%6 : i64) + ^bb22(%210: i64): // 2 preds: ^bb21, ^bb23 + %211 = llvm.icmp "slt" %210, %52 : i64 + llvm.cond_br %211, ^bb23, ^bb24 + ^bb23: // pred: ^bb22 + %212 = llvm.getelementptr %203[%210] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %6, %212 : !llvm.ptr + %213 = llvm.add %210, %5 : i64 + llvm.br ^bb22(%213 : i64) + ^bb24: // pred: ^bb22 + %214 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %209, %214 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %215 = llvm.bitcast %214 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %216 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %217 = llvm.insertvalue %3, %216[0] : !llvm.struct<(i64, ptr)> + %218 = llvm.insertvalue %215, %217[1] : !llvm.struct<(i64, ptr)> + %219 = llvm.mlir.null : !llvm.ptr + %220 = llvm.getelementptr %219[%54] : (!llvm.ptr, i64) -> !llvm.ptr + %221 = llvm.ptrtoint %220 : !llvm.ptr to i64 + %222 = llvm.call @malloc(%221) : (i64) -> !llvm.ptr + %223 = llvm.bitcast %222 : !llvm.ptr to !llvm.ptr + %224 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %225 = llvm.insertvalue %223, %224[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %226 = llvm.insertvalue %223, %225[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %227 = llvm.insertvalue %2, %226[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %228 = llvm.insertvalue %54, %227[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %229 = llvm.insertvalue %3, %228[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb25(%6 : i64) + ^bb25(%230: i64): // 2 preds: ^bb24, ^bb26 + %231 = llvm.icmp "slt" %230, %54 : i64 + llvm.cond_br %231, ^bb26, ^bb27 + ^bb26: // pred: ^bb25 + %232 = llvm.getelementptr %223[%230] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %20, %232 : !llvm.ptr + %233 = llvm.add %230, %5 : i64 + llvm.br ^bb25(%233 : i64) + ^bb27: // pred: ^bb25 + %234 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %229, %234 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %235 = llvm.bitcast %234 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %236 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %237 = llvm.insertvalue %3, %236[0] : !llvm.struct<(i64, ptr)> + %238 = llvm.insertvalue %235, %237[1] : !llvm.struct<(i64, ptr)> + llvm.call @read_input_2D_f64(%10, %6, %7, %5, %7, %3, %75, %3, %95, %3, %115, %3, %135, %3, %155, %3, %175, %3, %195, %3, %215, %3, %235, %11) {filename = "SPARSE_FILE_NAME0"} : (i32, i64, i64, i64, i64, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i32) -> () + %239 = llvm.mlir.null : !llvm.ptr + %240 = llvm.getelementptr %239[13] : (!llvm.ptr) -> !llvm.ptr + %241 = llvm.ptrtoint %240 : !llvm.ptr to i64 + %242 = llvm.call @malloc(%241) : (i64) -> !llvm.ptr + %243 = llvm.bitcast %242 : !llvm.ptr to !llvm.ptr + %244 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %245 = llvm.insertvalue %243, %244[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %246 = llvm.insertvalue %243, %245[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %247 = llvm.insertvalue %2, %246[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %248 = llvm.insertvalue %4, %247[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %249 = llvm.insertvalue %3, %248[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %250 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %249, %250 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %251 = llvm.bitcast %250 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %252 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %253 = llvm.insertvalue %3, %252[0] : !llvm.struct<(i64, ptr)> + %254 = llvm.insertvalue %251, %253[1] : !llvm.struct<(i64, ptr)> + llvm.call @read_input_sizes_2D_f64(%11, %6, %7, %5, %7, %3, %251, %11) {filename = "SPARSE_FILE_NAME1"} : (i32, i64, i64, i64, i64, i64, !llvm.ptr, i32) -> () + %255 = llvm.load %243 : !llvm.ptr + %256 = llvm.getelementptr %243[1] : (!llvm.ptr) -> !llvm.ptr + %257 = llvm.load %256 : !llvm.ptr + %258 = llvm.getelementptr %243[2] : (!llvm.ptr) -> !llvm.ptr + %259 = llvm.load %258 : !llvm.ptr + %260 = llvm.getelementptr %243[3] : (!llvm.ptr) -> !llvm.ptr + %261 = llvm.load %260 : !llvm.ptr + %262 = llvm.getelementptr %243[4] : (!llvm.ptr) -> !llvm.ptr + %263 = llvm.load %262 : !llvm.ptr + %264 = llvm.getelementptr %243[5] : (!llvm.ptr) -> !llvm.ptr + %265 = llvm.load %264 : !llvm.ptr + %266 = llvm.getelementptr %243[6] : (!llvm.ptr) -> !llvm.ptr + %267 = llvm.load %266 : !llvm.ptr + %268 = llvm.getelementptr %243[7] : (!llvm.ptr) -> !llvm.ptr + %269 = llvm.load %268 : !llvm.ptr + %270 = llvm.getelementptr %243[8] : (!llvm.ptr) -> !llvm.ptr + %271 = llvm.load %270 : !llvm.ptr + %272 = llvm.mlir.null : !llvm.ptr + %273 = llvm.getelementptr %272[%255] : (!llvm.ptr, i64) -> !llvm.ptr + %274 = llvm.ptrtoint %273 : !llvm.ptr to i64 + %275 = llvm.call @malloc(%274) : (i64) -> !llvm.ptr + %276 = llvm.bitcast %275 : !llvm.ptr to !llvm.ptr + %277 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %278 = llvm.insertvalue %276, %277[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %279 = llvm.insertvalue %276, %278[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %280 = llvm.insertvalue %2, %279[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %281 = llvm.insertvalue %255, %280[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %282 = llvm.insertvalue %3, %281[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb28(%6 : i64) + ^bb28(%283: i64): // 2 preds: ^bb27, ^bb29 + %284 = llvm.icmp "slt" %283, %255 : i64 + llvm.cond_br %284, ^bb29, ^bb30 + ^bb29: // pred: ^bb28 + %285 = llvm.getelementptr %276[%283] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %6, %285 : !llvm.ptr + %286 = llvm.add %283, %5 : i64 + llvm.br ^bb28(%286 : i64) + ^bb30: // pred: ^bb28 + %287 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %282, %287 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %288 = llvm.bitcast %287 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %289 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %290 = llvm.insertvalue %3, %289[0] : !llvm.struct<(i64, ptr)> + %291 = llvm.insertvalue %288, %290[1] : !llvm.struct<(i64, ptr)> + %292 = llvm.mlir.null : !llvm.ptr + %293 = llvm.getelementptr %292[%257] : (!llvm.ptr, i64) -> !llvm.ptr + %294 = llvm.ptrtoint %293 : !llvm.ptr to i64 + %295 = llvm.call @malloc(%294) : (i64) -> !llvm.ptr + %296 = llvm.bitcast %295 : !llvm.ptr to !llvm.ptr + %297 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %298 = llvm.insertvalue %296, %297[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %299 = llvm.insertvalue %296, %298[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %300 = llvm.insertvalue %2, %299[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %301 = llvm.insertvalue %257, %300[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %302 = llvm.insertvalue %3, %301[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb31(%6 : i64) + ^bb31(%303: i64): // 2 preds: ^bb30, ^bb32 + %304 = llvm.icmp "slt" %303, %257 : i64 + llvm.cond_br %304, ^bb32, ^bb33 + ^bb32: // pred: ^bb31 + %305 = llvm.getelementptr %296[%303] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %6, %305 : !llvm.ptr + %306 = llvm.add %303, %5 : i64 + llvm.br ^bb31(%306 : i64) + ^bb33: // pred: ^bb31 + %307 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %302, %307 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %308 = llvm.bitcast %307 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %309 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %310 = llvm.insertvalue %3, %309[0] : !llvm.struct<(i64, ptr)> + %311 = llvm.insertvalue %308, %310[1] : !llvm.struct<(i64, ptr)> + %312 = llvm.mlir.null : !llvm.ptr + %313 = llvm.getelementptr %312[%259] : (!llvm.ptr, i64) -> !llvm.ptr + %314 = llvm.ptrtoint %313 : !llvm.ptr to i64 + %315 = llvm.call @malloc(%314) : (i64) -> !llvm.ptr + %316 = llvm.bitcast %315 : !llvm.ptr to !llvm.ptr + %317 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %318 = llvm.insertvalue %316, %317[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %319 = llvm.insertvalue %316, %318[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %320 = llvm.insertvalue %2, %319[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %321 = llvm.insertvalue %259, %320[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %322 = llvm.insertvalue %3, %321[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb34(%6 : i64) + ^bb34(%323: i64): // 2 preds: ^bb33, ^bb35 + %324 = llvm.icmp "slt" %323, %259 : i64 + llvm.cond_br %324, ^bb35, ^bb36 + ^bb35: // pred: ^bb34 + %325 = llvm.getelementptr %316[%323] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %6, %325 : !llvm.ptr + %326 = llvm.add %323, %5 : i64 + llvm.br ^bb34(%326 : i64) + ^bb36: // pred: ^bb34 + %327 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %322, %327 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %328 = llvm.bitcast %327 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %329 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %330 = llvm.insertvalue %3, %329[0] : !llvm.struct<(i64, ptr)> + %331 = llvm.insertvalue %328, %330[1] : !llvm.struct<(i64, ptr)> + %332 = llvm.mlir.null : !llvm.ptr + %333 = llvm.getelementptr %332[%261] : (!llvm.ptr, i64) -> !llvm.ptr + %334 = llvm.ptrtoint %333 : !llvm.ptr to i64 + %335 = llvm.call @malloc(%334) : (i64) -> !llvm.ptr + %336 = llvm.bitcast %335 : !llvm.ptr to !llvm.ptr + %337 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %338 = llvm.insertvalue %336, %337[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %339 = llvm.insertvalue %336, %338[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %340 = llvm.insertvalue %2, %339[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %341 = llvm.insertvalue %261, %340[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %342 = llvm.insertvalue %3, %341[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb37(%6 : i64) + ^bb37(%343: i64): // 2 preds: ^bb36, ^bb38 + %344 = llvm.icmp "slt" %343, %261 : i64 + llvm.cond_br %344, ^bb38, ^bb39 + ^bb38: // pred: ^bb37 + %345 = llvm.getelementptr %336[%343] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %6, %345 : !llvm.ptr + %346 = llvm.add %343, %5 : i64 + llvm.br ^bb37(%346 : i64) + ^bb39: // pred: ^bb37 + %347 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %342, %347 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %348 = llvm.bitcast %347 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %349 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %350 = llvm.insertvalue %3, %349[0] : !llvm.struct<(i64, ptr)> + %351 = llvm.insertvalue %348, %350[1] : !llvm.struct<(i64, ptr)> + %352 = llvm.mlir.null : !llvm.ptr + %353 = llvm.getelementptr %352[%263] : (!llvm.ptr, i64) -> !llvm.ptr + %354 = llvm.ptrtoint %353 : !llvm.ptr to i64 + %355 = llvm.call @malloc(%354) : (i64) -> !llvm.ptr + %356 = llvm.bitcast %355 : !llvm.ptr to !llvm.ptr + %357 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %358 = llvm.insertvalue %356, %357[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %359 = llvm.insertvalue %356, %358[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %360 = llvm.insertvalue %2, %359[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %361 = llvm.insertvalue %263, %360[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %362 = llvm.insertvalue %3, %361[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb40(%6 : i64) + ^bb40(%363: i64): // 2 preds: ^bb39, ^bb41 + %364 = llvm.icmp "slt" %363, %263 : i64 + llvm.cond_br %364, ^bb41, ^bb42 + ^bb41: // pred: ^bb40 + %365 = llvm.getelementptr %356[%363] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %6, %365 : !llvm.ptr + %366 = llvm.add %363, %5 : i64 + llvm.br ^bb40(%366 : i64) + ^bb42: // pred: ^bb40 + %367 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %362, %367 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %368 = llvm.bitcast %367 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %369 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %370 = llvm.insertvalue %3, %369[0] : !llvm.struct<(i64, ptr)> + %371 = llvm.insertvalue %368, %370[1] : !llvm.struct<(i64, ptr)> + %372 = llvm.mlir.null : !llvm.ptr + %373 = llvm.getelementptr %372[%265] : (!llvm.ptr, i64) -> !llvm.ptr + %374 = llvm.ptrtoint %373 : !llvm.ptr to i64 + %375 = llvm.call @malloc(%374) : (i64) -> !llvm.ptr + %376 = llvm.bitcast %375 : !llvm.ptr to !llvm.ptr + %377 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %378 = llvm.insertvalue %376, %377[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %379 = llvm.insertvalue %376, %378[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %380 = llvm.insertvalue %2, %379[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %381 = llvm.insertvalue %265, %380[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %382 = llvm.insertvalue %3, %381[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb43(%6 : i64) + ^bb43(%383: i64): // 2 preds: ^bb42, ^bb44 + %384 = llvm.icmp "slt" %383, %265 : i64 + llvm.cond_br %384, ^bb44, ^bb45 + ^bb44: // pred: ^bb43 + %385 = llvm.getelementptr %376[%383] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %6, %385 : !llvm.ptr + %386 = llvm.add %383, %5 : i64 + llvm.br ^bb43(%386 : i64) + ^bb45: // pred: ^bb43 + %387 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %382, %387 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %388 = llvm.bitcast %387 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %389 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %390 = llvm.insertvalue %3, %389[0] : !llvm.struct<(i64, ptr)> + %391 = llvm.insertvalue %388, %390[1] : !llvm.struct<(i64, ptr)> + %392 = llvm.mlir.null : !llvm.ptr + %393 = llvm.getelementptr %392[%267] : (!llvm.ptr, i64) -> !llvm.ptr + %394 = llvm.ptrtoint %393 : !llvm.ptr to i64 + %395 = llvm.call @malloc(%394) : (i64) -> !llvm.ptr + %396 = llvm.bitcast %395 : !llvm.ptr to !llvm.ptr + %397 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %398 = llvm.insertvalue %396, %397[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %399 = llvm.insertvalue %396, %398[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %400 = llvm.insertvalue %2, %399[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %401 = llvm.insertvalue %267, %400[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %402 = llvm.insertvalue %3, %401[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb46(%6 : i64) + ^bb46(%403: i64): // 2 preds: ^bb45, ^bb47 + %404 = llvm.icmp "slt" %403, %267 : i64 + llvm.cond_br %404, ^bb47, ^bb48 + ^bb47: // pred: ^bb46 + %405 = llvm.getelementptr %396[%403] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %6, %405 : !llvm.ptr + %406 = llvm.add %403, %5 : i64 + llvm.br ^bb46(%406 : i64) + ^bb48: // pred: ^bb46 + %407 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %402, %407 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %408 = llvm.bitcast %407 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %409 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %410 = llvm.insertvalue %3, %409[0] : !llvm.struct<(i64, ptr)> + %411 = llvm.insertvalue %408, %410[1] : !llvm.struct<(i64, ptr)> + %412 = llvm.mlir.null : !llvm.ptr + %413 = llvm.getelementptr %412[%269] : (!llvm.ptr, i64) -> !llvm.ptr + %414 = llvm.ptrtoint %413 : !llvm.ptr to i64 + %415 = llvm.call @malloc(%414) : (i64) -> !llvm.ptr + %416 = llvm.bitcast %415 : !llvm.ptr to !llvm.ptr + %417 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %418 = llvm.insertvalue %416, %417[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %419 = llvm.insertvalue %416, %418[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %420 = llvm.insertvalue %2, %419[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %421 = llvm.insertvalue %269, %420[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %422 = llvm.insertvalue %3, %421[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb49(%6 : i64) + ^bb49(%423: i64): // 2 preds: ^bb48, ^bb50 + %424 = llvm.icmp "slt" %423, %269 : i64 + llvm.cond_br %424, ^bb50, ^bb51 + ^bb50: // pred: ^bb49 + %425 = llvm.getelementptr %416[%423] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %6, %425 : !llvm.ptr + %426 = llvm.add %423, %5 : i64 + llvm.br ^bb49(%426 : i64) + ^bb51: // pred: ^bb49 + %427 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %422, %427 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %428 = llvm.bitcast %427 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %429 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %430 = llvm.insertvalue %3, %429[0] : !llvm.struct<(i64, ptr)> + %431 = llvm.insertvalue %428, %430[1] : !llvm.struct<(i64, ptr)> + %432 = llvm.mlir.null : !llvm.ptr + %433 = llvm.getelementptr %432[%271] : (!llvm.ptr, i64) -> !llvm.ptr + %434 = llvm.ptrtoint %433 : !llvm.ptr to i64 + %435 = llvm.call @malloc(%434) : (i64) -> !llvm.ptr + %436 = llvm.bitcast %435 : !llvm.ptr to !llvm.ptr + %437 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %438 = llvm.insertvalue %436, %437[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %439 = llvm.insertvalue %436, %438[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %440 = llvm.insertvalue %2, %439[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %441 = llvm.insertvalue %271, %440[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %442 = llvm.insertvalue %3, %441[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb52(%6 : i64) + ^bb52(%443: i64): // 2 preds: ^bb51, ^bb53 + %444 = llvm.icmp "slt" %443, %271 : i64 + llvm.cond_br %444, ^bb53, ^bb54 + ^bb53: // pred: ^bb52 + %445 = llvm.getelementptr %436[%443] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %20, %445 : !llvm.ptr + %446 = llvm.add %443, %5 : i64 + llvm.br ^bb52(%446 : i64) + ^bb54: // pred: ^bb52 + %447 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %442, %447 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %448 = llvm.bitcast %447 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %449 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %450 = llvm.insertvalue %3, %449[0] : !llvm.struct<(i64, ptr)> + %451 = llvm.insertvalue %448, %450[1] : !llvm.struct<(i64, ptr)> + llvm.call @read_input_2D_f64(%11, %6, %7, %5, %7, %3, %288, %3, %308, %3, %328, %3, %348, %3, %368, %3, %388, %3, %408, %3, %428, %3, %448, %11) {filename = "SPARSE_FILE_NAME1"} : (i32, i64, i64, i64, i64, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i32) -> () + %452 = llvm.mlir.null : !llvm.ptr + %453 = llvm.getelementptr %452[%58] : (!llvm.ptr, i64) -> !llvm.ptr + %454 = llvm.ptrtoint %453 : !llvm.ptr to i64 + %455 = llvm.add %454, %1 : i64 + %456 = llvm.call @malloc(%455) : (i64) -> !llvm.ptr + %457 = llvm.bitcast %456 : !llvm.ptr to !llvm.ptr + %458 = llvm.ptrtoint %457 : !llvm.ptr to i64 + %459 = llvm.sub %1, %3 : i64 + %460 = llvm.add %458, %459 : i64 + %461 = llvm.urem %460, %1 : i64 + %462 = llvm.sub %460, %461 : i64 + %463 = llvm.inttoptr %462 : i64 to !llvm.ptr + llvm.br ^bb55(%6 : i64) + ^bb55(%464: i64): // 2 preds: ^bb54, ^bb56 + %465 = llvm.icmp "slt" %464, %58 : i64 + llvm.cond_br %465, ^bb56, ^bb57 + ^bb56: // pred: ^bb55 + %466 = llvm.getelementptr %463[%464] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %20, %466 : !llvm.ptr + %467 = llvm.add %464, %5 : i64 + llvm.br ^bb55(%467 : i64) + ^bb57: // pred: ^bb55 + %468 = llvm.mlir.null : !llvm.ptr + %469 = llvm.getelementptr %468[%58] : (!llvm.ptr, i64) -> !llvm.ptr + %470 = llvm.ptrtoint %469 : !llvm.ptr to i64 + %471 = llvm.add %470, %1 : i64 + %472 = llvm.call @malloc(%471) : (i64) -> !llvm.ptr + %473 = llvm.bitcast %472 : !llvm.ptr to !llvm.ptr + %474 = llvm.ptrtoint %473 : !llvm.ptr to i64 + %475 = llvm.sub %1, %3 : i64 + %476 = llvm.add %474, %475 : i64 + %477 = llvm.urem %476, %1 : i64 + %478 = llvm.sub %476, %477 : i64 + %479 = llvm.inttoptr %478 : i64 to !llvm.ptr + llvm.br ^bb58(%6 : i64) + ^bb58(%480: i64): // 2 preds: ^bb57, ^bb59 + %481 = llvm.icmp "slt" %480, %58 : i64 + llvm.cond_br %481, ^bb59, ^bb60 + ^bb59: // pred: ^bb58 + %482 = llvm.getelementptr %479[%480] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %6, %482 : !llvm.ptr + %483 = llvm.add %480, %5 : i64 + llvm.br ^bb58(%483 : i64) + ^bb60: // pred: ^bb58 + %484 = llvm.mlir.null : !llvm.ptr + %485 = llvm.getelementptr %484[1] : (!llvm.ptr) -> !llvm.ptr + %486 = llvm.ptrtoint %485 : !llvm.ptr to i64 + %487 = llvm.call @malloc(%486) : (i64) -> !llvm.ptr + %488 = llvm.bitcast %487 : !llvm.ptr to !llvm.ptr + %489 = llvm.mlir.null : !llvm.ptr + %490 = llvm.getelementptr %489[%58] : (!llvm.ptr, i64) -> !llvm.ptr + %491 = llvm.ptrtoint %490 : !llvm.ptr to i64 + %492 = llvm.add %491, %1 : i64 + %493 = llvm.call @malloc(%492) : (i64) -> !llvm.ptr + %494 = llvm.bitcast %493 : !llvm.ptr to !llvm.ptr + %495 = llvm.ptrtoint %494 : !llvm.ptr to i64 + %496 = llvm.sub %1, %3 : i64 + %497 = llvm.add %495, %496 : i64 + %498 = llvm.urem %497, %1 : i64 + %499 = llvm.sub %497, %498 : i64 + %500 = llvm.inttoptr %499 : i64 to !llvm.ptr + llvm.br ^bb61(%6 : i64) + ^bb61(%501: i64): // 2 preds: ^bb60, ^bb62 + %502 = llvm.icmp "slt" %501, %58 : i64 + llvm.cond_br %502, ^bb62, ^bb63 + ^bb62: // pred: ^bb61 + %503 = llvm.getelementptr %500[%501] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %20, %503 : !llvm.ptr + %504 = llvm.add %501, %5 : i64 + llvm.br ^bb61(%504 : i64) + ^bb63: // pred: ^bb61 + %505 = llvm.add %56, %5 : i64 + %506 = llvm.mlir.null : !llvm.ptr + %507 = llvm.getelementptr %506[1] : (!llvm.ptr) -> !llvm.ptr + %508 = llvm.ptrtoint %507 : !llvm.ptr to i64 + %509 = llvm.call @malloc(%508) : (i64) -> !llvm.ptr + %510 = llvm.bitcast %509 : !llvm.ptr to !llvm.ptr + %511 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %512 = llvm.insertvalue %510, %511[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %513 = llvm.insertvalue %510, %512[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %514 = llvm.insertvalue %2, %513[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %515 = llvm.insertvalue %3, %514[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %516 = llvm.insertvalue %3, %515[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.store %6, %510 : !llvm.ptr + %517 = llvm.mlir.null : !llvm.ptr + %518 = llvm.getelementptr %517[1] : (!llvm.ptr) -> !llvm.ptr + %519 = llvm.ptrtoint %518 : !llvm.ptr to i64 + %520 = llvm.call @malloc(%519) : (i64) -> !llvm.ptr + %521 = llvm.bitcast %520 : !llvm.ptr to !llvm.ptr + %522 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %523 = llvm.insertvalue %521, %522[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %524 = llvm.insertvalue %521, %523[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %525 = llvm.insertvalue %2, %524[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %526 = llvm.insertvalue %3, %525[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %527 = llvm.insertvalue %3, %526[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.store %6, %521 : !llvm.ptr + %528 = llvm.mlir.null : !llvm.ptr + %529 = llvm.getelementptr %528[1] : (!llvm.ptr) -> !llvm.ptr + %530 = llvm.ptrtoint %529 : !llvm.ptr to i64 + %531 = llvm.call @malloc(%530) : (i64) -> !llvm.ptr + %532 = llvm.bitcast %531 : !llvm.ptr to !llvm.ptr + %533 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %534 = llvm.insertvalue %532, %533[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %535 = llvm.insertvalue %532, %534[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %536 = llvm.insertvalue %2, %535[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %537 = llvm.insertvalue %2, %536[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %538 = llvm.insertvalue %3, %537[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %539 = llvm.mlir.null : !llvm.ptr + %540 = llvm.getelementptr %539[1] : (!llvm.ptr) -> !llvm.ptr + %541 = llvm.ptrtoint %540 : !llvm.ptr to i64 + %542 = llvm.call @malloc(%541) : (i64) -> !llvm.ptr + %543 = llvm.bitcast %542 : !llvm.ptr to !llvm.ptr + %544 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %545 = llvm.insertvalue %543, %544[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %546 = llvm.insertvalue %543, %545[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %547 = llvm.insertvalue %2, %546[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %548 = llvm.insertvalue %2, %547[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %549 = llvm.insertvalue %3, %548[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %550 = llvm.mlir.null : !llvm.ptr + %551 = llvm.getelementptr %550[%505] : (!llvm.ptr, i64) -> !llvm.ptr + %552 = llvm.ptrtoint %551 : !llvm.ptr to i64 + %553 = llvm.call @malloc(%552) : (i64) -> !llvm.ptr + %554 = llvm.bitcast %553 : !llvm.ptr to !llvm.ptr + %555 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %556 = llvm.insertvalue %554, %555[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %557 = llvm.insertvalue %554, %556[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %558 = llvm.insertvalue %2, %557[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %559 = llvm.insertvalue %505, %558[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %560 = llvm.insertvalue %3, %559[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb64(%6 : i64) + ^bb64(%561: i64): // 2 preds: ^bb63, ^bb65 + %562 = llvm.icmp "slt" %561, %505 : i64 + llvm.cond_br %562, ^bb65, ^bb66 + ^bb65: // pred: ^bb64 + %563 = llvm.getelementptr %554[%561] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %6, %563 : !llvm.ptr + %564 = llvm.add %561, %5 : i64 + llvm.br ^bb64(%564 : i64) + ^bb66: // pred: ^bb64 + %565 = llvm.mlir.null : !llvm.ptr + %566 = llvm.getelementptr %565[1] : (!llvm.ptr) -> !llvm.ptr + %567 = llvm.ptrtoint %566 : !llvm.ptr to i64 + %568 = llvm.call @malloc(%567) : (i64) -> !llvm.ptr + %569 = llvm.bitcast %568 : !llvm.ptr to !llvm.ptr + %570 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %571 = llvm.insertvalue %569, %570[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %572 = llvm.insertvalue %569, %571[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %573 = llvm.insertvalue %2, %572[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %574 = llvm.insertvalue %2, %573[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %575 = llvm.insertvalue %3, %574[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %576 = llvm.mlir.null : !llvm.ptr + %577 = llvm.getelementptr %576[1] : (!llvm.ptr) -> !llvm.ptr + %578 = llvm.ptrtoint %577 : !llvm.ptr to i64 + %579 = llvm.call @malloc(%578) : (i64) -> !llvm.ptr + %580 = llvm.bitcast %579 : !llvm.ptr to !llvm.ptr + %581 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %582 = llvm.insertvalue %580, %581[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %583 = llvm.insertvalue %580, %582[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %584 = llvm.insertvalue %2, %583[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %585 = llvm.insertvalue %2, %584[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %586 = llvm.insertvalue %3, %585[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.store %56, %510 : !llvm.ptr + %587 = llvm.load %63 : !llvm.ptr + %588 = llvm.mlir.null : !llvm.ptr + %589 = llvm.getelementptr %588[1] : (!llvm.ptr) -> !llvm.ptr + %590 = llvm.ptrtoint %589 : !llvm.ptr to i64 + %591 = llvm.call @malloc(%590) : (i64) -> !llvm.ptr + %592 = llvm.bitcast %591 : !llvm.ptr to !llvm.ptr + llvm.store %6, %592 : !llvm.ptr + %593 = llvm.mlir.null : !llvm.ptr + %594 = llvm.getelementptr %593[%58] : (!llvm.ptr, i64) -> !llvm.ptr + %595 = llvm.ptrtoint %594 : !llvm.ptr to i64 + %596 = llvm.add %595, %0 : i64 + %597 = llvm.call @malloc(%596) : (i64) -> !llvm.ptr + %598 = llvm.bitcast %597 : !llvm.ptr to !llvm.ptr + %599 = llvm.ptrtoint %598 : !llvm.ptr to i64 + %600 = llvm.sub %0, %3 : i64 + %601 = llvm.add %599, %600 : i64 + %602 = llvm.urem %601, %0 : i64 + %603 = llvm.sub %601, %602 : i64 + %604 = llvm.inttoptr %603 : i64 to !llvm.ptr + llvm.br ^bb67(%6 : i64) + ^bb67(%605: i64): // 2 preds: ^bb66, ^bb68 + %606 = llvm.icmp "slt" %605, %58 : i64 + llvm.cond_br %606, ^bb68, ^bb69(%6 : i64) + ^bb68: // pred: ^bb67 + %607 = llvm.getelementptr %604[%605] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %19, %607 : !llvm.ptr + %608 = llvm.add %605, %5 : i64 + llvm.br ^bb67(%608 : i64) + ^bb69(%609: i64): // 2 preds: ^bb67, ^bb81 + %610 = llvm.icmp "slt" %609, %587 : i64 + llvm.cond_br %610, ^bb70, ^bb82 + ^bb70: // pred: ^bb69 + %611 = llvm.load %592 : !llvm.ptr + %612 = llvm.add %611, %8 : i64 + llvm.store %612, %592 : !llvm.ptr + llvm.store %6, %488 : !llvm.ptr + llvm.br ^bb71(%6 : i64) + ^bb71(%613: i64): // 2 preds: ^bb70, ^bb72 + %614 = llvm.icmp "slt" %613, %58 : i64 + llvm.cond_br %614, ^bb72, ^bb73 + ^bb72: // pred: ^bb71 + %615 = llvm.getelementptr %500[%613] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %20, %615 : !llvm.ptr + %616 = llvm.add %613, %5 : i64 + llvm.br ^bb71(%616 : i64) + ^bb73: // pred: ^bb71 + %617 = llvm.add %609, %5 : i64 + %618 = llvm.getelementptr %143[%609] : (!llvm.ptr, i64) -> !llvm.ptr + %619 = llvm.load %618 : !llvm.ptr + %620 = llvm.getelementptr %143[%617] : (!llvm.ptr, i64) -> !llvm.ptr + %621 = llvm.load %620 : !llvm.ptr + llvm.br ^bb74(%619 : i64) + ^bb74(%622: i64): // 2 preds: ^bb73, ^bb75 + %623 = llvm.icmp "slt" %622, %621 : i64 + llvm.cond_br %623, ^bb75, ^bb76 + ^bb75: // pred: ^bb74 + %624 = llvm.getelementptr %163[%622] : (!llvm.ptr, i64) -> !llvm.ptr + %625 = llvm.load %624 : !llvm.ptr + %626 = llvm.getelementptr %223[%622] : (!llvm.ptr, i64) -> !llvm.ptr + %627 = llvm.load %626 : !llvm.ptr + %628 = llvm.getelementptr %500[%625] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %627, %628 : !llvm.ptr + %629 = llvm.add %622, %5 : i64 + llvm.br ^bb74(%629 : i64) + ^bb76: // pred: ^bb74 + %630 = llvm.getelementptr %356[%609] : (!llvm.ptr, i64) -> !llvm.ptr + %631 = llvm.load %630 : !llvm.ptr + %632 = llvm.getelementptr %356[%617] : (!llvm.ptr, i64) -> !llvm.ptr + %633 = llvm.load %632 : !llvm.ptr + llvm.br ^bb77(%631 : i64) + ^bb77(%634: i64): // 2 preds: ^bb76, ^bb80 + %635 = llvm.icmp "slt" %634, %633 : i64 + llvm.cond_br %635, ^bb78, ^bb81 + ^bb78: // pred: ^bb77 + %636 = llvm.getelementptr %376[%634] : (!llvm.ptr, i64) -> !llvm.ptr + %637 = llvm.load %636 : !llvm.ptr + %638 = llvm.getelementptr %479[%637] : (!llvm.ptr, i64) -> !llvm.ptr + %639 = llvm.load %638 : !llvm.ptr + %640 = llvm.icmp "ne" %639, %612 : i64 + llvm.cond_br %640, ^bb79, ^bb80 + ^bb79: // pred: ^bb78 + %641 = llvm.getelementptr %479[%637] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %612, %641 : !llvm.ptr + %642 = llvm.load %488 : !llvm.ptr + %643 = llvm.add %642, %5 : i64 + llvm.store %643, %488 : !llvm.ptr + llvm.br ^bb80 + ^bb80: // 2 preds: ^bb78, ^bb79 + %644 = llvm.add %634, %5 : i64 + llvm.br ^bb77(%644 : i64) + ^bb81: // pred: ^bb77 + %645 = llvm.load %488 : !llvm.ptr + %646 = llvm.getelementptr %554[%609] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %645, %646 : !llvm.ptr + llvm.br ^bb69(%617 : i64) + ^bb82: // pred: ^bb69 + %647 = llvm.getelementptr %554[%56] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %6, %647 : !llvm.ptr + %648 = llvm.mlir.null : !llvm.ptr + %649 = llvm.getelementptr %648[1] : (!llvm.ptr) -> !llvm.ptr + %650 = llvm.ptrtoint %649 : !llvm.ptr to i64 + %651 = llvm.call @malloc(%650) : (i64) -> !llvm.ptr + %652 = llvm.bitcast %651 : !llvm.ptr to !llvm.ptr + llvm.store %6, %652 : !llvm.ptr + llvm.br ^bb83(%6 : i64) + ^bb83(%653: i64): // 2 preds: ^bb82, ^bb84 + %654 = llvm.icmp "slt" %653, %505 : i64 + llvm.cond_br %654, ^bb84, ^bb85 + ^bb84: // pred: ^bb83 + %655 = llvm.getelementptr %554[%653] : (!llvm.ptr, i64) -> !llvm.ptr + %656 = llvm.load %655 : !llvm.ptr + %657 = llvm.load %652 : !llvm.ptr + %658 = llvm.getelementptr %554[%653] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %657, %658 : !llvm.ptr + %659 = llvm.add %656, %657 : i64 + llvm.store %659, %652 : !llvm.ptr + %660 = llvm.add %653, %5 : i64 + llvm.br ^bb83(%660 : i64) + ^bb85: // pred: ^bb83 + %661 = llvm.load %652 : !llvm.ptr + %662 = llvm.mlir.null : !llvm.ptr + %663 = llvm.getelementptr %662[%661] : (!llvm.ptr, i64) -> !llvm.ptr + %664 = llvm.ptrtoint %663 : !llvm.ptr to i64 + %665 = llvm.call @malloc(%664) : (i64) -> !llvm.ptr + %666 = llvm.bitcast %665 : !llvm.ptr to !llvm.ptr + %667 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %668 = llvm.insertvalue %666, %667[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %669 = llvm.insertvalue %666, %668[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %670 = llvm.insertvalue %2, %669[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %671 = llvm.insertvalue %661, %670[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %672 = llvm.insertvalue %3, %671[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %673 = llvm.mlir.null : !llvm.ptr + %674 = llvm.getelementptr %673[%661] : (!llvm.ptr, i64) -> !llvm.ptr + %675 = llvm.ptrtoint %674 : !llvm.ptr to i64 + %676 = llvm.call @malloc(%675) : (i64) -> !llvm.ptr + %677 = llvm.bitcast %676 : !llvm.ptr to !llvm.ptr + %678 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %679 = llvm.insertvalue %677, %678[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %680 = llvm.insertvalue %677, %679[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %681 = llvm.insertvalue %2, %680[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %682 = llvm.insertvalue %661, %681[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %683 = llvm.insertvalue %3, %682[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %684 = llvm.load %63 : !llvm.ptr + llvm.br ^bb86(%6 : i64) + ^bb86(%685: i64): // 2 preds: ^bb85, ^bb102 + %686 = llvm.icmp "slt" %685, %684 : i64 + llvm.cond_br %686, ^bb87, ^bb103 + ^bb87: // pred: ^bb86 + %687 = llvm.getelementptr %554[%685] : (!llvm.ptr, i64) -> !llvm.ptr + %688 = llvm.load %687 : !llvm.ptr + llvm.store %688, %488 : !llvm.ptr + llvm.br ^bb88(%6 : i64) + ^bb88(%689: i64): // 2 preds: ^bb87, ^bb89 + %690 = llvm.icmp "slt" %689, %58 : i64 + llvm.cond_br %690, ^bb89, ^bb90 + ^bb89: // pred: ^bb88 + %691 = llvm.getelementptr %500[%689] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %20, %691 : !llvm.ptr + %692 = llvm.add %689, %5 : i64 + llvm.br ^bb88(%692 : i64) + ^bb90: // pred: ^bb88 + %693 = llvm.add %685, %5 : i64 + %694 = llvm.getelementptr %143[%685] : (!llvm.ptr, i64) -> !llvm.ptr + %695 = llvm.load %694 : !llvm.ptr + %696 = llvm.getelementptr %143[%693] : (!llvm.ptr, i64) -> !llvm.ptr + %697 = llvm.load %696 : !llvm.ptr + llvm.br ^bb91(%695 : i64) + ^bb91(%698: i64): // 2 preds: ^bb90, ^bb92 + %699 = llvm.icmp "slt" %698, %697 : i64 + llvm.cond_br %699, ^bb92, ^bb93 + ^bb92: // pred: ^bb91 + %700 = llvm.getelementptr %163[%698] : (!llvm.ptr, i64) -> !llvm.ptr + %701 = llvm.load %700 : !llvm.ptr + %702 = llvm.getelementptr %223[%698] : (!llvm.ptr, i64) -> !llvm.ptr + %703 = llvm.load %702 : !llvm.ptr + %704 = llvm.getelementptr %500[%701] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %703, %704 : !llvm.ptr + %705 = llvm.add %698, %5 : i64 + llvm.br ^bb91(%705 : i64) + ^bb93: // pred: ^bb91 + %706 = llvm.getelementptr %356[%685] : (!llvm.ptr, i64) -> !llvm.ptr + %707 = llvm.load %706 : !llvm.ptr + %708 = llvm.getelementptr %356[%693] : (!llvm.ptr, i64) -> !llvm.ptr + %709 = llvm.load %708 : !llvm.ptr + llvm.br ^bb94(%707 : i64) + ^bb94(%710: i64): // 2 preds: ^bb93, ^bb98 + %711 = llvm.icmp "slt" %710, %709 : i64 + llvm.cond_br %711, ^bb95, ^bb99 + ^bb95: // pred: ^bb94 + %712 = llvm.getelementptr %376[%710] : (!llvm.ptr, i64) -> !llvm.ptr + %713 = llvm.load %712 : !llvm.ptr + %714 = llvm.getelementptr %604[%713] : (!llvm.ptr, i64) -> !llvm.ptr + %715 = llvm.load %714 : !llvm.ptr + %716 = llvm.icmp "eq" %715, %19 : i1 + llvm.cond_br %716, ^bb96, ^bb97 + ^bb96: // pred: ^bb95 + %717 = llvm.getelementptr %500[%713] : (!llvm.ptr, i64) -> !llvm.ptr + %718 = llvm.load %717 : !llvm.ptr + %719 = llvm.getelementptr %436[%710] : (!llvm.ptr, i64) -> !llvm.ptr + %720 = llvm.load %719 : !llvm.ptr + %721 = llvm.fadd %718, %720 : f64 + %722 = llvm.getelementptr %463[%713] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %721, %722 : !llvm.ptr + %723 = llvm.getelementptr %604[%713] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %18, %723 : !llvm.ptr + %724 = llvm.load %488 : !llvm.ptr + %725 = llvm.getelementptr %666[%724] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %713, %725 : !llvm.ptr + %726 = llvm.add %724, %5 : i64 + llvm.store %726, %488 : !llvm.ptr + llvm.br ^bb98 + ^bb97: // pred: ^bb95 + %727 = llvm.getelementptr %500[%713] : (!llvm.ptr, i64) -> !llvm.ptr + %728 = llvm.load %727 : !llvm.ptr + %729 = llvm.getelementptr %436[%710] : (!llvm.ptr, i64) -> !llvm.ptr + %730 = llvm.load %729 : !llvm.ptr + %731 = llvm.fadd %728, %730 : f64 + %732 = llvm.getelementptr %463[%713] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %731, %732 : !llvm.ptr + llvm.br ^bb98 + ^bb98: // 2 preds: ^bb96, ^bb97 + %733 = llvm.add %710, %5 : i64 + llvm.br ^bb94(%733 : i64) + ^bb99: // pred: ^bb94 + %734 = llvm.getelementptr %554[%685] : (!llvm.ptr, i64) -> !llvm.ptr + %735 = llvm.load %734 : !llvm.ptr + %736 = llvm.getelementptr %554[%693] : (!llvm.ptr, i64) -> !llvm.ptr + %737 = llvm.load %736 : !llvm.ptr + %738 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %672, %738 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %739 = llvm.bitcast %738 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %740 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %741 = llvm.insertvalue %3, %740[0] : !llvm.struct<(i64, ptr)> + %742 = llvm.insertvalue %739, %741[1] : !llvm.struct<(i64, ptr)> + llvm.call @comet_sort_index(%3, %739, %735, %737) : (i64, !llvm.ptr, i64, i64) -> () + llvm.br ^bb100(%735 : i64) + ^bb100(%743: i64): // 2 preds: ^bb99, ^bb101 + %744 = llvm.icmp "slt" %743, %737 : i64 + llvm.cond_br %744, ^bb101, ^bb102 + ^bb101: // pred: ^bb100 + %745 = llvm.getelementptr %666[%743] : (!llvm.ptr, i64) -> !llvm.ptr + %746 = llvm.load %745 : !llvm.ptr + %747 = llvm.getelementptr %463[%746] : (!llvm.ptr, i64) -> !llvm.ptr + %748 = llvm.load %747 : !llvm.ptr + %749 = llvm.getelementptr %677[%743] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %748, %749 : !llvm.ptr + %750 = llvm.getelementptr %604[%746] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %19, %750 : !llvm.ptr + %751 = llvm.add %743, %5 : i64 + llvm.br ^bb100(%751 : i64) + ^bb102: // pred: ^bb100 + llvm.br ^bb86(%693 : i64) + ^bb103: // pred: ^bb86 + llvm.call @free(%456) : (!llvm.ptr) -> () + llvm.call @free(%597) : (!llvm.ptr) -> () + %752 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %516, %752 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %753 = llvm.bitcast %752 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %754 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %755 = llvm.insertvalue %3, %754[0] : !llvm.struct<(i64, ptr)> + %756 = llvm.insertvalue %753, %755[1] : !llvm.struct<(i64, ptr)> + llvm.call @comet_print_memref_i64(%3, %753) : (i64, !llvm.ptr) -> () + %757 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %527, %757 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %758 = llvm.bitcast %757 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %759 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %760 = llvm.insertvalue %3, %759[0] : !llvm.struct<(i64, ptr)> + %761 = llvm.insertvalue %758, %760[1] : !llvm.struct<(i64, ptr)> + llvm.call @comet_print_memref_i64(%3, %758) : (i64, !llvm.ptr) -> () + %762 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %538, %762 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %763 = llvm.bitcast %762 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %764 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %765 = llvm.insertvalue %3, %764[0] : !llvm.struct<(i64, ptr)> + %766 = llvm.insertvalue %763, %765[1] : !llvm.struct<(i64, ptr)> + llvm.call @comet_print_memref_i64(%3, %763) : (i64, !llvm.ptr) -> () + %767 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %549, %767 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %768 = llvm.bitcast %767 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %769 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %770 = llvm.insertvalue %3, %769[0] : !llvm.struct<(i64, ptr)> + %771 = llvm.insertvalue %768, %770[1] : !llvm.struct<(i64, ptr)> + llvm.call @comet_print_memref_i64(%3, %768) : (i64, !llvm.ptr) -> () + %772 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %560, %772 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %773 = llvm.bitcast %772 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %774 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %775 = llvm.insertvalue %3, %774[0] : !llvm.struct<(i64, ptr)> + %776 = llvm.insertvalue %773, %775[1] : !llvm.struct<(i64, ptr)> + llvm.call @comet_print_memref_i64(%3, %773) : (i64, !llvm.ptr) -> () + %777 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %672, %777 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %778 = llvm.bitcast %777 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %779 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %780 = llvm.insertvalue %3, %779[0] : !llvm.struct<(i64, ptr)> + %781 = llvm.insertvalue %778, %780[1] : !llvm.struct<(i64, ptr)> + llvm.call @comet_print_memref_i64(%3, %778) : (i64, !llvm.ptr) -> () + %782 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %575, %782 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %783 = llvm.bitcast %782 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %784 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %785 = llvm.insertvalue %3, %784[0] : !llvm.struct<(i64, ptr)> + %786 = llvm.insertvalue %783, %785[1] : !llvm.struct<(i64, ptr)> + llvm.call @comet_print_memref_i64(%3, %783) : (i64, !llvm.ptr) -> () + %787 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %586, %787 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %788 = llvm.bitcast %787 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %789 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %790 = llvm.insertvalue %3, %789[0] : !llvm.struct<(i64, ptr)> + %791 = llvm.insertvalue %788, %790[1] : !llvm.struct<(i64, ptr)> + llvm.call @comet_print_memref_i64(%3, %788) : (i64, !llvm.ptr) -> () + %792 = llvm.alloca %3 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %683, %792 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %793 = llvm.bitcast %792 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %794 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %795 = llvm.insertvalue %3, %794[0] : !llvm.struct<(i64, ptr)> + %796 = llvm.insertvalue %793, %795[1] : !llvm.struct<(i64, ptr)> + llvm.call @comet_print_memref_f64(%3, %793) : (i64, !llvm.ptr) -> () + llvm.return } + llvm.func @read_input_2D_f64(i32, i64, i64, i64, i64, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i32) attributes {sym_visibility = "private"} + llvm.func @read_input_sizes_2D_f64(i32, i64, i64, i64, i64, i64, !llvm.ptr, i32) attributes {sym_visibility = "private"} + llvm.func @comet_sort_index(i64, !llvm.ptr, i64, i64) attributes {sym_visibility = "private"} + llvm.func @comet_print_memref_f64(i64, !llvm.ptr) attributes {sym_visibility = "private"} + llvm.func @comet_print_memref_i64(i64, !llvm.ptr) attributes {sym_visibility = "private"} } - - -// -----// IR Dump After {anonymous}::RemoveLabeledTensorOpPass () //----- // -func.func @main() { - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %0 = "ta.dynamic_index_label"(%c0, %c1) : (index, index) -> !ta.range - %c0_0 = arith.constant 0 : index - %c1_1 = arith.constant 1 : index - %1 = "ta.dynamic_index_label"(%c0_0, %c1_1) : (index, index) -> !ta.range - %2 = "ta.sparse_tensor_decl"(%0, %1) {format = "CSR", temporal_tensor = false} : (!ta.range, !ta.range) -> tensor - %3 = "ta.dense_tensor_decl"(%1, %0) {format = "Dense"} : (!ta.range, !ta.range) -> tensor - %4 = "ta.dense_tensor_decl"(%1, %0) {format = "Dense"} : (!ta.range, !ta.range) -> tensor - "ta.fill_from_file"(%2) {filename = "SPARSE_FILE_NAME0", readMode = 1 : i32} : (tensor) -> () - "ta.fill"(%3) {value = 1.000000e+00 : f64} : (tensor) -> () - %5 = "ta.mul"(%2, %3, %1, %0) {MaskType = "none", __alpha__ = 1.000000e+00 : f64, __beta__ = 0.000000e+00 : f64, formats = ["CSR", "Dense", "Dense"], indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d1, d0)>], operand_segment_sizes = array, semiring = "plusxy_times"} : (tensor, tensor, !ta.range, !ta.range) -> tensor - "ta.set_op"(%5, %4) {__beta__ = 0.000000e+00 : f64} : (tensor, tensor) -> () - "ta.print"(%2) : (tensor) -> () - "ta.print"(%3) : (tensor) -> () - "ta.print"(%4) : (tensor) -> () - return -} - -// -----// IR Dump After {anonymous}::LowerTAMulChainPass () //----- // -func.func @main() { - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %0 = "ta.dynamic_index_label"(%c0, %c1) : (index, index) -> !ta.range - %c0_0 = arith.constant 0 : index - %c1_1 = arith.constant 1 : index - %1 = "ta.dynamic_index_label"(%c0_0, %c1_1) : (index, index) -> !ta.range - %2 = "ta.sparse_tensor_decl"(%0, %1) {format = "CSR", temporal_tensor = false} : (!ta.range, !ta.range) -> tensor - %3 = "ta.dense_tensor_decl"(%1, %0) {format = "Dense"} : (!ta.range, !ta.range) -> tensor - %4 = "ta.dense_tensor_decl"(%1, %0) {format = "Dense"} : (!ta.range, !ta.range) -> tensor - "ta.fill_from_file"(%2) {filename = "SPARSE_FILE_NAME0", readMode = 1 : i32} : (tensor) -> () - "ta.fill"(%3) {value = 1.000000e+00 : f64} : (tensor) -> () - %5 = "ta.mul"(%2, %3, %1, %0) {MaskType = "none", __alpha__ = 1.000000e+00 : f64, __beta__ = 0.000000e+00 : f64, formats = ["CSR", "Dense", "Dense"], indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d1, d0)>], operand_segment_sizes = array, semiring = "plusxy_times"} : (tensor, tensor, !ta.range, !ta.range) -> tensor - "ta.set_op"(%5, %4) {__beta__ = 0.000000e+00 : f64} : (tensor, tensor) -> () - "ta.print"(%2) : (tensor) -> () - "ta.print"(%3) : (tensor) -> () - "ta.print"(%4) : (tensor) -> () - return -} - -// -----// IR Dump After {anonymous}::TensorAlgebraCheckImplicitTensorDeclPass () //----- // -func.func @main() { - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %0 = "ta.dynamic_index_label"(%c0, %c1) : (index, index) -> !ta.range - %c0_0 = arith.constant 0 : index - %c1_1 = arith.constant 1 : index - %1 = "ta.dynamic_index_label"(%c0_0, %c1_1) : (index, index) -> !ta.range - %2 = "ta.sparse_tensor_decl"(%0, %1) {format = "CSR", temporal_tensor = false} : (!ta.range, !ta.range) -> tensor - %3 = "ta.dense_tensor_decl"(%1, %0) {format = "Dense"} : (!ta.range, !ta.range) -> tensor - %4 = "ta.dense_tensor_decl"(%1, %0) {format = "Dense"} : (!ta.range, !ta.range) -> tensor - "ta.fill_from_file"(%2) {filename = "SPARSE_FILE_NAME0", readMode = 1 : i32} : (tensor) -> () - "ta.fill"(%3) {value = 1.000000e+00 : f64} : (tensor) -> () - %5 = "ta.mul"(%2, %3, %1, %0) {MaskType = "none", __alpha__ = 1.000000e+00 : f64, __beta__ = 0.000000e+00 : f64, formats = ["CSR", "Dense", "Dense"], indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d1, d0)>], operand_segment_sizes = array, semiring = "plusxy_times"} : (tensor, tensor, !ta.range, !ta.range) -> tensor - "ta.set_op"(%5, %4) {__beta__ = 0.000000e+00 : f64} : (tensor, tensor) -> () - "ta.print"(%2) : (tensor) -> () - "ta.print"(%3) : (tensor) -> () - "ta.print"(%4) : (tensor) -> () - return -} - -// -----// IR Dump After {anonymous}::LowerTensorAlgebraToIndexTreePass () //----- // -func.func @main() { - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %0 = "ta.dynamic_index_label"(%c0, %c1) : (index, index) -> !ta.range - %c0_0 = arith.constant 0 : index - %c1_1 = arith.constant 1 : index - %1 = "ta.dynamic_index_label"(%c0_0, %c1_1) : (index, index) -> !ta.range - %2 = "ta.sparse_tensor_decl"(%0, %1) {format = "CSR", temporal_tensor = false} : (!ta.range, !ta.range) -> tensor - %3 = "ta.dense_tensor_decl"(%1, %0) {format = "Dense"} : (!ta.range, !ta.range) -> tensor - %4 = "ta.dense_tensor_decl"(%1, %0) {format = "Dense"} : (!ta.range, !ta.range) -> tensor - "ta.fill_from_file"(%2) {filename = "SPARSE_FILE_NAME0", readMode = 1 : i32} : (tensor) -> () - "ta.fill"(%3) {value = 1.000000e+00 : f64} : (tensor) -> () - %5 = "it.ComputeRHS"(%2, %3) {allBlocks = [["UNK", "UNK"], ["UNK", "UNK"]], allFormats = [["D", "CU"], ["D", "D"]], allPerms = [[0, 1], [1, 0]]} : (tensor, tensor) -> tensor<*xf64> - %6 = "it.ComputeLHS"(%4) {allBlocks = [["UNK", "UNK"]], allFormats = [["D", "D"]], allPerms = [[1, 0]]} : (tensor) -> tensor<*xf64> - %7 = "it.Compute"(%5, %6) {MaskType = "none", comp_worksp_opt = false, semiring = "plusxy_times"} : (tensor<*xf64>, tensor<*xf64>) -> i64 - %8 = "it.Indices"(%7) {indices = [1]} : (i64) -> i64 - %9 = "it.Indices"(%8) {indices = [0]} : (i64) -> i64 - %10 = "it.itree"(%9) : (i64) -> i64 - "ta.print"(%2) : (tensor) -> () - "ta.print"(%3) : (tensor) -> () - "ta.print"(%4) : (tensor) -> () - return -} - -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1144 SparseInputTensorDeclOpLowering in format begin -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1145 %2 = "ta.sparse_tensor_decl"(%0, %1) {format = "CSR", temporal_tensor = false} : (!ta.range, !ta.range) -> tensor -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1156 --- CSR -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1158 2 -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1168 -Check the tensor is input or output -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1169 %5 = "it.ComputeRHS"(%2, %3) {allBlocks = [["UNK", "UNK"], ["UNK", "UNK"]], allFormats = [["D", "CU"], ["D", "D"]], allPerms = [[0, 1], [1, 0]]} : (tensor, tensor) -> tensor<*xf64> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1245 used in ta.itComputeRHS op -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1168 -Check the tensor is input or output -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1169 "ta.print"(%2) : (tensor) -> () -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1260 the tensor is in PrintOp -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1168 -Check the tensor is input or output -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1169 "ta.fill_from_file"(%2) {filename = "SPARSE_FILE_NAME0", readMode = 1 : i32} : (tensor) -> () -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1256 the tensor is in fill_from_file op -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1287 isOutputTensor: 0 -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1296 CSR isDense: 0 -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1301 Sparse input tensor -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1356 filename: SPARSE_FILE_NAME0 -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1360 SPARSE_FILE_NAME0 -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1363 readMode: 1 -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1367 sp_decl.getParameterCount(): 13 -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1370 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1371 %alloc = memref.alloc() : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1376 Get the dim_format -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1395 Parsed fileID: 0 -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1413 2D -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:291 Inserting insertReadFileLibCall -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:311 Rank Size is 2 -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:346 Adding read_input_2D_f64 to the module -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:373 Adding read_input_sizes_2D_f64 to the module -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %2 = memref.load %alloc[%c0_4] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %3 = memref.load %alloc[%c1_5] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %4 = memref.load %alloc[%c2_6] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %5 = memref.load %alloc[%c3_7] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %6 = memref.load %alloc[%c4] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %7 = memref.load %alloc[%c5] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %8 = memref.load %alloc[%c6] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %9 = memref.load %alloc[%c7] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %10 = memref.load %alloc[%c8] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %11 = memref.load %alloc[%c9] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %12 = memref.load %alloc[%c10] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %13 = memref.load %alloc[%c11] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %14 = memref.load %alloc[%c12] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1477 %2 = memref.load %alloc[%c0_4] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1479 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1480 %alloc_8 = memref.alloc(%2) : memref -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1477 %3 = memref.load %alloc[%c1_5] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1479 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1480 %alloc_13 = memref.alloc(%3) : memref -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1477 %4 = memref.load %alloc[%c2_6] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1479 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1480 %alloc_18 = memref.alloc(%4) : memref -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1477 %5 = memref.load %alloc[%c3_7] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1479 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1480 %alloc_23 = memref.alloc(%5) : memref -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1477 %6 = memref.load %alloc[%c4] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1479 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1480 %alloc_28 = memref.alloc(%6) : memref -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1477 %7 = memref.load %alloc[%c5] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1479 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1480 %alloc_33 = memref.alloc(%7) : memref -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1477 %8 = memref.load %alloc[%c6] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1479 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1480 %alloc_38 = memref.alloc(%8) : memref -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1477 %9 = memref.load %alloc[%c7] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1479 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1480 %alloc_43 = memref.alloc(%9) : memref -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1492 /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1493 %alloc_48 = memref.alloc(%10) : memref -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1556 Generate read_input_2D or read_input_3D functions -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1603 SparseTensorConstructOp generated for input sparse tensor: -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1604 %24 = ta.sptensor_construct(%15, %16, %17, %18, %19, %20, %21, %22, %23, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12) {tensor_rank = 2 : i32} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, index, index, index, index, index, index, index, index, index, index, index) -> (!ta.sptensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, index, index, index, index, index, index, index, index, index, index, index>) -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1612 %0 = "ta.dynamic_index_label"(%c0, %c1) : (index, index) -> !ta.range -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1613 %0 = "ta.dynamic_index_label"(%c0, %c1) : (index, index) -> !ta.range -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1622 %25 = "ta.static_index_label"(%c0, %11, %c1) : (index, index, index) -> !ta.range -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1612 %1 = "ta.dynamic_index_label"(%c0_0, %c1_1) : (index, index) -> !ta.range -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1613 %1 = "ta.dynamic_index_label"(%c0_0, %c1_1) : (index, index) -> !ta.range -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1622 %26 = "ta.static_index_label"(%c0_0, %12, %c1_1) : (index, index, index) -> !ta.range -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1668 SparseInputTensorDeclOpLowering in format end -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1803 ---------------SparseTensorDeclLoweringPass end -// -----// IR Dump After {anonymous}::SparseTensorDeclLoweringPass () //----- // -func.func @main() { - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %0 = "ta.dynamic_index_label"(%c0, %c1) : (index, index) -> !ta.range - %c0_0 = arith.constant 0 : index - %c1_1 = arith.constant 1 : index - %1 = "ta.dynamic_index_label"(%c0_0, %c1_1) : (index, index) -> !ta.range - %alloc = memref.alloc() : memref<13xindex> - %cast = memref.cast %alloc : memref<13xindex> to memref<*xindex> - %c-1 = arith.constant -1 : index - %c0_2 = arith.constant 0 : index - %c1_3 = arith.constant 1 : index - %c2 = arith.constant 2 : index - %c3 = arith.constant 3 : index - %c0_i32 = arith.constant 0 : i32 - %c1_i32 = arith.constant 1 : i32 - call @read_input_sizes_2D_f64(%c0_i32, %c0_2, %c-1, %c1_3, %c-1, %cast, %c1_i32) {filename = "SPARSE_FILE_NAME0"} : (i32, index, index, index, index, memref<*xindex>, i32) -> () - %c0_4 = arith.constant 0 : index - %2 = memref.load %alloc[%c0_4] : memref<13xindex> - %c1_5 = arith.constant 1 : index - %3 = memref.load %alloc[%c1_5] : memref<13xindex> - %c2_6 = arith.constant 2 : index - %4 = memref.load %alloc[%c2_6] : memref<13xindex> - %c3_7 = arith.constant 3 : index - %5 = memref.load %alloc[%c3_7] : memref<13xindex> - %c4 = arith.constant 4 : index - %6 = memref.load %alloc[%c4] : memref<13xindex> - %c5 = arith.constant 5 : index - %7 = memref.load %alloc[%c5] : memref<13xindex> - %c6 = arith.constant 6 : index - %8 = memref.load %alloc[%c6] : memref<13xindex> - %c7 = arith.constant 7 : index - %9 = memref.load %alloc[%c7] : memref<13xindex> - %c8 = arith.constant 8 : index - %10 = memref.load %alloc[%c8] : memref<13xindex> - %c9 = arith.constant 9 : index - %11 = memref.load %alloc[%c9] : memref<13xindex> - %c10 = arith.constant 10 : index - %12 = memref.load %alloc[%c10] : memref<13xindex> - %c11 = arith.constant 11 : index - %13 = memref.load %alloc[%c11] : memref<13xindex> - %c12 = arith.constant 12 : index - %14 = memref.load %alloc[%c12] : memref<13xindex> - %alloc_8 = memref.alloc(%2) : memref - %c0_9 = arith.constant 0 : index - %c0_10 = arith.constant 0 : index - %c1_11 = arith.constant 1 : index - scf.for %arg0 = %c0_10 to %2 step %c1_11 { - memref.store %c0_9, %alloc_8[%arg0] : memref - } - %cast_12 = memref.cast %alloc_8 : memref to memref<*xindex> - %alloc_13 = memref.alloc(%3) : memref - %c0_14 = arith.constant 0 : index - %c0_15 = arith.constant 0 : index - %c1_16 = arith.constant 1 : index - scf.for %arg0 = %c0_15 to %3 step %c1_16 { - memref.store %c0_14, %alloc_13[%arg0] : memref - } - %cast_17 = memref.cast %alloc_13 : memref to memref<*xindex> - %alloc_18 = memref.alloc(%4) : memref - %c0_19 = arith.constant 0 : index - %c0_20 = arith.constant 0 : index - %c1_21 = arith.constant 1 : index - scf.for %arg0 = %c0_20 to %4 step %c1_21 { - memref.store %c0_19, %alloc_18[%arg0] : memref - } - %cast_22 = memref.cast %alloc_18 : memref to memref<*xindex> - %alloc_23 = memref.alloc(%5) : memref - %c0_24 = arith.constant 0 : index - %c0_25 = arith.constant 0 : index - %c1_26 = arith.constant 1 : index - scf.for %arg0 = %c0_25 to %5 step %c1_26 { - memref.store %c0_24, %alloc_23[%arg0] : memref - } - %cast_27 = memref.cast %alloc_23 : memref to memref<*xindex> - %alloc_28 = memref.alloc(%6) : memref - %c0_29 = arith.constant 0 : index - %c0_30 = arith.constant 0 : index - %c1_31 = arith.constant 1 : index - scf.for %arg0 = %c0_30 to %6 step %c1_31 { - memref.store %c0_29, %alloc_28[%arg0] : memref - } - %cast_32 = memref.cast %alloc_28 : memref to memref<*xindex> - %alloc_33 = memref.alloc(%7) : memref - %c0_34 = arith.constant 0 : index - %c0_35 = arith.constant 0 : index - %c1_36 = arith.constant 1 : index - scf.for %arg0 = %c0_35 to %7 step %c1_36 { - memref.store %c0_34, %alloc_33[%arg0] : memref - } - %cast_37 = memref.cast %alloc_33 : memref to memref<*xindex> - %alloc_38 = memref.alloc(%8) : memref - %c0_39 = arith.constant 0 : index - %c0_40 = arith.constant 0 : index - %c1_41 = arith.constant 1 : index - scf.for %arg0 = %c0_40 to %8 step %c1_41 { - memref.store %c0_39, %alloc_38[%arg0] : memref - } - %cast_42 = memref.cast %alloc_38 : memref to memref<*xindex> - %alloc_43 = memref.alloc(%9) : memref - %c0_44 = arith.constant 0 : index - %c0_45 = arith.constant 0 : index - %c1_46 = arith.constant 1 : index - scf.for %arg0 = %c0_45 to %9 step %c1_46 { - memref.store %c0_44, %alloc_43[%arg0] : memref - } - %cast_47 = memref.cast %alloc_43 : memref to memref<*xindex> - %alloc_48 = memref.alloc(%10) : memref - %cst = arith.constant 0.000000e+00 : f64 - %c0_49 = arith.constant 0 : index - %c1_50 = arith.constant 1 : index - scf.for %arg0 = %c0_49 to %10 step %c1_50 { - memref.store %cst, %alloc_48[%arg0] : memref - } - %cast_51 = memref.cast %alloc_48 : memref to memref<*xf64> - call @read_input_2D_f64(%c0_i32, %c0_2, %c-1, %c1_3, %c-1, %cast_12, %cast_17, %cast_22, %cast_27, %cast_32, %cast_37, %cast_42, %cast_47, %cast_51, %c1_i32) {filename = "SPARSE_FILE_NAME0"} : (i32, index, index, index, index, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xf64>, i32) -> () - %15 = bufferization.to_tensor %alloc_8 : memref - %16 = bufferization.to_tensor %alloc_13 : memref - %17 = bufferization.to_tensor %alloc_18 : memref - %18 = bufferization.to_tensor %alloc_23 : memref - %19 = bufferization.to_tensor %alloc_28 : memref - %20 = bufferization.to_tensor %alloc_33 : memref - %21 = bufferization.to_tensor %alloc_38 : memref - %22 = bufferization.to_tensor %alloc_43 : memref - %23 = bufferization.to_tensor %alloc_48 : memref - %24 = ta.sptensor_construct(%15, %16, %17, %18, %19, %20, %21, %22, %23, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12) {tensor_rank = 2 : i32} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, index, index, index, index, index, index, index, index, index, index, index) -> (!ta.sptensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, index, index, index, index, index, index, index, index, index, index, index>) - %25 = "ta.static_index_label"(%c0, %11, %c1) : (index, index, index) -> !ta.range - %26 = "ta.static_index_label"(%c0_0, %12, %c1_1) : (index, index, index) -> !ta.range - %27 = "ta.dense_tensor_decl"(%26, %25) {format = "Dense"} : (!ta.range, !ta.range) -> tensor - %28 = "ta.dense_tensor_decl"(%26, %25) {format = "Dense"} : (!ta.range, !ta.range) -> tensor - "ta.fill"(%27) {value = 1.000000e+00 : f64} : (tensor) -> () - %29 = "it.ComputeRHS"(%24, %27) {allBlocks = [["UNK", "UNK"], ["UNK", "UNK"]], allFormats = [["D", "CU"], ["D", "D"]], allPerms = [[0, 1], [1, 0]]} : (!ta.sptensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, index, index, index, index, index, index, index, index, index, index, index>, tensor) -> tensor<*xf64> - %30 = "it.ComputeLHS"(%28) {allBlocks = [["UNK", "UNK"]], allFormats = [["D", "D"]], allPerms = [[1, 0]]} : (tensor) -> tensor<*xf64> - %31 = "it.Compute"(%29, %30) {MaskType = "none", comp_worksp_opt = false, semiring = "plusxy_times"} : (tensor<*xf64>, tensor<*xf64>) -> i64 - %32 = "it.Indices"(%31) {indices = [1]} : (i64) -> i64 - %33 = "it.Indices"(%32) {indices = [0]} : (i64) -> i64 - %34 = "it.itree"(%33) : (i64) -> i64 - "ta.print"(%24) : (!ta.sptensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, index, index, index, index, index, index, index, index, index, index, index>) -> () - "ta.print"(%27) : (tensor) -> () - "ta.print"(%28) : (tensor) -> () - return -} - -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1056 --------------DenseTensorDeclarationLowering in format begin -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1058 - -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1060 %27 = "ta.dense_tensor_decl"(%26, %25) {format = "Dense"} : (!ta.range, !ta.range) -> tensor ------------------------------------ -memref ------------------------------------ -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1077 %26 = "ta.static_index_label"(%c0_0, %12, %c1_1) : (index, index, index) -> !ta.range -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1083 %12 = memref.load %alloc[%c10] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1077 %25 = "ta.static_index_label"(%c0, %11, %c1) : (index, index, index) -> !ta.range -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1083 %11 = memref.load %alloc[%c9] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1092 %29 = "it.ComputeRHS"(%24, %27) {allBlocks = [["UNK", "UNK"], ["UNK", "UNK"]], allFormats = [["D", "CU"], ["D", "D"]], allPerms = [[0, 1], [1, 0]]} : (!ta.sptensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, index, index, index, index, index, index, index, index, index, index, index>, tensor) -> tensor<*xf64> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1092 "ta.print"(%27) : (tensor) -> () -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1092 "ta.fill"(%27) {value = 1.000000e+00 : f64} : (tensor) -> () -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1097 AllocOp for initialization is_filled: 1 -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1103 %alloc_52 = memref.alloc(%12, %11) : memref -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1115 TensorLoad: -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1116 %27 = bufferization.to_tensor %alloc_52 : memref -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1122 --------------DenseTensorDeclarationLowering in format end -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1123 - -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1056 --------------DenseTensorDeclarationLowering in format begin -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1058 - -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1060 %29 = "ta.dense_tensor_decl"(%26, %25) {format = "Dense"} : (!ta.range, !ta.range) -> tensor ------------------------------------ -memref ------------------------------------ -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1077 %26 = "ta.static_index_label"(%c0_0, %12, %c1_1) : (index, index, index) -> !ta.range -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1083 %12 = memref.load %alloc[%c10] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1077 %25 = "ta.static_index_label"(%c0, %11, %c1) : (index, index, index) -> !ta.range -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1083 %11 = memref.load %alloc[%c9] : memref<13xindex> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1092 %31 = "it.ComputeLHS"(%29) {allBlocks = [["UNK", "UNK"]], allFormats = [["D", "D"]], allPerms = [[1, 0]]} : (tensor) -> tensor<*xf64> -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1092 "ta.print"(%29) : (tensor) -> () -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1097 AllocOp for initialization is_filled: 0 -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1109 %101 = "memref.alloc"(%36, %34) {operand_segment_sizes = array} : (index, index) -> memref -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1115 TensorLoad: -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1116 %105 = "bufferization.to_tensor"(%101) : (memref) -> tensor -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1122 --------------DenseTensorDeclarationLowering in format end -/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1123 - -loc("first.ta":8:2): error: 'memref.store' op store index operand count not equal to memref rank -// -----// IR Dump After {anonymous}::DenseTensorDeclLoweringPass Failed () //----- // -"func.func"() ({ - %0 = "arith.constant"() {value = 0 : index} : () -> index - %1 = "arith.constant"() {value = 1 : index} : () -> index - %2 = "ta.dynamic_index_label"(%0, %1) : (index, index) -> !ta.range - %3 = "arith.constant"() {value = 0 : index} : () -> index - %4 = "arith.constant"() {value = 1 : index} : () -> index - %5 = "ta.dynamic_index_label"(%3, %4) : (index, index) -> !ta.range - %6 = "memref.alloc"() {operand_segment_sizes = array} : () -> memref<13xindex> - %7 = "memref.cast"(%6) : (memref<13xindex>) -> memref<*xindex> - %8 = "arith.constant"() {value = -1 : index} : () -> index - %9 = "arith.constant"() {value = 0 : index} : () -> index - %10 = "arith.constant"() {value = 1 : index} : () -> index - %11 = "arith.constant"() {value = 2 : index} : () -> index - %12 = "arith.constant"() {value = 3 : index} : () -> index - %13 = "arith.constant"() {value = 0 : i32} : () -> i32 - %14 = "arith.constant"() {value = 1 : i32} : () -> i32 - "func.call"(%13, %9, %8, %10, %8, %7, %14) {callee = @read_input_sizes_2D_f64, filename = "SPARSE_FILE_NAME0"} : (i32, index, index, index, index, memref<*xindex>, i32) -> () - %15 = "arith.constant"() {value = 0 : index} : () -> index - %16 = "memref.load"(%6, %15) {nontemporal = false} : (memref<13xindex>, index) -> index - %17 = "arith.constant"() {value = 1 : index} : () -> index - %18 = "memref.load"(%6, %17) {nontemporal = false} : (memref<13xindex>, index) -> index - %19 = "arith.constant"() {value = 2 : index} : () -> index - %20 = "memref.load"(%6, %19) {nontemporal = false} : (memref<13xindex>, index) -> index - %21 = "arith.constant"() {value = 3 : index} : () -> index - %22 = "memref.load"(%6, %21) {nontemporal = false} : (memref<13xindex>, index) -> index - %23 = "arith.constant"() {value = 4 : index} : () -> index - %24 = "memref.load"(%6, %23) {nontemporal = false} : (memref<13xindex>, index) -> index - %25 = "arith.constant"() {value = 5 : index} : () -> index - %26 = "memref.load"(%6, %25) {nontemporal = false} : (memref<13xindex>, index) -> index - %27 = "arith.constant"() {value = 6 : index} : () -> index - %28 = "memref.load"(%6, %27) {nontemporal = false} : (memref<13xindex>, index) -> index - %29 = "arith.constant"() {value = 7 : index} : () -> index - %30 = "memref.load"(%6, %29) {nontemporal = false} : (memref<13xindex>, index) -> index - %31 = "arith.constant"() {value = 8 : index} : () -> index - %32 = "memref.load"(%6, %31) {nontemporal = false} : (memref<13xindex>, index) -> index - %33 = "arith.constant"() {value = 9 : index} : () -> index - %34 = "memref.load"(%6, %33) {nontemporal = false} : (memref<13xindex>, index) -> index - %35 = "arith.constant"() {value = 10 : index} : () -> index - %36 = "memref.load"(%6, %35) {nontemporal = false} : (memref<13xindex>, index) -> index - %37 = "arith.constant"() {value = 11 : index} : () -> index - %38 = "memref.load"(%6, %37) {nontemporal = false} : (memref<13xindex>, index) -> index - %39 = "arith.constant"() {value = 12 : index} : () -> index - %40 = "memref.load"(%6, %39) {nontemporal = false} : (memref<13xindex>, index) -> index - %41 = "memref.alloc"(%16) {operand_segment_sizes = array} : (index) -> memref - %42 = "arith.constant"() {value = 0 : index} : () -> index - %43 = "arith.constant"() {value = 0 : index} : () -> index - %44 = "arith.constant"() {value = 1 : index} : () -> index - "scf.for"(%43, %16, %44) ({ - ^bb0(%arg0: index): - "memref.store"(%42, %41, %arg0) {nontemporal = false} : (index, memref, index) -> () - "scf.yield"() : () -> () - }) : (index, index, index) -> () - %45 = "memref.cast"(%41) : (memref) -> memref<*xindex> - %46 = "memref.alloc"(%18) {operand_segment_sizes = array} : (index) -> memref - %47 = "arith.constant"() {value = 0 : index} : () -> index - %48 = "arith.constant"() {value = 0 : index} : () -> index - %49 = "arith.constant"() {value = 1 : index} : () -> index - "scf.for"(%48, %18, %49) ({ - ^bb0(%arg0: index): - "memref.store"(%47, %46, %arg0) {nontemporal = false} : (index, memref, index) -> () - "scf.yield"() : () -> () - }) : (index, index, index) -> () - %50 = "memref.cast"(%46) : (memref) -> memref<*xindex> - %51 = "memref.alloc"(%20) {operand_segment_sizes = array} : (index) -> memref - %52 = "arith.constant"() {value = 0 : index} : () -> index - %53 = "arith.constant"() {value = 0 : index} : () -> index - %54 = "arith.constant"() {value = 1 : index} : () -> index - "scf.for"(%53, %20, %54) ({ - ^bb0(%arg0: index): - "memref.store"(%52, %51, %arg0) {nontemporal = false} : (index, memref, index) -> () - "scf.yield"() : () -> () - }) : (index, index, index) -> () - %55 = "memref.cast"(%51) : (memref) -> memref<*xindex> - %56 = "memref.alloc"(%22) {operand_segment_sizes = array} : (index) -> memref - %57 = "arith.constant"() {value = 0 : index} : () -> index - %58 = "arith.constant"() {value = 0 : index} : () -> index - %59 = "arith.constant"() {value = 1 : index} : () -> index - "scf.for"(%58, %22, %59) ({ - ^bb0(%arg0: index): - "memref.store"(%57, %56, %arg0) {nontemporal = false} : (index, memref, index) -> () - "scf.yield"() : () -> () - }) : (index, index, index) -> () - %60 = "memref.cast"(%56) : (memref) -> memref<*xindex> - %61 = "memref.alloc"(%24) {operand_segment_sizes = array} : (index) -> memref - %62 = "arith.constant"() {value = 0 : index} : () -> index - %63 = "arith.constant"() {value = 0 : index} : () -> index - %64 = "arith.constant"() {value = 1 : index} : () -> index - "scf.for"(%63, %24, %64) ({ - ^bb0(%arg0: index): - "memref.store"(%62, %61, %arg0) {nontemporal = false} : (index, memref, index) -> () - "scf.yield"() : () -> () - }) : (index, index, index) -> () - %65 = "memref.cast"(%61) : (memref) -> memref<*xindex> - %66 = "memref.alloc"(%26) {operand_segment_sizes = array} : (index) -> memref - %67 = "arith.constant"() {value = 0 : index} : () -> index - %68 = "arith.constant"() {value = 0 : index} : () -> index - %69 = "arith.constant"() {value = 1 : index} : () -> index - "scf.for"(%68, %26, %69) ({ - ^bb0(%arg0: index): - "memref.store"(%67, %66, %arg0) {nontemporal = false} : (index, memref, index) -> () - "scf.yield"() : () -> () - }) : (index, index, index) -> () - %70 = "memref.cast"(%66) : (memref) -> memref<*xindex> - %71 = "memref.alloc"(%28) {operand_segment_sizes = array} : (index) -> memref - %72 = "arith.constant"() {value = 0 : index} : () -> index - %73 = "arith.constant"() {value = 0 : index} : () -> index - %74 = "arith.constant"() {value = 1 : index} : () -> index - "scf.for"(%73, %28, %74) ({ - ^bb0(%arg0: index): - "memref.store"(%72, %71, %arg0) {nontemporal = false} : (index, memref, index) -> () - "scf.yield"() : () -> () - }) : (index, index, index) -> () - %75 = "memref.cast"(%71) : (memref) -> memref<*xindex> - %76 = "memref.alloc"(%30) {operand_segment_sizes = array} : (index) -> memref - %77 = "arith.constant"() {value = 0 : index} : () -> index - %78 = "arith.constant"() {value = 0 : index} : () -> index - %79 = "arith.constant"() {value = 1 : index} : () -> index - "scf.for"(%78, %30, %79) ({ - ^bb0(%arg0: index): - "memref.store"(%77, %76, %arg0) {nontemporal = false} : (index, memref, index) -> () - "scf.yield"() : () -> () - }) : (index, index, index) -> () - %80 = "memref.cast"(%76) : (memref) -> memref<*xindex> - %81 = "memref.alloc"(%32) {operand_segment_sizes = array} : (index) -> memref - %82 = "arith.constant"() {value = 0.000000e+00 : f64} : () -> f64 - %83 = "arith.constant"() {value = 0 : index} : () -> index - %84 = "arith.constant"() {value = 1 : index} : () -> index - "scf.for"(%83, %32, %84) ({ - ^bb0(%arg0: index): - "memref.store"(%82, %81, %arg0) {nontemporal = false} : (f64, memref, index) -> () - "scf.yield"() : () -> () - }) : (index, index, index) -> () - %85 = "memref.cast"(%81) : (memref) -> memref<*xf64> - "func.call"(%13, %9, %8, %10, %8, %45, %50, %55, %60, %65, %70, %75, %80, %85, %14) {callee = @read_input_2D_f64, filename = "SPARSE_FILE_NAME0"} : (i32, index, index, index, index, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xf64>, i32) -> () - %86 = "bufferization.to_tensor"(%41) : (memref) -> tensor - %87 = "bufferization.to_tensor"(%46) : (memref) -> tensor - %88 = "bufferization.to_tensor"(%51) : (memref) -> tensor - %89 = "bufferization.to_tensor"(%56) : (memref) -> tensor - %90 = "bufferization.to_tensor"(%61) : (memref) -> tensor - %91 = "bufferization.to_tensor"(%66) : (memref) -> tensor - %92 = "bufferization.to_tensor"(%71) : (memref) -> tensor - %93 = "bufferization.to_tensor"(%76) : (memref) -> tensor - %94 = "bufferization.to_tensor"(%81) : (memref) -> tensor - %95 = "ta.sptensor_construct"(%86, %87, %88, %89, %90, %91, %92, %93, %94, %16, %18, %20, %22, %24, %26, %28, %30, %32, %34, %36) {tensor_rank = 2 : i32} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, index, index, index, index, index, index, index, index, index, index, index) -> !ta.sptensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, index, index, index, index, index, index, index, index, index, index, index> - %96 = "ta.static_index_label"(%0, %34, %1) : (index, index, index) -> !ta.range - %97 = "ta.static_index_label"(%3, %36, %4) : (index, index, index) -> !ta.range - %98 = "memref.alloc"(%36, %34) {alignment = 32 : i64, operand_segment_sizes = array} : (index, index) -> memref - %99 = "bufferization.to_tensor"(%98) : (memref) -> tensor - %100 = "memref.alloc"(%36, %34) {alignment = 32 : i64, operand_segment_sizes = array} : (index, index) -> memref - %101 = "arith.constant"() {value = 0.000000e+00 : f64} : () -> f64 - %102 = "arith.constant"() {value = 0 : index} : () -> index - %103 = "arith.constant"() {value = 1 : index} : () -> index - "scf.for"(%102, %36, %103) ({ - ^bb0(%arg0: index): - "memref.store"(%101, %100, %arg0) {nontemporal = false} : (f64, memref, index) -> () - "scf.yield"() : () -> () - }) : (index, index, index) -> () - %104 = "bufferization.to_tensor"(%100) : (memref) -> tensor - "ta.fill"(%99) {value = 1.000000e+00 : f64} : (tensor) -> () - %105 = "it.ComputeRHS"(%95, %99) {allBlocks = [["UNK", "UNK"], ["UNK", "UNK"]], allFormats = [["D", "CU"], ["D", "D"]], allPerms = [[0, 1], [1, 0]]} : (!ta.sptensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, index, index, index, index, index, index, index, index, index, index, index>, tensor) -> tensor<*xf64> - %106 = "it.ComputeLHS"(%104) {allBlocks = [["UNK", "UNK"]], allFormats = [["D", "D"]], allPerms = [[1, 0]]} : (tensor) -> tensor<*xf64> - %107 = "it.Compute"(%105, %106) {MaskType = "none", comp_worksp_opt = false, semiring = "plusxy_times"} : (tensor<*xf64>, tensor<*xf64>) -> i64 - %108 = "it.Indices"(%107) {indices = [1]} : (i64) -> i64 - %109 = "it.Indices"(%108) {indices = [0]} : (i64) -> i64 - %110 = "it.itree"(%109) : (i64) -> i64 - "ta.print"(%95) : (!ta.sptensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, index, index, index, index, index, index, index, index, index, index, index>) -> () - "ta.print"(%99) : (tensor) -> () - "ta.print"(%104) : (tensor) -> () - "func.return"() : () -> () -}) {function_type = () -> (), sym_name = "main"} : () -> () - diff --git a/first.ta b/first.ta index d2260157..527e37dc 100644 --- a/first.ta +++ b/first.ta @@ -1,19 +1,38 @@ +# Sparse matrix sparse matrix elementwise addition +# Sparse matrix is in CSR format. Currently workspace transformation on the IndexTree dialect works for only CSR format +# RUN: comet-opt --opt-comp-workspace --convert-ta-to-it --convert-to-loops --convert-to-llvm %s &> eltwise_add_CSRxCSR_oCSR.llvm +# RUN: export SPARSE_FILE_NAME0=%comet_integration_test_data_dir/test_rank2.mtx +# RUN: export SPARSE_FILE_NAME1=%comet_integration_test_data_dir/test_rank2_transpose.mtx +# RUN: mlir-cpu-runner eltwise_add_CSRxCSR_oCSR.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext | FileCheck %s + + def main() { - #IndexLabel Declarations - IndexLabel [a] = [?]; - IndexLabel [b] = [?]; + #IndexLabel Declarations + IndexLabel [i] = [?]; + IndexLabel [j] = [?]; + + #Tensor Declarations + Tensor A([i, j], {CSR}); + Tensor B([i, j], {CSR}); + Tensor C([i, j], {CSR}); - Tensor A([a, b], {CSR}); - Tensor B([b, a], {Dense}); - Tensor C([b, a], {Dense}); - - A[a, b] = comet_read(0); - B[b, a] = 1.0; - C[b, a] = A[a, b] * B[b, a]; - #C[b, a] = 1.0; + #Tensor Readfile Operation + A[i, j] = comet_read(0); + B[i, j] = comet_read(1); - print(A); - print(B); - print(C); + #Tensor Contraction + C[i, j] = A[i, j] + B[i, j]; + print(C); } +# Print the result for verification. +# CHECK: data = +# CHECK-NEXT: 5, +# CHECK-NEXT: data = +# CHECK-NEXT: 0, +# CHECK-NEXT: data = +# CHECK-NEXT: 0,2,4,5,7,9, +# CHECK-NEXT: data = +# CHECK-NEXT: 0,3,1,4,2,0,3,1,4, +# CHECK-NEXT: data = +# CHECK-NEXT: 2,5.5,4,7.7,6,5.5,8,7.7,10, diff --git a/include/comet/Dialect/Utils/Utils.h b/include/comet/Dialect/Utils/Utils.h index a4518423..def3a3c6 100644 --- a/include/comet/Dialect/Utils/Utils.h +++ b/include/comet/Dialect/Utils/Utils.h @@ -211,6 +211,7 @@ namespace mlir void getFormatsPermsOfComputeOp(Value computeOp, std::vector> &opFormats, + std::vector> &opBlocks, std::vector> &opPerms, std::vector> &inputOutputMapping); diff --git a/lib/Dialect/IndexTree/Transforms/Fusion.cpp b/lib/Dialect/IndexTree/Transforms/Fusion.cpp index c2a0421b..f54f13ee 100644 --- a/lib/Dialect/IndexTree/Transforms/Fusion.cpp +++ b/lib/Dialect/IndexTree/Transforms/Fusion.cpp @@ -185,9 +185,10 @@ namespace comet_debug() << " is_comp_worksp_opt: " << is_comp_worksp_opt << " semiring: " << semiring << "\n"; std::vector> opFormats; + std::vector> opBlocks; std::vector> opPerms; std::vector > inputOutputMapping; - getFormatsPermsOfComputeOp(computeOp, opFormats, opPerms, inputOutputMapping); + getFormatsPermsOfComputeOp(computeOp, opFormats, opBlocks, opPerms, inputOutputMapping); /// opFormats comet_debug() << "["; for (auto strings: opFormats) { @@ -537,7 +538,7 @@ mlir::Value IndexTreeKernelFusionPass::createReducedComputeRHS( SmallVector blocks; if (b_i == tensor_id) { /// for the new reduced tensor - blocks.insert(blocks.end(), old_formats_strs[b_i].begin() + rank_base, old_blocks_strs[b_i].end()); + blocks.insert(blocks.end(), old_blocks_strs[b_i].begin() + rank_base, old_blocks_strs[b_i].end()); } else { /// for other remaining old operands diff --git a/lib/Dialect/IndexTree/Transforms/WorkspaceTransforms.cpp b/lib/Dialect/IndexTree/Transforms/WorkspaceTransforms.cpp index b9ebecc2..665c413a 100644 --- a/lib/Dialect/IndexTree/Transforms/WorkspaceTransforms.cpp +++ b/lib/Dialect/IndexTree/Transforms/WorkspaceTransforms.cpp @@ -671,7 +671,7 @@ std::vector CompressedWorkspaceOutput(std::vector sparseDimsOutput, /// Convert blocks string array into StrAttr std::vector c3_blocks_str_0 = {"UNK"}; - std::vector c3_blocks_str_1 = opFormats[2]; + std::vector c3_blocks_str_1 = opBlocks[2]; std::vector> c3_blocks_str = {c3_blocks_str_0, c3_blocks_str_1}; std::vector c3_rhs = workspaceTensors; @@ -779,7 +779,7 @@ void CompressedWorkspaceInput(std::vector computeOps, OpBuilder &builder, std::vector> opBlocks; std::vector> opPerms; std::vector> inputOutputMapping; - getFormatsPermsOfComputeOp(computeOp, opFormats, opPerms, inputOutputMapping); + getFormatsPermsOfComputeOp(computeOp, opFormats, opBlocks, opPerms, inputOutputMapping); comet_debug() << " \n"; for (auto n : opFormats) { @@ -903,8 +903,8 @@ void CompressedWorkspaceInput(std::vector computeOps, OpBuilder &builder, std::vector c2_formats_str_1 = {"D"}; std::vector> c2_formats_str = {c2_formats_str_0, c2_formats_str_1}; - std::vector c2_blocks_str_0 = opFormats[sparseDimsInput[0].tensorId]; - std::vector c2_blocks_str_1 = {"D"}; + std::vector c2_blocks_str_0 = opBlocks[sparseDimsInput[0].tensorId]; + std::vector c2_blocks_str_1 = {"UNK"}; std::vector> c2_blocks_str = {c2_blocks_str_0, c2_blocks_str_1}; std::vector c2_rhs = {tensors_rhs[sparseDimsInput[0].tensorId]}; @@ -1081,7 +1081,7 @@ void IndexTreeWorkspaceTransformationsPass::CompressedWorkspaceTransforms(mlir:: std::vector> opBlocks; std::vector> opPerms; std::vector> inputOutputMapping; - getFormatsPermsOfComputeOp(computeOp, opFormats, opPerms, inputOutputMapping); + getFormatsPermsOfComputeOp(computeOp, opFormats, opBlocks, opPerms, inputOutputMapping); #ifdef DEBUG_MODE_WorkspaceTransformsPass comet_debug() << "Print opFormats:\n"; diff --git a/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp b/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp index 4f6ce9b5..f2195715 100644 --- a/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp +++ b/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp @@ -51,7 +51,7 @@ using namespace mlir::indexTree; #define DEBUG_TYPE "tensor-decl-lowering" // *********** For debug purpose *********// -#define COMET_DEBUG_MODE +// #define COMET_DEBUG_MODE #include "comet/Utils/debug.h" #undef COMET_DEBUG_MODE // *********** For debug purpose *********// diff --git a/lib/Dialect/Utils/Utils.cpp b/lib/Dialect/Utils/Utils.cpp index d83fe16c..d37868ea 100644 --- a/lib/Dialect/Utils/Utils.cpp +++ b/lib/Dialect/Utils/Utils.cpp @@ -1476,15 +1476,19 @@ namespace mlir /// Get the perms and formats of the itCompute op void getFormatsPermsOfComputeOp(Value computeOp, std::vector> &opFormats, + std::vector> &opBlocks, std::vector> &opPerms, std::vector> &inputOutputMapping) { indexTree::IndexTreeComputeRHSOp itComputeOp_rhs = dyn_cast(computeOp.getDefiningOp()->getOperand(0).getDefiningOp()); ArrayAttr opFormatsArrayAttr_rhs = itComputeOp_rhs.getAllFormats(); + ArrayAttr opBlocksArrayAttr_rhs = itComputeOp_rhs.getAllBlocks(); ArrayAttr opPermsArrayAttr_rhs = itComputeOp_rhs.getAllPerms(); indexTree::IndexTreeComputeLHSOp itComputeOp_lhs = dyn_cast(computeOp.getDefiningOp()->getOperand(1).getDefiningOp()); ArrayAttr opFormatsArrayAttr_lhs = itComputeOp_lhs.getAllFormats(); + ArrayAttr opBlocksArrayAttr_lhs = itComputeOp_lhs.getAllBlocks(); ArrayAttr opPermsArrayAttr_lhs = itComputeOp_lhs.getAllPerms(); + ///TODO(patrick) We should probably verify the block sizes assert(opFormatsArrayAttr_rhs.size() == opPermsArrayAttr_rhs.size() && "not equal RHS formats size with perms size\n"); assert(opFormatsArrayAttr_lhs.size() == opPermsArrayAttr_lhs.size() && "not equal LHS formats size with perms size\n"); @@ -1493,17 +1497,25 @@ namespace mlir comet_debug() << "Start printing opFormats_rhs\n"; std::vector> opFormats_rhs = convertArrayAttrStrTo2DVector(opFormatsArrayAttr_rhs); comet_debug() << "End printing opFormats_rhs\n"; + comet_debug() << "Start printing opBlocks_rhs\n"; + std::vector> opBlocks_rhs = convertArrayAttrStrTo2DVector(opBlocksArrayAttr_rhs); + comet_debug() << "End printing opBlocks_rhs\n"; std::vector> opPerms_rhs = convertArrayAttrIntTo2DVector(opPermsArrayAttr_rhs); std::vector> inputMapping = createInputOutputMapping(opPermsArrayAttr_rhs, true); comet_debug() << "Start printing opFormats_lhs\n"; std::vector> opFormats_lhs = convertArrayAttrStrTo2DVector(opFormatsArrayAttr_lhs); comet_debug() << "End printing opFormats_lhs\n"; + comet_debug() << "Start printing opBlocks_lhs\n"; + std::vector> opBlocks_lhs = convertArrayAttrStrTo2DVector(opBlocksArrayAttr_lhs); + comet_debug() << "End printing opBlocks_lhs\n"; std::vector> opPerms_lhs = convertArrayAttrIntTo2DVector(opPermsArrayAttr_lhs); std::vector> outputMapping = createInputOutputMapping(opPermsArrayAttr_lhs, false); opFormats = opFormats_rhs; opFormats.insert(opFormats.end(), opFormats_lhs.begin(), opFormats_lhs.end()); + opBlocks = opBlocks_rhs; + opBlocks.insert(opBlocks.end(), opBlocks_lhs.begin(), opBlocks_lhs.end()); opPerms = opPerms_rhs; opPerms.insert(opPerms.end(), opPerms_lhs.begin(), opPerms_lhs.end()); inputOutputMapping = inputMapping; @@ -1616,10 +1628,11 @@ namespace mlir { comet_debug() << " getFormatsInfo:leafs[" << j << "] is computeOp\n"; std::vector> allFormats; + std::vector> allBlocks; std::vector> allPerms; std::vector> inputOutputMapping; OpBuilder builder(leafop); - getFormatsPermsOfComputeOp(leafop, allFormats, allPerms, inputOutputMapping); + getFormatsPermsOfComputeOp(leafop, allFormats, allBlocks, allPerms, inputOutputMapping); comet_debug() << " getFormatsInfo:Allformats allFormats.size(): " << allFormats.size() << "\n"; for (auto m : allFormats) diff --git a/run.sh b/run.sh index 8b388783..58cb2d60 100755 --- a/run.sh +++ b/run.sh @@ -1,9 +1,13 @@ #!/bin/bash export LD_LIBRARY_PATH="/home/patrick/Work/PNNL/COMET/install/lib" -export SPARSE_FILE_NAME0=first.mtx -build/bin/comet-opt --convert-ta-to-it --convert-to-loops --convert-to-llvm first.ta &> first.mlir +#export SPARSE_FILE_NAME0=first.mtx +#build/bin/comet-opt --convert-ta-to-it --convert-to-loops --convert-to-llvm first.ta &> first.mlir + +export SPARSE_FILE_NAME0=integration_test/data/test_rank2.mtx +export SPARSE_FILE_NAME1=integration_test/data/test_rank2_transpose.mtx +build/bin/comet-opt --opt-comp-workspace --convert-ta-to-it --convert-to-loops --convert-to-llvm first.ta &> first.mlir llvm/build/bin/mlir-cpu-runner first.mlir -O3 -e main -entry-point-result=void \ -shared-libs=build/lib/libcomet_runner_utils.so diff --git a/test.mtx b/test.mtx new file mode 100644 index 00000000..34cb95ac --- /dev/null +++ b/test.mtx @@ -0,0 +1,13 @@ +%%MatrixMarket matrix coordinate real general +% +% This is a test sparse matrix in Matrix Market Exchange Format. +% see https://math.nist.gov/MatrixMarket +% +4 6 7 +1 1 5.0 +1 2 1.0 +2 1 7.0 +2 2 3.0 +4 1 8.0 +4 4 4.0 +4 5 9.0 diff --git a/test.ta b/test.ta new file mode 100644 index 00000000..b0bb1a03 --- /dev/null +++ b/test.ta @@ -0,0 +1,20 @@ +def main() { + #IndexLabel Declarations + IndexLabel [a] = [4]; + IndexLabel [b] = [?]; + IndexLabel [c] = [?]; + + #Tensor Declarations + Tensor B([b, c], {CSR}); #sparse tensor declarations should be before dense tensor declarations + Tensor A([a, b], {Dense}); + Tensor C([a, c], {Dense}); + + #Tensor Fill Operation + A[a, b] = 1.0; + B[b, c] = comet_read(0); + C[a, c] = 0.0; + + C[a, c] = A[a, b] * B[b, c]; + print(C); + #print(B); +}