diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6f39c52c..4d5d7af5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,7 +29,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules)
 if (MSVC)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHs-c- /GR-")
 else ()
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions -fno-rtti")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions -fno-rtti -g")
 endif ()
 
 #-------------------------------------------------------------------------------
diff --git a/first.mlir b/first.mlir
index 341dd434..514c26d7 100644
--- a/first.mlir
+++ b/first.mlir
@@ -1,544 +1,1076 @@
-// -----// IR Dump After {anonymous}::FuncOpLoweringPass () //----- //
-#map = affine_map<(d0, d1) -> (d0, d1)>
-#map1 = affine_map<(d0, d1) -> (d1, d0)>
-module {
-  func.func @main() {
-    %c0 = arith.constant 0 : index
-    %c1 = arith.constant 1 : index
-    %0 = "ta.dynamic_index_label"(%c0, %c1) : (index, index) -> !ta.range
-    %c0_0 = arith.constant 0 : index
-    %c1_1 = arith.constant 1 : index
-    %1 = "ta.dynamic_index_label"(%c0_0, %c1_1) : (index, index) -> !ta.range
-    %2 = "ta.sparse_tensor_decl"(%0, %1) {format = "CSR", temporal_tensor = false} : (!ta.range, !ta.range) -> tensor<?x?xf64>
-    %3 = "ta.dense_tensor_decl"(%1, %0) {format = "Dense"} : (!ta.range, !ta.range) -> tensor<?x?xf64>
-    %4 = "ta.dense_tensor_decl"(%1, %0) {format = "Dense"} : (!ta.range, !ta.range) -> tensor<?x?xf64>
-    "ta.fill_from_file"(%2) {filename = "SPARSE_FILE_NAME0", readMode = 1 : i32} : (tensor<?x?xf64>) -> ()
-    "ta.fill"(%3) {value = 1.000000e+00 : f64} : (tensor<?x?xf64>) -> ()
-    %5 = "ta.mul"(%2, %3, %1, %0) {MaskType = "none", __alpha__ = 1.000000e+00 : f64, __beta__ = 0.000000e+00 : f64, formats = ["CSR", "Dense", "Dense"], indexing_maps = [#map, #map1, #map1], operand_segment_sizes = array<i32: 1, 1, 2, 0>, semiring = "plusxy_times"} : (tensor<?x?xf64>, tensor<?x?xf64>, !ta.range, !ta.range) -> tensor<?x?xf64>
-    "ta.set_op"(%5, %4) {__beta__ = 0.000000e+00 : f64} : (tensor<?x?xf64>, tensor<?x?xf64>) -> ()
-    "ta.print"(%2) : (tensor<?x?xf64>) -> ()
-    "ta.print"(%3) : (tensor<?x?xf64>) -> ()
-    "ta.print"(%4) : (tensor<?x?xf64>) -> ()
-    return
+module attributes {llvm.data_layout = ""} {
+  llvm.func @free(!llvm.ptr<i8>)
+  llvm.func @malloc(i64) -> !llvm.ptr<i8>
+  llvm.func @main() {
+    %0 = llvm.mlir.constant(8 : index) : i64
+    %1 = llvm.mlir.constant(32 : index) : i64
+    %2 = llvm.mlir.constant(0 : index) : i64
+    %3 = llvm.mlir.constant(1 : index) : i64
+    %4 = llvm.mlir.constant(13 : index) : i64
+    %5 = llvm.mlir.constant(1 : index) : i64
+    %6 = llvm.mlir.constant(0 : index) : i64
+    %7 = llvm.mlir.constant(-1 : index) : i64
+    %8 = llvm.mlir.constant(2 : index) : i64
+    %9 = llvm.mlir.constant(3 : index) : i64
+    %10 = llvm.mlir.constant(0 : i32) : i32
+    %11 = llvm.mlir.constant(1 : i32) : i32
+    %12 = llvm.mlir.constant(4 : index) : i64
+    %13 = llvm.mlir.constant(5 : index) : i64
+    %14 = llvm.mlir.constant(6 : index) : i64
+    %15 = llvm.mlir.constant(7 : index) : i64
+    %16 = llvm.mlir.constant(8 : index) : i64
+    %17 = llvm.mlir.constant(9 : index) : i64
+    %18 = llvm.mlir.constant(true) : i1
+    %19 = llvm.mlir.constant(false) : i1
+    %20 = llvm.mlir.constant(0.000000e+00 : f64) : f64
+    %21 = llvm.mlir.constant(10 : index) : i64
+    %22 = llvm.mlir.null : !llvm.ptr<i64>
+    %23 = llvm.getelementptr %22[13] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %24 = llvm.ptrtoint %23 : !llvm.ptr<i64> to i64
+    %25 = llvm.call @malloc(%24) : (i64) -> !llvm.ptr<i8>
+    %26 = llvm.bitcast %25 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %27 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %28 = llvm.insertvalue %26, %27[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %29 = llvm.insertvalue %26, %28[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %30 = llvm.insertvalue %2, %29[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %31 = llvm.insertvalue %4, %30[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %32 = llvm.insertvalue %3, %31[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %33 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %32, %33 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %34 = llvm.bitcast %33 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %35 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %36 = llvm.insertvalue %3, %35[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %37 = llvm.insertvalue %34, %36[1] : !llvm.struct<(i64, ptr<i8>)> 
+    llvm.call @read_input_sizes_2D_f64(%10, %6, %7, %5, %7, %3, %34, %11) {filename = "SPARSE_FILE_NAME0"} : (i32, i64, i64, i64, i64, i64, !llvm.ptr<i8>, i32) -> ()
+    %38 = llvm.load %26 : !llvm.ptr<i64>
+    %39 = llvm.getelementptr %26[1] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %40 = llvm.load %39 : !llvm.ptr<i64>
+    %41 = llvm.getelementptr %26[2] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %42 = llvm.load %41 : !llvm.ptr<i64>
+    %43 = llvm.getelementptr %26[3] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %44 = llvm.load %43 : !llvm.ptr<i64>
+    %45 = llvm.getelementptr %26[4] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %46 = llvm.load %45 : !llvm.ptr<i64>
+    %47 = llvm.getelementptr %26[5] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %48 = llvm.load %47 : !llvm.ptr<i64>
+    %49 = llvm.getelementptr %26[6] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %50 = llvm.load %49 : !llvm.ptr<i64>
+    %51 = llvm.getelementptr %26[7] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %52 = llvm.load %51 : !llvm.ptr<i64>
+    %53 = llvm.getelementptr %26[8] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %54 = llvm.load %53 : !llvm.ptr<i64>
+    %55 = llvm.getelementptr %26[9] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %56 = llvm.load %55 : !llvm.ptr<i64>
+    %57 = llvm.getelementptr %26[10] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %58 = llvm.load %57 : !llvm.ptr<i64>
+    %59 = llvm.mlir.null : !llvm.ptr<i64>
+    %60 = llvm.getelementptr %59[%38] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %61 = llvm.ptrtoint %60 : !llvm.ptr<i64> to i64
+    %62 = llvm.call @malloc(%61) : (i64) -> !llvm.ptr<i8>
+    %63 = llvm.bitcast %62 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %64 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %65 = llvm.insertvalue %63, %64[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %66 = llvm.insertvalue %63, %65[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %67 = llvm.insertvalue %2, %66[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %68 = llvm.insertvalue %38, %67[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %69 = llvm.insertvalue %3, %68[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.br ^bb1(%6 : i64)
+  ^bb1(%70: i64):  // 2 preds: ^bb0, ^bb2
+    %71 = llvm.icmp "slt" %70, %38 : i64
+    llvm.cond_br %71, ^bb2, ^bb3
+  ^bb2:  // pred: ^bb1
+    %72 = llvm.getelementptr %63[%70] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %6, %72 : !llvm.ptr<i64>
+    %73 = llvm.add %70, %5  : i64
+    llvm.br ^bb1(%73 : i64)
+  ^bb3:  // pred: ^bb1
+    %74 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %69, %74 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %75 = llvm.bitcast %74 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %76 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %77 = llvm.insertvalue %3, %76[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %78 = llvm.insertvalue %75, %77[1] : !llvm.struct<(i64, ptr<i8>)> 
+    %79 = llvm.mlir.null : !llvm.ptr<i64>
+    %80 = llvm.getelementptr %79[%40] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %81 = llvm.ptrtoint %80 : !llvm.ptr<i64> to i64
+    %82 = llvm.call @malloc(%81) : (i64) -> !llvm.ptr<i8>
+    %83 = llvm.bitcast %82 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %84 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %85 = llvm.insertvalue %83, %84[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %86 = llvm.insertvalue %83, %85[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %87 = llvm.insertvalue %2, %86[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %88 = llvm.insertvalue %40, %87[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %89 = llvm.insertvalue %3, %88[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.br ^bb4(%6 : i64)
+  ^bb4(%90: i64):  // 2 preds: ^bb3, ^bb5
+    %91 = llvm.icmp "slt" %90, %40 : i64
+    llvm.cond_br %91, ^bb5, ^bb6
+  ^bb5:  // pred: ^bb4
+    %92 = llvm.getelementptr %83[%90] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %6, %92 : !llvm.ptr<i64>
+    %93 = llvm.add %90, %5  : i64
+    llvm.br ^bb4(%93 : i64)
+  ^bb6:  // pred: ^bb4
+    %94 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %89, %94 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %95 = llvm.bitcast %94 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %96 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %97 = llvm.insertvalue %3, %96[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %98 = llvm.insertvalue %95, %97[1] : !llvm.struct<(i64, ptr<i8>)> 
+    %99 = llvm.mlir.null : !llvm.ptr<i64>
+    %100 = llvm.getelementptr %99[%42] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %101 = llvm.ptrtoint %100 : !llvm.ptr<i64> to i64
+    %102 = llvm.call @malloc(%101) : (i64) -> !llvm.ptr<i8>
+    %103 = llvm.bitcast %102 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %104 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %105 = llvm.insertvalue %103, %104[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %106 = llvm.insertvalue %103, %105[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %107 = llvm.insertvalue %2, %106[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %108 = llvm.insertvalue %42, %107[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %109 = llvm.insertvalue %3, %108[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.br ^bb7(%6 : i64)
+  ^bb7(%110: i64):  // 2 preds: ^bb6, ^bb8
+    %111 = llvm.icmp "slt" %110, %42 : i64
+    llvm.cond_br %111, ^bb8, ^bb9
+  ^bb8:  // pred: ^bb7
+    %112 = llvm.getelementptr %103[%110] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %6, %112 : !llvm.ptr<i64>
+    %113 = llvm.add %110, %5  : i64
+    llvm.br ^bb7(%113 : i64)
+  ^bb9:  // pred: ^bb7
+    %114 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %109, %114 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %115 = llvm.bitcast %114 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %116 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %117 = llvm.insertvalue %3, %116[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %118 = llvm.insertvalue %115, %117[1] : !llvm.struct<(i64, ptr<i8>)> 
+    %119 = llvm.mlir.null : !llvm.ptr<i64>
+    %120 = llvm.getelementptr %119[%44] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %121 = llvm.ptrtoint %120 : !llvm.ptr<i64> to i64
+    %122 = llvm.call @malloc(%121) : (i64) -> !llvm.ptr<i8>
+    %123 = llvm.bitcast %122 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %124 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %125 = llvm.insertvalue %123, %124[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %126 = llvm.insertvalue %123, %125[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %127 = llvm.insertvalue %2, %126[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %128 = llvm.insertvalue %44, %127[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %129 = llvm.insertvalue %3, %128[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.br ^bb10(%6 : i64)
+  ^bb10(%130: i64):  // 2 preds: ^bb9, ^bb11
+    %131 = llvm.icmp "slt" %130, %44 : i64
+    llvm.cond_br %131, ^bb11, ^bb12
+  ^bb11:  // pred: ^bb10
+    %132 = llvm.getelementptr %123[%130] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %6, %132 : !llvm.ptr<i64>
+    %133 = llvm.add %130, %5  : i64
+    llvm.br ^bb10(%133 : i64)
+  ^bb12:  // pred: ^bb10
+    %134 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %129, %134 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %135 = llvm.bitcast %134 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %136 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %137 = llvm.insertvalue %3, %136[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %138 = llvm.insertvalue %135, %137[1] : !llvm.struct<(i64, ptr<i8>)> 
+    %139 = llvm.mlir.null : !llvm.ptr<i64>
+    %140 = llvm.getelementptr %139[%46] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %141 = llvm.ptrtoint %140 : !llvm.ptr<i64> to i64
+    %142 = llvm.call @malloc(%141) : (i64) -> !llvm.ptr<i8>
+    %143 = llvm.bitcast %142 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %144 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %145 = llvm.insertvalue %143, %144[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %146 = llvm.insertvalue %143, %145[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %147 = llvm.insertvalue %2, %146[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %148 = llvm.insertvalue %46, %147[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %149 = llvm.insertvalue %3, %148[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.br ^bb13(%6 : i64)
+  ^bb13(%150: i64):  // 2 preds: ^bb12, ^bb14
+    %151 = llvm.icmp "slt" %150, %46 : i64
+    llvm.cond_br %151, ^bb14, ^bb15
+  ^bb14:  // pred: ^bb13
+    %152 = llvm.getelementptr %143[%150] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %6, %152 : !llvm.ptr<i64>
+    %153 = llvm.add %150, %5  : i64
+    llvm.br ^bb13(%153 : i64)
+  ^bb15:  // pred: ^bb13
+    %154 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %149, %154 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %155 = llvm.bitcast %154 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %156 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %157 = llvm.insertvalue %3, %156[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %158 = llvm.insertvalue %155, %157[1] : !llvm.struct<(i64, ptr<i8>)> 
+    %159 = llvm.mlir.null : !llvm.ptr<i64>
+    %160 = llvm.getelementptr %159[%48] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %161 = llvm.ptrtoint %160 : !llvm.ptr<i64> to i64
+    %162 = llvm.call @malloc(%161) : (i64) -> !llvm.ptr<i8>
+    %163 = llvm.bitcast %162 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %164 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %165 = llvm.insertvalue %163, %164[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %166 = llvm.insertvalue %163, %165[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %167 = llvm.insertvalue %2, %166[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %168 = llvm.insertvalue %48, %167[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %169 = llvm.insertvalue %3, %168[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.br ^bb16(%6 : i64)
+  ^bb16(%170: i64):  // 2 preds: ^bb15, ^bb17
+    %171 = llvm.icmp "slt" %170, %48 : i64
+    llvm.cond_br %171, ^bb17, ^bb18
+  ^bb17:  // pred: ^bb16
+    %172 = llvm.getelementptr %163[%170] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %6, %172 : !llvm.ptr<i64>
+    %173 = llvm.add %170, %5  : i64
+    llvm.br ^bb16(%173 : i64)
+  ^bb18:  // pred: ^bb16
+    %174 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %169, %174 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %175 = llvm.bitcast %174 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %176 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %177 = llvm.insertvalue %3, %176[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %178 = llvm.insertvalue %175, %177[1] : !llvm.struct<(i64, ptr<i8>)> 
+    %179 = llvm.mlir.null : !llvm.ptr<i64>
+    %180 = llvm.getelementptr %179[%50] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %181 = llvm.ptrtoint %180 : !llvm.ptr<i64> to i64
+    %182 = llvm.call @malloc(%181) : (i64) -> !llvm.ptr<i8>
+    %183 = llvm.bitcast %182 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %184 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %185 = llvm.insertvalue %183, %184[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %186 = llvm.insertvalue %183, %185[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %187 = llvm.insertvalue %2, %186[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %188 = llvm.insertvalue %50, %187[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %189 = llvm.insertvalue %3, %188[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.br ^bb19(%6 : i64)
+  ^bb19(%190: i64):  // 2 preds: ^bb18, ^bb20
+    %191 = llvm.icmp "slt" %190, %50 : i64
+    llvm.cond_br %191, ^bb20, ^bb21
+  ^bb20:  // pred: ^bb19
+    %192 = llvm.getelementptr %183[%190] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %6, %192 : !llvm.ptr<i64>
+    %193 = llvm.add %190, %5  : i64
+    llvm.br ^bb19(%193 : i64)
+  ^bb21:  // pred: ^bb19
+    %194 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %189, %194 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %195 = llvm.bitcast %194 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %196 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %197 = llvm.insertvalue %3, %196[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %198 = llvm.insertvalue %195, %197[1] : !llvm.struct<(i64, ptr<i8>)> 
+    %199 = llvm.mlir.null : !llvm.ptr<i64>
+    %200 = llvm.getelementptr %199[%52] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %201 = llvm.ptrtoint %200 : !llvm.ptr<i64> to i64
+    %202 = llvm.call @malloc(%201) : (i64) -> !llvm.ptr<i8>
+    %203 = llvm.bitcast %202 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %204 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %205 = llvm.insertvalue %203, %204[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %206 = llvm.insertvalue %203, %205[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %207 = llvm.insertvalue %2, %206[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %208 = llvm.insertvalue %52, %207[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %209 = llvm.insertvalue %3, %208[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.br ^bb22(%6 : i64)
+  ^bb22(%210: i64):  // 2 preds: ^bb21, ^bb23
+    %211 = llvm.icmp "slt" %210, %52 : i64
+    llvm.cond_br %211, ^bb23, ^bb24
+  ^bb23:  // pred: ^bb22
+    %212 = llvm.getelementptr %203[%210] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %6, %212 : !llvm.ptr<i64>
+    %213 = llvm.add %210, %5  : i64
+    llvm.br ^bb22(%213 : i64)
+  ^bb24:  // pred: ^bb22
+    %214 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %209, %214 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %215 = llvm.bitcast %214 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %216 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %217 = llvm.insertvalue %3, %216[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %218 = llvm.insertvalue %215, %217[1] : !llvm.struct<(i64, ptr<i8>)> 
+    %219 = llvm.mlir.null : !llvm.ptr<f64>
+    %220 = llvm.getelementptr %219[%54] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    %221 = llvm.ptrtoint %220 : !llvm.ptr<f64> to i64
+    %222 = llvm.call @malloc(%221) : (i64) -> !llvm.ptr<i8>
+    %223 = llvm.bitcast %222 : !llvm.ptr<i8> to !llvm.ptr<f64>
+    %224 = llvm.mlir.undef : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)>
+    %225 = llvm.insertvalue %223, %224[0] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %226 = llvm.insertvalue %223, %225[1] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %227 = llvm.insertvalue %2, %226[2] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %228 = llvm.insertvalue %54, %227[3, 0] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %229 = llvm.insertvalue %3, %228[4, 0] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.br ^bb25(%6 : i64)
+  ^bb25(%230: i64):  // 2 preds: ^bb24, ^bb26
+    %231 = llvm.icmp "slt" %230, %54 : i64
+    llvm.cond_br %231, ^bb26, ^bb27
+  ^bb26:  // pred: ^bb25
+    %232 = llvm.getelementptr %223[%230] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    llvm.store %20, %232 : !llvm.ptr<f64>
+    %233 = llvm.add %230, %5  : i64
+    llvm.br ^bb25(%233 : i64)
+  ^bb27:  // pred: ^bb25
+    %234 = llvm.alloca %3 x !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %229, %234 : !llvm.ptr<struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %235 = llvm.bitcast %234 : !llvm.ptr<struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %236 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %237 = llvm.insertvalue %3, %236[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %238 = llvm.insertvalue %235, %237[1] : !llvm.struct<(i64, ptr<i8>)> 
+    llvm.call @read_input_2D_f64(%10, %6, %7, %5, %7, %3, %75, %3, %95, %3, %115, %3, %135, %3, %155, %3, %175, %3, %195, %3, %215, %3, %235, %11) {filename = "SPARSE_FILE_NAME0"} : (i32, i64, i64, i64, i64, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i32) -> ()
+    %239 = llvm.mlir.null : !llvm.ptr<i64>
+    %240 = llvm.getelementptr %239[13] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %241 = llvm.ptrtoint %240 : !llvm.ptr<i64> to i64
+    %242 = llvm.call @malloc(%241) : (i64) -> !llvm.ptr<i8>
+    %243 = llvm.bitcast %242 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %244 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %245 = llvm.insertvalue %243, %244[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %246 = llvm.insertvalue %243, %245[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %247 = llvm.insertvalue %2, %246[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %248 = llvm.insertvalue %4, %247[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %249 = llvm.insertvalue %3, %248[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %250 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %249, %250 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %251 = llvm.bitcast %250 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %252 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %253 = llvm.insertvalue %3, %252[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %254 = llvm.insertvalue %251, %253[1] : !llvm.struct<(i64, ptr<i8>)> 
+    llvm.call @read_input_sizes_2D_f64(%11, %6, %7, %5, %7, %3, %251, %11) {filename = "SPARSE_FILE_NAME1"} : (i32, i64, i64, i64, i64, i64, !llvm.ptr<i8>, i32) -> ()
+    %255 = llvm.load %243 : !llvm.ptr<i64>
+    %256 = llvm.getelementptr %243[1] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %257 = llvm.load %256 : !llvm.ptr<i64>
+    %258 = llvm.getelementptr %243[2] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %259 = llvm.load %258 : !llvm.ptr<i64>
+    %260 = llvm.getelementptr %243[3] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %261 = llvm.load %260 : !llvm.ptr<i64>
+    %262 = llvm.getelementptr %243[4] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %263 = llvm.load %262 : !llvm.ptr<i64>
+    %264 = llvm.getelementptr %243[5] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %265 = llvm.load %264 : !llvm.ptr<i64>
+    %266 = llvm.getelementptr %243[6] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %267 = llvm.load %266 : !llvm.ptr<i64>
+    %268 = llvm.getelementptr %243[7] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %269 = llvm.load %268 : !llvm.ptr<i64>
+    %270 = llvm.getelementptr %243[8] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %271 = llvm.load %270 : !llvm.ptr<i64>
+    %272 = llvm.mlir.null : !llvm.ptr<i64>
+    %273 = llvm.getelementptr %272[%255] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %274 = llvm.ptrtoint %273 : !llvm.ptr<i64> to i64
+    %275 = llvm.call @malloc(%274) : (i64) -> !llvm.ptr<i8>
+    %276 = llvm.bitcast %275 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %277 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %278 = llvm.insertvalue %276, %277[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %279 = llvm.insertvalue %276, %278[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %280 = llvm.insertvalue %2, %279[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %281 = llvm.insertvalue %255, %280[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %282 = llvm.insertvalue %3, %281[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.br ^bb28(%6 : i64)
+  ^bb28(%283: i64):  // 2 preds: ^bb27, ^bb29
+    %284 = llvm.icmp "slt" %283, %255 : i64
+    llvm.cond_br %284, ^bb29, ^bb30
+  ^bb29:  // pred: ^bb28
+    %285 = llvm.getelementptr %276[%283] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %6, %285 : !llvm.ptr<i64>
+    %286 = llvm.add %283, %5  : i64
+    llvm.br ^bb28(%286 : i64)
+  ^bb30:  // pred: ^bb28
+    %287 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %282, %287 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %288 = llvm.bitcast %287 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %289 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %290 = llvm.insertvalue %3, %289[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %291 = llvm.insertvalue %288, %290[1] : !llvm.struct<(i64, ptr<i8>)> 
+    %292 = llvm.mlir.null : !llvm.ptr<i64>
+    %293 = llvm.getelementptr %292[%257] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %294 = llvm.ptrtoint %293 : !llvm.ptr<i64> to i64
+    %295 = llvm.call @malloc(%294) : (i64) -> !llvm.ptr<i8>
+    %296 = llvm.bitcast %295 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %297 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %298 = llvm.insertvalue %296, %297[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %299 = llvm.insertvalue %296, %298[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %300 = llvm.insertvalue %2, %299[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %301 = llvm.insertvalue %257, %300[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %302 = llvm.insertvalue %3, %301[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.br ^bb31(%6 : i64)
+  ^bb31(%303: i64):  // 2 preds: ^bb30, ^bb32
+    %304 = llvm.icmp "slt" %303, %257 : i64
+    llvm.cond_br %304, ^bb32, ^bb33
+  ^bb32:  // pred: ^bb31
+    %305 = llvm.getelementptr %296[%303] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %6, %305 : !llvm.ptr<i64>
+    %306 = llvm.add %303, %5  : i64
+    llvm.br ^bb31(%306 : i64)
+  ^bb33:  // pred: ^bb31
+    %307 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %302, %307 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %308 = llvm.bitcast %307 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %309 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %310 = llvm.insertvalue %3, %309[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %311 = llvm.insertvalue %308, %310[1] : !llvm.struct<(i64, ptr<i8>)> 
+    %312 = llvm.mlir.null : !llvm.ptr<i64>
+    %313 = llvm.getelementptr %312[%259] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %314 = llvm.ptrtoint %313 : !llvm.ptr<i64> to i64
+    %315 = llvm.call @malloc(%314) : (i64) -> !llvm.ptr<i8>
+    %316 = llvm.bitcast %315 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %317 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %318 = llvm.insertvalue %316, %317[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %319 = llvm.insertvalue %316, %318[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %320 = llvm.insertvalue %2, %319[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %321 = llvm.insertvalue %259, %320[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %322 = llvm.insertvalue %3, %321[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.br ^bb34(%6 : i64)
+  ^bb34(%323: i64):  // 2 preds: ^bb33, ^bb35
+    %324 = llvm.icmp "slt" %323, %259 : i64
+    llvm.cond_br %324, ^bb35, ^bb36
+  ^bb35:  // pred: ^bb34
+    %325 = llvm.getelementptr %316[%323] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %6, %325 : !llvm.ptr<i64>
+    %326 = llvm.add %323, %5  : i64
+    llvm.br ^bb34(%326 : i64)
+  ^bb36:  // pred: ^bb34
+    %327 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %322, %327 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %328 = llvm.bitcast %327 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %329 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %330 = llvm.insertvalue %3, %329[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %331 = llvm.insertvalue %328, %330[1] : !llvm.struct<(i64, ptr<i8>)> 
+    %332 = llvm.mlir.null : !llvm.ptr<i64>
+    %333 = llvm.getelementptr %332[%261] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %334 = llvm.ptrtoint %333 : !llvm.ptr<i64> to i64
+    %335 = llvm.call @malloc(%334) : (i64) -> !llvm.ptr<i8>
+    %336 = llvm.bitcast %335 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %337 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %338 = llvm.insertvalue %336, %337[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %339 = llvm.insertvalue %336, %338[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %340 = llvm.insertvalue %2, %339[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %341 = llvm.insertvalue %261, %340[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %342 = llvm.insertvalue %3, %341[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.br ^bb37(%6 : i64)
+  ^bb37(%343: i64):  // 2 preds: ^bb36, ^bb38
+    %344 = llvm.icmp "slt" %343, %261 : i64
+    llvm.cond_br %344, ^bb38, ^bb39
+  ^bb38:  // pred: ^bb37
+    %345 = llvm.getelementptr %336[%343] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %6, %345 : !llvm.ptr<i64>
+    %346 = llvm.add %343, %5  : i64
+    llvm.br ^bb37(%346 : i64)
+  ^bb39:  // pred: ^bb37
+    %347 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %342, %347 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %348 = llvm.bitcast %347 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %349 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %350 = llvm.insertvalue %3, %349[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %351 = llvm.insertvalue %348, %350[1] : !llvm.struct<(i64, ptr<i8>)> 
+    %352 = llvm.mlir.null : !llvm.ptr<i64>
+    %353 = llvm.getelementptr %352[%263] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %354 = llvm.ptrtoint %353 : !llvm.ptr<i64> to i64
+    %355 = llvm.call @malloc(%354) : (i64) -> !llvm.ptr<i8>
+    %356 = llvm.bitcast %355 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %357 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %358 = llvm.insertvalue %356, %357[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %359 = llvm.insertvalue %356, %358[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %360 = llvm.insertvalue %2, %359[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %361 = llvm.insertvalue %263, %360[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %362 = llvm.insertvalue %3, %361[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.br ^bb40(%6 : i64)
+  ^bb40(%363: i64):  // 2 preds: ^bb39, ^bb41
+    %364 = llvm.icmp "slt" %363, %263 : i64
+    llvm.cond_br %364, ^bb41, ^bb42
+  ^bb41:  // pred: ^bb40
+    %365 = llvm.getelementptr %356[%363] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %6, %365 : !llvm.ptr<i64>
+    %366 = llvm.add %363, %5  : i64
+    llvm.br ^bb40(%366 : i64)
+  ^bb42:  // pred: ^bb40
+    %367 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %362, %367 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %368 = llvm.bitcast %367 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %369 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %370 = llvm.insertvalue %3, %369[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %371 = llvm.insertvalue %368, %370[1] : !llvm.struct<(i64, ptr<i8>)> 
+    %372 = llvm.mlir.null : !llvm.ptr<i64>
+    %373 = llvm.getelementptr %372[%265] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %374 = llvm.ptrtoint %373 : !llvm.ptr<i64> to i64
+    %375 = llvm.call @malloc(%374) : (i64) -> !llvm.ptr<i8>
+    %376 = llvm.bitcast %375 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %377 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %378 = llvm.insertvalue %376, %377[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %379 = llvm.insertvalue %376, %378[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %380 = llvm.insertvalue %2, %379[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %381 = llvm.insertvalue %265, %380[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %382 = llvm.insertvalue %3, %381[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.br ^bb43(%6 : i64)
+  ^bb43(%383: i64):  // 2 preds: ^bb42, ^bb44
+    %384 = llvm.icmp "slt" %383, %265 : i64
+    llvm.cond_br %384, ^bb44, ^bb45
+  ^bb44:  // pred: ^bb43
+    %385 = llvm.getelementptr %376[%383] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %6, %385 : !llvm.ptr<i64>
+    %386 = llvm.add %383, %5  : i64
+    llvm.br ^bb43(%386 : i64)
+  ^bb45:  // pred: ^bb43
+    %387 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %382, %387 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %388 = llvm.bitcast %387 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %389 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %390 = llvm.insertvalue %3, %389[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %391 = llvm.insertvalue %388, %390[1] : !llvm.struct<(i64, ptr<i8>)> 
+    %392 = llvm.mlir.null : !llvm.ptr<i64>
+    %393 = llvm.getelementptr %392[%267] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %394 = llvm.ptrtoint %393 : !llvm.ptr<i64> to i64
+    %395 = llvm.call @malloc(%394) : (i64) -> !llvm.ptr<i8>
+    %396 = llvm.bitcast %395 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %397 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %398 = llvm.insertvalue %396, %397[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %399 = llvm.insertvalue %396, %398[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %400 = llvm.insertvalue %2, %399[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %401 = llvm.insertvalue %267, %400[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %402 = llvm.insertvalue %3, %401[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.br ^bb46(%6 : i64)
+  ^bb46(%403: i64):  // 2 preds: ^bb45, ^bb47
+    %404 = llvm.icmp "slt" %403, %267 : i64
+    llvm.cond_br %404, ^bb47, ^bb48
+  ^bb47:  // pred: ^bb46
+    %405 = llvm.getelementptr %396[%403] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %6, %405 : !llvm.ptr<i64>
+    %406 = llvm.add %403, %5  : i64
+    llvm.br ^bb46(%406 : i64)
+  ^bb48:  // pred: ^bb46
+    %407 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %402, %407 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %408 = llvm.bitcast %407 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %409 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %410 = llvm.insertvalue %3, %409[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %411 = llvm.insertvalue %408, %410[1] : !llvm.struct<(i64, ptr<i8>)> 
+    %412 = llvm.mlir.null : !llvm.ptr<i64>
+    %413 = llvm.getelementptr %412[%269] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %414 = llvm.ptrtoint %413 : !llvm.ptr<i64> to i64
+    %415 = llvm.call @malloc(%414) : (i64) -> !llvm.ptr<i8>
+    %416 = llvm.bitcast %415 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %417 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %418 = llvm.insertvalue %416, %417[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %419 = llvm.insertvalue %416, %418[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %420 = llvm.insertvalue %2, %419[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %421 = llvm.insertvalue %269, %420[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %422 = llvm.insertvalue %3, %421[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.br ^bb49(%6 : i64)
+  ^bb49(%423: i64):  // 2 preds: ^bb48, ^bb50
+    %424 = llvm.icmp "slt" %423, %269 : i64
+    llvm.cond_br %424, ^bb50, ^bb51
+  ^bb50:  // pred: ^bb49
+    %425 = llvm.getelementptr %416[%423] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %6, %425 : !llvm.ptr<i64>
+    %426 = llvm.add %423, %5  : i64
+    llvm.br ^bb49(%426 : i64)
+  ^bb51:  // pred: ^bb49
+    %427 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %422, %427 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %428 = llvm.bitcast %427 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %429 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %430 = llvm.insertvalue %3, %429[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %431 = llvm.insertvalue %428, %430[1] : !llvm.struct<(i64, ptr<i8>)> 
+    %432 = llvm.mlir.null : !llvm.ptr<f64>
+    %433 = llvm.getelementptr %432[%271] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    %434 = llvm.ptrtoint %433 : !llvm.ptr<f64> to i64
+    %435 = llvm.call @malloc(%434) : (i64) -> !llvm.ptr<i8>
+    %436 = llvm.bitcast %435 : !llvm.ptr<i8> to !llvm.ptr<f64>
+    %437 = llvm.mlir.undef : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)>
+    %438 = llvm.insertvalue %436, %437[0] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %439 = llvm.insertvalue %436, %438[1] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %440 = llvm.insertvalue %2, %439[2] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %441 = llvm.insertvalue %271, %440[3, 0] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %442 = llvm.insertvalue %3, %441[4, 0] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.br ^bb52(%6 : i64)
+  ^bb52(%443: i64):  // 2 preds: ^bb51, ^bb53
+    %444 = llvm.icmp "slt" %443, %271 : i64
+    llvm.cond_br %444, ^bb53, ^bb54
+  ^bb53:  // pred: ^bb52
+    %445 = llvm.getelementptr %436[%443] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    llvm.store %20, %445 : !llvm.ptr<f64>
+    %446 = llvm.add %443, %5  : i64
+    llvm.br ^bb52(%446 : i64)
+  ^bb54:  // pred: ^bb52
+    %447 = llvm.alloca %3 x !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %442, %447 : !llvm.ptr<struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %448 = llvm.bitcast %447 : !llvm.ptr<struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %449 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %450 = llvm.insertvalue %3, %449[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %451 = llvm.insertvalue %448, %450[1] : !llvm.struct<(i64, ptr<i8>)> 
+    llvm.call @read_input_2D_f64(%11, %6, %7, %5, %7, %3, %288, %3, %308, %3, %328, %3, %348, %3, %368, %3, %388, %3, %408, %3, %428, %3, %448, %11) {filename = "SPARSE_FILE_NAME1"} : (i32, i64, i64, i64, i64, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i32) -> ()
+    %452 = llvm.mlir.null : !llvm.ptr<f64>
+    %453 = llvm.getelementptr %452[%58] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    %454 = llvm.ptrtoint %453 : !llvm.ptr<f64> to i64
+    %455 = llvm.add %454, %1  : i64
+    %456 = llvm.call @malloc(%455) : (i64) -> !llvm.ptr<i8>
+    %457 = llvm.bitcast %456 : !llvm.ptr<i8> to !llvm.ptr<f64>
+    %458 = llvm.ptrtoint %457 : !llvm.ptr<f64> to i64
+    %459 = llvm.sub %1, %3  : i64
+    %460 = llvm.add %458, %459  : i64
+    %461 = llvm.urem %460, %1  : i64
+    %462 = llvm.sub %460, %461  : i64
+    %463 = llvm.inttoptr %462 : i64 to !llvm.ptr<f64>
+    llvm.br ^bb55(%6 : i64)
+  ^bb55(%464: i64):  // 2 preds: ^bb54, ^bb56
+    %465 = llvm.icmp "slt" %464, %58 : i64
+    llvm.cond_br %465, ^bb56, ^bb57
+  ^bb56:  // pred: ^bb55
+    %466 = llvm.getelementptr %463[%464] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    llvm.store %20, %466 : !llvm.ptr<f64>
+    %467 = llvm.add %464, %5  : i64
+    llvm.br ^bb55(%467 : i64)
+  ^bb57:  // pred: ^bb55
+    %468 = llvm.mlir.null : !llvm.ptr<i64>
+    %469 = llvm.getelementptr %468[%58] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %470 = llvm.ptrtoint %469 : !llvm.ptr<i64> to i64
+    %471 = llvm.add %470, %1  : i64
+    %472 = llvm.call @malloc(%471) : (i64) -> !llvm.ptr<i8>
+    %473 = llvm.bitcast %472 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %474 = llvm.ptrtoint %473 : !llvm.ptr<i64> to i64
+    %475 = llvm.sub %1, %3  : i64
+    %476 = llvm.add %474, %475  : i64
+    %477 = llvm.urem %476, %1  : i64
+    %478 = llvm.sub %476, %477  : i64
+    %479 = llvm.inttoptr %478 : i64 to !llvm.ptr<i64>
+    llvm.br ^bb58(%6 : i64)
+  ^bb58(%480: i64):  // 2 preds: ^bb57, ^bb59
+    %481 = llvm.icmp "slt" %480, %58 : i64
+    llvm.cond_br %481, ^bb59, ^bb60
+  ^bb59:  // pred: ^bb58
+    %482 = llvm.getelementptr %479[%480] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %6, %482 : !llvm.ptr<i64>
+    %483 = llvm.add %480, %5  : i64
+    llvm.br ^bb58(%483 : i64)
+  ^bb60:  // pred: ^bb58
+    %484 = llvm.mlir.null : !llvm.ptr<i64>
+    %485 = llvm.getelementptr %484[1] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %486 = llvm.ptrtoint %485 : !llvm.ptr<i64> to i64
+    %487 = llvm.call @malloc(%486) : (i64) -> !llvm.ptr<i8>
+    %488 = llvm.bitcast %487 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %489 = llvm.mlir.null : !llvm.ptr<f64>
+    %490 = llvm.getelementptr %489[%58] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    %491 = llvm.ptrtoint %490 : !llvm.ptr<f64> to i64
+    %492 = llvm.add %491, %1  : i64
+    %493 = llvm.call @malloc(%492) : (i64) -> !llvm.ptr<i8>
+    %494 = llvm.bitcast %493 : !llvm.ptr<i8> to !llvm.ptr<f64>
+    %495 = llvm.ptrtoint %494 : !llvm.ptr<f64> to i64
+    %496 = llvm.sub %1, %3  : i64
+    %497 = llvm.add %495, %496  : i64
+    %498 = llvm.urem %497, %1  : i64
+    %499 = llvm.sub %497, %498  : i64
+    %500 = llvm.inttoptr %499 : i64 to !llvm.ptr<f64>
+    llvm.br ^bb61(%6 : i64)
+  ^bb61(%501: i64):  // 2 preds: ^bb60, ^bb62
+    %502 = llvm.icmp "slt" %501, %58 : i64
+    llvm.cond_br %502, ^bb62, ^bb63
+  ^bb62:  // pred: ^bb61
+    %503 = llvm.getelementptr %500[%501] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    llvm.store %20, %503 : !llvm.ptr<f64>
+    %504 = llvm.add %501, %5  : i64
+    llvm.br ^bb61(%504 : i64)
+  ^bb63:  // pred: ^bb61
+    %505 = llvm.add %56, %5  : i64
+    %506 = llvm.mlir.null : !llvm.ptr<i64>
+    %507 = llvm.getelementptr %506[1] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %508 = llvm.ptrtoint %507 : !llvm.ptr<i64> to i64
+    %509 = llvm.call @malloc(%508) : (i64) -> !llvm.ptr<i8>
+    %510 = llvm.bitcast %509 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %511 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %512 = llvm.insertvalue %510, %511[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %513 = llvm.insertvalue %510, %512[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %514 = llvm.insertvalue %2, %513[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %515 = llvm.insertvalue %3, %514[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %516 = llvm.insertvalue %3, %515[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.store %6, %510 : !llvm.ptr<i64>
+    %517 = llvm.mlir.null : !llvm.ptr<i64>
+    %518 = llvm.getelementptr %517[1] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %519 = llvm.ptrtoint %518 : !llvm.ptr<i64> to i64
+    %520 = llvm.call @malloc(%519) : (i64) -> !llvm.ptr<i8>
+    %521 = llvm.bitcast %520 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %522 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %523 = llvm.insertvalue %521, %522[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %524 = llvm.insertvalue %521, %523[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %525 = llvm.insertvalue %2, %524[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %526 = llvm.insertvalue %3, %525[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %527 = llvm.insertvalue %3, %526[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.store %6, %521 : !llvm.ptr<i64>
+    %528 = llvm.mlir.null : !llvm.ptr<i64>
+    %529 = llvm.getelementptr %528[1] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %530 = llvm.ptrtoint %529 : !llvm.ptr<i64> to i64
+    %531 = llvm.call @malloc(%530) : (i64) -> !llvm.ptr<i8>
+    %532 = llvm.bitcast %531 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %533 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %534 = llvm.insertvalue %532, %533[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %535 = llvm.insertvalue %532, %534[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %536 = llvm.insertvalue %2, %535[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %537 = llvm.insertvalue %2, %536[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %538 = llvm.insertvalue %3, %537[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %539 = llvm.mlir.null : !llvm.ptr<i64>
+    %540 = llvm.getelementptr %539[1] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %541 = llvm.ptrtoint %540 : !llvm.ptr<i64> to i64
+    %542 = llvm.call @malloc(%541) : (i64) -> !llvm.ptr<i8>
+    %543 = llvm.bitcast %542 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %544 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %545 = llvm.insertvalue %543, %544[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %546 = llvm.insertvalue %543, %545[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %547 = llvm.insertvalue %2, %546[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %548 = llvm.insertvalue %2, %547[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %549 = llvm.insertvalue %3, %548[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %550 = llvm.mlir.null : !llvm.ptr<i64>
+    %551 = llvm.getelementptr %550[%505] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %552 = llvm.ptrtoint %551 : !llvm.ptr<i64> to i64
+    %553 = llvm.call @malloc(%552) : (i64) -> !llvm.ptr<i8>
+    %554 = llvm.bitcast %553 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %555 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %556 = llvm.insertvalue %554, %555[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %557 = llvm.insertvalue %554, %556[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %558 = llvm.insertvalue %2, %557[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %559 = llvm.insertvalue %505, %558[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %560 = llvm.insertvalue %3, %559[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.br ^bb64(%6 : i64)
+  ^bb64(%561: i64):  // 2 preds: ^bb63, ^bb65
+    %562 = llvm.icmp "slt" %561, %505 : i64
+    llvm.cond_br %562, ^bb65, ^bb66
+  ^bb65:  // pred: ^bb64
+    %563 = llvm.getelementptr %554[%561] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %6, %563 : !llvm.ptr<i64>
+    %564 = llvm.add %561, %5  : i64
+    llvm.br ^bb64(%564 : i64)
+  ^bb66:  // pred: ^bb64
+    %565 = llvm.mlir.null : !llvm.ptr<i64>
+    %566 = llvm.getelementptr %565[1] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %567 = llvm.ptrtoint %566 : !llvm.ptr<i64> to i64
+    %568 = llvm.call @malloc(%567) : (i64) -> !llvm.ptr<i8>
+    %569 = llvm.bitcast %568 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %570 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %571 = llvm.insertvalue %569, %570[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %572 = llvm.insertvalue %569, %571[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %573 = llvm.insertvalue %2, %572[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %574 = llvm.insertvalue %2, %573[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %575 = llvm.insertvalue %3, %574[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %576 = llvm.mlir.null : !llvm.ptr<i64>
+    %577 = llvm.getelementptr %576[1] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %578 = llvm.ptrtoint %577 : !llvm.ptr<i64> to i64
+    %579 = llvm.call @malloc(%578) : (i64) -> !llvm.ptr<i8>
+    %580 = llvm.bitcast %579 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %581 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %582 = llvm.insertvalue %580, %581[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %583 = llvm.insertvalue %580, %582[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %584 = llvm.insertvalue %2, %583[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %585 = llvm.insertvalue %2, %584[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %586 = llvm.insertvalue %3, %585[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    llvm.store %56, %510 : !llvm.ptr<i64>
+    %587 = llvm.load %63 : !llvm.ptr<i64>
+    %588 = llvm.mlir.null : !llvm.ptr<i64>
+    %589 = llvm.getelementptr %588[1] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %590 = llvm.ptrtoint %589 : !llvm.ptr<i64> to i64
+    %591 = llvm.call @malloc(%590) : (i64) -> !llvm.ptr<i8>
+    %592 = llvm.bitcast %591 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    llvm.store %6, %592 : !llvm.ptr<i64>
+    %593 = llvm.mlir.null : !llvm.ptr<i1>
+    %594 = llvm.getelementptr %593[%58] : (!llvm.ptr<i1>, i64) -> !llvm.ptr<i1>
+    %595 = llvm.ptrtoint %594 : !llvm.ptr<i1> to i64
+    %596 = llvm.add %595, %0  : i64
+    %597 = llvm.call @malloc(%596) : (i64) -> !llvm.ptr<i8>
+    %598 = llvm.bitcast %597 : !llvm.ptr<i8> to !llvm.ptr<i1>
+    %599 = llvm.ptrtoint %598 : !llvm.ptr<i1> to i64
+    %600 = llvm.sub %0, %3  : i64
+    %601 = llvm.add %599, %600  : i64
+    %602 = llvm.urem %601, %0  : i64
+    %603 = llvm.sub %601, %602  : i64
+    %604 = llvm.inttoptr %603 : i64 to !llvm.ptr<i1>
+    llvm.br ^bb67(%6 : i64)
+  ^bb67(%605: i64):  // 2 preds: ^bb66, ^bb68
+    %606 = llvm.icmp "slt" %605, %58 : i64
+    llvm.cond_br %606, ^bb68, ^bb69(%6 : i64)
+  ^bb68:  // pred: ^bb67
+    %607 = llvm.getelementptr %604[%605] : (!llvm.ptr<i1>, i64) -> !llvm.ptr<i1>
+    llvm.store %19, %607 : !llvm.ptr<i1>
+    %608 = llvm.add %605, %5  : i64
+    llvm.br ^bb67(%608 : i64)
+  ^bb69(%609: i64):  // 2 preds: ^bb67, ^bb81
+    %610 = llvm.icmp "slt" %609, %587 : i64
+    llvm.cond_br %610, ^bb70, ^bb82
+  ^bb70:  // pred: ^bb69
+    %611 = llvm.load %592 : !llvm.ptr<i64>
+    %612 = llvm.add %611, %8  : i64
+    llvm.store %612, %592 : !llvm.ptr<i64>
+    llvm.store %6, %488 : !llvm.ptr<i64>
+    llvm.br ^bb71(%6 : i64)
+  ^bb71(%613: i64):  // 2 preds: ^bb70, ^bb72
+    %614 = llvm.icmp "slt" %613, %58 : i64
+    llvm.cond_br %614, ^bb72, ^bb73
+  ^bb72:  // pred: ^bb71
+    %615 = llvm.getelementptr %500[%613] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    llvm.store %20, %615 : !llvm.ptr<f64>
+    %616 = llvm.add %613, %5  : i64
+    llvm.br ^bb71(%616 : i64)
+  ^bb73:  // pred: ^bb71
+    %617 = llvm.add %609, %5  : i64
+    %618 = llvm.getelementptr %143[%609] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %619 = llvm.load %618 : !llvm.ptr<i64>
+    %620 = llvm.getelementptr %143[%617] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %621 = llvm.load %620 : !llvm.ptr<i64>
+    llvm.br ^bb74(%619 : i64)
+  ^bb74(%622: i64):  // 2 preds: ^bb73, ^bb75
+    %623 = llvm.icmp "slt" %622, %621 : i64
+    llvm.cond_br %623, ^bb75, ^bb76
+  ^bb75:  // pred: ^bb74
+    %624 = llvm.getelementptr %163[%622] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %625 = llvm.load %624 : !llvm.ptr<i64>
+    %626 = llvm.getelementptr %223[%622] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    %627 = llvm.load %626 : !llvm.ptr<f64>
+    %628 = llvm.getelementptr %500[%625] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    llvm.store %627, %628 : !llvm.ptr<f64>
+    %629 = llvm.add %622, %5  : i64
+    llvm.br ^bb74(%629 : i64)
+  ^bb76:  // pred: ^bb74
+    %630 = llvm.getelementptr %356[%609] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %631 = llvm.load %630 : !llvm.ptr<i64>
+    %632 = llvm.getelementptr %356[%617] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %633 = llvm.load %632 : !llvm.ptr<i64>
+    llvm.br ^bb77(%631 : i64)
+  ^bb77(%634: i64):  // 2 preds: ^bb76, ^bb80
+    %635 = llvm.icmp "slt" %634, %633 : i64
+    llvm.cond_br %635, ^bb78, ^bb81
+  ^bb78:  // pred: ^bb77
+    %636 = llvm.getelementptr %376[%634] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %637 = llvm.load %636 : !llvm.ptr<i64>
+    %638 = llvm.getelementptr %479[%637] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %639 = llvm.load %638 : !llvm.ptr<i64>
+    %640 = llvm.icmp "ne" %639, %612 : i64
+    llvm.cond_br %640, ^bb79, ^bb80
+  ^bb79:  // pred: ^bb78
+    %641 = llvm.getelementptr %479[%637] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %612, %641 : !llvm.ptr<i64>
+    %642 = llvm.load %488 : !llvm.ptr<i64>
+    %643 = llvm.add %642, %5  : i64
+    llvm.store %643, %488 : !llvm.ptr<i64>
+    llvm.br ^bb80
+  ^bb80:  // 2 preds: ^bb78, ^bb79
+    %644 = llvm.add %634, %5  : i64
+    llvm.br ^bb77(%644 : i64)
+  ^bb81:  // pred: ^bb77
+    %645 = llvm.load %488 : !llvm.ptr<i64>
+    %646 = llvm.getelementptr %554[%609] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %645, %646 : !llvm.ptr<i64>
+    llvm.br ^bb69(%617 : i64)
+  ^bb82:  // pred: ^bb69
+    %647 = llvm.getelementptr %554[%56] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %6, %647 : !llvm.ptr<i64>
+    %648 = llvm.mlir.null : !llvm.ptr<i64>
+    %649 = llvm.getelementptr %648[1] : (!llvm.ptr<i64>) -> !llvm.ptr<i64>
+    %650 = llvm.ptrtoint %649 : !llvm.ptr<i64> to i64
+    %651 = llvm.call @malloc(%650) : (i64) -> !llvm.ptr<i8>
+    %652 = llvm.bitcast %651 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    llvm.store %6, %652 : !llvm.ptr<i64>
+    llvm.br ^bb83(%6 : i64)
+  ^bb83(%653: i64):  // 2 preds: ^bb82, ^bb84
+    %654 = llvm.icmp "slt" %653, %505 : i64
+    llvm.cond_br %654, ^bb84, ^bb85
+  ^bb84:  // pred: ^bb83
+    %655 = llvm.getelementptr %554[%653] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %656 = llvm.load %655 : !llvm.ptr<i64>
+    %657 = llvm.load %652 : !llvm.ptr<i64>
+    %658 = llvm.getelementptr %554[%653] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %657, %658 : !llvm.ptr<i64>
+    %659 = llvm.add %656, %657  : i64
+    llvm.store %659, %652 : !llvm.ptr<i64>
+    %660 = llvm.add %653, %5  : i64
+    llvm.br ^bb83(%660 : i64)
+  ^bb85:  // pred: ^bb83
+    %661 = llvm.load %652 : !llvm.ptr<i64>
+    %662 = llvm.mlir.null : !llvm.ptr<i64>
+    %663 = llvm.getelementptr %662[%661] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %664 = llvm.ptrtoint %663 : !llvm.ptr<i64> to i64
+    %665 = llvm.call @malloc(%664) : (i64) -> !llvm.ptr<i8>
+    %666 = llvm.bitcast %665 : !llvm.ptr<i8> to !llvm.ptr<i64>
+    %667 = llvm.mlir.undef : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+    %668 = llvm.insertvalue %666, %667[0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %669 = llvm.insertvalue %666, %668[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %670 = llvm.insertvalue %2, %669[2] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %671 = llvm.insertvalue %661, %670[3, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %672 = llvm.insertvalue %3, %671[4, 0] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %673 = llvm.mlir.null : !llvm.ptr<f64>
+    %674 = llvm.getelementptr %673[%661] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    %675 = llvm.ptrtoint %674 : !llvm.ptr<f64> to i64
+    %676 = llvm.call @malloc(%675) : (i64) -> !llvm.ptr<i8>
+    %677 = llvm.bitcast %676 : !llvm.ptr<i8> to !llvm.ptr<f64>
+    %678 = llvm.mlir.undef : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)>
+    %679 = llvm.insertvalue %677, %678[0] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %680 = llvm.insertvalue %677, %679[1] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %681 = llvm.insertvalue %2, %680[2] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %682 = llvm.insertvalue %661, %681[3, 0] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %683 = llvm.insertvalue %3, %682[4, 0] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)> 
+    %684 = llvm.load %63 : !llvm.ptr<i64>
+    llvm.br ^bb86(%6 : i64)
+  ^bb86(%685: i64):  // 2 preds: ^bb85, ^bb102
+    %686 = llvm.icmp "slt" %685, %684 : i64
+    llvm.cond_br %686, ^bb87, ^bb103
+  ^bb87:  // pred: ^bb86
+    %687 = llvm.getelementptr %554[%685] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %688 = llvm.load %687 : !llvm.ptr<i64>
+    llvm.store %688, %488 : !llvm.ptr<i64>
+    llvm.br ^bb88(%6 : i64)
+  ^bb88(%689: i64):  // 2 preds: ^bb87, ^bb89
+    %690 = llvm.icmp "slt" %689, %58 : i64
+    llvm.cond_br %690, ^bb89, ^bb90
+  ^bb89:  // pred: ^bb88
+    %691 = llvm.getelementptr %500[%689] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    llvm.store %20, %691 : !llvm.ptr<f64>
+    %692 = llvm.add %689, %5  : i64
+    llvm.br ^bb88(%692 : i64)
+  ^bb90:  // pred: ^bb88
+    %693 = llvm.add %685, %5  : i64
+    %694 = llvm.getelementptr %143[%685] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %695 = llvm.load %694 : !llvm.ptr<i64>
+    %696 = llvm.getelementptr %143[%693] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %697 = llvm.load %696 : !llvm.ptr<i64>
+    llvm.br ^bb91(%695 : i64)
+  ^bb91(%698: i64):  // 2 preds: ^bb90, ^bb92
+    %699 = llvm.icmp "slt" %698, %697 : i64
+    llvm.cond_br %699, ^bb92, ^bb93
+  ^bb92:  // pred: ^bb91
+    %700 = llvm.getelementptr %163[%698] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %701 = llvm.load %700 : !llvm.ptr<i64>
+    %702 = llvm.getelementptr %223[%698] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    %703 = llvm.load %702 : !llvm.ptr<f64>
+    %704 = llvm.getelementptr %500[%701] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    llvm.store %703, %704 : !llvm.ptr<f64>
+    %705 = llvm.add %698, %5  : i64
+    llvm.br ^bb91(%705 : i64)
+  ^bb93:  // pred: ^bb91
+    %706 = llvm.getelementptr %356[%685] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %707 = llvm.load %706 : !llvm.ptr<i64>
+    %708 = llvm.getelementptr %356[%693] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %709 = llvm.load %708 : !llvm.ptr<i64>
+    llvm.br ^bb94(%707 : i64)
+  ^bb94(%710: i64):  // 2 preds: ^bb93, ^bb98
+    %711 = llvm.icmp "slt" %710, %709 : i64
+    llvm.cond_br %711, ^bb95, ^bb99
+  ^bb95:  // pred: ^bb94
+    %712 = llvm.getelementptr %376[%710] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %713 = llvm.load %712 : !llvm.ptr<i64>
+    %714 = llvm.getelementptr %604[%713] : (!llvm.ptr<i1>, i64) -> !llvm.ptr<i1>
+    %715 = llvm.load %714 : !llvm.ptr<i1>
+    %716 = llvm.icmp "eq" %715, %19 : i1
+    llvm.cond_br %716, ^bb96, ^bb97
+  ^bb96:  // pred: ^bb95
+    %717 = llvm.getelementptr %500[%713] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    %718 = llvm.load %717 : !llvm.ptr<f64>
+    %719 = llvm.getelementptr %436[%710] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    %720 = llvm.load %719 : !llvm.ptr<f64>
+    %721 = llvm.fadd %718, %720  : f64
+    %722 = llvm.getelementptr %463[%713] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    llvm.store %721, %722 : !llvm.ptr<f64>
+    %723 = llvm.getelementptr %604[%713] : (!llvm.ptr<i1>, i64) -> !llvm.ptr<i1>
+    llvm.store %18, %723 : !llvm.ptr<i1>
+    %724 = llvm.load %488 : !llvm.ptr<i64>
+    %725 = llvm.getelementptr %666[%724] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    llvm.store %713, %725 : !llvm.ptr<i64>
+    %726 = llvm.add %724, %5  : i64
+    llvm.store %726, %488 : !llvm.ptr<i64>
+    llvm.br ^bb98
+  ^bb97:  // pred: ^bb95
+    %727 = llvm.getelementptr %500[%713] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    %728 = llvm.load %727 : !llvm.ptr<f64>
+    %729 = llvm.getelementptr %436[%710] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    %730 = llvm.load %729 : !llvm.ptr<f64>
+    %731 = llvm.fadd %728, %730  : f64
+    %732 = llvm.getelementptr %463[%713] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    llvm.store %731, %732 : !llvm.ptr<f64>
+    llvm.br ^bb98
+  ^bb98:  // 2 preds: ^bb96, ^bb97
+    %733 = llvm.add %710, %5  : i64
+    llvm.br ^bb94(%733 : i64)
+  ^bb99:  // pred: ^bb94
+    %734 = llvm.getelementptr %554[%685] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %735 = llvm.load %734 : !llvm.ptr<i64>
+    %736 = llvm.getelementptr %554[%693] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %737 = llvm.load %736 : !llvm.ptr<i64>
+    %738 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %672, %738 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %739 = llvm.bitcast %738 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %740 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %741 = llvm.insertvalue %3, %740[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %742 = llvm.insertvalue %739, %741[1] : !llvm.struct<(i64, ptr<i8>)> 
+    llvm.call @comet_sort_index(%3, %739, %735, %737) : (i64, !llvm.ptr<i8>, i64, i64) -> ()
+    llvm.br ^bb100(%735 : i64)
+  ^bb100(%743: i64):  // 2 preds: ^bb99, ^bb101
+    %744 = llvm.icmp "slt" %743, %737 : i64
+    llvm.cond_br %744, ^bb101, ^bb102
+  ^bb101:  // pred: ^bb100
+    %745 = llvm.getelementptr %666[%743] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+    %746 = llvm.load %745 : !llvm.ptr<i64>
+    %747 = llvm.getelementptr %463[%746] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    %748 = llvm.load %747 : !llvm.ptr<f64>
+    %749 = llvm.getelementptr %677[%743] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
+    llvm.store %748, %749 : !llvm.ptr<f64>
+    %750 = llvm.getelementptr %604[%746] : (!llvm.ptr<i1>, i64) -> !llvm.ptr<i1>
+    llvm.store %19, %750 : !llvm.ptr<i1>
+    %751 = llvm.add %743, %5  : i64
+    llvm.br ^bb100(%751 : i64)
+  ^bb102:  // pred: ^bb100
+    llvm.br ^bb86(%693 : i64)
+  ^bb103:  // pred: ^bb86
+    llvm.call @free(%456) : (!llvm.ptr<i8>) -> ()
+    llvm.call @free(%597) : (!llvm.ptr<i8>) -> ()
+    %752 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %516, %752 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %753 = llvm.bitcast %752 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %754 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %755 = llvm.insertvalue %3, %754[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %756 = llvm.insertvalue %753, %755[1] : !llvm.struct<(i64, ptr<i8>)> 
+    llvm.call @comet_print_memref_i64(%3, %753) : (i64, !llvm.ptr<i8>) -> ()
+    %757 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %527, %757 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %758 = llvm.bitcast %757 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %759 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %760 = llvm.insertvalue %3, %759[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %761 = llvm.insertvalue %758, %760[1] : !llvm.struct<(i64, ptr<i8>)> 
+    llvm.call @comet_print_memref_i64(%3, %758) : (i64, !llvm.ptr<i8>) -> ()
+    %762 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %538, %762 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %763 = llvm.bitcast %762 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %764 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %765 = llvm.insertvalue %3, %764[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %766 = llvm.insertvalue %763, %765[1] : !llvm.struct<(i64, ptr<i8>)> 
+    llvm.call @comet_print_memref_i64(%3, %763) : (i64, !llvm.ptr<i8>) -> ()
+    %767 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %549, %767 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %768 = llvm.bitcast %767 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %769 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %770 = llvm.insertvalue %3, %769[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %771 = llvm.insertvalue %768, %770[1] : !llvm.struct<(i64, ptr<i8>)> 
+    llvm.call @comet_print_memref_i64(%3, %768) : (i64, !llvm.ptr<i8>) -> ()
+    %772 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %560, %772 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %773 = llvm.bitcast %772 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %774 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %775 = llvm.insertvalue %3, %774[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %776 = llvm.insertvalue %773, %775[1] : !llvm.struct<(i64, ptr<i8>)> 
+    llvm.call @comet_print_memref_i64(%3, %773) : (i64, !llvm.ptr<i8>) -> ()
+    %777 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %672, %777 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %778 = llvm.bitcast %777 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %779 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %780 = llvm.insertvalue %3, %779[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %781 = llvm.insertvalue %778, %780[1] : !llvm.struct<(i64, ptr<i8>)> 
+    llvm.call @comet_print_memref_i64(%3, %778) : (i64, !llvm.ptr<i8>) -> ()
+    %782 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %575, %782 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %783 = llvm.bitcast %782 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %784 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %785 = llvm.insertvalue %3, %784[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %786 = llvm.insertvalue %783, %785[1] : !llvm.struct<(i64, ptr<i8>)> 
+    llvm.call @comet_print_memref_i64(%3, %783) : (i64, !llvm.ptr<i8>) -> ()
+    %787 = llvm.alloca %3 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %586, %787 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %788 = llvm.bitcast %787 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %789 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %790 = llvm.insertvalue %3, %789[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %791 = llvm.insertvalue %788, %790[1] : !llvm.struct<(i64, ptr<i8>)> 
+    llvm.call @comet_print_memref_i64(%3, %788) : (i64, !llvm.ptr<i8>) -> ()
+    %792 = llvm.alloca %3 x !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %683, %792 : !llvm.ptr<struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)>>
+    %793 = llvm.bitcast %792 : !llvm.ptr<struct<(ptr<f64>, ptr<f64>, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr<i8>
+    %794 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+    %795 = llvm.insertvalue %3, %794[0] : !llvm.struct<(i64, ptr<i8>)> 
+    %796 = llvm.insertvalue %793, %795[1] : !llvm.struct<(i64, ptr<i8>)> 
+    llvm.call @comet_print_memref_f64(%3, %793) : (i64, !llvm.ptr<i8>) -> ()
+    llvm.return
   }
+  llvm.func @read_input_2D_f64(i32, i64, i64, i64, i64, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i64, !llvm.ptr<i8>, i32) attributes {sym_visibility = "private"}
+  llvm.func @read_input_sizes_2D_f64(i32, i64, i64, i64, i64, i64, !llvm.ptr<i8>, i32) attributes {sym_visibility = "private"}
+  llvm.func @comet_sort_index(i64, !llvm.ptr<i8>, i64, i64) attributes {sym_visibility = "private"}
+  llvm.func @comet_print_memref_f64(i64, !llvm.ptr<i8>) attributes {sym_visibility = "private"}
+  llvm.func @comet_print_memref_i64(i64, !llvm.ptr<i8>) attributes {sym_visibility = "private"}
 }
-
-
-// -----// IR Dump After {anonymous}::RemoveLabeledTensorOpPass () //----- //
-func.func @main() {
-  %c0 = arith.constant 0 : index
-  %c1 = arith.constant 1 : index
-  %0 = "ta.dynamic_index_label"(%c0, %c1) : (index, index) -> !ta.range
-  %c0_0 = arith.constant 0 : index
-  %c1_1 = arith.constant 1 : index
-  %1 = "ta.dynamic_index_label"(%c0_0, %c1_1) : (index, index) -> !ta.range
-  %2 = "ta.sparse_tensor_decl"(%0, %1) {format = "CSR", temporal_tensor = false} : (!ta.range, !ta.range) -> tensor<?x?xf64>
-  %3 = "ta.dense_tensor_decl"(%1, %0) {format = "Dense"} : (!ta.range, !ta.range) -> tensor<?x?xf64>
-  %4 = "ta.dense_tensor_decl"(%1, %0) {format = "Dense"} : (!ta.range, !ta.range) -> tensor<?x?xf64>
-  "ta.fill_from_file"(%2) {filename = "SPARSE_FILE_NAME0", readMode = 1 : i32} : (tensor<?x?xf64>) -> ()
-  "ta.fill"(%3) {value = 1.000000e+00 : f64} : (tensor<?x?xf64>) -> ()
-  %5 = "ta.mul"(%2, %3, %1, %0) {MaskType = "none", __alpha__ = 1.000000e+00 : f64, __beta__ = 0.000000e+00 : f64, formats = ["CSR", "Dense", "Dense"], indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d1, d0)>], operand_segment_sizes = array<i32: 1, 1, 2, 0>, semiring = "plusxy_times"} : (tensor<?x?xf64>, tensor<?x?xf64>, !ta.range, !ta.range) -> tensor<?x?xf64>
-  "ta.set_op"(%5, %4) {__beta__ = 0.000000e+00 : f64} : (tensor<?x?xf64>, tensor<?x?xf64>) -> ()
-  "ta.print"(%2) : (tensor<?x?xf64>) -> ()
-  "ta.print"(%3) : (tensor<?x?xf64>) -> ()
-  "ta.print"(%4) : (tensor<?x?xf64>) -> ()
-  return
-}
-
-// -----// IR Dump After {anonymous}::LowerTAMulChainPass () //----- //
-func.func @main() {
-  %c0 = arith.constant 0 : index
-  %c1 = arith.constant 1 : index
-  %0 = "ta.dynamic_index_label"(%c0, %c1) : (index, index) -> !ta.range
-  %c0_0 = arith.constant 0 : index
-  %c1_1 = arith.constant 1 : index
-  %1 = "ta.dynamic_index_label"(%c0_0, %c1_1) : (index, index) -> !ta.range
-  %2 = "ta.sparse_tensor_decl"(%0, %1) {format = "CSR", temporal_tensor = false} : (!ta.range, !ta.range) -> tensor<?x?xf64>
-  %3 = "ta.dense_tensor_decl"(%1, %0) {format = "Dense"} : (!ta.range, !ta.range) -> tensor<?x?xf64>
-  %4 = "ta.dense_tensor_decl"(%1, %0) {format = "Dense"} : (!ta.range, !ta.range) -> tensor<?x?xf64>
-  "ta.fill_from_file"(%2) {filename = "SPARSE_FILE_NAME0", readMode = 1 : i32} : (tensor<?x?xf64>) -> ()
-  "ta.fill"(%3) {value = 1.000000e+00 : f64} : (tensor<?x?xf64>) -> ()
-  %5 = "ta.mul"(%2, %3, %1, %0) {MaskType = "none", __alpha__ = 1.000000e+00 : f64, __beta__ = 0.000000e+00 : f64, formats = ["CSR", "Dense", "Dense"], indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d1, d0)>], operand_segment_sizes = array<i32: 1, 1, 2, 0>, semiring = "plusxy_times"} : (tensor<?x?xf64>, tensor<?x?xf64>, !ta.range, !ta.range) -> tensor<?x?xf64>
-  "ta.set_op"(%5, %4) {__beta__ = 0.000000e+00 : f64} : (tensor<?x?xf64>, tensor<?x?xf64>) -> ()
-  "ta.print"(%2) : (tensor<?x?xf64>) -> ()
-  "ta.print"(%3) : (tensor<?x?xf64>) -> ()
-  "ta.print"(%4) : (tensor<?x?xf64>) -> ()
-  return
-}
-
-// -----// IR Dump After {anonymous}::TensorAlgebraCheckImplicitTensorDeclPass () //----- //
-func.func @main() {
-  %c0 = arith.constant 0 : index
-  %c1 = arith.constant 1 : index
-  %0 = "ta.dynamic_index_label"(%c0, %c1) : (index, index) -> !ta.range
-  %c0_0 = arith.constant 0 : index
-  %c1_1 = arith.constant 1 : index
-  %1 = "ta.dynamic_index_label"(%c0_0, %c1_1) : (index, index) -> !ta.range
-  %2 = "ta.sparse_tensor_decl"(%0, %1) {format = "CSR", temporal_tensor = false} : (!ta.range, !ta.range) -> tensor<?x?xf64>
-  %3 = "ta.dense_tensor_decl"(%1, %0) {format = "Dense"} : (!ta.range, !ta.range) -> tensor<?x?xf64>
-  %4 = "ta.dense_tensor_decl"(%1, %0) {format = "Dense"} : (!ta.range, !ta.range) -> tensor<?x?xf64>
-  "ta.fill_from_file"(%2) {filename = "SPARSE_FILE_NAME0", readMode = 1 : i32} : (tensor<?x?xf64>) -> ()
-  "ta.fill"(%3) {value = 1.000000e+00 : f64} : (tensor<?x?xf64>) -> ()
-  %5 = "ta.mul"(%2, %3, %1, %0) {MaskType = "none", __alpha__ = 1.000000e+00 : f64, __beta__ = 0.000000e+00 : f64, formats = ["CSR", "Dense", "Dense"], indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d1, d0)>], operand_segment_sizes = array<i32: 1, 1, 2, 0>, semiring = "plusxy_times"} : (tensor<?x?xf64>, tensor<?x?xf64>, !ta.range, !ta.range) -> tensor<?x?xf64>
-  "ta.set_op"(%5, %4) {__beta__ = 0.000000e+00 : f64} : (tensor<?x?xf64>, tensor<?x?xf64>) -> ()
-  "ta.print"(%2) : (tensor<?x?xf64>) -> ()
-  "ta.print"(%3) : (tensor<?x?xf64>) -> ()
-  "ta.print"(%4) : (tensor<?x?xf64>) -> ()
-  return
-}
-
-// -----// IR Dump After {anonymous}::LowerTensorAlgebraToIndexTreePass () //----- //
-func.func @main() {
-  %c0 = arith.constant 0 : index
-  %c1 = arith.constant 1 : index
-  %0 = "ta.dynamic_index_label"(%c0, %c1) : (index, index) -> !ta.range
-  %c0_0 = arith.constant 0 : index
-  %c1_1 = arith.constant 1 : index
-  %1 = "ta.dynamic_index_label"(%c0_0, %c1_1) : (index, index) -> !ta.range
-  %2 = "ta.sparse_tensor_decl"(%0, %1) {format = "CSR", temporal_tensor = false} : (!ta.range, !ta.range) -> tensor<?x?xf64>
-  %3 = "ta.dense_tensor_decl"(%1, %0) {format = "Dense"} : (!ta.range, !ta.range) -> tensor<?x?xf64>
-  %4 = "ta.dense_tensor_decl"(%1, %0) {format = "Dense"} : (!ta.range, !ta.range) -> tensor<?x?xf64>
-  "ta.fill_from_file"(%2) {filename = "SPARSE_FILE_NAME0", readMode = 1 : i32} : (tensor<?x?xf64>) -> ()
-  "ta.fill"(%3) {value = 1.000000e+00 : f64} : (tensor<?x?xf64>) -> ()
-  %5 = "it.ComputeRHS"(%2, %3) {allBlocks = [["UNK", "UNK"], ["UNK", "UNK"]], allFormats = [["D", "CU"], ["D", "D"]], allPerms = [[0, 1], [1, 0]]} : (tensor<?x?xf64>, tensor<?x?xf64>) -> tensor<*xf64>
-  %6 = "it.ComputeLHS"(%4) {allBlocks = [["UNK", "UNK"]], allFormats = [["D", "D"]], allPerms = [[1, 0]]} : (tensor<?x?xf64>) -> tensor<*xf64>
-  %7 = "it.Compute"(%5, %6) {MaskType = "none", comp_worksp_opt = false, semiring = "plusxy_times"} : (tensor<*xf64>, tensor<*xf64>) -> i64
-  %8 = "it.Indices"(%7) {indices = [1]} : (i64) -> i64
-  %9 = "it.Indices"(%8) {indices = [0]} : (i64) -> i64
-  %10 = "it.itree"(%9) : (i64) -> i64
-  "ta.print"(%2) : (tensor<?x?xf64>) -> ()
-  "ta.print"(%3) : (tensor<?x?xf64>) -> ()
-  "ta.print"(%4) : (tensor<?x?xf64>) -> ()
-  return
-}
-
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1144  SparseInputTensorDeclOpLowering in format begin
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1145 %2 = "ta.sparse_tensor_decl"(%0, %1) {format = "CSR", temporal_tensor = false} : (!ta.range, !ta.range) -> tensor<?x?xf64>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1156  --- CSR
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1158  2
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1168 
-Check the tensor is input or output
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1169 %5 = "it.ComputeRHS"(%2, %3) {allBlocks = [["UNK", "UNK"], ["UNK", "UNK"]], allFormats = [["D", "CU"], ["D", "D"]], allPerms = [[0, 1], [1, 0]]} : (tensor<?x?xf64>, tensor<?x?xf64>) -> tensor<*xf64>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1245  used in ta.itComputeRHS op
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1168 
-Check the tensor is input or output
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1169 "ta.print"(%2) : (tensor<?x?xf64>) -> ()
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1260  the tensor is in PrintOp
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1168 
-Check the tensor is input or output
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1169 "ta.fill_from_file"(%2) {filename = "SPARSE_FILE_NAME0", readMode = 1 : i32} : (tensor<?x?xf64>) -> ()
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1256  the tensor is in fill_from_file op
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1287  isOutputTensor: 0
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1296  CSR isDense: 0
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1301  Sparse input tensor 
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1356  filename: SPARSE_FILE_NAME0
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1360  SPARSE_FILE_NAME0
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1363  readMode: 1
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1367 sp_decl.getParameterCount(): 13
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1370  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1371 %alloc = memref.alloc() : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1376  Get the dim_format
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1395  Parsed fileID: 0
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1413  2D
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:291 Inserting insertReadFileLibCall
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:311  Rank Size is 2
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:346 Adding read_input_2D_f64 to the module
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:373 Adding read_input_sizes_2D_f64 to the module
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %2 = memref.load %alloc[%c0_4] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %3 = memref.load %alloc[%c1_5] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %4 = memref.load %alloc[%c2_6] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %5 = memref.load %alloc[%c3_7] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %6 = memref.load %alloc[%c4] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %7 = memref.load %alloc[%c5] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %8 = memref.load %alloc[%c6] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %9 = memref.load %alloc[%c7] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %10 = memref.load %alloc[%c8] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %11 = memref.load %alloc[%c9] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %12 = memref.load %alloc[%c10] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %13 = memref.load %alloc[%c11] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1466  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1467 %14 = memref.load %alloc[%c12] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1477 %2 = memref.load %alloc[%c0_4] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1479  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1480 %alloc_8 = memref.alloc(%2) : memref<?xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1477 %3 = memref.load %alloc[%c1_5] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1479  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1480 %alloc_13 = memref.alloc(%3) : memref<?xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1477 %4 = memref.load %alloc[%c2_6] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1479  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1480 %alloc_18 = memref.alloc(%4) : memref<?xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1477 %5 = memref.load %alloc[%c3_7] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1479  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1480 %alloc_23 = memref.alloc(%5) : memref<?xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1477 %6 = memref.load %alloc[%c4] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1479  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1480 %alloc_28 = memref.alloc(%6) : memref<?xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1477 %7 = memref.load %alloc[%c5] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1479  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1480 %alloc_33 = memref.alloc(%7) : memref<?xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1477 %8 = memref.load %alloc[%c6] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1479  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1480 %alloc_38 = memref.alloc(%8) : memref<?xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1477 %9 = memref.load %alloc[%c7] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1479  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1480 %alloc_43 = memref.alloc(%9) : memref<?xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1492  /home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1493 %alloc_48 = memref.alloc(%10) : memref<?xf64>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1556  Generate read_input_2D or read_input_3D functions
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1603 SparseTensorConstructOp generated for input sparse tensor:
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1604 %24 = ta.sptensor_construct(%15, %16, %17, %18, %19, %20, %21, %22, %23, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12) {tensor_rank = 2 : i32} : (tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xf64>, index, index, index, index, index, index, index, index, index, index, index) -> (!ta.sptensor<tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xf64>, index, index, index, index, index, index, index, index, index, index, index>)
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1612 %0 = "ta.dynamic_index_label"(%c0, %c1) : (index, index) -> !ta.range
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1613 %0 = "ta.dynamic_index_label"(%c0, %c1) : (index, index) -> !ta.range
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1622 %25 = "ta.static_index_label"(%c0, %11, %c1) : (index, index, index) -> !ta.range
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1612 %1 = "ta.dynamic_index_label"(%c0_0, %c1_1) : (index, index) -> !ta.range
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1613 %1 = "ta.dynamic_index_label"(%c0_0, %c1_1) : (index, index) -> !ta.range
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1622 %26 = "ta.static_index_label"(%c0_0, %12, %c1_1) : (index, index, index) -> !ta.range
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1668  SparseInputTensorDeclOpLowering in format end
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1803 ---------------SparseTensorDeclLoweringPass end
-// -----// IR Dump After {anonymous}::SparseTensorDeclLoweringPass () //----- //
-func.func @main() {
-  %c0 = arith.constant 0 : index
-  %c1 = arith.constant 1 : index
-  %0 = "ta.dynamic_index_label"(%c0, %c1) : (index, index) -> !ta.range
-  %c0_0 = arith.constant 0 : index
-  %c1_1 = arith.constant 1 : index
-  %1 = "ta.dynamic_index_label"(%c0_0, %c1_1) : (index, index) -> !ta.range
-  %alloc = memref.alloc() : memref<13xindex>
-  %cast = memref.cast %alloc : memref<13xindex> to memref<*xindex>
-  %c-1 = arith.constant -1 : index
-  %c0_2 = arith.constant 0 : index
-  %c1_3 = arith.constant 1 : index
-  %c2 = arith.constant 2 : index
-  %c3 = arith.constant 3 : index
-  %c0_i32 = arith.constant 0 : i32
-  %c1_i32 = arith.constant 1 : i32
-  call @read_input_sizes_2D_f64(%c0_i32, %c0_2, %c-1, %c1_3, %c-1, %cast, %c1_i32) {filename = "SPARSE_FILE_NAME0"} : (i32, index, index, index, index, memref<*xindex>, i32) -> ()
-  %c0_4 = arith.constant 0 : index
-  %2 = memref.load %alloc[%c0_4] : memref<13xindex>
-  %c1_5 = arith.constant 1 : index
-  %3 = memref.load %alloc[%c1_5] : memref<13xindex>
-  %c2_6 = arith.constant 2 : index
-  %4 = memref.load %alloc[%c2_6] : memref<13xindex>
-  %c3_7 = arith.constant 3 : index
-  %5 = memref.load %alloc[%c3_7] : memref<13xindex>
-  %c4 = arith.constant 4 : index
-  %6 = memref.load %alloc[%c4] : memref<13xindex>
-  %c5 = arith.constant 5 : index
-  %7 = memref.load %alloc[%c5] : memref<13xindex>
-  %c6 = arith.constant 6 : index
-  %8 = memref.load %alloc[%c6] : memref<13xindex>
-  %c7 = arith.constant 7 : index
-  %9 = memref.load %alloc[%c7] : memref<13xindex>
-  %c8 = arith.constant 8 : index
-  %10 = memref.load %alloc[%c8] : memref<13xindex>
-  %c9 = arith.constant 9 : index
-  %11 = memref.load %alloc[%c9] : memref<13xindex>
-  %c10 = arith.constant 10 : index
-  %12 = memref.load %alloc[%c10] : memref<13xindex>
-  %c11 = arith.constant 11 : index
-  %13 = memref.load %alloc[%c11] : memref<13xindex>
-  %c12 = arith.constant 12 : index
-  %14 = memref.load %alloc[%c12] : memref<13xindex>
-  %alloc_8 = memref.alloc(%2) : memref<?xindex>
-  %c0_9 = arith.constant 0 : index
-  %c0_10 = arith.constant 0 : index
-  %c1_11 = arith.constant 1 : index
-  scf.for %arg0 = %c0_10 to %2 step %c1_11 {
-    memref.store %c0_9, %alloc_8[%arg0] : memref<?xindex>
-  }
-  %cast_12 = memref.cast %alloc_8 : memref<?xindex> to memref<*xindex>
-  %alloc_13 = memref.alloc(%3) : memref<?xindex>
-  %c0_14 = arith.constant 0 : index
-  %c0_15 = arith.constant 0 : index
-  %c1_16 = arith.constant 1 : index
-  scf.for %arg0 = %c0_15 to %3 step %c1_16 {
-    memref.store %c0_14, %alloc_13[%arg0] : memref<?xindex>
-  }
-  %cast_17 = memref.cast %alloc_13 : memref<?xindex> to memref<*xindex>
-  %alloc_18 = memref.alloc(%4) : memref<?xindex>
-  %c0_19 = arith.constant 0 : index
-  %c0_20 = arith.constant 0 : index
-  %c1_21 = arith.constant 1 : index
-  scf.for %arg0 = %c0_20 to %4 step %c1_21 {
-    memref.store %c0_19, %alloc_18[%arg0] : memref<?xindex>
-  }
-  %cast_22 = memref.cast %alloc_18 : memref<?xindex> to memref<*xindex>
-  %alloc_23 = memref.alloc(%5) : memref<?xindex>
-  %c0_24 = arith.constant 0 : index
-  %c0_25 = arith.constant 0 : index
-  %c1_26 = arith.constant 1 : index
-  scf.for %arg0 = %c0_25 to %5 step %c1_26 {
-    memref.store %c0_24, %alloc_23[%arg0] : memref<?xindex>
-  }
-  %cast_27 = memref.cast %alloc_23 : memref<?xindex> to memref<*xindex>
-  %alloc_28 = memref.alloc(%6) : memref<?xindex>
-  %c0_29 = arith.constant 0 : index
-  %c0_30 = arith.constant 0 : index
-  %c1_31 = arith.constant 1 : index
-  scf.for %arg0 = %c0_30 to %6 step %c1_31 {
-    memref.store %c0_29, %alloc_28[%arg0] : memref<?xindex>
-  }
-  %cast_32 = memref.cast %alloc_28 : memref<?xindex> to memref<*xindex>
-  %alloc_33 = memref.alloc(%7) : memref<?xindex>
-  %c0_34 = arith.constant 0 : index
-  %c0_35 = arith.constant 0 : index
-  %c1_36 = arith.constant 1 : index
-  scf.for %arg0 = %c0_35 to %7 step %c1_36 {
-    memref.store %c0_34, %alloc_33[%arg0] : memref<?xindex>
-  }
-  %cast_37 = memref.cast %alloc_33 : memref<?xindex> to memref<*xindex>
-  %alloc_38 = memref.alloc(%8) : memref<?xindex>
-  %c0_39 = arith.constant 0 : index
-  %c0_40 = arith.constant 0 : index
-  %c1_41 = arith.constant 1 : index
-  scf.for %arg0 = %c0_40 to %8 step %c1_41 {
-    memref.store %c0_39, %alloc_38[%arg0] : memref<?xindex>
-  }
-  %cast_42 = memref.cast %alloc_38 : memref<?xindex> to memref<*xindex>
-  %alloc_43 = memref.alloc(%9) : memref<?xindex>
-  %c0_44 = arith.constant 0 : index
-  %c0_45 = arith.constant 0 : index
-  %c1_46 = arith.constant 1 : index
-  scf.for %arg0 = %c0_45 to %9 step %c1_46 {
-    memref.store %c0_44, %alloc_43[%arg0] : memref<?xindex>
-  }
-  %cast_47 = memref.cast %alloc_43 : memref<?xindex> to memref<*xindex>
-  %alloc_48 = memref.alloc(%10) : memref<?xf64>
-  %cst = arith.constant 0.000000e+00 : f64
-  %c0_49 = arith.constant 0 : index
-  %c1_50 = arith.constant 1 : index
-  scf.for %arg0 = %c0_49 to %10 step %c1_50 {
-    memref.store %cst, %alloc_48[%arg0] : memref<?xf64>
-  }
-  %cast_51 = memref.cast %alloc_48 : memref<?xf64> to memref<*xf64>
-  call @read_input_2D_f64(%c0_i32, %c0_2, %c-1, %c1_3, %c-1, %cast_12, %cast_17, %cast_22, %cast_27, %cast_32, %cast_37, %cast_42, %cast_47, %cast_51, %c1_i32) {filename = "SPARSE_FILE_NAME0"} : (i32, index, index, index, index, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xf64>, i32) -> ()
-  %15 = bufferization.to_tensor %alloc_8 : memref<?xindex>
-  %16 = bufferization.to_tensor %alloc_13 : memref<?xindex>
-  %17 = bufferization.to_tensor %alloc_18 : memref<?xindex>
-  %18 = bufferization.to_tensor %alloc_23 : memref<?xindex>
-  %19 = bufferization.to_tensor %alloc_28 : memref<?xindex>
-  %20 = bufferization.to_tensor %alloc_33 : memref<?xindex>
-  %21 = bufferization.to_tensor %alloc_38 : memref<?xindex>
-  %22 = bufferization.to_tensor %alloc_43 : memref<?xindex>
-  %23 = bufferization.to_tensor %alloc_48 : memref<?xf64>
-  %24 = ta.sptensor_construct(%15, %16, %17, %18, %19, %20, %21, %22, %23, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12) {tensor_rank = 2 : i32} : (tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xf64>, index, index, index, index, index, index, index, index, index, index, index) -> (!ta.sptensor<tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xf64>, index, index, index, index, index, index, index, index, index, index, index>)
-  %25 = "ta.static_index_label"(%c0, %11, %c1) : (index, index, index) -> !ta.range
-  %26 = "ta.static_index_label"(%c0_0, %12, %c1_1) : (index, index, index) -> !ta.range
-  %27 = "ta.dense_tensor_decl"(%26, %25) {format = "Dense"} : (!ta.range, !ta.range) -> tensor<?x?xf64>
-  %28 = "ta.dense_tensor_decl"(%26, %25) {format = "Dense"} : (!ta.range, !ta.range) -> tensor<?x?xf64>
-  "ta.fill"(%27) {value = 1.000000e+00 : f64} : (tensor<?x?xf64>) -> ()
-  %29 = "it.ComputeRHS"(%24, %27) {allBlocks = [["UNK", "UNK"], ["UNK", "UNK"]], allFormats = [["D", "CU"], ["D", "D"]], allPerms = [[0, 1], [1, 0]]} : (!ta.sptensor<tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xf64>, index, index, index, index, index, index, index, index, index, index, index>, tensor<?x?xf64>) -> tensor<*xf64>
-  %30 = "it.ComputeLHS"(%28) {allBlocks = [["UNK", "UNK"]], allFormats = [["D", "D"]], allPerms = [[1, 0]]} : (tensor<?x?xf64>) -> tensor<*xf64>
-  %31 = "it.Compute"(%29, %30) {MaskType = "none", comp_worksp_opt = false, semiring = "plusxy_times"} : (tensor<*xf64>, tensor<*xf64>) -> i64
-  %32 = "it.Indices"(%31) {indices = [1]} : (i64) -> i64
-  %33 = "it.Indices"(%32) {indices = [0]} : (i64) -> i64
-  %34 = "it.itree"(%33) : (i64) -> i64
-  "ta.print"(%24) : (!ta.sptensor<tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xf64>, index, index, index, index, index, index, index, index, index, index, index>) -> ()
-  "ta.print"(%27) : (tensor<?x?xf64>) -> ()
-  "ta.print"(%28) : (tensor<?x?xf64>) -> ()
-  return
-}
-
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1056 --------------DenseTensorDeclarationLowering in format begin
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1058 
-
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1060 %27 = "ta.dense_tensor_decl"(%26, %25) {format = "Dense"} : (!ta.range, !ta.range) -> tensor<?x?xf64>
------------------------------------
-memref<?x?xf64>
------------------------------------
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1077 %26 = "ta.static_index_label"(%c0_0, %12, %c1_1) : (index, index, index) -> !ta.range
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1083 %12 = memref.load %alloc[%c10] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1077 %25 = "ta.static_index_label"(%c0, %11, %c1) : (index, index, index) -> !ta.range
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1083 %11 = memref.load %alloc[%c9] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1092 %29 = "it.ComputeRHS"(%24, %27) {allBlocks = [["UNK", "UNK"], ["UNK", "UNK"]], allFormats = [["D", "CU"], ["D", "D"]], allPerms = [[0, 1], [1, 0]]} : (!ta.sptensor<tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xf64>, index, index, index, index, index, index, index, index, index, index, index>, tensor<?x?xf64>) -> tensor<*xf64>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1092 "ta.print"(%27) : (tensor<?x?xf64>) -> ()
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1092 "ta.fill"(%27) {value = 1.000000e+00 : f64} : (tensor<?x?xf64>) -> ()
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1097  AllocOp for initialization is_filled: 1 
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1103 %alloc_52 = memref.alloc(%12, %11) : memref<?x?xf64>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1115  TensorLoad:
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1116 %27 = bufferization.to_tensor %alloc_52 : memref<?x?xf64>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1122 --------------DenseTensorDeclarationLowering in format end
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1123 
-
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1056 --------------DenseTensorDeclarationLowering in format begin
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1058 
-
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1060 %29 = "ta.dense_tensor_decl"(%26, %25) {format = "Dense"} : (!ta.range, !ta.range) -> tensor<?x?xf64>
------------------------------------
-memref<?x?xf64>
------------------------------------
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1077 %26 = "ta.static_index_label"(%c0_0, %12, %c1_1) : (index, index, index) -> !ta.range
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1083 %12 = memref.load %alloc[%c10] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1077 %25 = "ta.static_index_label"(%c0, %11, %c1) : (index, index, index) -> !ta.range
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1083 %11 = memref.load %alloc[%c9] : memref<13xindex>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1092 %31 = "it.ComputeLHS"(%29) {allBlocks = [["UNK", "UNK"]], allFormats = [["D", "D"]], allPerms = [[1, 0]]} : (tensor<?x?xf64>) -> tensor<*xf64>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1092 "ta.print"(%29) : (tensor<?x?xf64>) -> ()
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1097  AllocOp for initialization is_filled: 0 
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1109 %101 = "memref.alloc"(%36, %34) {operand_segment_sizes = array<i32: 2, 0>} : (index, index) -> memref<?x?xf64>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1115  TensorLoad:
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1116 %105 = "bufferization.to_tensor"(%101) : (memref<?x?xf64>) -> tensor<?x?xf64>
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1122 --------------DenseTensorDeclarationLowering in format end
-/home/patrick/Work/PNNL/COMET/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp:1123 
-
-loc("first.ta":8:2): error: 'memref.store' op store index operand count not equal to memref rank
-// -----// IR Dump After {anonymous}::DenseTensorDeclLoweringPass Failed () //----- //
-"func.func"() ({
-  %0 = "arith.constant"() {value = 0 : index} : () -> index
-  %1 = "arith.constant"() {value = 1 : index} : () -> index
-  %2 = "ta.dynamic_index_label"(%0, %1) : (index, index) -> !ta.range
-  %3 = "arith.constant"() {value = 0 : index} : () -> index
-  %4 = "arith.constant"() {value = 1 : index} : () -> index
-  %5 = "ta.dynamic_index_label"(%3, %4) : (index, index) -> !ta.range
-  %6 = "memref.alloc"() {operand_segment_sizes = array<i32: 0, 0>} : () -> memref<13xindex>
-  %7 = "memref.cast"(%6) : (memref<13xindex>) -> memref<*xindex>
-  %8 = "arith.constant"() {value = -1 : index} : () -> index
-  %9 = "arith.constant"() {value = 0 : index} : () -> index
-  %10 = "arith.constant"() {value = 1 : index} : () -> index
-  %11 = "arith.constant"() {value = 2 : index} : () -> index
-  %12 = "arith.constant"() {value = 3 : index} : () -> index
-  %13 = "arith.constant"() {value = 0 : i32} : () -> i32
-  %14 = "arith.constant"() {value = 1 : i32} : () -> i32
-  "func.call"(%13, %9, %8, %10, %8, %7, %14) {callee = @read_input_sizes_2D_f64, filename = "SPARSE_FILE_NAME0"} : (i32, index, index, index, index, memref<*xindex>, i32) -> ()
-  %15 = "arith.constant"() {value = 0 : index} : () -> index
-  %16 = "memref.load"(%6, %15) {nontemporal = false} : (memref<13xindex>, index) -> index
-  %17 = "arith.constant"() {value = 1 : index} : () -> index
-  %18 = "memref.load"(%6, %17) {nontemporal = false} : (memref<13xindex>, index) -> index
-  %19 = "arith.constant"() {value = 2 : index} : () -> index
-  %20 = "memref.load"(%6, %19) {nontemporal = false} : (memref<13xindex>, index) -> index
-  %21 = "arith.constant"() {value = 3 : index} : () -> index
-  %22 = "memref.load"(%6, %21) {nontemporal = false} : (memref<13xindex>, index) -> index
-  %23 = "arith.constant"() {value = 4 : index} : () -> index
-  %24 = "memref.load"(%6, %23) {nontemporal = false} : (memref<13xindex>, index) -> index
-  %25 = "arith.constant"() {value = 5 : index} : () -> index
-  %26 = "memref.load"(%6, %25) {nontemporal = false} : (memref<13xindex>, index) -> index
-  %27 = "arith.constant"() {value = 6 : index} : () -> index
-  %28 = "memref.load"(%6, %27) {nontemporal = false} : (memref<13xindex>, index) -> index
-  %29 = "arith.constant"() {value = 7 : index} : () -> index
-  %30 = "memref.load"(%6, %29) {nontemporal = false} : (memref<13xindex>, index) -> index
-  %31 = "arith.constant"() {value = 8 : index} : () -> index
-  %32 = "memref.load"(%6, %31) {nontemporal = false} : (memref<13xindex>, index) -> index
-  %33 = "arith.constant"() {value = 9 : index} : () -> index
-  %34 = "memref.load"(%6, %33) {nontemporal = false} : (memref<13xindex>, index) -> index
-  %35 = "arith.constant"() {value = 10 : index} : () -> index
-  %36 = "memref.load"(%6, %35) {nontemporal = false} : (memref<13xindex>, index) -> index
-  %37 = "arith.constant"() {value = 11 : index} : () -> index
-  %38 = "memref.load"(%6, %37) {nontemporal = false} : (memref<13xindex>, index) -> index
-  %39 = "arith.constant"() {value = 12 : index} : () -> index
-  %40 = "memref.load"(%6, %39) {nontemporal = false} : (memref<13xindex>, index) -> index
-  %41 = "memref.alloc"(%16) {operand_segment_sizes = array<i32: 1, 0>} : (index) -> memref<?xindex>
-  %42 = "arith.constant"() {value = 0 : index} : () -> index
-  %43 = "arith.constant"() {value = 0 : index} : () -> index
-  %44 = "arith.constant"() {value = 1 : index} : () -> index
-  "scf.for"(%43, %16, %44) ({
-  ^bb0(%arg0: index):
-    "memref.store"(%42, %41, %arg0) {nontemporal = false} : (index, memref<?xindex>, index) -> ()
-    "scf.yield"() : () -> ()
-  }) : (index, index, index) -> ()
-  %45 = "memref.cast"(%41) : (memref<?xindex>) -> memref<*xindex>
-  %46 = "memref.alloc"(%18) {operand_segment_sizes = array<i32: 1, 0>} : (index) -> memref<?xindex>
-  %47 = "arith.constant"() {value = 0 : index} : () -> index
-  %48 = "arith.constant"() {value = 0 : index} : () -> index
-  %49 = "arith.constant"() {value = 1 : index} : () -> index
-  "scf.for"(%48, %18, %49) ({
-  ^bb0(%arg0: index):
-    "memref.store"(%47, %46, %arg0) {nontemporal = false} : (index, memref<?xindex>, index) -> ()
-    "scf.yield"() : () -> ()
-  }) : (index, index, index) -> ()
-  %50 = "memref.cast"(%46) : (memref<?xindex>) -> memref<*xindex>
-  %51 = "memref.alloc"(%20) {operand_segment_sizes = array<i32: 1, 0>} : (index) -> memref<?xindex>
-  %52 = "arith.constant"() {value = 0 : index} : () -> index
-  %53 = "arith.constant"() {value = 0 : index} : () -> index
-  %54 = "arith.constant"() {value = 1 : index} : () -> index
-  "scf.for"(%53, %20, %54) ({
-  ^bb0(%arg0: index):
-    "memref.store"(%52, %51, %arg0) {nontemporal = false} : (index, memref<?xindex>, index) -> ()
-    "scf.yield"() : () -> ()
-  }) : (index, index, index) -> ()
-  %55 = "memref.cast"(%51) : (memref<?xindex>) -> memref<*xindex>
-  %56 = "memref.alloc"(%22) {operand_segment_sizes = array<i32: 1, 0>} : (index) -> memref<?xindex>
-  %57 = "arith.constant"() {value = 0 : index} : () -> index
-  %58 = "arith.constant"() {value = 0 : index} : () -> index
-  %59 = "arith.constant"() {value = 1 : index} : () -> index
-  "scf.for"(%58, %22, %59) ({
-  ^bb0(%arg0: index):
-    "memref.store"(%57, %56, %arg0) {nontemporal = false} : (index, memref<?xindex>, index) -> ()
-    "scf.yield"() : () -> ()
-  }) : (index, index, index) -> ()
-  %60 = "memref.cast"(%56) : (memref<?xindex>) -> memref<*xindex>
-  %61 = "memref.alloc"(%24) {operand_segment_sizes = array<i32: 1, 0>} : (index) -> memref<?xindex>
-  %62 = "arith.constant"() {value = 0 : index} : () -> index
-  %63 = "arith.constant"() {value = 0 : index} : () -> index
-  %64 = "arith.constant"() {value = 1 : index} : () -> index
-  "scf.for"(%63, %24, %64) ({
-  ^bb0(%arg0: index):
-    "memref.store"(%62, %61, %arg0) {nontemporal = false} : (index, memref<?xindex>, index) -> ()
-    "scf.yield"() : () -> ()
-  }) : (index, index, index) -> ()
-  %65 = "memref.cast"(%61) : (memref<?xindex>) -> memref<*xindex>
-  %66 = "memref.alloc"(%26) {operand_segment_sizes = array<i32: 1, 0>} : (index) -> memref<?xindex>
-  %67 = "arith.constant"() {value = 0 : index} : () -> index
-  %68 = "arith.constant"() {value = 0 : index} : () -> index
-  %69 = "arith.constant"() {value = 1 : index} : () -> index
-  "scf.for"(%68, %26, %69) ({
-  ^bb0(%arg0: index):
-    "memref.store"(%67, %66, %arg0) {nontemporal = false} : (index, memref<?xindex>, index) -> ()
-    "scf.yield"() : () -> ()
-  }) : (index, index, index) -> ()
-  %70 = "memref.cast"(%66) : (memref<?xindex>) -> memref<*xindex>
-  %71 = "memref.alloc"(%28) {operand_segment_sizes = array<i32: 1, 0>} : (index) -> memref<?xindex>
-  %72 = "arith.constant"() {value = 0 : index} : () -> index
-  %73 = "arith.constant"() {value = 0 : index} : () -> index
-  %74 = "arith.constant"() {value = 1 : index} : () -> index
-  "scf.for"(%73, %28, %74) ({
-  ^bb0(%arg0: index):
-    "memref.store"(%72, %71, %arg0) {nontemporal = false} : (index, memref<?xindex>, index) -> ()
-    "scf.yield"() : () -> ()
-  }) : (index, index, index) -> ()
-  %75 = "memref.cast"(%71) : (memref<?xindex>) -> memref<*xindex>
-  %76 = "memref.alloc"(%30) {operand_segment_sizes = array<i32: 1, 0>} : (index) -> memref<?xindex>
-  %77 = "arith.constant"() {value = 0 : index} : () -> index
-  %78 = "arith.constant"() {value = 0 : index} : () -> index
-  %79 = "arith.constant"() {value = 1 : index} : () -> index
-  "scf.for"(%78, %30, %79) ({
-  ^bb0(%arg0: index):
-    "memref.store"(%77, %76, %arg0) {nontemporal = false} : (index, memref<?xindex>, index) -> ()
-    "scf.yield"() : () -> ()
-  }) : (index, index, index) -> ()
-  %80 = "memref.cast"(%76) : (memref<?xindex>) -> memref<*xindex>
-  %81 = "memref.alloc"(%32) {operand_segment_sizes = array<i32: 1, 0>} : (index) -> memref<?xf64>
-  %82 = "arith.constant"() {value = 0.000000e+00 : f64} : () -> f64
-  %83 = "arith.constant"() {value = 0 : index} : () -> index
-  %84 = "arith.constant"() {value = 1 : index} : () -> index
-  "scf.for"(%83, %32, %84) ({
-  ^bb0(%arg0: index):
-    "memref.store"(%82, %81, %arg0) {nontemporal = false} : (f64, memref<?xf64>, index) -> ()
-    "scf.yield"() : () -> ()
-  }) : (index, index, index) -> ()
-  %85 = "memref.cast"(%81) : (memref<?xf64>) -> memref<*xf64>
-  "func.call"(%13, %9, %8, %10, %8, %45, %50, %55, %60, %65, %70, %75, %80, %85, %14) {callee = @read_input_2D_f64, filename = "SPARSE_FILE_NAME0"} : (i32, index, index, index, index, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xindex>, memref<*xf64>, i32) -> ()
-  %86 = "bufferization.to_tensor"(%41) : (memref<?xindex>) -> tensor<?xindex>
-  %87 = "bufferization.to_tensor"(%46) : (memref<?xindex>) -> tensor<?xindex>
-  %88 = "bufferization.to_tensor"(%51) : (memref<?xindex>) -> tensor<?xindex>
-  %89 = "bufferization.to_tensor"(%56) : (memref<?xindex>) -> tensor<?xindex>
-  %90 = "bufferization.to_tensor"(%61) : (memref<?xindex>) -> tensor<?xindex>
-  %91 = "bufferization.to_tensor"(%66) : (memref<?xindex>) -> tensor<?xindex>
-  %92 = "bufferization.to_tensor"(%71) : (memref<?xindex>) -> tensor<?xindex>
-  %93 = "bufferization.to_tensor"(%76) : (memref<?xindex>) -> tensor<?xindex>
-  %94 = "bufferization.to_tensor"(%81) : (memref<?xf64>) -> tensor<?xf64>
-  %95 = "ta.sptensor_construct"(%86, %87, %88, %89, %90, %91, %92, %93, %94, %16, %18, %20, %22, %24, %26, %28, %30, %32, %34, %36) {tensor_rank = 2 : i32} : (tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xf64>, index, index, index, index, index, index, index, index, index, index, index) -> !ta.sptensor<tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xf64>, index, index, index, index, index, index, index, index, index, index, index>
-  %96 = "ta.static_index_label"(%0, %34, %1) : (index, index, index) -> !ta.range
-  %97 = "ta.static_index_label"(%3, %36, %4) : (index, index, index) -> !ta.range
-  %98 = "memref.alloc"(%36, %34) {alignment = 32 : i64, operand_segment_sizes = array<i32: 2, 0>} : (index, index) -> memref<?x?xf64>
-  %99 = "bufferization.to_tensor"(%98) : (memref<?x?xf64>) -> tensor<?x?xf64>
-  %100 = "memref.alloc"(%36, %34) {alignment = 32 : i64, operand_segment_sizes = array<i32: 2, 0>} : (index, index) -> memref<?x?xf64>
-  %101 = "arith.constant"() {value = 0.000000e+00 : f64} : () -> f64
-  %102 = "arith.constant"() {value = 0 : index} : () -> index
-  %103 = "arith.constant"() {value = 1 : index} : () -> index
-  "scf.for"(%102, %36, %103) ({
-  ^bb0(%arg0: index):
-    "memref.store"(%101, %100, %arg0) {nontemporal = false} : (f64, memref<?x?xf64>, index) -> ()
-    "scf.yield"() : () -> ()
-  }) : (index, index, index) -> ()
-  %104 = "bufferization.to_tensor"(%100) : (memref<?x?xf64>) -> tensor<?x?xf64>
-  "ta.fill"(%99) {value = 1.000000e+00 : f64} : (tensor<?x?xf64>) -> ()
-  %105 = "it.ComputeRHS"(%95, %99) {allBlocks = [["UNK", "UNK"], ["UNK", "UNK"]], allFormats = [["D", "CU"], ["D", "D"]], allPerms = [[0, 1], [1, 0]]} : (!ta.sptensor<tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xf64>, index, index, index, index, index, index, index, index, index, index, index>, tensor<?x?xf64>) -> tensor<*xf64>
-  %106 = "it.ComputeLHS"(%104) {allBlocks = [["UNK", "UNK"]], allFormats = [["D", "D"]], allPerms = [[1, 0]]} : (tensor<?x?xf64>) -> tensor<*xf64>
-  %107 = "it.Compute"(%105, %106) {MaskType = "none", comp_worksp_opt = false, semiring = "plusxy_times"} : (tensor<*xf64>, tensor<*xf64>) -> i64
-  %108 = "it.Indices"(%107) {indices = [1]} : (i64) -> i64
-  %109 = "it.Indices"(%108) {indices = [0]} : (i64) -> i64
-  %110 = "it.itree"(%109) : (i64) -> i64
-  "ta.print"(%95) : (!ta.sptensor<tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xindex>, tensor<?xf64>, index, index, index, index, index, index, index, index, index, index, index>) -> ()
-  "ta.print"(%99) : (tensor<?x?xf64>) -> ()
-  "ta.print"(%104) : (tensor<?x?xf64>) -> ()
-  "func.return"() : () -> ()
-}) {function_type = () -> (), sym_name = "main"} : () -> ()
-
diff --git a/first.ta b/first.ta
index d2260157..527e37dc 100644
--- a/first.ta
+++ b/first.ta
@@ -1,19 +1,38 @@
+# Sparse matrix sparse matrix elementwise addition
+# Sparse matrix is in CSR format. Currently workspace transformation on the IndexTree dialect works for only CSR format
+# RUN: comet-opt --opt-comp-workspace --convert-ta-to-it --convert-to-loops --convert-to-llvm %s &> eltwise_add_CSRxCSR_oCSR.llvm
+# RUN: export SPARSE_FILE_NAME0=%comet_integration_test_data_dir/test_rank2.mtx
+# RUN: export SPARSE_FILE_NAME1=%comet_integration_test_data_dir/test_rank2_transpose.mtx
+# RUN: mlir-cpu-runner eltwise_add_CSRxCSR_oCSR.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext | FileCheck %s
+
+
 def main() {
-	#IndexLabel Declarations
-	IndexLabel [a] = [?];
-	IndexLabel [b] = [?];   
+    #IndexLabel Declarations
+    IndexLabel [i] = [?];
+    IndexLabel [j] = [?];
+    
+    #Tensor Declarations
+    Tensor<double> A([i, j], {CSR});	 
+    Tensor<double> B([i, j], {CSR});
+    Tensor<double> C([i, j], {CSR});
     
-	Tensor<double> A([a, b], {CSR});
-	Tensor<double> B([b, a], {Dense});
-	Tensor<double> C([b, a], {Dense});
-	
-    A[a, b] = comet_read(0);
-    B[b, a] = 1.0;
-    C[b, a] = A[a, b] * B[b, a];
-    #C[b, a] = 1.0;
+    #Tensor Readfile Operation
+    A[i, j] = comet_read(0);
+    B[i, j] = comet_read(1);
     
-	print(A);
-	print(B);
-	print(C);
+    #Tensor Contraction
+    C[i, j] = A[i, j] + B[i, j];
+    print(C);
 }
 
+# Print the result for verification.
+# CHECK: data = 
+# CHECK-NEXT: 5,
+# CHECK-NEXT: data = 
+# CHECK-NEXT: 0,
+# CHECK-NEXT: data = 
+# CHECK-NEXT: 0,2,4,5,7,9,
+# CHECK-NEXT: data = 
+# CHECK-NEXT: 0,3,1,4,2,0,3,1,4,
+# CHECK-NEXT: data = 
+# CHECK-NEXT: 2,5.5,4,7.7,6,5.5,8,7.7,10,
diff --git a/include/comet/Dialect/Utils/Utils.h b/include/comet/Dialect/Utils/Utils.h
index a4518423..def3a3c6 100644
--- a/include/comet/Dialect/Utils/Utils.h
+++ b/include/comet/Dialect/Utils/Utils.h
@@ -211,6 +211,7 @@ namespace mlir
 
     void getFormatsPermsOfComputeOp(Value computeOp,
                                     std::vector<std::vector<std::string>> &opFormats,
+                                    std::vector<std::vector<std::string>> &opBlocks,
                                     std::vector<std::vector<int>> &opPerms,
                                     std::vector<std::vector<bool>> &inputOutputMapping);
 
diff --git a/lib/Dialect/IndexTree/Transforms/Fusion.cpp b/lib/Dialect/IndexTree/Transforms/Fusion.cpp
index c2a0421b..f54f13ee 100644
--- a/lib/Dialect/IndexTree/Transforms/Fusion.cpp
+++ b/lib/Dialect/IndexTree/Transforms/Fusion.cpp
@@ -185,9 +185,10 @@ namespace
                   comet_debug() <<  " is_comp_worksp_opt: " << is_comp_worksp_opt << " semiring: " << semiring << "\n";
 
                   std::vector<std::vector<std::string>> opFormats;
+                  std::vector<std::vector<std::string>> opBlocks;
                   std::vector<std::vector<int>> opPerms;
                   std::vector<std::vector<bool> > inputOutputMapping;
-                  getFormatsPermsOfComputeOp(computeOp, opFormats, opPerms, inputOutputMapping);
+                  getFormatsPermsOfComputeOp(computeOp, opFormats, opBlocks, opPerms, inputOutputMapping);
                   /// opFormats
                   comet_debug() << "[";
                   for (auto strings: opFormats) {
@@ -537,7 +538,7 @@ mlir::Value IndexTreeKernelFusionPass::createReducedComputeRHS(
     SmallVector<StringRef, 8> blocks;
     if (b_i == tensor_id)
     { /// for the new reduced tensor
-      blocks.insert(blocks.end(), old_formats_strs[b_i].begin() + rank_base, old_blocks_strs[b_i].end());
+      blocks.insert(blocks.end(), old_blocks_strs[b_i].begin() + rank_base, old_blocks_strs[b_i].end());
     }
     else
     { /// for other remaining old operands
diff --git a/lib/Dialect/IndexTree/Transforms/WorkspaceTransforms.cpp b/lib/Dialect/IndexTree/Transforms/WorkspaceTransforms.cpp
index b9ebecc2..665c413a 100644
--- a/lib/Dialect/IndexTree/Transforms/WorkspaceTransforms.cpp
+++ b/lib/Dialect/IndexTree/Transforms/WorkspaceTransforms.cpp
@@ -671,7 +671,7 @@ std::vector<Value> CompressedWorkspaceOutput(std::vector<int> sparseDimsOutput,
   
   /// Convert blocks string array into StrAttr
   std::vector<std::string> c3_blocks_str_0 = {"UNK"};
-  std::vector<std::string> c3_blocks_str_1 = opFormats[2];
+  std::vector<std::string> c3_blocks_str_1 = opBlocks[2];
   std::vector<std::vector<std::string>> c3_blocks_str = {c3_blocks_str_0, c3_blocks_str_1};
 
   std::vector<mlir::Value> c3_rhs = workspaceTensors;
@@ -779,7 +779,7 @@ void CompressedWorkspaceInput(std::vector<Value> computeOps, OpBuilder &builder,
     std::vector<std::vector<std::string>> opBlocks;
     std::vector<std::vector<int>> opPerms;
     std::vector<std::vector<bool>> inputOutputMapping;
-    getFormatsPermsOfComputeOp(computeOp, opFormats, opPerms, inputOutputMapping);
+    getFormatsPermsOfComputeOp(computeOp, opFormats, opBlocks, opPerms, inputOutputMapping);
     comet_debug() << " \n";
     for (auto n : opFormats)
     {
@@ -903,8 +903,8 @@ void CompressedWorkspaceInput(std::vector<Value> computeOps, OpBuilder &builder,
       std::vector<std::string> c2_formats_str_1 = {"D"};
       std::vector<std::vector<std::string>> c2_formats_str = {c2_formats_str_0, c2_formats_str_1};
       
-      std::vector<std::string> c2_blocks_str_0 = opFormats[sparseDimsInput[0].tensorId];
-      std::vector<std::string> c2_blocks_str_1 = {"D"};
+      std::vector<std::string> c2_blocks_str_0 = opBlocks[sparseDimsInput[0].tensorId];
+      std::vector<std::string> c2_blocks_str_1 = {"UNK"};
       std::vector<std::vector<std::string>> c2_blocks_str = {c2_blocks_str_0, c2_blocks_str_1};
 
       std::vector<mlir::Value> c2_rhs = {tensors_rhs[sparseDimsInput[0].tensorId]};
@@ -1081,7 +1081,7 @@ void IndexTreeWorkspaceTransformationsPass::CompressedWorkspaceTransforms(mlir::
                 std::vector<std::vector<std::string>> opBlocks;
                 std::vector<std::vector<int>> opPerms;
                 std::vector<std::vector<bool>> inputOutputMapping;
-                getFormatsPermsOfComputeOp(computeOp, opFormats, opPerms, inputOutputMapping);
+                getFormatsPermsOfComputeOp(computeOp, opFormats, opBlocks, opPerms, inputOutputMapping);
 
 #ifdef DEBUG_MODE_WorkspaceTransformsPass
                 comet_debug() << "Print opFormats:\n";
diff --git a/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp b/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp
index 4f6ce9b5..f2195715 100644
--- a/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp
+++ b/lib/Dialect/TensorAlgebra/Transforms/TensorDeclLowering.cpp
@@ -51,7 +51,7 @@ using namespace mlir::indexTree;
 #define DEBUG_TYPE "tensor-decl-lowering"
 
 // *********** For debug purpose *********//
-#define COMET_DEBUG_MODE
+// #define COMET_DEBUG_MODE
 #include "comet/Utils/debug.h"
 #undef COMET_DEBUG_MODE
 // *********** For debug purpose *********//
diff --git a/lib/Dialect/Utils/Utils.cpp b/lib/Dialect/Utils/Utils.cpp
index d83fe16c..d37868ea 100644
--- a/lib/Dialect/Utils/Utils.cpp
+++ b/lib/Dialect/Utils/Utils.cpp
@@ -1476,15 +1476,19 @@ namespace mlir
     /// Get the perms and formats of the itCompute op
     void getFormatsPermsOfComputeOp(Value computeOp,
                                     std::vector<std::vector<std::string>> &opFormats,
+                                    std::vector<std::vector<std::string>> &opBlocks,
                                     std::vector<std::vector<int>> &opPerms,
                                     std::vector<std::vector<bool>> &inputOutputMapping)
     {
       indexTree::IndexTreeComputeRHSOp itComputeOp_rhs = dyn_cast<indexTree::IndexTreeComputeRHSOp>(computeOp.getDefiningOp()->getOperand(0).getDefiningOp());
       ArrayAttr opFormatsArrayAttr_rhs = itComputeOp_rhs.getAllFormats();
+      ArrayAttr opBlocksArrayAttr_rhs = itComputeOp_rhs.getAllBlocks();
       ArrayAttr opPermsArrayAttr_rhs = itComputeOp_rhs.getAllPerms();
       indexTree::IndexTreeComputeLHSOp itComputeOp_lhs = dyn_cast<indexTree::IndexTreeComputeLHSOp>(computeOp.getDefiningOp()->getOperand(1).getDefiningOp());
       ArrayAttr opFormatsArrayAttr_lhs = itComputeOp_lhs.getAllFormats();
+      ArrayAttr opBlocksArrayAttr_lhs = itComputeOp_lhs.getAllBlocks();
       ArrayAttr opPermsArrayAttr_lhs = itComputeOp_lhs.getAllPerms();
+      ///TODO(patrick) We should probably verify the block sizes
       assert(opFormatsArrayAttr_rhs.size() == opPermsArrayAttr_rhs.size() && "not equal RHS formats size with perms size\n");
       assert(opFormatsArrayAttr_lhs.size() == opPermsArrayAttr_lhs.size() && "not equal LHS formats size with perms size\n");
 
@@ -1493,17 +1497,25 @@ namespace mlir
       comet_debug() << "Start printing opFormats_rhs\n";
       std::vector<std::vector<std::string>> opFormats_rhs = convertArrayAttrStrTo2DVector(opFormatsArrayAttr_rhs);
       comet_debug() << "End printing opFormats_rhs\n";
+      comet_debug() << "Start printing opBlocks_rhs\n";
+      std::vector<std::vector<std::string>> opBlocks_rhs = convertArrayAttrStrTo2DVector(opBlocksArrayAttr_rhs);
+      comet_debug() << "End printing opBlocks_rhs\n";
       std::vector<std::vector<int>> opPerms_rhs = convertArrayAttrIntTo2DVector(opPermsArrayAttr_rhs);
       std::vector<std::vector<bool>> inputMapping = createInputOutputMapping(opPermsArrayAttr_rhs, true);
 
       comet_debug() << "Start printing opFormats_lhs\n";
       std::vector<std::vector<std::string>> opFormats_lhs = convertArrayAttrStrTo2DVector(opFormatsArrayAttr_lhs);
       comet_debug() << "End printing opFormats_lhs\n";
+      comet_debug() << "Start printing opBlocks_lhs\n";
+      std::vector<std::vector<std::string>> opBlocks_lhs = convertArrayAttrStrTo2DVector(opBlocksArrayAttr_lhs);
+      comet_debug() << "End printing opBlocks_lhs\n";
       std::vector<std::vector<int>> opPerms_lhs = convertArrayAttrIntTo2DVector(opPermsArrayAttr_lhs);
       std::vector<std::vector<bool>> outputMapping = createInputOutputMapping(opPermsArrayAttr_lhs, false);
 
       opFormats = opFormats_rhs;
       opFormats.insert(opFormats.end(), opFormats_lhs.begin(), opFormats_lhs.end());
+      opBlocks = opBlocks_rhs;
+      opBlocks.insert(opBlocks.end(), opBlocks_lhs.begin(), opBlocks_lhs.end());
       opPerms = opPerms_rhs;
       opPerms.insert(opPerms.end(), opPerms_lhs.begin(), opPerms_lhs.end());
       inputOutputMapping = inputMapping;
@@ -1616,10 +1628,11 @@ namespace mlir
           {
             comet_debug() << " getFormatsInfo:leafs[" << j << "] is computeOp\n";
             std::vector<std::vector<std::string>> allFormats;
+            std::vector<std::vector<std::string>> allBlocks;
             std::vector<std::vector<int>> allPerms;
             std::vector<std::vector<bool>> inputOutputMapping;
             OpBuilder builder(leafop);
-            getFormatsPermsOfComputeOp(leafop, allFormats, allPerms, inputOutputMapping);
+            getFormatsPermsOfComputeOp(leafop, allFormats, allBlocks, allPerms, inputOutputMapping);
 
             comet_debug() << " getFormatsInfo:Allformats allFormats.size(): " << allFormats.size() << "\n";
             for (auto m : allFormats)
diff --git a/run.sh b/run.sh
index 8b388783..58cb2d60 100755
--- a/run.sh
+++ b/run.sh
@@ -1,9 +1,13 @@
 #!/bin/bash
 
 export LD_LIBRARY_PATH="/home/patrick/Work/PNNL/COMET/install/lib"
-export SPARSE_FILE_NAME0=first.mtx
 
-build/bin/comet-opt --convert-ta-to-it --convert-to-loops --convert-to-llvm first.ta &> first.mlir
+#export SPARSE_FILE_NAME0=first.mtx
+#build/bin/comet-opt --convert-ta-to-it --convert-to-loops --convert-to-llvm first.ta &> first.mlir
+
+export SPARSE_FILE_NAME0=integration_test/data/test_rank2.mtx
+export SPARSE_FILE_NAME1=integration_test/data/test_rank2_transpose.mtx
+build/bin/comet-opt --opt-comp-workspace --convert-ta-to-it --convert-to-loops --convert-to-llvm first.ta &> first.mlir
 
 llvm/build/bin/mlir-cpu-runner first.mlir -O3 -e main -entry-point-result=void \
     -shared-libs=build/lib/libcomet_runner_utils.so
diff --git a/test.mtx b/test.mtx
new file mode 100644
index 00000000..34cb95ac
--- /dev/null
+++ b/test.mtx
@@ -0,0 +1,13 @@
+%%MatrixMarket matrix coordinate real general
+%
+% This is a test sparse matrix in Matrix Market Exchange Format.
+% see https://math.nist.gov/MatrixMarket
+%
+4 6 7
+1 1 5.0
+1 2 1.0
+2 1 7.0
+2 2 3.0
+4 1 8.0
+4 4 4.0
+4 5 9.0
diff --git a/test.ta b/test.ta
new file mode 100644
index 00000000..b0bb1a03
--- /dev/null
+++ b/test.ta
@@ -0,0 +1,20 @@
+def main() {
+	#IndexLabel Declarations
+	IndexLabel [a] = [4];
+	IndexLabel [b] = [?];
+	IndexLabel [c] = [?];             
+
+	#Tensor Declarations
+	Tensor<double> B([b, c], {CSR});   #sparse tensor declarations should be before dense tensor declarations
+	Tensor<double> A([a, b], {Dense});	  
+	Tensor<double> C([a, c], {Dense});
+
+	#Tensor Fill Operation
+	A[a, b] = 1.0;
+	B[b, c] = comet_read(0);
+	C[a, c] = 0.0;
+
+	C[a, c] = A[a, b] * B[b, c];
+	print(C);
+	#print(B);
+}