diff --git a/first.mlir b/first.mlir new file mode 100644 index 00000000..fa8f53d0 --- /dev/null +++ b/first.mlir @@ -0,0 +1,314 @@ +module attributes {llvm.data_layout = ""} { + llvm.func @malloc(i64) -> !llvm.ptr + llvm.func @main() { + %0 = llvm.mlir.constant(0 : index) : i64 + %1 = llvm.mlir.constant(1 : index) : i64 + %2 = llvm.mlir.constant(13 : index) : i64 + %3 = llvm.mlir.constant(1 : index) : i64 + %4 = llvm.mlir.constant(0 : index) : i64 + %5 = llvm.mlir.constant(2 : index) : i64 + %6 = llvm.mlir.constant(3 : index) : i64 + %7 = llvm.mlir.constant(0 : i32) : i32 + %8 = llvm.mlir.constant(1 : i32) : i32 + %9 = llvm.mlir.constant(4 : index) : i64 + %10 = llvm.mlir.constant(5 : index) : i64 + %11 = llvm.mlir.constant(6 : index) : i64 + %12 = llvm.mlir.constant(7 : index) : i64 + %13 = llvm.mlir.constant(0.000000e+00 : f64) : f64 + %14 = llvm.mlir.constant(8 : index) : i64 + %15 = llvm.mlir.null : !llvm.ptr + %16 = llvm.getelementptr %15[13] : (!llvm.ptr) -> !llvm.ptr + %17 = llvm.ptrtoint %16 : !llvm.ptr to i64 + %18 = llvm.call @malloc(%17) : (i64) -> !llvm.ptr + %19 = llvm.bitcast %18 : !llvm.ptr to !llvm.ptr + %20 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %21 = llvm.insertvalue %19, %20[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %22 = llvm.insertvalue %19, %21[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %23 = llvm.insertvalue %0, %22[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %24 = llvm.insertvalue %2, %23[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %25 = llvm.insertvalue %1, %24[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %26 = llvm.alloca %1 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %25, %26 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %27 = llvm.bitcast %26 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %28 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %29 = llvm.insertvalue %1, %28[0] : !llvm.struct<(i64, ptr)> + %30 = llvm.insertvalue %27, %29[1] : !llvm.struct<(i64, ptr)> + llvm.call @read_input_sizes_2D_f64(%7, %4, %4, %5, %4, %1, %27, %8) {filename = "SPARSE_FILE_NAME0"} : (i32, i64, i64, i64, i64, i64, !llvm.ptr, i32) -> () + %31 = llvm.load %19 : !llvm.ptr + %32 = llvm.getelementptr %19[1] : (!llvm.ptr) -> !llvm.ptr + %33 = llvm.load %32 : !llvm.ptr + %34 = llvm.getelementptr %19[2] : (!llvm.ptr) -> !llvm.ptr + %35 = llvm.load %34 : !llvm.ptr + %36 = llvm.getelementptr %19[3] : (!llvm.ptr) -> !llvm.ptr + %37 = llvm.load %36 : !llvm.ptr + %38 = llvm.getelementptr %19[4] : (!llvm.ptr) -> !llvm.ptr + %39 = llvm.load %38 : !llvm.ptr + %40 = llvm.getelementptr %19[5] : (!llvm.ptr) -> !llvm.ptr + %41 = llvm.load %40 : !llvm.ptr + %42 = llvm.getelementptr %19[6] : (!llvm.ptr) -> !llvm.ptr + %43 = llvm.load %42 : !llvm.ptr + %44 = llvm.getelementptr %19[7] : (!llvm.ptr) -> !llvm.ptr + %45 = llvm.load %44 : !llvm.ptr + %46 = llvm.getelementptr %19[8] : (!llvm.ptr) -> !llvm.ptr + %47 = llvm.load %46 : !llvm.ptr + %48 = llvm.mlir.null : !llvm.ptr + %49 = llvm.getelementptr %48[%31] : (!llvm.ptr, i64) -> !llvm.ptr + %50 = llvm.ptrtoint %49 : !llvm.ptr to i64 + %51 = llvm.call @malloc(%50) : (i64) -> !llvm.ptr + %52 = llvm.bitcast %51 : !llvm.ptr to !llvm.ptr + %53 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %54 = llvm.insertvalue %52, %53[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %55 = llvm.insertvalue %52, %54[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %56 = llvm.insertvalue %0, %55[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %57 = llvm.insertvalue %31, %56[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %58 = llvm.insertvalue %1, %57[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb1(%4 : i64) + ^bb1(%59: i64): // 2 preds: ^bb0, ^bb2 + %60 = llvm.icmp "slt" %59, %31 : i64 + llvm.cond_br %60, ^bb2, ^bb3 + ^bb2: // pred: ^bb1 + %61 = llvm.getelementptr %52[%59] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %4, %61 : !llvm.ptr + %62 = llvm.add %59, %3 : i64 + llvm.br ^bb1(%62 : i64) + ^bb3: // pred: ^bb1 + %63 = llvm.alloca %1 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %58, %63 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %64 = llvm.bitcast %63 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %65 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %66 = llvm.insertvalue %1, %65[0] : !llvm.struct<(i64, ptr)> + %67 = llvm.insertvalue %64, %66[1] : !llvm.struct<(i64, ptr)> + %68 = llvm.mlir.null : !llvm.ptr + %69 = llvm.getelementptr %68[%33] : (!llvm.ptr, i64) -> !llvm.ptr + %70 = llvm.ptrtoint %69 : !llvm.ptr to i64 + %71 = llvm.call @malloc(%70) : (i64) -> !llvm.ptr + %72 = llvm.bitcast %71 : !llvm.ptr to !llvm.ptr + %73 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %74 = llvm.insertvalue %72, %73[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %75 = llvm.insertvalue %72, %74[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %76 = llvm.insertvalue %0, %75[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %77 = llvm.insertvalue %33, %76[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %78 = llvm.insertvalue %1, %77[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb4(%4 : i64) + ^bb4(%79: i64): // 2 preds: ^bb3, ^bb5 + %80 = llvm.icmp "slt" %79, %33 : i64 + llvm.cond_br %80, ^bb5, ^bb6 + ^bb5: // pred: ^bb4 + %81 = llvm.getelementptr %72[%79] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %4, %81 : !llvm.ptr + %82 = llvm.add %79, %3 : i64 + llvm.br ^bb4(%82 : i64) + ^bb6: // pred: ^bb4 + %83 = llvm.alloca %1 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %78, %83 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %84 = llvm.bitcast %83 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %85 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %86 = llvm.insertvalue %1, %85[0] : !llvm.struct<(i64, ptr)> + %87 = llvm.insertvalue %84, %86[1] : !llvm.struct<(i64, ptr)> + %88 = llvm.mlir.null : !llvm.ptr + %89 = llvm.getelementptr %88[%35] : (!llvm.ptr, i64) -> !llvm.ptr + %90 = llvm.ptrtoint %89 : !llvm.ptr to i64 + %91 = llvm.call @malloc(%90) : (i64) -> !llvm.ptr + %92 = llvm.bitcast %91 : !llvm.ptr to !llvm.ptr + %93 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %94 = llvm.insertvalue %92, %93[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %95 = llvm.insertvalue %92, %94[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %96 = llvm.insertvalue %0, %95[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %97 = llvm.insertvalue %35, %96[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %98 = llvm.insertvalue %1, %97[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb7(%4 : i64) + ^bb7(%99: i64): // 2 preds: ^bb6, ^bb8 + %100 = llvm.icmp "slt" %99, %35 : i64 + llvm.cond_br %100, ^bb8, ^bb9 + ^bb8: // pred: ^bb7 + %101 = llvm.getelementptr %92[%99] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %4, %101 : !llvm.ptr + %102 = llvm.add %99, %3 : i64 + llvm.br ^bb7(%102 : i64) + ^bb9: // pred: ^bb7 + %103 = llvm.alloca %1 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %98, %103 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %104 = llvm.bitcast %103 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %105 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %106 = llvm.insertvalue %1, %105[0] : !llvm.struct<(i64, ptr)> + %107 = llvm.insertvalue %104, %106[1] : !llvm.struct<(i64, ptr)> + %108 = llvm.mlir.null : !llvm.ptr + %109 = llvm.getelementptr %108[%37] : (!llvm.ptr, i64) -> !llvm.ptr + %110 = llvm.ptrtoint %109 : !llvm.ptr to i64 + %111 = llvm.call @malloc(%110) : (i64) -> !llvm.ptr + %112 = llvm.bitcast %111 : !llvm.ptr to !llvm.ptr + %113 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %114 = llvm.insertvalue %112, %113[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %115 = llvm.insertvalue %112, %114[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %116 = llvm.insertvalue %0, %115[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %117 = llvm.insertvalue %37, %116[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %118 = llvm.insertvalue %1, %117[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb10(%4 : i64) + ^bb10(%119: i64): // 2 preds: ^bb9, ^bb11 + %120 = llvm.icmp "slt" %119, %37 : i64 + llvm.cond_br %120, ^bb11, ^bb12 + ^bb11: // pred: ^bb10 + %121 = llvm.getelementptr %112[%119] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %4, %121 : !llvm.ptr + %122 = llvm.add %119, %3 : i64 + llvm.br ^bb10(%122 : i64) + ^bb12: // pred: ^bb10 + %123 = llvm.alloca %1 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %118, %123 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %124 = llvm.bitcast %123 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %125 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %126 = llvm.insertvalue %1, %125[0] : !llvm.struct<(i64, ptr)> + %127 = llvm.insertvalue %124, %126[1] : !llvm.struct<(i64, ptr)> + %128 = llvm.mlir.null : !llvm.ptr + %129 = llvm.getelementptr %128[%39] : (!llvm.ptr, i64) -> !llvm.ptr + %130 = llvm.ptrtoint %129 : !llvm.ptr to i64 + %131 = llvm.call @malloc(%130) : (i64) -> !llvm.ptr + %132 = llvm.bitcast %131 : !llvm.ptr to !llvm.ptr + %133 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %134 = llvm.insertvalue %132, %133[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %135 = llvm.insertvalue %132, %134[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %136 = llvm.insertvalue %0, %135[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %137 = llvm.insertvalue %39, %136[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %138 = llvm.insertvalue %1, %137[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb13(%4 : i64) + ^bb13(%139: i64): // 2 preds: ^bb12, ^bb14 + %140 = llvm.icmp "slt" %139, %39 : i64 + llvm.cond_br %140, ^bb14, ^bb15 + ^bb14: // pred: ^bb13 + %141 = llvm.getelementptr %132[%139] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %4, %141 : !llvm.ptr + %142 = llvm.add %139, %3 : i64 + llvm.br ^bb13(%142 : i64) + ^bb15: // pred: ^bb13 + %143 = llvm.alloca %1 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %138, %143 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %144 = llvm.bitcast %143 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %145 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %146 = llvm.insertvalue %1, %145[0] : !llvm.struct<(i64, ptr)> + %147 = llvm.insertvalue %144, %146[1] : !llvm.struct<(i64, ptr)> + %148 = llvm.mlir.null : !llvm.ptr + %149 = llvm.getelementptr %148[%41] : (!llvm.ptr, i64) -> !llvm.ptr + %150 = llvm.ptrtoint %149 : !llvm.ptr to i64 + %151 = llvm.call @malloc(%150) : (i64) -> !llvm.ptr + %152 = llvm.bitcast %151 : !llvm.ptr to !llvm.ptr + %153 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %154 = llvm.insertvalue %152, %153[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %155 = llvm.insertvalue %152, %154[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %156 = llvm.insertvalue %0, %155[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %157 = llvm.insertvalue %41, %156[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %158 = llvm.insertvalue %1, %157[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb16(%4 : i64) + ^bb16(%159: i64): // 2 preds: ^bb15, ^bb17 + %160 = llvm.icmp "slt" %159, %41 : i64 + llvm.cond_br %160, ^bb17, ^bb18 + ^bb17: // pred: ^bb16 + %161 = llvm.getelementptr %152[%159] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %4, %161 : !llvm.ptr + %162 = llvm.add %159, %3 : i64 + llvm.br ^bb16(%162 : i64) + ^bb18: // pred: ^bb16 + %163 = llvm.alloca %1 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %158, %163 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %164 = llvm.bitcast %163 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %165 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %166 = llvm.insertvalue %1, %165[0] : !llvm.struct<(i64, ptr)> + %167 = llvm.insertvalue %164, %166[1] : !llvm.struct<(i64, ptr)> + %168 = llvm.mlir.null : !llvm.ptr + %169 = llvm.getelementptr %168[%43] : (!llvm.ptr, i64) -> !llvm.ptr + %170 = llvm.ptrtoint %169 : !llvm.ptr to i64 + %171 = llvm.call @malloc(%170) : (i64) -> !llvm.ptr + %172 = llvm.bitcast %171 : !llvm.ptr to !llvm.ptr + %173 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %174 = llvm.insertvalue %172, %173[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %175 = llvm.insertvalue %172, %174[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %176 = llvm.insertvalue %0, %175[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %177 = llvm.insertvalue %43, %176[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %178 = llvm.insertvalue %1, %177[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb19(%4 : i64) + ^bb19(%179: i64): // 2 preds: ^bb18, ^bb20 + %180 = llvm.icmp "slt" %179, %43 : i64 + llvm.cond_br %180, ^bb20, ^bb21 + ^bb20: // pred: ^bb19 + %181 = llvm.getelementptr %172[%179] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %4, %181 : !llvm.ptr + %182 = llvm.add %179, %3 : i64 + llvm.br ^bb19(%182 : i64) + ^bb21: // pred: ^bb19 + %183 = llvm.alloca %1 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %178, %183 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %184 = llvm.bitcast %183 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %185 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %186 = llvm.insertvalue %1, %185[0] : !llvm.struct<(i64, ptr)> + %187 = llvm.insertvalue %184, %186[1] : !llvm.struct<(i64, ptr)> + %188 = llvm.mlir.null : !llvm.ptr + %189 = llvm.getelementptr %188[%45] : (!llvm.ptr, i64) -> !llvm.ptr + %190 = llvm.ptrtoint %189 : !llvm.ptr to i64 + %191 = llvm.call @malloc(%190) : (i64) -> !llvm.ptr + %192 = llvm.bitcast %191 : !llvm.ptr to !llvm.ptr + %193 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %194 = llvm.insertvalue %192, %193[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %195 = llvm.insertvalue %192, %194[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %196 = llvm.insertvalue %0, %195[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %197 = llvm.insertvalue %45, %196[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %198 = llvm.insertvalue %1, %197[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb22(%4 : i64) + ^bb22(%199: i64): // 2 preds: ^bb21, ^bb23 + %200 = llvm.icmp "slt" %199, %45 : i64 + llvm.cond_br %200, ^bb23, ^bb24 + ^bb23: // pred: ^bb22 + %201 = llvm.getelementptr %192[%199] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %4, %201 : !llvm.ptr + %202 = llvm.add %199, %3 : i64 + llvm.br ^bb22(%202 : i64) + ^bb24: // pred: ^bb22 + %203 = llvm.alloca %1 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %198, %203 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %204 = llvm.bitcast %203 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %205 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %206 = llvm.insertvalue %1, %205[0] : !llvm.struct<(i64, ptr)> + %207 = llvm.insertvalue %204, %206[1] : !llvm.struct<(i64, ptr)> + %208 = llvm.mlir.null : !llvm.ptr + %209 = llvm.getelementptr %208[%47] : (!llvm.ptr, i64) -> !llvm.ptr + %210 = llvm.ptrtoint %209 : !llvm.ptr to i64 + %211 = llvm.call @malloc(%210) : (i64) -> !llvm.ptr + %212 = llvm.bitcast %211 : !llvm.ptr to !llvm.ptr + %213 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %214 = llvm.insertvalue %212, %213[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %215 = llvm.insertvalue %212, %214[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %216 = llvm.insertvalue %0, %215[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %217 = llvm.insertvalue %47, %216[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %218 = llvm.insertvalue %1, %217[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + llvm.br ^bb25(%4 : i64) + ^bb25(%219: i64): // 2 preds: ^bb24, ^bb26 + %220 = llvm.icmp "slt" %219, %47 : i64 + llvm.cond_br %220, ^bb26, ^bb27 + ^bb26: // pred: ^bb25 + %221 = llvm.getelementptr %212[%219] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %13, %221 : !llvm.ptr + %222 = llvm.add %219, %3 : i64 + llvm.br ^bb25(%222 : i64) + ^bb27: // pred: ^bb25 + %223 = llvm.alloca %1 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + llvm.store %218, %223 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + %224 = llvm.bitcast %223 : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> to !llvm.ptr + %225 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + %226 = llvm.insertvalue %1, %225[0] : !llvm.struct<(i64, ptr)> + %227 = llvm.insertvalue %224, %226[1] : !llvm.struct<(i64, ptr)> + llvm.call @read_input_2D_f64(%7, %4, %4, %5, %4, %1, %64, %1, %84, %1, %104, %1, %124, %1, %144, %1, %164, %1, %184, %1, %204, %1, %224, %8) {filename = "SPARSE_FILE_NAME0"} : (i32, i64, i64, i64, i64, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i32) -> () + llvm.call @comet_print_memref_i64(%1, %64) : (i64, !llvm.ptr) -> () + llvm.call @comet_print_memref_i64(%1, %84) : (i64, !llvm.ptr) -> () + llvm.call @comet_print_memref_i64(%1, %104) : (i64, !llvm.ptr) -> () + llvm.call @comet_print_memref_i64(%1, %124) : (i64, !llvm.ptr) -> () + llvm.call @comet_print_memref_i64(%1, %144) : (i64, !llvm.ptr) -> () + llvm.call @comet_print_memref_i64(%1, %164) : (i64, !llvm.ptr) -> () + llvm.call @comet_print_memref_i64(%1, %184) : (i64, !llvm.ptr) -> () + llvm.call @comet_print_memref_i64(%1, %204) : (i64, !llvm.ptr) -> () + llvm.call @comet_print_memref_f64(%1, %224) : (i64, !llvm.ptr) -> () + llvm.return + } + llvm.func @read_input_2D_f64(i32, i64, i64, i64, i64, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i64, !llvm.ptr, i32) attributes {sym_visibility = "private"} + llvm.func @read_input_sizes_2D_f64(i32, i64, i64, i64, i64, i64, !llvm.ptr, i32) attributes {sym_visibility = "private"} + llvm.func @comet_sort_index(i64, !llvm.ptr, i64, i64) attributes {sym_visibility = "private"} + llvm.func @comet_print_memref_f64(i64, !llvm.ptr) attributes {sym_visibility = "private"} + llvm.func @comet_print_memref_i64(i64, !llvm.ptr) attributes {sym_visibility = "private"} +} diff --git a/first.mtx b/first.mtx new file mode 100644 index 00000000..34cb95ac --- /dev/null +++ b/first.mtx @@ -0,0 +1,13 @@ +%%MatrixMarket matrix coordinate real general +% +% This is a test sparse matrix in Matrix Market Exchange Format. +% see https://math.nist.gov/MatrixMarket +% +4 6 7 +1 1 5.0 +1 2 1.0 +2 1 7.0 +2 2 3.0 +4 1 8.0 +4 4 4.0 +4 5 9.0 diff --git a/first.ta b/first.ta new file mode 100644 index 00000000..44462d0e --- /dev/null +++ b/first.ta @@ -0,0 +1,11 @@ +def main() { + #IndexLabel Declarations + IndexLabel [a] = [?]; + IndexLabel [b] = [?]; + + #Tensor Declarations + Tensor A([a, b], {BCSR}); + A[a, b] = comet_read(0); + print(A); +} + diff --git a/lib/Dialect/Utils/Utils.cpp b/lib/Dialect/Utils/Utils.cpp index 5fc431c4..74655289 100644 --- a/lib/Dialect/Utils/Utils.cpp +++ b/lib/Dialect/Utils/Utils.cpp @@ -572,8 +572,8 @@ namespace mlir else if (formats_str.compare("BCSR") == 0) { allFormats[i].push_back("D"); - allFormats[i].push_back("CN"); allFormats[i].push_back("D"); + allFormats[i].push_back("CN"); allFormats[i].push_back("D"); } else if (formats_str.compare("CSB") == 0) @@ -877,8 +877,8 @@ namespace mlir else if (formats_str.compare(0, 4, "BCSR") == 0) { /// BCSR dim_format.push_back(format_dense); - dim_format.push_back(format_compressednonunique); dim_format.push_back(format_dense); + dim_format.push_back(format_compressednonunique); dim_format.push_back(format_dense); } else if (formats_str.compare(0, 3, "CSB") == 0) @@ -1040,8 +1040,8 @@ namespace mlir else if (formats_str.compare(0, 4, "BCSR") == 0) { /// BCSR dim_format.push_back(format_dense); - dim_format.push_back(format_compressed); dim_format.push_back(format_dense); + dim_format.push_back(format_compressed); dim_format.push_back(format_dense); } else if (formats_str.compare(0, 3, "CSB") == 0) diff --git a/lib/ExecutionEngine/SparseUtils.cpp b/lib/ExecutionEngine/SparseUtils.cpp index 06f7f1a1..3561b420 100644 --- a/lib/ExecutionEngine/SparseUtils.cpp +++ b/lib/ExecutionEngine/SparseUtils.cpp @@ -911,6 +911,211 @@ struct EllpackMatrix } }; +///===----------------------------------------------------------------------===// +/// BCSR matrix type +///===----------------------------------------------------------------------===// + +template +struct BCSRMatrix +{ + uint64_t num_blocks; + + uint64_t block_rows; + uint64_t block_cols; + + uint64_t *colptr; + uint64_t *colidx; + uint64_t colptr_len; + uint64_t colidx_len; + + T *Aval; + uint64_t value_len; + + bool has_values(uint64_t i, uint64_t mi, uint64_t j, uint64_t mj, CooMatrix *coo_matrix) { + for (uint64_t bi = i; binum_nonzeros; p++) { + auto coord = coo_matrix->coo_tuples[p]; + if (coord.row == bi && coord.col == bj) { + return true; + } + } + } + } + + return false; + } + + /// Initializer + void Init(CooMatrix *coo_matrix, bool verbose = false) + { + //num_rows = coo_matrix->num_rows; + //num_cols = 0; + uint64_t num_nonzeros = coo_matrix->num_nonzeros; + + /// Sort by rows, then columns + if (verbose) + printf("Ordering..."); + fflush(stdout); + std::stable_sort(coo_matrix->coo_tuples, coo_matrix->coo_tuples + num_nonzeros, CooComparatorRow()); + if (verbose) + printf("done."); + fflush(stdout); + + /// Calculate the column count + uint64_t max = 0; + uint64_t buffer = 0; + // int current = -1; + uint64_t current = num_nonzeros > 0 ? coo_matrix->coo_tuples[0].row : 0; + for (uint64_t i = 0; i < num_nonzeros; i++) + { + if (coo_matrix->coo_tuples[i].row == current) + { + ++buffer; + } + else + { + if (buffer > max) + max = buffer; + buffer = 1; + current = coo_matrix->coo_tuples[i].row; + } + } + if (buffer > max) + max = buffer; + //num_cols = max; + + // Temporary + num_blocks = 1; + block_rows = 1; + block_cols = 1; + colptr_len = 1; + colidx_len = 1; + value_len = 1; + + /////////////////////////////////////////////// + //for (uint64_t p = 0; pnum_nonzeros; p++) { + // auto coord = coo_matrix->coo_tuples[p]; + // printf("(%d, %d, %.0f)\n", coord.row, coord.col, coord.val); + //} + + /////////////////////////////////////////////// + uint64_t rows = coo_matrix->num_rows; + uint64_t cols = coo_matrix->num_cols; + //printf("Num_rows: %d | Num_cols: %d\n", rows, cols); + + std::vector A2pos_nc; + std::vector A2crd; + std::vector Aval_nc; + + // Step 1: Determine block size + // TODO: Let us think about this. For now, quick solution + block_rows = rows/2; + block_cols = cols/2; + //printf("Block_rows: %d | Block_cols: %d\n", block_rows, block_cols); + + // Step 2: Examine the blocks + // We only want the blocks with values + // + // From here, we can start building the A2 dimension + // + for (uint64_t i=0; inum_nonzeros; p++) { + auto coord = coo_matrix->coo_tuples[p]; + if (coord.row == bi && coord.col == bj) { + Aval_nc.push_back(coord.val); + found = true; + break; + } + } + + if (found == false) { + Aval_nc.push_back(0); + } + } + } + } + } + } + + // Compress the row coordinates + std::vector A2pos; + A2pos.push_back(0); + + int curr = A2pos_nc[0]; + int curr_end = 1; + for (uint64_t i = 1; i *coo_matrix, bool verbose = false) + { + Init(coo_matrix, verbose); + } + + /// Destructor + ~BCSRMatrix() + { + Clear(); + } +}; + //===----------------------------------------------------------------------===// /// COO tensor 3D type. A COO tensor is just a vector of edge tuples. Tuples are sorted /// first by first dim, then by second dim and so on. @@ -1756,20 +1961,6 @@ void read_input_sizes_2D(int32_t fileID, EllpackMatrix ellpack_matrix(FileReader.coo_matrix); int cols = ellpack_matrix.num_cols * ellpack_matrix.num_rows; - /* - desc_sizes->data[0] = 1; // A1pos - desc_sizes->data[1] = 1; // A1crd - desc_sizes->data[2] = 1; // A2pos - desc_sizes->data[3] = cols; // A2crd - desc_sizes->data[4] = 1; // A1_tile_pos - desc_sizes->data[5] = 1; // A1_tile_crd - desc_sizes->data[6] = 0; // A2_tile_pos - desc_sizes->data[7] = 0; // A2_tile_crd - desc_sizes->data[8] = cols; // Controls count of value dimension - desc_sizes->data[9] = FileReader.coo_matrix->num_rows; - desc_sizes->data[10] = FileReader.coo_matrix->num_cols; - */ - desc_sizes->data[0] = 1; /// A1pos desc_sizes->data[1] = 1; /// A1crd desc_sizes->data[2] = 1; /// A1_tile_pos @@ -1796,7 +1987,30 @@ void read_input_sizes_2D(int32_t fileID, /// BCSR else if (A1format == Dense && A2format == Compressed_nonunique && A1_tile_format == Dense && A2_tile_format == Dense) { - puts("BCSR"); + BCSRMatrix bcsr_matrix(FileReader.coo_matrix); + + desc_sizes->data[0] = 1; /// A1pos + desc_sizes->data[1] = 1; /// A1crd + desc_sizes->data[2] = 1; /// A1_tile_pos + desc_sizes->data[3] = 1; /// A1_tile_crd + desc_sizes->data[4] = bcsr_matrix.colptr_len; /// A2pos + desc_sizes->data[5] = bcsr_matrix.colidx_len; /// A2crd + desc_sizes->data[6] = 1; /// A2_tile_pos + desc_sizes->data[7] = 1; /// A2_tile_crd + desc_sizes->data[8] = bcsr_matrix.value_len; + desc_sizes->data[9] = FileReader.coo_matrix->num_rows; + desc_sizes->data[10] = FileReader.coo_matrix->num_cols; + + /*****************DEBUG******************/ + //std::cout << "BCSR detail: \n" + // << "desc_sizes->data[0]: " << desc_sizes->data[0] << "\n" + // << "desc_sizes->data[1]: " << desc_sizes->data[1] << "\n" + // << "desc_sizes->data[2]: " << desc_sizes->data[2] << "\n" + // << "desc_sizes->data[3]: " << desc_sizes->data[3] << "\n" + // << "desc_sizes->data[4]: " << desc_sizes->data[4] << "\n" + // << "desc_sizes->data[5]: " << desc_sizes->data[5] << "\n" + // << "desc_sizes->data[6]: " << desc_sizes->data[6] << "\n"; + /*****************DEBUG******************/ } /// CSB else if (A1format == Compressed_unique && A2format == singleton && A1_tile_format == Dense && A2_tile_format == Dense) @@ -2061,12 +2275,29 @@ void read_input_2D(int32_t fileID, } } /// BCSR - /*else if (A1format == Dense && A2format == Compressed_nonunique && A3format == Dense && A4format == Dense) + else if (A1format == Dense && A1_tile_format == Dense && A2format == Compressed_nonunique && A2_tile_format == Dense) { - puts("BCSR"); + BCSRMatrix bcsr_matrix(FileReader.coo_matrix); + FileReader.FileReaderWrapperFinalize(); + + desc_A1pos->data[0] = bcsr_matrix.num_blocks; + desc_A1tile_pos->data[0] = bcsr_matrix.block_rows; + desc_A2tile_pos->data[0] = bcsr_matrix.block_cols; + + for (uint64_t i = 0; idata[i] = bcsr_matrix.colptr[i]; + } + + for (uint64_t i = 0; idata[i] = bcsr_matrix.colidx[i]; + } + + for (uint64_t i = 0; idata[i] = bcsr_matrix.Aval[i]; + } } /// CSB - else if (A1format == Compressed_unique && A2format == singleton && A3format == Dense && A4format == Dense) + /*else if (A1format == Compressed_unique && A2format == singleton && A3format == Dense && A4format == Dense) { puts("CSB"); }*/ @@ -2505,4 +2736,4 @@ extern "C" void comet_sort_index(int64_t rank, void *ptr, int64_t index_first, i { UnrankedMemRefType descriptor = {rank, ptr}; _milr_ciface_comet_sort(&descriptor, index_first, index_last); -} \ No newline at end of file +} diff --git a/run.sh b/run.sh new file mode 100755 index 00000000..8b388783 --- /dev/null +++ b/run.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +export LD_LIBRARY_PATH="/home/patrick/Work/PNNL/COMET/install/lib" +export SPARSE_FILE_NAME0=first.mtx + +build/bin/comet-opt --convert-ta-to-it --convert-to-loops --convert-to-llvm first.ta &> first.mlir + +llvm/build/bin/mlir-cpu-runner first.mlir -O3 -e main -entry-point-result=void \ + -shared-libs=build/lib/libcomet_runner_utils.so +