cuda: add Merge and Transpose functions

Signed-off-by: deadprogram <[email protected]>
hybridgroup · Sep 9, 2024 · 1ea14c4 · 1ea14c4
1 parent 384a02d
commit 1ea14c4
Show file tree

Hide file tree

Showing 5 changed files with 169 additions and 0 deletions.
diff --git a/cuda/arithm.cpp b/cuda/arithm.cpp
@@ -146,3 +146,25 @@ void GpuFlip(GpuMat src, GpuMat dst, int flipCode, Stream s) {
     }
     cv::cuda::flip(*src, *dst, flipCode, *s);
 }
+
+// GpuMerge copies the mats.length entries of mats into a std::vector and
+// hands them to cv::cuda::merge, which writes the combined result to dst.
+// When s is NULL the call without a stream argument is used; otherwise the
+// call is made with *s.
+void GpuMerge(struct GpuMats mats, GpuMat dst, Stream s) {
+    std::vector<cv::cuda::GpuMat> channels;
+
+    int idx = 0;
+    while (idx < mats.length) {
+        channels.push_back(*mats.mats[idx]);
+        ++idx;
+    }
+
+    if (s != NULL) {
+        cv::cuda::merge(channels, *dst, *s);
+    } else {
+        cv::cuda::merge(channels, *dst);
+    }
+}
+
+// GpuTranspose forwards src and dst to cv::cuda::transpose. The stream
+// argument is only passed along when s is not NULL.
+void GpuTranspose(GpuMat src, GpuMat dst, Stream s) {
+    if (s != NULL) {
+        cv::cuda::transpose(*src, *dst, *s);
+        return;
+    }
+
+    cv::cuda::transpose(*src, *dst);
+}
diff --git a/cuda/arithm.go b/cuda/arithm.go
@@ -318,3 +318,54 @@ func Flip(src GpuMat, dst *GpuMat, flipCode int) {
 func FlipWithStream(src GpuMat, dst *GpuMat, flipCode int, stream Stream) {
 	C.GpuFlip(src.p, dst.p, C.int(flipCode), stream.p)
 }
+
+// Merge makes a multi-channel matrix out of several single-channel matrices.
+//
+// The matrices in mv are handed to OpenCV in slice order and the result is
+// written to dst. If mv is empty there is nothing to merge, so Merge returns
+// without touching dst.
+//
+// For further details, please see:
+// https://docs.opencv.org/4.x/de/d09/group__cudaarithm__core.html#gafce19eb0fcad23f67ab45d544992436d
+func Merge(mv []GpuMat, dst *GpuMat) {
+	// Taking &cMatArray[0] of an empty slice panics, so bail out first.
+	if len(mv) == 0 {
+		return
+	}
+
+	cMatArray := make([]C.GpuMat, len(mv))
+	for i, r := range mv {
+		cMatArray[i] = r.p
+	}
+	cMats := C.GpuMats{
+		mats:   (*C.GpuMat)(&cMatArray[0]),
+		length: C.int(len(mv)),
+	}
+
+	// A nil stream makes GpuMerge use the call without a stream argument.
+	C.GpuMerge(cMats, dst.p, nil)
+}
+
+// MergeWithStream makes a multi-channel matrix out of several single-channel matrices
+// using a Stream for concurrency.
+//
+// The matrices in mv are handed to OpenCV in slice order and the result is
+// written to dst, with the work issued on s. If mv is empty there is nothing
+// to merge, so MergeWithStream returns without touching dst or s.
+//
+// For further details, please see:
+// https://docs.opencv.org/4.x/de/d09/group__cudaarithm__core.html#gafce19eb0fcad23f67ab45d544992436d
+func MergeWithStream(mv []GpuMat, dst *GpuMat, s Stream) {
+	// Taking &cMatArray[0] of an empty slice panics, so bail out first.
+	if len(mv) == 0 {
+		return
+	}
+
+	cMatArray := make([]C.GpuMat, len(mv))
+	for i, r := range mv {
+		cMatArray[i] = r.p
+	}
+	cMats := C.GpuMats{
+		mats:   (*C.GpuMat)(&cMatArray[0]),
+		length: C.int(len(mv)),
+	}
+
+	C.GpuMerge(cMats, dst.p, s.p)
+}
+
+// Transpose transposes a matrix.
+//
+// src is the input matrix and dst receives the transposed result. No stream
+// is supplied to the underlying call (nil is passed for it).
+//
+// For further details, please see:
+// https://docs.opencv.org/4.x/de/d09/group__cudaarithm__core.html#ga327b71c3cb811a904ccf5fba37fc29f2
+func Transpose(src GpuMat, dst *GpuMat) {
+	C.GpuTranspose(src.p, dst.p, nil)
+}
+
+// TransposeWithStream transposes a matrix using a Stream for concurrency.
+//
+// src is the input matrix, dst receives the transposed result, and s is the
+// stream handed to the underlying call.
+// NOTE(review): presumably dst is only safe to read once s has completed
+// (e.g. after s.WaitForCompletion()) - confirm against the OpenCV docs.
+//
+// For further details, please see:
+// https://docs.opencv.org/4.x/de/d09/group__cudaarithm__core.html#ga327b71c3cb811a904ccf5fba37fc29f2
+func TransposeWithStream(src GpuMat, dst *GpuMat, s Stream) {
+	C.GpuTranspose(src.p, dst.p, s.p)
+}
diff --git a/cuda/arithm.h b/cuda/arithm.h
@@ -29,6 +29,8 @@ void GpuSqrt(GpuMat src, GpuMat dst, Stream s);
 void GpuSubtract(GpuMat src1, GpuMat src2, GpuMat dst, Stream s);
 void GpuThreshold(GpuMat src, GpuMat dst, double thresh, double maxval, int typ, Stream s);
 void GpuFlip(GpuMat src, GpuMat dst, int flipCode, Stream s);
+// GpuMerge merges the `mats.length` matrices referenced by `mats` into `dst`.
+// Pass NULL for `s` to run without a stream.
+void GpuMerge(struct GpuMats mats, GpuMat dst, Stream s);
+// GpuTranspose writes the transpose of `src` into `dst`.
+// Pass NULL for `s` to run without a stream.
+void GpuTranspose(GpuMat src, GpuMat dst, Stream s);
 
 #ifdef __cplusplus
 }

diff --git a/cuda/arithm_test.go b/cuda/arithm_test.go
@@ -507,3 +507,91 @@ func TestFlipWithStream(t *testing.T) {
 		t.Error("Invalid Flip test")
 	}
 }
+
+func TestMerge(t *testing.T) {
+	src := NewGpuMatWithSize(101, 102, gocv.MatTypeCV8U)
+	defer src.Close()
+	src2 := NewGpuMatWithSize(101, 102, gocv.MatTypeCV8U)
+	defer src2.Close()
+	src3 := NewGpuMatWithSize(101, 102, gocv.MatTypeCV8U)
+	defer src3.Close()
+
+	dstGPU := NewGpuMat()
+	defer dstGPU.Close()
+
+	Merge([]GpuMat{src, src2, src3}, &dstGPU)
+	if dstGPU.Empty() {
+		t.Error("TestMerge dst should not be empty.")
+	}
+}
+
+func TestMergeWithStream(t *testing.T) {
+	src := NewGpuMatWithSize(101, 102, gocv.MatTypeCV8U)
+	defer src.Close()
+	src2 := NewGpuMatWithSize(101, 102, gocv.MatTypeCV8U)
+	defer src2.Close()
+	src3 := NewGpuMatWithSize(101, 102, gocv.MatTypeCV8U)
+	defer src3.Close()
+	s := NewStream()
+	defer s.Close()
+
+	dstGPU := NewGpuMat()
+	defer dstGPU.Close()
+
+	MergeWithStream([]GpuMat{src, src2, src3}, &dstGPU, s)
+
+	s.WaitForCompletion()
+	if dstGPU.Empty() {
+		t.Error("TestMergeWithStream dst should not be empty.")
+	}
+}
+
+// TestTranspose uploads a grayscale image to the GPU, transposes it, and
+// checks that the downloaded result has its rows and columns swapped
+// relative to the source.
+func TestTranspose(t *testing.T) {
+	src := gocv.IMRead("../images/gocvlogo.jpg", gocv.IMReadGrayScale)
+	defer src.Close()
+	if src.Empty() {
+		// Nothing below is meaningful without the source image, so stop
+		// here instead of uploading an empty Mat.
+		t.Fatal("Invalid read of Mat in Transpose test")
+	}
+
+	var cimg, dimg = NewGpuMat(), NewGpuMat()
+	defer cimg.Close()
+	defer dimg.Close()
+
+	cimg.Upload(src)
+
+	dest := gocv.NewMat()
+	defer dest.Close()
+
+	Transpose(cimg, &dimg)
+	dimg.Download(&dest)
+	if dest.Empty() || src.Rows() != dest.Cols() || src.Cols() != dest.Rows() {
+		t.Error("Invalid Transpose test")
+	}
+}
+
+// TestTransposeWithStream uploads a grayscale image to the GPU, transposes
+// it on a Stream, downloads the result on the same stream, and - once the
+// stream has completed - checks that rows and columns are swapped relative
+// to the source.
+func TestTransposeWithStream(t *testing.T) {
+	src := gocv.IMRead("../images/gocvlogo.jpg", gocv.IMReadGrayScale)
+	defer src.Close()
+	if src.Empty() {
+		// Nothing below is meaningful without the source image, so stop
+		// here instead of uploading an empty Mat.
+		t.Fatal("Invalid read of Mat in TransposeWithStream test")
+	}
+
+	var cimg, dimg, s = NewGpuMat(), NewGpuMat(), NewStream()
+	defer cimg.Close()
+	defer dimg.Close()
+	defer s.Close()
+
+	cimg.Upload(src)
+
+	dest := gocv.NewMat()
+	defer dest.Close()
+
+	TransposeWithStream(cimg, &dimg, s)
+	dimg.DownloadWithStream(&dest, s)
+
+	// Wait for the stream to finish before inspecting dest.
+	s.WaitForCompletion()
+
+	if dest.Empty() || src.Rows() != dest.Cols() || src.Cols() != dest.Rows() {
+		t.Error("Invalid TransposeWithStream test")
+	}
+}
diff --git a/cuda/cuda.h b/cuda/cuda.h
@@ -18,6 +18,12 @@ typedef void* GpuMat;
 typedef void* Stream;
 #endif
 
+// Wrapper for a vector of GpuMat (aka std::vector<GpuMat>), passed across the
+// C boundary as a pointer plus an element count.
+typedef struct GpuMats {
+    GpuMat* mats;  // pointer to the first of `length` GpuMat handles
+    int length;    // number of entries readable through `mats`
+} GpuMats;
+
 GpuMat GpuMat_New();
 GpuMat GpuMat_NewFromMat(Mat mat);
 GpuMat GpuMat_NewWithSize(int rows, int cols, int type);