cuda_utils.py
from typing import List, Tuple, Union

import cv2
import cupy as cp
import numpy as np
import torch
import torch.nn.functional as F
# CUDA version: wrap a cv2.cuda.GpuMat as a CuPy array without copying, by exposing the
# GpuMat's device pointer through the __cuda_array_interface__ protocol.
def cp_array_from_cv_cuda_gpumat(mat: cv2.cuda.GpuMat) -> cp.ndarray:
    class CudaArrayInterface:
        def __init__(self, gpu_mat: cv2.cuda.GpuMat):
            w, h = gpu_mat.size()
            # Map OpenCV depth constants to numpy-style typestrings.
            type_map = {
                cv2.CV_8U: "|u1",
                cv2.CV_8S: "|i1",
                cv2.CV_16U: "<u2", cv2.CV_16S: "<i2",
                cv2.CV_32S: "<i4",
                cv2.CV_32F: "<f4", cv2.CV_64F: "<f8",
            }
            self.__cuda_array_interface__ = {
                "version": 3,
                "shape": (h, w, gpu_mat.channels()) if gpu_mat.channels() > 1 else (h, w),
                "typestr": type_map[gpu_mat.depth()],
                "descr": [("", type_map[gpu_mat.depth()])],
                "stream": 1,
                "strides": (gpu_mat.step, gpu_mat.elemSize(), gpu_mat.elemSize1()) if gpu_mat.channels() > 1
                else (gpu_mat.step, gpu_mat.elemSize()),
                "data": (gpu_mat.cudaPtr(), False),
            }

    arr = cp.asarray(CudaArrayInterface(mat))
    return arr
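

# A minimal usage sketch for the wrapper above, assuming an OpenCV build with CUDA
# support and a CUDA-capable device; the frame size and dtype are arbitrary example
# values, and _example_gpumat_to_cupy is a hypothetical helper, not part of the API.
def _example_gpumat_to_cupy() -> cp.ndarray:
    # Upload a host frame to the GPU, then view it as a CuPy array with no copy.
    host_frame = np.zeros((480, 640, 3), dtype=np.uint8)
    gpu_frame = cv2.cuda.GpuMat()
    gpu_frame.upload(host_frame)
    cupy_view = cp_array_from_cv_cuda_gpumat(gpu_frame)
    # The CuPy view shares device memory with the GpuMat, so it is only valid while
    # gpu_frame is alive.
    return cupy_view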
def gpu_mat_to_torch_tensor(gpu_mat: cv2.cuda.GpuMat) -> torch.Tensor:
    """Convert a BGR cv2.cuda.GpuMat into a normalized BCHW torch tensor on the GPU."""
    # Convert the BGR frame to 32-bit float on the GPU.
    gpu_mat_32f = cv2.cuda.GpuMat(gpu_mat.size(), cv2.CV_32FC3)
    gpu_mat.convertTo(cv2.CV_32FC3, gpu_mat_32f)
    # View the GpuMat as a CuPy array (zero copy), then reorder BGR -> RGB.
    cupy_array = cp_array_from_cv_cuda_gpumat(gpu_mat_32f)
    cupy_array = cp.ascontiguousarray(cupy_array[:, :, ::-1])  # assumes HWC layout
    # Pad the bottom of the frame so the height is a multiple of 32.
    current_height = cupy_array.shape[0]
    target_height = cp.ceil(current_height / 32) * 32
    padding_height = int(target_height - current_height)
    padded_cupy_array = cp.pad(cupy_array, ((0, padding_height), (0, 0), (0, 0)),
                               mode='constant', constant_values=0)
    # Hand the CuPy array to PyTorch via DLPack (still zero copy, stays on the GPU).
    torch_tensor = torch.from_dlpack(padded_cupy_array)
    # Normalize pixel values to [0, 1].
    torch_tensor = torch_tensor.div(255.0)
    # Permute dimensions to BCHW format.
    torch_tensor = torch_tensor.permute(2, 0, 1).unsqueeze(0)
    return torch_tensor
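

# A minimal usage sketch, assuming a CUDA-capable device, a CUDA-enabled PyTorch build,
# and an OpenCV build with CUDA support; the input resolution is an arbitrary example
# value, and _example_gpu_mat_to_torch_tensor is a hypothetical helper.
def _example_gpu_mat_to_torch_tensor() -> torch.Tensor:
    host_frame = np.random.randint(0, 255, (720, 1280, 3), dtype=np.uint8)
    gpu_frame = cv2.cuda.GpuMat()
    gpu_frame.upload(host_frame)
    tensor = gpu_mat_to_torch_tensor(gpu_frame)
    # Expected result: a BCHW float tensor on the GPU whose height has been padded
    # up to the next multiple of 32 (720 -> 736 here).
    assert tensor.shape[0] == 1 and tensor.shape[1] == 3
    assert tensor.shape[2] % 32 == 0
    return tensor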
def pad_image_on_gpu_with_pytorch(image_tensor: torch.Tensor,
                                  pad: Tuple[int, int, int, int],
                                  padding_mode: str = 'constant',
                                  value: int = 114) -> torch.Tensor:
    """
    Pad an image tensor on the GPU using PyTorch.

    Parameters:
    - image_tensor (torch.Tensor): The input image tensor on the GPU. Expected shape is (C, H, W).
    - pad (Tuple[int, int, int, int]): The padding to apply on each side of the image as
      (left, right, top, bottom).
    - padding_mode (str, optional): The type of padding to apply. Can be 'constant', 'reflect', etc.
      Default is 'constant'.
    - value (int, optional): The value to use for constant padding. Default is 114.

    Returns:
    - torch.Tensor: The padded image tensor on the GPU.

    Note:
    - Ensure the input image tensor is already transferred to the GPU.
    - This function directly utilizes PyTorch's functionality for padding and GPU computation.
    """
    return F.pad(image_tensor, pad, mode=padding_mode, value=value)
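

# A minimal usage sketch for the padding helper above; the tensor sizes and pad widths
# are arbitrary example values, and _example_pad_image is a hypothetical helper.
def _example_pad_image() -> torch.Tensor:
    image = torch.zeros(3, 100, 200, device="cuda" if torch.cuda.is_available() else "cpu")
    # Add 10 px of constant padding (value 114) left/right and 5 px top/bottom.
    padded = pad_image_on_gpu_with_pytorch(image, (10, 10, 5, 5))
    assert padded.shape == (3, 110, 220)
    return padded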
def letterbox_cuda(im: cv2.cuda_GpuMat,
                   new_shape: Union[Tuple[int, int], List[int]] = (640, 640),
                   color: Union[Tuple[int, int, int], List[int]] = (114, 114, 114)) \
        -> Tuple[cv2.cuda_GpuMat, float, Tuple[float, float]]:
    """
    Resize and pad an image to a new shape using CUDA-accelerated functions,
    maintaining the aspect ratio of the original image. The padding is applied
    to ensure the resized image matches the new shape while keeping its original
    aspect ratio. This function is designed to work with images processed on a GPU.

    Parameters:
    - im (cv2.cuda_GpuMat): The input image as a GPU matrix.
    - new_shape (Union[Tuple[int, int], List[int]], optional): The target size of the image
      as a tuple (width, height). An int is treated as a square target. Default is (640, 640).
    - color (Union[Tuple[int, int, int], List[int]], optional): The color of the padding
      as a tuple (B, G, R). Default is (114, 114, 114).

    Returns:
    - Tuple[cv2.cuda_GpuMat, float, Tuple[float, float]]: A tuple containing the resized
      and padded image as a GPU matrix, the scale ratio (new / old), and the padding applied
      on the width and height as a tuple (dw, dh).

    Note:
    - The function assumes that the input image is already on the GPU.
    - The resizing operation is performed using linear interpolation.
    - The constant border is currently applied on the CPU via a download/upload round trip.
    """
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.size()  # current shape (width, height) for a GpuMat
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old); GpuMat.size() and new_shape are both (width, height),
    # so the indices line up directly.
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])

    # Compute the unpadded size and the remaining (width, height) padding
    new_unpad = (int(round(shape[0] * r)), int(round(shape[1] * r)))
    dw, dh = new_shape[0] - new_unpad[0], new_shape[1] - new_unpad[1]  # wh padding
    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape != new_unpad:  # resize
        im = cv2.cuda.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))

    # There is no direct CUDA equivalent of copyMakeBorder used here, so apply the
    # constant border on the CPU: download the GpuMat, pad it, and upload it back.
    im_cpu = im.download()
    im_cpu = cv2.copyMakeBorder(im_cpu, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    im = cv2.cuda_GpuMat()
    im.upload(im_cpu)
    return im, r, (dw, dh)
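

# A minimal usage sketch, assuming an OpenCV build with CUDA support; the input
# resolution is an arbitrary example value, and _example_letterbox_cuda is a
# hypothetical helper, not part of the module's API.
def _example_letterbox_cuda():
    host_frame = np.zeros((720, 1280, 3), dtype=np.uint8)
    gpu_frame = cv2.cuda.GpuMat()
    gpu_frame.upload(host_frame)
    boxed, ratio, (dw, dh) = letterbox_cuda(gpu_frame, new_shape=(640, 640))
    # For a 1280x720 input the scale ratio is 0.5, and the resized 640x360 frame is
    # padded symmetrically in height up to 640x640.
    w, h = boxed.size()
    assert (w, h) == (640, 640)
    return boxed, ratio, (dw, dh)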