-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.cpp
190 lines (160 loc) · 7.24 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#include "opencv2/core/base.hpp"
#include "opencv2/core/mat.hpp"
#include "opencv2/videoio.hpp"
#include <opencv2/core/utility.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <fstream>
std::vector<std::string> classes;
std::vector<float> confidences;
std::vector<cv::Rect> boxes;
std::vector<int> classIds;
std::vector<int> indices;
void frameProcess(cv::Mat &frame, cv::dnn::Net &net, const std::vector<std::string> &classes, float confThreshold, float nmsThreshold) {
cv::Mat blob;
cv::dnn::blobFromImage(frame, blob, 1/255.0, cv::Size(320, 320), cv::Scalar(0, 0, 0), true, false);
// std::cout << "blob size: " << blob.size << std::endl;
net.setInput(blob);
std::vector<cv::Mat> outputs;
net.forward(outputs, net.getUnconnectedOutLayersNames());
// std::cout << "output size: " << outputs[0].size << std::endl;
for (int i = 0; i < outputs[0].size[1]; i ++) {
float* data = (float*)outputs[0].data + i * outputs[0].size[2];
if (data[4] < confThreshold) continue;
float x_center = data[0] / 320.0f;
float y_center = data[1] / 320.0f;
float width = data[2] / 320.0f;
float height = data[3] / 320.0f;
float confidence = data[4];
float* classes_scores = data + 5;
cv::Mat scores(1, classes.size(), CV_32FC1);
cv::Point classIdPoint;
double maxClassScore;
cv::minMaxLoc(scores, NULL, &maxClassScore, NULL, &classIdPoint);
int class_id = classIdPoint.x;
/********************************DEBUG USE********************************/
// std::cout << "Detection " << i << ": " << std::endl;
// std::cout << " x_center: " << x_center << std::endl;
// std::cout << " y_center: " << y_center << std::endl;
// std::cout << " width: " << width << std::endl;
// std::cout << " height: " << height << std::endl;
// std::cout << " confidence: " << confidence << std::endl;
// std::cout << " class_id: " << class_id << std::endl;
// std::cout << " frame cols: " << frame.cols << std::endl;
// std::cout << " frame rows: " << frame.rows << std::endl;
// std::cout << std::endl;
/********************************DEBUG USE********************************/
int leftTop_x = (int)((x_center - width / 2) * frame.cols);
int leftTop_y = (int)((y_center - height / 2) * frame.rows);
int rect_width = (int)(width * frame.cols);
int rect_height = (int)(height * frame.rows);
confidences.push_back(confidence);
boxes.push_back(cv::Rect(leftTop_x, leftTop_y, rect_width, rect_height));
classIds.push_back((int)class_id);
}
cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
// std::cout << "indices size: " << indices.size() << std::endl;
}
void frameDisplay (bool is_display, cv::Mat &frame, bool isImage) {
if (is_display) {
for (int idx : indices) {
cv::Rect box = boxes[idx];
std::string label = classes[classIds[idx]] + ": " + cv::format("%.2f", confidences[idx]);
cv::rectangle(frame, box, cv::Scalar(120, 83, 34), 4);
int baseLine;
cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 1.2, 2, &baseLine);
cv::rectangle(frame, cv::Rect(box.x, box.y - labelSize.height - baseLine, labelSize.width, labelSize.height + baseLine), cv::Scalar(120, 83, 34), cv::FILLED);
cv::putText(frame, label, cv::Point(box.x, box.y - baseLine / 2), cv::FONT_HERSHEY_SIMPLEX, 1.2, cv::Scalar(0, 127, 235), 2, cv::LINE_4);
}
cv::imshow("Detection", frame);
if (isImage) {
cv::waitKey(0);
} else {
cv::waitKey(1);
}
}
}
std::string keys =
"{help h usage| | Print help message.}"
"{model m | ../best.onnx | Path to the .onnx model you trained / wanna use.}"
"{device | 0 | camera device number.}"
"{input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}"
"{classes | ../classes.txt | Path to the text file with names of classes.}"
"{conf | 0.5 | Confidence threshold.}"
"{nms | 0.4 | Non-Maximum Suppression threshold.}"
"{display d | true | Display the result on screen}";
int main(int argc, char** argv) {
cv::CommandLineParser parser(argc, argv, keys);
parser.about(" Object Detection using YOLOv5 (.onnx) model.\n\nFor example:\n./main -i=your/file/path\n");
if (parser.has("help")) {
parser.printMessage();
return 0;
}
CV_Assert(parser.has("model"));
cv::String modelPath = parser.get<cv::String>("model");
int deviceNum = parser.get<int>("device");
cv::String inputPath = parser.get<cv::String>("input");
cv::String classesPath = parser.get<cv::String>("classes");
float confThreshold = parser.get<float>("conf");
float nmsThreshold = parser.get<float>("nms");
bool is_display = parser.get<bool>("display");
std::ifstream ifs(classesPath);
CV_Assert(ifs.is_open());
std::string line;
while (getline(ifs, line)) {
classes.push_back(line);
}
cv::dnn::Net net = cv::dnn::readNetFromONNX(modelPath);
CV_Assert(!net.empty());
cv::VideoCapture cap;
cv::Mat frame;
bool isImage = false;
if (!inputPath.empty()) {
// judge if it's image or video
std::string ext = inputPath.substr(inputPath.find_last_of(".") + 1);
std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower);
if (ext == "bmp" || ext == "dib" || ext == "jpeg" || ext == "jpg" ||
ext == "jpe" || ext == "jp2" || ext == "png" || ext == "webp" ||
ext == "avif" || ext == "pbm" || ext == "pgm" || ext == "ppm" ||
ext == "pxm" || ext == "pnm" || ext == "pfm" || ext == "sr" ||
ext == "ras" || ext == "tiff" || ext == "tif" || ext == "exr" ||
ext == "hdr" || ext == "pic") {
isImage = true;
frame = cv::imread(inputPath);
CV_Assert(!frame.empty());
} else {
cap.open(inputPath);
}
} else {
cap.open(deviceNum);
}
CV_Assert(isImage || cap.isOpened());
cv::TickMeter tm;
if (isImage) {
tm.start();
frameProcess(frame, net, classes, confThreshold, nmsThreshold);
tm.stop();
std::cout << "process time: " << tm.getTimeMilli() << " ms" << std::endl;
frameDisplay(is_display, frame, isImage);
confidences.clear();
boxes.clear();
classIds.clear();
} else {
while (true) {
CV_Assert(cap.read(frame));
tm.start();
frameProcess(frame, net, classes, confThreshold, nmsThreshold);
tm.stop();
frameDisplay(is_display, frame, isImage);
std::cout << "process time: " << tm.getFPS() << std::endl;
std::cout << "fps: " << 1000 / tm.getFPS() << std::endl;
std::cout << std::endl;
confidences.clear();
boxes.clear();
classIds.clear();
}
}
return 0;
}