-
Notifications
You must be signed in to change notification settings - Fork 1
/
linearCombination_aot_compile.cpp
238 lines (202 loc) · 10 KB
/
linearCombination_aot_compile.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
// On linux, you can compile and run like so:
// g++ linearCombination_aot_compile.cpp -g -std=c++11 -I ./include -L ./bin -lHalide -lpthread -ldl -o lincombo_aot_generate
// LD_LIBRARY_PATH=./bin ./lincombo_aot_generate
// g++ linearCombination_aot_run.cpp lincombo_aot.o -lpthread -o lincombo_aot_run
// ./lincombo_aot_run
// On os x:
// g++ linearCombination_aot_compile.cpp -g -std=c++11 -I ./include -L ./bin -lHalide -o lincombo_aot_generate
// DYLD_LIBRARY_PATH=./bin ./lincombo_aot_generate
// g++ linearCombination_aot_run.cpp -g lincombo_aot.o -I ./include -I DarwinX86/pex_policy/10.1+1/include/ -I DarwinX86/daf_persistence/10.1+1/include/ -I DarwinX86/utils/10.1+1/include/ -I DarwinX86/daf_base/10.1+2/include/ -I DarwinX86/base/10.1+1/include/ -I DarwinX86/ndarray/10.1+2/include/ -I DarwinX86/pex_exceptions/10.1+1/include/ -I DarwinX86/eigen/3.2.0/include/ -I DarwinX86/afw/10.1+1/include -L ./bin -L DarwinX86/afw/10.1+1/lib -L DarwinX86/daf_base/10.1+2/lib/ -L DarwinX86/daf_persistence/10.1+1/lib/ -L DarwinX86/boost/1.55.0.1.lsst2+3/lib/ -lHalide -lafw -ldaf_base -ldaf_persistence -lboost_system `libpng-config --cflags --ldflags` -o lincombo_aot_run -std=c++11
// DYLD_LIBRARY_PATH=./bin:DarwinX86/afw/10.1+1/lib/:DarwinX86/daf_persistence/10.1+1/lib/:DarwinX86/daf_base/10.1+2/lib/:DarwinX86/boost/1.55.0.1.lsst2+3/lib/:DarwinX86/xpa/2.1.15.lsst2/lib/:DarwinX86/pex_policy/10.1+1/lib/:DarwinX86/pex_logging/10.1+1/lib/:DarwinX86/utils/10.1+1/lib/:DarwinX86/pex_exceptions/10.1+1/lib/:DarwinX86/base/10.1+1/lib/ ./lincombo_aot_run
/*
*
* This file serves as an example of how to perform "ahead of time"
* (AOT) compilation of Halide pipelines for later use by other
* applications.
*
* The code defines a Halide pipeline and its accompanying schedule,
* and then compiles the resulting pipeline to an object file and
* associated C header file. The object file can then be linked into
* any application that wishes to use the resulting Halide pipeline.
*
* The Halide pipeline uses a spatially varying convolution kernel
* formed from the weighted combination of 5 basis kernels.
*
*/
#include <stdio.h>
#include <Halide.h>
#include <bitset>
using namespace std;
using namespace Halide;
int main(int argc, char *argv[]) {
const int num_kernels = 5;
ImageParam image(type_of<float>(), 2);
ImageParam variance(type_of<float>(), 2);
ImageParam mask(type_of<uint16_t>(), 2);
ImageParam polynomialCoefficients(type_of<float>(), 2); //array with dimension [10][5]
//Kernel parameters array with dimensions [3][5]
//first dimension: sigmaX, sigmaY, theta
ImageParam kerParams(type_of<float>(), 2);
/*
*
* First define the Halide pipeline. This is the functional
* specification of the image processing algorithm.
*
*/
//Kernel has dimensions (boundingBox*2 + 1) x (boundingBox*2 + 1)
int boundingBox = 2;
Var x, y, i, j, y0, yi;
float pi = 3.14159265359f;
Func polynomials[num_kernels];
for(int k = 0; k < num_kernels; k++){
polynomials[k](x, y) = polynomialCoefficients(0,k) +
polynomialCoefficients(1,k)*x + polynomialCoefficients(2,k)*y +
polynomialCoefficients(3,k)*x*x + polynomialCoefficients(4,k)*x*y +
polynomialCoefficients(5,k)*y*y + polynomialCoefficients(6,k)*x*x*x +
polynomialCoefficients(7,k)*x*x*y + polynomialCoefficients(8,k)*x*y*y
+ polynomialCoefficients(9,k)*y*y*y;
}
Func kernels[num_kernels];
for(int k = 0; k < num_kernels; k++){
kernels[k](i, j) = exp(-((i*cos(kerParams(2,k)) + j*sin(kerParams(2,k)))*
(i*cos(kerParams(2,k)) + j*sin(kerParams(2,k))))
/ (2*kerParams(0,k)*kerParams(0,k))
-((j*cos(kerParams(2,k)) - i*sin(kerParams(2,k)))*
(j*cos(kerParams(2,k)) - i*sin(kerParams(2,k))))
/ (2*kerParams(1,k)*kerParams(1,k))) /
(2.0f*pi*kerParams(0,k)*kerParams(1,k));
}
//Compute output image plane
Func image_bounded ("image_bounded");
image_bounded = BoundaryConditions::repeat_edge(image);
Expr blur_image_help = 0.0f;
Expr norm = 0.0f;
//Compute output variance plane
Func variance_bounded ("variance_bounded");
variance_bounded = BoundaryConditions::repeat_edge(variance);
Func blurVariance ("blurVariance");
Expr blur_variance_help = 0.0f;
//Compute output mask plane
Func mask_bounded ("mask_bounded");
mask_bounded = BoundaryConditions::repeat_edge(mask);
Func maskOut ("maskOut");
Expr maskOutHelp = cast<uint16_t>(0);
Expr curKernelVal = 0.0f;
for(int i = -boundingBox; i <= boundingBox; i++){
for(int j = -boundingBox; j <= boundingBox; j++){
for(int k = 0; k < num_kernels; k++){
curKernelVal += polynomials[k](x, y)*kernels[k](i, j);
}
blur_image_help += image_bounded(x + i, y + j)*curKernelVal;
blur_variance_help += variance_bounded(x + i, y + j)*curKernelVal*curKernelVal;
maskOutHelp = select(curKernelVal == 0.0f, maskOutHelp,
maskOutHelp | mask_bounded(x + i, y + j));
norm += curKernelVal;
}
}
blur_image_help = blur_image_help/norm;
blur_variance_help = blur_variance_help/(norm*norm);
//Evaluate image, mask, and variance planes concurrently using a tuple
Func combined_output ("combined_output");
combined_output(x, y) = Tuple(blur_image_help, blur_variance_help, maskOutHelp);
/*
*
* Here is the definition of the Halide schedule. The schedule
* describes how to implement the kernel described above.
*
*/
// Split the y coordinate of the output into strips of 32 scanlines:
combined_output.split(y, y0, yi, 32);
// Compute the strips in parallel using all the machines
// cores. (You can think of processing a strip as a job that
// placed task queue, and serviced by a bunch of worker threads).
combined_output.parallel(y0);
// Vectorize across x loop by a factor of eight.
combined_output.vectorize(x, 8);
/*
*
* The statement below generates the .o file (and .h) for the
* Halide kernel that can be linked against by other applications.
*
*/
std::vector<Argument> args = {image, variance, mask, polynomialCoefficients, kerParams};
combined_output.compile_to_file("lincombo_aot", args);
printf("Halide pipeline has been compiled. You can now link against the resulting .o\n");
return 0;
}
// //Five 3rd degree polynomials which will be used as spatially varying
// //coefficients in the linear combination of the five gaussian basis kernels
// Func polynomial1 ("polynomial1");
// polynomial1(x, y) = 0.1f + 0.002f*x + 0.003f*y + 0.4f*x*x + 0.5f*x*y
// + 0.6f*y*y + 0.0007f*x*x*x + 0.0008f*x*x*y + 0.0009f*x*y*y
// + 0.00011f*y*y*y;
//
// Func polynomial2 ("polynomial2");
// polynomial2(x, y) = 1.1f + 1.002f*x + 1.003f*y + 1.4f*x*x + 1.5f*x*y
// + 1.6f*y*y + 1.0007f*x*x*x + 1.0008f*x*x*y + 1.0009f*x*y*y
// + 1.00011f*y*y*y;
//
// Func polynomial3 ("polynomial3");
// polynomial3(x, y) = 2.1f + 2.002f*x + 2.003f*y + 2.4f*x*x + 2.5f*x*y
// + 2.6f*y*y + 2.0007f*x*x*x + 2.0008f*x*x*y + 2.0009f*x*y*y
// + 2.00011f*y*y*y;
//
// Func polynomial4 ("polynomial4");
// polynomial4(x, y) = 3.1f + 3.002f*x + 3.003f*y + 3.4f*x*x + 3.5f*x*y
// + 3.6f*y*y + 3.0007f*x*x*x + 3.0008f*x*x*y + 3.0009f*x*y*y
// + 3.00011f*y*y*y;
//
// Func polynomial5 ("polynomial5");
// polynomial5(x, y) = 4.1f + 4.002f*x + 4.003f*y + 4.4f*x*x + 4.5f*x*y
// + 4.6f*y*y + 4.0007f*x*x*x + 4.0008f*x*x*y + 4.0009f*x*y*y
// + 4.00011f*y*y*y;
//5 Gaussian basis kernels
//Kernel #1
// Func kernel1;
// float sigmaX1 = 2.0f;
// float sigmaY1 = 2.0f;
// float theta1 = 0.0f; //rotation of sigmaX axis
// kernel1(x, y) = exp(-((x*cos(theta1) + y*sin(theta1))*(x*cos(theta1) + y*sin(theta1)))
// /(2*sigmaX1*sigmaX1)
// -((y*cos(theta1) - x*sin(theta1))*(y*cos(theta1) - x*sin(theta1)))
// /(2*sigmaY1*sigmaY1)) / (2.0f*pi*sigmaX1*sigmaY1);
//
// //Kernel #2
// Func kernel2;
// float sigmaX2 = 0.5f;
// float sigmaY2 = 4.0f;
// float theta2 = 0.0f; //rotation of sigmaX axis
// kernel2(x, y) = exp(-((x*cos(theta2) + y*sin(theta2))*(x*cos(theta2) + y*sin(theta2)))
// /(2*sigmaX2*sigmaX2)
// -((y*cos(theta2) - x*sin(theta2))*(y*cos(theta2) - x*sin(theta2)))
// /(2*sigmaY2*sigmaY2)) / (2.0f*pi*sigmaX2*sigmaY2);
//
// //Kernel #3
// Func kernel3;
// float sigmaX3 = 0.5f;
// float sigmaY3 = 4.0f;
// float theta3 = 3.14159f/4; //rotation of sigmaX axis
// kernel3(x, y) = exp(-((x*cos(theta3) + y*sin(theta3))*(x*cos(theta3) + y*sin(theta3)))
// /(2*sigmaX3*sigmaX3)
// -((y*cos(theta3) - x*sin(theta3))*(y*cos(theta3) - x*sin(theta3)))
// /(2*sigmaY3*sigmaY3)) / (2.0f*pi*sigmaX3*sigmaY3);
// //Kernel #4
// Func kernel4;
// float sigmaX4 = 0.5f;
// float sigmaY4 = 4.0f;
// float theta4 = 3.14159f/2; //rotation of sigmaX axis
// kernel4(x, y) = exp(-((x*cos(theta4) + y*sin(theta4))*(x*cos(theta4) + y*sin(theta4)))
// /(2*sigmaX4*sigmaX4)
// -((y*cos(theta4) - x*sin(theta4))*(y*cos(theta4) - x*sin(theta4)))
// /(2*sigmaY4*sigmaY4)) / (2.0f*pi*sigmaX4*sigmaY4);
//
//
// //Kernel #5
// Func kernel5;
// float sigmaX5 = 4.0f;
// float sigmaY5 = 4.0f;
// float theta5 = 0.0; //rotation of sigmaX axis
// kernel5(x, y) = (exp(-((x*cos(theta5) +y*sin(theta5))*(x*cos(theta5) +y*sin(theta5)))
// /(2*sigmaX5*sigmaX5)))
// *(exp(-((y*cos(theta5) - x*sin(theta5))*(y*cos(theta5) - x*sin(theta5)))
// /(2*sigmaY5*sigmaY5)) / (2.0f*pi*sigmaX5*sigmaY5));