Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bump z3-solver from 4.13.2.0 to 4.13.3.0 #722

Merged
merged 2 commits into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 52 additions & 10 deletions examples/rvm_conv1d/conv1Di32.h
Original file line number Diff line number Diff line change
@@ -1,16 +1,58 @@
#ifndef _CONV1Di32
#define _CONV1Di32
#ifndef _CONV1Di32
#define _CONV1Di32
// This file is automatically generated
int32_t __attribute__((section(".xheep_data_interleaved"))) DATA[] = {
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
int32_t __attribute__((section(".xheep_data_interleaved")))
DATA[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

int32_t __attribute__((section(".xheep_data_interleaved"))) KERNELS[] = {
0,1,2,3,10,11,12,13,20,21,22,23,30,31,32,33,100,101,102,103,110,111,112,113,120,121,122,123,130,131,132,133,200,201,202,203,210,211,212,213,220,221,222,223,230,231,232,233,300,301,302,303,310,311,312,313,320,321,322,323,330,331,332,333,400,401,402,403,410,411,412,413,420,421,422,423,430,431,432,433,500,501,502,503,510,511,512,513,520,521,522,523,530,531,532,533,600,601,602,603,610,611,612,613,620,621,622,623,630,631,632,633,700,701,702,703,710,711,712,713,720,721,722,723,730,731,732,733,
800,801,802,803,810,811,812,813,820,821,822,823,830,831,832,833,900,901,902,903,910,911,912,913,920,921,922,923,930,931,932,933,1000,1001,1002,1003,1010,1011,1012,1013,1020,1021,1022,1023,1030,1031,1032,1033,1100,1101,1102,1103,1110,1111,1112,1113,1120,1121,1122,1123,1130,1131,1132,1133,1200,1201,1202,1203,1210,1211,1212,1213,1220,1221,1222,1223,1230,1231,1232,1233,1300,1301,1302,1303,1310,1311,1312,1313,1320,1321,1322,1323,1330,1331,1332,1333,1400,1401,1402,1403,1410,1411,1412,1413,1420,1421,1422,1423,1430,1431,1432,1433,1500,1501,1502,1503,1510,1511,1512,1513,1520,1521,1522,1523,1530,1531,1532,1533};
int32_t __attribute__((section(".xheep_data_interleaved")))
KERNELS[] = {0, 1, 2, 3, 10, 11, 12, 13, 20, 21, 22, 23, 30, 31, 32, 33, 100,
101, 102, 103, 110, 111, 112, 113, 120, 121, 122, 123, 130, 131, 132, 133,
200, 201, 202, 203, 210, 211, 212, 213, 220, 221, 222, 223, 230, 231, 232,
233, 300, 301, 302, 303, 310, 311, 312, 313, 320, 321, 322, 323, 330, 331,
332, 333, 400, 401, 402, 403, 410, 411, 412, 413, 420, 421, 422, 423, 430,
431, 432, 433, 500, 501, 502, 503, 510, 511, 512, 513, 520, 521, 522, 523,
530, 531, 532, 533, 600, 601, 602, 603, 610, 611, 612, 613, 620, 621, 622,
623, 630, 631, 632, 633, 700, 701, 702, 703, 710, 711, 712, 713, 720, 721,
722, 723, 730, 731, 732, 733, 800, 801, 802, 803, 810, 811, 812, 813, 820,
821, 822, 823, 830, 831, 832, 833, 900, 901, 902, 903, 910, 911, 912, 913,
920, 921, 922, 923, 930, 931, 932, 933, 1000, 1001, 1002, 1003, 1010, 1011,
1012, 1013, 1020, 1021, 1022, 1023, 1030, 1031, 1032, 1033, 1100, 1101,
1102, 1103, 1110, 1111, 1112, 1113, 1120, 1121, 1122, 1123, 1130, 1131,
1132, 1133, 1200, 1201, 1202, 1203, 1210, 1211, 1212, 1213, 1220, 1221,
1222, 1223, 1230, 1231, 1232, 1233, 1300, 1301, 1302, 1303, 1310, 1311,
1312, 1313, 1320, 1321, 1322, 1323, 1330, 1331, 1332, 1333, 1400, 1401,
1402, 1403, 1410, 1411, 1412, 1413, 1420, 1421, 1422, 1423, 1430, 1431,
1432, 1433, 1500, 1501, 1502, 1503, 1510, 1511, 1512, 1513, 1520, 1521,
1522, 1523, 1530, 1531, 1532, 1533};

int32_t __attribute__((section(".xheep_data_interleaved"))) EXPECTED[] = {
416,680,944,1208,1472,1736,2000,2264,2528,2792,3056,3320,3584,2696,1800,900,2816,4680,6544,8408,10272,12136,14000,15864,17728,19592,21456,23320,25184,19496,13400,6900,5216,8680,12144,15608,19072,22536,26000,29464,32928,36392,39856,43320,46784,36296,25000,12900,7616,12680,17744,22808,27872,32936,38000,43064,48128,53192,58256,63320,68384,53096,36600,18900,10016,16680,23344,30008,36672,43336,50000,56664,63328,69992,76656,83320,89984,69896,48200,24900,12416,20680,28944,37208,45472,53736,62000,70264,78528,86792,95056,103320,111584,86696,59800,30900,14816,24680,34544,44408,54272,64136,74000,83864,93728,103592,113456,123320,133184,103496,71400,36900,17216,28680,40144,51608,63072,74536,86000,97464,108928,120392,131856,143320,154784,120296,83000,42900,
19616,32680,45744,58808,71872,84936,98000,111064,124128,137192,150256,163320,176384,137096,94600,48900,22016,36680,51344,66008,80672,95336,110000,124664,139328,153992,168656,183320,197984,153896,106200,54900,24416,40680,56944,73208,89472,105736,122000,138264,154528,170792,187056,203320,219584,170696,117800,60900,26816,44680,62544,80408,98272,116136,134000,151864,169728,187592,205456,223320,241184,187496,129400,66900,29216,48680,68144,87608,107072,126536,146000,165464,184928,204392,223856,243320,262784,204296,141000,72900,31616,52680,73744,94808,115872,136936,158000,179064,200128,221192,242256,263320,284384,221096,152600,78900,34016,56680,79344,102008,124672,147336,170000,192664,215328,237992,260656,283320,305984,237896,164200,84900,36416,60680,84944,109208,133472,157736,182000,206264,230528,254792,279056,303320,327584,254696,175800,90900};
int32_t __attribute__((section(".xheep_data_interleaved"))) EXPECTED[] = {416,
680, 944, 1208, 1472, 1736, 2000, 2264, 2528, 2792, 3056, 3320, 3584, 2696,
1800, 900, 2816, 4680, 6544, 8408, 10272, 12136, 14000, 15864, 17728, 19592,
21456, 23320, 25184, 19496, 13400, 6900, 5216, 8680, 12144, 15608, 19072,
22536, 26000, 29464, 32928, 36392, 39856, 43320, 46784, 36296, 25000, 12900,
7616, 12680, 17744, 22808, 27872, 32936, 38000, 43064, 48128, 53192, 58256,
63320, 68384, 53096, 36600, 18900, 10016, 16680, 23344, 30008, 36672, 43336,
50000, 56664, 63328, 69992, 76656, 83320, 89984, 69896, 48200, 24900, 12416,
20680, 28944, 37208, 45472, 53736, 62000, 70264, 78528, 86792, 95056,
103320, 111584, 86696, 59800, 30900, 14816, 24680, 34544, 44408, 54272,
64136, 74000, 83864, 93728, 103592, 113456, 123320, 133184, 103496, 71400,
36900, 17216, 28680, 40144, 51608, 63072, 74536, 86000, 97464, 108928,
120392, 131856, 143320, 154784, 120296, 83000, 42900, 19616, 32680, 45744,
58808, 71872, 84936, 98000, 111064, 124128, 137192, 150256, 163320, 176384,
137096, 94600, 48900, 22016, 36680, 51344, 66008, 80672, 95336, 110000,
124664, 139328, 153992, 168656, 183320, 197984, 153896, 106200, 54900,
24416, 40680, 56944, 73208, 89472, 105736, 122000, 138264, 154528, 170792,
187056, 203320, 219584, 170696, 117800, 60900, 26816, 44680, 62544, 80408,
98272, 116136, 134000, 151864, 169728, 187592, 205456, 223320, 241184,
187496, 129400, 66900, 29216, 48680, 68144, 87608, 107072, 126536, 146000,
165464, 184928, 204392, 223856, 243320, 262784, 204296, 141000, 72900,
31616, 52680, 73744, 94808, 115872, 136936, 158000, 179064, 200128, 221192,
242256, 263320, 284384, 221096, 152600, 78900, 34016, 56680, 79344, 102008,
124672, 147336, 170000, 192664, 215328, 237992, 260656, 283320, 305984,
237896, 164200, 84900, 36416, 60680, 84944, 109208, 133472, 157736, 182000,
206264, 230528, 254792, 279056, 303320, 327584, 254696, 175800, 90900};

#define N 16
#define IC 4
Expand Down
21 changes: 16 additions & 5 deletions examples/rvm_conv1d/exo/conv1d.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
W = 4
OC = 16
TILE = 4


def gen_conv1d():
@proc
def generic_conv1d(
Expand All @@ -38,12 +40,15 @@ def generic_conv1d(
else:
y = 0
out[i, j] += kernels[i, c, r] * y

return generic_conv1d


##############
# HW LIBRARY #
##############


class RVM_TILE(StaticMemory):
NUM_RVM_TILES = 8
StaticMemory.init_state(NUM_RVM_TILES)
Expand Down Expand Up @@ -125,10 +130,12 @@ def rvm_mmasa(
for k in seq(0, 4):
md[i, j] += ms2[i, k] * ms1[j, k]


##########################
# CUSTOM REWRITING RULES #
##########################


def fuse_two_loops(p, c):
"""
for i in ...: <- c
Expand Down Expand Up @@ -186,6 +193,7 @@ def fuse_all_loops(p, cursor):

return p


def autolift_alloc(p, alloc_c, dep_set=None, max_size=0, lift=True):
"""
for i in seq(0, 10):
Expand Down Expand Up @@ -219,6 +227,7 @@ def autolift_alloc(p, alloc_c, dep_set=None, max_size=0, lift=True):
break
return p


def reorder_top(p, c):
"""
for i in seq(0, 10):
Expand Down Expand Up @@ -317,10 +326,12 @@ def remove_redundant_loops(p, c, num=0):
continue
return p


##############
# SCHEDULING #
##############


def optimize_conv(p):
p = rename(p, "exo_conv1d_tile_lt_kw")

Expand All @@ -341,7 +352,7 @@ def optimize_conv(p):
p, (out_alloc, out_tile, body, _) = auto_stage_mem(
p, p.find_loop("c").expand(1, 0), "out", "out_tile", rc=True
)
p = autolift_alloc(p, out_tile, max_size=4 * 4 * 4, dep_set=["ioi","ii","ji"])
p = autolift_alloc(p, out_tile, max_size=4 * 4 * 4, dep_set=["ioi", "ii", "ji"])

# Block the zero initialization and store blocks
p = fission_as_much_as_possible(p, body)
Expand All @@ -351,13 +362,13 @@ def optimize_conv(p):
p = lift_scope_n(p, c_loop, 3)

# Stage y
p = autolift_alloc(p, y_alloc, max_size=4 * 4, dep_set=["r","ji"])
p = autolift_alloc(p, y_alloc, max_size=4 * 4, dep_set=["r", "ji"])
p = lift_alloc(p, y_alloc, n_lifts=2)

# Fission the initialization loop and remove redundant loops
p = fission_as_much_as_possible(p, y_assign.parent())
p = remove_redundant_loops(p, y_assign.parent(), num=2)

# Stage kernels to kernel_tile and y to data_tile
ii_loop = p.forward(c_loop).body()[2].body()[0]
p, (kernel_alloc, _, _, _) = auto_stage_mem(
Expand Down Expand Up @@ -386,7 +397,7 @@ def optimize_conv(p):
p = unroll_buffer(p, kernel_alloc, 0)
p = reuse_buffer(p, "kernel_tile_0: _", "kernel_tile_3: _")
p = unroll_buffer(p, "out_tile", 0)

return p


Expand All @@ -396,4 +407,4 @@ def make_routine():
return rvm_optimized


exo_conv1d_tile_lt_kw = make_routine()
exo_conv1d_tile_lt_kw = make_routine()
92 changes: 57 additions & 35 deletions examples/rvm_conv1d/gen_stimuli.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,26 @@
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.


def write_arr(f, name, arr, ctype, size, linebreak):
f.write(ctype + " " + name + "[] = {\n\t")
i = 1
for v in arr:
if i % size == 0:
f.write('%d};\n\n' % (v))
f.write("%d};\n\n" % (v))
elif i % linebreak == 0:
f.write('%d,\n\t' % (v))
f.write("%d,\n\t" % (v))
else:
f.write('%d,' % (v))
i+=1
f.write("%d," % (v))
i += 1
return


################################################################################
f = open('conv1Di32.h', 'w')
f.write('#ifndef _CONV1Di32 \n')
f.write('#define _CONV1Di32 \n')
f.write('// This file is automatically generated\n')
f = open("conv1Di32.h", "w")
f.write("#ifndef _CONV1Di32 \n")
f.write("#define _CONV1Di32 \n")
f.write("// This file is automatically generated\n")


N = 16
Expand All @@ -40,48 +41,69 @@ def write_arr(f, name, arr, ctype, size, linebreak):
W = 4
RANGE = 4095

data = []
kernel = []
data = []
kernel = []
expected = []

pad = 1

# N C W format
for i in range(0,IC):
for j in range(0,N):
for i in range(0, IC):
for j in range(0, N):
data.append(j)
#data.append(random.randint(-RANGE, RANGE-1))
# data.append(random.randint(-RANGE, RANGE-1))

# O I W format
for i in range(0,OC):
for j in range(0,IC):
for k in range(0,W):
kernel.append(i*100+j*10+k)
#kernel.append(random.randint(-RANGE, RANGE-1))
for i in range(0, OC):
for j in range(0, IC):
for k in range(0, W):
kernel.append(i * 100 + j * 10 + k)
# kernel.append(random.randint(-RANGE, RANGE-1))

# O W format
for i in range(0,OC):
for j in range(0,N):
for i in range(0, OC):
for j in range(0, N):
sum = 0
for w_i in range(0,W):
for w_j in range(0,IC):
for w_i in range(0, W):
for w_j in range(0, IC):
data_idx = j + w_i
data_at_idx = 0
if data_idx < N:
data_at_idx = data[w_j * N + j + w_i]
sum += kernel[(IC * i + w_j)*W + w_i] * data_at_idx
sum += kernel[(IC * i + w_j) * W + w_i] * data_at_idx
expected.append(sum)


write_arr(f, 'DATA' , data, 'int32_t __attribute__((section(".xheep_data_interleaved")))', IC * N, 128)
write_arr(f, 'KERNELS' , kernel, 'int32_t __attribute__((section(".xheep_data_interleaved")))', OC * IC * W, 128)
write_arr(f, 'EXPECTED', expected, 'int32_t __attribute__((section(".xheep_data_interleaved")))', OC * N, 128)

f.write('#define N %d\n' % N)
f.write('#define IC %d\n' % IC)
f.write('#define W %d\n' % W)
f.write('#define OC %d\n' % OC)
f.write('#define PAD %d\n' % 1)


f.write('#endif')
write_arr(
f,
"DATA",
data,
'int32_t __attribute__((section(".xheep_data_interleaved")))',
IC * N,
128,
)
write_arr(
f,
"KERNELS",
kernel,
'int32_t __attribute__((section(".xheep_data_interleaved")))',
OC * IC * W,
128,
)
write_arr(
f,
"EXPECTED",
expected,
'int32_t __attribute__((section(".xheep_data_interleaved")))',
OC * N,
128,
)

f.write("#define N %d\n" % N)
f.write("#define IC %d\n" % IC)
f.write("#define W %d\n" % W)
f.write("#define OC %d\n" % OC)
f.write("#define PAD %d\n" % 1)


f.write("#endif")
Loading