From 32127bf7f5a622c94e68f3192365cb0ff6ce8518 Mon Sep 17 00:00:00 2001 From: Jack Lo <36210336+jackl-xilinx@users.noreply.github.com> Date: Wed, 24 Apr 2024 14:25:34 -0700 Subject: [PATCH] Change vec_scal_add examples to vec_scal_mul and cleaned up README references (#1400) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- .../matrix_multiplication/makefile-common | 2 +- .../assets/trace_vector_scalar_mul1.png | Bin 0 -> 54798 bytes programming_guide/quick_reference.md | 6 + programming_guide/section-1/Makefile | 2 +- programming_guide/section-3/Makefile | 6 +- programming_guide/section-3/README.md | 1 - programming_guide/section-3/test.cpp | 14 +- programming_guide/section-3/test.py | 8 +- programming_guide/section-4/CMakeLists.txt | 70 ----- programming_guide/section-4/aie2.py | 74 ----- .../section-4/section-4a/Makefile | 10 +- .../section-4/section-4a/README.md | 5 +- .../section-4/section-4a/aie2.py | 96 ++++--- .../section-4/section-4a/answers/aie2.py | 79 ------ .../section-4/section-4a/answers/test.cpp | 256 ------------------ .../section-4/section-4a/test.cpp | 171 +++++------- .../section-4/section-4a/test.py | 52 ++-- .../section-4/section-4a/vector_scalar_mul.cc | 25 ++ .../section-4/section-4b/Makefile | 11 +- .../section-4/section-4b/README.md | 34 ++- .../section-4/section-4b/aie2.py | 104 ++++--- .../section-4/section-4b/test.cpp | 174 +++++------- .../section-4/section-4b/test.py | 50 ++-- .../section-4/section-4b/vector_scalar_mul.cc | 26 ++ .../section-4/section-4c/README.md | 2 +- programming_guide/section-4/test.cpp | 256 ------------------ programming_guide/section-4/test.py | 132 --------- programming_guide/section-4/test_trace.cpp | 256 ------------------ programming_guide/section-4/test_trace.py | 142 ---------- 29 files changed, 379 insertions(+), 1685 deletions(-) create mode 100755 programming_guide/assets/trace_vector_scalar_mul1.png delete mode 100644 
programming_guide/section-4/CMakeLists.txt delete mode 100644 programming_guide/section-4/aie2.py delete mode 100644 programming_guide/section-4/section-4a/answers/aie2.py delete mode 100644 programming_guide/section-4/section-4a/answers/test.cpp create mode 100755 programming_guide/section-4/section-4a/vector_scalar_mul.cc create mode 100755 programming_guide/section-4/section-4b/vector_scalar_mul.cc delete mode 100644 programming_guide/section-4/test.cpp delete mode 100644 programming_guide/section-4/test.py delete mode 100644 programming_guide/section-4/test_trace.cpp delete mode 100644 programming_guide/section-4/test_trace.py diff --git a/programming_examples/basic/matrix_multiplication/makefile-common b/programming_examples/basic/matrix_multiplication/makefile-common index a57dff389c..c8486817a0 100644 --- a/programming_examples/basic/matrix_multiplication/makefile-common +++ b/programming_examples/basic/matrix_multiplication/makefile-common @@ -100,4 +100,4 @@ clean: clean_trace .PHONY: clean_trace clean_trace: - rm -rf tmpTrace parse*.json + rm -rf tmpTrace parse*.json trace.txt diff --git a/programming_guide/assets/trace_vector_scalar_mul1.png b/programming_guide/assets/trace_vector_scalar_mul1.png new file mode 100755 index 0000000000000000000000000000000000000000..0e634677158e53f2833d333adeaca2c319278d59 GIT binary patch literal 54798 zcmeFZby$>J+c(S>)KL+K?vie#OVB~OTe`cuL8Jtvy9NOnq@}y0B}N!Ra-0^?|XlQ6pWu)JzqM<$HMnn4} z4RjBQ*AJ~D2 zKtY_uDPY`wjD$z1SM-n4&A8HkKY)S{(ISG-j6sjgzQuK726=rvcuqT86>qj{YwK9+ zgDff8x>okkbzpPYojAGmbarkmDN)KURYv5oGy)eMe3?}H*9EV?*^l&K)K2v z%%?*YG0@P~FnWi8LEeq$vOPgVOJ#`6OOjoMSOUFV@BcYqoKTJVA85a-LIW}WKw~%F zxO@9wZK!@a%`)YAO9JRYLcwEmgfWzcU5tB0^MQ7aKQO?*TJMYeky0v}pnF0_^jQ&h zV@>VlJA;M6B2s>@d)JcrpV^cMCuTgX#irml{JRSpS{DEHh~1}mF6D*d+Unh}3Yfb1 z;}%IjL{q;%D}VK25I)!7($K`cDQ3$xQ<|YIOY~1qszrrDZWhk&TBt{P;WVD?r$^Lp zWX|xuNzPVg!H&@#mI%GCMLYgNB1dh)`gWou(5qaYJ;zVQ_G9y6VhFW3vB`fW|hp-b` zjg{+}8l#oxiEw7T$fw(rX{bX<4m^y7ZB(tN2Y-D{rJY$GTv1%oXC~P2`g{3yJvXK} 
z_H)om6UUsOiRaJ~{oh|jLo<;t<7VlPaBUrGgsy3<6RMIezCWUT$n6`(qpNKRrEg%J z)y<+%w?K}p2WV(|ib=%t_@DXmyZPuHqLU9`8&}0wGbbOekyf;O%xdI1y#?n5J27SC zF-C6f)V2K1oWi0kSrTw4ZWbbtP>~}@-D&aI4!k}MZ}&xI$745tjK6=cQ$Va-vxv|B z%kN8c2BoRqqo@g+34GE?AOE(*!ma1=({^;|XlNa8-&4J1O<7yBi0$h_i6|g^S_2N2 zdd|_){6Y&+vSp3IZ&Ekhq4kZ`en>jaEsUG2Kelluql1h|OldJrtplp5}RajWi9k`JUTZ+`X6cdjO&j}^eq2LL3%47; zne&(jk4CwIoYX4~&u|XJ6cWy&>{)Okyjn%;XL0qH+!}Reg`3%0#cn!pr~P`u)N-bL zTiebODG6yzF(0%}ZM%Al5E_yK)cVYT6G0~IVdv%bmOc?5A3r%M$*NW-2R=13Gc!7B zT&ojHE?no0vItARQB#t_#poPE%r1=LV}@E~D9R`rRQg5e6f~VNSiW2K4o9SKT(25& zZa4|i*oAyVrqN;ZEjbBsbC314O)N}LjLop;TW7~CTV6Gvzga(hxPxWMNwjZofLL>D z<~4=3ajK9D#t3WZI@SvlNy4T!Eo$@&x8+T%XP2=tUgv9a=&NQxQ|Q}xFP=!NB)zUM ztTU?|O`~lS7BG8>_?eUrZL_nUmd8me29Y`oWK|!hVgk$4(`4t`woU4YdzXGl2yLfS zo@X*^;aAk1%*wee3p~?tP&|1zakA~15pZt&8bNH@org8G_0wV(i`@~a6;mx_6=e8|)xT?h1SaI9y}7efGi`_2rLL}CYS5}vGJylB(b?UaC}Ck{ zCJL*Zw#&}WuC8r787b$E9!gZCb;~ zr|hz2Ovg7}vIX2{wo$8VX80@X&SYAyP30AVul(xET2a8ZQ-QS8iHn#9C@N07ix4rc z_0tC;@G5MA(-J`8zgqfms@0s8ambz<>RQ6BNWF@yxc9++5SFgG-*_1l$;-kNDRi`1 zNu4?PGU2E$`=+?l-U;zOXB2mlp7|_`bJ+0nC3n$yum|u;oBtkAViCwpd34kfu*9AgT0v7#uq9OSH{Tj@IsBTeUPA1nX0#c+u~xpQmm$jn+}NDCfUZmr}(Mu z)XM4_EdwcK!>Pl;_X$1|F}Jgy2|WedljS1=BV*g{&*nV&2dhHjjA#49DJhS9{CN~O z*qID;+n71{b@NqR$=02kGIeJZ$Nfa@NYfXLfn8}3aVE2@e2ekA9}efwG~K7+%N*v* zEX&XDGO5JF#~(1OB6f%|p9WM_vT9?UU$SBeT}Vxxdrh6^Tb~PpRQ3$x-}h-j_ehPI zbnQnzdAwT|KJp-X-B(E`%e@1Twkc52*I>Z6SX0=RBfz&F-&&O$ zOkvY!&ELE|>k2GZFHTs>OESyX%x7ZlS5o4}hUhuO5;cCJ+9!RskeZl1KRxl%j&#(z zmVa-?c@^ef(Q-3sIl*AgB}DVGtl^7t$pj~t_COwl{TnvK=kh^uQt?MOHD{F|&3u@3 zv71W#*;lS;Nj6I&NPp9zMquD6xu9TrSy}evqH%$0quD`TnU+1XzWPzPSS!C7IkeYU z+le3PHUF&rGFE4IJiy@a{7UTN^!9S$GB7*86b6k)xp)_2-T( z!q#@--mmtbXUA|O-bgx1XFWuunSny5rD?f*iYJ3QbITp%81XE$>n`3X7zcS9Iel0^ zFOVZ(S#fy{CdttGw)lIjYOL+%b>U!gX5u_zwy&ymG)-Oo?I!_3%RK+sgaNB)$$WLD z>!5cNHiN^%7VJC82%a)kW>bfd(}(vgNiw>Rieqwk*4NF=Ex?dD2PI!iEk-<5=H#Bx z`wxPlS($OE#zX~>J-5bGc5O0l!Y$fx&%<{<*&e`5^1j@2g^pV7Z0xuQ>AN(xG<<#4 
zl5D_g9$>K7@k^NzZ*2RRE-F%wz5Z>~cv8|*-n|S_-)bEyB0Kx--d-09ndHYmzk`jHLvB55{123UoS{kwOsA~#3(z`A1(9Xp!(R7 zL;*IAKbZ1nTR`Xx_>`|DAslrb9L#=tP9ruEjRU#5L8D2eBZVvG+RAfT@oWK;c^tay z;VUq-Mqp6_Pv#Y7sU93#KJfAvZC5O$bN-iq>3N-NfOeAs}55|p~2%%Pz<-!OKQf7Pul97?QSra#0^pR z>uHv?-O2e?ps-g%LG870Y{0xl_3c{y?c(V}5H;(dP}WH7?I~hy@}#fr`1h|kcO;7= z#(U}mw)#F>S%pUHsgWeSo&>Nk*lxIj!tuvZ_-_-{oQTP#yQeSEyfj>b}=K;%2{&7pmpsOHfElw zZ0nufZQ#M<#&3HY^}<=ClNuim<{gym{ex9=L17TALfhl-tHhg?!`jZ674q~5t8xEp z^vT7=$cd7K#ONiFG#VO@nDykx5>()%*sU)3*WO z9lzo<^EDaq9AYh<`i1OBRTx)TNr@|k=N$H%A}8=96sq=B?yp_lb8Ty&DYM@#+a(WO zyUK&O))=Oz$BTL-XtRIw_P0d1`nD|Le-cnt1!H6@ZoBw~m3FbDuni21;4rK_eIWjx zs^5&O5P^?KCqMmNob-JF{o?u$Vg;6D*+42IPXS<_Y3BHk8|N3@nqO{DrP$Vzw2N`t zkyb|kOqRV5e?sFCJ|RSAp&hC>BKe%g<6)mOz&{{r(VX@RGX+kfvF(~5qkcXUOB%ae zK`|W~$Ok?4XJHVN6a~oU`7aKA- zn4#V2_WAB)YXXwOtZ8Z}dyJJq^0JW~HE_%6Kv<9JYwMw9eq$ zI37)AiX5}yF{n@yxkrn2Fy)q@55~wXZ_BL@e8rwRxcbq)o1*nytkYaXV&M)}p=AD| zphNe<*0GmX`C9@d2f(yB5l5E1WLBmX*{a`nY3}fn9(p&;TcSDVzg{-2O}w-pf;>kB zN^vN^g&c#^6P2ysP=)*mF~co)Wqw8(CHsOzi&b}QY|^26_7Syi`P^GQ_1_z&VrMPo zud|vvR$T(!e*Nfm#}}?QyxvX?Alx0oda7;cvwO;*2=!=uIU4GZ9!ycePxSa1mUa&| zW1d<5!PYP-@;0a4-=%@8>9GI9b#(pB(Qe>R-AbPxhH#c_MM9dy`@z&XNaU2*FVuNH z<+E^o%&rg!bq*Z3vq8ywI+wTfzN#wW`W97hs?moYR1LTPDx5`mz5C;^z`R6@{kHUC z<-`1oXqxJ;3%<7%L>E91i;9I=XTWIUrK!N2Bz@kB{#4*!-cUemUOLjRxOjVLTFBY@ z9!F$c&1Eg+y&px_VSDKxWaQLSRnRD?9rWtcmKR`qJ_&O0<2^n-eztYo6umvg#ic!; z-rIH)1jLB~Cu(2}(*$Gt9ui4dZDq5$yA2;6pbz=MQ){sr zS;Lc9PFfId^Vc1e1;q7v-(?BYQK;u+o>y<8yEbctvj86z zclUB5MmNczLbuY-+SOJU_@(x!>Zx0e3}%0M<euv#YkQwG>X4 zxW87_Lx&wxQYy6P;T!qn$#;TOFaI{cJJ-wvoD+aqJ+)aojjW~BAb5G}J>1-5fA0FUj*KDJQI}Csj|o%pP$Gt|O_O{kgj1A6H5XSm zz-J`xTfE39DCDp}7J8a|Ovy}SgNEQTajvuAMb2D=N%R2!?sHFDDSAxaSvje~u1!6? 
zDn{?|-xCo~OllCsEYfwbR}CTK(Yfe`^>kJ>AvzyU_u}x^uQ+WGX?B{I`h^p9HSTX_t4vmca&z~f3s)^< z!2y!7Rrdk+x)caaqa^`WxVXIR)6Z>h7sH9za1*-z;_Sg)`Q?k#^cYvV{N%K~;O~LO zEnME9U(W1q<_97Fqc)(V8rui4hBD8h|f;QVr|239nOj$;2J{+#<(`lCGK1{956+G zAChkOLE!_+nI-vqFDykoJhtaH)4INI`}AQ#w4}2kV0Ma_n1YOcEFVO&Y#cXLx-!K@ zJlmH*XV#E)#xSoM?e8~x5NzO=o@DmPT1e=<27O{gmk4pXdQ>ecY)O`!?J%u&Tn#MreS&VIC??Ya*+F z4{+88qG>*pLTepX0>HguAf<`_(6^m^tPe2PJ2~+dbS-nyAW(>M2q?t)$x*114KF?M z<00?SPOM9@E8QWbCo~}^;0$mK_#5sLKKmFp5ck?6J(5r7BtCw$I6SQ8FEiBDQ9U{c z5~mDsQ@--1=is;O-cx2Y2{%3S3xF=Gs;TMB!bPLB!TlS1yDN{M96pc#Ftx3rFA$mn zUuU={qbQrP5lm?QWBYl$^pR8pW4qWd58s67ur!%wV2VaKT_Pc%k=`LA1Gr-5X=i3yQk;* zU5}PUKssfTB`TNKL9eKQc_qn){&f17tIA}ZL4ZL`^jM*0KhcqKIH_2rM69C3Q$Sy_ zkcF2{5PH=Ie*tMAY8fAyR%kOXQ+U{@EU_Ao@T}AHn-n~n^Y$7$T z#Ap)Ci<0u68%|9^%}wE@UA%?cz1exj1kt*HZV6iUip=B5aHdx|czBPjFWcHKWt4}S z77q)TE0j_k*w|k#xLTGeOCQQ#|4w-nPx~B&ZJ*z9#@^84)TGB|PaY)~NgM_^C$|^% z!7tiN9C6cRUmMf5MB)fxtbK8cf1>Ofu&1@(7!~yZ8*soq!k4u;+76MIVlGW{OD55h z+3C;|RL%|O+VVgzZ6wTknrB~k+=>@Ao+L%#(;)^6-9la7H@?7~c;O-m#t0aauM&Yv zJ;x3+Han81ByFYojll`P=8U1Cq5l5DceGCDdsowCVwRBc_Ihnt%dS>dQv zLgKQ`c~9(JP*<9@BY%*BiYeb)%7Z{aea;jW98DPz_CiM4tb{5@qaSHWW+U)pua;5W z(Y}l?B}9e90U{){bb^h8W88M62j}R3SM2Qj{zdQnsm!Ej^ULUAHoz^Dr)Obj=FCLF zl2(pjg=s5iL*M-f55SgkU@vNJZvBydUk0K*@~G!#EkhGF$FS(_nHgaKX^hPgvFF^- zVl(8cOB7{c=v5zf$_K_@nP3SIw=cBWo)K;yL&ztNTg99>Ba@o&;kg0mdKgDi6`n(s06z%Nv#= zzkio4nE+tOnHis(;&*wB8L;bsTgx2(I%@>XnISJb;U=gv47MgA5xRsM#|XUsePeU` z^!y4>LM3F(YMgYcB6`y_<|ldZW7gi-%p-Vq~>U>{37g2lc(JY=dVC=m4yvGYMs`#cQ=I|V@bO4uR z7uCW~Kl`mH{Oicg&E1vEu+k!#iI5 zFy~6w6uzu(P;7f50uS$6bQYCh6zsBltMy_}+VxJi5*>J0FhwfD67Uq02d$T_Wiwmf ztB}mKGBm`csuqkDvFL=A-QENSvRQ5{jo}XtR@i-cNdY>;o8htVt@`TO?`Zqf)sDQ3 zAp;ZVYy1bt!{PJmAi(m65( zs7`#jePF~$n>ad@raMa{S+2^2-uXkF(vS+<>mJ}(uu=iAy7?eMRL|Jl+|GfQ7Zk7% z6;l;&DP)Jin!u!XxgVdE%x5I$%TJ}*M6m|djG!~oDdI9%MgVk$K| zxzWryf-6SR0n_=hR&3v(bCx_kfPJr?aAKM^{8ayjOTLhI4|Q};QHhZ(Wsump$@n>l zHpcKfK)3qYWz=O)p>(upsG`5Yx?3Z&W(60~5fEdMDQZ!HL7`pwx+~RQCf$xcq#FIJ 
zc)5BGKo|U+whqbXdLgDn#59&4CDFS$ySnOtIMK^PFpIqcUZ_%1yR5*<>Dl~g^ow;y z3>MCMPbXmr7=rmAdvF&EBiO`J6O0kF+-q@7EVau1*u@XkHrV?pFX{; znwqbWW&cWc(U_0=z-s%XdL>}NBl0CGv(#-^!SKq)!s8~e&ypeK>T3G;Edg*P+_sv-ZUZVcJ z($q*WW;xV(0YH61!1*sCV4I^_698;(V^cNEx0F-8ZF;A1h$K`jss$~e+M_UqOp6l1 z%mh1mJ`#HxnnKYE7M}Ho;<>OCO^CFl4+%9b+prmF<Ra)W z5Gy>x(gE@(`s|*T^%CF^X|t?@;G!ZXAjoj)xB0RC)JNadwwVfK^4se(jFy3pxU{RT zcP*uK=;KKYdk)gD>Wegcs?x%{#8Dmv1OTr)^=H`01LzjO>7S;2MHW80DoNs0viia* zy*HOs>-%OAvlXh$Y6;PaJIyXF#Om%syOV5MisSXIjM<8r4zjzXM1X9(H5%F8ru;ff zE~uR@l&wQAM2! zyi;}b2FXQ|_#nI#zy|;apN0l2b}BQ=?xWxiLi9ymm_5xu09SWoCSvxBJpJInfQ_Av zpuVn_s@`W+=2@Yp7Ewb7j}+M=b!I$bYgk)Q=Q4v$9!3I-vV+8&VUzvDo>m5Jk7NxIyC#z0R+dB*C zZtYG3ED~-t@v(xGadhkuO<=c)X}5rd(Fm5r+3bHe0bM zAyF;5cSXHyBrdUi1EtodA<3Q3#dwtdQAluQE8n#uzMu?OzQm%0+xJj?H=2K^0RWQt zZ_n1XvmSmbYYl!{oyk}FQpbC4H~s56>F_MBZKof|3?a<@=Q32gyZ6Ju8Dz&q++9D+)BO-eq z!ifn$uL3OG!C=+6oOP7*E4Q$}YXQ!EeJ+b*rofODm-D3r6F?oQGd>$pjZ9UZ; z58tMyyWd9{8KG>xp)m1W=e?-d@6t*v%$!k$aq{$XGJHa0iU(}22IW)Ry8!c&RpCI~ zgmx3g64K)TxZ^fRt}9P1+DKMq?UZi1TQE%D57le)v(_Hooau!TMCN_${k&m12Lwd2kx@XB zvir(9gPn67K6+*g5U2s8dp5m*Nhenw8$u-M2St|va>f2Pw@bl~M)W~r<6rPWxgZHp zt{F2#1&`Yn^@?0jbAEixXCLW5U?nf)-88xA^nxp0wFp3T7oE=B>|gdNtXl(+ZQrfm zRXYIl`=^=5l9g0p&4Q?9Lm@@#->oiCO%E#IQlLlGi-66WC`m#3Eg$EQi^(8rKBl;z z9{GqYaFXs-9WB(@_CYfpz6hy^BX|@Bo7h@+v-KI2CJgFLS^AvAV>PGkXzoDn2BTB= zS*FNhL5=OxSd?qV(c&W`fSJsze`cARRV4#Jo?%`0I%<$;dfIpF5p{?%Uag3-Jt7qB0*W7I zd9@4&s<45;R}D`BfMS%cej&So+LgHWom(qsnS<-ox#LxiUFr}LvZ|Y<+g;&n?^dd~ z(}i0nXPHs0{p-Vd&jne~gc#@94vNFyz;~^z9=Ym$6P7)t=eav6O$$-s>FNiPQw9M5 zMyND(C;~+T68xN(lKhq>B_$PY7hBdtP9zeD8iR_=G2M^ye4|pms!YiTBC@s~Vxku` z@2N0n?Oa0r<_d`v@B$OMxCmEV8vU4Kv7#~=9@g{495MCb)WiPDF_eepSq-n=gn z3mdfIj6N`pQf&<_a9`Waoc!o@(fe+MUa?YtRN?zseYgP)hm#Ux?9NZ z@_4Y_cQ;xrW4Mt?l4h_fSiCD((^h~Cx&Sg{>cH&)$bZqjW~ZQ_U~RoAKU0<30wlyH zPp~ScwA78Dw~`dEuB zbRkXqtsnO?hNYof8ILzg4X8T6!E-8C`Rn1EFi{^EZDNX0kK8^dqH%h3{?^E~$x0>|Uo z&^N$-{@vIgaqkU1&iZA84Z-T1V!*H`Y@-X&Yn^No8+OC}`z64WNFnyL@QxuB_~z>y 
z5~l{jgiFf)+tY@0xCIr+IF(aX_(|j4{f|;$j7Xs94dCf__o+Z$P(HK{x~M;k<(?-F z5C{Qy;akpJWsq3vJ+uyUX>f=#W6cksY_3BuJ(}U&A44w-Ad5gA>8}4gg@=B>PU9{%jqk##Pl^ghCJB zMSw&&l<^c)-jpEN-MIdKj#eFJ?ckudL59|=qO2U3lS!AFLpeEIg)O)LZrEbkJKzso z^fb!Avo6ZL7L+2>L;0k%NAYHP-SQu3*PmNJXww;A;65LC6Oj&<8Ic$Yk>*k{p?{MQ zfx}ImkQMdMtGIN)tAd1oy(&m36B9a(`6|z31}KJW%o>0X16j6>p>L}ILJd>~{D1e6 z5msq+d0oCH<%+2%_n;3Rq>9$+*mDwnCgc2no+Yp#dRbsWF6{p-=+CvU=-sUymu^ju zR**obI?ia^hF7TCNl^NO;VIple|EqX8`y#Wi_Pjz(|+gtgQxWe6WY+j3>Xk!C!zm2 zkH0qd|904{DB8Zfs>i9r$9!lo?tSRwKVJG2OYZ zRKvEU$sW@>GsY$53gLv`lL0Qwzl>yk`+WzQ{ng%`husyd{i}8OiCYrlO`hT^A@21!a#rGRn$2o_s1I!2>?w5gXG(=*+nb8nT?_4#NS5;cfq@8brg8uk zLX3|7=>!e}5qz)yLm0+Hya1l`cV+Sj@gQ2P`w3j?QcKEIz{O|GiF64fV8(Go4<0-K za;ysc^PeYde0_XO3?jF^L@3&k?XK%1ytb1({QP|_cvFwUyMlv*XY1@|TfAK@EepFA ziJGzG@&D5rqBt!G(5PPg-1TVGQv?AeTJh4|n{F`y+WPvtZbGRXh9jsJd3vB!PXz8O z0v+Yh)zKj#A*r*Q#=(5B3Y5*V6Jm7kdc+!7=ALq?{6!SvKfm`s$yE&k%4OemgHar6 zLxacLQ@9WxUtb38!kW7_zU%SiJ?E$p=ga>@XV!0afIpyGuj?AZq04GyTZZ$o|t}3;!fICQu>(R0)BXj(s$kHHPGz$;&iZ5(flgP(H0LjUsCW z4KV=uN+wtbo8}!dU{4=({d6}f{*ycaMeb+?kcUa!{f<_D5-wYt7Qs-+;f95_%*f0W z+k7>d>6o=Hk46h%HT-;hlT%Yq-hcHu1`;V^7v>5I!|B|XDXiKi^|MM)k!d8-(7*ti zks)-k+R@Rm*cC1(D=QFFQe0g1met(cTu@Lje`<95_(^O2qeuUdQPedTt3pTqFe00+ z*XP|kDztN{n-^4V0Mgfn3 z`@Dik$35vGR;DAhWcNc$>n;tnT)zE=<7fE?%!>Uc{{Fkh;CAxU!kJ~y+D z^G8u_=?*9Jj%5L!uor90B=U*nU;U2r%);Wn)vm6*QK+}f#r>yHJFc|oWBTsR?8DaY zk-h;}?kEArHRRFH*&NLi&+${>mvc3U@>~Z)4g=2=1N#^pcLeLl>XAD;XDiXO5dyg1 zatrfvt{0>jwl9UZE_Iooy+nPpBu{okDuJlGTP_Fq#q4Y%+T687saI~aGK9AFndv{D zP2N=78i1oqe_kH;S#ocRq&}iv*>==C$q7K6BLjXiJt*}-4eY83s&l4W>85}oX)E7q zi3I=AUYwOoFaH?2?r_b~c}m)LTyYxX4Zu%gt)Aps$Kzst$1xQzPlJRl;kdWR{E)W+ zZfBq2H6~Nj-VZ2{dN4AFGgJ=y)tB@5^+2B~m&5ZCF`p~a+mo9gu&1Msa4D|OW7`cB zW3HBTJ?E}}s6Es>_oeW?$qczVDKVEAMA|TXzJ^arWbU2W%hR8>UC+=os|p!hk1iw6 zMqD4x3Hr9(E^&`yl^Rc!e}h?4240bh-Dc8xPx!}*@FmZOM*xXRY$c`nA__;0;eWXg zfIlLmV9tjQ>Cd6KIVNPznY;Qmd)n?{EO+#~TInRd?)u>th+0F+W8vqmgg+2|iJaE= z`cfD+`aYY!WuIzjwKRJrXnp5bpH6qTYWyH3VE0 
z*t2(0vCiI@8aD;=08#aB)Y&atveATm9FMPMIdqffg7N;theXJv-XR=Ff5q($zZBWU z@Uu3jsryl+y1cp+x5JIom~C(I{FhV1Ga=S;j`QOE{1pb}yD!hfi__knrmQ9OJJ}As z(t&`ekL|;3DZLMN>>iNUr*y|JOr@^icwUZL)$A2+KZu~d))UqH;l4*+GAM5L246}( z?dQ)rR_c(SI*Q}QEB^>&M>lLPoo=PJFF*^Y62V4ZSSZAJ9=krqDz8=U>Z zb$i@3RFGitz9@{1C6dq5J1CY_{a;n=z(%O@Mvt&o?)V-~sO4T|PeCcWoBU3?fyF+9;|SUCfnPqXnzd? z$OrABk2Lj${3kf#2g##WYkit#IQ6xrRq&3Z`^`!d<5PWg<`Q)&xbDZ%6NNtaP&t44 z`F8|)R|A}NkdsEL-JWT_@9TlQ?9#faVo3#yUqGuJ+m2rc^LlE8 z<}Pn=m=tnRg~a9Th%NEi-|l7JdcgNyp8D)pSQD9u-Z&;?5~a=0`}VR_EsSK z|6=+|*i<^*=gUpX_F3nE$M0!k104+~;x=dca=f<+>(^`i#y04EOgkGrZ)@5e(&EC3 z4LxE>w*iX&)_ke$j(nEmbK}czfr;4+XRo;ERWcCf%BtJWfdY5y*E}i8Jrh4CnH;oI z-kv)WSPb~XueND)P4nlS_gyJ}M(tfLs=>uj{pB@_o`HuyJh9|mIsUO1XlwV9H5%-4 zU8&C3$mBec(}c)?YE-5~UtrB<%oHIH!&|HWW@pkYFHd$!uMayPQnqYJVZ2VOc3UHr zP&ud|``eh1I(i%(d@(y1>PAOfAHMM9yWwE%^dvBfTw9{lMoSYu8@VS9uKw`|459G8 z>GD0axw+cOQEbL6*}rYp_`axwl+gVzPmWz*bz?~MbUtTQppCe87` zohkbNKXyMWa)(>m`E2 z&Z&T#gRD1{bhk0>%{@C8R!GNUyY3|DxaibubC$dtGrFGa$?mKaNYLx|uZ31$LLtHr zwT?*9afu9Nk^zV{?URCd)+EWQf4_LXvCc3c+=zBm;yTp7BAYG*jF`NnXtXv@$%uQkAJ1>#b?x>pj$En5 zDS11X8)i?6nh0^{X+iDCJ%q1+U5+61z{iF3m!}%My)b-s>(WltqN?AfPSp>F57i{ouM zIXOZrjz64=B)#@AQ7!tnOrL)FW-r*yj5EqbG=@e<1(Fay}g3RzPycK^WCO7Eu{_V*YaD95Pr<&`11UB zzLKaG9nuN8FfZwB?te}{9KU{ZK1$i5!k~N6Bg`K=Gu9{Odp3JJAtSy1(9riR^%8!c z18(bgJzZ#c?Js;Bql1U`NMoR-AwP5p{RNu`uw&=JwAgqwB@}Tf&r0 z+Dg(}dx0&Uq~|-9$n%qYrR$yqPn2hy$i>aYWNOX_2dg|LbB0(nHEtfB@!475S^Ik6 z6x77zBo0IoD$-~*nm6mvT-M%>bf`y(P-2DmvG4Ef99>>}BK_+n^Q~=dX+lD}jG^nM zDb=jHYHHQa%e~tb?QK8~5{SAvlwy{fnwqWxZdvz)^)zu-)!XMjvHx#2_g_2x#GE8s z`?{_|g>cZ>m3= zpc{J_7_y698*=3Q{#shNw;Q|l{65R~o@8|1{WhPk!oMhs1b=SCTdpPPo1w%X8lJ(6 zQ$%$?FJ@Z?oK!fDH@y9J5~w$&bV#THh9uP4v#!}9&+Vh5=x$O@4Q{_qR{?VVQU}Uq%(wvbp<)WC^Bfx+tI;Wg-Odvc>H1zJ?NDH8DF}*Pb)N!v3~Yt zs|ZlSYHwdQ?~pS;A3i9Rm$j8jIxg|ww8+0-Smet4^P!*jKFySkj=i4Q4Fr<7ojyGW)mdN@9w@m3P$7o@xCi>u7evjMX@BGb`5;@M%7_;N zehq%>A%v?0RQ&>nDV_DGz`wmjbjng-mV74U>~dpb6C6rkqW<^d2yo2s`!p9z&4A%C z6Np`o$n|_ZgEf7M$nWZK{x80nih8VBeQ+Gf@>V15l-Cku!Mmnfnp~qy_8ci4c*LF} 
zhtcFPIPz>y$=2?Vs>_@J=fRcj>azC0qtIaP>7^l>SCFET5?d=PPg!O3g=M9DxZm*bhMivC^zR=k$iS9e_w>Poh-_0tGxemh1 z%yn9iQ=ST%{oGAmPmxIdp8kj;h_5nT>fMuQXiIV)qq9}<+4`usS|4z#V1$>^9S;uyAd+jue&77@NhX2GzujP+l*|8haGVJYT zEDUIVZ#RjE0Lu+pI%DiyUiXz9PI%t@f)1NE3Z55aQl9N)uO3duE<}3-JgM8A&#Is& zL6wKsnKjtNs$;`%Xv*yNCZ!x$M#1#f)5A(vI}sd&y~(ZS-Qt)`JrDd<r^%dAOeSW2U7>u`2&HftE{K2moH z^}EQ-p(=#Z5?`lgLaP$bAC!v8@Ib%8bA%3V{e2Rn-Img<933kJ`|Lge4~d z5H)b-m5nVrD-$MU(WqMV*;*&dG<3OP<9EP2?=Us}3}wRqgUPN^&Io-D07HGXOL@_T zjNC%+5IP^4Fx6gj{ASpb5hwO`L;PxY1OsDMm_vaev-GAjz z8NS?GkvUkKw+KA$Bq5~^Nz-&}em-n)UavFz#a=r&%$<}E>C~m8fa-*AY6$axX+F`a z#evKXySKf!7xvt1^BClk&>jBt!qD{k2(hO8wWK)I&|`PlaG`oTEK+@T5h-@*@Jvp` zT?3s7UkmB9T&`Q*oG0M2mW^yh?zI~g%2u?r@dR{V{ap+2vF7HqGUmxmR-Mgyn7S@7 zso5TyS1P&QbF1|Wex^e>Wty`HgqpUtb*;`+g(08`HJB$x^QGWff{7U!O?J~&CnqQQ zQn3VtgtoOaql3pj{-&m;?(XhX;J5V-iIcV- zi^1uJ96w>h9F!Ja4K*7W;GFQtY*iR~wpzY)qeO8z;9E{?QNO~X6v(~$tE+uZtV_mM5$)sWl{ zEsH-C3`qQJ36zJhZLO_~%FF3PeoaA3v1E z#k->U7Z^eBdrSsKMnqj!q`ke*tIP)#vxSngv#+Nub0iGc*4GDD50x3+Hh$zr{@F4z zGD=HJp-^a0P>`-;>x4-qa3=NRM@)z`kb!k{bTl^372rKyA^i@ASCp3n2XN;B=pL}| zIyxCY7w)HpIj^T@_ZBIF1bvP+HV`gGMn+CfPH^Z;baeDiqc|{^5Nz76Z6nO7xNoS> zZM17*cGi}Y$f4z+7oSCQ zOr~Ud4vuG9R<0OI&)GBk@eai=cQ+0V0JGGd@=M}q$^RvX;)2#4e*B_&DI;#`Ba zxWr6ifI7Qok6oQspIQO?lrv6kKXZcal>+LJU%J)Gz*FkaEiEEUOfo>G^!+=>;d>b^ zJR)#_!+>XLszr0Pw!jfSAca8I7v0KC#W?Jj-onE=2(X%RFNI{xvW||<<0nrZmH!|Z z2gYoGWOApu60+Z4O4GcniYDs=>d%~+abJU<$M+w`-vAGxPfz6e(#bHxs=7`j{1*(B?y zQ7L)gKVrbc14DrF8OeN=l8L*!GO{#StR3|hxgSMM@FbX$zsJPHOioVj?AS$12H)++ zXC9$!EM&%QPOETiNuB|vzzvx|xW5Vk_G``8ddlKY74$&03E%Z9(<&HmPFB&@Py7CO!`F!ybLQQ?=Khi!mAx;* z4JY0e^*dMOF7_ZzOh^FoX)|^9zSkG-i|s!$G_H#liyGy=3@rturlxACs^$TVMiLz# zAOB%xeVvey(9+!eC6M@-V6H~{@%H-ibP9oBVQ0TNo-*VxY}?t`_&JCDM^j9pWT9Sr z!1d840?^Q_$>dEi8yhbhH{b;Dz`y_#8=L*!D`kE2yIUbTWicNm#b-=;fe1lii(PW3Pns!Dapx2MMOBexNJ>VTLN-NLqo%% z*PyJf?)IxAs6?v*D(DJxx$?VaU}j?jqz#N63Q;+J(~<2$YIvMsIM~wCQfyy*0#t61 zxIAy$nivmk#(erzgE_gVs3<;OWi@azE)}M-ZC~0=gZQ|m^i)aVZmWRURVhngv`CHN z;lqcEMb?_fg<*v-VvXkLi;D|j?f^SAHa5;62iJxYd-2i3tcp*5Ntr 
zPKyu|6B9c-*1HiIcvVzXM0hk;N+o%K0)fM&KFb^o+!iis6GAU5q+}yK(aL1X#3XHKMev z>?tND5FxO#vPMQl)rS8s&fYt&$*pM@c5k<`ZD2$}KwuM8q)V5s5I{jdx|9&Gp=9;YY~gVBuHNJQJTc{y%8t=d_r0}o85tQsFbM|*?`nmk z4B%#W03V7aZe9~@J7~KDgkC&s;}yDy?%RufrKN)W{QUp98fL%@(8n7sy&vVV4Ki?Z z^KV)MM$75|Z-$fdZrCVqTtH861jw}pwq~lt0C#5uh0e^({J7#tuG7nxQ-JVocNtbz zyK$$sRaZ-l)cDMeS)esRO8vzyz_`g%^y z^8EZ=FtBDokC-3#aIm$oy0QXncV}lOU~q2G(IEf<@@MgE{rdH5?~iexkFR@T!dOyW z`GmvS#YIR^P{Mh(75E%rNO7)9&a%h$RLHirNDeG(UOo4gX#Fx+bJY%@Sg?RV8w)9D zy6e}s*T#tiLXB+-LCk(rrx=Ds#tx%#60VDXfdR9Bk%k5i*!W7v@$$-`+OaD;*@|9F zWo2c+VHx`p1y_x<(Y7tGp31dh+{w-YkOhH4@&Tqi zJv~j$EU`W)9H*GsJU$L}w_T%tS7#S9ANpI4!Yeezpb~!Q?N&&{nnCkVOML;A#TA7u z8pf7XRaFsMlbVwsOFr7+mHchF6439f{=mfR2}Q*HypdxA-J?7`QW+aZO-jQu;0ig| z8v_bOm{AqW$=KGxJo0`}yODiB*pXdCN<`H+ThVG`aIwHIBP$~Va)jF==m3*(Vggi- z4dbz4df9%ZqSF0@-wZIe>h{8sX2FrZiH)I+-S5b}9Zk?ywf8b7p}jpEO*$Xdd# zvwivHTc-Nv{L_2tp;o%!Kbb<+koJE<+rlD%s{1*52mj|ctK#?NZ+Wwl9VcHQVj7YD z&0_UlzdmBU0PthTNuAo@<$}>jNilEw2t7#7XD18EaTEcZKGIAjXR6(kmO-PY$5IHF?f+E{ZzEUyWPhxiw(ZntT4e3 z?-r-*S>#~0<7+k0@()7AbUnjjMLgaIWhxKG(IZfYbIBjBfim|-#e1&26zonFt%wdI!0qVyMIl0=dA{?F0}zYq!DEv-2+^=$Jk0O21EJ3c)5AxUb>74CXGM z>8`KWtM_r3NXzsRTA>QLHS>Jo99~M)s`<=?gT1ddThVY8TUpCpcYe^&s|BmerD~&C zLF+H2@N2{>!w+HzMiB?+Frbj!_<_Ux*PZa_)5yCA7>xVVWXWzN=2G*C4xz~dwr0fW zO56Bg0}KOF;)i8oFnM8Kl5U+#W*>l_a6iT?FKrh+%Pv)I&9rWu4RLNo&SreL&YRbU z62m5dKc~NdbdU-+>CeqBzUMEqoGwx4auRS0E5YGSN?yN44`+3ldGm=Gwid?U+Vdxo zH9XyxG^LYi=w>};VW!8g?qDG#a(B3OocIW`v}&K^wq!4N;*@pkVAlVV{yA|0T=!iJ zHqr{ZDwWK*7>imAO4>N5`Mwt2(bhr7QlH%Zv7RMw^dw~^q@%t2&!g`xpH?=9m#9Q} zJ*-x{_kRz6^C`Vg-?C^T8C-YlZIHbNtCalsaFFvO3HkOmCMwbsZT*2zm(ZXL8R}_# z!c33I$Z)ExQQ+p}#D6kr1pHTF#t=l1VmiJOKIwObNm;o{~$(1gKLBaT8tk=#;^rw4x$6M||F&x`H2o6X5LpPtXO{|ycZgfAtON$6{%I##b1dV5#tu3*k`XPi(9>M+-! 
z(74{)eV8E+4~z^8kdBMq_8Q0dxFAB{vrnOXE3kMx|wuFL&58k@EUm9x|d0Nt0U4%BEi<=MKr=Ir-|-jd%wl8c%oK zv>$i1>xg?G@}KMEX@jpNy}x_6aMhN&+zoD)?B==GJ$ehP%^6{3zF_#kJ*p>>n@evp zn`Of^5S6Y#KxgA1~IGlKn{8aM(xFA>(VNs*{@!21L{J{gOH-a>WvC5*3 z-9de!DL_H^m6cfn*3PUsxWw_5fLXH<;E?ed)BF@6gPQrGPUTH8uXG*h<~nhFPeR$w*iHC_zzHR7wuwESE00Ld0h(CAH@D-DhK0;wc#8?c^mTR;B7%`Kp`8?guCL z^{CxxRy%vQuE8~{A#O#Mg*RC~g=xyv#$;7!`AJHk*ei>?C|!e=UYG_)6U8dmV0D$B?A+Ct<@4!sIl69c z%u2zcFcRFmv&}=3I=NC=cds9GAHDn#kZi87YJ8$vj>(}Zgr+4*90kO2pM?yHhc|ea z@K(I$kfe(QdgXCTXhR^^zRr}LXZZGZM37rS zB0>Y4){P-HAm%DC&2LfU0Xi$$zX_KDFQ+~OD^gfc0N`*$jOmX$K49e4jJFrWCB;~= zw-@iI$ky1j_0=HgzS$ccv^Co@sqj(i*RYfM4r4gi`HA+Z){vmAp?<^e3Z!wphF8$& zbzv=A+1sWgX&$(CStXs-<*c&7Vf9C&^EXTq^2GwiN=&rLv$F8% zZ3F08&l(S1*L+)HLrxFc)F%B9JI9PyT4nDLd@(;2QZ z(FytA>qzix+4a6r_yjAasu+v6_C_F0A2q*b%psrkJQB%sYb==I7ZIZO8vwXMq;3@i zIzGg4e@C-3PQm-Fg{i1-Rn>{T`bCyd&H56B-~W`fffTwr1s|{$2KXbFE?&GAKGzXL z^~WD$r4KqT|3a(Qbe>(zXI@%qM_bVTJ$&E10rKZSd3n4f-_Nq8A{ujEHyTm;AF+rZ)jz;}{xvaX4Vj5&>ETB;f=2+d zKONEK8NH4ld!5>MJw6a#q8<9Xo_1XGLoUB=T~?8fdj>y0C7el6q_#!x;Wnc5&B8vK z`R}_qmum;)l+l7i&%vdy>i-3vrgTb?`y;)$beWoGsxyUrc)5lL&&8zg!i{iZisOq_ zt_FWwU(9h&+776Txvft9qlPs^%YqsoxKSo%fUy=9LoeJII{gI&;-lxYdM+ybI4nNd z-3g;zis~6plVI9uf@NEq%xgnLe*3NEAtj-w1ppqRQN5V^(k{QTNcW`1T#g=N6@hI36_4N$BN>q-} z%r5$9sRD2bE_lzeQ^r1ZFniYynsC$E2bS+7Nbfx%4|8!ozF3!&#~=qdWd~MkSKu=X z3)Vc#iR;YVvf}UAz-h};mTM2tdM;5pW!+@slHJc1I1MuY3X7J^9WNoA z-Be~iqzR=b)}_yswL0*_bF7~SPapLw8Gp=wBU;1rd`MPyJJ1VGv$LV|h^hEMHCeE^ zr4Mpvc?_5fcSGMqC4s*z)XTYnlU2k}BjpAsf1j@#zrG`aJI@01?=zOl_Z6+Oh zzu*RK#7ZF(!z9exYGbVGx8#FYKj9;B0hlg~xX^KMqZscGU#9sc>7&q_=Fw=R_0sCc z)47g2tmI&j{{{Zg&34Jnwm0t{QLgBS6NVUHWva7AzE@x9^s@4)au3`0Z@^&OrW&&L zJ4!OQjFZb&THfw9E~u`*&nOeX&#M{FgTZxDiQ1-{MDe4~ zUJP&!d#&}ZGrmLowHi`d@CIn^@n44m2R+EHSp=YM((RH_w4XT5<5TdN=hd@@+Wlh3?%g zo1_{_E+ysD+zEQT7``YV!=jc9K5RTrDqe+E0Y1xZ z8FRCo$xUM!<4x#(=I^k|WH422O_j2}QBAsk%F6f;V;xmXu|AFtaF-z90wF_2bs{R0aU|~!*@c!BKW8PBU@-s)L0w(T7`uH0b9(z!9aT z+AtMN@q>zj-8MSMa(WObsvZ*=l{tFFhn)T~%CIa_;kK}#&E`uzSO5Ykq&!+F+2NU| 
z$A7_&{37m}Q@*~l%Am)Mkp$?pu*tj>t5W1<+YM*!LpNwvORoVLkO}s*oB|xzbseO3 zZvQW;d`*E9{0vToYvRnj7}-}tG=Scet3Cdwt8(XPlk2Hb+|GOpkN040Ufn*;qBuZ45ocZtXIT}LUvlhH?J`>&%UiNUwSdAT*}w1 zLzew+5Z#V%)4y#Dfn<(M*{OGQ>v^~zw4zpLOSm$Xp=orWos!rOCNhn64iR$lkU^6^ zTE(zUtsA#**+b?A>q0z+msBOqTp)So>AM|I3r>>9;DAB31`IZ5@fY1ri@(GQmy=IV zy!o?@s&6u(Nm(I#owFN2G@V-yjarj4ZzMk)dS5?TyS7q|fuSX*R%WeDZ8hyUPL2-h z1(8`FU$^)6u8+NIm~S{xGd4!eO;jK%Xj?)&gC zCqpDL$H|=rV%5{Xm+=){`WN__poG^vs8(z=jvE*;xC+M1^i+nAQBt?Y_K1qMHX_iUP zI>c%PmiSTz6#xfW2n1@opx#+hg01)7h&6zyK{##_@!XMM^})sO(@{JEr6oXH`TIo` z9NaJmyv_TDd~8VmWN?X0G@%d+vt-_zmC1O9k<$=>l`4LO3TwK6pa%1uv7#&CUy!+5 z2!~bmBpLcctLMg!r>pOTJ-?PWkxvXig5KR*u5s%*b&)eS-)2`=b9Zd61>V~n;+f~H zSIja#e1d#emTqPeZU9qnAiSPPs{8%?p~JB!$*hs%w1WI>s+X-RnVRmuj_52{o^BDN zOv0m0F~i1MtNnd7SSsOgu35hA01O7a&gQP0#4b%|xysY`&)jy7pcg8&oP#E+Bt7S< zvhN}<8ltUNugwkO)jN$C{z~f(IGzh~C;?xq+n9mt4EQxvn_@oPFp{aWTWT%?N7S3m z*+#}irog1M?<{v#o!E^ys^qU;J3aF48so6yXTA+McJx|J)n7*Hw^fEZX71K*d^Zn2 z&iLd&k}E%Zj=iU4{zwACGNPvH#Up6*AqL_?b=yV8ZVI=>83R z#4Qs59iysYw)=aC;!$=IG$AvHKCtzT0y#6w*ugz!9OuR$Yx!2|2%#jejyfHRKCbl! 
zd3YW#vi)|JAEq{5>d+EbwxBTEM4#N&)j`^N(UT4k(N_1D@aRx7i73P-kXicZj@82M zOiQr!c>D3|HE42@)M-%2b>S4{=I$86mpHdiMrLU+UxsRZ?yiTv#Tz3>{J}AwdQ;DneF|g6I!-cR}I3GbY8np5*o+gS;IPx4pIF zZo^AzDdI@J>EZy&O$HR-^iE|={n7C6Aa|u z)z9!Fny_0koo#*JTgJW4?`{EUwhIPT4`kSKOVzcsW>7vXi6TC>qq$`b$B0xyiB<{= zkmo5mX&LvWI>0)PA6DyA7#H_?6XSY-wMHY<)<(nCkITJ!D7(j}z%4~YXsIPi7&7Qs zP|ltlp4#oYIxK)UhwUg59?SMa`&K>fi0LW~P`Czg`6;4&pAtkbsBJ{GflWljW zm0r=Il(ckuvQT_HW3m3hm)DI-vXlF&Sf>4x`5k|`P*myRRvr^@nc>RHo^Mu`0T?(u zMQpwYx;Hl>d!B6bSn=n&8)P)s{l;^$7*m|Ids6~|fpI7SnAb(i>|YK;L`wK|)Xr>j zpiXLaIo7Vsbwh4)LWVI!5ppOs>ar9uLeJw#Rz9oPO}$M@NJ`q8kD!4-a*HYZX7V+N z-DN`c404iuxON1c46uKWbGH8c8Q3BV#;!FOnlhEH?&Edj?t$3%ZWNQHY!XXk)%nnH zjUK8Rox;xfxf?5ZjRj$|FA22QOOtxb=$ag=g?ZNs>FH z0ZECw^XZhL8z8&0;BUhB{((Yc)=yh?D&3bPKGVu^HY6mN`j$lhdWN}$ zg;=e?-)r|UXr!vW0Q30Kc5+A`mfDR-X$m|+Oa3(l&*SNc{_tVtc!?h5u^gZTOpym# z`-!ayFIpiA5KIU?2@S-0@DcQP~Zl9O<7sl3E_M_yi`yXAjV+Iv1KLY_i5J2 zlMOId5CD>*%p{NErMdH-);{yF*WS()FRsNed`Wh(QguN%3k!$>ZG=}>RwJ~EtPN4? zec627t0@UmAEPqo(#~x`z6}nJ_V<6qsVtc3Sf0&U8N;`glpZrOrQ~RHL6evjGBPsC zZA+>w9}_&-uHPVx5(eFxsU7*!lP?~x?#T;a>$;w5xS#^KPV~eTRtta1%^Gb8K$egw zFwobPbk%0mPb^$kUe<97s4-yHfE6F+Z(Y22F(_z=(DRYwYqyf{^a+w&cS1NjUXvWO zpI>y|X(Ip}krbH^?dsru@N&@_s;9v#3N${*o}8jT87~+H#5$fQcF$DXwG%CSRH{*j zCp;fF{`E8$-fKYA;g(yJ1(ucZc_J?_cfR|(SU@xf+B(v!sxoqNs!FGGiwApZlXZ69 zm-d%NMD#mIq9j0wtP+7Enw62J{240NVgh>oaSD}JcicgNs9*be<}o86>S}198yy)b zN$Em5;!WmvcziNZ9yy+#3JOhlagUG?WhVJTD{s{D03F@Dir#UhM%^7)eF4CrOeD56 zaWFD50c`N>Y^w=H?O$50*?~Iitm`v1M@Jj|I0I-YX#vLFQEC9^J9shD(~viGCljc2 zt*+j2-P9Xw8ZP9Otg6Pe0;1~SxcOFIDKsiNdY8Pw4F+ybY79yV3=I#QY^M)jzl4sm zX$Ov<7O^p1{um%s+3ptfec*IF4MBkfgzrDgYezrq-+Ju`X!@YXvoQLkB%+<<4=WXVINh}Vbk{%6|63CWYOgU&C|ea}zu4$^{`?XH1HGAP{xBal zEsa0KldG3Nu+@ml+xRXk*Anc0ygK4L-yoHrQ&m+^&^b)7T~Mcx<^~)_kW7JrK|9w2 z_w|g#{PKRO zM~zW-o>NOJQimiuJn&P9r%Gfs2CI`@R&Pt1~yXFBs3%&2Jdn9*^&7V4ND^xHG5R zl~#zbwjC1l48R~`)P{8n^L1YUS9N`fl=j&@=FM&|6gf2*x}kZtXK{M$bW&wnm?knRUzuWC;g23Pff^K(DAmP zok;rHZJcAI+JLAlFX*ffQ9VmTS;n!>T#F>DSzn8ZJsv{J4i7vmCI?GQ8-d-N_HTi 
z+v|XX;YEGCu~U&!oRI}kd6I)LYW0bp5>kSkcXS}_!tp{2r-z=z9^hj?!nm{ZbFg65 zKO3&r?S1YO49VKe(9;|!bJ+|{k*IVVDIO`ZvJtbTp-d+*TXmo4Wc3+!bV@L@#CePF z%*JgsH7#d+7@Y0U+hcja2=d7-O<74aGGmv=i4KFO`|F{KmbxY;8j)k7KSglt#zK#` z<_I5GnY9K_r)puLAxBn*tzJ5R6O!Do0SQfd;Ih3RXX1NgwJ_^2T3S|KF3b(&Z?a)E z<#4ea7#W*T(RAmwq0M!4PJQ`4jJKe)!jH=^L*BV9? zA+Y5MYgN4D|&P>Gh84OgN97W5(Od<_*4U9LH zB#kd}_$fl5rT4`|goQr=KLrJA6XT7I(pV&Nim^KRpx3{ZTxbJq9k{0EB0my&lO!)$0LPMzgA_Bvx^_MQjM?)1>cSFm|uq=W(@GkkhA@iS_?IjEd=SOF` zabJ=@t7B`>oLTkFF-aQB#M{|bZlMqIuzyLZ5Sh53TQ~s_RXz1l z@q@khy}ESsbn{tYQ%B1*?PVaOk~f}^6bYn{YNf_Zr>_B|P)uUo)t#1LWq@F9qIy#sQ)wr}!6LfrhP6X7cPu3e?1iZn* zJUt)+VouvLz!8vBPlncdhr|Uq+34d7tXhH|S9T}cH5{I@s!R8mdBr?J)y43@27o&p zEDyG!+=7ZT$jQgX$L}A=tak^B0P5;)rcvB|iD#Lbgvyd9JyrFHnTh~2mg1dzQOe@> zk12b4MJ}HzReD;z=bCcS6y2W!`B3b?W}SQn7V_c6@*#?mkqK~n^Bm3b{nu{|pqd4` zdi515kj>3Ym#%7e+LiB~I!T_hci}}yIpwQ5}{51jp-NyVfIB)QmUp4@hV8E8?bd-YAG&%E!o~Wt@ zb1w+@YTX007`&Lrkn~+S6Ln&tzp4T3{y%IRgIC-6CO>xmAMzXWodrq4pX1+LHJn_g zV6h$j{~`4TAOEBGPG4VN7xVAE*RRn|y95h&F`u&q(bJENv85t16s^J;lN10(UT&ZM zf4%@|TRMeptmn84o2TAsYt?(6V7>p9baylDznIYf^5W9}#h&iH8=c$=bZ(EAym-bf z@bpgQ)A6xKXL5)`E(&om?#jQ7M>&_?=cvx8PCwK8;6GA_|2D7x2`l1DtIwWU%lP^= zjdJut$H=r4kX%GZKEKRBom~r&q?+D0yFh<4;UYLq2Js35pM7iw>Au~h`w6$w38;5ggm7ZfuJKMS> zy?Iu;Qt3Opp3>YBM=jgGg|VizN9*Ioxkpf|3nZ;VqhI?vm{nnIlxs<9~)?2JGzbUY8p}AuhAx}46?lwt4TR4hc2Zeveclfoz%VhA} zV6cLXiR!JXlg`ee?1aeM{_mO=gpv260DGAHbuv3ssEHipTs`tBO*k3_E!9>wG$AcBKQs zO;~|A1qcQ|^Y~U%MQe@=rx$$?<^feb4k#lWtp1>~V5)w)zXYLjVCCLTR>4y?aJbMc zBZ(O2C8L#zugJ_O?iS(eSW>8s<4JK)Hb1hl}dG6M@2%4`0)!XG|7C zk3E(FT-U7t0e~E0X%WTArvY(x-YT|Q7{xzN3c{>SEe~QN{mmqCed?E`z1Wl?083a= zMkb>$scY)BPf9;WiqeD>7z%m)qEoB|`{`J6NR9@WLvc3OZNK^p zQOQUk=Xk@gi?dcvCM}^;<1JXyJoG3&<%H9{1;|8E2gyLChNKS?kPAKWduzKL73>rH z?%^&?ue*KE2BC}}z)!!1fE3nz7x@1Cz@NP1Osct~c-zuTvQn{|g|tFS@^EhnO+kl6 zh_mvcz9`Ab#7iQkgDs9q6BYmPIgF_2`cTOFbg8g*eU7+gRaZBcm`lFI>V5hK)7q*^ zvZA^tAJdf}Yd1ldX=mZDt+e$vQ=915iMCL2zSgLIXFgik&Pg{@(-ZM>X?AJ2ZJ?{I zBnhZ>DogiKw)F&XsAl^O`WA$uqF+FpUQw2Xb$%uNY2cHmPi}x{&DhTZYjgRsc2uL; z&r|RU_t9jI*- 
z36D`Uco2wb-3~_;4j|RNc6WTh*J|Y_zi)Rq)-?yX-ei4|P=1u+qUVRftbAH!sh)Zb zinQoxCemb|%Qc7&c|aG%76Ia`t0g9I8jAIlpFi~U1U6b{@{A>*H61M@P^*WT`x(@Z z-C$F5>u1#?^@R?Ve92dq!~q~D$pSoK_{UgNzmne>eHtS4fzEQg+}P+@^#rjbJ?qFl%_&l@rm%bHNZsaOsiqx9Crjq|T zL9wuYd*jPP{KWeUSC-TG!AnOI2QR(mLmcq*a2)Qu^Yd++-ai*+nL8Hv4VmKq3ZgM) z)TjKWcnDSi8PYlaSzS_ik9TEF(If00=fDRY?hadk-PR#&ai{AB^hWn8(T6gSyats3 zB43!tE!2E$A5CanNd?)@uG8nMbS&?G_XWaZFqs+uq(W8G&DQ`-1N0OoM`e|W1xSOc zbUzy00>dEoyjm86Hy>&iGrLlE#JN6BVt$9V(rGt3d)?XA1bWo3rIjFf9V8$qXjHQ}7f%El0IV!<=t6y#OZ?AL z(cHVoBPb@+7m7*}vkOEN1ETZ9ETtF4ayr^5e|}d)RDdqLzRqhK^%<8_H9GjMs;Ep4 z@hfvZnEr)7m4aG}^y?VzVU$`Z{0$7oKVgG;-`wst0f8={PPQI~#hicYlYcZAk@(g( zo0foZe4g#UAYD1O7`fDVw|-4b|2F^0ERwa}7_P4ip@UN#M!9{ni=Iz!?84b9x2TJH z^~v_1eC&=t=|x}R=2U)w+D~kILBov%h3{W6gJJQ9!#U#I=P{2Fnv3ybDNwBvpMf_T z?8f?{au7lORbw2c)c#VcCZ|rGq15j{_gl%|-b8EW7Hbb1NB^oqU{o_96Xu2sU)`mj znrs+eO^|_&e+xg;H9Z;^G<=tlh9+#DI%5uVB3%rHo5{P>HQ?1sYTxxN9A6`4Yl~@}WfK9EsdH&>ZDIe|$1m*b@qhdc zbd`?BDZama*E7~U zdZ=aA2}GX$U!k?h8{07s!Oa`Bzdf}Ir~jW2_)j{ls?Cf9-jenl*31c{8$X(fn&!gX-tAD&24tPhj**#Qb z>HW=Mv;nrXV0aUKL`{~wZfoLb`7gjt>H5`W_~9jhF31~F=tk1BlGQ47_VfZuibw26 zCETX}1W@?Dfw5iNPH_;BOW^FFJ5k9UoMhd!YAyK$fVsWBUyRs?DosEc;&%Ys0+6n$ z$#&J&;1FvUrw@*RJ*!&y%nE}a9Vpw@U;rgMb1kA6+C!)zjm%_sN7)Di5htzhAkJKy zmd0*qM`vmIeb{y3nU53F+U*9YWKhEUu`)0EOidVsAX#2_J!SB1%%6Ynz$-5rAcXc3 z$SLKn1JRia;IzI}HqMxTxBx$(>1_^vS^kC><5^@P9#6eI;+9oNTl@it&mi@wCu&ew`rX(5vCiJlpZmW)&9q&tWd#C}E$Z*QkegLO zdjCw7b*nQiQzQ37GP4m6Z|9H1-{8Q23!bi?gk4OmugyyZ}7jdCR*? 
zBSo-4_2sE~9sev_lu(FBp&TK0BCp@)*RI2Y1F5^Yo4IvsN802lcp;34Q%vf%HUsW0yXz z#EdEikeN#pr$}6dgXVx>)`^QNx5`#Ll;MA~g~KlWt&1u^T#-cvp?Sn_N)#0hdSm>l zJSS8A@2Ri-1t_7tzvG`k)qM<~XZ1Ydot(hYL4b~4is#Dr&tJ|089Kn_ET0?nD9f_m z1CC{)@a~1xh3{d~QsysUX0$Wyr`!oTL)gfc9z)&nHtxUCYU*!5W8N_!#65kQ`JX!b zr@fKD2$X%U3-YkccY5hu?_cv%f$xd@eJ@ExouTgVt;3l3?O+tyr*{h2&iGYgJ8w+6jZ*onAub7e$0UFNKe3}UI}1{iF*urjaFV;FN6&2rGv(zrI2 zo8(J(aJxaqAqayhBVQsr$j}+Hv~|(3*1B$H+ukv22n&Ns)a)>T!s$g>RRbO{FrfrR zkIiR4>bTS@%yyDk72F$YN6FNR0Ni^?7P>#;vST&<*Td}U%fbTXgG&O^pM8oN*)5gX z{kndy>DY2At3iltPmbhXitcm_WJ{qAvh-re6e(D3wyI-^#~pPi5y^AEId3=mfWgIN z{0<`ZTSnAypRhV%4_&|P2a5E1praphT9TcQRM#UXmxbcG+&LaYZkuZWSYj!iY*h7A zXwR(aU-38_OTMI{=xK2C`980gUiHZimU=7Zd4}<-a6!WgizM3DW$GBuepce~_UqLS znTu)w+rB6gmcqJ7`3g8ZXaWFn-ilwZBx(hI(94ZB`S8gEL7 zgxju&&5hUNi_uk3NMdrVG};JFQ(aCbsz;w>I1Jmg8}f&23Ovn`N}v32ki`Bb=y&=M z*B+VmuIXiyeYGXN@23Q_NoImPB+H(8IDPj)G32fUbt1c+UUV^4=t=}LGu*K3d%Xub zD|tm&!Q8jCJzcy-@o6|&Y|w4T8nd{#2%HJ)ivbrl#1)SA?>8D+sg1wdC8^9;~pwj*g**pQ*3Cz_Z!WC%640 z;WCE%!y)YjTAy9RdSncJoIa1~$slVs7hR6>g!fitZLK#+td8^zO$o-N>zjDLp}X?) 
z{QBc}x7s||>bX$|Rj`$q4$;|d&%-EI)=#S&Jy)jj=x+lveo*n_IX_?2jXvEazFv90 zx{1$3OP$!OGcx-Y`Ul|{OfdC!6`g1Q`i6mW8GkhqDuaD=bdVmKCNF&qkz;hH8*FZD zOb1MFyqseFR|2Gw-dODaiV1q(x7$0nuFZn@8%&%;-1js;AE7#a;fOfLv^ag(Ww!!^FebW*V0cnN!D)lQUvPnUpXpGze5;58ZD3I(H+1KNCoiEB{knP(>EW z5HysxF^S%7EGCDZ41Ix*?xRrBi3D4!U}_t9LQ++F?e|iR9trWF&T_0UPke)E90oJW zh~>L=w=~N$TPvC-a&<;bn`oe|DN`Ww-P>9BA+MRH0%Vt2Emr1a>Y~R<QcC|A^vHT1NoSCZNDihm=ZFEqbl*+5$9WHRO zy~FVhh%s6$Xxl%L*wp$~&A9sesbu%o^e(T=Yj*E%nl^v2|9+NlSX8P)cY~=pG{oHm z(Thf^_7L{kGCT~gZQGUSR?m?MpJt}3a^bC|tzTI1A0wyXjLU`}=xfdVF|DD@O@`{B ziR?`C;~QINA6~-B9*+i#P>@zf!kP@%^Mc6sDdR>G)XU3~TO}8e6^2tX+L{LaZo4y; zjw3hKw^9468~kxM87J|MuaAatL|wSVClWQ{K@Q2+&XT9zm-+WIFZmC>qVO@WKU<0Y zW-AZY-%7CRashEk3XA`SLg}iuum8M^JuA83Lyel;D8( zvFZMhN-t5JxtgGl%*U+Ma@pa<^_bkKe<@ntaASF<=;4aK!OuCzk&V}7k&>WrUL{kW zTKWgpVrpnXiAjQSg2Ndn@=}>hzmSTve&l z$#(H^nNf!-BwI0JL4l!Xmtnk3Zj@IMXI56k>+@WJn7`hvT$b!S(MUe;c*f2pj?2vB`we9Z2Ubezu@q*2-RAuX z^*O=k7PnTT;Zxq1Gr9X+z5y6#wcB)q&hH=%<2)OT2g#?QjWOGnmdQRZ;3FwpLFeXQ z(=fXE%X9R1w$v^0gdJuLu|;g~AHLjjjETizssjfR21*vU~V`M?3^U=wfe2o7^?xK;d>AQ`g7h zz0};FR>j{?hCtenBbn$#Cr+5f96k5Lz0Usv{{Z&d$|{V)=g{1J$}IC0e4?I4EAKTS z6JO@z`Oy7Ej@X?OhA5Y#DLm!v_GwtpddYMItrabt)eHVcZ{gK5bm(^Lq!`8+@3|a# z+EuaDe%$3YEgLm#!OpQLdwk#PD-C2;(!I2Br4?^}@&yLFQiJORF5UfpHR3mR?|TQc zP0jISd{35#&%CsEY0iG@WQc9$b{{ZS}oF&ZSJ$SQAz} z?y~5PaC~H;v$M|jo0%Vt<^7wgR&y3NCQqw^8cpb9T4C{`^=Q@xr6)9>W3TKiRa$TJ zu=o>G?H=>CPI#T}k@PtgZ_=KTXbxH4s4tDPNAy%!y<0Apg`u|6k^TcjE25J|g4^lK0*m8#GK-RR zm2U`kt|6w@)`IPJT8@g77d125ZFqKGq-%I(3~g`Tdj_fZWa}@WsaGW}+Ajt7z|#H5 z6FR0~?YO(-6-0D(>Ygl9($8%^Vgrdl4GkdQha%!RlyeF#g_)af;{NB9xQ}%o@rM<1eiESWyv* z0V>p#)Dop>Js#z1l6owGCu-fGW0hs*?BcHpPmG2=zm*ajq4#V|>UZBSHwTOLG@Lj= zIwSXlN?mqZ^6fq-Gzo&axT1A*hkyqV%sd=jD!usb)V@Nvp+?bz@36f@3v@`d!4m9k z>UHynV6l47YmcTj27~RB)Uq3H3vs~rzRZlk$GEt{qoTMm_f+C1aY|z6eZQCrNRLY_ z%+d*4=)j7BelCAGm2-g0)Uv!;0MLjImQ^|4H;2t~UCWV%TeHi|!agcY07qa5zc*ogtmWO( zd>fn=yF>BxVpuNqi;PRcIT0u7>%5H;nTPH7pBU>tf;6Orj(xHS3;NdAOwgR3{j0Qe zWqO*Yw6to^*+JeHc*HVScv3)Bs;s6q7#AJ;6rxA#h@xy2Lk0(cM<6?R-H6hGxbBi; 
zSu0pSRhE^|675ed(9op+)!JJC#kFL_jfwIPlv;P!UL$pTBq*1l zG82WFIQW!kBDQ<&xx7|#SI76)6&3DDLP7}o=>-Zv39Xzbr)%3m_}>Sx;??LM#~&Y` zT`+tAKl){!)aFXO$)UZ>tuLQ`u2ift!$Gn8aM`Rgs5Uh|*^pa!sbUxNY

X#079o z_>ohl`u6lA!Q|!Mg-xzwN!`!!4h~&E(I$5OwB^}k?2siP10KqSy&a)o7TCi5IW|lD zfQpgq+405lY3+tljwWOG!hoXAfyG$bcrp}g7idbTF7#KlX?Pk2T>grNtXQFs!0$ET)ym$A|OF^I3&l7pk z9t$3AdT@8kH9uVz@P)(WKCEifJCh$ChcF@^M=6bYJ5-A9rn}QJ8r^4RDT4IN!rBcX zR2MJdtrJ|gu2)Na!^A6GY1$eOrK!-H>0zBo^Ih?C*DU4FfSnBoIG)8LJJUw>_2W0) z)_I~bL#46>-vnsZydh0v!iLOv|Tdw*fZX7L<_^qUb2?Hy7$!IFZ%x z%jcxAyBj-6KnJqeFXN?&o8$-bD4z$o=`Usk-zD7KNE)8wqjK4(5i69uRZ@`>bve;v zOeDuse<>o3utlAG^^Ndr`kXI{scCEVk@rOmr?UuRa7a+`cp-TX=N6Hyw!kHfT!X6_ z9w1gY$kp7b!Yla&lrRtO0#FK5J^? z-q}oNX1yziIrAQM+I;ss^_rLw+tlG%vG(0x`j6g<%TFkJ*nBz7)(8>EIr4o(kgjp9 zcFI{~a#K*C#O1M3!PBR{HwVzRN1yWs+8|+rM)3};lN{^^RoUa)qEnXpeY|DXVZeAK zn5pp_xwEN4TejiB+c0u)NN5NRvMf@n;i6M0mxVj)LyV#PIiS$Tf0ju#Z4sFS5wcu4Z_g`UAj4XA2S4N8_(p=FBXp2Z0- z?AX2u(O62+s+{x9iaHC>_vHxLESueq|0HBB$5)=$tgZ1$9dG32%jGt^Hw|wd4EE|` zy{WWrH%yYnAatzsEL7)Pw~5ly1U~B(C7~s;3OFb(x1|dWaErE1@qp{?fg0gkbOBEa zJ$INcRcXr0Me`%;*tY4r>h*F#5`=|ao5pj5k@7aihuPuw!(A6TBK!J@yCS4fDk_xV zI3e4)fSI0=B@uFK&4b;Q#2?8O_VZx$WI+Aoh5gPDF9i*lB&`-*w#FvE*Yk!1LA-Ui zhTXca98MQ1PDg?zGV7fjtUcX7LbZkV1&L>1Iu`nthOh4|_9=mG)4GtmdZ+!oGhg(r zCZl#aZ24FR-hydPNA<0Pb?Hp2`0hp>ouBjgN~M<}%$X?*7XTI1csGVzsH0kI&~1E}jF^x3XspyeBO85<48u zqyWq=wfMANS~c!YU`zIpW!|MUv8$Ts2W)#a1~nY_z;Lr-}LDg{JrT-^Q+&Ws!de$tw% zc@~0T*_=mzG)+J#BBsiQ_SxB8@UUiWWi)n8jeetloQmE0*E;^sHv9(QCD6mV*BdX! 
zqNcSI^tMF*{MYgmz$Adie^O=E4^fA5guFq@fMU+Y*_nVB&CB>{P%rC74Jm^ER=X*h zP*U~C-|hVP%}4%A)3jxY_5?NOzO3wQLSAGqs@EzB3Z3JGyl5ux9389E6})0}&4TfA zx=jAl?n4?YnjGQXa~6K8pOU^+Z~cUhx}KcsRWfskp(*J9#~u2};Gb&;oMZ`+Kad9S zC2+*`cSyrr#)M(DL_#=K&f<6b>y;LdU!!f^4{ z&)TpHcLi1LxvwlP7P5CW-Mq}N5EcpneQ$4%%Xt0iJhy86i)tSJhxAWo>_E$?Z&I8L zuw&;MH@yaw;3_&n8y1aIYWc=Z_xl*}J}0^w3XR*av+S#$+R!_{*Z*EBzxkYeJT7;w zt4l(aO<9KK$W$#S;wirLnbRsHfPJzvAlGyDLgkozQi9B2uK^~V5s5bCixWQI60U#y zqMy+!WG8NBsTGIn>EQS(hR`P^qJPhvVu^BSw9In39}njUpI5$*&RecMPi6*hR2ZIJ z??4Mu2KE&$+{-`9p>_qSUxG;#*M8H5z>=V2#C(L zyGA#F=&9!}6ym%RH05jV|7Kk1hWizGzCuUOKX174yfiRVP}QSM7z_=k#WI487IVK|l5sL@k-8lqWlTE@z&GFF=SLnh%Q5fvI%4PMXwOeM&Xm$m6sD)>b~ zM6}~1xzX9`N08^h=pmR#;UjF%MXY4zg{1Cbk)Vt=a56}PXIXuB)`8KUqo|j7hVr!M z4NPXX?pm5MveN*AbmTVMbj?LmR->d?zMz&EBdF7Y^n3SuD8U$bu0x(4?^OP|f}*kq z#ATBYO$RTb#$kmATS;j;>9LTGcDgFJwNVg~J@4XyclR~_!Nil-VG)4n*h}8U=BPFw zNxVFc-qQO*-RNYB?KX(kz*qJu0{m<2io%Nxmno)mwBXv1#C2VBSNqI9-LUU-$6 ztaQ&F47HraVM27!kK-^p<`zMUoR1=+=h!ef-|B@#IvCvAd>E@Oe>6?vcSZ z5U`wW{2VF8r%W754y`*JR68MUdU&s@Xa+CBq5@|Su%aDRQVMoYC+bB=jCIO?OdwVm zwTB)}2DK$Ru^rWuzho;NqL^3Sm)(9*_T{)kjjUFZ+>z;lQic6)(lDJ#y5WnRHEBQ> zy%u*CeBeQlTU`(LuJ0uzrjc5!oG!a6cb6FJi{tIB&}N{pwhre`I_T}~tf~gwtrwtw zgh7GzFKRYNj#Z3hfPfjZ@yjM$pWGXzwm~uh6=F{3pNuM&y6g9*@jQr4_oNhDfVJ!ElUF|12rS8>*s-r4g?qczIwm*3 zn~19V0Jr-sd1Dqr*YM<2zF6@9<6x}+o={4;s>SO$TO%kCCwesqtYUmF=n#P>rKkBQ zG%QXkR~PennPxcd*=eylY!p~i0%}qUu@oNcNLb~ZG9lRh^x7R17$BRtarpZ9jh-(G zcaQ$mUReH{j9N<#{)MGofnGif4Qa=fjwS7s7m<^&!E|o&{=UKK@Q5G}J`FlnoGeR^ zj)GcP`|3~8lO8sj_D25kP}1i7SD%QXG|!$z+f((O#<5RGIC`#+664Byc9g1kL-$CQ zC5oq%&@#BT$iGv35ciSf*9iMDH}xW2%k|oIQF;(4b-H$3Yls_YH(H*PbJv0ZMobkK zC7cBsEm8@-o*nKktf-)cmT?QR_;jxa7SShwmuFca34^AkXd01zH?z zGIfO#@TTSDSV~O*(NPL%f%-W-FEZAulZo*1WS~{w+vB!7he8$;y{e|NF2?2b?HQS` zEY`A%)1eemxU=a^8jH596^#oaYR1Fcbi9`3v8NEBxzl2rFev!C1~J7`V`z_TR)6_p zswV)Sq@(4Iq0MX3BYJgjU^^iceu~2mha~3u9Sz}F!Vsk$ZKnHXGMkZvA0fH@HLOL- zdcCpapg}Gf;Ll#=WS?lr7Vt({SjkgTA(@s+Cuf@}0*j(MCkn;Ifs33EQ^Is^=dawb 
z+Y9VZB^|QS*x^$b##!?o$n5H%3^z`Y9d>-=eUFlcI44Pd#L}kIf_b!4k%K{v&?EaF z1A7aLexXp_UF2-ZD!^}&!W*cruDH6lH+Uz%-9O$vud1OQml#k)-%!iV#njI^%E=DY zAJ2HM=KQnM=hO&GUOs@Hf!sLQ`DpTc;u9_farAB2Kf!WrGdyBsVQ&w{vW^>#BZhj6 zd^ZT)1DnH(e0_aS@k?b>xeM|{9hvtka})dhU*CfTY+1R>bmYiNuU^u0%f4^q-Suf( z9m9Npz(eZBZ`E!DkJ3MlRb26>cqhxK(`9S?S|z+Hlz~9tFXE+GyO_GbYq(%_YP<6t}iC3haM4V0?QB$pEBdj=xV%HKmNXJ7JZgW)7w8<0CuH- zO-!m3A!&E0PwGG-CmZu>1Lr|zryk^->|~9#@LR(fMw!j0H$D>jH!o!EpPX&fm7mCV z2!1lV+(@W6gzXN&Tn28#$y?TJ1ZaRd8n$&wcKCX^p_n$dSU96cukZEOBE_D)>@Z-} ziRUQ?P%F^tY3<)(cvSaV!DN*THsE2O;_C@4YO3%0rG3i3JwMr=@Avbscd3u=>-X6( zl8km$JbER^g+c(sZh%! zy}9YAN*ThGRdNGD>o{E&9=~75nPUV}1C}N95|{<{_}q9L<9WZ>XIIQcbvxC>-Z{HI zq}X-CFL!R+?@=sxAY^nW2EnMn zmpH7(%kFXUmuVahafSk!)q)k>Eem69YP&JWw--7*`ZgwqY0tWyh^8w2mJ=kig1W27T~2Fq!IAd}vi1nK|(8 zG&s4|1dScOcAR!bOwEPK1-I+6#Y=W2vnbaRQ1i*CD9BOIFDNQt*?nWuFZQc7yBzX+nMnX+ z;hNOG*Q^IXlLyYtGH!H~Ol=L=%_PFn4DeJ#FUS_oluvko7q>&C-b8?q;=lE?kZk|D zdfQyfBN^^Z2#Ye+8yoQge5)hCyJiT!-~HZbI=CkT3gqr>Zn{>I2Vo~;8dGt3suV^LHswCV^V621LJpQdXAll^-ZWj|#qZ$$mPJe-UC<$7W$uYRl0 zL!~>u&Z6@jda-;Rt&}vawYp-(bnIxmRyLzXaeZ(R0ZbJVW9-((muGFnPPaq7b=&W8 zSw)PW9ub?0lZc1EaADsLC1>eAEOP5ga`#^vP52f^Vy=H+`PofxpN}e``OTqv^VWs2 zQd=k!j=Gt%hQm>i6GpXjuU166G9#M#T~fX6yS3cEy;0w+JriEfX&R-46bBo(gQqk0 zrC_mYrbqm;KuhbWFUhY_;WM&`er zPb)5+u23BSoRw_#MR`J?hXTa*WY6L~Z*qyEHl$Ji@G=ytHaEHT~8qnGl?R?{KSm_mBz9KdW~$?QUv!y{`dpsq_2$CI79(I%7!M;iA* zYYhOjX|c-!BGJ;i!WiU_13+{+np6rq$&HigcN)2y40=RuwHT;1(KdTa?}6-`@eSpC zvHo-|6 z^msw-M%NC9bMo8I2uIuoeN%zSb=O|8)Z^{1B7a`SO91k6xEN`$Uixy1cy?$1Ctjkb z2>8v_T5PJb8N%C%KB;;D#tETvy0p2J80sIf9^u+9oWkozXwm072%0|RwH4g%1ME$U zM62rA+?g;9wnzP8gWRQuxM<-8SWYzcu70I=^T|N0vNh2H$5K2 z1q&@*ZG0L)N~frmh&dbbty_RNO@=%XuY@y<-}uJ)-Y?cz`W47YC7_{D6r8mUiT3$& z-hDVht;EmBwKt(7y&;t7#X$zkK>bC6I~z^pDajbBAu!82OCSald)EEclJ_TWTeJQt zZUyK3^sqfpEn|F^s?Lk{Suw$^qL>Buh>ReML4U5WF{45(+qMFKdfGfF&=3*sN#gli zIu9U}v@=YPTaK>Wm#n+nGnu*}sspjPa$hQIIP8BH9|3S#=X6#8;^Al9tcuM6K%5sM zRR2=Za*zM1+Wz@7nT*EOXr+(u&?5x(_02Jk^;?C%KXdfV3#|re{rWsN__I|6xS`)$ 
znuzWoJ-;EKeMC=w)DroFZZLm!8$B7h@)aN7{0Pn4{{Igk<_!R3=LY-v`K2o$I6wct z2j7r$xa>3e%C;WexHy#W8MyLt9%HY4ncXXdfu~;6AinkMkBlx}a&kAYP7Of1dzOrb zQ2C0Z)MPO|5-(YLdLUJIOA{dtd73SIR3bl+X!4e=`z0?vf(cpCpQL)=-krxj{=bh* z6hU-+_#W${Yk&>lmv-RD8_8-L$u4E0{;uE%3L^wa=3;h+giStLXk3;EQ!{zaAED9FZacV`S@i+Z+C z6B&ymA3G#FI1IPpG}|5Wb{7oucW)9{@;o&pjU8W7M4U76x1Q!9N7=t&yK!GF=G1<^ zynZp8)Z5L}ZH;~l*C<#{u&?lrY5NjMjPz2+caYp`=*DPDwMwJpO|K zcA-S8c`2{s;-v;OUY#5F%OKy?^A1jF`R!R0QOI8KU6Q>dWN%qiI$?h2Q2*drzxTQn z*@P-Rzh!Vq*@V_P>Qzt|W-6QeNL|N^WLYjI1LoMlK#AU~qs}Mb4oV9F!r6E6)pCCP zzZ)xi=>o<&>{Icyvu;aOLBaF07u0K*9DvVb!;@c<*bO!XWm(GsT^|?S-DS03$F?&v zhP%`|aN-^X__ylqMm`$gS%Cs-+S>|Fs`!F!HzfDOa zuUK-mjex!Hc{uoWkzZ7p?Pa=}j@)$cx7E0)fmm_$h#7TQ&k*c%8rfR__n&dnGLGV5 zv@#p}pUBQ!cwqC3L}o?<6GNrJ3!N~ts=1tFQ$ikaGW1PF#f$kst87DnIX>^cceCGx z<@$B>zB4eK?d()DFqBuxy|$Xy=1e5v)jK-OB`>8CxWX@V;59Qbsde8M%Wu465nyWZ zkR~H4v%L^i;Y(~4OnJ~^ZXaB79KBj08%p?WY|1hQz4d+1bj)o5(!7j`MM5{)aFF%Mf6<_O~!`B$~uw&b+YbwgH6G~kyZ;F-?f#HH5LIxi~%S69-|2-kd zJyQg3)agKYZ{qm1EQX#OSL}+&O?MYn{$v&5?AVh`@8V*5;cqC+(gcA8X7|gV)MmGP zwzeb1Aga<-ZpV4L-gwZa6mk<3%U3L(JsS7vKMuQJK|Em3P^noMtKfOKw^>XV`u?aj&eIcq zz{q8HvI#yNm&+YG<*~3fb_1Qna08_i?}=P!GRq8fer6|*-K{HhCtdHrN<&Cp$@vbu zaj*@eCv!_F4}|a|l|MEf9Fd`MS;Q(SJCxUOoSpublJRqpmb2~>u!2V`uHVVM@0SJ^ z-0G5)wevh({*6+&5ps-=fWSnR13*nX>V?A#y^}-Op&#>=rwewuE%1I3xYtZFwbOjifan!eNa7iv}=+pSXYFz*kPQ10;w#Kl}UED<{b>;&DGL2sGyHU~}po$3b1?yD0S8z1$z~H$F_qk)YWYaIfdY ztnz@#Gy08pcIG#^JQ?H#cxB0|gzmNKOLZ?l<-+`F@K(oWd|aH#OQM8Oc2kp)&TK|6=?fmRqk2U^ME=MfC-g!M01BFr=2gg^w(fjDGhmp1l^1Oa_-j!$GPIVwjPtB%I z*rbnZ-3CAS3GeA7)XmU1@t zg8ltPkv8a)_UN`dC_^NPn%vI6CK7=_F^6Z_r+(Z|q}c-}I@+l2y}ydL8L*O&bO5lG zH`494eSODIb}oEli0mW8RF#sv%FWq{Nz)5re!)|pkN3_Bh~T!ZWp8Mz+EpLAv3P_3 z7Tsdgf*O5}Y;J!~-~P^uq$s_zKC*IV{3cxi zWonj)a144v& z^O7Vw3$R`Kyz}ti>hYJHll=;M4UNvWH$G~Y_d8*~$AoYOvM8*9Cr~h}w@3f}i zJk~-gfk;(?7t530AUz3om+iKMK;q9)h0+UT?0JV%$$pGlGI@pLwcLCFXKcV458(P% zGZo?%`V4e*Wcbew03`~@+#+*j>HX@DBqCfKF8j}9rk*SFWF?MH(OJe)w`XTSOd=ub 
zxyWC$$Iq@mM$Ht)*gc$ddy>eoGglAvsUiy5@Qhz%ye*kMik#`+daeWd<-UBH3;I`-Y)AEO<`|C?}Z?)w$OI~G+l6e-Bxm>7!8m9mT zjdYHLnRYn5r~hGbmAXQDknU!Cf`{rQ9jK3vVLa8W-d%qtRqmwj_n1my#8ko6yB^YF z33AyxO;xH1JG$4=;*`0a?<3(N`Tz&EYbww&(`} zrldgQzI(GTHL#%Bc;Nf1_mZH8cCVFaUk^2P!#wRmJxhyoa!LR|%j0@dwplR zrM^LOL5t$o!kiQQGM@~OSFdMl#k;UD`jKF%X}-ljIQp(7@k)0T18#iWd{_E6riT165$SStL; zN%)=5j!?NMqGU9if|xP|>USxm=J)pZUq9cu)xV9Ccd=%=80-*d8056UjxWrrNuVvb z9(yj6R*WXEE?vOi0ZIJPp)Gq7^{^P#IL+7cEi1XRZvd)5gjyJdI|zy|OYC-lKyt7@O8CYKUk*r>_0^X(ijKWT8vWI*vA?LBLq zp9YpAKxySfVzDDU()9`x{hvH7zyOA${pLJA*xAkJYUuY#%ot}udBW>pGlE8nq%|w! zS|lH1s<0k^$o^6D#dpw@K28$H?ga)Tq6hyQ++KMe;Kg@XA(Cn*tBpm~`qgE6O%hDs zb{axFI5xQ$bZ&;ey-(DmB|G_DLh6^f9USUj`ardkEGOg*)j$&dr5={reGZ_$7^M>m zEfVWgo64G3?#6y#7s&Fv7?s76bHOchZ**1|?@x?7Zkb^S*G94ubt0*lTnu=!CMo_*aG|?fdDqd~os+BpNhS{`l1}Edk zmD5QYy{u%O5W#`&2K(lH%4fNJOt=b?0>OHj{)&tI=AQ?bmY$o5e|_p_uwR>T;i3Bt z%HxQDaS6XYw4h(JnC6+YSZ;5G}W65bZ_C+~T>-zte1BO(;< zjJ_Ys4C(;BZ)rEtF`R0K_{GxkWp6iZY*bh25EqB{!_^q*Vw_!T%g5(%l?lpB%n`7| z#ZpKh=7XU@Ax-E8;#1am^a-~&1z4P>eGu|$3m(-1AqBE%ZT{YTR*D#Z|C8sev`Dk@ zPb?YY%vP&C%$vWgd=HzJNk>uAb)VTGu^|H0U-cVpW}oIV-S>%Reh3MrdwjB*fE~VZ z7c=N6wJE35?3p7Lo)9Nb(^dbc(*qEhzCd@IKCmPc0F(xlbw*@bm2lBQkzeHYPK<+X z>)a7W4@Qc9yEkf$v-^4=SYSmO0Uabr(fuvg&c=qt9`vOK=(0HwWYac5z#ttm`k!{; zCa?rG5Xd&ZaywCiB=yPL#O2tA6`t2BtVPJYcVD?=$g5i2mbFf26t3LRVH@ zQlt8smfEw{;`g)(>{3wo3kSMLW?o#zJ^$@q0^d4Y#S!oVhhEC2MD~eReHDc}=*f=} z__c4@Xov8p0HFzjiIQa9&F!1`bO`XP&OXI?g?<2qK&X8AZ$moZJaR<;?I;8gA8%%q z{yN3)&6yF9lnlS9s&`zxoCkssOdCr;x;;2CfT!Xgs47pj8juceAkr0@ zoz+k?MjIP!K9_e0`SkGap)OBVOw@2au)-D@+whNoIbKI~Os|D~J0N{4<;uWN$w`|A1G;H3}W13qM;239w}wU8GU{;OB)asb6L!;fov+PD?ar1@YvrCl(pU`JmMUF z04Pr(3!%@}#`;v%vv>V)K+C4C)nK^UIwXtnOW`)_z|6*{0xV!Qj%E1sACicrrKJ;4 ztpgQ6bo!84pg}PZrH(_g0G+!H{1-F*_qOc$OP&deCus!Lcqdg?*-LyN;KdIFOu^vuHWS0n>xz*NJ4`oVirU+^mBU_g= zX4nF9M166La-1L|8A7+~xo06c&HF+Ue@5um+Zjak6bB$mtxmuGdgHpUB|?Di+Z>Vb zU*c3zQT1X@7M|kfX2A_Jy{2kG3Wma3MMkqJvb62F1fuM!?4xu$b}lwc+ANHxTjwps zU8?jOFH5DN;YroD=Us>%uEAuuSvo7XZ-G$qe3+Jf_YM1Q)}-02#ofP%Y74^0sISJv 
ziG!@W1_kCOd-(;w^s_OP=r}kVLTFv)QM8PHBBDUN>Fm<=oy>FgaQ(oH3J;)=c`Pm( z@u^s(2}JE_E`WkDiUyXmcR;z(7(lo>H^0Dzyt;;(s@h{f z?+Eu?j8I^krO%1<*7AH8Ah^7;w$hnx3)r`;)mKkBhUkK7?LvwSK%Z#otqc|A;(~t2 zx2m)6U$gC~p30Bsxhdt4oNOv|J+wVl?pkW6vgb{H;=QtGqLQhIjU8#4QTnQ;OWOjd zIQ{O|f!Ilz!vO_qHb+&Va?fT3^m`W@i@QLxqtesjYQ5V7644k`zx-R`ibW`P!# zrt-O?fFRuu`Qp}UH-!y*bW&1Dj*Lm%O#{eyISf(}(r8d?+1$40{puwr3go`fp{*F3 zbCs*oV|-{-gBx3zSyt@o(6m$f*y~_mj$?bfI$NL}NGW*h&9cfQ zb!?+xqCb#1fk}d9Z89Q5#8%WdrJc za@k`bFc6UP*LJQoc+TB@K|H@lOelR0u#7-zQ&_*He&kdGfMWy9hfAvRJ;TYarmRA`VLb92rb4Ai%(Zb0kAz>gh<5R(3kKp?o^`EmCwd88>w*Gdw z{J#S0wuZvb^q~z#BVv)wF=_^eXQz5e05?#`pa2odY}0Nb)w8}a`1DAikwdKiaV}6~ z(4#ud)y6429fyUPJc($eeWIgJk*iO!)LEU7H@x8=E$D~O0QtalP^`lGOVRqx4`ZhB z&DZb`6CX;W_C|K#Mhtp zbTwiu%T~!R2lePo7oxghq9%EkUEI@9pXZA4SL_VHmnG$o z`Ixuu59EpOE_b3et}0tM&*)+!>F8`E)_R=2ckz4wJy*uq*l%bE;}?1Idka=~1L8HS zQz0#^&n_?)_wvIdmu9%lOToqHnWq(D-tzo+wHez zZ(3ogI&VNR^KsLge$M{2qR)ubWY1bdbZtU-j+(j#6ZsZIIw;2SvTStcFS^*LlEoPo zdBZ~))xjBRBYEUczS6}E0j;bV%HdD=kq}xSA(m~b6ZUZ_5%`qd~p(=WJZ z;W9R)I)Zfe>^fGbblxrc&dBrbqURR(iDY}tdm5x1*(iQE)t_gF;FYV$H#IOg+IE>O)Df|*3J1YWfo2VWBwt zZ-=%?8e4MZiKJ9Z|rd; zvq-v%2gBhdZv+-pwPk1`6rfAv{FxUy1^)iF7f>K8$lurHL*CVv&R`0~wV7l#RK>~T zr?LrS_D`87$0xa`#j(~ax_^tn0Y&$_5(8B<`zs{tBxa`nQI-v8wY$KwH=6F1{czIOvEjF4xK7JjC&z~rCLxk+ zun87S$OI|<5<;R43OwRJ&kZE!Bng!mjLjb#dei5#bl>Ek2Z1Km@l((~gc2;3T5B;FOW{zFzi%Pa@DP)TZW+aW_IIU` zzl|~N{r)WC0J!a`++ck>*YIAg3sBJQuc^?GXE{oSpIj75k(#HY-jDN=NJ(4p43Pgn z)P1byWQu)(T_R(7?6a@JyoR#SkFa{jLAD3My84u9wC|VpOB42RU?ss@dt}ciU@+^| z2(KS)J&UPiwbvswMp4u~8b@iJo&OGsqa?FmythmQAe+Xdmu-BGi|Zm#o+n8&Y`s*c zWHLP{ME$DPn#)ZPH$|zM#zZ^(TmAg)^LvFZ!vOkgMOa(Hf)P8-E!@KiU*(Vwx*?j~ zbL)oeOf67sNCRD%?eqGOKQ^-1ZyU{HBkt5eLJ@Ocn@#4|%)H+=*mP}6^m(@{Cki}f zgXah`GZ}CH%iT-2};)BpA3bnWp`gB+kgeKATv%qFJP|~{68oZj%}GoQPLw` zrV8h>QrUKw-zm2(&~Wf%)c4k zGkdNwXg&*f{-Q2vhv;B)m%b9Wi2VHNAMqmr)M5U2_$7(t0k=2z0ynznKDjtgMMNl4 zL~TsR^4~SL56C7pFPcp?)Wp`&l5Zm>eyp2)n$Ou_*=hRk8*o0T%pES`yIoF*rmK11 zwcdMM%W^w&YG5%RuF-ftrxFh^UcRFs 
zUo2P)O(_C_B$*<@6_(Gegq!2%f4H=kgv2KKN2A^4G7`Tr!qH(h7*Z+3M|LuMU3n&4 zF)*jD7>^TaH(d9#NI0a<1OGm@*8hrDK9%PbSRSt4lwBlNzES;@NP;h}XtQAwJZE_~ z<|D%YMm97eC7o{L8m$6^A!uUE6wo6bpXzWato82=A%M(uQfCFz(HI=9n+d9U7=!&O zDsj*TJGE0BAAJJq^WZ8gUedkvGhRJ~ZBRUgPN8IM;ek7SkHS zdPO~6$y`+(Sg>BVKmIAGKyKl9lJ~l?E)w5kfC7}^yXs%|cvi?p zlhX3W=+k&j(cf7R$TyPgW9^ui@cy$ggKbvCnRxgTz=Iu7ie*(JHMVIf3gG=?ahSovJ zs5|7cRG}VrN2)=!IJlx&l03~0>&JNG3faW4vu5}U4dRoHF;nZ(3ZXJ9t*j*3Qk^gL zK4{-@kQ!|${aah{xRmN+((AH2>@?hhDxENL!375OCPw&jBqj>Am zF3Tk{n{|x9(NWfxfGkx@^_>K$y!E@GuIy!yDIs=Ot6~qi#`nBSI|uQ_rcv&C{?J zNj*!(WtbI*25HUZhxwa;3#-%+x|8zWoO-dBJ7VFcT{9Hd?PqG zXqj3Pn#im>)?#(xu6KG3qQV^2(*M_;n`w*Dw4`VtAUr=(*+m|B_?(egz^!+?GDVkl z9>6z6rq%d)&g*Cpf1b2XaILiGHVGqXFPFR0Q$x>Vp1cj`qjEYoK8d+mA13+{{*jAl z*7!DSgN?Bsqa4+3Y+nGU3Ky8+ebZcO=xkCqe-&r9vtW!}{ZuWEr zC`BgT*%2KT6GAWslchJ?*S?AJH`z)Pyuob-GA8McxyIPCy&t*O)p2v_Bx$JR(ooZ~ za2=j))=8B-?_LsUNMt5(LdH&Tkdz%9V4@F|lU}i@s|FSBEx4n2+t4wXi;lb{jkTK| z&JtBtR>-U@uik4({nJ7gp;Dd?e5$TM?Br~|p!-~v&(-1ZH4D{h#cUPPGi;>|puW1q zdO>%DQN#NZ?H9AYx+ngJD&qEAA(_=hZa*tn?aThNR-OA?rLwU|PqSyFf7;68;e5-S zscD2=kb#_vmeOkI>`C;b6Bjc;ZJr~^ii<&+9YwWudwm%iV4nK2<6cDBX7dN_?Wm~s zr}py|yUS7v-3~T3R-UB-fGGZCHLJq5GxT0}oGMhgKyeqm&}7hR0VIW_h;$3o+hdTh zIA5l(A>Ce}Z|zUNs8-Kku-p2c*|fi<`xTr5eM47Bz{dYkTX~efN8{93B8ga3IYvt} z>Af=Eq~!Vo=i%P=8i%JMRzvm%3QzzK&wmW8a@U@eI7t`Jg==>U8)P}3 z|E3G~#n_7sXYg>j5d}&a{76F-IuyvrGox>d>FWdTblA=2M5|aG5U2pDkbaokrQ_Q} zTEQejv@@+kQ>{asR5h<^(t55R<{PHts)yLT)((%4&xxglpS8vnKKX(5oHp%A9{ZE; zzMK^1W9%=Bs}0GfKJ-t>M8`0m?OU2aW>0!h*)lNv;O~ES-!3etoj?hs=3|#`= zNQ51_Z0$Qe)aTo`Py9tmMRmEikYb{82va}y8wcYHOQW%_o^EnS(Bs)+HShn>hHskxR zZ<(=J6N7~xtHq>VO=TuvaiOC4iM!_#ExA|>VjsrRq2&a_Q; zxAsU$kDt6A`=p@la(`J+U{OLnuUMh|*yiG{^I!>%IqU)YPC$6y}|6zSUAWEY=vz58UsT+vg%$_Z9*lNUyobjDN>k2`_!}q_2cz zl5(6_C`lBZRZb?=LtC@b2!8v|MU_=<#dneb*vK4?qUA}yf|Sg71c?Mn@8=)Z_w*~L zVd?`$uSGiroDK`YSJ|-Udd^`ID+sRehX_ST_G*ZeY$=^8!yAcl(O%!S{Aame-+lx$ zv*g!7#`_N#>v0!0g`8GaBVitzO3`m$&3HCR=>V5Eh--PtY2`GzmTVD^j3SYMwE z1=@}M__v}#r{PT{9dry#bc}T)WBRS8e+FUM_g@z&8?$b&Vx|BQf1GYQ*D19$$rFF^ 
z*ZXoczy`i!rH{>~O zi{~C_M#kE$==B4&w!_7dz6MAzydWA+-QU}8SehjN?)>0w2Zn28VzpUt@~BV%uNXZR zQdKM>v_e9bdUUL4Xewf1&`Erh7Zk66)XK@JX1f{ zD>bbI?60O7Hwys!yuZDuhm5Ig8j*M&~#(;j!m@f`X%W*U^iZ zmKu9L|CC5=0evt%;E@G;#pUHM5!>AV>=(CYN?Qswt5R>HOUSvt%Tx|B6!gep13p!> zGIM;MnVGpVd1WH^@vN>A8z!*2dUf?3%>Ls?8Ug&Yjb{)&1Jk&+3Df6l^lG>t<9+|| zaF_!qZfN)XQ|y(f>FQKc_O(=E?zoU*tVZRSUXZ#nmb>|?YGZVcL)nAFFqK@4RaIYq z6NG9>(!0!1J3;DROQwk{P(jlN!??&U-YPzpGDSYYgcr7`osd9yoWBl_#qIANAXk)# zc&@l!ja1NA^dOKR-DP@t80hQ;bT%@H!r9;XHWo+t6Z+<#JNDvkP5MK``E$ztX}SJt zF3t#e`(CPZF&?(BW>|5zS^RH*4cmx+LYWtlYa8;yEf$#7%9*>ESw)o%9N1OAMN=fb+1=e9R` z$bUZ&R-K&?Lz!Co7t^A)QbNU<&F^2ms>X9*O*{`lYJkWH&rBT%oDJZt_o88ZF7n)& zT@&AdG*_s)FVAl3aDTLV?laloxW5$99EbkruQBdl9&8&6&TahlcR5Ol_p+iT!k@qW EFEXHbasU7T literal 0 HcmV?d00001 diff --git a/programming_guide/quick_reference.md b/programming_guide/quick_reference.md index 4e0c5d11b7..fff7e14cbd 100644 --- a/programming_guide/quick_reference.md +++ b/programming_guide/quick_reference.md @@ -49,6 +49,12 @@ | `print(ctx.module)` | Converts our ctx wrapped structural code to mlir and prints to stdout| | `ctx.module.operation.verify()` | Runs additional structural verficiation on the python binded source code and return result to stdout | +## Common AIE API functions for Kernel Programming +| Function Signature | Definition | Parameters | Return Type | Example | +|---------------------|------------|------------|-------------|---------| +| `aie::vector my_vector` | Declare vector type | `T`: data type
`vec_factor`: vector width | n/a | aie::vector my_vector; | +| `aie::load_v(pA1);` | Vector load | `vec_factor`: vector width | `aie::vector` | aie::vector my_vector; | + ## Helpful AI Engine Architecture References and Tables * [AIE2 - Table of supported data types and vector sizes (AIE API)](https://www.xilinx.com/htmldocs/xilinx2023_2/aiengine_api/aie_api/doc/group__group__basic__types.html) diff --git a/programming_guide/section-1/Makefile b/programming_guide/section-1/Makefile index 9a89112879..1a3d65de9a 100644 --- a/programming_guide/section-1/Makefile +++ b/programming_guide/section-1/Makefile @@ -6,7 +6,7 @@ # ##===----------------------------------------------------------------------===## -include ../../tutorials/makefile-common +include ../../programming_examples/makefile-common build/aie.mlir: aie2.py mkdir -p ${@D} diff --git a/programming_guide/section-3/Makefile b/programming_guide/section-3/Makefile index eb57eeb40b..77688005e3 100644 --- a/programming_guide/section-3/Makefile +++ b/programming_guide/section-3/Makefile @@ -12,11 +12,15 @@ all: build/final.xclbin build/insts.txt targetname = vectorScalar +build/aie.mlir: aie2.py + mkdir -p ${@D} + python3 $< > $@ + build/scale.o: vector_scalar_mul.cc mkdir -p ${@D} cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -c $(<:%=../%) -o ${@F} -build/final.xclbin: aie.mlir build/kernel1.o build/kernel2.o build/kernel3.o +build/final.xclbin: build/aie.mlir build/scale.o mkdir -p ${@D} cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ --aie-generate-npu --npu-insts-name=insts.txt $(<:%=../%) diff --git a/programming_guide/section-3/README.md b/programming_guide/section-3/README.md index c91095d68f..166cab665e 100644 --- a/programming_guide/section-3/README.md +++ b/programming_guide/section-3/README.md @@ -149,7 +149,6 @@ To compile the design and C++ testbench: ```sh make -make build/vectorScalar.exe ``` To run the design: diff --git a/programming_guide/section-3/test.cpp 
b/programming_guide/section-3/test.cpp index c5690e127d..0698905f19 100644 --- a/programming_guide/section-3/test.cpp +++ b/programming_guide/section-3/test.cpp @@ -34,13 +34,10 @@ int main(int argc, const char *argv[]) { test_utils::parse_options(argc, argv, desc, vm); int verbosity = vm["verbosity"].as(); - int trace_size = vm["trace_sz"].as(); constexpr bool VERIFY = true; - constexpr bool ENABLE_TRACING = false; - // constexpr int TRACE_SIZE = 8192; constexpr int IN_SIZE = 4096; - constexpr int OUT_SIZE = ENABLE_TRACING ? IN_SIZE + trace_size / 4 : IN_SIZE; + constexpr int OUT_SIZE = IN_SIZE; // Load instruction sequence std::vector instr_v = @@ -64,7 +61,7 @@ int main(int argc, const char *argv[]) { XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(2)); auto bo_inFactor = xrt::bo(device, 1 * sizeof(DATATYPE), XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); - auto bo_outC = xrt::bo(device, OUT_SIZE * sizeof(DATATYPE) + trace_size, + auto bo_outC = xrt::bo(device, OUT_SIZE * sizeof(DATATYPE), XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); if (verbosity >= 1) @@ -85,7 +82,7 @@ int main(int argc, const char *argv[]) { // Zero out buffer bo_outC DATATYPE *bufOut = bo_outC.map(); - memset(bufOut, 0, OUT_SIZE * sizeof(DATATYPE) + trace_size); + memset(bufOut, 0, OUT_SIZE * sizeof(DATATYPE)); // sync host to device memories bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); @@ -120,11 +117,6 @@ int main(int argc, const char *argv[]) { } } - if (trace_size > 0) { - test_utils::write_out_trace(((char *)bufOut) + (IN_SIZE * sizeof(DATATYPE)), - trace_size, vm["trace_file"].as()); - } - // Print Pass/Fail result of our test if (!errors) { std::cout << std::endl << "PASS!" 
<< std::endl << std::endl; diff --git a/programming_guide/section-3/test.py b/programming_guide/section-3/test.py index bfdc33cbea..4028e889b6 100644 --- a/programming_guide/section-3/test.py +++ b/programming_guide/section-3/test.py @@ -15,7 +15,6 @@ from aie.extras.dialects.ext import memref, arith import aie.utils.test as test_utils -import aie.utils.trace as trace_utils def main(opts): @@ -41,7 +40,7 @@ def main(opts): INOUT1_SIZE = INOUT1_VOLUME * INOUT1_DATATYPE().itemsize INOUT2_SIZE = INOUT2_VOLUME * INOUT2_DATATYPE().itemsize - OUT_SIZE = INOUT2_SIZE + int(opts.trace_size) + OUT_SIZE = INOUT2_SIZE # ------------------------------------------------------ # Get device, load the xclbin & kernel and register them @@ -99,11 +98,6 @@ def main(opts): e = np.equal(output_buffer, ref) errors = errors + np.size(e) - np.count_nonzero(e) - # Write trace values if trace_size > 0 - if opts.trace_size > 0: - trace_buffer = entire_buffer[INOUT2_VOLUME:] - trace_utils.write_out_trace(trace_buffer, str(opts.trace_file)) - # ------------------------------------------------------ # Print verification and timing results # ------------------------------------------------------ diff --git a/programming_guide/section-4/CMakeLists.txt b/programming_guide/section-4/CMakeLists.txt deleted file mode 100644 index 6b330f21c1..0000000000 --- a/programming_guide/section-4/CMakeLists.txt +++ /dev/null @@ -1,70 +0,0 @@ -# This file is licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# (c) Copyright 2024 Advanced Micro Devices, Inc. 
- -# parameters -# -DBOOST_ROOT: Path to Boost install -# -DXRT_INC_DIR: Full path to src/runtime_src/core/include in XRT cloned repo -# -DXRT_LIB_DIR: Path to xrt_coreutil.lib -# -DTARGET_NAME: Target name to be built - -# cmake needs this line -cmake_minimum_required(VERSION 3.1) - -find_program(WSL NAMES powershell.exe) - -if (NOT WSL) - set(BOOST_ROOT /usr/include/boost CACHE STRING "Path to Boost install") - set(XRT_INC_DIR /opt/xilinx/xrt/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR /opt/xilinx/xrt/lib CACHE STRING "Path to xrt_coreutil.lib") -else() - set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") - set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") -endif() - -set(TARGET_NAME test CACHE STRING "Target to be built") - -SET (ProjectName ${TARGET_NAME}) -SET (currentTarget ${TARGET_NAME}) - -if ( WSL ) - set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}) -endif () - -project(${ProjectName}) - -# Find packages -find_package(Boost REQUIRED) - -add_executable(${currentTarget} - ${CMAKE_CURRENT_SOURCE_DIR}/../../runtime_lib/test_lib/test_utils.cpp - test.cpp -) - -target_compile_definitions(${currentTarget} PUBLIC DISABLE_ABI_CHECK=1) - -target_include_directories (${currentTarget} PUBLIC - ${XRT_INC_DIR} - ${Boost_INCLUDE_DIRS} - ${CMAKE_CURRENT_SOURCE_DIR}/../../runtime_lib/test_lib -) - -target_link_directories(${currentTarget} PUBLIC - ${XRT_LIB_DIR} - ${Boost_LIBRARY_DIRS} -) - -if (NOT WSL) - target_link_libraries(${currentTarget} PUBLIC - xrt_coreutil - boost_program_options - boost_filesystem - ) -else() - target_link_libraries(${currentTarget} PUBLIC - xrt_coreutil - ) -endif() diff --git a/programming_guide/section-4/aie2.py b/programming_guide/section-4/aie2.py deleted file mode 100644 index 4231179c36..0000000000 --- 
a/programming_guide/section-4/aie2.py +++ /dev/null @@ -1,74 +0,0 @@ -# -# This file is licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# (c) Copyright 2023 AMD Inc. - -from aie.dialects.aie import * # primary mlir-aie dialect definitions -from aie.extras.context import mlir_mod_ctx # mlir-aie context - -from aie.dialects.aiex import * # extended mlir-aie dialect definitions -from aie.dialects.scf import * # scf (strcutred control flow) dialect -from aie.extras.dialects.ext import memref, arith # memref and arithmatic dialects - - -# AI Engine structural design function -def my_first_aie_program(): - - # Dvice declaration - aie2 device NPU - @device(AIEDevice.npu) - def device_body(): - # Memref types - memRef_8_ty = T.memref(8, T.i32()) - memRef_16_ty = T.memref(16, T.i32()) - memRef_32_ty = T.memref(32, T.i32()) - memRef_64_ty = T.memref(64, T.i32()) - - # Tile declarations - ComputeTile = tile(0, 2) - ShimTile = tile(0, 0) - - # Data movement with object FIFOs - # Input (from shim tile to compute tile) - of_in0 = object_fifo("in0", ShimTile, ComputeTile, 2, memRef_8_ty) - - # Output (from compute tile to shim tile) - of_out0 = object_fifo("out0", ComputeTile, ShimTile, 2, memRef_8_ty) - - # Compute tile body - @core(ComputeTile) - def core_body(): - for _ in for_(8): - # Acquire input and output object FIFO objects - elem_in = of_in0.acquire(ObjectFifoPort.Consume, 1) - elem_out = of_out0.acquire(ObjectFifoPort.Produce, 1) - - # Core functionality - load, add 1, store - for i in for_(8): - v0 = memref.load(elem_in, [i]) - v1 = arith.addi(v0, arith.constant(1, T.i32())) - memref.store(v1, elem_out, [i]) - yield_([]) - - # Release input and output object FIFO objects - of_in0.release(ObjectFifoPort.Consume, 1) - of_out0.release(ObjectFifoPort.Produce, 1) - yield_([]) - - # To/from AIE-array data movement - 
@FuncOp.from_py_func(memRef_64_ty, memRef_64_ty, memRef_64_ty) - def sequence(inTensor, unused, outTensor): - npu_dma_memcpy_nd( - metadata="out0", bd_id=0, mem=outTensor, sizes=[1, 1, 1, 64] - ) - npu_dma_memcpy_nd( - metadata="in0", bd_id=1, mem=inTensor, sizes=[1, 1, 1, 64] - ) - npu_sync(column=0, row=0, direction=0, channel=0) - - -# Declares that subsequent code is in mlir-aie context -with mlir_mod_ctx() as ctx: - my_first_aie_program() # Call design function within the mlir-aie context - print(ctx.module) # Print the python-to-mlir conversion diff --git a/programming_guide/section-4/section-4a/Makefile b/programming_guide/section-4/section-4a/Makefile index ee28c567c4..3b0140656f 100644 --- a/programming_guide/section-4/section-4a/Makefile +++ b/programming_guide/section-4/section-4a/Makefile @@ -16,10 +16,14 @@ build/aie.mlir: aie2.py mkdir -p ${@D} python3 $< > $@ -build/final.xclbin: build/aie.mlir +build/scale.o: vector_scalar_mul.cc mkdir -p ${@D} - cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \ - --xclbin-name=${@F} --npu-insts-name=insts.txt ${ -1. Let's change our design and increase the loop size of our kernel by a factor of 10. This involves changing the outer loop from 8 to 80. What reported times do you see now? - - ----- [[Up]](../../section-4) [[Next]](../section-4b) diff --git a/programming_guide/section-4/section-4a/aie2.py b/programming_guide/section-4/section-4a/aie2.py index 3e1f7e59ab..b09f9d0637 100644 --- a/programming_guide/section-4/section-4a/aie2.py +++ b/programming_guide/section-4/section-4a/aie2.py @@ -5,75 +5,73 @@ # # (c) Copyright 2023 AMD Inc. 
-from aie.dialects.aie import * # primary mlir-aie dialect definitions -from aie.extras.context import mlir_mod_ctx # mlir-aie context +import sys -from aie.dialects.aiex import * # extended mlir-aie dialect definitions -from aie.dialects.scf import * # scf (strcutred control flow) dialect -from aie.extras.dialects.ext import memref, arith # memref and arithmatic dialects +from aie.dialects.aie import * +from aie.dialects.aiex import * +from aie.dialects.scf import * +from aie.extras.context import mlir_mod_ctx +import aie.utils.trace as trace_utils -# AI Engine structural design function -def my_first_aie_program(): - # Dvice declaration - aie2 device NPU +def my_vector_scalar(): + @device(AIEDevice.npu) def device_body(): - # Memref types - memRef_8_ty = T.memref(8, T.i32()) - memRef_16_ty = T.memref(16, T.i32()) - memRef_32_ty = T.memref(32, T.i32()) - memRef_64_ty = T.memref(64, T.i32()) - memRef_640_ty = T.memref(640, T.i32()) + memRef_ty = T.memref(1024, T.i32()) + + # AIE Core Function declarations + scale_scalar = external_func( + "vector_scalar_mul_aie_scalar", + inputs=[memRef_ty, memRef_ty, T.memref(1, T.i32()), T.i32()], + ) # Tile declarations - ComputeTile = tile(0, 2) ShimTile = tile(0, 0) + ComputeTile2 = tile(0, 2) - # Data movement with object FIFOs - # Input (from shim tile to compute tile) - of_in0 = object_fifo("in0", ShimTile, ComputeTile, 2, memRef_8_ty) - - # Output (from compute tile to shim tile) - of_out0 = object_fifo("out0", ComputeTile, ShimTile, 2, memRef_8_ty) + # AIE-array data movement with object fifos + of_in = object_fifo("in", ShimTile, ComputeTile2, 2, memRef_ty) + of_factor = object_fifo( + "infactor", ShimTile, ComputeTile2, 2, T.memref(1, T.i32()) + ) + of_out = object_fifo("out", ComputeTile2, ShimTile, 2, memRef_ty) - # Compute tile body - @core(ComputeTile) + # Set up compute tiles + # Compute tile 2 + @core(ComputeTile2, "scale.o") def core_body(): - for _ in for_(8): - # Acquire input and output object FIFO objects - 
elem_in = of_in0.acquire(ObjectFifoPort.Consume, 1) - elem_out = of_out0.acquire(ObjectFifoPort.Produce, 1) - - # Core functionality - load, add 1, store - for i in for_(8): - v0 = memref.load(elem_in, [i]) - v1 = arith.addi(v0, arith.constant(1, T.i32())) - memref.store(v1, elem_out, [i]) + # Effective while(1) + for _ in for_(sys.maxsize): + elem_factor = of_factor.acquire(ObjectFifoPort.Consume, 1) + # Number of sub-vector "tile" iterations + for _ in for_(4): + elem_out = of_out.acquire(ObjectFifoPort.Produce, 1) + elem_in = of_in.acquire(ObjectFifoPort.Consume, 1) + call(scale_scalar, [elem_in, elem_out, elem_factor, 1024]) + of_in.release(ObjectFifoPort.Consume, 1) + of_out.release(ObjectFifoPort.Produce, 1) yield_([]) - - # Release input and output object FIFO objects - of_in0.release(ObjectFifoPort.Consume, 1) - of_out0.release(ObjectFifoPort.Produce, 1) + of_factor.release(ObjectFifoPort.Consume, 1) yield_([]) # To/from AIE-array data movement - @FuncOp.from_py_func(memRef_64_ty, memRef_64_ty, memRef_64_ty) - def sequence(inTensor, unused, outTensor): - npu_dma_memcpy_nd( - metadata="out0", bd_id=0, mem=outTensor, sizes=[1, 1, 1, 64] - ) - npu_dma_memcpy_nd( - metadata="in0", bd_id=1, mem=inTensor, sizes=[1, 1, 1, 64] - ) + tensor_ty = T.memref(4096, T.i32()) + scalar_ty = T.memref(1, T.i32()) + + @FuncOp.from_py_func(tensor_ty, scalar_ty, tensor_ty) + def sequence(A, F, C): + npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 4096]) + npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, 4096]) + npu_dma_memcpy_nd(metadata="infactor", bd_id=2, mem=F, sizes=[1, 1, 1, 1]) npu_sync(column=0, row=0, direction=0, channel=0) -# Declares that subsequent code is in mlir-aie context with mlir_mod_ctx() as ctx: - my_first_aie_program() # Call design function within the mlir-aie context - res = ctx.module.operation.verify() # Verify mlir context + my_vector_scalar() + res = ctx.module.operation.verify() if res == True: - print(ctx.module) # 
Print the python-to-mlir conversion + print(ctx.module) else: print(res) diff --git a/programming_guide/section-4/section-4a/answers/aie2.py b/programming_guide/section-4/section-4a/answers/aie2.py deleted file mode 100644 index 595e0c11d2..0000000000 --- a/programming_guide/section-4/section-4a/answers/aie2.py +++ /dev/null @@ -1,79 +0,0 @@ -# -# This file is licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# (c) Copyright 2023 AMD Inc. - -from aie.dialects.aie import * # primary mlir-aie dialect definitions -from aie.extras.context import mlir_mod_ctx # mlir-aie context - -from aie.dialects.aiex import * # extended mlir-aie dialect definitions -from aie.dialects.scf import * # scf (strcutred control flow) dialect -from aie.extras.dialects.ext import memref, arith # memref and arithmatic dialects - - -# AI Engine structural design function -def my_first_aie_program(): - - # Dvice declaration - aie2 device NPU - @device(AIEDevice.npu) - def device_body(): - # Memref types - memRef_8_ty = T.memref(8, T.i32()) - memRef_16_ty = T.memref(16, T.i32()) - memRef_32_ty = T.memref(32, T.i32()) - memRef_64_ty = T.memref(64, T.i32()) - memRef_640_ty = T.memref(640, T.i32()) - - # Tile declarations - ComputeTile = tile(0, 2) - ShimTile = tile(0, 0) - - # Data movement with object FIFOs - # Input (from shim tile to compute tile) - of_in0 = object_fifo("in0", ShimTile, ComputeTile, 2, memRef_8_ty) - - # Output (from compute tile to shim tile) - of_out0 = object_fifo("out0", ComputeTile, ShimTile, 2, memRef_8_ty) - - # Compute tile body - @core(ComputeTile) - def core_body(): - for _ in for_(80): - # Acquire input and output object FIFO objects - elem_in = of_in0.acquire(ObjectFifoPort.Consume, 1) - elem_out = of_out0.acquire(ObjectFifoPort.Produce, 1) - - # Core functionality - load, add 1, store - for i in for_(8): - v0 = memref.load(elem_in, 
[i]) - v1 = arith.addi(v0, arith.constant(1, T.i32())) - memref.store(v1, elem_out, [i]) - yield_([]) - - # Release input and output object FIFO objects - of_in0.release(ObjectFifoPort.Consume, 1) - of_out0.release(ObjectFifoPort.Produce, 1) - yield_([]) - - # To/from AIE-array data movement - @FuncOp.from_py_func(memRef_640_ty, memRef_64_ty, memRef_640_ty) - def sequence(inTensor, unused, outTensor): - npu_dma_memcpy_nd( - metadata="out0", bd_id=0, mem=outTensor, sizes=[1, 1, 1, 640] - ) - npu_dma_memcpy_nd( - metadata="in0", bd_id=1, mem=inTensor, sizes=[1, 1, 1, 640] - ) - npu_sync(column=0, row=0, direction=0, channel=0) - - -# Declares that subsequent code is in mlir-aie context -with mlir_mod_ctx() as ctx: - my_first_aie_program() # Call design function within the mlir-aie context - res = ctx.module.operation.verify() # Verify mlir context - if res == True: - print(ctx.module) # Print the python-to-mlir conversion - else: - print(res) diff --git a/programming_guide/section-4/section-4a/answers/test.cpp b/programming_guide/section-4/section-4a/answers/test.cpp deleted file mode 100644 index d154a97425..0000000000 --- a/programming_guide/section-4/section-4a/answers/test.cpp +++ /dev/null @@ -1,256 +0,0 @@ -//===- test.cpp -------------------------------------------000---*- C++ -*-===// -// -// This file is licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// Copyright (C) 2023, Advanced Micro Devices, Inc. 
-// -//===----------------------------------------------------------------------===// - -#include -#include -#include -#include -#include -#include -#include - -#include "xrt/xrt_bo.h" -#include "xrt/xrt_device.h" -#include "xrt/xrt_kernel.h" - -#include "test_utils.h" - -#ifndef DATATYPES_USING_DEFINED -#define DATATYPES_USING_DEFINED -// ------------------------------------------------------ -// Configure this to match your buffer data type -// ------------------------------------------------------ -using INOUT0_DATATYPE = std::uint32_t; -using INOUT1_DATATYPE = std::uint32_t; -using INOUT2_DATATYPE = std::uint32_t; -#endif - -namespace po = boost::program_options; - -// ---------------------------------------------------------------------------- -// Verify results (specific to our design example) -// ---------------------------------------------------------------------------- -template -int verify(int CSize, std::vector C, int verbosity) { - int errors = 0; - for (uint32_t i = 0; i < CSize; i++) { - uint32_t ref = i + 2; - if (C[i] != ref) { - std::cout << "Error in output " << C[i] << " != " << ref << std::endl; - errors++; - } else { - if (verbosity > 1) - std::cout << "Correct output " << C[i] << " == " << ref << std::endl; - } - } - return errors; -} - -// ---------------------------------------------------------------------------- -// Main -// ---------------------------------------------------------------------------- -int main(int argc, const char *argv[]) { - - // ------------------------------------------------------ - // Parse program arguments - // ------------------------------------------------------ - po::options_description desc("Allowed options"); - po::variables_map vm; - test_utils::add_default_options(desc); - - test_utils::parse_options(argc, argv, desc, vm); - int verbosity = vm["verbosity"].as(); - int do_verify = vm["verify"].as(); - int n_iterations = vm["iters"].as(); - int n_warmup_iterations = vm["warmup"].as(); - int trace_size = 
vm["trace_sz"].as(); - - // ------------------------------------------------------ - // Configure this to match your design's buffer size - // ------------------------------------------------------ - int INOUT0_VOLUME = 640; // Input only, 64x uint32_t in this example - int INOUT1_VOLUME = 640; // Not used in this example - int INOUT2_VOLUME = 640; // Output only, 64x uint32_t in this example - - size_t INOUT0_SIZE = INOUT0_VOLUME * sizeof(INOUT0_DATATYPE); - size_t INOUT1_SIZE = INOUT1_VOLUME * sizeof(INOUT1_DATATYPE); - size_t INOUT2_SIZE = INOUT2_VOLUME * sizeof(INOUT2_DATATYPE); - - // TODO Remove trace for now? - size_t OUT_SIZE = INOUT2_SIZE + trace_size; - - srand(time(NULL)); - - // Load instruction sequence - std::vector instr_v = - test_utils::load_instr_sequence(vm["instr"].as()); - if (verbosity >= 1) - std::cout << "Sequence instr count: " << instr_v.size() << "\n"; - - // ------------------------------------------------------ - // Get device, load the xclbin & kernel and register them - // ------------------------------------------------------ - xrt::device device; - xrt::kernel kernel; - - test_utils::init_xrt_load_kernel(device, kernel, verbosity, - vm["xclbin"].as(), - vm["kernel"].as()); - - // ------------------------------------------------------ - // Initialize input/ output buffer sizes and sync them - // ------------------------------------------------------ - auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), - XCL_BO_FLAGS_CACHEABLE, kernel.group_id(0)); - auto bo_inout0 = - xrt::bo(device, INOUT0_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(2)); - auto bo_inout1 = - xrt::bo(device, INOUT1_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); - // Assumes trace will only be added to inout2 - auto bo_inout2 = - xrt::bo(device, OUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); - - if (verbosity >= 1) - std::cout << "Writing data into buffer objects.\n"; - - // Initialize instruction buffer - void *bufInstr = bo_instr.map(); - 
memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(int)); - - // Initialize Inout buffer 0 - INOUT0_DATATYPE *bufInOut0 = bo_inout0.map(); - std::vector AVec(INOUT0_VOLUME); - for (int i = 0; i < INOUT0_VOLUME; i++) - AVec[i] = i + 1; - // AVec.push_back(i + 1); - memcpy(bufInOut0, AVec.data(), (AVec.size() * sizeof(INOUT0_DATATYPE))); - - // Initialize Inout buffer 1 - // INOUT1_DATATYPE *bufInOut1 = bo_inout1.map(); - // std::vector BVec(INOUT1_VOLUME); - // for (int i = 0; i < INOUT1_VOLUME; i++) - // BVec[i] = i + 1 - // //BVec.push_back(i + 1); - // memcpy(bufInOut1, BVec.data(), (BVec.size() * sizeof(INOUT1_DATATYPE))); - - // Initialize Inout buffer 2 - char *bufInOut2 = bo_inout2.map(); - std::vector CVec(INOUT2_VOLUME); - memset(bufInOut2, 0, OUT_SIZE); // Zeroes out INOUT2_VOLUME + trace_size - - // Sync buffers to update input buffer values - bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); - bo_inout0.sync(XCL_BO_SYNC_BO_TO_DEVICE); - // bo_inout1.sync(XCL_BO_SYNC_BO_TO_DEVICE); - bo_inout2.sync(XCL_BO_SYNC_BO_TO_DEVICE); - - // ------------------------------------------------------ - // Initialize run configs - // ------------------------------------------------------ - unsigned num_iter = n_iterations + n_warmup_iterations; - float npu_time_total = 0; - float npu_time_min = 9999999; - float npu_time_max = 0; - - int errors = 0; - - // ------------------------------------------------------ - // Main run loop - // ------------------------------------------------------ - for (unsigned iter = 0; iter < num_iter; iter++) { - - // Run kernel - if (verbosity >= 1) - std::cout << "Running Kernel.\n"; - auto start = std::chrono::high_resolution_clock::now(); - auto run = - kernel(bo_instr, instr_v.size(), bo_inout0, bo_inout1, bo_inout2); - run.wait(); - auto stop = std::chrono::high_resolution_clock::now(); - bo_inout2.sync(XCL_BO_SYNC_BO_FROM_DEVICE); - - if (iter < n_warmup_iterations) { - /* Warmup iterations do not count towards average runtime. 
*/ - continue; - } - - // Copy output results and verify they are correct - memcpy(CVec.data(), bufInOut2, (CVec.size() * sizeof(INOUT2_DATATYPE))); - if (do_verify) { - if (verbosity >= 1) { - std::cout << "Verifying results ..." << std::endl; - } - auto vstart = std::chrono::system_clock::now(); - errors = verify(INOUT2_VOLUME, CVec, verbosity); - auto vstop = std::chrono::system_clock::now(); - float vtime = - std::chrono::duration_cast(vstop - vstart) - .count(); - if (verbosity >= 1) { - std::cout << "Verify time: " << vtime << "secs." << std::endl; - } - } else { - if (verbosity >= 1) - std::cout << "WARNING: results not verified." << std::endl; - } - - // Write trace values if trace_size > 0 - if (trace_size > 0) { - // test_utils::write_out_trace(((char *)bufInOut2) + INOUT2_SIZE, - // trace_size, - test_utils::write_out_trace(((char *)bufInOut2), trace_size, - vm["trace_file"].as()); - } - - // Accumulate run times - float npu_time = - std::chrono::duration_cast(stop - start) - .count(); - - npu_time_total += npu_time; - npu_time_min = (npu_time < npu_time_min) ? npu_time : npu_time_min; - npu_time_max = (npu_time > npu_time_max) ? npu_time : npu_time_max; - } - - // ------------------------------------------------------ - // Print verification and timing results - // ------------------------------------------------------ - - // TODO - Mac count to guide gflops - float macs = 0; - - std::cout << std::endl - << "Avg NPU time: " << npu_time_total / n_iterations << "us." - << std::endl; - if (macs > 0) - std::cout << "Avg NPU gflops: " - << macs / (1000 * npu_time_total / n_iterations) << std::endl; - - std::cout << std::endl - << "Min NPU time: " << npu_time_min << "us." << std::endl; - if (macs > 0) - std::cout << "Max NPU gflops: " << macs / (1000 * npu_time_min) - << std::endl; - - std::cout << std::endl - << "Max NPU time: " << npu_time_max << "us." 
<< std::endl; - if (macs > 0) - std::cout << "Min NPU gflops: " << macs / (1000 * npu_time_max) - << std::endl; - - if (!errors) { - std::cout << "\nPASS!\n\n"; - return 0; - } else { - std::cout << "\nError count: " << errors << "\n\n"; - std::cout << "\nFailed.\n\n"; - return 1; - } -} diff --git a/programming_guide/section-4/section-4a/test.cpp b/programming_guide/section-4/section-4a/test.cpp index 2ec8a0d1c3..a5af1576bf 100644 --- a/programming_guide/section-4/section-4a/test.cpp +++ b/programming_guide/section-4/section-4a/test.cpp @@ -1,4 +1,4 @@ -//===- test.cpp -------------------------------------------000---*- C++ -*-===// +//===- test.cpp -------------------------------------------------*- C++ -*-===// // // This file is licensed under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -8,59 +8,26 @@ // //===----------------------------------------------------------------------===// -#include #include #include #include #include -#include -#include - -#include "xrt/xrt_bo.h" -#include "xrt/xrt_device.h" -#include "xrt/xrt_kernel.h" #include "test_utils.h" +#include "xrt/xrt_bo.h" #ifndef DATATYPES_USING_DEFINED #define DATATYPES_USING_DEFINED -// ------------------------------------------------------ -// Configure this to match your buffer data type -// ------------------------------------------------------ -using INOUT0_DATATYPE = std::uint32_t; -using INOUT1_DATATYPE = std::uint32_t; -using INOUT2_DATATYPE = std::uint32_t; +using DATATYPE = std::uint32_t; // Configure this to match your buffer data type #endif -namespace po = boost::program_options; +const int scaleFactor = 3; -// ---------------------------------------------------------------------------- -// Verify results (specific to our design example) -// ---------------------------------------------------------------------------- -template -int verify(int CSize, std::vector C, int verbosity) { - int errors = 0; - for (uint32_t i = 0; i < 
CSize; i++) { - uint32_t ref = i + 2; - if (C[i] != ref) { - std::cout << "Error in output " << C[i] << " != " << ref << std::endl; - errors++; - } else { - if (verbosity > 1) - std::cout << "Correct output " << C[i] << " == " << ref << std::endl; - } - } - return errors; -} +namespace po = boost::program_options; -// ---------------------------------------------------------------------------- -// Main -// ---------------------------------------------------------------------------- int main(int argc, const char *argv[]) { - // ------------------------------------------------------ - // Parse program arguments - // ------------------------------------------------------ + // Program arguments parsing po::options_description desc("Allowed options"); po::variables_map vm; test_utils::add_default_options(desc); @@ -70,33 +37,19 @@ int main(int argc, const char *argv[]) { int do_verify = vm["verify"].as(); int n_iterations = vm["iters"].as(); int n_warmup_iterations = vm["warmup"].as(); - int trace_size = vm["trace_sz"].as(); - - // ------------------------------------------------------ - // Configure this to match your design's buffer size - // ------------------------------------------------------ - int INOUT0_VOLUME = 64; // Input only, 64x uint32_t in this example - int INOUT1_VOLUME = 64; // Not used in this example - int INOUT2_VOLUME = 64; // Output only, 64x uint32_t in this example - - size_t INOUT0_SIZE = INOUT0_VOLUME * sizeof(INOUT0_DATATYPE); - size_t INOUT1_SIZE = INOUT1_VOLUME * sizeof(INOUT1_DATATYPE); - size_t INOUT2_SIZE = INOUT2_VOLUME * sizeof(INOUT2_DATATYPE); - // TODO Remove trace for now? 
- size_t OUT_SIZE = INOUT2_SIZE + trace_size; - - srand(time(NULL)); + constexpr bool VERIFY = true; + constexpr int IN_SIZE = 4096; + constexpr int OUT_SIZE = IN_SIZE; // Load instruction sequence std::vector instr_v = test_utils::load_instr_sequence(vm["instr"].as()); + if (verbosity >= 1) std::cout << "Sequence instr count: " << instr_v.size() << "\n"; - // ------------------------------------------------------ - // Get device, load the xclbin & kernel and register them - // ------------------------------------------------------ + // Start the XRT context and load the kernel xrt::device device; xrt::kernel kernel; @@ -104,52 +57,41 @@ int main(int argc, const char *argv[]) { vm["xclbin"].as(), vm["kernel"].as()); - // ------------------------------------------------------ - // Initialize input/ output buffer sizes and sync them - // ------------------------------------------------------ + // set up the buffer objects auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), XCL_BO_FLAGS_CACHEABLE, kernel.group_id(0)); - auto bo_inout0 = - xrt::bo(device, INOUT0_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(2)); - auto bo_inout1 = - xrt::bo(device, INOUT1_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); - // Assumes trace will only be added to inout2 - auto bo_inout2 = - xrt::bo(device, OUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); + auto bo_inA = xrt::bo(device, IN_SIZE * sizeof(DATATYPE), + XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(2)); + auto bo_inFactor = xrt::bo(device, 1 * sizeof(DATATYPE), + XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); + auto bo_outC = xrt::bo(device, OUT_SIZE * sizeof(DATATYPE), + XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); if (verbosity >= 1) std::cout << "Writing data into buffer objects.\n"; - // Initialize instruction buffer + // Copy instruction stream to xrt buffer object void *bufInstr = bo_instr.map(); memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(int)); - // Initialize Inout buffer 0 - INOUT0_DATATYPE 
*bufInOut0 = bo_inout0.map(); - std::vector AVec(INOUT0_VOLUME); - for (int i = 0; i < INOUT0_VOLUME; i++) - AVec[i] = i + 1; - // AVec.push_back(i + 1); - memcpy(bufInOut0, AVec.data(), (AVec.size() * sizeof(INOUT0_DATATYPE))); - - // Initialize Inout buffer 1 - // INOUT1_DATATYPE *bufInOut1 = bo_inout1.map(); - // std::vector BVec(INOUT1_VOLUME); - // for (int i = 0; i < INOUT1_VOLUME; i++) - // BVec[i] = i + 1 - // //BVec.push_back(i + 1); - // memcpy(bufInOut1, BVec.data(), (BVec.size() * sizeof(INOUT1_DATATYPE))); - - // Initialize Inout buffer 2 - char *bufInOut2 = bo_inout2.map(); - std::vector CVec(INOUT2_VOLUME); - memset(bufInOut2, 0, OUT_SIZE); // Zeroes out INOUT2_VOLUME + trace_size - - // Sync buffers to update input buffer values + // Initialize buffer bo_inA + DATATYPE *bufInA = bo_inA.map(); + for (int i = 0; i < IN_SIZE; i++) + bufInA[i] = i + 1; + + // Initialize buffer bo_inFactor + DATATYPE *bufInFactor = bo_inFactor.map(); + *bufInFactor = scaleFactor; + + // Zero out buffer bo_outC + DATATYPE *bufOut = bo_outC.map(); + memset(bufOut, 0, OUT_SIZE * sizeof(DATATYPE)); + + // sync host to device memories bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); - bo_inout0.sync(XCL_BO_SYNC_BO_TO_DEVICE); - // bo_inout1.sync(XCL_BO_SYNC_BO_TO_DEVICE); - bo_inout2.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_inA.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_inFactor.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_outC.sync(XCL_BO_SYNC_BO_TO_DEVICE); // ------------------------------------------------------ // Initialize run configs @@ -170,11 +112,12 @@ int main(int argc, const char *argv[]) { if (verbosity >= 1) std::cout << "Running Kernel.\n"; auto start = std::chrono::high_resolution_clock::now(); - auto run = - kernel(bo_instr, instr_v.size(), bo_inout0, bo_inout1, bo_inout2); + auto run = kernel(bo_instr, instr_v.size(), bo_inA, bo_inFactor, bo_outC); run.wait(); auto stop = std::chrono::high_resolution_clock::now(); - bo_inout2.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + + // Sync device to 
host memories + bo_outC.sync(XCL_BO_SYNC_BO_FROM_DEVICE); if (iter < n_warmup_iterations) { /* Warmup iterations do not count towards average runtime. */ @@ -182,13 +125,26 @@ int main(int argc, const char *argv[]) { } // Copy output results and verify they are correct - memcpy(CVec.data(), bufInOut2, (CVec.size() * sizeof(INOUT2_DATATYPE))); + // Copy output results and verify they are correct if (do_verify) { if (verbosity >= 1) { std::cout << "Verifying results ..." << std::endl; } auto vstart = std::chrono::system_clock::now(); - errors = verify(INOUT2_VOLUME, CVec, verbosity); + for (uint32_t i = 0; i < IN_SIZE; i++) { + int32_t ref = bufInA[i] * scaleFactor; + int32_t test = bufOut[i]; + if (test != ref) { + if (verbosity >= 1) + std::cout << "Error in output " << test << " != " << ref + << std::endl; + errors++; + } else { + if (verbosity >= 1) + std::cout << "Correct output " << test << " == " << ref + << std::endl; + } + } auto vstop = std::chrono::system_clock::now(); float vtime = std::chrono::duration_cast(vstop - vstart) @@ -201,14 +157,6 @@ int main(int argc, const char *argv[]) { std::cout << "WARNING: results not verified." << std::endl; } - // Write trace values if trace_size > 0 - if (trace_size > 0) { - // test_utils::write_out_trace(((char *)bufInOut2) + INOUT2_SIZE, - // trace_size, - test_utils::write_out_trace(((char *)bufInOut2), trace_size, - vm["trace_file"].as()); - } - // Accumulate run times float npu_time = std::chrono::duration_cast(stop - start) @@ -245,12 +193,15 @@ int main(int argc, const char *argv[]) { std::cout << "Min NPU gflops: " << macs / (1000 * npu_time_max) << std::endl; + // Print Pass/Fail result of our test if (!errors) { - std::cout << "\nPASS!\n\n"; + std::cout << std::endl << "PASS!" << std::endl << std::endl; return 0; } else { - std::cout << "\nError count: " << errors << "\n\n"; - std::cout << "\nFailed.\n\n"; + std::cout << std::endl + << errors << " mismatches." 
<< std::endl + << std::endl; + std::cout << std::endl << "fail." << std::endl << std::endl; return 1; } } diff --git a/programming_guide/section-4/section-4a/test.py b/programming_guide/section-4/section-4a/test.py index 0e82d741cb..887586472d 100644 --- a/programming_guide/section-4/section-4a/test.py +++ b/programming_guide/section-4/section-4a/test.py @@ -8,22 +8,13 @@ import sys import time -import aie.utils.test as test_utils - -# ------------------------------------------------------ -# Configure this to match your design's buffer size -# ------------------------------------------------------ -INOUT0_VOLUME = 64 # Input only, 64x uint32_t in this example -INOUT1_VOLUME = 64 # Not used in this example -INOUT2_VOLUME = 64 # Output only, 64x uint32_t in this example +from aie.dialects.aie import * +from aie.dialects.aiex import * +from aie.dialects.scf import * +from aie.extras.context import mlir_mod_ctx +from aie.extras.dialects.ext import memref, arith -INOUT0_DATATYPE = np.uint32 -INOUT1_DATATYPE = np.uint32 -INOUT2_DATATYPE = np.uint32 - -INOUT0_SIZE = INOUT0_VOLUME * INOUT0_DATATYPE().itemsize -INOUT1_SIZE = INOUT1_VOLUME * INOUT1_DATATYPE().itemsize -INOUT2_SIZE = INOUT2_VOLUME * INOUT2_DATATYPE().itemsize +import aie.utils.test as test_utils def main(opts): @@ -34,6 +25,21 @@ def main(opts): instr_text = [l for l in instr_text if l != ""] instr_v = np.array([int(i, 16) for i in instr_text], dtype=np.uint32) + # ------------------------------------------------------------ + # Configure this to match your design's buffer size and type + # ------------------------------------------------------------ + INOUT0_VOLUME = int(4096) # Input only, 64x uint32_t in this example + INOUT1_VOLUME = int(1) # Input only, 1 uint32_t scale factor + INOUT2_VOLUME = int(4096) # Output only, 64x uint32_t in this example + + INOUT0_DATATYPE = np.int32 + INOUT1_DATATYPE = np.int32 + INOUT2_DATATYPE = np.int32 + + INOUT0_SIZE = INOUT0_VOLUME * INOUT0_DATATYPE().itemsize + 
INOUT1_SIZE = INOUT1_VOLUME * INOUT1_DATATYPE().itemsize + INOUT2_SIZE = INOUT2_VOLUME * INOUT2_DATATYPE().itemsize + OUT_SIZE = INOUT2_SIZE # ------------------------------------------------------ @@ -47,7 +53,6 @@ def main(opts): bo_instr = xrt.bo(device, len(instr_v) * 4, xrt.bo.cacheable, kernel.group_id(0)) bo_inout0 = xrt.bo(device, INOUT0_SIZE, xrt.bo.host_only, kernel.group_id(2)) bo_inout1 = xrt.bo(device, INOUT1_SIZE, xrt.bo.host_only, kernel.group_id(3)) - # bo_inout2 = xrt.bo(device, INOUT2_SIZE, xrt.bo.host_only, kernel.group_id(4)) bo_inout2 = xrt.bo(device, OUT_SIZE, xrt.bo.host_only, kernel.group_id(4)) # Initialize instruction buffer @@ -55,10 +60,10 @@ def main(opts): # Initialize data buffers inout0 = np.arange(1, INOUT0_VOLUME + 1, dtype=INOUT0_DATATYPE) - inout1 = np.zeros(INOUT1_VOLUME, dtype=INOUT1_DATATYPE) - inout2 = np.zeros(INOUT2_VOLUME, dtype=INOUT2_DATATYPE) + scale_factor = np.array([3], dtype=INOUT1_DATATYPE) + inout2 = np.zeros(OUT_SIZE, dtype=np.uint8) bo_inout0.write(inout0, 0) - bo_inout1.write(inout1, 0) + bo_inout1.write(scale_factor, 0) bo_inout2.write(inout2, 0) # Sync buffers to update input buffer values @@ -94,12 +99,12 @@ def main(opts): continue # Copy output results and verify they are correct - out_size = INOUT2_SIZE - output_buffer = bo_inout2.read(out_size, 0).view(INOUT2_DATATYPE) + entire_buffer = bo_inout2.read(OUT_SIZE, 0).view(np.uint32) + output_buffer = entire_buffer[:INOUT2_VOLUME] if opts.verify: if opts.verbosity >= 1: print("Verifying results ...") - ref = np.arange(2, INOUT0_VOLUME + 2, dtype=INOUT0_DATATYPE) + ref = np.arange(1, INOUT0_VOLUME + 1, dtype=INOUT0_DATATYPE) * scale_factor e = np.equal(output_buffer, ref) errors = errors + np.size(e) - np.count_nonzero(e) @@ -128,5 +133,6 @@ def main(opts): if __name__ == "__main__": - opts = test_utils.parse_args(sys.argv[1:]) + p = test_utils.create_default_argparser() + opts = p.parse_args(sys.argv[1:]) main(opts) diff --git 
a/programming_guide/section-4/section-4a/vector_scalar_mul.cc b/programming_guide/section-4/section-4a/vector_scalar_mul.cc new file mode 100755 index 0000000000..10c0aecbbc --- /dev/null +++ b/programming_guide/section-4/section-4a/vector_scalar_mul.cc @@ -0,0 +1,25 @@ +//===- vector_scalar_mul.cc -------------------------------------*- C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2024, Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +extern "C" { + +void vector_scalar_mul_aie_scalar(int32_t *a, int32_t *c, int32_t *factor, + int32_t N) { + for (int i = 0; i < N; i++) { + c[i] = *factor * a[i]; + } +} + +} // extern "C" diff --git a/programming_guide/section-4/section-4b/Makefile b/programming_guide/section-4/section-4b/Makefile index 09126e5289..8b7b1cc434 100644 --- a/programming_guide/section-4/section-4b/Makefile +++ b/programming_guide/section-4/section-4b/Makefile @@ -18,10 +18,14 @@ build/aie.mlir: aie2.py mkdir -p ${@D} python3 $< > $@ -build/final.xclbin: build/aie.mlir +build/scale.o: vector_scalar_mul.cc mkdir -p ${@D} - cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \ - --xclbin-name=${@F} --npu-insts-name=insts.txt ${ 0. This is also true for the [Vector Scalar Multiply example](../../../programming_examples/basic/vector_scalar_mul). +**NOTE**: In our example design ([aie2.py](./aie2.py) and the associated [Makefile](./Makefile)), we provide a Makefile target `run` for the standard build and the target `trace` for the trace-enabled build.
The trace-enabled build passes the trace buffer size as an argument to [aie2.py](./aie2.py) which conditionally enables the trace `flow` and calls `configure_simple_tracing_aie2` as long as `trace_size` is > 0. This is also true for the [Vector Scalar Multiply example](../../../programming_examples/basic/vector_scalar_mul). ### (2a) C/C++ Host code ([test.cpp](./test.cpp)) The main changes needed for [test.cpp](./test.cpp) is the increase in the output buffer size to account for the trace buffer size, being careful to read only the output buffer portion when verifying correctness of the results. We also need to be sure to pass the correct buffer offset which points to the trace buffer data when calling `write_out_trace`. -You can see in [test.cpp](.test.cpp) that trace_size is set based on an input argument of `-t $(trace_size)` which is defined and passed in the [Makefile](.Makefile). The `trace` target from the [Makefile](./Makefile) is shown below. +You can see in [test.cpp](./test.cpp) that trace_size is set based on an input argument of `-t $(trace_size)` which is defined and passed in the [Makefile](./Makefile). The `trace` target from the [Makefile](./Makefile) is shown below. ```Makefile trace: ${targetname}.exe build/final.xclbin build/insts.txt @@ -143,15 +143,14 @@ trace: ${targetname}.exe build/final.xclbin build/insts.txt Following the invocation of the executable, we call the `parse_trace.py` python script which we will cover in more detail in step 3. Within the [test.cpp](./test.cpp), we redefine OUT_SIZE to be the sum of output buffer size (in bytes) and the trace buffer size. ```c++ - int OUT_SIZE = INOUT2_SIZE + trace_size; + int OUT_SIZE = IN_SIZE + trace_size; ``` -All subsuquent references to the output buffer size should use `OUT_SIZE`. The exception is when we want to verify the output results which should be bounded by the original output buffer size, in this case `INOUT2_VOLUME`. 
+All subsequent references to the output buffer size should use `OUT_SIZE`. The exception is when we want to verify the output results which should be bounded by the original output buffer size, in this case `IN_SIZE`. Finally, the function to write the trace output to a file as defined in `aie.utils.trace` is `write_out_trace` and we need to pass it the pointer in the output buffer where the trace data begins, the trace buffer size and the trace file name (default is `trace.txt`). ```c++ - test_utils::write_out_trace( - ((char *)bufInOut2) + INOUT2_SIZE, - trace_size, vm["trace_file"].as()); + test_utils::write_out_trace(((char *)bufOut) + IN_SIZE, trace_size, + vm["trace_file"].as()); ``` ### (2b) Python Host code ([test.py](./test.py)) @@ -163,7 +162,7 @@ trace_py: build/final_trace_${data_size}.xclbin build/insts_${data_size}.txt ``` The python equivalent host code performs the same steps as the C/C++ host code as we redefine `OUT_SIZE` to include the `trace_size`. ```python - OUT_SIZE = INOUT1_SIZE + int(opts.trace_size) + OUT_SIZE = INOUT2_SIZE + int(opts.trace_size) ``` During verification, the `output_buffer` excludes the trace data and uses the `read` function as follows: ```python @@ -194,14 +193,21 @@ Open https://ui.perfetto.dev in your browser and then open up the waveform json * Check matching packet IDs for packet-routed flows. The packet flow ID must match the configured ID value in Trace Control 1 register or else the packets don't get routed. ## Exercises -1. Let's give tracing a try. In this directory, we're been examining a design based off the `Vector Scalar Add` example. Run `make trace` to compile the design and generate a trace file and run the `prase_trace.py` script on it to generate the `trace_4b.json` waveform file. Open this in http://ui.perfetto.dev. if you zoom into the region of interest with the W and S to zoom in and out respectively and A adn D to pan left and right. You should seem a wave like the following: +1. 
Let's give tracing a try. In this directory, we've been examining a local design based off the `Vector Scalar Mul` example. Run `make trace` to compile the design and generate a trace file and run the `parse_trace.py` script on it to generate the `trace_4b.json` waveform file. Open this in http://ui.perfetto.dev. Zoom into the region of interest with the keyboard shortcut keys W and S (to zoom in and out, respectively) and A and D (to pan left and right). You should see a wave like the following: - + - Based on this wave, You can mouse over each chunk of continguous data for `PortRunning0` (input dma port) and `PortRunning1` (output dma port). What is the chunk size? How many input and output chunks are there? This shoudl match iteration loop bounds in our exmple design. - -1. **TODO** Additional questions about routing congestion for circuit switch and packet switch routes for trace packets? + Based on this wave, you can mouse over each chunk of contiguous data for `PortRunning0` (input dma port) and `PortRunning1` (output dma port). What is the chunk size? How many input and output chunks are there? This should match the iteration loop bounds in our example design. + Here, we notice a few signals worth mentioning. + * `Event0` - The event marking the beginning of our kernel. See [vector_scalar_mul.cc](./vector_scalar_mul.cc) where we added the function `event0()` before the loop. This is generally a handy thing to do to attach an event to the beginning of our kernel. + * `Event1` - The event marking the end of our kernel. See [vector_scalar_mul.cc](./vector_scalar_mul.cc) where we added the function `event1()` after the loop. Much like event0, attaching event1 to the end of our kernel is also helpful. + * `VectorInstr` - Vector instructions like vector MAC or vector load/store. Here, we are running a scalar implementation so there are no vector events.
+ * `PortRunning0` - Mapped to Port 0 which is by default configured to the S2MM0 input (DMA from stream to local memory) + * `PortRunning1` - Mapped to Port 1 which is by default configured to the MM2S0 output (DMA from local memory to stream) + * `LockStall` - Any locks that are stalled in the core + * `LockAcquiresInstr` - Any lock acquire requests + * `LockReleaseInstr` - Any lock release requests ----- [[Prev]](../section-4a) [[Up]](../../section-4) [[Next]](../section-4c) diff --git a/programming_guide/section-4/section-4b/aie2.py b/programming_guide/section-4/section-4b/aie2.py index a629daa0ce..87d4e85d13 100644 --- a/programming_guide/section-4/section-4b/aie2.py +++ b/programming_guide/section-4/section-4b/aie2.py @@ -5,95 +5,89 @@ # # (c) Copyright 2023 AMD Inc. -from aie.dialects.aie import * # primary mlir-aie dialect definitions -from aie.extras.context import mlir_mod_ctx # mlir-aie context +import sys -from aie.dialects.aiex import * # extended mlir-aie dialect definitions -from aie.dialects.scf import * # scf (strcutred control flow) dialect -from aie.extras.dialects.ext import memref, arith # memref and arithmatic dialects +from aie.dialects.aie import * +from aie.dialects.aiex import * +from aie.dialects.scf import * +from aie.extras.context import mlir_mod_ctx import aie.utils.trace as trace_utils -# AI Engine structural design function -def my_first_aie_program(): +def my_vector_scalar(): enableTrace = True trace_size = 8192 - C_sz_in_bytes = 64 * 4 - # Dvice declaration - aie2 device NPU @device(AIEDevice.npu) def device_body(): - # Memref types - memRef_8_ty = T.memref(8, T.i32()) - memRef_16_ty = T.memref(16, T.i32()) - memRef_32_ty = T.memref(32, T.i32()) - memRef_64_ty = T.memref(64, T.i32()) + memRef_ty = T.memref(1024, T.i32()) + + # AIE Core Function declarations + scale_scalar = external_func( + "vector_scalar_mul_aie_scalar", + inputs=[memRef_ty, memRef_ty, T.memref(1, T.i32()), T.i32()], + ) # Tile declarations - ComputeTile = 
tile(0, 2) ShimTile = tile(0, 0) - - # Data movement with object FIFOs - # Input (from shim tile to compute tile) - of_in0 = object_fifo("in0", ShimTile, ComputeTile, 2, memRef_8_ty) - - # Output (from compute tile to shim tile) - of_out0 = object_fifo("out0", ComputeTile, ShimTile, 2, memRef_8_ty) - - # Compute tile body - @core(ComputeTile) + ComputeTile2 = tile(0, 2) + + # AIE-array data movement with object fifos + of_in = object_fifo("in", ShimTile, ComputeTile2, 2, memRef_ty) + of_factor = object_fifo( + "infactor", ShimTile, ComputeTile2, 2, T.memref(1, T.i32()) + ) + of_out = object_fifo("out", ComputeTile2, ShimTile, 2, memRef_ty) + + # Set up compute tiles + # Compute tile 2 + @core(ComputeTile2, "scale.o") def core_body(): - for _ in for_(0xFFFFFFFF): - # for _ in for_(8): - # Acquire input and output object FIFO objects - elem_in = of_in0.acquire(ObjectFifoPort.Consume, 1) - elem_out = of_out0.acquire(ObjectFifoPort.Produce, 1) - - # Core functionality - load, add 1, store - for i in for_(8): - v0 = memref.load(elem_in, [i]) - v1 = arith.addi(v0, arith.constant(1, T.i32())) - memref.store(v1, elem_out, [i]) + # Effective while(1) + for _ in for_(sys.maxsize): + elem_factor = of_factor.acquire(ObjectFifoPort.Consume, 1) + # Number of sub-vector "tile" iterations + for _ in for_(4): + elem_out = of_out.acquire(ObjectFifoPort.Produce, 1) + elem_in = of_in.acquire(ObjectFifoPort.Consume, 1) + call(scale_scalar, [elem_in, elem_out, elem_factor, 1024]) + of_in.release(ObjectFifoPort.Consume, 1) + of_out.release(ObjectFifoPort.Produce, 1) yield_([]) - - # Release input and output object FIFO objects - of_in0.release(ObjectFifoPort.Consume, 1) - of_out0.release(ObjectFifoPort.Produce, 1) + of_factor.release(ObjectFifoPort.Consume, 1) yield_([]) # Set up a circuit-switched flow from core to shim for tracing information if enableTrace: - flow(ComputeTile, WireBundle.Trace, 0, ShimTile, WireBundle.DMA, 1) + flow(ComputeTile2, WireBundle.Trace, 0, ShimTile, 
WireBundle.DMA, 1) # To/from AIE-array data movement - @FuncOp.from_py_func(memRef_64_ty, memRef_64_ty, memRef_64_ty) - def sequence(inTensor, notUsed, outTensor): + tensor_ty = T.memref(4096, T.i32()) + scalar_ty = T.memref(1, T.i32()) + @FuncOp.from_py_func(tensor_ty, scalar_ty, tensor_ty) + def sequence(A, F, C): if enableTrace: trace_utils.configure_simple_tracing_aie2( - ComputeTile, + ComputeTile2, ShimTile, ddr_id=2, size=trace_size, - offset=C_sz_in_bytes, + offset=4096 * 4, # offset in bytes ) - npu_dma_memcpy_nd( - metadata="out0", bd_id=0, mem=outTensor, sizes=[1, 1, 1, 64] - ) - npu_dma_memcpy_nd( - metadata="in0", bd_id=1, mem=inTensor, sizes=[1, 1, 1, 64] - ) + npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 4096]) + npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, 4096]) + npu_dma_memcpy_nd(metadata="infactor", bd_id=2, mem=F, sizes=[1, 1, 1, 1]) npu_sync(column=0, row=0, direction=0, channel=0) -# Declares that subsequent code is in mlir-aie context with mlir_mod_ctx() as ctx: - my_first_aie_program() # Call design function within the mlir-aie context - res = ctx.module.operation.verify() # Verify mlir context + my_vector_scalar() + res = ctx.module.operation.verify() if res == True: - print(ctx.module) # Print the python-to-mlir conversion + print(ctx.module) else: print(res) diff --git a/programming_guide/section-4/section-4b/test.cpp b/programming_guide/section-4/section-4b/test.cpp index 6f775e5b54..4e27fd8780 100644 --- a/programming_guide/section-4/section-4b/test.cpp +++ b/programming_guide/section-4/section-4b/test.cpp @@ -1,4 +1,4 @@ -//===- test.cpp -------------------------------------------000---*- C++ -*-===// +//===- test.cpp -------------------------------------------------*- C++ -*-===// // // This file is licensed under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -8,59 +8,26 @@ // //===----------------------------------------------------------------------===// -#include #include #include #include #include -#include -#include - -#include "xrt/xrt_bo.h" -#include "xrt/xrt_device.h" -#include "xrt/xrt_kernel.h" #include "test_utils.h" +#include "xrt/xrt_bo.h" #ifndef DATATYPES_USING_DEFINED #define DATATYPES_USING_DEFINED -// ------------------------------------------------------ -// Configure this to match your buffer data type -// ------------------------------------------------------ -using INOUT0_DATATYPE = std::uint32_t; -using INOUT1_DATATYPE = std::uint32_t; -using INOUT2_DATATYPE = std::uint32_t; +using DATATYPE = std::uint32_t; // Configure this to match your buffer data type #endif -namespace po = boost::program_options; +const int scaleFactor = 3; -// ---------------------------------------------------------------------------- -// Verify results (specific to our design example) -// ---------------------------------------------------------------------------- -template -int verify(int CSize, std::vector C, int verbosity) { - int errors = 0; - for (uint32_t i = 0; i < CSize; i++) { - uint32_t ref = i + 2; - if (C[i] != ref) { - std::cout << "Error in output " << C[i] << " != " << ref << std::endl; - errors++; - } else { - if (verbosity > 1) - std::cout << "Correct output " << C[i] << " == " << ref << std::endl; - } - } - return errors; -} +namespace po = boost::program_options; -// ---------------------------------------------------------------------------- -// Main -// ---------------------------------------------------------------------------- int main(int argc, const char *argv[]) { - // ------------------------------------------------------ - // Parse program arguments - // ------------------------------------------------------ + // Program arguments parsing po::options_description desc("Allowed options"); po::variables_map vm; test_utils::add_default_options(desc); @@ -72,31 +39,20 @@ int main(int argc, const 
char *argv[]) { int n_warmup_iterations = vm["warmup"].as(); int trace_size = vm["trace_sz"].as(); - // ------------------------------------------------------ - // Configure this to match your design's buffer size - // ------------------------------------------------------ - int INOUT0_VOLUME = 64; // Input only, 64x uint32_t in this example - int INOUT1_VOLUME = 64; // Not used in this example - int INOUT2_VOLUME = 64; // Output only, 64x uint32_t in this example - - size_t INOUT0_SIZE = INOUT0_VOLUME * sizeof(INOUT0_DATATYPE); - size_t INOUT1_SIZE = INOUT1_VOLUME * sizeof(INOUT1_DATATYPE); - size_t INOUT2_SIZE = INOUT2_VOLUME * sizeof(INOUT2_DATATYPE); - - // TODO Remove trace for now? - size_t OUT_SIZE = INOUT2_SIZE + trace_size; + constexpr bool VERIFY = true; + constexpr int IN_VOLUME = 4096; - srand(time(NULL)); + constexpr int IN_SIZE = IN_VOLUME * sizeof(DATATYPE); + int OUT_SIZE = IN_SIZE + trace_size; // Load instruction sequence std::vector instr_v = test_utils::load_instr_sequence(vm["instr"].as()); + if (verbosity >= 1) std::cout << "Sequence instr count: " << instr_v.size() << "\n"; - // ------------------------------------------------------ - // Get device, load the xclbin & kernel and register them - // ------------------------------------------------------ + // Start the XRT context and load the kernel xrt::device device; xrt::kernel kernel; @@ -104,61 +60,41 @@ int main(int argc, const char *argv[]) { vm["xclbin"].as(), vm["kernel"].as()); - // ------------------------------------------------------ - // Initialize input/ output buffer sizes and sync them - // ------------------------------------------------------ + // set up the buffer objects auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), XCL_BO_FLAGS_CACHEABLE, kernel.group_id(0)); - auto bo_inout0 = - xrt::bo(device, INOUT0_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(2)); - auto bo_inout1 = - xrt::bo(device, INOUT1_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); - // Assumes 
trace will only be added to inout2 - auto bo_inout2 = + auto bo_inA = + xrt::bo(device, IN_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(2)); + auto bo_inFactor = xrt::bo(device, 1 * sizeof(DATATYPE), + XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); + auto bo_outC = xrt::bo(device, OUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); - // auto bo_trace = - // // xrt::bo(device, trace_size, XRT_BO_FLAGS_HOST_ONLY, - // kernel.group_id(4)); xrt::bo(device, trace_size, - // XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); if (verbosity >= 1) std::cout << "Writing data into buffer objects.\n"; - // Initialize instruction buffer + // Copy instruction stream to xrt buffer object void *bufInstr = bo_instr.map(); memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(int)); - // Initialize Inout buffer 0 - INOUT0_DATATYPE *bufInOut0 = bo_inout0.map(); - std::vector AVec(INOUT0_VOLUME); - for (int i = 0; i < INOUT0_VOLUME; i++) - AVec[i] = i + 1; - // AVec.push_back(i + 1); - memcpy(bufInOut0, AVec.data(), (AVec.size() * sizeof(INOUT0_DATATYPE))); - - // Initialize Inout buffer 1 - // INOUT1_DATATYPE *bufInOut1 = bo_inout1.map(); - // std::vector BVec(INOUT1_VOLUME); - // for (int i = 0; i < INOUT1_VOLUME; i++) - // BVec[i] = i + 1 - // //BVec.push_back(i + 1); - // memcpy(bufInOut1, BVec.data(), (BVec.size() * sizeof(INOUT1_DATATYPE))); - - // Initialize Inout buffer 2 - char *bufInOut2 = bo_inout2.map(); - std::vector CVec(INOUT2_VOLUME); - memset(bufInOut2, 0, OUT_SIZE); // Zeroes out INOUT2_VOLUME + trace_size - // memset(bufInOut2, 0, INOUT2_SIZE); // Zeroes out INOUT2_VOLUME + trace_size - - // char *bufTrace = bo_trace.map(); - // memset(bufTrace, 0, trace_size); - - // Sync buffers to update input buffer values + // Initialize buffer bo_inA + DATATYPE *bufInA = bo_inA.map(); + for (int i = 0; i < IN_VOLUME; i++) + bufInA[i] = i + 1; + + // Initialize buffer bo_inFactor + DATATYPE *bufInFactor = bo_inFactor.map(); + *bufInFactor = scaleFactor; + + // Zero out buffer 
bo_outC + DATATYPE *bufOut = bo_outC.map(); + memset(bufOut, 0, OUT_SIZE); + + // sync host to device memories bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); - bo_inout0.sync(XCL_BO_SYNC_BO_TO_DEVICE); - // bo_inout1.sync(XCL_BO_SYNC_BO_TO_DEVICE); - bo_inout2.sync(XCL_BO_SYNC_BO_TO_DEVICE); - // bo_trace.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_inA.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_inFactor.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_outC.sync(XCL_BO_SYNC_BO_TO_DEVICE); // ------------------------------------------------------ // Initialize run configs @@ -179,14 +115,12 @@ int main(int argc, const char *argv[]) { if (verbosity >= 1) std::cout << "Running Kernel.\n"; auto start = std::chrono::high_resolution_clock::now(); - auto run = - kernel(bo_instr, instr_v.size(), bo_inout0, bo_inout1, bo_inout2); - // kernel(bo_instr, instr_v.size(), bo_inout0, bo_trace, bo_inout2); + auto run = kernel(bo_instr, instr_v.size(), bo_inA, bo_inFactor, bo_outC); run.wait(); - // sleep(3); auto stop = std::chrono::high_resolution_clock::now(); - bo_inout2.sync(XCL_BO_SYNC_BO_FROM_DEVICE); - // bo_trace.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + + // Sync device to host memories + bo_outC.sync(XCL_BO_SYNC_BO_FROM_DEVICE); if (iter < n_warmup_iterations) { /* Warmup iterations do not count towards average runtime. */ @@ -194,13 +128,26 @@ int main(int argc, const char *argv[]) { } // Copy output results and verify they are correct - memcpy(CVec.data(), bufInOut2, (CVec.size() * sizeof(INOUT2_DATATYPE))); + // Copy output results and verify they are correct if (do_verify) { if (verbosity >= 1) { std::cout << "Verifying results ..." 
<< std::endl; } auto vstart = std::chrono::system_clock::now(); - errors = verify(INOUT2_VOLUME, CVec, verbosity); + for (uint32_t i = 0; i < IN_VOLUME; i++) { + int32_t ref = bufInA[i] * scaleFactor; + int32_t test = bufOut[i]; + if (test != ref) { + if (verbosity >= 1) + std::cout << "Error in output " << test << " != " << ref + << std::endl; + errors++; + } else { + if (verbosity >= 1) + std::cout << "Correct output " << test << " == " << ref + << std::endl; + } + } auto vstop = std::chrono::system_clock::now(); float vtime = std::chrono::duration_cast(vstop - vstart) @@ -215,7 +162,7 @@ int main(int argc, const char *argv[]) { // Write trace values if trace_size > 0 if (trace_size > 0) { - test_utils::write_out_trace(((char *)bufInOut2) + INOUT2_SIZE, trace_size, + test_utils::write_out_trace(((char *)bufOut) + IN_SIZE, trace_size, vm["trace_file"].as()); } @@ -255,12 +202,15 @@ int main(int argc, const char *argv[]) { std::cout << "Min NPU gflops: " << macs / (1000 * npu_time_max) << std::endl; + // Print Pass/Fail result of our test if (!errors) { - std::cout << "\nPASS!\n\n"; + std::cout << std::endl << "PASS!" << std::endl << std::endl; return 0; } else { - std::cout << "\nError count: " << errors << "\n\n"; - std::cout << "\nFailed.\n\n"; + std::cout << std::endl + << errors << " mismatches." << std::endl + << std::endl; + std::cout << std::endl << "fail." << std::endl << std::endl; return 1; } } diff --git a/programming_guide/section-4/section-4b/test.py b/programming_guide/section-4/section-4b/test.py index a36dc5d5a7..e7f6628ba6 100644 --- a/programming_guide/section-4/section-4b/test.py +++ b/programming_guide/section-4/section-4b/test.py @@ -3,30 +3,20 @@ # Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. 
# SPDX-License-Identifier: MIT -# import argparse import numpy as np import pyxrt as xrt import sys import time +from aie.dialects.aie import * +from aie.dialects.aiex import * +from aie.dialects.scf import * +from aie.extras.context import mlir_mod_ctx +from aie.extras.dialects.ext import memref, arith + import aie.utils.test as test_utils import aie.utils.trace as trace_utils -# ------------------------------------------------------ -# Configure this to match your design's buffer size -# ------------------------------------------------------ -INOUT0_VOLUME = 64 # Input only, 64x uint32_t in this example -INOUT1_VOLUME = 64 # Not used in this example -INOUT2_VOLUME = 64 # Output only, 64x uint32_t in this example - -INOUT0_DATATYPE = np.uint32 -INOUT1_DATATYPE = np.uint32 -INOUT2_DATATYPE = np.uint32 - -INOUT0_SIZE = INOUT0_VOLUME * INOUT0_DATATYPE().itemsize -INOUT1_SIZE = INOUT1_VOLUME * INOUT1_DATATYPE().itemsize -INOUT2_SIZE = INOUT2_VOLUME * INOUT2_DATATYPE().itemsize - def main(opts): @@ -36,6 +26,21 @@ def main(opts): instr_text = [l for l in instr_text if l != ""] instr_v = np.array([int(i, 16) for i in instr_text], dtype=np.uint32) + # ------------------------------------------------------------ + # Configure this to match your design's buffer size and type + # ------------------------------------------------------------ + INOUT0_VOLUME = int(4096) # Input only, 4096x int32 in this example + INOUT1_VOLUME = int(1) # Input only, 1x int32 scale factor + INOUT2_VOLUME = int(4096) # Output only, 4096x int32 in this example + + INOUT0_DATATYPE = np.int32 + INOUT1_DATATYPE = np.int32 + INOUT2_DATATYPE = np.int32 + + INOUT0_SIZE = INOUT0_VOLUME * INOUT0_DATATYPE().itemsize + INOUT1_SIZE = INOUT1_VOLUME * INOUT1_DATATYPE().itemsize + INOUT2_SIZE = INOUT2_VOLUME * INOUT2_DATATYPE().itemsize + OUT_SIZE = INOUT2_SIZE + int(opts.trace_size) # ------------------------------------------------------ @@ -49,7 +54,6 @@ def main(opts): bo_instr = xrt.bo(device, 
len(instr_v) * 4, xrt.bo.cacheable, kernel.group_id(0)) bo_inout0 = xrt.bo(device, INOUT0_SIZE, xrt.bo.host_only, kernel.group_id(2)) bo_inout1 = xrt.bo(device, INOUT1_SIZE, xrt.bo.host_only, kernel.group_id(3)) - # bo_inout2 = xrt.bo(device, INOUT2_SIZE, xrt.bo.host_only, kernel.group_id(4)) bo_inout2 = xrt.bo(device, OUT_SIZE, xrt.bo.host_only, kernel.group_id(4)) # Initialize instruction buffer @@ -57,10 +61,10 @@ def main(opts): # Initialize data buffers inout0 = np.arange(1, INOUT0_VOLUME + 1, dtype=INOUT0_DATATYPE) - inout1 = np.zeros(INOUT1_VOLUME, dtype=INOUT1_DATATYPE) - inout2 = np.zeros(INOUT2_VOLUME, dtype=INOUT2_DATATYPE) + scale_factor = np.array([3], dtype=INOUT1_DATATYPE) + inout2 = np.zeros(OUT_SIZE, dtype=np.uint8) bo_inout0.write(inout0, 0) - bo_inout1.write(inout1, 0) + bo_inout1.write(scale_factor, 0) bo_inout2.write(inout2, 0) # Sync buffers to update input buffer values @@ -101,9 +105,8 @@ def main(opts): if opts.verify: if opts.verbosity >= 1: print("Verifying results ...") - ref = np.arange(2, INOUT0_VOLUME + 2, dtype=INOUT0_DATATYPE) + ref = np.arange(1, INOUT0_VOLUME + 1, dtype=INOUT0_DATATYPE) * scale_factor e = np.equal(output_buffer, ref) - # e = np.equal(dput_buffer, ref) errors = errors + np.size(e) - np.count_nonzero(e) # Write trace values if trace_size > 0 @@ -136,5 +139,6 @@ def main(opts): if __name__ == "__main__": - opts = test_utils.parse_args(sys.argv[1:]) + p = test_utils.create_default_argparser() + opts = p.parse_args(sys.argv[1:]) main(opts) diff --git a/programming_guide/section-4/section-4b/vector_scalar_mul.cc b/programming_guide/section-4/section-4b/vector_scalar_mul.cc new file mode 100755 index 0000000000..b47fc34622 --- /dev/null +++ b/programming_guide/section-4/section-4b/vector_scalar_mul.cc @@ -0,0 +1,26 @@ +//===- vector_scalar_mul.cc -------------------------------------*- C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2024, Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +extern "C" { + +void vector_scalar_mul_aie_scalar(int32_t *a, int32_t *c, int32_t *factor, + int32_t N) { + event0(); + for (int i = 0; i < N; i++) { + c[i] = *factor * a[i]; + } + event1(); +} +} // extern "C" diff --git a/programming_guide/section-4/section-4c/README.md b/programming_guide/section-4/section-4c/README.md index 6d863fe7bb..94b86ba297 100644 --- a/programming_guide/section-4/section-4c/README.md +++ b/programming_guide/section-4/section-4c/README.md @@ -17,7 +17,7 @@ ----- -Now that we are able to measure the total application time ([section-4a](../section-4a/)) and have examined the kernel performance via tracing ([section-4b](../section-4b)), we will take a closer look at kernel vectorization. We will be using the [vector-scalar multiply example](../../../programming_examples/basic/vector_scalar_mul/) to illustrate kernel vectorization concepts. +Now that we are able to measure the total application time ([section-4a](../section-4a/)) and have examined the kernel performance via tracing ([section-4b](../section-4b)), we will take a closer look at kernel vectorization. We will be using the [vector-scalar multiply example](../../../programming_examples/basic/vector_scalar_mul/) rather than a local copy of that same design to illustrate kernel vectorization concepts. Note that by default, that example design is working with 16-bit data (vs 32-bit of our local examples) and has `vectorized=True`. Go ahead and read the design example summary for [vector-scalar multiply](../../../programming_examples/basic/vector_scalar_mul/) first to get an idea of the different components of this example design. 
Then, let's take a closer look at the kernel source file ([scale.cc](../../../aie_kernels/aie2/scale.cc)). diff --git a/programming_guide/section-4/test.cpp b/programming_guide/section-4/test.cpp deleted file mode 100644 index 2ec8a0d1c3..0000000000 --- a/programming_guide/section-4/test.cpp +++ /dev/null @@ -1,256 +0,0 @@ -//===- test.cpp -------------------------------------------000---*- C++ -*-===// -// -// This file is licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// Copyright (C) 2023, Advanced Micro Devices, Inc. -// -//===----------------------------------------------------------------------===// - -#include -#include -#include -#include -#include -#include -#include - -#include "xrt/xrt_bo.h" -#include "xrt/xrt_device.h" -#include "xrt/xrt_kernel.h" - -#include "test_utils.h" - -#ifndef DATATYPES_USING_DEFINED -#define DATATYPES_USING_DEFINED -// ------------------------------------------------------ -// Configure this to match your buffer data type -// ------------------------------------------------------ -using INOUT0_DATATYPE = std::uint32_t; -using INOUT1_DATATYPE = std::uint32_t; -using INOUT2_DATATYPE = std::uint32_t; -#endif - -namespace po = boost::program_options; - -// ---------------------------------------------------------------------------- -// Verify results (specific to our design example) -// ---------------------------------------------------------------------------- -template -int verify(int CSize, std::vector C, int verbosity) { - int errors = 0; - for (uint32_t i = 0; i < CSize; i++) { - uint32_t ref = i + 2; - if (C[i] != ref) { - std::cout << "Error in output " << C[i] << " != " << ref << std::endl; - errors++; - } else { - if (verbosity > 1) - std::cout << "Correct output " << C[i] << " == " << ref << std::endl; - } - } - return errors; -} - -// 
---------------------------------------------------------------------------- -// Main -// ---------------------------------------------------------------------------- -int main(int argc, const char *argv[]) { - - // ------------------------------------------------------ - // Parse program arguments - // ------------------------------------------------------ - po::options_description desc("Allowed options"); - po::variables_map vm; - test_utils::add_default_options(desc); - - test_utils::parse_options(argc, argv, desc, vm); - int verbosity = vm["verbosity"].as(); - int do_verify = vm["verify"].as(); - int n_iterations = vm["iters"].as(); - int n_warmup_iterations = vm["warmup"].as(); - int trace_size = vm["trace_sz"].as(); - - // ------------------------------------------------------ - // Configure this to match your design's buffer size - // ------------------------------------------------------ - int INOUT0_VOLUME = 64; // Input only, 64x uint32_t in this example - int INOUT1_VOLUME = 64; // Not used in this example - int INOUT2_VOLUME = 64; // Output only, 64x uint32_t in this example - - size_t INOUT0_SIZE = INOUT0_VOLUME * sizeof(INOUT0_DATATYPE); - size_t INOUT1_SIZE = INOUT1_VOLUME * sizeof(INOUT1_DATATYPE); - size_t INOUT2_SIZE = INOUT2_VOLUME * sizeof(INOUT2_DATATYPE); - - // TODO Remove trace for now? 
- size_t OUT_SIZE = INOUT2_SIZE + trace_size; - - srand(time(NULL)); - - // Load instruction sequence - std::vector instr_v = - test_utils::load_instr_sequence(vm["instr"].as()); - if (verbosity >= 1) - std::cout << "Sequence instr count: " << instr_v.size() << "\n"; - - // ------------------------------------------------------ - // Get device, load the xclbin & kernel and register them - // ------------------------------------------------------ - xrt::device device; - xrt::kernel kernel; - - test_utils::init_xrt_load_kernel(device, kernel, verbosity, - vm["xclbin"].as(), - vm["kernel"].as()); - - // ------------------------------------------------------ - // Initialize input/ output buffer sizes and sync them - // ------------------------------------------------------ - auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), - XCL_BO_FLAGS_CACHEABLE, kernel.group_id(0)); - auto bo_inout0 = - xrt::bo(device, INOUT0_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(2)); - auto bo_inout1 = - xrt::bo(device, INOUT1_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); - // Assumes trace will only be added to inout2 - auto bo_inout2 = - xrt::bo(device, OUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); - - if (verbosity >= 1) - std::cout << "Writing data into buffer objects.\n"; - - // Initialize instruction buffer - void *bufInstr = bo_instr.map(); - memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(int)); - - // Initialize Inout buffer 0 - INOUT0_DATATYPE *bufInOut0 = bo_inout0.map(); - std::vector AVec(INOUT0_VOLUME); - for (int i = 0; i < INOUT0_VOLUME; i++) - AVec[i] = i + 1; - // AVec.push_back(i + 1); - memcpy(bufInOut0, AVec.data(), (AVec.size() * sizeof(INOUT0_DATATYPE))); - - // Initialize Inout buffer 1 - // INOUT1_DATATYPE *bufInOut1 = bo_inout1.map(); - // std::vector BVec(INOUT1_VOLUME); - // for (int i = 0; i < INOUT1_VOLUME; i++) - // BVec[i] = i + 1 - // //BVec.push_back(i + 1); - // memcpy(bufInOut1, BVec.data(), (BVec.size() * 
sizeof(INOUT1_DATATYPE))); - - // Initialize Inout buffer 2 - char *bufInOut2 = bo_inout2.map(); - std::vector CVec(INOUT2_VOLUME); - memset(bufInOut2, 0, OUT_SIZE); // Zeroes out INOUT2_VOLUME + trace_size - - // Sync buffers to update input buffer values - bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); - bo_inout0.sync(XCL_BO_SYNC_BO_TO_DEVICE); - // bo_inout1.sync(XCL_BO_SYNC_BO_TO_DEVICE); - bo_inout2.sync(XCL_BO_SYNC_BO_TO_DEVICE); - - // ------------------------------------------------------ - // Initialize run configs - // ------------------------------------------------------ - unsigned num_iter = n_iterations + n_warmup_iterations; - float npu_time_total = 0; - float npu_time_min = 9999999; - float npu_time_max = 0; - - int errors = 0; - - // ------------------------------------------------------ - // Main run loop - // ------------------------------------------------------ - for (unsigned iter = 0; iter < num_iter; iter++) { - - // Run kernel - if (verbosity >= 1) - std::cout << "Running Kernel.\n"; - auto start = std::chrono::high_resolution_clock::now(); - auto run = - kernel(bo_instr, instr_v.size(), bo_inout0, bo_inout1, bo_inout2); - run.wait(); - auto stop = std::chrono::high_resolution_clock::now(); - bo_inout2.sync(XCL_BO_SYNC_BO_FROM_DEVICE); - - if (iter < n_warmup_iterations) { - /* Warmup iterations do not count towards average runtime. */ - continue; - } - - // Copy output results and verify they are correct - memcpy(CVec.data(), bufInOut2, (CVec.size() * sizeof(INOUT2_DATATYPE))); - if (do_verify) { - if (verbosity >= 1) { - std::cout << "Verifying results ..." << std::endl; - } - auto vstart = std::chrono::system_clock::now(); - errors = verify(INOUT2_VOLUME, CVec, verbosity); - auto vstop = std::chrono::system_clock::now(); - float vtime = - std::chrono::duration_cast(vstop - vstart) - .count(); - if (verbosity >= 1) { - std::cout << "Verify time: " << vtime << "secs." 
<< std::endl; - } - } else { - if (verbosity >= 1) - std::cout << "WARNING: results not verified." << std::endl; - } - - // Write trace values if trace_size > 0 - if (trace_size > 0) { - // test_utils::write_out_trace(((char *)bufInOut2) + INOUT2_SIZE, - // trace_size, - test_utils::write_out_trace(((char *)bufInOut2), trace_size, - vm["trace_file"].as()); - } - - // Accumulate run times - float npu_time = - std::chrono::duration_cast(stop - start) - .count(); - - npu_time_total += npu_time; - npu_time_min = (npu_time < npu_time_min) ? npu_time : npu_time_min; - npu_time_max = (npu_time > npu_time_max) ? npu_time : npu_time_max; - } - - // ------------------------------------------------------ - // Print verification and timing results - // ------------------------------------------------------ - - // TODO - Mac count to guide gflops - float macs = 0; - - std::cout << std::endl - << "Avg NPU time: " << npu_time_total / n_iterations << "us." - << std::endl; - if (macs > 0) - std::cout << "Avg NPU gflops: " - << macs / (1000 * npu_time_total / n_iterations) << std::endl; - - std::cout << std::endl - << "Min NPU time: " << npu_time_min << "us." << std::endl; - if (macs > 0) - std::cout << "Max NPU gflops: " << macs / (1000 * npu_time_min) - << std::endl; - - std::cout << std::endl - << "Max NPU time: " << npu_time_max << "us." << std::endl; - if (macs > 0) - std::cout << "Min NPU gflops: " << macs / (1000 * npu_time_max) - << std::endl; - - if (!errors) { - std::cout << "\nPASS!\n\n"; - return 0; - } else { - std::cout << "\nError count: " << errors << "\n\n"; - std::cout << "\nFailed.\n\n"; - return 1; - } -} diff --git a/programming_guide/section-4/test.py b/programming_guide/section-4/test.py deleted file mode 100644 index 0e82d741cb..0000000000 --- a/programming_guide/section-4/test.py +++ /dev/null @@ -1,132 +0,0 @@ -# test.py -*- Python -*- -# -# Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. 
-# SPDX-License-Identifier: MIT - -import numpy as np -import pyxrt as xrt -import sys -import time - -import aie.utils.test as test_utils - -# ------------------------------------------------------ -# Configure this to match your design's buffer size -# ------------------------------------------------------ -INOUT0_VOLUME = 64 # Input only, 64x uint32_t in this example -INOUT1_VOLUME = 64 # Not used in this example -INOUT2_VOLUME = 64 # Output only, 64x uint32_t in this example - -INOUT0_DATATYPE = np.uint32 -INOUT1_DATATYPE = np.uint32 -INOUT2_DATATYPE = np.uint32 - -INOUT0_SIZE = INOUT0_VOLUME * INOUT0_DATATYPE().itemsize -INOUT1_SIZE = INOUT1_VOLUME * INOUT1_DATATYPE().itemsize -INOUT2_SIZE = INOUT2_VOLUME * INOUT2_DATATYPE().itemsize - - -def main(opts): - - # Load instruction sequence - with open(opts.instr, "r") as f: - instr_text = f.read().split("\n") - instr_text = [l for l in instr_text if l != ""] - instr_v = np.array([int(i, 16) for i in instr_text], dtype=np.uint32) - - OUT_SIZE = INOUT2_SIZE - - # ------------------------------------------------------ - # Get device, load the xclbin & kernel and register them - # ------------------------------------------------------ - (device, kernel) = test_utils.init_xrt_load_kernel(opts) - - # ------------------------------------------------------ - # Initialize input/ output buffer sizes and sync them - # ------------------------------------------------------ - bo_instr = xrt.bo(device, len(instr_v) * 4, xrt.bo.cacheable, kernel.group_id(0)) - bo_inout0 = xrt.bo(device, INOUT0_SIZE, xrt.bo.host_only, kernel.group_id(2)) - bo_inout1 = xrt.bo(device, INOUT1_SIZE, xrt.bo.host_only, kernel.group_id(3)) - # bo_inout2 = xrt.bo(device, INOUT2_SIZE, xrt.bo.host_only, kernel.group_id(4)) - bo_inout2 = xrt.bo(device, OUT_SIZE, xrt.bo.host_only, kernel.group_id(4)) - - # Initialize instruction buffer - bo_instr.write(instr_v, 0) - - # Initialize data buffers - inout0 = np.arange(1, INOUT0_VOLUME + 1, dtype=INOUT0_DATATYPE) 
- inout1 = np.zeros(INOUT1_VOLUME, dtype=INOUT1_DATATYPE) - inout2 = np.zeros(INOUT2_VOLUME, dtype=INOUT2_DATATYPE) - bo_inout0.write(inout0, 0) - bo_inout1.write(inout1, 0) - bo_inout2.write(inout2, 0) - - # Sync buffers to update input buffer values - bo_instr.sync(xrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE) - bo_inout0.sync(xrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE) - bo_inout1.sync(xrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE) - bo_inout2.sync(xrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE) - - # ------------------------------------------------------ - # Initialize run configs - # ------------------------------------------------------ - num_iter = opts.iters + opts.warmup_iters - npu_time_total = 0 - npu_time_min = 9999999 - npu_time_max = 0 - errors = 0 - - # ------------------------------------------------------ - # Main run loop - # ------------------------------------------------------ - for i in range(num_iter): - # Run kernel - if opts.verbosity >= 1: - print("Running Kernel.") - start = time.time_ns() - h = kernel(bo_instr, len(instr_v), bo_inout0, bo_inout1, bo_inout2) - h.wait() - stop = time.time_ns() - bo_inout2.sync(xrt.xclBOSyncDirection.XCL_BO_SYNC_BO_FROM_DEVICE) - - # Warmup iterations do not count towards average runtime. 
- if i < opts.warmup_iters: - continue - - # Copy output results and verify they are correct - out_size = INOUT2_SIZE - output_buffer = bo_inout2.read(out_size, 0).view(INOUT2_DATATYPE) - if opts.verify: - if opts.verbosity >= 1: - print("Verifying results ...") - ref = np.arange(2, INOUT0_VOLUME + 2, dtype=INOUT0_DATATYPE) - e = np.equal(output_buffer, ref) - errors = errors + np.size(e) - np.count_nonzero(e) - - npu_time = stop - start - npu_time_total = npu_time_total + npu_time - npu_time_min = min(npu_time_min, npu_time) - npu_time_max = max(npu_time_max, npu_time) - - # ------------------------------------------------------ - # Print verification and timing results - # ------------------------------------------------------ - - # TODO - Mac count to guide gflops - - print("\nAvg NPU time: {}us.".format(int((npu_time_total / opts.iters) / 1000))) - print("\nMin NPU time: {}us.".format(int((npu_time_min / opts.iters) / 1000))) - print("\nMax NPU time: {}us.".format(int((npu_time_max / opts.iters) / 1000))) - - if not errors: - print("\nPASS!\n") - exit(0) - else: - print("\nError count: ", errors) - print("\nFailed.\n") - exit(-1) - - -if __name__ == "__main__": - opts = test_utils.parse_args(sys.argv[1:]) - main(opts) diff --git a/programming_guide/section-4/test_trace.cpp b/programming_guide/section-4/test_trace.cpp deleted file mode 100644 index 2ec8a0d1c3..0000000000 --- a/programming_guide/section-4/test_trace.cpp +++ /dev/null @@ -1,256 +0,0 @@ -//===- test.cpp -------------------------------------------000---*- C++ -*-===// -// -// This file is licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// Copyright (C) 2023, Advanced Micro Devices, Inc. 
-// -//===----------------------------------------------------------------------===// - -#include -#include -#include -#include -#include -#include -#include - -#include "xrt/xrt_bo.h" -#include "xrt/xrt_device.h" -#include "xrt/xrt_kernel.h" - -#include "test_utils.h" - -#ifndef DATATYPES_USING_DEFINED -#define DATATYPES_USING_DEFINED -// ------------------------------------------------------ -// Configure this to match your buffer data type -// ------------------------------------------------------ -using INOUT0_DATATYPE = std::uint32_t; -using INOUT1_DATATYPE = std::uint32_t; -using INOUT2_DATATYPE = std::uint32_t; -#endif - -namespace po = boost::program_options; - -// ---------------------------------------------------------------------------- -// Verify results (specific to our design example) -// ---------------------------------------------------------------------------- -template -int verify(int CSize, std::vector C, int verbosity) { - int errors = 0; - for (uint32_t i = 0; i < CSize; i++) { - uint32_t ref = i + 2; - if (C[i] != ref) { - std::cout << "Error in output " << C[i] << " != " << ref << std::endl; - errors++; - } else { - if (verbosity > 1) - std::cout << "Correct output " << C[i] << " == " << ref << std::endl; - } - } - return errors; -} - -// ---------------------------------------------------------------------------- -// Main -// ---------------------------------------------------------------------------- -int main(int argc, const char *argv[]) { - - // ------------------------------------------------------ - // Parse program arguments - // ------------------------------------------------------ - po::options_description desc("Allowed options"); - po::variables_map vm; - test_utils::add_default_options(desc); - - test_utils::parse_options(argc, argv, desc, vm); - int verbosity = vm["verbosity"].as(); - int do_verify = vm["verify"].as(); - int n_iterations = vm["iters"].as(); - int n_warmup_iterations = vm["warmup"].as(); - int trace_size = 
vm["trace_sz"].as(); - - // ------------------------------------------------------ - // Configure this to match your design's buffer size - // ------------------------------------------------------ - int INOUT0_VOLUME = 64; // Input only, 64x uint32_t in this example - int INOUT1_VOLUME = 64; // Not used in this example - int INOUT2_VOLUME = 64; // Output only, 64x uint32_t in this example - - size_t INOUT0_SIZE = INOUT0_VOLUME * sizeof(INOUT0_DATATYPE); - size_t INOUT1_SIZE = INOUT1_VOLUME * sizeof(INOUT1_DATATYPE); - size_t INOUT2_SIZE = INOUT2_VOLUME * sizeof(INOUT2_DATATYPE); - - // TODO Remove trace for now? - size_t OUT_SIZE = INOUT2_SIZE + trace_size; - - srand(time(NULL)); - - // Load instruction sequence - std::vector instr_v = - test_utils::load_instr_sequence(vm["instr"].as()); - if (verbosity >= 1) - std::cout << "Sequence instr count: " << instr_v.size() << "\n"; - - // ------------------------------------------------------ - // Get device, load the xclbin & kernel and register them - // ------------------------------------------------------ - xrt::device device; - xrt::kernel kernel; - - test_utils::init_xrt_load_kernel(device, kernel, verbosity, - vm["xclbin"].as(), - vm["kernel"].as()); - - // ------------------------------------------------------ - // Initialize input/ output buffer sizes and sync them - // ------------------------------------------------------ - auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), - XCL_BO_FLAGS_CACHEABLE, kernel.group_id(0)); - auto bo_inout0 = - xrt::bo(device, INOUT0_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(2)); - auto bo_inout1 = - xrt::bo(device, INOUT1_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); - // Assumes trace will only be added to inout2 - auto bo_inout2 = - xrt::bo(device, OUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); - - if (verbosity >= 1) - std::cout << "Writing data into buffer objects.\n"; - - // Initialize instruction buffer - void *bufInstr = bo_instr.map(); - 
memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(int)); - - // Initialize Inout buffer 0 - INOUT0_DATATYPE *bufInOut0 = bo_inout0.map(); - std::vector AVec(INOUT0_VOLUME); - for (int i = 0; i < INOUT0_VOLUME; i++) - AVec[i] = i + 1; - // AVec.push_back(i + 1); - memcpy(bufInOut0, AVec.data(), (AVec.size() * sizeof(INOUT0_DATATYPE))); - - // Initialize Inout buffer 1 - // INOUT1_DATATYPE *bufInOut1 = bo_inout1.map(); - // std::vector BVec(INOUT1_VOLUME); - // for (int i = 0; i < INOUT1_VOLUME; i++) - // BVec[i] = i + 1 - // //BVec.push_back(i + 1); - // memcpy(bufInOut1, BVec.data(), (BVec.size() * sizeof(INOUT1_DATATYPE))); - - // Initialize Inout buffer 2 - char *bufInOut2 = bo_inout2.map(); - std::vector CVec(INOUT2_VOLUME); - memset(bufInOut2, 0, OUT_SIZE); // Zeroes out INOUT2_VOLUME + trace_size - - // Sync buffers to update input buffer values - bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); - bo_inout0.sync(XCL_BO_SYNC_BO_TO_DEVICE); - // bo_inout1.sync(XCL_BO_SYNC_BO_TO_DEVICE); - bo_inout2.sync(XCL_BO_SYNC_BO_TO_DEVICE); - - // ------------------------------------------------------ - // Initialize run configs - // ------------------------------------------------------ - unsigned num_iter = n_iterations + n_warmup_iterations; - float npu_time_total = 0; - float npu_time_min = 9999999; - float npu_time_max = 0; - - int errors = 0; - - // ------------------------------------------------------ - // Main run loop - // ------------------------------------------------------ - for (unsigned iter = 0; iter < num_iter; iter++) { - - // Run kernel - if (verbosity >= 1) - std::cout << "Running Kernel.\n"; - auto start = std::chrono::high_resolution_clock::now(); - auto run = - kernel(bo_instr, instr_v.size(), bo_inout0, bo_inout1, bo_inout2); - run.wait(); - auto stop = std::chrono::high_resolution_clock::now(); - bo_inout2.sync(XCL_BO_SYNC_BO_FROM_DEVICE); - - if (iter < n_warmup_iterations) { - /* Warmup iterations do not count towards average runtime. 
*/ - continue; - } - - // Copy output results and verify they are correct - memcpy(CVec.data(), bufInOut2, (CVec.size() * sizeof(INOUT2_DATATYPE))); - if (do_verify) { - if (verbosity >= 1) { - std::cout << "Verifying results ..." << std::endl; - } - auto vstart = std::chrono::system_clock::now(); - errors = verify(INOUT2_VOLUME, CVec, verbosity); - auto vstop = std::chrono::system_clock::now(); - float vtime = - std::chrono::duration_cast(vstop - vstart) - .count(); - if (verbosity >= 1) { - std::cout << "Verify time: " << vtime << "secs." << std::endl; - } - } else { - if (verbosity >= 1) - std::cout << "WARNING: results not verified." << std::endl; - } - - // Write trace values if trace_size > 0 - if (trace_size > 0) { - // test_utils::write_out_trace(((char *)bufInOut2) + INOUT2_SIZE, - // trace_size, - test_utils::write_out_trace(((char *)bufInOut2), trace_size, - vm["trace_file"].as()); - } - - // Accumulate run times - float npu_time = - std::chrono::duration_cast(stop - start) - .count(); - - npu_time_total += npu_time; - npu_time_min = (npu_time < npu_time_min) ? npu_time : npu_time_min; - npu_time_max = (npu_time > npu_time_max) ? npu_time : npu_time_max; - } - - // ------------------------------------------------------ - // Print verification and timing results - // ------------------------------------------------------ - - // TODO - Mac count to guide gflops - float macs = 0; - - std::cout << std::endl - << "Avg NPU time: " << npu_time_total / n_iterations << "us." - << std::endl; - if (macs > 0) - std::cout << "Avg NPU gflops: " - << macs / (1000 * npu_time_total / n_iterations) << std::endl; - - std::cout << std::endl - << "Min NPU time: " << npu_time_min << "us." << std::endl; - if (macs > 0) - std::cout << "Max NPU gflops: " << macs / (1000 * npu_time_min) - << std::endl; - - std::cout << std::endl - << "Max NPU time: " << npu_time_max << "us." 
<< std::endl; - if (macs > 0) - std::cout << "Min NPU gflops: " << macs / (1000 * npu_time_max) - << std::endl; - - if (!errors) { - std::cout << "\nPASS!\n\n"; - return 0; - } else { - std::cout << "\nError count: " << errors << "\n\n"; - std::cout << "\nFailed.\n\n"; - return 1; - } -} diff --git a/programming_guide/section-4/test_trace.py b/programming_guide/section-4/test_trace.py deleted file mode 100644 index b6c0d99c02..0000000000 --- a/programming_guide/section-4/test_trace.py +++ /dev/null @@ -1,142 +0,0 @@ -# test.py -*- Python -*- -# -# Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. -# SPDX-License-Identifier: MIT - -# import argparse -import numpy as np -import pyxrt as xrt -import sys -import time - -import aie.utils.test as test_utils - -# ------------------------------------------------------ -# Configure this to match your design's buffer size -# ------------------------------------------------------ -INOUT0_VOLUME = 64 # Input only, 64x uint32_t in this example -INOUT1_VOLUME = 64 # Not used in this example -INOUT2_VOLUME = 64 # Output only, 64x uint32_t in this example - -INOUT0_DATATYPE = np.uint32 -INOUT1_DATATYPE = np.uint32 -INOUT2_DATATYPE = np.uint32 - -INOUT0_SIZE = INOUT0_VOLUME * INOUT0_DATATYPE().itemsize -INOUT1_SIZE = INOUT1_VOLUME * INOUT1_DATATYPE().itemsize -INOUT2_SIZE = INOUT2_VOLUME * INOUT2_DATATYPE().itemsize - - -def main(opts): - - # Load instruction sequence - with open(opts.instr, "r") as f: - instr_text = f.read().split("\n") - instr_text = [l for l in instr_text if l != ""] - instr_v = np.array([int(i, 16) for i in instr_text], dtype=np.uint32) - - OUT_SIZE = INOUT2_SIZE + opts.trace_size - - # ------------------------------------------------------ - # Get device, load the xclbin & kernel and register them - # ------------------------------------------------------ - (device, kernel) = init_xrt_load_kernel(opts) - - # ------------------------------------------------------ - # Initialize input/ output 
buffer sizes and sync them - # ------------------------------------------------------ - bo_instr = xrt.bo(device, len(instr_v) * 4, xrt.bo.cacheable, kernel.group_id(0)) - bo_inout0 = xrt.bo(device, INOUT0_SIZE, xrt.bo.host_only, kernel.group_id(2)) - bo_inout1 = xrt.bo(device, INOUT1_SIZE, xrt.bo.host_only, kernel.group_id(3)) - # bo_inout2 = xrt.bo(device, INOUT2_SIZE, xrt.bo.host_only, kernel.group_id(4)) - bo_inout2 = xrt.bo(device, OUT_SIZE, xrt.bo.host_only, kernel.group_id(4)) - - # Initialize instruction buffer - bo_instr.write(instr_v, 0) - - # Initialize data buffers - inout0 = np.arange(1, INOUT0_VOLUME + 1, dtype=INOUT0_DATATYPE) - inout1 = np.zeros(INOUT1_VOLUME, dtype=INOUT1_DATATYPE) - inout2 = np.zeros(INOUT2_VOLUME, dtype=INOUT2_DATATYPE) - bo_inout0.write(inout0, 0) - bo_inout1.write(inout1, 0) - bo_inout2.write(inout2, 0) - - # Sync buffers to update input buffer values - bo_instr.sync(xrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE) - bo_inout0.sync(xrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE) - bo_inout1.sync(xrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE) - bo_inout2.sync(xrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE) - - # ------------------------------------------------------ - # Initialize run configs - # ------------------------------------------------------ - num_iter = opts.iters + opts.warmup_iters - npu_time_total = 0 - npu_time_min = 9999999 - npu_time_max = 0 - errors = 0 - - # ------------------------------------------------------ - # Main run loop - # ------------------------------------------------------ - for i in range(num_iter): - # Run kernel - if opts.verbosity >= 1: - print("Running Kernel.") - start = time.time_ns() - h = kernel(bo_instr, len(instr_v), bo_inout0, bo_inout1, bo_inout2) - h.wait() - stop = time.time_ns() - bo_inout2.sync(xrt.xclBOSyncDirection.XCL_BO_SYNC_BO_FROM_DEVICE) - - # Warmup iterations do not count towards average runtime. 
- if i < opts.warmup_iters: - continue - - # Copy output results and verify they are correct - out_size = INOUT2_SIZE + opts.trace_size - print("out_size:", out_size) - output_buffer = bo_inout2.read(out_size, 0).view(INOUT2_DATATYPE) - dout_buffer = output_buffer[0 : INOUT2_VOLUME - 1] - trace_buffer = output_buffer[INOUT2_VOLUME - 1 :] - if opts.verify: - if opts.verbosity >= 1: - print("Verifying results ...") - ref = np.arange(2, INOUT0_VOLUME + 2, dtype=INOUT0_DATATYPE) - # e = np.equal(output_buffer, ref) - e = np.equal(dput_buffer, ref) - errors = errors + np.size(e) - np.count_nonzero(e) - - # Write trace values if trace_size > 0 - # if opts.trace_size > 0: - # print("Do something with trace!") - # test_utils.write_out_trace(trace_buffer, opts.trace_size, opts.trace_file) - - npu_time = stop - start - npu_time_total = npu_time_total + npu_time - npu_time_min = min(npu_time_min, npu_time) - npu_time_max = max(npu_time_max, npu_time) - - # ------------------------------------------------------ - # Print verification and timing results - # ------------------------------------------------------ - - # TODO - Mac count to guide gflops - - print("\nAvg NPU time: {}us.".format(int((npu_time_total / opts.iters) / 1000))) - print("\nMin NPU time: {}us.".format(int((npu_time_min / opts.iters) / 1000))) - print("\nMax NPU time: {}us.".format(int((npu_time_max / opts.iters) / 1000))) - - if not errors: - print("\nPASS!\n") - exit(0) - else: - print("\nError count: ", errors) - print("\nFailed.\n") - exit(-1) - - -if __name__ == "__main__": - opts = test_utils.parse_args(sys.argv[1:]) - main(opts)