From ed489bcf5abdb63c6bce804618e62aa680048008 Mon Sep 17 00:00:00 2001 From: Jiacheng Huang Date: Mon, 9 Sep 2024 11:21:14 +0800 Subject: [PATCH 1/3] Add logo --- README.md | 4 +++- docs/README.zh.md | 2 ++ docs/source/_static/ninetoothed-logo.png | Bin 0 -> 17335 bytes 3 files changed, 5 insertions(+), 1 deletion(-) create mode 100755 docs/source/_static/ninetoothed-logo.png diff --git a/README.md b/README.md index 894ee74..ba962a7 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # NineToothed +![NineToothed Logo](docs/source/_static/ninetoothed-logo.png) + A domain-specific language (DSL) based on Triton but providing higher-level abstractions. **Other language versions: [English](README.md), [简体中文](docs/README.zh.md).** @@ -65,4 +67,4 @@ def matmul_kernel(a: a_tiled, b: b_tiled, c: c_tiled): For matrix multiplication, we also have three tensor parameters, but the tiling method is more complex than vector addition. We denote the three matrices as $A$, $B$, and $C$, where $A$ and $B$ are inputs, and $C$ is the output. Tiling $C$ is simple; we just need to divide it into blocks of size `(BLOCK_SIZE_M, BLOCK_SIZE_N)` by rows and columns. Once each block computes its result, the entire $C$ is computed. However, how should we tile $A$ and $B$? The answer is to introduce another meta-parameter `BLOCK_SIZE_K`. This way, we can divide $A$ into blocks of size `(BLOCK_SIZE_M, BLOCK_SIZE_K)` and $B$ into blocks of size `(BLOCK_SIZE_K, BLOCK_SIZE_N)`. However, for matrix multiplication, $A$ and $B$ do not correspond block by block; each row of $A$ needs to correspond to each column of $B$. Therefore, we need to further `tile` $A$ and $B$ by rows and columns, respectively. Up to this point, we have a set of row blocks of $A$ and column blocks of $B$. However, each row block of $A$ must correspond to every column block of $B$. This is where `expand` comes in. We `expand` the row blocks of $A$ along the columns to the number of columns of $C$ and the column blocks of $B$ along the rows to the number of rows of $C$. This way, we successfully tile $A$, $B$, and $C$. In fact, our meta-operations up to this point have already enabled us to write kernel functions. However, we notice that the levels where the row blocks and column blocks reside, which we mentioned earlier, are two-dimensional, and their sizes are of the forms `(1, ...)` and `(..., 1)`. This means that if no other operations are performed, the way we access row blocks and column blocks would have to be `a[0, k]` and `b[k, 0]`. If we want to use `a` to find the range of `k`, we would need to use `a.shape[1]`, but we know that dimensions of size `1` can actually be removed completely. This is why we added two lines of `squeeze`. The `dtype` refers to the data type, which in PyTorch can generally be some integer or floating-point type, such as `torch.float32`. However, since meta-operations like `tile` can be performed in NineToothed, `dtype` can also be a `Tensor`. In other words, there is a concept of "tensors that store tensors" in NineToothed. In summary, these two lines perform operations on the tensors stored in the outmost tensor, removing the dimensions of size `1`. This way, when we access the row and column blocks, we can use `a[k]` and `b[k]`, and when finding the range of `k`, we can use `a.shape[0]`. -With tiling done, the rest is simple. In the function body, we define an `accumulator` to accumulate intermediate results. We then iterate through the corresponding row blocks of $A$ and column blocks of B, multiplying them and accumulating the results in `accumulator`. Finally, we place the `accumulator` in the corresponding block of $C$. Since each block of the parameter tensors undergoes this operation, the multiplication is completed for the whole tensors as well. +With tiling done, the rest is simple. In the function body, we define an `accumulator` to accumulate intermediate results. We then iterate through the corresponding row blocks of $A$ and column blocks of $B$, multiplying them and accumulating the results in `accumulator`. Finally, we place the `accumulator` in the corresponding block of $C$. Since each block of the parameter tensors undergoes this operation, the multiplication is completed for the whole tensors as well. diff --git a/docs/README.zh.md b/docs/README.zh.md index 4ea7ae5..81cdd07 100644 --- a/docs/README.zh.md +++ b/docs/README.zh.md @@ -1,5 +1,7 @@ # 九齿 +![九齿 Logo](source/_static/ninetoothed-logo.png) + 一种基于 Triton 但提供更高层抽象的领域特定语言(DSL)。 **其他语言版本: [English](../README.md)、[简体中文](README.zh.md)。** diff --git a/docs/source/_static/ninetoothed-logo.png b/docs/source/_static/ninetoothed-logo.png new file mode 100755 index 0000000000000000000000000000000000000000..7d9f8fbe950f911a702c81e628ba0d8cdb13f9e0 GIT binary patch literal 17335 zcmeIacURNT^FFMofFRNZq>JC$UJqzNdffb=fCcM>2dMS3p*LJ>j< zy@eJ^?(qKnp2PQ_JLlx>OV5xn}0tnc3G8^Hy7xh~PQFy?ghF)ZQrR-Me=mb@#@{ z#r|b*W;qu7aoiwE%|%*Y7+>rtsY^&KmPE>)a%~8r(JjN`@QZZ zw)gHSYN{#88~9u9E#iHow`)H?+DuEEY;qTEzdw=m8p*{Jfsg+)u7o7Xi782m6#Pu7 zS|KTpN~zr!p9yD`yZcjLW1a#JKEbDkdoO-{%idd0N(;#L%dUUG^?8@A)nPO3CZt7h zGgE@fS0`=xI)@7@#Q*>LKSbc;cki`I`r(r**yRPEfLm$KXeG>V*!lATj^YbMM_6rM zqx5ccdvyq8(tLt6u=w-aX0d^P8dB3#?iYws@}O0R`-o4Fm1$RbyBkwI%H`_99mnde zz-e@PIpKjxEmsJ^VI98rmzer$sGJvYvV60eAF}ClwJ%8d{b*Iu(3IZAg=D?@;QJPM z+s{FJnN04n`LPE#*87aO3dgtG zTJ42GQu2=7cvSy!M5ynFTAo{nRiE-&Uu?&H?Nq8o# zkdWl$4ByNhislfL?bW0BC($x)FF(-E;5WOO7CN50i?;1F)2>Dfj*ud$Mc5b07K;rI zcEl*^8Ifh;z39%t3~QU3do)X`I)4r0$_>FxH;fJTo!l^-_r3+|ev|5P*OKZx zm6F@u9x%^DN*cat)JiED?OfEVJ+~&1z|TjquX{gBU7h*( z`1y&o)h~{=bxe-78$EXDmUD=I+9k<;eW}zO+!OXy?rQP)5h2o8>BUy-M!>oQgVey| zE4kI}329E$yXOf^tZf3Q=le6@rs}n`xr1WQ(e>mDlE0)l^eh|na(fwP#a~DAtA5Lr z1MTe7dZWx^hz-5kl)f!7efByP^fqmQq9eZd#jd)L;Zx9vH5CnGy& z>yMSQ=o7*9@kWx|+@X#>;oN>+%^{&ZomuKf%(+EVU%hs2Cgu|@WIlwka8=J0iJtDU zDKmLxNy7aTM}}@y_J84JCv6Z=m#XC2WMrNv7JK^nXoF7BdS`9fzq6@JE zF%?F#b!gyd?7z}y0QB-AJB7Ph?K^eg>EquVJCK0t8AzC+LthQ*zAA1#UdLmb6(u#k1_AhyK~rd z+&^cjO{t$ZTd1_XaMh3Br~P&q^t}M(Dfj@1Udr3Gi(iqzben!FEd}DX(|5cU1ay6Apn8eEgUm0qY6vgX-JB#g>z4pI(fWPcX?(ma(I{ zAt$#`mo%>nuLrAb*|Ap}u|xhbT8GSi?>of!!)YBTxiD83j=Va(#19nhH@=+%#Jse% zeQt{j)DL`ZO+FKws`a}Fmd%I0^ppygNSs?ws8Dba!=dN*`0Hx^+!zIGcW@rB*x1-} ztwNTcSQZE*P*v7V{7YcCVUy&Uj(c}$(CojiA7{pB#V2JM12qwUP+v@!`uIwg?9pXy zyg}MU-#_3?ER=E_6FD&5S&j)mXatFLz(blP@ItGpp{wv1=a5B2QcdNY|_=y%(mCE0>l<)>1t3a^+H(Ia% zi4##4db-oq!pJc@2-MXYGQ#DJow!$`J{=D1%7#_)r%f*`G-pgN3?Xbp%-Ux(;fmfr zi~>lPWaOV!&ju`pcbL_B%NBg>l>qH9%z2*wYz9LPy5jlTm!432Xc8I*OD}3Nwx&&H zZ%w-)cp)BW7+=tCOLR&d0brFBiI{qHUecM6vmfeEe9+#(-PguP!I$%>(mU(P?bHkI z9J#fBQShG!oTjeMnYztw(0_i_(&M`87;n3P>LaohjJ6M@Wb+CiKx~~`!hL^q4aX=? zgG+lAF(d`;2$%{j` zG}ro+R<$dEI`=ZS;bpM#MRi#=4Y{0kyGmJg>qkb=y;QR@_63pnNE_%q;v>EYe56EGNH%E3( zvbr8@K?QOh+ECx!`Snv$VFTnV8@QywHqC$c-52~OrnHW%Oq~8j84r&bh~}^xY-y@S zRfgtlzUCuROZsO&piu$<+R=uT5hOx0b0l3Km5+Vp9ogpez8;0JT?d2*nW-Xqx1M^n zk}a{Y7u-~R!mrNx$ql;fkE|?TjkXHoD6_jn)BC1`^ggQM%HHkEm6eUy`xQ|TsgHS^ z>Ua~VGbr1vX-x?c{F5fj+wk)Qy4-q|2ukZ&q^wWRfJvG-A3jU?_2;KDrv>HHwZ(s} z`ildbIkNR0b|~sXfC3=6_Jb^BvH?_|yicqF2#IF#(MJBMB{NuK;Ruvy+;UC_i%ZObk8k$QY{y@GrDX`1uJ&=S?VC$!TIg}e+Z{Om4~Zm8OS+}P*zKy z%<(UcPGzqNc=c)AbjVIsCdsZM!wL$RbWZ7xe?7LX;%JM4e_GhBrZ9=DV#D{2+w;^I zWylDs7dyvWlFjoj5cAsgm7=Z21@0jYVR>BQEBx@xRmy5-CveauPJh4i7i9VQXi}D8 zZwutYs&pV)QZH>@P||vdm+N@m|5n-nuqA)#*41u>h$r0ExgLaE+*1i7#}E;oUPS_e zIxR$oh&tQOG^JBv0JYIA7+9>5==s65n(C!kV)dTH{_d4XC9BAGRUmA5-iv11>w|LnRJE+c3|0}5*l&**eyBj|IO_45 z@qFakjzTbDYR0EoA=kXl6-&QawosKpuMDaj&FG{94!Y865b&y!-N8cF&iAD|3z90o z6NGRUttZ=W8Hz&L1_?!Cyp`4L&*r%ZmmvjS11!9Q=+U4(E%OZAoXGEEH{lQ9>abTJ z9;AO@VR0aIf8e+(wED?+4t1_9=c0Qwb`K#vo@r!KFIclFDAVS^=9?YY??MrYyo0#6 zJ4CR`916y6Q6y%jO8DCDYs$212r{N(`=)~BChds%WfIwoKGB);9zQ{KtX`*2k+&CY zWM%Um3^#)CQM8a$qXw0%7tn5%oY(ji2!(0gJRBeEU;Qd|pl4=;X>_@qORm~)?@PYz z{$rdPD}B^3c4IY?aA1{0f_ygE(B$1KW<|t{)0;KMa>59h%Q2m6sqmiOhq=wkNWN}9 z=*TT-;{%G>91KhoiP@u#<$g>*(4eCc{Tprdcx!v?VAd@5!1`_~xQ8|Rsa z`%q|dtlVq!n)m&s#))B%`@sj-W$5k?;Ws;RSz$ZH^s7K~j3R9LLkBxr+ar?+SlKuC zsz-8u7`HM%neANnZ-1Ay!*X?o`-Rt-(u$6^zj9DYfBkBmq+kb}K0CgOrm(y_x%-+Sxy*AV2M<(G1{TKp>=-7g3}_93T=&{^_JAa_H@u*aPgPK{s2 z=9UDWL!S~6}r6U9H^7-cT<0P@uAG6 z#lsC0V0vP@lHSTN^l-0PTzl&`N1vmFW*U;$u1$vs3mI+6fpMNES9bt9RrZWy)(-1m z@QQ7BXtneye`58%q{C8W#`D!;lggcqb~6pwtI+q>9_uzVi}gdv>o5au580v8Uyb<+ z)%S50of4we)i?5sv(pB%;jVG-A=}yUGs%^MXujYZUVzb=>KUM;*C5msF@c7%XZMw5 za|1|sNf;Gk)mFT+%kHN)@ff%81=moBU%j3E!cVjA?)q(IT59?{WN)-sdx=CY)1-&i z58h8R>kUObX@gCNcwGzpjrm=ParD*2n6I`{vl<|OdKJC{t$A>KXo~>|UByISnPTs@h(3uGp;GiR1T0S7#haVzwdcZXP$`*+?LV z-CMC)`4I;n=^qnT^ykl+#&|bB*Ca@*B5jXBLVoklvqE#-OxLgKJLW)#JyaQvz{Uc1 znLoUiVmEI>|0c@~KcWpeknHi3aO-YV`m!RM$TX*sq_TTESLs`Q32%2wOCPP0A)Io)p zeNQ2f3sd+n#LP_wH|$ji(=Pg?fTn#t(D>(>5QcTYqm7Oi@a$(P)?YlUxD`eU{8{Zu z0H*#yE~Jspp{49HGr?M0p8{ZVa56@QA4{~G+I)b?py5eh+r~w;JxYn?)zL2YlXQl`wZ( z7d5&b$DPwxR;ANAkt6CKF-jk$qA_fV6nUL%&R3ugIMRC8r^&9x)Xupm6xyGI>#6e! zV0gXY3|&0xEOXU07|eF|1!=8s7ult)CML8(FkSIa%n)6_Ys7^HXG;H@#ULc@5k|nG zsEHwSkZ;)HY{oL}>JWJKoY5UzwlmLYdHM�T5nkjXa!-mLB)|$=!wCuAhshgTLV} z7K6stkh-j04d4qjo><QKf2 zGx=&<=|-&rU5O5f8ZG{MfxN77qgcwhS7)Kiu^4l|?-yt2(b#Z}1od-Pt?hVxG<#gt z<`R=+zFbdO>2&YM6@ira_e|dla|3^QI+)Jp*Y=sh3bj6CtzKgo+;C!lbS1(_k>Uc?9Xf0_;C7oN zf$&;hR2V+*{$13rd{xGx41~EUlm?@6C9+cGUP|9;Zy~!(V154<@1hxIK5*)R(<3)& z-PB>qvxwF@f{QScG0vkw zPIAh`ifNn1rzk3IP9ci4qSJinspI$lRFK5fKLoY$kcr0nluv-eQ_XLU7=d^xEY{jk zV-e7SRbyerBI)9D@_x2Lnly%3u_w}8&-`xQ&^^1^Q#nsmQI&Hj3P~)!Ku*Zj1mj5Z8f07 zcXv}>g5gnoSRq`5GXH$~Jbk|}Yy5=qh=p3&0Ll4JExp0TZpJti%C+1B~@s6M!rot$y8+DOPC5t|%($|u9)ws2^Al-S5f)q8qvxt7Ff%R|2_s*ugNyP4XN z#LcxR^|@*~I&MS@kDz10-tsq6BBGVIEaT!)ys0TZmQ0I3GIB}M@vd+J+Z_Mttm5s+ zu|m%mihI6;h*^D~23V(pcV&c?Hs=+Udm-V-p zS4b9M`0QZUoH?X!AF&$clM)9buK4n4VMtIcRDeQ=>%8-}KYvp~IORq_cRR;zUNWC0 z^Do&TkNmC??rIRVT`;-cYse3_^9ZhzeDU@js#Vx1$RyaNE2Z1A$07e7ul`a;0n{E8qkjU4|Ot|`0^;znq8Hd~LmHJith1B8q;&4A-?2! zqE~e%kOUb{I#r>{CKY@XMc;)odaU=24@{UeHtX7Fwcb1>X1PKJn7JI1@tzP;PPxXE zKC7Bx3L1GUZ%NVIrs6Gqr7{keRgJI$p*V`cf;Q<3Gzfj+;0gWQikDhuHr|+`FJHWy%DCZz{j{f{~3vLtF~r{TCiX$ z0U7#c&9HVJTh}S$wzJA#Gn7`Nq4!*J& z1`~@oaG@%!t}--RWlrxarNydUy)=aHmW|`j^M0%AxkSSR%%E$fro% zMiGq-Nr?_^FdHD%dewt%l$}d=2z)5$?r{B0(a&)%NDfeqoTmTt&7@7nMunS(tNC^0 ze?I<%BU*RgCaT9gETuTS2KrTZ|36!F4C&@2P?jn&wOu^@@%dR&ElcD~nfc`r;ENEB z%%dVLZ|Zefl_6}R3@nJS3GO!;ucCTIkz-dK^L#}!31^r*Ljm?=vxoXy{);*4mva|R zUnqIK^1N2tQF*XCUv{&MrXe+2P1bs=Xiq}}oLI|#KcvVgtS6}{Si+25Ae}LU?=WJY zb+i|kKj>3G@?|fT&CnZ8)1JC0ENJ-8HPktEOS9YWU1$BY-x|Mo>ZIj7{BiO5i!uFw z*^Pdjz=4SqDeb%_YUM23BK?ThjNE$BE?H)ZSg)1-a5fjlopqlJRcoaJd2r#N9>+Ab ze}HH$xj&-h$W-VHSV`QpVMz-9`a|=my@Z+O5ttm;>TD#|O-4G%XcQ0e;BFx23c;|- zg3a%WyitzxhysmYZ26N31cn=aP$y}&`chBL#nqAJjLN60T_yK%WLQ0gc2`>UgElCM z^9MO!CT__aYnsz4$B)gA>wa54*!hcSYiee+G?}>302>_UC*1u6Cr|xIYwBC+&47@U zKOVJeew=Bg!shqaR7-mjUc@6g@GBeeI|&w&UXrkEGAoX&QY=)?VD=R7zaIujmcE7u zkQY-+!0;N6Jj8+jJ2wwZ<#8sSNwv&0zL{e|#*|B$k#|GfrJ}DchAQYGfvMB&4n*;BV&jkK0ilGt^5d zDK=`X(Aj6L5?)`wz7`sg0egG@>51GQOH2Hmrm^pPX|+<%D(InoyD(A6%U-=jCmsKi zWlaBt`O(!7WV(cxrhS8lC%oaU+1aJlnTFkBn}eI$5#pJ?qy3@ef&7?%kAN&p7A+-$|C5mzTx>V+C-b*6mQ-1sfqssr_dYYnDN5JX}KL z%MW(*lPVrsZNMLB4~BqylD!BV+yg~r9};4Bl<%iPy5AX=<@c0gEb|m}MA2gydGAMD zh4_u+9LZN9YOmTw>wkHgO-k(PbnATV!D8L_W<_`aSqj6GLEMVd{$=nc_|Eac&Q+KNN`F0iGBGcb#Msv z2Gr<#;`u<9`SqKwPF`As;%_-Dkz4mnT;_SNzYc~ykALc|Xd=y0e7&w0`RASJeB!~z zrWS_7Dag?=FvHc&GeOqEavwajI&rl#GFQVELUNIUk3x-AMY_hBshsK?ANpS|48>ID z2fxua4k615uSCmG01IpreSscotTel^%vcJ3QT2Al5LtvX9zT?N38atP!I;c{yGdxO z!F;YLyEfYv;0j3u0MZBqY#9Rc=b$sti64ecf-kXMVq3d0^5upoiRo<%Q7M1^I)k#x z17|ajaqwKhb4AQBjUkww;H*3Q`>@%{li05JKZKaZ1Ycrf?&Xt6A7C)e4WdkdaT)YS zkJE7Uil9UMJ%VvVBy!!)-taJY%6=NP&HDRaBH5o^^TCG3UqVzCz6ljk2u|9?MTU z1C-dfkRUnprAIp-u_b+=yZk|L)58F)aVT`~Kn&w;!VX}$xFg33822Kx(A&}3@qC3` z_RRK^v0;@EGuy}iTQ(bE(o*w= zM%lZ`ljJ~GM7x;+bM3Fgh;_n2@uy+&%vhIrS$th}Q6y4k%8_M-hd#-caM;|3o=*dy z&H@7Lj7cehT<@!Wpn7@isih;lRbzN5RX*=Md%u0)QYSSnH36f_S*E^aEC5HyH$|GM zmW^9%Rc31)n0WxChPyJ;v7n?upHd$mE1Dn{mPMCPpe7^aZF;1gz@3eglx8|3?edB9 z^jJj8Tu!fwU3mC7EyU-x7P*VLfL>i6rhthxXpu5kMY+GxPKZxGj4VqE&agYLt|_*r zcl_$~L=q0n>zIx5Vyocf9q9;7hc){#^H?6d?~pbtSW%=gM6Lx`o5~Rb1c>Ps6?2Q< zuw~R%IgVsj{R&xkbcSz(>&>$H_)4X`e&{dBm2)lobbe5Bz18+{D$ougBY+J3@-d-?WA#J7^xuY)QM2VLBuIDb(yfV7(C3KL=fxBVX5L!G1 zZ>DNZdvHKlFF{BdC{#)@r07RO5q)Q+ZlAa_qS!dDg;>Ni+gJ`&)=D#v8~R3>wyswb zKX56Dh`ZyvLMc+`IwFtO#foVtalqmO_}Qppb^P#$;~~ZWbeJHYml%+t@XXlEb5AJL zD0B)9-Kk!Z6}R+FkJ3&Wt8Da8hO=Xc-6$P#Rmx^W90Z~m8l8tf8b3`uAy!uknLcrvgC-wp8_+8jKZ z{U7{&Q^qhVSDt5u_`R>ja?XEC;9V?gZ2ugss*uc;;NkzmbygfjhO2%o(#b*a2!y02 z!)7=?oT(&P48&WWN6OG;RqPF9%Z4_4!hS$#&)nfHI*M;C=pJ2h=~rUP{}|dkYCrp_ zR`tp|`k}ghp3NJqqipRi(m{miYN`g`#tLpJhO(Y#i?`06L(%2FBfi6xlZC`!`5IrE zIul|z!Sd+Cz=0NphrX2n*BOU68~|LZqGIbKz$nyZ;KyN!X-eFI7(Ex4_TTNYG5ex? zUP%cLn?X|R+Y0{A-0)}Js=E|7vXI@T+14p~r`XpzG%(T|(e(hjpa74U_Q$D&`Q@aX z)4=-ro&bGWrX``1jvgJ_r=Rf#^NhH(jdAxZ`(jKtgBV% z_Q*ghinc}=PmJBFmf&F3 z7?)szIy^32XYhL3d{#fJ|3pi}{cu&wjG?bxB~$vQqx-S2Ec5N#^XG$Co{7B`W9NfW zH{Edi5S2`sQ@Ngj1#SMv?!Defr3ACV;56F$hmuhduXWNm%+T1hsL^}g*j;nV$)jsr zkfeQEA=2I*wOQ&Ei8SCoTc$iC?bYc}%9OlOINI#w8?V-=apxA6RZ_0rI@md*I7mO@ z^q6mm>khwMEKf*7hSX~M6pI8skU^J}WHUK08W?;b!jZ8{+C1$Rb6s8g6x*oSoe^lH z62lxGDptXtRCQsv7_vG`lM<^ud8;q|bD6nJ01-}2rVo;PJFAuX3SqMv!7+q+J6j&O zf;uo+j%K(ygaTju4;Nbt(%GNn<6AS|-KvxIVP8B$>SAM0n79}bs+LXyMiwiB6)gp~ zBpyRUgFSdfP@r^HhNXH4w5m)HTjzn=AbaZ#^BtN;|^d5gFGv5Zn@M?^AL){n>9 zr*O}YQbr2*w1Bh=(;NBepYbaKqoRh8b_)@Wk$f!eV8iJPEy^d&yH=e733_4U8hN{h zwoIh%dLCo4XOqB%{xKAnkseR#o_QK@)UQ??^EGY8XM<)uiTB($gcP^v6hT+ zo8t`SYBW)FOzhCjpzN{nstP4!IG`$mHuljVFYS74Ep{QT5)F*F_~6qC&eBhfl;|~y zXy?W({=g1>_9|BiBRbJ5jn#T`9g00m4OB^wTEkaS+xc#GHhzke0>aFLR}3_OLnT-M z+yj6x0xAXpReQfDBVml8j1OJNu?AIln<;BtAgbqzdpohpcD}B=(YIHYuW9GRHRe_< zA6)7amMV62vKr@lKmo7>d3)D5Y3(pcp0v7;GacR2cMAbxy7{vcdA)0 zRq)5UYxYxcCLt8dIF_W? zX)m3M^8jVVZ~>b=$x@ABNTb>9q%>+zWHw>{^VcOM4>dT=>R}p?M67|1H8y@ua%pMB zYa_k;+709;AME;R%lab!;yQv{IT}u92YaXNwvm*Ogj5g=1Xt{TVjB~*#q!mf^cD6w zl=e)a2H&@tEo)>a++W^D-dgVnC3Z(9jyzKxfmj4L2V`^1H6nHihnRlvm?y-3x+Yu| zb}f#n+>g-RQx~9wfZx;@dYh^yN=1Fa#wyL+ZT6qf4RI{V?O}XcJrxB6Ak?QWgUTZ7 zP)R9`;19qSqD(z5>x<5MkEb4A5!NqpaRJkj!RRsCyvz<21>E&XGfU`kb z!ZR@f?*aZ|1>>Er93@OP(_u9h|$oGqm;R)qA1g znRCiBl7sA9wWI7Us+g-C-)h`V6M>-Ln5K{_|Er<69vXc)HirHKv&nm*g1u)10FD0% zI)qxZ?fEO#xH{jeXOYaQ-djWRD$j97tfSQ}^yq9WB{p(W?gay18&{QBMdA?)e!VIiu+L9LD z04A0}vw_(p$nxeF02s5QpEM~kJ(GMSI%^JJt$(QCuw;lkC*?X>d+ zXIa65>r&NES??zgu&#SNr)A|$wv%vR#)4)tYh{G2q_Ukq6xmCUo!i~@HSfw>)e`2X zqS6HLWcO>)<~f-XQbw)~T)TLjq3&!Ltg>KwE=j6%Br)5ToeBFB7s8dH3YIdXLS+2r z#`cN+0x%IYnRd(86lSKadYPokDh7G%g5usjHi_oX6DU-N+Vp~h2WX}x_TU2+B-gxB ziWUNPYTkJtBC$OCj&xwnGA`|PGx}Ar3FlKq)!gEt-(|d96lWrn5_%@>ASZ)|=*L`| zz;`&a7WS!OUJtb8EtaqC1w&_&S0@v-Ni&2i$G`tU@Q+^WNuqm6oeK!V60}TT#;4v$ z94-E3$tR3-@mWJR?S3-ij(?@mF>jv7J>%IcfK7+%qs5uuPBglQKjW>;ZYoM{vR?Q^ z(*i??e6DV$V}F`)`pBcFn_MXcM$HJ~Bwx=Ea$Wtg{g)F+Atp)>0PGDvv)_Wf^oe|u zfudQDXU29_?6>qvrF)_4FACJl_@Iv5+&DLvpjoY+#Vye|{w6+>=#kq`7uyjCrD}6y zfb*gNbIY`IEDmOI`d>?F>i^@}{&t?G%c)DZ=O%uxk}bW{fSJv!eU42`!f06|z5f_Q*D!=jrFM ze@kn!7oi+m`=5^7Nj_Tp^R+=&Q^;~#KL=NJBEB0Ze%Q(X``z5TTq&p{kt#=dw2?-A z5GR?#p=)h3F+l1rU&V^|)^x5}FLR=Ee@jA=>k)5ebhdBW_Hzy2vpnuS4uDv_FqE+E z1&&N4m*o;f-eaORh}Vym*B!3KS*{9pt`s^Y!dCEiak~~64+^W=v*hH{U(Afu-B+^_ zb*Mc7eb>#-P`(zz_S)_oh2sD`Q@QSTl`GDLvh^(I?lFZ^=0}L#rN7uIUQKfn%{Fxz zaOA%hgMTy~FyzX`lL;6y+xIm>PE(=`bx2J7JmPDywcy(f9&CohoA0$s7Nk-7-K<~1U09zYl7c0qE*iiv%@YY+$x5t$DP#YpqdF)z$QF(B5=|uMa&hGzq zAA(b`#LlyyF(a0q80{dr1TJs!Beu6bh2pe9p81TX651^`Hw3cAcdQ>-bTGR>p4GD& z)b!1>-T(J|k6jXrLi$2tFDlQO5H`zr{oRg(s^%SOR6N!#HY%O~;A8-T;;}g=_gq5P zD18Wvu(M@kAm%8ez zsh?q&Wh>vjE)m+QFTHP~v6WOW;e4*-7palU9v@#tI(&T3*EjM#uPN)(E^Hd}cFa)E ztRK0(l-;CK8ac^bH)DjH7`I1Z&ji%oSuZ4>Rh_d$ViQ6hY#%!rc2@G+qe7Asf-CP=Gc{*!?_`e$ zW($eRPQn9=4PE3#bVe~1nJ&2F#2KYjMo!DGkmWoLXa}hC#f}=5<-d%TdTWK+`)tC3oMf8 zTHZo~G;E`>ufAzTiBQ!D+r=Ub%htl0&R7tJ<}2vXk;gV!I-~_4zj*c@ zV&R$QAHp|~hFMttakG8S%P zt*@rK1wqJhxb=>=0y{9q{r+8wos=<*f_t--^Zm+eXe1m|s`h*D?Di{-J(znl&$y#C zUNP6k1grJH;8=6ntC$c2);)j;+gPll^J6{w6}}-lZ`ZvFkxCO2+%9ZnPQB9o!sQ(# zNj$8>1%S=Vd$?7xHf*6ePxFbB+F2HWuNfup+`cphY+YTdx0{zwT+m*yV7{#x$nipk zGvBW>*}j7=Xqim)Jr4byKO5xyuxp)}{MMP0-A*{zVlOTt5@@ZI0}_M0X+ZOS$3D;S zA6%AAZi+@%<%O@DYgjSA>_gO9cNblGGh%Q6|=G`i8$zH@$B~q!&b!@Si%emTyWiflia| ze$xrV8EIQV?(*pJ%xy=08g`jUCz6|2GiQ053c~O?9*wA99}3-F6Ii$uZgDk%NxJo{e~bkDJ4zC+fi?6lJ6KpGWQH!&VbN@=K& zs$rm0w!JJmF|Vs<|DV^&jx3&M&lu7>Z0H0TDsCO(ZecD9 z*6uk1lzTsk7$X zru}O7M=0XU!f!R72~38K5r8o(7T~s>vD?#JdA#PpB`kqoJ3#V3ZBv9R~DutJGy z$k+1gyCD)J^-oH|B9yCa{eH)$=;?@Sa?g$oy}=bs_0!LZv@!OIOo(W}$TRw6!_1F~ zFVqz?G8PKI?MB91v5eJf?T6~X=SS)Z9rkBtF;ObTbqevC7zoQrfHTa?f%p7jjO^F& zt30?mwudH)Ll2kV?$vR-L_kMxR{H3@9l(MPwc2t$3ojtCF?$HF{V# z_;F-78yedBWqv92)?jb<2o{BcCTWOr7F*pA3!=@zdmpY~jV!jNYEl^sDVF(#_Ppo6 z1cU{IM5|9&SiQysT^>rREHLV1omyPwFU(0%p^4pm)%}RTp#aJFzV%(Yb+>tcdM?z< z+CT=#7&aT6#a^%?E6tH^U%xF#CsZ9C`59kLeycxvUfp3a?N-KBjcg4+=jqDS#; zY;IBTlg2tGRu=c+Uh9tfkOnt_%ew z^yv$CVmYmjOctHGOV)42-q5z5J0&OEWuI|l%Jg6ZOETm1q7G``N)%vcPiLC^6#gR)v5ka z*&@#KjX6t<6k*+Qgz)S?KA~*BC$c@V@dnt_f93*0p1219$7etP9AbeLp1wU*)@r5~ z^x3&3Uz~_Oe(~NozQL0Lk{4;~R}mqQF=;L{0O0(ewnK{^yI;bF z{?Sp%0rRP={p)_>8xh$j*p1=o7ZP49sN@PNnAsY5`pjh`rQbyUZU{TQp8vsiYuV*f z7Zx0PYUZ%FE;;EWX6cO!iDr2jA;+4)xis%Ismn$$!Z=ZoK10oy8G0A&{TXUz-N+GX z)I*v~f;1lWR;z!nK+PRBJJ87breFoAjk0kt{;b<8_C2!TOS`Uq2@qLM3INe4)n9$SEZ0z(T6&n*Alx?$@DDh}Jhpb7agS@gUT1 z+8xQ>`<@xAu0mus)AZuZ-~GL@3DUfgv#9-uQCnYGTetl$ykdP`47oXU1+lO3R0!b3 zY*7_e6k@oPkXMWk_z#sQ{XYGrKkMgl!X!y=rz{@nzB3?2{>HazZhRSI-B?68n9A&5 zb*9`UV-<3Cyrm?W^iitrAsnYa|Y<~4;fj+VU2?ve5J`Q`0ev6%7#riL~TS~f8u{wP3>X#N= zD~b0s?tF|>yj7IDWf-(M;?mI_Fvu9Rs`&eE4C(6j^QNO!%2Wc-OIpp<#j%ERkSV5B z-KsYrO?k1HYUyOHZ2Lab{X5Y(daca#6jHTxRD7=tcyUbq+4`EYek`7Ty!0u)*<(QW ze}8pn2n+hRdF&BWEMw7@S)c5S_-r8$+*ga+`mgpArEE_8knE~@cX)Ux=8%IFo>WmZ zgS7V51E2?@OW*oOvod77b{sMh`&(a$@5dn|D zoM{4k{!bv&-MBFPPvS(i-^Jmd_H_gtJrCN^1_liVV}~){4a*zjg2o8i&v^Ppz0KPh zJ5iiF9#%&zwAaTqS+Y@8AmfPd&)lJx{|j3aL^!gCUOGA#i{@QqPS?{3`24?NH5tH__AfZeSF(+r}P<)^BHC9`t zz^B-)D=qZ-#6Kk}i?oqgvC9|k(PF#smJh?8*hfd44_FNEY|+9d{Eft3EftC8;_65~ z?Ae4O$;vK*^1a`;$v9H6l2Kho|bA`={idFpg4yPalhxGz^C{& zeKEN@2S8|M&_eziCIv)nXsA0H})GAXBj&dO8`UR2z!|1w5y<)EpPrzz;Sz(UROtkD%VP z!!|_&-6YEV$#a|1+}CGK78i0?TF7fOeeVeM=L8%Cg9CZ)-?LjQkKV?No#my*7LYl6Ox4!6|1|=F@)G(K+o6zl5b!jwl(!bosdx zbhsW;Wre6O(raiz9{8JP%6xn%e%8eM^}GoN9HaDSJdT9Dvb>rpFMt(NNRYZ@WuEc} z0rGCj4zk2ssTpw(-ARzIKhdG~;_0(Fp2;rKLf9V(3Gbs}!t?xM4%qt$@axfQ^xoHu z)#&?A?uR{n`E|xK#htj#O{2d(U)AoZ2yk-BXjce5+;BNK#C{$sTmKevIG)HRO~?4? zZXfyfwkY(*ygatoy|g8T=<}}jQCF!opW$}au}$LfN@Q`C9!4MCALgo>`{1$Pt}8b! zW>#?LT3haXrS32t5nXAkEpC5GMZRWA>XMdv$}i*D3$YU$N-5kXzvm*1LtoRKhp7h5 zAV8Noex4v0G&62iT?rW6Vc^i5TKeI%;|Yz8`Hu!9?BuB*s1)rl{2c})D2qsR4l#Z8M z_>9?}F8k5J8$?2=e`BwFgrU9#EsZr|o_Bl*a7VTa?t`mhHn}}Hlbrf5{DwwQK-5?% zIb*dfel8nQ5pP0KE--NLMpevixxuE%J6Ika`jwH<$6nJiTtDD_Gh`xT)I2K`&+6$E z1Eu6QJ=L3F#hZeijAfW*fAA2MWx1$E-Pfu27uy8x8_R{tM!2->t%W*_VwEcyz<@Y? zhG3kJX4ox1DG0%9?jSv3eqK7lRR7`S6|BCKUle=`TNYrx*mXT{aZ6yt6~I1|{4V%g z(Bc&MU|VnHST9+6-pUYsP zTW1VqZ;Q`g$x+G?-Mja4@p1Fg_P)P9rnVB66e_+6Z$D|&zKG}lKl}#=|94Bjj3U7> Yo->Gd!F`uG?!-`2)>f)gc=zf51Nus`-v9sr literal 0 HcmV?d00001 From b09f519b67b6cb49900325ece6158da92c7d21c1 Mon Sep 17 00:00:00 2001 From: Jiacheng Huang Date: Mon, 9 Sep 2024 11:42:28 +0800 Subject: [PATCH 2/3] Turn `tensor.Tensor.shape` and `tensor.Tensor.strides` into tuples --- src/ninetoothed/jit.py | 2 +- src/ninetoothed/tensor.py | 20 ++++++++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/ninetoothed/jit.py b/src/ninetoothed/jit.py index 6092ed9..70cd8b9 100644 --- a/src/ninetoothed/jit.py +++ b/src/ninetoothed/jit.py @@ -178,7 +178,7 @@ def visit_Attribute(self, node): if isinstance(value, Tensor): inner = value.dtype - return Symbol(inner.__dict__[node.attr]).node + return Symbol(getattr(inner, node.attr)).node self.generic_visit(node) diff --git a/src/ninetoothed/tensor.py b/src/ninetoothed/tensor.py index 10e3961..ffd5681 100644 --- a/src/ninetoothed/tensor.py +++ b/src/ninetoothed/tensor.py @@ -24,8 +24,8 @@ def __init__( self.name = f"tensor_{type(self).num_instances}" if ndim is not None: - self.shape = [Symbol(self.size_string(i)) for i in range(ndim)] - self.strides = [Symbol(self.stride_string(i)) for i in range(ndim)] + self.shape = (Symbol(self.size_string(i)) for i in range(ndim)) + self.strides = (Symbol(self.stride_string(i)) for i in range(ndim)) else: self.shape = shape @@ -191,6 +191,22 @@ def stride(self, dim=None): return self.strides[dim] + @property + def shape(self): + return self._shape + + @shape.setter + def shape(self, value): + self._shape = tuple(value) + + @property + def strides(self): + return self._strides + + @strides.setter + def strides(self, value): + self._strides = tuple(value) + @property def ndim(self): return len(self.shape) From c96c47269b164d886ff73441526797ec2acfcbe8 Mon Sep 17 00:00:00 2001 From: Jiacheng Huang Date: Mon, 9 Sep 2024 14:17:33 +0800 Subject: [PATCH 3/3] Increment the version number from 0.4.0 to 0.5.0 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 42afa67..e1dd19c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "ninetoothed" -version = "0.4.0" +version = "0.5.0" authors = [{ name = "Jiacheng Huang", email = "huangjiacheng0709@outlook.com" }] description = "A domain-specific language based on Triton but providing higher-level abstraction." readme = "README.md"