From bf4cc6ada39df1dedabba2547492377929a33faf Mon Sep 17 00:00:00 2001 From: TinyTapeoutBot <139130078+TinyTapeoutBot@users.noreply.github.com> Date: Mon, 4 Nov 2024 00:08:28 +0200 Subject: [PATCH] feat: update project tt_um_dlmiles_muldiv8 from dlmiles/ttihp0p2-muldiv8 Commit: 7b3785a10f463adaf65af8c28543434504cdc8f9 Workflow: https://github.com/dlmiles/ttihp0p2-muldiv8/actions/runs/11654628495 --- projects/tt_um_dlmiles_muldiv8/commit_id.json | 4 +- projects/tt_um_dlmiles_muldiv8/docs/info.md | 194 +++++++++++++++++- projects/tt_um_dlmiles_muldiv8/info.yaml | 1 - .../tt_um_dlmiles_muldiv8.gds | Bin 2431094 -> 2431094 bytes .../tt_um_dlmiles_muldiv8.spef | 2 +- 5 files changed, 192 insertions(+), 9 deletions(-) diff --git a/projects/tt_um_dlmiles_muldiv8/commit_id.json b/projects/tt_um_dlmiles_muldiv8/commit_id.json index 5bcc977..996659e 100644 --- a/projects/tt_um_dlmiles_muldiv8/commit_id.json +++ b/projects/tt_um_dlmiles_muldiv8/commit_id.json @@ -1,7 +1,7 @@ { "app": "Tiny Tapeout tt09 b176ed7c", "repo": "https://github.com/dlmiles/ttihp0p2-muldiv8", - "commit": "e72e07ced0cbe3feadaad3e82f4212ca8088e56c", - "workflow_url": "https://github.com/dlmiles/ttihp0p2-muldiv8/actions/runs/11647252828", + "commit": "7b3785a10f463adaf65af8c28543434504cdc8f9", + "workflow_url": "https://github.com/dlmiles/ttihp0p2-muldiv8/actions/runs/11654628495", "sort_id": 1730599967999 } \ No newline at end of file diff --git a/projects/tt_um_dlmiles_muldiv8/docs/info.md b/projects/tt_um_dlmiles_muldiv8/docs/info.md index 6e8dbeb..a0a6931 100644 --- a/projects/tt_um_dlmiles_muldiv8/docs/info.md +++ b/projects/tt_um_dlmiles_muldiv8/docs/info.md @@ -1,13 +1,197 @@ -# Information + + +## Background + +Combinational multiply / divider unit (8bit+8bit input) + +This is an updated version of the original project that was submitted and +manufactured in TT04 (https://github.com/dlmiles/tt04-muldiv4). 
The previous +project was hand crafted in Logisim-Evolution then exported as verilog and +integrated into a TT04 project. + +This version is the same design, extended to 8-bit wide inputs, but instead +of hand crafting the logic gates in a GUI we convert functional blocks into +SpinalHDL language constructs. Part of the purpose of this design is to +understand the area and timing changes introduced by adding more bits, then +to explore alternative topologies. + +The goal of the next iteration of this design may be to introduce a FMA +(Fused Multiply Add/Accumulate) function and ALU function to explore if +there is some useful composition of these functions (that might be useful +in an 8bit CPU/MCU design, or scale to something bigger). The next +iteration on from this could explore how to draw the transistors directly +(instead of using standard cell library) for such an arrangement, this may +result in non-rectangular cells that interlock to improve both area density +and timing performance. Or it might go up in smoke... who knows. + +# How It Works + +Due to the limited total IOs available at the external TT interface it is +necessary to clock the project and set up UI\_IN[0] to load each of the 2 +8-bit input registers. + +The input side uses latches to capture, which means during the appropriate +phase CLK (high) and ADDR state, it alternately opens/closes, the data +becomes captured into the latches at the CLK NEGEDGE. During the whole time +it is open and closed it is providing the data into the appropriate input +side of both MUL and DIV units (which are separate logic modules). + +The result becomes immediately available (after propagation and ripple +settling time) at the outputs. 
While the latch is open, maybe artificially +by extending duty-cycle of CLK, you should also be able to conduct +experiments on modifying input and observing output (when in immediate +result mode) + +The result output is also multiplexed and has an immediate and registered +mode. The immediate mode provides a direct visibility of the MUL/DIV +combinational output and should allow timing between input and outputs +to be observed. (you need to account for address multiplex of high-low +8bit sides of result). The registered mode captures the result in full at +the time of the last ADDR and a CLK posedge. This allows you to change +the values for the input side during the next few cycles, while the module +ensures to sustain the result value of the last computation at the output. +With an appropriate pipeline, interleave request and result information to +achieve higher throughput. + +----- + +**FIXME** + +FIXME please check out the original github for any enhanced +documentation for this project, potentially improved information +nearer PCB+IC delivery (to customer) schedule but also post-production +post-physically testing results and information. +I hope to produce some kind of graphs showing the timing capture and +reliability to show and demonstrate the cascade effect. This assumes +I have the design correct to allow this to happen, but there are some +tricks (like extending CLK on-duty cycle when latches are open) enough +to see result capture output. + +FIXME provide wavedrom diagram (MULU, MULS, DIVU, DIVS) + +FIXME explain IMMediate mode and REGistered mode (to pipeline) + +```c +FIXME provide blockdiagram of functional units +// D +// MUX +// X Y registers (loaded from multiplexed D) +// OP -> res flags +// P P registers +// DEMUX +// R +``` + +FIXME explain architectural difference to previous example and +considerations why to change. + +FIXME explain addressing mode to allow much wider units and +  potentially uneven input sizes. 
+ +----- + + +Multiplier (signed/unsigned) +Method uses Ripple Carry Array as 'high speed multiplier' +Setup operation mode bits MULDIV=0 and OPSIGNED(unsigned=0/signed=1) +Setup A (multiplier 8-bit) * B (multiplicand 8-bit) +Expect result P (product 16-bit) + + +Divider (signed/unsigned) +Method uses Full Adder with Mux as 'combinational restoring array divider algorithm'. +Setup operation mode bits MULDIV=1 and OPSIGNED(unsigned=0/signed=1) +Setup Dend (dividend 8-bit) / Dsor (divisor 8-bit) +Expect result Q (quotient 8-bit) with R (remainder 8-bit) + +Divider has error bit indicators that take precedence over any result. +If any error bit is set then the output Q and R should be disregarded. +When in multiplier mode error bits are muted to 0. +No input values can cause an overflow error so the bit is always reset. ## How to test -See the other file docs/info_moved.md +Please check back with the project github main page and the published +docs/ directory. There is expected to be some instructions provided +around the time the TT05 chips are received (Q4 2024). + +At the time of writing receiving a physical chip (from a previous TT +edition) back has not occurred, so there is no experience on the best +way to test this project, so I defer the task of writing this section +to a later time. + +There should be sufficient instructions here to start your own journey. ## External hardware -See the other file docs/info_moved.md +It is expected the RP2040 and a Python REPL should be sufficient to test this +project. + +## Thoughts to the future (next iteration) + +uio\_in[3] might be moved to bit4 and DIV0/OVER combined into bit5 +This would allow the address the contiguous area below. +However during a test build of a MULDIV16 version it easily exceeds 1x1, as +this stage looking towards making builds with permutations of +design/topology and method to generate GDS. So 1x1 is good to achieve this. 
+ +The uio\_in[3] feature wants to use registered mode to lock result when last address +is clocked in; this way we can pipeline result and demonstration of what pipelining +can do to increase throughput. + +The TB is limited to the 4bit version. Ran out of time to validate +registered output and pipeline. + + +Encapsulate the SpinalHDL Scala netlist generation, and write a yosys JVM +module harness (a yosys C++ module that is a JVM thread/process runner, with +communication interface, data/ffi API/lifecycle). Then write a yosys plugin +that allows it to directly include, use and call for generated data based on +parametric details. + +Consider emitting a custom cell/macro/GDS\_object that yosys can call for, +then emit verilog like a regular standard cell module. + +Consider modifying OpenROAD/OpenLane to incorporate generated macros +directly into other detailed routing environment then have the existing +detailed routing work around it as-is. + +## TODO + +Fixup the original logisim schematic labels. + +The input re-ordering (which made the SpinalHDL algo easier) + +Relabel the P6\_EXTND\_EN to P7\_EXTND\_EN the original product index label was +a bad choice in retrospect. + + +Provide the SpinalHDL directory to the project with the sbt project and +netlist generation code. + + +Fill out SpinalHDL unit testing. + +Test support for SUPPORT\_SIGNED=false (try to completely remove nets from +output instead of assigning constant False and letting synthesis optimize +away) + +Implement support for separate SUPPORT\_SIGNED for each input with 3 modes +of operation ALWAYS/NEVER/BOTH(like now using control input bit) + +Implement and test support for odd-sized inputs, so the width of X and Y or +DEND and DSOR can be different sizes. + +When input width can be unequal, test out the EOVERFLOW in the divider is +wired to the correct port and works in these scenarios. 
+ +Provide unit testing for common multipler sizes, obvious byte boundaries +but also the sizes common in FPGA DSP primitives. diff --git a/projects/tt_um_dlmiles_muldiv8/info.yaml b/projects/tt_um_dlmiles_muldiv8/info.yaml index 6a6066a..05034c5 100644 --- a/projects/tt_um_dlmiles_muldiv8/info.yaml +++ b/projects/tt_um_dlmiles_muldiv8/info.yaml @@ -19,7 +19,6 @@ project: source_files: - top_tt_um_dlmiles_muldiv8.v - pdk_generic_sg13g2/sg13g2_dlhrq.v - - pdk_sg13g2/sg13g2_dlhrq_1.v - TT06MULDIV8Top.v # The pinout of your project. Leave unused pins blank. DO NOT delete or add any pins. diff --git a/projects/tt_um_dlmiles_muldiv8/tt_um_dlmiles_muldiv8.gds b/projects/tt_um_dlmiles_muldiv8/tt_um_dlmiles_muldiv8.gds index 53b64b7b8bb813556532be69d3afac626ee1de02..a481284421b964ec9cb2610411f84e3c5e5b1ed3 100644 GIT binary patch delta 1567 zcmZ{kUr1A77{QP9F@+F=mZtgE81La1(P3@uDTSqKqIFLV=}$BQ2ve&^vi@An?g*?45r24f1J zkTNGaNcz7EO(+o)x-kB~zGd!>xaC02Evu1_n|(zSqfM(X86Q-yG>`cs)@U3^FkIAu{yO4>fWCk8&wik1=DMOlue@Jx1$y;|in;X@ zqG_O==W!ZscgLL*8Oo#l{86e#K;Ax0Z@@Ip&=6Ho&*2HWje1rlDb)h{6A$SSSb}q@ zN{e8X&Z!0JjDwzuWx9xbV3YFAAb-`6wJ`7(lClToOBuO@`tS5f1I<5wR(?VIc|0uh zN$_0LmiN%!KHrt^43K|1D=icF!bh1xdHb7u0p8>6c~wVi7>uX}T7Myl^Cx>$BM8RH zWz`t!S?yOJz|YNDxUA|L=y^G!Djwi(jH`+l_}q2%I{^GSTO9-+?zPgCVmyqO@iBfT zz%(&C7>x-sIul|HCd@>bW~POSGBGC3>||P*T}&I(&U7%lnFOQ!H#P(x)KXd^1t}E|xDn=e@xk!T&p+p$VP@l@O&g3U zd_u~c=pgC;E;OM8iMZJJ@AYkSPsA-x*WA+TBV@C(4zd}A>~0UTZC=zH^db8j+BJVY z>Nv#e{c|8u)G_qU~Zf5z0Tg4R=f^)#(ndySs* zgnoqF$1ksi8c6>P_hv!**zu1kpbrccb)c7qoev5o0eyCQr zeoQnCwDT-R*V^52=R}6`C_jIcsu7U4PtgLH#u*%>D(X2rM&qbwd4f_cpg-||4uK^& zm#Xw0tkUUUq^=m~8T(2Xkq>N9z8U1N8nPA!{z6iAqkJhNzo7o9UTL8D=g-N{Xg`mJ zWIhR=YufTI+S{i)@{IxVPiCcM0$=zbQz&nLlh46>oE=bgw1%@0)j;bnBys*^w`v5z zIJvC4j(S%5)I0EVa~3bFx(0e)46BL<_?siD;sri;L;VT>f8JIH!G}9qnpBL3@iIQf z&jgqzW*eh1K}KgnjKPGN2-D28Fj1zJi80%mIJ1LkW7?SxW+#(iI+-NX#h6UWDNUwd GUikwK