Skip to content

Commit

Permalink
[rtl] Organize todos.
Browse files Browse the repository at this point in the history
  • Loading branch information
qinjun-li authored and sequencer committed May 21, 2024
1 parent c641272 commit 1c3f894
Show file tree
Hide file tree
Showing 11 changed files with 13 additions and 41 deletions.
3 changes: 0 additions & 3 deletions t1/src/Bundles.scala
Original file line number Diff line number Diff line change
Expand Up @@ -279,9 +279,6 @@ class InstructionControlRecord(param: LaneParameter) extends Bundle {
/** 存 mask */
val mask: ValidIO[UInt] = Valid(UInt(param.datapathWidth.W))

/** 把mask按每四个分一个组,然后看orR */
val maskGroupedOrR: UInt = UInt((param.datapathWidth / param.sewMin).W)

/** 这一组写vrf的mask */
val vrfWriteMask: UInt = UInt(4.W)
}
Expand Down
3 changes: 0 additions & 3 deletions t1/src/Lane.scala
Original file line number Diff line number Diff line change
Expand Up @@ -709,7 +709,6 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
stage1.dequeue.ready := stage2.enqueue.ready && executionUnit.enqueue.ready
executionUnit.enqueue.valid := stage1.dequeue.valid && stage2.enqueue.ready

// todo: connect state from stage1
stage2.enqueue.bits.elements.foreach { case (k ,d) =>
stage1.dequeue.bits.elements.get(k).foreach( pipeData => d := pipeData)
}
Expand Down Expand Up @@ -988,8 +987,6 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
entranceControl.mask.valid := laneRequest.bits.mask
// assign mask from [[V]]
entranceControl.mask.bits := maskInput
// TODO: remove it.
entranceControl.maskGroupedOrR := maskGroupedOrR
// mask used for VRF write in this group.
entranceControl.vrfWriteMask := 0.U

Expand Down
22 changes: 6 additions & 16 deletions t1/src/T1.scala
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ case class LSUBankParameter(name: String, region: BitSet, beatbyte: Int, accessS
*
* @note
* Chaining:
* - limited by VRF Memory Port. TODO: add bank in VRF.
* - limited by VRF Memory Port.
* - the chaining size is decided by the logic units. If the bandwidth is limited by the logic units, we should increase the lane size.
* TODO: sort a machine-readable chaining matrix for test case generation.
*/
Expand Down Expand Up @@ -183,9 +183,7 @@ case class T1Parameter(
/** Used in memory bundle parameter. */
val memoryDataWidthBytes: Int = lsuBankParameters.head.beatbyte

/** LSU MSHR Size, from experience, we use 3 for 2R1W,this is also limited by the number of memory ports.
* TODO: in vector design, there are some instructions which have 3R1W, this may decrease performance. we need perf it.
*/
/** LSU MSHR size: contains a load unit, a store unit and one other unit. */
val lsuMSHRSize: Int = 3

/** 2 for 3 MSHR(read + write + otherUnit) */
Expand All @@ -204,7 +202,6 @@ case class T1Parameter(
/** for TileLink `mask` element. */
val maskWidth: Int = lsuBankParameters.head.beatbyte

// todo
val vrfReadLatency = 2

// each element: Each lane will be connected to the other two lanes,
Expand Down Expand Up @@ -295,9 +292,7 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa
/** from CPU LSU, store buffer is cleared, memory can observe memory requests after this is asserted. */
val storeBufferClear: Bool = IO(Input(Bool()))

/** TileLink memory ports.
* TODO: Multiple LSU support
*/
/** TileLink memory ports. */
val memoryPorts: Vec[TLBundle] = IO(Vec(parameter.lsuBankParameters.size, parameter.tlParam.bundle()))

// TODO: this is an example of adding a new Probe
Expand Down Expand Up @@ -365,8 +360,6 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa
// manually maintain a queue for requestReg.
requestRegDequeue.bits := requestReg.bits.request
requestRegDequeue.valid := requestReg.valid
// TODO: decode the 7 bits in LSB, to get the instruction type.
// we only need to use it to find if it's a load/store instruction.
decode.decodeInput := request.bits.instruction

/** alias to [[requestReg.bits.decodeResult]], it is commonly used. */
Expand Down Expand Up @@ -554,7 +547,6 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa
)
})
val dataResult: ValidIO[UInt] = RegInit(0.U.asTypeOf(Valid(UInt(parameter.datapathWidth.W))))
// todo: viota & compress & reduce

val executeForLastLaneFire: Bool = WireDefault(false.B)

Expand All @@ -581,7 +573,7 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa
val lsuFinished: Bool = ohCheck(lsu.lastReport, control.record.instructionIndex, parameter.chainingSize)
val busClear: Bool = !ohCheck(dataInCrossBus, control.record.instructionIndex, parameter.chainingSize)
// instruction is allocated to this slot.
when(requestRegDequeue.fire && instructionToSlotOH(index)) {
when(instructionToSlotOH(index)) {
// instruction metadata
control.record.instructionIndex := requestReg.bits.instructionIndex
// TODO: remove
Expand Down Expand Up @@ -725,7 +717,7 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa
(unOrderType && !allSlotFree) ||
(requestReg.bits.vdIsV0 && existMaskType)) ||
(vd === 0.U && maskType && slotValid))
when(requestRegDequeue.fire && instructionToSlotOH(index)) {
when(instructionToSlotOH(index)) {
writeBackCounter := 0.U
groupCounter := 0.U
executeCounter := 0.U
Expand Down Expand Up @@ -1323,7 +1315,6 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa
val executeFinish: Bool =
(lastReduceCounter || !(reduce || popCount) || orderedReduce) && maskUnitIdle
val schedulerWrite = decodeResultReg(Decoder.maskDestination) || (reduce && !popCount) || writeMv
// todo: decode
val groupSync = decodeResultReg(Decoder.ffo)
// 写回
when(readFinish && (executeFinish || writeMv || executeFinishReg)) {
Expand Down Expand Up @@ -1627,8 +1618,7 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa
requestRegDequeue.ready := executionReady && slotReady && (!gatherNeedRead || gatherReadFinish) &&
instructionRAWReady && instructionIndexFree

// TODO: change to `requestRegDequeue.fire`.
instructionToSlotOH := Mux(requestRegDequeue.ready, slotToEnqueue, 0.U)
instructionToSlotOH := Mux(requestRegDequeue.fire, slotToEnqueue, 0.U)

// instruction commit
{
Expand Down
1 change: 0 additions & 1 deletion t1/src/VectorWrapper.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ class VectorWrapper(parameter: T1Parameter) extends Module {
val response: ValidIO[VResponse] = IO(Valid(new VResponse(parameter.xLen)))
val csrInterface: CSRInterface = IO(Input(new CSRInterface(parameter.laneParam.vlMaxBits)))
val storeBufferClear: Bool = IO(Input(Bool()))
// TODO: multiple LSU support.
val memoryPorts: Vec[TLBundle] = IO(Vec(parameter.lsuBankParameters.size, parameter.tlParam.bundle()))

// v主体
Expand Down
2 changes: 1 addition & 1 deletion t1/src/laneStage/LaneExecutionBridge.scala
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class LaneExecutionBridge(parameter: LaneParameter, isLastSlot: Boolean, slotInd
// data in executionRecord is narrow type
val narrowInRecord: Bool = !executionRecord.decodeResult(Decoder.crossWrite) &&
executionRecord.decodeResult(Decoder.crossRead)
// todo: Need to collapse the results of combined calculations
// reduceReady is false: Need to collapse the results of combined calculations
val reduceReady: Bool = WireDefault(true.B)
val sendFoldReduce: Option[Bool] = Option.when(isLastSlot)(Wire(Bool()))
val recordQueueReadyForNoExecute = Wire(Bool())
Expand Down
1 change: 0 additions & 1 deletion t1/src/laneStage/LaneStage0.scala
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,6 @@ class LaneStage0(parameter: LaneParameter, isLastSlot: Boolean) extends
enqueue.bits.maskForMaskGroup,
(-1.S(parameter.datapathWidth.W)).asUInt
)
// TODO: use 'record.maskGroupedOrR' & update it
val maskForDataGroup: UInt =
VecInit(maskCorrection.asBools.grouped(dataGroupSize).map(_.reduce(_ || _)).toSeq).asUInt
val groupFilterByMask = maskForDataGroup & groupFilter
Expand Down
2 changes: 1 addition & 1 deletion t1/src/laneStage/LaneStage3.scala
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class LaneStage3Enqueue(parameter: LaneParameter, isLastSlot: Boolean) extends B
// pipe state
val decodeResult: DecodeBundle = Decoder.bundle(parameter.fpuEnable)
val instructionIndex: UInt = UInt(parameter.instructionIndexBits.W)
// todo: Need real-time status
// Need real-time status, no pipe
val ffoByOtherLanes: Bool = Bool()
val loadStore: Bool = Bool()
/** vd or rd */
Expand Down
12 changes: 4 additions & 8 deletions t1/src/lsu/SimpleAccessUnit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -295,8 +295,7 @@ class SimpleAccessUnit(param: MSHRParam) extends Module with LSUPublic {
1.U,
(1.U << csrInterface.vlmul(1, 0)).asUInt(3, 0)
),
// TODO: reset to 0.U
1.U,
0.U,
lsuRequest.valid
)

Expand Down Expand Up @@ -367,7 +366,7 @@ class SimpleAccessUnit(param: MSHRParam) extends Module with LSUPublic {
segmentIndex := Mux(segmentEnd || lsuRequest.valid, 0.U, segmentIndexNext)
}

// TODO: why [[!isSegmentLoadStore]]? alias segmentEnd
// [[!isSegmentLoadStore]]: segSize = 1 -> always segmentEnd
val lastElementForSegment = !isSegmentLoadStore || segmentEnd

/** signal that indicates this is the last transaction for the element (with handshake) */
Expand All @@ -394,11 +393,8 @@ class SimpleAccessUnit(param: MSHRParam) extends Module with LSUPublic {
/** unsent memory transactions to s0. */
val unsentMemoryRequests: UInt = (~sentMemoryRequests).asUInt

/** mask [[unsentMemoryRequests]]
* TODO: maskFilter = maskReg & unsentMemoryRequests
*/
val maskedUnsentMemoryRequests: UInt = Wire(UInt(param.maskGroupWidth.W))
maskedUnsentMemoryRequests := maskReg & unsentMemoryRequests
/** mask [[unsentMemoryRequests]] */
val maskedUnsentMemoryRequests: UInt = (maskReg & unsentMemoryRequests).asUInt(param.maskGroupWidth - 1, 0)

/** find the next [[maskedUnsentMemoryRequests]] */
val findFirstMaskedUnsentMemoryRequests: UInt = ffo(maskedUnsentMemoryRequests)
Expand Down
3 changes: 1 addition & 2 deletions t1/src/lsu/StrideBase.scala
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,7 @@ abstract class StrideBase(param: MSHRParam) extends Module {
1.U,
(1.U << csrInterface.vlmul(1, 0)).asUInt(3, 0)
),
// TODO: reset to 0.U
1.U,
0.U,
lsuRequest.valid
)

Expand Down
1 change: 0 additions & 1 deletion t1/src/vfu/ReduceAdder.scala
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ class ReduceAdder(datapathWidth: Int) extends Module {
@public
val response = IO(Output(new ReduceAdderResponse(datapathWidth)))

// todo: decode
// ["add", "sub", "slt", "sle", "sgt", "sge", "max", "min", "seq", "sne", "adc", "sbc"]
val uopOH: UInt = UIntToOH(request.opcode)(11, 0)
val isSub: Bool = !(uopOH(0) || uopOH(10))
Expand Down
4 changes: 0 additions & 4 deletions t1/src/vrf/VRF.scala
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,6 @@ object VRFParam {
* - we can have more memory ports.
* - a big VRF memory is split into small memories, the shell of memory contributes more area...
*
* TODO: change to use 32bits memory + mask,
* use portFactor to increase port number
*
* TODO: add ECC cc @sharzyL
* 8bits -> 5bits
* 16bits -> 6bits
Expand Down Expand Up @@ -126,7 +123,6 @@ class VRFProbe(regNumBits: Int, offsetBits: Int, instructionIndexSize: Int, data
* - out of order chaining hazard detection:
* TODO: move to Top.
*
* TODO: implement [[parameter.portFactor]] for increasing VRF bandwidth.
* TODO: probe each ports to benchmark the bandwidth.
*/
@instantiable
Expand Down

0 comments on commit 1c3f894

Please sign in to comment.