From 48fdf547e9195984a8db94d0ecd5d8bf12e4b09a Mon Sep 17 00:00:00 2001 From: Lucas-Wye Date: Sun, 7 Jul 2024 19:32:14 +0800 Subject: [PATCH 1/5] [rtl] support zvbb --- configgen/generated/blastoise.json | 83 +------- configgen/generated/machamp.json | 83 +------- configgen/generated/psyduck.json | 191 ++++++++++++++++++ configgen/generated/sandslash.json | 131 +----------- configgen/src/Main.scala | 60 +++++- t1/src/LaneZvbb.scala | 178 ++++++++++++++++ t1/src/T1.scala | 14 +- t1/src/VectorFunctionUnit.scala | 6 +- t1/src/decoder/Decoder.scala | 25 ++- t1/src/decoder/InstructionDocumentation.scala | 17 ++ t1/src/decoder/T1DecodePattern.scala | 1 + t1/src/decoder/attribute/isCrosswrite.scala | 4 + t1/src/decoder/attribute/isItype.scala | 3 + t1/src/decoder/attribute/isPopcount.scala | 1 + t1/src/decoder/attribute/isScheduler.scala | 2 +- t1/src/decoder/attribute/isSreadvd.scala | 2 +- t1/src/decoder/attribute/isSwrite.scala | 6 +- t1/src/decoder/attribute/isUnsigned0.scala | 16 ++ t1/src/decoder/attribute/isUnsigned1.scala | 16 ++ t1/src/decoder/attribute/isVtype.scala | 5 + t1/src/decoder/attribute/isZvbb.scala | 50 +++++ t1/src/decoder/attribute/uop.scala | 3 +- t1/src/decoder/attribute/zvbbUop.scala | 95 +++++++++ 23 files changed, 688 insertions(+), 304 deletions(-) create mode 100644 configgen/generated/psyduck.json create mode 100644 t1/src/LaneZvbb.scala create mode 100644 t1/src/decoder/attribute/isZvbb.scala create mode 100644 t1/src/decoder/attribute/zvbbUop.scala diff --git a/configgen/generated/blastoise.json b/configgen/generated/blastoise.json index d0e26eb82..290ef86c1 100644 --- a/configgen/generated/blastoise.json +++ b/configgen/generated/blastoise.json @@ -6,86 +6,6 @@ "Zve32f" ], "t1customInstructions": [], - "lsuBankParameters": [ - { - "name": "scalar", - "region": "b00??????????????????????????????", - "beatbyte": 8, - "accessScalar": true - }, - { - "name": "ddrBank0", - "region": "b01???????????????????????00?????\nb10???????????????????????00?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank1", - "region": "b01???????????????????????01?????\nb10???????????????????????01?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank2", - "region": "b01???????????????????????10?????\nb10???????????????????????10?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank3", - "region": "b01???????????????????????11?????\nb10???????????????????????11?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank0", - "region": "b11000000000?????????????000?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank1", - "region": "b11000000000?????????????001?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank2", - "region": "b11000000000?????????????010?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank3", - "region": "b11000000000?????????????011?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank4", - "region": "b11000000000?????????????100?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank5", - "region": "b11000000000?????????????101?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank6", - "region": "b11000000000?????????????110?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank7", - "region": "b11000000000?????????????111?????", - "beatbyte": 8, - "accessScalar": false - } - ], "vrfBankSize": 1, "vrfRamType": "org.chipsalliance.t1.rtl.vrf.RamType.p0rwp1rw", "vfuInstantiateParameter": { @@ -246,7 +166,8 @@ 3 ] ] - ] + ], + "zvbbModuleParameters": [] } }, "generator": "org.chipsalliance.t1.rtl.T1" diff --git a/configgen/generated/machamp.json b/configgen/generated/machamp.json index dc0a4b2d9..ceeaf5e59 100644 --- a/configgen/generated/machamp.json +++ b/configgen/generated/machamp.json @@ -6,86 +6,6 @@ "Zve32x" ], "t1customInstructions": [], - "lsuBankParameters": [ - { - "name": "scalar", - "region": "b00??????????????????????????????", - "beatbyte": 8, - "accessScalar": true - }, - { - "name": "ddrBank0", - "region": "b01??????????????????????00??????\nb10??????????????????????00??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank1", - "region": "b01??????????????????????01??????\nb10??????????????????????01??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank2", - "region": "b01??????????????????????10??????\nb10??????????????????????10??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank3", - "region": "b01??????????????????????11??????\nb10??????????????????????11??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank0", - "region": "b11000000000????????????000??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank1", - "region": "b11000000000????????????001??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank2", - "region": "b11000000000????????????010??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank3", - "region": "b11000000000????????????011??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank4", - "region": "b11000000000????????????100??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank5", - "region": "b11000000000????????????101??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank6", - "region": "b11000000000????????????110??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank7", - "region": "b11000000000????????????111??????", - "beatbyte": 8, - "accessScalar": false - } - ], "vrfBankSize": 2, "vrfRamType": "org.chipsalliance.t1.rtl.vrf.RamType.p0rp1w", "vfuInstantiateParameter": { @@ -230,7 +150,8 @@ ] ] ], - "floatModuleParameters": [] + "floatModuleParameters": [], + "zvbbModuleParameters": [] } }, "generator": "org.chipsalliance.t1.rtl.T1" diff --git a/configgen/generated/psyduck.json b/configgen/generated/psyduck.json new file mode 100644 index 000000000..04a2f3572 --- /dev/null +++ b/configgen/generated/psyduck.json @@ -0,0 +1,191 @@ +{ + "parameter": { + "vLen": 512, + "dLen": 256, + "extensions": [ + "Zve32f", + "Zvbb" + ], + "t1customInstructions": [], + "vrfBankSize": 1, + "vrfRamType": "org.chipsalliance.t1.rtl.vrf.RamType.p0rwp1rw", + "vfuInstantiateParameter": { + "slotCount": 4, + "logicModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.MaskedLogic" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "aluModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneAdder" + }, + [ + 0 + ] + ], + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneAdder" + }, + [ + 1 + ] + ], + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneAdder" + }, + [ + 2 + ] + ], + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneAdder" + }, + [ + 3 + ] + ] + ], + "shifterModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneShifter" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "mulModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 2 + }, + "generator": "org.chipsalliance.t1.rtl.LaneMul" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "divModuleParameters": [], + "divfpModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneDivFP" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "otherModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "vlMaxBits": 10, + "groupNumberBits": 4, + "laneNumberBits": 3, + "dataPathByteWidth": 4, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.OtherUnit" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "floatModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 3 + }, + "generator": "org.chipsalliance.t1.rtl.LaneFloat" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "zvbbModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 3 + }, + "generator": "org.chipsalliance.t1.rtl.LaneZvbb" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ] + } + }, + "generator": "org.chipsalliance.t1.rtl.T1" +} \ No newline at end of file diff --git a/configgen/generated/sandslash.json b/configgen/generated/sandslash.json index 5ae0cb6b3..688085fe1 100644 --- a/configgen/generated/sandslash.json +++ b/configgen/generated/sandslash.json @@ -6,134 +6,6 @@ "Zve32x" ], "t1customInstructions": [], - "lsuBankParameters": [ - { - "name": "scalar", - "region": "b00??????????????????????????????", - "beatbyte": 8, - "accessScalar": true - }, - { - "name": "ddrBank0", - "region": "b01?????????????????????00???????\nb10?????????????????????00???????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank1", - "region": "b01?????????????????????01???????\nb10?????????????????????01???????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank2", - "region": "b01?????????????????????10???????\nb10?????????????????????10???????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank3", - "region": "b01?????????????????????11???????\nb10?????????????????????11???????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank0", - "region": "b1100000000?????????0000?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank1", - "region": "b1100000000?????????0001?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank2", - "region": "b1100000000?????????0010?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank3", - "region": "b1100000000?????????0011?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank4", - "region": "b1100000000?????????0100?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank5", - "region": "b1100000000?????????0101?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank6", - "region": "b1100000000?????????0110?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank7", - "region": "b1100000000?????????0111?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank8", - "region": "b1100000000?????????1000?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank9", - "region": "b1100000000?????????1001?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank10", - "region": "b1100000000?????????1010?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank11", - "region": "b1100000000?????????1011?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank12", - "region": "b1100000000?????????1100?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank13", - "region": "b1100000000?????????1101?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank14", - "region": "b1100000000?????????1110?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank15", - "region": "b1100000000?????????1111?????????", - "beatbyte": 8, - "accessScalar": false - } - ], "vrfBankSize": 4, "vrfRamType": "org.chipsalliance.t1.rtl.vrf.RamType.p0rw", "vfuInstantiateParameter": { @@ -278,7 +150,8 @@ ] ] ], - "floatModuleParameters": [] + "floatModuleParameters": [], + "zvbbModuleParameters": [] } }, "generator": "org.chipsalliance.t1.rtl.T1" diff --git a/configgen/src/Main.scala b/configgen/src/Main.scala index c48760fef..88e3bc326 100644 --- a/configgen/src/Main.scala +++ b/configgen/src/Main.scala @@ -99,7 +99,59 @@ object Main { ), Seq(0, 1, 2, 3))), floatModuleParameters = - Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3))) + Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3))), + zvbbModuleParameters = Seq() + ) + ) + if (doEmit) param.emit(targetFile) + param + } + + // DLEN256 VLEN256; FP; VRF p0rw,p1rw bank1; LSU bank8 beatbyte 8; Zvbb + @main def psyduck( + @arg(name = "target-file", short = 't') targetFile: os.Path, + @arg(name = "emit", short = 'e', doc = "emit config") doEmit: Boolean = true + ): T1Parameter = { + val vLen = 512 + val dLen = 256 + val param = T1Parameter( + vLen, + dLen, + extensions = Seq("Zve32f", "Zvbb"), + t1customInstructions = Nil, + vrfBankSize = 1, + vrfRamType = RamType.p0rwp1rw, + vfuInstantiateParameter = VFUInstantiateParameter( + slotCount = 4, + logicModuleParameters = Seq( + (SerializableModuleGenerator(classOf[MaskedLogic], LogicParam(32, 1)), Seq(0, 1, 2, 3)) + ), + aluModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(0)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(1)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(2)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(3)) + ), + shifterModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneShifter], LaneShifterParameter(32, 1)), Seq(0, 1, 2, 3)) + ), + mulModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneMul], LaneMulParam(32, 2)), Seq(0, 1, 2, 3)) + ), + divModuleParameters = Seq(), + divfpModuleParameters = + Seq((SerializableModuleGenerator(classOf[LaneDivFP], LaneDivFPParam(32, 1)), Seq(0, 1, 2, 3))), + otherModuleParameters = + Seq(( + SerializableModuleGenerator( + classOf[OtherUnit], + OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1) + ), + Seq(0, 1, 2, 3))), + floatModuleParameters = + Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3))), + zvbbModuleParameters = + Seq((SerializableModuleGenerator(classOf[LaneZvbb], LaneZvbbParam(32, 3)), Seq(0, 1, 2, 3))) ) ) if (doEmit) param.emit(targetFile) @@ -148,7 +200,8 @@ object Main { OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1) ), Seq(0, 1, 2, 3))), - floatModuleParameters = Seq() + floatModuleParameters = Seq(), + zvbbModuleParameters = Seq() // TODO ) ) if (doEmit) param.emit(targetFile) @@ -197,7 +250,8 @@ object Main { OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1) ), Seq(0, 1, 2, 3))), - floatModuleParameters = Seq() + floatModuleParameters = Seq(), + zvbbModuleParameters = Seq() // TODO ) ) if (doEmit) param.emit(targetFile) diff --git a/t1/src/LaneZvbb.scala b/t1/src/LaneZvbb.scala new file mode 100644 index 000000000..a438f363c --- /dev/null +++ b/t1/src/LaneZvbb.scala @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.rtl + +import chisel3.experimental.hierarchy.instantiable +import chisel3._ +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util._ +import org.chipsalliance.t1.rtl.decoder.{BoolField, Decoder} + +object LaneZvbbParam { + implicit def rw: upickle.default.ReadWriter[LaneZvbbParam] = upickle.default.macroRW +} + +case class LaneZvbbParam(datapathWidth: Int, latency: Int) extends VFUParameter with SerializableModuleParameter { + val inputBundle = new LaneZvbbRequest(datapathWidth) + val decodeField: BoolField = Decoder.zvbb + val outputBundle = new LaneZvbbResponse(datapathWidth) + override val NeedSplit: Boolean = false +} + +class LaneZvbbRequest(datapathWidth: Int) extends VFUPipeBundle { + val src = Vec(3, UInt(datapathWidth.W)) + val opcode = UInt(4.W) + val vSew = UInt(2.W) + val shifterSize = UInt(log2Ceil(datapathWidth).W) +} + +class LaneZvbbResponse(datapathWidth: Int) extends VFUPipeBundle { + val data = UInt(datapathWidth.W) +} + +@instantiable +class LaneZvbb(val parameter: LaneZvbbParam) + extends VFUModule(parameter) with SerializableModule[LaneZvbbParam]{ + val response: LaneZvbbResponse = Wire(new LaneZvbbResponse(parameter.datapathWidth)) + val request : LaneZvbbRequest = connectIO(response).asTypeOf(parameter.inputBundle) + + val zvbbSrc: UInt = request.src(1) // vs2 + val zvbbRs: UInt = request.src(0) // vs1 or rs1 + val vSew: UInt = UIntToOH(request.vSew) // sew = 0, 1, 2 + + val zvbbBRev = VecInit(zvbbSrc.asBools.reverse).asUInt // element's bit reverse + val zvbbBRev8 = VecInit(zvbbSrc.asBools.grouped(8).map(s => VecInit(s.reverse)).toSeq).asUInt // byte's bit reverse + val zvbbRev8 = VecInit(zvbbSrc.asBools.grouped(8).map(s => VecInit(s)).toSeq.reverse).asUInt // element's byte reverse + + val zvbbSrc16a = zvbbSrc(parameter.datapathWidth-1, parameter.datapathWidth-16) + val zvbbSrc16b = zvbbSrc(parameter.datapathWidth-17, parameter.datapathWidth-32) + val zvbbSrc8a = zvbbSrc(parameter.datapathWidth-1, parameter.datapathWidth-8) + val zvbbSrc8b = zvbbSrc(parameter.datapathWidth-9, parameter.datapathWidth-16) + val zvbbSrc8c = zvbbSrc(parameter.datapathWidth-17, parameter.datapathWidth-24) + val zvbbSrc8d = zvbbSrc(parameter.datapathWidth-25, parameter.datapathWidth-32) + + val zvbbRs16a = zvbbRs(parameter.datapathWidth-1, parameter.datapathWidth-16) + val zvbbRs16b = zvbbRs(parameter.datapathWidth-17, parameter.datapathWidth-32) + val zvbbRs8a = zvbbRs(parameter.datapathWidth-1, parameter.datapathWidth-8) + val zvbbRs8b = zvbbRs(parameter.datapathWidth-9, parameter.datapathWidth-16) + val zvbbRs8c = zvbbRs(parameter.datapathWidth-17, parameter.datapathWidth-24) + val zvbbRs8d = zvbbRs(parameter.datapathWidth-25, parameter.datapathWidth-32) + + val zero32: UInt = 0.U(32.W) + val zero16: UInt = 0.U(16.W) + val zero10: UInt = 0.U(11.W) + val zero8: UInt = 0.U(8.W) + val zero3: UInt = 0.U(4.W) + + val zvbbCLZ32: UInt = (32.U - PopCount(scanRightOr(zvbbSrc))).asUInt + val zvbbCLZ16: UInt = { + val clz16a: UInt = (16.U - PopCount(scanRightOr(zvbbSrc16a))).asUInt(4, 0) + val clz16b: UInt = (16.U - PopCount(scanRightOr(zvbbSrc16b))).asUInt(4, 0) + zero10 ## clz16a ## zero10 ## clz16b + } + val zvbbCLZ8: UInt = { + val clz8a: UInt = (8.U - PopCount(scanRightOr(zvbbSrc8a))).asUInt(3, 0) + val clz8b: UInt = (8.U - PopCount(scanRightOr(zvbbSrc8b))).asUInt(3, 0) + val clz8c: UInt = (8.U - PopCount(scanRightOr(zvbbSrc8c))).asUInt(3, 0) + val clz8d: UInt = (8.U - PopCount(scanRightOr(zvbbSrc8d))).asUInt(3, 0) + zero3 ## clz8a ## zero3 ## clz8b ## zero3 ## clz8c ## zero3 ## clz8d + } + val zvbbCLZ: UInt = Mux1H(vSew, Seq( + zvbbCLZ8, + zvbbCLZ16, + zvbbCLZ32, + )) + + val zvbbCTZ32 = (32.U - PopCount(scanLeftOr(zvbbSrc))).asUInt + val zvbbCTZ16: UInt = { + val ctz16a: UInt = (16.U - PopCount(scanLeftOr(zvbbSrc16a))).asUInt(4, 0) + val ctz16b: UInt = (16.U - PopCount(scanLeftOr(zvbbSrc16b))).asUInt(4, 0) + zero10 ## ctz16a ## zero10 ## ctz16b + } + val zvbbCTZ8: UInt = { + val ctz8a: UInt = (8.U - PopCount(scanLeftOr(zvbbSrc8a))).asUInt(3, 0) + val ctz8b: UInt = (8.U - PopCount(scanLeftOr(zvbbSrc8b))).asUInt(3, 0) + val ctz8c: UInt = (8.U - PopCount(scanLeftOr(zvbbSrc8c))).asUInt(3, 0) + val ctz8d: UInt = (8.U - PopCount(scanLeftOr(zvbbSrc8d))).asUInt(3, 0) + zero3 ## ctz8a ## zero3 ## ctz8b ## zero3 ## ctz8c ## zero3 ## ctz8d + } + val zvbbCTZ = Mux1H(vSew, Seq( + zvbbCTZ8, + zvbbCTZ16, + zvbbCTZ32, + )) + + val zvbbROL32 = zvbbSrc.rotateLeft(zvbbRs(4, 0)).asUInt + val zvbbROL16: UInt = { + val rol16a = zvbbSrc16a.rotateLeft(zvbbRs16a(3, 0)).asUInt(15, 0) + val rol16b = zvbbSrc16b.rotateLeft(zvbbRs16b(3, 0)).asUInt(15, 0) + rol16a ## rol16b + } + val zvbbROL8: UInt = { + val rol8a = zvbbSrc8a.rotateLeft(zvbbRs8a(2, 0)).asUInt(7, 0) + val rol8b = zvbbSrc8b.rotateLeft(zvbbRs8b(2, 0)).asUInt(7, 0) + val rol8c = zvbbSrc8c.rotateLeft(zvbbRs8c(2, 0)).asUInt(7, 0) + val rol8d = zvbbSrc8d.rotateLeft(zvbbRs8d(2, 0)).asUInt(7, 0) + rol8a ## rol8b ## rol8c ## rol8d + } + val zvbbROL = Mux1H(vSew, Seq( + zvbbROL8, + zvbbROL16, + zvbbROL32, + )) + + val zvbbROR32 = zvbbSrc.rotateRight(zvbbRs(4, 0)).asUInt + val zvbbROR16: UInt = { + val ror16a = zvbbSrc16a.rotateRight(zvbbRs16a(3, 0)).asUInt(15, 0) + val ror16b = zvbbSrc16b.rotateRight(zvbbRs16b(3, 0)).asUInt(15, 0) + ror16a ## ror16b + } + val zvbbROR8: UInt = { + val ror8a = zvbbSrc8a.rotateRight(zvbbRs8a(2, 0)).asUInt(7, 0) + val ror8b = zvbbSrc8b.rotateRight(zvbbRs8b(2, 0)).asUInt(7, 0) + val ror8c = zvbbSrc8c.rotateRight(zvbbRs8c(2, 0)).asUInt(7, 0) + val ror8d = zvbbSrc8d.rotateRight(zvbbRs8d(2, 0)).asUInt(7, 0) + ror8a ## ror8b ## ror8c ## ror8d + } + val zvbbROR = Mux1H(vSew, Seq( + zvbbROR8, + zvbbROR16, + zvbbROR32, + )) + + val zvbbSLL64_32 = ((zero32 ## zvbbSrc).asUInt << zvbbRs(4, 0)).asUInt(31, 0) + val zvbbSLL64_16: UInt = { + val sll64_16a = ((zero16 ## zvbbSrc16a).asUInt << zvbbRs16a(3, 0)).asUInt(15, 0) + val sll64_16b = ((zero16 ## zvbbSrc16b).asUInt << zvbbRs16b(3, 0)).asUInt(15, 0) + sll64_16a ## sll64_16b + } + val zvbbSLL64_8: UInt = { + val sll64_8a = ((zero8 ## zvbbSrc8a).asUInt << zvbbRs8a(2, 0)).asUInt(7, 0) + val sll64_8b = ((zero8 ## zvbbSrc8b).asUInt << zvbbRs8b(2, 0)).asUInt(7, 0) + val sll64_8c = ((zero8 ## zvbbSrc8c).asUInt << zvbbRs8c(2, 0)).asUInt(7, 0) + val sll64_8d = ((zero8 ## zvbbSrc8d).asUInt << zvbbRs8d(2, 0)).asUInt(7, 0) + sll64_8a ## sll64_8b ## sll64_8c ## sll64_8d + } + val zvbbSLL64 = Mux1H(vSew, Seq( + zvbbSLL64_8, + zvbbSLL64_16, + zvbbSLL64_32, + )) + val zvbbSLL = zvbbSLL64(parameter.datapathWidth-1, 0) + + val zvbbANDN = zvbbSrc & (~zvbbRs) + + response.data := Mux1H(UIntToOH(request.opcode), Seq( + zvbbBRev, + zvbbBRev8, + zvbbRev8, + zvbbCLZ, + zvbbCTZ, + zvbbROL, + zvbbROR, + zvbbSLL, + zvbbANDN, + )) +} + diff --git a/t1/src/T1.scala b/t1/src/T1.scala index 4dca51f4e..1395dd89b 100644 --- a/t1/src/T1.scala +++ b/t1/src/T1.scala @@ -118,7 +118,12 @@ case class T1Parameter( val allInstructions: Seq[Instruction] = { org.chipsalliance.rvdecoderdb.instructions(org.chipsalliance.rvdecoderdb.extractResource(getClass.getClassLoader)) - .filter(instruction => instruction.instructionSet.name == "rv_v")++ + .filter{ + instruction => instruction.instructionSet.name match { + case "rv_v" => true + case "rv_zvbb" => if (zvbbEnable) true else false + case _ => false + }} ++ t1customInstructions.map(_.instruction) }.toSeq.sortBy(_.instructionSet.name).filter{ insn => insn.name match { @@ -127,7 +132,7 @@ case class T1Parameter( } } - require(extensions.forall(Seq("Zve32x", "Zve32f").contains), "unsupported extension.") + require(extensions.forall(Seq("Zve32x", "Zve32f", "Zvbb").contains), "unsupported extension.") // TODO: require bank not overlap /** xLen of T1, we currently only support 32. */ val xLen: Int = 32 @@ -144,6 +149,9 @@ case class T1Parameter( /** does t1 has floating datapath? */ val fpuEnable: Boolean = extensions.contains("Zve32f") + /** support of zvbb */ + lazy val zvbbEnable: Boolean = extensions.contains("Zvbb") + /** how many chaining does T1 support, this is not a parameter yet. */ val chainingSize: Int = 4 @@ -217,7 +225,7 @@ case class T1Parameter( // and the values are their respective delays. val crossLaneConnectCycles: Seq[Seq[Int]] = Seq.tabulate(laneNumber)(_ => Seq(1, 1)) - val decoderParam: DecoderParam = DecoderParam(fpuEnable, allInstructions) + val decoderParam: DecoderParam = DecoderParam(fpuEnable, zvbbEnable, allInstructions) /** paraemter for AXI4. */ val axi4BundleParameter: AXI4BundleParameter = AXI4BundleParameter( diff --git a/t1/src/VectorFunctionUnit.scala b/t1/src/VectorFunctionUnit.scala index 25ff98a49..cf06a66af 100644 --- a/t1/src/VectorFunctionUnit.scala +++ b/t1/src/VectorFunctionUnit.scala @@ -105,7 +105,8 @@ case class VFUInstantiateParameter( divModuleParameters: Seq[(SerializableModuleGenerator[LaneDiv, LaneDivParam], Seq[Int])], divfpModuleParameters: Seq[(SerializableModuleGenerator[LaneDivFP, LaneDivFPParam], Seq[Int])], otherModuleParameters: Seq[(SerializableModuleGenerator[OtherUnit, OtherUnitParam], Seq[Int])], - floatModuleParameters: Seq[(SerializableModuleGenerator[LaneFloat, LaneFloatParam], Seq[Int])] + floatModuleParameters: Seq[(SerializableModuleGenerator[LaneFloat, LaneFloatParam], Seq[Int])], + zvbbModuleParameters: Seq[(SerializableModuleGenerator[LaneZvbb, LaneZvbbParam], Seq[Int])] ) { val genVec: Seq[(SerializableModuleGenerator[_ <: VFUModule, _ <: VFUParameter], Seq[Int])] = logicModuleParameters ++ @@ -115,7 +116,8 @@ case class VFUInstantiateParameter( divModuleParameters ++ divfpModuleParameters ++ otherModuleParameters ++ - floatModuleParameters + floatModuleParameters ++ + zvbbModuleParameters genVec.foreach { case (_, connect) => connect.foreach(connectIndex => require(connectIndex < slotCount)) diff --git a/t1/src/decoder/Decoder.scala b/t1/src/decoder/Decoder.scala index 3ebe07df3..3a0299389 100644 --- a/t1/src/decoder/Decoder.scala +++ b/t1/src/decoder/Decoder.scala @@ -13,7 +13,7 @@ import org.chipsalliance.t1.rtl.decoder.attribute._ object DecoderParam { implicit def rwP: upickle.default.ReadWriter[DecoderParam] = upickle.default.macroRW } -case class DecoderParam(fpuEnable: Boolean, allInstructions: Seq[Instruction]) +case class DecoderParam(fpuEnable: Boolean, zvbbEnable: Boolean, allInstructions: Seq[Instruction]) trait T1DecodeFiled[D <: Data] extends DecodeField[T1DecodePattern, D] with FieldName @@ -221,6 +221,10 @@ object Decoder { override def getTriState(pattern: T1DecodePattern): TriState = pattern.isOrderreduce.value } + object zvbb extends BoolField { + override def getTriState(pattern: T1DecodePattern): TriState = pattern.isZvbb.value + } + object topUop extends T1TopUopField { override def genTable(pattern: T1DecodePattern): BitPat = pattern.topUop.value match { case _: TopT0.type => BitPat("b000") @@ -328,6 +332,19 @@ object Decoder { case _: zeroUop0.type => BitPat("b0000") case _ => BitPat.dontCare(4) } + case zvbbCase: ZvbbUOPType => + zvbbCase match { + case _: zvbbUop0.type => BitPat("b0000") // brev + case _: zvbbUop1.type => BitPat("b0001") // brev8 + case _: zvbbUop2.type => BitPat("b0010") // rev8 + case _: zvbbUop3.type => BitPat("b0011") // clz + case _: zvbbUop4.type => BitPat("b0100") // ctz + case _: zvbbUop5.type => BitPat("b0101") // rol + case _: zvbbUop6.type => BitPat("b0110") // ror + case _: zvbbUop7.type => BitPat("b0111") // wsll + case _: zvbbUop8.type => BitPat("b1000") // andn + case _ => BitPat.dontCare(4) + } case _ => BitPat.dontCare(4) } } @@ -399,6 +416,12 @@ object Decoder { orderReduce ) else Seq() + } ++ { + if (param.zvbbEnable) + Seq( + zvbb, + ) + else Seq() } def allDecodePattern(param: DecoderParam): Seq[T1DecodePattern] = param.allInstructions.map(T1DecodePattern(_, param)).toSeq.sortBy(_.instruction.name) diff --git a/t1/src/decoder/InstructionDocumentation.scala b/t1/src/decoder/InstructionDocumentation.scala index 22cf95823..86c5a7e35 100644 --- a/t1/src/decoder/InstructionDocumentation.scala +++ b/t1/src/decoder/InstructionDocumentation.scala @@ -422,5 +422,22 @@ case class InstructionDocumentation(instruction: Instruction, param: DecoderPara case "vzext.vf2" => "TODO!" case "vzext.vf4" => "TODO!" case "vzext.vf8" => "TODO!" + // rv_zvbb + case "vandn.vv" => "TODO!" + case "vandn.vx" => "TODO!" + case "vbrev.v" => "TODO!" + case "vbrev8.v" => "TODO!" + case "vrev8.v" => "TODO!" + case "vclz.v" => "TODO!" + case "vctz.v" => "TODO!" + case "vcpop.v" => "TODO!" + case "vrol.vv" => "TODO!" + case "vrol.vx" => "TODO!" + case "vror.vv" => "TODO!" + case "vror.vx" => "TODO!" + case "vror.vi" => "TODO!" + case "vwsll.vv" => "TODO!" + case "vwsll.vx" => "TODO!" + case "vwsll.vi" => "TODO!" } } diff --git a/t1/src/decoder/T1DecodePattern.scala b/t1/src/decoder/T1DecodePattern.scala index d1bb84930..5c7d10733 100644 --- a/t1/src/decoder/T1DecodePattern.scala +++ b/t1/src/decoder/T1DecodePattern.scala @@ -107,6 +107,7 @@ case class T1DecodePattern(instruction: Instruction, param: DecoderParam) extend def isVtype: isVtype = attribute.isVtype(this) def isVwmacc: isVwmacc = attribute.isVwmacc(this) def isWidenreduce: isWidenreduce = attribute.isWidenreduce(this) + def isZvbb: isZvbb = attribute.isZvbb(this) def fpExecutionType: FpExecutionType.Type = attribute.FpExecutionType(this) def topUop: TopUop = attribute.TopUop(this) def decoderUop: DecoderUop = attribute.DecoderUop(this) diff --git a/t1/src/decoder/attribute/isCrosswrite.scala b/t1/src/decoder/attribute/isCrosswrite.scala index cbe920dbb..bddbc3818 100644 --- a/t1/src/decoder/attribute/isCrosswrite.scala +++ b/t1/src/decoder/attribute/isCrosswrite.scala @@ -46,6 +46,10 @@ object isCrosswrite { "vwsubu.vx", "vwsubu.wv", "vwsubu.wx", + // rv_zvbb + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isItype.scala b/t1/src/decoder/attribute/isItype.scala index aafc0641c..5ba9baf2e 100644 --- a/t1/src/decoder/attribute/isItype.scala +++ b/t1/src/decoder/attribute/isItype.scala @@ -51,6 +51,9 @@ object isItype { "vssra.vi", "vssrl.vi", "vxor.vi", + // rv_zvbb + "vror.vi", + "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isPopcount.scala b/t1/src/decoder/attribute/isPopcount.scala index 0137b77b0..3a949c436 100644 --- a/t1/src/decoder/attribute/isPopcount.scala +++ b/t1/src/decoder/attribute/isPopcount.scala @@ -18,6 +18,7 @@ object isPopcount { def y(t1DecodePattern: T1DecodePattern): Boolean = { val allMatched = Seq( "vcpop.m", + "vcpop.v", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isScheduler.scala b/t1/src/decoder/attribute/isScheduler.scala index 229c45575..423b59a35 100644 --- a/t1/src/decoder/attribute/isScheduler.scala +++ b/t1/src/decoder/attribute/isScheduler.scala @@ -274,5 +274,5 @@ object isScheduler { } case class isScheduler(value: TriState) extends BooleanDecodeAttribute { - override val description: String = "lane will send request to Sequencer and wait ack from Sequencer. */ " + override val description: String = "lane will send request to Sequencer and wait ack from Sequencer. Instructions that will communicate with T1 top module.*/ " } diff --git a/t1/src/decoder/attribute/isSreadvd.scala b/t1/src/decoder/attribute/isSreadvd.scala index bf9fc6837..e6fa9bb76 100644 --- a/t1/src/decoder/attribute/isSreadvd.scala +++ b/t1/src/decoder/attribute/isSreadvd.scala @@ -307,5 +307,5 @@ object isSreadvd { } case class isSreadvd(value: TriState) extends BooleanDecodeAttribute { - override val description: String = "sReadVD -> !(ma || maskLogic) instruction need to read vd as operator. " + override val description: String = "sReadVD -> !(ma || maskLogic): instructions that need to read vd as the operator. " } diff --git a/t1/src/decoder/attribute/isSwrite.scala b/t1/src/decoder/attribute/isSwrite.scala index cfddf2e04..f16f28e5d 100644 --- a/t1/src/decoder/attribute/isSwrite.scala +++ b/t1/src/decoder/attribute/isSwrite.scala @@ -210,6 +210,10 @@ object isSwrite { "vzext.vf2", "vzext.vf4", "vzext.vf8", + // rv_zvbb + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } @@ -224,5 +228,5 @@ object isSwrite { } case class isSwrite(value: TriState) extends BooleanDecodeAttribute { - override val description: String = "sWrite -> targetRd || readOnly || crossWrite || maskDestination || reduce || loadStore instruction will write vd or rd(scalar) from outside of lane. It will request vrf wait, and lane will not write. " + override val description: String = "sWrite -> targetRd || readOnly || crossWrite || maskDestination || reduce || loadStore instruction will write vd or rd(scalar) from outside of lane. It will request vrf wait, and lane will not write. No write to vd when isSwrite is True!!!" } diff --git a/t1/src/decoder/attribute/isUnsigned0.scala b/t1/src/decoder/attribute/isUnsigned0.scala index c180180bd..fb041c3c7 100644 --- a/t1/src/decoder/attribute/isUnsigned0.scala +++ b/t1/src/decoder/attribute/isUnsigned0.scala @@ -130,6 +130,22 @@ object isUnsigned0 { "vzext.vf2", "vzext.vf4", "vzext.vf8", + // rv_zvbb + "vandn.vv", + "vandn.vx", + "vbrev.v", + "vbrev8.v", + "vrev8.v", + "vclz.v", + "vctz.v", + "vrol.vv", + "vrol.vx", + "vror.vv", + "vror.vx", + "vror.vi", + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isUnsigned1.scala b/t1/src/decoder/attribute/isUnsigned1.scala index 1f71f2310..cf4f517a0 100644 --- a/t1/src/decoder/attribute/isUnsigned1.scala +++ b/t1/src/decoder/attribute/isUnsigned1.scala @@ -102,6 +102,22 @@ object isUnsigned1 { "vzext.vf2", "vzext.vf4", "vzext.vf8", + // rv_zvbb + "vandn.vv", + "vandn.vx", + "vbrev.v", + "vbrev8.v", + "vrev8.v", + "vclz.v", + "vctz.v", + "vrol.vv", + "vrol.vx", + "vror.vv", + "vror.vx", + "vror.vi", + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isVtype.scala b/t1/src/decoder/attribute/isVtype.scala index 605588b08..7649d715a 100644 --- a/t1/src/decoder/attribute/isVtype.scala +++ b/t1/src/decoder/attribute/isVtype.scala @@ -181,6 +181,11 @@ object isVtype { "vzext.vf2", "vzext.vf4", "vzext.vf8", + // rv_zvbb + "vandn.vv", + "vrol.vv", + "vror.vv", + "vwsll.vv", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isZvbb.scala b/t1/src/decoder/attribute/isZvbb.scala new file mode 100644 index 000000000..c5735aaf9 --- /dev/null +++ b/t1/src/decoder/attribute/isZvbb.scala @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.rtl.decoder.attribute + +import org.chipsalliance.t1.rtl.decoder.T1DecodePattern + +object isZvbb { + def apply(t1DecodePattern: T1DecodePattern): isZvbb = + Seq( + y _ -> Y, + n _ -> N, + dc _ -> DC + ).collectFirst { + case (fn, tri) if fn(t1DecodePattern) => isZvbb(tri) + }.get + + def y(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched = if(t1DecodePattern.param.zvbbEnable) Seq( + "vandn.vv", + "vandn.vx", + "vbrev.v", + "vbrev8.v", + "vrev8.v", + "vclz.v", + "vctz.v", + "vrol.vv", + "vrol.vx", + "vror.vv", + "vror.vx", + "vror.vi", + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", + ) else Seq() + allMatched.contains(t1DecodePattern.instruction.name) + } + def n(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched = t1DecodePattern.param.allInstructions.filter(i => + !(y(t1DecodePattern) || dc(t1DecodePattern)) + ) + allMatched.contains(t1DecodePattern.instruction) + } + + def dc(t1DecodePattern: T1DecodePattern): Boolean = false +} + +case class isZvbb(value: TriState) extends BooleanDecodeAttribute { + override val description: String = "goes to [[org.chipsalliance.t1.rtl.LaneZvbb]]." +} diff --git a/t1/src/decoder/attribute/uop.scala b/t1/src/decoder/attribute/uop.scala index 66d8dbf02..97d49365c 100644 --- a/t1/src/decoder/attribute/uop.scala +++ b/t1/src/decoder/attribute/uop.scala @@ -15,7 +15,8 @@ object DecoderUop { isLogic.y(t1DecodePattern) -> LogicUop(t1DecodePattern), isShift.y(t1DecodePattern) -> ShiftUop(t1DecodePattern), isOther.y(t1DecodePattern) -> OtherUop(t1DecodePattern), - isZero.y(t1DecodePattern) -> ZeroUOP(t1DecodePattern) + isZero.y(t1DecodePattern) -> ZeroUOP(t1DecodePattern), + isZvbb.y(t1DecodePattern) -> ZvbbUOP(t1DecodePattern), ).collectFirst { case (fn, tpe) if fn => DecoderUop(tpe) } diff --git a/t1/src/decoder/attribute/zvbbUop.scala b/t1/src/decoder/attribute/zvbbUop.scala new file mode 100644 index 000000000..06524e95b --- /dev/null +++ b/t1/src/decoder/attribute/zvbbUop.scala @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.rtl.decoder.attribute + +import org.chipsalliance.t1.rtl.decoder.T1DecodePattern + +trait ZvbbUOPType extends Uop +object zvbbUop0 extends ZvbbUOPType // brev +object zvbbUop1 extends ZvbbUOPType // brev8 +object zvbbUop2 extends ZvbbUOPType // rev8 +object zvbbUop3 extends ZvbbUOPType // clz +object zvbbUop4 extends ZvbbUOPType // ctz +object zvbbUop5 extends ZvbbUOPType // rol +object zvbbUop6 extends ZvbbUOPType // ror +object zvbbUop7 extends ZvbbUOPType // wsll +object zvbbUop8 extends ZvbbUOPType // andn + +object ZvbbUOP { + def apply(t1DecodePattern: T1DecodePattern): Uop = { + Seq( + t0 _ -> zvbbUop0, + t1 _ -> zvbbUop1, + t2 _ -> zvbbUop2, + t3 _ -> zvbbUop3, + t4 _ -> zvbbUop4, + t5 _ -> zvbbUop5, + t6 _ -> zvbbUop6, + t7 _ -> zvbbUop7, + t8 _ -> zvbbUop8, + ).collectFirst { + case (fn, tpe) if fn(t1DecodePattern) => tpe + }.getOrElse(UopDC) + } + def t0(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vbrev.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t1(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vbrev8.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t2(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vrev8.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t3(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vclz.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t4(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vctz.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t5(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vrol.vv", + "vrol.vx", + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t6(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vror.vv", + "vror.vx", + "vror.vi", + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t7(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t8(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vandn.vv", + "vandn.vx", + ) + allMatched.contains(t1DecodePattern.instruction.name) + } +} From 4089b844c89a1e421ff40834d2d293254cebe88e Mon Sep 17 00:00:00 2001 From: Avimitin Date: Fri, 26 Jul 2024 13:37:05 +0800 Subject: [PATCH 2/5] [nix] bump rvv-codegen Signed-off-by: Avimitin --- nix/pkgs/rvv-codegen.nix | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nix/pkgs/rvv-codegen.nix b/nix/pkgs/rvv-codegen.nix index 4e6d3ed7b..bffddec26 100644 --- a/nix/pkgs/rvv-codegen.nix +++ b/nix/pkgs/rvv-codegen.nix @@ -11,10 +11,10 @@ buildGoModule { pname = "riscv-vector-test"; version = "unstable-2023-04-12"; src = fetchFromGitHub { - owner = "ksco"; + owner = "chipsalliance"; repo = "riscv-vector-tests"; - rev = "bafa717d37b9bef3e80b66a50b01c22f532306bc"; - hash = "sha256-C91HUDyMykS3qM9h+rJ2uKAJcKHkoakw9I+wwtco0m8="; + rev = "caae5c8fcf465be73266f9b3bd672f71a362548e"; + hash = "sha256-388MKOO+g4PjR3BcxiA8vNY7itDcIhz88vZmMZkbsj8="; }; doCheck = false; vendorHash = "sha256-9cQlivpHg6IDYpmgBp34n6BR/I0FIYnmrXCuiGmAhNE="; From 04892eefe35495831bb30bbfd69bbc615d7a81b7 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Fri, 26 Jul 2024 20:38:20 +0800 Subject: [PATCH 3/5] [ci] add psyduck into CI Signed-off-by: Avimitin --- .github/cases/psyduck/default.json | 529 +++++++++++++++++++++++++++++ .github/cases/psyduck/perf.json | 9 + tests/codegen/common.txt | 154 +++++---- 3 files changed, 623 insertions(+), 69 deletions(-) create mode 100644 .github/cases/psyduck/default.json create mode 100644 .github/cases/psyduck/perf.json diff --git a/.github/cases/psyduck/default.json b/.github/cases/psyduck/default.json new file mode 100644 index 000000000..460e5e84f --- /dev/null +++ b/.github/cases/psyduck/default.json @@ -0,0 +1,529 @@ +{ + "asm.mmm": 0, + "asm.smoke": 0, + "codegen.vaadd_vv": 0, + "codegen.vaadd_vx": 0, + "codegen.vaaddu_vv": 0, + "codegen.vaaddu_vx": 0, + "codegen.vadc_vim": 0, + "codegen.vadc_vvm": 0, + "codegen.vadc_vxm": 0, + "codegen.vadd_vi": 0, + "codegen.vadd_vv": 0, + "codegen.vadd_vx": 0, + "codegen.vand_vi": 0, + "codegen.vand_vv": 0, + "codegen.vand_vx": 0, + "codegen.vandn_vv": 0, + "codegen.vandn_vx": 0, + "codegen.vasub_vv": 0, + "codegen.vasub_vx": 0, + "codegen.vasubu_vv": 0, + "codegen.vasubu_vx": 0, + "codegen.vbrev8_v": 0, + "codegen.vbrev_v": 0, + "codegen.vclz_v": 0, + "codegen.vcompress_vm": 0, + "codegen.vcpop_m": 0, + "codegen.vcpop_v": 0, + "codegen.vctz_v": 0, + "codegen.vdiv_vv": 0, + "codegen.vdiv_vx": 0, + "codegen.vdivu_vv": 0, + "codegen.vdivu_vx": 0, + "codegen.vfadd_vf": 0, + "codegen.vfadd_vv": 0, + "codegen.vfclass_v": 0, + "codegen.vfcvt_f_x_v": 0, + "codegen.vfcvt_f_xu_v": 0, + "codegen.vfcvt_rtz_x_f_v": 0, + "codegen.vfcvt_rtz_xu_f_v": 0, + "codegen.vfcvt_x_f_v": 0, + "codegen.vfcvt_xu_f_v": 0, + "codegen.vfdiv_vf": 0, + "codegen.vfdiv_vv": 0, + "codegen.vfirst_m": 0, + "codegen.vfmacc_vf": 0, + "codegen.vfmacc_vv": 0, + "codegen.vfmadd_vf": 0, + "codegen.vfmadd_vv": 0, + "codegen.vfmax_vf": 0, + "codegen.vfmax_vv": 0, + "codegen.vfmerge_vfm": 0, + "codegen.vfmin_vf": 0, + "codegen.vfmin_vv": 0, + "codegen.vfmsac_vf": 0, + "codegen.vfmsac_vv": 0, + "codegen.vfmsub_vf": 0, + "codegen.vfmsub_vv": 0, + "codegen.vfmul_vf": 0, + "codegen.vfmul_vv": 0, + "codegen.vfmv_f_s": 0, + "codegen.vfmv_s_f": 0, + "codegen.vfmv_v_f": 0, + "codegen.vfnmacc_vf": 0, + "codegen.vfnmacc_vv": 0, + "codegen.vfnmadd_vf": 0, + "codegen.vfnmadd_vv": 0, + "codegen.vfnmsac_vf": 0, + "codegen.vfnmsac_vv": 0, + "codegen.vfnmsub_vf": 0, + "codegen.vfnmsub_vv": 0, + "codegen.vfrdiv_vf": 0, + "codegen.vfrec7_v": 0, + "codegen.vfredmax_vs": 0, + "codegen.vfredmin_vs": 0, + "codegen.vfredosum_vs": 0, + "codegen.vfredusum_vs": 0, + "codegen.vfrsqrt7_v": 0, + "codegen.vfrsub_vf": 0, + "codegen.vfsgnj_vf": 0, + "codegen.vfsgnj_vv": 0, + "codegen.vfsgnjn_vf": 0, + "codegen.vfsgnjn_vv": 0, + "codegen.vfsgnjx_vf": 0, + "codegen.vfsgnjx_vv": 0, + "codegen.vfsqrt_v": 0, + "codegen.vfsub_vf": 0, + "codegen.vfsub_vv": 0, + "codegen.vid_v": 0, + "codegen.viota_m": 0, + "codegen.vl1re16_v": 0, + "codegen.vl1re32_v": 0, + "codegen.vl1re8_v": 0, + "codegen.vl2re16_v": 0, + "codegen.vl2re32_v": 0, + "codegen.vl2re8_v": 0, + "codegen.vl4re16_v": 0, + "codegen.vl4re32_v": 0, + "codegen.vl4re8_v": 0, + "codegen.vl8re16_v": 0, + "codegen.vl8re32_v": 0, + "codegen.vl8re8_v": 0, + "codegen.vle16_v": 0, + "codegen.vle16ff_v": 0, + "codegen.vle32_v": 0, + "codegen.vle32ff_v": 0, + "codegen.vle8_v": 0, + "codegen.vle8ff_v": 0, + "codegen.vlm_v": 0, + "codegen.vloxei16_v": 0, + "codegen.vloxei32_v": 0, + "codegen.vloxei8_v": 0, + "codegen.vloxseg2ei16_v": 0, + "codegen.vloxseg2ei32_v": 0, + "codegen.vloxseg2ei8_v": 0, + "codegen.vloxseg3ei16_v": 0, + "codegen.vloxseg3ei32_v": 0, + "codegen.vloxseg3ei8_v": 0, + "codegen.vloxseg4ei16_v": 0, + "codegen.vloxseg4ei32_v": 0, + "codegen.vloxseg4ei8_v": 0, + "codegen.vloxseg5ei16_v": 0, + "codegen.vloxseg5ei32_v": 0, + "codegen.vloxseg5ei8_v": 0, + "codegen.vloxseg6ei16_v": 0, + "codegen.vloxseg6ei32_v": 0, + "codegen.vloxseg6ei8_v": 0, + "codegen.vloxseg7ei16_v": 0, + "codegen.vloxseg7ei32_v": 0, + "codegen.vloxseg7ei8_v": 0, + "codegen.vloxseg8ei16_v": 0, + "codegen.vloxseg8ei32_v": 0, + "codegen.vloxseg8ei8_v": 0, + "codegen.vlse16_v": 0, + "codegen.vlse32_v": 0, + "codegen.vlse8_v": 0, + "codegen.vlseg2e16_v": 0, + "codegen.vlseg2e32_v": 0, + "codegen.vlseg2e8_v": 0, + "codegen.vlseg3e16_v": 0, + "codegen.vlseg3e32_v": 0, + "codegen.vlseg3e8_v": 0, + "codegen.vlseg4e16_v": 0, + "codegen.vlseg4e32_v": 0, + "codegen.vlseg4e8_v": 0, + "codegen.vlseg5e16_v": 0, + "codegen.vlseg5e32_v": 0, + "codegen.vlseg5e8_v": 0, + "codegen.vlseg6e16_v": 0, + "codegen.vlseg6e32_v": 0, + "codegen.vlseg6e8_v": 0, + "codegen.vlseg7e16_v": 0, + "codegen.vlseg7e32_v": 0, + "codegen.vlseg7e8_v": 0, + "codegen.vlseg8e16_v": 0, + "codegen.vlseg8e32_v": 0, + "codegen.vlseg8e8_v": 0, + "codegen.vlsseg2e16_v": 0, + "codegen.vlsseg2e32_v": 0, + "codegen.vlsseg2e8_v": 0, + "codegen.vlsseg3e16_v": 0, + "codegen.vlsseg3e32_v": 0, + "codegen.vlsseg3e8_v": 0, + "codegen.vlsseg4e16_v": 0, + "codegen.vlsseg4e32_v": 0, + "codegen.vlsseg4e8_v": 0, + "codegen.vlsseg5e16_v": 0, + "codegen.vlsseg5e32_v": 0, + "codegen.vlsseg5e8_v": 0, + "codegen.vlsseg6e16_v": 0, + "codegen.vlsseg6e32_v": 0, + "codegen.vlsseg6e8_v": 0, + "codegen.vlsseg7e16_v": 0, + "codegen.vlsseg7e32_v": 0, + "codegen.vlsseg7e8_v": 0, + "codegen.vlsseg8e16_v": 0, + "codegen.vlsseg8e32_v": 0, + "codegen.vlsseg8e8_v": 0, + "codegen.vluxei16_v": 0, + "codegen.vluxei32_v": 0, + "codegen.vluxei8_v": 0, + "codegen.vluxseg2ei16_v": 0, + "codegen.vluxseg2ei32_v": 0, + "codegen.vluxseg2ei8_v": 0, + "codegen.vluxseg3ei16_v": 0, + "codegen.vluxseg3ei32_v": 0, + "codegen.vluxseg3ei8_v": 0, + "codegen.vluxseg4ei16_v": 0, + "codegen.vluxseg4ei32_v": 0, + "codegen.vluxseg4ei8_v": 0, + "codegen.vluxseg5ei16_v": 0, + "codegen.vluxseg5ei32_v": 0, + "codegen.vluxseg5ei8_v": 0, + "codegen.vluxseg6ei16_v": 0, + "codegen.vluxseg6ei32_v": 0, + "codegen.vluxseg6ei8_v": 0, + "codegen.vluxseg7ei16_v": 0, + "codegen.vluxseg7ei32_v": 0, + "codegen.vluxseg7ei8_v": 0, + "codegen.vluxseg8ei16_v": 0, + "codegen.vluxseg8ei32_v": 0, + "codegen.vluxseg8ei8_v": 0, + "codegen.vmacc_vv": 0, + "codegen.vmacc_vx": 0, + "codegen.vmadc_vi": 0, + "codegen.vmadc_vim": 0, + "codegen.vmadc_vv": 0, + "codegen.vmadc_vvm": 0, + "codegen.vmadc_vx": 0, + "codegen.vmadc_vxm": 0, + "codegen.vmadd_vv": 0, + "codegen.vmadd_vx": 0, + "codegen.vmand_mm": 0, + "codegen.vmandn_mm": 0, + "codegen.vmax_vv": 0, + "codegen.vmax_vx": 0, + "codegen.vmaxu_vv": 0, + "codegen.vmaxu_vx": 0, + "codegen.vmerge_vim": 0, + "codegen.vmerge_vvm": 0, + "codegen.vmerge_vxm": 0, + "codegen.vmfeq_vf": 0, + "codegen.vmfeq_vv": 0, + "codegen.vmfge_vf": 0, + "codegen.vmfgt_vf": 0, + "codegen.vmflt_vf": 0, + "codegen.vmflt_vv": 0, + "codegen.vmfne_vf": 0, + "codegen.vmfne_vv": 0, + "codegen.vmin_vv": 0, + "codegen.vmin_vx": 0, + "codegen.vminu_vv": 0, + "codegen.vminu_vx": 0, + "codegen.vmnand_mm": 0, + "codegen.vmnor_mm": 0, + "codegen.vmor_mm": 0, + "codegen.vmorn_mm": 0, + "codegen.vmsbc_vv": 0, + "codegen.vmsbc_vvm": 0, + "codegen.vmsbc_vx": 0, + "codegen.vmsbc_vxm": 0, + "codegen.vmsbf_m": 0, + "codegen.vmseq_vi": 0, + "codegen.vmseq_vv": 0, + "codegen.vmseq_vx": 0, + "codegen.vmsgt_vi": 0, + "codegen.vmsgt_vv": 0, + "codegen.vmsgt_vx": 0, + "codegen.vmsgtu_vi": 0, + "codegen.vmsgtu_vv": 0, + "codegen.vmsgtu_vx": 0, + "codegen.vmsif_m": 0, + "codegen.vmsle_vi": 0, + "codegen.vmsle_vv": 0, + "codegen.vmsle_vx": 0, + "codegen.vmsleu_vi": 0, + "codegen.vmsleu_vv": 0, + "codegen.vmsleu_vx": 0, + "codegen.vmslt_vv": 0, + "codegen.vmslt_vx": 0, + "codegen.vmsltu_vv": 0, + "codegen.vmsltu_vx": 0, + "codegen.vmsne_vi": 0, + "codegen.vmsne_vv": 0, + "codegen.vmsne_vx": 0, + "codegen.vmsof_m": 0, + "codegen.vmul_vv": 0, + "codegen.vmul_vx": 0, + "codegen.vmulh_vv": 0, + "codegen.vmulh_vx": 0, + "codegen.vmulhsu_vv": 0, + "codegen.vmulhsu_vx": 0, + "codegen.vmulhu_vv": 0, + "codegen.vmulhu_vx": 0, + "codegen.vmv1r_v": 0, + "codegen.vmv2r_v": 0, + "codegen.vmv4r_v": 0, + "codegen.vmv8r_v": 0, + "codegen.vmv_s_x": 0, + "codegen.vmv_v_i": 0, + "codegen.vmv_v_v": 0, + "codegen.vmv_v_x": 0, + "codegen.vmv_x_s": 0, + "codegen.vmxnor_mm": 0, + "codegen.vmxor_mm": 0, + "codegen.vnclip_wi": 0, + "codegen.vnclip_wv": 0, + "codegen.vnclip_wx": 0, + "codegen.vnclipu_wi": 0, + "codegen.vnclipu_wv": 0, + "codegen.vnclipu_wx": 0, + "codegen.vnmsac_vv": 0, + "codegen.vnmsac_vx": 0, + "codegen.vnmsub_vv": 0, + "codegen.vnmsub_vx": 0, + "codegen.vnsra_wi": 0, + "codegen.vnsra_wv": 0, + "codegen.vnsra_wx": 0, + "codegen.vnsrl_wi": 0, + "codegen.vnsrl_wv": 0, + "codegen.vnsrl_wx": 0, + "codegen.vor_vi": 0, + "codegen.vor_vv": 0, + "codegen.vor_vx": 0, + "codegen.vredand_vs": 0, + "codegen.vredmax_vs": 0, + "codegen.vredmaxu_vs": 0, + "codegen.vredmin_vs": 0, + "codegen.vredminu_vs": 0, + "codegen.vredor_vs": 0, + "codegen.vredsum_vs": 0, + "codegen.vredxor_vs": 0, + "codegen.vrem_vv": 0, + "codegen.vrem_vx": 0, + "codegen.vremu_vv": 0, + "codegen.vremu_vx": 0, + "codegen.vrev8_v": 0, + "codegen.vrgather_vi": 0, + "codegen.vrgather_vv": 0, + "codegen.vrgather_vx": 0, + "codegen.vrgatherei16_vv": 0, + "codegen.vrol_vv": 0, + "codegen.vrol_vx": 0, + "codegen.vror_vi": 0, + "codegen.vror_vv": 0, + "codegen.vror_vx": 0, + "codegen.vrsub_vi": 0, + "codegen.vrsub_vx": 0, + "codegen.vs1r_v": 0, + "codegen.vs2r_v": 0, + "codegen.vs4r_v": 0, + "codegen.vs8r_v": 0, + "codegen.vsadd_vi": 0, + "codegen.vsadd_vv": 0, + "codegen.vsadd_vx": 0, + "codegen.vsaddu_vi": 0, + "codegen.vsaddu_vv": 0, + "codegen.vsaddu_vx": 0, + "codegen.vsbc_vvm": 0, + "codegen.vsbc_vxm": 0, + "codegen.vse16_v": 0, + "codegen.vse32_v": 0, + "codegen.vse8_v": 0, + "codegen.vsetivli": 0, + "codegen.vsetvl": 0, + "codegen.vsetvli": 0, + "codegen.vsext_vf2": 0, + "codegen.vsext_vf4": 0, + "codegen.vslide1down_vx": 0, + "codegen.vslide1up_vx": 0, + "codegen.vslidedown_vi": 0, + "codegen.vslidedown_vx": 0, + "codegen.vslideup_vi": 0, + "codegen.vslideup_vx": 0, + "codegen.vsll_vi": 0, + "codegen.vsll_vv": 0, + "codegen.vsll_vx": 0, + "codegen.vsm_v": 0, + "codegen.vsmul_vv": 0, + "codegen.vsmul_vx": 0, + "codegen.vsoxei16_v": 0, + "codegen.vsoxei32_v": 0, + "codegen.vsoxei8_v": 0, + "codegen.vsoxseg2ei16_v": 0, + "codegen.vsoxseg2ei32_v": 0, + "codegen.vsoxseg2ei8_v": 0, + "codegen.vsoxseg3ei16_v": 0, + "codegen.vsoxseg3ei32_v": 0, + "codegen.vsoxseg3ei8_v": 0, + "codegen.vsoxseg4ei16_v": 0, + "codegen.vsoxseg4ei32_v": 0, + "codegen.vsoxseg4ei8_v": 0, + "codegen.vsoxseg5ei16_v": 0, + "codegen.vsoxseg5ei32_v": 0, + "codegen.vsoxseg5ei8_v": 0, + "codegen.vsoxseg6ei16_v": 0, + "codegen.vsoxseg6ei32_v": 0, + "codegen.vsoxseg6ei8_v": 0, + "codegen.vsoxseg7ei16_v": 0, + "codegen.vsoxseg7ei32_v": 0, + "codegen.vsoxseg7ei8_v": 0, + "codegen.vsoxseg8ei16_v": 0, + "codegen.vsoxseg8ei32_v": 0, + "codegen.vsoxseg8ei8_v": 0, + "codegen.vsra_vi": 0, + "codegen.vsra_vv": 0, + "codegen.vsra_vx": 0, + "codegen.vsrl_vi": 0, + "codegen.vsrl_vv": 0, + "codegen.vsrl_vx": 0, + "codegen.vsse16_v": 0, + "codegen.vsse32_v": 0, + "codegen.vsse8_v": 0, + "codegen.vsseg2e16_v": 0, + "codegen.vsseg2e32_v": 0, + "codegen.vsseg2e8_v": 0, + "codegen.vsseg3e16_v": 0, + "codegen.vsseg3e32_v": 0, + "codegen.vsseg3e8_v": 0, + "codegen.vsseg4e16_v": 0, + "codegen.vsseg4e32_v": 0, + "codegen.vsseg4e8_v": 0, + "codegen.vsseg5e16_v": 0, + "codegen.vsseg5e32_v": 0, + "codegen.vsseg5e8_v": 0, + "codegen.vsseg6e16_v": 0, + "codegen.vsseg6e32_v": 0, + "codegen.vsseg6e8_v": 0, + "codegen.vsseg7e16_v": 0, + "codegen.vsseg7e32_v": 0, + "codegen.vsseg7e8_v": 0, + "codegen.vsseg8e16_v": 0, + "codegen.vsseg8e32_v": 0, + "codegen.vsseg8e8_v": 0, + "codegen.vssra_vi": 0, + "codegen.vssra_vv": 0, + "codegen.vssra_vx": 0, + "codegen.vssrl_vi": 0, + "codegen.vssrl_vv": 0, + "codegen.vssrl_vx": 0, + "codegen.vssseg2e16_v": 0, + "codegen.vssseg2e32_v": 0, + "codegen.vssseg2e8_v": 0, + "codegen.vssseg3e16_v": 0, + "codegen.vssseg3e32_v": 0, + "codegen.vssseg3e8_v": 0, + "codegen.vssseg4e16_v": 0, + "codegen.vssseg4e32_v": 0, + "codegen.vssseg4e8_v": 0, + "codegen.vssseg5e16_v": 0, + "codegen.vssseg5e32_v": 0, + "codegen.vssseg5e8_v": 0, + "codegen.vssseg6e16_v": 0, + "codegen.vssseg6e32_v": 0, + "codegen.vssseg6e8_v": 0, + "codegen.vssseg7e16_v": 0, + "codegen.vssseg7e32_v": 0, + "codegen.vssseg7e8_v": 0, + "codegen.vssseg8e16_v": 0, + "codegen.vssseg8e32_v": 0, + "codegen.vssseg8e8_v": 0, + "codegen.vssub_vv": 0, + "codegen.vssub_vx": 0, + "codegen.vssubu_vv": 0, + "codegen.vssubu_vx": 0, + "codegen.vsub_vv": 0, + "codegen.vsub_vx": 0, + "codegen.vsuxei16_v": 0, + "codegen.vsuxei32_v": 0, + "codegen.vsuxei8_v": 0, + "codegen.vsuxseg2ei16_v": 0, + "codegen.vsuxseg2ei32_v": 0, + "codegen.vsuxseg2ei8_v": 0, + "codegen.vsuxseg3ei16_v": 0, + "codegen.vsuxseg3ei32_v": 0, + "codegen.vsuxseg3ei8_v": 0, + "codegen.vsuxseg4ei16_v": 0, + "codegen.vsuxseg4ei32_v": 0, + "codegen.vsuxseg4ei8_v": 0, + "codegen.vsuxseg5ei16_v": 0, + "codegen.vsuxseg5ei32_v": 0, + "codegen.vsuxseg5ei8_v": 0, + "codegen.vsuxseg6ei16_v": 0, + "codegen.vsuxseg6ei32_v": 0, + "codegen.vsuxseg6ei8_v": 0, + "codegen.vsuxseg7ei16_v": 0, + "codegen.vsuxseg7ei32_v": 0, + "codegen.vsuxseg7ei8_v": 0, + "codegen.vsuxseg8ei16_v": 0, + "codegen.vsuxseg8ei32_v": 0, + "codegen.vsuxseg8ei8_v": 0, + "codegen.vwadd_vv": 0, + "codegen.vwadd_vx": 0, + "codegen.vwadd_wv": 0, + "codegen.vwadd_wx": 0, + "codegen.vwaddu_vv": 0, + "codegen.vwaddu_vx": 0, + "codegen.vwaddu_wv": 0, + "codegen.vwaddu_wx": 0, + "codegen.vwmacc_vv": 0, + "codegen.vwmacc_vx": 0, + "codegen.vwmaccsu_vv": 0, + "codegen.vwmaccsu_vx": 0, + "codegen.vwmaccu_vv": 0, + "codegen.vwmaccu_vx": 0, + "codegen.vwmaccus_vx": 0, + "codegen.vwmul_vv": 0, + "codegen.vwmul_vx": 0, + "codegen.vwmulsu_vv": 0, + "codegen.vwmulsu_vx": 0, + "codegen.vwmulu_vv": 0, + "codegen.vwmulu_vx": 0, + "codegen.vwredsum_vs": 0, + "codegen.vwredsumu_vs": 0, + "codegen.vwsll_vi": 0, + "codegen.vwsll_vv": 0, + "codegen.vwsll_vx": 0, + "codegen.vwsub_vv": 0, + "codegen.vwsub_vx": 0, + "codegen.vwsub_wv": 0, + "codegen.vwsub_wx": 0, + "codegen.vwsubu_vv": 0, + "codegen.vwsubu_vx": 0, + "codegen.vwsubu_wv": 0, + "codegen.vwsubu_wx": 0, + "codegen.vxor_vi": 0, + "codegen.vxor_vv": 0, + "codegen.vxor_vx": 0, + "codegen.vzext_vf2": 0, + "codegen.vzext_vf4": 0, + "intrinsic.conv2d_less_m2": 0, + "intrinsic.linear_normalization": 0, + "intrinsic.softmax": 0, + "mlir.hello": 0, + "mlir.rvv_vp_intrinsic_add": 0, + "mlir.rvv_vp_intrinsic_add_scalable": 0, + "mlir.stripmining": 0, + "rvv_bench.ascii_to_utf16": 0, + "rvv_bench.ascii_to_utf32": 0, + "rvv_bench.byteswap": 0, + "rvv_bench.chacha20": 0, + "rvv_bench.mandelbrot": 0, + "rvv_bench.memcpy": 0, + "rvv_bench.memset": 0, + "rvv_bench.mergelines": 0, + "rvv_bench.poly1305": 0, + "rvv_bench.strlen": 0, + "rvv_bench.utf8_count": 0 +} diff --git a/.github/cases/psyduck/perf.json b/.github/cases/psyduck/perf.json new file mode 100644 index 000000000..3527224f9 --- /dev/null +++ b/.github/cases/psyduck/perf.json @@ -0,0 +1,9 @@ +{ + "mlir.conv": 0, + "mlir.matmul": 0, + "mlir.stripmining": 0, + "intrinsic.conv2d_less_m2": 0, + "intrinsic.linear_normalization": 0, + "intrinsic.matmul": 0, + "intrinsic.softmax": 0 +} diff --git a/tests/codegen/common.txt b/tests/codegen/common.txt index cb27e22f4..506b3a08a 100644 --- a/tests/codegen/common.txt +++ b/tests/codegen/common.txt @@ -11,12 +11,19 @@ vadd.vx vand.vi vand.vv vand.vx +vandn.vv +vandn.vx vasub.vv vasub.vx vasubu.vv vasubu.vx +vbrev.v +vbrev8.v +vclz.v vcompress.vm vcpop.m +vcpop.v +vctz.v vdiv.vv vdiv.vx vdivu.vv @@ -24,118 +31,118 @@ vdivu.vx vfirst.m vid.v viota.m -vl1re8.v vl1re16.v vl1re32.v -vl2re8.v +vl1re8.v vl2re16.v vl2re32.v -vl4re8.v +vl2re8.v vl4re16.v vl4re32.v -vl8re8.v +vl4re8.v vl8re16.v vl8re32.v -vle8.v -vle8ff.v +vl8re8.v vle16.v vle16ff.v vle32.v vle32ff.v +vle8.v +vle8ff.v vlm.v -vloxei8.v vloxei16.v vloxei32.v -vloxseg2ei8.v +vloxei8.v vloxseg2ei16.v vloxseg2ei32.v -vloxseg3ei8.v +vloxseg2ei8.v vloxseg3ei16.v vloxseg3ei32.v -vloxseg4ei8.v +vloxseg3ei8.v vloxseg4ei16.v vloxseg4ei32.v -vloxseg5ei8.v +vloxseg4ei8.v vloxseg5ei16.v vloxseg5ei32.v -vloxseg6ei8.v +vloxseg5ei8.v vloxseg6ei16.v vloxseg6ei32.v -vloxseg7ei8.v +vloxseg6ei8.v vloxseg7ei16.v vloxseg7ei32.v -vloxseg8ei8.v +vloxseg7ei8.v vloxseg8ei16.v vloxseg8ei32.v -vlse8.v +vloxseg8ei8.v vlse16.v vlse32.v -vlseg2e8.v +vlse8.v vlseg2e16.v vlseg2e32.v -vlseg3e8.v +vlseg2e8.v vlseg3e16.v vlseg3e32.v -vlseg4e8.v +vlseg3e8.v vlseg4e16.v vlseg4e32.v -vlseg5e8.v +vlseg4e8.v vlseg5e16.v vlseg5e32.v -vlseg6e8.v +vlseg5e8.v vlseg6e16.v vlseg6e32.v -vlseg7e8.v +vlseg6e8.v vlseg7e16.v vlseg7e32.v -vlseg8e8.v +vlseg7e8.v vlseg8e16.v vlseg8e32.v -vlsseg2e8.v +vlseg8e8.v vlsseg2e16.v vlsseg2e32.v -vlsseg3e8.v +vlsseg2e8.v vlsseg3e16.v vlsseg3e32.v -vlsseg4e8.v +vlsseg3e8.v vlsseg4e16.v vlsseg4e32.v -vlsseg5e8.v +vlsseg4e8.v vlsseg5e16.v vlsseg5e32.v -vlsseg6e8.v +vlsseg5e8.v vlsseg6e16.v vlsseg6e32.v -vlsseg7e8.v +vlsseg6e8.v vlsseg7e16.v vlsseg7e32.v -vlsseg8e8.v +vlsseg7e8.v vlsseg8e16.v vlsseg8e32.v -vluxei8.v +vlsseg8e8.v vluxei16.v vluxei32.v -vluxseg2ei8.v +vluxei8.v vluxseg2ei16.v vluxseg2ei32.v -vluxseg3ei8.v +vluxseg2ei8.v vluxseg3ei16.v vluxseg3ei32.v -vluxseg4ei8.v +vluxseg3ei8.v vluxseg4ei16.v vluxseg4ei32.v -vluxseg5ei8.v +vluxseg4ei8.v vluxseg5ei16.v vluxseg5ei32.v -vluxseg6ei8.v +vluxseg5ei8.v vluxseg6ei16.v vluxseg6ei32.v -vluxseg7ei8.v +vluxseg6ei8.v vluxseg7ei16.v vluxseg7ei32.v -vluxseg8ei8.v +vluxseg7ei8.v vluxseg8ei16.v vluxseg8ei32.v +vluxseg8ei8.v vmacc.vv vmacc.vx vmadc.vi @@ -242,10 +249,16 @@ vrem.vv vrem.vx vremu.vv vremu.vx +vrev8.v vrgather.vi vrgather.vv vrgather.vx vrgatherei16.vv +vrol.vv +vrol.vx +vror.vi +vror.vv +vror.vx vrsub.vi vrsub.vx vs1r.v @@ -260,9 +273,9 @@ vsaddu.vv vsaddu.vx vsbc.vvm vsbc.vxm -vse8.v vse16.v vse32.v +vse8.v vsetivli vsetvl vsetvli @@ -280,117 +293,117 @@ vsll.vx vsm.v vsmul.vv vsmul.vx -vsoxei8.v vsoxei16.v vsoxei32.v -vsoxseg2ei8.v +vsoxei8.v vsoxseg2ei16.v vsoxseg2ei32.v -vsoxseg3ei8.v +vsoxseg2ei8.v vsoxseg3ei16.v vsoxseg3ei32.v -vsoxseg4ei8.v +vsoxseg3ei8.v vsoxseg4ei16.v vsoxseg4ei32.v -vsoxseg5ei8.v +vsoxseg4ei8.v vsoxseg5ei16.v vsoxseg5ei32.v -vsoxseg6ei8.v +vsoxseg5ei8.v vsoxseg6ei16.v vsoxseg6ei32.v -vsoxseg7ei8.v +vsoxseg6ei8.v vsoxseg7ei16.v vsoxseg7ei32.v -vsoxseg8ei8.v +vsoxseg7ei8.v vsoxseg8ei16.v vsoxseg8ei32.v +vsoxseg8ei8.v vsra.vi vsra.vv vsra.vx vsrl.vi vsrl.vv vsrl.vx -vsse8.v vsse16.v vsse32.v -vsseg2e8.v +vsse8.v vsseg2e16.v vsseg2e32.v -vsseg3e8.v +vsseg2e8.v vsseg3e16.v vsseg3e32.v -vsseg4e8.v +vsseg3e8.v vsseg4e16.v vsseg4e32.v -vsseg5e8.v +vsseg4e8.v vsseg5e16.v vsseg5e32.v -vsseg6e8.v +vsseg5e8.v vsseg6e16.v vsseg6e32.v -vsseg7e8.v +vsseg6e8.v vsseg7e16.v vsseg7e32.v -vsseg8e8.v +vsseg7e8.v vsseg8e16.v vsseg8e32.v +vsseg8e8.v vssra.vi vssra.vv vssra.vx vssrl.vi vssrl.vv vssrl.vx -vssseg2e8.v vssseg2e16.v vssseg2e32.v -vssseg3e8.v +vssseg2e8.v vssseg3e16.v vssseg3e32.v -vssseg4e8.v +vssseg3e8.v vssseg4e16.v vssseg4e32.v -vssseg5e8.v +vssseg4e8.v vssseg5e16.v vssseg5e32.v -vssseg6e8.v +vssseg5e8.v vssseg6e16.v vssseg6e32.v -vssseg7e8.v +vssseg6e8.v vssseg7e16.v vssseg7e32.v -vssseg8e8.v +vssseg7e8.v vssseg8e16.v vssseg8e32.v +vssseg8e8.v vssub.vv vssub.vx vssubu.vv vssubu.vx vsub.vv vsub.vx -vsuxei8.v vsuxei16.v vsuxei32.v -vsuxseg2ei8.v +vsuxei8.v vsuxseg2ei16.v vsuxseg2ei32.v -vsuxseg3ei8.v +vsuxseg2ei8.v vsuxseg3ei16.v vsuxseg3ei32.v -vsuxseg4ei8.v +vsuxseg3ei8.v vsuxseg4ei16.v vsuxseg4ei32.v -vsuxseg5ei8.v +vsuxseg4ei8.v vsuxseg5ei16.v vsuxseg5ei32.v -vsuxseg6ei8.v +vsuxseg5ei8.v vsuxseg6ei16.v vsuxseg6ei32.v -vsuxseg7ei8.v +vsuxseg6ei8.v vsuxseg7ei16.v vsuxseg7ei32.v -vsuxseg8ei8.v +vsuxseg7ei8.v vsuxseg8ei16.v vsuxseg8ei32.v +vsuxseg8ei8.v vwadd.vv vwadd.vx vwadd.wv @@ -414,6 +427,9 @@ vwmulu.vv vwmulu.vx vwredsum.vs vwredsumu.vs +vwsll.vi +vwsll.vv +vwsll.vx vwsub.vv vwsub.vx vwsub.wv From b6daf857bb12e7b9f462e5dab18d943858e8146a Mon Sep 17 00:00:00 2001 From: Avimitin Date: Thu, 1 Aug 2024 15:49:55 +0800 Subject: [PATCH 4/5] [nix] fix codegen compile for zvbb insn Signed-off-by: Avimitin --- tests/builder.nix | 30 +++++++++++++++++++----------- tests/codegen/common.txt | 16 ---------------- tests/codegen/default.nix | 5 ++--- tests/codegen/zvbb.txt | 2 +- tests/default.nix | 5 +---- 5 files changed, 23 insertions(+), 35 deletions(-) diff --git a/tests/builder.nix b/tests/builder.nix index f250a8b13..4730af191 100644 --- a/tests/builder.nix +++ b/tests/builder.nix @@ -26,17 +26,25 @@ let CC = "${stdenv.targetPlatform.config}-cc"; - NIX_CFLAGS_COMPILE = [ - "-mabi=ilp32f" - "-march=${rtlDesignMetadata.march}" - "-mno-relax" - "-static" - "-mcmodel=medany" - "-fvisibility=hidden" - "-fno-PIC" - "-g" - "-O3" - ]; + NIX_CFLAGS_COMPILE = + let + march = lib.pipe rtlDesignMetadata.march [ + (lib.splitString "_") + (map (ext: if ext == "zvbb" then "zvbb1" else ext)) + (lib.concatStringsSep "_") + ]; + in + [ + "-mabi=ilp32f" + "-march=${march}" + "-mno-relax" + "-static" + "-mcmodel=medany" + "-fvisibility=hidden" + "-fno-PIC" + "-g" + "-O3" + ] ++ lib.optionals (lib.elem "zvbb" (lib.splitString "_" rtlDesignMetadata.march)) [ "-menable-experimental-extensions" ]; installPhase = '' runHook preInstall diff --git a/tests/codegen/common.txt b/tests/codegen/common.txt index 506b3a08a..98fc1ae39 100644 --- a/tests/codegen/common.txt +++ b/tests/codegen/common.txt @@ -11,19 +11,12 @@ vadd.vx vand.vi vand.vv vand.vx -vandn.vv -vandn.vx vasub.vv vasub.vx vasubu.vv vasubu.vx -vbrev.v -vbrev8.v -vclz.v vcompress.vm vcpop.m -vcpop.v -vctz.v vdiv.vv vdiv.vx vdivu.vv @@ -249,16 +242,10 @@ vrem.vv vrem.vx vremu.vv vremu.vx -vrev8.v vrgather.vi vrgather.vv vrgather.vx vrgatherei16.vv -vrol.vv -vrol.vx -vror.vi -vror.vv -vror.vx vrsub.vi vrsub.vx vs1r.v @@ -427,9 +414,6 @@ vwmulu.vv vwmulu.vx vwredsum.vs vwredsumu.vs -vwsll.vi -vwsll.vv -vwsll.vx vwsub.vv vwsub.vx vwsub.wv diff --git a/tests/codegen/default.nix b/tests/codegen/default.nix index fd8edb612..e4883ade6 100644 --- a/tests/codegen/default.nix +++ b/tests/codegen/default.nix @@ -74,11 +74,10 @@ let commonTests = buildTestsFromFile ./common.txt { featuresRequired = [ ]; }; fpTests = buildTestsFromFile ./fp.txt { featuresRequired = [ "zve32f" ]; }; zvbbTests = buildTestsFromFile ./zvbb.txt { featuresRequired = [ "zvbb" ]; }; - hasFeature = feat: lib.any (f: feat == f) currentFeatures; in lib.recurseIntoAttrs ( commonTests // - lib.optionalAttrs (hasFeature "zve32f") fpTests // - lib.optionalAttrs (hasFeature "zvbb") zvbbTests + lib.optionalAttrs (lib.elem "zve32f" currentFeatures) fpTests // + lib.optionalAttrs (lib.elem "zvbb" currentFeatures) zvbbTests ) diff --git a/tests/codegen/zvbb.txt b/tests/codegen/zvbb.txt index 77ed67621..d109f5570 100644 --- a/tests/codegen/zvbb.txt +++ b/tests/codegen/zvbb.txt @@ -1,7 +1,7 @@ vandn.vv vandn.vx vbrev.v -vbreav8.v +vbrev8.v vclz.v vcpop.v vctz.v diff --git a/tests/default.nix b/tests/default.nix index 8a607d59e..35fbc2ab8 100644 --- a/tests/default.nix +++ b/tests/default.nix @@ -11,8 +11,6 @@ }: let - hasExt = cmp: lib.any (ext: cmp == (lib.toLower ext)) rtlDesignMetadata.extensions; - # Add an extra abstract layer between test case and RTL design, so that we can have clean and organized way # for developer to specify their required features without the need to parse ISA string themselves. currentFeatures = [ @@ -20,8 +18,7 @@ let "dlen:${rtlDesignMetadata.dlen}" "xlen:${if (lib.hasPrefix "rv32" rtlDesignMetadata.march) then "32" else "64"}" ] - ++ lib.optionals (hasExt "zve32f") [ "zve32f" ] - ++ lib.optionals (hasExt "zvbb") [ "zvbb" ]; + ++ (lib.splitString "_" rtlDesignMetadata.march); # isSubSetOf m n: n is subset of m isSubsetOf = m: n: lib.all (x: lib.elem x m) n; From 2ff8446fa96eb4bcf7fb72af5f17a99305b32cd0 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Fri, 2 Aug 2024 16:00:16 +0800 Subject: [PATCH 5/5] [ci] dont patch elf for VCS result --- tests/make-emu-result.nix | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/make-emu-result.nix b/tests/make-emu-result.nix index a8540f806..5507e15f4 100644 --- a/tests/make-emu-result.nix +++ b/tests/make-emu-result.nix @@ -108,6 +108,7 @@ let passthru.with-vcs = self.overrideAttrs (old: { name = old.name + "-with-vcs"; __noChroot = true; + dontPatchELF = true; buildPhase = '' runHook preBuild @@ -143,6 +144,7 @@ let passthru.with-vcs-trace = self.overrideAttrs (old: { name = old.name + "-with-vcs-trace"; __noChroot = true; + dontPatchELF = true; buildPhase = '' runHook preBuild