diff --git a/CMakeLists.txt b/CMakeLists.txt index 427dc664..0053a4de 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 3.5) project(silice) INCLUDE_DIRECTORIES( @@ -16,6 +16,10 @@ INCLUDE_DIRECTORIES( if (CMAKE_CXX_COMPILER_ID MATCHES "MSVC") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /STACK:4194304") +else() +if (MINGW) +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --static") +endif() endif() set(CMAKE_CXX_STANDARD 17) @@ -124,22 +128,47 @@ add_library(libsilice STATIC ${SILICE_CORE}) add_executable(silice src/silice.cpp) # set_target_properties(silice PROPERTIES OUTPUT_NAME "silice") target_link_libraries(silice libsilice antlr4_static lua luabind) + if(WIN32) -target_link_libraries(silice shlwapi) -endif(WIN32) + target_link_libraries(silice shlwapi) +endif() -install(TARGETS silice RUNTIME DESTINATION ${CMAKE_SOURCE_DIR}/bin) -install(TARGETS libsilice ARCHIVE DESTINATION ${CMAKE_SOURCE_DIR}/lib) -install(TARGETS lua ARCHIVE DESTINATION ${CMAKE_SOURCE_DIR}/lib) -install(TARGETS lua ARCHIVE DESTINATION ${CMAKE_SOURCE_DIR}/lib) -install(TARGETS luabind ARCHIVE DESTINATION ${CMAKE_SOURCE_DIR}/lib) -install(TARGETS antlr4_static ARCHIVE DESTINATION ${CMAKE_SOURCE_DIR}/lib) +# install and paths + +if(WIN32) + if(MINGW) + set(INSTALL_IN_REPO OFF CACHE BOOL "Install in repository") + else() + set(INSTALL_IN_REPO ON CACHE BOOL "Install in repository") + endif() +else() + set(INSTALL_IN_REPO OFF CACHE BOOL "Install in repository") +endif() + +if (INSTALL_IN_REPO) + + install(TARGETS silice RUNTIME DESTINATION ${CMAKE_SOURCE_DIR}/bin) + install(TARGETS libsilice ARCHIVE DESTINATION ${CMAKE_SOURCE_DIR}/lib) + add_definitions(-DFRAMEWORKS_DEFAULT_PATH=\"${CMAKE_SOURCE_DIR}/frameworks\") + +else() + + install(TARGETS silice RUNTIME DESTINATION bin/) + install(TARGETS libsilice ARCHIVE DESTINATION lib/) + install(FILES bin/silice-make.py PERMISSIONS WORLD_EXECUTE 
OWNER_WRITE WORLD_READ DESTINATION bin/) + install(FILES bin/report-cycles.py PERMISSIONS WORLD_EXECUTE OWNER_WRITE WORLD_READ DESTINATION bin/) + install(FILES projects/ice-v/CPUs/ice-v.si DESTINATION share/silice/projects/ice-v/CPUs/) + install(FILES projects/ice-v/CPUs/ice-v-dual.si DESTINATION share/silice/projects/ice-v/CPUs/) + install(DIRECTORY frameworks DESTINATION share/silice/ USE_SOURCE_PERMISSIONS) + install(DIRECTORY src/libs/LibSL-small DESTINATION share/silice/src/libs/) + add_definitions(-DFRAMEWORKS_DEFAULT_PATH=\"${CMAKE_INSTALL_PREFIX}/share/silice/frameworks\") + +endif() # compiler checks -if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND - CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8) - message(FATAL_ERROR "Silice requires g++ 8 at least") +if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8) + message(FATAL_ERROR "Silice requires g++ 8 at least") endif() if(CMAKE_CXX_COMPILER_ID MATCHES "GNU") diff --git a/ChangeLog.md b/ChangeLog.md index f5f00c73..5087f65a 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -3,6 +3,7 @@ - [\[CL0002\] Tracker declarations](#cl0002-tracker-declarations) - [\[CL0003\] Pipeline syntax](#cl0003-pipeline-syntax) - [\[CL0004\] While loops cycle rules](#cl0004-while-loops-cycle-rules) +- [\[CL0005\] If-else cycle rules](#cl0005-if-else-cycle-rules) ## [CL0001] Instantiation time pre-processor diff --git a/GetStarted_Linux.md b/GetStarted_Linux.md index 5210d56c..3e67a07c 100644 --- a/GetStarted_Linux.md +++ b/GetStarted_Linux.md @@ -2,49 +2,18 @@ ## Compiling Silice -First, some dependencies are required. To install all of them you may run the script `install_dependencies_linux.sh` that uses `apt`, or inspect the script content and manually add the missing packages. Note that the Java jre/jdk are only required for compilation. +Hopefully this will be as simple as running `./get_started_linux.sh` +Beware that this script is installing dependencies and will request sudo access. 
+If that is not OK, please open the script and see what it wants to install. +The script also installs Silice on the system, using standard paths (`/usr/local/bin` and `/usr/local/share/silice`). +Finally, the script appends paths to `.bashrc`. -Compiling Silice should then be as simple as: -```shell -git clone --recurse-submodules https://github.com/sylefeb/Silice.git -cd Silice -./compile_silice_linux.sh -``` +## Notes on dependencies -Done! This compiled and installed the Silice executable in `Silice/bin/`. +The `get_started_linux.sh` script calls the scripts `install_dependencies_*.sh` to install dependencies. It attempts to detect your distribution to call the corresponding dependency installation script, but if that fails you may have to manually install the dependencies. In such a case, please refer to the script's contents to see what is needed. Note that the Java jre/jdk are only required for compilation. -**Note:** It is highly recommended for all tools to be available from the PATH (Silice/bin, yosys, nextpnr, dfu-utils, fujprog, etc.). This is required by the default build system. +The script also downloads and sets up pre-compiled binaries for the FPGA toolchain (from [oss-cad-suite](https://github.com/YosysHQ/oss-cad-suite-build)). These are placed in `/usr/local/share/silice`, and environment variables are set by adding a line to the user's `.bashrc`. -## Getting the toolchain +## Testing after installation -Using Silice with your FPGA requires many other tools: yosys, icestorm, trellis, nextpnr, verilator, icarus verilog (*iverilog*), gtkwave, fujprog, dfu-utils. - -**Note:** It is highly recommended for all tools to be available from the PATH. This is required by the default build system. - -The most critical are yosys, icestorm, trellis, nextpnr. For these ones, please do not use any package that -may come with your system. These are likely outdated and won't understand the latest features. Verilator also often needs to be updated. 
- -There are two options: - -### Compile from source (recommended) - -Yosys, icestorm, trellis, nextpnr, verilator are not difficult to compile and install, and have detailed instructions on their git pages: -- [Yosys](https://github.com/YosysHQ/yosys) -- [Project trellis](https://github.com/YosysHQ/prjtrellis) -- [Project icestorm](https://github.com/YosysHQ/icestorm) -- [NextPNR](https://github.com/YosysHQ/nextpnr) -- [Verilator](https://github.com/verilator/verilator) - -Note that trellis and icestorm have to be compiled and installed before nextpnr (please refer to the NextPNR setup instructions). - -These tools take a bit of time to compile, but it is worth doing as they constantly improve. - -> **Note** You might want to apply my [ice40 DSP patch](https://github.com/sylefeb/fpga-binutils/blob/master/patches/yosys_patch_ice40_dsp.diff) on Yosys before compiling. - -### Use compiled binaries - -Checkout the [fpga-toolchain project](https://github.com/open-tool-forge/fpga-toolchain) as they provide nightly builds of many tools for multiple platforms. - -## Testing - -Time to [run a few tests](GetStarted.md#testing). +Time to [run a few tests](GetStarted.md#testing) and [start having fun!](projects/README.md) diff --git a/GetStarted_Windows.md b/GetStarted_Windows.md index 55867f56..e6d0ebb8 100644 --- a/GetStarted_Windows.md +++ b/GetStarted_Windows.md @@ -18,9 +18,11 @@ The prompt should look like this (note the MinGW64 label in purple): Then, from the prompt, enter the Silice directory and type: `./get_started_mingw64.sh`. -> **Note:** The script adds Silice and the FPGA toolchain to PATH in` ~/.bashrc`. Open a new MinGW64 prompt to start using Silice. 
+> **Note:** The script downloads the necessary MinGW packages, compiles and installs Silice using standard paths (`/usr/local/bin` and `/usr/local/share/silice`), and downloads and sets up the [oss-cad-suite](https://github.com/YosysHQ/oss-cad-suite-build) FPGA toolchain in `/usr/local/share/silice`, adding a line to `~/.bashrc` to set environment variables. -> **Note:** This automatically downloads pre-compiled FPGA tools from https://github.com/sylefeb/fpga-binutils/ (~16MB) as well as installs required MinGW64 packages. For details please refer to the [script source code](get_started_mingw64.sh). +Open a new MinGW64 prompt to start using Silice. + +For details please refer to the [script source code](get_started_mingw64.sh). ## Drivers diff --git a/README.md b/README.md index f19a94cd..35064b2d 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # Silice -*A language for hardcoding algorithms into FPGA hardware* +*A language for hardcoding algorithms with pipelines and parallelism into FPGA hardware* --- **Quick links:** diff --git a/antlr/antlr4-cpp-runtime-4.7.2-source/CMakeLists.txt b/antlr/antlr4-cpp-runtime-4.7.2-source/CMakeLists.txt index 85198c01..1aef34af 100644 --- a/antlr/antlr4-cpp-runtime-4.7.2-source/CMakeLists.txt +++ b/antlr/antlr4-cpp-runtime-4.7.2-source/CMakeLists.txt @@ -1,5 +1,5 @@ # -*- mode:cmake -*- -cmake_minimum_required (VERSION 2.8) +cmake_minimum_required (VERSION 3.5) # 2.8 needed because of ExternalProject # Detect build type, fallback to release and throw a warning if use didn't specify any @@ -41,7 +41,7 @@ endif() if(CMAKE_SYSTEM_NAME MATCHES "Linux") find_package(PkgConfig REQUIRED) - pkg_check_modules(UUID REQUIRED uuid) + # pkg_check_modules(UUID REQUIRED uuid) endif() if(APPLE) find_library(COREFOUNDATION_LIBRARY CoreFoundation) @@ -106,7 +106,7 @@ if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU" OR "${CMAKE_CXX_COMPILER_ID}" MATCHE if(NOT (GCC_VERSION VERSION_GREATER 5.0 OR GCC_VERSION VERSION_EQUAL 5.0)) 
message(FATAL_ERROR "${PROJECT_NAME} requires g++ 5.0 or greater.") endif () -elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND ANDROID) +elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND ANDROID) # Need -Os cflag and cxxflags here to work with exception handling on armeabi. # see https://github.com/android-ndk/ndk/issues/573 # and without -stdlib=libc++ cxxflags @@ -123,7 +123,10 @@ elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND ( CMAKE_SYSTEM_NAME MATCH set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") endif() elseif(MSVC_VERSION GREATER 1800 OR MSVC_VERSION EQUAL 1800) - # Visual Studio 2012+ supports c++11 features + # Visual Studio 2012+ supports c++11 features +elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") + # default fallback (e.g. EMSCRIPTEN) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -stdlib=libc++") else () message(FATAL_ERROR "Your C++ compiler does not support C++11.") endif() diff --git a/antlr/antlr4-cpp-runtime-4.7.2-source/runtime/CMakeLists.txt b/antlr/antlr4-cpp-runtime-4.7.2-source/runtime/CMakeLists.txt index 5381f7b2..dc1f8323 100644 --- a/antlr/antlr4-cpp-runtime-4.7.2-source/runtime/CMakeLists.txt +++ b/antlr/antlr4-cpp-runtime-4.7.2-source/runtime/CMakeLists.txt @@ -38,7 +38,7 @@ add_dependencies(antlr4_static make_lib_output_dir) if(CMAKE_SYSTEM_NAME MATCHES "Linux") # target_link_libraries(antlr4_shared ${UUID_LIBRARIES}) - target_link_libraries(antlr4_static ${UUID_LIBRARIES}) + # target_link_libraries(antlr4_static ${UUID_LIBRARIES}) elseif(APPLE) # target_link_libraries(antlr4_shared ${COREFOUNDATION_LIBRARY}) target_link_libraries(antlr4_static ${COREFOUNDATION_LIBRARY}) diff --git a/antlr/silice.g4 b/antlr/silice.g4 index b846277d..ee90ea27 100644 --- a/antlr/silice.g4 +++ b/antlr/silice.g4 @@ -192,7 +192,7 @@ sformdepth : '#depth' '=' NUMBER ; sformtimeout : '#timeout' '=' NUMBER ; sformmode : '#mode' '=' IDENTIFIER ('&' IDENTIFIER)* ; sspecialize : IDENTIFIER ':' TYPE ; -sparam : IDENTIFIER 
'=' NUMBER ; +sparam : IDENTIFIER '=' (NUMBER|CONSTANT) ; bpModifier : sclock | sreset | sautorun | sonehot | sstacksz | sformdepth | sformtimeout | sformmode | sreginput | sspecialize | sparam; bpModifiers : '<' bpModifier (',' bpModifier)* '>' ; diff --git a/antlr/vmodule.g4 b/antlr/vmodule.g4 index 20cd870b..da6c8a09 100644 --- a/antlr/vmodule.g4 +++ b/antlr/vmodule.g4 @@ -39,10 +39,14 @@ OUTP : 'output'; INOUTP : 'inout'; +PARAMETER : 'parameter'; + NUMBER : DIGIT+ ; IDENTIFIER : LETTER+ (DIGIT|LETTER)* ; +ALPHANUM : (LETTER|DIGIT|'\'')+ ; + WHITESPACE : (' ' | '\t') -> skip ; NEWLINE : ('\r'? '\n' | '\r')+ -> skip ; @@ -69,8 +73,12 @@ inout : INOUTP mod IDENTIFIER ; inOrOut : input | output | inout ; -inOutList : (inOrOut ',') * inOrOut | ; +inOutList : (inOrOut ',') * inOrOut | ; + +paramDecl : PARAMETER name=IDENTIFIER '=' value=(NUMBER|IDENTIFIER|ALPHANUM) | ; + +paramList : '#' '(' paramDecl ( ',' paramDecl )* ')' ; -vmodule : 'module' IDENTIFIER '(' inOutList ')' ';' ; +vmodule : 'module' IDENTIFIER paramList? '(' inOutList ')' ';' ; root : vmodule EOF ; diff --git a/bin/report-cycles.py b/bin/report-cycles.py old mode 100644 new mode 100755 diff --git a/bin/silice-make.py b/bin/silice-make.py index cb09ab78..22d6832d 100755 --- a/bin/silice-make.py +++ b/bin/silice-make.py @@ -27,6 +27,7 @@ import argparse import platform import sysconfig +import subprocess # from termcolor import colored def colored(str,clr,attrs=0): @@ -43,8 +44,7 @@ def colored(str,clr,attrs=0): parser.add_argument('-r','--root', help="Root directory, use to override default frameworks.") parser.add_argument('-D','--defines', help="List of comma-separated defines to pass to Silice, e.g. 
-D A=0,B=1") parser.add_argument('--no_build', help="Only generate verilog output file.", action="store_true") -parser.add_argument('--no_program', help="Only generate verilog output file and build bitstream.", - action="store_true") +parser.add_argument('--no_program', help="Only generate verilog output file and build bitstream.", action="store_true") parser.add_argument('--reprogram', help="Only program device.", action="store_true") args = parser.parse_args() @@ -75,15 +75,28 @@ def colored(str,clr,attrs=0): os.environ["BUILD_DIR"] = out_dir # - frameworks directory -frameworks_dir = os.path.realpath(os.path.join(make_dir,"../frameworks/")) +frameworks_dirs=list() +frameworks_dirs.append(os.path.realpath(os.path.join(make_dir,"../frameworks/"))) +frameworks_dirs.append('/usr/local/share/silice/frameworks/') +if platform.system() == "Windows": + if sysconfig.get_platform().startswith("mingw"): + frameworks_dirs.append(subprocess.check_output(['cygpath', '-m', '/usr/local/share/silice/frameworks/']).decode('utf-8').strip()) if args.root: - frameworks_dir = os.path.realpath(os.path.abspath(args.root)) -print("* Silice frameworks directory: ",frameworks_dir,"\t\t\t",end='') -if (os.path.exists(frameworks_dir)): - print(colored("[ok]", 'green')) + frameworks_dirs.insert(0, os.path.realpath(os.path.abspath(args.root))) +# search in expected paths +frameworks_dir = None +for fdir in frameworks_dirs: + if (os.path.exists(fdir)): + frameworks_dir = fdir + break +if frameworks_dir is None: + print("* Silice frameworks directory: \t\t\t",end='') + print(colored("[not found]", 'red')) + sys.exit(-1) else: - print(colored("[not found]", 'red')) - sys.exit(-1) + print("* Silice frameworks directory: ",frameworks_dir,"\t\t\t",end='') + print(colored("[ok]", 'green')) + frameworks_dir = fdir os.environ["FRAMEWORKS_DIR"] = frameworks_dir # enter build directory diff --git a/compile_silice_linux.sh b/compile_silice_linux.sh index 02f0f9ab..9974a07a 100755 --- a/compile_silice_linux.sh +++ 
b/compile_silice_linux.sh @@ -1,12 +1,13 @@ #!/bin/bash if ! type "javac" > /dev/null; then - echo "Silice compilation requires packages default-jre and default-jdk" + echo "Silice compilation requires javac (typically in package default-jdk or jdk-openjdk)" exit fi git submodule init git submodule update +rm -f bin/silice || true mkdir BUILD cd BUILD @@ -18,15 +19,15 @@ mkdir build-silice cd build-silice cmake -DCMAKE_BUILD_TYPE=Release -G "Unix Makefiles" ../.. -make -j$(nproc) install +make -j$(nproc) +sudo make -j$(nproc) install cd .. - cd .. echo -e "\nInstalling python packages for building designs\n" -pip install termcolor -pip install edalize +pip install --upgrade termcolor +pip install --upgrade edalize echo " " echo " " diff --git a/compile_silice_mingw64.sh b/compile_silice_mingw64.sh index e5873edd..02a1dd8d 100755 --- a/compile_silice_mingw64.sh +++ b/compile_silice_mingw64.sh @@ -25,9 +25,8 @@ export PATH=$PATH:$DIR/jdk-14.0.1/bin/ mkdir build-silice cd build-silice -/mingw64/bin/cmake -DCMAKE_BUILD_TYPE=Release -G "MinGW Makefiles" ../.. +/mingw64/bin/cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local -G "MinGW Makefiles" ../.. mingw32-make -j16 install cd .. - cd .. 
diff --git a/frameworks/boards/brot/board.json b/frameworks/boards/brot/board.json index 6ae0bdcb..d6fa5214 100644 --- a/frameworks/boards/brot/board.json +++ b/frameworks/boards/brot/board.json @@ -5,14 +5,20 @@ "name" : "configurable", "framework" : "brot.v", "pins" : [ - {"set" : "basic"}, + {"set" : "basic", "define" : "BASIC=1"}, {"set" : "buttons", "define" : "BUTTONS=1"}, {"set" : "pmod", "define" : "PMOD=1"}, + {"set" : "uart", "define" : "UART=1"}, + {"set" : "uart2", "define" : "UART2=1"}, {"set" : "spiflash", "define" : "SPIFLASH=1"}, - {"set" : "pmod_qqspi", "define" : "PMOD_QQSPI=1"}, + {"set" : "spiflash_dspi", "define" : "SPIFLASH_DSPI=1"}, + {"set" : "pmod_dspi", "define" : "PMOD_DSPI=1"}, {"set" : "pmod_com_out", "define" : "PMOD_COM_OUT=1"}, {"set" : "pmod_com_in", "define" : "PMOD_COM_IN=1"}, - {"set" : "parallel_screen", "define" : "PARALLEL_SCREEN=1"} + {"set" : "parallel_screen", "define" : "PARALLEL_SCREEN=1"}, + {"set" : "qpsram", "define" : "QPSRAM=1"}, + {"set" : "sync_in", "define" : "SYNC_IN=1"}, + {"set" : "sync_out", "define" : "SYNC_OUT=1"} ], "builders": [ { diff --git a/frameworks/boards/brot/brot.pcf b/frameworks/boards/brot/brot.pcf index c6e1a304..d4eb00eb 100644 --- a/frameworks/boards/brot/brot.pcf +++ b/frameworks/boards/brot/brot.pcf @@ -2,7 +2,49 @@ # ------- # from https://github.com/machdyne/brot/blob/main/brot_v4.pcf -set_io CLK_48 35 +# Additional info (@sylefeb) +# +# PMOD B +# ________ +# _____|______|____ +# --| | +# G --| |__ +# P --| | | PMOD A +# I --| | | +# O --| |__| +# --| | +# --|_______________| +# +# Name BEL PIN Bank +# +# PMOD_A1 x 6/y 0 47 2 +# PMOD_A2 x 7/y 0 45 2 +# PMOD_A3 x 9/y31 43 0 +# PMOD_A4 x 8/y31 38 0 +# PMOD_A7 x 5/y 0 46 2 +# PMOD_A8 x 6/y 0 44 2 +# PMOD_A9 x 8/y31 42 0 +# PMOD_A10 x 9/y31 36 0 +# +# PMOD_B1 x17/y 0 11 1 +# PMOD_B2 x15/y 0 9 1 +# PMOD_B3 x 9/y 0 4 2 +# PMOD_B4 x 8/y 0 2 2 +# PMOD_B7 x16/y 0 10 1 +# PMOD_B8 x13/y 0 6 1 +# PMOD_B9 x 9/y 0 3 2 +# PMOD_B10 x 7/y 0 48 2 +# +# 
GPIO0 x22/y 0 18 1 +# GPIO1 x21/y 0 19 1 +# GPIO2 x19/y 0 20 1 +# GPIO3 x18/y 0 21 1 +# GPIO4 x19/y31 23 0 +# GPIO5 x19/y31 25 0 +# GPIO6 x18/y31 26 0 +# GPIO7 x18/y31 27 0 + +set_io CLK_48 35 # G0 set_io LED_R 41 set_io LED_G 40 @@ -11,8 +53,8 @@ set_io LED_B 39 # GPIO set_io GPIO0 18 set_io GPIO1 19 -set_io GPIO2 20 # NOTE: constrained on same clock as SPIflash? -set_io GPIO3 21 # NOTE: constrained on same clock as SPIflash? +set_io GPIO2 20 # G3 NOTE: constrained on same clock as SPIflash? +set_io GPIO3 21 # NOTE: constrained on same clock as SPIflash? set_io GPIO4 23 set_io GPIO5 25 set_io GPIO6 26 @@ -24,7 +66,7 @@ set_io PMOD_A2 45 set_io PMOD_A3 43 set_io PMOD_A4 38 set_io PMOD_A7 46 -set_io PMOD_A8 44 +set_io PMOD_A8 44 # G6 set_io PMOD_A9 42 set_io PMOD_A10 36 @@ -49,11 +91,11 @@ set_io USB_DN 31 set_io USB_DP 32 set_io USB_DP_PU 34 -# SPI +# SPI (both QPSRAM and MMOD, selected with SPI_SS_RAM, SPI_SS_FLASH) set_io SPI_SS_FLASH 16 -set_io SPI_SS_RAM 37 +set_io SPI_SS_RAM 37 # G1 set_io SPI_SCK 15 set_io SPI_MISO 17 # CSPI_SI set_io SPI_MOSI 14 # CSPI_SO set_io SPI_IO2 12 -set_io SPI_IO3 13 \ No newline at end of file +set_io SPI_IO3 13 diff --git a/frameworks/boards/brot/brot.v b/frameworks/boards/brot/brot.v index 1c2f905f..cbb71993 100644 --- a/frameworks/boards/brot/brot.v +++ b/frameworks/boards/brot/brot.v @@ -42,15 +42,14 @@ $$config['simple_dualport_bram_wenable0_width'] = '1' $$config['simple_dualport_bram_wenable1_width'] = '1' module top( +`ifdef BASIC output LED_R, output LED_G, output LED_B, +`endif `ifdef BUTTONS `error_this_board_has_no_buttons `endif -`ifdef UART -`error_this_board_has_no_uart -`endif `ifdef PMOD inout PMOD_A1, inout PMOD_A2, @@ -76,19 +75,30 @@ module top( output SPI_SS_FLASH, output SPI_MOSI, input SPI_MISO, - output SPI_IO2, - output SPI_IO3, + output SPI_SS_RAM, // unselect psram as lines are shared (disallowed together) `endif -`ifdef PMOD_QQSPI - output PMOD_A1, - inout PMOD_A2, - inout PMOD_A3, - output PMOD_A4, - 
inout PMOD_A7, +`ifdef SPIFLASH_DSPI + output SPI_SCK, + output SPI_SS_FLASH, + inout SPI_MOSI, + inout SPI_MISO, + output SPI_SS_RAM, // unselect psram as lines are shared (disallowed together) +`endif +`ifdef PMOD_DSPI + output PMOD_A7, inout PMOD_A8, - output PMOD_A9, + inout PMOD_A9, output PMOD_A10, `endif +`ifdef QPSRAM + output SPI_SCK, + output SPI_SS_RAM, + inout SPI_MOSI, + inout SPI_MISO, + inout SPI_IO2, + inout SPI_IO3, + output SPI_SS_FLASH, // unselect spiflash as lines are shared (disallowed together) +`endif `ifdef PARALLEL_SCREEN output GPIO0, output GPIO1, @@ -102,6 +112,44 @@ module top( output PMOD_B2, output PMOD_B7, output PMOD_B8, +`endif +`ifdef UART + output PMOD_B7, // TX + input PMOD_B10, // RX +`endif +`ifdef UART2 + output GPIO0, // TX + input GPIO1, // RX +`endif +`ifdef PMOD_COM_OUT + output PMOD_B1, + output PMOD_B2, + output PMOD_B3, + output PMOD_B4, + output PMOD_B7, + output PMOD_B8, + output PMOD_B9, + output PMOD_B10, + output PMOD_A3, + output PMOD_A4, +`endif +`ifdef PMOD_COM_IN + input PMOD_A1, + input PMOD_A2, + input PMOD_A3, + input PMOD_A4, + input PMOD_A7, + input PMOD_A8, + input PMOD_A9, + input PMOD_A10, + input PMOD_B3, + input PMOD_B4, +`endif +`ifdef SYNC_IN + input PMOD_A1, +`endif +`ifdef SYNC_OUT + output PMOD_B9, `endif input CLK_48 ); @@ -138,47 +186,132 @@ end wire run_main; assign run_main = 1'b1; +`ifdef QPSRAM +wire [1:0] psram_unused; +assign SPI_SS_FLASH = 1'b1; +`ifdef SPIFLASH +`error_cannot_use_spiflash_and_qpsram_together +`endif +`ifdef SPIFLASH_DSPI +`error_cannot_use_spiflash_and_qpsram_together +`endif +`endif + +`ifdef SPIFLASH +assign SPI_SS_RAM = 1'b1; +`ifdef QPSRAM +`error_cannot_use_spiflash_and_qpsram_together +`endif +`endif + +`ifdef SPIFLASH_DSPI +assign SPI_SS_RAM = 1'b1; +`ifdef QPSRAM +`error_cannot_use_spiflash_and_qpsram_together +`endif +`endif + +`ifdef BASIC +wire lr; +wire lg; +wire lb; +assign LED_R = ~lr; +assign LED_G = ~lg; +assign LED_B = ~lb; +`endif + +`ifdef 
PARALLEL_SCREEN +wire prlscreen_unused; +`endif + M_main __main( .clock(CLK_48), .out_clock(design_clk), .reset(~RST_q[15]), - .out_leds({LED_B,LED_G,LED_R}), +`ifdef BASIC + .out_leds({lb,lg,lr}), +`endif `ifdef BUTTONS `endif `ifdef PMOD .inout_pmod({PMOD_A10,PMOD_A9,PMOD_A8,PMOD_A7,PMOD_A4,PMOD_A3,PMOD_A2,PMOD_A1}), `endif `ifdef SPIFLASH - .out_sf_clk(FLASH_SCK), - .out_sf_csn(FLASH_SSB), - .out_sf_mosi(FLASH_IO0), - .in_sf_miso(FLASH_IO1), -`endif -`ifdef QSPIFLASH - .out_sf_clk(FLASH_SCK), - .out_sf_csn(FLASH_SSB), - .inout_sf_io0(FLASH_IO0), - .inout_sf_io1(FLASH_IO1), - .inout_sf_io2(FLASH_IO2), - .inout_sf_io3(FLASH_IO3), -`endif -`ifdef PMOD_QQSPI - .inout_ram_io0(PMOD_A2), - .inout_ram_io1(PMOD_A3), - .inout_ram_io2(PMOD_A7), - .inout_ram_io3(PMOD_A8), - .out_ram_clk(PMOD_A4), - .out_ram_csn(PMOD_A1), - .out_ram_bank({PMOD_A10,PMOD_A9}), + .out_sf_csn (SPI_SS_FLASH), + .out_sf_clk (SPI_SCK), + .out_sf_mosi(SPI_MOSI), + .in_sf_miso (SPI_MISO), +`endif +`ifdef SPIFLASH_DSPI + .out_sf_csn (SPI_SS_FLASH), + .out_sf_clk (SPI_SCK), + .inout_sf_io0(SPI_MOSI), + .inout_sf_io1(SPI_MISO), +`endif +`ifdef PMOD_DSPI + .out_sf_csn(PMOD_A7), + .inout_sf_io0(PMOD_A8), + .inout_sf_io1(PMOD_A9), + .out_sf_clk(PMOD_A10), +`endif +`ifdef QPSRAM + .out_ram_csn (SPI_SS_RAM), + .inout_ram_io0(SPI_MOSI), + .inout_ram_io1(SPI_MISO), + .inout_ram_io2(SPI_IO2), + .inout_ram_io3(SPI_IO3), + .out_ram_clk (SPI_SCK), + .out_ram_bank (psram_unused), `endif `ifdef PARALLEL_SCREEN - .out_prlscreen_d({GPIO6,GPIO7,GPIO4,GPIO5,GPIO2,GPIO3,GPIO0,GPIO1}), + .out_prlscreen_d({GPIO7,GPIO6,GPIO5,GPIO4,GPIO3,GPIO2,GPIO1,GPIO0}), .out_prlscreen_resn(PMOD_B1), - .out_prlscreen_csn(PMOD_B7), - .out_prlscreen_rs(PMOD_B2), - .out_prlscreen_clk(PMOD_B8), + .out_prlscreen_csn (/*PMOD_B7*/prlscreen_unused), + .out_prlscreen_rs (PMOD_B8), + .out_prlscreen_clk (PMOD_B2), +`endif +`ifdef UART + .out_uart_tx(PMOD_B7), + .in_uart_rx (PMOD_B10), +`endif +`ifdef UART2 + .out_uart_tx(GPIO0), + .in_uart_rx 
(GPIO1), +`endif +`ifdef SYNC_IN + .in_sync(PMOD_A1), +`endif +`ifdef SYNC_OUT + .out_sync(PMOD_B9), +`endif +/* +PMOD com wiring: +out fpga in fpga +PMOD_B10 <-> PMOD_A1 +PMOD_B9 <-> PMOD_A2 +PMOD_B8 <-> PMOD_A3 +PMOD_B7 <-> PMOD_A4 +PMOD_B4 <-> PMOD_A7 +PMOD_B3 <-> PMOD_A8 +PMOD_B2 <-> PMOD_A9 +PMOD_A3 <-> PMOD_B4 +PMOD_A4 <-> PMOD_B3 + +PMOD_A8 is on a global buffer on the 'in fpga' and has to be used for the clock +*/ +`ifdef PMOD_COM_OUT + .out_com_data({PMOD_B10,PMOD_B9,PMOD_B8,PMOD_B7,PMOD_B4,PMOD_A3,PMOD_B2,PMOD_B1}), + .out_com_clock(PMOD_B3), + .out_com_valid(PMOD_A4), +`endif +`ifdef PMOD_COM_IN + .in_com_data({PMOD_A1,PMOD_A2,PMOD_A3,PMOD_A4,PMOD_A7,PMOD_B4,PMOD_A9,PMOD_A10}), + .in_com_clock(PMOD_A8), + .in_com_valid(PMOD_B3), `endif +// ----------------------------------------------------------------------------- .in_run(run_main) ); +// ----------------------------------------------------------------------------- endmodule diff --git a/frameworks/boards/formal/formal.v b/frameworks/boards/formal/formal.v index 1d47706f..bbf2b5b5 100644 --- a/frameworks/boards/formal/formal.v +++ b/frameworks/boards/formal/formal.v @@ -5,21 +5,21 @@ List contributors with: git shortlog -n -s -- MIT license -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, +the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above copyright 
notice and this permission notice shall be included in all +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. (header_2_M) @@ -29,6 +29,10 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
$$FORMAL=1 $$NUM_LEDS=8 +$$config['__display_supported'] = 'yes' +$$config['__write_supported'] = 'yes' +$$config['__finish_supported'] = 'yes' + module top(); // do nothing endmodule diff --git a/frameworks/boards/icarus/icarus.sh b/frameworks/boards/icarus/icarus.sh index e77d77c8..dff5754e 100755 --- a/frameworks/boards/icarus/icarus.sh +++ b/frameworks/boards/icarus/icarus.sh @@ -33,7 +33,7 @@ rm build* trace.fst trace.fst.hier silice --frameworks_dir $FRAMEWORKS_DIR -f $FRAMEWORK_FILE -o build.v $1 "${@:2}" -iverilog -o build -pfileline=1 build.v +iverilog -g2012 -o build -pfileline=1 build.v vvp build -fst echo "====================================" diff --git a/frameworks/boards/icarus/icarus.v b/frameworks/boards/icarus/icarus.v index 064d3d40..99daec37 100644 --- a/frameworks/boards/icarus/icarus.v +++ b/frameworks/boards/icarus/icarus.v @@ -25,7 +25,9 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. (header_2_M) */ -`define ICARUS 1 +`define ICARUS 1 +`define SIMULATION 1 + $$ICARUS = 1 $$SIMULATION = 1 $$NUM_LEDS = 8 @@ -35,6 +37,9 @@ $$config['bram_wenable_width'] = '1' $$config['dualport_bram_wenable0_width'] = 'data' $$config['dualport_bram_wenable1_width'] = 'data' $$config['reg_init_zero'] = '1' +$$config['__display_supported'] = 'yes' +$$config['__write_supported'] = 'yes' +$$config['__finish_supported'] = 'yes' `timescale 1ns / 1ps diff --git a/frameworks/boards/icebreaker/board.json b/frameworks/boards/icebreaker/board.json index f51e26a6..f6bc15b3 100644 --- a/frameworks/boards/icebreaker/board.json +++ b/frameworks/boards/icebreaker/board.json @@ -18,6 +18,7 @@ {"set" : "pmod_qqspi", "define" : "PMOD_QQSPI=1"}, {"set" : "pmod_com_out", "define" : "PMOD_COM_OUT=1"}, {"set" : "pmod_com_in", "define" : "PMOD_COM_IN=1"}, + {"set" : "pmod_dspi", "define" : "PMOD_DSPI=1"}, {"set" : "parallel_screen", "define" : "PARALLEL_SCREEN=1"} ], "builders": [ @@ -28,7 +29,7 @@ "tool_options": [ { "icepack_options": ["-s"], - 
"yosys_synth_options": ["-dsp","-abc9","-device u","-top top"], + "yosys_synth_options": ["-dsp","-abc9","-device u","-relut","-top top"], "nextpnr_options": ["--up5k", "--freq 12", "--package sg48", "-r"], "pnr": "next" } diff --git a/frameworks/boards/icebreaker/icebreaker.v b/frameworks/boards/icebreaker/icebreaker.v index 0400857f..20f9ea0a 100644 --- a/frameworks/boards/icebreaker/icebreaker.v +++ b/frameworks/boards/icebreaker/icebreaker.v @@ -174,6 +174,12 @@ module top( inout RGB_B, inout P1B9, inout P1B10, +`endif +`ifdef PMOD_DSPI + output P1A7, + inout P1A8, + inout P1A9, + output P1A10, `endif input CLK ); @@ -313,6 +319,12 @@ M_main __main( .out_prlscreen_csn(P1B8), .out_prlscreen_rs(P1B3), .out_prlscreen_clk(P1B9), +`endif +`ifdef PMOD_DSPI + .out_sf_csn(P1A7), + .inout_sf_io0(P1A8), + .inout_sf_io1(P1A9), + .out_sf_clk(P1A10), `endif .in_run(run_main) ); diff --git a/frameworks/boards/mch2022/mch2022.sh b/frameworks/boards/mch2022/mch2022.sh index e6502e2f..95399710 100755 --- a/frameworks/boards/mch2022/mch2022.sh +++ b/frameworks/boards/mch2022/mch2022.sh @@ -49,7 +49,7 @@ icepack -s build.asc build.bin echo "=========================================================" echo "Upload the bitstream with:" -echo " webusb_fpga.py BUILD_mch2022/build.bin" +echo " fpga.py BUILD_mch2022/build.bin" echo "" echo "(available at https://github.com/badgeteam/mch2022-tools)" echo "=========================================================" diff --git a/frameworks/boards/verilator/verilator.sh b/frameworks/boards/verilator/verilator.sh index 87faeeae..e852d83a 100755 --- a/frameworks/boards/verilator/verilator.sh +++ b/frameworks/boards/verilator/verilator.sh @@ -17,17 +17,16 @@ esac # LDFLAGS for OpenGL (VGA / SPIscreen) case "$(uname -s)" in MINGW*) -LDFLAGS="-LDFLAGS -lopengl32 -LDFLAGS -lfreeglut" +LDFLAGS+="-LDFLAGS --static -LDFLAGS -lopengl32 -LDFLAGS -lglfw3 -LDFLAGS -lgdi32" ;; Darwin*) #nproc doesn't work on mac, so alias an equivalent command alias 
nproc="sysctl -n hw.logicalcpu" - #add openGL frameworks -LDFLAGS='-LDFLAGS -framework -LDFLAGS OpenGL -LDFLAGS -framework -LDFLAGS GLUT -LDFLAGS -pthread' +LDFLAGS+='-LDFLAGS -framework -LDFLAGS OpenGL -LDFLAGS -framework -LDFLAGS glfw3 -LDFLAGS -pthread' ;; *) -LDFLAGS="-LDFLAGS -lGL -LDFLAGS -lglut -LDFLAGS -pthread" +LDFLAGS+="-LDFLAGS -lGL -LDFLAGS -lglfw -LDFLAGS -pthread" ;; esac @@ -43,21 +42,18 @@ else export MAKE=mingw32-make fi -export PATH=$PATH:$SILICE_DIR/../tools/fpga-binutils/mingw64/bin/:$SILICE_DIR +export PATH=$PATH:$SILICE_DIR + +if [[ -n "${YOSYSHQ_ROOT}" ]]; then + export VERILATOR_ROOT=${YOSYSHQ_ROOT}/share/verilator/ +fi if [[ -z "${VERILATOR_ROOT}" ]]; then -case "$(uname -s)" in -Linux) -unset VERILATOR_ROOT -;; -*) -# export VERILATOR_ROOT=$SILICE_DIR/../tools/fpga-binutils/mingw64/ -;; -esac -echo "VERILATOR_ROOT is set to ${VERILATOR_ROOT}" +echo "[WARNING] **** VERILATOR_ROOT is NOT set ****" else echo "VERILATOR_ROOT already defined, using its value" fi +echo "build script: VERILATOR_ROOT = $VERILATOR_ROOT" # check Verilator support for -Wno-TIMESCALEMOD set +e @@ -82,54 +78,70 @@ if [[ ! -z "${NO_BUILD}" ]]; then exit fi -LIBSL_DIR=$SILICE_DIR/../src/libs/LibSL-small/src/LibSL/ -VERILATOR_LIB_DIR=$SILICE_DIR/../frameworks/verilator/ +LIBSL_DIR=$FRAMEWORKS_DIR/../src/libs/LibSL-small/src/LibSL/ +VERILATOR_LIB_DIR=$FRAMEWORKS_DIR/verilator/ + +echo "LIBSL_DIR is set to ${LIBSL_DIR}" # NOTE: this below is necessary due to some wierd behaviour on some MinGW # installs where absolute paths generated by Verilator in its makefile # for include directories are incorrectly interepreted by g++. # Relative paths seem ok. cp $VERILATOR_LIB_DIR/verilator_callbacks.h . -cp -R $SILICE_DIR/../src/libs/LibSL-small/src/LibSL . +cp -R $LIBSL_DIR . 
-VERILATOR_BASE="$VERILATOR_LIB_DIR/verilator_main.cpp $VERILATOR_LIB_DIR/verilator_data.cpp $LIBSL_DIR/Image/ImageFormat_TGA.cpp $LIBSL_DIR/Image/Image.cpp $LIBSL_DIR/Image/tga.cpp $LIBSL_DIR/Math/Vertex.cpp $LIBSL_DIR/Math/Math.cpp $LIBSL_DIR/StlHelpers/StlHelpers.cpp $LIBSL_DIR/CppHelpers/CppHelpers.cpp $LIBSL_DIR/System/System.cpp $VERILATOR_LIB_DIR/display.cpp $VERILATOR_LIB_DIR/sdr_sdram.cpp $VERILATOR_LIB_DIR/VgaChip.cpp $VERILATOR_LIB_DIR/ParallelScreen.cpp $VERILATOR_LIB_DIR/SPIScreen.cpp" +VERILATOR_GFX_SRC=" $LIBSL_DIR/Image/ImageFormat_TGA.cpp $LIBSL_DIR/Image/Image.cpp $LIBSL_DIR/Image/tga.cpp $LIBSL_DIR/Math/Vertex.cpp $LIBSL_DIR/Math/Math.cpp $LIBSL_DIR/StlHelpers/StlHelpers.cpp $LIBSL_DIR/CppHelpers/CppHelpers.cpp $LIBSL_DIR/System/System.cpp" -VERILATOR_LIB_SRC="$VERILATOR_BASE" +VERILATOR_LIB_SRC="$VERILATOR_LIB_DIR/verilator_main.cpp $VERILATOR_LIB_DIR/verilator_data.cpp $VERILATOR_LIB_DIR/display.cpp" +VERILATOR_LIB="verilator_main" + +if test -f "$1.cpp"; then + echo ">>>>>> custom verilator framework detected <<<<<<" + VERILATOR_LIB="verilator_custom" + VERILATOR_LIB_SRC="$1.cpp $VERILATOR_LIB_DIR/verilator_data.cpp" + if test -f "$1.h"; then + cp "$1.h" custom.h + else + touch custom.h + fi +else + touch custom.h +fi DEFINES="" if [[ -n "${VGA}" ]]; then DEFINES+="-CFLAGS -DVGA " + VERILATOR_LIB_SRC+=$VERILATOR_GFX_SRC + VERILATOR_LIB_SRC+=" $VERILATOR_LIB_DIR/VgaChip.cpp" fi if [[ -n "${SDRAM}" ]]; then DEFINES+="-CFLAGS -DSDRAM " + VERILATOR_LIB_SRC+=" $VERILATOR_LIB_DIR/sdr_sdram.cpp" fi if [[ -n "${SPISCREEN}" ]]; then DEFINES+="-CFLAGS -DSPISCREEN " + VERILATOR_LIB_SRC+=$VERILATOR_GFX_SRC + VERILATOR_LIB_SRC+=" $VERILATOR_LIB_DIR/SPIScreen.cpp" fi if [[ -n "${OLED}" ]]; then DEFINES+="-CFLAGS -DSPISCREEN " + VERILATOR_LIB_SRC+=$VERILATOR_GFX_SRC + VERILATOR_LIB_SRC+=" $VERILATOR_LIB_DIR/SPIScreen.cpp" fi if [[ -n "${PARALLEL_SCREEN}" ]]; then DEFINES+="-CFLAGS -DPARALLEL_SCREEN " -fi -VERILATOR_LIB="verilator_main" - -if test -f 
"$1.cpp"; then - echo ">>>>>> custom verilator framework detected <<<<<<" - VERILATOR_LIB="verilator_custom" - VERILATOR_LIB_SRC="$1.cpp $VERILATOR_LIB_DIR/verilator_data.cpp" - if test -f "$1.h"; then - cp "$1.h" custom.h - else - touch custom.h - fi -else - touch custom.h + VERILATOR_LIB_SRC+=$VERILATOR_GFX_SRC + VERILATOR_LIB_SRC+=" $VERILATOR_LIB_DIR/ParallelScreen.cpp" fi echo "using verilator framework $VERILATOR_LIB" echo "defines: $DEFINES" +# Verilator wants to include these but they are not generated +# fixes the issue (Verilator 5.019 devel rev v5.018-42-g2dba76a7c) +touch Vtop__pch.h.slow +touch Vtop__pch.h.fast + verilator -Wno-fatal -Wno-PINMISSING -Wno-WIDTH -O3 -cc build.v --report-unoptflat $OPT --top-module top --exe $VERILATOR_LIB_SRC -CFLAGS "-include" -CFLAGS "../verilator_callbacks.h" -CFLAGS "-include" -CFLAGS "custom.h" -CFLAGS "-I$SILICE_DIR/../frameworks/verilator/" -CFLAGS "-I../" -CFLAGS "-I../LibSL/" -CFLAGS "-DNO_SHLWAPI" $DEFINES $LDFLAGS cd obj_dir @@ -138,7 +150,9 @@ $MAKE -f Vtop.mk -j$(nproc) cd .. if [[ -z "${NO_PROGRAM}" ]]; then + rm -f output.txt ./obj_dir/Vtop | tee out.log + # ./obj_dir/Vtop > out.log 2>&1 else echo "Skipping execution." fi diff --git a/frameworks/boards/verilator/verilator.v b/frameworks/boards/verilator/verilator.v index ed49c92b..c9bb0dee 100644 --- a/frameworks/boards/verilator/verilator.v +++ b/frameworks/boards/verilator/verilator.v @@ -28,12 +28,16 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
`define VERILATOR 1 `define COLOR_DEPTH 6 `define SDRAM_WORD_WIDTH 16 +`define SIMULATION 1 $$VERILATOR = 1 $$NUM_LEDS = 8 $$SIMULATION = 1 $$color_depth = 6 $$color_max = 63 +$$config['__display_supported'] = 'yes' +$$config['__write_supported'] = 'yes' +$$config['__finish_supported'] = 'yes' `timescale 1ns / 1ps `default_nettype none diff --git a/frameworks/templates/bram_generic.v.in b/frameworks/templates/bram_generic.v.in index 9f189826..6dc9088d 100644 --- a/frameworks/templates/bram_generic.v.in +++ b/frameworks/templates/bram_generic.v.in @@ -6,12 +6,25 @@ input [%ADDR_WIDTH%-1:0] in_addr, output reg %DATA_TYPE% [%DATA_WIDTH%-1:0] out_rdata, input clock ); -(* no_rw_check *) reg %DATA_TYPE% [%DATA_WIDTH%-1:0] buffer[%DATA_SIZE%-1:0]; -always @(posedge clock) begin - if (in_wenable) begin - buffer[in_addr] <= in_wdata; + (* no_rw_check *) reg %DATA_TYPE% [%DATA_WIDTH%-1:0] buffer[%DATA_SIZE%-1:0]; +`ifdef SIMULATION + // in simulation we use a different code that matches yosys output with + // no_rw_check enabled (which we use to preserve compact LUT designs) + always @(posedge clock) begin + if (in_wenable) begin + buffer[in_addr] <= in_wdata; + out_rdata <= in_wdata; + end else begin + out_rdata <= buffer[in_addr]; + end end - out_rdata <= buffer[in_addr]; -end +`else + always @(posedge clock) begin + if (in_wenable) begin + buffer[in_addr] <= in_wdata; + end + out_rdata <= buffer[in_addr]; + end +`endif %INITIAL% endmodule diff --git a/frameworks/templates/simple_dualport_bram_generic_rw.v.in b/frameworks/templates/simple_dualport_bram_generic_rw.v.in new file mode 100644 index 00000000..9a91b904 --- /dev/null +++ b/frameworks/templates/simple_dualport_bram_generic_rw.v.in @@ -0,0 +1,22 @@ +// SL 2019, MIT license +module %MODULE%( +input [%ADDR0_WIDTH%-1:0] in_addr0, +output reg %DATA_TYPE% [%DATA_WIDTH%-1:0] out_rdata0, +output reg %DATA_TYPE% [%DATA_WIDTH%-1:0] out_rdata1, +input [%WENABLE1_WIDTH%-1:0] in_wenable1, +input [%DATA_WIDTH%-1:0] in_wdata1, 
+input [%ADDR1_WIDTH%-1:0] in_addr1, +input clock0, +input clock1 +); +reg %DATA_TYPE% [%DATA_WIDTH%-1:0] buffer[%DATA_SIZE%-1:0]; +always @(posedge clock0) begin + out_rdata0 <= buffer[in_addr0]; +end +always @(posedge clock1) begin + if (in_wenable1) begin + buffer[in_addr1] <= in_wdata1; + end +end +%INITIAL% +endmodule diff --git a/frameworks/verilator/ParallelScreen.cpp b/frameworks/verilator/ParallelScreen.cpp index 44c28e0c..20951780 100644 --- a/frameworks/verilator/ParallelScreen.cpp +++ b/frameworks/verilator/ParallelScreen.cpp @@ -104,6 +104,9 @@ void ParallelScreen::cmd_idle_ILI9341() case 0x3A: m_command = std::bind( &ParallelScreen::cmd_mode_ILI9341, this ); break; + case 0x36: + m_command = std::bind( &ParallelScreen::cmd_madctl_ILI9341, this ); + break; default: break; } @@ -115,7 +118,7 @@ void ParallelScreen::cmd_idle_ILI9341() void ParallelScreen::cmd_mode_ILI9341() { if (m_byte != 0x55) { - fprintf(stderr,"ParallelScreen error, only supported mode on ILI9341 is 16 bits per pixel\n"); + fprintf(stderr,"ParallelScreen error, only supported mode on ILI9341 is 16 bits per pixel (got 0x%x, expected 0x55)\n"); exit(-1); } set_idle(); @@ -123,6 +126,19 @@ void ParallelScreen::cmd_mode_ILI9341() // ---------------------------------------------------------------------------- +void ParallelScreen::cmd_madctl_ILI9341() +{ + if ((m_byte & 0x20) != 0) { + fprintf(stdout,"screen in row major mode\n"); + m_row_major = true; + } else { + m_row_major = false; + } + set_idle(); +} + +// ---------------------------------------------------------------------------- + void ParallelScreen::cmd_start_end(int *p_start,int *p_end,int nbytes) { fprintf(stdout,"cmd_start_end, byte: %x (step:%d)\n",m_byte,m_step); @@ -151,8 +167,9 @@ void ParallelScreen::cmd_start_end(int *p_start,int *p_end,int nbytes) void ParallelScreen::cmd_write_ram() { if (!m_dc) { - // exit - set_idle(); + // command + cmd_idle_ILI9341(); + return; } if (m_step == 0) { // first time @@ -172,18 
+189,31 @@ void ParallelScreen::cmd_write_ram() m_rgb[0] <<= 3; m_rgb[1] <<= 2; m_rgb[2] <<= 3; m_framebuffer.pixel( m_x_cur,m_y_cur) = m_rgb; + m_framebuffer_changed = true; // update every pixel } ++m_step; if (m_step > 2) { // move to next pixel m_step = 1; - ++ m_y_cur; - if (m_y_cur > m_y_end) { - m_y_cur = m_y_start; + if (!m_row_major) { + ++ m_y_cur; + if (m_y_cur > m_y_end) { + m_y_cur = m_y_start; + ++ m_x_cur; + if (m_x_cur > m_x_end) { + m_x_cur = m_x_start; + m_framebuffer_changed = true; + } + } + } else { ++ m_x_cur; - if (m_x_cur > m_x_end) { - m_x_cur = m_x_start; - m_framebuffer_changed = true; + if (m_x_cur > m_y_end) { + m_x_cur = m_y_start; + ++ m_y_cur; + if (m_y_cur > m_x_end) { + m_y_cur = m_x_start; + m_framebuffer_changed = true; + } } } } diff --git a/frameworks/verilator/ParallelScreen.h b/frameworks/verilator/ParallelScreen.h index d2663767..806a3e55 100644 --- a/frameworks/verilator/ParallelScreen.h +++ b/frameworks/verilator/ParallelScreen.h @@ -68,6 +68,8 @@ class ParallelScreen : public DisplayChip int m_x_cur = 0; int m_y_cur = 0; + + bool m_row_major = false; LibSL::Math::v4b m_rgb; @@ -77,6 +79,7 @@ class ParallelScreen : public DisplayChip void cmd_idle_ILI9341(); void cmd_mode_ILI9341(); + void cmd_madctl_ILI9341(); void cmd_start_end(int *p_start,int *p_end,int nbytes); void cmd_write_ram(); diff --git a/frameworks/verilator/SPIScreen.cpp b/frameworks/verilator/SPIScreen.cpp index a6e6e5e5..22d9a8ce 100644 --- a/frameworks/verilator/SPIScreen.cpp +++ b/frameworks/verilator/SPIScreen.cpp @@ -204,7 +204,7 @@ void SPIScreen::cmd_write_ram() // 6-6-6 m_rgb[(m_step - 1)] = m_byte; if (m_step == 3) { - // fprintf(stdout,"666 x %d, y %d\n",m_x_cur,m_y_cur); + //fprintf(stdout,"666 x %d, y %d\n",m_x_cur,m_y_cur); m_framebuffer.pixel( m_y_cur,m_x_cur) = m_rgb; } diff --git a/frameworks/verilator/display.cpp b/frameworks/verilator/display.cpp index 255e38ea..83d28606 100644 --- a/frameworks/verilator/display.cpp +++ 
b/frameworks/verilator/display.cpp @@ -34,10 +34,10 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #ifdef __APPLE__ #include -#include +#include #else #include -#include +#include #endif // ---------------------------------------------------------------------------- @@ -54,6 +54,22 @@ std::mutex g_Mutex; // Mutex to lock the chip during rendering // ---------------------------------------------------------------------------- +void refresh() +{ + glTexSubImage2D( GL_TEXTURE_2D,0,0,0, + g_Chip->framebuffer().w(),g_Chip->framebuffer().h(), + GL_RGBA,GL_UNSIGNED_BYTE, + g_Chip->framebuffer().pixels().raw()); + glBegin(GL_QUADS); + glTexCoord2f(0.0f, 0.0f); glVertex2f(0.0f, 0.0f); + glTexCoord2f(1.0f, 0.0f); glVertex2f(1.0f, 0.0f); + glTexCoord2f(1.0f, 1.0f); glVertex2f(1.0f, 1.0f); + glTexCoord2f(0.0f, 1.0f); glVertex2f(0.0f, 1.0f); + glEnd(); +} + +// ---------------------------------------------------------------------------- + void simul() { #ifdef TRACE_FST @@ -90,23 +106,10 @@ void render() // lock the mutex before accessing g_Chip std::lock_guard lock(g_Mutex); // has the framebuffer changed? 
- if (g_Chip->framebufferChanged()) { - // yes: refresh frame - glTexSubImage2D( GL_TEXTURE_2D,0,0,0, - g_Chip->framebuffer().w(),g_Chip->framebuffer().h(), - GL_RGBA,GL_UNSIGNED_BYTE, - g_Chip->framebuffer().pixels().raw()); - glBegin(GL_QUADS); - glTexCoord2f(0.0f, 0.0f); glVertex2f(0.0f, 0.0f); - glTexCoord2f(1.0f, 0.0f); glVertex2f(1.0f, 0.0f); - glTexCoord2f(1.0f, 1.0f); glVertex2f(1.0f, 1.0f); - glTexCoord2f(0.0f, 1.0f); glVertex2f(0.0f, 1.0f); - glEnd(); - // swap buffers - glutSwapBuffers(); - } - // ask glut to immediately redraw - glutPostRedisplay(); + //if (g_Chip->framebufferChanged()) { + // yes: refresh frame + refresh(); + //} } // ---------------------------------------------------------------------------- @@ -114,20 +117,23 @@ void render() void display_loop(DisplayChip *chip) { g_Chip = chip; - // glut window - int argc=0; - char *argv[1] = {NULL}; - glutInit(&argc, argv); - glutInitDisplayMode(GLUT_RGBA | GLUT_SINGLE); + // glfw window + if (!glfwInit()) { + fprintf(stderr,"ERROR: cannot initialize glfw."); + exit(-1); + } + GLFWwindow* window = NULL; if (g_Chip->framebuffer().w() <= 320) { - glutInitWindowSize(2*g_Chip->framebuffer().w(), - 2*g_Chip->framebuffer().h()); + window = glfwCreateWindow(2*g_Chip->framebuffer().w(), + 2*g_Chip->framebuffer().h(), + "Silice verilator framework", NULL, NULL); } else { - glutInitWindowSize(g_Chip->framebuffer().w(), - g_Chip->framebuffer().h()); + window = glfwCreateWindow(g_Chip->framebuffer().w(), + g_Chip->framebuffer().h(), + "Silice verilator framework", NULL, NULL); } - glutCreateWindow("Silice verilator framework"); - glutDisplayFunc(render); + glfwMakeContextCurrent(window); + glfwSwapInterval(1); // prepare texture glGenTextures(1,&g_FBtexture); glBindTexture(GL_TEXTURE_2D,g_FBtexture); @@ -143,7 +149,6 @@ void display_loop(DisplayChip *chip) glEnable(GL_TEXTURE_2D); glColor3f(1.0f,1.0f,1.0f); // setup view - glViewport(0,0,g_Chip->framebuffer().w(),g_Chip->framebuffer().h()); 
glMatrixMode(GL_PROJECTION); glLoadIdentity(); glOrtho(0.0f, 1.0f, 1.0f, 0.0f, -1.0f, 1.0f); @@ -152,7 +157,17 @@ void display_loop(DisplayChip *chip) // start simulation in a thread std::thread th(simul); // enter main loop - glutMainLoop(); + while (!glfwWindowShouldClose(window)) { + int width, height; + glfwGetFramebufferSize(window, &width, &height); + glViewport(0, 0, width, height); + render(); + glfwSwapBuffers(window); + glfwPollEvents(); + } + // terminate + glfwDestroyWindow(window); + glfwTerminate(); } // ---------------------------------------------------------------------------- diff --git a/frameworks/verilator/verilator_callbacks.h b/frameworks/verilator/verilator_callbacks.h index 029aab28..e72eae04 100644 --- a/frameworks/verilator/verilator_callbacks.h +++ b/frameworks/verilator/verilator_callbacks.h @@ -6,3 +6,4 @@ int data(int addr); void data_write(int wenable,int addr,unsigned char byte); void set_vga_resolution(int w,int h); int get_random(); +void output_char(int c); diff --git a/frameworks/verilator/verilator_data.cpp b/frameworks/verilator/verilator_data.cpp index fdfbe182..7fb0f9f2 100644 --- a/frameworks/verilator/verilator_data.cpp +++ b/frameworks/verilator/verilator_data.cpp @@ -57,3 +57,12 @@ int get_random() { return rand() ^ (rand()<<8) ^ (rand()<<16) ^ (rand()<<24); } + +void output_char(int c) +{ + FILE *f = fopen("output.txt","a"); + if (f != NULL) { + fputc(c,f); + fclose(f); + } +} diff --git a/get_started_linux.sh b/get_started_linux.sh new file mode 100755 index 00000000..9a27efad --- /dev/null +++ b/get_started_linux.sh @@ -0,0 +1,60 @@ +#!/bin/bash +echo "--------------------------------------------------------------------" +echo "This script installs necessary packages, compiles and install Silice" +echo "Please refer to the script source code to see the list of packages" +echo "--------------------------------------------------------------------" +echo " >>>> it will request sudo access to install packages <<<<" +echo 
"--------------------------------------------------------------------" + +read -p "Please type 'y' to go ahead, any other key to exit: " -n 1 -r +if [[ ! $REPLY =~ ^[Yy]$ ]] +then + echo + echo "Exiting." + exit +fi +echo "" + +# -------------- install packages ---------------------------- +# attempt to guess +source /etc/os-release +if [ "$ID" == "arch" ]; then + sudo ./install_dependencies_archlinux.sh +else + if [ "$ID" == "fedora" ]; then + sudo ./install_dependencies_fedora.sh + else + if [ "$ID" == "debian" ] || [ "$ID_LIKE" == "debian" ]; then + sudo ./install_dependencies_debian_like.sh + else + echo "Cannot determine Linux distrib to install dependencies\n(if this fails please run the install_dependencies script that matches your distrib)." + fi + fi +fi + +# -------------- retrieve oss-cad-suite package -------------- +OSS_CAD_MONTH=11 +OSS_CAD_DAY=29 +OSS_CAD_YEAR=2023 +OSS_PACKAGE=oss-cad-suite-linux-x64-$OSS_CAD_YEAR$OSS_CAD_MONTH$OSS_CAD_DAY.tgz + +rm -rf tools/fpga-binutils/ +rm -rf tools/oss-cad-suite/ +sudo rm -rf /usr/local/share/silice +wget -c https://github.com/YosysHQ/oss-cad-suite-build/releases/download/$OSS_CAD_YEAR-$OSS_CAD_MONTH-$OSS_CAD_DAY/$OSS_PACKAGE +sudo mkdir -p /usr/local/share/silice +sudo mv $OSS_PACKAGE /usr/local/share/silice/ +sudo cp tools/oss-cad-suite-env.sh /usr/local/share/silice/ +cd /usr/local/share/silice ; sudo tar xvfz ./$OSS_PACKAGE ; sudo rm ./$OSS_PACKAGE ; cd - + +# -------------- compile Silice ----------------------------- +./compile_silice_linux.sh + +# -------------- add path to .bashrc ------------------------ +DIR=`pwd` +echo 'source /usr/local/share/silice/oss-cad-suite-env.sh' >> ~/.bashrc + +echo "" +echo "--------------------------------------------------------------------" +echo "Please start a new shell before using Silice (PATH has been changed)" +echo "--------------------------------------------------------------------" diff --git a/get_started_mingw64.sh b/get_started_mingw64.sh index 
cd787652..0a0effcc 100755 --- a/get_started_mingw64.sh +++ b/get_started_mingw64.sh @@ -12,6 +12,7 @@ then exit fi +# -------------- install packages ---------------------------- pacman -S --noconfirm --needed unzip pacman -S --noconfirm --needed wget pacman -S --noconfirm --needed make @@ -23,22 +24,30 @@ pacman -S --noconfirm --needed ${MINGW_PACKAGE_PREFIX}-gtkwave pacman -S --noconfirm --needed ${MINGW_PACKAGE_PREFIX}-verilator pacman -S --noconfirm --needed ${MINGW_PACKAGE_PREFIX}-openFPGALoader pacman -S --noconfirm --needed ${MINGW_PACKAGE_PREFIX}-dfu-util -#pacman -S --noconfirm --needed ${MINGW_PACKAGE_PREFIX}-icestorm -#pacman -S --noconfirm --needed ${MINGW_PACKAGE_PREFIX}-prjtrellis pacman -S --noconfirm --needed ${MINGW_PACKAGE_PREFIX}-boost -# pacman -S --noconfirm --needed ${MINGW_PACKAGE_PREFIX}-nextpnr -# pacman -S --noconfirm --needed ${MINGW_PACKAGE_PREFIX}-yosys - -wget -c https://github.com/sylefeb/fpga-binutils/releases/download/v20230510/fpga-binutils-64.zip - -unzip -o fpga-binutils-64.zip -d tools/fpga-binutils/ - -rm fpga-binutils-64.zip - +pacman -S --noconfirm --needed ${MINGW_PACKAGE_PREFIX}-glfw + +# -------------- retrieve oss-cad-suite package -------------- +OSS_CAD_MONTH=11 +OSS_CAD_DAY=29 +OSS_CAD_YEAR=2023 +OSS_PACKAGE=oss-cad-suite-windows-x64-$OSS_CAD_YEAR$OSS_CAD_MONTH$OSS_CAD_DAY.exe + +rm -rf tools/fpga-binutils/ +rm -rf tools/oss-cad-suite/ +rm -rf /usr/local/share/silice +wget -c https://github.com/YosysHQ/oss-cad-suite-build/releases/download/$OSS_CAD_YEAR-$OSS_CAD_MONTH-$OSS_CAD_DAY/$OSS_PACKAGE +mkdir -p /usr/local/share/silice +mv $OSS_PACKAGE /usr/local/share/silice/ +cp tools/oss-cad-suite-env.sh /usr/local/share/silice/ +cd /usr/local/share/silice ; ./$OSS_PACKAGE ; rm ./$OSS_PACKAGE ; cd - + +# -------------- compile Silice ----------------------------- ./compile_silice_mingw64.sh +# -------------- add path to .bashrc ------------------------ DIR=`pwd` -echo 'export 
PATH=$PATH:'$DIR/bin':'$DIR/tools/fpga-binutils/mingw64/bin >> ~/.bashrc +echo 'source /usr/local/share/silice/oss-cad-suite-env.sh' >> ~/.bashrc echo "" echo "--------------------------------------------------------------------" diff --git a/install_dependencies_archlinux.sh b/install_dependencies_archlinux.sh new file mode 100755 index 00000000..fe939781 --- /dev/null +++ b/install_dependencies_archlinux.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +pacman -S jdk-openjdk git gcc make cmake pkg-config glfw riscv64-linux-gnu-gcc diff --git a/install_dependencies_debian_like.sh b/install_dependencies_debian_like.sh index f0a80f0e..9a50bb05 100755 --- a/install_dependencies_debian_like.sh +++ b/install_dependencies_debian_like.sh @@ -1,3 +1,3 @@ #!/bin/bash -sudo apt install default-jre default-jdk git gcc g++ make cmake pkg-config uuid uuid-dev freeglut3 freeglut3-dev +sudo apt install default-jre default-jdk git gcc g++ make cmake pkg-config uuid uuid-dev libglfw3 libglfw3-dev gcc-riscv64-linux-gnu diff --git a/install_dependencies_fedora.sh b/install_dependencies_fedora.sh index 38c3cdf2..e5179b41 100755 --- a/install_dependencies_fedora.sh +++ b/install_dependencies_fedora.sh @@ -1,3 +1,3 @@ #!/bin/bash -sudo dnf install -y git gcc g++ make cmake pkg-config uuid uuid-devel freeglut-devel java-11-openjdk-devel iverilog +sudo dnf install -y git gcc g++ make cmake pkg-config uuid uuid-devel java-11-openjdk-devel iverilog glfw glfw-devel gcc-riscv64-linux-gnu perl-FindBin diff --git a/learn-silice/classroom/soc_wave_player/Makefile b/learn-silice/classroom/soc_wave_player/Makefile index adf561ad..a638be17 100644 --- a/learn-silice/classroom/soc_wave_player/Makefile +++ b/learn-silice/classroom/soc_wave_player/Makefile @@ -78,6 +78,17 @@ endif step6.si : hardware/main.si python ../tools/solutions.py -i hardware/main.si -s LED,AUDIO,SCREEN,BTN,SD,STREAM > $@ +step7: step7.si + make -C firmware $(FIRMWARE) DEFINES="-DHWFBUFFER" +ifeq ($(BOARD),verilator) + silice-make.py -s $@.si 
-b $(BOARD) -p basic,oled -o BUILD_$(subst :,_,$@) $(ARGS) +else + silice-make.py -s $@.si -b $(BOARD) -p basic,audio,oled,buttons,sdcard -o BUILD_$(subst :,_,$@) $(ARGS) +endif + +step7.si : hardware/main.si + python ../tools/solutions.py -i hardware/main.si -s LED,AUDIO,SCREEN,BTN,SD,STREAM,HWFBUFFER > $@ + final: final.si make -C firmware $(FIRMWARE) ifeq ($(BOARD),verilator) @@ -87,7 +98,7 @@ else endif final.si : hardware/main.si - python ../tools/solutions.py -i hardware/main.si -s LED,AUDIO,SCREEN,BTN,SD,STREAM,PWM > $@ + python ../tools/solutions.py -i hardware/main.si -s LED,AUDIO,SCREEN,BTN,SD,STREAM,HWFBUFFER,PWM > $@ clean: make -C firmware clean diff --git a/learn-silice/classroom/soc_wave_player/README.md b/learn-silice/classroom/soc_wave_player/README.md index 082f76a2..8cb07259 100644 --- a/learn-silice/classroom/soc_wave_player/README.md +++ b/learn-silice/classroom/soc_wave_player/README.md @@ -239,7 +239,7 @@ Encode a music, copy it onto the SDcard and listen to it with the design produce by `make step5 FIRMWARE=step5_audio_stream`. The music will be recognizable but the quality will be horrendous. -> The music file should be called `music.raw` and place on the SDcard root directory. Prepare the file using `./encode_music.sh ` (ffmpeg has to be installed). Use your favorite `mp3` (or perhaps not, this music is going to go through a grinder ;) ). +> The music file should be called `music.raw` and place on the SDcard root directory. Prepare the file using `./encode_music.sh file.mp3` (ffmpeg has to be installed). Use your favorite `mp3` (or perhaps not, this music is going to go through a grinder ;) ). Why is the sound so bad? The onboard DAC is only 4 bits and our music file only has 8 bits per sample to start with. 
We need a way to take into account this diff --git a/learn-silice/classroom/soc_wave_player/firmware/Makefile b/learn-silice/classroom/soc_wave_player/firmware/Makefile index d6070507..19c26a93 100644 --- a/learn-silice/classroom/soc_wave_player/firmware/Makefile +++ b/learn-silice/classroom/soc_wave_player/firmware/Makefile @@ -9,9 +9,10 @@ OBJS = sdcard.o div.o std.o config.o oled.o display.o printf.o mul.o crt0.o fat_ $(PRGS): %: %.o $(OBJS) $(LD) -m elf32lriscv -b elf32-littleriscv -Tconfig_c.ld --no-relax -o code.elf $^ $(ARCH)-objcopy -O verilog code.elf code.hex + $(ARCH)-objdump --disassemble code.elf > code.s %.o : %.c - $(CC) -fno-builtin -fno-unroll-loops -O2 -fno-stack-protector -fno-pic -march=rv32i -mabi=ilp32 -c $< -o $@ + $(CC) -fno-builtin -fno-unroll-loops $(DEFINES) -Os -fno-stack-protector -fno-pic -march=rv32i -mabi=ilp32 -c $< -o $@ %.o : %.s $(AS) -march=rv32i -mabi=ilp32 $< -o $@ diff --git a/learn-silice/classroom/soc_wave_player/firmware/config.c b/learn-silice/classroom/soc_wave_player/firmware/config.c index 72596073..de5b8ac6 100644 --- a/learn-silice/classroom/soc_wave_player/firmware/config.c +++ b/learn-silice/classroom/soc_wave_player/firmware/config.c @@ -8,4 +8,5 @@ volatile int* const OLED_RST = (int*)0x10010; // 10000000000010000 volatile int* const UART = (int*)0x10020; // 10000000000100000 volatile int* const SDCARD = (int*)0x10080; // 10000000010000000 volatile int* const BUTTONS = (int*)0x10100; // 10000000100000000 +volatile int* const DISPLAY = (int*)0x14000; // 10100000000000000 volatile int* const AUDIO = (int*)0x18000; // 11000000000000000 diff --git a/learn-silice/classroom/soc_wave_player/firmware/config.h b/learn-silice/classroom/soc_wave_player/firmware/config.h index f552fa61..69ac3a31 100644 --- a/learn-silice/classroom/soc_wave_player/firmware/config.h +++ b/learn-silice/classroom/soc_wave_player/firmware/config.h @@ -11,3 +11,4 @@ extern volatile int* const UART; extern volatile int* const SDCARD; extern 
volatile int* const AUDIO; extern volatile int* const BUTTONS; +extern volatile int* const DISPLAY; diff --git a/learn-silice/classroom/soc_wave_player/firmware/config_c.ld b/learn-silice/classroom/soc_wave_player/firmware/config_c.ld index 7fb5a191..703a5ae0 100644 --- a/learn-silice/classroom/soc_wave_player/firmware/config_c.ld +++ b/learn-silice/classroom/soc_wave_player/firmware/config_c.ld @@ -6,6 +6,9 @@ MEMORY ENTRY(_start) SECTIONS { + + __stacktop = ORIGIN(ram) + LENGTH(ram); + .text 0x00000000 : { crt0.o (.text) *(.text) diff --git a/learn-silice/classroom/soc_wave_player/firmware/crt0.s b/learn-silice/classroom/soc_wave_player/firmware/crt0.s index 55ba862e..c41d653b 100644 --- a/learn-silice/classroom/soc_wave_player/firmware/crt0.s +++ b/learn-silice/classroom/soc_wave_player/firmware/crt0.s @@ -3,7 +3,7 @@ .type _start, @function _start: - li sp,0x10000 + li sp,0x10000 # __stacktop call main tail exit diff --git a/learn-silice/classroom/soc_wave_player/firmware/display.c b/learn-silice/classroom/soc_wave_player/firmware/display.c index 15bf8c6b..f5478628 100644 --- a/learn-silice/classroom/soc_wave_player/firmware/display.c +++ b/learn-silice/classroom/soc_wave_player/firmware/display.c @@ -11,9 +11,13 @@ int cursor_y; unsigned char front_color; unsigned char back_color; +#ifdef HWFBUFFER +#define framebuffer ((volatile unsigned char *)DISPLAY) +#else unsigned char framebuffer[128*128]; +#endif -unsigned char *display_framebuffer() +volatile unsigned char *display_framebuffer() { return framebuffer; } @@ -61,6 +65,7 @@ void display_putchar(int c) void display_refresh() { +#ifndef HWFBUFFER unsigned char *ptr = framebuffer; for (int i=0;i<128*128;i++) { unsigned char c = (*ptr)>>2; @@ -68,6 +73,7 @@ void display_refresh() oled_pix(c,c,c); oled_wait(); } +#endif } void dual_putchar(int c) diff --git a/learn-silice/classroom/soc_wave_player/firmware/display.h b/learn-silice/classroom/soc_wave_player/firmware/display.h index ea802d17..34ce4857 100644 --- 
a/learn-silice/classroom/soc_wave_player/firmware/display.h +++ b/learn-silice/classroom/soc_wave_player/firmware/display.h @@ -9,4 +9,4 @@ void display_set_front_back_color(unsigned char f,unsigned char b); void display_putchar(int c); void display_refresh(); void dual_putchar(int c); -unsigned char *display_framebuffer(); +volatile unsigned char *display_framebuffer(); diff --git a/learn-silice/classroom/soc_wave_player/firmware/std.c b/learn-silice/classroom/soc_wave_player/firmware/std.c index 33a3e669..812c28ed 100644 --- a/learn-silice/classroom/soc_wave_player/firmware/std.c +++ b/learn-silice/classroom/soc_wave_player/firmware/std.c @@ -64,3 +64,20 @@ char *strncpy(char *dst,const char *src,size_t num) } return dst_start; } + +char *strcpy(char *dst, const char *src) +{ + while (*src) { + *dst++ = *src++; + } + *dst = '\0'; + return dst; +} + +char *strcat(char *dest, const char *src) +{ + char *rdest = dest; + while (*dest) { dest++; } + while ((*dest++ = *src++) != 0) { } + return rdest; +} diff --git a/learn-silice/classroom/soc_wave_player/firmware/std.h b/learn-silice/classroom/soc_wave_player/firmware/std.h index 8ea67b88..87b666a6 100644 --- a/learn-silice/classroom/soc_wave_player/firmware/std.h +++ b/learn-silice/classroom/soc_wave_player/firmware/std.h @@ -32,3 +32,5 @@ void *memcpy(void *dst,const void *src,size_t sz); size_t strlen(const char *str); int strncmp(const char * str1,const char * str2,size_t sz); char *strncpy(char *dst,const char *src,size_t num); +char *strcpy(char *dst, const char *src); +char *strcat(char *dest, const char *src); diff --git a/learn-silice/classroom/soc_wave_player/firmware/step5_audio_stream.c b/learn-silice/classroom/soc_wave_player/firmware/step5_audio_stream.c index 7bad5982..f70f4a53 100644 --- a/learn-silice/classroom/soc_wave_player/firmware/step5_audio_stream.c +++ b/learn-silice/classroom/soc_wave_player/firmware/step5_audio_stream.c @@ -11,6 +11,22 @@ #include "fat_io_lib/src/fat_filelib.h" +void 
clear_audio() +{ + // wait for a buffer swap (sync) + int *addr = (int*)(*AUDIO); + while (addr == (int*)(*AUDIO)) { } + // go ahead + for (int b=0 ; b<2 ; ++b) { + // read directly in hardware buffer + addr = (int*)(*AUDIO); + // clear buffer + memset(addr,0,512); + // wait for buffer swap + while (addr == (int*)(*AUDIO)) { } + } +} + void main() { // install putchar handler for printf diff --git a/learn-silice/classroom/soc_wave_player/firmware/step7_hrdw_screen.c b/learn-silice/classroom/soc_wave_player/firmware/step7_hrdw_screen.c new file mode 100644 index 00000000..64469a46 --- /dev/null +++ b/learn-silice/classroom/soc_wave_player/firmware/step7_hrdw_screen.c @@ -0,0 +1,60 @@ +// @sylefeb 2022-01-10 +// MIT license, see LICENSE_MIT in Silice repo root +// https://github.com/sylefeb/Silice/ + +#include "config.h" +#include "std.h" +#include "oled.h" +#include "display.h" +#include "printf.h" + +#ifndef HWFBUFFER +#error This firmware needs HWFBUFFER defined +#endif + +void main() +{ + // install putchar handler for printf + f_putchar = display_putchar; + // init display + oled_init(); + oled_fullscreen(); + oled_clear(0); + // print message + display_set_cursor(0,0); + display_set_front_back_color(255,0); + + // gradient background + for (int i = 0 ; i < 128; ++i) { + for (int j = 0 ; j < 128; ++j) { + display_framebuffer()[i + (j << 7)] = i; + } + } + + // bouncing text + int dx = 3; + int dy = 1; + int x = 0; + int y = 0; + while (1) { + display_set_cursor(x,y); + printf("Hello world!"); + x += dx; + if (x > 64) { + x = 64; + dx = -dx; + } else if (x < 0) { + x = 0; + dx = -dx; + } + y += dy; + if (y > 108) { + y = 108; + y = -dy; + } else if (y < 0) { + y = 0; + dy = -dy; + } + } + +} diff --git a/learn-silice/classroom/soc_wave_player/hardware/main.si b/learn-silice/classroom/soc_wave_player/hardware/main.si index a93e1444..b24a0d83 100644 --- a/learn-silice/classroom/soc_wave_player/hardware/main.si +++ 
b/learn-silice/classroom/soc_wave_player/hardware/main.si @@ -60,8 +60,8 @@ $$if not Solution_PWM then } $$else uint8 counter = 0; - uint4 low = 4b0; - uint4 high = 4b1; + uint4 low = 0; + uint4 high = 15; always { uint8 frac = audio_in; audio_out = counter < frac ? high : low; @@ -129,16 +129,20 @@ $$end $$if Solution_SCREEN then // SPIscreen (OLED) controller chip - uint1 displ_dta_or_cmd <: prev_wdata[10,1]; - uint8 displ_byte <: prev_wdata[0,8]; oled display( - data_or_command <: displ_dta_or_cmd, - byte <: displ_byte, oled_din :> oled_mosi, oled_clk :> oled_clk, oled_dc :> oled_dc, ); $$end +$$if Solution_HWFBUFFER then + // allocate a BRAM for the display pixels + simple_dualport_bram uint8 frame_buffer[$128*128$] = uninitialized; + uint1 fb_enabled = 0; + uint14 fb_pixcount = 0; + uint3 fb_channel = 3b010; + uint17 fb_wait = 1; +$$end $$if Solution_SPIflash then $$if not SPIFLASH then @@ -243,6 +247,22 @@ $$if Solution_STREAM then : (audio_buffer_sample+1) // go to next sample ); audio_counter = (audio_counter == $PERIOD$) ? 0 : (audio_counter+1); +$$end +$$if Solution_HWFBUFFER then + // ---- hardware display buffer + frame_buffer.wenable1 = 0; + display.enable = fb_enabled & fb_wait[0,1]; + display.data_or_command = 1; + display.byte = frame_buffer.rdata0; + frame_buffer.addr0 = fb_pixcount; + fb_channel = (fb_enabled & fb_wait[0,1]) + ? {fb_channel[0,2],fb_channel[2,1]} + : fb_channel; + fb_wait = (fb_enabled) ? {fb_wait[0,16],fb_wait[16,1]} + : fb_wait; + fb_pixcount = (fb_enabled & fb_wait[0,1] & fb_channel[0,1]) + ? 
fb_pixcount + 1 + : fb_pixcount; $$end // ---- check whether the CPU read from or wrote to a peripheral address uint1 peripheral = prev_mem_addr[$periph_bit$,1]; @@ -255,7 +275,7 @@ $$if Solution_LED then uint1 leds_access = prev_mem_addr[ 0,1]; $$end $$if Solution_SCREEN then - uint1 display_en_access = prev_mem_addr[ 1,1]; + uint1 display_direct_access = prev_mem_addr[ 1,1]; uint1 display_reset_access = prev_mem_addr[ 2,1]; $$end $$if Solution_UART then @@ -269,6 +289,9 @@ $$if Solution_SD then $$end $$if Solution_BTN then uint1 button_access = prev_mem_addr[ 6,1]; +$$end +$$if Solution_HWFBUFFER then + uint1 framebuffer_access = prev_mem_addr[$periph_bit-2$,1]; $$end // ---- memory access CPU <-> BRAM (reads and writes) // reads RAM, peripherals => CPU @@ -283,7 +306,7 @@ $$if Solution_SD then | ((peripheral_r & sd_access) ? {31b0, sd_miso} : 32b0) $$end $$if Solution_BTN then - | ((peripheral_r & button_access) ? {31b0, btns} : 32b0) + | ((peripheral_r & button_access) ? {25b0, btns} : 32b0) $$end $$if Solution_STREAM then | ((peripheral_r & audio_access) ? audio_addr_cpu : 32b0) @@ -295,19 +318,29 @@ $$end ram.wdata = memio.wdata; ram.addr = memio.addr; // writes CPU => peripherals +$$if Solution_HWFBUFFER and Solution_STREAM then + if (peripheral_w & ~audio_access & ~framebuffer_access) { +$$else $$if Solution_STREAM then if (peripheral_w & ~audio_access) { $$else if (peripheral_w) { $$end +$$end $$if Solution_LED then /// LEDs leds = leds_access ? 
prev_wdata[0,8] : leds; $$end $$if Solution_SCREEN then /// display - // -> whether to send command or data - display.enable = display_en_access & (prev_wdata[9,1] | prev_wdata[10,1]); + if (display_direct_access) { + // -> whether to send command or data + display.enable = (prev_wdata[9,1] | prev_wdata[10,1]); + // -> byte to send + display.byte = prev_wdata[0,8]; + // -> data or command + display.data_or_command = prev_wdata[10,1]; + } // -> SPIscreen reset oled_resn = ~ (display_reset_access & prev_wdata[0,1]); $$end @@ -366,16 +399,27 @@ $$if SIMULATION then $$end } -$$if Solution_STREAM then +$$if Solution_STREAM or Solution_HWFBUFFER then if (peripheral_w) { uint2 which = prev_mem_rw[1,2] | {2{prev_mem_rw[3,1]}}; // ^^^^^ produces 0,1,2,3 based on write mask +$$if Solution_STREAM then + // ---- audio audio_buffer.wdata1 = prev_wdata >> {which,3b0}; // ^^^ sample to be written ^^ (shift due to 32bits addressing) audio_buffer.addr1 = audio_buffer_start_waddr | {prev_mem_addr[0,7],2b0}//addr from CPU (32bits) | which; // sample address audio_buffer.wenable1 = audio_access; // write sample +$$end +$$if Solution_HWFBUFFER then + // ---- display + frame_buffer.wenable1 = framebuffer_access; + fb_enabled = fb_enabled | framebuffer_access; + frame_buffer.addr1 = {prev_mem_addr[0,12],which}; //addr from CPU (32bits) + uint8 clr = prev_wdata[{which,3b000},8]; // get 8 bit channel value + frame_buffer.wdata1 = {2b00,clr[2,6]}; // remap to 6 bits per channel +$$end } $$end diff --git a/projects/common/clean_reset.si b/projects/common/clean_reset.si index 5d0b34a5..78bf8ade 100644 --- a/projects/common/clean_reset.si +++ b/projects/common/clean_reset.si @@ -12,7 +12,7 @@ $$end unit clean_reset( output uint1 out(1) ) { - uint$clean_reset_width$ trigger( $(1<> 1; diff --git a/projects/common/divint_std.si b/projects/common/divint_std.si index c76d0451..b2ec8bdd 100644 --- a/projects/common/divint_std.si +++ b/projects/common/divint_std.si @@ -14,7 +14,11 @@ $$div_width_pow2 
= clog2(div_width) algorithm div$div_width$( input int$div_width$ inum, input int$div_width$ iden, - output int$div_width$ ret = 0) + output int$div_width$ ret = 0, +$$if div_remainder then + output int$div_width$ rem = 0, +$$end + ) { uint$div_width+1$ ac = uninitialized; uint$div_width+1$ diff <:: ac - :den; @@ -54,4 +58,7 @@ $$end $$if not div_unsigned then ret = ((inum_neg) ^ (iden_neg)) ? -ret : ret; $$end +$$if div_remainder then + rem = ac[1,$div_width$]; +$$end } diff --git a/projects/common/ice40_sb_gb.v b/projects/common/ice40_sb_gb.v index 7ecbd5f7..b480244c 100644 --- a/projects/common/ice40_sb_gb.v +++ b/projects/common/ice40_sb_gb.v @@ -1,3 +1,6 @@ +`ifndef ICE40_SB_GB +`define ICE40_SB_GB + module sb_gb( input user, output buffered @@ -10,4 +13,6 @@ module sb_gb( endmodule +`endif + // http://www.latticesemi.com/~/media/LatticeSemi/Documents/TechnicalBriefs/SBTICETechnologyLibrary201504.pdf diff --git a/projects/common/ice40_sb_io.v b/projects/common/ice40_sb_io.v index 0dc6b697..8c77345c 100644 --- a/projects/common/ice40_sb_io.v +++ b/projects/common/ice40_sb_io.v @@ -1,3 +1,6 @@ +`ifndef ICE40_SB_IO +`define ICE40_SB_IO + module sb_io( input clock, input out, @@ -11,9 +14,11 @@ module sb_io( ) sbio ( .PACKAGE_PIN(pin), .D_OUT_0(out), - .OUTPUT_CLK(clock), + .OUTPUT_CLK(clock) ); endmodule +`endif + // http://www.latticesemi.com/~/media/LatticeSemi/Documents/TechnicalBriefs/SBTICETechnologyLibrary201504.pdf diff --git a/projects/common/ice40_sb_io_ddr.v b/projects/common/ice40_sb_io_ddr.v index 74900864..75968265 100644 --- a/projects/common/ice40_sb_io_ddr.v +++ b/projects/common/ice40_sb_io_ddr.v @@ -1,3 +1,6 @@ +`ifndef ICE40_SB_IO_DDR +`define ICE40_SB_IO_DDR + module sb_io_ddr( input clock, input out_0, @@ -18,4 +21,6 @@ module sb_io_ddr( endmodule +`endif + // http://www.latticesemi.com/~/media/LatticeSemi/Documents/TechnicalBriefs/SBTICETechnologyLibrary201504.pdf diff --git a/projects/common/ice40_sb_io_in_ddr.v 
b/projects/common/ice40_sb_io_in_ddr.v index 489d7ffe..2aef111a 100644 --- a/projects/common/ice40_sb_io_in_ddr.v +++ b/projects/common/ice40_sb_io_in_ddr.v @@ -1,6 +1,10 @@ +`ifndef ICE40_SB_IO_IN_DDR +`define ICE40_SB_IO_IN_DDR + module sb_io_in_ddr( input clock, - output in, + output in0, + output in1, input pin ); @@ -8,10 +12,13 @@ module sb_io_in_ddr( .PIN_TYPE(6'b0000_00) ) sbio ( .PACKAGE_PIN(pin), - .D_IN_0(in), + .D_IN_0(in0), + .D_IN_1(in1), .INPUT_CLK(clock) ); endmodule +`endif + // http://www.latticesemi.com/~/media/LatticeSemi/Documents/TechnicalBriefs/SBTICETechnologyLibrary201504.pdf diff --git a/projects/common/ice40_sb_io_in_ddr_8.v b/projects/common/ice40_sb_io_in_ddr_8.v index 34f9e637..91c5f2ec 100644 --- a/projects/common/ice40_sb_io_in_ddr_8.v +++ b/projects/common/ice40_sb_io_in_ddr_8.v @@ -1,6 +1,10 @@ +`ifndef ICE40_SB_IO_IN_DDR_8 +`define ICE40_SB_IO_IN_DDR_8 + module sb_io_in_ddr_8( input clock, - output [7:0] in, + output [7:0] in0, + output [7:0] in1, input [7:0] pin ); @@ -8,10 +12,13 @@ module sb_io_in_ddr_8( .PIN_TYPE(6'b0000_00) ) sbio[7:0] ( .PACKAGE_PIN(pin), - .D_IN_0(in), + .D_IN_0(in0), + .D_IN_1(in1), .INPUT_CLK(clock) ); endmodule +`endif + // http://www.latticesemi.com/~/media/LatticeSemi/Documents/TechnicalBriefs/SBTICETechnologyLibrary201504.pdf diff --git a/projects/common/ice40_sb_io_inout.v b/projects/common/ice40_sb_io_inout.v index e3a1f9d2..cfa0fb71 100644 --- a/projects/common/ice40_sb_io_inout.v +++ b/projects/common/ice40_sb_io_inout.v @@ -1,4 +1,7 @@ -module sb_io_inout( +`ifndef ICE40_SB_IO_INOUT +`define ICE40_SB_IO_INOUT + +module sb_io_inout #(parameter TYPE=6'b1101_00) ( input clock, input oe, input out, @@ -7,11 +10,12 @@ module sb_io_inout( ); SB_IO #( - .PIN_TYPE(6'b1101_00) + .PIN_TYPE(TYPE) ) sbio ( .PACKAGE_PIN(pin), .OUTPUT_ENABLE(oe), .D_OUT_0(out), + .D_OUT_1(out), .D_IN_1(in), .OUTPUT_CLK(clock), .INPUT_CLK(clock) @@ -19,4 +23,6 @@ module sb_io_inout( endmodule +`endif + // 
http://www.latticesemi.com/~/media/LatticeSemi/Documents/TechnicalBriefs/SBTICETechnologyLibrary201504.pdf diff --git a/projects/common/ice40_spram.v b/projects/common/ice40_spram.v index aff9b5b3..cf93d82a 100644 --- a/projects/common/ice40_spram.v +++ b/projects/common/ice40_spram.v @@ -1,10 +1,13 @@ +`ifndef ICE40_SPRAM +`define ICE40_SPRAM + module ice40_spram( input clock, input [13:0] addr, // 16K entries input [15:0] data_in, // 16 bits input [3:0] wmask, input wenable, - output [15:0] data_out // 16 bits + output [15:0] data_out // 16 bits ); SB_SPRAM256KA spram ( @@ -20,4 +23,6 @@ SB_SPRAM256KA spram ( .DATAOUT(data_out) ); +`endif + endmodule diff --git a/projects/common/ice40_warmboot.v b/projects/common/ice40_warmboot.v index 5249d17e..960ae312 100644 --- a/projects/common/ice40_warmboot.v +++ b/projects/common/ice40_warmboot.v @@ -1,12 +1,17 @@ +`ifndef ICE40_WARMBOOT +`define ICE40_WARMBOOT + module ice40_warmboot( input boot, input [1:0] slot ); -SB_WARMBOOT wb( +SB_WARMBOOT wb( .BOOT(boot), .S0(slot[0]), .S1(slot[1]) ); endmodule + +`endif diff --git a/projects/common/oled_ssd1331.si b/projects/common/oled_ssd1331.si index 8f819efb..49da3479 100644 --- a/projects/common/oled_ssd1331.si +++ b/projects/common/oled_ssd1331.si @@ -21,9 +21,9 @@ group oledio { $$oled_send_delay = 8*2 algorithm oled_send( - input! uint1 enable, - input! uint1 data_or_command, - input! uint8 byte, + input uint1 enable, + input uint1 data_or_command, + input uint8 byte, output uint1 oled_clk, output uint1 oled_mosi, output uint1 oled_dc, diff --git a/projects/common/oled_ssd1351.si b/projects/common/oled_ssd1351.si index f813756f..28293bbd 100644 --- a/projects/common/oled_ssd1351.si +++ b/projects/common/oled_ssd1351.si @@ -21,9 +21,9 @@ group oledio { $$oled_send_delay = 8*2 algorithm oled_send( - input! uint1 enable, - input! uint1 data_or_command, - input! 
uint8 byte, + input uint1 enable, + input uint1 data_or_command, + input uint8 byte, output uint1 oled_clk, output uint1 oled_mosi, output uint1 oled_dc, diff --git a/projects/common/passthrough.v b/projects/common/passthrough.v index 72521b56..c87b11d8 100644 --- a/projects/common/passthrough.v +++ b/projects/common/passthrough.v @@ -1,7 +1,12 @@ +`ifndef PASSTHROUGH +`define PASSTHROUGH + module passthrough( input inv, output outv); - + assign outv = inv; endmodule + +`endif diff --git a/projects/common/plls/icebrkr_15.v b/projects/common/plls/icebrkr_15.v index b1d14533..8d90b805 100644 --- a/projects/common/plls/icebrkr_15.v +++ b/projects/common/plls/icebrkr_15.v @@ -10,7 +10,7 @@ module pll( SB_PLL40_PAD #(.FEEDBACK_PATH("SIMPLE"), .PLLOUT_SELECT("GENCLK_HALF"), // outputs half of request .DIVR(4'b0000), -// 30 +// 30 (but we output half with GENCLK_HALF) .DIVF(7'b1001111), .DIVQ(3'b101), // diff --git a/projects/common/plls/icebrkr_20_lock.v b/projects/common/plls/icebrkr_20_lock.v new file mode 100644 index 00000000..a7b3e316 --- /dev/null +++ b/projects/common/plls/icebrkr_20_lock.v @@ -0,0 +1,39 @@ +/** + * PLL configuration + */ + +module pll( + input clock_in, + output clock_out, + output reset + ); + + wire lock; + assign reset = ~lock; + + SB_PLL40_PAD #(.FEEDBACK_PATH("SIMPLE"), + .PLLOUT_SELECT("GENCLK"), + .DIVR(4'b0000), +// 20 + .DIVF(7'b0110100), + .DIVQ(3'b101), +// + .FILTER_RANGE(3'b001), + .DELAY_ADJUSTMENT_MODE_FEEDBACK("FIXED"), + .FDA_FEEDBACK(4'b0000), + .DELAY_ADJUSTMENT_MODE_RELATIVE("FIXED"), + .FDA_RELATIVE(4'b0000), + .SHIFTREG_DIV_MODE(2'b00), + .ENABLE_ICEGATE(1'b0) + ) uut ( + .PACKAGEPIN(clock_in), + .PLLOUTCORE(clock_out), + .EXTFEEDBACK(), + .DYNAMICDELAY(), + .LATCHINPUTVALUE(), + .RESETB(1'b1), + .LOCK(lock), + .BYPASS(1'b0) + ); + +endmodule diff --git a/projects/common/plls/icebrkr_25_lock.v b/projects/common/plls/icebrkr_25_lock.v new file mode 100644 index 00000000..9ac22545 --- /dev/null +++ 
b/projects/common/plls/icebrkr_25_lock.v @@ -0,0 +1,39 @@ +/** + * PLL configuration + */ + +module pll( + input clock_in, + output clock_out, + output reset + ); + + wire lock; + assign reset = ~lock; + + SB_PLL40_PAD #(.FEEDBACK_PATH("SIMPLE"), + .PLLOUT_SELECT("GENCLK"), + .DIVR(4'b0000), +// 25 + .DIVF(7'b1000010), + .DIVQ(3'b101), +// + .FILTER_RANGE(3'b001), + .DELAY_ADJUSTMENT_MODE_FEEDBACK("FIXED"), + .FDA_FEEDBACK(4'b0000), + .DELAY_ADJUSTMENT_MODE_RELATIVE("FIXED"), + .FDA_RELATIVE(4'b0000), + .SHIFTREG_DIV_MODE(2'b00), + .ENABLE_ICEGATE(1'b0) + ) uut ( + .PACKAGEPIN(clock_in), + .PLLOUTCORE(clock_out), + .EXTFEEDBACK(), + .DYNAMICDELAY(), + .LATCHINPUTVALUE(), + .RESETB(1'b1), + .LOCK(lock), + .BYPASS(1'b0) + ); + +endmodule diff --git a/projects/common/qpsram2x.si b/projects/common/qpsram2x.si index 85670e3a..7dbbe906 100644 --- a/projects/common/qpsram2x.si +++ b/projects/common/qpsram2x.si @@ -43,10 +43,11 @@ $$if not qpsram_fast then $$ qpsram_fast = false $$end -$$if not SIMULATION then +$$if not SIMULATION or ICE40_SIMULATION then $$if ICE40 then import('ice40_sb_io_inout.v') import('ice40_sb_io.v') +import('ice40_sb_io_ddr.v') $$end $$if ECP5 then import('ecp5_inout.v') @@ -88,6 +89,7 @@ $$end // // **IMPORTANT**: If the QPSRAM chip has just been powered up, pulse `init` // once to initialize the QPI mode, and wait for `busy` to be low again. +// Do not leave init floating! 
// // >>> Otherwise the interface will NOT work <<< // @@ -105,15 +107,21 @@ unit qpsram_ram( input uint1 in_ready, // set high to start reading/writting input uint1 init, // pulse high to initiate QPI mode on the QPSRAM chip $$if SIMULATION then - input uint32 addr, + input uint32 addr, // 32bits for large simulations (hardware may have banks) $$else - input uint24 addr, // address to be written (24 bits is 16 MB) + input uint24 addr, // address to be written (24bits is 16 MB) $$end - output uint8 rdata, // read byte - input uint8 wdata, // byte to be written - input uint1 wenable, // set high if writting - output uint1 busy(0), // high during operation - output uint1 data_next(0), // pulses high when next byte is ready/needed + output uint8 rdata, // read byte + input uint8 wdata, // byte to be written + input uint1 wenable, // set high if writting + output uint1 busy(0), // high during operation + output uint1 data_next(0), // pulses high when next byte is ready/needed + output uint1 wstream_1x(0), // goes high when data should start + // streaming in every cycle from a 1x host; this + // is necessary as data_next latency otherwise + // makes streaming impossible from a 1x host + // wstream_1x is expected to go through a 1x reg + // before being used by host // QPSRAM io pins output uint1 ram_csn(1), output uint1 ram_clk, inout uint1 ram_io0, inout uint1 ram_io1, @@ -122,7 +130,6 @@ $$end uint32 sendvec(0); - // qpsram_qspi spi( qpsram_qspi spi( clk :> ram_clk, csn :> ram_csn, @@ -166,6 +173,7 @@ $$end } case 1: { // ---- idle accept_in = 1; + busy = 0; } case 2: { // ---- sending command and address spi.trigger = 1; @@ -174,26 +182,27 @@ $$end stage = 0; // wait wait = 16; //_ 2 cycles after = sending[0,1] ? 
3 : 2; + wstream_1x = wstream_1x | (sending[0,1] & wenable); // 1x write stream should start now sending = sending >> 1; } case 3: { // ---- setup read/write delay send_else_read = wenable; spi.trigger = ~init; spi.send = wdata; - data_next = wenable; + data_next = wenable; // request next if writting stage = 0; // wait wait = wenable ? //_ 16 => 2 cycles, 6 => 12 cycles, 7 => 11 cycles $$if ECP5 then $$ if qpsram_fast then - 13 : 5; + 13 : 4; $$ else - 13 : 6; + 13 : 5; $$ end $$else $$ if qpsram_fast then - 16 : 7; + 16 : 6; $$ else - 16 : 8; // icebreaker, icestick <= 60 use 8, faster 7 + 16 : 7; // icebreaker, icestick <= 60 MHz $$ end $$end after = 4; @@ -202,6 +211,7 @@ $$if VERILATOR then if (wenable) { vdta.wenable = 1; vdta.wdata = spi.send; + // __display("[qpsram|%d] first written byte: %x @%x",cycle,vdta.wdata,vdta.addr); } $$end } @@ -211,22 +221,34 @@ $$end vdta.wdata = wdata; vdta.wenable = wenable & continue; vdta.addr = vdta.addr + 1; + //if (wenable) { + // __display("[qpsram|%d] written byte: %x @%x",cycle,vdta.wdata,vdta.addr); + //} else { + // __display("[qpsram|%d] read byte : %x @%x",cycle,vdta.rdata,vdta.addr - 1); + //} + $$elseif ICARUS then + rdata = cycle[0,8]; // dummy data for icarus sum $$else rdata = spi.read; $$end data_next = 1; + wstream_1x = continue; spi.trigger = continue; spi.send = wdata; - busy = continue; wait = 16; //_ 2 cycles // return to start stage if no further reads, otherwise wait and ... - stage = ~continue ? 1 : 0; + stage = ~continue ? 5 : 0; after = 4; // ... keep going accept_in = ~continue; // accept an input immediately $$if SIMULATION then //__display("[%d] qpsram [4] (%x|%x) w:%b in_ready:%b accept_in:%b",cycle,rdata,wdata,wenable,in_ready,accept_in); $$end } + case 5: { + stage = 0; + wait = 14; + after = 1; + } } // switch // start sending? 
@@ -256,7 +278,7 @@ $$end // Physical layer, sends one byte every two cycle in QPI mode // ------------------------------------------------------------- -algorithm qpsram_qspi( +unit qpsram_qspi( input uint8 send, // byte to be sent input uint1 trigger, // trigger: set to high to start operation input uint1 send_else_read, // set to high if sending, low if reading @@ -271,41 +293,52 @@ algorithm qpsram_qspi( // internal state uint1 dc(0); uint8 sending(0); uint1 osc(0); // driving the QPSRAM chip clock with a ddr module - uint1 enable(0); + uint1 trigger_delay(0); + uint1 send_else_read_delay(0); + uint1 enable(0); uint1 enable_delay(0); // setup registered tristate ios uint4 io_oe(0); uint4 io_i(0); uint4 io_o(0); - uint1 chip_select(0); + uint1 chip_select(1); uint1 chip_select_1(1); $$if ICE40 then // ---- Lattice ice40 implementation using vendor specific primitives - ddr_clock _(clock <: clock, enable <:: enable, ddr_clock :> clk); - sb_io_inout _(clock <: clock, oe <: io_oe[0,1], in :> io_i[0,1], out <: io_o[0,1], pin <:> io0); - sb_io_inout _(clock <: clock, oe <: io_oe[1,1], in :> io_i[1,1], out <: io_o[1,1], pin <:> io1); - sb_io_inout _(clock <: clock, oe <: io_oe[2,1], in :> io_i[2,1], out <: io_o[2,1], pin <:> io2); - sb_io_inout _(clock <: clock, oe <: io_oe[3,1], in :> io_i[3,1], out <: io_o[3,1], pin <:> io3); - sb_io _(clock <: clock, out <: chip_select, pin :> csn); + uint1 zero(0); + sb_io_ddr _(clock <: clock, out_0 <:: enable_delay, out_1 <:: zero, pin :> clk); + sb_io_inout _(clock <: clock, oe <: io_oe[0,1], in :> io_i[0,1], out <: io_o[0,1], pin <:> io0); + sb_io_inout _(clock <: clock, oe <: io_oe[1,1], in :> io_i[1,1], out <: io_o[1,1], pin <:> io1); + sb_io_inout _(clock <: clock, oe <: io_oe[2,1], in :> io_i[2,1], out <: io_o[2,1], pin <:> io2); + sb_io_inout _(clock <: clock, oe <: io_oe[3,1], in :> io_i[3,1], out <: io_o[3,1], pin <:> io3); + sb_io _(clock <: clock, out <:: chip_select_1, pin :> csn); $$end $$if ECP5 then // ---- 
Lattice ecp5 implementation using vendor specific primitives - ddr_clock _(clock <: clock, enable <: enable, ddr_clock :> clk); + ddr_clock _(clock <: clock, enable <: enable_delay, ddr_clock :> clk); ecp5_inout _(clock <: clock, oe <:: io_oe[0,1], in :> io_i[0,1], out <:: io_o[0,1], pin <:> io0); ecp5_inout _(clock <: clock, oe <:: io_oe[1,1], in :> io_i[1,1], out <:: io_o[1,1], pin <:> io1); ecp5_inout _(clock <: clock, oe <:: io_oe[2,1], in :> io_i[2,1], out <:: io_o[2,1], pin <:> io2); ecp5_inout _(clock <: clock, oe <:: io_oe[3,1], in :> io_i[3,1], out <:: io_o[3,1], pin <:> io3); - ecp5_out _(clock <: clock, out <: chip_select, pin :> csn); + ecp5_out _(clock <: clock, out <:: chip_select_1, pin :> csn); +$$end +$$if ICARUS then + $$end uint4 read_reg(0); always { - chip_select = ~ ( trigger | enable ); // output enable on ios - io_oe = {4{send_else_read}}; + io_oe = {4{send_else_read_delay}}; // read current - read = {read[0,4],io_i}; - read_reg = io_i; + read = {read[0,4],io_i}; + read_reg = io_i; // update outputs - io_o = ~osc ? sending[0,4] : sending[4,4]; + io_o = ~osc ? sending[0,4] : sending[4,4]; // start/keep sending? - sending = (~osc | ~enable) ? send : sending; - osc = ~trigger ? 1b0 : ~osc; - enable = trigger; + sending = (~osc | ~enable) ? send : sending; + osc = ~trigger_delay ? 1b0 : ~osc; + enable_delay = enable; + enable = trigger; + chip_select_1 = ~ enable & chip_select; + chip_select = ~ enable; + + trigger_delay = trigger; + send_else_read_delay = send_else_read; } } diff --git a/projects/common/sdcard.si b/projects/common/sdcard.si index 11c22a8e..bf6b2f7a 100644 --- a/projects/common/sdcard.si +++ b/projects/common/sdcard.si @@ -20,8 +20,8 @@ group sdcardio { } interface sdcardio_ctrl { - input! addr_sector, - input! 
read_sector, + input addr_sector, + input read_sector, output ready, input offset, } diff --git a/projects/common/sdram_controller_autoprecharge_r128_w8.si b/projects/common/sdram_controller_autoprecharge_r128_w8.si index 7cd746f9..29a41d3e 100644 --- a/projects/common/sdram_controller_autoprecharge_r128_w8.si +++ b/projects/common/sdram_controller_autoprecharge_r128_w8.si @@ -38,7 +38,6 @@ $$if not SDRAM_COLUMNS_WIDTH then $$ if ULX3S then -$$ -- print('setting SDRAM_COLUMNS_WIDTH=10 for ULX3S with AS4C32M16 chip') $$ SDRAM_COLUMNS_WIDTH = 9 $$ print('setting SDRAM_COLUMNS_WIDTH=9 for ULX3S with AS4C32M16 or IS42S16160G chip') $$ print('Note: the AS4C32M16 is only partially used with this setting') @@ -46,8 +45,8 @@ $$ elseif DE10NANO then $$ print('setting SDRAM_COLUMNS_WIDTH=9 for DE10NANO with AS4C16M16 chip') $$ SDRAM_COLUMNS_WIDTH = 9 $$ elseif SIMULATION then -$$ print('setting SDRAM_COLUMNS_WIDTH=10 for simulation') -$$ SDRAM_COLUMNS_WIDTH = 10 +$$ print('setting SDRAM_COLUMNS_WIDTH=9 for simulation') +$$ SDRAM_COLUMNS_WIDTH = 9 $$ else $$ error('SDRAM_COLUMNS_WIDTH not specified') $$ end diff --git a/projects/common/sdram_controller_autoprecharge_r16_w16.si b/projects/common/sdram_controller_autoprecharge_r16_w16.si index 853d2c18..e3512519 100644 --- a/projects/common/sdram_controller_autoprecharge_r16_w16.si +++ b/projects/common/sdram_controller_autoprecharge_r16_w16.si @@ -38,7 +38,6 @@ $$if not SDRAM_COLUMNS_WIDTH then $$ if ULX3S then -$$ -- print('setting SDRAM_COLUMNS_WIDTH=10 for ULX3S with AS4C32M16 chip') $$ SDRAM_COLUMNS_WIDTH = 9 $$ print('setting SDRAM_COLUMNS_WIDTH=9 for ULX3S with AS4C32M16 or IS42S16160G chip') $$ print('Note: the AS4C32M16 is only partially used with this setting') @@ -46,8 +45,8 @@ $$ elseif DE10NANO then $$ print('setting SDRAM_COLUMNS_WIDTH=9 for DE10NANO with AS4C16M16 chip') $$ SDRAM_COLUMNS_WIDTH = 9 $$ elseif SIMULATION then -$$ print('setting SDRAM_COLUMNS_WIDTH=10 for simulation') -$$ SDRAM_COLUMNS_WIDTH = 10 +$$ 
print('setting SDRAM_COLUMNS_WIDTH=9 for simulation') +$$ SDRAM_COLUMNS_WIDTH = 9 $$ else $$ error('SDRAM_COLUMNS_WIDTH not specified') $$ end diff --git a/projects/common/sdram_controller_r128_w8.si b/projects/common/sdram_controller_r128_w8.si index e52632f4..163ce312 100644 --- a/projects/common/sdram_controller_r128_w8.si +++ b/projects/common/sdram_controller_r128_w8.si @@ -38,7 +38,6 @@ $$if not SDRAM_COLUMNS_WIDTH then $$ if ULX3S then -$$ -- print('setting SDRAM_COLUMNS_WIDTH=10 for ULX3S with AS4C32M16 chip') $$ SDRAM_COLUMNS_WIDTH = 9 $$ print('setting SDRAM_COLUMNS_WIDTH=9 for ULX3S with AS4C32M16 or IS42S16160G chip') $$ print('Note: the AS4C32M16 is only partially used with this setting') @@ -46,8 +45,8 @@ $$ elseif DE10NANO then $$ print('setting SDRAM_COLUMNS_WIDTH=9 for DE10NANO with AS4C16M16 chip') $$ SDRAM_COLUMNS_WIDTH = 9 $$ elseif SIMULATION then -$$ print('setting SDRAM_COLUMNS_WIDTH=10 for simulation') -$$ SDRAM_COLUMNS_WIDTH = 10 +$$ print('setting SDRAM_COLUMNS_WIDTH=9 for simulation') +$$ SDRAM_COLUMNS_WIDTH = 9 $$ else $$ error('SDRAM_COLUMNS_WIDTH not specified') $$ end diff --git a/projects/common/uart.si b/projects/common/uart.si index 79fd46d7..518d2153 100644 --- a/projects/common/uart.si +++ b/projects/common/uart.si @@ -20,6 +20,8 @@ $$ elseif ICESTICK then $$ uart_in_clock_freq_mhz = 12 $$ elseif RIEGEL then $$ uart_in_clock_freq_mhz = 48 +$$ elseif BROT then +$$ uart_in_clock_freq_mhz = 48 $$ elseif ECPIX5 then $$ uart_in_clock_freq_mhz = 100 $$ else diff --git a/projects/ice-v/CPUs/ice-v-conveyor.si b/projects/ice-v/CPUs/ice-v-conveyor.si index 60623739..33ee6120 100644 --- a/projects/ice-v/CPUs/ice-v-conveyor.si +++ b/projects/ice-v/CPUs/ice-v-conveyor.si @@ -4,7 +4,9 @@ // // https://github.com/sylefeb/Silice // MIT license, see LICENSE_MIT in Silice repo root -$$DEBUG = nil +$$DEBUG_conveyor = nil +$$TRACE_conveyor = nil + // -------------------------------------------------- $$if ICEV_FAST_SHIFT then $$print("Ice-V-conveyor 
configured for fast shift (barrel shifter)") @@ -91,7 +93,11 @@ unit rv32i_cpu_conveyor(bram_port mem,bram_port rom) { // decoder + ALU, executes the instruction and tells processor what to do decode_and_ALU_conveyor exec; // unit is defined after the CPU $$if SIMULATION then - uint32 cycle(0); // cycle counter for simulation + uint32 cycle(0); // cycle counter for simulation + uint32 nretired(0); // number of retired instr. +$$end +$$if TRACE_conveyor then + uint1 trace_on <:: nretired > 3145018; $$end always { @@ -121,7 +127,7 @@ $$end if ( written_regs[Rtype(rom.rdata).rs1,1] | written_regs[Rtype(rom.rdata).rs2,1] | (has_rd & written_regs[Rtype(rom.rdata).rd,1])) { -$$if DEBUG then +$$if DEBUG_conveyor then __display("[1] *** data hazard! ***"); $$end bubble = ~alu_was_working[0,1]; @@ -136,7 +142,7 @@ $$end : pc; // flag register being written written_regs_ins = {31b0,has_rd & ~bubble} << Rtype(rom.rdata).rd; -$$if DEBUG then +$$if DEBUG_conveyor then __display("[1] instr: %x @%x (bubble:%b refetch:%b alu:%b alu_was:%b)",instr,pc<<2,bubble,refetch,exec.working,alu_was_working); $$end // remember ALU was just busy @@ -150,7 +156,7 @@ $$end // ^^^ ^^^ start a bubble on refectch or ALU busy // give instruction, pc and registers to decoder+ALU instr = bubble ? exec.instr : instr; -$$if DEBUG then +$$if DEBUG_conveyor then __display("[2] instr: %x @%x (bubble:%b)",instr,pc<<2,bubble); $$end $$if SIMULATION then @@ -169,7 +175,7 @@ $$end // start a bubble on refectch or ALU busy, resume when ALU stops being busy bubble = (bubble & ~alu_was_working[2,1]) | refetch | exec.working; -$$if DEBUG then +$$if DEBUG_conveyor then __display("[3] instr: %x @%x (bubble:%b)",instr,pc<<2,bubble); $$end // memory address from which to load/store @@ -213,7 +219,7 @@ $$end xregsA.wenable1 = ~no_rd & ~bubble & ~refetch; xregsA.addr1 = rd; xregsA.wdata1 = load ? 
loaded : write_back; -$$if DEBUG then +$$if DEBUG_conveyor then __display("[4] instr: %x @%x (bubble:%b refetch:%b), loaded:%x, alu_n:%x", instr,pc<<2,bubble,refetch,loaded,alu_n); if (~bubble & ~refetch) { @@ -221,12 +227,30 @@ $$if DEBUG then instr,pc<<2,jump,Rtype(instr).rd,xregsA.wdata1,xregsA.wenable1); } $$end +$$if SIMULATION then + uint1 instr_done = ~bubble & ~refetch & ~reset; +$$end $$if ICEV_VERILATOR_TRACE then // this is used by SOCs/ice-v-cmp, to track retired instr. and compare CPUs - if (~bubble & ~refetch & ~reset) { + if (instr_done) { __verilog("$c32(\"cpu_retires(2,\",%,\",\",%,\",\",%,\",\",%,\");\");", pc<<2,instr,Rtype(instr).rd,xregsA.wdata1); } +$$end +$$if TRACE_conveyor then + if (instr_done) { + nretired = nretired + 1; + } + if (trace_on) { + if (instr_done) { + __write("@%h %h ",pc<<2,instr); + if (xregsA.wenable1) { + __display("x[%d]=%h",Rtype(instr).rd,xregsA.wdata1); + } else { + __display(""); + } + } + } $$end // clear register write bit written_regs_del = {31b0,~bubble & ~no_rd & ~refetch} << rd; @@ -246,7 +270,7 @@ $$end ? 
0 // on a refetch, we reset the set of written regs : (written_regs & ~written_regs_del) | written_regs_ins ) & (~32b1); // keep x0 clear -$$if DEBUG then +$$if DEBUG_conveyor then __display("++ %b ",written_regs_ins); __display("-- %b ",written_regs_del); __display("= %b ",written_regs); @@ -258,6 +282,13 @@ $$end xregsB.wenable1 = xregsA.wenable1; xregsB.wdata1 = xregsA.wdata1; xregsB.addr1 = xregsA.addr1; +$$if TRACE_conveyor then + if (trace_on) { + if (|mem.wenable) { + __display("store @%x = %x",mem.addr<<2,mem.wdata); + } + } +$$end $$if SIMULATION then cycle = cycle + 1; $$end diff --git a/projects/ice-v/CPUs/ice-v-dual.si b/projects/ice-v/CPUs/ice-v-dual.si index d59f83ae..32ddef2b 100644 --- a/projects/ice-v/CPUs/ice-v-dual.si +++ b/projects/ice-v/CPUs/ice-v-dual.si @@ -266,7 +266,7 @@ $$end unit rv32i_cpu( bram_port mem, $$if ICEV_STALL then - // advanced feature to stall the CPU (rise only on even stage F/LS1) + // optional feature to stall the CPU (rise only on even stage F/LS1) input uint1 stall_cpu, $$end $$if ICEV_USERDATA then diff --git a/projects/ice-v/CPUs/ice-v-ram.si b/projects/ice-v/CPUs/ice-v-ram.si index 0bd8ac04..6c100028 100644 --- a/projects/ice-v/CPUs/ice-v-ram.si +++ b/projects/ice-v/CPUs/ice-v-ram.si @@ -27,7 +27,7 @@ $$end // -------------------------------------------------- $$if SIMULATION then -$$VERBOSE = nil +$$TRACE = nil $$end $$ print("--------------- Ice-V-ram ---------------") @@ -36,8 +36,9 @@ $$ cycleW = 32 $$if ICEV_MULDIV then $$ print("Ice-V-ram configured with mul and div (*not* full RV32IM)") -$$div_width = 32 -$$div_signed = 1 +$$div_width = 32 +$$div_unsigned = 1 +$$div_remainder = 1 $include('../../common/divint_std.si') $$end @@ -51,7 +52,7 @@ bitfield Rtype { uint1 unused1, uint1 sign, uint4 unused2, uint1 muldiv, // -------------------------------------------------- -// Risc-V RV32I CPU with RAM memory interface +// Risc-V RV32I CPU with 8 bits memory interface // // ---- CPU stages (same as dual on a single 
core) ---- // @@ -74,7 +75,7 @@ bitfield Rtype { uint1 unused1, uint1 sign, uint4 unused2, uint1 muldiv, // // --------------------------------- -// interface for ram user +// 8 bits ram interface interface icev_ram_user { output addr, // memory address output wenable, // write enable @@ -89,7 +90,10 @@ interface icev_ram_user { unit icev_ram( icev_ram_user mem, $$if ICEV_USERDATA then - input uint32 user_data + input uint32 user_data, +$$end +$$if TRACE then + input uint1 trace_on, $$end ) { @@ -124,10 +128,10 @@ $$end // uint1 load_store <: (exec.load | exec.store); -$$if VERBOSE then +$$if TRACE then uint32 cycle(0); uint32 last_cycle(0); - uint1 debug(0); + uint1 trace(0); $$end int32 write_back(0); @@ -143,12 +147,6 @@ $$end // always block, done every cycle always { -$$if VERBOSE then - if (~reset) { - //__display("\n[cycle %d] ---------------- stage: %b ----- exec.working:%b mem.done:%b(%b|%b)=%h(@%h)",cycle,stage,exec.working,mem.done,reqmem,reqmem_done,mem.rdata,mem.addr); - //__display("[cycle %d] RAM, reqmem_done:%d, reqmem: %b",cycle,reqmem_done,reqmem); - } -$$end // maintain request valid low mem.req_valid = 0; do_load_store = 0; @@ -182,10 +180,6 @@ $$end // (pulsed when necessary) xregsA.wenable = 0; -$$if VERBOSE then - // __display("[cycle %d] ====== stage:%b reset:%b pc:%h mem.done:%b",cycle,stage,reset,pc,mem.done); -$$end - // four states: F, T, LS1, LS2/commit onehot (stage) { case 0: { @@ -195,22 +189,15 @@ $$end pc = reqmem_done ? mem.addr : pc; // instruction fetch done? 
instr_trigger = reqmem_done; -$$if VERBOSE then - if (instr_trigger) { - //if (cycle-last_cycle < 18) { - __display("[cycle %d] (0) F instr:%h (@%h) [elapsed: %d cycles]",cycle,instr,pc<<2,cycle-last_cycle); - debug = 1; - //} - last_cycle = cycle; +$$if TRACE then + if (trace_on) { + if (instr_trigger) { + trace = 1; + } } $$end } case 1: { - $$if VERBOSE then - if (instr_trigger) { - // __display("[cycle %d] (0) T %h @%h xa[%d]=%h xb[%d]=%h",cycle,instr,pc,xregsA.addr,xregsA.rdata,xregsB.addr,xregsB.rdata); - } - $$end // ---- T // registers are now in, triggers exec exec.trigger = instr_trigger; @@ -229,35 +216,40 @@ $$end mem.wenable = exec.store; mem.byte_size = 1 << exec.op[0,2]; // gives 4,2 or 1 mem.byte_offset = exec.n[0,2]; -$$if VERBOSE then - if (debug) { - // __display("[cycle %d] load: %b store: %b",cycle,exec.load,exec.store); - if (mem.req_valid) { - // __display("[cycle %d] (0) LS1(%b) @%h = %h (wen:%b)",cycle,mem.req_valid,mem.addr,mem.wdata,mem.wenable); - } - } -$$end } case 3: { // ---- LS2/commit - // may wait on RAM + uint1 instr_done = ~reqmem_pending & ~exec.working; + // ^^^^^^^^^^^^^^ ^^^^^^^^^^^^^ + // wait on RAM if request pending wait on working ALU // commit result - xregsA.wenable = ~exec.no_rd & ~reqmem_pending; - // ^^^^^^^^^^^^^^ - // wait on RAM if request pending -$$if VERBOSE then - if (xregsA.wenable & debug) { - __display("[cycle %d] (0) LS2/C xr[%d]=%h (mem.rdata:%x)",cycle,exec.write_rd,write_back,mem.rdata); - debug = 0; + xregsA.wenable = ~exec.no_rd & ~reqmem_pending & ~exec.working; +$$if TRACE then + if (trace_on) { + if (trace & instr_done) { + __write("@%h %h ",pc<<2,instr); + last_cycle = cycle; + if (xregsA.wenable) { + __display("x[%d]=%h",exec.write_rd,write_back); + } else { + __display(""); + } + trace = 0; + } } +$$end +$$if ICEV_VERILATOR_TRACE then + // this is used to track retired instr. 
and compare CPUs + if (instr_done) { + __verilog("$c32(\"cpu_retires(4,\",%,\",\",%,\",\",%,\",\",%,\");\");", + pc<<2,instr,Rtype(instr).rd,write_back); + } $$end // prepare instruction fetch // instruction fetch in progress - do_fetch = ~reqmem_pending & ~reset; - // ^^^^^^^^^^^^^^ - // wait on RAM from load/store + do_fetch = instr_done & ~reset; mem.addr = do_fetch ? (exec.jump ? (exec.n >> 2) : pc_plus1) - : mem.addr; // <= preserve addr is RAM is busy + : mem.addr; // <= preserve addr if RAM is busy mem.req_valid = do_fetch; } } @@ -276,10 +268,6 @@ $$end // ^^^^^^^ ^^^^^^^ ^^^^^^^^^^^^^^ // on fetch, on load/store, or previous still active -$$if VERBOSE then - // if (reset) { __display("[cycle %d] reset (reqmem:%b reqmem_done:%b)",cycle,reqmem,reqmem_done); } -$$end - // write back data to both register BRAMs xregsA.wdata = write_back; xregsB.wdata = write_back; // xregsB written when xregsA is @@ -287,11 +275,13 @@ $$end // write to write_rd, else track instruction register xregsA.addr = xregsA.wenable ? exec.write_rd : Rtype(instr).rs1; xregsB.addr = xregsA.wenable ? exec.write_rd : Rtype(instr).rs2; -$$if VERBOSE then - // __display("[cycle %d] mem @%h (req:%b pending:%b) w:%b ",cycle,mem.addr,mem.req_valid,reqmem_pending,mem.wenable); -$$end -$$if VERBOSE then +$$if TRACE then + if (trace_on) { + if (mem.wenable) { + __display("store @%x = %x",mem.addr<<2,mem.wdata<<{mem.byte_offset,3b000}); + } + } cycle = cycle + 1; $$end @@ -342,7 +332,6 @@ $$if ICEV_MULDIV then uint1 muldiv <: IntReg & Rtype(instr).muldiv; // mul or div div32 div; uint1 dividing(0); - uint1 div_done(0); $$end // ==== select next address adder first input @@ -367,12 +356,6 @@ $$end always { uint1 j(0); // temp variables for and comparator -$$if VERBOSE then - if (trigger) { - // __display("[cycle %d] %h @%h xa=%h xb=%h",cycle,instr,pc,xa,xb); - } -$$end - // ==== set decoder outputs depending on incoming instructions // load/store? 
load = opcode == 5b00000; store = opcode == 5b01000; @@ -433,49 +416,39 @@ $$end $$if ICEV_MULDIV then // mul div - div.inum = xa; - div.iden = xb; + uint1 mulh = op[0,2] == 2b01; + uint1 mulhsu = op[0,2] == 2b10; + uint1 signa = xa[31,1]; + uint1 signb = xb[31,1]; // vvvvvvvvvv keep the sign? + int33 ma = {signa & (mulh | mulhsu), xa}; + int33 mb = {signb & mulh, xb}; + int64 mul = ma * mb; // multiply + uint1 signdiv = ~ op[0,1]; + uint1 divdone = isdone(div) & ~prev_divdone; // pulses on div done + uint1 prev_divdone = isdone(div); + //if (muldiv & working & divdone) { + // __display("DIVISION %d / %d = %d (%d)\n",div.inum,div.iden,div.ret,div.rem); + //} + working = (working | (trigger & op[2,1])) + & muldiv + & ~(working & divdone); + if (trigger) { // div restarts each trigger + div.inum = (signdiv&signa) ? -xa : xa; + div.iden = (signdiv&signb) ? -xb : xb; + div <- (); + } + uint1 div_negate = signdiv & (signa ^ signb); + uint1 ret_h = |op[0,2]; if (muldiv) { - //__display("[cycle %d] dividing:%b working:%b isdone(div):%b",cycle,dividing,working,isdone(div)); - switch ({op}) { - case 3b000: { // MUL - //__display("MULTIPLICATION %d * %d",xa,xb); - r = ra * rb; - dividing = 0; // NOTE: required for hrdwr to work? highly suspicious. - } - case 3b100: { // DIV - if (~working & ~dividing) { - //__display("[cycle %d] DIVISION trigger",cycle); - working = 1; - dividing = 1; - div <- (); - } else { - if (isdone(div) & ~div_done) { - //__display("[cycle %d] DIVISION %d / %d = %d",cycle,xa,xb,div.ret); - div_done = 1; - dividing = 1; - } else { - //if (isdone(div)) { __display("[cycle %d] DIVISION done",cycle); } - div_done = 0; - dividing = 0; - } - working = ~isdone(div); - } - r = div.ret; - } - default: { r = {32{1bx}}; } - } - } else { - dividing = 0; + r = ((~op[2,1] & ret_h) ? mul[32,32] : 32b0) // MULH, MULHSU, MULHU + | ((~op[2,1] & ~ret_h) ? mul[ 0,32] : 32b0) // MUL + | (( op[2,1] & div_negate & op[1,1] ) ? 
-div.rem : 32b0) // REM + | (( op[2,1] & ~div_negate & op[1,1] ) ? div.rem : 32b0) // REMU + | (( op[2,1] & div_negate & ~op[1,1] ) ? -div.ret : 32b0) // DIV + | (( op[2,1] & ~div_negate & ~op[1,1] ) ? div.ret : 32b0);// DIVU } $$end -$$if VERBOSE then - if (trigger) { - // __display("[cycle %d] load %b store %b",cycle,load,store); - } -$$end - // ====================== Comparator for branching switch (op[1,2]) { case 2b00: { j = a_eq_b; } /*BEQ */ case 2b10: { j=a_lt_b;} /*BLT*/ diff --git a/projects/ice-v/CPUs/ice-v-swirl.si b/projects/ice-v/CPUs/ice-v-swirl.si index 107624b5..64e5b5a2 100644 --- a/projects/ice-v/CPUs/ice-v-swirl.si +++ b/projects/ice-v/CPUs/ice-v-swirl.si @@ -1,29 +1,35 @@ // SL 2022-05-04 @sylefeb // -// Pipelined RISC-V CPU with data bypass +// Pipelined RV32I CPU with branch prediction +// BRAM memory interfaces for data and instructions // // https://github.com/sylefeb/Silice // MIT license, see LICENSE_MIT in Silice repo root // -------------------------------------------------- +// // === Options -// ICEV_FAST_SHIFT -// set to 1 to have a barrel shifter (one cycle shift) -// set to nil for one bit shift per cycle (saves LUTs) +// +// ICEV_MULDIV implements RV32IM +// ICEV_STALL adds stall_cpu as an input to stall the cpu anytime +// -$$if ICEV_FAST_SHIFT then -$$print("Ice-V-swirl configured for fast shift (barrel shifter)") +$$if ICEV_MULDIV then +$$ print("Ice-V-swirl configured with mul and div (*not* full RV32IM)") +$$div_width = 32 +$$div_unsigned = 1 +$$div_remainder = 1 +$include('../../common/divint_std.si') $$end -$$ICEV_BRANCH_PRED = 1 - // set to 1 for a copious amount of debug output in simulation -$$DEBUG_swirl = nil +$$DEBUG_swirl = nil +$$TRACE_swirl = nil // -------------------------------------------------- // bitfield for easier decoding of instructions $$if not ICEV_RTYPE then -bitfield Rtype { uint1 unused1, uint1 sign, uint5 unused2, uint5 rs2, - uint5 rs1, uint3 op, uint5 rd, uint7 opcode} +bitfield Rtype { uint1 
unused1, uint1 sign, uint4 unused2, uint1 muldiv, + uint5 rs2, uint5 rs1, uint3 op, uint5 rd, uint7 opcode} $$ICEV_RTYPE = 1 $$end // -------------------------------------------------- @@ -32,16 +38,18 @@ $$end // Risc-V RV32I pipelined CPU $$print("====== ice-v swirl (pipeline, data bypass, rdcycle) ======") // -// Four stages pipeline +// Five stages pipeline // -------------------- -// Stage 1, in: instruction, setup: reg read A,B reg write bit, next fetch -// => [registers read] => +// Stage 1, in: instruction, setup: reg read A,B, next fetch +// => [registers read] => // Stage 2, in: reg A,B, setup: ALU+decode (trigger) // => [decode+ALU performed] => -// Stage 3, in: ALU done, setup: load/store, read ALU output +// Stage 3, in: ALU done, setup: read ALU output +// => [ALU output registered] +// Stage 4, in: ALU output (registered), setup: load/store // => [load/store performed] => -// Stage 4, in: ALU + load, setup: reg write, refetch if jump -// => [register written] => +// Stage 5, in: ALU + load, setup: reg write, refetch if jump +// => [register written] => // // Compiling a demo // ---------------- @@ -59,9 +67,7 @@ $$print("====== ice-v swirl (pipeline, data bypass, rdcycle) ======") // Overview // -------- // -// The CPU has four stages, which deviates a bit from the typical five stages -// design. I have no specific reason for this apart from this being the most -// natural evolution of prior IceV version. +// The CPU has five stages (see above). // // The pipeline implements bypasses on data hazards, such that it does not have // to insert bubbles ('do nothing') in case of potential trouble (see also the @@ -71,24 +77,31 @@ $$print("====== ice-v swirl (pipeline, data bypass, rdcycle) ======") // On a jump the entire pipeline is flushed (see refetch and refetch_addr). // // There is a little bit of additional complexity due to the ALU since -// shifts are performed one cycle at a time to save LUTs. 
When the ALU -// is busy the entire pipeline holds (see exec.working and alu_was_working). +// divisions are performed over multiple cycles. When the ALU is busy the entire +// pipeline holds. // // ------------------------ // Data hazards, at cycle i // ------------------------ // -// Note: registers are only written at stage 4 +// Note: registers are only written at stage 5 // // input register value: given as input to the stage // setup register value: set by stage as input to next stage // -// Data hazards are all detected at stage 2: -// a) input register value incorrect +// Data hazards are all detected at stage 2. Stage 2 is responsible for +// giving the register values to the ALU. The key question is where to read +// the values from: +// - the register BRAM setup at cycle 1 [no hazard] +// - the register written by stage 5 at the previous cycle [case a] +// - the register written by stage 5 at this cycle [case b] +// - none of the above: we have to wait and hold the pipeline [case c] +// +// case a) input register value incorrect due to write at i-1 // due to [write at i-1] => mux -// b) setup register value incorrect, ALU and store +// case b) setup register value incorrect due to write at i // due to [write at i ] => mux -// c) instruction in stage 3 /will/ invalidate a setup register +// case c) instruction in stage 3 /will/ invalidate a setup register // due to [write at i+1] => hold // - holds and inserts a bubble so that ALU skips one cycle // - will mux on a case b at i+1 @@ -96,9 +109,21 @@ $$print("====== ice-v swirl (pipeline, data bypass, rdcycle) ======") // -------------------------------------------------- // The CPU // -------------------------------------------------- -unit rv32i_cpu_swirl(bram_port mem,bram_port rom) { +unit rv32i_cpu_swirl( + bram_port dmem, // data memory (load/store) + bram_port imem, // instruction memory +$$if ICEV_STALL then + // optional feature to stall the CPU (e.g. 
while filling a cache) + input uint1 stall_cpu, +$$end +$$if TRACE_swirl then + input uint1 trace_on, +$$end +) { // register file, uses two BRAMs to fetch two registers at once + //simple_dualport_bram int32 xregsA<"simple_dualport_bram_generic_rw">[32] = {pad(0)}; + //simple_dualport_bram int32 xregsB<"simple_dualport_bram_generic_rw">[32] = {pad(0)}; simple_dualport_bram int32 xregsA[32] = {pad(0)}; simple_dualport_bram int32 xregsB[32] = {pad(0)}; // ^^^^^^^^^^^^^ dualport so that we can read/write simultaneously @@ -106,19 +131,24 @@ unit rv32i_cpu_swirl(bram_port mem,bram_port rom) { // stage 1 => stage 2 uint$addrW$ pc(0); uint32 instr(0); - // stage 2 => stage 3 + // stage 2 => stage 4 int32 xb(0); // stage 3 => stage 4 - int32 alu_r(0); int32 alu_val(0); uint1 no_rd(0); - uint1 jump(0); uint1 load(0); + int32 alu_r(0); int32 alu_val(0); + uint1 jump(0); uint1 load(0); uint1 store(0); uint$addrW+2$ alu_n(0); uint1 storeAddr(0); uint1 storeVal(0); - uint1 intop(0); uint3 op(0); uint5 rd(0); - // stage 4 => stage 3 + uint1 intop(0); uint3 op(0); + uint1 no_rd_3(0); uint5 rd_3(0); + // stage 4 => stage 5 + uint1 no_rd_4(0); uint5 rd_4(0); + // stage 5 => stage 3 uint1 jumping(0); + // stage 4 => outside + int32 xb_store(0); uint$addrW+2$ alu_n_store(0); // pipeline control signals - uint1 hold(0); uint1 bubble(0); - uint$addrW$ refetch_addr(0); uint1 refetch(0); uint1 stage3_bubble(0); - uint1 alu_was_working(0); uint1 bpred(0); + uint1 hold(0); uint1 bubble(0); uint1 stage3_bubble(0); + uint$addrW$ refetch_addr(0); uint1 refetch(0); uint1 stage4_bubble(0); + uint1 bpred(0); // what to write in decoder + ALU register inputs // these are mutually exclusive choices; see exec.xa = ... 
after pipeline @@ -129,80 +159,99 @@ unit rv32i_cpu_swirl(bram_port mem,bram_port rom) { decode_and_ALU_swirl exec; $$if SIMULATION then - uint32 cycle(0); - uint32 reinstr(0); + uint32 cycle(0); uint32 nretired(0); uint1 has_rs2(0); uint1 stage2_bubble(0); +$$end +$$if TRACE_swirl then + uint32 last_cycle(0); $$end +$$if not ICEV_STALL then + uint1 stall_cpu(0); // stall disabled, never used +$$end + uint1 refetching(0); // tracks if a refetch is in progress always { +$$if DEBUG_swirl then + uint1 debug_on = 1 +$$end + // tracks whether a register was written cycle before - uint1 reg_was_written(0); - reg_was_written = xregsA.wenable1; + uint1 reg_was_written = xregsA.wenable1; +$$if SIMULATION then + uint5 xregsA_conflict_possible = xregsA.wenable1 && xregsA.addr1 == xregsA.addr0; + uint5 xregsB_conflict_possible = xregsB.wenable1 && xregsB.addr1 == xregsB.addr0; +$$end // maintain register wenable low (pulsed when necessary) xregsA.wenable1 = 0; - // maintain memory write low - mem.wenable = 0; + // maintain memory write low, or to its previous state if stalled + dmem.wenable = stall_cpu ? dmem.wenable : 0; + // just stalled ? + uint1 on_stall = stall_cpu & ~refetch; // CPU pipeline { // pipeline is contained within this block - { // ==== stage 1 -$$if ICEV_BRANCH_PRED then - uint1 jinstr(0); uint1 jal(0); uint1 branch(0); - int$addrW+3$ addr_a(0); int$addrW+3$ addr_imm(0); - uint$addrW+2$ bpred_n(0); + { // ==== stage 1 ========================================================== +$$if DEBUG_swirl then +if (debug_on) { + __display("[1] cycle:%d stall_cpu:%b refetch:%b refetch_addr:%x refetching:%b pc:%x",cycle,stall_cpu,refetch,refetch_addr<<2,refetching,pc<<2); + __display("[1] cycle:%d imem.addr:%x imem.rdata:%x dmem.addr:%x dmem.rdata:%x",cycle,imem.addr<<2,imem.rdata,dmem.addr<<2,dmem.rdata); +} $$end // capture pc, instr in pipeline - pc = (exec.working | hold) ? pc : rom.addr; - instr = (exec.working | hold) ? 
instr : rom.rdata; - // ^^^^^^^^^^^^^^^^^^^^^ hold if ALU is busy or hazard (c) - // insert a bubble on a refetch or ALU busy or reset - bubble = refetch | exec.working | reset; + pc = (hold | exec.working) ? pc : imem.addr; + instr = (hold | exec.working) ? instr : imem.rdata; + // ^^^^ hold if hazard (c) + // ^^^^^^^^^^^^ hold immediately if ALU busy + // insert a bubble on refetch, hold or reset + bubble = (refetch | reset); // setup register read xregsA.addr0 = Rtype(instr).rs1; xregsB.addr0 = Rtype(instr).rs2; - // fetch next -$$if ICEV_BRANCH_PRED then - jinstr = instr[ 4, 3] == 3b110; - jal = instr[2,2] == 2b11; - branch = instr[2,2] == 2b00; - bpred = jinstr & (jal|branch); - addr_a = __signed({1b0,pc[0,$addrW-1$],2b0}); - addr_imm = jal ? {{12{instr[31,1]}},instr[12,8],instr[20,1],instr[21,10],1b0} + // branch prediction + uint1 jinstr = instr[ 4, 3] == 3b110; + uint1 jal = instr[2,2] == 2b11; + uint1 branch = instr[2,2] == 2b00; + bpred = jinstr & (jal|branch) & ~refetching; + // no bpred if refetching ^^^^ cache may ping-pong + int$addrW+3$ addr_a = __signed({1b0,pc[0,$addrW-1$],2b0}); + int$addrW+3$ addr_imm = jal ? {{12{instr[31,1]}},instr[12,8],instr[20,1],instr[21,10],1b0} : {{20{instr[31,1]}},instr[7,1],instr[25,6],instr[8,4],1b0}; - bpred_n = addr_a + addr_imm; -$$end - rom.addr = refetch ? refetch_addr - : (~bubble & ~reset) ? -$$if ICEV_BRANCH_PRED then - (bpred ? (bpred_n>>2) : (pc[0,$addrW$] + 1)) -$$else - (pc[0,$addrW$] + 1) -$$end - : pc; + uint$addrW+2$ bpred_n = addr_a + addr_imm; + // fetch next + imem.addr = stall_cpu ? imem.addr // fix it during stall + : refetch ? refetch_addr // go to refetch addr + : (~bubble & ~reset) ? // follow prediction + (bpred ? 
(bpred_n>>2) : (pc[0,$addrW$] + 1)) + : pc; // next by pc $$if DEBUG_swirl then - __display("[1] %d instr: %x @%x (bubble:%b reset:%b refetch:%b hold:%b alu:%b rs1 %d rs2 %d)", - cycle,instr,pc<<2,bubble,reset,refetch,hold,exec.working,xregsA.addr0,xregsB.addr0); - if (bpred) { - __display("[1] pc @%x following branch to @%x",pc<<2,rom.addr<<2); +if (debug_on) { + if (~stall_cpu | on_stall) { + __display("[1] instr: %x @%x (bubble:%b reset:%b refetch:%b hold:%b stall_cpu:%b alu_busy:%b rs1 %d rs2 %d)", + instr,pc<<2,bubble,reset,refetch,hold,stall_cpu,exec.working,xregsA.addr0,xregsB.addr0); + if (bpred) { + __display("[1] pc @%x following branch to @%x",pc<<2,imem.addr<<2); + } } +} $$end - // remember ALU was just busy - alu_was_working vv= exec.working; - // |-> assign after pipeline, stages will only see it next cycle - } -> { // ==== stage 2 + } -> { // ==== stage 2 ===================================================== // maintain bpred on bubbles uint1 prev_bpred(0); // for data hazard detection - uint1 has_rs2(0); uint1 store(0); +$$if not SIMULATION then + uint1 has_rs2(0); +$$end + uint1 store(0); // give instruction, pc and registers to decoder+ALU - instr = (exec.working | hold) ? exec.instr : instr; - pc = (exec.working | hold) ? exec.pc : pc; + instr = (hold | exec.working) ? exec.instr : instr; + pc = (hold | exec.working) ? exec.pc : pc; + // ^^^^^^^^^^^^ hold immediately if ALU busy exec.instr = instr; exec.pc = pc; // propagate bpred - bpred = (exec.working | hold) ? prev_bpred : bpred; + bpred = (hold | exec.working) ? 
prev_bpred : bpred; prev_bpred = bpred; // data hazards detection // -> we decode just enough from the instruction for detection @@ -211,100 +260,136 @@ $$end | (Rtype(instr).opcode[2,5] == 5b11000) // branch | store; // by default we select the register value read after stage 1 - // (assuming no data haward) - xa_regR = 1; xa_regW = 0; xa_regW_prev = 0; xa_keep = 0; - xb_regR = 1; xb_regW = 0; xb_regW_prev = 0; xb_keep = 0; + // or keep the values as is on a hold or alu is working + uint1 keep = hold | exec.working; + xa_regR = ~keep; xa_regW = 0; xa_regW_prev = 0; xa_keep = keep; + xb_regR = ~keep; xb_regW = 0; xb_regW_prev = 0; xb_keep = keep; // [data hazards] case (c) detection - // instruction in stage 3 will write on a register used now + // instruction in stage 3 will (cycle+2) write on a register needed now + // instruction in stage 4 will (cycle+1) write on a register needed now // (checks with exec.rd and exec.write_rd as seen in stage 3) - if (~hold) { - uint1 rs1_eq_rd(0); // is rs1 equal to rd from stage 3? - uint1 rs2_eq_rd(0); // is rs2 equal to rd from stage 3? - rs1_eq_rd = Rtype(instr).rs1 == exec.write_rd; - rs2_eq_rd = (Rtype(instr).rs2 == exec.write_rd) & has_rs2; - // not all instructions use rs2 ^^^^^^^ - // on such a data hazard we hold the pipeline one cycle - hold = (rs1_eq_rd|rs2_eq_rd) & ~exec.no_rd - // all the conditions below mean there is in fact no hazard - & ~stage3_bubble & ~reset & ~refetch & ~exec.working; - } else { - // holding, keep the same values on ALU inputs vvvv - xa_regR = 0; xa_regW = 0; xa_regW_prev = 0; xa_keep = 1; - xb_regR = 0; xb_regW = 0; xb_regW_prev = 0; xb_keep = 1; - hold = 0; // release the hold + // is rs1 equal to rd from stage 3? + uint1 rs1_eq_rd_3 = Rtype(instr).rs1 == exec.write_rd; + // is rs2 equal to rd from stage 3? + uint1 rs2_eq_rd_3 = (Rtype(instr).rs2 == exec.write_rd) & has_rs2; + // not all instructions use rs2 ^^^^^^^ + // is rs1 equal to rd in stage 4? 
// vvvv value from stage 3 cycle i-1 + uint1 rs1_eq_rd_4 = Rtype(instr).rs1 == rd_3; + // is rs2 equal to rd in stage 4? + uint1 rs2_eq_rd_4 = (Rtype(instr).rs2 == rd_3) & has_rs2; + // on such a data hazard we hold the pipeline one cycle + hold = (((rs1_eq_rd_3|rs2_eq_rd_3) & ~exec.no_rd & ~stage3_bubble) + |((rs1_eq_rd_4|rs2_eq_rd_4) & ~no_rd_3 & ~stage4_bubble) + |exec.working) + // all the conditions below mean there is in fact no hazard + & ~reset & ~refetch & ~bubble; +$$if DEBUG_swirl then +if (debug_on) { + if (~stall_cpu | on_stall) { + if (hold) { + if ((rs1_eq_rd_3|rs2_eq_rd_3) & ~exec.no_rd & ~stage3_bubble) { + __display("[2] *** data hazard (c,3) *** rs1[%d] rs2[%d](%b) rd(stage3)[%d]",Rtype(instr).rs1,Rtype(instr).rs2,has_rs2,exec.write_rd); + } + if ((rs1_eq_rd_4|rs2_eq_rd_4) & ~no_rd_3 & ~stage4_bubble) { + __display("[2] *** data hazard (c,4) *** rs1[%d] rs2[%d](%b) rd(stage4)[%d]",Rtype(instr).rs1,Rtype(instr).rs2,has_rs2,rd_3); + } + if (exec.working) { + __display("[2] *** data hazard (c,alu)"); + } } + } +} +$$end // update bubble - bubble = bubble | refetch | exec.working | hold; -$$if DEBUG_swirl then - __display("[2] instr: %x @%x (bubble:%b bpred:%b) rA:%x rB:%x",instr,pc<<2,bubble,bpred,xregsA.rdata0,xregsB.rdata0); + bubble = bubble | refetch | hold; +$$if SIMULATION then + stage2_bubble = bubble; $$end $$if DEBUG_swirl then - if (hold) { - __display("[2] *** data hazard (c) *** rs1[%d] rs2[%d](%b) rd(stage3)[%d]",Rtype(instr).rs1,Rtype(instr).rs2,has_rs2,exec.write_rd); +if (debug_on) { + if (~stall_cpu | on_stall) { + __display("[2] instr: %x @%x (bubble:%b bpred:%b) rA(%d):%x rB(%d):%x",instr,pc<<2,bubble,bpred,Rtype(instr).rs1,xregsA.rdata0,Rtype(instr).rs2,xregsB.rdata0); } +} $$end // [data hazards] case (a) detection - // instruction in stage 3 wrote on input registers read after stage 1 - // the value is thus incorrect, use the previously written value instead + // instruction retired in stage 5 (previous cycle) wrote on input + 
// registers read after stage 1; we have to use the previously written + // value instead of that coming out of BRAM if (Rtype(instr).rs1 == xregsA.addr1 & reg_was_written) { $$if DEBUG_swirl then - __display("[2] *** data hazard (a) on rs1 *** rs1[%d] rs2[%d] rd_was[%d]",Rtype(instr).rs1,Rtype(instr).rs2,xregsA.addr1); +if (debug_on) { + if (~stall_cpu | on_stall) { + __display("[2] *** data hazard (a) on rs1 *** rs1[%d] rs2[%d] rd_was[%d]",Rtype(instr).rs1,Rtype(instr).rs2,xregsA.addr1); + } +} $$end xa_regR = 0; xa_regW = 0; xa_regW_prev = 1; xa_keep = 0; // ^^^^^^^^^^^^^ selects value previously written } - if (Rtype(instr).rs2 == xregsA.addr1 & reg_was_written & has_rs2) { + if (Rtype(instr).rs2 == xregsB.addr1 & reg_was_written & has_rs2) { $$if DEBUG_swirl then - __display("[2] *** data hazard (a) on rs2 *** rs1[%d] rs2[%d] rd_was[%d]",Rtype(instr).rs1,Rtype(instr).rs2,xregsA.addr1); +if (debug_on) { + if (~stall_cpu | on_stall) { + __display("[2] *** data hazard (a) on rs2 *** rs1[%d] rs2[%d] rd_was[%d]",Rtype(instr).rs1,Rtype(instr).rs2,xregsA.addr1); + } +} $$end xb_regR = 0; xb_regW = 0; xb_regW_prev = 1; xb_keep = 0; // same for rs2 + // ^^^^^^^^^^^^^ selects value previously written } // [data hazards] case (b) detection - // instruction in stage 4 writes on a register used now - // the value is thus outdated, use the written value instead - // (checks with rd and write_rd from stage 4) - if (~no_rd & Rtype(instr).rs1 == rd) { + // instruction in stage 5 writes on a register needed now; + // we use the value being written to the register + // (checks with rd and write_rd from stage 5) + if (~no_rd_4 & Rtype(instr).rs1 == rd_4) { $$if DEBUG_swirl then - __display("[2] *** data hazard (b) on rs1 *** rs1[%d] rs2[%d] rd(stage4)[%d]",Rtype(instr).rs1,Rtype(instr).rs2,rd); +if (debug_on) { + if (~stall_cpu | on_stall) { + __display("[2] *** data hazard (b) on rs1 *** rs1[%d] rs2[%d] rd(stage5)[%d]",Rtype(instr).rs1,Rtype(instr).rs2,rd_4); + } +} $$end 
xa_regR = 0; xa_regW = 1; xa_regW_prev = 0; xa_keep = 0; + // ^^^^^^^^^^^ selects value being written } - if (~no_rd & (Rtype(instr).rs2 == rd) & has_rs2) { // same for rs2 + if (~no_rd_4 & (Rtype(instr).rs2 == rd_4) & has_rs2) { // same for rs2 $$if DEBUG_swirl then - __display("[2] *** data hazard (b) on rs2 *** rs1[%d] rs2[%d] rd(stage4)[%d]",Rtype(instr).rs1,Rtype(instr).rs2,rd); +if (debug_on) { + if (~stall_cpu | on_stall) { + __display("[2] *** data hazard (b) on rs2 *** rs1[%d] rs2[%d] rd(stage5)[%d]",Rtype(instr).rs1,Rtype(instr).rs2,rd_4); + } +} $$end xb_regR = 0; xb_regW = 1; xb_regW_prev = 0; xb_keep = 0; // ^^^^^^^^^^^ selects value being written } // trigger ALU if not in bubble (used by multi-cycle shifts, div) - exec.trigger = ~bubble; - - } -> { // ==== stage 3 - uint1 prev_bpred(0); + exec.trigger = ~bubble & ~alu_was_working; + uint1 alu_was_working = exec.working; + } -> { // ==== stage 3 ===================================================== + uint1 prev_bpred(0); // propagate bpred - bpred = exec.working ? prev_bpred : bpred; - // start a bubble on refetch or ALU busy - bubble = (bubble & ~alu_was_working) | refetch | exec.working; + bpred = (bubble | exec.working) ? prev_bpred : bpred; + // start a bubble on refetch + bubble = bubble | refetch | exec.working; + // ^^^^^^^^^^^^ create a bubble as instr + // is just out of stage 2 and ALU is busy stage3_bubble ^= bubble; $$if DEBUG_swirl then - __display("[3] instr: %x @%x (bubble:%b bpred:%b)",instr,pc<<2,bubble,bpred); -$$end - // memory address from which to load/store - mem.addr = (exec.n >> 2); - if (exec.store & ~bubble & ~jumping) { - // ^^^^^^ if stage 4 jumps, cancel store - // build write mask depending on SB, SH, SW - // assumes aligned, e.g. 
SW => next_addr[0,2] == 2 - mem.wenable = ( { { 2{exec.op[0,2]==2b10} }, - exec.op[0,1] | exec.op[1,1], 1b1 - } ) << exec.n[0,2]; +if (debug_on) { + if (~stall_cpu | on_stall) { + __display("[3] instr: %x @%x (bubble:%b bpred:%b exec.r:%d)",instr,pc<<2,bubble,bpred,exec.r); } - // decoder outputs to trickle down the pipeline towards stage 4 - no_rd = exec.no_rd | bubble; - // ^^^^ disables data hazard in stage 2 on a bubble - rd = exec.write_rd; +} +$$end + // decoder outputs to trickle down the pipeline towards stage 5 + no_rd_3 = exec.no_rd | bubble; + // ^^^ disables data hazard in stage 2 on a bubble + rd_3 = exec.write_rd; jump = exec.jump & ~bubble; load = exec.load; + store = exec.store; intop = exec.intop; alu_n = exec.n; alu_r = exec.r; @@ -314,22 +399,70 @@ $$end storeVal = exec.storeVal; // track bpred prev_bpred = bpred; + // capture xb set by stage 2 on previous cycle (for store) + xb = exec.xb; + } -> { // ==== stage 4 ===================================================== + // start a bubble on refetch or ALU busy + bubble = (bubble | refetch); +$$if DEBUG_swirl then +if (debug_on) { + if (~stall_cpu | on_stall) { + __display("[4] instr: %x @%x (bubble:%b bpred:%b alu_r:%d)",instr,pc<<2,bubble,bpred,alu_r); + } +} +$$end + // record rd, no_rd for the stage (used in hazard detection) + rd_4 = rd_3; + no_rd_4 = no_rd_3 | bubble; + // ^^^ disables data hazard in stage 2 on a bubble + stage4_bubble ^= bubble; + // value to store + xb_store = xb; + alu_n_store = alu_n; + // memory address from which to load/store +$$if not ICEV_STALL then + dmem.addr = (alu_n >> 2); +$$else + dmem.addr = (store|load) & ~bubble & ~jumping + ? 
(alu_n >> 2) : dmem.addr; + // ^^ if a cache is used, we preserve dmem.addr when not accessing dmem +$$end + if (store & ~bubble & ~jumping) { + // ^^^^^^ if stage 5 jumps, cancel store + // build write mask depending on SB, SH, SW + // assumes aligned SW + dmem.wenable = ( { { 2{op[0,2]==2b10} }, + op[0,1] | op[1,1], 1b1 + } ) << alu_n[0,2]; + + } +$$if SIMULATION then + // check for unaligned loads (unsupported) + if ((load|store) & ~bubble & ~jumping + & (op[0,2]==2b10) & (alu_n[0,2] != 2b00)) { + __display("[cycle %d] ERROR @%h %h, unaligned access (%b) @%h",cycle,pc<<2,instr,store,alu_n); + __finish(); + } +$$end - } -> { // ==== stage 4 + } -> { // ==== stage 5 ===================================================== sameas(pc) pcp1 = pc + 1; // decodes values loaded from memory (if any) int32 loaded(0); uint32 aligned(0); - aligned = mem.rdata >> {alu_n[0,2],3b000}; + aligned = dmem.rdata >> {alu_n[0,2],3b000}; switch ( op[0,2] ) { // LB / LBU, LH / LHU, LW case 2b00:{ loaded = {{24{(~op[2,1])&aligned[ 7,1]}},aligned[ 0,8]}; } case 2b01:{ loaded = {{16{(~op[2,1])&aligned[15,1]}},aligned[ 0,16]};} case 2b10:{ loaded = aligned; } default: { loaded = {32{1bx}}; } // don't care } + uint1 instr_done = ~bubble & ~refetch & ~reset & ~(on_stall & load); + // redo the load on a stall ^^^^^^^^^^^^^^^ + // (even though this could be imem and not dmem stalling) // register write back - xregsA.wenable1 = ~no_rd & ~bubble & ~refetch; - xregsA.addr1 = rd; + xregsA.wenable1 = ~no_rd_4 & instr_done; + xregsA.addr1 = rd_4; xregsA.wdata1 = (jump ? ((pcp1)<<2) : 32b0) | (storeAddr ? alu_n : 32b0) | (storeVal ? alu_val : 32b0) @@ -337,39 +470,85 @@ $$end | (intop ? alu_r : 32b0); $$if ICEV_VERILATOR_TRACE then // this is used by SOCs/ice-v-cmp, to track retired instr. 
and compare CPUs - if (~bubble & ~refetch & ~reset) { + if (instr_done) { __verilog("$c32(\"cpu_retires(3,\",%,\",\",%,\",\",%,\",\",%,\");\");", pc<<2,instr,Rtype(instr).rd,xregsA.wdata1); - reinstr = reinstr + 1; } $$end $$if DEBUG_swirl then - __display("[4] instr: %x @%x (bubble:%b jump:%b bpred:%b) reinstr:%d",instr,pc<<2,bubble,jump,bpred,reinstr); - if (~bubble & ~refetch & ~reset) { - __display("[4] ++++ %x (@%x) jump %b, wreg:[%d]=%x (%b) reinstr:%d", - instr,pc<<2,jump,Rtype(instr).rd,xregsA.wdata1,xregsA.wenable1,reinstr); +if (debug_on) { + if (~stall_cpu | on_stall) { + __display("[5] instr: %x @%x (bubble:%b jump:%b bpred:%b load:%b alu_r:%d) nretired:%d",instr,pc<<2,bubble,jump,bpred,load,alu_r,nretired); + if (instr_done) { + __display("[5] ++++ %x (@%x) jump %b, wreg:[%d]=%x (%b) nretired:%d", + instr,pc<<2,jump,Rtype(instr).rd,xregsA.wdata1,xregsA.wenable1,nretired); + } + } + if (xregsA.wenable1) { + __display("[5] wreg:[%d]=%x",Rtype(instr).rd,xregsA.wdata1); + } +} +$$end +$$if SIMULATION then + if (instr_done) { + nretired = nretired + 1; + } +$$end +$$if TRACE_swirl then + if (trace_on) { + if (instr_done) { + __write("@%h %h ",pc<<2,instr); + last_cycle = cycle; + if (xregsA.wenable1) { + __display("x[%d]=%h",xregsA.addr1,xregsA.wdata1); + } else { + __display(""); + } } + } $$end + // signal a jump if needed (flushes pipeline and jumps) - refetch = ( jump ^ bpred ) // jump prediction failed - & ~refetch & ~bubble; + refetch = stall_cpu // on a stall trigger (and hold) a refetch + | ( ( jump ^ bpred ) // jump prediction failed + & ~refetch & ~bubble ); // ^^^^^^ reset refetch if done at prev cycle - jumping ^= refetch; - refetch_addr = jump ? 
(alu_n>>2) : pcp1; // new address if jumping +$$if ICEV_STALL then + // indicates whether a refetch is in progress + // (on a stall, refetch at the ongoing refetch if one is in progress) + uint1 was_refetching = refetching & ~instr_done; + refetching = refetch | was_refetching; +$$else + uint1 was_refetching(0); +$$end + jumping ^= refetch; // warn stage 3 (cancel any store) + exec.cancel = refetch; // cancel any pending ALU op on refetch + // new address on refetch + refetch_addr = ~refetch | was_refetching | (stall_cpu & ~on_stall) ? refetch_addr // keep the refetch_addr + : jump ? (alu_n>>2) // jump destination + : instr_done ? pcp1 // next by address + : pc; // stay on same + //if (debug_on || ((pc<<2) == 32h0b50)) { + // __display("[!] cycle:%d refetch:%b (jump:%b pred:%b) refetch_addr:%h on_stall:%b",cycle,refetch,jump,bpred,refetch_addr<<2,on_stall); + //} $$if DEBUG_swirl then - if (bpred & ~refetch) { - __display("[4] pc @%x branch predicted towards @%x (jump %b)",pc<<2,alu_n,jump); - } - if (refetch) { - __display("[4] REFETCH to @%x (jump %b bpred %b)",refetch_addr<<2,jump,bpred); +if (debug_on) { + if (~stall_cpu | on_stall) { + if (bpred & ~refetch) { + __display("[5] pc @%x branch predicted towards @%x (jump %b)",pc<<2,alu_n,jump); + } + if (refetch) { + __display("[5] REFETCH to @%x (stall_cpu %b jump %b bpred %b)",refetch_addr<<2,stall_cpu,jump,bpred); + } } +} $$end } } // end of pipeline // set decoder+ALU inputs - // (out of pipeline to get up-to-date value of xregsA.wdata1 from stage 4) + // (out of pipeline to get up-to-date value of xregsA.wdata1 from stage 5) exec.xa = xa_keep ? exec.xa : 32b0 | xa_regR ? xregsA.rdata0 : 32b0 | xa_regW ? xregsA.wdata1 : 32b0 @@ -379,25 +558,51 @@ $$end | xb_regW ? xregsA.wdata1 : 32b0 | xb_regW_prev ? xregsB.wdata1 : 32b0; // what to write on a store - mem.wdata = (xb_regW ? 
xregsA.wdata1 : xb) << {exec.n[0,2],3b000}; - // ^^---------\ - // capture xb from stage 2 so that stage 3 assign above sees the correct value + dmem.wdata = stall_cpu ? dmem.wdata + : (xb_store) << {alu_n_store[0,2],3b000}; + // ^^^^^^^^ ^^^^^^^^^^^ + // captured from stage 2 at stage 4 so that store sees the correct value $$if DEBUG_swirl then - if (mem.wenable) { - __display("[3] store @%x = %x",mem.addr,mem.wdata); +if (debug_on) { + if (dmem.wenable) { + __display("[4] store @%x = %x",dmem.addr<<2,dmem.wdata); + } +} +$$end +$$if TRACE_swirl then + if (trace_on) { + if ((|dmem.wenable) & ~stall_cpu) { + __display("store @%x = %x",dmem.addr<<2,dmem.wdata); + } } $$end - xb = exec.xb; + // register bank B follows A writes xregsB.wenable1 = xregsA.wenable1; xregsB.wdata1 = xregsA.wdata1; xregsB.addr1 = xregsA.addr1; +$$if SIMULATION then + if (xregsA_conflict_possible & xa_regR & ~stage2_bubble) { + __display("[cycle %d] ERROR reading from a written register (A) @%h",cycle,pc<<2); + __finish(); + } + if (xregsB_conflict_possible & xb_regR & ~stage2_bubble & has_rs2) { + __display("[cycle %d] ERROR reading from a written register (B) @%h",cycle,pc<<2); + __finish(); + } +$$end $$if DEBUG_swirl then - __display("exec.xa = %x exec.xb = %x mem.wdata = %x",exec.xa,exec.xb,mem.wdata); - __display("exec.jump = %b exec.n = %x",exec.jump,exec.n); - __display("xa_keep %b xa_regR %b xa_regW %b xa_regW_prev %b",xa_keep,xa_regR,xa_regW,xa_regW_prev); - __display("xb_keep %b xb_regR %b xb_regW %b xb_regW_prev %b\n",xb_keep,xb_regR,xb_regW,xb_regW_prev); +if (debug_on) { + if (~stall_cpu) { + __display("exec.xa = %x exec.xb = %x mem.wdata = %x",exec.xa,exec.xb,dmem.wdata); + __display("exec.jump = %b exec.n = %x xb_store = %x",exec.jump,exec.n,xb_store); + __display("xa_keep %b xa_regR %b xa_regW %b xa_regW_prev %b",xa_keep,xa_regR,xa_regW,xa_regW_prev); + __display("xb_keep %b xb_regR %b xb_regW %b xb_regW_prev %b",xb_keep,xb_regR,xb_regW,xb_regW_prev); + __display("imem.addr 
@%x, dmem.addr @%x\n",imem.addr<<2,dmem.addr<<2); + } +} $$end + $$if SIMULATION then cycle = cycle + 1; $$end @@ -417,13 +622,15 @@ $$end // fairly large multiplexer due to data hazards bypass. // See also ../../learn-silice/AlgoInOuts.md // +// The ALU outputs are registered by stage 3, before +// entering stage 4, allowing a better fmax. // -------------------------------------------------- unit decode_and_ALU_swirl( // instruction, program counter and registers input uint32 instr, input uint$addrW$ pc, input int32 xa, input int32 xb, // trigger: pulsed high when the decoder + ALU should start - input uint1 trigger, + input uint1 trigger, input uint1 cancel, // outputs all information the processor needs to decide what to do next output! uint3 op, output! uint5 write_rd, output! uint1 no_rd, output! uint1 jump, output! uint1 load, output! uint1 store, @@ -450,6 +657,10 @@ unit decode_and_ALU_swirl( uint1 pcOrReg <: AUIPC | JAL | branch; // pc or reg in addr? uint1 sub <: IntReg & Rtype(instr).sign; // subtract uint1 aluShift <: (IntImm | IntReg) & op[0,2] == 2b01; // shift requested +$$if ICEV_MULDIV then + uint1 muldiv <: IntReg & Rtype(instr).muldiv; // mul or div + div32 div; +$$end // ==== select next address adder first input int$addrW+3$ addr_a <: pcOrReg ? __signed({1b0,pc[0,$addrW-1$],2b0}) : xa; // ==== select ALU second input @@ -465,62 +676,74 @@ unit decode_and_ALU_swirl( int$addrW+3$ addr_imm <: (AUIPC ? imm_u : 32b0) | (JAL ? imm_j : 32b0) | (branch ? imm_b : 32b0) | ((JALR|load) ? imm_i : 32b0) | (store ? imm_s : 32b0); - // ==== set decoder outputs depending on incoming instructions - // load/store? - load := opcode == 5b00000; store := opcode == 5b01000; - // operator for load/store // register to write to? - op := Rtype(instr).op; write_rd := Rtype(instr).rd; - // do we have to write a result to a register? - no_rd := branch | store | (Rtype(instr).rd == 5b0); - // integer operations // store next address? 
- intop := (IntImm | IntReg); storeAddr := AUIPC; - // value to store directly - val := LUI ? imm_u : cycle; - // store value? - storeVal := LUI | CSR; - // branch instruction? - is_branch := branch; always { + // ==== set decoder outputs depending on incoming instructions + // load/store? + load = opcode == 5b00000; store = opcode == 5b01000; + // operator for load/store // register to write to? + op = Rtype(instr).op; write_rd = Rtype(instr).rd; + // do we have to write a result to a register? + no_rd = branch | store | (Rtype(instr).rd == 5b0); + // integer operations // store next address? + intop = (IntImm | IntReg); storeAddr = AUIPC; + // value to store directly + val = LUI ? imm_u : cycle; + // store value? + storeVal = LUI | CSR; + // branch instruction? + is_branch = branch; + // ====================== ALU: IntOps uint1 j(0); // temp variables for and comparator - // ====================== ALU -$$if not ICEV_FAST_SHIFT then - // are we still shifting? - uint1 shiting <:: (shamt != 0); - // shift (one bit per clock) - if (trigger) { - // start shifting? - shamt = aluShift ? __unsigned(b[0,5]) : 0; - // store value to be shifted - r = xa; - } else { - if (shiting) { - // decrease shift size - shamt = shamt - 1; - // shift one bit - r = op[2,1] ? (Rtype(instr).sign ? {r[31,1],r[1,31]} - : {__signed(1b0),r[1,31]}) : {r[0,31],__signed(1b0)}; - } - } - working = (shamt != 0); -$$end // all ALU operations switch (op) { case 3b000: { r = sub ? a_minus_b : xa + b; } // ADD / SUB case 3b010: { r = a_lt_b; } case 3b011: { r = a_lt_b_u; } // SLTI / SLTU case 3b100: { r = xa ^ b; } case 3b110: { r = xa | b; } // XOR / OR -$$if not ICEV_FAST_SHIFT then - case 3b001: { } case 3b101: { } // SLLI/SRLI/SRAI -$$else case 3b001: { r = (xa <<< b[0,5]); } case 3b101: { r = Rtype(instr).sign ? 
(xa >>> b[0,5]) : (xa >> b[0,5]); } -$$end case 3b111: { r = xa & b; } // AND default: { r = {32{1bx}}; } // don't care } + // ====================== ALU: MulDiv +$$if ICEV_MULDIV then + // mul div + uint1 mulh = op[0,2] == 2b01; + uint1 mulhsu = op[0,2] == 2b10; + uint1 signa = xa[31,1]; + uint1 signb = xb[31,1]; // vvvvvvvvvv keep the sign? + int33 ma = {signa & (mulh | mulhsu), xa}; + int33 mb = {signb & mulh, xb}; + int64 mul = ma * mb; // multiply + uint1 signdiv = ~ op[0,1]; + uint1 divdone = isdone(div) & ~prev_divdone; // pulses on div done + uint1 prev_divdone = isdone(div); + //if (muldiv & working & divdone) { + // __display("DIVISION %d / %d = %d (%d)\n",div.inum,div.iden,div.ret,div.rem); + //} + working = (working | (trigger & op[2,1])) + & muldiv + & ~cancel + & ~(working & divdone); + if (trigger) { // div restarts each trigger + div.inum = (signdiv&signa) ? -xa : xa; + div.iden = (signdiv&signb) ? -xb : xb; + div <- (); + } + uint1 div_negate = signdiv & (signa ^ signb); + uint1 ret_h = |op[0,2]; + if (muldiv) { + r = ((~op[2,1] & ret_h) ? mul[32,32] : 32b0) // MULH, MULHSU, MULHU + | ((~op[2,1] & ~ret_h) ? mul[ 0,32] : 32b0) // MUL + | (( op[2,1] & div_negate & op[1,1] ) ? -div.rem : 32b0) // REM + | (( op[2,1] & ~div_negate & op[1,1] ) ? div.rem : 32b0) // REMU + | (( op[2,1] & div_negate & ~op[1,1] ) ? -div.ret : 32b0) // DIV + | (( op[2,1] & ~div_negate & ~op[1,1] ) ? 
div.ret : 32b0);// DIVU + } +$$end // ====================== Comparator for branching switch (op[1,2]) { - case 2b00: { j = a_eq_b; } /*BEQ */ case 2b10: { j=a_lt_b;} /*BLT*/ + case 2b00: { j = a_eq_b; } /*BEQ */ case 2b10: { j = a_lt_b;} /*BLT*/ case 2b11: { j = a_lt_b_u;} /*BLTU*/ default: { j = 1bx; } } jump = (JAL | JALR) | (branch & (j ^ op[0,1])); diff --git a/projects/ice-v/Makefile.swirl-cache b/projects/ice-v/Makefile.swirl-cache new file mode 100644 index 00000000..e349c573 --- /dev/null +++ b/projects/ice-v/Makefile.swirl-cache @@ -0,0 +1,14 @@ + +.DEFAULT: SOCs/ice-v-soc-swirl-cache.si + cp ./compile/build/code.bin data.raw + # time dd if=/dev/urandom of=data.raw bs=1 count=65536 + silice-make.py -s SOCs/ice-v-soc-swirl-cache.si -b $@ -p basic -o BUILD_$(subst :,_,$@) $(ARGS) + +icebreaker: SOCs/ice-v-soc-swirl-cache.si + silice-make.py -s SOCs/ice-v-soc-swirl-cache.si -b $@ -p basic,pmod_qqspi,uart -o BUILD_$(subst :,_,$@) $(ARGS) + +brot: SOCs/ice-v-soc-swirl-cache.si + silice-make.py -s SOCs/ice-v-soc-swirl-cache.si -b $@ -p basic,qpsram,uart2 -o BUILD_$(subst :,_,$@) $(ARGS) + +clean: + rm -rf BUILD_* diff --git a/projects/ice-v/SOCs/ice-v-soc-cmp.si b/projects/ice-v/SOCs/ice-v-soc-cmp.si index f76ccb7d..9a3798a4 100644 --- a/projects/ice-v/SOCs/ice-v-soc-cmp.si +++ b/projects/ice-v/SOCs/ice-v-soc-cmp.si @@ -24,7 +24,7 @@ $$ICEV_FAST_SHIFT=1 $$ICEV_VERILATOR_TRACE=1 $include('../CPUs/ice-v.si') $include('../CPUs/ice-v-conveyor.si') -$include('../CPUs/ice-v-swirl-compact.si') +$include('../CPUs/ice-v-swirl.si') // -------------------------------------------------- // SOC @@ -141,7 +141,7 @@ unit cpu3_bench() sameas(memio.addr) prev_mem_addr(0); // cpu - rv32i_cpu_swirl cpu( mem <:> memio, rom <:> romio ); + rv32i_cpu_swirl cpu( dmem <:> memio, imem <:> romio ); // io mapping always { diff --git a/projects/ice-v/SOCs/ice-v-soc-cmp.si.cpp b/projects/ice-v/SOCs/ice-v-soc-cmp.si.cpp index 07895ecf..89d9746d 100644 --- 
a/projects/ice-v/SOCs/ice-v-soc-cmp.si.cpp +++ b/projects/ice-v/SOCs/ice-v-soc-cmp.si.cpp @@ -61,8 +61,8 @@ void check_and_synch() ++ num_retired_synch; #if 0 // verify coherence - if (retired[0].front() == retired[1].front() - && retired[0].front() == retired[2].front()) { + if ( retired[0].front() == retired[1].front() + && retired[1].front() == retired[2].front()) { for (int i = 0 ; i < 3 ; ++i) { retired[i].pop_front(); } @@ -125,8 +125,8 @@ void cpu_retires(int id,unsigned int pc,unsigned int instr, exit (-1); } t_retired_instr ri; - ri.pc = pc; ri.instr = instr; - ri.rd = rd; ri.val = val; + ri.pc = pc; ri.instr = instr; + ri.rd = rd&31; ri.val = val; retired[id-1].push_back(ri); ++ num_retired[id-1]; check_and_synch(); diff --git a/projects/ice-v/SOCs/ice-v-soc-ram.si b/projects/ice-v/SOCs/ice-v-soc-ram.si index 83b29182..a3ddcdd2 100644 --- a/projects/ice-v/SOCs/ice-v-soc-ram.si +++ b/projects/ice-v/SOCs/ice-v-soc-ram.si @@ -27,7 +27,8 @@ // Address space and boot configuration // allow for 32MB RAM addressing + 1 bit periph -$$addrW = 24 +// (addressing 32bits: 2^23 x 32bits, 8MB x 4 bytes) +$$addrW = 24 -- 23 bits for addresses, 1 bit for peripheral mapping // bit for peripheral addresses $$periph = addrW-1 // boot address in QQSPI @@ -334,6 +335,9 @@ $$if SIMULATION then spiscreen_clk = screen_clk; spiscreen_mosi = screen_mosi; // assign pins spiscreen_dc = screen_dc; spiscreen_resn = screen_resn; $$end +$$end +$$if TRACE then + cpu.trace_on = reset ? 
0 : cpu.trace_on; $$end // ---- memory mapping to peripherals: writes @@ -345,8 +349,18 @@ $$end // leds for activity leds = ramio.wdata[0,5]; $$if SIMULATION then +$$if not TRACE then __write("%c",ramio.wdata[0,8]); - // __display("==> %d",ramio.wdata); +$$else + if (ramio.wdata[31,1]) { + if (cpu.trace_on) { + __finish(); + } else { + cpu.trace_on = 1; + } + } +$$end + __verilog("$c32(\"output_char(\",%,\");\");",ramio.wdata[0,8]); $$end // send over uart uo.data_in_ready = 1; diff --git a/projects/ice-v/SOCs/ice-v-soc-swirl-cache.si b/projects/ice-v/SOCs/ice-v-soc-swirl-cache.si new file mode 100644 index 00000000..faacdd5b --- /dev/null +++ b/projects/ice-v/SOCs/ice-v-soc-swirl-cache.si @@ -0,0 +1,267 @@ +// SL 2020-05 @sylefeb +// +// Pipelined RV32I cpu, see ../CPUs/ice-v-swirl.si for notes +// +// https://github.com/sylefeb/Silice +// MIT license, see LICENSE_MIT in Silice repo root + +// Clocks +$$if ICEBREAKER then +$$ICE40 = 1 +import('../../common/plls/icebrkr_50_lock.v') +import('../../common/ice40_spram.v') +import('../../common/ice40_sb_gb.v') +import('../../common/ice40_half_clock.v') +$$elseif not SIMULATION then +$$error('This SOC does not support the selected board.') +$$end + +$$uart_in_clock_freq_mhz = 25 +$include('../../common/uart.si') + +$$config['bram_wmask_byte_wenable_width'] = 'data' + +$$addrW = 25 +$$periph = addrW - 1 +$$ramW = 24 -- actual memory address width (<= addrW) + +// include the processor +$$ICEV_FAST_SHIFT = 1 +$$ICEV_MULDIV = 1 +$$ICEV_STALL = 1 +$include('../CPUs/ice-v-swirl.si') + +$include('../common/simulation_spram.si') + +// -------------------------------------------------- +// PLL for simulation +// -------------------------------------------------- + +$$if SIMULATION then +// PLL for simulation +import('../common/passthrough.v') +unit pll( + output uint1 clock2x, + output uint1 clock1x, + output uint1 rst +) { + uint2 counter(0); + uint8 reset_counter(255); + passthrough _(inv <: clock, outv :> clock2x); + 
algorithm { + while (1) { + rst = (reset_counter != 8d0); + reset_counter = (reset_counter != 8d0) ? (reset_counter-1) : 0; + clock1x = counter[0,1]; // x2 slower + counter = counter + 1; + } + } +} +$$end + +// -------------------------------------------------- +// QPSRAM +// -------------------------------------------------- + +$include('../common/qpsram2x.si') + +group bram_io +{ + uint4 wenable(0), + int32 wdata(0), + int32 rdata(0), + uint$addrW$ addr(0), // boot address +} + +// -------------------------------------------------- +// Cache +// -------------------------------------------------- + +$include('swirl-cache.si') + +// -------------------------------------------------- +// SOC +// -------------------------------------------------- + +unit reset_delay(input uint1 rst, output uint1 rst_delayed(1)) +{ + uint24 counter(1); + always { + rst_delayed = rst ? 1 : (rst_delayed & (counter == 0)); + counter = rst ? 1 : (counter + 1); + } +} + +unit main( // I guess this is the SOC :-D + output uint5 leds, +$$if not SIMULATION then + inout uint1 ram_io0, + inout uint1 ram_io1, + inout uint1 ram_io2, + inout uint1 ram_io3, + output uint1 ram_clk, + output uint1 ram_csn, + output uint2 ram_bank(2b00), + input uint1 uart_rx, + output uint1 uart_tx, +$$end +) +$$if not SIMULATION then +<@clock1x,!rst> { + uint1 clock2x = uninitialized; + uint1 pll_rst = uninitialized; + uint1 rst = uninitialized; + reset_delay _(rst <: pll_rst, rst_delayed :> rst); + pll _( + clock_in <: clock, + clock_out :> clock2x, + reset :> pll_rst + ); + uint1 clock1x = uninitialized; + ice40_half_clock hc( + clock_in <: clock2x, + clock_out :> clock1x, + ); +$$else +<@clock1x,!rst> { + uint1 clock1x = uninitialized; + uint1 clock2x = uninitialized; + uint1 pll_rst = uninitialized; + uint1 rst = uninitialized; + reset_delay _(rst <: pll_rst, rst_delayed :> rst); + pll clkgen<@clock,!reset>( + clock1x :> clock1x, + clock2x :> clock2x, + rst :> pll_rst + ); + // dummy pins for simulation + uint1 
ram_io0(0); uint1 ram_io1(0); uint1 ram_io2(0); + uint1 ram_io3(0); uint1 ram_clk(0); uint1 ram_csn(0); +$$end + + // for memory mapping, need to record prev. cycle addr and rw + uint$addrW$ prev_mem_addr(0); + uint1 prev_mem_rw(0); +$$if SIMULATION then + uint32 cycle(0); uint32 last_cycle(0); +$$end + + // cache + bram_io mem0io; bram_io mem1io; + cache cache( clock2x <: clock2x, + ram_csn :> ram_csn, ram_clk :> ram_clk, + ram_io0 <:> ram_io0, ram_io1 <:> ram_io1, + ram_io2 <:> ram_io2, ram_io3 <:> ram_io3, + mem0 <:> mem0io, mem1 <:> mem1io, + ); + + // in-the-middle data interface for peripheral mapping + bram_io data; + + // cpu + rv32i_cpu_swirl cpu( imem <:> mem0io, dmem <:> data, stall_cpu <: cache.wait ); + + // uart + uart_out uo; +$$if not SIMULATION then + uart_sender usend(io <:> uo,uart_tx :> uart_tx); +$$end + + // io mapping + always_before { + // ---- memory mapping, masking away peripherals + uint1 on_periph = data.addr[$periph$,1]; + mem1io.wenable = on_periph ? 0 : data.wenable; + mem1io.wdata = data.wdata; + mem1io.addr = on_periph ? 
mem1io.addr : data.addr; + data.rdata = mem1io.rdata; + // ---- peripherals + uo.data_in_ready = 0; // maintain low, pulse high to send +$$if SIMULATION then + //if ((data.wenable != 0) & ~on_periph) { + // __display("[cycle %d] write %x @%x",cycle,data.wdata,data.addr); + //} +$$end + if ((data.wenable != 0) & on_periph) { +$$if SIMULATION then + if (data.wdata == 32hffffffff) { __finish(); } else +$$end + { + uint3 select = data.addr[0,3]; + onehot (select) { + case 0: { leds = data.wdata[0,5]; +$$if SIMULATION then + __display("[cycle %d (%d)] LEDs: %b (%d)",cycle,cycle - last_cycle,leds,data.wdata); + last_cycle = cycle; +$$end + } + case 1: { + uo.data_in = data.wdata[0,8]; + uo.data_in_ready = 1; +$$if SIMULATION then + __write("%c",data.wdata[0,8]); +$$end + } + case 2: { } + } + } + } + +$$if SIMULATION then + /* + // cache tests + uint2 test_step(0); + if (~cache.wait) { + mem0io.addr = (mem0io.addr + 1) & 511; + mem0io.wdata = {16hfaab,cycle[0,16]}; + mem0io.wenable = 4b1111; + // mem0io.addr = ((cycle>>2)&1) ? 
16384 : 0; + $$if false then + switch (test_step) + { + case 0: { + mem0io.addr = 0; + mem0io.wdata = {16hfaab,cycle[0,16]}; + mem0io.wenable = 4b1111; + } + case 1: { + mem0io.addr = 16384; + mem0io.wdata = {16hbeef,cycle[0,16]}; + mem0io.wenable = 4b1111; + } + case 2: { + mem0io.addr = 0; + mem0io.wdata = 32h0; + mem0io.wenable = 4b0000; + } + case 3: { + mem0io.addr = 16384; + mem0io.wdata = 32h0; + mem0io.wenable = 4b0000; + } + } + test_step = test_step + 1; + $$end + // mem1io.addr = mem1io.addr + 1; + __display("[%d] 0: %x 1: %x next: 0@%x %b|%x 1@%x %b|%x",cycle,mem0io.rdata,mem1io.rdata,mem0io.addr<<2,mem0io.wenable,mem0io.wdata,mem1io.addr<<2,mem1io.wenable,mem1io.wdata); + } else { + //__display("[%d] cache wait",cycle); + } + */ + cycle = cycle + 1; +$$end + } + + algorithm { +$$if SIMULATION and not BARE then + //while (1) { } + while (cycle < 1000) { } + __display("stopping at cycle %d",cycle); +$$else + while (1) { } +$$end + } + +} + +// -------------------------------------------------- diff --git a/projects/ice-v/SOCs/ice-v-soc-swirl.si b/projects/ice-v/SOCs/ice-v-soc-swirl.si index 60c027c6..b8f975da 100644 --- a/projects/ice-v/SOCs/ice-v-soc-swirl.si +++ b/projects/ice-v/SOCs/ice-v-soc-swirl.si @@ -24,6 +24,7 @@ $$addrW = 12 // include the processor $$ICEV_FAST_SHIFT = 1 +$$ICEV_MULDIV = 1 $include('../CPUs/ice-v-swirl.si') // -------------------------------------------------- @@ -112,7 +113,7 @@ $$end // uses template "bram_wmask_byte", that turns wenable into a byte mask // cpu - rv32i_cpu_swirl cpu( mem <:> memio, rom <:> romio ); + rv32i_cpu_swirl cpu( dmem <:> memio, imem <:> romio ); // io mapping always_before { @@ -150,7 +151,11 @@ $$end if (memio.wenable != 0 & memio.addr[11,1]) { leds = mem.wdata[0,5] & {5{memio.addr[0,1]}}; $$if SIMULATION then - if (memio.addr[0,1]) { __display("[cycle %d] LEDs: %b",cycle,leds); } + if (memio.addr[0,1]) { + if (mem.wdata == 32hffffffff) { __finish(); } + // __display("[cycle %d] LEDs: %b 
(%d)",cycle,leds,mem.wdata); + else { __write("%c",mem.wdata[0,8]); } + } $$end $$if OLED then // if (memio.addr[1,1]) { __display("[cycle %d] OLED: byte %x",cycle,displ_byte); } diff --git a/projects/ice-v/SOCs/swirl-cache.si b/projects/ice-v/SOCs/swirl-cache.si new file mode 100644 index 00000000..bb33e379 --- /dev/null +++ b/projects/ice-v/SOCs/swirl-cache.si @@ -0,0 +1,499 @@ +// SL 2023-08 @sylefeb +// +// Cache for the ice-v-swirl +// +// directly mapped +// +// IMPORTANT: when wait goes high, memory interface should not change +// their inputs! +// WARNING: Currently the two caches do not see writes into one another if +// both are caching the same lines. +// This is ok when using mem0 for instructions and mem1 for data as long as +// code is not loaded/generated and then executed. Otherwise a software +// cache flush is required. +// TODO: Implement a mechanism to avoid this issue? +// NOTE: when writing, the read value is undetermined (it is the previously +// stored value but this is subject to change) +// NOTE: if SPIFLASH is present, it is never selected at the same time as +// the PSRAM so they could share the other SPI com lines. +// NOTE: SPRAM and SPIFLASH are assumed the same width. +// +// PRE-PROCESSOR PARAMETERS: +// - ramW is the memory cached space address width +// (if SPIFLASH is present, the highest bit selects it) +// - cache_has_spiflash indicates SPIFLASH is present +// +// https://github.com/sylefeb/Silice +// MIT license, see LICENSE_MIT in Silice repo root + +/* + +How does this work? +------------------- + +This is a double 32bits cache storing data in SPRAM (64KB each). Both caches +work the same and can be used for data/instructions for instance. Beware they +do not 'see' each other when a line at a same address is mapped by both (so +keep instructions read only). +As both caches operate the same, the description next considers only cache 0. + +The cache uses a simple direct mapping. 
A BRAM (cache0_lines) stores the upper +parts of the cache line addresses. An example setup is cache lines of 256x32bits, +on 64 cache lines. + +Reading: +------- +The read is performed while cache0_lines is accessed. If the upper address +match, it's a hit. Otherwise, bad luck, wait goes high immediately. On the next +cycle the cache will start the process of writing back (if needed) the current +line content, and then reading the new content, before releasing wait. + +Writing: +------- +We cannot perform the writes blindly, as we first have to know whether the +current line is the correct one. However we do know which line was previously +accessed, and if it has not changed (still the same line) we can check the upper +addresses immediately. Otherwise we have no choice but to raise wait for one +cycle, until the result from looking up cache0_lines is available. + +*/ + +// from the outside the cache acts as a BRAM when filled +interface bram_provider +{ + input wenable, + input wdata, + output! rdata, + input addr, +} + +// SPRAM can store 16384 x 32bits for one cache, 14 bits address +// ram address is split as: +// +// [ cache addr | line id | line addr ] +// <--- cache_addr_w ---> <- num_cache_lines_w -> <----- cache_line_w -----> +// <--------------------------------- ramW --------------------------------> +// ^<-------------------- 14 bits -------------------> +// | +// cache_addr_start +// +$$cache_line_w = 3 -- has to be < 14 +$$cache_line_size = 1 << cache_line_w +$$num_cache_lines_w = (14 - cache_line_w) +$$num_cache_lines = 1 << num_cache_lines_w +$$cache_addr_w = ramW - 14 +$$cache_addr_start = 14 +$$if cache_has_spiflash then +$$cache_memW = ramW - 1 -- ram/rom actual addr width +$$else +$$cache_memW = ramW +$$end +$$cache_ext_w = cache_memW - 14 -- cached addr width of ram/rom + +$$print('<> ' .. num_cache_lines +$$ .. ' cache lines of ' .. cache_line_size .. 
' int32') + +// -------------------------------------------------- +// cache unit +// -------------------------------------------------- +unit cache( + // bram interfaces + bram_provider mem0, + bram_provider mem1, + // wait goes high when cache cannot answer immediately + // this happens the cycle after the missed address is set + output! uint1 wait(1), + // clock 2x for ram + input uint1 clock2x, + // ram io + inout uint1 ram_io0, inout uint1 ram_io1, + inout uint1 ram_io2, inout uint1 ram_io3, + output uint1 ram_clk, output uint1 ram_csn, +$$if cache_has_spiflash then + // spiflash io + inout uint1 sf_io0, inout uint1 sf_io1, + output uint1 sf_clk, output uint1 sf_csn, +$$end +) { + + // cache memory (SPRAM, FLASH) + // + // cache for mem0 is spram0,spram1 (2x 16 bits) + // cache for mem1 is spram2,spram3 (2x 16 bits) + // + // each cache stores 16K x int32 + // thus 2^(14 - cache_line_w) lines of 2^cache_line_w int32 + // + // If FLASH is present, the highest bit is used to select + // between SPRAM and FLASH, with SPRAM in the lower range + // (this makes SPRAM and FLASH the same size) + +$$for n=0,3 do +$$if SIMULATION then + simulation_spram spram$n$; +$$else + ice40_spram spram$n$(clock <: clock); +$$end +$$end + +$$init_value='' +$$for b=1,cache_addr_w do +$$ init_value = '1' .. init_value +$$end +$$init_value = (cache_addr_w+1) .. 'b0' .. 
init_value + + // cache mapping, stores for each line the upper address part, and a dirty bit + bram uint$cache_addr_w+1$ cache0_lines[$num_cache_lines$] = {pad($init_value$)}; + bram uint$cache_addr_w+1$ cache1_lines[$num_cache_lines$] = {pad($init_value$)}; + // ^ dirty bit + +$$for c=0,1 do + // determines whether requested addr match the corresponding cache line + uint$num_cache_lines_w$ line$c$_id <: mem$c$.addr[$cache_line_w$,$num_cache_lines_w$]; + uint1 cache$c$_hit <:: qaddr$c$[$cache_addr_start$,$cache_addr_w$] + == :cache$c$_lines.rdata[0,$cache_addr_w$]; + uint1 cache$c$_canwrite <:: (:line$c$_id == qline$c$_id) + && :mem$c$.addr[$cache_addr_start$,$cache_addr_w$] + == :cache$c$_lines.rdata[0,$cache_addr_w$]; + // registers for memory interface inputs + uint$num_cache_lines_w$ qline$c$_id(0); + uint$ramW$ qaddr$c$(0); + uint4 qwen$c$(0); +$$if cache_has_spiflash then + uint1 in_rom$c$ <: mem$c$.addr[$ramW-1$,1]; +$$else + uint1 in_rom$c$(0); +$$end +$$end + +$$if SIMULATION then + uint32 cycle(0); +$$end + + // external PSRAM + qpsram_ram ram<@clock2x>( + ram_csn :> ram_csn, ram_clk :> ram_clk, + ram_io0 <:> ram_io0, ram_io1 <:> ram_io1, + ram_io2 <:> ram_io2, ram_io3 <:> ram_io3, + ); + adapt_data_next adapt<@clock2x>(data_next_2x <: ram.data_next, + rdata_2x <: ram.rdata); + uint1 reg_ram_datanext(0); + uint8 reg_ram_rdata(0); + uint1 reg_ram_wstream(0); + +$$if cache_has_spiflash then + // external FLASH (runs at 1x and dual SPI, 'slow' ROM) + spiflash_rom_core rom( + sf_clk :> sf_clk, sf_csn :> sf_csn, + sf_io0 <:> sf_io0, sf_io1 <:> sf_io1, + ); +$$end + + // cache internal state + uint1 update_lines_todo(0); + uint1 update_lines_done(0); + uint1 write_cycle(0); + uint1 keep_wait(0); + uint1 write_fault(0); + +$$if SIMULATION then + uint1 debug_on(0); +$$end + + always_before { +$$if SIMULATION then + if (mem0.addr[$periph$,1] || mem1.addr[$periph$,1]) { + __display("ERROR cache: access on peripheral address"); + __finish(); + } +$$if 
cache_has_spiflash then + if ((in_rom0 & (|mem0.wenable)) + || (in_rom1 & (|mem1.wenable)) ) { + __display("ERROR cache: write in ROM space (@%x)",in_rom0 ? mem0.addr : mem1.addr); + __finish(); + } +$$end + debug_on = 0; // cycle > 3010340; +$$end +/* + __display("[%d] >> cache status in: @%x|@%x q: @%x|@%x miss: %b|%b ln: %x|%x", + cycle,mem0.addr<<2,mem1.addr<<2, + qaddr0<<2,qaddr1<<2, + ~cache0_hit,~cache1_hit, + cache0_lines.rdata, cache1_lines.rdata); + __display("[%d] >> cache status in: todo: %b",cycle,update_lines_todo); + __display("[%d] >> cache status in: wen0: %b wdata0:%x",cycle,mem0.wenable,mem0.wdata); + __display("[%d] >> cache status in: wen1: %b wdata1:%x",cycle,mem1.wenable,mem1.wdata); +*/ + // cache lookup + cache0_lines.addr = line0_id; + cache1_lines.addr = line1_id; + cache0_lines.wenable = 0; + cache1_lines.wenable = 0; + // ram access + ram.in_ready = 0; // maintain low, pulse high to read/write + ram.init = 0; // maintain low, pulse high to init + ram.wenable = 0; // maintain low, set high when writing +$$if cache_has_spiflash then + rom.in_ready = 0; // maintain low, pulse high to read +$$end + // default lookup + mem0.rdata = {spram1.data_out,spram0.data_out}; + mem1.rdata = {spram3.data_out,spram2.data_out}; + spram0.addr = {line0_id,mem0.addr[0,$cache_line_w$]}; + spram1.addr = {line0_id,mem0.addr[0,$cache_line_w$]}; + spram2.addr = {line1_id,mem1.addr[0,$cache_line_w$]}; + spram3.addr = {line1_id,mem1.addr[0,$cache_line_w$]}; + spram0.wmask = {{2{mem0.wenable[1,1]}},{2{mem0.wenable[0,1]}}}; + spram1.wmask = {{2{mem0.wenable[3,1]}},{2{mem0.wenable[2,1]}}}; + spram0.wenable = (|mem0.wenable) & cache0_canwrite; + spram1.wenable = (|mem0.wenable) & cache0_canwrite; + spram0.data_in = mem0.wdata[ 0,16]; + spram1.data_in = mem0.wdata[16,16]; + spram2.wmask = {{2{mem1.wenable[1,1]}},{2{mem1.wenable[0,1]}}}; + spram3.wmask = {{2{mem1.wenable[3,1]}},{2{mem1.wenable[2,1]}}}; + spram2.wenable = (|mem1.wenable) & cache1_canwrite; + 
spram3.wenable = (|mem1.wenable) & cache1_canwrite; + spram2.data_in = mem1.wdata[ 0,16]; + spram3.data_in = mem1.wdata[16,16]; + // if either cache missed, raise wait + wait = reset | ~cache0_hit | ~cache1_hit + // ^^^^^ wait during reset + | write_cycle | keep_wait | write_fault; + write_cycle = 0; // additional cycle for writes, after an update + keep_wait = 0; // see note where set + write_fault = ((|mem0.wenable) & ~cache0_canwrite) + | ((|mem1.wenable) & ~cache1_canwrite); + + //if (cache1_hit & (|mem1.wenable) & ~cache1_canwrite) { + // __display("[%d] cache1 write fault @%x <= %x (line: %d,%d prev@%x)",cycle,mem1.addr<<2,mem1.wdata,line1_id,qline1_id,qaddr1); + //} + //if (spram2.wenable) { + // __display("[%d] cache1 write on line @%x <= %x",cycle,mem1.addr<<2,mem1.wdata); + //} + //if (~cache0_hit | ~cache1_hit) { + // __display("[%d] >> cache miss %b|%b %x|%x",cycle,~cache0_hit,~cache1_hit,qaddr0<<2,qaddr1<<2); + //} + } + + algorithm { + + // init QPSRAM NOTE: assume already initialized for now + // ram.init = 1; + // while (ram.busy) {} + // main loop + + while (1) { + if (update_lines_todo) { + // there is a pending cache miss +$$if SIMULATION then + if (debug_on) { + __display("[%d] => cache miss %b|%b %x|%x (%x_%d|%x_%d)",cycle, + ~cache0_hit,~cache1_hit, + {qaddr0,2b00},{qaddr1,2b00}, + cache0_lines.rdata[0,$cache_addr_w$],cache0_lines.addr, + cache1_lines.rdata[0,$cache_addr_w$],cache1_lines.addr); + } +$$end + // if line is dirty, store back + // vvvvv start by cache0, then cache1 + uint1 line_dirty = ~cache0_hit ? cache0_lines.rdata[$cache_addr_w$,1] + : cache1_lines.rdata[$cache_addr_w$,1]; + if (line_dirty) { + // store back line as it was written to + uint$cache_memW$ storeAddr = + ~cache0_hit ? 
{cache0_lines.rdata[0,$cache_ext_w$],qline0_id,$cache_line_w$b0} + : {cache1_lines.rdata[0,$cache_ext_w$],qline1_id,$cache_line_w$b0}; + uint$cache_line_w+2+1$ n = 0; +$$if SIMULATION then + if (debug_on) { + __display("[%d] => [cache %b] writing at @%x from line %d",cycle,cache0_hit,storeAddr<<2,~cache0_hit ? qline0_id : qline1_id); + } +$$end + // write start addr + ram.addr = {storeAddr,2b00}; + while (~n[$cache_line_w+2$,1]) { + // maintain write status + ram.in_ready = 1; + ram.wenable = 1; + // vvvvv NOTE: on first iteration spramN.data_out is not ready yet, + // however the QPSRAM controller does not access it immediately + // + ram.wdata = ~cache0_hit + ? {spram1.data_out,spram0.data_out} >> {n[0,2],3b0} + : {spram3.data_out,spram2.data_out} >> {n[0,2],3b0}; + // next? + if (reg_ram_wstream) { +$$if SIMULATION then + //uint32 full = ram.addr + n; + //if (full[8,16] == 16h14cd || debug_on) { + // __display("store @%x = %x (cache %b, line %d, cycle %d)",full,ram.wdata,cache0_hit,~cache0_hit ? qline0_id : qline1_id,cycle); + //} + // __write("%x,",ram.wdata); +$$end + // next + n = n + 1; + } + // read in cache 0 + spram0.addr = {qline0_id,n[2,$cache_line_w$]}; + spram1.addr = {qline0_id,n[2,$cache_line_w$]}; + // read in cache 1 + spram2.addr = {qline1_id,n[2,$cache_line_w$]}; + spram3.addr = {qline1_id,n[2,$cache_line_w$]}; + } + // __display(" "); +++: // this one cycle pause is needed before starting to read, otherwise + // data_next from the write remains visible to the read (latencies) + } + // fetch new line + uint$cache_memW$ fetchAddr = + ~cache0_hit ? 
{qaddr0[$cache_line_w$,$cache_memW-cache_line_w$], + $cache_line_w$b0} + : {qaddr1[$cache_line_w$,$cache_memW-cache_line_w$], + $cache_line_w$b0}; + uint$cache_line_w+2+1$ n = 0; + ram.addr = {fetchAddr,2b00}; +$$if cache_has_spiflash then + rom.addr = {fetchAddr,2b00}; +$$if SIMULATION then + rom.addr = {1b1,fetchAddr,2b00}; + // ^^^ goes to high part of data file in simulation +$$end +$$end +$$if SIMULATION and cache_has_spiflash then + // __display("[%d] +> cache miss %b|%b %x|%x",cycle,~cache0_hit,~cache1_hit,qaddr0<<2,qaddr1<<2); + if (debug_on) { + uint1 in_rom = ~cache0_hit ? in_rom0 : in_rom1; + __display("[%d] => [cache %b] fetching from @%x in line %d (in_rom:%b)",cycle,cache0_hit,in_rom?rom.addr:ram.addr,~cache0_hit ? qline0_id : qline1_id,in_rom); + } +$$end + while (~n[$cache_line_w+2$,1]) { + ram.in_ready = ~cache0_hit ? ~in_rom0 : ~in_rom1; +$$if cache_has_spiflash then + rom.in_ready = ~cache0_hit ? in_rom0 : in_rom1; + if (reg_ram_datanext || rom.rdata_available) { +$$else + if (reg_ram_datanext) { +$$end + uint4 wmask = 2b11 << {n[0,1],1b0}; +$$if cache_has_spiflash then + uint16 wdata = (ram.in_ready ? reg_ram_rdata : rom.rdata) << {n[0,1],3b0}; +$$else + uint16 wdata = reg_ram_rdata << {n[0,1],3b0}; +$$end + $$if SIMULATION then + //uint32 full = ram.addr + n; + //if (debug_on) { + // __display("[cache|%d] load @%x = %x (cache %b, line %d, cycle %d, n %d, ram:%b rom:%b)", + // cycle,full,reg_ram_rdata,cache0_hit,~cache0_hit ? 
qline0_id : qline1_id,cycle,n,reg_ram_datanext,rom.rdata_available); + //} + // __write("%x,", reg_ram_rdata); + $$end + // cache 0 + spram0.addr = {qline0_id,n[2,$cache_line_w$]}; + spram1.addr = {qline0_id,n[2,$cache_line_w$]}; + spram0.data_in = wdata; + spram1.data_in = wdata; + spram0.wmask = wmask; + spram1.wmask = wmask; + spram0.wenable = ~cache0_hit & ~n[1,1]; + spram1.wenable = ~cache0_hit & n[1,1]; + // cache 1 + spram2.addr = {qline1_id,n[2,$cache_line_w$]}; + spram3.addr = {qline1_id,n[2,$cache_line_w$]}; + spram2.data_in = wdata; + spram3.data_in = wdata; + spram2.wmask = wmask; + spram3.wmask = wmask; + spram2.wenable = cache0_hit & ~n[1,1]; + spram3.wenable = cache0_hit & n[1,1]; + // next + n = n + 1; + } + } + // __display(" "); + // update cache register + if (~cache0_hit) { + cache0_lines.wdata = {1b0,qaddr0[$cache_addr_start$,$cache_addr_w$]}; + cache0_lines.wenable = 1; + } else { + cache1_lines.wdata = {1b0,qaddr1[$cache_addr_start$,$cache_addr_w$]}; + cache1_lines.wenable = 1; + } + keep_wait = 1; // IMPORTANT: keeps wait high during bram transaction + // otherwise results depends on bram read-on-write + // behavior + // NOTE: simply keep wait high while loop active? +++: // wait for cache lines bram to be written + update_lines_done = 1; // done + write_cycle = 1; // keeps wait high while write to cache line occurs + // TODO: only if indeed writing? 
+$$if SIMULATION then + // __display("[%d] cache done.",cycle); +$$end + } else { + // if no cache miss, maintain dirty flag + uint1 upd_line0 = (|mem0.wenable) & cache0_canwrite & ~in_rom0; + uint1 upd_line1 = (|mem1.wenable) & cache1_canwrite & ~in_rom1; +$$if SIMULATION then + // __display("[%d] dirty status update %b|%b",cycle,upd_line0,upd_line1); +$$end + cache0_lines.wdata = {upd_line0,$cache_addr_w$b0} | cache0_lines.rdata; + cache0_lines.wenable = upd_line0; + cache1_lines.wdata = {upd_line1,$cache_addr_w$b0} | cache1_lines.rdata; + cache1_lines.wenable = upd_line1; + } + } + + } + + always_after { + // note that cache lines have to be updated next cycle + update_lines_todo = ~reset & (~cache0_hit|~cache1_hit) & ~update_lines_done; + update_lines_done = 0; + // register inputs from memory interfaces: improves fmax and 'free' + // as cache update always occurs a cycle after memory interfaces change + qline0_id = line0_id; qline1_id = line1_id; + qaddr0 = mem0.addr; qaddr1 = mem1.addr; + qwen0 = mem0.wenable; qwen1 = mem1.wenable; + // cross clock domain + reg_ram_datanext = adapt.data_next; + reg_ram_rdata = adapt.rdata; + reg_ram_wstream = ram.wstream_1x; +/* + __display("[%d] >> cache status out: @%x|@%x q: @%x|@%x miss: %b|%b ln: %x|%x ln@: %x|%x lnw: %b|%b", + cycle,mem0.addr<<2,mem1.addr<<2, + qaddr0<<2,qaddr1<<2, + ~cache0_hit,~cache1_hit, + cache0_lines.rdata, cache1_lines.rdata, + cache0_lines.addr, cache1_lines.addr, + cache0_lines.wenable, cache1_lines.wenable, + ); + __display("[%d] >> cache status out: todo: %b",cycle,update_lines_todo); +*/ +$$if SIMULATION then + cycle = cycle + 1; +$$end + } + +} + +// -------------------------------------------------- +// unit to adapt data next signal across clock domain +// - should run at 2x +// -------------------------------------------------- + +unit adapt_data_next( + input uint1 data_next_2x, output uint1 data_next, + input uint8 rdata_2x, output uint8 rdata) +{ + uint2 dnext(0); + always { + dnext = 
data_next_2x ? 2b11 : {1b0,dnext[1,1]}; + rdata = data_next_2x ? rdata_2x : rdata; + data_next = dnext[0,1]; + } +} + +// -------------------------------------------------- diff --git a/projects/ice-v/compile/icebreaker/swirl-cache/compile_asm.sh b/projects/ice-v/compile/icebreaker/swirl-cache/compile_asm.sh new file mode 100644 index 00000000..17ffbda4 --- /dev/null +++ b/projects/ice-v/compile/icebreaker/swirl-cache/compile_asm.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +export PATH=$PATH:$DIR/../../tools/fpga-binutils/mingw32/bin/ + +source ../../tools/bash/find_riscv.sh + +echo "using $ARCH" + +BASE=./compile/icebreaker/swirl-cache +DST=./compile/build + +$ARCH-as.exe -march=rv32im -mabi=ilp32 -o $DST/code.o $1 +$ARCH-ld.exe -m elf32lriscv -b elf32-littleriscv -T$BASE/config_c.ld --no-relax -o $DST/code.elf $DST/code.o +$ARCH-objcopy.exe -O verilog $DST/code.elf $DST/code.hex + +$ARCH-objcopy.exe -O binary $DST/code.elf $DST/code.bin +$ARCH-objdump.exe -D -b binary -m riscv $DST/code.bin + +# uncomment to see the actual code, useful for debugging +$ARCH-objdump.exe --disassemble $DST/code.elf > $DST/code.s diff --git a/projects/ice-v/compile/icebreaker/swirl-cache/compile_c.sh b/projects/ice-v/compile/icebreaker/swirl-cache/compile_c.sh new file mode 100644 index 00000000..0d2c3d01 --- /dev/null +++ b/projects/ice-v/compile/icebreaker/swirl-cache/compile_c.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +set -e + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +export PATH=$PATH:$DIR/../../tools/fpga-binutils/mingw32/bin/ + +source ../../tools/bash/find_riscv.sh + +echo "using $ARCH" + +BASE=./compile/icebreaker/swirl-cache +DST=./compile/build + +$ARCH-gcc -DICEBREAKER_SWIRL_CACHE -fstack-reuse=none -fno-builtin -O3 -fno-stack-protector -fno-pic -march=rv32im -mabi=ilp32 -T$BASE/config_c.ld -ffreestanding -nostdlib -o $DST/code.elf $BASE/crt0.s $DIR/../../../src/printf.c $1 + 
+$ARCH-objcopy -O verilog $DST/code.elf $DST/code.hex + +# output a binary blob of the code +$ARCH-objcopy.exe -O binary $DST/code.elf $DST/code.bin + +# uncomment to see the actual code, useful for debugging +$ARCH-objdump.exe --disassemble $DST/code.elf > $DST/code.s diff --git a/projects/ice-v/compile/icebreaker/swirl-cache/config_c.ld b/projects/ice-v/compile/icebreaker/swirl-cache/config_c.ld new file mode 100644 index 00000000..15b4e197 --- /dev/null +++ b/projects/ice-v/compile/icebreaker/swirl-cache/config_c.ld @@ -0,0 +1,41 @@ +/* from https://raw.githubusercontent.com/YosysHQ/picorv32/master/picosoc/sections.lds */ +/* modified for icev-conveyor */ + +MEMORY +{ + RAM (xrw) : ORIGIN = 0x00000000, LENGTH = 0x1000000 /* 16 MB */ +} + +SECTIONS { + .text : + { + . = ALIGN(4); + *(.text) /* .text sections (code) */ + *(.text*) /* .text* sections (code) */ + } >RAM + + .data : + { + . = ALIGN(4); + *(.rodata) /* .rodata sections (constants, strings, etc.) */ + *(.rodata*) /* .rodata* sections (constants, strings, etc.) */ + *(.srodata) /* .rodata sections (constants, strings, etc.) */ + *(.srodata*) /* .rodata* sections (constants, strings, etc.) */ + *(.data) /* .data sections */ + *(.data*) /* .data* sections */ + *(.sdata) /* .sdata sections */ + *(.sdata*) /* .sdata* sections */ + } >RAM + + /* Uninitialized data section */ + .bss : + { + . = ALIGN(4); + *(.bss) + *(.bss*) + *(.sbss) + *(.sbss*) + *(COMMON) + } >RAM + +} diff --git a/projects/ice-v/compile/icebreaker/swirl-cache/crt0.s b/projects/ice-v/compile/icebreaker/swirl-cache/crt0.s new file mode 100644 index 00000000..43204134 --- /dev/null +++ b/projects/ice-v/compile/icebreaker/swirl-cache/crt0.s @@ -0,0 +1,15 @@ +.text +.global _start +.type _start, @function + +_start: + li sp,0x100000 # end of RAM + # init done + call main # let's roll! 
(core1) + tail exit + +.global exit +.type exit, @function +exit: + j exit + ret diff --git a/projects/ice-v/compile/icebreaker/swirl/compile_c.sh b/projects/ice-v/compile/icebreaker/swirl/compile_c.sh index f91b36b0..f7f9ef89 100644 --- a/projects/ice-v/compile/icebreaker/swirl/compile_c.sh +++ b/projects/ice-v/compile/icebreaker/swirl/compile_c.sh @@ -12,10 +12,12 @@ echo "using $ARCH" BASE=./compile/icebreaker/swirl DST=./compile/build -$ARCH-gcc -DICEBREAKER_SWIRL -fstack-reuse=none -fno-builtin -O3 -fno-stack-protector -fno-pic -march=rv32i -mabi=ilp32 -T$BASE/config_c.ld -ffreestanding -nostdlib -o $DST/code.elf $BASE/crt0.s $1 +$ARCH-gcc -DICEBREAKER_SWIRL -fstack-reuse=none -fno-builtin -O3 -fno-stack-protector -fno-pic -march=rv32im -mabi=ilp32 -T$BASE/config_c.ld -ffreestanding -nostdlib -o $DST/code.elf $BASE/crt0.s $DIR/../../../src/printf.c $1 $ARCH-objcopy -O verilog $DST/code.elf $DST/code.hex +# output a binary blob of the code +$ARCH-objcopy.exe -O binary $DST/code.elf $DST/code.bin + # uncomment to see the actual code, useful for debugging -# $ARCH-objcopy.exe -O binary $DST/code.elf $DST/code.bin -# $ARCH-objdump.exe -D -m riscv $DST/code.elf +$ARCH-objdump.exe --disassemble $DST/code.elf > $DST/code.s diff --git a/projects/ice-v/src/config.h b/projects/ice-v/src/config.h index d9768b6f..982abaf8 100644 --- a/projects/ice-v/src/config.h +++ b/projects/ice-v/src/config.h @@ -24,3 +24,7 @@ #ifdef ICEBREAKER_SWIRL #include "icebreaker-swirl/config.h" #endif + +#ifdef ICEBREAKER_SWIRL_CACHE +#include "icebreaker-swirl-cache/config.h" +#endif diff --git a/projects/ice-v/src/fire.c b/projects/ice-v/src/fire.c index bf6b440d..d0381a93 100644 --- a/projects/ice-v/src/fire.c +++ b/projects/ice-v/src/fire.c @@ -4,6 +4,8 @@ #include "oled.h" +void f_putchar(int) {} + unsigned char tbl[32*32]={ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, diff --git 
a/projects/ice-v/src/icebreaker-swirl-cache/config.h b/projects/ice-v/src/icebreaker-swirl-cache/config.h new file mode 100644 index 00000000..70e6ce5c --- /dev/null +++ b/projects/ice-v/src/icebreaker-swirl-cache/config.h @@ -0,0 +1,6 @@ +// MIT license, see LICENSE_MIT in Silice repo root +// @sylefeb 2020 +#pragma once + +volatile int* const LEDS = (int*)0x4000004; +volatile int* const UART = (int*)0x4000008; diff --git a/projects/ice-v/src/printf.c b/projects/ice-v/src/printf.c new file mode 100644 index 00000000..887a593f --- /dev/null +++ b/projects/ice-v/src/printf.c @@ -0,0 +1,61 @@ +#include "printf.h" + +void f_putchar(int c); + +void print_string(const char* s) +{ + for (const char* p = s; *p; ++p) { + f_putchar(*p); + } +} + +void print_dec(int val) +{ + char buffer[255]; + char *p = buffer; + if (val < 0) { + f_putchar('-'); + print_dec(-val); + return; + } + while (val || p == buffer) { + int q = val / 10; + *(p++) = val - q * 10; + val = q; + } + while (p != buffer) { + f_putchar('0' + *(--p)); + } +} + +void print_hex_digits(unsigned int val, int nbdigits) +{ + for (int i = (4*nbdigits)-4; i >= 0; i -= 4) { + f_putchar("0123456789ABCDEF"[(val >> i) & 15]); + } +} + +void print_hex(unsigned int val) +{ + print_hex_digits(val, 8); +} + +#include + +int printf(const char *fmt,...) 
+{ + va_list ap; + for (va_start(ap, fmt);*fmt;fmt++) { + if (*fmt=='%') { + fmt++; + if (*fmt=='s') print_string(va_arg(ap,char *)); + else if (*fmt=='x') print_hex(va_arg(ap,int)); + else if (*fmt=='d') print_dec(va_arg(ap,int)); + else if (*fmt=='c') f_putchar(va_arg(ap,int)); + else f_putchar(*fmt); + } else { + f_putchar(*fmt); + } + } + va_end(ap); +} diff --git a/projects/ice-v/src/printf.h b/projects/ice-v/src/printf.h new file mode 100644 index 00000000..8ccd6f83 --- /dev/null +++ b/projects/ice-v/src/printf.h @@ -0,0 +1,10 @@ +// @sylefeb 2022 +// MIT license, see LICENSE_MIT in Silice repo root +// https://github.com/sylefeb/Silice/ + +#pragma once + +#include + +// everyone needs a printf! +int printf(const char *fmt,...); diff --git a/projects/ice-v/src/test_div.s b/projects/ice-v/src/test_div.s new file mode 100644 index 00000000..11f4ec1e --- /dev/null +++ b/projects/ice-v/src/test_div.s @@ -0,0 +1,19 @@ +.globl _start + +_start: + + addi t1, zero,-123 + addi t2, zero, 10 + div t3, t1, t2 + rem t3, t1, t2 + divu t3, t1, t2 + remu t3, t1, t2 + + addi t1, zero,-3 + addi t2, zero,-5 + mul t3, t1, t2 + mulh t3, t1, t2 + mulhsu t3, t1, t2 + mulhu t3, t1, t2 + +jal _start diff --git a/projects/ice-v/src/test_leds.c b/projects/ice-v/src/test_leds.c index 2f908c55..6e8c1d36 100644 --- a/projects/ice-v/src/test_leds.c +++ b/projects/ice-v/src/test_leds.c @@ -4,6 +4,8 @@ #include "config.h" +void f_putchar(int) {} + void main() { volatile int i = 0; @@ -24,6 +26,7 @@ void main() *LEDS = l; // for (i=0;i<655360;i++) { asm volatile ("nop;"); } for (i=0;i<655360;i++) { } + // for (i=0;i<32;i++) { } } } diff --git a/projects/ice-v/src/test_leds_simul.c b/projects/ice-v/src/test_leds_simul.c index f7e653ba..c41fda56 100644 --- a/projects/ice-v/src/test_leds_simul.c +++ b/projects/ice-v/src/test_leds_simul.c @@ -4,6 +4,8 @@ #include "config.h" +void f_putchar(int) {} + void main() { volatile int i = 0; diff --git a/projects/ice-v/src/test_pip.s 
b/projects/ice-v/src/test_pip.s new file mode 100644 index 00000000..35bf43d2 --- /dev/null +++ b/projects/ice-v/src/test_pip.s @@ -0,0 +1,15 @@ +.globl _start + +_start: + +addi t0,zero,0 + +addi t1,t0,1 +addi t2,t1,1 +addi t3,t2,1 +addi t4,t3,1 +addi t5,t4,1 +addi t6,t5,1 + +_end: +jal _end diff --git a/projects/ice-v/src/test_uart.c b/projects/ice-v/src/test_uart.c new file mode 100644 index 00000000..64e3f5ed --- /dev/null +++ b/projects/ice-v/src/test_uart.c @@ -0,0 +1,26 @@ +// MIT license, see LICENSE_MIT in Silice repo root +// @sylefeb 2021 +// https://github.com/sylefeb/Silice + +#include "config.h" +#include "printf.h" + +void f_putchar(int c) +{ +#ifdef ICEBREAKER_SWIRL + *LEDS = c; +#else + *UART = c; +#endif + for (volatile int i = 0 ; i < 1024 ; ++i ) { } +} + +void main() +{ + int n = 0; + // for (int i = 0 ; i < 32 ; ++i) { + while (1) { + printf("hello world %d\n",n+=13); + } + *LEDS = 0xffffffff; +} diff --git a/projects/ice40-warmboot/README.md b/projects/ice40-warmboot/README.md index 6d5fc6d6..41512ecb 100644 --- a/projects/ice40-warmboot/README.md +++ b/projects/ice40-warmboot/README.md @@ -36,7 +36,7 @@ Both are compiled independently as usual, see the [Makefile](Makefile). This obt But now let's pack them together, and enable switching when pressing button 1! -First, we modify both designs in the same way. We import a Verilog module [ice40_warmboot.v](ice40_warmboot.v). The Verilog code for this module is straightforward, it simply encapsulates the `SB_WARMBOOT` ice40 primitive: +First, we modify both designs in the same way. We import a Verilog module [ice40_warmboot.v](../common/ice40_warmboot.v). 
The Verilog code for this module is straightforward, it simply encapsulates the `SB_WARMBOOT` ice40 primitive: ```v module ice40_warmboot( diff --git a/projects/qpsram/Makefile b/projects/qpsram/Makefile index 8ac28451..02f5e57f 100644 --- a/projects/qpsram/Makefile +++ b/projects/qpsram/Makefile @@ -13,6 +13,12 @@ ecpix5: xfer.si ulx3s: xfer.si silice-make.py -s xfer.si -b $@ -p basic,uart,pmod_qqspi -o BUILD_$(subst :,_,$@) $(ARGS) +brot: xfer.si + silice-make.py -s xfer.si -b brot -p basic,uart,qpsram -o BUILD_$(subst :,_,$@) $(ARGS) + +brot2: xfer.si + silice-make.py -s xfer.si -b brot -p basic,uart2,qpsram -o BUILD_$(subst :,_,$@) $(ARGS) + icarus: xfer.si silice-make.py -s xfer.si -b $@ -p basic -o BUILD_$(subst :,_,$@) $(ARGS) diff --git a/projects/qpsram/memtest.py b/projects/qpsram/memtest.py index 408e9152..fca29500 100644 --- a/projects/qpsram/memtest.py +++ b/projects/qpsram/memtest.py @@ -12,15 +12,16 @@ read_packed_size = 32 if len(sys.argv) < 4: - print("xfer.py ") + print("memtest.py ") sys.exit() # open serial port -ser = serial.Serial(sys.argv[1],500000, timeout=1) -# ser = serial.Serial(sys.argv[1],115200, timeout=1) +# ser = serial.Serial(sys.argv[1],500000, timeout=1) +ser = serial.Serial(sys.argv[1],115200, timeout=1) # seed S = int(time.time()) +# S = 42 # address addr = int(sys.argv[2], 0) @@ -30,49 +31,50 @@ size = int(sys.argv[3], 0) size = math.ceil(size / read_packed_size) * read_packed_size -# send start tag -packet = bytearray() -packet.append(0xD5) # write -ser.write(packet) - -# send address -packet = bytearray() -packet.append((addr>>24)&255) -packet.append((addr>>16)&255) -packet.append((addr>>8)&255) -packet.append(addr&255) -ser.write(packet) - -# send size -# we report a size of one less (avoids a 32 bits -1 in logic) -size_m1 = size - 1 -packet = bytearray() -packet.append((size_m1>>24)&255) -packet.append((size_m1>>16)&255) -packet.append((size_m1>>8)&255) -packet.append(size_m1&255) -ser.write(packet) - -# send data (pseudo 
random) -print('Sending ...') -packet = bytearray() -n = 0 -ntot = 0 -pbs = tqdm(total=size) -random.seed(S) -while True: - b = random.randint(0,255) - packet.append(b) - n = n + 1 - if n == 32768 or ntot+n == size: - ser.write(packet) - pbs.update(n) - packet = bytearray() - if ntot+n == size: - break - ntot = ntot + n - n = 0 -pbs.close() +if True: + # send start tag + packet = bytearray() + packet.append(0xD5) # write + ser.write(packet) + + # send address + packet = bytearray() + packet.append((addr>>24)&255) + packet.append((addr>>16)&255) + packet.append((addr>>8)&255) + packet.append(addr&255) + ser.write(packet) + + # send size + # we report a size of one less (avoids a 32 bits -1 in logic) + size_m1 = size - 1 + packet = bytearray() + packet.append((size_m1>>24)&255) + packet.append((size_m1>>16)&255) + packet.append((size_m1>>8)&255) + packet.append(size_m1&255) + ser.write(packet) + + # send data (pseudo random) + print('Sending ...') + packet = bytearray() + n = 0 + ntot = 0 + pbs = tqdm(total=size) + random.seed(S) + while True: + b = random.randint(0,255) + packet.append(b) + n = n + 1 + if n == 32768 or ntot+n == size: + ser.write(packet) + pbs.update(n) + packet = bytearray() + if ntot+n == size: + break + ntot = ntot + n + n = 0 + pbs.close() # ------------- read back data diff --git a/projects/qpsram/xfer.py b/projects/qpsram/xfer.py index b3f3b292..3024880e 100644 --- a/projects/qpsram/xfer.py +++ b/projects/qpsram/xfer.py @@ -11,8 +11,8 @@ sys.exit() # open serial port -ser = serial.Serial(sys.argv[1],500000, timeout=1) -# ser = serial.Serial(sys.argv[1],115200, timeout=1) +# ser = serial.Serial(sys.argv[1],500000, timeout=1) +ser = serial.Serial(sys.argv[1],115200, timeout=1) # op to perform op = sys.argv[2] @@ -20,9 +20,18 @@ print("writing") elif op == 'r': print("reading") +elif op == 'b': + print("reboot") else: print("unknown command ",op) - os.exit(-1) + sys.exit() + +# if boot, send it now +packet = bytearray() +if op == 'b': + 
packet.append(0xE5) + ser.write(packet) + sys.exit() # address addr = int(sys.argv[3], 0) @@ -37,7 +46,6 @@ print("size is ",size) # send start tag -packet = bytearray() if op == 'w': packet.append(0xD5) else: @@ -63,6 +71,7 @@ ser.write(packet) if op == 'r': + f = open('read.bytes', 'wb') # read data i = 0 ba = bytearray() @@ -70,6 +79,7 @@ b = ser.read(1) if len(b) == 0: break + f.write(b) print("{:02X}".format(int.from_bytes(b,byteorder='little')),end=" ") ba.append(int.from_bytes(b,byteorder='little')) i = i + 1 @@ -77,6 +87,7 @@ i = 0 print(" ",bytes(ba)) ba = bytearray() + f.close() elif op == 'w': # send data packet = bytearray() diff --git a/projects/qpsram/xfer.si b/projects/qpsram/xfer.si index 90ba48bf..74333acc 100644 --- a/projects/qpsram/xfer.si +++ b/projects/qpsram/xfer.si @@ -9,11 +9,18 @@ $$ bank_width = 23 +$$if SIMULATION then +$$print("simulating ICE40 cells") +$$ICE40_SIMULATION = 1 +$$ICE40 = 1 +append('../../tools/fpga-binutils/mingw64/bin/share/ice40/cells_sim.v') +$$end + $$if ICESTICK then import('../common/plls/icestick_100.v') $$uart_in_clock_freq_mhz = 100 $$qpsram_fast = true -- false if freq <= 60 -$$elseif ICE40 then +$$elseif ICEBREAKER then import('../common/plls/icebrkr_50.v') $$uart_in_clock_freq_mhz = 50 $$elseif ECPIX5 then @@ -23,13 +30,21 @@ $$elseif ULX3S then import('../common/plls/ulx3s_90.v') $$uart_in_clock_freq_mhz = 90 $$qpsram_fast = true -- false if freq <= 60 +$$elseif BROT then + import('../common/plls/brot_50_lock.v') + $$uart_in_clock_freq_mhz = 50 $$else - $$uart_in_clock_freq_mhz = 25 + $$uart_in_clock_freq_mhz = 100 +$$end + +$$if ICE40 then +// for convenience on DFU devices, adding the capability to reset to DFU +import('../common/ice40_warmboot.v') $$end $include('../common/qpsram2x.si') -$$uart_bauds = 500000 +$$uart_bauds = 115200 -- 500000 $include('../common/uart.si') unit main( @@ -45,13 +60,22 @@ unit main( output uint2 ram_bank(2b00), ) // clocking (pll) and reset -$$if ICE40 then +$$if ICESTICK or 
ICEBREAKER then <@clock_pll> { uint1 clock_pll = uninitialized; pll pllgen( clock_in <: clock, clock_out :> clock_pll, ); +$$elseif BROT then +<@clock_pll,!rst> { + uint1 clock_pll = uninitialized; + uint1 rst = uninitialized; + pll pllgen( + clock_in <: clock, + clock_out :> clock_pll, + reset :> rst + ); $$elseif ECPIX5 or ULX3S then <@clock_pll> { uint1 clock_pll = uninitialized; @@ -59,7 +83,7 @@ $$elseif ECPIX5 or ULX3S then pll pllgen( clkin <: clock, clkout0 :> clock_pll, - locked :> locked + locked :> locked ); $$else { @@ -86,6 +110,11 @@ $$end uart_tx :> uart_tx ); +$$if ICE40 then + uint2 slot_boot(2b01); + ice40_warmboot wboot(slot <: slot_boot); +$$end + uint1 enabled(0); uint1 write(0); uint1 restart(0); @@ -115,6 +144,26 @@ $$for bank=0,3 do while (ram.busy) {} $$end +$$if SIMULATION then + // test + ram.addr = {1b1,{$bank_width${1b1}}}; + ram_bank = 2b00; + ram.in_ready = 1; + ram.wdata = 8h55; + ram.wenable = 1; + while (ram.busy) {} + ram.addr = {1b1,{$bank_width${1b1}}}; + ram_bank = 2b00; + ram.in_ready = 1; + ram.wdata = 8h7A; + ram.wenable = 1; + while (ram.busy) {} +++: +++: +++: + __finish(); +$$end + while (1) { // wait for size and address while (get_size[0,1]) { } // size arrives last @@ -160,6 +209,10 @@ $$end blip = enabled ? 32hffffffff : 0; // write? (top bit of enabling tag) write = ( ~enabled & (ui.data_out == 8hD5) ) ? 1 : write; +$$if ICE40 then + // warmboot? + wboot.boot = ( ~enabled & (ui.data_out == 8hE5) ) ? 
1 : 0; +$$end // enable on tag enabled = ( enabled | (ui.data_out[0,7] == 8h55)); } else { diff --git a/projects/sdram_memtest/sdram_memtest.si b/projects/sdram_memtest/sdram_memtest.si index f7671780..dfbea4ad 100644 --- a/projects/sdram_memtest/sdram_memtest.si +++ b/projects/sdram_memtest/sdram_memtest.si @@ -8,8 +8,8 @@ // @sylefeb 2020 $$TEST_r512w64 = false -$$TEST_r128w8 = true -$$TEST_r16w16 = false +$$TEST_r128w8 = false +$$TEST_r16w16 = true $$TEST_with_autoprecharge = true @@ -56,7 +56,7 @@ import('../common/plls/de2_25_100_100ph180.v') $$end $$if SIMULATION then -$$ TEST_SIZE = 1<<10 +$$ TEST_SIZE = 1<<16 $$else $$ TEST_SIZE = 1<<24 $$end @@ -273,7 +273,7 @@ $$if true then while (count < $TEST_SIZE$) { // write to sdram $$if TEST_r128w8 or TEST_r16w16 then - sio.data_in = count[0,8]; + sio.data_in = count; $$else sio.data_in = 64h1122aabbccddeeff ^ count ^ (count<<32); $$end diff --git a/projects/spiflash/Makefile b/projects/spiflash/Makefile index d18ff14c..267f93d0 100644 --- a/projects/spiflash/Makefile +++ b/projects/spiflash/Makefile @@ -2,6 +2,15 @@ .DEFAULT: xfer.si silice-make.py -s xfer.si -b $@ -p basic,qspiflash,uart -o BUILD_$(subst :,_,$@) $(ARGS) +brot: xfer.si + silice-make.py -s xfer.si -b $@ -p basic,pmod_dspi,uart2 -o BUILD_$(subst :,_,$@) $(ARGS) + +brot_uart2_mmod: xfer.si + silice-make.py -s xfer.si -b brot -p basic,uart2,spiflash_dspi -o BUILD_$(subst :,_,$@) $(ARGS) + +#icebreaker: xfer.si +# silice-make.py -s xfer.si -b $@ -p basic,pmod_dspi,uart -o BUILD_$(subst :,_,$@) $(ARGS) + icarus: stream.si mkdir -p BUILD_$(subst :,_,$@) cp W25Q128JVxIM/*.TXT BUILD_$(subst :,_,$@)/ diff --git a/projects/spiflash/spiflash1x.si b/projects/spiflash/spiflash1x.si index 3b31194c..9d69e58b 100644 --- a/projects/spiflash/spiflash1x.si +++ b/projects/spiflash/spiflash1x.si @@ -195,7 +195,9 @@ $$if ULX3S then $$error('TODO: adjust delays') wait = 1023; //_ 3 cycles $$else - wait = 1021; + wait = init[0,1] ? 
1019 : 1021; + // ^^^^ + // init needs a longer delay $$end after = 4; @@ -218,7 +220,7 @@ $$end $$else rdata = spiflash.read; $$end - rdata_available = 1; + rdata_available = ~init[0,1]; spiflash.trigger = continue; busy = continue; init = {1b0,init[1,1]}; diff --git a/projects/spiflash/spiflash2x.si b/projects/spiflash/spiflash2x.si index e14becf9..2f329b64 100644 --- a/projects/spiflash/spiflash2x.si +++ b/projects/spiflash/spiflash2x.si @@ -256,7 +256,11 @@ $$end $$else rdata = spiflash.read; $$end - rdata_available = 1; +$$if FOMU or SIM_FOMU then + rdata_available = ~init[0,1]; +$$else + rdata_available = ~init[1,1]; +$$end spiflash.trigger = continue; busy = continue; init = {1b0,init[1,1]}; diff --git a/projects/spiflash/xfer.py b/projects/spiflash/xfer.py index 228bdf26..6e189d9c 100644 --- a/projects/spiflash/xfer.py +++ b/projects/spiflash/xfer.py @@ -22,7 +22,7 @@ print("reading") else: print("unknown command ",op) - os.exit(-1) + sys.exit() # address addr = int(sys.argv[3], 0) diff --git a/projects/spiflash/xfer.si b/projects/spiflash/xfer.si index 891b8af3..e10f8f2a 100644 --- a/projects/spiflash/xfer.si +++ b/projects/spiflash/xfer.si @@ -11,17 +11,18 @@ $$if ICESTICK then import('../common/plls/icestick_50.v') $$uart_in_clock_freq_mhz = 50 -$$elseif ICE40 then +$$elseif ICEBREAKER then import('../common/plls/icebrkr_50.v') $$uart_in_clock_freq_mhz = 50 +$$elseif BROT then + import('../common/plls/brot_50_lock.v') + $$uart_in_clock_freq_mhz = 50 $$elseif ECPIX5 then import('../common/plls/ecpix5_50.v') $$uart_in_clock_freq_mhz = 50 $$elseif ULX3S then import('../common/plls/ulx3s_50.v') $$uart_in_clock_freq_mhz = 50 -$$else - $$uart_in_clock_freq_mhz = 25 $$end $$spi1x = 1 @@ -47,13 +48,22 @@ unit main( inout uint1 sf_io3, ) // clocking (pll) and reset -$$if ICE40 then +$$if ICEBREAKER or ICESTICK then <@clock_pll> { uint1 clock_pll = uninitialized; pll pllgen( clock_in <: clock, clock_out :> clock_pll, ); +$$elseif BROT then +<@clock_pll,!rst> { + 
uint1 clock_pll = uninitialized; + uint1 rst = uninitialized; + pll pllgen( + clock_in <: clock, + clock_out :> clock_pll, + reset :> rst + ); $$elseif ECPIX5 or ULX3S then <@clock_pll> { uint1 clock_pll = uninitialized; @@ -134,7 +144,7 @@ $$end } always_after { - leds = {blip,write,1b1,2b00}; + leds = blip; blip = blip >> 1; // receiving if (ui.data_out_ready) { diff --git a/projects/vga_demo/Makefile.humanshader b/projects/vga_demo/Makefile.humanshader new file mode 100644 index 00000000..693b8981 --- /dev/null +++ b/projects/vga_demo/Makefile.humanshader @@ -0,0 +1,6 @@ + +.DEFAULT: vga_humanshader.si + silice-make.py -s vga_humanshader.si -b $@ -p basic,vga -o BUILD_$(subst :,_,$@) $(ARGS) + +clean: + rm -rf BUILD_* diff --git a/projects/vga_demo/vga_humanshader.si b/projects/vga_demo/vga_humanshader.si new file mode 100644 index 00000000..6487e75e --- /dev/null +++ b/projects/vga_demo/vga_humanshader.si @@ -0,0 +1,253 @@ +// The MIT License +// Copyright © 2023 Inigo Quilez +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// The shader I used while designing the Human Shader experiment +// +// https://humanshader.com +// +// Binary version of the Human Shader at +// https://www.shadertoy.com/view/Dtf3Dl +// +// The original Human Shader was designed +// with decimal fixed point, for humans to +// compute. This version uses binary fixed +// point instead for computers. + +// SL 2024-01-22 Silice port from https://www.shadertoy.com/view/XflXDs + +$$VGA_640_480 = 1 +$$delay = 27338 + +$include('vga_demo_main.si') + +// ------------------------- + +// NOTE: the design entry point is 'main' in 'vga_demo_main.si' + +unit frame_display( + input uint11 pix_x, input uint11 pix_y, + input uint1 pix_active, input uint1 pix_vblank, + input uint1 vga_hs, input uint1 vga_vs, + output! uint$color_depth$ pix_r, + output! uint$color_depth$ pix_g, + output! uint$color_depth$ pix_b +) { + + // --- always_before block, performed every cycle before anything else + always_before { + pix_r = 0; pix_g = 0; pix_b = 0; // maintain RGB at zero, important during + } // vga blanking for screen to adjust + + // --- algorithm block, contains the pipeline in an infinite loop + // feeding it with pixels + algorithm { + + while (1) { // forever + + // ===== Here we synch the pipeline with the vertical sync. + // The pipeline starts during vblank so latency is hidden and + // the first pixel is ready exactly at the right moment. + while (~vga_vs) {} + while ( vga_vs) {} + + // Wait the 'perfect' delay (obtained in simulation, see marker [1] below) + // (adjust delay if number of steps is changed). + uint17 wait = 0; while (wait != $delay$) { wait = wait + 1; } + + // ----- start the pipeline! 
----- + // This loop feeds pixel coordinates to the pipeline, the pipeline outputs + // pixels directly into the VGA module in the last stage. The delay above + // (while (wait ...)) is just right so that the first pixel exits the + // pipeline zhen it is needed. + // Note that the pipeline computes value for entire VGA rows including + // during h-sync, but these pixels in h-sync are discarded (I found it + // simpler to do that, and it uses slightly less logic). + uint12 px = -1; uint12 py = -1; + while ( ! (px == $H_END-1$ && py == $V_RES-1$) ) { + + // ----- pipeline starts here ----- + + int16 x = px >> 3; + int16 y = py >> 3; + // increment pixel coordinates + py = px == $H_END-1$ ? (py + 1) : py; + px = px == $H_END-1$ ? 0 : (px + 1); + + -> // --- next pipeline stage + + //------------------------- + // Section A (2 MUL, 3 ADD) + //------------------------- + int16 u = x-36; + int16 v = 18-y; + + uint22 u2 = u*u; + uint22 v2 = v*v; + + -> // --- next pipeline stage + + uint22 h = u2 + v2; + //------------------------- + + //------------------------------------- + // Section B, Sphere (4/7 MUL, 5/9 ADD) + //------------------------------------- + + int16 R = 420; + int16 B = 520; + + uint22 t = 5200 + (h<<3); + + -> // --- next pipeline stage + + int16 p = (__signed(t)*u)>>>7; + int16 q = (__signed(t)*v)>>>7; + + -> // --- next pipeline stage + + // bounce light + int16 w = 18 + (((p*5-q*13))>>>9); + + -> // --- next pipeline stage + + if( w>0 ) { R = R + w*w; } + + -> // --- next pipeline stage + + // sky light / ambient occlusion + int22 o = q + 900; + R = (R*o)>>>__signed(12); + B = (B*o)>>>__signed(12); + + -> // --- next pipeline stage + + int22 w = (p+q)>>>__signed(3); + int16 R_s(0); int16 B_s(0); + // sun/key light + if( p > -q ) + { + R_s = R + w; + B_s = B + w; + } else { + R_s = R; + B_s = B; + } + + -> // --- next pipeline stage + + //------------------------------------- + // Section C, Ground (5/9 MUL, 6/9 ADD) + 
//------------------------------------- + + int16 R = 150 + (v<<1); + int16 B = 50; + + int22 p = h + (v2<<3); + + // int22 c = 240*(-v) - p; + // this one is heavy, we split it in stages + int22 c = 240*(-v); + -> + c = c - p; + + -> // --- next pipeline stage + + // sky light / ambient occlusion + int22 o1 = (25*c)>>3; + + -> // --- next pipeline stage + + // int22 o = (c*(7840-o1)>>>9) - 8560; + // this one is heavy, we split it in stages + + int22 o_t1 = (7840-o1); + -> + int24 o_t2 = c * o_t1; + // ^^ needs a bit more width + -> + int22 o = (o_t2>>>9) - 8560; + + -> // --- next pipeline stage + + int22 B_g(0); + if( c>1200 ) + { + R = (R*o)>>10; + B_g = (B*o)>>10; + } else { + B_g = B; + } + + -> // --- next pipeline stage + + // sun/key light with soft shadow + int16 r = c + u*v; + int16 d = 3200 - h - (r<<<1); + + -> // --- next pipeline stage + + int22 R_g(0); + if( d>0 ) { R_g = R + d; } + else { R_g = R; } + //------------------------- + + -> // --- next pipeline stage + + //------------------------------ + // Section D, Sky (1 MUL, 2 ADD) + //------------------------------ + int16 R_k(0); int16 B_k(0); + int16 c = x + (y<<2); + R_k = 132 + c; + B_k = 192 + c; + //------------------------- + + -> // --- next pipeline stage + + // select sphere/ground/sky + if( h < 200 ) + { + R = R_s; B = B_s; + } + else + { + if( v<0 ) + { + R = R_g; B = B_g; + } else { + R = R_k; B = B_k; + } + } + + -> // --- next pipeline stage + + //------------------------- + // Section E (3 MUL, 1 ADD) + //------------------------- + uint8 r = R < 255 ? R : 255; + uint8 b = B < 255 ? B : 255; + uint16 G = (r*11 + 5*b)>>4; + uint8 g = G < 255 ? 
G : 255; + //------------------------- + + -> // --- next pipeline stage + +$$if SIMULATION then + // to verify/adjust pixel synch [1] + if (pix_y == 0) { + __display("x = %d pix_x = %d (diff: %d)",px,pix_x,__signed(px-pix_x)); + __display("y = %d pix_y = %d (diff: %d)",py,pix_y,__signed(py-pix_y)); + } +$$end + + if (px < $H_RES$) { // do not produce color out of bound, screen may + // otherwise produce weird color artifacts + // framework uses 6 bpp + pix_r = r>>2; pix_g = g>>2; pix_b = b>>2; + } + + } + + } + } +} diff --git a/projects/vga_demo/vga_msponge.si b/projects/vga_demo/vga_msponge.si index 12a00239..395d9034 100644 --- a/projects/vga_demo/vga_msponge.si +++ b/projects/vga_demo/vga_msponge.si @@ -26,9 +26,9 @@ $$ N_steps = 190 $$ delay = 93106 $$ VGA_1920_1080 = 1 $$elseif VERILATOR then -$$ N_steps = 64 -$$ delay = 93232 -$$ VGA_1920_1080 = 1 +$$ N_steps = 128 +$$ delay = 45792 +$$ VGA_1024_768 = 1 $$else $$ error('sorry, this design is currently only for the ECPIX5 and de10-nano') $$end diff --git a/projects/vga_test/vga_test.si b/projects/vga_test/vga_test.si index 256cbb99..43096fe8 100644 --- a/projects/vga_test/vga_test.si +++ b/projects/vga_test/vga_test.si @@ -3,9 +3,9 @@ // https://github.com/sylefeb/Silice // @sylefeb 2019 -$$VGA_800_600 = nil +$$VGA_800_600 = 1 $$VGA_1024_768 = nil -$$VGA_1920_1080 = 1 +$$VGA_1920_1080 = nil // VGA driver $include('../common/vga.si') diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 8f368430..8265ef20 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 3.5) project(_silice) include_directories( @@ -29,9 +29,9 @@ add_subdirectory(pybind11) pybind11_add_module(_silice MODULE pysilice.cpp) -target_link_libraries(_silice libsilice lua luabind antlr4_static) +target_link_libraries(_silice PRIVATE libsilice lua luabind antlr4_static) if(WIN32) -target_link_libraries(_silice shlwapi) 
+target_link_libraries(_silice PRIVATE shlwapi) endif(WIN32) target_compile_definitions(_silice PRIVATE VERSION_INFO="0.1") @@ -51,3 +51,4 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "GNU") target_link_libraries(_silice stdc++fs) endif() endif() + diff --git a/python/pysilice.cpp b/python/pysilice.cpp index 6b04e5c6..ab4e3506 100644 --- a/python/pysilice.cpp +++ b/python/pysilice.cpp @@ -90,7 +90,8 @@ class Unit m_Blueprint.unit = m_Compiler->isStaticBlueprint(m_Name); if (m_Blueprint.unit.isNull()) { try { - m_Blueprint = m_Compiler->parseUnitIOs(m_Name); + Blueprint::t_instantiation_context ictx; + m_Blueprint = m_Compiler->parseUnitIOs(m_Name, ictx); } catch (Fatal&) { throw Fatal("could not instantiate unit '%s'", m_Name.c_str()); } @@ -151,6 +152,9 @@ class Unit for (auto exp : export_defs) { ictx.params[exp.first] = exp.second; } + // name the instance through the top name FIXME: inelegant + ictx.top_name = m_Name; + // write source if ( ! m_Blueprint.ios_parser.isNull() ) { // parse the unit body m_Compiler->parseUnitBody(m_Blueprint, ictx); @@ -165,7 +169,7 @@ class Unit m_Compiler->writeStaticUnit(m_Blueprint.unit, ictx, out, false); } // return instance - return Instance("M_" + m_Name + (postfix.empty() ? "" : ("_" + postfix)), tmp, m_Blueprint); + return Instance(m_Name + (postfix.empty() ? 
"" : ("_" + postfix)), tmp, m_Blueprint); } Instance instantiate(const std::vector >& export_params) diff --git a/python/setup.py b/python/setup.py index e1df59ce..b3a2805b 100644 --- a/python/setup.py +++ b/python/setup.py @@ -12,7 +12,7 @@ def add_asset_directory(dir): datadir = Path(__file__).parent / 'silice/{}'.format(dir) return [str(p.relative_to(Path(__file__).parent / 'silice')) for p in datadir.rglob('*')] -# Add the content of 'frameworks' as package files +# Add data as package files files = add_asset_directory('frameworks') # Setup diff --git a/python/tests/icebreaker_blinky.py b/python/tests/icebreaker_blinky.py index 422d3a1d..0739f613 100644 --- a/python/tests/icebreaker_blinky.py +++ b/python/tests/icebreaker_blinky.py @@ -38,8 +38,8 @@ class _CRG(Module): def __init__(self, platform, sys_clk_freq): self.rst = Signal() - self.clock_domains.cd_sys = ClockDomain() - self.clock_domains.cd_por = ClockDomain(reset_less=True) + self.clock_domains.cd_sys = ClockDomain("sys") + self.clock_domains.cd_por = ClockDomain("por",reset_less=True) # # # @@ -84,7 +84,7 @@ def __init__(self, d = silice.Design("../../projects/blinky/blinky.si", ["NUM_LEDS=5"]) - m = d.getUnit("test") + m = d.getUnit("main") leds = platform.request_all("user_led") inst = silice.migen.instantiate( m, diff --git a/python/tests/lambdaconcept_ecpix5.py b/python/tests/lambdaconcept_ecpix5.py index 03f02f5d..7f6fa428 100644 --- a/python/tests/lambdaconcept_ecpix5.py +++ b/python/tests/lambdaconcept_ecpix5.py @@ -38,11 +38,11 @@ class _CRG(Module): def __init__(self, platform, sys_clk_freq): self.rst = Signal() - self.clock_domains.cd_init = ClockDomain() - self.clock_domains.cd_por = ClockDomain(reset_less=True) - self.clock_domains.cd_sys = ClockDomain() - self.clock_domains.cd_sys2x = ClockDomain() - self.clock_domains.cd_sys2x_i = ClockDomain(reset_less=True) + self.clock_domains.cd_init = ClockDomain("init") + self.clock_domains.cd_por = ClockDomain("por",reset_less=True) + 
self.clock_domains.cd_sys = ClockDomain("sys") + self.clock_domains.cd_sys2x = ClockDomain("sys2x") + self.clock_domains.cd_sys2x_i = ClockDomain("sys2x_i",reset_less=True) # # # diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index 17e2f4f6..1302d5b6 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -167,11 +167,12 @@ void Algorithm::checkBlueprintsBindings(const t_instantiation_context &ictx) con // lint bindings { ExpressionLinter linter(this, ictx); + // produce instantiation context + t_instantiation_context local_ictx; + makeBlueprintInstantiationContext(bp.second, ictx, local_ictx); linter.lintBinding( sprint("instance '%s', binding '%s' to '%s'", bp.first.c_str(), br.c_str(), b.left.c_str()), - b.dir, b.srcloc, - get<0>(bp.second.blueprint->determineVIOTypeWidthAndTableSize(translateVIOName(b.left, nullptr), b.srcloc)), - get<0>(determineVIOTypeWidthAndTableSize(translateVIOName(br, nullptr), b.srcloc)) + bp.second.blueprint, local_ictx, b ); } } @@ -1043,15 +1044,16 @@ void Algorithm::getBindings( // check if this is a group binding if ((bindings->bpBinding()->BDEFINE() != nullptr || bindings->bpBinding()->BDEFINEDBL() != nullptr)) { // verify right is an identifier + std::string vio; if (bindings->bpBinding()->right->IDENTIFIER() == nullptr) { - reportError( - sourceloc(bindings->bpBinding()), - "expecting an identifier on the right side of a group binding"); + vio = determineAccessedVar(bindings->bpBinding()->right->access(), nullptr); + } else { + vio = bindings->bpBinding()->right->IDENTIFIER()->getText(); } // inout pins do not bind as groups - if (!isInOut(bindings->bpBinding()->right->IDENTIFIER()->getText())) { + if (!isInOut(vio)) { // check if this is a group - auto G = m_VIOGroups.find(bindings->bpBinding()->right->getText()); + auto G = m_VIOGroups.find(vio); if (G != m_VIOGroups.end()) { // unfold all bindings, select direction automatically // NOTE: some members may not be used, these are excluded during auto-binding @@ -1059,7 
+1061,7 @@ void Algorithm::getBindings( string member = v; t_binding_nfo nfo; nfo.left = bindings->bpBinding()->left->getText() + "_" + member; - nfo.right = bindings->bpBinding()->right->IDENTIFIER()->getText() + "_" + member; + nfo.right = vio + "_" + member; nfo.srcloc = sourceloc(bindings->bpBinding()); nfo.dir = (bindings->bpBinding()->BDEFINE() != nullptr) ? e_Auto : e_AutoQ; _vec_bindings.push_back(nfo); @@ -1270,7 +1272,10 @@ void Algorithm::instantiateBlueprint(t_instanced_nfo& _nfo, const t_instantiatio cerr << "instantiating unit '" << _nfo.blueprint_name << "' as '" << _nfo.instance_name << "'\n"; // parse the unit ios try { - auto cbp = ictx.compiler->parseUnitIOs(_nfo.blueprint_name); + // instantiation context for IO parsing + t_instantiation_context local_ictx; + makeBlueprintInstantiationContext(_nfo, ictx, local_ictx); + auto cbp = ictx.compiler->parseUnitIOs(_nfo.blueprint_name, local_ictx); _nfo.parsed_unit = cbp; _nfo.blueprint = cbp.unit; } catch (Fatal&) { @@ -1288,15 +1293,9 @@ void Algorithm::instantiateBlueprint(t_instanced_nfo& _nfo, const t_instantiatio // finish the unit if non static if (!_nfo.parsed_unit.unit.isNull()) { // instantiation context - t_instantiation_context local_ictx = ictx; - for (auto spc : _nfo.specializations.autos) { - local_ictx.autos[spc.first] = spc.second; // makes sure new specializations overwrite any existing ones - } - for (auto spc : _nfo.specializations.params) { - local_ictx.params[spc.first] = spc.second; - } + t_instantiation_context local_ictx; // update the instantiation context now that we have the unit ios - makeBlueprintInstantiationContext(_nfo, local_ictx, local_ictx); + makeBlueprintInstantiationContext(_nfo, ictx, local_ictx); // record the specializations _nfo.specializations = local_ictx; // resolve instanced blueprint inputs/outputs var types @@ -1358,7 +1357,13 @@ void Algorithm::gatherDeclarationInstance(siliceParser::DeclarationInstanceConte nfo.specializations.autos[str_signed] = 
tn.base_type == Int ? "signed" : ""; } else if (m->sparam() != nullptr) { std::string p = m->sparam()->IDENTIFIER()->getText(); - nfo.specializations.params[p] = m->sparam()->NUMBER()->getText(); + if (m->sparam()->NUMBER()) { + nfo.specializations.params[p] = m->sparam()->NUMBER()->getText(); + } else if (m->sparam()->CONSTANT()) { + nfo.specializations.params[p] = rewriteConstant(m->sparam()->CONSTANT()->getText()); + } else { + sl_assert(false); + } } else { reportError(sourceloc(m), "modifier not allowed during instantiation" ); } @@ -1398,14 +1403,16 @@ std::string Algorithm::translateVIOName( vio = Vrew->second; } } - // then pipeline stage - if (bctx->pipeline_stage != nullptr) { - const auto& Vpip = bctx->pipeline_stage->pipeline->trickling_vios.find(vio); - if (Vpip != bctx->pipeline_stage->pipeline->trickling_vios.end()) { - if (bctx->pipeline_stage->stage_id > Vpip->second[0]) { - vio = tricklingVIOName(vio, bctx->pipeline_stage); + // pipeline stage (recurses through nesting) + auto current = bctx->pipeline_stage; + while (current != nullptr) { + const auto& Vpip = current->pipeline->trickling_vios.find(vio); + if (Vpip != current->pipeline->trickling_vios.end()) { + if (current->stage_id > Vpip->second[0]) { + vio = tricklingVIOName(vio, current); } } + current = current->pipeline->nested_in_parent_stage; } } return vio; @@ -1520,7 +1527,15 @@ std::string Algorithm::rewriteIdentifier( if (isInput(var)) { return encapsulateIdentifier(var, read_access, ALG_INPUT + prefix + var, suffix); } else if (isInOut(var)) { - reportError(srcloc, "cannot use inouts directly in expressions"); + if (isInOutAccessed(var)) { // indicates the inout is used in expressions (outside of binding) + if (m_VIOToBlueprintInOutsBound.count(var)) { + reportError(srcloc, "cannot bind inout %s (already used in unit body)", var.c_str()); + } else { + reportError(srcloc, "cannot use inout %s as-is in expression (use .i, .o or .oenable)", var.c_str()); + } + } else { + return 
encapsulateIdentifier(var, read_access, ALG_INOUT + prefix + var, suffix); + } } else if (isOutput(var)) { auto usage = m_Outputs.at(m_OutputNames.at(var)).usage; if (usage == e_Temporary) { @@ -1900,6 +1915,25 @@ Algorithm::t_combinational_block *Algorithm::splitOrContinueBlock(siliceParser:: // ------------------------------------------------- +bool Algorithm::isInWhileBody(const antlr4::tree::ParseTree* node) const +{ + if (node == nullptr) { + return false; + } + auto wnode = dynamic_cast(node->parent); + auto pnode = dynamic_cast (node->parent); + if (wnode != nullptr) { + return true; + } else if (pnode != nullptr) { + if (hasPipeline(pnode)) { + return false; + } + } + return isInWhileBody(node->parent); +} + +// ------------------------------------------------- + Algorithm::t_combinational_block *Algorithm::gatherBreakLoop(siliceParser::BreakLoopContext* brk, t_combinational_block *_current, t_gather_context *_context) { // current goes to after while @@ -1908,6 +1942,10 @@ Algorithm::t_combinational_block *Algorithm::gatherBreakLoop(siliceParser::Break } _current->next(_context->break_to); _context->break_to->is_state = true; + // verify this is not within a pipeline stage + if (!isInWhileBody(brk)) { + reportError(sourceloc(brk->BREAK()), "cannot break from a pipeline stage"); + } // track line for fsm reporting { auto lns = instructionLines(brk); @@ -1923,12 +1961,6 @@ Algorithm::t_combinational_block *Algorithm::gatherBreakLoop(siliceParser::Break Algorithm::t_combinational_block *Algorithm::gatherWhile(siliceParser::WhileLoopContext* loop, t_combinational_block *_current, t_gather_context *_context) { - // pipeline nesting check - if (_current->context.pipeline_stage != nullptr) { - if (hasPipeline(loop->while_block)) { - reportError(sourceloc(loop->while_block),"while loop contains another pipeline: pipelines cannot be nested."); - } - } // while header block t_combinational_block *while_header = addBlock("__while" + generateBlockName(), _current, 
nullptr, sourceloc(loop)); _current->next(while_header); @@ -2378,7 +2410,7 @@ Algorithm::t_combinational_block *Algorithm::concatenatePipeline(siliceParser::P // go through the pipeline // -> for each stage block t_combinational_block *prev = _current; - bool resume = (_current->context.pipeline_stage != nullptr); // if in an existing pipeline, start by adding to the last stage + bool resume = (_current->context.pipeline_stage != nullptr) && !isSpawningNewPipeline(pip); // if in an existing pipeline, start by adding to the last stage for (auto b : pip->instructionList()) { t_fsm_nfo* fsm = nullptr; t_pipeline_stage_nfo* snfo = nullptr; @@ -2431,7 +2463,7 @@ Algorithm::t_combinational_block *Algorithm::concatenatePipeline(siliceParser::P resume = false; // no longer resuming } else { // set next stage - prev->pipeline_next(from, stage_end); + prev->pipeline_next(from); } // advance prev = nfo->stages.back()->fsm->lastBlock; @@ -2441,19 +2473,46 @@ Algorithm::t_combinational_block *Algorithm::concatenatePipeline(siliceParser::P // ------------------------------------------------- +bool Algorithm::isSpawningNewPipeline(const siliceParser::PipelineContext* pip) const +{ + // verifies the tree to check whether this is a new pipeline + // spawned within a block, or whether this is a concatenated pipeline + // from a circuitry + auto parent = pip->parent; + while (parent) { + auto block = dynamic_cast(parent); + if (block != nullptr) { + // check if new block or in a circuitry + auto circuitry = dynamic_cast(block->parent); + if (circuitry == nullptr) { + return true; + } else { + return false; + } + } else { + parent = parent->parent; + } + } + return false; +} + +// ------------------------------------------------- + Algorithm::t_combinational_block *Algorithm::gatherPipeline(siliceParser::PipelineContext* pip, t_combinational_block *_current, t_gather_context *_context) { sl_assert(pip->instructionList().size() > 1); // otherwise not a pipeline // inform change log 
CHANGELOG.addPointOfInterest("CL0003", sourceloc(pip)); - // are we already within a parent pipeline? - if (_current->context.pipeline_stage == nullptr) { + // are we already within a parent pipeline or spawning a new one? + if (_current->context.pipeline_stage == nullptr || isSpawningNewPipeline(pip)) { - // no: create a new pipeline + /// create a new pipeline auto nfo = new t_pipeline_nfo(); m_Pipelines.push_back(nfo); // name of the pipeline nfo->name = "__pip_" + std::to_string(pip->getStart()->getLine()) + "_" + std::to_string(m_Pipelines.size()); + // parent if nested + nfo->nested_in_parent_stage = _current->context.pipeline_stage; // start concatenating (may call gatherPipeline recursively) auto last = concatenatePipeline(pip, _current, _context, nfo); // now for each stage fsm @@ -2605,7 +2664,7 @@ Algorithm::t_combinational_block *Algorithm::gatherPipeline(siliceParser::Pipeli } else { - // yes: expand the parent pipeline + /// concatenate to the parent pipeline auto nfo = _current->context.pipeline_stage->pipeline; return concatenatePipeline(pip, _current, _context, nfo); @@ -2760,10 +2819,10 @@ bool Algorithm::isStateLessGraph(const t_combinational_block *head) const // ------------------------------------------------- -bool Algorithm::hasCombinationalExit(const t_combinational_block* head) const +void Algorithm::findNextStates( t_combinational_block* head, std::set< t_combinational_block*>& _exits) const { - std::queue< const t_combinational_block* > q; - std::unordered_set< const t_combinational_block* > visited; + std::queue< t_combinational_block* > q; + std::unordered_set< t_combinational_block* > visited; // initialize queue q.push(head); // explore @@ -2771,19 +2830,23 @@ bool Algorithm::hasCombinationalExit(const t_combinational_block* head) const auto cur = q.front(); q.pop(); visited.insert(cur); - // recurse + // get children std::vector< t_combinational_block* > children; cur->getChildren(children); - if (children.empty()) { - // we
reached this combinational block and it has no children - // => combinational exit! - return true; + // ensure while is properly followed + if (cur->while_loop()) { // FIXME? this is due to when a while encloses the pipeline, as we do not explore other fsms + children.push_back(cur); // add self } + // recurse for (auto c : children) { if (c == nullptr) { - // tags a forward ref (jump), non combinational exit => skip + sl_assert(false); // jumps should be resolved before + } else if (c->context.fsm != head->context.fsm) { + // other fsm => exit, store + _exits.insert(c); } else if (c->is_state) { - // state, non combinational exit => skip + // state => exit, store + _exits.insert(c); } else { // explore further if (visited.count(c) == 0) { @@ -2792,7 +2855,6 @@ bool Algorithm::hasCombinationalExit(const t_combinational_block* head) const } } } - return false; } // ------------------------------------------------- @@ -2847,7 +2909,8 @@ Algorithm::t_combinational_block* Algorithm::gatherCircuitryInst( if (C == m_KnownCircuitries.end()) { // attempt dynamic instantiation try { - auto result = _context->ictx->compiler->parseCircuitryIOs(name); + sl_assert(_context->ictx != nullptr); + auto result = _context->ictx->compiler->parseCircuitryIOs(name, *_context->ictx); m_InstancedCircuitries.push_back(result); ioList = result.ioList; } catch (Fatal&) { @@ -2966,15 +3029,6 @@ Algorithm::t_combinational_block* Algorithm::gatherCircuitryInst( Algorithm::t_combinational_block *Algorithm::gatherIfElse(siliceParser::IfThenElseContext* ifelse, t_combinational_block *_current, t_gather_context *_context) { - // pipeline nesting check - if (_current->context.pipeline_stage != nullptr) { - if (hasPipeline(ifelse->if_block)) { - reportError(sourceloc(ifelse->if_block), "conditonal statement (if side) contains another pipeline: pipelines cannot be nested."); - } - if (hasPipeline(ifelse->else_block)) { - reportError(sourceloc(ifelse->else_block), "conditonal statement (else side) 
contains another pipeline: pipelines cannot be nested."); - } - } // blocks for both sides t_combinational_block *if_block = addBlock(generateBlockName(), _current, nullptr, sourceloc(ifelse->if_block)); t_combinational_block *else_block = addBlock(generateBlockName(), _current, nullptr, sourceloc(ifelse->else_block)); @@ -3000,7 +3054,7 @@ Algorithm::t_combinational_block *Algorithm::gatherIfElse(siliceParser::IfThenEl } // NOTE: We do not tag 'after' as being a state right now, so that we can then consider // whether to collapse it into the 'else' of the conditional in case the 'if' jumps over it. - // NOTE: This prevent a special mechanism to avoid code duplication is preventIfElseCodeDup() + // NOTE: A special mechanism avoids code duplication, see preventIfElseCodeDup() return after; } @@ -3008,12 +3062,6 @@ Algorithm::t_combinational_block *Algorithm::gatherIfElse(siliceParser::IfThenEl Algorithm::t_combinational_block *Algorithm::gatherIfThen(siliceParser::IfThenContext* ifthen, t_combinational_block *_current, t_gather_context *_context) { - // pipeline nesting check - if (_current->context.pipeline_stage != nullptr) { - if (hasPipeline(ifthen->if_block)) { - reportError(sourceloc(ifthen->if_block), "conditonal statement contains another pipeline: pipelines cannot be nested."); - } - } // blocks for both sides t_combinational_block *if_block = addBlock(generateBlockName(), _current, nullptr, sourceloc(ifthen->if_block)); t_combinational_block *else_block = addBlock(generateBlockName(), _current); @@ -3033,7 +3081,7 @@ Algorithm::t_combinational_block *Algorithm::gatherIfThen(siliceParser::IfThenCo } // NOTE: We do not tag 'after' as being a state right now, so that we can then consider // whether to collapse it into the 'else' of the conditional in case the 'if' jumps over it. 
- // NOTE: This prevent a special mechanism to avoid code duplication is preventIfElseCodeDup() + // NOTE: A special mechanism avoids code duplication, see preventIfElseCodeDup() return after; } @@ -3041,14 +3089,6 @@ Algorithm::t_combinational_block *Algorithm::gatherIfThen(siliceParser::IfThenCo Algorithm::t_combinational_block* Algorithm::gatherSwitchCase(siliceParser::SwitchCaseContext* switchCase, t_combinational_block* _current, t_gather_context* _context) { - // pipeline nesting check - if (_current->context.pipeline_stage != nullptr) { - for (auto cb : switchCase->caseBlock()) { - if (hasPipeline(cb)) { - reportError(sourceloc(cb), "switch case contains another pipeline: pipelines cannot be nested."); - } - } - } // create a block for after the switch-case t_combinational_block* after = addBlock(generateBlockName(), _current, nullptr, sourceloc(switchCase)); // create a block per case statement @@ -3135,7 +3175,6 @@ void Algorithm::addTemporary(std::string vname, siliceParser::Expression_0Contex } // insert var insertVar(var, block); - m_VarNames.at(var.name); // insert as an expression catcher m_ExpressionCatchers.insert(std::make_pair(std::make_pair(expr, block),var.name)); // insert a custom assignment instruction for this temporary @@ -4078,7 +4117,7 @@ bool Algorithm::preventIfElseCodeDup(t_fsm_nfo* fsm) // detect unreachable blocks // NOTE: this is done before as the loop below changes is_state for some block, // and these have to be renumbered - std::set unreachable; + std::set unreachable; for (auto b : m_Blocks) { if (b->context.fsm == fsm) { if (b->state_id == -1 && b->is_state) { @@ -4117,6 +4156,63 @@ bool Algorithm::preventIfElseCodeDup(t_fsm_nfo* fsm) // ------------------------------------------------- +/* + padPipeline ensures that the last state of the pipeline stage fsm only + exits towards another stage fsm. If not, this implies the last state + may loop within the stage (e.g.
on itself with a while or using gotos) + in which case the pipeline trigger will be incorrect. Such cases require + adding a state before termination, and hence preventing collapse of + next cycles within while loops and if-else (CL0004, CL0005) +*/ +bool Algorithm::padPipeline(t_fsm_nfo* fsm) +{ + // NOTE: expects stage ids to have been generated + bool changed = false; + if (!fsmIsEmpty(fsm)) { + // get blocks + std::unordered_set blocks; + fsmGetBlocks(fsm, blocks); + // last state + int last_state_id = fsm->lastBlock->parent_state_id; + // find the block + t_combinational_block* last = nullptr; + for (auto b : blocks) { + if (b->state_id == last_state_id) { + last = b; + break; + } + } + // find all possible exits + std::set exits; + findNextStates(last, exits); + // count exits which are in the same fsm (others are ok; they won't lead to an incorrect pipeline start) + int same_fsm_exits = 0; + for (auto e : exits) { + if (e->context.fsm == fsm) { + ++same_fsm_exits; + } + } + // check: multiple exits, and one at least in same fsm + if (exits.size() > 1 && same_fsm_exits > 0) { + // TODO: can we do better than to explicitly list all cases?
+ if (last->while_loop()) { + warn(Standard, last->srcloc, "The last state of the pipeline stage loops into the stage, an additional cycle has to be introduced after."); + last->while_loop()->after->is_state = true; + changed |= true; + } else if (last->if_then_else()) { + warn(Standard, last->srcloc, "The last state of the pipeline stage loops into the stage, an additional cycle has to be introduced after."); + last->if_then_else()->after->is_state = true; + changed |= true; + } else { + sl_assert(false); + } + } + } + return changed; +} + +// ------------------------------------------------- + void Algorithm::renumberStates(t_fsm_nfo *fsm) { typedef struct { @@ -4185,6 +4281,11 @@ void Algorithm::renumberStates(t_fsm_nfo *fsm) void Algorithm::generateStates(t_fsm_nfo* fsm) { renumberStates(fsm); + if (fsm != &m_RootFSM) { + if (padPipeline(fsm)) { // only on pipeline fsms + renumberStates(fsm); + } + } if (preventIfElseCodeDup(fsm)) { // NOTE: commenting this enables code duplication in if/else renumberStates(fsm); } @@ -4238,22 +4339,22 @@ std::string Algorithm::fsmPipelineStageStall(const t_fsm_nfo *fsm) const return "_stall_" + fsm->name; } -std::string Algorithm::fsmPipelineFirstStageDisable(const t_fsm_nfo *fsm) const +std::string Algorithm::fsmPipelineFirstStateDisable(const t_fsm_nfo *fsm) const { return "_1stdisable_" + fsm->name; } // ------------------------------------------------- -std::string Algorithm::fsmNextState(std::string prefix,const t_fsm_nfo *) const +std::string Algorithm::fsmNextState(std::string prefix,const t_fsm_nfo *fsm) const { std::string next; if (m_AutoRun) { // NOTE: same as isNotCallable() since hasNoFSM() is false - next = std::string("( ~") + prefix + ALG_AUTORUN + " ? " + std::to_string(toFSMState(&m_RootFSM, entryState(&m_RootFSM))); + next = std::string("( ~") + prefix + ALG_AUTORUN + " ? " + std::to_string(toFSMState(fsm, entryState(fsm))); } else { - next = std::string("( ~") + ALG_INPUT + "_" + ALG_RUN + " ? 
" + std::to_string(toFSMState(&m_RootFSM, entryState(&m_RootFSM))); + next = std::string("( ~") + ALG_INPUT + "_" + ALG_RUN + " ? " + std::to_string(toFSMState(fsm, entryState(fsm))); } - next += std::string(" : ") + FF_D + prefix + fsmIndex(&m_RootFSM) + ")"; + next += std::string(" : ") + FF_D + prefix + fsmIndex(fsm) + ")"; return next; } @@ -4326,7 +4427,7 @@ int Algorithm::fastForwardToFSMState(const t_fsm_nfo* fsm, const t_combinationa { // fast forward block = fastForward(block); - if (blockIsEmpty(block) && block == fsm->lastBlock) { + if (blockIsEmpty(block) && block == fsm->lastBlock && fsm == &m_RootFSM) { // special case of empty block at the end of the algorithm return toFSMState(fsm,terminationState(fsm)); } else { @@ -5983,8 +6084,12 @@ void Algorithm::determineBlueprintBoundVIO(const t_instantiation_context& ictx) // check width of output vs range width // -> get output width if possible { + // produce instantiation context + t_instantiation_context local_ictx; + makeBlueprintInstantiationContext(ib.second, ictx, local_ictx); + // verify width int iobw = -1; - string obw = ib.second.blueprint->resolveWidthOf(b.left, ictx, sourceloc(access)); + string obw = ib.second.blueprint->resolveWidthOf(b.left, local_ictx, sourceloc(access)); try { iobw = stoi(obw); } catch (...) 
{ @@ -6033,9 +6138,28 @@ void Algorithm::determineBlueprintBoundVIO(const t_instantiation_context& ictx) if (m_VIOBoundToBlueprintOutputs.find(bindingRightIdentifier(b)) != m_VIOBoundToBlueprintOutputs.end()) { reportError(b.srcloc, "vio '%s' is already bound as the output of another instance", bindingRightIdentifier(b).c_str()); } + // check width if it is an access + if (std::holds_alternative(b.right)) { + auto access = std::get(b.right); + // produce instantiation context + t_instantiation_context local_ictx; + makeBlueprintInstantiationContext(ib.second, ictx, local_ictx); + // verify width (mismatch not allowed with inouts) + string iow = ib.second.blueprint->resolveWidthOf(b.left, local_ictx, b.srcloc); + int iiow = -1; + try { + iiow = stoi(iow); + } catch (...) { + reportError(b.srcloc, "cannot determine width of inout '%s'", b.left.c_str()); + } + auto tw = determineAccessTypeAndWidth(nullptr, access, nullptr); + if (tw.width != iiow) { + reportError(b.srcloc, "cannot bind to inout of different width"); + } + } // record wire name for this inout std::string bindpoint = ib.second.instance_prefix + "_" + b.left; - m_BlueprintInOutsBoundToVIO[bindpoint] = bindingRightIdentifier(b); + m_BlueprintInOutsBoundToVIO[bindpoint] = b.right; m_VIOToBlueprintInOutsBound[bindingRightIdentifier(b)] = bindpoint; } } @@ -6982,6 +7106,10 @@ std::tuple Algorithm::writeIOAccess( auto G = m_VIOGroups.find(base); if (G != m_VIOGroups.end()) { verifyMemberGroup(member, G->second); + // verify not a bound inout + if (m_VIOToBlueprintInOutsBound.count(base)) { + reportError(sourceloc(ioaccess),"cannot access bound inout '%s'", base.c_str()); + } // produce the variable name std::string vname = base + "_" + member; // write @@ -7453,13 +7581,15 @@ void Algorithm::writeFlipFlopDeclarations(std::string prefix, std::ostream& out, if (!fsmIsEmpty(fsm)) { out << "reg [" << stateWidth(fsm) - 1 << ":0] " FF_D << prefix << fsmIndex(fsm) << "," FF_Q << prefix << fsmIndex(fsm) << ';' <<
nxl; - out << "wire " << fsmPipelineStageReady(fsm) << " = " - << "(" << FF_Q << prefix << fsmIndex(fsm) << " == " << toFSMState(fsm, lastPipelineStageState(fsm)) << ')' - << " || (" << FF_Q << prefix << fsmIndex(fsm) << " == " << toFSMState(fsm, terminationState(fsm)) << ");" << nxl; + out << "wire " << fsmPipelineStageReady(fsm) << " = "; + // this condition allows to trigger the stage immediately after it reached its end state + out << "(" << FF_Q << prefix << fsmIndex(fsm) << " == " << toFSMState(fsm, lastPipelineStageState(fsm)) << ')'; + // this condition allows to trigger the stage when idle (becomes idle one cycle after it reached end) + out << " || (" << FF_Q << prefix << fsmIndex(fsm) << " == " << toFSMState(fsm, terminationState(fsm)) << ");" << nxl; out << "reg [0:0] " FF_D << prefix << fsmPipelineStageFull(fsm) << " = 0" << "," FF_Q << prefix << fsmPipelineStageFull(fsm) << " = 0;" << nxl; out << "reg [0:0] " FF_TMP << prefix << fsmPipelineStageStall(fsm) << " = 0;" << nxl; - out << "reg [0:0] " FF_TMP << prefix << fsmPipelineFirstStageDisable(fsm) << " = 0;" << nxl; + out << "reg [0:0] " FF_TMP << prefix << fsmPipelineFirstStateDisable(fsm) << " = 0;" << nxl; } } // state machine caller id (subroutines) @@ -7740,7 +7870,7 @@ void Algorithm::writeCombinationalAlwaysPre( w.out << FF_D << "_" << fsmIndex(fsm) << " = " << FF_Q << "_" << fsmIndex(fsm) << ';' << nxl; w.out << FF_D << "_" << fsmPipelineStageFull(fsm) << " = " << FF_Q << "_" << fsmPipelineStageFull(fsm) << ';' << nxl; w.out << FF_TMP << "_" << fsmPipelineStageStall(fsm) << " = 0;" << nxl; - w.out << FF_TMP << "_" << fsmPipelineFirstStageDisable(fsm) << " = 0;" << nxl; + w.out << FF_TMP << "_" << fsmPipelineFirstStateDisable(fsm) << " = 0;" << nxl; } // instanced algorithms run, maintain high for (const auto& iaiordr : m_InstancedBlueprintsInDeclOrder) { @@ -8128,24 +8258,28 @@ void Algorithm::writeBlock(std::string prefix, t_writer_context &w, } { auto display = dynamic_cast(a.instr); if 
(display) { - if (display->DISPLAY() != nullptr) { - w.out << "$display("; - } else if (display->DISPLWRITE() != nullptr) { - w.out << "$write("; - } - w.out << display->STRING()->getText(); - if (display->callParamList() != nullptr) { - std::vector params; - getCallParams(display->callParamList(),params, &block->context); - for (auto p : params) { - if (std::holds_alternative(p.what)) { - w.out << "," << rewriteIdentifier(prefix, std::get(p.what), "", &block->context, ictx, sourceloc(display), FF_Q, true, _dependencies, _ff_usage); - } else { - w.out << "," << rewriteExpression(prefix, p.expression, a.__id, &block->context, ictx, FF_Q, true, _dependencies, _ff_usage); + // check support + std::string instr = display->DISPLAY() != nullptr ? "display" : "write"; + auto C = CONFIG.keyValues().find("__" + instr + "_supported"); + if (C->second != "yes") { + warn(Standard, sourceloc(display), ("__" + instr + " not supported on this target, ignored").c_str()); + } else { + // add to code + w.out << "$" << instr << "("; + w.out << display->STRING()->getText(); + if (display->callParamList() != nullptr) { + std::vector params; + getCallParams(display->callParamList(), params, &block->context); + for (auto p : params) { + if (std::holds_alternative(p.what)) { + w.out << "," << rewriteIdentifier(prefix, std::get(p.what), "", &block->context, ictx, sourceloc(display), FF_Q, true, _dependencies, _ff_usage); + } else { + w.out << "," << rewriteExpression(prefix, p.expression, a.__id, &block->context, ictx, FF_Q, true, _dependencies, _ff_usage); + } } } + w.out << ");" << nxl; } - w.out << ");" << nxl; } } { auto inline_v = dynamic_cast(a.instr); @@ -8181,7 +8315,14 @@ void Algorithm::writeBlock(std::string prefix, t_writer_context &w, } { auto finish = dynamic_cast(a.instr); if (finish) { - w.out << "$finish();" << nxl; + // check support + auto C = CONFIG.keyValues().find("__finish_supported"); + if (C->second != "yes") { + warn(Standard, sourceloc(finish), "__finish not 
supported on this target, ignored"); + } else { + // add to code + w.out << "$finish();" << nxl; + } } } { auto stall = dynamic_cast(a.instr); @@ -8281,7 +8422,7 @@ void Algorithm::disableStartingPipelines(std::string prefix, t_writer_context &w findAllStartingPipelines(block, pipelines); for (auto pip : pipelines) { if (!fsmIsEmpty(pip->stages.front()->fsm)) { - w.out << FF_TMP << '_' << fsmPipelineFirstStageDisable(pip->stages.front()->fsm) << " = 1;" << nxl; + w.out << FF_TMP << '_' << fsmPipelineFirstStateDisable(pip->stages.front()->fsm) << " = 1;" << nxl; } } } @@ -8312,11 +8453,11 @@ void Algorithm::writeStatelessBlockGraph( return; } } else { - // first state of pipeline first stage? + // first state of first pipeline stage? if (block->context.pipeline_stage) { if (block->context.pipeline_stage->stage_id == 0 && !fsmIsEmpty(fsm)) { // add conditional on first stage disabled (in case the pipeline is enclosed in a conditional) - w.out << "if (~" << FF_TMP << prefix << fsmPipelineFirstStageDisable(fsm) << ") begin " << nxl; + w.out << "if (~" << FF_TMP << prefix << fsmPipelineFirstStateDisable(fsm) << ") begin " << nxl; enclosed_in_conditional = true; } } @@ -8324,6 +8465,9 @@ void Algorithm::writeStatelessBlockGraph( // follow the chain const t_combinational_block *current = block; while (true) { + if (current->block_name == "__stage___block_26") { + LIBSL_TRACE; + } // write current block writeBlock(prefix, w, ictx, current, _dependencies, _ff_usage, _lines); // goto next in chain @@ -8643,17 +8787,18 @@ void Algorithm::writeStatelessBlockGraph( // in a recursion, pipeline might have been disabled so we re-enable it // (otherwise we are sure it was not disabled, no need to manipulate the signal and risk adding logic) if (!fsmIsEmpty(current->pipeline_next()->next->context.pipeline_stage->fsm)) { - w.out << FF_TMP << '_' << fsmPipelineFirstStageDisable(current->pipeline_next()->next->context.pipeline_stage->fsm) << " = 0;" << nxl; + w.out << FF_TMP << '_' << 
fsmPipelineFirstStateDisable(current->pipeline_next()->next->context.pipeline_stage->fsm) << " = 0;" << nxl; } } // write pipeline auto prev = current; if (current->context.fsm != nullptr) { // if in an algorithm, pipelines are written later - std::ostringstream _; - t_writer_context wpip(w.pipes,_,w.wires); - sl_assert(_.str().empty()); + std::ostringstream subpip; // child pipelines are written here + t_writer_context wpip(w.pipes,subpip,w.wires); current = writeStatelessPipeline(prefix, wpip, ictx, current, _q, _dependencies, _ff_usage, _post_dependencies, _lines); + // combine any child pipeline with parents + w.pipes << subpip.str(); // also check that blocks between here and next states are empty if (!emptyUntilNextStates(current)) { reportError(prev->srcloc, "in an algorithm, a pipeline has to be followed by a new cycle.\n" @@ -9213,61 +9358,81 @@ void Algorithm::addToInstantiationContext(const Algorithm *alg, std::string var, // ------------------------------------------------- +bool Algorithm::isInOutAccessed(std::string var) const +{ + return m_Vars.at(m_VarNames.at(var + "_o" )).access != e_NotAccessed + || m_Vars.at(m_VarNames.at(var + "_oenable")).access != e_NotAccessed + || m_Vars.at(m_VarNames.at(var + "_i" )).access != e_NotAccessed; +} + +// ------------------------------------------------- + void Algorithm::makeBlueprintInstantiationContext(const t_instanced_nfo& nfo, const t_instantiation_context& ictx, t_instantiation_context& _local_ictx) const { _local_ictx = ictx; - // parameters for parameterized variables - ForIndex(i, nfo.blueprint->parameterized().size()) { - string var = nfo.blueprint->parameterized()[i]; - if (varIsInInstantiationContext(var, nfo.specializations)) { - // var has been specialized explicitly already - continue; - } - bool found = false; - auto io_nfo = nfo.blueprint->getVIODefinition(var, found); - sl_assert(found); - if (io_nfo.type_nfo.same_as.empty()) { - // a binding is needed to parameterize this io, find it - found 
= false; - const auto &b = findBindingLeft(var, nfo.bindings, found); - if (!found) { - reportError(nfo.srcloc, "io '%s' of instance '%s' is not bound nor specialized, cannot automatically determine it", - var.c_str(), nfo.instance_name.c_str()); - } - std::string bound = bindingRightIdentifier(b); - t_var_nfo bnfo; - if (!getVIONfo(bound, bnfo)) { - continue; // NOTE: This is fine, we might be missing a binding that will be later resolved. - // Later (when writing the output) this is strictly asserted. - // This will only be an issue if the bound var is actually a paramterized var, - // however the designer is expected to worry about instantiation order in such cases. - } - if (bnfo.table_size != 0) { - // parameterized vars cannot be tables + for (auto spc : nfo.specializations.autos) { + _local_ictx.autos [spc.first] = spc.second; // makes sure new specializations overwrite any existing ones + } + for (auto spc : nfo.specializations.params) { + _local_ictx.params[spc.first] = spc.second; + } + // if the blueprint is defined (not the case before IOs are parsed) + if (!nfo.blueprint.isNull()) { + // parameters for parameterized variables + ForIndex(i, nfo.blueprint->parameterized().size()) { + string var = nfo.blueprint->parameterized()[i]; + if (varIsInInstantiationContext(var, nfo.specializations)) { + // var has been specialized explicitly already continue; } - // add to context - addToInstantiationContext(this, var, bnfo, _local_ictx, _local_ictx); - } - } - // parameters of non-parameterized ios (for pre-processor widthof/signed) - Algorithm *alg = dynamic_cast(nfo.blueprint.raw()); - for (auto io : nfo.blueprint->inputs()) { - if (io.type_nfo.base_type != Parameterized || !io.type_nfo.same_as.empty()) { - addToInstantiationContext(alg, io.name, io, _local_ictx, _local_ictx); - } - } - for (auto io : nfo.blueprint->outputs()) { - if (io.type_nfo.base_type != Parameterized || !io.type_nfo.same_as.empty()) { - addToInstantiationContext(alg, io.name, io, 
_local_ictx, _local_ictx); - } - } - for (auto io : nfo.blueprint->inOuts()) { - if (io.type_nfo.base_type != Parameterized || !io.type_nfo.same_as.empty()) { - addToInstantiationContext(alg, io.name, io, _local_ictx, _local_ictx); + bool found = false; + auto io_nfo = nfo.blueprint->getVIODefinition(var, found); + sl_assert(found); + if (io_nfo.type_nfo.same_as.empty()) { + // a binding is needed to parameterize this io, find it + found = false; + const auto& b = findBindingLeft(var, nfo.bindings, found); + if (!found) { + reportError(nfo.srcloc, "io '%s' of instance '%s' is not bound nor specialized, cannot automatically determine it", + var.c_str(), nfo.instance_name.c_str()); + } + std::string bound = bindingRightIdentifier(b); + t_var_nfo bnfo; + if (!getVIONfo(bound, bnfo)) { + continue; // NOTE: This is fine, we might be missing a binding that will be later resolved. + // Later (when writing the output) this is strictly asserted. + // This will only be an issue if the bound var is actually a paramterized var, + // however the designer is expected to worry about instantiation order in such cases. 
+ } + if (bnfo.table_size != 0) { + // parameterized vars cannot be tables + continue; + } + // add to context + addToInstantiationContext(this, var, bnfo, _local_ictx, _local_ictx); + } + } + // parameters of non-parameterized ios (for pre-processor widthof/signed) + Algorithm* alg = dynamic_cast(nfo.blueprint.raw()); + if (alg != nullptr) { + for (auto io : nfo.blueprint->inputs()) { + if (io.type_nfo.base_type != Parameterized || !io.type_nfo.same_as.empty()) { + addToInstantiationContext(alg, io.name, io, _local_ictx, _local_ictx); + } + } + for (auto io : nfo.blueprint->outputs()) { + if (io.type_nfo.base_type != Parameterized || !io.type_nfo.same_as.empty()) { + addToInstantiationContext(alg, io.name, io, _local_ictx, _local_ictx); + } + } + for (auto io : nfo.blueprint->inOuts()) { + if (io.type_nfo.base_type != Parameterized || !io.type_nfo.same_as.empty()) { + addToInstantiationContext(alg, io.name, io, _local_ictx, _local_ictx); + } + } } } - // instance context + // instance name _local_ictx.instance_name = (ictx.instance_name.empty() ? 
ictx.top_name : ictx.instance_name) + "_" + nfo.instance_name; } @@ -9308,7 +9473,7 @@ void Algorithm::writeAsModule(std::ostream& out, const t_instantiation_context& // module header if (ictx.instance_name.empty()) { - out << "module " << ictx.top_name << ' '; + out << "module " << ictx.top_name << ' '; // FIXME: inelegant, calrify role of top_name } else { out << "module M_" << m_Name + '_' + ictx.instance_name + ' '; } @@ -9474,6 +9639,27 @@ void Algorithm::writeAsModule(std::ostream& out, const t_instantiation_context& } else { out << nfo.blueprint->moduleName(nfo.blueprint_name, "") << ' '; } + // if verilog module add parameters + { + const Module* vmod = dynamic_cast(nfo.blueprint.raw()); + if (vmod != nullptr) { + if (!vmod->parameters().empty()) { + out << "#("; + bool first = true; + for (const auto& prm : vmod->parameters()) { + if (first) { first = false; out << '\n'; } else { out << ",\n"; } + // test if given when instanced, otherwise use default + auto P = nfo.specializations.params.find(prm.first); + if (P != nfo.specializations.params.end()) { + out << '.' << prm.first << '(' << P->second << ")"; + } else { + out << '.' << prm.first << '(' << prm.second << ")"; + } + } + out << "\n)\n"; + } + } + } // instance name out << nfo.instance_name << ' '; // ports @@ -9549,11 +9735,23 @@ void Algorithm::writeAsModule(std::ostream& out, const t_instantiation_context& std::string bindpoint = nfo.instance_prefix + "_" + os.name; const auto& vio = m_BlueprintInOutsBoundToVIO.find(bindpoint); if (vio != m_BlueprintInOutsBoundToVIO.end()) { - if (isInOut(vio->second)) { - out << '.' << nfo.blueprint->inoutPortName(os.name) << '(' << ALG_INOUT << "_" << vio->second << ")"; + out << '.' << nfo.blueprint->inoutPortName(os.name) << '('; + if (std::holds_alternative(vio->second)) { + std::string bndid = std::get(vio->second); + if (isInOut(bndid)) { + out << ALG_INOUT << "_" << bndid; + } else { + out << WIRE << "_" << bndid; + } } else { - out << '.' 
<< nfo.blueprint->inoutPortName(os.name) << '(' << WIRE << "_" << vio->second << ")"; + // write access + t_vio_dependencies _; + writeAccess("_", out, false, std::get(vio->second), + -1, nullptr, ictx, + FF_D, _, ff_input_bindings_usage + ); } + out << ")"; } else { reportError(nfo.srcloc, "cannot find algorithm inout binding '%s'", os.name.c_str()); } @@ -9844,9 +10042,8 @@ void Algorithm::writeAsModule(std::ostream& out, const t_instantiation_context& if (stage_id == 0) { // parent state active? if (fsm->parentBlock->context.fsm != nullptr) { - sl_assert(fsm->parentBlock->context.fsm == &m_RootFSM); // no nested pipelines out << " && (("; - out << fsmNextState("_", &m_RootFSM); + out << fsmNextState("_", fsm->parentBlock->context.fsm); out << ')'; out << " == " << toFSMState(fsm->parentBlock->context.fsm, fsmParentTriggerState(fsm)); out << ")" << nxl; diff --git a/src/Algorithm.h b/src/Algorithm.h index 0f74306c..bfdfb215 100644 --- a/src/Algorithm.h +++ b/src/Algorithm.h @@ -199,12 +199,15 @@ namespace Silice /// \brief all inout names, map contains index in m_InOuts std::unordered_map m_InOutNames; + /// \brief binding point, identifier or access + typedef std::variant t_binding_point; + /// \brief VIO bound to blueprint outputs (wires) (vio name => wire name) - std::unordered_map m_VIOBoundToBlueprintOutputs; + std::unordered_map m_VIOBoundToBlueprintOutputs; /// \brief module/algorithms inouts bound to VIO (inout => vio name) - std::unordered_map m_BlueprintInOutsBoundToVIO; + std::unordered_map m_BlueprintInOutsBoundToVIO; /// \brief VIO bound to module/algorithms inouts (vio name => inout) - std::unordered_map m_VIOToBlueprintInOutsBound; + std::unordered_map m_VIOToBlueprintInOutsBound; // forward definition of combinational blocks class t_combinational_block; @@ -226,9 +229,6 @@ namespace Silice /// \brief enum binding direction enum e_BindingDir { e_Left, e_LeftQ, e_Right, e_BiDir, e_Auto, e_AutoQ }; - /// \brief binding point, identifier or access - 
typedef std::variant t_binding_point; - /// \brief records info about variable bindings typedef struct { @@ -347,6 +347,7 @@ namespace Silice std::string name; std::unordered_map trickling_vios; // v2i: [0] stage at which to start [1] stage at which to stop std::vector stages; + struct s_pipeline_stage_nfo *nested_in_parent_stage; // track read/written vios std::unordered_map > read_at, written_at; std::unordered_set written_outputs; @@ -510,9 +511,8 @@ namespace Silice { public: t_combinational_block *next; - t_combinational_block *after; - end_action_pipeline_next(t_combinational_block *next_, t_combinational_block *after_) : next(next_), after(after_) { } - void getChildren(std::vector& _ch) const override { _ch.push_back(next); if (after != next) { _ch.push_back(after); } } + end_action_pipeline_next(t_combinational_block *next_) : next(next_) { } + void getChildren(std::vector& _ch) const override { _ch.push_back(next); } std::string name() const override { return "end_action_pipeline_next";} }; @@ -548,7 +548,7 @@ namespace Silice bool no_skip = false; // true the state cannot be skipped, even if empty int state_id = -1; // state id, when assigned, -1 otherwise int parent_state_id = -1; // parent state id (closest state before) - std::vector decltrackers; // list of declaration expressions within block (typically bound exprs, aka wires) + std::vector decltrackers; // list of declaration expressions within block (typically bound exprs, aka wires) std::vector instructions; // list of instructions within block t_end_action *end_action = nullptr; // end action to perform t_combinational_block_context context; // block context: subroutine, parent, etc. 
@@ -609,9 +609,9 @@ namespace Silice } const end_action_goto_and_return_to * goto_and_return_to() const { return dynamic_cast(end_action); } - void pipeline_next(t_combinational_block *next, t_combinational_block *after) + void pipeline_next(t_combinational_block *next) { - swap_end(new end_action_pipeline_next(next, after)); + swap_end(new end_action_pipeline_next(next)); } const end_action_pipeline_next *pipeline_next() const { return dynamic_cast(end_action); } @@ -698,6 +698,8 @@ namespace Silice bool getVIONfo(std::string vio, t_var_nfo &_nfo) const; /// \brief checks whether an identifier is a group VIO bool isGroupVIO(std::string var) const; + /// \brief returns whether an inout is used by the unit (accessed) + bool isInOutAccessed(std::string var) const; /// \brief rewrites a constant std::string rewriteConstant(std::string cst) const; /// \brief returns a string representing the widthof value @@ -804,6 +806,8 @@ namespace Silice bool hasPipeline(antlr4::tree::ParseTree* tree) const; /// \brief split current block (state present) or continue current with the next instruction list t_combinational_block *splitOrContinueBlock(siliceParser::InstructionListItemContext* ilist, t_combinational_block *_current, t_gather_context *_context); + /// \brief returns true if node is in a loop body + bool isInWhileBody(const antlr4::tree::ParseTree* node) const; /// \brief gather a break from loop t_combinational_block *gatherBreakLoop(siliceParser::BreakLoopContext* brk, t_combinational_block *_current, t_gather_context *_context); /// \brief gather a while block @@ -814,6 +818,8 @@ namespace Silice t_combinational_block *gatherSubroutine(siliceParser::SubroutineContext* sub, t_combinational_block *_current, t_gather_context *_context); /// \brief concatenate a pipeline to an existing one t_combinational_block *concatenatePipeline(siliceParser::PipelineContext* pip, t_combinational_block *_current, t_gather_context *_context, t_pipeline_nfo *nfo); + /// \brief returns 
true if pip is spawning a new pipeline + bool isSpawningNewPipeline(const siliceParser::PipelineContext* pip) const; /// \brief gather a pipeline t_combinational_block *gatherPipeline(siliceParser::PipelineContext* pip, t_combinational_block *_current, t_gather_context *_context); /// \brief gather a jump @@ -828,8 +834,8 @@ namespace Silice t_combinational_block *gatherJoinExec(siliceParser::JoinExecContext* join, t_combinational_block *_current, t_gather_context *_context); /// \brief tests whether a graph of block is stateless bool isStateLessGraph(const t_combinational_block *head) const; - /// \brief returns true if the graph has a combinational exit (one path that does not jump to an actual state) - bool hasCombinationalExit(const t_combinational_block *head) const; + /// \brief returns all the blocks which are next states + void findNextStates(t_combinational_block* head, std::set< t_combinational_block*>& _exits) const; /// \brief gather an if-then-else t_combinational_block *gatherIfElse(siliceParser::IfThenElseContext* ifelse, t_combinational_block *_current, t_gather_context *_context); /// \brief gather an if-then @@ -884,6 +890,8 @@ namespace Silice void resolveForwardJumpRefs(); /// \brief performs a pass on all ifelse to prevent code duplication of after, returns true if states where changed bool preventIfElseCodeDup(t_fsm_nfo* fsm); + /// \brief pad pipeline to ensure it ends on a 'simple' state (without multiple possible next state), returns true if states where changed + bool padPipeline(t_fsm_nfo* fsm); /// \brief performs a numbering pass on the fsm states void renumberStates(t_fsm_nfo*); /// \brief generates the states of an fsm @@ -892,14 +900,14 @@ namespace Silice void fsmGetBlocks(t_fsm_nfo *fsm, std::unordered_set& _blocks) const; /// \brief returns the index name of the fsm std::string fsmIndex(const t_fsm_nfo *) const; - /// \brief returns the 'ready' signal name of the fsm + /// \brief returns the 'ready' signal name of the pipeline 
stage fsm std::string fsmPipelineStageReady(const t_fsm_nfo *) const; - /// \brief returns the 'full' signal name of the fsm + /// \brief returns the 'full' signal name of the pipeline stage fsm std::string fsmPipelineStageFull(const t_fsm_nfo *) const; - /// \brief returns the 'stall' signal name of the fsm + /// \brief returns the 'stall' signal name of the pipeline stage fsm std::string fsmPipelineStageStall(const t_fsm_nfo *) const; - /// \brief returns the 'first stage disable' signal name of the fsm - std::string fsmPipelineFirstStageDisable(const t_fsm_nfo *) const; + /// \brief returns the 'first state disable' signal name of the pipeline stage fsm + std::string fsmPipelineFirstStateDisable(const t_fsm_nfo *) const; /// \brief returns an expression that evaluates to the fsm next state std::string fsmNextState(std::string prefix, const t_fsm_nfo *) const; /// \brief returns whether the fsm is empty (no state) diff --git a/src/Config.cpp b/src/Config.cpp index e6f44ee5..6cfc9f3f 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -66,6 +66,10 @@ Config::Config() m_KeyValues["simple_dualport_bram_wenable1_type"] = "uint"; // uint | int | data m_KeyValues["simple_dualport_bram_wenable1_width"] = "1"; // 1 | data + m_KeyValues["__display_supported"] = "no"; + m_KeyValues["__write_supported"] = "no"; + m_KeyValues["__finish_supported"] = "no"; + // internal options m_KeyValues["output_fsm_graph"] = "1"; } diff --git a/src/ExpressionLinter.cpp b/src/ExpressionLinter.cpp index c7830e09..6132dc7f 100644 --- a/src/ExpressionLinter.cpp +++ b/src/ExpressionLinter.cpp @@ -214,21 +214,48 @@ void ExpressionLinter::lintReadback( void ExpressionLinter::lintBinding( std::string msg, - Algorithm::e_BindingDir dir, - const t_source_loc& srcloc, - const t_type_nfo &left, - const t_type_nfo &right -) const + AutoPtr bp, + const Algorithm::t_instantiation_context& local_ictx, + const Algorithm::t_binding_nfo& bnfo + ) const { - // check - if (left.base_type == Parameterized || 
right.base_type == Parameterized) { - return; // skip if parameterized + // check width + int lw = -1; + e_Type rtype = Parameterized; + std::string slw = bp->resolveWidthOf(bnfo.left, local_ictx, bnfo.srcloc); + try { + lw = stoi(slw); + } catch (...) { + warn(Standard, bnfo.srcloc, "%s, cannot check binding bit-width", msg.c_str()); + return; + } + int rw = -1; + if (!std::holds_alternative(bnfo.right)) { + auto access = std::get(bnfo.right); + auto rnfo = m_Host->determineAccessTypeAndWidth(nullptr, access, nullptr); + rw = rnfo.width; + rtype = rnfo.base_type; + } else { + /// TODO: rtype in this case? + std::string right = std::get(bnfo.right); + if (right != ALG_CLOCK && right != ALG_RESET) { + std::string srw = m_Host->resolveWidthOf(right, m_Ictx, bnfo.srcloc); + try { + rw = stoi(srw); + } catch (...) { + warn(Standard, bnfo.srcloc, "%s, cannot check binding bit-width", msg.c_str()); + return; + } + } } - if (left.base_type != right.base_type) { - warn(Standard, srcloc, "%s, bindings have inconsistent signedness", msg.c_str()); + if (rw != lw) { + warn(Standard, bnfo.srcloc, "%s, bindings have inconsistent bit-widths", msg.c_str()); } - if (left.width != right.width) { - warn(Standard, srcloc, "%s, bindings have inconsistent bit-widths", msg.c_str()); + // check signdness + auto lnfo = bp->determineVIOTypeWidthAndTableSize(bnfo.left, bnfo.srcloc); + e_Type ltype = std::get<0>(lnfo).base_type; + if (ltype != rtype && rtype != Parameterized && ltype != Parameterized) { + warn(Standard, bnfo.srcloc, "%s, bindings have inconsistent signedness", msg.c_str()); } } diff --git a/src/ExpressionLinter.h b/src/ExpressionLinter.h index 9c0278a5..7cafd570 100644 --- a/src/ExpressionLinter.h +++ b/src/ExpressionLinter.h @@ -137,10 +137,9 @@ namespace Silice /// \brief Lint a binding void lintBinding( std::string msg, - Algorithm::e_BindingDir dir, - const Utils::t_source_loc& srcloc, - const t_type_nfo &left, - const t_type_nfo &right + AutoPtr bp, + const 
Algorithm::t_instantiation_context& local_ictx, + const Algorithm::t_binding_nfo& bnfo ) const; /// \brief Returns the type nfo of an expression diff --git a/src/LuaPreProcessor.cpp b/src/LuaPreProcessor.cpp index a573f0d3..e5462c2e 100644 --- a/src/LuaPreProcessor.cpp +++ b/src/LuaPreProcessor.cpp @@ -1311,17 +1311,16 @@ void LuaPreProcessor::generateBody( // create Lua context createLuaContext(); // execute body (Lua context also contains all unit functions) - executeLuaString(lua_code, dst_file, false, ictx); + executeLuaString(lua_code, dst_file, ictx); } // ------------------------------------------------- -void LuaPreProcessor::generateUnitIOSource(std::string unit, std::string dst_file) +void LuaPreProcessor::generateUnitIOSource(std::string unit, std::string dst_file, const Blueprint::t_instantiation_context& ictx) { std::string lua_code = "_G['__io__" + unit + "']()\n"; - Blueprint::t_instantiation_context empty_ictx; - executeLuaString(lua_code, dst_file, false, empty_ictx); + executeLuaString(lua_code, dst_file, ictx); } // ------------------------------------------------- @@ -1330,7 +1329,7 @@ void LuaPreProcessor::generateUnitSource( std::string unit, std::string dst_file, const Blueprint::t_instantiation_context& ictx) { std::string lua_code = "_G['" + unit + "']()\n"; - executeLuaString(lua_code, dst_file, true, ictx); + executeLuaString(lua_code, dst_file, ictx); } // ------------------------------------------------- @@ -1353,14 +1352,12 @@ void LuaPreProcessor::createLuaContext() // ------------------------------------------------- -void LuaPreProcessor::executeLuaString(std::string lua_code, std::string dst_file, bool has_ictx, const Blueprint::t_instantiation_context& ictx) +void LuaPreProcessor::executeLuaString(std::string lua_code, std::string dst_file, const Blueprint::t_instantiation_context& ictx) { // reset line counter m_CurOutputLine = 0; // prepare instantiation context - if (has_ictx) { - g_LuaInstCtx.insert(std::make_pair(m_LuaState, 
ictx)); - } + g_LuaInstCtx.insert(std::make_pair(m_LuaState, ictx)); // prepare output g_LuaOutputs.insert(std::make_pair(m_LuaState, ofstream(dst_file))); // execute @@ -1391,9 +1388,7 @@ void LuaPreProcessor::executeLuaString(std::string lua_code, std::string dst_fil // close output g_LuaOutputs.at(m_LuaState).close(); g_LuaOutputs.erase(m_LuaState); - if (has_ictx) { - g_LuaInstCtx.erase(m_LuaState); - } + g_LuaInstCtx.erase(m_LuaState); } // ------------------------------------------------- diff --git a/src/LuaPreProcessor.h b/src/LuaPreProcessor.h index 22252da8..8a58db8d 100644 --- a/src/LuaPreProcessor.h +++ b/src/LuaPreProcessor.h @@ -80,7 +80,7 @@ namespace Silice { void createLuaContext(); void destroyLuaContext(); - void executeLuaString(std::string lua_code, std::string dst_file, bool has_ictx, const Blueprint::t_instantiation_context& ictx); + void executeLuaString(std::string lua_code, std::string dst_file, const Blueprint::t_instantiation_context& ictx); public: @@ -90,7 +90,7 @@ namespace Silice { void generateBody(std::string src_file, const std::vector &defaultLibraries, const Blueprint::t_instantiation_context& ictx, std::string lua_header_code, std::string dst_file); /// \brief generates a unit IO source code (the part defining unit ios) in dst_file - void generateUnitIOSource(std::string unit, std::string dst_file); + void generateUnitIOSource(std::string unit, std::string dst_file, const Blueprint::t_instantiation_context& ictx); /// \brief generates a unit source code in dst_file void generateUnitSource(std::string unit, std::string dst_file, const Blueprint::t_instantiation_context& ictx); diff --git a/src/Module.h b/src/Module.h index 1c3a84b0..b0676c46 100644 --- a/src/Module.h +++ b/src/Module.h @@ -69,57 +69,71 @@ namespace Silice std::unordered_map m_OutputNames; /// \brief all inout names, map contains index in m_InOuts std::unordered_map m_InOutNames; + /// \brief Verilog parameters + std::vector< std::pair > m_Parameters; /// \brief 
gather module information from parsed grammar void gather(vmoduleParser::VmoduleContext *vmodule) { m_Name = vmodule->IDENTIFIER()->getText(); - vmoduleParser::InOutListContext *list = vmodule->inOutList(); - for (auto io : list->inOrOut()) { - if (io->input()) { - t_inout_nfo nfo; - nfo.name = io->input()->IDENTIFIER()->getText(); - nfo.do_not_initialize = true; - nfo.type_nfo.base_type = UInt; // TODO signed? - if (io->input()->mod()->first != nullptr) { - int f = atoi(io->input()->mod()->first->getText().c_str()); - int s = atoi(io->input()->mod()->second->getText().c_str()); - nfo.type_nfo.width = f - s + 1; - } else { - nfo.type_nfo.width = 1; - } - m_Inputs.emplace_back(nfo); - m_InputNames.insert(make_pair(nfo.name, (int)m_Inputs.size() - 1)); - } else if (io->output()) { - t_output_nfo nfo; - nfo.name = io->output()->IDENTIFIER()->getText(); - nfo.do_not_initialize = true; - nfo.type_nfo.base_type = UInt; // TODO signed? - if (io->output()->mod()->first != nullptr) { - int f = atoi(io->output()->mod()->first->getText().c_str()); - int s = atoi(io->output()->mod()->second->getText().c_str()); - nfo.type_nfo.width = f - s + 1; - } else { - nfo.type_nfo.width = 1; + // parse parameters + { + vmoduleParser::ParamListContext* plist = vmodule->paramList(); + if (plist != nullptr) { + for (auto prm : plist->paramDecl()) { + m_Parameters.push_back(std::make_pair( prm->name->getText(), prm->value->getText() )); } - m_Outputs.emplace_back(nfo); - m_OutputNames.insert(make_pair(nfo.name, (int)m_Outputs.size() - 1)); - } else if (io->inout()) { - t_inout_nfo nfo; - nfo.name = io->inout()->IDENTIFIER()->getText(); - nfo.do_not_initialize = true; - nfo.type_nfo.base_type = UInt; // TODO signed? 
- if (io->inout()->mod()->first != nullptr) { - int f = atoi(io->inout()->mod()->first->getText().c_str()); - int s = atoi(io->inout()->mod()->second->getText().c_str()); - nfo.type_nfo.width = f - s + 1; + } + } + // parse io + { + vmoduleParser::InOutListContext* list = vmodule->inOutList(); + for (auto io : list->inOrOut()) { + if (io->input()) { + t_inout_nfo nfo; + nfo.name = io->input()->IDENTIFIER()->getText(); + nfo.do_not_initialize = true; + nfo.type_nfo.base_type = UInt; // TODO signed? + if (io->input()->mod()->first != nullptr) { + int f = atoi(io->input()->mod()->first->getText().c_str()); + int s = atoi(io->input()->mod()->second->getText().c_str()); + nfo.type_nfo.width = f - s + 1; + } else { + nfo.type_nfo.width = 1; + } + m_Inputs.emplace_back(nfo); + m_InputNames.insert(make_pair(nfo.name, (int)m_Inputs.size() - 1)); + } else if (io->output()) { + t_output_nfo nfo; + nfo.name = io->output()->IDENTIFIER()->getText(); + nfo.do_not_initialize = true; + nfo.type_nfo.base_type = UInt; // TODO signed? + if (io->output()->mod()->first != nullptr) { + int f = atoi(io->output()->mod()->first->getText().c_str()); + int s = atoi(io->output()->mod()->second->getText().c_str()); + nfo.type_nfo.width = f - s + 1; + } else { + nfo.type_nfo.width = 1; + } + m_Outputs.emplace_back(nfo); + m_OutputNames.insert(make_pair(nfo.name, (int)m_Outputs.size() - 1)); + } else if (io->inout()) { + t_inout_nfo nfo; + nfo.name = io->inout()->IDENTIFIER()->getText(); + nfo.do_not_initialize = true; + nfo.type_nfo.base_type = UInt; // TODO signed? 
+ if (io->inout()->mod()->first != nullptr) { + int f = atoi(io->inout()->mod()->first->getText().c_str()); + int s = atoi(io->inout()->mod()->second->getText().c_str()); + nfo.type_nfo.width = f - s + 1; + } else { + nfo.type_nfo.width = 1; + } + m_InOuts.emplace_back(nfo); + m_InOutNames.insert(make_pair(nfo.name, (int)m_InOuts.size() - 1)); } else { - nfo.type_nfo.width = 1; + sl_assert(false); } - m_InOuts.emplace_back(nfo); - m_InOutNames.insert(make_pair(nfo.name, (int)m_InOuts.size() - 1)); - } else { - sl_assert(false); } } } @@ -150,12 +164,15 @@ namespace Silice out << std::endl; } + /// instantiation parameters + const std::vector< std::pair >& parameters() const { return m_Parameters; } + /// === implements Blueprint /// \brief returns the blueprint name std::string name() const override { return m_Name; } /// \brief writes the algorithm as a Verilog module, recurses through instanced blueprints - void writeAsModule(std::ostream& out, const t_instantiation_context& ictx, bool first_pass) { } + void writeAsModule(std::ostream& out, const t_instantiation_context& ictx, bool first_pass) override { } /// \brief inputs const std::vector& inputs() const override { return m_Inputs; } /// \brief outputs diff --git a/src/ParsingErrors.cpp b/src/ParsingErrors.cpp index 9c2dd0c8..257846ef 100644 --- a/src/ParsingErrors.cpp +++ b/src/ParsingErrors.cpp @@ -86,7 +86,7 @@ void ReportError::printReport(std::pair where, std::string msg // ------------------------------------------------- -#if !defined(_WIN32) && !defined(_WIN64) +#if !defined(_WIN32) && !defined(_WIN64) && !defined(fopen_s) #define fopen_s(f,n,m) ((*f) = fopen(n,m)) #endif diff --git a/src/SiliceCompiler.cpp b/src/SiliceCompiler.cpp index c4280971..03c5890f 100644 --- a/src/SiliceCompiler.cpp +++ b/src/SiliceCompiler.cpp @@ -339,7 +339,7 @@ void SiliceCompiler::beginParsing( } // determine frameworks dir if needed if (frameworks_dir.empty()) { - frameworks_dir = 
std::string(LibSL::System::Application::executablePath()) + "../frameworks/"; + frameworks_dir = std::string(FRAMEWORKS_DEFAULT_PATH); } // extract pre-processor header from framework std::string framework_lpp, framework_verilog; @@ -403,7 +403,7 @@ void SiliceCompiler::endParsing() // ------------------------------------------------- -t_parsed_circuitry SiliceCompiler::parseCircuitryIOs(std::string to_parse) +t_parsed_circuitry SiliceCompiler::parseCircuitryIOs(std::string to_parse, const Blueprint::t_instantiation_context& ictx) { t_parsed_circuitry parsed; @@ -419,7 +419,7 @@ t_parsed_circuitry SiliceCompiler::parseCircuitryIOs(std::string to_parse) // bind local context parsed.ios_parser->bind(); // pre-process unit IOs (done first to gather intel on parameterized vs static ios - m_BodyContext->lpp->generateUnitIOSource(parsed.parsed_circuitry, preprocessed_io); + m_BodyContext->lpp->generateUnitIOSource(parsed.parsed_circuitry, preprocessed_io, ictx); // gather the unit parsed.ios_parser->prepareParser(preprocessed_io); auto ios_root = parsed.ios_parser->parser->rootIoList(); @@ -459,7 +459,7 @@ void SiliceCompiler::parseCircuitryBody(t_parsed_circuitry& _parse // ------------------------------------------------- -t_parsed_unit SiliceCompiler::parseUnitIOs(std::string to_parse) +t_parsed_unit SiliceCompiler::parseUnitIOs(std::string to_parse, const Blueprint::t_instantiation_context& ictx) { t_parsed_unit parsed; @@ -479,7 +479,7 @@ t_parsed_unit SiliceCompiler::parseUnitIOs(std::string to_parse) // bind local context parsed.ios_parser->bind(); // pre-process unit IOs (done first to gather intel on parameterized vs static ios - m_BodyContext->lpp->generateUnitIOSource(parsed.parsed_unit, preprocessed_io); + m_BodyContext->lpp->generateUnitIOSource(parsed.parsed_unit, preprocessed_io, ictx); // gather the unit parsed.ios_parser->prepareParser(preprocessed_io); auto ios_root = parsed.ios_parser->parser->rootInOutList(); @@ -635,9 +635,9 @@ void 
SiliceCompiler::writeFormalTests(std::ostream& _out, const Blueprint::t_ins // write formal unit tests for (auto name : m_BodyContext->lpp->formalUnits()) { Blueprint::t_instantiation_context local_ictx = ictx; - local_ictx.top_name = "formal_" + name + "$"; + local_ictx.top_name = "formal_" + name + "$"; // FIXME: inelegant // parse and write unit - auto bp = parseUnitIOs(name); + auto bp = parseUnitIOs(name, local_ictx); parseUnitBody(bp, local_ictx); bp.unit->setAsTopMost(); // -> first pass @@ -649,6 +649,22 @@ void SiliceCompiler::writeFormalTests(std::ostream& _out, const Blueprint::t_ins // ------------------------------------------------- +/// \brief writes a static unit in the output stream +/// NOTE: used by the python framework +void SiliceCompiler::writeStaticUnit( + AutoPtr bp, + const Blueprint::t_instantiation_context& ictx, + std::ostream& _out, + bool first_pass) +{ + t_parsed_unit pu; + pu.body_parser = m_BodyContext; + pu.unit = bp; + writeUnit(pu, ictx, _out, first_pass); +} + +// ------------------------------------------------- + AutoPtr SiliceCompiler::isStaticBlueprint(std::string bpname) { if (m_Blueprints.count(bpname) != 0) { @@ -722,7 +738,7 @@ void SiliceCompiler::run( } ictx.top_name = "M_" + to_export; // parse and write top unit - auto bp = parseUnitIOs(to_export); + auto bp = parseUnitIOs(to_export, ictx); parseUnitBody(bp, ictx); bp.unit->setAsTopMost(); // -> first pass diff --git a/src/SiliceCompiler.h b/src/SiliceCompiler.h index 8e1b0ac5..e3eb60ce 100644 --- a/src/SiliceCompiler.h +++ b/src/SiliceCompiler.h @@ -121,15 +121,22 @@ namespace Silice { /// \brief end parsing void endParsing(); + /// \brief writes a static unit in the output stream + void writeStaticUnit( + AutoPtr bp, + const Blueprint::t_instantiation_context& ictx, + std::ostream& _out, + bool first_pass); + /// \brief parses a specific unit ios - t_parsed_unit parseUnitIOs(std::string to_parse); + t_parsed_unit parseUnitIOs(std::string to_parse, const 
Blueprint::t_instantiation_context& ictx); /// \brief parses a unit body (call after parseUnitIOs); void parseUnitBody(t_parsed_unit& _parsed, const Blueprint::t_instantiation_context& ictx); /// \brief parses a specific circuitry ios - t_parsed_circuitry parseCircuitryIOs(std::string to_parse); + t_parsed_circuitry parseCircuitryIOs(std::string to_parse, const Blueprint::t_instantiation_context& ictx); /// \brief parses a circuitry body (call after parseCircuitryIOs); - void parseCircuitryBody(t_parsed_circuitry& _parsed,const Blueprint::t_instantiation_context& ictx); + void parseCircuitryBody(t_parsed_circuitry& _parsed, const Blueprint::t_instantiation_context& ictx); /// \brief returns the static blueprint for 'unit', otherwise null AutoPtr isStaticBlueprint(std::string bpname); diff --git a/src/libs/lua/CMakeLists.txt b/src/libs/lua/CMakeLists.txt index fc24e86b..cfdcbc34 100644 --- a/src/libs/lua/CMakeLists.txt +++ b/src/libs/lua/CMakeLists.txt @@ -1,4 +1,4 @@ -CMAKE_MINIMUM_REQUIRED(VERSION 2.6) +CMAKE_MINIMUM_REQUIRED(VERSION 3.5) PROJECT(lua) SET(LIBLUA_SOURCES diff --git a/src/libs/luabind-deboostified/CMakeLists.txt b/src/libs/luabind-deboostified/CMakeLists.txt index b89217b8..1e0b3008 100644 --- a/src/libs/luabind-deboostified/CMakeLists.txt +++ b/src/libs/luabind-deboostified/CMakeLists.txt @@ -3,7 +3,7 @@ # http://academic.cleardefinition.com/ # Iowa State University HCI Graduate Program/VRAC -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 3.5) set(CMAKE_LEGACY_CYGWIN_WIN32 0) # Remove when CMake >= 2.8.4 is required project(LuaBind) diff --git a/src/silice.cpp b/src/silice.cpp index 8764bb3d..84aa692b 100644 --- a/src/silice.cpp +++ b/src/silice.cpp @@ -48,7 +48,7 @@ int main(int argc, char **argv) { try { - const std::string version_string = std::string(" 0.2.0") + " " + c_GitHash; + const std::string version_string = std::string(" 0.2.3") + " " + c_GitHash; // ^ ^ ^ // | | | // | | \_ increments with features in wip/draft 
(x.x.x) diff --git a/tests/Makefile.icestick b/tests/Makefile.icestick new file mode 100644 index 00000000..ec3c61c7 --- /dev/null +++ b/tests/Makefile.icestick @@ -0,0 +1,6 @@ + +.DEFAULT: $@.si.lpp + silice-make.py -s $@.si -b icestick -p basic -o BUILD_$(subst :,_,$@) $(ARGS) + +clean: + rm -rf BUILD_* diff --git a/tests/bind2.si b/tests/bind2.si new file mode 100644 index 00000000..5ba5767d --- /dev/null +++ b/tests/bind2.si @@ -0,0 +1,17 @@ +unit foo(input uint1 a,input uint1 b) +{ + +} + +unit main(input uint2 tmp,output uint8 leds) +{ + foo f( + a <: tmp[0,1], + b <: tmp[1,1], + ); + + algorithm { + __display("hello world"); + } + +} diff --git a/tests/bind3.si b/tests/bind3.si new file mode 100644 index 00000000..13a09dd1 --- /dev/null +++ b/tests/bind3.si @@ -0,0 +1,23 @@ +unit foo(inout uint1 a,inout uint1 b) +{ + always { + a.oenable = 1b1; + a.o = 1b1; + b.oenable = 1b0; + __display("b=%b",b.i); + } +} + +unit main(inout uint2 tmp,output uint8 leds) +{ + + foo f( + a <:> tmp[0,1], + b <:> tmp[1,1], + ); + + algorithm { + __display("hello world"); + } + +} diff --git a/tests/bind7.si b/tests/bind7.si new file mode 100644 index 00000000..a6526ad6 --- /dev/null +++ b/tests/bind7.si @@ -0,0 +1,20 @@ +unit foo(inout uint2 a) +{ + always { + a.oenable = 2b11; + a.o = 2b01; + } +} + +unit main(inout uint4 tmp,output uint8 leds) +{ + + foo f( + a <:> tmp[0,2] // ok, width do match + ); + + algorithm { + __display("hello world"); + } + +} diff --git a/tests/bind8.si b/tests/bind8.si new file mode 100644 index 00000000..b1292ea6 --- /dev/null +++ b/tests/bind8.si @@ -0,0 +1,22 @@ + +unit foo(output uint3 b,inout uint$W$ a) +{ + always { + __display("W=%d",$W$); + a.oenable = 2b11; + a.o = 2b01; + } +} + +unit main(inout uint4 tmp,output uint8 leds) +{ + + foo f( + a <:> tmp[0,2] // ok, width do match + ); + + algorithm { + __display("hello world"); + } + +} diff --git a/tests/circuits21.si b/tests/circuits21.si new file mode 100644 index 00000000..4c24dbf9 --- 
/dev/null +++ b/tests/circuits21.si @@ -0,0 +1,20 @@ +circuitry add_two(input i) +{ + // starts new pipelines + { -> -> __display("hello from A, %d",i); } + { -> -> __display("hello from B, %d",i); } +} + +unit main(output uint8 leds) +{ + uint32 cycle=0; + algorithm { + + while (cycle<100) { + + () = add_two(cycle); // of to add circuitry since no parent pipeline + + } + } + always_after { cycle = cycle + 1; } +} diff --git a/tests/error_checks/bind4.si b/tests/error_checks/bind4.si new file mode 100644 index 00000000..be433c47 --- /dev/null +++ b/tests/error_checks/bind4.si @@ -0,0 +1,22 @@ + +unit foo(inout uint2 a) +{ + always { + a.oenable = 2b11; + a.o = 2b01; + } +} + +unit main(inout uint2 tmp,output uint8 leds) +{ + + foo f( + a <:> tmp + ); + + algorithm { + uint1 p = tmp.i & 1b1; // error: tmp is bound + __display("hello world"); + } + +} diff --git a/tests/error_checks/bind5.si b/tests/error_checks/bind5.si new file mode 100644 index 00000000..7bd4de64 --- /dev/null +++ b/tests/error_checks/bind5.si @@ -0,0 +1,21 @@ +unit foo(inout uint2 a) +{ + always { + a.oenable = 2b11; + a.o = 2b01; + } +} + +unit main(inout uint2 tmp,output uint8 leds) +{ + + foo f( + a <:> tmp[0,2] // error: used in body below + ); + + algorithm { + uint1 p = tmp.i & 1b1; + __display("hello world"); + } + +} diff --git a/tests/error_checks/bind6.si b/tests/error_checks/bind6.si new file mode 100644 index 00000000..f0ce5636 --- /dev/null +++ b/tests/error_checks/bind6.si @@ -0,0 +1,20 @@ +unit foo(inout uint2 a) +{ + always { + a.oenable = 2b11; + a.o = 2b01; + } +} + +unit main(inout uint2 tmp,output uint8 leds) +{ + + foo f( + a <:> tmp[0,1], // error: width mismatch + ); + + algorithm { + __display("hello world"); + } + +} diff --git a/tests/error_checks/circuits19.si b/tests/error_checks/circuits19.si new file mode 100644 index 00000000..f8407267 --- /dev/null +++ b/tests/error_checks/circuits19.si @@ -0,0 +1,31 @@ +circuitry add_two(input i,output o) +{ + { // forbidden : 
starts a new pipeline + // stage 1 + uint8 v = i + 1; + -> + // stage 2 + // o = v + 1; + -> + + } +} + +unit main(output uint8 leds) +{ + uint32 cycle=0; + uint8 a =0; + algorithm { + + while (a<3) { + // stage 0 + uint8 v = a; + a = a + 1; + -> + (v) = add_two(v); + -> + + } + } + always_after { cycle = cycle + 1; } +} diff --git a/tests/error_checks/circuits20.si b/tests/error_checks/circuits20.si new file mode 100644 index 00000000..d7cf7697 --- /dev/null +++ b/tests/error_checks/circuits20.si @@ -0,0 +1,21 @@ +circuitry add_two(input i) +{ + // starts new pipelines + { -> -> } + { -> -> } +} + +unit main(output uint8 leds) +{ + uint32 cycle=0; + algorithm { + + while (cycle<100) { + -> + () = add_two(cycle); // ERROR: this would next pipelines + -> + + } + } + always_after { cycle = cycle + 1; } +} diff --git a/tests/error_checks/pip1.si b/tests/error_checks/pip1.si index a5d9ff47..e2173ff8 100644 --- a/tests/error_checks/pip1.si +++ b/tests/error_checks/pip1.si @@ -4,16 +4,15 @@ algorithm main(output int8 led) int8 b = 0; int8 c = 0; + a = 2; + +-> + { - a = 2; - } -> { b = a; - { - c = a; - } -> { - b = 3; - } - + c = a; + -> + b = 3; } } diff --git a/tests/error_checks/pipeline4.si b/tests/error_checks/pipeline4.si new file mode 100644 index 00000000..7d76bbc5 --- /dev/null +++ b/tests/error_checks/pipeline4.si @@ -0,0 +1,18 @@ + +algorithm main(output uint8 leds) +{ + + while (1) { + + { + + } -> { + + } + + if (1) { break; } // cannot break in pipeline stage + // (this is in the last piepline stage, see CL0003) + + } + +} diff --git a/tests/issues/257_break.si b/tests/issues/257_break.si new file mode 100644 index 00000000..01d46332 --- /dev/null +++ b/tests/issues/257_break.si @@ -0,0 +1,17 @@ + +algorithm main(output uint8 leds) +{ + + while (1) { + + { + + } -> { + + } + + if (1) { break; } + + } + +} diff --git a/tests/issues/258_inout.si b/tests/issues/258_inout.si new file mode 100644 index 00000000..b7918d81 --- /dev/null +++ 
b/tests/issues/258_inout.si @@ -0,0 +1,20 @@ +// make 258_inout -f Makefile.icebreaker + +import('../../projects/common/ice40_sb_io_inout.v') + +unit main(output uint5 leds, + inout uint8 sdr_dq) + +{ + uint8 io_oe(0); uint8 io_i(0); uint8 io_o(0); + + sb_io_inout sb_io0(clock <: clock, + oe <: io_oe[0,1], + in :> io_i[0,1], + out <: io_o[0,1], + pin <:> sdr_dq[0,1]); + + algorithm { + __display("nop"); + } +} diff --git a/tests/module1.si b/tests/module1.si new file mode 100644 index 00000000..08562749 --- /dev/null +++ b/tests/module1.si @@ -0,0 +1,11 @@ + +import('../projects/common/ice40_sb_io_inout.v') + +unit main(output uint8 leds,inout uint1 io) +{ + + uint1 one(1); uint1 bla(0); uint1 read(0); + + sb_io_inout _(clock <: clock, oe <: one, out <: bla, in :> read, pin <:> io); + +} diff --git a/tests/nonsynth1.si b/tests/nonsynth1.si new file mode 100644 index 00000000..70437865 --- /dev/null +++ b/tests/nonsynth1.si @@ -0,0 +1,6 @@ +unit main(output uint8 leds) +{ + algorithm { + __display("hello world"); + } +} diff --git a/tests/pipeline2.si b/tests/pipeline2.si index 5edd1d73..935a5c3b 100644 --- a/tests/pipeline2.si +++ b/tests/pipeline2.si @@ -11,26 +11,27 @@ algorithm main(output uint8 leds) while (i < 8+2) { + i = i + 1; + pipeline_ready = pipeline_ready + 1; + { pipeline_i = i; a = i + 1; __display("-----"); __display("[0] %d",pipeline_i); - } -> { + + -> if (pipeline_ready >= 1) { b = a + 10; __display("[1] %d",pipeline_i); } - } -> { + -> if (pipeline_ready >= 2) { o[pipeline_i*8,8] = b; __display("[2] [%d] = %h",pipeline_i,b); } } - i = i + 1; - pipeline_ready = pipeline_ready + 1; - } __display("%h",o); diff --git a/tests/pipeline28.si b/tests/pipeline28.si new file mode 100644 index 00000000..900b661f --- /dev/null +++ b/tests/pipeline28.si @@ -0,0 +1,20 @@ + +algorithm main(output uint8 leds) +{ + + while (1) { + + { + // stage 0 + -> + // stage 1 + while (1) { + if (1) { break; } // this is ok (break from loop within stage) + } + } + + if (1) 
{ break; } // this is ok (break from loop outside of pipeline) + + } + +} diff --git a/tests/pipeline29.si b/tests/pipeline29.si new file mode 100644 index 00000000..0d44af50 --- /dev/null +++ b/tests/pipeline29.si @@ -0,0 +1,103 @@ + +// nested pipelines test + +algorithm main(output uint8 leds) +{ + + uint5 i = 0; + while (i!=4) + { + + // stage 0 + uint5 n = i; + __display("[A] (%d)",n); + i = i + 1; + + -> + + __display("[B] before (%d)",n); + // stage 1 + uint5 j = 0; + while (j != 3) { + { + // nested pipeline + __display("[B,A] %d,%d",n,j); + uint8 q = j + 10; + j = j + 1; + -> + __display("[B,B] %d,%d",n,q); + q = q + 100; + -> + __display("[B,C] %d,%d",n,q); + } + } + __display("[B] after (%d)",n); + + } + +} + +/* + +[A] ( 0) +[A] ( 1) +[B] before ( 0) +[B,A] 0, 0 +[B,A] 0, 1 +[B,B] 0, 10 +[B,A] 0, 2 +[B,B] 0, 11 +[B,C] 0,110 +[B,A] 0, 3 +[B,B] 0, 12 +[B,C] 0,111 +[B] after ( 0) +[B,B] 0, 13 +[B,C] 0,112 +[A] ( 2) +[B] before ( 1) +[B,C] 1,113 +[B,A] 1, 0 +[B,A] 1, 1 +[B,B] 1, 10 +[B,A] 1, 2 +[B,B] 1, 11 +[B,C] 1,110 +[B,A] 1, 3 +[B,B] 1, 12 +[B,C] 1,111 +[B] after ( 1) +[B,B] 1, 13 +[B,C] 1,112 +[A] ( 3) +[B] before ( 2) +[B,C] 2,113 +[B,A] 2, 0 +[B,A] 2, 1 +[B,B] 2, 10 +[B,A] 2, 2 +[B,B] 2, 11 +[B,C] 2,110 +[B,A] 2, 3 +[B,B] 2, 12 +[B,C] 2,111 +[B] after ( 2) +[B,B] 2, 13 +[B,C] 2,112 +[B] before ( 3) +[B,C] 3,113 +[B,A] 3, 0 +[B,A] 3, 1 +[B,B] 3, 10 +[B,A] 3, 2 +[B,B] 3, 11 +[B,C] 3,110 +[B,A] 3, 3 +[B,B] 3, 12 +[B,C] 3,111 +[B] after ( 3) +[B,B] 3, 13 +[B,C] 3,112 +[B,C] 3,113 + +*/ diff --git a/tests/pipeline30.si b/tests/pipeline30.si new file mode 100644 index 00000000..a25f34b4 --- /dev/null +++ b/tests/pipeline30.si @@ -0,0 +1,48 @@ + +// nested pipelines test + +algorithm main(output uint8 leds) +{ + + uint5 i = 0; + while (i!=6) + { + + // stage 0 + uint5 n = i; + __display("[A] (%d)",n); + i = i + 1; + + -> + + repeat: + if (n > 3) { + __display("[B] n = %d > 3",n); + n = n - 1; + goto repeat; + } + __display("[B] n = %d",n); + + } + +} + +/* 
+ +[A] ( 0) +[A] ( 1) +[B] n = 0 +[A] ( 2) +[B] n = 1 +[A] ( 3) +[B] n = 2 +[A] ( 4) +[B] n = 3 +[A] ( 5) +[B] n = 4 > 3 +[B] n = 3 +[B] n = 5 > 3 +[B] n = 4 > 3 +[B] n = 3 + +*/ \ No newline at end of file diff --git a/tests/pipeline4.si b/tests/pipeline4.si index 40e49edb..17e61c7f 100644 --- a/tests/pipeline4.si +++ b/tests/pipeline4.si @@ -8,18 +8,21 @@ algorithm main(output uint8 leds) uint64 o = 0; while (i < 8) { // the while will stop too early - { - a = a + 1; - } -> { - b = a + 10; - } -> { - o[i*8,8] = b; - } + i = i + 1; - i = i + 1; + { + a = a + 1; + -> + b = a + 10; + -> + __display("[%d] = %d",a,b); + o[a*8,8] = b; + } } return; } + () <- test <- (0); + } diff --git a/tests/pipeline6.si b/tests/pipeline6.si index 8f9a2bf6..db90e086 100644 --- a/tests/pipeline6.si +++ b/tests/pipeline6.si @@ -7,35 +7,35 @@ algorithm main(output uint8 leds) while (i < 8) { + i = i + 1; + { a = a + 1; __display("[0] a = %d\tb = %d\tc = %d",a,b,c); - } -> { + -> __display("[1] a = %d\tb = %d\tc = %d",a,b,c); - } -> { + -> b ^= a; c v= a; __display("[2] a = %d\tb = %d\tc = %d",a,b,c); - } -> { + -> __display("[3] a = %d\tb = %d\tc = %d",a,b,c); - } -> { + -> __display("[4] a = %d\tb = %d\tc = %d\n",a,b,c); } - i = i + 1; - } } diff --git a/tests/pipeline7.si b/tests/pipeline7.si index 006e14af..07be3b46 100644 --- a/tests/pipeline7.si +++ b/tests/pipeline7.si @@ -3,15 +3,14 @@ unit main(output uint8 leds) algorithm { uint16 cycle=0; uint16 a=0; uint16 b=0; while (cycle < 4) { - { // stage 0 - a = cycle; - cycle = cycle + 1; - __display("[stage 0] cycle %d, a = %d",cycle,a); - } -> { // stage 1 - __display("[stage 1] cycle %d, a = %d",cycle,a); - } -> { // stage 2 - __display("[stage 2] cycle %d, a = %d\n",cycle,a); - } + // stage 0 + a = cycle; + cycle = cycle + 1; + __display("[stage 0] cycle %d, a = %d",cycle,a); + -> // stage 1 + __display("[stage 1] cycle %d, a = %d",cycle,a); + -> // stage 2 + __display("[stage 2] cycle %d, a = %d\n",cycle,a); } } } diff --git 
a/tests/pipeline8.si b/tests/pipeline8.si index 0d9b8390..85a373fa 100644 --- a/tests/pipeline8.si +++ b/tests/pipeline8.si @@ -52,7 +52,8 @@ unit main(output uint8 leds) algorithm { while (1) { - { // stage 0 + { + // stage 0 uint1 valid(0); uint1 first(1); if (a.last | first) { @@ -68,7 +69,15 @@ unit main(output uint8 leds) __display("[cycle %d] ==== input :%d",cycle,a.i); } - } -> { // stage 1 + // check at every cycle + if (c.last) { + __display("[cycle %d] ==== result:%d",cycle,c.o); + } + cycle vv= cycle + 1; + // ^ assigns out of pipeline + if (cycle == 40) { __finish(); } + + -> // stage 1 uint1 valid(0); if (a.last) { @@ -83,7 +92,7 @@ unit main(output uint8 leds) __display("[cycle %d] B (b.i:%d)",cycle,b.i); } - } -> { // stage 2 + -> // stage 2 uint1 valid(0); if (b.last) { @@ -97,14 +106,7 @@ unit main(output uint8 leds) __display("[cycle %d] C (c.i:%d)",cycle,c.i); } - } - - if (c.last) { - __display("[cycle %d] ==== result:%d",cycle,c.o); - } - - cycle = cycle + 1; - if (cycle == 40) { __finish(); } + } // end of pipeline } } } diff --git a/tests/pipeline9.si b/tests/pipeline9.si index 3705f877..9f95a2f3 100644 --- a/tests/pipeline9.si +++ b/tests/pipeline9.si @@ -6,13 +6,13 @@ unit main(output uint8 leds) algorithm { while (1) { - { // stage 0 + // stage 0 t = q; __display("[cycle %d] in ===== %d",cycle,q); q = q + 1; - } -> { // stage 1 + -> // stage 1 __display("[cycle %d] 1_A ==== %d",cycle,t); uint4 i=0; @@ -22,12 +22,10 @@ unit main(output uint8 leds) } __display("[cycle %d] 1_B ==== %d",cycle,t); - } -> { // stage 2 + -> // stage 2 __display("[cycle %d] 2_ ===== %d",cycle,t); - } - } } always_after { diff --git a/tools/oss-cad-suite-env.sh b/tools/oss-cad-suite-env.sh new file mode 100644 index 00000000..4802b816 --- /dev/null +++ b/tools/oss-cad-suite-env.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +export YOSYSHQ_ROOT=$DIR/oss-cad-suite + +export 
SSL_CERT_FILE=%YOSYSHQ_ROOT%etc\cacert.pem + +export PATH=$YOSYSHQ_ROOT/bin:$YOSYSHQ_ROOT/lib:$PATH +# export PATH=$YOSYSHQ_ROOT/bin:$YOSYSHQ_ROOT/lib:$YOSYSHQ_ROOT/py3bin:$PATH +# export PYTHON_EXECUTABLE=$YOSYSHQ_ROOT/py3bin/python3 +export QT_PLUGIN_PATH=$YOSYSHQ_ROOT/lib/qt5/plugins + +export GTK_EXE_PREFIX=$YOSYSHQ_ROOT +export GTK_DATA_PREFIX=$YOSYSHQ_ROOT +export GDK_PIXBUF_MODULEDIR=$YOSYSHQ_ROOT/lib/gdk-pixbuf-2.0/2.10.0/loaders +export GDK_PIXBUF_MODULE_FILE=$YOSYSHQ_ROOT/lib/gdk-pixbuf-2.0/2.10.0/loaders.cache + +gdk-pixbuf-query-loaders.exe --update-cache + +export OPENFPGALOADER_SOJ_DIR=$YOSYSHQ_ROOT/share/openFPGALoader