From 93e53d563de5022010cbacc670298270c129cead Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 12:45:40 -0400
Subject: [PATCH 01/56] libargparse pugixml

---
 .gitmodules             | 6 ++++++
 third_party/libargparse | 1 +
 third_party/pugixml     | 1 +
 3 files changed, 8 insertions(+)
 create mode 160000 third_party/libargparse
 create mode 160000 third_party/pugixml

diff --git a/.gitmodules b/.gitmodules
index 918b2bfde..e27183eb6 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,3 +4,9 @@
 [submodule "third_party/make-env"]
 	path = third_party/make-env
 	url = https://github.com/SymbiFlow/make-env.git
+[submodule "third_party/pugixml"]
+	path = third_party/pugixml
+	url = https://github.com/zeux/pugixml.git
+[submodule "third_party/libargparse"]
+	path = third_party/libargparse
+	url = https://github.com/kmurray/libargparse.git
diff --git a/third_party/libargparse b/third_party/libargparse
new file mode 160000
index 000000000..ee74d1b53
--- /dev/null
+++ b/third_party/libargparse
@@ -0,0 +1 @@
+Subproject commit ee74d1b53bd680748af14e737378de57e2a0a954
diff --git a/third_party/pugixml b/third_party/pugixml
new file mode 160000
index 000000000..95683943b
--- /dev/null
+++ b/third_party/pugixml
@@ -0,0 +1 @@
+Subproject commit 95683943bba726729079886d0967112a60fa71aa

From 38e19bd3d7ec62ab4ae6b062265fe98f22a8885f Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 12:46:56 -0400
Subject: [PATCH 02/56] `parmys` plugin

---
 .github/workflows/licensing.yml               |    9 +
 Makefile                                      |    3 +-
 parmys-plugin/Makefile                        |  112 +
 parmys-plugin/include/BlockMemories.hpp       |  100 +
 parmys-plugin/include/HardSoftLogicMixer.hpp  |   97 +
 parmys-plugin/include/Hashtable.hpp           |   49 +
 parmys-plugin/include/MixingOptimization.hpp  |  222 +
 parmys-plugin/include/adders.h                |   72 +
 parmys-plugin/include/ast_util.h              |    9 +
 parmys-plugin/include/config_t.h              |   47 +
 parmys-plugin/include/hard_blocks.h           |   38 +
 parmys-plugin/include/memories.h              |  106 +
 parmys-plugin/include/multipliers.h           |   64 +
 parmys-plugin/include/netlist_check.h         |    6 +
 parmys-plugin/include/netlist_cleanup.h       |    6 +
 parmys-plugin/include/netlist_statistic.h     |   25 +
 parmys-plugin/include/netlist_utils.h         |   73 +
 parmys-plugin/include/netlist_visualizer.h    |   10 +
 parmys-plugin/include/node_creation_library.h |   24 +
 parmys-plugin/include/odin_error.h            |   56 +
 parmys-plugin/include/odin_globals.h          |   48 +
 parmys-plugin/include/odin_ii.h               |   10 +
 parmys-plugin/include/odin_types.h            |  671 +++
 parmys-plugin/include/odin_util.h             |   27 +
 parmys-plugin/include/partial_map.h           |   34 +
 parmys-plugin/include/read_xml_config_file.h  |   32 +
 parmys-plugin/include/string_cache.h          |   46 +
 parmys-plugin/include/subtractions.h          |   40 +
 parmys-plugin/parmys.cc                       | 1156 ++++
 parmys-plugin/parmys_arch.cc                  |  134 +
 parmys-plugin/parmys_resolve.cc               |  229 +
 parmys-plugin/parmys_resolve.hpp              |   26 +
 parmys-plugin/parmys_update.cc                |  520 ++
 parmys-plugin/parmys_update.hpp               |   30 +
 parmys-plugin/parmys_utils.cc                 |  149 +
 parmys-plugin/parmys_utils.hpp                |   31 +
 parmys-plugin/src/BlockMemories.cc            | 2201 +++++++
 parmys-plugin/src/HardSoftLogicMixer.cc       |   70 +
 parmys-plugin/src/Hashtable.cc                |   60 +
 parmys-plugin/src/MixingOptimization.cc       |  186 +
 parmys-plugin/src/adders.cc                   | 1422 +++++
 parmys-plugin/src/ast_util.cc                 |   99 +
 parmys-plugin/src/enum_str.cc                 |  359 ++
 parmys-plugin/src/hard_blocks.cc              |  314 +
 parmys-plugin/src/memories.cc                 | 2273 ++++++++
 parmys-plugin/src/multipliers.cc              | 1910 +++++++
 parmys-plugin/src/netlist_check.cc            |  741 +++
 parmys-plugin/src/netlist_cleanup.cc          |  340 ++
 parmys-plugin/src/netlist_statistic.cc        |  371 ++
 parmys-plugin/src/netlist_utils.cc            | 1502 +++++
 parmys-plugin/src/netlist_visualizer.cc       |  361 ++
 parmys-plugin/src/node_creation_library.cc    |  412 ++
 parmys-plugin/src/odin_error.cc               |  142 +
 parmys-plugin/src/odin_ii.cc                  |   76 +
 parmys-plugin/src/odin_util.cc                |  236 +
 parmys-plugin/src/partial_map.cc              | 1232 ++++
 parmys-plugin/src/read_xml_config_file.cc     |  285 +
 parmys-plugin/src/string_cache.cc             |  166 +
 parmys-plugin/src/subtractions.cc             |  891 +++
 parmys-plugin/techlibs/adff2dff.v             |   45 +
 parmys-plugin/techlibs/adffe2dff.v            |   51 +
 parmys-plugin/techlibs/aldff2dff.v            |   47 +
 parmys-plugin/techlibs/aldffe2dff.v           |   51 +
 parmys-plugin/techlibs/vtr_primitives.v       |  329 ++
 parmys-plugin/tests/Makefile                  |   28 +
 .../tests/eltwise_layer/eltwise_layer.tcl     |   87 +
 .../tests/eltwise_layer/eltwise_layer.v       | 3057 ++++++++++
 .../tests/eltwise_layer/hard_block_include.v  |    3 +
 ...cN10LB_mem20K_complexDSP_customSB_22nm.xml | 3246 +++++++++++
 .../tests/eltwise_layer/odin_config.xml       |   41 +
 .../k6_frac_N10_frac_chain_mem32K_40nm.xml    | 1505 +++++
 parmys-plugin/tests/raygentop/odin_config.xml |   40 +
 parmys-plugin/tests/raygentop/raygentop.tcl   |   93 +
 parmys-plugin/tests/raygentop/raygentop.v     | 2978 ++++++++++
 third_party/pugixml                           |    2 +-
 third_party/vtr/LICENSE.md                    |   69 +
 ...cN10LB_mem20K_complexDSP_customSB_22nm.xml | 3246 +++++++++++
 .../k6_frac_N10_frac_chain_mem32K_40nm.xml    | 1505 +++++
 third_party/vtr/libs/archfpga/.gitignore      |    1 +
 third_party/vtr/libs/archfpga/CMakeLists.txt  |   75 +
 third_party/vtr/libs/archfpga/arch/README.txt |   11 +
 .../vtr/libs/archfpga/arch/mult_luts_arch.xml |  744 +++
 .../vtr/libs/archfpga/arch/sample_arch.xml    | 1215 ++++
 .../vtr/libs/archfpga/src/arch_check.cc       |  412 ++
 .../vtr/libs/archfpga/src/arch_check.h        |   80 +
 .../vtr/libs/archfpga/src/arch_error.cc       |   16 +
 .../vtr/libs/archfpga/src/arch_error.h        |   18 +
 .../vtr/libs/archfpga/src/arch_types.h        |   35 +
 .../vtr/libs/archfpga/src/arch_util.cc        | 1572 +++++
 third_party/vtr/libs/archfpga/src/arch_util.h |  125 +
 third_party/vtr/libs/archfpga/src/cad_types.h |  130 +
 .../vtr/libs/archfpga/src/clock_types.h       |   63 +
 .../vtr/libs/archfpga/src/device_grid.cc      |   42 +
 .../vtr/libs/archfpga/src/device_grid.h       |   61 +
 .../vtr/libs/archfpga/src/echo_arch.cc        |  632 +++
 third_party/vtr/libs/archfpga/src/echo_arch.h |   11 +
 .../vtr/libs/archfpga/src/histogram.cc        |  119 +
 third_party/vtr/libs/archfpga/src/histogram.h |   26 +
 .../vtr/libs/archfpga/src/logic_types.h       |   57 +
 third_party/vtr/libs/archfpga/src/main.cc     |   80 +
 .../libs/archfpga/src/parse_switchblocks.cc   |  473 ++
 .../libs/archfpga/src/parse_switchblocks.h    |   22 +
 .../vtr/libs/archfpga/src/physical_types.cc   |  255 +
 .../vtr/libs/archfpga/src/physical_types.h    | 1896 +++++++
 .../libs/archfpga/src/physical_types_util.cc  |  492 ++
 .../libs/archfpga/src/physical_types_util.h   |  304 +
 .../src/read_fpga_interchange_arch.cc         | 2542 +++++++++
 .../archfpga/src/read_fpga_interchange_arch.h |   34 +
 .../libs/archfpga/src/read_xml_arch_file.cc   | 5037 +++++++++++++++++
 .../libs/archfpga/src/read_xml_arch_file.h    |   27 +
 .../vtr/libs/archfpga/src/read_xml_util.cc    |  142 +
 .../vtr/libs/archfpga/src/read_xml_util.h     |   28 +
 third_party/vtr/libs/archfpga/test/main.cpp   |    2 +
 .../archfpga/test/test_read_xml_arch_file.cpp |  269 +
 third_party/vtr/libs/log/.gitignore           |    1 +
 third_party/vtr/libs/log/CMakeLists.txt       |   24 +
 third_party/vtr/libs/log/LICENSE.txt          |   21 +
 third_party/vtr/libs/log/Readme.txt           |   13 +
 third_party/vtr/libs/log/src/log.cc           |  117 +
 third_party/vtr/libs/log/src/log.h            |   22 +
 third_party/vtr/libs/log/src/main.cc          |   18 +
 third_party/vtr/libs/pugiutil/CMakeLists.txt  |   22 +
 .../vtr/libs/pugiutil/src/pugixml_loc.cc      |   49 +
 .../vtr/libs/pugiutil/src/pugixml_loc.hpp     |   51 +
 .../vtr/libs/pugiutil/src/pugixml_util.cc     |  298 +
 .../vtr/libs/pugiutil/src/pugixml_util.hpp    |  198 +
 third_party/vtr/libs/rtlnumber/.gitignore     |    1 +
 third_party/vtr/libs/rtlnumber/CMakeLists.txt |   33 +
 third_party/vtr/libs/rtlnumber/Makefile       |   81 +
 third_party/vtr/libs/rtlnumber/README.md      |    9 +
 third_party/vtr/libs/rtlnumber/main.cpp       |  200 +
 .../basic_regression_tests.csv                |  310 +
 .../rtlnumber/src/include/internal_bits.hpp   | 1140 ++++
 .../libs/rtlnumber/src/include/rtl_int.hpp    |   95 +
 .../libs/rtlnumber/src/include/rtl_utils.hpp  |   57 +
 third_party/vtr/libs/rtlnumber/src/rtl_int.cc |  746 +++
 .../vtr/libs/rtlnumber/src/rtl_utils.cc       |  304 +
 .../vtr/libs/rtlnumber/unit_test/Makefile     |   79 +
 .../libs/rtlnumber/unit_test/verilog_bits.cpp |   27 +
 .../vtr/libs/rtlnumber/verify_librtlnumber.sh |  124 +
 .../vpr/src/draw/breakpoint_state_globals.h   |   33 +
 third_party/vtr/libs/vtrutil/CMakeLists.txt   |  119 +
 .../cmake/modules/configure_version.cmake     |   55 +
 third_party/vtr/libs/vtrutil/src/picosha2.h   |  357 ++
 third_party/vtr/libs/vtrutil/src/vpr_error.cc |   89 +
 third_party/vtr/libs/vtrutil/src/vpr_error.h  |  127 +
 .../vtr/libs/vtrutil/src/vtr_array_view.h     |  273 +
 .../vtr/libs/vtrutil/src/vtr_assert.cc        |   23 +
 third_party/vtr/libs/vtrutil/src/vtr_assert.h |  151 +
 third_party/vtr/libs/vtrutil/src/vtr_bimap.h  |  167 +
 third_party/vtr/libs/vtrutil/src/vtr_cache.h  |   46 +
 .../vtr/libs/vtrutil/src/vtr_color_map.cc     |  831 +++
 .../vtr/libs/vtrutil/src/vtr_color_map.h      |   58 +
 .../vtr/libs/vtrutil/src/vtr_digest.cc        |   39 +
 third_party/vtr/libs/vtrutil/src/vtr_digest.h |   16 +
 .../vtr/libs/vtrutil/src/vtr_dynamic_bitset.h |   72 +
 third_party/vtr/libs/vtrutil/src/vtr_error.h  |   68 +
 .../vtr/libs/vtrutil/src/vtr_expr_eval.cc     |  904 +++
 .../vtr/libs/vtrutil/src/vtr_expr_eval.h      |  234 +
 .../vtr/libs/vtrutil/src/vtr_flat_map.h       |  483 ++
 .../vtr/libs/vtrutil/src/vtr_geometry.h       |  312 +
 .../vtr/libs/vtrutil/src/vtr_geometry.tpp     |  347 ++
 third_party/vtr/libs/vtrutil/src/vtr_hash.h   |   30 +
 .../vtr/libs/vtrutil/src/vtr_linear_map.h     |  312 +
 third_party/vtr/libs/vtrutil/src/vtr_list.cc  |   25 +
 third_party/vtr/libs/vtrutil/src/vtr_list.h   |   24 +
 third_party/vtr/libs/vtrutil/src/vtr_log.cc   |   50 +
 third_party/vtr/libs/vtrutil/src/vtr_log.h    |  167 +
 third_party/vtr/libs/vtrutil/src/vtr_logic.h  |   33 +
 .../vtr/libs/vtrutil/src/vtr_map_util.h       |   45 +
 third_party/vtr/libs/vtrutil/src/vtr_math.cc  |  106 +
 third_party/vtr/libs/vtrutil/src/vtr_math.h   |  168 +
 .../vtr/libs/vtrutil/src/vtr_memory.cc        |  178 +
 third_party/vtr/libs/vtrutil/src/vtr_memory.h |  151 +
 .../vtr/libs/vtrutil/src/vtr_ndmatrix.h       |  409 ++
 .../vtr/libs/vtrutil/src/vtr_ndoffsetmatrix.h |  459 ++
 .../vtr/libs/vtrutil/src/vtr_ostream_guard.h  |   40 +
 .../vtr/libs/vtrutil/src/vtr_pair_util.h      |   96 +
 third_party/vtr/libs/vtrutil/src/vtr_path.cc  |   87 +
 third_party/vtr/libs/vtrutil/src/vtr_path.h   |   33 +
 .../vtr/libs/vtrutil/src/vtr_ragged_matrix.h  |  255 +
 .../vtr/libs/vtrutil/src/vtr_random.cc        |   77 +
 third_party/vtr/libs/vtrutil/src/vtr_random.h |   45 +
 third_party/vtr/libs/vtrutil/src/vtr_range.h  |   85 +
 .../vtr/libs/vtrutil/src/vtr_rusage.cc        |   30 +
 third_party/vtr/libs/vtrutil/src/vtr_rusage.h |   11 +
 .../vtr/libs/vtrutil/src/vtr_sentinels.h      |   49 +
 .../vtr/libs/vtrutil/src/vtr_small_vector.h   |  854 +++
 .../libs/vtrutil/src/vtr_string_interning.h   |  558 ++
 .../vtr/libs/vtrutil/src/vtr_string_view.h    |  192 +
 .../vtr/libs/vtrutil/src/vtr_strong_id.h      |  245 +
 .../libs/vtrutil/src/vtr_strong_id_range.h    |  185 +
 third_party/vtr/libs/vtrutil/src/vtr_time.cc  |   99 +
 third_party/vtr/libs/vtrutil/src/vtr_time.h   |   99 +
 third_party/vtr/libs/vtrutil/src/vtr_token.cc |  218 +
 third_party/vtr/libs/vtrutil/src/vtr_token.h  |   40 +
 third_party/vtr/libs/vtrutil/src/vtr_util.cc  |  504 ++
 third_party/vtr/libs/vtrutil/src/vtr_util.h   |  123 +
 .../vtr/libs/vtrutil/src/vtr_vec_id_set.h     |  106 +
 third_party/vtr/libs/vtrutil/src/vtr_vector.h |  211 +
 .../vtr/libs/vtrutil/src/vtr_vector_map.h     |  172 +
 .../vtr/libs/vtrutil/src/vtr_version.cpp.in   |   20 +
 .../vtr/libs/vtrutil/src/vtr_version.h        |   20 +
 third_party/vtr/libs/vtrutil/test/main.cpp    |    2 +
 .../vtr/libs/vtrutil/test/test_array_view.cpp |  110 +
 .../vtr/libs/vtrutil/test/test_expr_eval.cpp  |   90 +
 .../vtr/libs/vtrutil/test/test_geometry.cpp   |  245 +
 .../vtr/libs/vtrutil/test/test_map_util.cpp   |   35 +
 .../vtr/libs/vtrutil/test/test_math.cpp       |   58 +
 .../libs/vtrutil/test/test_ragged_vector.cpp  |  104 +
 .../vtr/libs/vtrutil/test/test_random.cpp     |   16 +
 .../vtr/libs/vtrutil/test/test_range.cpp      |   41 +
 .../libs/vtrutil/test/test_small_vector.cpp   |  148 +
 .../vtr/libs/vtrutil/test/test_strings.cpp    |  211 +
 .../vtr/libs/vtrutil/test/test_strong_id.cpp  |  130 +
 .../vtr/libs/vtrutil/test/test_vector.cpp     |   57 +
 third_party/vtr/verilog/eltwise_layer.v       | 3057 ++++++++++
 third_party/vtr/verilog/hard_block_include.v  |    3 +
 third_party/vtr/verilog/raygentop.v           | 2978 ++++++++++
 third_party/vtr/vtr_primitives.v              |  329 ++
 220 files changed, 76498 insertions(+), 2 deletions(-)
 create mode 100644 parmys-plugin/Makefile
 create mode 100644 parmys-plugin/include/BlockMemories.hpp
 create mode 100644 parmys-plugin/include/HardSoftLogicMixer.hpp
 create mode 100644 parmys-plugin/include/Hashtable.hpp
 create mode 100644 parmys-plugin/include/MixingOptimization.hpp
 create mode 100644 parmys-plugin/include/adders.h
 create mode 100644 parmys-plugin/include/ast_util.h
 create mode 100644 parmys-plugin/include/config_t.h
 create mode 100644 parmys-plugin/include/hard_blocks.h
 create mode 100644 parmys-plugin/include/memories.h
 create mode 100644 parmys-plugin/include/multipliers.h
 create mode 100644 parmys-plugin/include/netlist_check.h
 create mode 100644 parmys-plugin/include/netlist_cleanup.h
 create mode 100644 parmys-plugin/include/netlist_statistic.h
 create mode 100644 parmys-plugin/include/netlist_utils.h
 create mode 100644 parmys-plugin/include/netlist_visualizer.h
 create mode 100644 parmys-plugin/include/node_creation_library.h
 create mode 100644 parmys-plugin/include/odin_error.h
 create mode 100644 parmys-plugin/include/odin_globals.h
 create mode 100644 parmys-plugin/include/odin_ii.h
 create mode 100644 parmys-plugin/include/odin_types.h
 create mode 100644 parmys-plugin/include/odin_util.h
 create mode 100644 parmys-plugin/include/partial_map.h
 create mode 100644 parmys-plugin/include/read_xml_config_file.h
 create mode 100644 parmys-plugin/include/string_cache.h
 create mode 100644 parmys-plugin/include/subtractions.h
 create mode 100644 parmys-plugin/parmys.cc
 create mode 100644 parmys-plugin/parmys_arch.cc
 create mode 100644 parmys-plugin/parmys_resolve.cc
 create mode 100644 parmys-plugin/parmys_resolve.hpp
 create mode 100644 parmys-plugin/parmys_update.cc
 create mode 100644 parmys-plugin/parmys_update.hpp
 create mode 100644 parmys-plugin/parmys_utils.cc
 create mode 100644 parmys-plugin/parmys_utils.hpp
 create mode 100644 parmys-plugin/src/BlockMemories.cc
 create mode 100644 parmys-plugin/src/HardSoftLogicMixer.cc
 create mode 100644 parmys-plugin/src/Hashtable.cc
 create mode 100644 parmys-plugin/src/MixingOptimization.cc
 create mode 100644 parmys-plugin/src/adders.cc
 create mode 100644 parmys-plugin/src/ast_util.cc
 create mode 100644 parmys-plugin/src/enum_str.cc
 create mode 100644 parmys-plugin/src/hard_blocks.cc
 create mode 100644 parmys-plugin/src/memories.cc
 create mode 100644 parmys-plugin/src/multipliers.cc
 create mode 100644 parmys-plugin/src/netlist_check.cc
 create mode 100644 parmys-plugin/src/netlist_cleanup.cc
 create mode 100644 parmys-plugin/src/netlist_statistic.cc
 create mode 100644 parmys-plugin/src/netlist_utils.cc
 create mode 100644 parmys-plugin/src/netlist_visualizer.cc
 create mode 100644 parmys-plugin/src/node_creation_library.cc
 create mode 100644 parmys-plugin/src/odin_error.cc
 create mode 100644 parmys-plugin/src/odin_ii.cc
 create mode 100644 parmys-plugin/src/odin_util.cc
 create mode 100644 parmys-plugin/src/partial_map.cc
 create mode 100644 parmys-plugin/src/read_xml_config_file.cc
 create mode 100644 parmys-plugin/src/string_cache.cc
 create mode 100644 parmys-plugin/src/subtractions.cc
 create mode 100644 parmys-plugin/techlibs/adff2dff.v
 create mode 100644 parmys-plugin/techlibs/adffe2dff.v
 create mode 100644 parmys-plugin/techlibs/aldff2dff.v
 create mode 100644 parmys-plugin/techlibs/aldffe2dff.v
 create mode 100644 parmys-plugin/techlibs/vtr_primitives.v
 create mode 100644 parmys-plugin/tests/Makefile
 create mode 100644 parmys-plugin/tests/eltwise_layer/eltwise_layer.tcl
 create mode 100644 parmys-plugin/tests/eltwise_layer/eltwise_layer.v
 create mode 100644 parmys-plugin/tests/eltwise_layer/hard_block_include.v
 create mode 100644 parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
 create mode 100644 parmys-plugin/tests/eltwise_layer/odin_config.xml
 create mode 100644 parmys-plugin/tests/raygentop/k6_frac_N10_frac_chain_mem32K_40nm.xml
 create mode 100644 parmys-plugin/tests/raygentop/odin_config.xml
 create mode 100644 parmys-plugin/tests/raygentop/raygentop.tcl
 create mode 100644 parmys-plugin/tests/raygentop/raygentop.v
 create mode 100644 third_party/vtr/LICENSE.md
 create mode 100644 third_party/vtr/arch/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
 create mode 100644 third_party/vtr/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml
 create mode 100644 third_party/vtr/libs/archfpga/.gitignore
 create mode 100644 third_party/vtr/libs/archfpga/CMakeLists.txt
 create mode 100644 third_party/vtr/libs/archfpga/arch/README.txt
 create mode 100644 third_party/vtr/libs/archfpga/arch/mult_luts_arch.xml
 create mode 100755 third_party/vtr/libs/archfpga/arch/sample_arch.xml
 create mode 100644 third_party/vtr/libs/archfpga/src/arch_check.cc
 create mode 100644 third_party/vtr/libs/archfpga/src/arch_check.h
 create mode 100644 third_party/vtr/libs/archfpga/src/arch_error.cc
 create mode 100644 third_party/vtr/libs/archfpga/src/arch_error.h
 create mode 100644 third_party/vtr/libs/archfpga/src/arch_types.h
 create mode 100644 third_party/vtr/libs/archfpga/src/arch_util.cc
 create mode 100644 third_party/vtr/libs/archfpga/src/arch_util.h
 create mode 100644 third_party/vtr/libs/archfpga/src/cad_types.h
 create mode 100644 third_party/vtr/libs/archfpga/src/clock_types.h
 create mode 100644 third_party/vtr/libs/archfpga/src/device_grid.cc
 create mode 100644 third_party/vtr/libs/archfpga/src/device_grid.h
 create mode 100644 third_party/vtr/libs/archfpga/src/echo_arch.cc
 create mode 100644 third_party/vtr/libs/archfpga/src/echo_arch.h
 create mode 100644 third_party/vtr/libs/archfpga/src/histogram.cc
 create mode 100644 third_party/vtr/libs/archfpga/src/histogram.h
 create mode 100644 third_party/vtr/libs/archfpga/src/logic_types.h
 create mode 100644 third_party/vtr/libs/archfpga/src/main.cc
 create mode 100644 third_party/vtr/libs/archfpga/src/parse_switchblocks.cc
 create mode 100644 third_party/vtr/libs/archfpga/src/parse_switchblocks.h
 create mode 100644 third_party/vtr/libs/archfpga/src/physical_types.cc
 create mode 100644 third_party/vtr/libs/archfpga/src/physical_types.h
 create mode 100644 third_party/vtr/libs/archfpga/src/physical_types_util.cc
 create mode 100644 third_party/vtr/libs/archfpga/src/physical_types_util.h
 create mode 100644 third_party/vtr/libs/archfpga/src/read_fpga_interchange_arch.cc
 create mode 100644 third_party/vtr/libs/archfpga/src/read_fpga_interchange_arch.h
 create mode 100644 third_party/vtr/libs/archfpga/src/read_xml_arch_file.cc
 create mode 100644 third_party/vtr/libs/archfpga/src/read_xml_arch_file.h
 create mode 100644 third_party/vtr/libs/archfpga/src/read_xml_util.cc
 create mode 100644 third_party/vtr/libs/archfpga/src/read_xml_util.h
 create mode 100644 third_party/vtr/libs/archfpga/test/main.cpp
 create mode 100644 third_party/vtr/libs/archfpga/test/test_read_xml_arch_file.cpp
 create mode 100644 third_party/vtr/libs/log/.gitignore
 create mode 100644 third_party/vtr/libs/log/CMakeLists.txt
 create mode 100644 third_party/vtr/libs/log/LICENSE.txt
 create mode 100644 third_party/vtr/libs/log/Readme.txt
 create mode 100644 third_party/vtr/libs/log/src/log.cc
 create mode 100644 third_party/vtr/libs/log/src/log.h
 create mode 100644 third_party/vtr/libs/log/src/main.cc
 create mode 100644 third_party/vtr/libs/pugiutil/CMakeLists.txt
 create mode 100644 third_party/vtr/libs/pugiutil/src/pugixml_loc.cc
 create mode 100644 third_party/vtr/libs/pugiutil/src/pugixml_loc.hpp
 create mode 100644 third_party/vtr/libs/pugiutil/src/pugixml_util.cc
 create mode 100644 third_party/vtr/libs/pugiutil/src/pugixml_util.hpp
 create mode 100644 third_party/vtr/libs/rtlnumber/.gitignore
 create mode 100644 third_party/vtr/libs/rtlnumber/CMakeLists.txt
 create mode 100644 third_party/vtr/libs/rtlnumber/Makefile
 create mode 100644 third_party/vtr/libs/rtlnumber/README.md
 create mode 100644 third_party/vtr/libs/rtlnumber/main.cpp
 create mode 100644 third_party/vtr/libs/rtlnumber/regression_tests/basic_regression_tests.csv
 create mode 100644 third_party/vtr/libs/rtlnumber/src/include/internal_bits.hpp
 create mode 100644 third_party/vtr/libs/rtlnumber/src/include/rtl_int.hpp
 create mode 100644 third_party/vtr/libs/rtlnumber/src/include/rtl_utils.hpp
 create mode 100644 third_party/vtr/libs/rtlnumber/src/rtl_int.cc
 create mode 100644 third_party/vtr/libs/rtlnumber/src/rtl_utils.cc
 create mode 100644 third_party/vtr/libs/rtlnumber/unit_test/Makefile
 create mode 100644 third_party/vtr/libs/rtlnumber/unit_test/verilog_bits.cpp
 create mode 100755 third_party/vtr/libs/rtlnumber/verify_librtlnumber.sh
 create mode 100644 third_party/vtr/libs/vpr/src/draw/breakpoint_state_globals.h
 create mode 100644 third_party/vtr/libs/vtrutil/CMakeLists.txt
 create mode 100644 third_party/vtr/libs/vtrutil/cmake/modules/configure_version.cmake
 create mode 100644 third_party/vtr/libs/vtrutil/src/picosha2.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vpr_error.cc
 create mode 100644 third_party/vtr/libs/vtrutil/src/vpr_error.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_array_view.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_assert.cc
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_assert.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_bimap.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_cache.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_color_map.cc
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_color_map.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_digest.cc
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_digest.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_dynamic_bitset.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_error.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_expr_eval.cc
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_expr_eval.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_flat_map.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_geometry.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_geometry.tpp
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_hash.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_linear_map.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_list.cc
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_list.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_log.cc
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_log.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_logic.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_map_util.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_math.cc
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_math.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_memory.cc
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_memory.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_ndmatrix.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_ndoffsetmatrix.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_ostream_guard.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_pair_util.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_path.cc
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_path.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_ragged_matrix.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_random.cc
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_random.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_range.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_rusage.cc
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_rusage.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_sentinels.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_small_vector.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_string_interning.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_string_view.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_strong_id.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_strong_id_range.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_time.cc
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_time.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_token.cc
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_token.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_util.cc
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_util.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_vec_id_set.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_vector.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_vector_map.h
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_version.cpp.in
 create mode 100644 third_party/vtr/libs/vtrutil/src/vtr_version.h
 create mode 100644 third_party/vtr/libs/vtrutil/test/main.cpp
 create mode 100644 third_party/vtr/libs/vtrutil/test/test_array_view.cpp
 create mode 100644 third_party/vtr/libs/vtrutil/test/test_expr_eval.cpp
 create mode 100644 third_party/vtr/libs/vtrutil/test/test_geometry.cpp
 create mode 100644 third_party/vtr/libs/vtrutil/test/test_map_util.cpp
 create mode 100644 third_party/vtr/libs/vtrutil/test/test_math.cpp
 create mode 100644 third_party/vtr/libs/vtrutil/test/test_ragged_vector.cpp
 create mode 100644 third_party/vtr/libs/vtrutil/test/test_random.cpp
 create mode 100644 third_party/vtr/libs/vtrutil/test/test_range.cpp
 create mode 100644 third_party/vtr/libs/vtrutil/test/test_small_vector.cpp
 create mode 100644 third_party/vtr/libs/vtrutil/test/test_strings.cpp
 create mode 100644 third_party/vtr/libs/vtrutil/test/test_strong_id.cpp
 create mode 100644 third_party/vtr/libs/vtrutil/test/test_vector.cpp
 create mode 100644 third_party/vtr/verilog/eltwise_layer.v
 create mode 100644 third_party/vtr/verilog/hard_block_include.v
 create mode 100644 third_party/vtr/verilog/raygentop.v
 create mode 100644 third_party/vtr/vtr_primitives.v

diff --git a/.github/workflows/licensing.yml b/.github/workflows/licensing.yml
index 60e54f613..ddf249b39 100644
--- a/.github/workflows/licensing.yml
+++ b/.github/workflows/licensing.yml
@@ -33,5 +33,14 @@ jobs:
           ./design_introspection-plugin/tests/selection_to_tcl_list/selection_to_tcl_list.v
           ./third_party/minilitex_ddr_arty/minilitex_ddr_arty.v
           ./third_party/VexRiscv_Lite/VexRiscv_Lite.v
+          ./parmys-plugin/tests/eltwise_layer/eltwise_layer.v
+          ./parmys-plugin/tests/raygentop/raygentop.v
+          ./parmys-plugin/tests/eltwise_layer/hard_block_include.v
+          ./parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
+          ./parmys-plugin/tests/raygentop/k6_frac_N10_frac_chain_mem32K_40nm.xml
+          ./parmys-plugin/techlibs/vtr_primitives.v
         third_party: |
           ./third_party/googletest/
+          ./third_party/libargparse/
+          ./third_party/pugixml/
+          ./third_party/vtr/
diff --git a/Makefile b/Makefile
index fc39ff4c3..9e53932f7 100644
--- a/Makefile
+++ b/Makefile
@@ -14,7 +14,8 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-PLUGIN_LIST := fasm xdc params sdc ql-iob design_introspection integrateinv ql-qlf systemverilog uhdm dsp-ff
+#PLUGIN_LIST := fasm xdc params sdc ql-iob design_introspection integrateinv ql-qlf systemverilog uhdm dsp-ff
+PLUGIN_LIST := parmys
 PLUGINS := $(foreach plugin,$(PLUGIN_LIST),$(plugin).so)
 PLUGINS_INSTALL := $(foreach plugin,$(PLUGIN_LIST),install_$(plugin))
 PLUGINS_CLEAN := $(foreach plugin,$(PLUGIN_LIST),clean_$(plugin))
diff --git a/parmys-plugin/Makefile b/parmys-plugin/Makefile
new file mode 100644
index 000000000..ef8ffea19
--- /dev/null
+++ b/parmys-plugin/Makefile
@@ -0,0 +1,112 @@
+# Copyright 2022 Daniel Khadivi
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+PLUGIN_DIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST))))
+
+NAME = parmys
+SOURCES = parmys.cc \
+		  parmys_arch.cc \
+		  parmys_update.cc \
+		  parmys_utils.cc \
+		  parmys_resolve.cc \
+		  src/adders.cc \
+		  src/enum_str.cc \
+		  src/MixingOptimization.cc \
+		  src/read_xml_config_file.cc \
+		  src/odin_error.cc \
+		  src/odin_util.cc \
+		  src/netlist_statistic.cc \
+		  src/netlist_utils.cc \
+		  src/netlist_check.cc \
+		  src/netlist_cleanup.cc \
+		  src/node_creation_library.cc \
+		  src/multipliers.cc \
+		  src/subtractions.cc \
+		  src/HardSoftLogicMixer.cc \
+		  src/odin_ii.cc \
+		  src/string_cache.cc \
+		  src/partial_map.cc \
+		  src/hard_blocks.cc \
+		  src/BlockMemories.cc \
+		  src/memories.cc \
+		  src/netlist_visualizer.cc \
+		  src/Hashtable.cc \
+		  src/ast_util.cc \
+		  ../third_party/vtr/libs/vtrutil/src/vtr_util.cc \
+		  ../third_party/vtr/libs/vtrutil/src/vtr_token.cc \
+		  ../third_party/vtr/libs/vtrutil/src/vtr_memory.cc \
+		  ../third_party/vtr/libs/vtrutil/src/vtr_list.cc \
+		  ../third_party/vtr/libs/vtrutil/src/vtr_log.cc \
+		  ../third_party/vtr/libs/vtrutil/src/vtr_expr_eval.cc \
+		  ../third_party/vtr/libs/vtrutil/src/vtr_digest.cc \
+		  ../third_party/vtr/libs/vtrutil/src/vtr_math.cc \
+		  ../third_party/vtr/libs/vtrutil/src/vtr_path.cc \
+		  ../third_party/vtr/libs/vtrutil/src/vtr_assert.cc \
+		  ../third_party/vtr/libs/log/src/log.cc \
+		  ../third_party/pugixml/src/pugixml.cpp \
+		  ../third_party/libargparse/src/argparse.cpp \
+		  ../third_party/libargparse/src/argparse_formatter.cpp \
+		  ../third_party/libargparse/src/argparse_util.cpp \
+		  ../third_party/vtr/libs/rtlnumber/src/rtl_int.cc \
+		  ../third_party/vtr/libs/rtlnumber/src/rtl_utils.cc \
+		  ../third_party/vtr/libs/pugiutil/src/pugixml_loc.cc \
+		  ../third_party/vtr/libs/pugiutil/src/pugixml_util.cc \
+		  ../third_party/vtr/libs/archfpga/src/physical_types.cc \
+		  ../third_party/vtr/libs/archfpga/src/read_xml_util.cc \
+		  ../third_party/vtr/libs/archfpga/src/arch_error.cc \
+		  ../third_party/vtr/libs/archfpga/src/physical_types_util.cc \
+		  ../third_party/vtr/libs/archfpga/src/arch_check.cc \
+		  ../third_party/vtr/libs/archfpga/src/arch_util.cc \
+		  ../third_party/vtr/libs/archfpga/src/read_xml_arch_file.cc \
+		  ../third_party/vtr/libs/archfpga/src/parse_switchblocks.cc \
+		  ../third_party/vtr/libs/archfpga/src/echo_arch.cc
+
+include ../Makefile_plugin.common
+
+CXXFLAGS += -I./include
+CXXFLAGS += -I../third_party/pugixml/src
+CXXFLAGS += -I../third_party/libargparse/src
+CXXFLAGS += -I../third_party/vtr/libs/archfpga/src
+CXXFLAGS += -I../third_party/vtr/libs/log/src
+CXXFLAGS += -I../third_party/vtr/libs/pugiutil/src
+CXXFLAGS += -I../third_party/vtr/libs/rtlnumber/src/include
+CXXFLAGS += -I../third_party/vtr/libs/rtlnumber/src
+CXXFLAGS += -I../third_party/vtr/libs/vtrutil/src
+CXXFLAGS += -I../third_party/vtr/libs/vpr/src/draw
+
+CXXSTD := c++14
+CXXFLAGS += -std=$(CXXSTD) -Os
+
+LDLIBS += -lpthread
+
+TECHLIBS_DIR = techlibs
+VERILOG_MODULES = adff2dff.v \
+				  adffe2dff.v \
+				  aldff2dff.v \
+				  aldffe2dff.v \
+				  vtr_primitives.v
+
+# Install every file found in $(TECHLIBS_DIR) into $(YOSYS_DATA_DIR)/parmys/.
+# The install commands are chained with && so the first failure aborts the recipe.
+install_modules:
+	$(foreach f,$(wildcard $(TECHLIBS_DIR)/*),install -D $(f) $(YOSYS_DATA_DIR)/parmys/$(notdir $(f)) &&) true
+
+install: install_modules
+
+clean_modules:
+	rm -rf ./third_party
+
+clean: clean_modules
\ No newline at end of file
diff --git a/parmys-plugin/include/BlockMemories.hpp b/parmys-plugin/include/BlockMemories.hpp
new file mode 100644
index 000000000..76996b33e
--- /dev/null
+++ b/parmys-plugin/include/BlockMemories.hpp
@@ -0,0 +1,100 @@
+/**
+ * Copyright (c) 2021 Seyed Alireza Damghani (sdamghann@gmail.com)
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * @file This file includes the definition of the basic structure
+ * used in Odin-II Block Memory resolving process. Moreover, it
+ * provides the declaration of the related public routines.
+ */
+#ifndef _BLOCK_MEMORIES_H_
+#define _BLOCK_MEMORIES_H_
+
+#include <unordered_map>
+
+// Max number of bits for register of array inference
+const int LUTRAM_INFERENCE_THRESHOLD_MIN = 80;  // Min number of bits for LUTRAM inference
+const int LUTRAM_INFERENCE_THRESHOLD_MAX = 640; // Max number of bits for LUTRAM inference
+
+/*
+ * Contains a pointer to the block memory node as well as other
+ * information which is used in creating the block memory.
+ */
+struct block_memory_t {
+    loc_t loc;
+    nnode_t *node;
+
+    signal_list_t *read_addr;
+    signal_list_t *read_data;
+    signal_list_t *read_en;
+
+    signal_list_t *write_addr;
+    signal_list_t *write_data;
+    signal_list_t *write_en;
+
+    signal_list_t *clk;
+
+    char *name;
+    char *memory_id;
+};
+
+typedef std::unordered_map<std::string, block_memory_t *> block_memory_hashtable;
+
+/**
+ * Block memories information. This variable becomes invalid once
+ * the optimization iterations (run before partial mapping) are done.
+ */
+struct block_memory_information_t {
+    /**
+     * block_memory_list and read-only_memory_list linked lists
+     * include the corresponding memory instances. Each instance
+     * comprises memory signal lists, location, memory id and the
+     * corresponding netlist node. These linked lists are used in
+     * optimization iteration, including signal pruning, REG/LUTRAM
+     * threshold checking and mapping to VTR memory blocks. Once the
+     * optimization iteration is done, these linked lists are not
+     * valid anymore.
+     *
+     * [NOTE] Block memories and read-only memory both use the same
+     * structure (block_memory_t*). They only differ in terms of
+     * their member variables initialization. The naming convention
+     * is only due to the ease of the coding process.
+     */
+    vtr::t_linked_vptr *block_memory_list;
+    vtr::t_linked_vptr *read_only_memory_list;
+    /* hashtable to look up block memories faster */
+    block_memory_hashtable block_memories;
+    block_memory_hashtable read_only_memories;
+};
+extern block_memory_information_t block_memories_info;
+
+extern void init_block_memory_index();
+extern void free_block_memories();
+
+extern void resolve_ymem_node(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist);
+extern void resolve_ymem2_node(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist);
+extern void resolve_bram_node(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist);
+extern void resolve_rom_node(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist);
+
+extern void iterate_block_memories(netlist_t *netlist);
+
+#endif // _BLOCK_MEMORIES_H_
diff --git a/parmys-plugin/include/HardSoftLogicMixer.hpp b/parmys-plugin/include/HardSoftLogicMixer.hpp
new file mode 100644
index 000000000..21d431359
--- /dev/null
+++ b/parmys-plugin/include/HardSoftLogicMixer.hpp
@@ -0,0 +1,97 @@
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef HARD_SOFT_LOGIC_MIXER_HPP
+#define HARD_SOFT_LOGIC_MIXER_HPP
+
+#include "MixingOptimization.hpp"
+#include "odin_types.h" // netlist_t, config_t
+
+class HardSoftLogicMixer
+{
+  public:
+    HardSoftLogicMixer();
+    ~HardSoftLogicMixer();
+    /*----------------------------------------------------------------------
+     * Returns whether the specific node is a candidate for implementing
+     * in hard block
+     *---------------------------------------------------------------------
+     */
+    bool hardenable(nnode_t *node);
+
+    /*----------------------------------------------------------------------
+     * Function: enabled
+     * Queries if mixing optimization is enabled for this kind of hard blocks
+     *---------------------------------------------------------------------
+     */
+    bool enabled(nnode_t *node);
+
+    /*----------------------------------------------------------------------
+     * Function: perform_optimizations
+     * For all nodes that were noted as candidates to be implemented
+     * on the hard blocks, launches the corresponding procedure of choosing
+     * the corresponding blocks
+     * Parameters: netlist_t *
+     *---------------------------------------------------------------------
+     */
+    void perform_optimizations(netlist_t *netlist);
+
+    /*----------------------------------------------------------------------
+     * Function: partial_map_node
+     * High-level call to provide support for partial mapping layer
+     * Parameters:
+     *      node_t * : pointer to node needs to perform mapping
+     *      netlist_t : pointer to netlist
+     *---------------------------------------------------------------------
+     */
+    void partial_map_node(nnode_t *node, short traverse_number, netlist_t *);
+
+    /*----------------------------------------------------------------------
+     * Function: note_candidate_node
+     * Calculates number of available hard blocks by issuing a call,
+     * traverses the netlist and statistics to figure out
+     * which operation should be implemented on the hard block
+     * Parameters:
+     *      node_t * : pointer to candidate node
+     *---------------------------------------------------------------------
+     */
+    void note_candidate_node(nnode_t *node);
+
+    // This is a container containing all optimization passes
+    MixingOpt *_opts[operation_list_END];
+
+  private:
+    /*----------------------------------------------------------------------
+     * Function: hard_blocks_needed
+     * Returns cached value calculated from netlist, for a specific
+     * optimization kind
+     *---------------------------------------------------------------------
+     */
+    int hard_blocks_needed(operation_list);
+
+    // This array is composed of vectors, that store nodes that
+    // are potential candidates for performing mixing optimization
+    std::vector<nnode_t *> _nodes_by_opt[operation_list_END];
+};
+
+#endif
diff --git a/parmys-plugin/include/Hashtable.hpp b/parmys-plugin/include/Hashtable.hpp
new file mode 100644
index 000000000..6cf79436d
--- /dev/null
+++ b/parmys-plugin/include/Hashtable.hpp
@@ -0,0 +1,49 @@
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef HASHTABLE_H
+#define HASHTABLE_H
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string>
+#include <unordered_map>
+
+class Hashtable
+{
+  private:
+    std::unordered_map<std::string, void *> my_map;
+
+  public:
+    // Adds an item to the hashtable.
+    void add(std::string key, void *item);
+    // Removes an item from the hashtable. If the item is not present, a null pointer is returned.
+    void *remove(std::string key);
+    // Gets an item from the hashtable without removing it. If the item is not present, a null pointer is returned.
+    void *get(std::string key);
+    // Check to see if the hashtable is empty.
+    bool is_empty();
+    // calls free on each item.
+    void destroy_free_items();
+};
+
+#endif
diff --git a/parmys-plugin/include/MixingOptimization.hpp b/parmys-plugin/include/MixingOptimization.hpp
new file mode 100644
index 000000000..717030fe6
--- /dev/null
+++ b/parmys-plugin/include/MixingOptimization.hpp
@@ -0,0 +1,222 @@
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef MIXING_OPTIMIZATION_HPP
+#define MIXING_OPTIMIZATION_HPP
+#include "odin_types.h" // netlist_t, config_t
+
+class HardSoftLogicMixer;
+/**
+ * @brief A base class in hierarchy for complex synthesis
+ * allowing for mixing soft and hard logic
+ */
+class MixingOpt
+{
+  public:
+    /**
+     * @brief Construct a new Mixing Opt object for disabled optimization
+     * usable for querying 'hardenable' condition
+     */
+    MixingOpt() { _enabled = false; }
+
+    /**
+     * @brief Construct a new Mixing Opt object
+     *
+     * By default, all optimizations only share
+     * the ratio of blocks to be implemented in
+     * hard logic
+     * @param ratio, a value within 0 to 1 to
+     * implement ratio*requested hard blocks in
+     * hard logic
+     * @param kind a kind of blocks that correspond
+     * to optimization pass
+     */
+    MixingOpt(float ratio, operation_list kind) : _kind(kind) { _ratio = ratio; }
+
+    /**
+     * @brief Destroy the Mixing Opt object
+     * required by compiler
+     */
+    virtual ~MixingOpt() = default;
+
+    /**
+     * @brief assign weights to the candidate nodes vector, according to netlist_statistic
+     * @param netlist pointer to the netlist
+     * @param nodes vector of candidate nodes
+     */
+    virtual void assign_weights(netlist_t *netlist, std::vector<nnode_t *> nodes);
+
+    /**
+     * @brief Checks if the optimization is enabled
+     *
+     * @return the value of the _enabled flag
+     */
+    virtual bool enabled() { return _enabled; }
+
+    /**
+     * @brief Instantiates an alternative (not on hard blocks)
+     * implementation for the operation
+     *
+     * @param netlist
+     * @param nodes
+     */
+    virtual void instantiate_soft_logic(netlist_t *netlist, std::vector<nnode_t *> nodes);
+
+    /**
+     * @brief performs the optimization pass, varies between kinds.
+     * If the implementation is not provided within the inherited class
+     * will throw ODIN error
+     *
+     * @param netlist_t* pointer to a global netlist
+     * @param std::vector<nnode_t*> a vector with nodes the optimization
+     * pass is concerned (all of which are potential candidates to
+     * be implemented in hard blocks for a given _kind)
+     */
+    virtual void perform(netlist_t *, std::vector<nnode_t *> &);
+
+    /**
+     * @brief Set the number of blocks required,
+     * as counted in the netlist
+     *
+     * @param count
+     */
+    virtual void set_blocks_needed(int count);
+
+    operation_list get_kind() { return _kind; }
+
+    /**
+     * @brief based on criteria for hardening given kind of operation, return
+     * if the node should be implemented in hard blocks
+     *
+     * @param nnode_t* pointer to the node
+     */
+    virtual bool hardenable(nnode_t *) { return false; }
+
+    /**
+     * @brief allowing for replacing with dynamic polymorphism for different
+     * kinds of nodes
+     *
+     * @param nnode_t* pointer to the node
+     */
+    virtual void partial_map_node(nnode_t *, short, netlist_t *, HardSoftLogicMixer *);
+
+  protected:
+    /**
+     * @brief a routine that will multiply
+     * required blocks by the ratio
+     */
+    virtual void scale_counts();
+
+    /**
+     * @brief this variable allows to cache traverse value
+     *
+     */
+    short cached_traverse_value = 0;
+
+    // an integer representing the number of required hard blocks
+    // that should be estimated and updated through set blocks needed
+    int _blocks_count = -1;
+    // a boolean type to double check if the optimization is enabled
+    bool _enabled = false;
+    // a parameter allowing for scaling counts
+    float _ratio = -1.0;
+    // an enum kind variable, corresponding to an optimization pass
+    operation_list _kind = operation_list_END;
+};
+
+class MultsOpt : public MixingOpt
+{
+  public:
+    /**
+     * @brief Construct a new Mults Opt object for disabled optimization
+     * usable for querying 'hardenable' condition
+     */
+    MultsOpt() : MixingOpt() {}
+
+    /**
+     * @brief Construct a new Mults Opt object
+     * from ratio parameter
+     * @param ratio
+     */
+    MultsOpt(float ratio);
+    /**
+     * @brief Construct a new Mults Opt object
+     * allowing to set exact number of multipliers
+     * that will be used
+     * @param exact
+     */
+    MultsOpt(int exact);
+
+    /**
+     * @brief assign weights to the candidate nodes vector, according to netlist_statistic
+     *
+     * @param nodes pointer to the vector with mults
+     */
+    virtual void assign_weights(netlist_t *netlist, std::vector<nnode_t *> nodes);
+
+    /**
+     * @brief allowing for replacing with dynamic polymorphism for different
+     * kinds of nodes
+     *
+     * @param nnode_t* pointer to the node
+     */
+    virtual void partial_map_node(nnode_t *, short, netlist_t *, HardSoftLogicMixer *);
+    /**
+     * @brief Instantiates an alternative (not on hard blocks)
+     * implementation for the operation
+     *
+     * @param netlist
+     * @param nodes
+     */
+    virtual void instantiate_soft_logic(netlist_t *netlist, std::vector<nnode_t *> nodes);
+
+    /**
+     * @brief performs the optimization pass, specifically for multipliers.
+     * If the implementation is not provided within the inherited class
+     * will throw ODIN error
+     *
+     * @param netlist_t* pointer to a global netlist
+     * @param std::vector<nnode_t*> a vector with nodes the optimization
+     * pass is concerned (all of which are potential candidates to
+     * be implemented in hard blocks for a given _kind)
+     */
+    virtual void perform(netlist_t *netlist, std::vector<nnode_t *> &);
+
+    /**
+     * @brief Set the number of blocks required
+     * by counting in netlist. Has to be overridden, to account
+     * for the specifics of the optimization
+     *
+     * @param count
+     */
+    virtual void set_blocks_needed(int);
+
+    /**
+     * @brief based on criteria for hardening given kind of operation, return
+     * if the node should be implemented in hard blocks
+     *
+     * @param nnode_t* pointer to the node
+     */
+    virtual bool hardenable(nnode_t *);
+};
+
+#endif
diff --git a/parmys-plugin/include/adders.h b/parmys-plugin/include/adders.h
new file mode 100644
index 000000000..4e228a804
--- /dev/null
+++ b/parmys-plugin/include/adders.h
@@ -0,0 +1,72 @@
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef ADDERS_H
+#define ADDERS_H
+
+#include "odin_types.h"
+#include "read_xml_arch_file.h"
+#include <vector>
+
+struct t_adder {
+    int size_a;
+    int size_b;
+    int size_cin;
+    int size_sumout;
+    int size_cout;
+    struct t_adder *next;
+};
+
+extern t_model *hard_adders;
+extern vtr::t_linked_vptr *add_list;
+extern vtr::t_linked_vptr *chain_list;
+extern vtr::t_linked_vptr *processed_adder_list;
+extern int total;
+extern int min_add;
+extern int min_threshold_adder;
+
+void init_add_distribution();
+void report_add_distribution();
+void declare_hard_adder(nnode_t *node);
+void instantiate_hard_adder(nnode_t *node, short mark, netlist_t *netlist);
+void find_hard_adders();
+void add_the_blackbox_for_adds_yosys(Yosys::Design *design);
+void define_add_function_yosys(nnode_t *node, Yosys::Module *module, Yosys::Design *design);
+void split_adder(nnode_t *node, int a, int b, int sizea, int sizeb, int cin, int cout, int count, netlist_t *netlist);
+void iterate_adders(netlist_t *netlist);
+void clean_adders();
+void reduce_operations(netlist_t *netlist, operation_list op);
+void traverse_list(operation_list oper, vtr::t_linked_vptr *place);
+void match_node(vtr::t_linked_vptr *place, operation_list oper);
+int match_ports(nnode_t *node, nnode_t *next_node, operation_list oper);
+void traverse_operation_node(ast_node_t *node, char *component[], operation_list op, int *mark);
+void merge_nodes(nnode_t *node, nnode_t *next_node);
+void remove_list_node(vtr::t_linked_vptr *node, vtr::t_linked_vptr *place);
+void remove_fanout_pins(nnode_t *node);
+void reallocate_pins(nnode_t *node, nnode_t *next_node);
+void free_op_nodes(nnode_t *node);
+int match_pins(nnode_t *node, nnode_t *next_node);
+
+void instantiate_add_w_carry_block(int *width, nnode_t *node, short mark, netlist_t *netlist, short subtraction);
+nnode_t *check_missing_ports(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist);
+
+#endif // ADDERS_H
diff --git a/parmys-plugin/include/ast_util.h b/parmys-plugin/include/ast_util.h
new file mode 100644
index 000000000..af19d9637
--- /dev/null
+++ b/parmys-plugin/include/ast_util.h
@@ -0,0 +1,9 @@
+#ifndef AST_UTIL_H
+#define AST_UTIL_H
+
+#include "odin_types.h"
+
+ast_node_t *create_node_w_type(ids id, loc_t loc);
+ast_node_t *create_tree_node_id(char *string, loc_t loc);
+
+#endif
diff --git a/parmys-plugin/include/config_t.h b/parmys-plugin/include/config_t.h
new file mode 100644
index 000000000..717ef41b2
--- /dev/null
+++ b/parmys-plugin/include/config_t.h
@@ -0,0 +1,47 @@
+#ifndef CONFIG_T_H
+#define CONFIG_T_H
+
+#include "odin_types.h"
+#include <string>
+#include <vector>
+
+/* This is the data structure that holds config file details */
+struct config_t {
+    std::vector<std::string> list_of_file_names;
+
+    std::string debug_output_path; // path for where to output the debug outputs
+    std::string dsp_verilog;       // path for the output Verilog file including target DSPs' declaration
+    bool coarsen;                  // Specify if the input BLIF is coarse-grain
+
+    bool output_netlist_graphs; // switch that outputs netlist graphs per node for use with GraphViz tools
+
+    int min_hard_multiplier; // threshold from hard to soft logic
+    int mult_padding;        // setting how multipliers are padded to fit fixed size
+    // Flag for fixed or variable hard mult (1 or 0)
+    int fixed_hard_multiplier;
+    // Flag for splitting hard multipliers. If fixed_hard_multiplier is set, this must be 1.
+    int split_hard_multiplier;
+    // 1 to split memory width down to a size of 1. 0 to split to arch width.
+    char split_memory_width;
+    // Set to a positive integer to split memory depth to that address width. 0 to split to arch width.
+    int split_memory_depth;
+
+    // Flag for fixed or variable hard adder (1 or 0)
+    int fixed_hard_adder;
+    //  Threshold from hard to soft logic
+    int min_threshold_adder;
+    // defines if the first cin of an adder/subtractor is connected to a global gnd/vdd
+    // or generated using a dummy adder with both inputs set to gnd/vdd
+    bool adder_cin_global;
+
+    // If the memory is smaller than both of these, it will be converted to soft logic.
+    int soft_logic_memory_depth_threshold;
+    int soft_logic_memory_width_threshold;
+
+    std::string arch_file; // Name of the FPGA architecture file
+    std::string tcl_file;  // TCL file to be run by yosys
+};
+
+extern config_t configuration;
+
+#endif
diff --git a/parmys-plugin/include/hard_blocks.h b/parmys-plugin/include/hard_blocks.h
new file mode 100644
index 000000000..726a0e23e
--- /dev/null
+++ b/parmys-plugin/include/hard_blocks.h
@@ -0,0 +1,38 @@
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef HARD_BLOCKS_H
+#define HARD_BLOCKS_H
+
+#include "odin_types.h"
+
+extern STRING_CACHE *hard_block_names;
+
+void register_hard_blocks();
+t_model *find_hard_block(const char *name);
+void cell_hard_block(nnode_t *node, Yosys::Module *module, netlist_t *netlist, Yosys::Design *design);
+void output_hard_blocks_yosys(Yosys::Design *design);
+void instantiate_hard_block(nnode_t *node, short mark, netlist_t *netlist);
+t_model_ports *get_model_port(t_model_ports *ports, const char *name);
+
+#endif
diff --git a/parmys-plugin/include/memories.h b/parmys-plugin/include/memories.h
new file mode 100644
index 000000000..c0550094b
--- /dev/null
+++ b/parmys-plugin/include/memories.h
@@ -0,0 +1,106 @@
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef MEMORIES_H
+#define MEMORIES_H
+
+#include "odin_types.h"
+
+extern vtr::t_linked_vptr *sp_memory_list;
+extern vtr::t_linked_vptr *dp_memory_list;
+extern t_model *single_port_rams;
+extern t_model *dual_port_rams;
+
+#define HARD_RAM_ADDR_LIMIT 33
+#define SOFT_RAM_ADDR_LIMIT 10
+
+struct sp_ram_signals {
+    signal_list_t *addr;
+    signal_list_t *data;
+    signal_list_t *out;
+    npin_t *we;
+    npin_t *clk;
+};
+
+struct dp_ram_signals {
+    signal_list_t *addr1;
+    signal_list_t *addr2;
+    signal_list_t *data1;
+    signal_list_t *data2;
+    signal_list_t *out1;
+    signal_list_t *out2;
+    npin_t *we1;
+    npin_t *we2;
+    npin_t *clk;
+};
+
+extern sp_ram_signals *init_sp_ram_signals();
+extern dp_ram_signals *init_dp_ram_signals();
+
+long get_sp_ram_split_depth();
+long get_dp_ram_split_depth();
+
+sp_ram_signals *get_sp_ram_signals(nnode_t *node);
+void free_sp_ram_signals(sp_ram_signals *signalsvar);
+
+dp_ram_signals *get_dp_ram_signals(nnode_t *node);
+void free_dp_ram_signals(dp_ram_signals *signalsvar);
+
+bool is_sp_ram(nnode_t *node);
+bool is_dp_ram(nnode_t *node);
+
+bool is_blif_sp_ram(nnode_t *node);
+bool is_blif_dp_ram(nnode_t *node);
+
+void check_memories_and_report_distribution();
+
+long get_sp_ram_depth(nnode_t *node);
+long get_dp_ram_depth(nnode_t *node);
+long get_sp_ram_width(nnode_t *node);
+long get_dp_ram_width(nnode_t *node);
+
+void split_sp_memory_depth(nnode_t *node, int split_size);
+void split_dp_memory_depth(nnode_t *node, int split_size);
+void split_sp_memory_width(nnode_t *node, int target_size);
+void split_dp_memory_width(nnode_t *node, int target_size);
+void iterate_memories(netlist_t *netlist);
+void free_memory_lists();
+
+void instantiate_soft_single_port_ram(nnode_t *node, short mark, netlist_t *netlist);
+void instantiate_soft_dual_port_ram(nnode_t *node, short mark, netlist_t *netlist);
+
+signal_list_t *create_decoder(nnode_t *node, short mark, signal_list_t *input_list, netlist_t *netlist);
+
+extern void add_input_port_to_memory(nnode_t *node, signal_list_t *signalsvar, const char *port_name);
+extern void add_output_port_to_memory(nnode_t *node, signal_list_t *signalsvar, const char *port_name);
+extern void remap_input_port_to_memory(nnode_t *node, signal_list_t *signalsvar, const char *port_name);
+
+extern nnode_t *create_single_port_ram(sp_ram_signals *spram_signals, nnode_t *node);
+extern nnode_t *create_dual_port_ram(dp_ram_signals *dpram_signals, nnode_t *node);
+
+extern void register_memory_model(nnode_t *mem);
+
+extern void resolve_single_port_ram(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist);
+extern void resolve_dual_port_ram(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist);
+
+#endif // MEMORIES_H
diff --git a/parmys-plugin/include/multipliers.h b/parmys-plugin/include/multipliers.h
new file mode 100644
index 000000000..2c6112c4e
--- /dev/null
+++ b/parmys-plugin/include/multipliers.h
@@ -0,0 +1,64 @@
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef MULTIPLIERS_H
+#define MULTIPLIERS_H
+
+#include "odin_types.h"
+#include "read_xml_arch_file.h"
+
+struct t_multiplier {
+    int size_a;
+    int size_b;
+    int size_out;
+    struct t_multiplier *next;
+};
+
+enum class mult_port_stat_e {
+    NOT_CONSTANT,         // neither input port is constant
+    MULTIPLIER_CONSTANT,  // first input port is constant
+    MULTIPICAND_CONSTANT, // second input port is constant
+    CONSTANT,             // both input ports are constant
+    mult_port_stat_END    // sentinel: last enumerator, not a real port status
+};
+
+extern t_model *hard_multipliers;
+extern vtr::t_linked_vptr *mult_list;
+extern int min_mult;
+
+extern void init_mult_distribution();
+extern void report_mult_distribution();
+extern void declare_hard_multiplier(nnode_t *node);
+extern void instantiate_hard_multiplier(nnode_t *node, short mark, netlist_t *netlist);
+extern void instantiate_simple_soft_multiplier(nnode_t *node, short mark, netlist_t *netlist);
+extern void connect_constant_mult_outputs(nnode_t *node, signal_list_t *output_signal_list);
+extern void find_hard_multipliers();
+extern void add_the_blackbox_for_mults_yosys(Yosys::Design *design);
+extern void define_mult_function_yosys(nnode_t *node, Yosys::Module *module, Yosys::Design *design);
+extern void split_multiplier(nnode_t *node, int a0, int b0, int a1, int b1, netlist_t *netlist);
+extern void iterate_multipliers(netlist_t *netlist);
+extern bool check_constant_multipication(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist);
+extern void check_multiplier_port_size(nnode_t *node);
+extern void clean_multipliers();
+extern void free_multipliers();
+
+#endif // MULTIPLIERS_H
diff --git a/parmys-plugin/include/netlist_check.h b/parmys-plugin/include/netlist_check.h
new file mode 100644
index 000000000..79f3c7ff9
--- /dev/null
+++ b/parmys-plugin/include/netlist_check.h
@@ -0,0 +1,6 @@
+#ifndef NETLIST_CHECK_H
+#define NETLIST_CHECK_H
+
+struct netlist_t; // forward declaration: this header no longer relies on the includer's include order
+void check_netlist(netlist_t *netlist);
+#endif
diff --git a/parmys-plugin/include/netlist_cleanup.h b/parmys-plugin/include/netlist_cleanup.h
new file mode 100644
index 000000000..a94d6df71
--- /dev/null
+++ b/parmys-plugin/include/netlist_cleanup.h
@@ -0,0 +1,6 @@
+#ifndef NETLIST_CLEANUP_H
+#define NETLIST_CLEANUP_H
+
+struct netlist_t; // forward declaration: this header no longer relies on the includer's include order
+void remove_unused_logic(netlist_t *netlist);
+#endif
diff --git a/parmys-plugin/include/netlist_statistic.h b/parmys-plugin/include/netlist_statistic.h
new file mode 100644
index 000000000..fc7c4b752
--- /dev/null
+++ b/parmys-plugin/include/netlist_statistic.h
@@ -0,0 +1,25 @@
+#ifndef NETLIST_STATISTIC_HPP
+#define NETLIST_STATISTIC_HPP
+
+#include "netlist_utils.h"
+
+static const unsigned int traversal_id = 0; // its address supplies the marker value defined on the next line
+static const uintptr_t mult_optimization_traverse_value = (uintptr_t)&traversal_id; // NOTE(review): both objects are 'static' in a header, so every translation unit gets its own address/value -- confirm this is intended
+
+stat_t *get_stats(nnode_t *node, netlist_t *netlist, uintptr_t traverse_mark_number);
+
+void init_stat(netlist_t *netlist);
+void compute_statistics(netlist_t *netlist, bool display);
+
+/**
+ * @brief This function will calculate and assign weights related
+ * to mixing hard and soft logic implementation for certain kind
+ * of logic blocks
+ * @param node
+ * The node that needs its weight to be assigned
+ * @param netlist
+ * netlist, has to be passed to the counting functions
+ */
+void mixing_optimization_stats(nnode_t *node, netlist_t *netlist);
+
+#endif // NETLIST_STATISTIC_HPP
diff --git a/parmys-plugin/include/netlist_utils.h b/parmys-plugin/include/netlist_utils.h
new file mode 100644
index 000000000..5e5958e06
--- /dev/null
+++ b/parmys-plugin/include/netlist_utils.h
@@ -0,0 +1,73 @@
+#ifndef NETLIST_UTILS_H_FUNCTIONS
+#define NETLIST_UTILS_H_FUNCTIONS
+
+#include "odin_types.h"
+
+nnode_t *allocate_nnode(loc_t loc);
+npin_t *allocate_npin();
+nnet_t *allocate_nnet();
+
+nnode_t *free_nnode(nnode_t *to_free);
+npin_t *free_npin(npin_t *to_free);
+nnet_t *free_nnet(nnet_t *to_free);
+
+npin_t *get_zero_pin(netlist_t *netlist);
+npin_t *get_pad_pin(netlist_t *netlist);
+npin_t *get_one_pin(netlist_t *netlist);
+npin_t *copy_input_npin(npin_t *copy_pin);
+npin_t *copy_output_npin(npin_t *copy_pin);
+
+void allocate_more_input_pins(nnode_t *node, int width);
+void allocate_more_output_pins(nnode_t *node, int width);
+
+void move_input_pin(nnode_t *node, int old_idx, int new_idx);
+void move_output_pin(nnode_t *node, int old_idx, int new_idx);
+void add_input_pin_to_node(nnode_t *node, npin_t *pin, int pin_idx);
+void add_fanout_pin_to_net(nnet_t *net, npin_t *pin);
+void add_output_pin_to_node(nnode_t *node, npin_t *pin, int pin_idx);
+void add_driver_pin_to_net(nnet_t *net, npin_t *pin);
+void add_output_port_information(nnode_t *node, int port_width);
+void add_input_port_information(nnode_t *node, int port_width);
+
+void join_nets(nnet_t *net, nnet_t *input_net);
+
+void remap_pin_to_new_net(npin_t *pin, nnet_t *new_net);
+void remap_pin_to_new_node(npin_t *pin, nnode_t *new_node, int pin_idx);
+
+attr_t *init_attribute();
+void copy_attribute(attr_t *to, attr_t *copy);
+void copy_signedness(attr_t *to, attr_t *copy);
+void free_attribute(attr_t *attribute);
+
+signal_list_t *init_signal_list();
+extern bool is_constant_signal(signal_list_t *signal, netlist_t *netlist);
+extern long constant_signal_value(signal_list_t *signal, netlist_t *netlist);
+extern signal_list_t *create_constant_signal(const long long value, const int desired_width, netlist_t *netlist);
+extern signal_list_t *prune_signal(signal_list_t *signalsvar, long signal_width, long prune_size, int num_of_signals);
+extern signal_list_t **split_signal_list(signal_list_t *signalsvar, const int width);
+extern bool sigcmp(signal_list_t *sig, signal_list_t *be_checked);
+void add_pin_to_signal_list(signal_list_t *list, npin_t *pin);
+signal_list_t *combine_lists(signal_list_t **signal_lists, int num_signal_lists);
+signal_list_t *copy_input_signals(signal_list_t *signalsvar);
+void free_signal_list(signal_list_t *list);
+
+signal_list_t *make_output_pins_for_existing_node(nnode_t *node, int width);
+void connect_nodes(nnode_t *out_node, int out_idx, nnode_t *in_node, int in_idx);
+
+netlist_t *allocate_netlist();
+void free_netlist(netlist_t *to_free);
+
+int get_output_pin_index_from_mapping(nnode_t *node, const char *name);
+int get_output_port_index_from_mapping(nnode_t *node, const char *name);
+int get_input_pin_index_from_mapping(nnode_t *node, const char *name);
+int get_input_port_index_from_mapping(nnode_t *node, const char *name);
+extern npin_t *legalize_polarity(npin_t *pin, edge_type_e pin_polarity, nnode_t *node);
+extern void reduce_input_ports(nnode_t *&node, netlist_t *netlist);
+extern signal_list_t *reduce_signal_list(signal_list_t *signalvar, operation_list signedness, netlist_t *netlist);
+chain_information_t *allocate_chain_info();
+void remove_fanout_pins_from_net(nnet_t *net, npin_t *pin, int id);
+
+extern void equalize_ports_size(nnode_t *&node, uintptr_t traverse_mark_number, netlist_t *netlist);
+extern void delete_npin(npin_t *pin);
+
+#endif
diff --git a/parmys-plugin/include/netlist_visualizer.h b/parmys-plugin/include/netlist_visualizer.h
new file mode 100644
index 000000000..529a5b6fe
--- /dev/null
+++ b/parmys-plugin/include/netlist_visualizer.h
@@ -0,0 +1,10 @@
+#ifndef NETLIST_VISUALIZER_H
+#define NETLIST_VISUALIZER_H
+
+#include "odin_types.h"
+#include <string>
+
+void graphVizOutputNetlist(std::string path, const char *name, uintptr_t marker_value, netlist_t *input_netlist);
+void graphVizOutputCombinationalNet(std::string path, const char *name, uintptr_t marker_value, nnode_t *current_node);
+
+#endif
diff --git a/parmys-plugin/include/node_creation_library.h b/parmys-plugin/include/node_creation_library.h
new file mode 100644
index 000000000..865a17501
--- /dev/null
+++ b/parmys-plugin/include/node_creation_library.h
@@ -0,0 +1,24 @@
+#ifndef NODE_CREATION_LIBRARY_H
+#define NODE_CREATION_LIBRARY_H
+
+#include "odin_types.h"
+
+nnode_t *make_not_gate_with_input(npin_t *input_pin, nnode_t *node, short mark);
+
+nnode_t *make_not_gate(nnode_t *node, short mark);
+nnode_t *make_inverter(npin_t *pin, nnode_t *node, short mark);
+nnode_t *make_1port_logic_gate(operation_list type, int width, nnode_t *node, short mark);
+
+nnode_t *make_1port_gate(operation_list type, int width_input, int width_output, nnode_t *node, short mark);
+nnode_t *make_2port_gate(operation_list type, int width_port1, int width_port2, int width_output, nnode_t *node, short mark);
+nnode_t *make_3port_gate(operation_list type, int width_port1, int width_port2, int width_port3, int width_output, nnode_t *node, short mark);
+nnode_t *make_nport_gate(operation_list type, int port_sizes, int width, int width_output, nnode_t *node, short mark);
+
+char *node_name(nnode_t *node, char *instance_prefix_name);
+char *op_node_name(operation_list op, char *instance_prefix_name);
+
+const char *edge_type_blif_str(edge_type_e edge_type, loc_t loc);
+
+extern nnode_t *make_multiport_smux(signal_list_t **inputs, signal_list_t *selector, int num_muxed_inputs, signal_list_t *outs, nnode_t *node,
+                                    netlist_t *netlist);
+#endif
diff --git a/parmys-plugin/include/odin_error.h b/parmys-plugin/include/odin_error.h
new file mode 100644
index 000000000..cc7930f88
--- /dev/null
+++ b/parmys-plugin/include/odin_error.h
@@ -0,0 +1,56 @@
+#ifndef ODIN_ERROR_H
+#define ODIN_ERROR_H
+
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+#include <vector>
+
+struct loc_t {
+    int file = -1;
+    int line = -1;
+    int col = -1;
+};
+
+enum odin_error {
+    UTIL,
+    PARSE_ARGS,
+    AST,
+    RESOLVE,
+    NETLIST,
+    PARSE_BLIF,
+};
+
+extern const char *odin_error_STR[];
+extern std::vector<std::pair<std::string, int>> include_file_names;
+extern int delayed_errors;
+extern const loc_t unknown_location;
+
+// causes an interrupt in GDB
+[[noreturn]] void _verbose_abort(const char *condition_str, const char *odin_file_name, int odin_line_number, const char *odin_function_name);
+// oassert(condition): aborts via _verbose_abort when condition is false; a single void expression, so it cannot capture a following `else`
+#define oassert(condition)                                                                                                                           \
+    (bool(condition) ? static_cast<void>(0)                                                                                                          \
+                     : _verbose_abort(#condition, __FILE__, __LINE__, __func__))
+
+void _log_message(odin_error error_type, loc_t loc, bool fatal_error, const char *function_file_name, int function_line, const char *function_name,
+                  const char *message, ...); // fatal_error: error_message passes true, warning_message passes false
+
+#define error_message(error_type, loc, message, ...)                                                                                                 \
+    _log_message(error_type, loc, true, __FILE__, __LINE__, __PRETTY_FUNCTION__, message, __VA_ARGS__)
+
+#define warning_message(error_type, loc, message, ...)                                                                                               \
+    _log_message(error_type, loc, false, __FILE__, __LINE__, __PRETTY_FUNCTION__, message, __VA_ARGS__)
+
+#define possible_error_message(error_type, loc, message, ...)                                                                                        \
+    _log_message(error_type, loc, !global_args.permissive.value(), __FILE__, __LINE__, __PRETTY_FUNCTION__, message, __VA_ARGS__)
+
+#define delayed_error_message(error_type, loc, message, ...)                                                                                         \
+    do { /* wrapped so the macro expands to exactly one statement */                                                                                 \
+        _log_message(error_type, loc, false, __FILE__, __LINE__, __PRETTY_FUNCTION__, message, __VA_ARGS__);                                         \
+        delayed_errors += 1;                                                                                                                         \
+    } while (0)
+
+void verify_delayed_error(odin_error error_type);
+
+#endif
diff --git a/parmys-plugin/include/odin_globals.h b/parmys-plugin/include/odin_globals.h
new file mode 100644
index 000000000..cd117e447
--- /dev/null
+++ b/parmys-plugin/include/odin_globals.h
@@ -0,0 +1,48 @@
+#ifndef GLOBALS_H
+#define GLOBALS_H
+
+#include "HardSoftLogicMixer.hpp"
+#include "Hashtable.hpp"
+#include "config_t.h"
+#include "odin_types.h"
+#include "read_xml_arch_file.h"
+#include "string_cache.h"
+
+/**
+ * The cutoff for the number of netlist nodes.
+ * Odin-II prints statistics only for the
+ * netlist nodes whose total number is
+ * greater than this value.
+ */
+constexpr long long UNUSED_NODE_TYPE = 0;
+
+extern global_args_t global_args;
+extern config_t configuration;
+extern loc_t my_location;
+
+extern nnode_t *gnd_node;
+extern nnode_t *vcc_node;
+extern nnode_t *pad_node;
+
+extern char *one_string;
+extern char *zero_string;
+extern char *pad_string;
+
+extern t_arch Arch;
+extern short physical_lut_size;
+
+/* logic optimization mixer, once ODIN is classy, could remove that
+ * and pass as member variable
+ */
+extern HardSoftLogicMixer *mixer;
+
+/**
+ * a global var to specify the need for cleanup after
+ * receiving a coarsen BLIF file as the input.
+ */
+extern bool coarsen_cleanup;
+
+extern strmap<file_type_e> file_type_strmap;
+extern strmap<operation_list> yosys_subckt_strmap;
+
+#endif
diff --git a/parmys-plugin/include/odin_ii.h b/parmys-plugin/include/odin_ii.h
new file mode 100644
index 000000000..ebd03b8d8
--- /dev/null
+++ b/parmys-plugin/include/odin_ii.h
@@ -0,0 +1,10 @@
+#ifndef ODIN_II_H
+#define ODIN_II_H
+
+#include "odin_types.h"
+/* Odin-II exit status enumerator */
+enum ODIN_ERROR_CODE { ERROR_INITIALIZATION, ERROR_PARSE_CONFIG, ERROR_PARSE_ARCH, ERROR_ELABORATION, ERROR_OPTIMIZATION, ERROR_TECHMAP };
+
+void set_default_config();
+
+#endif
diff --git a/parmys-plugin/include/odin_types.h b/parmys-plugin/include/odin_types.h
new file mode 100644
index 000000000..2ea6b8a53
--- /dev/null
+++ b/parmys-plugin/include/odin_types.h
@@ -0,0 +1,671 @@
+#ifndef ODIN_TYPES_H
+#define ODIN_TYPES_H
+/*
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "argparse_value.hpp"
+#include "odin_error.h"
+#include "read_xml_arch_file.h"
+#include "string_cache.h"
+#include <atomic>
+#include <mutex>
+#include <stdbool.h>
+#include <string>
+#include <unordered_map>
+
+#include <stdlib.h>
+
+#include "rtl_int.hpp"
+
+#include "kernel/rtlil.h"
+
+/**
+ * to use short vs long string for output
+ */
+#define ODIN_LONG_STRING 0
+#define ODIN_SHORT_STRING 1
+
+#ifndef DEBUG_ODIN
+#define ODIN_STRING_TYPE ODIN_SHORT_STRING
+#else
+#define ODIN_STRING_TYPE ODIN_LONG_STRING
+#endif
+
+#define ODIN_STD_BITWIDTH (sizeof(long) * 8)
+
+/* buffer size for reading a directory path */
+#define READ_BUFFER_SIZE 1048576 // 1MB
+
+/* unique numbers to mark the nodes as we DFS traverse the netlist */
+#define PARTIAL_MAP_TRAVERSE_VALUE 10
+#define OUTPUT_TRAVERSE_VALUE 12
+#define COUNT_NODES 14 /* NOTE that you can't call countnodes one after the other or the mark will be incorrect */
+#define COMBO_LOOP 15
+#define COMBO_LOOP_ERROR 16
+#define GRAPH_CRUNCH 17
+#define STATS 18
+#define SEQUENTIAL_LEVELIZE 19
+#define RESOLVE_DFS_VALUE 30
+
+/* unique numbers for using void *data entries in some of the datastructures */
+#define RESET -1
+#define LEVELIZE 12
+#define ACTIVATION 13
+
+#define verify_i_o_availabilty(node, expected_input_size, expected_output_size)                                                                      \
+    passed_verify_i_o_availabilty(node, expected_input_size, expected_output_size, __FILE__, __LINE__)
+
+struct ast_node_t;
+struct nnode_t;
+struct npin_t;
+struct nnet_t;
+struct netlist_t;
+
+/* the global arguments of the software */
+struct global_args_t {
+    std::string program_name;
+    // Odin-II root directory
+    std::string program_root;
+    // Path Odin-II is currently running in
+    std::string current_path;
+
+    argparse::ArgValue<std::string> config_file;
+    argparse::ArgValue<std::vector<std::string>> verilog_files;
+    argparse::ArgValue<std::string> blif_file;
+    argparse::ArgValue<std::string> output_file;
+    argparse::ArgValue<std::string> arch_file;   // Name of the FPGA architecture file
+    argparse::ArgValue<std::string> tcl_file;    // TCL file to be run by yosys elaborator
+    argparse::ArgValue<std::string> elaborator;  // Name of the external elaborator tool, currently Yosys is supported, default is Odin
+    argparse::ArgValue<bool> permissive;         // turn possible_errors into warnings
+    argparse::ArgValue<bool> print_parse_tokens; // print the tokens as they are parsed by the parser
+
+    argparse::ArgValue<std::string> high_level_block; // Legacy option, no longer used
+
+    argparse::ArgValue<std::string> top_level_module_name; // force the name of the top level module desired
+
+    argparse::ArgValue<bool> write_netlist_as_dot;
+    argparse::ArgValue<bool> write_ast_as_dot;
+    argparse::ArgValue<bool> all_warnings;
+    argparse::ArgValue<bool> show_help;
+
+    //    argparse::ArgValue<bool> fflegalize;     // makes flip-flops rising edge sensitive
+    argparse::ArgValue<bool> coarsen; // tells Odin-II that the input blif is coarse-grain
+                                      //    argparse::ArgValue<bool> show_yosys_log; // Show Yosys output logs into the standard output stream
+
+    argparse::ArgValue<std::string> adder_def; // DEPRECATED
+
+    // defines if the first cin of an adder/subtractor is connected to a global gnd/vdd
+    // or generated using a dummy adder with both inputs set to gnd/vdd
+    argparse::ArgValue<bool> adder_cin_global;
+
+    /////////////////////
+    // For simulation.
+    /////////////////////
+    // Generate this number of random vectors.
+    argparse::ArgValue<int> sim_num_test_vectors;
+    // Input vectors to simulate instead of generating vectors.
+    argparse::ArgValue<std::string> sim_vector_input_file;
+    // Existing output vectors to verify against.
+    argparse::ArgValue<std::string> sim_vector_output_file;
+    // Simulation output directory
+    argparse::ArgValue<std::string> sim_directory;
+    // Tells the simulator whether or not to generate random vectors which include the unknown logic value.
+    argparse::ArgValue<bool> sim_generate_three_valued_logic;
+    // Output both falling and rising edges in the output_vectors file. (DEFAULT)
+    argparse::ArgValue<bool> sim_output_both_edges;
+    // Request to read mif file input
+    argparse::ArgValue<bool> read_mif_input;
+    // Additional pins, nets, and nodes to output.
+    argparse::ArgValue<std::vector<std::string>> sim_additional_pins;
+    // Comma-separated list of primary input pins to hold high for all cycles but the first.
+    argparse::ArgValue<std::vector<std::string>> sim_hold_high;
+    // Comma-separated list of primary input pins to hold low for all cycles but the first.
+    argparse::ArgValue<std::vector<std::string>> sim_hold_low;
+    // target coverage
+    argparse::ArgValue<double> sim_min_coverage;
+    // simulate until best coverage is achieved
+    argparse::ArgValue<bool> sim_achieve_best;
+
+    argparse::ArgValue<int> parralelized_simulation;
+    argparse::ArgValue<bool> parralelized_simulation_in_batch;
+    // deprecated since this should be defined when compiled
+    argparse::ArgValue<int> sim_initial_value;
+    // The seed for creating the random simulation vectors
+    argparse::ArgValue<int> sim_random_seed;
+
+    argparse::ArgValue<bool> interactive_simulation;
+
+    // Arguments for mixing hard and soft logic
+    argparse::ArgValue<int> exact_mults;
+    argparse::ArgValue<float> mults_ratio;
+};
+
+extern const char *ZERO_GND_ZERO;
+extern const char *ONE_VCC_CNS;
+extern const char *ZERO_PAD_ZERO;
+
+extern const char *SINGLE_PORT_RAM_string;
+extern const char *DUAL_PORT_RAM_string;
+extern const char *LUTRAM_string;
+
+extern const char *edge_type_e_STR[];
+extern const char *operation_list_STR[][2];
+
+template <typename T> using strmap = std::unordered_map<std::string, T>;
+
+enum file_type_e {
+    _ILANG, /* not supported yet */
+    _VERILOG,
+    _VERILOG_HEADER,
+    _BLIF,
+    _EBLIF,     /* not supported yet */
+    _UNDEFINED, /* ERROR */
+    file_type_e_END
+};
+
+enum elaborator_e { _ODIN, _YOSYS, elaborator_e_END };
+
+enum edge_type_e {
+    UNDEFINED_SENSITIVITY,
+    FALLING_EDGE_SENSITIVITY,
+    RISING_EDGE_SENSITIVITY,
+    ACTIVE_HIGH_SENSITIVITY,
+    ACTIVE_LOW_SENSITIVITY,
+    ASYNCHRONOUS_SENSITIVITY,
+    edge_type_e_END
+};
+
+enum circuit_type_e { COMBINATIONAL, SEQUENTIAL, circuit_type_e_END };
+
+enum init_value_e {
+    _0 = 0,
+    _1 = 1,
+    dont_care = 2,
+    undefined = 3,
+};
+
+/**
+ * List of the operation node types supported by Odin-II.
+ * In the synthesis flow, most operations are resolved or mapped
+ * to an operation mode that has an instantiation procedure in the
+ * partial mapping. However, for the techmap flow, nodes are elaborated
+ * into the partial mapper supported operations in BLIF elaboration.
+ * Technically, each Odin-II node should have one of the following
+ * operation types. To add support for a new type you would need to
+ * start from here to see how each operation mode is being resolved.
+ */
+enum operation_list {
+    NO_OP,
+    MULTI_PORT_MUX, // port 1 = control, port 2+ = mux options
+    FF_NODE,
+    BUF_NODE,
+    INPUT_NODE,
+    OUTPUT_NODE,
+    GND_NODE,
+    VCC_NODE,
+    CLOCK_NODE,
+    ADD,            // +
+    MINUS,          // -
+    BITWISE_NOT,    // ~
+    BITWISE_AND,    // &
+    BITWISE_OR,     // |
+    BITWISE_NAND,   // ~&
+    BITWISE_NOR,    // ~|
+    BITWISE_XNOR,   // ~^
+    BITWISE_XOR,    // ^
+    LOGICAL_NOT,    // !
+    LOGICAL_OR,     // ||
+    LOGICAL_AND,    // &&
+    LOGICAL_NAND,   // No Symbol
+    LOGICAL_NOR,    // No Symbol
+    LOGICAL_XNOR,   // No Symbol
+    LOGICAL_XOR,    // No Symbol
+    MULTIPLY,       // *
+    DIVIDE,         // /
+    MODULO,         // %
+    POWER,          // **
+    LT,             // <
+    GT,             // >
+    LOGICAL_EQUAL,  // ==
+    NOT_EQUAL,      // !=
+    LTE,            // <=
+    GTE,            // >=
+    SR,             // >>
+    ASR,            // >>>
+    SL,             // <<
+    ASL,            // <<<
+    CASE_EQUAL,     // ===
+    CASE_NOT_EQUAL, // !==
+    ADDER_FUNC,
+    CARRY_FUNC,
+    MUX_2,
+    SMUX_2, // MUX_2 with single bit selector (no need to add not selector as the second pin) => [SEL] [IN1, IN2] [OUT]
+    BLIF_FUNCTION,
+    NETLIST_FUNCTION,
+    MEMORY,
+    PAD_NODE,
+    HARD_IP,
+    GENERIC,             /* added for the unknown node type */
+    CLOG2,               // $clog2
+    UNSIGNED,            // $unsigned
+    SIGNED,              // $signed
+                         // [START] operations to cover yosys subckt
+    MULTI_BIT_MUX_2,     // like MUX_2 but with n-bit input/output
+    MULTIPORT_nBIT_SMUX, // n-bit input/output in multiple ports
+    PMUX,                // Multiplexer with many inputs using one-hot select signal
+    SDFF,                // data, S to reset value and output port
+    DFFE,                // data, enable to output port
+    SDFFE,               // data, synchronous reset value and enable to output port
+    SDFFCE,              // data, synchronous reset value and enable to reset value and output port
+    DFFSR,               // data, clear and set to output port
+    DFFSRE,              // data, clear and set with enable to output port
+    DLATCH,              // data to output port based on polarity without clk
+    ADLATCH,             // data to output port based on polarity without clk
+    SETCLR,              // set or clear input pins
+    SPRAM,               // representing primitive single port ram
+    DPRAM,               // representing primitive dual port ram
+    YMEM,                // $mem block memory generated by yosys, can have a completely variable number of read and write ports plus clk for each port
+    YMEM2,               // $mem_v2 block memory generated by yosys, can have a completely variable number of read and write ports plus clk for each port
+    BRAM,                // Odin-II block memory, from techlib/bram_bb.v
+    ROM,                 // Odin-II read-only memory, from techlib/rom_bb.v
+                         // [END] operations to cover yosys subckt
+    SKIP,                // to skip mapping for this specific node
+    operation_list_END
+};
+
+enum ids { // kind tag of an AST node (ast_node_t::type); also used as the pin tag npin_t::type
+    NO_ID, // first enumerator, value 0
+    /* top level things */
+    FILE_ITEMS,
+    MODULE,
+    SPECIFY,
+    /* VARIABLES */
+    INPUT,
+    OUTPUT,
+    INOUT,
+    WIRE,
+    REG,
+    GENVAR,
+    PARAMETER,
+    LOCALPARAM,
+    INITIAL,
+    PORT,
+    /* OTHER MODULE ITEMS */
+    MODULE_ITEMS,
+    VAR_DECLARE,
+    VAR_DECLARE_LIST,
+    ASSIGN,
+    /* OTHER MODULE AND FUNCTION ITEMS */
+    FUNCTION,
+    /* OTHER FUNCTION ITEMS */
+    FUNCTION_ITEMS,
+    TASK,
+    TASK_ITEMS,
+    /* primitives */
+    GATE,
+    GATE_INSTANCE,
+    ONE_GATE_INSTANCE,
+    /* Module instances */
+    MODULE_CONNECT_LIST,
+    MODULE_CONNECT,
+    MODULE_PARAMETER_LIST,
+    MODULE_PARAMETER,
+    MODULE_NAMED_INSTANCE,
+    MODULE_INSTANCE,
+    MODULE_MASTER_INSTANCE,
+    ONE_MODULE_INSTANCE,
+    /* Function and task instances */
+    FUNCTION_NAMED_INSTANCE,
+    FUNCTION_INSTANCE,
+    TASK_NAMED_INSTANCE,
+    TASK_INSTANCE,
+    /* Specify Items */
+    SPECIFY_ITEMS,
+    SPECIFY_PARAMETER,
+    SPECIFY_PAL_CONNECTION_STATEMENT,
+    SPECIFY_PAL_CONNECT_LIST,
+    /* statements */
+    STATEMENT,
+    BLOCK,
+    NON_BLOCKING_STATEMENT,
+    BLOCKING_STATEMENT,
+    ASSIGNING_LIST,
+    CASE,
+    CASE_LIST,
+    CASE_ITEM,
+    CASE_DEFAULT,
+    ALWAYS,
+    IF,
+    FOR,
+    WHILE,
+    /* Delay Control */
+    DELAY_CONTROL,
+    POSEDGE,
+    NEGEDGE,
+    /* expressions */
+    TERNARY_OPERATION,
+    BINARY_OPERATION,
+    UNARY_OPERATION,
+    /* basic primitives */
+    ARRAY_REF,
+    RANGE_REF,
+    CONCATENATE,
+    REPLICATE,
+    /* basic identifiers */
+    IDENTIFIERS,
+    NUMBERS,
+    /* C functions */
+    C_ARG_LIST,
+    DISPLAY,
+    FINISH,
+    /* Hard Blocks */
+    HARD_BLOCK,
+    HARD_BLOCK_NAMED_INSTANCE,
+    HARD_BLOCK_CONNECT_LIST,
+    HARD_BLOCK_CONNECT,
+    // EDDIE: new enum value for ids to replace MEMORY from operation_t
+    RAM,
+    ids_END // sentinel: must stay last; its value equals the number of enumerators above it
+};
+
+struct metric_t { // depth/width figures; embedded in stat_t and in netlist_t::output_node_stat
+    double min_depth;
+    double max_depth;
+    double avg_depth;
+    double avg_width;
+};
+
+struct stat_t { // two metric_t records; kept per node (nnode_t::stat) and per net (nnet_t::stat)
+    metric_t upward;   // NOTE(review): presumably metrics of the traversal towards the inputs — confirm
+    metric_t downward; // NOTE(review): presumably metrics of the traversal towards the outputs — confirm
+};
+
+struct typ { // kind-specific payload of an AST node; embedded as ast_node_t::types
+    char *identifier;
+    VNumber *vnumber = nullptr; // defaults to null
+    struct {
+        operation_list op;
+    } operation;
+    struct { // NOTE(review): the is_* members look like boolean flags stored as short — confirm
+        short is_parameter;
+        short is_string;
+        short is_localparam;
+        short is_defparam;
+        short is_port;
+        short is_input;
+        short is_output;
+        short is_inout;
+        short is_wire;
+        short is_reg;
+        short is_genvar;
+        short is_memory;
+        operation_list signedness;
+        VNumber *initial_value = nullptr; // defaults to null
+    } variable;
+    struct {
+        short is_instantiated;
+        ast_node_t **module_instantiations_instance; // array of AST node pointers
+        int size_module_instantiations;              // presumably the length of the array above — confirm
+    } module;
+    struct {
+        short is_instantiated;
+        ast_node_t **function_instantiations_instance; // array of AST node pointers
+        int size_function_instantiations;              // presumably the length of the array above — confirm
+    } function;
+    struct {
+        short is_instantiated;
+        ast_node_t **task_instantiations_instance; // array of AST node pointers
+        int size_task_instantiations;              // presumably the length of the array above — confirm
+    } task;
+    struct {
+        int num_bit_strings; // presumably the length of bit_strings — confirm
+        char **bit_strings;
+    } concat;
+};
+
+struct ast_node_t { // one abstract-syntax-tree node; nnode_t::related_ast_node points at one of these
+    loc_t loc;
+
+    long unique_count;
+    int far_tag;
+    int high_number;
+    ids type;  // node kind, one of enum ids
+    typ types; // kind-specific data, see struct typ
+
+    ast_node_t *identifier_node;
+    ast_node_t **children; // array of child node pointers
+    long num_children;     // presumably the length of children — confirm
+
+    void *hb_port;  // untyped; NOTE(review): presumably a hard-block port — confirm
+    void *net_node; // untyped; NOTE(review): presumably the netlist node built from this AST node — confirm
+    long chunk_size;
+};
+
+//-----------------------------------------------------------------------------------------------------
+
+/* DEFINITIONS for carry chain */
+struct chain_information_t {
+    char *name; // unique name of the chain
+    int count;  // the number of hard blocks in this chain
+    int num_bits;
+};
+
+//-----------------------------------------------------------------------------------------------------
+
+/**
+ * DEFINITIONS of netlist node attributes
+ * The attr_t structure provides the sensitivity of the control signals.
+ * In the synthesis flow, the attribute structure is mostly used to
+ * specify the clock sensitivity. However, in the techmap flow,
+ * it is used in most sections, including DFFs, block memories
+ * and arithmetic operation instantiation.
+ */
+struct attr_t {
+    edge_type_e clk_edge_type;   // clock edge sensitivity
+    edge_type_e clr_polarity;    // clear (reset to GND) polarity
+    edge_type_e set_polarity;    // set to VCC polarity
+    edge_type_e enable_polarity; // enable polarity
+    edge_type_e areset_polarity; // asynchronous reset polarity
+    edge_type_e sreset_polarity; // synchronous reset polarity
+
+    long areset_value; // asynchronous reset value
+    long sreset_value; // synchronous reset value
+
+    operation_list port_a_signed; // NOTE(review): presumably the signedness of port A — confirm
+    operation_list port_b_signed; // NOTE(review): presumably the signedness of port B — confirm
+
+    /* memory node attributes */
+    long size;                   // memory size
+    long offset;                 // ADDR offset
+    char *memory_id;             // the id of memory in verilog file (different from name since for memory it is $mem~#)
+    edge_type_e RD_CLK_ENABLE;   // read clock enable
+    edge_type_e WR_CLK_ENABLE;   // write clock enable
+    edge_type_e RD_CLK_POLARITY; // read clock polarity
+    edge_type_e WR_CLK_POLARITY; // write clock polarity
+    long RD_PORTS;               // Num of read ports
+    long WR_PORTS;               // Num of write ports
+    long DBITS;                  // Data width
+    long ABITS;                  // Addr width
+};
+
+/* DEFINITIONS for all the different types of nodes there are.  This is also cross-referenced in utils.c so that I can get a string version
+ * of these names, so if you add new types in here, be sure to add those same types in utils.c */
+struct nnode_t {
+    Yosys::RTLIL::Cell *cell;
+    Yosys::hashlib::dict<Yosys::RTLIL::IdString, Yosys::RTLIL::Const> cell_parameters; // parameter name -> constant value
+
+    loc_t loc;
+
+    long unique_id;
+    char *name;          // unique name of a node
+    operation_list type; // the type of node
+    int bit_width;       // Size of the operation (e.g. for adders/subtractors)
+
+    ast_node_t *related_ast_node; // the abstract syntax node that made this node
+
+    uintptr_t traverse_visited; // a way to mark if we've visited yet
+    stat_t stat;
+
+    npin_t **input_pins; // the input pins
+    long num_input_pins;
+    int *input_port_sizes; // info about the input ports
+    int num_input_port_sizes;
+
+    npin_t **output_pins; // the output pins
+    long num_output_pins;
+    int *output_port_sizes; // info about the output ports, if there are any
+    int num_output_port_sizes;
+
+    short unique_node_data_id;
+    void *node_data; // this is a point where you can add additional data for your optimization or technique
+
+    int forward_level;           // this is your logic level relative to PIs and FFs .. i.e farthest PI
+    int backward_level;          // this is your reverse logic level relative to POs and FFs .. i.e. farthest PO
+    int sequential_level;        // the associated sequential network that the node is in
+    short sequential_terminator; // if this combinational node is a terminator for the sequential level (connects to flip-flop or Output pin)
+
+    std::vector<std::vector<BitSpace::bit_value_t>> memory_data;
+
+    // For simulation
+    //    int in_queue;           // Flag used by the simulator to avoid double queueing.
+    //    npin_t** undriven_pins; // These pins have been found by the simulator to have no driver.
+    //    int num_undriven_pins;
+    //    int ratio;                  //clock ratio for clock nodes
+    init_value_e initial_value; // initial net value
+                                //    bool internal_clk_warn = false;
+
+    attr_t *attributes;
+
+    //    bool covered = false;
+
+    // For mixing soft and hard logic optimizations
+    // a field that is used for storing weights towards the
+    // mixing optimization.
+    //  value of -1 is reserved for hardened blocks
+    long weight = 0;
+};
+
+struct npin_t { // a pin: links one node (node) to one net (net)
+    long unique_id;
+    ids type; // INPUT or OUTPUT
+    char *name;
+    nnet_t *net; // related net
+    int pin_net_idx;
+    nnode_t *node;    // related node
+    int pin_node_idx; // pin on the node where we're located
+    char *mapping;    // name of mapped port from hard block
+
+    edge_type_e sensitivity;
+
+    ////////////////////
+    // For simulation
+
+    bool delay_cycle;
+
+    unsigned long coverage;
+    bool is_default; // The pin is feeding a mux from logic representing an else or default.
+    bool is_implied; // This signal is implied.
+};
+
+struct nnet_t { // a net: connects driver pins to fanout pins
+    long unique_id;
+    char *name; // name for the net
+    short combined;
+
+    int num_driver_pins;  // presumably the length of driver_pins — confirm
+    npin_t **driver_pins; // the pins that drive the net
+
+    npin_t **fanout_pins; // the pins pointed to by the net
+    int num_fanout_pins;  // the list size of pins
+
+    short unique_net_data_id;
+    void *net_data; // untyped user data
+
+    uintptr_t traverse_visited;
+    stat_t stat;
+    /////////////////////
+    // For simulation
+    //////////////////////
+};
+
+struct signal_list_t { // a list of pin pointers plus two flags
+    npin_t **pins;
+    long count; // presumably the number of entries in pins — confirm
+
+    char is_memory; // NOTE(review): looks like a boolean flag stored as char — confirm
+    char is_adder;  // NOTE(review): looks like a boolean flag stored as char — confirm
+};
+
+struct netlist_t { // the whole netlist: constant drivers, node lists, levelized views, name caches and counters
+    char *identifier;
+
+    nnode_t *gnd_node;
+    nnode_t *vcc_node;
+    nnode_t *pad_node;
+    nnet_t *zero_net;
+    nnet_t *one_net;
+    nnet_t *pad_net;
+    nnode_t **top_input_nodes;
+    int num_top_input_nodes;
+    nnode_t **top_output_nodes;
+    int num_top_output_nodes;
+    nnode_t **ff_nodes;
+    int num_ff_nodes;
+    nnode_t **internal_nodes;
+    int num_internal_nodes;
+    nnode_t **clocks;
+    int num_clocks;
+
+    /* netlist levelized structures */
+    nnode_t ***forward_levels;
+    int num_forward_levels;
+    int *num_at_forward_level;
+    nnode_t **
+      *backward_levels; // NOTE backward levels aren't necessarily perfect.  Because of multiple output pins, a node can be put closer to POs than
+                        // it should be.  To fix, run a rebuild of the list afterwards since the marked "node->backward_level" is correct
+    int num_backward_levels;
+    int *num_at_backward_level;
+
+    nnode_t ***sequential_level_nodes;
+    int num_sequential_levels;
+    int *num_at_sequential_level;
+    /* these structures store the last combinational node in a level before a flip-flop or output pin */
+    nnode_t ***sequential_level_combinational_termination_node;
+    int num_sequential_level_combinational_termination_nodes;
+    int *num_at_sequential_level_combinational_termination_node;
+
+    STRING_CACHE *nets_sc;
+    STRING_CACHE *out_pins_sc;
+    STRING_CACHE *nodes_sc;
+
+    long long num_of_type[operation_list_END]; // one counter per operation_list value
+    long long num_of_node;
+    long long num_logic_element;
+    metric_t output_node_stat;
+
+    t_logical_block_type_ptr type;
+    Yosys::Design *design; // the Yosys design this netlist belongs to
+};
+
+#endif
diff --git a/parmys-plugin/include/odin_util.h b/parmys-plugin/include/odin_util.h
new file mode 100644
index 000000000..8dd23404f
--- /dev/null
+++ b/parmys-plugin/include/odin_util.h
@@ -0,0 +1,27 @@
+#ifndef ODIN_UTIL_H
+#define ODIN_UTIL_H
+
+#include <string>
+
+#include "odin_types.h"
+
+long shift_left_value_with_overflow_check(long input_value, long shift_by, loc_t loc);
+
+const char *name_based_on_op(operation_list op);
+const char *node_name_based_on_op(nnode_t *node);
+
+char *make_full_ref_name(const char *previous, const char *module_name, const char *module_instance_name, const char *signal_name, long bit);
+
+std::string make_simple_name(char *input, const char *flatten_string, char flatten_char);
+
+void *my_malloc_struct(long bytes_to_alloc);
+
+char *append_string(const char *string, const char *appendage, ...); // variadic; NOTE(review): appendage is presumably a printf-style format — confirm
+
+double wall_time();
+
+int odin_sprintf(char *s, const char *format, ...); // variadic; takes a format string
+
+void passed_verify_i_o_availabilty(nnode_t *node, int expected_input_size, int expected_output_size, const char *current_src, int line_src);
+
+#endif // ODIN_UTIL_H
diff --git a/parmys-plugin/include/partial_map.h b/parmys-plugin/include/partial_map.h
new file mode 100644
index 000000000..a7be967f3
--- /dev/null
+++ b/parmys-plugin/include/partial_map.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2009 Peter Andrew Jamieson (jamieson.peter@gmail.com)
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef PARTIAL_MAP_H
+#define PARTIAL_MAP_H
+// NOTE(review): this header includes nothing although it uses netlist_t and nnode_t; the includer must declare them first.
+void partial_map_top(netlist_t *netlist);
+void instantiate_add_w_carry(nnode_t *node, short mark, netlist_t *netlist);
+void instantiate_multi_port_mux(nnode_t *node, short mark, netlist_t *netlist);
+void depth_first_traversal_to_partial_map(short marker_value, netlist_t *netlist);
+
+#endif // PARTIAL_MAP_H
diff --git a/parmys-plugin/include/read_xml_config_file.h b/parmys-plugin/include/read_xml_config_file.h
new file mode 100644
index 000000000..58ee4403e
--- /dev/null
+++ b/parmys-plugin/include/read_xml_config_file.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2009 Peter Andrew Jamieson (jamieson.peter@gmail.com)
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef READ_XML_CONFIG_FILE_H
+#define READ_XML_CONFIG_FILE_H
+
+#include "odin_types.h"
+
+extern void read_config_file(const char *file_name); // file_name: the configuration file to read (presumably XML, per this header's name)
+#endif // READ_XML_CONFIG_FILE_H
diff --git a/parmys-plugin/include/string_cache.h b/parmys-plugin/include/string_cache.h
new file mode 100644
index 000000000..3552d61c2
--- /dev/null
+++ b/parmys-plugin/include/string_cache.h
@@ -0,0 +1,46 @@
+#ifndef STRING_CACHE_H
+#define STRING_CACHE_H
+
+/*
+ * Copyright (c) 2001 Vladimir Dergachev (volodya@users.sourceforge.net)
+ *
+ *    This source code is free software; you can redistribute it
+ *    and/or modify it in source code form under the terms of the GNU
+ *    General Public License as published by the Free Software
+ *    Foundation; either version 2 of the License, or (at your option)
+ *    any later version.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU General Public License for more details.
+ *
+ *    You should have received a copy of the GNU General Public License
+ *    along with this program; if not, write to the Free Software
+ *    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+// The include guard is STRING_CACHE_H: names containing a double underscore are reserved in C++.
+struct STRING_CACHE { // string-indexed cache; data[i] is the payload stored with string[i]
+    long size;
+    long string_hash_size;
+    long free;
+    long mod;
+    long mul;
+    char **string;
+    void **data;
+    long *string_hash;
+    long *next_string;
+};
+
+/* creates the hash where it is indexed by a string and the void ** holds the data */
+STRING_CACHE *sc_new_string_cache(void);
+/* returns an index of the spot where string is */
+long sc_lookup_string(STRING_CACHE *sc, const char *string);
+/* adds an element into the cache and returns an id...check with cache_name->data[i] == NULL to see if already added */
+long sc_add_string(STRING_CACHE *sc, const char *string);
+void *sc_do_alloc(long, long);
+
+/* free the cache */
+STRING_CACHE *sc_free_string_cache(STRING_CACHE *sc);
+
+#endif // STRING_CACHE_H
diff --git a/parmys-plugin/include/subtractions.h b/parmys-plugin/include/subtractions.h
new file mode 100644
index 000000000..690540d6a
--- /dev/null
+++ b/parmys-plugin/include/subtractions.h
@@ -0,0 +1,40 @@
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef SUBS_H
+#define SUBS_H
+
+#include "adders.h"
+#include "read_xml_arch_file.h"
+// Everything below is declared extern: the two lists and all functions are defined outside this header.
+extern vtr::t_linked_vptr *sub_list;
+extern vtr::t_linked_vptr *sub_chain_list;
+
+extern void report_sub_distribution();
+extern void declare_hard_adder_for_sub(nnode_t *node);
+extern void instantiate_hard_adder_subtraction(nnode_t *node, short mark, netlist_t *netlist);
+extern void split_adder_for_sub(nnode_t *node, int a, int b, int sizea, int sizeb, int cin, int cout, int count, netlist_t *netlist);
+extern void iterate_adders_for_sub(netlist_t *netlist);
+extern void instantiate_sub_w_borrow_block(nnode_t *node, short traverse_mark_number, netlist_t *netlist);
+extern void clean_adders_for_sub();
+
+#endif // SUBS_H
diff --git a/parmys-plugin/parmys.cc b/parmys-plugin/parmys.cc
new file mode 100644
index 000000000..8d47b0e96
--- /dev/null
+++ b/parmys-plugin/parmys.cc
@@ -0,0 +1,1156 @@
+/*
+ * Copyright 2022 Daniel Khadivi
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include "kernel/celltypes.h"
+#include "kernel/yosys.h"
+
+#include <regex>
+
+#include "netlist_utils.h"
+#include "odin_globals.h"
+#include "odin_ii.h"
+#include "odin_util.h"
+
+#include "vtr_memory.h"
+#include "vtr_path.h"
+#include "vtr_util.h"
+
+#include "netlist_check.h"
+
+#include "partial_map.h"
+
+#include "netlist_visualizer.h"
+
+#include "parmys_resolve.hpp"
+
+#include "BlockMemories.hpp"
+#include "adders.h"
+#include "arch_util.h"
+#include "hard_blocks.h"
+#include "memories.h"
+#include "multipliers.h"
+#include "netlist_cleanup.h"
+#include "netlist_statistic.h"
+#include "read_xml_config_file.h"
+#include "subtractions.h"
+
+#include "ast_util.h"
+#include "parmys_update.hpp"
+#include "parmys_utils.hpp"
+
+USING_YOSYS_NAMESPACE
+PRIVATE_NAMESPACE_BEGIN
+
+#define GND_NAME "$false"   // name used for constant-0 bits; key of the zero net and name of the GND node
+#define VCC_NAME "$true"    // name used for constant-1 bits; key of the one net and name of the VCC node
+#define HBPAD_NAME "$undef" // name used for every other constant bit; key of the pad net and name of the PAD node
+
+struct Bbox { // a name plus two lists of strings (inputs and outputs)
+    std::string name;
+    std::vector<std::string> inputs, outputs;
+};
+
+CellTypes ct; // cell-type table: filled by ct.setup() in to_netlist(), queried by from_yosys_type()
+
+struct ParMYSPass : public Pass {
+
+    // Runs hook_up_node() on every internal node, every flip-flop node and every top-level output node.
+    static void hook_up_nets(netlist_t *odin_netlist, Hashtable *output_nets_hash)
+    {
+        // The groups are visited in this fixed order: internal nodes, flip-flops, top-level outputs.
+        struct node_group {
+            nnode_t **nodes;
+            int count;
+        };
+        const node_group groups[] = {{odin_netlist->internal_nodes, odin_netlist->num_internal_nodes},
+                                     {odin_netlist->ff_nodes, odin_netlist->num_ff_nodes},
+                                     {odin_netlist->top_output_nodes, odin_netlist->num_top_output_nodes}};
+        for (const node_group &group : groups)
+            for (int idx = 0; idx < group.count; idx++)
+                hook_up_node(group.nodes[idx], output_nets_hash);
+    }
+
+    // Adds every input pin of `node` as a fanout pin of the net stored in `output_nets_hash` under the pin's name.
+    // A pin without such a net is reported through log_error(), which is expected not to return.
+    static void hook_up_node(nnode_t *node, Hashtable *output_nets_hash)
+    {
+        // num_input_pins is a long, so the index uses the same type
+        for (long j = 0; j < node->num_input_pins; j++) {
+            npin_t *input_pin = node->input_pins[j];
+            nnet_t *output_net = (nnet_t *)output_nets_hash->get(input_pin->name);
+            if (!output_net) // the message ends with a newline, like the other log_error() calls in this file
+                log_error("Error: Could not hook up the pin %s: not available, related node: %s.\n", input_pin->name, node->name);
+            add_fanout_pin_to_net(output_net, input_pin);
+        }
+    }
+
+    // Creates an OUTPUT_NODE named `name_str` with one input pin of the same name and appends it to top_output_nodes.
+    static void build_top_output_node(const char *name_str, netlist_t *odin_netlist)
+    {
+        nnode_t *out_node = allocate_nnode(my_location);
+        out_node->related_ast_node = NULL;
+        out_node->type = OUTPUT_NODE;
+        out_node->name = vtr::strdup(name_str);
+        allocate_more_input_pins(out_node, 1);
+        add_input_port_information(out_node, 1);
+        npin_t *in_pin = allocate_npin();
+        in_pin->name = vtr::strdup(name_str);
+        add_input_pin_to_node(out_node, in_pin, 0);
+        const int slot = odin_netlist->num_top_output_nodes++; // index of the new entry; the array grows by one
+        nnode_t **grown = (nnode_t **)vtr::realloc(odin_netlist->top_output_nodes, sizeof(nnode_t *) * (slot + 1));
+        grown[slot] = out_node;
+        odin_netlist->top_output_nodes = grown;
+    }
+
+    // Creates an INPUT_NODE named `name_str` whose single output pin drives a new net of the same name;
+    // the node is appended to top_input_nodes and the net is stored in `output_nets_hash` under `name_str`.
+    static void build_top_input_node(const char *name_str, netlist_t *odin_netlist, Hashtable *output_nets_hash)
+    {
+        loc_t node_loc;
+        nnode_t *in_node = allocate_nnode(node_loc);
+        in_node->related_ast_node = NULL;
+        in_node->type = INPUT_NODE;
+        in_node->name = vtr::strdup(name_str);
+
+        // one output port holding one pin
+        allocate_more_output_pins(in_node, 1);
+        add_output_port_information(in_node, 1);
+
+        npin_t *out_pin = allocate_npin();
+        out_pin->name = vtr::strdup(name_str);
+        out_pin->type = OUTPUT;
+        add_output_pin_to_node(in_node, out_pin, 0);
+
+        // the new pin becomes the driver of the new net
+        nnet_t *driven_net = allocate_nnet();
+        driven_net->name = vtr::strdup(name_str);
+        add_driver_pin_to_net(driven_net, out_pin);
+
+        const int slot = odin_netlist->num_top_input_nodes++; // index of the new entry; the array grows by one
+        nnode_t **grown = (nnode_t **)vtr::realloc(odin_netlist->top_input_nodes, sizeof(nnode_t *) * (slot + 1));
+        grown[slot] = in_node;
+        odin_netlist->top_input_nodes = grown;
+        output_nets_hash->add(name_str, driven_net);
+    }
+
+    static void create_top_driver_nets(netlist_t *odin_netlist, Hashtable *output_nets_hash)
+    {
+        npin_t *new_pin;
+        // Builds the GND, VCC and PAD drivers the same way: allocate net and node, give the node one output pin, make that pin drive the net.
+        /* ZERO net */
+        odin_netlist->zero_net = allocate_nnet();
+        odin_netlist->gnd_node = allocate_nnode(unknown_location);
+        odin_netlist->gnd_node->type = GND_NODE;
+        allocate_more_output_pins(odin_netlist->gnd_node, 1);
+        add_output_port_information(odin_netlist->gnd_node, 1);
+        new_pin = allocate_npin();
+        add_output_pin_to_node(odin_netlist->gnd_node, new_pin, 0);
+        add_driver_pin_to_net(odin_netlist->zero_net, new_pin);
+
+        /* ONE net */
+        odin_netlist->one_net = allocate_nnet();
+        odin_netlist->vcc_node = allocate_nnode(unknown_location);
+        odin_netlist->vcc_node->type = VCC_NODE;
+        allocate_more_output_pins(odin_netlist->vcc_node, 1);
+        add_output_port_information(odin_netlist->vcc_node, 1);
+        new_pin = allocate_npin();
+        add_output_pin_to_node(odin_netlist->vcc_node, new_pin, 0);
+        add_driver_pin_to_net(odin_netlist->one_net, new_pin);
+
+        /* PAD net */
+        odin_netlist->pad_net = allocate_nnet();
+        odin_netlist->pad_node = allocate_nnode(unknown_location);
+        odin_netlist->pad_node->type = PAD_NODE;
+        allocate_more_output_pins(odin_netlist->pad_node, 1);
+        add_output_port_information(odin_netlist->pad_node, 1);
+        new_pin = allocate_npin();
+        add_output_pin_to_node(odin_netlist->pad_node, new_pin, 0);
+        add_driver_pin_to_net(odin_netlist->pad_net, new_pin);
+
+        /* name the ZERO net and store it in the hash under GND_NAME */
+        odin_netlist->zero_net->name = make_full_ref_name(odin_netlist->identifier, NULL, NULL, zero_string, -1);
+        output_nets_hash->add(GND_NAME, odin_netlist->zero_net);
+
+        /* name the ONE net and store it in the hash under VCC_NAME (stored once) */
+        odin_netlist->one_net->name = make_full_ref_name(odin_netlist->identifier, NULL, NULL, one_string, -1);
+        output_nets_hash->add(VCC_NAME, odin_netlist->one_net);
+
+        /* name the PAD net and store it in the hash under HBPAD_NAME */
+        odin_netlist->pad_net->name = make_full_ref_name(odin_netlist->identifier, NULL, NULL, pad_string, -1);
+        output_nets_hash->add(HBPAD_NAME, odin_netlist->pad_net);
+        // the three driver nodes are named with the same strings that serve as hash keys above
+        odin_netlist->vcc_node->name = vtr::strdup(VCC_NAME);
+        odin_netlist->gnd_node->name = vtr::strdup(GND_NAME);
+        odin_netlist->pad_node->name = vtr::strdup(HBPAD_NAME);
+    }
+
+    // Returns a newly allocated name for `sig` and inserts the bit into `cstr_bits_seen`.
+    // Constant bits map to GND_NAME (S0), VCC_NAME (S1) or HBPAD_NAME (any other state); wire bits get
+    // the unescaped wire name, together with a bit index when the wire is wider than one bit.
+    static char *sig_full_ref_name_sig(RTLIL::SigBit sig, pool<SigBit> &cstr_bits_seen)
+    {
+        cstr_bits_seen.insert(sig);
+
+        RTLIL::Wire *wire = sig.wire;
+        if (wire == NULL) {
+            const char *const_name = HBPAD_NAME;
+            if (sig == RTLIL::State::S0)
+                const_name = GND_NAME;
+            else if (sig == RTLIL::State::S1)
+                const_name = VCC_NAME;
+            return vtr::strdup(const_name);
+        }
+        // single-bit wires pass -1 as the bit argument; wider wires pass the index computed from upto/start_offset
+        long bit = -1;
+        if (wire->width != 1)
+            bit = wire->upto ? wire->start_offset + wire->width - sig.offset - 1 : wire->start_offset + sig.offset;
+        return make_full_ref_name(NULL, NULL, NULL, RTLIL::unescape_id(wire->name).c_str(), bit);
+    }
+
+    // Appends one input port of in_port.size() pins to `node`; each pin is named after the signal bit it
+    // reads (see sig_full_ref_name_sig) and its `mapping` is set to the unescaped port name.
+    static void map_input_port(const RTLIL::IdString &mapping, SigSpec in_port, nnode_t *node, pool<SigBit> &cstr_bits_seen)
+    {
+        const int first_new_pin = node->num_input_pins; // the new pins go after the pins the node already has
+        const int port_width = in_port.size();
+        const std::string port_name = RTLIL::unescape_id(mapping);
+
+        allocate_more_input_pins(node, port_width);
+        add_input_port_information(node, port_width);
+
+        for (int bit = 0; bit < port_width; bit++) {
+            char *bit_name = sig_full_ref_name_sig(in_port[bit], cstr_bits_seen);
+            npin_t *pin = allocate_npin();
+            pin->name = vtr::strdup(bit_name);
+            pin->mapping = vtr::strdup(port_name.c_str());
+            add_input_pin_to_node(node, pin, first_new_pin + bit);
+            vtr::free(bit_name); // the pin holds its own copy of the name
+        }
+    }
+
+    // Appends one output port of out_port.size() pins to `node`. Each pin gets the unescaped port name as
+    // its `mapping` and is added as a driver of the net stored under its signal-bit name; when the hash
+    // holds no net under that name, a new net is allocated, named and stored first.
+    static void map_output_port(const RTLIL::IdString &mapping, SigSpec out_port, nnode_t *node, Hashtable *output_nets_hash,
+                                pool<SigBit> &cstr_bits_seen)
+    {
+        const int first_new_pin = node->num_output_pins; // the new pins go after the pins the node already has
+        const int port_width = out_port.size();
+        const std::string port_name = RTLIL::unescape_id(mapping);
+
+        allocate_more_output_pins(node, port_width);
+        add_output_port_information(node, port_width);
+
+        for (int bit = 0; bit < port_width; bit++) {
+            npin_t *pin = allocate_npin();
+            pin->name = NULL;
+            pin->mapping = vtr::strdup(port_name.c_str());
+            add_output_pin_to_node(node, pin, first_new_pin + bit);
+            char *net_name = sig_full_ref_name_sig(out_port[bit], cstr_bits_seen);
+            nnet_t *net = (nnet_t *)output_nets_hash->get(net_name);
+            if (!net) {
+                net = allocate_nnet();
+                net->name = vtr::strdup(net_name);
+                output_nets_hash->add(net_name, net);
+            }
+            add_driver_pin_to_net(net, pin);
+            vtr::free(net_name);
+        }
+    }
+
+    // Returns true for exactly the node types listed in the table below and false for every other type.
+    static bool is_param_required(operation_list op)
+    {
+        static const operation_list param_ops[] = {
+          SL, SR,
+          ASL, ASR,
+          DLATCH,
+          ADLATCH,
+          SETCLR,
+          SDFF,
+          DFFE,
+          SDFFE,
+          SDFFCE,
+          DFFSR,
+          DFFSRE,
+          SPRAM,
+          DPRAM,
+          YMEM,
+          YMEM2,
+          FF_NODE,
+        };
+        for (operation_list candidate : param_ops)
+            if (candidate == op)
+                return true;
+        return false;
+    }
+
+    // Translates a Yosys cell type into the operation_list value stored in nnode_t::type.
+    // The comparisons run in the order written and the first match decides the result:
+    //   - the cell types named below map to a fixed value,
+    //   - built-in flip-flop types and every type known to the CellTypes table `ct` map to SKIP
+    //     (SKIP: "to skip mapping for this specific node"),
+    //   - anything else maps to NO_OP.
+    // Several cell types share a result (ADD, MULTIPLY, SKIP).
+    static operation_list from_yosys_type(Yosys::RTLIL::IdString type)
+    {
+        // $-prefixed cell types
+        if (type == ID($add))
+            return ADD;
+        if (type == ID($mem))
+            return YMEM;
+        if (type == ID($mem_v2))
+            return YMEM2;
+        if (type == ID($mul))
+            return MULTIPLY;
+        if (type == ID($sub))
+            return MINUS;
+
+        // plain cell names
+        if (type == ID(LUT_K))
+            return SKIP;
+        if (type == ID(DFF))
+            return FF_NODE;
+        // not an operation: the end-of-enum sentinel is returned for this type
+        if (type == ID(fpga_interconnect))
+            return operation_list_END;
+        if (type == ID(mux))
+            return SMUX_2;
+
+        // primitive names; adder and multiply yield the same values as $add and $mul above
+        if (type == ID(adder))
+            return ADD;
+        if (type == ID(multiply))
+            return MULTIPLY;
+        if (type == ID(single_port_ram))
+            return SPRAM;
+        if (type == ID(dual_port_ram))
+            return DPRAM;
+
+        // types Yosys already knows
+        if (Yosys::RTLIL::builtin_ff_cell_types().count(type))
+            return SKIP;
+        // `ct` is the file-level CellTypes table; to_netlist() calls ct.setup() before cells are translated
+        if (ct.cell_known(type))
+            return SKIP;
+
+        // unrecognised type; to_netlist() tests for NO_OP explicitly
+        return NO_OP;
+    }
+
+    /**
+     * Build an Odin-II netlist from the ports, cells and connections of a Yosys module.
+     *
+     * Every module of the design is first checked for unmapped processes/memories
+     * (log_error is raised if any is found). The ports of top_module become top-level
+     * input/output nodes, each cell of top_module becomes one internal node, and each bit
+     * of a module-level connection whose left-hand side is present in the set of already
+     * referenced bits becomes a BUF_NODE.
+     *
+     * @param top_module module whose contents are translated
+     * @param design     design owning top_module; used to look up the modules that cells instantiate
+     * @return the netlist obtained from allocate_netlist(), populated and passed through hook_up_nets()
+     */
+    static netlist_t *to_netlist(RTLIL::Module *top_module, RTLIL::Design *design)
+    {
+        ct.setup();
+
+        // Refuse designs that still carry unmapped processes or memories in any module.
+        for (auto module : design->modules()) {
+            if (module->processes.size() != 0)
+                log_error("Found unmapped processes in module %s: unmapped processes are not supported in parmys pass!\n", log_id(module->name));
+            if (module->memories.size() != 0)
+                log_error("Found unmapped memories in module %s: unmapped memories are not supported in parmys pass!\n", log_id(module->name));
+        }
+
+        pool<SigBit> cstr_bits_seen;
+
+        netlist_t *odin_netlist = allocate_netlist();
+        odin_netlist->design = design;
+        Hashtable *output_nets_hash = new Hashtable();
+        odin_netlist->identifier = vtr::strdup(log_id(top_module->name));
+
+        create_top_driver_nets(odin_netlist, output_nets_hash);
+
+        // Collect the ports keyed by port_id so that they are visited in port order.
+        std::map<int, RTLIL::Wire *> inputs, outputs;
+
+        for (auto wire : top_module->wires()) {
+            if (wire->port_input)
+                inputs[wire->port_id] = wire;
+            if (wire->port_output)
+                outputs[wire->port_id] = wire;
+        }
+
+        // One top-level input node per input port bit.
+        for (auto &it : inputs) {
+            RTLIL::Wire *wire = it.second;
+            for (int i = 0; i < wire->width; i++) {
+                char *name_string = sig_full_ref_name_sig(RTLIL::SigBit(wire, i), cstr_bits_seen);
+                build_top_input_node(name_string, odin_netlist, output_nets_hash);
+                vtr::free(name_string);
+            }
+        }
+
+        // One top-level output node per output port bit.
+        for (auto &it : outputs) {
+            RTLIL::Wire *wire = it.second;
+            for (int i = 0; i < wire->width; i++) {
+                char *name_string = sig_full_ref_name_sig(RTLIL::SigBit(wire, i), cstr_bits_seen);
+                build_top_output_node(name_string, odin_netlist);
+                vtr::free(name_string);
+            }
+        }
+
+        // Patterns for parametrized module names; compiled once instead of once per cell.
+        // e.g. $paramod$b509a885304d9c8c49f505bb9d0e99a9fb676562\dual_port_ram
+        const std::regex hashed_paramod_re("^\\$paramod\\$\\w+\\\\(\\w+)$");
+        // e.g. $paramod\dual_port_ram\ADDR_WIDTH?4'0100\DATA_WIDTH?4'0101
+        const std::regex plain_paramod_re("^\\$paramod\\\\(\\w+)(\\\\\\S+)*$");
+
+        // Attach a fake HARD_BLOCK ast node, identified by the cell type, to the given node.
+        auto attach_fake_hard_block_ast = [](nnode_t *node, RTLIL::Cell *of_cell) {
+            std::string modname(str(of_cell->type));
+            node->related_ast_node = create_node_w_type(HARD_BLOCK, my_location);
+            node->related_ast_node->children = (ast_node_t **)vtr::calloc(1, sizeof(ast_node_t *));
+            node->related_ast_node->identifier_node = create_tree_node_id(vtr::strdup(modname.c_str()), my_location);
+        };
+
+        long hard_id = 0;
+        for (auto cell : top_module->cells()) {
+
+            nnode_t *new_node = allocate_nnode(my_location);
+
+            for (auto &param : cell->parameters) {
+                new_node->cell_parameters[Yosys::RTLIL::IdString(param.first)] = Yosys::Const(param.second);
+            }
+
+            new_node->related_ast_node = NULL;
+
+            new_node->type = from_yosys_type(cell->type);
+
+            // check whether the primitive node type is already mapped before or not (blackboxed)
+            if (new_node->type == SPRAM || new_node->type == DPRAM || new_node->type == ADD || new_node->type == MULTIPLY) {
+                if (design->module(cell->type) != nullptr && design->module(cell->type)->get_blackbox_attribute()) {
+                    new_node->type = SKIP;
+                }
+            }
+
+            if (new_node->type == NO_OP) {
+
+                /**
+                 *  according to ast.cc:1657-1663
+                 *
+                 * 	std::string modname;
+                 *	if (parameters.size() == 0)
+                 *		modname = stripped_name;
+                 *	else if (para_info.size() > 60)
+                 *		modname = "$paramod$" + sha1(para_info) + stripped_name;
+                 *	else
+                 *		modname = "$paramod" + stripped_name + para_info;
+                 */
+
+                const std::string modname(str(cell->type));
+                std::smatch m;
+
+                if (cell->type.begins_with("$paramod$")) {
+                    if (std::regex_match(modname, m, hashed_paramod_re)) {
+                        new_node->type = yosys_subckt_strmap[m.str(1).c_str()];
+                    }
+                } else if (cell->type.begins_with("$paramod\\")) {
+                    if (std::regex_match(modname, m, plain_paramod_re)) {
+                        new_node->type = yosys_subckt_strmap[m.str(1).c_str()];
+                    }
+                } else {
+                    // The cell type may not name a module of the design at all, so the lookup
+                    // result must be checked before it is dereferenced.
+                    RTLIL::Module *cell_module = design->module(cell->type);
+                    if (cell_module != nullptr && cell_module->get_blackbox_attribute()) {
+                        new_node->type = SKIP;
+                    } else {
+                        new_node->type = HARD_IP;
+                        t_model *hb_model = find_hard_block(modname.c_str());
+                        if (hb_model) {
+                            hb_model->used = 1;
+                        }
+                        attach_fake_hard_block_ast(new_node, cell);
+                    }
+                }
+            }
+
+            if (new_node->type == SKIP) {
+                attach_fake_hard_block_ast(new_node, cell);
+            }
+
+            for (auto &conn : cell->connections()) {
+
+                if (cell->input(conn.first) && conn.second.size() > 0) {
+                    map_input_port(conn.first, conn.second, new_node, cstr_bits_seen);
+                }
+
+                if (cell->output(conn.first) && conn.second.size() > 0) {
+                    map_output_port(conn.first, conn.second, new_node, output_nets_hash, cstr_bits_seen);
+                }
+            }
+
+            if (is_param_required(new_node->type)) {
+
+                // Read a cell parameter through its bit-string form, as the original per-parameter code did.
+                auto param_as_ulong = [cell](const RTLIL::IdString &param_id) {
+                    const std::string bits = cell->getParam(param_id).as_string();
+                    return std::bitset<sizeof(long) * 8>(bits.c_str()).to_ulong();
+                };
+
+                if (cell->hasParam(ID::SRST_VALUE)) {
+                    new_node->attributes->sreset_value = param_as_ulong(ID::SRST_VALUE);
+                }
+
+                if (cell->hasParam(ID::ARST_VALUE)) {
+                    new_node->attributes->areset_value = param_as_ulong(ID::ARST_VALUE);
+                }
+
+                if (cell->hasParam(ID::OFFSET)) {
+                    new_node->attributes->offset = param_as_ulong(ID::OFFSET);
+                }
+
+                if (cell->hasParam(ID::SIZE)) {
+                    new_node->attributes->size = param_as_ulong(ID::SIZE);
+                }
+
+                if (cell->hasParam(ID::WIDTH)) {
+                    new_node->attributes->DBITS = param_as_ulong(ID::WIDTH);
+                }
+
+                if (cell->hasParam(ID::RD_PORTS)) {
+                    new_node->attributes->RD_PORTS = param_as_ulong(ID::RD_PORTS);
+                }
+
+                if (cell->hasParam(ID::WR_PORTS)) {
+                    new_node->attributes->WR_PORTS = param_as_ulong(ID::WR_PORTS);
+                }
+
+                if (cell->hasParam(ID::ABITS)) {
+                    new_node->attributes->ABITS = param_as_ulong(ID::ABITS);
+                }
+
+                if (cell->hasParam(ID::MEMID)) {
+                    RTLIL::IdString ids = cell->getParam(ID::MEMID).decode_string();
+                    new_node->attributes->memory_id = vtr::strdup(RTLIL::unescape_id(ids).c_str());
+                }
+
+                if (cell->hasParam(ID::A_SIGNED)) {
+                    new_node->attributes->port_a_signed = cell->getParam(ID::A_SIGNED).as_bool() ? SIGNED : UNSIGNED;
+                }
+
+                if (cell->hasParam(ID::B_SIGNED)) {
+                    new_node->attributes->port_b_signed = cell->getParam(ID::B_SIGNED).as_bool() ? SIGNED : UNSIGNED;
+                }
+
+                if (cell->hasParam(ID::CLK_POLARITY)) {
+                    new_node->attributes->clk_edge_type =
+                      cell->getParam(ID::CLK_POLARITY).as_bool() ? RISING_EDGE_SENSITIVITY : FALLING_EDGE_SENSITIVITY;
+                }
+
+                if (cell->hasParam(ID::CLR_POLARITY)) {
+                    new_node->attributes->clr_polarity =
+                      cell->getParam(ID::CLR_POLARITY).as_bool() ? ACTIVE_HIGH_SENSITIVITY : ACTIVE_LOW_SENSITIVITY;
+                }
+
+                if (cell->hasParam(ID::SET_POLARITY)) {
+                    new_node->attributes->set_polarity =
+                      cell->getParam(ID::SET_POLARITY).as_bool() ? ACTIVE_HIGH_SENSITIVITY : ACTIVE_LOW_SENSITIVITY;
+                }
+
+                if (cell->hasParam(ID::EN_POLARITY)) {
+                    new_node->attributes->enable_polarity =
+                      cell->getParam(ID::EN_POLARITY).as_bool() ? ACTIVE_HIGH_SENSITIVITY : ACTIVE_LOW_SENSITIVITY;
+                }
+
+                if (cell->hasParam(ID::ARST_POLARITY)) {
+                    new_node->attributes->areset_polarity =
+                      cell->getParam(ID::ARST_POLARITY).as_bool() ? ACTIVE_HIGH_SENSITIVITY : ACTIVE_LOW_SENSITIVITY;
+                }
+
+                if (cell->hasParam(ID::SRST_POLARITY)) {
+                    new_node->attributes->sreset_polarity =
+                      cell->getParam(ID::SRST_POLARITY).as_bool() ? ACTIVE_HIGH_SENSITIVITY : ACTIVE_LOW_SENSITIVITY;
+                }
+
+                if (cell->hasParam(ID::RD_CLK_ENABLE)) {
+                    new_node->attributes->RD_CLK_ENABLE =
+                      cell->getParam(ID::RD_CLK_ENABLE).as_bool() ? ACTIVE_HIGH_SENSITIVITY : ACTIVE_LOW_SENSITIVITY;
+                }
+
+                if (cell->hasParam(ID::WR_CLK_ENABLE)) {
+                    new_node->attributes->WR_CLK_ENABLE =
+                      cell->getParam(ID::WR_CLK_ENABLE).as_bool() ? ACTIVE_HIGH_SENSITIVITY : ACTIVE_LOW_SENSITIVITY;
+                }
+
+                if (cell->hasParam(ID::RD_CLK_POLARITY)) {
+                    new_node->attributes->RD_CLK_POLARITY =
+                      cell->getParam(ID::RD_CLK_POLARITY).as_bool() ? ACTIVE_HIGH_SENSITIVITY : ACTIVE_LOW_SENSITIVITY;
+                }
+
+                if (cell->hasParam(ID::WR_CLK_POLARITY)) {
+                    new_node->attributes->WR_CLK_POLARITY =
+                      cell->getParam(ID::WR_CLK_POLARITY).as_bool() ? ACTIVE_HIGH_SENSITIVITY : ACTIVE_LOW_SENSITIVITY;
+                }
+            }
+
+            if (new_node->type == SMUX_2) {
+                // A 2-input mux node is named after the net driven by its first output pin.
+                new_node->name = vtr::strdup(new_node->output_pins[0]->net->name);
+            } else {
+                // Every other node is named "<cell type>~<running id>"; hard IP names get a leading backslash.
+                const std::string name_prefix = (new_node->type == HARD_IP) ? "\\" : "";
+                new_node->name = vtr::strdup(stringf("%s~%ld", (name_prefix + str(cell->type)).c_str(), hard_id++).c_str());
+            }
+
+            /* add this node to the netlist as an internal node */
+            odin_netlist->internal_nodes =
+              (nnode_t **)vtr::realloc(odin_netlist->internal_nodes, sizeof(nnode_t *) * (odin_netlist->num_internal_nodes + 1));
+            odin_netlist->internal_nodes[odin_netlist->num_internal_nodes++] = new_node;
+        }
+
+        // add intermediate buffer nodes
+        for (auto &conn : top_module->connections()) {
+            for (int i = 0; i < conn.first.size(); i++) {
+                SigBit lhs_bit = conn.first[i];
+                SigBit rhs_bit = conn.second[i];
+
+                if (cstr_bits_seen.count(lhs_bit) == 0) // @TODO to be double checked later
+                    continue;
+
+                nnode_t *buf_node = allocate_nnode(my_location);
+
+                buf_node->related_ast_node = NULL;
+
+                buf_node->type = BUF_NODE;
+
+                // Single input pin, named after the right-hand side bit.
+                allocate_more_input_pins(buf_node, 1);
+                add_input_port_information(buf_node, 1);
+
+                char *in_pin_name = sig_full_ref_name_sig(rhs_bit, cstr_bits_seen);
+                npin_t *in_pin = allocate_npin();
+                in_pin->name = vtr::strdup(in_pin_name);
+                in_pin->type = INPUT;
+                add_input_pin_to_node(buf_node, in_pin, 0);
+
+                vtr::free(in_pin_name);
+
+                // Single output pin, driving the net named after the left-hand side bit.
+                allocate_more_output_pins(buf_node, 1);
+                add_output_port_information(buf_node, 1);
+
+                npin_t *out_pin = allocate_npin();
+                out_pin->name = NULL;
+                add_output_pin_to_node(buf_node, out_pin, 0);
+
+                char *output_pin_name = sig_full_ref_name_sig(lhs_bit, cstr_bits_seen);
+                nnet_t *out_net = (nnet_t *)output_nets_hash->get(output_pin_name);
+                if (out_net == nullptr) {
+                    // First driver seen for this name: create the net and remember it.
+                    out_net = allocate_nnet();
+                    out_net->name = vtr::strdup(output_pin_name);
+                    output_nets_hash->add(output_pin_name, out_net);
+                }
+                add_driver_pin_to_net(out_net, out_pin);
+
+                buf_node->name = vtr::strdup(output_pin_name);
+
+                odin_netlist->internal_nodes =
+                  (nnode_t **)vtr::realloc(odin_netlist->internal_nodes, sizeof(nnode_t *) * (odin_netlist->num_internal_nodes + 1));
+                odin_netlist->internal_nodes[odin_netlist->num_internal_nodes++] = buf_node;
+
+                vtr::free(output_pin_name);
+            }
+        }
+
+        hook_up_nets(odin_netlist, output_nets_hash);
+
+        delete output_nets_hash;
+        return odin_netlist;
+    }
+
+    /**
+     * Collect the LUT pb_types reachable from a mode by visiting each of its child pb_types.
+     *
+     * @param pb_lut_list output list the LUT pb_types are appended to
+     * @param mode        mode whose pb_type children are searched
+     */
+    void get_physical_luts(std::vector<t_pb_type *> &pb_lut_list, t_mode *mode)
+    {
+        int child_idx = 0;
+        while (child_idx < mode->num_pb_type_children) {
+            t_pb_type *child_pb_type = &mode->pb_type_children[child_idx];
+            get_physical_luts(pb_lut_list, child_pb_type);
+            ++child_idx;
+        }
+    }
+
+    /**
+     * Collect the LUT pb_types found at or below a pb_type.
+     *
+     * A pb_type whose class is LUT_CLASS is appended itself; otherwise each of its modes
+     * is searched. A null pb_type is ignored.
+     *
+     * @param pb_lut_list output list the LUT pb_types are appended to
+     * @param pb_type     root of the search (may be null)
+     */
+    void get_physical_luts(std::vector<t_pb_type *> &pb_lut_list, t_pb_type *pb_type)
+    {
+        if (!pb_type) {
+            return;
+        }
+
+        if (pb_type->class_type == LUT_CLASS) {
+            pb_lut_list.push_back(pb_type);
+            return;
+        }
+
+        for (int mode_idx = 0; mode_idx < pb_type->num_modes; ++mode_idx) {
+            get_physical_luts(pb_lut_list, &pb_type->modes[mode_idx]);
+        }
+    }
+
+    /**
+     * Update physical_lut_size from the LUTs found in the given logical block types.
+     *
+     * Block types whose index is EMPTY_TYPE_INDEX are not searched. For each LUT found,
+     * physical_lut_size is replaced by the LUT's input pin count when that count is
+     * smaller, or when physical_lut_size is still below 1.
+     *
+     * @param logical_block_types logical block types to search for LUTs
+     */
+    void set_physical_lut_size(std::vector<t_logical_block_type> &logical_block_types)
+    {
+        std::vector<t_pb_type *> lut_pb_types;
+
+        for (auto &block_type : logical_block_types) {
+            if (block_type.index == EMPTY_TYPE_INDEX) {
+                continue;
+            }
+            get_physical_luts(lut_pb_types, block_type.pb_type);
+        }
+
+        for (t_pb_type *lut : lut_pb_types) {
+            if (!lut) {
+                continue;
+            }
+            const bool size_not_set_yet = physical_lut_size < 1;
+            if (size_not_set_yet || lut->num_input_pins < physical_lut_size) {
+                physical_lut_size = lut->num_input_pins;
+            }
+        }
+    }
+
+    /**
+     * Run the elaboration stage on the netlist and log how long it took.
+     *
+     * @param odin_netlist netlist handed to resolve_top()
+     */
+    static void elaborate(netlist_t *odin_netlist)
+    {
+        const double start_time = wall_time();
+
+        /* Initialization routines that have to run before the netlist is resolved */
+        find_hard_multipliers();
+        find_hard_adders();
+        register_hard_blocks();
+
+        resolve_top(odin_netlist);
+
+        const double elapsed = wall_time() - start_time;
+        log("\nElaboration Time: ");
+        log_time(elapsed);
+        log("\n--------------------------------------------------------------------\n");
+    }
+
+    /**
+     * Run the netlist optimization stage and log how long it took.
+     *
+     * When odin_netlist is null only the timing report is printed.
+     *
+     * @param odin_netlist netlist to check and optimize (may be null)
+     */
+    static void optimization(netlist_t *odin_netlist)
+    {
+        const double start_time = wall_time();
+
+        if (odin_netlist != nullptr) {
+            check_netlist(odin_netlist);
+
+            /* entry point for all netlist optimizations */
+            log("Performing Optimization on the Netlist\n");
+
+            /* Multipliers: split them for the hard block multipliers */
+            if (hard_multipliers) {
+                reduce_operations(odin_netlist, MULTIPLY);
+                iterate_multipliers(odin_netlist);
+                clean_multipliers();
+            }
+
+            /* Yosys generated memory blocks: hard block registration and splitting in width */
+            if (block_memories_info.read_only_memory_list || block_memories_info.block_memory_list) {
+                iterate_block_memories(odin_netlist);
+                free_block_memories();
+            }
+
+            /* Hard block memories: split any single/dual port RAMs */
+            if (single_port_rams || dual_port_rams) {
+                iterate_memories(odin_netlist);
+                free_memory_lists();
+            }
+
+            if (hard_adders) {
+                /* Adders: split them for the hard block adders */
+                reduce_operations(odin_netlist, ADD);
+                iterate_adders(odin_netlist);
+                clean_adders();
+
+                /* Subtractions: split the adders used for hard block sub */
+                reduce_operations(odin_netlist, MINUS);
+                iterate_adders_for_sub(odin_netlist);
+                clean_adders_for_sub();
+            }
+        }
+
+        const double elapsed = wall_time() - start_time;
+        log("\nOptimization Time: ");
+        log_time(elapsed);
+        log("\n--------------------------------------------------------------------\n");
+    }
+
+    /**
+     * Run the partial technology mapping stage and log how long it took.
+     *
+     * When odin_netlist is null only the timing report is printed.
+     *
+     * @param odin_netlist netlist to map (may be null)
+     */
+    static void techmap(netlist_t *odin_netlist)
+    {
+        const double start_time = wall_time();
+
+        if (odin_netlist != nullptr) {
+            /* convert the netlist to an FPGA or other hardware target compatible format */
+            log("Performing Partial Technology Mapping to the target device\n");
+            partial_map_top(odin_netlist);
+            mixer->perform_optimizations(odin_netlist);
+
+            /* drop whatever logic ended up unused in the netlist */
+            remove_unused_logic(odin_netlist);
+        }
+
+        const double elapsed = wall_time() - start_time;
+        log("\nTechmap Time: ");
+        log_time(elapsed);
+        log("\n--------------------------------------------------------------------\n");
+    }
+
+    /**
+     * Print the multiplier/adder/subtractor distribution reports and compute the
+     * netlist statistics. Does nothing when odin_netlist is null.
+     *
+     * @param odin_netlist netlist to report on (may be null)
+     */
+    static void report(netlist_t *odin_netlist)
+    {
+        if (!odin_netlist) {
+            return;
+        }
+
+        report_mult_distribution();
+        report_add_distribution();
+        report_sub_distribution();
+
+        compute_statistics(odin_netlist, true);
+    }
+
+    /** Log a duration given in seconds as milliseconds with one decimal place. */
+    static void log_time(double time)
+    {
+        const double milliseconds = time * 1000;
+        log("%.1fms", milliseconds);
+    }
+
+    // Construct the pass, handing the command name "parmys" and its one-line description to the Pass base class.
+    ParMYSPass() : Pass("parmys", "ODIN_II partial mapper for Yosys") {}
+    /**
+     * Print the list of options accepted by the parmys pass.
+     *
+     * The -exact_mults and -mults_ratio entries describe multipliers: both options
+     * configure the MULTIPLY mixing optimization in execute().
+     */
+    void help() override
+    {
+        log("\n");
+        log("    -a ARCHITECTURE_FILE\n");
+        log("        VTR FPGA architecture description file (XML)\n");
+        log("\n");
+        log("    -c XML_CONFIGURATION_FILE\n");
+        log("        Configuration file\n");
+        log("\n");
+        log("    -top top_module\n");
+        log("        set the specified module as design top module\n");
+        log("\n");
+        log("    -nopass\n");
+        log("        No additional passes will be executed.\n");
+        log("\n");
+        log("    -exact_mults int_value\n");
+        log("        To enable mixing hard block and soft logic implementation of multipliers\n");
+        log("\n");
+        log("    -mults_ratio float_value\n");
+        log("        To enable mixing hard block and soft logic implementation of multipliers\n");
+        log("\n");
+        log("    -vtr_prim\n");
+        log("        loads vtr primitives as modules, if the design uses vtr primitives then this flag is mandatory for first run\n");
+        log("\n");
+    }
+    /**
+     * Entry point of the parmys pass.
+     *
+     * Parses the pass options, optionally reads the configuration and architecture files,
+     * runs the preparatory Yosys passes (unless -nopass is given), converts the top module
+     * into an Odin-II netlist, runs elaboration / optimization / partial technology mapping
+     * on it, and finally replaces the content of the design with the result (black box
+     * modules are re-created from the port lists recorded beforehand).
+     *
+     * @param args   pass arguments; args[0] is skipped by the option loop
+     * @param design design to operate on; its modules are removed and rebuilt
+     */
+    void execute(std::vector<std::string> args, RTLIL::Design *design) override
+    {
+        bool flag_arch_file = false;
+        bool flag_config_file = false;
+        bool flag_load_vtr_primitives = false;
+        bool flag_no_pass = false;
+        std::string arch_file_path;
+        std::string config_file_path;
+        std::string top_module_name;
+
+        global_args.exact_mults.set(-1, argparse::Provenance::DEFAULT);
+        global_args.mults_ratio.set(-1.0, argparse::Provenance::DEFAULT);
+
+        log_header(design, "Starting parmys pass.\n");
+
+        // NOTE(review): there is no `break` for unrecognized arguments, so this loop always
+        // consumes every argument and extra_args() is called with argidx == args.size();
+        // unknown options are therefore silently ignored. Confirm that this is intended.
+        size_t argidx;
+        for (argidx = 1; argidx < args.size(); argidx++) {
+            if (args[argidx] == "-a" && argidx + 1 < args.size()) {
+                arch_file_path = args[++argidx];
+                flag_arch_file = true;
+                continue;
+            }
+            if (args[argidx] == "-c" && argidx + 1 < args.size()) {
+                config_file_path = args[++argidx];
+                flag_config_file = true;
+                continue;
+            }
+            if (args[argidx] == "-top" && argidx + 1 < args.size()) {
+                top_module_name = args[++argidx];
+                continue;
+            }
+            if (args[argidx] == "-vtr_prim") {
+                flag_load_vtr_primitives = true;
+                continue;
+            }
+            if (args[argidx] == "-nopass") {
+                flag_no_pass = true;
+                continue;
+            }
+            if (args[argidx] == "-exact_mults" && argidx + 1 < args.size()) {
+                global_args.exact_mults.set(atoi(args[++argidx].c_str()), argparse::Provenance::SPECIFIED);
+                continue;
+            }
+            if (args[argidx] == "-mults_ratio" && argidx + 1 < args.size()) {
+                global_args.mults_ratio.set(atof(args[++argidx].c_str()), argparse::Provenance::SPECIFIED);
+                continue;
+            }
+        }
+        extra_args(args, argidx, design);
+
+        std::vector<t_physical_tile_type> physical_tile_types;
+        std::vector<t_logical_block_type> logical_block_types;
+
+        try {
+            /* Some initialization */
+            one_string = vtr::strdup(ONE_VCC_CNS);
+            zero_string = vtr::strdup(ZERO_GND_ZERO);
+            pad_string = vtr::strdup(ZERO_PAD_ZERO);
+
+        } catch (vtr::VtrError &vtr_error) {
+            log_error("Odin failed to initialize %s with exit code%d\n", vtr_error.what(), ERROR_INITIALIZATION);
+        }
+
+        mixer = new HardSoftLogicMixer();
+        set_default_config();
+
+        // A valid ratio takes precedence over an exact multiplier count.
+        if (global_args.mults_ratio >= 0.0 && global_args.mults_ratio <= 1.0) {
+            delete mixer->_opts[MULTIPLY];
+            mixer->_opts[MULTIPLY] = new MultsOpt(global_args.mults_ratio);
+        } else if (global_args.exact_mults >= 0) {
+            delete mixer->_opts[MULTIPLY];
+            mixer->_opts[MULTIPLY] = new MultsOpt(global_args.exact_mults);
+        }
+
+        configuration.coarsen = true;
+
+        /* read the configuration file .. the defaults set above stay in place for values the config file does not provide */
+        if (flag_config_file) {
+            log("Reading Configuration file\n");
+            try {
+                read_config_file(config_file_path.c_str());
+            } catch (vtr::VtrError &vtr_error) {
+                log_error("Odin Failed Reading Configuration file %s with exit code%d\n", vtr_error.what(), ERROR_PARSE_CONFIG);
+            }
+        }
+
+        if (flag_arch_file) {
+            log("Architecture: %s\n", vtr::basename(arch_file_path).c_str());
+
+            log("Reading FPGA Architecture file\n");
+            try {
+                XmlReadArch(arch_file_path.c_str(), false, &Arch, physical_tile_types, logical_block_types);
+                set_physical_lut_size(logical_block_types);
+            } catch (vtr::VtrError &vtr_error) {
+                log_error("Odin Failed to load architecture file: %s with exit code%d at line: %ld\n", vtr_error.what(), ERROR_PARSE_ARCH,
+                          vtr_error.line());
+            }
+        }
+        log("Using Lut input width of: %d\n", physical_lut_size);
+
+        if (!flag_no_pass) {
+
+            if (flag_load_vtr_primitives) {
+                Pass::call(design, "read_verilog -nomem2reg +/parmys/vtr_primitives.v");
+                Pass::call(design, "setattr -mod -set keep_hierarchy 1 single_port_ram");
+                Pass::call(design, "setattr -mod -set keep_hierarchy 1 dual_port_ram");
+            }
+
+            Pass::call(design, "parmys_arch -a " + arch_file_path);
+
+            if (top_module_name.empty()) {
+                Pass::call(design, "hierarchy -check -auto-top -purge_lib");
+            } else {
+                Pass::call(design, "hierarchy -check -top " + top_module_name);
+            }
+
+            Pass::call(design, "proc -norom");
+            Pass::call(design, "fsm");
+            Pass::call(design, "opt");
+            Pass::call(design, "wreduce");
+            Pass::call(design, "memory -norom");
+            Pass::call(design, "check");
+            Pass::call(design, "flatten");
+            Pass::call(design, "opt -full");
+        }
+
+        // top_module() may return nullptr (e.g. with -nopass on a design without a top
+        // module); report that instead of dereferencing a null pointer below.
+        RTLIL::Module *top_module = design->top_module();
+        if (top_module == nullptr)
+            log_error("No top module found in the design!\n");
+
+        if (top_module->processes.size() != 0)
+            log_error("Found unmapped processes in top module %s: unmapped processes are not supported in parmys pass!\n",
+                      log_id(top_module->name));
+        if (top_module->memories.size() != 0)
+            log_error("Found unmapped memories in module %s: unmapped memories are not supported in parmys pass!\n",
+                      log_id(top_module->name));
+
+        design->sort();
+
+        log("--------------------------------------------------------------------\n");
+        log("Creating Odin-II Netlist from Design\n");
+
+        // Record name and per-bit port lists of every black box module, so that they can
+        // be re-created after the design has been emptied further down.
+        std::vector<Bbox> black_boxes;
+
+        for (auto bb_module : design->modules()) {
+            if (bb_module->get_bool_attribute(ID::blackbox)) {
+
+                Bbox bb;
+
+                bb.name = str(bb_module->name);
+
+                std::map<int, Yosys::RTLIL::Wire *> inputs, outputs;
+
+                for (auto wire : bb_module->wires()) {
+                    if (wire->port_input)
+                        inputs[wire->port_id] = wire;
+                    if (wire->port_output)
+                        outputs[wire->port_id] = wire;
+                }
+
+                for (auto &it : inputs) {
+                    Yosys::RTLIL::Wire *wire = it.second;
+                    for (int i = 0; i < wire->width; i++)
+                        bb.inputs.push_back(str(Yosys::RTLIL::SigSpec(wire, i)));
+                }
+
+                for (auto &it : outputs) {
+                    Yosys::RTLIL::Wire *wire = it.second;
+                    for (int i = 0; i < wire->width; i++)
+                        bb.outputs.push_back(str(Yosys::RTLIL::SigSpec(wire, i)));
+                }
+
+                black_boxes.push_back(bb);
+            }
+        }
+
+        netlist_t *transformed = to_netlist(top_module, design);
+
+        double synthesis_time = wall_time();
+
+        log("--------------------------------------------------------------------\n");
+        log("High-level Synthesis Begin\n");
+
+        /* Performing elaboration for input digital circuits */
+        try {
+            elaborate(transformed);
+            log("Successful Elaboration of the design by Odin-II\n");
+        } catch (vtr::VtrError &vtr_error) {
+            log_error("Odin-II Failed to parse Verilog / load BLIF file: %s with exit code:%d \n", vtr_error.what(), ERROR_ELABORATION);
+        }
+
+        /* Performing netlist optimizations */
+        try {
+            optimization(transformed);
+            log("Successful Optimization of netlist by Odin-II\n");
+        } catch (vtr::VtrError &vtr_error) {
+            log_error("Odin-II Failed to perform netlist optimization %s with exit code:%d \n", vtr_error.what(), ERROR_OPTIMIZATION);
+        }
+
+        /* Performing partial tech. map to the target device */
+        try {
+            techmap(transformed);
+            log("Successful Partial Technology Mapping by Odin-II\n");
+        } catch (vtr::VtrError &vtr_error) {
+            log_error("Odin-II Failed to perform partial mapping to target device %s with exit code:%d \n", vtr_error.what(), ERROR_TECHMAP);
+        }
+
+        synthesis_time = wall_time() - synthesis_time;
+
+        log("\nTotal Synthesis Time: ");
+        log_time(synthesis_time);
+        log("\n--------------------------------------------------------------------\n");
+
+        log("Updating the Design\n");
+        Pass::call(design, "delete");
+
+        // Remove whatever modules are left. The module list is copied first because
+        // modules must not be removed from the design while design->modules() is being iterated.
+        const std::vector<RTLIL::Module *> leftover_modules = design->modules().to_vector();
+        for (auto module : leftover_modules) {
+            design->remove(module);
+        }
+
+        // Re-create the black box modules from the recorded port lists.
+        for (auto bb_module : black_boxes) {
+            Yosys::Module *module = nullptr;
+            Yosys::hashlib::dict<Yosys::IdString, std::pair<int, bool>> wideports_cache;
+
+            module = new Yosys::Module;
+            module->name = RTLIL::escape_id(bb_module.name);
+
+            if (design->module(module->name))
+                log_error("Duplicate definition of module %s!\n", Yosys::log_id(module->name));
+
+            design->add(module);
+
+            for (auto b_wire : bb_module.inputs) {
+                Yosys::RTLIL::Wire *wire = to_wire(b_wire, module);
+                wire->port_input = true;
+                std::pair<Yosys::RTLIL::IdString, int> wp = wideports_split(Yosys::RTLIL::unescape_id(b_wire));
+                if (!wp.first.empty() && wp.second >= 0) {
+                    // Track the highest bit index seen per wide port; `second` records the direction (true = input).
+                    wideports_cache[wp.first].first = std::max(wideports_cache[wp.first].first, wp.second + 1);
+                    wideports_cache[wp.first].second = true;
+                }
+            }
+
+            for (auto b_wire : bb_module.outputs) {
+                Yosys::RTLIL::Wire *wire = to_wire(Yosys::RTLIL::unescape_id(b_wire), module);
+                wire->port_output = true;
+                std::pair<Yosys::RTLIL::IdString, int> wp = wideports_split(Yosys::RTLIL::unescape_id(b_wire));
+                if (!wp.first.empty() && wp.second >= 0) {
+                    wideports_cache[wp.first].first = std::max(wideports_cache[wp.first].first, wp.second + 1);
+                    wideports_cache[wp.first].second = false;
+                }
+            }
+
+            handle_wideports_cache(&wideports_cache, module);
+
+            module->fixup_ports();
+            wideports_cache.clear();
+
+            module->attributes[Yosys::ID::blackbox] = Yosys::RTLIL::Const(1);
+        }
+
+        update_design(design, transformed);
+
+        if (!flag_no_pass) {
+            if (top_module_name.empty()) {
+                Pass::call(design, "hierarchy -check -auto-top -purge_lib");
+            } else {
+                Pass::call(design, "hierarchy -check -top " + top_module_name);
+            }
+        }
+
+        log("--------------------------------------------------------------------\n");
+
+        /* Release everything allocated for this run */
+        free_netlist(transformed);
+
+        if (Arch.models) {
+            free_arch(&Arch);
+            Arch.models = nullptr;
+        }
+
+        free_type_descriptors(logical_block_types);
+        free_type_descriptors(physical_tile_types);
+
+        vtr::free(transformed);
+
+        if (one_string) {
+            vtr::free(one_string);
+        }
+        if (zero_string) {
+            vtr::free(zero_string);
+        }
+        if (pad_string) {
+            vtr::free(pad_string);
+        }
+
+        log("parmys pass finished.\n");
+    }
+} ParMYSPass;
+
+PRIVATE_NAMESPACE_END
\ No newline at end of file
diff --git a/parmys-plugin/parmys_arch.cc b/parmys-plugin/parmys_arch.cc
new file mode 100644
index 000000000..21672bdbb
--- /dev/null
+++ b/parmys-plugin/parmys_arch.cc
@@ -0,0 +1,134 @@
+/*
+ * Copyright 2022 Daniel Khadivi
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include "kernel/yosys.h"
+
+#include "arch_util.h"
+#include "odin_types.h"
+#include "parmys_utils.hpp"
+#include "read_xml_arch_file.h"
+
+USING_YOSYS_NAMESPACE
+PRIVATE_NAMESPACE_BEGIN
+
+struct ParmysArchPass : public Pass {
+    // Adds a blackbox module named after the architecture model `hb` to `design`.
+    // Every bit i of every model port becomes a wire named "<port>[i]" flagged as a module input or
+    // output; the collected widths and directions are then passed to handle_wideports_cache().
+    static void add_hb_to_design(t_model *hb, Design *design)
+    {
+        dict<IdString, std::pair<int, bool>> wideports_cache;
+
+        Module *module = new Module;
+        module->name = RTLIL::escape_id(hb->name);
+
+        if (design->module(module->name))
+            Yosys::log_error("Duplicate definition of module %s!\n", log_id(module->name));
+        design->add(module);
+
+        // Walks one linked list of model ports and creates the per-bit port wires. wideports_cache maps
+        // the name returned by wideports_split() to (highest bit index + 1, is_input).
+        auto add_port_wires = [&](t_model_ports *port, bool is_input) {
+            for (; port; port = port->next) {
+                for (int i = 0; i < port->size; i++) {
+                    std::string w_name = stringf("%s[%d]", port->name, i);
+                    Yosys::RTLIL::Wire *wire = to_wire(w_name, module);
+                    if (is_input)
+                        wire->port_input = true;
+                    else
+                        wire->port_output = true;
+                    std::pair<Yosys::RTLIL::IdString, int> wp = wideports_split(w_name);
+                    if (!wp.first.empty() && wp.second >= 0) {
+                        wideports_cache[wp.first].first = std::max(wideports_cache[wp.first].first, wp.second + 1);
+                        wideports_cache[wp.first].second = is_input;
+                    }
+                }
+            }
+        };
+
+        add_port_wires(hb->inputs, true);
+        add_port_wires(hb->outputs, false);
+
+        handle_wideports_cache(&wideports_cache, module);
+
+        module->fixup_ports();
+        wideports_cache.clear();
+
+        module->attributes[ID::blackbox] = RTLIL::Const(1);
+    }
+
+    ParmysArchPass() : Pass("parmys_arch", "loads available hard blocks within the architecture to Yosys Design") {}
+
+    // Prints the command-line usage of the pass.
+    void help() override
+    {
+        log("\n");
+        log("    -a ARCHITECTURE_FILE\n");
+        log("        VTR FPGA architecture description file (XML)\n");
+    }
+
+    // Parses `-a ARCHITECTURE_FILE`, loads the architecture with XmlReadArch() and adds every model
+    // except the single/dual port RAM, "multiply" and "adder" models to `design`.
+    void execute(std::vector<std::string> args, RTLIL::Design *design) override
+    {
+        size_t argidx = 1;
+        std::string arch_file_path;
+        // Check the bounds first: when the pass is called without any argument args[argidx] does not
+        // exist and must not be read.
+        if (argidx + 1 < args.size() && args[argidx] == "-a") {
+            arch_file_path = args[++argidx];
+            argidx++;
+        }
+        extra_args(args, argidx, design);
+
+        if (arch_file_path.empty())
+            log_error("parmys_arch: no architecture file given, use `-a ARCHITECTURE_FILE`\n");
+
+        t_arch arch;
+
+        std::vector<t_physical_tile_type> physical_tile_types;
+        std::vector<t_logical_block_type> logical_block_types;
+
+        try {
+            XmlReadArch(arch_file_path.c_str(), false, &arch, physical_tile_types, logical_block_types);
+        } catch (vtr::VtrError &vtr_error) {
+            log_error("Odin Failed to load architecture file: %s with exit code %s at line: %ld\n", vtr_error.what(), "ERROR_PARSE_ARCH",
+                      vtr_error.line());
+        }
+
+        t_model *hb = arch.models;
+        while (hb) {
+            if (strcmp(hb->name, SINGLE_PORT_RAM_string) && strcmp(hb->name, DUAL_PORT_RAM_string) && strcmp(hb->name, "multiply") &&
+                strcmp(hb->name, "adder")) {
+                add_hb_to_design(hb, design);
+                log("Hard block added to the Design ---> `%s`\n", hb->name);
+            }
+
+            hb = hb->next;
+        }
+
+        // CLEAN UP
+        free_arch(&arch);
+        free_type_descriptors(physical_tile_types);
+        free_type_descriptors(logical_block_types);
+
+        log("parmys_arch pass finished.\n");
+    }
+
+} ParmysArchPass;
+
+PRIVATE_NAMESPACE_END
\ No newline at end of file
diff --git a/parmys-plugin/parmys_resolve.cc b/parmys-plugin/parmys_resolve.cc
new file mode 100644
index 000000000..7b4aabd4d
--- /dev/null
+++ b/parmys-plugin/parmys_resolve.cc
@@ -0,0 +1,229 @@
+/*
+ * Copyright 2022 Daniel Khadivi
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "odin_globals.h"
+#include "odin_types.h"
+
+#include <string.h>
+
+#include "netlist_utils.h"
+
+#include "BlockMemories.hpp"
+#include "adders.h"
+#include "memories.h"
+#include "multipliers.h"
+#include "parmys_resolve.hpp"
+#include "subtractions.h"
+
+#include "vtr_util.h"
+// Post-order depth-first walk over the fanout of a node; each visited node is given to resolve_node().
+void dfs_resolve(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist);
+// Dispatches a node to the arithmetic or the memory resolver according to its type.
+void resolve_node(nnode_t *node, short traverse_mark_number, netlist_t *netlist);
+// Handles ADD, MINUS and MULTIPLY nodes.
+static void resolve_arithmetic_nodes(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist);
+// Handles SPRAM, DPRAM, YMEM, YMEM2 and MEMORY nodes.
+static void resolve_memory_nodes(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist);
+// Gives the CLOCK_NODE type to the nodes that act as clocks.
+static void look_for_clocks(netlist_t *netlist);
+
+void resolve_top(netlist_t *netlist)
+{
+    /* Resolve pass over a coarse-grained netlist; a no-op unless configuration.coarsen is set. */
+    if (!configuration.coarsen)
+        return;
+
+    init_block_memory_index();
+
+    /* depth-first resolve from every top-level input, then from the gnd, vcc and pad nodes */
+    for (int idx = 0; idx < netlist->num_top_input_nodes; idx++) {
+        nnode_t *top_input = netlist->top_input_nodes[idx];
+        if (top_input != NULL)
+            dfs_resolve(top_input, RESOLVE_DFS_VALUE, netlist);
+    }
+    dfs_resolve(netlist->gnd_node, RESOLVE_DFS_VALUE, netlist);
+    dfs_resolve(netlist->vcc_node, RESOLVE_DFS_VALUE, netlist);
+    dfs_resolve(netlist->pad_node, RESOLVE_DFS_VALUE, netlist);
+
+    look_for_clocks(netlist);
+    configuration.coarsen = false; /* later calls return immediately */
+}
+
+void dfs_resolve(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist)
+{
+    /* Post-order depth-first walk: every node reachable through the fanout of `node` is resolved */
+    /* before `node` itself. traverse_mark_number tags visited nodes so each is handled once. */
+    if (node->traverse_visited == traverse_mark_number)
+        return;
+
+    node->traverse_visited = traverse_mark_number;
+
+    for (int pin_idx = 0; pin_idx < node->num_output_pins; pin_idx++) {
+        nnet_t *fanout_net = node->output_pins[pin_idx]->net;
+        if (!fanout_net || !fanout_net->fanout_pins)
+            continue;
+
+        for (int sink_idx = 0; sink_idx < fanout_net->num_fanout_pins; sink_idx++) {
+            npin_t *sink_pin = fanout_net->fanout_pins[sink_idx];
+            /* skip empty fanout slots and pins that are not attached to a node */
+            if (!sink_pin || !sink_pin->node)
+                continue;
+
+            dfs_resolve(sink_pin->node, traverse_mark_number, netlist);
+        }
+    }
+
+    resolve_node(node, traverse_mark_number, netlist);
+}
+
+void resolve_node(nnode_t *node, short traverse_number, netlist_t *netlist)
+{
+    /* Dispatches `node` to the resolver matching its type: arithmetic nodes, memory nodes, or nothing */
+    /* for types that need no work in this phase. Any other type is reported as an error. */
+    switch (node->type) {
+    case ADD:
+    case MINUS:
+    case MULTIPLY:
+        resolve_arithmetic_nodes(node, traverse_number, netlist);
+        break;
+    case SPRAM:
+    case DPRAM:
+    case ROM:
+    case BRAM:
+    case YMEM:
+    case YMEM2:
+    case MEMORY:
+        resolve_memory_nodes(node, traverse_number, netlist);
+        break;
+    case GND_NODE:
+    case VCC_NODE:
+    case PAD_NODE:
+    case INPUT_NODE:
+    case OUTPUT_NODE:
+    case HARD_IP:
+    case BUF_NODE:
+    case BITWISE_NOT:
+    case BITWISE_AND:
+    case BITWISE_OR:
+    case BITWISE_NAND:
+    case BITWISE_NOR:
+    case BITWISE_XNOR:
+    case BITWISE_XOR:
+        /* some are already resolved for this phase */
+        break;
+    case SKIP:
+        break;
+    case ADDER_FUNC:
+    case CARRY_FUNC:
+    case CLOCK_NODE:
+    case GENERIC:
+    default:
+        /* node->type is an enum value: print it with %d, handing it to a %s conversion is undefined */
+        error_message(RESOLVE, node->loc, "node (%d: %s) should have been converted to softer version.", (int)node->type, node->name);
+        break;
+    }
+}
+
+static void resolve_arithmetic_nodes(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist)
+{
+    /*
+     * Prepares an ADD, MINUS or MULTIPLY node and records it in the matching global list
+     * (add_list, sub_list or mult_list). The node must already carry the traversal mark.
+     */
+    oassert(node->traverse_visited == traverse_mark_number);
+
+    const auto node_type = node->type;
+
+    if (node_type == ADD) {
+        /* with hard adders, continue with the node handed back by check_missing_ports() */
+        if (hard_adders)
+            node = check_missing_ports(node, traverse_mark_number, netlist);
+
+        add_list = insert_in_vptr_list(add_list, node);
+    } else if (node_type == MINUS) {
+        /* the port sizes are equalized before the node is recorded */
+        equalize_ports_size(node, traverse_mark_number, netlist);
+
+        sub_list = insert_in_vptr_list(sub_list, node);
+    } else if (node_type == MULTIPLY) {
+        /* exactly one of the two checks runs, selected by hard_multipliers */
+        if (hard_multipliers)
+            check_multiplier_port_size(node);
+        else
+            check_constant_multipication(node, traverse_mark_number, netlist);
+
+        mult_list = insert_in_vptr_list(mult_list, node);
+    } else {
+        /* every other node type is reported */
+        error_message(RESOLVE, node->loc, "The node(%s) type is not among Odin's arithmetic types [ADD, MINUS and MULTIPLY]\n", node->name);
+    }
+}
+
+static void resolve_memory_nodes(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist)
+{
+    /* Runs the resolver matching the memory type of `node`. */
+    /* The node must already carry the traversal mark. */
+    oassert(node->traverse_visited == traverse_mark_number);
+
+    switch (node->type) {
+    case SPRAM:
+        resolve_single_port_ram(node, traverse_mark_number, netlist);
+        break;
+    case DPRAM:
+        resolve_dual_port_ram(node, traverse_mark_number, netlist);
+        break;
+    case YMEM:
+        resolve_ymem_node(node, traverse_mark_number, netlist);
+        break;
+    case YMEM2:
+        resolve_ymem2_node(node, traverse_mark_number, netlist);
+        break;
+    case MEMORY:
+        /* nothing is done for MEMORY nodes here */
+        break;
+    default:
+        /* NOTE(review): resolve_node() also routes ROM and BRAM here, and they end up in this error */
+        /* path although the message lists them -- confirm that this is intended. */
+        /* node->type is an enum value: print it with %d, a %s conversion would be undefined. */
+        error_message(RESOLVE, node->loc, "The node(%s) type (%d) is not among Odin's latch types [SPRAM, DPRAM, ROM and BRAM(RW)]\n", node->name,
+                      (int)node->type);
+        break;
+    }
+}
+
+static void look_for_clocks(netlist_t *netlist)
+{
+    /* Gives the CLOCK_NODE type to the top-level input named DEFAULT_CLOCK_NAME and to the node that */
+    /* drives input pin 1 of every flip-flop, looking through any chain of BUF_NODEs. */
+    for (int i = 0; i < netlist->num_top_input_nodes; i++) {
+        nnode_t *input_node = netlist->top_input_nodes[i];
+        /* slots of top_input_nodes may be NULL (resolve_top() checks for it as well) */
+        if (input_node != NULL && !strcmp(input_node->name, DEFAULT_CLOCK_NAME))
+            input_node->type = CLOCK_NODE;
+    }
+
+    for (int i = 0; i < netlist->num_ff_nodes; i++) {
+        oassert(netlist->ff_nodes[i]->input_pins[1]->net->num_driver_pins == 1);
+        nnode_t *node = netlist->ff_nodes[i]->input_pins[1]->net->driver_pins[0]->node;
+
+        while (node->type == BUF_NODE)
+            node = node->input_pins[0]->net->driver_pins[0]->node;
+
+        node->type = CLOCK_NODE;
+    }
+}
diff --git a/parmys-plugin/parmys_resolve.hpp b/parmys-plugin/parmys_resolve.hpp
new file mode 100644
index 000000000..52c2a9beb
--- /dev/null
+++ b/parmys-plugin/parmys_resolve.hpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2022 Daniel Khadivi
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef __RESOLVE_H
+#define __RESOLVE_H
+
+#include "odin_types.h" // for netlist_t, so that this header does not depend on the include order
+#define DEFAULT_CLOCK_NAME "GLOBAL_SIM_BASE_CLK"
+// Runs the resolve pass over `netlist`; does nothing unless configuration.coarsen is set.
+void resolve_top(netlist_t* netlist);
+#endif
diff --git a/parmys-plugin/parmys_update.cc b/parmys-plugin/parmys_update.cc
new file mode 100644
index 000000000..25fca5991
--- /dev/null
+++ b/parmys-plugin/parmys_update.cc
@@ -0,0 +1,520 @@
+/*
+ * Copyright 2022 Daniel Khadivi
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <string.h>
+
+#include "odin_globals.h"
+#include "odin_types.h"
+
+#include "vtr_memory.h"
+#include "vtr_util.h"
+
+#include "node_creation_library.h"
+
+#include "adders.h"
+#include "hard_blocks.h"
+#include "multipliers.h"
+
+#include "kernel/rtlil.h"
+#include "parmys_update.hpp"
+#include "parmys_utils.hpp"
+// Traversal helpers defined further down: they walk the netlist and create the Yosys cell of each node.
+static void depth_first_traversal_to_design(short marker_value, Yosys::Module *module, netlist_t *netlist, Yosys::Design *design);
+static void depth_traverse_update_design(nnode_t *node, uintptr_t traverse_mark_number, Yosys::Module *module, netlist_t *netlist,
+                                         Yosys::Design *design);
+static void cell_node(nnode_t *node, short /*traverse_number*/, Yosys::Module *module, netlist_t *netlist, Yosys::Design *design);
+
+Yosys::Wire *wire_net_driver(Yosys::Module *module, nnode_t *node, nnet_t *net, long driver_idx)
+{
+    // Returns the wire of `module` standing for driver number `driver_idx` of `net`; `node` is only
+    // used to report an undriven net.
+    oassert(driver_idx < net->num_driver_pins);
+    npin_t *driver_pin = net->driver_pins[driver_idx];
+
+    if (!driver_pin->node) {
+        // Add a warning for an undriven net.
+        warning_message(NETLIST, node->loc, "Net %s driving node %s is itself undriven.", net->name, node->name);
+        return to_wire("$undef", module);
+    }
+
+    // Pins of these node types are looked up by their own name when they have one; every other
+    // driver is looked up by the name of its node.
+    const auto driver_type = driver_pin->node->type;
+    const bool pin_named_type = driver_type == MULTIPLY || driver_type == HARD_IP || driver_type == MEMORY || driver_type == ADD ||
+                                driver_type == MINUS || driver_type == SKIP;
+    std::string wire_name = (pin_named_type && driver_pin->name != NULL) ? driver_pin->name : driver_pin->node->name;
+    return to_wire(wire_name, module);
+}
+
+Yosys::Wire *wire_input_single_driver(Yosys::Module *module, nnode_t *node, long pin_idx)
+{
+    // Wire feeding input pin `pin_idx` of `node`; the "$undef" wire when the net has no driver.
+    oassert(pin_idx < node->num_input_pins);
+    nnet_t *input_net = node->input_pins[pin_idx]->net;
+    if (input_net->num_driver_pins == 0)
+        return to_wire("$undef", module);
+
+    oassert(input_net->num_driver_pins == 1); // a driven net must have exactly one driver here
+    return wire_net_driver(module, node, input_net, 0);
+}
+
+Yosys::Wire *wire_output_pin(Yosys::Module *module, nnode_t *node)
+{
+    // Looks up the wire "\<node name>" in `module` and creates it when it does not exist yet.
+    const Yosys::RTLIL::IdString output_id(Yosys::stringf("\\%s", node->name));
+    if (Yosys::RTLIL::Wire *existing = module->wire(output_id))
+        return existing;
+
+    return module->addWire(output_id);
+}
+
+void update_design(Yosys::Design *design, netlist_t *netlist)
+{
+    // Creates a new module named after the first whitespace-delimited token of netlist->identifier,
+    // adds it to `design` and fills it with the ports, cells and connections of `netlist`.
+    int blif_maxnum = 0;
+
+    Yosys::hashlib::dict<Yosys::RTLIL::IdString, std::pair<int, bool>> wideports_cache;
+
+    // strtok() returns NULL when the identifier holds no token; building a std::string from NULL is
+    // undefined behaviour, so report the problem instead.
+    char *module_name = netlist->identifier ? strtok(netlist->identifier, " \t\r\n") : NULL;
+    if (module_name == NULL)
+        Yosys::log_error("The netlist has no identifier to name the module after\n");
+
+    Yosys::RTLIL::Module *module = new Yosys::RTLIL::Module;
+    module->name = Yosys::RTLIL::escape_id(module_name);
+
+    if (design->module(module->name))
+        Yosys::log_error("Duplicate definition of module %s\n", Yosys::log_id(module->name));
+    design->add(module);
+
+    // constant wires: "$undef", "$true" and "$false" are tied to x, 1 and 0
+    Yosys::RTLIL::SigSpec undef;
+    undef.append(to_wire("$undef", module));
+    module->connect(Yosys::RTLIL::SigSig(undef, Yosys::RTLIL::State::Sx));
+    Yosys::RTLIL::SigSpec vcc;
+    vcc.append(to_wire("$true", module));
+    module->connect(Yosys::RTLIL::SigSig(vcc, Yosys::RTLIL::State::S1));
+    Yosys::RTLIL::SigSpec gnd;
+    gnd.append(to_wire("$false", module));
+    module->connect(Yosys::RTLIL::SigSig(gnd, Yosys::RTLIL::State::S0));
+
+    for (long i = 0; i < netlist->num_top_input_nodes; i++) {
+        nnode_t *top_input_node = netlist->top_input_nodes[i];
+        Yosys::RTLIL::Wire *wire = to_wire(top_input_node->name, module);
+        wire->port_input = true;
+
+        std::pair<Yosys::RTLIL::IdString, int> wp = wideports_split(top_input_node->name);
+        if (!wp.first.empty() && wp.second >= 0) {
+            wideports_cache[wp.first].first = std::max(wideports_cache[wp.first].first, wp.second + 1);
+            wideports_cache[wp.first].second = true;
+        }
+    }
+
+    for (long i = 0; i < netlist->num_top_output_nodes; i++) {
+        nnode_t *top_output_node = netlist->top_output_nodes[i];
+        if (!top_output_node->input_pins[0]->net->num_driver_pins) {
+            Yosys::log_warning("This output is undriven (%s) and will be removed\n", top_output_node->name);
+        } else {
+            Yosys::RTLIL::Wire *wire = to_wire(top_output_node->name, module);
+            wire->port_output = true;
+
+            std::pair<Yosys::RTLIL::IdString, int> wp = wideports_split(top_output_node->name);
+            if (!wp.first.empty() && wp.second >= 0) {
+                wideports_cache[wp.first].first = std::max(wideports_cache[wp.first].first, wp.second + 1);
+                wideports_cache[wp.first].second = false;
+            }
+        }
+    }
+
+    depth_first_traversal_to_design(100, module, netlist, design);
+
+    /* connect all the outputs up to the last gate */
+    for (long i = 0; i < netlist->num_top_output_nodes; i++) {
+        nnode_t *node = netlist->top_output_nodes[i];
+
+        if (node->input_pins[0]->net->num_fanout_pins > 0) {
+            nnet_t *net = node->input_pins[0]->net;
+            for (int j = 0; j < net->num_driver_pins; j++) {
+                Yosys::Wire *driver_wire = wire_net_driver(module, node, net, j);
+                Yosys::Wire *out_wire = to_wire(node->name, module);
+
+                Yosys::RTLIL::SigSpec input_sig, output_sig;
+                input_sig.append(driver_wire);
+                output_sig.append(out_wire);
+
+                module->connect(output_sig, input_sig);
+            }
+        }
+    }
+
+    handle_wideports_cache(&wideports_cache, module);
+
+    module->fixup_ports();
+    wideports_cache.clear();
+
+    // Clean up: a $lut cell whose table is {0, 1} only copies A to Y, so connect Y to A directly.
+    Yosys::Const buffer_lut(std::vector<Yosys::RTLIL::State>({Yosys::State::S0, Yosys::State::S1}));
+    std::vector<Yosys::Cell *> remove_cells;
+
+    for (auto cell : module->cells())
+        if (cell->type == ID($lut) && cell->getParam(Yosys::ID::LUT) == buffer_lut) {
+            module->connect(cell->getPort(Yosys::ID::Y), cell->getPort(Yosys::ID::A));
+            remove_cells.push_back(cell);
+        }
+
+    for (auto cell : remove_cells)
+        module->remove(cell);
+
+    // rename the constant wires to "$true$<n>", "$false$<n>" and "$undef$<n>"
+    Yosys::Wire *true_wire = module->wire(ID($true));
+    Yosys::Wire *false_wire = module->wire(ID($false));
+    Yosys::Wire *undef_wire = module->wire(ID($undef));
+
+    if (true_wire != nullptr)
+        module->rename(true_wire, Yosys::stringf("$true$%d", ++blif_maxnum));
+
+    if (false_wire != nullptr)
+        module->rename(false_wire, Yosys::stringf("$false$%d", ++blif_maxnum));
+
+    if (undef_wire != nullptr)
+        module->rename(undef_wire, Yosys::stringf("$undef$%d", ++blif_maxnum));
+
+    add_the_blackbox_for_mults_yosys(design);
+    add_the_blackbox_for_adds_yosys(design);
+    output_hard_blocks_yosys(design);
+}
+
+void depth_first_traversal_to_design(short marker_value, Yosys::Module *module, netlist_t *netlist, Yosys::Design *design)
+{
+    // Starts the cell-creating walk at the gnd, vcc and pad nodes, then at every top-level input.
+    // NOTE(review): the previous name strings are overwritten below without being freed -- confirm.
+    if (!coarsen_cleanup) {
+        netlist->gnd_node->name = vtr::strdup("$false");
+        netlist->vcc_node->name = vtr::strdup("$true");
+        netlist->pad_node->name = vtr::strdup("$undef");
+    }
+
+    depth_traverse_update_design(netlist->gnd_node, marker_value, module, netlist, design);
+    depth_traverse_update_design(netlist->vcc_node, marker_value, module, netlist, design);
+    depth_traverse_update_design(netlist->pad_node, marker_value, module, netlist, design);
+
+    for (int input_idx = 0; input_idx < netlist->num_top_input_nodes; input_idx++) {
+        nnode_t *top_input = netlist->top_input_nodes[input_idx];
+        if (top_input != NULL)
+            depth_traverse_update_design(top_input, marker_value, module, netlist, design);
+    }
+}
+
+void depth_traverse_update_design(nnode_t *node, uintptr_t traverse_mark_number, Yosys::Module *module, netlist_t *netlist, Yosys::Design *design)
+{
+    // Pre-order depth-first walk: the cell of `node` is created first, then the walk continues with
+    // every node on the fanout of its output pins. traverse_mark_number tags the visited nodes so
+    // that each node is handled once.
+    if (node->traverse_visited == traverse_mark_number)
+        return;
+
+    cell_node(node, traverse_mark_number, module, netlist, design);
+
+    node->traverse_visited = traverse_mark_number;
+
+    for (int pin_idx = 0; pin_idx < node->num_output_pins; pin_idx++) {
+        // output pins without a net have no fanout to follow
+        nnet_t *fanout_net = node->output_pins[pin_idx]->net;
+        if (fanout_net == NULL)
+            continue;
+
+        for (int sink_idx = 0; sink_idx < fanout_net->num_fanout_pins; sink_idx++) {
+            npin_t *sink_pin = fanout_net->fanout_pins[sink_idx];
+            if (sink_pin == NULL)
+                continue;
+
+            nnode_t *sink_node = sink_pin->node;
+            if (sink_node == NULL)
+                continue;
+
+            depth_traverse_update_design(sink_node, traverse_mark_number, module, netlist, design);
+        }
+    }
+}
+
+void cell_node(nnode_t *node, short /*traverse_number*/, Yosys::Module *module, netlist_t *netlist, Yosys::Design *design)
+{
+    // Creates the Yosys cell(s) of `node` in `module` according to the node type. Types that are not
+    // handled at this stage are reported through Yosys::log_error().
+    switch (node->type) {
+    case GT:
+        Yosys::log_error("GT\n");
+        break;
+    case LT:
+        Yosys::log_error("LT\n");
+        break;
+    case BITWISE_NOT:
+        Yosys::log_error("BITWISE_NOT\n");
+        break;
+    case BUF_NODE:
+        Yosys::log_error("BUF_NODE\n");
+        break;
+    case LOGICAL_OR:
+    case LOGICAL_AND:
+    case LOGICAL_NOT:
+    case LOGICAL_NOR:
+    case LOGICAL_XOR:
+    case ADDER_FUNC:
+    case CARRY_FUNC:
+    case LOGICAL_XNOR:
+        define_logical_function_yosys(node, module);
+        break;
+    case LOGICAL_NAND:
+    case LOGICAL_EQUAL:
+    case NOT_EQUAL:
+        Yosys::log_error("LOGICAL_\n");
+        break;
+    case MUX_2:
+        define_MUX_function_yosys(node, module);
+        break;
+
+    case SMUX_2:
+        Yosys::log_error("SMUX_2\n");
+        break;
+
+    case FF_NODE:
+        define_FF_yosys(node, module);
+        break;
+
+    /* arithmetic nodes are only expected here when the matching hard block is available */
+    case MULTIPLY:
+        oassert(hard_multipliers); /* should be soft logic! */
+        define_mult_function_yosys(node, module, design);
+        break;
+
+    case ADD:
+    case MINUS:
+        oassert(hard_adders); /* should be soft logic! */
+        define_add_function_yosys(node, module, design);
+        break;
+
+    /* these three node types share the hard block handler */
+    case MEMORY:
+    case HARD_IP:
+    case SKIP:
+        cell_hard_block(node, module, netlist, design);
+        break;
+    case CLOCK_NODE:
+        Yosys::log_error("CLOCK\n");
+        break;
+    case INPUT_NODE:
+    case OUTPUT_NODE:
+    case PAD_NODE:
+    case GND_NODE:
+    case VCC_NODE:
+        /* nothing is created for these node types */
+        break;
+    case BITWISE_AND:
+    case BITWISE_NAND:
+    case BITWISE_NOR:
+    case BITWISE_XNOR:
+    case BITWISE_XOR:
+    case BITWISE_OR:
+    case MULTI_PORT_MUX:
+    case SL:
+    case ASL:
+    case SR:
+    case ASR:
+    case CASE_EQUAL:
+    case CASE_NOT_EQUAL:
+    case DIVIDE:
+    case MODULO:
+    case GTE:
+    case LTE:
+    default:
+        /* the message ends with a newline like every other log_error() call in this switch */
+        Yosys::log_error("node should have been converted to softer version.\n");
+        break;
+    }
+}
+
+void define_FF_yosys(nnode_t *node, Yosys::Module *module)
+{
+    // Creates the Yosys cell of a flip-flop node: the driver of input pin 0 is the data input, the
+    // driver of input pin 1 the clock and the wire named after the node the output. The string
+    // returned by edge_type_blif_str() selects the cell: "re"/"fe" use addDff(), "ah"/"al" use
+    // addDlatch() (the trailing `false` is the polarity argument); anything else, or a missing
+    // clock or edge string, uses addFf().
+    Yosys::Wire *d = wire_input_single_driver(module, node, 0);
+    Yosys::Wire *q = wire_output_pin(module, node);
+    // The string is only read, so it is used in place: a vtr::strdup() copy would have to be
+    // freed again.
+    const char *edge = edge_type_blif_str(node->attributes->clk_edge_type, node->loc);
+    Yosys::Wire *clock = wire_input_single_driver(module, node, 1);
+
+    if (node->initial_value == init_value_e::_0 || node->initial_value == init_value_e::_1)
+        q->attributes[Yosys::ID::init] = Yosys::Const(node->initial_value, 1);
+
+    // strcmp() must not be given a NULL edge string
+    const bool clocked = (clock != nullptr && edge != nullptr);
+
+    if (clocked && !strcmp(edge, "re"))
+        module->addDff(NEW_ID, clock, d, q);
+    else if (clocked && !strcmp(edge, "fe"))
+        module->addDff(NEW_ID, clock, d, q, false);
+    else if (clocked && !strcmp(edge, "ah"))
+        module->addDlatch(NEW_ID, clock, d, q);
+    else if (clocked && !strcmp(edge, "al"))
+        module->addDlatch(NEW_ID, clock, d, q, false);
+    else
+        module->addFf(NEW_ID, d, q);
+}
+
+void define_MUX_function_yosys(nnode_t *node, Yosys::Module *module)
+{
+    oassert(node->num_output_pins == 1);
+    oassert(node->num_input_port_sizes == 2);
+    oassert(node->input_port_sizes[0] == node->input_port_sizes[1]);
+    // Builds the node as an $and cell over its two input ports followed by a $reduce_or cell.
+    Yosys::RTLIL::SigSpec input_sig_A, input_sig_B, buf_sig_M, output_sig;
+    // A: driver 0 of every pin of the first input port
+    for (int i = 0; i < node->input_port_sizes[0]; i++) {
+        nnet_t *input_net = node->input_pins[i]->net;
+        Yosys::Wire *driver_wire = wire_net_driver(module, node, input_net, 0);
+
+        input_sig_A.append(driver_wire);
+    }
+    // B: driver 0 of every remaining input pin
+    for (int i = node->input_port_sizes[0]; i < node->num_input_pins; i++) {
+        nnet_t *input_net = node->input_pins[i]->net;
+        Yosys::Wire *driver_wire = wire_net_driver(module, node, input_net, 0);
+
+        input_sig_B.append(driver_wire);
+    }
+    // M: intermediate signal between the two cells, one appended wire per bit of the first port
+    for (int i = 0; i < node->input_port_sizes[0]; i++) {
+        std::string mid_buf_name = op_node_name(BUF_NODE, node->name);
+        Yosys::RTLIL::Wire *buf_wire = to_wire(mid_buf_name, module);
+        buf_sig_M.append(buf_wire);
+    }
+    // NOTE(review): op_node_name() gets the same arguments for every bit above -- confirm the wires differ.
+    Yosys::RTLIL::Wire *out_wire = to_wire(node->name, module);
+    output_sig.append(out_wire);
+    // cell 1: M = A and B, all three with the width of the first input port
+    Yosys::IdString celltype_1 = ID($and);
+    Yosys::RTLIL::Cell *cell_1 = module->addCell(NEW_ID, celltype_1);
+    cell_1->setPort(Yosys::ID::A, input_sig_A);
+    cell_1->parameters[Yosys::ID::A_WIDTH] = Yosys::RTLIL::Const(int(node->input_port_sizes[0]));
+    cell_1->parameters[Yosys::ID::A_SIGNED] = Yosys::RTLIL::Const(false);
+    cell_1->setPort(Yosys::ID::B, input_sig_B);
+    cell_1->parameters[Yosys::ID::B_WIDTH] = Yosys::RTLIL::Const(int(node->input_port_sizes[1]));
+    cell_1->parameters[Yosys::ID::B_SIGNED] = Yosys::RTLIL::Const(false);
+    cell_1->setPort(Yosys::ID::Y, buf_sig_M);
+    cell_1->parameters[Yosys::ID::Y_WIDTH] = Yosys::RTLIL::Const(int(node->input_port_sizes[0]));
+    // cell 2: the node output is the OR-reduction of M
+    Yosys::IdString celltype_2 = ID($reduce_or);
+    Yosys::RTLIL::Cell *cell_2 = module->addCell(NEW_ID, celltype_2);
+    cell_2->setPort(Yosys::ID::A, buf_sig_M);
+    cell_2->parameters[Yosys::ID::A_WIDTH] = Yosys::RTLIL::Const(int(node->input_port_sizes[0]));
+    cell_2->parameters[Yosys::ID::A_SIGNED] = Yosys::RTLIL::Const(false);
+    cell_2->setPort(Yosys::ID::Y, output_sig);
+    cell_2->parameters[Yosys::ID::Y_WIDTH] = Yosys::RTLIL::Const(int(node->num_output_pins));
+}
+
+void define_logical_function_yosys(nnode_t *node, Yosys::Module *module)
+{
+    // Creates the Yosys cell of a single-output logic node: input A collects driver 0 of every input
+    // pin and output Y is the wire named after the node. CARRY_FUNC becomes a $lut cell, the other
+    // supported types a $reduce_* or $logic_not cell over A.
+    Yosys::RTLIL::SigSpec input_sig, output_sig;
+
+    for (int i = 0; i < node->num_input_pins; i++) {
+        nnet_t *input_net = node->input_pins[i]->net;
+        Yosys::Wire *driver_wire = wire_net_driver(module, node, input_net, 0); // 0 TODO?
+
+        input_sig.append(driver_wire);
+    }
+
+    Yosys::RTLIL::Wire *out_wire = to_wire(node->name, module);
+    output_sig.append(out_wire);
+
+    oassert(node->num_output_pins == 1);
+
+    Yosys::IdString celltype;
+    /* pick the Yosys cell type implementing this gate */
+    switch (node->type) {
+    case LOGICAL_AND:
+        celltype = ID($reduce_and);
+        break;
+    case LOGICAL_OR:
+        celltype = ID($reduce_or);
+        break;
+    case LOGICAL_NOT:
+    case LOGICAL_NOR:
+        celltype = ID($logic_not);
+        break;
+    case ADDER_FUNC:
+        oassert(node->num_input_pins == 3);
+        celltype = ID($reduce_xor);
+        break;
+    case CARRY_FUNC:
+        oassert(node->num_input_pins == 3);
+        celltype = ID($lut);
+        break;
+    case LOGICAL_XOR:
+        oassert(node->num_input_pins <= 3);
+        celltype = ID($reduce_xor);
+        break;
+    /* NOTE(review): NOT_EQUAL shares the XNOR mapping -- confirm that this is intended */
+    case NOT_EQUAL:
+    case LOGICAL_XNOR:
+        oassert(node->num_input_pins <= 3);
+        celltype = ID($reduce_xnor);
+        break;
+    case LOGICAL_NAND:
+    case LOGICAL_EQUAL:
+        /* no cell type is mapped to these gates */
+        break;
+    default:
+        oassert(false);
+        break;
+    }
+
+    // Do not create a cell with an empty type: report the gate that cannot be mapped instead.
+    if (celltype.empty())
+        Yosys::log_error("Node %s has a logic type that cannot be mapped to a Yosys cell\n", node->name);
+
+    Yosys::RTLIL::Cell *cell = module->addCell(NEW_ID, celltype);
+
+    cell->setPort(Yosys::ID::A, input_sig);
+    cell->setPort(Yosys::ID::Y, output_sig);
+
+    if (node->type == CARRY_FUNC) {
+        // majority function: the table holds a 1 for every index with at least two of three bits set
+        cell->parameters[Yosys::ID::WIDTH] = Yosys::RTLIL::Const(input_sig.size());
+        cell->parameters[Yosys::ID::LUT] = Yosys::RTLIL::Const(Yosys::RTLIL::State::Sx, 1 << input_sig.size());
+        Yosys::RTLIL::Const *lutptr = &cell->parameters.at(Yosys::ID::LUT);
+        for (int i = 0; i < (1 << node->num_input_pins); i++) {
+            if (i == 3 || i == 5 || i == 6 || i == 7) //"011 1\n101 1\n110 1\n111 1\n"
+                lutptr->bits.at(i) = Yosys::RTLIL::State::S1;
+            else
+                lutptr->bits.at(i) = Yosys::RTLIL::State::S0;
+        }
+    } else {
+        cell->parameters[Yosys::ID::A_WIDTH] = Yosys::RTLIL::Const(int(node->num_input_pins));
+        cell->parameters[Yosys::ID::Y_WIDTH] = Yosys::RTLIL::Const(int(node->num_output_pins));
+        cell->parameters[Yosys::ID::A_SIGNED] = Yosys::RTLIL::Const(false);
+    }
+}
\ No newline at end of file
diff --git a/parmys-plugin/parmys_update.hpp b/parmys-plugin/parmys_update.hpp
new file mode 100644
index 000000000..ecdb00112
--- /dev/null
+++ b/parmys-plugin/parmys_update.hpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2022 Daniel Khadivi
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef __DESIGN_UPDATE_H__
+#define __DESIGN_UPDATE_H__
+
+#include "odin_types.h"
+
+#define DEFAULT_CLOCK_NAME "GLOBAL_SIM_BASE_CLK"
+
+void define_logical_function_yosys(nnode_t *node, Yosys::Module *module);
+void update_design(Yosys::Design *design, netlist_t *netlist);
+void define_MUX_function_yosys(nnode_t *node, Yosys::Module *module);
+void define_FF_yosys(nnode_t *node, Yosys::Module *module);
+
+#endif //__DESIGN_UPDATE_H__
\ No newline at end of file
diff --git a/parmys-plugin/parmys_utils.cc b/parmys-plugin/parmys_utils.cc
new file mode 100644
index 000000000..fb43e2f20
--- /dev/null
+++ b/parmys-plugin/parmys_utils.cc
@@ -0,0 +1,149 @@
+/*
+ * Copyright 2022 Daniel Khadivi
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include "parmys_utils.hpp"
+
+Yosys::Wire *to_wire(std::string wire_name, Yosys::Module *module)
+{
+    // reuse the wire registered under the escaped name, or add it to the module on a miss
+    const Yosys::IdString escaped_name = Yosys::RTLIL::escape_id(wire_name);
+    Yosys::Wire *existing = module->wire(escaped_name);
+    if (existing != nullptr)
+        return existing;
+
+    return module->addWire(escaped_name);
+}
+
+std::pair<Yosys::RTLIL::IdString, int> wideports_split(std::string name)
+{
+    int pos = -1; // index of the '[' opening the candidate trailing "[<int>]" suffix, -1 when there is none
+    // only names that end in ']' can carry an index suffix
+    if (name.empty() || name.back() != ']')
+        goto failed;
+    // scan everything but the closing ']'; any character that cannot belong to an index resets the candidate
+    for (int i = 0; i + 1 < Yosys::GetSize(name); i++) {
+        if (name[i] == '[')
+            pos = i;
+        else if (name[i] != '-' && (name[i] < '0' || name[i] > '9'))
+            pos = -1; // neither a digit nor a sign
+        else if (name[i] == '-' && ((i != pos + 1) || name[i + 1] == ']'))
+            pos = -1; // '-' is only accepted directly after '[' and not directly before ']'
+        else if (i == pos + 2 && name[i] == '0' && name[i - 1] == '-')
+            pos = -1; // a '0' directly after the sign ("-0...") is rejected
+        else if (i == pos + 1 && name[i] == '0' && name[i + 1] != ']')
+            pos = -1; // a leading '0' is only accepted when it is the whole index ("[0]")
+    }
+    // split into the escaped base name (text before '[') and the integer parsed from the text after it
+    if (pos >= 0)
+        return std::pair<Yosys::RTLIL::IdString, int>("\\" + name.substr(0, pos), atoi(name.c_str() + pos + 1));
+    // not a "<name>[<int>]" string: return an empty id and index 0
+failed:
+    return std::pair<Yosys::RTLIL::IdString, int>(Yosys::RTLIL::IdString(), 0);
+}
+
+const std::string str(Yosys::RTLIL::SigBit sig)
+{
+    // Builds a printable name for one signal bit: constants map to fixed "$..." tokens, wire bits to the wire name
+    // a bit without a wire is a constant
+    if (sig.wire == NULL) {
+        if (sig == Yosys::RTLIL::State::S0)
+            return "$false";
+        if (sig == Yosys::RTLIL::State::S1)
+            return "$true";
+        return "$undef"; // every other constant state
+    }
+    // unescape the wire name and replace the characters '#', '=', '<' and '>' with '?'
+    std::string str = Yosys::RTLIL::unescape_id(sig.wire->name);
+    for (size_t i = 0; i < str.size(); i++)
+        if (str[i] == '#' || str[i] == '=' || str[i] == '<' || str[i] == '>')
+            str[i] = '?';
+    // bits of multi-bit wires get an "[index]" suffix; for "upto" wires the index is mirrored within the wire range
+    if (sig.wire->width != 1)
+        str +=
+          Yosys::stringf("[%d]", sig.wire->upto ? sig.wire->start_offset + sig.wire->width - sig.offset - 1 : sig.wire->start_offset + sig.offset);
+
+    return str;
+}
+
+const std::string str(Yosys::RTLIL::IdString id)
+{
+    std::string printable = Yosys::RTLIL::unescape_id(id); // start from the unescaped form of the id
+    for (char &ch : printable)
+        if (ch == '#' || ch == '=' || ch == '<' || ch == '>')
+            ch = '?'; // these four characters are replaced by '?' in the printable name
+    return printable;
+}
+
+void handle_cell_wideports_cache(Yosys::hashlib::dict<Yosys::RTLIL::IdString, Yosys::hashlib::dict<int, Yosys::SigBit>> *cell_wideports_cache,
+                                 Yosys::Design *design, Yosys::Module *module, Yosys::Cell *cell)
+{
+    Yosys::RTLIL::Module *cell_mod = design->module(cell->type); // module looked up by the cell type; null-checked below
+    for (auto &it : *cell_wideports_cache) {                     // it.first: port name, it.second: bit index -> signal bit
+        int width = 0;
+        int offset = 0;
+        bool upto = false;
+        for (auto &b : it.second)
+            width = std::max(width, b.first + 1); // default width: highest cached bit index + 1
+        // when the cell type's module declares this port, its offset, direction and width override the defaults
+        if (cell_mod) {
+            Yosys::Wire *cell_port = cell_mod->wire(it.first);
+            if (cell_port && (cell_port->port_input || cell_port->port_output)) {
+                offset = cell_port->start_offset;
+                upto = cell_port->upto;
+                width = cell_port->width;
+            }
+        }
+        // assemble the port signal bit by bit
+        Yosys::SigSpec sig;
+
+        for (int i = 0; i < width; i++) {
+            int idx = offset + (upto ? width - 1 - i : i); // cache key of bit i; mirrored for "upto" ports
+            if (it.second.count(idx))
+                sig.append(it.second.at(idx));
+            else
+                sig.append(module->addWire(NEW_ID)); // no cached bit for this index: fill the gap with a new wire
+        }
+
+        cell->setPort(it.first, sig);
+    }
+}
+
+void handle_wideports_cache(Yosys::hashlib::dict<Yosys::RTLIL::IdString, std::pair<int, bool>> *wideports_cache, Yosys::Module *module)
+{
+    for (auto &wp : *wideports_cache) { // one entry per wide port: name -> (width, is-input flag)
+        auto name = wp.first;
+        int width = wp.second.first;
+        bool isinput = wp.second.second;
+        // add the wide port wire and mark it as either an input or an output
+        Yosys::RTLIL::Wire *wire = module->addWire(name, width);
+        wire->port_input = isinput;
+        wire->port_output = !isinput;
+        // existing single-bit wires named "<name>[i]" stop being ports and are connected to bit i of the wide wire
+        for (int i = 0; i < width; i++) {
+            Yosys::RTLIL::IdString other_name = name.str() + Yosys::stringf("[%d]", i);
+            Yosys::RTLIL::Wire *other_wire = module->wire(other_name);
+            if (other_wire) { // bits without a matching single-bit wire are left unconnected
+                other_wire->port_input = false;
+                other_wire->port_output = false;
+                if (isinput)
+                    module->connect(other_wire, Yosys::SigSpec(wire, i)); // argument order depends on the port direction
+                else
+                    module->connect(Yosys::SigSpec(wire, i), other_wire);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/parmys-plugin/parmys_utils.hpp b/parmys-plugin/parmys_utils.hpp
new file mode 100644
index 000000000..44433b13b
--- /dev/null
+++ b/parmys-plugin/parmys_utils.hpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2022 Daniel Khadivi
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef __YOSYS_UTILS_H__
+#define __YOSYS_UTILS_H__
+
+#include "odin_types.h"
+
+Yosys::Wire *to_wire(std::string wire_name, Yosys::Module *module);
+std::pair<Yosys::RTLIL::IdString, int> wideports_split(std::string name);
+const std::string str(Yosys::RTLIL::SigBit sig);
+const std::string str(Yosys::RTLIL::IdString id);
+void handle_cell_wideports_cache(Yosys::hashlib::dict<Yosys::RTLIL::IdString, Yosys::hashlib::dict<int, Yosys::SigBit>> *cell_wideports_cache,
+                                 Yosys::Design *design, Yosys::Module *module, Yosys::Cell *cell);
+void handle_wideports_cache(Yosys::hashlib::dict<Yosys::RTLIL::IdString, std::pair<int, bool>> *wideports_cache, Yosys::Module *module);
+
+#endif //__YOSYS_UTILS_H__
\ No newline at end of file
diff --git a/parmys-plugin/src/BlockMemories.cc b/parmys-plugin/src/BlockMemories.cc
new file mode 100644
index 000000000..0f26bbd61
--- /dev/null
+++ b/parmys-plugin/src/BlockMemories.cc
@@ -0,0 +1,2201 @@
+/**
+ * Copyright (c) 2021 Seyed Alireza Damghani (sdamghann@gmail.com)
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * @file: This file includes the definitions of the routines to map
+ * block memories to VTR compatible memory types, i.e., Single Port RAM
+ * and Dual Port RAM. The definition of block memory and read-only memory
+ * is provided in techlib directory in the Odin-II root directory.
+ * Basically, a memory block with both read and write accesses that has a
+ * separate port for each operation is called BRAM. While following the
+ * same definition, a read-only memory block is referred to as a BRAM that
+ * has only read access (even multiple accesses). This file also
+ * includes ymem block support, which represents the Yosys internal
+ * memory cell.
+ */
+
+#include "odin_util.h"
+#include <string.h>
+
+#include "BlockMemories.hpp"
+#include "hard_blocks.h"
+#include "memories.h"
+#include "netlist_utils.h"
+#include "node_creation_library.h"
+#include "partial_map.h"
+#include "vtr_memory.h"
+#include "vtr_util.h"
+
+using vtr::t_linked_vptr;
+/* global linked list including block memory instances */
+struct block_memory_information_t block_memories_info;
+
+static block_memory_t *init_block_memory(nnode_t *node, netlist_t *netlist);
+
+void map_bram_to_mem_hardblocks(block_memory_t *bram, netlist_t *netlist);
+void map_rom_to_mem_hardblocks(block_memory_t *rom, netlist_t *netlist);
+
+static void create_r_single_port_ram(block_memory_t *rom, netlist_t *netlist);
+static void create_2r_dual_port_ram(block_memory_t *rom, netlist_t *netlist);
+static void create_nr_single_port_ram(block_memory_t *rom, netlist_t *netlist);
+static void create_rw_single_port_ram(block_memory_t *bram, netlist_t * /* netlist */);
+static void create_rw_dual_port_ram(block_memory_t *bram, netlist_t *netlist);
+static void create_r2w_dual_port_ram(block_memory_t *bram, netlist_t *netlist);
+static void create_2rw_dual_port_ram(block_memory_t *bram, netlist_t *netlist);
+static void create_2r2w_dual_port_ram(block_memory_t *bram, netlist_t *netlist);
+static void create_nrmw_dual_port_ram(block_memory_t *bram, netlist_t *netlist);
+
+static nnode_t *ymem_to_rom(nnode_t *node, uintptr_t traverse_mark_number);
+static nnode_t *ymem2_to_rom(nnode_t *node, uintptr_t traverse_mark_number);
+static nnode_t *ymem_to_bram(nnode_t *node, uintptr_t traverse_mark_number);
+static nnode_t *ymem2_to_bram(nnode_t *node, uintptr_t traverse_mark_number);
+
+static bool check_same_addrs(block_memory_t *bram);
+static void perform_optimization(block_memory_t *memory);
+static signal_list_t *split_cascade_port(signal_list_t *signalvar, signal_list_t *selectors, int desired_width, nnode_t *node, netlist_t *netlist);
+static void decode_out_port(signal_list_t *src, signal_list_t *outs, signal_list_t *selectors, nnode_t *node, netlist_t *netlist);
+
+static void cleanup_block_memory_old_node(nnode_t *old_node);
+
+static void free_block_memory_index(block_memory_hashtable to_free);
+static void free_block_memory(block_memory_t *to_free);
+
+/**
+ * (function: init_block_memory_index)
+ *
+ * @brief Initialises hashtables to lookup memories based on inputs and names.
+ */
+void init_block_memory_index()
+{
+    block_memories_info.block_memories = block_memory_hashtable();     // fresh table for the read/write block memories
+    block_memories_info.read_only_memories = block_memory_hashtable(); // fresh table for the read-only memories
+}
+
+/**
+ * (function: init_block_memory)
+ *
+ * @brief: initialize bram signals and register the bram in the block memory index
+ * @param node pointing to a bram node
+ * @param netlist pointer to the current netlist file (unused)
+ * @return the newly allocated block memory
+ */
+static block_memory_t *init_block_memory(nnode_t *node, netlist_t * /* netlist */)
+{
+    int i, offset; // offset: index in the node pin array of the first pin of the port being copied
+    block_memory_t *bram = (block_memory_t *)vtr::malloc(sizeof(block_memory_t));
+
+    /**
+     * BRAM information
+     *
+     * CLK:        input port [0]
+     * RD_ADDR:    input port [1]
+     * RD_DATA:    output port[0]
+     * RD_ENABLE:  input port [2]
+     * WR_ADDR:    input port [3]
+     * WR_DATA:    input port [4]
+     * WR_ENABLE:  input port [5]
+     */
+
+    int CLK_width = node->input_port_sizes[0];
+    int RD_ADDR_width = node->input_port_sizes[1];
+    int RD_DATA_width = node->output_port_sizes[0];
+    int RD_ENABLE_width = node->input_port_sizes[2];
+    int WR_ADDR_width = node->input_port_sizes[3];
+    int WR_DATA_width = node->input_port_sizes[4];
+    int WR_ENABLE_width = node->input_port_sizes[5];
+
+    oassert(CLK_width == 1);
+
+    /* INPUT */
+
+    /* CLK */
+    offset = 0;
+    bram->clk = init_signal_list();
+    add_pin_to_signal_list(bram->clk, node->input_pins[offset]);
+
+    /* read address pins */
+    offset += CLK_width;
+    bram->read_addr = init_signal_list();
+    for (i = 0; i < RD_ADDR_width; ++i) {
+        add_pin_to_signal_list(bram->read_addr, node->input_pins[i + offset]);
+    }
+
+    /* read enable pins */
+    offset += RD_ADDR_width;
+    bram->read_en = init_signal_list();
+    for (i = 0; i < RD_ENABLE_width; ++i) {
+        add_pin_to_signal_list(bram->read_en, node->input_pins[i + offset]);
+    }
+
+    /* write addr pins */
+    offset += RD_ENABLE_width;
+    bram->write_addr = init_signal_list();
+    for (i = 0; i < WR_ADDR_width; ++i) {
+        add_pin_to_signal_list(bram->write_addr, node->input_pins[i + offset]);
+    }
+
+    /* write data pins */
+    offset += WR_ADDR_width;
+    bram->write_data = init_signal_list();
+    for (i = 0; i < WR_DATA_width; ++i) {
+        add_pin_to_signal_list(bram->write_data, node->input_pins[i + offset]);
+    }
+
+    /* write enable pins */
+    offset += WR_DATA_width;
+    bram->write_en = init_signal_list();
+    for (i = 0; i < WR_ENABLE_width; ++i) {
+        add_pin_to_signal_list(bram->write_en, node->input_pins[i + offset]);
+    }
+
+    /* OUTPUT */
+    /* read data pins */
+    offset = 0;
+    bram->read_data = init_signal_list();
+    for (i = 0; i < RD_DATA_width; ++i) {
+        add_pin_to_signal_list(bram->read_data, node->output_pins[i + offset]);
+    }
+
+    /* keep a pointer to the source node; no new node is created here */
+    bram->node = node;
+
+    /* keep track of location since we need for further node creation */
+    bram->loc = node->loc;
+
+    bram->memory_id = vtr::strdup(node->attributes->memory_id);
+
+    /* creating a unique name for the block memory */
+    bram->name = make_full_ref_name(bram->node->name, NULL, NULL, bram->memory_id, -1);
+    block_memories_info.block_memories.emplace(bram->name, bram);
+
+    return (bram);
+}
+
+/**
+ * (function: init_read_only_memory)
+ *
+ * @brief: initialize rom signals and register the rom in the read-only memory index
+ * @param node pointing to a rom node
+ * @param netlist pointer to the current netlist file (source of the pad pins)
+ * @return the newly allocated read-only memory
+ */
+static block_memory_t *init_read_only_memory(nnode_t *node, netlist_t *netlist)
+{
+    int i, offset; // offset: index in the node pin array of the first pin of the port being copied
+    block_memory_t *rom = (block_memory_t *)vtr::malloc(sizeof(block_memory_t));
+
+    /**
+     * ROM information
+     *
+     * CLK:        input port [0]
+     * RD_ADDR:    input port [1]
+     * RD_DATA:    output port [0]
+     * RD_ENABLE:  input port [2]
+     */
+
+    int CLK_width = node->input_port_sizes[0];
+    int RD_ADDR_width = node->input_port_sizes[1];
+    int RD_DATA_width = node->output_port_sizes[0];
+    int RD_ENABLE_width = node->input_port_sizes[2];
+    int WR_DATA_width = node->output_port_sizes[0]; // the padded data_in port is as wide as the read data port
+
+    oassert(CLK_width == 1);
+
+    /* INPUT */
+    /* CLK */
+    offset = 0;
+    rom->clk = init_signal_list();
+    add_pin_to_signal_list(rom->clk, node->input_pins[offset]);
+
+    /* read address pins */
+    offset += CLK_width;
+    rom->read_addr = init_signal_list();
+    for (i = 0; i < RD_ADDR_width; ++i) {
+        add_pin_to_signal_list(rom->read_addr, node->input_pins[i + offset]);
+    }
+
+    /* read enable pins */
+    offset += RD_ADDR_width;
+    rom->read_en = init_signal_list();
+    for (i = 0; i < RD_ENABLE_width; ++i) {
+        add_pin_to_signal_list(rom->read_en, node->input_pins[i + offset]);
+    }
+
+    /* OUTPUT */
+    offset = 0;
+    rom->read_data = init_signal_list();
+    for (i = 0; i < RD_DATA_width; ++i) {
+        add_pin_to_signal_list(rom->read_data, node->output_pins[i + offset]);
+    }
+
+    /* PAD DATA IN */
+    /* we pad the data_in port for rom using pad pins */
+    rom->write_data = init_signal_list();
+    for (i = 0; i < WR_DATA_width; ++i) {
+        add_pin_to_signal_list(rom->write_data, get_pad_pin(netlist));
+    }
+
+    /* a rom has no write address and no write enable */
+    rom->write_addr = NULL;
+    rom->write_en = NULL;
+
+    /* keep a pointer to the source node; no new node is created here */
+    rom->node = node;
+
+    /* keep track of location since we need for further node creation */
+    rom->loc = node->loc;
+
+    rom->memory_id = vtr::strdup(node->attributes->memory_id);
+
+    /* creating a unique name for the block memory */
+    rom->name = make_full_ref_name(rom->node->name, NULL, NULL, rom->memory_id, -1);
+    block_memories_info.read_only_memories.emplace(rom->name, rom);
+
+    return (rom);
+}
+
+/**
+ * (function: create_r_single_port_ram)
+ *
+ * @brief read_only_memory will be considered as single port ram
+ * this function reorders inputs and adds a data_in input to the new node
+ * which is connected to pad pins
+ *
+ * @param rom pointing to a rom block memory
+ * @param netlist pointer to the current netlist file
+ */
+static void create_r_single_port_ram(block_memory_t *rom, netlist_t *netlist)
+{
+    nnode_t *old_node = rom->node;
+    int num_rd_ports = old_node->attributes->RD_PORTS;
+    int num_wr_ports = old_node->attributes->WR_PORTS;
+
+    /* should have been resolved before this function */
+    oassert(num_rd_ports == 1);
+    oassert(num_wr_ports == 0);
+
+    /* single port ram signals (sized by the single port struct that is actually allocated) */
+    sp_ram_signals *signals = (sp_ram_signals *)vtr::calloc(1, sizeof(sp_ram_signals));
+
+    /* INPUTS */
+    /* adding the read addr input port as the spram address */
+    signals->addr = rom->read_addr;
+
+    /* handle clk signal */
+    signals->clk = rom->clk->pins[0];
+
+    /**
+     * we pad the data_in port using pad pins
+     * rom->write_data is already filled with pad pins
+     */
+    signals->data = rom->write_data;
+
+    /* there is no write data to set any we, so it will be connected to GND */
+    signals->we = get_zero_pin(netlist);
+    delete_npin(rom->read_en->pins[0]); /* the read enable pin is not passed to the spram */
+
+    /* OUTPUT */
+    /* adding the read data port as the spram output */
+    signals->out = rom->read_data;
+
+    create_single_port_ram(signals, old_node);
+
+    // CLEAN UP rom src node; only the container is freed since its lists are the rom's own lists
+    cleanup_block_memory_old_node(old_node);
+    vtr::free(signals);
+}
+
+/**
+ * (function: create_2r_dual_port_ram)
+ *
+ * @brief maps a memory with two read ports and no write port
+ * to a DPRAM: each read port gets its own addr/out pair, both
+ * data inputs are padded and both we pins use the zero pin.
+ *
+ * @param bram pointer to the block memory
+ * @param netlist pointer to the current netlist file
+ */
+static void create_2r_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
+{
+    nnode_t *old_node = bram->node;
+
+    int i, offset;
+    int data_width = bram->node->attributes->DBITS;
+    int addr_width = bram->node->attributes->ABITS;
+    int num_rd_ports = old_node->attributes->RD_PORTS;
+    int num_wr_ports = old_node->attributes->WR_PORTS;
+
+    /* should have been resolved before this function */
+    oassert(num_rd_ports == 2);
+    oassert(num_wr_ports == 0);
+    oassert(bram->read_addr->count == 2 * addr_width);
+    oassert(bram->read_data->count == 2 * data_width);
+
+    /* create a list of dpram ram signals */
+    dp_ram_signals *signals = (dp_ram_signals *)vtr::malloc(sizeof(dp_ram_signals));
+
+    /* split read addr and add the first half to the addr1 */
+    signals->addr1 = init_signal_list();
+    for (i = 0; i < addr_width; ++i) {
+        add_pin_to_signal_list(signals->addr1, bram->read_addr->pins[i]);
+    }
+
+    /* add pad pins as data1 */
+    signals->data1 = init_signal_list();
+    for (i = 0; i < data_width; ++i) {
+        add_pin_to_signal_list(signals->data1, get_pad_pin(netlist));
+    }
+
+    /* there is no write data to set any we, so it will be connected to GND */
+    signals->we1 = get_zero_pin(netlist);
+    delete_npin(bram->read_en->pins[0]);
+
+    /* add clk signal as dpram clk signal */
+    signals->clk = bram->clk->pins[0];
+
+    /* split read data and add the first half to the out1 */
+    signals->out1 = init_signal_list();
+    for (i = 0; i < data_width; ++i) {
+        add_pin_to_signal_list(signals->out1, bram->read_data->pins[i]);
+    }
+
+    /* add the second half of the read addr to addr2 */
+    offset = addr_width;
+    signals->addr2 = init_signal_list();
+    for (i = 0; i < addr_width; ++i) {
+        add_pin_to_signal_list(signals->addr2, bram->read_addr->pins[i + offset]);
+    }
+
+    /* there is no write data to set any we, so it will be connected to GND */
+    signals->we2 = get_zero_pin(netlist);
+    delete_npin(bram->read_en->pins[1]);
+
+    /* add pad pins as data2 */
+    signals->data2 = init_signal_list();
+    for (i = 0; i < data_width; ++i) {
+        add_pin_to_signal_list(signals->data2, get_pad_pin(netlist));
+    }
+
+    /* add the second half of the read data to out2 */
+    offset = data_width;
+    signals->out2 = init_signal_list();
+    for (i = 0; i < data_width; ++i) {
+        add_pin_to_signal_list(signals->out2, bram->read_data->pins[i + offset]);
+    }
+
+    /* create a new dual port ram */
+    create_dual_port_ram(signals, old_node);
+
+    // CLEAN UP
+    cleanup_block_memory_old_node(old_node);
+    free_dp_ram_signals(signals);
+}
+
+/**
+ * (function: create_nr_single_port_ram)
+ *
+ * @brief multiple read ports are multiplexed using read enable.
+ * Then, the rom will be mapped to a SPRAM
+ *
+ * @param rom pointing to a rom block memory
+ * @param netlist pointer to the current netlist file
+ */
+static void create_nr_single_port_ram(block_memory_t *rom, netlist_t *netlist)
+{
+    nnode_t *old_node = rom->node;
+    int data_width = rom->node->attributes->DBITS;
+    int addr_width = rom->node->attributes->ABITS;
+    int num_rd_ports = old_node->attributes->RD_PORTS;
+    int num_wr_ports = old_node->attributes->WR_PORTS;
+
+    /* validation */
+    oassert(num_rd_ports > 1);
+    oassert(num_wr_ports == 0);
+
+    /* single port ram signals (sized by the single port struct that is actually allocated) */
+    sp_ram_signals *signals = (sp_ram_signals *)vtr::calloc(1, sizeof(sp_ram_signals));
+    signal_list_t *selectors = NULL;
+
+    /* INPUTS */
+    selectors = copy_input_signals(rom->read_en);
+    /* adding the muxed read addrs as spram address */
+    signals->addr = split_cascade_port(rom->read_addr, selectors, addr_width, old_node, netlist);
+
+    /* add clk signal */
+    signals->clk = rom->clk->pins[0];
+
+    /**
+     * rom->write_data is already initialized with pad pins in rom init
+     */
+    signals->data = copy_input_signals(rom->write_data);
+
+    /* there is no write data to set any we, so it will be connected to GND */
+    signals->we = get_zero_pin(netlist);
+
+    /* OUTPUT */
+    /* left empty on purpose; the spram output pins are read back from the created node below */
+    signals->out = NULL;
+
+    /* create a SPRAM */
+    nnode_t *spram = create_single_port_ram(signals, old_node);
+
+    signal_list_t *spram_outputs = init_signal_list();
+    for (int i = 0; i < data_width; ++i) {
+        add_pin_to_signal_list(spram_outputs, spram->output_pins[i]);
+    }
+
+    /* decode the spram outputs to the n rom output ports */
+    decode_out_port(spram_outputs, rom->read_data, rom->read_en, old_node, netlist);
+
+    // CLEAN UP rom src node
+    free_signal_list(selectors);
+    free_signal_list(spram_outputs);
+
+    free_sp_ram_signals(signals);
+    cleanup_block_memory_old_node(old_node);
+}
+
+/**
+ * (function: create_rw_single_port_ram)
+ *
+ * @brief creates a single port ram for a block memory
+ * with one read and one write port that has the same
+ * addr for both read and write
+ *
+ * @param bram pointing to a bram block memory
+ * @param netlist pointer to the current netlist file (unused)
+ */
+static void create_rw_single_port_ram(block_memory_t *bram, netlist_t * /* netlist */)
+{
+    int i;
+    nnode_t *old_node = bram->node;
+    int num_rd_ports = old_node->attributes->RD_PORTS;
+    int num_wr_ports = old_node->attributes->WR_PORTS;
+
+    /* should have been resolved before this function */
+    oassert(num_rd_ports == 1);
+    oassert(num_wr_ports == 1);
+
+    /* single port ram signals (sized by the single port struct that is actually allocated) */
+    sp_ram_signals *signals = (sp_ram_signals *)vtr::calloc(1, sizeof(sp_ram_signals));
+
+    /* the wr addr will be deleted since the read addr is used as the only spram address */
+    for (i = 0; i < bram->write_addr->count; ++i) {
+        npin_t *wr_addr_pin = bram->write_addr->pins[i];
+        /* delete pin */
+        delete_npin(wr_addr_pin);
+    }
+
+    /* INPUTS */
+    /* adding the read addr input port as the spram address */
+    signals->addr = bram->read_addr;
+
+    /* handling clock signals */
+    signals->clk = bram->clk->pins[0];
+
+    /**
+     * the write data port of the bram is used
+     * directly as the data_in port of the spram
+     */
+    signals->data = bram->write_data;
+
+    /* the rd enables will be deleted since we do not need it anymore */
+    for (i = 0; i < bram->read_en->count; ++i) {
+        npin_t *rd_en_pin = bram->read_en->pins[i];
+        /* delete pin */
+        delete_npin(rd_en_pin);
+    }
+
+    /* reduce the write enables to one bit */
+    if (bram->write_en->count > 1) {
+        /* we must be one bit in single port ram: only the first write enable */
+        /* bit is kept, the remaining bits are deleted (they are NOT OR-ed) */
+        for (i = 1; i < bram->write_en->count; ++i) {
+            delete_npin(bram->write_en->pins[i]);
+        }
+    }
+    signals->we = bram->write_en->pins[0];
+
+    /* OUTPUT */
+    /* adding the read data port as the spram output */
+    signals->out = bram->read_data;
+
+    create_single_port_ram(signals, old_node);
+
+    // CLEAN UP bram src node; only the container is freed since its lists are the bram's own lists
+    cleanup_block_memory_old_node(old_node);
+    vtr::free(signals);
+}
+
+/**
+ * (function: create_rw_dual_port_ram)
+ *
+ * @brief block_ram will be considered as dual port ram
+ * this function reorders inputs and hooks pad pins into datain1
+ * it also leaves the second output of the dual port ram unconnected
+ *
+ * @param bram pointing to a block memory
+ * @param netlist pointer to the current netlist file
+ */
+static void create_rw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
+{
+    int i;
+    nnode_t *old_node = bram->node;
+    int num_rd_ports = old_node->attributes->RD_PORTS;
+    int num_wr_ports = old_node->attributes->WR_PORTS;
+
+    /* should have been resolved before this function */
+    oassert(num_rd_ports == 1);
+    oassert(num_wr_ports == 1);
+
+    /* dual port ram signals */
+    dp_ram_signals *signals = (dp_ram_signals *)vtr::calloc(1, sizeof(dp_ram_signals));
+
+    /* INPUTS */
+    /* read addr goes to address1 and write addr to address2 */
+    signals->addr1 = bram->read_addr;
+    signals->addr2 = bram->write_addr;
+    /* handling clock signal */
+    signals->clk = bram->clk->pins[0];
+
+    /* adding the write data port as data2 */
+    signals->data2 = bram->write_data;
+
+    /* we pad the first data port (the read side) using pad pins */
+    signal_list_t *pad_signals = init_signal_list();
+    for (i = 0; i < bram->write_data->count; ++i) {
+        add_pin_to_signal_list(pad_signals, get_pad_pin(netlist));
+    }
+    signals->data1 = pad_signals;
+
+    for (i = 0; i < bram->read_en->count; ++i) {
+        /* delete every read enable pin exactly once, since port 1 never writes */
+        delete_npin(bram->read_en->pins[i]);
+    }
+    signals->we1 = get_zero_pin(netlist);
+
+    /* adding wr_en as we of port 2 */
+    signals->we2 = bram->write_en->pins[0];
+
+    /* OUTPUT */
+    /* adding the read data port as output1 */
+    signals->out1 = bram->read_data;
+
+    /* leave second output port unconnected */
+    int offset = bram->read_data->count;
+    signal_list_t *out2_signals = init_signal_list();
+    for (i = 0; i < bram->read_data->count; i++) {
+        // specify the output pin
+        npin_t *new_pin1 = allocate_npin();
+        npin_t *new_pin2 = allocate_npin();
+        nnet_t *new_net = allocate_nnet();
+        new_net->name = make_full_ref_name(NULL, NULL, NULL, bram->name, offset + i);
+        /* hook up new pin 1 into the new net */
+        add_driver_pin_to_net(new_net, new_pin1);
+        /* hook up the new pin 2 to this new net */
+        add_fanout_pin_to_net(new_net, new_pin2);
+
+        /* adding to signal list */
+        add_pin_to_signal_list(out2_signals, new_pin1);
+    }
+    signals->out2 = out2_signals;
+
+    create_dual_port_ram(signals, old_node);
+
+    // CLEAN UP: free only the lists created here; the other lists are the bram's own lists
+    cleanup_block_memory_old_node(old_node);
+    free_signal_list(pad_signals);
+    free_signal_list(out2_signals);
+    vtr::free(signals);
+}
+
+/**
+ * (function: create_r2w_dual_port_ram)
+ *
+ * @brief maps a bram with one read and two write ports to a DPRAM.
+ * One write port is expected to share its address with the read port;
+ * otherwise the bram is handed over to create_nrmw_dual_port_ram.
+ *
+ * @param bram pointer to the block memory
+ * @param netlist pointer to the current netlist file
+ */
+static void create_r2w_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
+{
+    nnode_t *old_node = bram->node;
+
+    int i, offset;
+    int data_width = bram->node->attributes->DBITS;
+    int addr_width = bram->node->attributes->ABITS;
+    int num_rd_ports = old_node->attributes->RD_PORTS;
+    int num_wr_ports = old_node->attributes->WR_PORTS;
+
+    /* should have been resolved before this function */
+    oassert(num_rd_ports == 1);
+    oassert(num_wr_ports == 2);
+
+    /* split wr_addr, wr_data and wr_en ports */
+    offset = addr_width;
+    signal_list_t *wr_addr1 = init_signal_list();
+    signal_list_t *wr_addr2 = init_signal_list();
+    for (i = 0; i < addr_width; ++i) {
+        add_pin_to_signal_list(wr_addr1, bram->write_addr->pins[i]);
+        add_pin_to_signal_list(wr_addr2, bram->write_addr->pins[i + offset]);
+    }
+
+    oassert(bram->write_en->count == 2);
+    npin_t *wr_en1 = bram->write_en->pins[0];
+    npin_t *wr_en2 = bram->write_en->pins[1];
+
+    offset = data_width;
+    signal_list_t *wr_data1 = init_signal_list();
+    signal_list_t *wr_data2 = init_signal_list();
+    for (i = 0; i < data_width; ++i) {
+        add_pin_to_signal_list(wr_data1, bram->write_data->pins[i]);
+        add_pin_to_signal_list(wr_data2, bram->write_data->pins[i + offset]);
+    }
+
+    /**
+     * [NOTE]:
+     * Odin-II handles memory blocks with more than two distinct
+     * address ports by muxing read/write ports based on en
+     */
+    bool first_match = sigcmp(wr_addr1, bram->read_addr);
+    bool second_match = sigcmp(wr_addr2, bram->read_addr);
+
+    if (!first_match && !second_match) {
+        first_match = sigcmp(bram->read_addr, wr_addr1);
+        second_match = sigcmp(bram->read_addr, wr_addr2);
+        if (!first_match && !second_match) {
+            // CLEAN UP (only the split lists exist at this point)
+            free_signal_list(wr_addr1);
+            free_signal_list(wr_addr2);
+            free_signal_list(wr_data1);
+            free_signal_list(wr_data2);
+
+            /* all ports have different address */
+            create_nrmw_dual_port_ram(bram, netlist);
+            return;
+        }
+    }
+
+    /* the dpram signals are allocated only now, so the fallback above has nothing else to free */
+    dp_ram_signals *signals = (dp_ram_signals *)vtr::malloc(sizeof(dp_ram_signals));
+
+    /* add read address as addr1 to dpram signal lists */
+    signals->addr1 = init_signal_list();
+    for (i = 0; i < bram->read_addr->count; ++i) {
+        add_pin_to_signal_list(signals->addr1, bram->read_addr->pins[i]);
+    }
+
+    /**
+     * one write address is always equal to read addr.
+     * As a result, the corresponding write data will be mapped to data1
+     */
+    signals->data1 = (first_match) ? wr_data1 : wr_data2;
+
+    /* free read en since we do not need in DPRAM model */
+    delete_npin(bram->read_en->pins[0]);
+    /* add write enable signals for matched wr port as we1 */
+    signals->we1 = (first_match) ? wr_en1 : wr_en2;
+
+    /* add clk signal as dpram clk signal */
+    signals->clk = bram->clk->pins[0];
+
+    /* map read data to the out1 */
+    signals->out1 = init_signal_list();
+    for (i = 0; i < bram->read_data->count; ++i) {
+        add_pin_to_signal_list(signals->out1, bram->read_data->pins[i]);
+    }
+
+    /* OTHER WR RELATED PORTS */
+    /* addr2 for another write addr */
+    signals->addr2 = (first_match) ? wr_addr2 : wr_addr1;
+
+    /* add write enable signals for second wr port as we2 */
+    signals->we2 = (first_match) ? wr_en2 : wr_en1;
+
+    /* the rest of write data pin is for data2 */
+    signals->data2 = (first_match) ? wr_data2 : wr_data1;
+
+    /* out2 will be unconnected */
+    signals->out2 = init_signal_list();
+    for (i = 0; i < signals->out1->count; ++i) {
+        /* create a dangling driver/fanout pin pair on a fresh net */
+        npin_t *new_pin1 = allocate_npin();
+        npin_t *new_pin2 = allocate_npin();
+        nnet_t *new_net = allocate_nnet();
+        /* hook up new pin 1 into the new net */
+        add_driver_pin_to_net(new_net, new_pin1);
+        /* hook up the new pin 2 to this new net */
+        add_fanout_pin_to_net(new_net, new_pin2);
+
+        /* hook the output pin into the node */
+        add_pin_to_signal_list(signals->out2, new_pin1);
+    }
+
+    /* create a new dual port ram */
+    create_dual_port_ram(signals, old_node);
+
+    // CLEAN UP
+    /* free matched wr addr pins since they are as the same as read addr */
+    for (i = 0; i < addr_width; ++i) {
+        npin_t *pin = (first_match) ? wr_addr1->pins[i] : wr_addr2->pins[i];
+        /* delete pin */
+        delete_npin(pin);
+    }
+    free_signal_list((first_match) ? wr_addr1 : wr_addr2);
+
+    cleanup_block_memory_old_node(old_node);
+    free_dp_ram_signals(signals);
+}
+
+/**
+ * (function: create_2rw_dual_port_ram)
+ *
+ * @brief maps a bram with two read ports and one write port to a DPRAM. One read
+ * port should share its address with the write port, else create_nrmw_dual_port_ram is used
+ *
+ * @param bram pointer to the block memory
+ * @param netlist pointer to the current netlist file
+ */
+static void create_2rw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
+{
+    nnode_t *old_node = bram->node;
+
+    int i, offset;
+    int data_width = bram->node->attributes->DBITS;
+    int addr_width = bram->node->attributes->ABITS;
+    int num_rd_ports = old_node->attributes->RD_PORTS;
+    int num_wr_ports = old_node->attributes->WR_PORTS;
+
+    /* should have been resolved before this function */
+    oassert(num_rd_ports == 2);
+    oassert(num_wr_ports == 1);
+    oassert(bram->write_en->count == 1);
+
+    /* split rd_addr, rd_data ports */
+    offset = addr_width;
+    signal_list_t *rd_addr1 = init_signal_list();
+    signal_list_t *rd_addr2 = init_signal_list();
+    for (i = 0; i < addr_width; ++i) {
+        add_pin_to_signal_list(rd_addr1, bram->read_addr->pins[i]);
+        add_pin_to_signal_list(rd_addr2, bram->read_addr->pins[i + offset]);
+    }
+
+    offset = data_width;
+    signal_list_t *rd_data1 = init_signal_list();
+    signal_list_t *rd_data2 = init_signal_list();
+    for (i = 0; i < data_width; ++i) {
+        add_pin_to_signal_list(rd_data1, bram->read_data->pins[i]);
+        add_pin_to_signal_list(rd_data2, bram->read_data->pins[i + offset]);
+    }
+
+    /**
+     * [NOTE]:
+     * Odin-II handles memory blocks with more than two distinct
+     * address ports using muxed read/write ports
+     */
+    bool first_match = sigcmp(bram->write_addr, rd_addr1);
+    bool second_match = sigcmp(bram->write_addr, rd_addr2);
+
+    if (!first_match && !second_match) {
+        first_match = sigcmp(rd_addr1, bram->write_addr);
+        second_match = sigcmp(rd_addr2, bram->write_addr);
+        if (!first_match && !second_match) {
+            // CLEAN UP (nothing else is allocated yet and the read_en pins are still intact)
+            free_signal_list(rd_addr1);
+            free_signal_list(rd_addr2);
+            free_signal_list(rd_data1);
+            free_signal_list(rd_data2);
+
+            /* all ports have different address */
+            create_nrmw_dual_port_ram(bram, netlist);
+            return;
+        }
+    }
+
+    /* create the dpram signals only once the DPRAM mapping is certain */
+    dp_ram_signals *signals = (dp_ram_signals *)vtr::malloc(sizeof(dp_ram_signals));
+
+    /* add write address as addr1 to dpram signal lists */
+    signals->addr1 = init_signal_list();
+    for (i = 0; i < bram->write_addr->count; ++i) {
+        add_pin_to_signal_list(signals->addr1, bram->write_addr->pins[i]);
+    }
+
+    /**
+     * the single write port shares port 1 with the matched read port.
+     * As a result, the write data will be mapped to data1
+     */
+    signals->data1 = init_signal_list();
+    for (i = 0; i < data_width; ++i) {
+        add_pin_to_signal_list(signals->data1, bram->write_data->pins[i]);
+    }
+
+    /* add write enable signals for matched wr port as we1 */
+    signals->we1 = bram->write_en->pins[0];
+
+    /* delete rd_en pins (after the fallback check, which still reads them) */
+    for (i = 0; i < bram->read_en->count; ++i) {
+        delete_npin(bram->read_en->pins[i]);
+    }
+
+    /* map matched read data to the out1 */
+    signals->out1 = (first_match) ? rd_data1 : rd_data2;
+
+    /* add merged clk signal as dpram clk signal */
+    signals->clk = bram->clk->pins[0];
+
+    /* OTHER RD RELATED PORTS */
+    /* addr2 for the other read addr */
+    signals->addr2 = (first_match) ? rd_addr2 : rd_addr1;
+
+    /* en 2 should always be 0 since there is no second write port */
+    signals->we2 = get_zero_pin(netlist);
+
+    /* there is no second write port, so data2 is filled with pad pins */
+    signals->data2 = init_signal_list();
+    for (i = 0; i < data_width; ++i) {
+        add_pin_to_signal_list(signals->data2, get_pad_pin(netlist));
+    }
+
+    /* out2 map to other read data */
+    signals->out2 = (first_match) ? rd_data2 : rd_data1;
+
+    /* create a new dual port ram */
+    create_dual_port_ram(signals, old_node);
+
+    // CLEAN UP
+    /* free matched rd addr pins since they are as the same as write addr */
+    for (i = 0; i < addr_width; ++i) {
+        npin_t *pin = (first_match) ? rd_addr1->pins[i] : rd_addr2->pins[i];
+        /* delete pin */
+        delete_npin(pin);
+    }
+    free_signal_list((first_match) ? rd_addr1 : rd_addr2);
+
+    cleanup_block_memory_old_node(old_node);
+    free_dp_ram_signals(signals);
+}
+
+/**
+ * (function: create_2r2w_dual_port_ram)
+ *
+ * @brief creates a dual port ram. The given bram should have
+ * two pairs of read addr and write addr with the same addrs
+ *
+ * @param bram pointer to the block memory
+ * @param netlist pointer to the current netlist file
+ */
+static void create_2r2w_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
+{
+    nnode_t *old_node = bram->node;
+
+    int i, offset;
+    int data_width = bram->node->attributes->DBITS;
+    int addr_width = bram->node->attributes->ABITS;
+    int num_rd_ports = old_node->attributes->RD_PORTS;
+    int num_wr_ports = old_node->attributes->WR_PORTS;
+
+    /* should have been resolved before this function */
+    oassert(num_rd_ports == 2);
+    oassert(num_wr_ports == 2);
+    oassert(bram->read_addr->count == 2 * addr_width);
+    oassert(bram->read_data->count == 2 * data_width);
+
+    /* split wr_addr, wr_data and wr_en ports */
+    offset = addr_width;
+    signal_list_t *wr_addr1 = init_signal_list();
+    signal_list_t *wr_addr2 = init_signal_list();
+    for (i = 0; i < addr_width; ++i) {
+        add_pin_to_signal_list(wr_addr1, bram->write_addr->pins[i]);
+        add_pin_to_signal_list(wr_addr2, bram->write_addr->pins[i + offset]);
+    }
+
+    oassert(bram->write_en->count == 2);
+    npin_t *wr_en1 = bram->write_en->pins[0];
+    npin_t *wr_en2 = bram->write_en->pins[1];
+
+    offset = data_width;
+    signal_list_t *wr_data1 = init_signal_list();
+    signal_list_t *wr_data2 = init_signal_list();
+    for (i = 0; i < data_width; ++i) {
+        add_pin_to_signal_list(wr_data1, bram->write_data->pins[i]);
+        add_pin_to_signal_list(wr_data2, bram->write_data->pins[i + offset]);
+    }
+
+    /* split rd_addr, rd_data ports */
+    offset = addr_width;
+    signal_list_t *rd_addr1 = init_signal_list();
+    signal_list_t *rd_addr2 = init_signal_list();
+    for (i = 0; i < addr_width; ++i) {
+        add_pin_to_signal_list(rd_addr1, bram->read_addr->pins[i]);
+        add_pin_to_signal_list(rd_addr2, bram->read_addr->pins[i + offset]);
+    }
+
+    offset = data_width;
+    signal_list_t *rd_data1 = init_signal_list();
+    signal_list_t *rd_data2 = init_signal_list();
+    for (i = 0; i < data_width; ++i) {
+        add_pin_to_signal_list(rd_data1, bram->read_data->pins[i]);
+        add_pin_to_signal_list(rd_data2, bram->read_data->pins[i + offset]);
+    }
+
+    /**
+     * [NOTE]:
+     * Odin-II handles memory blocks with more than two distinct
+     * address ports using muxed read/write ports
+     */
+    bool first_match_read1 = sigcmp(wr_addr1, rd_addr1);
+    bool second_match_read1 = sigcmp(wr_addr2, rd_addr1);
+    if (!first_match_read1 && !second_match_read1) {
+        first_match_read1 = sigcmp(rd_addr1, wr_addr1);
+        second_match_read1 = sigcmp(rd_addr1, wr_addr2);
+    }
+
+    if (!first_match_read1 && !second_match_read1) {
+        // CLEAN UP (the read_en pins are still intact for the fallback)
+        free_signal_list(wr_addr1);
+        free_signal_list(wr_addr2);
+        free_signal_list(wr_data1);
+        free_signal_list(wr_data2);
+        free_signal_list(rd_addr1);
+        free_signal_list(rd_addr2);
+        free_signal_list(rd_data1);
+        free_signal_list(rd_data2);
+
+        /* all ports have different address */
+        create_nrmw_dual_port_ram(bram, netlist);
+        return;
+    }
+
+    /* delete rd_en pins (after the fallback check, which still reads them) */
+    for (i = 0; i < bram->read_en->count; ++i) {
+        delete_npin(bram->read_en->pins[i]);
+    }
+
+    /* create a list of dpram ram signals */
+    dp_ram_signals *signals = (dp_ram_signals *)vtr::malloc(sizeof(dp_ram_signals));
+
+    /* hook the first half of the split read addr into addr1 */
+    signals->addr1 = rd_addr1;
+
+    /* hook the second half of the split read addr into addr2 */
+    signals->addr2 = rd_addr2;
+
+    /* the first half of the read data goes to out1 */
+    signals->out1 = rd_data1;
+
+    /* add the second half of the read data to out2 */
+    signals->out2 = rd_data2;
+
+    /* data1 is the write data of the write port whose address matches rd_addr1 */
+    signals->data1 = (first_match_read1) ? wr_data1 : wr_data2;
+
+    /* data2 is the write data of the other write port */
+    signals->data2 = (first_match_read1) ? wr_data2 : wr_data1;
+
+    /* we1 must come from the same write port as data1 */
+    signals->we1 = (first_match_read1) ? wr_en1 : wr_en2;
+
+    /* add clk signal as dpram clk signal */
+    signals->clk = bram->clk->pins[0];
+
+    /* we2 must come from the same write port as data2 */
+    signals->we2 = (first_match_read1) ? wr_en2 : wr_en1;
+
+    /* create a new dual port ram */
+    create_dual_port_ram(signals, old_node);
+
+    // CLEAN UP
+    /* at this point wr_addr and rd_addr must be the same */
+    oassert(bram->read_addr->count == bram->write_addr->count);
+    /* the wr addr will be deleted since we do not need it anymore */
+    for (i = 0; i < bram->write_addr->count; ++i) {
+        npin_t *wr_addr_pin = bram->write_addr->pins[i];
+        /* delete pin */
+        delete_npin(wr_addr_pin);
+    }
+    free_signal_list(wr_addr1);
+    free_signal_list(wr_addr2);
+
+    cleanup_block_memory_old_node(old_node);
+    free_dp_ram_signals(signals);
+}
+
+/**
+ * (function: create_nrmw_dual_port_ram)
+ *
+ * @brief multiple read ports are multiplexed using read enable.
+ * multiple write ports are multiplexed using write enable.
+ * Then, the BRAM will be mapped to a DPRAM
+ *
+ * @param bram pointing to a bram node
+ * @param netlist pointer to the current netlist file
+ */
+static void create_nrmw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
+{
+    int i;
+    nnode_t *old_node = bram->node;
+    int data_width = bram->node->attributes->DBITS;
+    int addr_width = bram->node->attributes->ABITS;
+    int num_rd_ports = old_node->attributes->RD_PORTS;
+    int num_wr_ports = old_node->attributes->WR_PORTS;
+
+    /* also reached as a fallback with one or two ports per kind, so only require a port of each */
+    oassert(num_rd_ports >= 1);
+    oassert(num_wr_ports >= 1);
+
+    /* dual port ram signals */
+    dp_ram_signals *signals = (dp_ram_signals *)vtr::calloc(1, sizeof(dp_ram_signals));
+    signal_list_t *selectors = NULL;
+
+    /* INPUTS */
+    selectors = copy_input_signals(bram->read_en);
+    /* adding the read addr input port as address1 */
+    signals->addr1 = split_cascade_port(bram->read_addr, selectors, addr_width, old_node, netlist);
+    free_signal_list(selectors);
+
+    selectors = copy_input_signals(bram->write_en);
+    /* adding the write addr port as address2 */
+    signals->addr2 = split_cascade_port(bram->write_addr, selectors, addr_width, old_node, netlist);
+    free_signal_list(selectors);
+
+    /* handling clock signals */
+    signals->clk = bram->clk->pins[0];
+
+    /* we pad the first data port using pad pins */
+    signals->data1 = init_signal_list();
+    for (i = 0; i < data_width; ++i) {
+        add_pin_to_signal_list(signals->data1, get_pad_pin(netlist));
+    }
+    selectors = copy_input_signals(bram->write_en);
+    /* adding the write data port as data2 */
+    signals->data2 = split_cascade_port(bram->write_data, selectors, data_width, old_node, netlist);
+    free_signal_list(selectors);
+
+    /* first port does not have data, so the enable is GND */
+    signals->we1 = get_zero_pin(netlist);
+
+    /* create vcc signals as the value of we2 when the write_en pins are active */
+    signal_list_t *vcc_signals = init_signal_list();
+    for (i = 0; i < num_wr_ports; ++i) {
+        add_pin_to_signal_list(vcc_signals, get_one_pin(netlist));
+    }
+    signal_list_t *we2_signal = split_cascade_port(vcc_signals, bram->write_en, 1, old_node, netlist);
+
+    signals->we2 = we2_signal->pins[0];
+
+    /* OUTPUT */
+    /* leaving out1 and out2 of dpram null, so it will create new pins */
+    signals->out1 = NULL;
+    signals->out2 = NULL;
+
+    /* create a DPRAM node */
+    nnode_t *dpram = create_dual_port_ram(signals, old_node);
+
+    signal_list_t *dpram_outputs = init_signal_list();
+    for (i = 0; i < data_width; ++i) {
+        add_pin_to_signal_list(dpram_outputs, dpram->output_pins[i]);
+    }
+
+    /* decode the dpram outputs to the n bram output ports */
+    decode_out_port(dpram_outputs, bram->read_data, bram->read_en, old_node, netlist);
+
+    // CLEAN UP
+    cleanup_block_memory_old_node(old_node);
+    free_signal_list(dpram_outputs);
+    free_signal_list(we2_signal);
+    free_signal_list(vcc_signals);
+    free_dp_ram_signals(signals);
+}
+
+/**
+ * (function: map_rom_to_mem_hardblocks)
+ *
+ * @brief picks the ram creation routine for a read-only memory:
+ * one read port, two read ports, or any other number of read ports
+ *
+ * @param rom pointer to the read only memory
+ * @param netlist pointer to the current netlist file
+ */
+void map_rom_to_mem_hardblocks(block_memory_t *rom, netlist_t *netlist)
+{
+    nnode_t *rom_node = rom->node;
+
+    int data_bits = rom_node->attributes->DBITS;
+    int words = shift_left_value_with_overflow_check(0X1, rom_node->attributes->ABITS, rom->loc);
+
+    int num_rd_ports = rom_node->attributes->RD_PORTS;
+    int num_wr_ports = rom_node->attributes->WR_PORTS;
+
+    /* a read-only memory must not have any write port */
+    oassert(num_wr_ports == 0);
+
+    int rom_relative_area = words * data_bits;
+    t_model *lutram_model = find_hard_block(LUTRAM_string);
+    bool lutram_candidate = lutram_model != NULL && (LUTRAM_INFERENCE_THRESHOLD_MIN <= rom_relative_area) && (rom_relative_area <= LUTRAM_INFERENCE_THRESHOLD_MAX);
+    if (lutram_candidate) {
+        /* TODO: mapping to LUTRAM is not implemented, nothing is created for this rom */
+        return;
+    }
+
+    switch (num_rd_ports) {
+        case 1:
+            /* create the ROM and allocate ports according to the SPRAM hard block */
+            create_r_single_port_ram(rom, netlist);
+            break;
+        case 2:
+            /* create the ROM and allocate ports according to the DPRAM hard block */
+            create_2r_dual_port_ram(rom, netlist);
+            break;
+        default:
+            /* any other number of read ports is handled using multiplexed ports and a SPRAM */
+            create_nr_single_port_ram(rom, netlist);
+            break;
+    }
+}
+
+/**
+ * (function: map_bram_to_mem_hardblocks)
+ *
+ * @brief mapping a block_memory (has both read and write access) to single_port_ram
+ * or dual_port_ram according to the number and source of its ports
+ *
+ * @param bram pointer to the block memory
+ * @param netlist pointer to the current netlist file
+ */
+void map_bram_to_mem_hardblocks(block_memory_t *bram, netlist_t *netlist)
+{
+    nnode_t *node = bram->node;
+
+    int width = node->attributes->DBITS;
+    int depth = shift_left_value_with_overflow_check(0X1, node->attributes->ABITS, bram->loc);
+
+    /* since the data1_w + data2_w == data_out for DPRAM */
+    long rd_ports = node->attributes->RD_PORTS;
+    long wr_ports = node->attributes->WR_PORTS;
+
+    /**
+     * Potential place for checking block ram if their relative
+     * size is less than a threshold, they could be mapped on LUTRAM
+     */
+
+    int bram_relative_area = depth * width;
+    t_model *lutram_model = find_hard_block(LUTRAM_string);
+
+    if (lutram_model != NULL && (LUTRAM_INFERENCE_THRESHOLD_MIN <= bram_relative_area) && (bram_relative_area <= LUTRAM_INFERENCE_THRESHOLD_MAX)) {
+        /* map to LUTRAM */
+        // nnode_t* lutram = NULL;
+        /* TODO: not implemented yet, nothing is created for this bram */
+    } else {
+        if (rd_ports == 1 && wr_ports == 1) {
+            if (check_same_addrs(bram)) {
+                /* create a single port ram and allocate ports according to the SPRAM hard block */
+                create_rw_single_port_ram(bram, netlist);
+
+            } else {
+                /* create a dual port ram and allocate ports according to the DPRAM hard block */
+                create_rw_dual_port_ram(bram, netlist);
+            }
+
+        } else if (rd_ports == 1 && wr_ports == 2) {
+            /* create a dual port ram and allocate ports according to the DPRAM hard block */
+            create_r2w_dual_port_ram(bram, netlist);
+
+        } else if (rd_ports == 2 && wr_ports == 1) {
+            /* create a dual port ram and allocate ports according to the DPRAM hard block */
+            create_2rw_dual_port_ram(bram, netlist);
+
+        } else if (rd_ports == 2 && wr_ports == 2) {
+            /* create a dual port ram and allocate ports according to the DPRAM hard block */
+            create_2r2w_dual_port_ram(bram, netlist);
+
+        } else {
+            /* create a dual port ram and muxed all read together and all writes together */
+            create_nrmw_dual_port_ram(bram, netlist);
+        }
+    }
+}
+/**
+ * (function: resolve_bram_node)
+ *
+ * @brief builds a block memory from a bram node, optimizes it and stores it in the bram list
+ *
+ * @param node pointing to a bram node
+ * @param traverse_mark_number unique traversal mark for blif elaboration pass
+ * @param netlist pointer to the current netlist file
+ */
+void resolve_bram_node(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist)
+{
+    /* the node must carry the mark of the current traversal */
+    oassert(node->traverse_visited == traverse_mark_number);
+    /* a bram node is expected to have six input ports and one output port */
+    oassert(node->num_input_port_sizes == 6);
+    oassert(node->num_output_port_sizes == 1);
+
+    /* build the block memory and run the memory inference optimization on it */
+    block_memory_t *block_mem = init_block_memory(node, netlist);
+    perform_optimization(block_mem);
+
+    /* keep it in the list of block memories */
+    block_memories_info.block_memory_list
+        = insert_in_vptr_list(block_memories_info.block_memory_list, block_mem);
+}
+
+/**
+ * (function: resolve_rom_node)
+ *
+ * @brief builds a read-only memory from a rom node using
+ * init_read_only_memory, runs perform_optimization on it and
+ * stores it in the list of read-only memories
+ *
+ * @param node pointing to a rom node
+ * @param traverse_mark_number unique traversal mark for blif elaboration pass
+ * @param netlist pointer to the current netlist file
+ */
+void resolve_rom_node(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist)
+{
+    /* the node must carry the mark of the current traversal */
+    oassert(node->traverse_visited == traverse_mark_number);
+    /* a rom node is expected to have three input ports and one output port */
+    oassert(node->num_input_port_sizes == 3);
+    oassert(node->num_output_port_sizes == 1);
+
+    /* build the read-only memory and run the memory inference optimization on it */
+    block_memory_t *read_only_mem = init_read_only_memory(node, netlist);
+    perform_optimization(read_only_mem);
+
+    /* keep it in the list of read-only memories */
+    block_memories_info.read_only_memory_list
+        = insert_in_vptr_list(block_memories_info.read_only_memory_list, read_only_mem);
+}
+
+/**
+ * (function: resolve_ymem_node)
+ *
+ * @brief change ymem to bram or rom, depending on its number
+ * of input ports, and resolve the transformed node
+ *
+ * @param node pointing to a ymem node
+ * @param traverse_mark_number unique traversal mark for blif elaboration pass
+ * @param netlist pointer to the current netlist file
+ */
+void resolve_ymem_node(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist)
+{
+    oassert(node->traverse_visited == traverse_mark_number);
+
+    /* classify the cell by its port counts up front */
+    const bool single_output = (node->num_output_port_sizes == 1);
+    const bool bram_layout = (node->num_input_port_sizes == 7) && single_output;
+    const bool rom_layout = (node->num_input_port_sizes == 3) && single_output;
+
+    if (bram_layout) {
+        /* seven input ports: create a BRAM node and resolve it */
+        nnode_t *bram_node = ymem_to_bram(node, traverse_mark_number);
+        resolve_bram_node(bram_node, traverse_mark_number, netlist);
+    } else if (rom_layout) {
+        /* three input ports: create a ROM node and resolve it */
+        nnode_t *rom_node = ymem_to_rom(node, traverse_mark_number);
+        resolve_rom_node(rom_node, traverse_mark_number, netlist);
+    }
+    /* any other port layout leaves the node as it is */
+}
+
+/* resolve_ymem2_node: change a ymem2 cell to bram or rom according to its number of input ports */
+void resolve_ymem2_node(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist)
+{
+    oassert(node->traverse_visited == traverse_mark_number);
+
+    /* classify the cell by its port counts up front */
+    const bool single_output = (node->num_output_port_sizes == 1);
+    const bool bram_layout = (node->num_input_port_sizes == 9) && single_output;
+    const bool rom_layout = (node->num_input_port_sizes == 5) && single_output;
+
+    if (bram_layout) {
+        /* nine input ports: create a BRAM node and resolve it */
+        nnode_t *bram_node = ymem2_to_bram(node, traverse_mark_number);
+        resolve_bram_node(bram_node, traverse_mark_number, netlist);
+    } else if (rom_layout) {
+        /* five input ports: create a ROM node and resolve it */
+        nnode_t *rom_node = ymem2_to_rom(node, traverse_mark_number);
+        resolve_rom_node(rom_node, traverse_mark_number, netlist);
+    }
+    /* any other port layout leaves the node as it is */
+}
+
+/**
+ * (function: iterate_block_memories)
+ *
+ * @brief walk the bram list, then the rom list, and map every entry to DP/SPRAMs
+ *
+ * @param netlist pointer to the current netlist file
+ */
+void iterate_block_memories(netlist_t *netlist)
+{
+    t_linked_vptr *entry;
+
+    /* first pass: the block memories */
+    for (entry = block_memories_info.block_memory_list; entry != NULL; entry = entry->next) {
+        block_memory_t *bram = (block_memory_t *)entry->data_vptr;
+
+        /* every list entry must carry a block memory */
+        oassert(bram != NULL);
+
+        map_bram_to_mem_hardblocks(bram, netlist);
+    }
+
+    /* second pass: the read-only memories */
+    for (entry = block_memories_info.read_only_memory_list; entry != NULL; entry = entry->next) {
+        block_memory_t *rom = (block_memory_t *)entry->data_vptr;
+
+        /* every list entry must carry a read-only memory */
+        oassert(rom != NULL);
+
+        map_rom_to_mem_hardblocks(rom, netlist);
+    }
+}
+
+/**
+ * (function: check_same_addrs)
+ *
+ * @brief tells whether the read addr and the write addr
+ *  of a block memory are the same.
+ *
+ * @param bram pointer to the block memory
+ *
+ * @return read_addr == write_addr
+ */
+static bool check_same_addrs(block_memory_t *bram)
+{
+    signal_list_t *rd_addr = bram->read_addr;
+    signal_list_t *wr_addr = bram->write_addr;
+
+    /* addresses of different widths are never the same */
+    if (rd_addr->count != wr_addr->count) {
+        return false;
+    }
+
+    /* equal widths: compare the two signal lists with sigcmp */
+    return sigcmp(rd_addr, wr_addr);
+}
+
+/**
+ * (function: ymem_to_rom)
+ *
+ * @brief map ymem to rom: a new ROM node takes over the pins, the ymem node is freed, the ROM node is returned
+ *
+ * @param node pointing to a ymem node with three input ports and one output port
+ * @param traverse_mark_number unique traversal mark for blif elaboration pass
+ */
+static nnode_t *ymem_to_rom(nnode_t *node, uintptr_t traverse_mark_number)
+{
+    oassert(node->traverse_visited == traverse_mark_number);
+
+    int i;
+    int offset, new_offset = 0;
+    int addr_width = node->attributes->ABITS;
+    int data_width = node->attributes->DBITS;
+    int num_rd_ports = node->attributes->RD_PORTS;
+
+    int RD_ADDR_width = node->input_port_sizes[0];
+    int RD_CLK_width = node->input_port_sizes[1];
+    int RD_DATA_width = node->output_port_sizes[0];
+    int RD_ENABLE_width = node->input_port_sizes[2];
+
+    /* a ymem that becomes a ROM has three input ports and one output port */
+    oassert(node->num_input_port_sizes == 3);
+    oassert(node->num_output_port_sizes == 1);
+
+    /* create the ROM node, copying attributes, name and ast node from the ymem node */
+    nnode_t *transformed_mem = allocate_nnode(node->loc);
+    transformed_mem->traverse_visited = traverse_mark_number;
+    transformed_mem->type = ROM;
+    copy_attribute(transformed_mem->attributes, node->attributes);
+    transformed_mem->name = node_name(transformed_mem, node->name);
+    transformed_mem->related_ast_node = node->related_ast_node;
+    /**
+     * port layout: ymem node (left) ==> ROM node (right)
+     *
+     * RD_CLK:     input port [1]  ==> CLK:        input port [0]
+     * RD_ADDR:    input port [0]  ==> RD_ADDR:    input port [1]
+     * RD_ENABLE:  input port [2]  ==> RD_ENABLE:  input port [2]
+     * RD_DATA:    output port[0]  ==> RD_DATA:    output port [0]
+     */
+
+    /* CLK: the ymem clk pins start right after the RD_ADDR pins */
+    offset = RD_ADDR_width;
+    add_input_port_information(transformed_mem, 1);
+    allocate_more_input_pins(transformed_mem, 1);
+    for (i = 0; i < RD_CLK_width; i++) {
+        if (i == 0) {
+            remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, 0);
+        } else {
+            /* only the first clk pin is kept, delete extra pins */
+            delete_npin(node->input_pins[i + offset]);
+        }
+    }
+    new_offset += 1;
+
+    /* RD_ADDR: first ymem input port, placed after the single clk pin */
+    offset = 0;
+    oassert(RD_ADDR_width == num_rd_ports * addr_width);
+    add_input_port_information(transformed_mem, RD_ADDR_width);
+    allocate_more_input_pins(transformed_mem, RD_ADDR_width);
+    for (i = 0; i < RD_ADDR_width; i++) {
+        remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
+    }
+    new_offset += RD_ADDR_width;
+
+    /* RD_ENABLE: one enable pin per read port, placed after the address pins */
+    offset = RD_ADDR_width + RD_CLK_width;
+    oassert(RD_ENABLE_width == num_rd_ports);
+    add_input_port_information(transformed_mem, RD_ENABLE_width);
+    allocate_more_input_pins(transformed_mem, RD_ENABLE_width);
+    for (i = 0; i < RD_ENABLE_width; i++) {
+        remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
+    }
+    new_offset += RD_ENABLE_width;
+
+    /* RD_DATA: the only output port, remapped pin by pin */
+    offset = 0;
+    oassert(RD_DATA_width == num_rd_ports * data_width);
+    add_output_port_information(transformed_mem, RD_DATA_width);
+    allocate_more_output_pins(transformed_mem, RD_DATA_width);
+    for (i = 0; i < RD_DATA_width; i++) {
+        remap_pin_to_new_node(node->output_pins[i + offset], transformed_mem, i);
+    }
+
+    // CLEAN UP: every pin was remapped or deleted above, free the ymem node
+    free_nnode(node);
+
+    return (transformed_mem);
+}
+/* ymem2_to_rom: map a ymem2 cell (five input ports) to a new ROM node; the ymem2 node is freed */
+static nnode_t *ymem2_to_rom(nnode_t *node, uintptr_t traverse_mark_number)
+{
+    oassert(node->traverse_visited == traverse_mark_number);
+
+    int i;
+    int offset, new_offset = 0;
+    int addr_width = node->attributes->ABITS;
+    int data_width = node->attributes->DBITS;
+    int num_rd_ports = node->attributes->RD_PORTS;
+
+    int RD_ADDR_width = node->input_port_sizes[0];
+    int RD_ARST_width = node->input_port_sizes[1];
+    int RD_CLK_width = node->input_port_sizes[2];
+    int RD_DATA_width = node->output_port_sizes[0];
+    int RD_ENABLE_width = node->input_port_sizes[3];
+    int RD_SRST_width = node->input_port_sizes[4];
+
+    /* a ymem2 that becomes a ROM has five input ports and one output port */
+    oassert(node->num_input_port_sizes == 5);
+    oassert(node->num_output_port_sizes == 1);
+
+    /* create the ROM node, copying attributes, name and ast node from the ymem2 node */
+    nnode_t *transformed_mem = allocate_nnode(node->loc);
+    transformed_mem->traverse_visited = traverse_mark_number;
+    transformed_mem->type = ROM;
+    copy_attribute(transformed_mem->attributes, node->attributes);
+    transformed_mem->name = node_name(transformed_mem, node->name);
+    transformed_mem->related_ast_node = node->related_ast_node;
+
+    /* ARST: second input port, its pins are deleted and not carried over to the ROM node */
+    offset = RD_ADDR_width;
+    for (i = 0; i < RD_ARST_width; i++) {
+        delete_npin(node->input_pins[offset + i]);
+    }
+    /* SRST: last input port, its pins are deleted as well */
+    offset = RD_ADDR_width + RD_ARST_width + RD_CLK_width + RD_ENABLE_width;
+    for (i = 0; i < RD_SRST_width; i++) {
+        delete_npin(node->input_pins[offset + i]);
+    }
+    /* CLK: third input port; the first clk pin becomes ROM input pin 0, extra pins are deleted */
+    offset = RD_ADDR_width + RD_ARST_width;
+    add_input_port_information(transformed_mem, 1);
+    allocate_more_input_pins(transformed_mem, 1);
+    for (i = 0; i < RD_CLK_width; i++) {
+        if (i == 0) {
+            remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, 0);
+        } else {
+            delete_npin(node->input_pins[i + offset]);
+        }
+    }
+    new_offset += 1;
+
+    /* RD_ADDR: first ymem2 input port, placed after the single clk pin */
+    offset = 0;
+    oassert(RD_ADDR_width == num_rd_ports * addr_width);
+    add_input_port_information(transformed_mem, RD_ADDR_width);
+    allocate_more_input_pins(transformed_mem, RD_ADDR_width);
+    for (i = 0; i < RD_ADDR_width; i++) {
+        remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
+    }
+    new_offset += RD_ADDR_width;
+
+    /* RD_ENABLE: one enable pin per read port, placed after the address pins */
+    offset = RD_ADDR_width + RD_ARST_width + RD_CLK_width;
+    oassert(RD_ENABLE_width == num_rd_ports);
+    add_input_port_information(transformed_mem, RD_ENABLE_width);
+    allocate_more_input_pins(transformed_mem, RD_ENABLE_width);
+    for (i = 0; i < RD_ENABLE_width; i++) {
+        remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
+    }
+    new_offset += RD_ENABLE_width;
+
+    /* RD_DATA: the only output port, remapped pin by pin */
+    offset = 0;
+    oassert(RD_DATA_width == num_rd_ports * data_width);
+    add_output_port_information(transformed_mem, RD_DATA_width);
+    allocate_more_output_pins(transformed_mem, RD_DATA_width);
+    for (i = 0; i < RD_DATA_width; i++) {
+        remap_pin_to_new_node(node->output_pins[i + offset], transformed_mem, i);
+    }
+
+    // CLEAN UP: every pin was remapped or deleted above, free the ymem2 node
+    free_nnode(node);
+
+    return (transformed_mem);
+}
+/**
+ * (function: ymem_to_bram)
+ *
+ * @brief map a ymem node (7 input ports, 1 output port) to a new BRAM node
+ *
+ * @param node the ymem node; its pins are remapped or deleted and it is freed
+ * @param traverse_mark_number unique traversal mark for blif elaboration pass
+ */
+static nnode_t *ymem_to_bram(nnode_t *node, uintptr_t traverse_mark_number)
+{
+    oassert(node->traverse_visited == traverse_mark_number);
+
+    int i;
+    int offset, new_offset = 0; /* offset indexes node's pins, new_offset the new node's input pins */
+    int addr_width = node->attributes->ABITS;
+    int data_width = node->attributes->DBITS;
+    int num_rd_ports = node->attributes->RD_PORTS;
+    int num_wr_ports = node->attributes->WR_PORTS;
+
+    int RD_ADDR_width = node->input_port_sizes[0];
+    int RD_CLK_width = node->input_port_sizes[1];
+    int RD_DATA_width = node->output_port_sizes[0];
+    int RD_ENABLE_width = node->input_port_sizes[2];
+    int WR_ADDR_width = node->input_port_sizes[3];
+    int WR_CLK_width = node->input_port_sizes[4];
+    int WR_DATA_width = node->input_port_sizes[5];
+    int WR_ENABLE_width = node->input_port_sizes[6];
+
+    /* check the expected port layout (NOTE(review): the port sizes above are read before this check) */
+    oassert(node->num_input_port_sizes == 7);
+    oassert(node->num_output_port_sizes == 1);
+
+    /* create the BRAM node that replaces the given node */
+    nnode_t *transformed_mem = allocate_nnode(node->loc);
+    transformed_mem->traverse_visited = traverse_mark_number;
+    transformed_mem->type = BRAM;
+    copy_attribute(transformed_mem->attributes, node->attributes);
+    transformed_mem->name = node_name(transformed_mem, node->name);
+    transformed_mem->related_ast_node = node->related_ast_node;
+    /**
+     * BRAM information (port of the old node ==> port of the new node)
+     *
+     * WR_CLK:     input port [4]  ==> CLK:        input port [0] (first pin only)
+     * RD_ADDR:    input port [0]  ==> RD_ADDR:    input port [1]
+     * RD_ENABLE:  input port [2]  ==> RD_ENABLE:  input port [2]
+     * WR_ADDR:    input port [3]  ==> WR_ADDR:    input port [3]
+     * WR_DATA:    input port [5]  ==> WR_DATA:    input port [4]
+     * WR_ENABLE:  input port [6]  ==> WR_ENABLE:  input port [5] (one pin per write port)
+     * RD_CLK:     input port [1]  ==> not kept, pins are deleted
+     * RD_DATA:    output port[0]  ==> RD_DATA:    output port[0]
+     */
+
+    /* CLK: the first WR_CLK pin becomes the single clock pin (input pin 0 of the new node) */
+    offset = RD_ADDR_width + RD_CLK_width + RD_ENABLE_width + WR_ADDR_width;
+    add_input_port_information(transformed_mem, 1);
+    allocate_more_input_pins(transformed_mem, 1);
+    for (i = 0; i < WR_CLK_width; i++) {
+        if (i == 0) {
+            remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, 0);
+        } else {
+            /* delete extra pins */
+            delete_npin(node->input_pins[i + offset]);
+        }
+    }
+    new_offset += 1;
+
+    /* RD_ADDR: moved as is, right after the clock pin */
+    offset = 0;
+    oassert(RD_ADDR_width == num_rd_ports * addr_width);
+    add_input_port_information(transformed_mem, RD_ADDR_width);
+    allocate_more_input_pins(transformed_mem, RD_ADDR_width);
+    for (i = 0; i < RD_ADDR_width; i++) {
+        remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
+    }
+    new_offset += RD_ADDR_width;
+
+    /* RD_CLK: not kept, all of its pins are deleted */
+    offset = RD_ADDR_width;
+    oassert(RD_CLK_width == num_rd_ports);
+    for (i = 0; i < RD_CLK_width; i++) {
+        delete_npin(node->input_pins[i + offset]);
+    }
+
+    /* RD_ENABLE: moved as is */
+    offset = RD_ADDR_width + RD_CLK_width;
+    oassert(RD_ENABLE_width == num_rd_ports);
+    add_input_port_information(transformed_mem, RD_ENABLE_width);
+    allocate_more_input_pins(transformed_mem, RD_ENABLE_width);
+    for (i = 0; i < RD_ENABLE_width; i++) {
+        remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
+    }
+    new_offset += RD_ENABLE_width;
+
+    /* WR_ADDR: moved as is */
+    offset = RD_ADDR_width + RD_CLK_width + RD_ENABLE_width;
+    oassert(WR_ADDR_width == num_wr_ports * addr_width);
+    add_input_port_information(transformed_mem, WR_ADDR_width);
+    allocate_more_input_pins(transformed_mem, WR_ADDR_width);
+    for (i = 0; i < WR_ADDR_width; i++) {
+        remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
+    }
+    new_offset += WR_ADDR_width;
+
+    /* WR_DATA: moved as is (the WR_CLK pins in between were handled above) */
+    offset = RD_ADDR_width + RD_CLK_width + RD_ENABLE_width + WR_ADDR_width + WR_CLK_width;
+    oassert(WR_DATA_width == num_wr_ports * data_width);
+    add_input_port_information(transformed_mem, WR_DATA_width);
+    allocate_more_input_pins(transformed_mem, WR_DATA_width);
+    for (i = 0; i < WR_DATA_width; i++) {
+        remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
+    }
+    new_offset += WR_DATA_width;
+
+    /* WR_ENABLE: keep the first enable pin of every data_width group, delete the others */
+    offset = RD_ADDR_width + RD_CLK_width + RD_ENABLE_width + WR_ADDR_width + WR_CLK_width + WR_DATA_width;
+    oassert(WR_ENABLE_width == num_wr_ports * data_width);
+    add_input_port_information(transformed_mem, num_wr_ports);
+    allocate_more_input_pins(transformed_mem, num_wr_ports);
+    for (i = 0; i < WR_ENABLE_width; i++) {
+        if (i % data_width == 0)
+            remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, new_offset++);
+        else
+            delete_npin(node->input_pins[i + offset]);
+    }
+
+    /* RD_DATA: all output pins are moved to the same index on the new node */
+    offset = 0;
+    oassert(RD_DATA_width == num_rd_ports * data_width);
+    add_output_port_information(transformed_mem, RD_DATA_width);
+    allocate_more_output_pins(transformed_mem, RD_DATA_width);
+    for (i = 0; i < RD_DATA_width; i++) {
+        remap_pin_to_new_node(node->output_pins[i + offset], transformed_mem, i);
+    }
+
+    // CLEAN UP: the old node is no longer needed
+    free_nnode(node);
+
+    return (transformed_mem); /* the replacement BRAM node */
+}
+/* (function: ymem2_to_bram) map a ymem2 node (9 input ports, 1 output port) to a new BRAM node; the given node is freed */
+static nnode_t *ymem2_to_bram(nnode_t *node, uintptr_t traverse_mark_number)
+{
+    oassert(node->traverse_visited == traverse_mark_number);
+
+    int i;
+    int offset, new_offset = 0; /* offset indexes node's pins, new_offset the new node's input pins */
+    int addr_width = node->attributes->ABITS;
+    int data_width = node->attributes->DBITS;
+    int num_rd_ports = node->attributes->RD_PORTS;
+    int num_wr_ports = node->attributes->WR_PORTS;
+
+    int RD_ADDR_width = node->input_port_sizes[0];
+    int RD_ARST_width = node->input_port_sizes[1];
+    int RD_CLK_width = node->input_port_sizes[2];
+    int RD_DATA_width = node->output_port_sizes[0];
+    int RD_ENABLE_width = node->input_port_sizes[3];
+    int RD_SRST_width = node->input_port_sizes[4];
+    int WR_ADDR_width = node->input_port_sizes[5];
+    int WR_CLK_width = node->input_port_sizes[6];
+    int WR_DATA_width = node->input_port_sizes[7];
+    int WR_ENABLE_width = node->input_port_sizes[8];
+
+    /* check the expected port layout (NOTE(review): the port sizes above are read before this check) */
+    oassert(node->num_input_port_sizes == 9);
+    oassert(node->num_output_port_sizes == 1);
+
+    /* create the BRAM node that replaces the given node */
+    nnode_t *transformed_mem = allocate_nnode(node->loc);
+    transformed_mem->traverse_visited = traverse_mark_number;
+    transformed_mem->type = BRAM;
+    copy_attribute(transformed_mem->attributes, node->attributes);
+    transformed_mem->name = node_name(transformed_mem, node->name);
+    transformed_mem->related_ast_node = node->related_ast_node;
+
+    /* ARST: not kept, all of its pins are deleted */
+    offset = RD_ADDR_width;
+    for (i = 0; i < RD_ARST_width; i++) {
+        delete_npin(node->input_pins[offset + i]);
+    }
+    /* SRST: not kept, all of its pins are deleted */
+    offset = RD_ADDR_width + RD_ARST_width + RD_CLK_width + RD_ENABLE_width;
+    for (i = 0; i < RD_SRST_width; i++) {
+        delete_npin(node->input_pins[offset + i]);
+    }
+
+    /* CLK: the first WR_CLK pin becomes the single clock pin (input pin 0 of the new node) */
+    offset = RD_ADDR_width + RD_ARST_width + RD_CLK_width + RD_ENABLE_width + RD_SRST_width + WR_ADDR_width;
+    add_input_port_information(transformed_mem, 1);
+    allocate_more_input_pins(transformed_mem, 1);
+    for (i = 0; i < WR_CLK_width; i++) {
+        if (i == 0) {
+            remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, 0);
+        } else {
+            /* delete extra pins */
+            delete_npin(node->input_pins[i + offset]);
+        }
+    }
+    new_offset += 1;
+
+    /* RD_ADDR: moved as is, right after the clock pin */
+    offset = 0;
+    oassert(RD_ADDR_width == num_rd_ports * addr_width);
+    add_input_port_information(transformed_mem, RD_ADDR_width);
+    allocate_more_input_pins(transformed_mem, RD_ADDR_width);
+    for (i = 0; i < RD_ADDR_width; i++) {
+        remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
+    }
+    new_offset += RD_ADDR_width;
+
+    /* RD_CLK: not kept, all of its pins are deleted */
+    offset = RD_ADDR_width + RD_ARST_width;
+    oassert(RD_CLK_width == num_rd_ports);
+    for (i = 0; i < RD_CLK_width; i++) {
+        delete_npin(node->input_pins[i + offset]);
+    }
+
+    /* RD_ENABLE: moved as is */
+    offset = RD_ADDR_width + RD_ARST_width + RD_CLK_width;
+    oassert(RD_ENABLE_width == num_rd_ports);
+    add_input_port_information(transformed_mem, RD_ENABLE_width);
+    allocate_more_input_pins(transformed_mem, RD_ENABLE_width);
+    for (i = 0; i < RD_ENABLE_width; i++) {
+        remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
+    }
+    new_offset += RD_ENABLE_width;
+
+    /* WR_ADDR: moved as is */
+    offset = RD_ADDR_width + RD_ARST_width + RD_CLK_width + RD_ENABLE_width + RD_SRST_width;
+    oassert(WR_ADDR_width == num_wr_ports * addr_width);
+    add_input_port_information(transformed_mem, WR_ADDR_width);
+    allocate_more_input_pins(transformed_mem, WR_ADDR_width);
+    for (i = 0; i < WR_ADDR_width; i++) {
+        remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
+    }
+    new_offset += WR_ADDR_width;
+
+    /* WR_DATA: moved as is (the WR_CLK pins in between were handled above) */
+    offset = RD_ADDR_width + RD_ARST_width + RD_CLK_width + RD_ENABLE_width + RD_SRST_width + WR_ADDR_width + WR_CLK_width;
+    oassert(WR_DATA_width == num_wr_ports * data_width);
+    add_input_port_information(transformed_mem, WR_DATA_width);
+    allocate_more_input_pins(transformed_mem, WR_DATA_width);
+    for (i = 0; i < WR_DATA_width; i++) {
+        remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
+    }
+    new_offset += WR_DATA_width;
+
+    /* WR_ENABLE: keep the first enable pin of every data_width group, delete the others */
+    offset = RD_ADDR_width + RD_ARST_width + RD_CLK_width + RD_ENABLE_width + RD_SRST_width + WR_ADDR_width + WR_CLK_width + WR_DATA_width;
+    oassert(WR_ENABLE_width == num_wr_ports * data_width);
+    add_input_port_information(transformed_mem, num_wr_ports);
+    allocate_more_input_pins(transformed_mem, num_wr_ports);
+    for (i = 0; i < WR_ENABLE_width; i++) {
+        if (i % data_width == 0)
+            remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, new_offset++);
+        else
+            delete_npin(node->input_pins[i + offset]);
+    }
+
+    /* RD_DATA: all output pins are moved to the same index on the new node */
+    offset = 0;
+    oassert(RD_DATA_width == num_rd_ports * data_width);
+    add_output_port_information(transformed_mem, RD_DATA_width);
+    allocate_more_output_pins(transformed_mem, RD_DATA_width);
+    for (i = 0; i < RD_DATA_width; i++) {
+        remap_pin_to_new_node(node->output_pins[i + offset], transformed_mem, i);
+    }
+
+    // CLEAN UP: the old node is no longer needed
+    free_nnode(node);
+
+    return (transformed_mem); /* the replacement BRAM node */
+}
+
+/**
+ * (function: perform_optimization)
+ *
+ * @brief shrink the address width of a block memory or rom to the
+ * smallest width whose address space covers the memory size, and
+ * prune the read / write address signals down to that width
+ *
+ * @param memory pointer to the block memory
+ */
+static void perform_optimization(block_memory_t *memory)
+{
+    nnode_t *mem_node = memory->node;
+    const int mem_depth = mem_node->attributes->size;
+    const int old_width = mem_node->attributes->ABITS;
+    const int n_read = mem_node->attributes->RD_PORTS;
+    const int n_write = mem_node->attributes->WR_PORTS;
+
+    /* widen one bit at a time until the value obtained by shifting 0x1
+     * left by new_width is no longer below the depth */
+    int new_width = 0;
+    for (long capacity = 0; capacity < mem_depth;) {
+        ++new_width;
+        capacity = shift_left_value_with_overflow_check(0X1, new_width, mem_node->loc);
+    }
+
+    /**
+     * [NOTE]: memories with several read or write addresses (like a
+     * dual port ram) have to be handled here, because yosys does NOT
+     * shrink an address that went through an arithmetic operation
+     * (which extends it to 32 bits) before creating the mem
+     * subcircuit.
+     * e.g.:
+     *
+     *      wire [3:0] read_addr;
+     *      read_data <= mem[read_addr / 3]
+     *
+     * the read address signal of the $mem subcircuit is then 32 bits
+     * wide, although only four bits are needed. The port count is
+     * passed along to prune_signal together with both widths.
+     */
+
+    /* narrow the read address, when the memory has one */
+    if (memory->read_addr != NULL) {
+        memory->read_addr = prune_signal(memory->read_addr, old_width, new_width, n_read);
+    }
+
+    /* narrow the write address, when the memory has one */
+    if (memory->write_addr != NULL) {
+        memory->write_addr = prune_signal(memory->write_addr, old_width, new_width, n_write);
+    }
+
+    /* record the new address width on the node */
+    mem_node->attributes->ABITS = new_width;
+}
+
+/**
+ * (function: split_cascade_port)
+ *
+ * @brief split the given signal list into chunks of desired_width size.
+ * Then, cascade them through 2-input muxes driven by the selector pins.
+ * The order of selectors is assumed to match the order of the chunks
+ *
+ * @param signalvar list of signals (like write data)
+ * @param selectors list of selectors (like write enables), one per chunk
+ * @param desired_width final output width
+ * @param node pointer to the corresponding node
+ * @param netlist pointer to the current netlist
+ *
+ * @return signalvar itself when no cascade is needed, else a new list with the outputs of the last mux
+ */
+static signal_list_t *split_cascade_port(signal_list_t *signalvar, signal_list_t *selectors, int desired_width, nnode_t *node, netlist_t *netlist)
+{
+    /* no cascade needed: the input list itself is returned (not a copy) */
+    if (signalvar->count == desired_width) {
+        return (signalvar);
+    }
+
+    /* validate signals list size (assumes desired_width > 0) */
+    oassert(signalvar->count % desired_width == 0);
+
+    int i, j;
+    int num_chunk = signalvar->count / desired_width;
+    signal_list_t *return_value = NULL;
+    /* validate selector size */
+    oassert(selectors->count == num_chunk);
+
+    /* split the input list into num_chunk lists of desired_width pins */
+    signal_list_t **splitted_signals = split_signal_list(signalvar, desired_width);
+
+    /* create cascaded multiplexers */
+    nnode_t **muxes = (nnode_t **)vtr::calloc(num_chunk, sizeof(nnode_t *));
+    signal_list_t **internal_outputs = (signal_list_t **)vtr::calloc(num_chunk, sizeof(signal_list_t *));
+    for (i = 0; i < num_chunk; ++i) {
+        /* mux inputs */
+        signal_list_t **mux_inputs = (signal_list_t **)vtr::calloc(2, sizeof(signal_list_t *));
+        mux_inputs[0] = init_signal_list();
+        if (i == 0) {
+            /* the first port of the first mux should be driven by PAD node */
+            for (j = 0; j < desired_width; j++) {
+                add_pin_to_signal_list(mux_inputs[0], get_pad_pin(netlist));
+            }
+        } else {
+            /* the first port of the remaining muxes should be driven by the previous mux output */
+            for (j = 0; j < desired_width; j++) {
+                add_pin_to_signal_list(mux_inputs[0], internal_outputs[i - 1]->pins[j]);
+            }
+        }
+
+        /* hook chunk i of the split signals as the second mux input */
+        mux_inputs[1] = init_signal_list();
+        for (j = 0; j < desired_width; j++) {
+            add_pin_to_signal_list(mux_inputs[1], splitted_signals[i]->pins[j]);
+        }
+
+        /* handle mux selector and create the multiplexer */
+        {
+            /* create a signal list for selector pin */
+            signal_list_t *selector_i = init_signal_list();
+            add_pin_to_signal_list(selector_i, selectors->pins[i]);
+            /* a regular multiplexer instantiation */
+            muxes[i] = make_multiport_smux(mux_inputs, selector_i, 2, NULL, node, netlist);
+
+            // CLEAN UP
+            free_signal_list(selector_i);
+        }
+
+        /* initialize the internal outputs: one new fanout pin per mux output net */
+        internal_outputs[i] = init_signal_list();
+        for (j = 0; j < desired_width; j++) {
+            npin_t *output_pin = muxes[i]->output_pins[j];
+            nnet_t *output_net = output_pin->net;
+            /* add new fanout */
+            npin_t *new_pin = allocate_npin();
+            add_fanout_pin_to_net(output_net, new_pin);
+            /* keep the record of the new pin as internal outputs */
+            add_pin_to_signal_list(internal_outputs[i], new_pin);
+        }
+
+        // CLEAN UP
+        free_signal_list(mux_inputs[0]);
+        free_signal_list(mux_inputs[1]);
+        vtr::free(mux_inputs);
+    }
+
+    return_value = internal_outputs[num_chunk - 1]; /* assumes num_chunk >= 1 */
+
+    // CLEAN UP
+    for (i = 0; i < num_chunk; ++i) {
+        free_signal_list(splitted_signals[i]);
+    }
+    vtr::free(splitted_signals);
+
+    /* free the internal output signal lists except the last one, since it is the return value */
+    for (i = 0; i < num_chunk - 1; ++i) {
+        free_signal_list(internal_outputs[i]);
+    }
+    vtr::free(internal_outputs);
+    vtr::free(muxes); /* frees only the pointer array, not the mux nodes */
+
+    return (return_value);
+}
+
+/**
+ * (function: decode_out_port)
+ *
+ * @brief feed the memory outputs in src to outs->count / src->count muxes, one per selector
+ *
+ * @param src memory output pins (each asserted to be OUTPUT); a fanout of them is the second input of every mux
+ * @param outs signals split into chunks of src->count pins; chunk i is handed to mux i
+ * @param selectors list of selectors, one select pin per chunk
+ * @param node pointer to the corresponding node
+ * @param netlist pointer to the current netlist
+ */
+static void decode_out_port(signal_list_t *src, signal_list_t *outs, signal_list_t *selectors, nnode_t *node, netlist_t *netlist)
+{
+    int width = src->count;
+    /* validate signals list size */
+    oassert(width != 0);
+    oassert(outs->count % width == 0);
+
+    int i, j;
+    int num_chunk = outs->count / width;
+
+    /* split outs into num_chunk lists of width pins */
+    signal_list_t **splitted_signals = split_signal_list(outs, width);
+
+    /* validate selector size */
+    oassert(selectors->count == num_chunk);
+
+    /* adding fanout pins to src pin nets */
+    signal_list_t **src_nets_fanouts = (signal_list_t **)vtr::calloc(width, sizeof(signal_list_t *));
+    /* give every src pin net num_chunk new fanout pins (one per mux), since src pins are output pins of a memory */
+    for (i = 0; i < width; ++i) {
+        npin_t *src_pin = src->pins[i];
+        /* validate that it is output */
+        oassert(src_pin->type == OUTPUT);
+
+        /* init the related sig list */
+        src_nets_fanouts[i] = init_signal_list();
+        /* add fanouts */
+        for (j = 0; j < num_chunk; j++) {
+            npin_t *new_pin = allocate_npin();
+            /* adding fanout pin to the src_pin net */
+            add_fanout_pin_to_net(src_pin->net, new_pin);
+            /* keep the record of the newly added pin */
+            add_pin_to_signal_list(src_nets_fanouts[i], new_pin);
+        }
+    }
+
+    /* create multiplexers */
+    nnode_t **muxes = (nnode_t **)vtr::calloc(num_chunk, sizeof(nnode_t *));
+    for (i = 0; i < num_chunk; ++i) {
+        /* mux inputs */
+        signal_list_t **mux_inputs = (signal_list_t **)vtr::calloc(2, sizeof(signal_list_t *));
+        mux_inputs[0] = init_signal_list();
+        /* the first port of every mux is driven by PAD pins */
+        for (j = 0; j < width; j++) {
+            add_pin_to_signal_list(mux_inputs[0], get_pad_pin(netlist));
+        }
+
+        /* hook the i-th fanout pin of every src net as the second mux input */
+        mux_inputs[1] = init_signal_list();
+        for (j = 0; j < width; j++) {
+            add_pin_to_signal_list(mux_inputs[1], src_nets_fanouts[j]->pins[i]);
+        }
+
+        /* handle mux selector and create the multiplexer */
+        {
+            /* create a signal list for selector pin */
+            signal_list_t *selector_i = init_signal_list();
+            add_pin_to_signal_list(selector_i, selectors->pins[i]);
+            /* a regular multiplexer instantiation; chunk i of outs is passed along (presumably as the mux outputs -- confirm) */
+            muxes[i] = make_multiport_smux(mux_inputs, selector_i, 2, splitted_signals[i], node, netlist);
+
+            // CLEAN UP
+            free_signal_list(selector_i);
+        }
+
+        // CLEAN UP
+        free_signal_list(mux_inputs[0]);
+        free_signal_list(mux_inputs[1]);
+        vtr::free(mux_inputs);
+    }
+
+    // CLEAN UP
+    for (i = 0; i < num_chunk; ++i) {
+        free_signal_list(splitted_signals[i]);
+    }
+    vtr::free(splitted_signals);
+    for (i = 0; i < width; ++i) {
+        free_signal_list(src_nets_fanouts[i]);
+    }
+    vtr::free(src_nets_fanouts);
+    vtr::free(muxes); /* frees only the pointer array, not the mux nodes */
+}
+
+/**
+ * (function: cleanup_block_memory_old_node)
+ *
+ * @brief Clears every pin slot of the old node, then frees the node.
+ *
+ * @param old_node pointer to the old node
+ */
+static void cleanup_block_memory_old_node(nnode_t *old_node)
+{
+    int idx;
+
+    /* forget the input pins: only the slots are reset, the pin objects are left
+     * alone (presumably they are still in use elsewhere -- confirm with the callers) */
+    for (idx = 0; idx < old_node->num_input_pins; ++idx) {
+        if (old_node->input_pins[idx] != NULL)
+            old_node->input_pins[idx] = NULL;
+    }
+
+    /* same treatment for the output pins */
+    for (idx = 0; idx < old_node->num_output_pins; ++idx) {
+        if (old_node->output_pins[idx] != NULL)
+            old_node->output_pins[idx] = NULL;
+    }
+
+    /* the node itself can go now */
+    free_nnode(old_node);
+}
+
+/**
+ * (function: free_block_memories)
+ *
+ * @brief Frees all indexed block memories and read only memories and empties their indices and lists.
+ */
+void free_block_memories()
+{
+    if (block_memories_info.block_memory_list) { /* any block memory indexed? */
+        free_block_memory_index(block_memories_info.block_memories);
+        block_memories_info.block_memories.clear(); /* callee takes the table by value: drop the freed pointers from the real index */
+        while (block_memories_info.block_memory_list != NULL)
+            block_memories_info.block_memory_list = delete_in_vptr_list(block_memories_info.block_memory_list);
+    }
+    if (block_memories_info.read_only_memory_list) { /* any read only memory indexed? */
+        free_block_memory_index(block_memories_info.read_only_memories);
+        block_memories_info.read_only_memories.clear(); /* same reason: the callee only cleared its own copy */
+        while (block_memories_info.read_only_memory_list != NULL)
+            block_memories_info.read_only_memory_list = delete_in_vptr_list(block_memories_info.read_only_memory_list);
+    }
+}
+
+/**
+ * (function: free_block_memory_index)
+ *
+ * @brief Frees every block memory stored in the given hashtable.
+ *
+ * NOTE(review): the table is received by value, so the final clear()
+ * only empties this local copy; the caller's table keeps its (now
+ * dangling) entries unless the caller clears it as well.
+ * @param to_free to be freed block memory hashtable
+ */
+void free_block_memory_index(block_memory_hashtable to_free)
+{
+    /* walking an empty table does nothing, so no emptiness check is needed */
+    for (auto entry = to_free.begin(); entry != to_free.end(); ++entry)
+        free_block_memory(entry->second);
+
+    to_free.clear();
+}
+
+/**
+ * (function: free_block_memory)
+ *
+ * @brief Frees one block memory structure: its signal lists, its
+ * name and memory_id buffers, and finally the structure itself.
+ * A NULL pointer is ignored.
+ *
+ * @param to_free to be freed block memory structure (may be NULL)
+ */
+static void free_block_memory(block_memory_t *to_free)
+{
+    if (to_free == NULL) /* nothing to release, and the members must not be dereferenced */
+        return;
+    free_signal_list(to_free->read_addr);
+    free_signal_list(to_free->read_data);
+    free_signal_list(to_free->read_en);
+    free_signal_list(to_free->write_addr);
+    free_signal_list(to_free->write_data);
+    free_signal_list(to_free->write_en);
+    free_signal_list(to_free->clk);
+    vtr::free(to_free->name);
+    vtr::free(to_free->memory_id);
+    vtr::free(to_free); /* last: the members above live inside this structure */
+}
diff --git a/parmys-plugin/src/HardSoftLogicMixer.cc b/parmys-plugin/src/HardSoftLogicMixer.cc
new file mode 100644
index 000000000..ec7dce087
--- /dev/null
+++ b/parmys-plugin/src/HardSoftLogicMixer.cc
@@ -0,0 +1,70 @@
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "HardSoftLogicMixer.hpp"
+
+#include <stdint.h> // NOTE(review): INT_MAX is declared in <limits.h>, not here
+#include <vector>
+
+#include "multipliers.h" // instantiate_simple_soft_multiplier
+#include "odin_error.h"  // error_message
+
+HardSoftLogicMixer::HardSoftLogicMixer()
+{
+    /* one optimizer per operation type: MultsOpt for MULTIPLY, MixingOpt for all others */
+    for (int op = 0; op < operation_list_END; ++op) {
+        if (op != MULTIPLY)
+            _opts[op] = new MixingOpt();
+        else
+            _opts[op] = new MultsOpt();
+    }
+}
+
+HardSoftLogicMixer::~HardSoftLogicMixer()
+{
+    /* delete the optimizer held in every slot, in slot order */
+    for (int op = 0; op < operation_list_END; ++op)
+        delete _opts[op];
+}
+/* append opNode to the candidate list kept for its operation type */
+void HardSoftLogicMixer::note_candidate_node(nnode_t *opNode) { this->_nodes_by_opt[opNode->type].push_back(opNode); }
+/* ask the optimizer of the node's operation type whether the node is hardenable */
+bool HardSoftLogicMixer::hardenable(nnode_t *node) { return _opts[node->type]->hardenable(node); }
+/* report whether the optimizer of the node's operation type is enabled */
+bool HardSoftLogicMixer::enabled(nnode_t *node) { return _opts[node->type]->enabled(); }
+/* number of candidate nodes noted so far for the given operation type */
+int HardSoftLogicMixer::hard_blocks_needed(operation_list opt) { return static_cast<int>(_nodes_by_opt[opt].size()); }
+/* hand the node to the optimizer of its operation type, passing this mixer along */
+void HardSoftLogicMixer::partial_map_node(nnode_t *node, short traverse_number, netlist_t *netlist)
+{
+    this->_opts[node->type]->partial_map_node(node, traverse_number, netlist, this);
+}
+void HardSoftLogicMixer::perform_optimizations(netlist_t *netlist)
+{
+    /* only the MULTIPLY optimizer is driven from here, and only when it is enabled */
+    if (!_opts[MULTIPLY]->enabled())
+        return;
+    _opts[MULTIPLY]->set_blocks_needed(this->hard_blocks_needed(MULTIPLY));
+    _opts[MULTIPLY]->assign_weights(netlist, _nodes_by_opt[MULTIPLY]);
+    _opts[MULTIPLY]->perform(netlist, _nodes_by_opt[MULTIPLY]);
+    _opts[MULTIPLY]->instantiate_soft_logic(netlist, _nodes_by_opt[MULTIPLY]);
+}
diff --git a/parmys-plugin/src/Hashtable.cc b/parmys-plugin/src/Hashtable.cc
new file mode 100644
index 000000000..6fac26204
--- /dev/null
+++ b/parmys-plugin/src/Hashtable.cc
@@ -0,0 +1,60 @@
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "Hashtable.hpp"
+#include "odin_types.h"
+#include "vtr_memory.h"
+
+/* vtr::free every stored item; the entries themselves are left in the map */
+void Hashtable::destroy_free_items()
+{
+    for (const auto &entry : this->my_map)
+        vtr::free(entry.second);
+}
+/* store item under key through my_map.emplace */
+void Hashtable::add(std::string key, void *item) { my_map.emplace(key, item); }
+/* take the entry for key out of the map and return its item (not freed); NULL when key is absent */
+void *Hashtable::remove(std::string key)
+{
+    auto found = my_map.find(key);
+    if (found == my_map.end())
+        return NULL;
+
+    void *item = found->second;
+    my_map.erase(found);
+    return item;
+}
+/* look key up and return the item stored under it;
+ * NULL when key is absent, the map is not modified */
+void *Hashtable::get(std::string key)
+{
+    auto found = my_map.find(key);
+    if (found == my_map.end())
+        return NULL;
+
+    return found->second;
+}
+bool Hashtable::is_empty() { return this->my_map.empty(); } /* true when no entry is stored */
diff --git a/parmys-plugin/src/MixingOptimization.cc b/parmys-plugin/src/MixingOptimization.cc
new file mode 100644
index 000000000..293238c4b
--- /dev/null
+++ b/parmys-plugin/src/MixingOptimization.cc
@@ -0,0 +1,186 @@
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "MixingOptimization.hpp"
+
+#include <stdint.h> // INT_MAX
+#include <vector>
+
+#include "HardSoftLogicMixer.hpp" // HardSoftLogicMixer
+#include "adders.h"               // hard_adders
+#include "multipliers.h"          // instantiate_simple_soft_multiplier
+#include "netlist_statistic.h"    // mixing_optimization_stats
+#include "odin_error.h"           // error_message
+
+void MixingOpt::scale_counts() // multiply the hard-block budget by the configured ratio
+{
+    if (this->_blocks_count < 0 || this->_blocks_count == INT_MAX || this->_ratio < 0.0 || this->_ratio > 1.0) {
+        error_message(NETLIST, unknown_location, "The parameters for optimization kind:%i are configured incorrectly : count %i, ratio %f\n",
+                      this->_kind, this->_blocks_count, this->_ratio);
+        exit(1); // a misconfiguration is a failure: never leave with the success status 0
+    }
+    this->_blocks_count = this->_blocks_count * this->_ratio; // the product is converted back to the count's type
+}
+
+void MixingOpt::assign_weights(netlist_t * /*netlist*/, std::vector<nnode_t *> /*nodes*/)
+{
+    // generic fallback for optimization kinds that provide no weighting of their own: always an error
+    error_message(NETLIST, unknown_location,
+                  "Assign_weights mixing optimization was called for optimization without specification provided, for kind  %i\n", this->_kind);
+    exit(1); // report the failure through a non-zero exit status
+}
+
+void MixingOpt::perform(netlist_t *, std::vector<nnode_t *> &) // generic fallback: always an error
+{
+    error_message(NETLIST, unknown_location, "Performing mixing optimization was called for optimization without method provided, for kind  %i\n",
+                  this->_kind);
+    exit(1); // report the failure through a non-zero exit status
+}
+
+MultsOpt::MultsOpt(int _exact) : MixingOpt(1.0, MULTIPLY) // exact-count form: the ratio is fixed at 1.0
+{
+    _enabled = true;
+    _blocks_count = _exact; // the block budget is the requested count
+}
+
+MultsOpt::MultsOpt(float ratio) : MixingOpt(ratio, MULTIPLY) // ratio form: the budget is scaled by `ratio` later on
+{
+    if (ratio < 0.0 || ratio > 1.0) {
+        error_message(NETLIST, unknown_location, "Multipliers mixing optimization is started with wrong ratio %f\n", ratio);
+        exit(1); // an invalid ratio is a failure, so leave with a non-zero status
+    }
+
+    // Explicitly set all hard block multipliers to max; scale_counts() rejects a budget that is still INT_MAX
+    this->_blocks_count = INT_MAX;
+    this->_enabled = true;
+}
+
+bool MultsOpt::hardenable(nnode_t *node) // true when hard multipliers exist and the wider input exceeds min_mult
+{
+    int widest = (node->input_port_sizes[0] < node->input_port_sizes[1]) ? node->input_port_sizes[1] : node->input_port_sizes[0];
+    return (hard_multipliers && (widest > min_mult));
+}
+
+void MultsOpt::assign_weights(netlist_t *netlist, std::vector<nnode_t *> nodes)
+{
+    // run mixing_optimization_stats() on every noted node, in vector order
+    for (nnode_t *noted : nodes) {
+        mixing_optimization_stats(noted, netlist);
+    }
+}
+
+void MultsOpt::perform(netlist_t *netlist, std::vector<nnode_t *> &weighted_nodes)
+{
+    const size_t candidates = weighted_nodes.size();
+
+    // one pass per hard block in the budget: each pass hardens the heaviest node still available
+    for (int pass = 0; pass < this->_blocks_count; pass++) {
+        int best_weight = -1;
+        int best = -1;
+        for (size_t j = 0; j < candidates; j++) {
+            nnode_t *contender = weighted_nodes[j];
+            // strictly heavier than the best so far AND not excluded by the minimal
+            // "hardenable" multiplier width; nodes already marked with -1 can never win
+            if (best_weight < contender->weight && this->hardenable(contender)) {
+                best_weight = contender->weight;
+                best = j;
+            }
+        }
+
+        // no suitable node is left: stop, the remaining nodes go to soft logic
+        if (best < 0)
+            break;
+
+        // the weight -1 tells the following passes that this node was hardened
+        weighted_nodes[best]->weight = -1;
+
+        if (hard_multipliers) {
+            instantiate_hard_multiplier(weighted_nodes[best], this->cached_traverse_value, netlist);
+        }
+    }
+
+    // Back to front, drop every node that carries the -1 mark. The remaining
+    // nodes will be instantiated in soft_map_remaining_nodes
+    for (int k = candidates - 1; k >= 0; k--) {
+        if (weighted_nodes[k]->weight == -1) {
+            weighted_nodes.erase(weighted_nodes.begin() + k);
+        }
+    }
+}
+
+void MixingOpt::set_blocks_needed(int new_count) { _blocks_count = new_count; } // generic case: the count is taken as given
+
+// Cap the multiplier hard-block budget at new_count, then scale it by the configured ratio.
+void MultsOpt::set_blocks_needed(int new_count)
+{
+    // with development for fixed_layout, this value will change
+    const int available = INT_MAX;
+
+    // usable blocks: the smaller of what is needed and what is available
+    const int usable = (available > new_count) ? new_count : available;
+
+    // the current budget is only ever lowered here, never raised
+    if (usable < this->_blocks_count) {
+        this->_blocks_count = usable;
+    }
+
+    // scale_counts() validates the budget and applies the ratio
+    this->scale_counts();
+}
+void MixingOpt::instantiate_soft_logic(netlist_t * /*netlist*/, std::vector<nnode_t *> /* nodes*/) // generic fallback: always an error
+{
+    error_message(NETLIST, unknown_location, "Performing instantiate_soft_logic was called for optimization without method provided, for kind  %i\n",
+                  this->_kind);
+    exit(1); // report the failure through a non-zero exit status
+}
+
+void MixingOpt::partial_map_node(nnode_t * /*node*/, short /*traverse_value*/, netlist_t * /*netlist*/, HardSoftLogicMixer * /*mixer*/)
+{ // generic fallback for optimization kinds without a mapping method of their own: always an error
+    error_message(NETLIST, unknown_location, "Performing partial_map_node was called for optimization without method provided, for kind  %i\n",
+                  this->_kind);
+    exit(1); // report the failure through a non-zero exit status
+}
+
+void MultsOpt::partial_map_node(nnode_t *node, short traverse_value, netlist_t *netlist, HardSoftLogicMixer *mixer)
+{
+    if (mixer->enabled(node) && mixer->hardenable(node)) { // enabled and hardenable: only record the node as a candidate
+        mixer->note_candidate_node(node);
+    } else if (mixer->hardenable(node)) { // hardenable but not enabled: map onto a hard multiplier right away
+        instantiate_hard_multiplier(node, traverse_value, netlist);
+    } else if (!hard_adders) { // NOTE(review): tests hard_adders, not hard_multipliers - presumably deliberate; confirm
+        instantiate_simple_soft_multiplier(node, traverse_value, netlist);
+    }
+    this->cached_traverse_value = traverse_value; // read back later by perform() and instantiate_soft_logic()
+}
+
+void MultsOpt::instantiate_soft_logic(netlist_t *netlist, std::vector<nnode_t *> nodes)
+{
+    for (nnode_t *soft_node : nodes) {
+        instantiate_simple_soft_multiplier(soft_node, this->cached_traverse_value, netlist);
+    }
+    // free the nodes back to front, emptying this by-value copy of the vector on the way
+    while (!nodes.empty()) {
+        nodes.back() = free_nnode(nodes.back());
+        nodes.pop_back();
+    }
+}
diff --git a/parmys-plugin/src/adders.cc b/parmys-plugin/src/adders.cc
new file mode 100644
index 000000000..500757044
--- /dev/null
+++ b/parmys-plugin/src/adders.cc
@@ -0,0 +1,1422 @@
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "adders.h"
+#include "multipliers.h"
+#include "netlist_utils.h"
+#include "node_creation_library.h"
+#include "odin_globals.h"
+#include "odin_types.h"
+#include "odin_util.h"
+#include "subtractions.h"
+#include <string.h>
+
+#include "vtr_memory.h"
+#include "vtr_util.h"
+
+#include "../parmys_utils.hpp"
+
+using vtr::t_linked_vptr;
+
+t_model *hard_adders = NULL; // model named "adder" found by find_hard_adders(); NULL when there is none
+t_linked_vptr *add_list = NULL;
+t_linked_vptr *processed_adder_list = NULL; // every node created by split_adder() is inserted here
+t_linked_vptr *chain_list = NULL; // split_adder() inserts one chain_information_t per split adder
+int total = 0;
+int *adder = NULL; // allocated by init_add_distribution(), released by report_add_distribution()
+int min_add = 0; // split_adder(): a leftover narrower than this sends the last adder to soft logic
+int min_threshold_adder = 0; // copied from configuration.min_threshold_adder by find_hard_adders()
+
+void init_split_adder(nnode_t *node, nnode_t *ptr, int a, int sizea, int b, int sizeb, int cin, int cout, int index, int flag, netlist_t *netlist);
+static void cleanup_add_old_node(nnode_t *nodeo, netlist_t *netlist);
+
+/*---------------------------------------------------------------------------
+ * (function: init_add_distribution)
+ *  Allocates the zeroed `adder` array: size of the first two model inputs, summed, plus one
+ *-------------------------------------------------------------------------*/
+void init_add_distribution()
+{
+    oassert(hard_adders != NULL);
+
+    const int slots = hard_adders->inputs->size + hard_adders->inputs->next->size + 1;
+    adder = (int *)vtr::calloc(slots, sizeof(int));
+}
+
+/* These values are collected during the unused logic removal sweep */
+extern long adder_chain_count;
+extern long longest_adder_chain;
+extern long total_adders;
+
+extern double geomean_addsub_length;
+extern double sum_of_addsub_logs;
+
+// Print the hard adder chain statistics, then release the `adder` array (does nothing without hard adders).
+void report_add_distribution()
+{
+    if (hard_adders == NULL)
+        return;
+
+    printf("\nHard adder Distribution\n");
+    printf("============================\n");
+    printf("\n");
+    printf("\nTotal # of chains = %ld\n", adder_chain_count);
+
+    printf("\nHard adder chain Details\n");
+    printf("============================\n");
+    printf("\n");
+    printf("\nThe Number of Hard Block adders in the Longest Chain: %ld\n", longest_adder_chain);
+
+    printf("\n");
+    printf("\nThe Total Number of Hard Block adders: %ld\n", total_adders);
+    printf("\n");
+    printf("\nGeometric mean adder/subtractor chain length: %.2f\n", geomean_addsub_length);
+
+    vtr::free(adder);
+    adder = NULL; // the global must not keep pointing at freed memory (a repeated call would free it twice)
+}
+
+/*---------------------------------------------------------------------------
+ * (function: find_hard_adders)
+ *  Walks the model list of the architecture looking for the model named
+ *  "adder". Afterwards hard_adders points at that model, or is NULL when
+ *  there is none; a found model also triggers init_add_distribution().
+ *-------------------------------------------------------------------------*/
+void find_hard_adders()
+{
+    // Disable the size in configuration file.(The threshold for the extra bits).
+    // min_add = configuration.min_hard_adder;
+    min_threshold_adder = configuration.min_threshold_adder;
+
+    // the global itself is the loop cursor, so it stays on the match when one is found
+    for (hard_adders = Arch.models; hard_adders != NULL; hard_adders = hard_adders->next) {
+        if (strcmp(hard_adders->name, "adder") == 0) {
+            init_add_distribution();
+            return;
+        }
+    }
+    // list exhausted without a match: hard_adders is NULL here
+}
+
+/*---------------------------------------------------------------------------
+ * (function: declare_hard_adder)
+ *  Records the port widths of `node` (a, b and sumout) in the instance list
+ *  of the hard adder model, unless an entry with the same widths is already
+ *  there. Without a hard adder model only a warning is issued.
+ *-------------------------------------------------------------------------*/
+void declare_hard_adder(nnode_t *node)
+{
+    /* no model to record into: warn and stop before hard_adders is dereferenced */
+    if (hard_adders == NULL) {
+        warning_message(NETLIST, node->loc, "%s\n", "Instantiating adder where adders do not exist");
+        return;
+    }
+
+    const int width_a = node->input_port_sizes[0];
+    const int width_b = node->input_port_sizes[1];
+    const int width_sumout = node->output_port_sizes[1];
+
+    /* See if this size instance of adder exists? */
+    for (t_adder *known = (t_adder *)hard_adders->instances; known != NULL; known = known->next) {
+        if ((known->size_a == width_a) && (known->size_b == width_b) && (known->size_sumout == width_sumout))
+            return;
+    }
+
+    /* Does not exist - must create an instance; it becomes the new head of the list */
+    t_adder *fresh = (t_adder *)vtr::malloc(sizeof(t_adder));
+    fresh->next = (t_adder *)hard_adders->instances;
+    hard_adders->instances = fresh;
+    fresh->size_a = width_a;
+    fresh->size_b = width_b;
+    fresh->size_cin = 1; /* carry-in and carry-out are recorded as one bit wide */
+    fresh->size_cout = 1;
+    fresh->size_sumout = width_sumout;
+    return;
+}
+
+/*---------------------------------------------------------------------------
+ * (function: instantiate_hard_adder)
+ *
+ *  Maps `node` onto the hard adder block:
+ *   - records its port widths through declare_hard_adder(),
+ *   - names every output pin that has no name yet
+ *     "<node name>[<pin_node_idx>]",
+ *   - stores `mark` in node->traverse_visited.
+ *
+ *  node    : the adder node; its own name is used as is
+ *  mark    : value written to node->traverse_visited
+ *  netlist : not used
+ *
+ *  The node name needs no reformatting, so no temporary name buffer is
+ *  allocated; only the pin names are allocated (vtr::malloc) and handed
+ *  over to the pins.
+ *-------------------------------------------------------------------------*/
+void instantiate_hard_adder(nnode_t *node, short mark, netlist_t * /*netlist*/)
+{
+    /* make sure an instance with these port widths is recorded in the model */
+    declare_hard_adder(node);
+
+    /* Give names to the output pins */
+    for (int i = 0; i < node->num_output_pins; i++) {
+        npin_t *pin = node->output_pins[i];
+        if (pin->name != NULL)
+            continue; /* an existing name is never replaced */
+
+        /* 20 spare chars: room for "[", the decimal pin index, "]" and the
+         * terminating NUL on top of the node name */
+        int len = strlen(node->name) + 20;
+        char *new_name = (char *)vtr::malloc(len);
+        odin_sprintf(new_name, "%s[%d]", node->name, pin->pin_node_idx);
+        pin->name = new_name; /* the buffer is kept by the pin, it is not freed here */
+    }
+
+    /* stamp the node with the caller's traversal value */
+    node->traverse_visited = mark;
+    return;
+}
+
+/*----------------------------------------------------------------------------
+ * function: add_the_blackbox_for_adds_yosys()
+ *--------------------------------------------------------------------------*/
+void add_the_blackbox_for_adds_yosys(Yosys::Design *design)
+{
+    // Adds a blackbox module named "adder" to `design` for the recorded hard adder instances.
+    int hard_add_inputs, hard_add_outputs;
+    t_adder *adds;
+    t_model_ports *ports;
+    char *pa, *pb, *psumout, *pcin, *pcout;
+
+    /* Check to make sure this target architecture has hard adders */
+    if (hard_adders == NULL)
+        return;
+
+    /* Get the names of the ports for the adder */
+    ports = hard_adders->inputs; // the input list is read in the order cin, b, a
+    pcin = ports->name;
+    ports = ports->next;
+    pb = ports->name;
+    ports = ports->next;
+    pa = ports->name;
+
+    ports = hard_adders->outputs; // the output list is read in the order sumout, cout
+    psumout = ports->name;
+    ports = ports->next;
+    pcout = ports->name;
+
+    /* find the adder devices in the tech library */
+    adds = (t_adder *)(hard_adders->instances);
+    if (adds == NULL) /* No adders instantiated */
+        return;
+
+    /* one module is built per recorded adder instance */
+    while (adds != NULL) {
+        // NOTE(review): every instance gets the same module name, so a second one reaches the duplicate check below - confirm
+        Yosys::RTLIL::Module *module = nullptr;
+
+        Yosys::hashlib::dict<Yosys::RTLIL::IdString, std::pair<int, bool>> wideports_cache; // per port: (highest index + 1, is input)
+
+        module = new Yosys::RTLIL::Module;
+        module->name = Yosys::RTLIL::escape_id("adder");
+
+        if (design->module(module->name))
+            Yosys::log_error("Duplicate definition of module %s!\n", Yosys::log_id(module->name));
+        design->add(module);
+
+        /* add the inputs */
+        hard_add_inputs = adds->size_a + adds->size_b + adds->size_cin;
+        for (int i = 0; i < hard_add_inputs; i++) { // bits are emitted in the order a, b, cin
+            std::string w_name;
+            if (i < adds->size_a) {
+                w_name = Yosys::stringf("%s[%d]", pa, i);
+            } else if (i < hard_add_inputs - adds->size_cin && i >= adds->size_a) {
+                w_name = Yosys::stringf("%s[%d]", pb, i - adds->size_a);
+            } else {
+                w_name = Yosys::stringf("%s[%d]", pcin, i - adds->size_a - adds->size_b);
+            }
+
+            Yosys::RTLIL::Wire *wire = to_wire(w_name, module);
+            wire->port_input = true;
+
+            std::pair<Yosys::RTLIL::IdString, int> wp = wideports_split(w_name);
+            if (!wp.first.empty() && wp.second >= 0) { // remember the largest index seen for this port name
+                wideports_cache[wp.first].first = std::max(wideports_cache[wp.first].first, wp.second + 1);
+                wideports_cache[wp.first].second = true;
+            }
+        }
+
+        /* add the outputs */
+        hard_add_outputs = adds->size_cout + adds->size_sumout;
+        for (int i = 0; i < hard_add_outputs; i++) { // bits are emitted in the order cout, sumout
+            std::string w_name;
+            if (i < adds->size_cout) {
+                w_name = Yosys::stringf("%s[%d]", pcout, i);
+            } else {
+                w_name = Yosys::stringf("%s[%d]", psumout, i - adds->size_cout);
+            }
+
+            Yosys::RTLIL::Wire *wire = to_wire(w_name, module);
+            wire->port_output = true;
+
+            std::pair<Yosys::RTLIL::IdString, int> wp = wideports_split(w_name);
+            if (!wp.first.empty() && wp.second >= 0) { // same bookkeeping as for the inputs, flagged as output
+                wideports_cache[wp.first].first = std::max(wideports_cache[wp.first].first, wp.second + 1);
+                wideports_cache[wp.first].second = false;
+            }
+        }
+
+        handle_wideports_cache(&wideports_cache, module); // helper defined elsewhere; presumably builds the wide ports - confirm
+
+        module->fixup_ports();
+        wideports_cache.clear();
+
+        module->attributes[Yosys::ID::blackbox] = Yosys::RTLIL::Const(1); // mark the module as a blackbox
+
+        adds = adds->next;
+    }
+}
+
+void define_add_function_yosys(nnode_t *node, Yosys::Module *module, Yosys::Design *design)
+{
+    // Adds one "adder" cell to `module` and connects every input and output pin of `node` to it.
+    oassert(node->input_port_sizes[0] > 0);
+    oassert(node->input_port_sizes[1] > 0);
+    oassert(node->input_port_sizes[2] > 0);
+    oassert(node->output_port_sizes[0] > 0);
+    oassert(node->output_port_sizes[1] > 0);
+
+    std::string cell_type_name = "adder";
+    Yosys::IdString celltype = Yosys::RTLIL::escape_id(cell_type_name);
+    // the cell name comes from NEW_ID
+    Yosys::RTLIL::Cell *cell = module->addCell(NEW_ID, celltype);
+
+    // connections of indexed ports are collected here and passed to handle_cell_wideports_cache() at the end
+    Yosys::hashlib::dict<Yosys::RTLIL::IdString, Yosys::hashlib::dict<int, Yosys::SigBit>> cell_wideports_cache;
+
+    /* Write the input pins*/
+    const int width_a = node->input_port_sizes[0];
+    const int width_ab = width_a + node->input_port_sizes[1];
+    for (int i = 0; i < node->num_input_pins; i++) {
+        oassert(node->input_pins[i]->net->num_driver_pins == 1);
+        npin_t *driver_pin = node->input_pins[i]->net->driver_pins[0];
+
+        // port side: the pins of port 0, port 1 and port 2 follow each other and are named
+        // after the third, second and first input of the hard adder model respectively
+        std::string p;
+        if (i < width_a) {
+            p = Yosys::stringf("%s[%d]", hard_adders->inputs->next->next->name, i);
+        } else if (i < width_ab) {
+            p = Yosys::stringf("%s[%d]", hard_adders->inputs->next->name, i - width_a);
+        } else {
+            p = Yosys::stringf("%s[%d]", hard_adders->inputs->name, i - width_ab);
+        }
+
+        // signal side: the driver pin's name, or its node's name when the pin has none
+        std::string q = driver_pin->name ? driver_pin->name : driver_pin->node->name;
+
+        // when wideports_split() yields no name the port is connected directly,
+        // otherwise the connection is cached under the returned (name, index) pair
+        std::pair<Yosys::RTLIL::IdString, int> wp = wideports_split(p);
+        if (wp.first.empty())
+            cell->setPort(Yosys::RTLIL::escape_id(p), to_wire(q, module));
+        else
+            cell_wideports_cache[wp.first][wp.second] = to_wire(q, module);
+    }
+
+    /* Write the output pins*/
+    const int width_cout = node->output_port_sizes[0];
+    for (int i = 0; i < node->num_output_pins; i++) {
+        // pins of output port 0 use the second model output, all later pins the first one
+        std::string p;
+        if (i < width_cout) {
+            p = Yosys::stringf("%s[%d]", hard_adders->outputs->next->name, i);
+        } else {
+            p = Yosys::stringf("%s[%d]", hard_adders->outputs->name, i - width_cout);
+        }
+        std::string q = node->output_pins[i]->name;
+
+        std::pair<Yosys::RTLIL::IdString, int> wp = wideports_split(p);
+        if (wp.first.empty())
+            cell->setPort(Yosys::RTLIL::escape_id(p), to_wire(q, module));
+        else
+            cell_wideports_cache[wp.first][wp.second] = to_wire(q, module);
+    }
+
+    // hand every cached connection over to the shared helper
+    handle_cell_wideports_cache(&cell_wideports_cache, design, module, cell);
+
+    return;
+}
+
+/*-----------------------------------------------------------------------
+ * (function: init_split_adder)
+ *	Fills `ptr`, piece number `index` of the carry chain that `node` (an adder with port
+ *	widths a and b) is split into; sizea, sizeb, cin and cout are the port widths of a piece.
+ *	Inputs are connected to the original pins (the pins are moved over from `node`); slots
+ *	without an original pin, the carry-in and all output pins are set to NULL for later connecting.
+ *	flag = 0: all adders are hard logic block; flag = 1: the last adder in the chain is soft logic block
+ *	TODO the soft logic adders can now be split at the source, we could tap onto that and merge these
+ *	functions for simplicity and would also make sure to keep the allocation at one place
+ *---------------------------------------------------------------------*/
+void init_split_adder(nnode_t *node, nnode_t *ptr, int a, int sizea, int b, int sizeb, int cin, int cout, int index, int flag, netlist_t *netlist)
+{
+    int i;
+    int flaga = 0, flagb = 0; // per port: 0 = full width from node, 1 = nothing left (all NULL), 2 = partly filled
+    int current_sizea, current_sizeb;
+    int aa = 0, bb = 0, num = 0; // aa / bb: leftover width of port a / b in the partly filled case
+
+    // if the input of the first cin is generated by a dummy adder added
+    // to the start of the chain, then an offset is needed to compensate
+    // for that in various positions in the code, otherwise the offset is 0
+    const int offset = (configuration.adder_cin_global) ? 0 : 1;
+
+    /* Copy properties from original node */
+    ptr->type = node->type;
+    ptr->bit_width = node->bit_width;
+    ptr->related_ast_node = node->related_ast_node;
+    ptr->traverse_visited = node->traverse_visited;
+    ptr->node_data = NULL;
+
+    /* decide the current size of input a and b */
+    if (flag == 0) {
+        // increase input sizes by one if a dummy adder is
+        // added to feed the first cin in the chain
+        current_sizea = (a + offset) - sizea * index; // bits of a that are still left for this piece
+        current_sizeb = (b + offset) - sizeb * index;
+
+        if (current_sizea >= sizea)
+            current_sizea = sizea;
+        else if (current_sizea <= 0) { // port a is exhausted: the piece is padded to full width
+            current_sizea = sizea;
+            flaga = 1;
+        } else { // only aa bits are left: the rest of the port is padded
+            aa = current_sizea;
+            current_sizea = sizea;
+            flaga = 2;
+        }
+
+        if (current_sizeb >= sizeb)
+            current_sizeb = sizeb;
+        else if (current_sizeb <= 0) {
+            current_sizeb = sizeb;
+            flagb = 1;
+        } else {
+            bb = current_sizeb;
+            current_sizeb = sizeb;
+            flagb = 2;
+        }
+    } else { // flag != 0: the given sizes are used as they are, a width of 0 becomes one pin
+        if (sizea != 0)
+            current_sizea = sizea;
+        else
+            current_sizea = 1;
+        if (sizeb != 0)
+            current_sizeb = sizeb;
+        else
+            current_sizeb = 1;
+    }
+
+    /* Set new port sizes and parameters */
+    ptr->num_input_port_sizes = 3;
+    ptr->input_port_sizes = (int *)vtr::malloc(3 * sizeof(int));
+    ptr->input_port_sizes[0] = current_sizea;
+    ptr->input_port_sizes[1] = current_sizeb;
+    ptr->input_port_sizes[2] = cin;
+    ptr->num_output_port_sizes = 2;
+    ptr->output_port_sizes = (int *)vtr::malloc(2 * sizeof(int));
+    ptr->output_port_sizes[0] = cout;
+
+    /* The size of output port sumout equals the maximum of sizea and sizeb  */
+    if (current_sizea > current_sizeb)
+        ptr->output_port_sizes[1] = current_sizea;
+    else
+        ptr->output_port_sizes[1] = current_sizeb;
+
+    /* Set the number of pins and re-locate previous pin entries */
+    ptr->num_input_pins = current_sizea + current_sizeb + cin;
+    ptr->input_pins = (npin_t **)vtr::malloc(sizeof(void *) * (current_sizea + current_sizeb + cin)); // layout: a | b | cin, not zeroed
+    // if flaga or flagb = 1, the input pins should be empty.
+    if (flaga == 1) {
+        for (i = 0; i < current_sizea; i++)
+            ptr->input_pins[i] = NULL;
+    } else if (flaga == 2) {
+        if (index == 0) {
+            ptr->input_pins[0] = NULL; // slot 0 of the first piece stays empty
+            if (sizea > 1) {
+                for (i = 1; i < aa; i++) {
+                    ptr->input_pins[i] = node->input_pins[i + index * sizea - 1];
+                    ptr->input_pins[i]->node = ptr;
+                    ptr->input_pins[i]->pin_node_idx = i;
+                }
+                for (i = 0; i < (sizea - aa); i++)
+                    ptr->input_pins[i + aa] = NULL;
+            }
+        } else {
+            for (i = 0; i < aa; i++) {
+                ptr->input_pins[i] = node->input_pins[i + index * sizea - 1]; // NOTE(review): fixed -1 here, -offset in the full-width path - confirm
+                ptr->input_pins[i]->node = ptr;
+                ptr->input_pins[i]->pin_node_idx = i;
+            }
+            for (i = 0; i < (sizea - aa); i++)
+                ptr->input_pins[i + aa] = NULL;
+        }
+    } else {
+        if (index == 0 && !configuration.adder_cin_global) {
+            if (flag == 0) {
+                ptr->input_pins[0] = NULL; // slot 0 stays empty, the original pins are shifted up by one
+                if (current_sizea > 1) {
+                    for (i = 1; i < current_sizea; i++) {
+                        ptr->input_pins[i] = node->input_pins[i - 1];
+                        ptr->input_pins[i]->node = ptr;
+                        ptr->input_pins[i]->pin_node_idx = i;
+                    }
+                }
+            } else {
+                for (i = 0; i < current_sizea; i++) {
+                    ptr->input_pins[i] = node->input_pins[i];
+                    ptr->input_pins[i]->node = ptr;
+                    ptr->input_pins[i]->pin_node_idx = i;
+                }
+            }
+        } else {
+            if (flag == 0) {
+                for (i = 0; i < current_sizea; i++) {
+                    // use the offset to compensate for the dummy adder added at start of the chain
+                    ptr->input_pins[i] = node->input_pins[i + index * sizea - offset];
+                    ptr->input_pins[i]->node = ptr;
+                    ptr->input_pins[i]->pin_node_idx = i;
+                }
+            } else {
+                if (sizea == 0)
+                    connect_nodes(netlist->gnd_node, 0, ptr, 0); // zero-width port a: its single pin is driven by the gnd node
+                else {
+                    num = node->input_port_sizes[0];
+                    for (i = 0; i < current_sizea; i++) { // take the last current_sizea pins of port a
+                        ptr->input_pins[i] = node->input_pins[i + num - current_sizea];
+                        ptr->input_pins[i]->node = ptr;
+                        ptr->input_pins[i]->pin_node_idx = i;
+                    }
+                }
+            }
+        }
+    }
+
+    if (flagb == 1) {
+        for (i = 0; i < current_sizeb; i++)
+            ptr->input_pins[i + current_sizea] = NULL; // port b starts right after port a (was offset by current_sizeb)
+    } else if (flagb == 2) {
+        if (index == 0) {
+            ptr->input_pins[sizea] = NULL; // first slot of port b (current_sizea equals sizea on this path)
+            if (current_sizeb > 1) {
+                for (i = 1; i < bb; i++) {
+                    ptr->input_pins[i + current_sizea] = node->input_pins[i + a + index * sizeb - 1];
+                    ptr->input_pins[i + current_sizea]->node = ptr;
+                    ptr->input_pins[i + current_sizea]->pin_node_idx = i + current_sizea;
+                }
+                for (i = 0; i < (sizeb - bb); i++)
+                    ptr->input_pins[i + current_sizea + bb] = NULL;
+            }
+        } else {
+            for (i = 0; i < bb; i++) {
+                ptr->input_pins[i + current_sizea] = node->input_pins[i + a + index * sizeb - 1];
+                ptr->input_pins[i + current_sizea]->node = ptr;
+                ptr->input_pins[i + current_sizea]->pin_node_idx = i + current_sizea;
+            }
+            for (i = 0; i < (sizeb - bb); i++)
+                ptr->input_pins[i + current_sizea + bb] = NULL;
+        }
+    } else {
+        if (index == 0 && !configuration.adder_cin_global) {
+            if (flag == 0) {
+                ptr->input_pins[sizea] = NULL; // first slot of port b stays empty
+                if (current_sizeb > 1) {
+                    for (i = 1; i < current_sizeb; i++) {
+                        ptr->input_pins[i + current_sizea] = node->input_pins[i + a + index * sizeb - 1];
+                        ptr->input_pins[i + current_sizea]->node = ptr;
+                        ptr->input_pins[i + current_sizea]->pin_node_idx = i + current_sizea;
+                    }
+                }
+            } else {
+                for (i = 0; i < current_sizeb; i++) {
+                    ptr->input_pins[i + current_sizea] = node->input_pins[i + a]; // port b of node starts at index a
+                    ptr->input_pins[i + current_sizea]->node = ptr;
+                    ptr->input_pins[i + current_sizea]->pin_node_idx = i + current_sizea;
+                }
+            }
+        } else {
+            if (flag == 0) {
+                for (i = 0; i < current_sizeb; i++) {
+                    ptr->input_pins[i + current_sizea] = node->input_pins[i + a + index * sizeb - offset];
+                    ptr->input_pins[i + current_sizea]->node = ptr;
+                    ptr->input_pins[i + current_sizea]->pin_node_idx = i + current_sizea;
+                }
+            } else {
+                if (sizeb == 0)
+                    connect_nodes(netlist->gnd_node, 0, ptr, current_sizea); // zero-width port b: its single pin is driven by the gnd node
+                else {
+                    num = node->input_port_sizes[0] + node->input_port_sizes[1];
+                    for (i = 0; i < current_sizeb; i++) { // take the last current_sizeb pins of port b
+                        ptr->input_pins[i + current_sizea] = node->input_pins[i + num - current_sizeb];
+                        ptr->input_pins[i + current_sizea]->node = ptr;
+                        ptr->input_pins[i + current_sizea]->pin_node_idx = i + current_sizea;
+                    }
+                }
+            }
+        }
+    }
+
+    /* Carry_in should be NULL*/
+    for (i = 0; i < cin; i++) {
+        ptr->input_pins[i + current_sizea + current_sizeb] = NULL;
+    }
+
+    /* output pins */
+    int output; // number of output pins: the wider input port plus cout
+    if (current_sizea > current_sizeb)
+        output = current_sizea + cout;
+    else
+        output = current_sizeb + cout;
+
+    ptr->num_output_pins = output;
+    ptr->output_pins = (npin_t **)vtr::malloc(sizeof(void *) * output);
+    for (i = 0; i < output; i++)
+        ptr->output_pins[i] = NULL;
+
+    return;
+}
+
+/*-------------------------------------------------------------------------
+ * (function: split_adder)
+ *
+ * This function works to split an adder into several smaller
+ *  adders to better "fit" with the available resources in a
+ *  targeted FPGA architecture.
+ *
+ * This function is at the lowest level since it simply receives
+ *  an adder and is told how to split it.
+ *
+ * Note: In this function, we can do padding(default -1), fix the size of hard block adder.
+ *-----------------------------------------------------------------------*/
+
+void split_adder(nnode_t *nodeo, int a, int b, int sizea, int sizeb, int cin, int cout, int count, netlist_t *netlist)
+{
+    nnode_t **node;
+    int i, j;
+    int num, lefta = 0, leftb = 0;
+    int max_num = 0;
+    int flag = 0;
+
+    // if the input of the first cin is generated by a dummy adder added
+    // to the start of the chain, then an offset is needed to compensate
+    // for that in various positions in the code, otherwise the offset is 0
+    const int offset = (configuration.adder_cin_global) ? 0 : 1;
+
+    /* Check for a legitimate split */
+    oassert(nodeo->input_port_sizes[0] == a);
+    oassert(nodeo->input_port_sizes[1] == b);
+
+    node = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * (count));
+
+    for (i = 0; i < count; i++) {
+        node[i] = allocate_nnode(nodeo->loc);
+        node[i]->name = (char *)vtr::malloc(strlen(nodeo->name) + 20);
+        odin_sprintf(node[i]->name, "%s-%d", nodeo->name, i);
+        if (i == count - 1) {
+            // fixed_hard_adder = 1 then adder need to be exact size;
+            if (configuration.fixed_hard_adder == 1)
+                init_split_adder(nodeo, node[i], a, sizea, b, sizeb, cin, cout, i, flag, netlist);
+            else {
+                if (count == 1) {
+                    lefta = a;
+                    leftb = b;
+                } else {
+                    lefta = (a + 1) % sizea;
+                    leftb = (b + 1) % sizeb;
+                }
+
+                max_num = (lefta >= leftb) ? lefta : leftb;
+                // if fixed_hard_adder = 0, and the left of a and b is more than min_add, then adder need to be remain the same size.
+                if (max_num >= min_add)
+                    init_split_adder(nodeo, node[i], a, sizea, b, sizeb, cin, cout, i, flag, netlist);
+                else {
+                    // Using soft logic to do the addition, No need to pad as the same size
+                    flag = 1;
+                    init_split_adder(nodeo, node[i], a, lefta, b, leftb, cin, cout, i, flag, netlist);
+                }
+            }
+        } else
+            init_split_adder(nodeo, node[i], a, sizea, b, sizeb, cin, cout, i, flag, netlist);
+
+        // store the processed hard adder node for optimization
+        processed_adder_list = insert_in_vptr_list(processed_adder_list, node[i]);
+    }
+
+    chain_information_t *adder_chain = allocate_chain_info();
+    // if flag = 0, the last adder use soft logic, so the count of the chain should be one less
+    if (flag == 0)
+        adder_chain->count = count;
+    else
+        adder_chain->count = count - 1;
+    adder_chain->num_bits = a + b;
+    adder_chain->name = nodeo->name;
+    chain_list = insert_in_vptr_list(chain_list, adder_chain);
+
+    // don't add a dummy adder in the beginning of the chain if the first cin will be connected to a global gnd
+    if ((flag == 0 || count > 1) && !configuration.adder_cin_global) {
+        // connect the a[0] and b[0] of first adder node to ground
+        connect_nodes(netlist->vcc_node, 0, node[0], 0);
+        connect_nodes(netlist->gnd_node, 0, node[0], sizea);
+        // hang the first sumout
+        node[0]->output_pins[1] = allocate_npin();
+        node[0]->output_pins[1]->name = append_string("", "%s~dummy_output~%d~%d", node[0]->name, 0, 1);
+    }
+
+    if (nodeo->num_input_port_sizes == 2) {
+        // connect the first cin pin to unconn
+        connect_nodes(netlist->pad_node, 0, node[0], node[0]->num_input_pins - 1);
+    } else if (nodeo->num_input_port_sizes == 3) {
+        // remap the first cin pins)
+        remap_pin_to_new_node(nodeo->input_pins[nodeo->num_input_pins - 1], node[0], (node[0]->num_input_pins - 1));
+    }
+    // if (a + 1) % sizea == 0, the a[0] and b[0] of node[count-1] should connect to gound
+    if ((a + 1) % sizea == 0 && (b + 1) % sizeb == 0) {
+        if (flag == 0) {
+            connect_nodes(netlist->gnd_node, 0, node[count - 1], 0);
+            connect_nodes(netlist->gnd_node, 0, node[count - 1], sizea);
+        }
+    }
+
+    // if any input pins beside first cin pins are NULL, connect those pins to unconn
+    for (i = 0; i < count; i++) {
+        num = node[i]->num_input_pins;
+        for (j = 0; j < num - 1; j++) {
+            if (node[i]->input_pins[j] == NULL)
+                connect_nodes(netlist->pad_node, 0, node[i], j);
+        }
+    }
+
+    if (configuration.adder_cin_global) {
+        // connect first cin to gnd
+        connect_nodes(netlist->gnd_node, 0, node[0], (node[0]->num_input_pins - 1));
+    }
+
+    // connect cout to next cin
+    for (i = 1; i < count; i++)
+        connect_nodes(node[i - 1], 0, node[i], (node[i]->num_input_pins - 1));
+
+    // remap the output pins of each adder to nodeo
+    if (count == 1) {
+        if (flag == 0) {
+            for (j = 0; j < node[0]->num_output_pins - 2; j++) {
+                if (j < nodeo->num_output_pins)
+                    remap_pin_to_new_node(nodeo->output_pins[j], node[0], j + 2);
+                else {
+                    node[0]->output_pins[j + 2] = allocate_npin();
+                    node[0]->output_pins[j + 2]->name = append_string("", "%s~dummy_output~%d~%d", node[0]->name, 0, j + 2);
+                }
+                // hang the first cout
+                node[0]->output_pins[0] = allocate_npin();
+                node[0]->output_pins[0]->name = append_string("", "%s~dummy_output~%d~%d", node[0]->name, 0, 0);
+            }
+        } else {
+            for (j = 0; j < node[0]->num_output_pins - 1; j++)
+                remap_pin_to_new_node(nodeo->output_pins[j], node[0], j + 1);
+            remap_pin_to_new_node(nodeo->output_pins[nodeo->num_output_pins - 1], node[0], 0);
+        }
+    } else {
+        // First adder
+        for (j = 0; j < node[0]->num_output_pins - 2; j++)
+            remap_pin_to_new_node(nodeo->output_pins[j], node[0], j + 2);
+        // if a dummy adder is added (offset = 1) start from the second adder)
+        for (i = offset; i < count - 1; i++) {
+            for (j = 0; j < node[i]->num_output_pins - 1; j++)
+                remap_pin_to_new_node(nodeo->output_pins[i * sizea + j - offset], node[i], j + 1);
+        }
+        // Last adder
+        if (flag == 0) {
+            for (j = 0; j < node[count - 1]->num_output_pins - 1; j++) {
+                // if a dummy adder is added to this chain (offset = 1), adjust the index of the adder using the offset constant
+                if (((count - 1) * sizea + j - offset) < nodeo->num_output_pins)
+                    remap_pin_to_new_node(nodeo->output_pins[(count - 1) * sizea + j - offset], node[count - 1], j + 1);
+                else {
+                    node[count - 1]->output_pins[j + 1] = allocate_npin();
+                    // Pad outputs with a unique and descriptive name to avoid collisions.
+                    node[count - 1]->output_pins[j + 1]->name = append_string("", "%s~dummy_output~%d~%d", node[count - 1]->name, count - 1, j + 1);
+                }
+            }
+            // Hang the last cout
+            node[count - 1]->output_pins[0] = allocate_npin();
+            // Pad outputs with a unique and descriptive name to avoid collisions.
+            node[count - 1]->output_pins[0]->name = append_string("", "%s~dummy_output~%d~%d", node[count - 1]->name, count - 1, 0);
+        } else {
+            for (j = 0; j < node[count - 1]->num_output_pins - 1; j++)
+                // if(((count - 1) * sizea + j - 1) < nodeo->num_output_pins)
+                remap_pin_to_new_node(nodeo->output_pins[(count - 1) * sizea + j - 1], node[count - 1], j + 1);
+            if (nodeo->output_pins[nodeo->num_output_pins - 1] != NULL)
+                remap_pin_to_new_node(nodeo->output_pins[nodeo->num_output_pins - 1], node[count - 1], 0);
+            else {
+                node[count - 1]->output_pins[0] = allocate_npin();
+                // Pad outputs with a unique and descriptive name to avoid collisions.
+                node[count - 1]->output_pins[0]->name = append_string("", "%s~dummy_output~%d~%d", node[count - 1]->name, count - 1, 0);
+            }
+        }
+    }
+
+    for (i = offset; configuration.coarsen && i < count - 1; i++) {
+        for (j = 0; j < node[i]->num_output_pins - 1; j++) {
+            char *new_output_pin_name = (char *)vtr::malloc((strlen(node[i]->name) + 20) * sizeof(char)); /* 6 chars for pin idx */
+            odin_sprintf(new_output_pin_name, "%s[1]", node[i]->name);
+            node[i]->output_pins[1]->name = new_output_pin_name;
+        }
+    }
+
+    /* Freeing the old node! */
+    cleanup_add_old_node(nodeo, netlist);
+
+    vtr::free(node);
+    return;
+}
+
+/*-------------------------------------------------------------------------
+ * (function: iterate_adders)
+ *
+ * This function will iterate over all of the add operations that
+ *	were collected in add_list and empty that list. A node whose wider
+ *	operand reaches both min_threshold_adder and min_add is split by
+ *	split_adder so that it fits into the basic hard adder block;
+ *	every other node is moved to processed_adder_list unchanged.
+ *	Nothing happens when hard_adders is NULL.
+ *
+ * netlist: forwarded to split_adder
+ *-----------------------------------------------------------------------*/
+void iterate_adders(netlist_t *netlist)
+{
+    /* Can only perform the optimization if hard adders exist! */
+    if (hard_adders == NULL)
+        return;
+
+    // one extra chain element is needed when a dummy adder is added to
+    // the start of the chain to feed the first cin with gnd
+    const int dummy_adders = (configuration.adder_cin_global) ? 0 : 1;
+
+    // In hard block adder, the summand and addend are same size.
+    const int cin_width = hard_adders->inputs->size;
+    const int port_b_width = hard_adders->inputs->next->size;
+    const int port_a_width = hard_adders->inputs->next->size;
+
+    oassert(cin_width == 1);
+
+    while (add_list != NULL) {
+        // take the node stored in the head cell and drop that cell
+        nnode_t *adder = (nnode_t *)add_list->data_vptr;
+        add_list = delete_in_vptr_list(add_list);
+        oassert(adder != NULL);
+
+        // HARD_IP nodes found in this list are handled as ADD nodes
+        if (adder->type == HARD_IP)
+            adder->type = ADD;
+        oassert(adder->type == ADD);
+
+        // the wider operand gives the bit width of the node
+        const int a = adder->input_port_sizes[0];
+        const int b = adder->input_port_sizes[1];
+        const int widest = (a >= b) ? a : b;
+        adder->bit_width = widest;
+
+        // below either threshold: store the node without splitting it
+        if (widest < min_threshold_adder || widest < min_add) {
+            processed_adder_list = insert_in_vptr_list(processed_adder_list, adder);
+            continue;
+        }
+
+        // each operand needs (width + 1) / port width adders (the + 1 passes the
+        // last cout -> sumout), plus the dummy adder when there is one
+        const int chain_len_a = (a + 1) / port_a_width + dummy_adders;
+        const int chain_len_b = (b + 1) / port_b_width + dummy_adders;
+
+        // the longer of the two chains decides how many adders are created
+        int chain_len = chain_len_a;
+        if (chain_len_b > chain_len)
+            chain_len = chain_len_b;
+
+        total++;
+        split_adder(adder, a, b, port_a_width, port_b_width, 1, 1, chain_len, netlist);
+    }
+}
+
+/*-------------------------------------------------------------------------
+ * (function: clean_adders) empties add_list by calling delete_in_vptr_list
+ *	on it until it becomes NULL; used to clean up after optimization
+ *-----------------------------------------------------------------------*/
+void clean_adders()
+{
+    if (add_list == NULL)
+        return;
+    do {
+        add_list = delete_in_vptr_list(add_list);
+    } while (add_list != NULL);
+}
+
+/*-------------------------------------------------------------------------
+ * (function: reduce_operations)
+ *
+ * reduce the operations that are redundant: selects the list of nodes
+ *	recorded for op and passes it on to traverse_list
+ *
+ * netlist: unused
+ * op: ADD selects add_list, MULTIPLY selects mult_list and MINUS
+ *	selects sub_list; any other value is traversed as (NO_OP, NULL),
+ *	for which traverse_list does no work
+ *-----------------------------------------------------------------------*/
+void reduce_operations(netlist_t * /*netlist*/, operation_list op)
+{
+    // what gets handed to traverse_list; these defaults are kept for
+    // every operation that has no list of its own
+    t_linked_vptr *candidates = NULL;
+    operation_list kind = NO_OP;
+
+    if (op == ADD) {
+        candidates = add_list;
+        kind = ADD;
+    } else if (op == MULTIPLY) {
+        candidates = mult_list;
+        kind = MULTIPLY;
+    } else if (op == MINUS) {
+        candidates = sub_list;
+        kind = MINUS;
+    }
+
+    // the comparison of the listed nodes happens in traverse_list / match_node
+    traverse_list(kind, candidates);
+}
+
+/*-------------------------------------------------------------------------
+ * (function: traverse_list)
+ *
+ * runs match_node on every cell of the list that still has a successor
+ *-----------------------------------------------------------------------*/
+void traverse_list(operation_list oper, t_linked_vptr *place)
+{
+    // cell->next is read again after each match_node call, which may have
+    // unlinked cells that followed `cell`
+    for (t_linked_vptr *cell = place; cell != NULL && cell->next != NULL; cell = cell->next)
+        match_node(cell, oper);
+}
+
+/*---------------------------------------------------------------------------
+ * (function: match_node)
+ *
+ * Compares the node stored in place with the node of every later list
+ *  cell. A later node with the same type and input pin count for which
+ *  match_ports and match_pins both return 1 is merged into the first
+ *  node and its cell is unlinked from the list.
+ *
+ * oper: operation handed to match_ports
+ *-------------------------------------------------------------------------*/
+void match_node(t_linked_vptr *place, operation_list oper)
+{
+    nnode_t *reference = (nnode_t *)place->data_vptr;
+    t_linked_vptr *prev_cell = place;
+    t_linked_vptr *cell = place->next;
+
+    while (cell != NULL) {
+        nnode_t *candidate = (nnode_t *)cell->data_vptr;
+
+        // evaluated left to right, so the port and pin checks only run for compatible nodes
+        const bool duplicate = reference->type == candidate->type
+                               && reference->num_input_pins == candidate->num_input_pins
+                               && match_ports(reference, candidate, oper) == 1
+                               && match_pins(reference, candidate) == 1;
+
+        if (!duplicate) {
+            // keep this cell and move on to the next one
+            prev_cell = cell;
+            cell = cell->next;
+            continue;
+        }
+
+        merge_nodes(reference, candidate);
+        remove_list_node(prev_cell, cell);
+        // `cell` was freed by remove_list_node; resume with its successor
+        cell = prev_cell->next;
+    }
+}
+
+/*---------------------------------------------------------------------------
+ * (function: match_ports)
+ * Compares the operand names of the AST operations related to both nodes.
+ *  ADD and MULTIPLY operands may match in either order, MINUS operands
+ *  only in the same order. Returns 1 on a match, 0 otherwise (also when an
+ *  AST node is missing or an operand is neither an identifier nor a number).
+ *-------------------------------------------------------------------------*/
+int match_ports(nnode_t *node, nnode_t *next_node, operation_list oper)
+{
+    int flag = 0;
+    int sign = 0;
+    int mark1 = 1;
+    int mark2 = 1;
+    char *component_s[2] = {0};
+    char *component_o[2] = {0};
+    ast_node_t *ast_node = node->related_ast_node;
+    ast_node_t *ast_node_next = next_node->related_ast_node;
+    /* in case of coarsen blifs, there is no related ast node, so we skip this part (both AST nodes are required) */
+    if (ast_node && ast_node_next && ast_node->types.operation.op == oper) {
+        traverse_operation_node(ast_node, component_s, oper, &sign);
+        if (sign != 1) {
+            traverse_operation_node(ast_node_next, component_o, oper, &sign);
+            if (sign != 1) {
+                oassert(component_s[0] && component_o[0] && "missing children on operation");
+                switch (oper) {
+                case ADD:
+                case MULTIPLY: {
+                    mark1 = strcmp(component_s[0], component_o[0]);
+                    if (component_s[1] && component_o[1]) {
+                        if (mark1 == 0) {
+                            mark2 = strcmp(component_s[1], component_o[1]);
+                        } else {
+                            mark1 = strcmp(component_s[0], component_o[1]);
+                            mark2 = strcmp(component_s[1], component_o[0]);
+                        }
+                    }
+                } break;
+
+                case MINUS: {
+                    mark1 = strcmp(component_s[0], component_o[0]);
+                    if (mark1 == 0 && component_s[1] && component_o[1]) {
+                        mark2 = strcmp(component_s[1], component_o[1]);
+                    }
+                } break;
+                default:
+                    break;
+                }
+                flag = (mark1 == 0 && mark2 == 0) ? 1 : 0;
+            }
+        }
+        /* identifier strings are borrowed from the AST, anything else was allocated; never index past the 2 slots */
+        for (int i = 0; i < 2 && i < ast_node->num_children; i++) {
+            if (ast_node->children[i]->type != IDENTIFIERS) {
+                vtr::free(component_s[i]);
+            }
+        }
+        for (int i = 0; i < 2 && i < ast_node_next->num_children; i++) {
+            if (ast_node_next->children[i]->type != IDENTIFIERS) {
+                vtr::free(component_o[i]);
+            }
+        }
+    }
+
+    return flag;
+}
+
+/*-------------------------------------------------------------------------
+ * (function: traverse_operation_node)
+ * For a node whose operation is op, stores one string per child in component[]:
+ *	the identifier itself, or the decimal text of a number in a new buffer.
+ *	*mark is set to 0 for each such child and to 1 (ending the scan) for any other child.
+ *-----------------------------------------------------------------------*/
+void traverse_operation_node(ast_node_t *node, char *component[], operation_list op, int *mark)
+{
+    if (node == NULL || node->types.operation.op != op)
+        return;
+
+    for (long idx = 0; idx < node->num_children; idx++) {
+        *mark = 0;
+        ast_node_t *child = node->children[idx];
+
+        if (child->type == IDENTIFIERS) {
+            // pointer into the AST, no copy is made
+            component[idx] = child->types.identifier;
+        } else if (child->type == NUMBERS) {
+            // measure the decimal text first, then print it into an exactly sized buffer
+            long value = child->types.vnumber->get_value();
+            long len = snprintf(NULL, 0, "%ld", value);
+            component[idx] = (char *)vtr::calloc(len + 1, sizeof(char));
+            odin_sprintf(component[idx], "%ld", value);
+        } else {
+            // neither an identifier nor a number: flag it and stop
+            *mark = 1;
+            return;
+        }
+    }
+}
+
+/*---------------------------------------------------------------------------
+ * (function: merge_nodes) fold next_node into node: detach its inputs, move its fanout to node's nets, free it
+ *-------------------------------------------------------------------------*/
+void merge_nodes(nnode_t *node, nnode_t *next_node)
+{
+    remove_fanout_pins(next_node);
+    reallocate_pins(node, next_node);
+    free_op_nodes(next_node);
+}
+
+/*---------------------------------------------------------------------------
+ * (function: remove_list_node)
+ *
+ * unlinks next, the cell following pre, from the list and frees the
+ *  cell itself (the data it points to is left alone)
+ *-------------------------------------------------------------------------*/
+void remove_list_node(t_linked_vptr *pre, t_linked_vptr *next)
+{
+    // next->next is NULL for the last cell, which makes pre the new tail
+    pre->next = next->next;
+    vtr::free(next);
+}
+
+/*---------------------------------------------------------------------------
+ * (function: remove_fanout_pins) take each input pin of node out of the fanout list of its net
+ *-------------------------------------------------------------------------*/
+void remove_fanout_pins(nnode_t *node)
+{
+    for (int i = 0; i < node->num_input_pins; i++) {
+        nnet_t *net = node->input_pins[i]->net;
+        int j = 0; // slot of this pin in the fanout list, found by unique_id (NULL slots are skipped)
+        while (j < net->num_fanout_pins && (net->fanout_pins[j] == NULL || net->fanout_pins[j]->unique_id != node->input_pins[i]->unique_id))
+            j++;
+        if (j == net->num_fanout_pins)
+            continue; // pin is not listed: leave the net alone instead of writing past the list and shrinking it
+        for (; j < net->num_fanout_pins - 1; j++) { // close the gap and keep the moved pins' indices in sync
+            net->fanout_pins[j] = net->fanout_pins[j + 1];
+            if (net->fanout_pins[j] != NULL)
+                net->fanout_pins[j]->pin_net_idx = j;
+        }
+        net->fanout_pins[--net->num_fanout_pins] = NULL; // clear the vacated last slot, list is one shorter
+    }
+}
+
+/*---------------------------------------------------------------------------
+ * (function: reallocate_pins)
+ *
+ * For each output pin i of next_node, walks the fanout pins of its net:
+ *  for a fanout pin that has a node, the input pin of that node it designates
+ *  is added to the net of output pin i of node; a fanout pin without a node is freed.
+ *-------------------------------------------------------------------------*/
+void reallocate_pins(nnode_t *node, nnode_t *next_node)
+{
+    for (int out = 0; out < next_node->num_output_pins; out++) {
+        nnet_t *old_net = next_node->output_pins[out]->net;
+        for (int f = 0; f < old_net->num_fanout_pins; f++) {
+            npin_t *sink = old_net->fanout_pins[f];
+            if (sink->node == NULL) {
+                // no node is attached to this fanout pin, so it is released
+                free_npin(sink);
+                continue;
+            }
+            // hook the consumer's input pin onto the matching output net of node
+            nnode_t *consumer = sink->node;
+            add_fanout_pin_to_net(node->output_pins[out]->net, consumer->input_pins[sink->pin_node_idx]);
+        }
+    }
+}
+
+/*---------------------------------------------------------------------------
+ * (function: free_op_nodes) frees the net of every output pin that has one, then the node itself
+ *-------------------------------------------------------------------------*/
+void free_op_nodes(nnode_t *node)
+{
+    for (int out = 0; out < node->num_output_pins; out++) {
+        nnet_t *driven_net = node->output_pins[out]->net;
+        if (driven_net != NULL)
+            free_nnet(driven_net);
+    }
+    free_nnode(node);
+}
+
+/*---------------------------------------------------------------------------
+ * (function: match_pins)
+ *
+ * returns 1 when every driver pin of every input net of node also drives
+ *  an input net of next_node (drivers are compared by unique_id), else -1
+ *-------------------------------------------------------------------------*/
+int match_pins(nnode_t *node, nnode_t *next_node)
+{
+    for (int i = 0; i < node->num_input_pins; i++) {
+        nnet_t *lhs_net = node->input_pins[i]->net;
+        for (int j = 0; j < lhs_net->num_driver_pins; j++) {
+            const long wanted = lhs_net->driver_pins[j]->unique_id;
+            bool found = false;
+            for (int k = 0; !found && k < next_node->num_input_pins; k++) {
+                nnet_t *rhs_net = next_node->input_pins[k]->net;
+                for (int l = 0; !found && l < rhs_net->num_driver_pins; l++)
+                    found = (wanted == rhs_net->driver_pins[l]->unique_id);
+            }
+            if (!found)
+                return -1;
+        }
+    }
+    return 1;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * connect adder type output pin to a node: moves one output pin of node onto pin output_pin_id of current_adder
+ *-------------------------------------------------------------------------------------------*/
+static void connect_output_pin_to_node(int *width, int current_pin, int output_pin_id, nnode_t *node, nnode_t *current_adder, short subtraction)
+{
+    if (subtraction) {
+        remap_pin_to_new_node(node->output_pins[current_pin], current_adder, output_pin_id);
+        return;
+    }
+    const bool two_ports = (node->num_input_port_sizes == 2);
+    int source_idx = current_pin; // with two input ports the output pins are used in order
+    if (!two_ports)               // otherwise shifted up by one, the last position wrapping to pin 0
+        source_idx = (current_pin < width[output_pin_id] - 1) ? current_pin + 1 : 0;
+    npin_t *source_pin = node->output_pins[source_idx];
+    if (source_pin != NULL && (two_ports || source_pin->type != NO_ID)) {
+        remap_pin_to_new_node(source_pin, current_adder, output_pin_id);
+    } else if (source_pin != NULL) { // a NO_ID pin is not moved; the adder gets a fresh dummy output instead
+        current_adder->output_pins[output_pin_id] = allocate_npin();
+        current_adder->output_pins[output_pin_id]->name = append_string("", "%s~dummy_output~%d", current_adder->name, output_pin_id);
+    }
+}
+
+/*---------------------------------------------------------------------------------------------
+ * make one sum (ADDER_FUNC) or carry (CARRY_FUNC) gate for bit current_pin of node and return it; subtraction inverts input b.
+ * NOTE(review): new_funct stays NULL when the carry is gnd/vcc and funct is neither of the two - confirm callers never do that. */
+static nnode_t *make_adder(operation_list funct, nnode_t *current_adder, nnode_t *previous_carry, int *width, int current_pin, netlist_t *netlist,
+                           nnode_t *node, short subtraction, short mark)
+{
+    // a constant carry (gnd / vcc) folds into a 2-input gate; any other carry needs a 3-input gate with the carry on pin 0
+    nnode_t *new_funct = NULL;
+    short is_three_port_gate = 0;
+
+    if (previous_carry == netlist->gnd_node) {
+        if (funct == ADDER_FUNC)
+            new_funct = make_2port_gate(LOGICAL_XOR, 1, 1, 1, node, mark);
+        else if (funct == CARRY_FUNC)
+            new_funct = make_2port_gate(LOGICAL_AND, 1, 1, 1, node, mark);
+    } else if (previous_carry == netlist->vcc_node) {
+        if (funct == ADDER_FUNC)
+            new_funct = make_2port_gate(LOGICAL_XNOR, 1, 1, 1, node, mark);
+        else if (funct == CARRY_FUNC)
+            new_funct = make_2port_gate(LOGICAL_OR, 1, 1, 1, node, mark);
+    } else {
+        new_funct = make_3port_gate(funct, 1, 1, 1, 1, node, mark);
+        connect_nodes(previous_carry, 0, new_funct, 0);
+        is_three_port_gate = 1;
+    }
+
+    // an existing gate was given: copy its two operand input pins (indices shifted by one for a 3-input gate)
+    if (current_adder != NULL) {
+        add_input_pin_to_node(new_funct, copy_input_npin(current_adder->input_pins[0 + is_three_port_gate]), 0 + is_three_port_gate);
+        add_input_pin_to_node(new_funct, copy_input_npin(current_adder->input_pins[1 + is_three_port_gate]), 1 + is_three_port_gate);
+    }
+    // no existing gate: wire the operands from the input pins of node
+    else {
+        // connect input a (a driverless net or a GND / VCC driver is replaced by the netlist gnd / vcc node)
+        if (current_pin < width[1]) {
+            npin_t *temp_pin = node->input_pins[current_pin];
+            oassert(temp_pin->net->num_driver_pins <= 1);
+            if (!temp_pin->net->num_driver_pins || temp_pin->net->driver_pins[0]->node->type == GND_NODE) {
+                connect_nodes(netlist->gnd_node, 0, new_funct, 0 + is_three_port_gate);
+                remove_fanout_pins_from_net(temp_pin->net, temp_pin, temp_pin->pin_net_idx);
+            } else if (temp_pin->net->driver_pins[0]->node->type == VCC_NODE) {
+                connect_nodes(netlist->vcc_node, 0, new_funct, 0 + is_three_port_gate);
+                remove_fanout_pins_from_net(temp_pin->net, temp_pin, temp_pin->pin_net_idx);
+            } else {
+                remap_pin_to_new_node(temp_pin, new_funct, 0 + is_three_port_gate);
+            }
+        } else {
+            connect_nodes(netlist->gnd_node, 0, new_funct, 0 + is_three_port_gate);
+        }
+
+        // connect input b (inverted for a subtraction: constants are swapped, other signals go through a NOT gate)
+        if (current_pin < width[2]) {
+            // the pins of operand b follow the width[1] pins of operand a in node->input_pins
+            npin_t *temp_pin = node->input_pins[current_pin + width[1]];
+            oassert(temp_pin->net->num_driver_pins <= 1);
+            if (temp_pin->net->num_driver_pins == 0 || temp_pin->net->driver_pins[0]->node->type == GND_NODE) {
+                nnode_t *attach_to = (subtraction) ? netlist->vcc_node : netlist->gnd_node;
+                connect_nodes(attach_to, 0, new_funct, 1 + is_three_port_gate);
+                remove_fanout_pins_from_net(temp_pin->net, temp_pin, temp_pin->pin_net_idx);
+            } else if (temp_pin->net->driver_pins[0]->node->type == VCC_NODE) {
+                nnode_t *attach_to = (subtraction) ? netlist->gnd_node : netlist->vcc_node;
+                connect_nodes(attach_to, 0, new_funct, 1 + is_three_port_gate);
+                remove_fanout_pins_from_net(temp_pin->net, temp_pin, temp_pin->pin_net_idx);
+            } else {
+                if (subtraction) {
+                    nnode_t *new_not_cells = make_not_gate(node, mark);
+                    remap_pin_to_new_node(temp_pin, new_not_cells, 0);
+                    connect_nodes(new_not_cells, 0, new_funct, 1 + is_three_port_gate);
+                } else {
+                    remap_pin_to_new_node(temp_pin, new_funct, 1 + is_three_port_gate);
+                }
+            }
+        } else {
+            nnode_t *attach_to = (subtraction) ? netlist->vcc_node : netlist->gnd_node;
+            connect_nodes(attach_to, 0, new_funct, 1 + is_three_port_gate);
+        }
+    }
+    return new_funct;
+}
+
+/* Builds a ripple carry adder (or subtractor) for node out of sum and carry gates, one pair per bit of width[0]. */
+void instantiate_add_w_carry_block(int *width, nnode_t *node, short mark, netlist_t *netlist, short subtraction)
+{
+    // the chain starts with a constant carry: vcc for a subtraction, gnd for an addition
+    nnode_t *carry = (subtraction) ? netlist->vcc_node : netlist->gnd_node;
+    const int last_bit = width[0] - 1;
+
+    for (int bit = 0; bit <= last_bit; bit++) {
+        // sum gate of this bit, fed by the carry of the previous bit
+        nnode_t *sum_gate = make_adder(ADDER_FUNC, NULL, carry, width, bit, netlist, node, subtraction, mark);
+        // every bit gets a carry gate, except the last bit of a subtraction
+        if (bit != last_bit || !subtraction)
+            carry = make_adder(CARRY_FUNC, sum_gate, carry, width, bit, netlist, node, subtraction, mark);
+        connect_output_pin_to_node(width, bit, 0, node, sum_gate, subtraction);
+    }
+}
+
+/**
+ * -------------------------------------------------------------------------
+ * (function: cleanup_add_old_node)
+ *
+ * @brief <clean up nodeo, a high level ADD node>
+ * In the split_adder function, nodeo is split into smaller adders. This
+ * function clears the input pin slots of nodeo (the pins themselves are not
+ * freed here). For every output pin that still has a node it joins the net
+ * of that pin with the net of a zero pin when the net has at most one
+ * driver, frees both pins and clears the slot. Finally nodeo itself is
+ * freed.
+ *
+ * @param nodeo representing the old adder node
+ * @param netlist representing the current netlist
+ *-----------------------------------------------------------------------*/
+static void cleanup_add_old_node(nnode_t *nodeo, netlist_t *netlist)
+{
+    int i;
+    /* Disconnecting input pins from the old node side */
+    for (i = 0; i < nodeo->num_input_pins; i++) {
+        nodeo->input_pins[i] = NULL;
+    }
+
+    /* connecting the remaining output pins to the zero (gnd) net */
+    for (i = 0; i < nodeo->num_output_pins; i++) {
+        npin_t *output_pin = nodeo->output_pins[i];
+
+        if (output_pin && output_pin->node) {
+            /* a zero pin obtained from the netlist; its slot in its net is remembered */
+            npin_t *zero_pin = get_zero_pin(netlist);
+            int idx_2_buffer = zero_pin->pin_net_idx;
+
+            // the nets are only joined when the output net has at most one driver
+            if (output_pin->net->num_driver_pins <= 1) {
+                /* join the output net with the net of the zero pin */
+                join_nets(zero_pin->net, output_pin->net);
+
+                /* erase the pointer to the zero pin in the fanout list of its net */
+                zero_pin->net->fanout_pins[idx_2_buffer] = NULL;
+            }
+
+            free_npin(zero_pin);
+            free_npin(output_pin);
+
+            /* Disconnecting output pins from the old node side */
+            nodeo->output_pins[i] = NULL;
+        }
+    }
+
+    // CLEAN UP
+    free_nnode(nodeo);
+}
+
+/**
+ *-------------------------------------------------------------------------------------------
+ * (function: check_missing_ports )
+ * @brief check for missing ports such as carry-in/out in case of dealing with
+ * generated netlist from Yosys blif file. A two-operand node is replaced by a
+ * new three-port node whose extra input is a cin pin taken from get_zero_pin.
+ * @param node pointing to the netlist node
+ * @param traverse_mark_number unique traversal mark for blif elaboration pass
+ * @param netlist pointer to the current netlist file
+ * @return the new node (two operands; node is freed), node itself (one operand), NULL otherwise
+ *-----------------------------------------------------------------------------------------*/
+nnode_t *check_missing_ports(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist)
+{
+    nnode_t *new_node = NULL;
+    int num_input_port = node->num_input_port_sizes;
+
+    /* check for operations that have 2 operands */
+    if (num_input_port == 2) {
+        int i;
+        int in_port1_size = node->input_port_sizes[0];
+        int in_port2_size = node->input_port_sizes[1];
+        int out_port_size = (in_port1_size >= in_port2_size) ? in_port1_size + 1 : in_port2_size + 1;
+
+        new_node = make_3port_gate(node->type, in_port1_size, in_port2_size, 1, out_port_size, node, traverse_mark_number);
+
+        /* copy attributes */
+        copy_attribute(new_node->attributes, node->attributes);
+
+        for (i = 0; i < in_port1_size; i++) {
+            remap_pin_to_new_node(node->input_pins[i], new_node, i);
+        }
+
+        for (i = 0; i < in_port2_size; i++) {
+            remap_pin_to_new_node(node->input_pins[i + in_port1_size], new_node, i + in_port1_size);
+        }
+
+        /* adding a cin connected to GND as the last input pin of the new node */
+        npin_t *cin_pin = get_zero_pin(netlist);
+        cin_pin->type = INPUT;
+        cin_pin->mapping = vtr::strdup("cin");
+
+        add_input_pin_to_node(new_node, cin_pin, new_node->num_input_pins - 1);
+
+        // moving the output pins to the new node; missing ones get a new pin driving a new net
+        for (i = 0; i < out_port_size; i++) {
+            if (i < node->num_output_pins) {
+                remap_pin_to_new_node(node->output_pins[i], new_node, i);
+            } else {
+                npin_t *new_pin1 = allocate_npin();
+                npin_t *new_pin2 = allocate_npin();
+                nnet_t *new_net = allocate_nnet();
+                new_net->name = make_full_ref_name(NULL, NULL, NULL, new_node->name, i);
+                /* hook the output pin into the node */
+                add_output_pin_to_node(new_node, new_pin1, i);
+                /* hook up new pin 1 into the new net */
+                add_driver_pin_to_net(new_net, new_pin1);
+                /* hook up the new pin 2 to this new net */
+                add_fanout_pin_to_net(new_net, new_pin2);
+            }
+        }
+
+        /**
+         * if the old node has more than out_port_size output pins,
+         * each exceeding pin is moved to a BUF node fed by a zero (GND) pin
+         */
+        for (i = out_port_size; i < node->num_output_pins; i++) {
+            /* creating a buf node */
+            nnode_t *buf_node = make_1port_gate(BUF_NODE, 1, 1, node, traverse_mark_number);
+            /* adding the GND input pin to the buf node */
+            add_input_pin_to_node(buf_node, get_zero_pin(netlist), 0);
+            /* remapping the outpin to buf node */
+            remap_pin_to_new_node(node->output_pins[i], buf_node, 0);
+        }
+
+        // CLEAN UP
+        free_nnode(node);
+    }
+    /* one operand (unary minus, like -A): no change. NOTE(review): any other port count returns NULL - confirm callers */
+    else if (num_input_port == 1) {
+        new_node = node;
+    }
+
+    return new_node;
+}
\ No newline at end of file
diff --git a/parmys-plugin/src/ast_util.cc b/parmys-plugin/src/ast_util.cc
new file mode 100644
index 000000000..db15ce510
--- /dev/null
+++ b/parmys-plugin/src/ast_util.cc
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2009 Peter Andrew Jamieson (jamieson.peter@gmail.com)
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "odin_globals.h"
+#include "odin_types.h"
+#include <algorithm>
+#include <ctype.h>
+#include <math.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "ast_util.h"
+#include "odin_util.h"
+#include "vtr_memory.h"
+#include "vtr_util.h"
+
+/*---------------------------------------------------------------------------
+ * (function: create_node_w_type) allocate a childless AST node of type `id` tagged with location `loc`
+ *-------------------------------------------------------------------------*/
+ast_node_t *create_node_w_type(ids id, loc_t loc)
+{
+    oassert(id != NO_ID);
+    /* function-local counter shared by all calls; each new node takes the next value */
+    static long unique_count = 0;
+
+    ast_node_t *new_node;
+    /* presumably vtr::calloc zero-fills like calloc; the defaults are still spelled out below */
+    new_node = (ast_node_t *)vtr::calloc(1, sizeof(ast_node_t));
+    oassert(new_node != NULL);
+
+    new_node->type = id;
+    new_node->children = NULL;
+    new_node->num_children = 0;
+    new_node->unique_count = unique_count++; // post-increment: the first node created gets 0
+    new_node->loc = loc;
+    new_node->far_tag = 0;
+    new_node->high_number = 0;
+    new_node->hb_port = 0;
+    new_node->net_node = 0;
+    new_node->types.vnumber = nullptr;
+    new_node->types.identifier = NULL;
+    new_node->chunk_size = 1;
+    new_node->identifier_node = NULL;
+    /* init value */
+    new_node->types.variable.initial_value = nullptr;
+    /* reset flags */
+    new_node->types.variable.is_parameter = false;
+    new_node->types.variable.is_string = false;
+    new_node->types.variable.is_localparam = false;
+    new_node->types.variable.is_defparam = false;
+    new_node->types.variable.is_port = false;
+    new_node->types.variable.is_input = false;
+    new_node->types.variable.is_output = false;
+    new_node->types.variable.is_inout = false;
+    new_node->types.variable.is_wire = false;
+    new_node->types.variable.is_reg = false;
+    new_node->types.variable.is_genvar = false;
+    new_node->types.variable.is_memory = false;
+    new_node->types.variable.signedness = UNSIGNED;
+
+    new_node->types.concat.num_bit_strings = 0;
+    new_node->types.concat.bit_strings = NULL;
+
+    return new_node;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: create_tree_node_id) build an IDENTIFIERS node that stores the `string` pointer as-is (no copy)
+ *-------------------------------------------------------------------------------------------*/
+ast_node_t *create_tree_node_id(char *string, loc_t loc)
+{
+    ast_node_t *id_node = create_node_w_type(IDENTIFIERS, loc);
+
+    id_node->types.identifier = string;
+    return id_node;
+}
diff --git a/parmys-plugin/src/enum_str.cc b/parmys-plugin/src/enum_str.cc
new file mode 100644
index 000000000..3134fa5d6
--- /dev/null
+++ b/parmys-plugin/src/enum_str.cc
@@ -0,0 +1,359 @@
+#include "odin_types.h"
+/* printable names for edge sensitivities; presumably indexed by edge_type_e — TODO confirm */
+const char *edge_type_e_STR[] = {
+  "UNDEFINED_SENSITIVITY",   "FALLING_EDGE_SENSITIVITY", "RISING_EDGE_SENSITIVITY",
+  "ACTIVE_HIGH_SENSITIVITY", "ACTIVE_LOW_SENSITIVITY",   "ASYNCHRONOUS_SENSITIVITY",
+};
+/* {long, short} spellings of the constant names; ODIN_STRING_TYPE (defined elsewhere) selects one below */
+const char *_ZERO_GND_ZERO[] = {"ZERO_GND_ZERO", "ZGZ"};
+
+const char *_ONE_VCC_CNS[] = {
+  "ONE_VCC_CNS",
+  "OVC",
+};
+
+const char *_ZERO_PAD_ZERO[] = {"ZERO_PAD_ZERO", "ZPZ"};
+/* the spelling in use, picked by ODIN_STRING_TYPE when these globals are initialised */
+const char *ZERO_GND_ZERO = _ZERO_GND_ZERO[ODIN_STRING_TYPE];
+const char *ONE_VCC_CNS = _ONE_VCC_CNS[ODIN_STRING_TYPE];
+const char *ZERO_PAD_ZERO = _ZERO_PAD_ZERO[ODIN_STRING_TYPE];
+/* model-name strings for memory blocks (the first two are looked up via find_hard_block) */
+const char *SINGLE_PORT_RAM_string = "single_port_ram";
+const char *DUAL_PORT_RAM_string = "dual_port_ram";
+const char *LUTRAM_string = "lutram_ram";
+
+const char *operation_list_STR[][2] = { // {long name, short name}; row order presumably follows the operation_list enum — TODO confirm
+  {"NO_OP", "nOP"},
+  {"FF_NODE", "FF"},
+  {"BUF_NODE", "BUF"},
+  {"MULTI_PORT_MUX", "nMUX"}, // port 1 = control, port 2+ = mux options
+  {"INPUT_NODE", "IN"},
+  {"OUTPUT_NODE", "OUT"},
+  {"GND_NODE", "GND"},
+  {"VCC_NODE", "VCC"},
+  {"CLOCK_NODE", "CLK"},
+  {"ADD", "ADD"},             // +
+  {"MINUS", "MIN"},           // -
+  {"BITWISE_NOT", "bNOT"},    // ~
+  {"BITWISE_AND", "bAND"},    // &
+  {"BITWISE_OR", "bOR"},      // |
+  {"BITWISE_NAND", "bNAND"},  // ~&
+  {"BITWISE_NOR", "bNOR"},    // ~|
+  {"BITWISE_XNOR", "bXNOR"},  // ~^
+  {"BITWISE_XOR", "bXOR"},    // ^
+  {"LOGICAL_NOT", "lNOT"},    // !
+  {"LOGICAL_OR", "lOR"},      // ||
+  {"LOGICAL_AND", "lAND"},    // &&
+  {"LOGICAL_NAND", "lNAND"},  // No Symbol
+  {"LOGICAL_NOR", "lNOR"},    // No Symbol
+  {"LOGICAL_XNOR", "lXNOR"},  // No symbol
+  {"LOGICAL_XOR", "lXOR"},    // No Symbol
+  {"MULTIPLY", "MUL"},        // *
+  {"DIVIDE", "DIV"},          // /
+  {"MODULO", "MOD"},          // %
+  {"POWER", "POW"},           // **
+  {"LT", "LT"},               // <
+  {"GT", "GT"},               // >
+  {"LOGICAL_EQUAL", "lEQ"},   // ==
+  {"NOT_EQUAL", "lNEQ"},      // !=
+  {"LTE", "LTE"},             // <=
+  {"GTE", "GTE"},             // >=
+  {"SR", "SR"},               // >>
+  {"ASR", "ASR"},             // >>>
+  {"SL", "SL"},               // <<
+  {"ASL", "ASL"},             // <<<
+  {"CASE_EQUAL", "cEQ"},      // ===
+  {"CASE_NOT_EQUAL", "cNEQ"}, // !==
+  {"ADDER_FUNC", "ADDER"},
+  {"CARRY_FUNC", "CARRY"},
+  {"MUX_2", "MUX_2"},
+  {"BLIF_FUNCTION", "BLIFf"},
+  {"NETLIST_FUNCTION", "NETf"},
+  {"SMUX_2", "SMUX_2"}, // MUX_2 with single bit selector (no need to add not selector as the second pin)
+  {"MEMORY", "MEM"},
+  {"PAD_NODE", "PAD"},
+  {"HARD_IP", "HARD"},
+  {"GENERIC", "GEN"},   /* added for the unknown node type */
+  {"CLOG2", "CL2"},     // $clog2
+  {"UNSIGNED", "UNSG"}, // $unsigned
+  {"SIGNED", "SG"},     // $signed
+  // [START] operations to cover yosys subckt
+  {"MULTI_BIT_MUX_2", "nbMUX"},      // like MUX_2 but with n-bit input/output
+  {"MULTIPORT_nBIT_SMUX", "npbMUX"}, // n-bit input/output in multi port mux
+  {"PMUX", "pMUX"},                  // Multiplexer with many inputs using one-hot select signal
+  {"SDFF", "sDFF"},                  // data, S to reset value and output port
+  {"DFFE", "DFFe"},                  // data, enable to output port
+  {"SDFFE", "sDFFe"},                // data, synchronous reset value and enable to output port
+  {"SDFFCE", "sDFFce"},              // data, synchronous reset value and enable to reset value and output port
+  {"DFFSR", "DFFsr"},                // data, clear and set to output port
+  {"DFFSRE", "DFFsre"},              // data, clear and set with enable to output port
+  {"DLATCH", "Dlatch"},              // data to output port based on polarity without clk
+  {"ADLATCH", "aDlatch"},            // data to output port based on polarity without clk
+  {"SETCLR", "setclr"},              // set or clear input pins
+  {"SPRAM", "spRAM"},                // representing primitive single port ram
+  {"DPRAM", "dpRAM"},                // representing primitive dual port ram
+  {"YMEM", "yRAM"},                  // representing the Yosys $mem cell (see yosys_subckt_strmap)
+  {"YMEM2", "yRAM"},                 // representing the Yosys $mem_v2 cell; NOTE(review): short name duplicates YMEM's — confirm intended
+  {"ROM", "ROM"},
+  {"BRAM", "bRAM"},    // block of memory generated in a Yosys subcircuit-format BLIF file
+                       // [END] operations to cover yosys subckt
+  {"ERROR OOB", "OOB"} // should not reach this
+};
+
+// EDDIE: new enum value for ids to replace MEMORY from operation_t (NOTE(review): seems unrelated to the map below — confirm)
+/* supported input/output file types: maps a type-name string to its file_type_e value */
+strmap<file_type_e> file_type_strmap({{"ilang", file_type_e::_ILANG},
+                                      {"verilog", file_type_e::_VERILOG},
+                                      {"verilog_header", file_type_e::_VERILOG_HEADER},
+                                      {"blif", file_type_e::_BLIF},
+                                      {"eblif", file_type_e::_EBLIF},
+                                      {"undef", file_type_e::_UNDEFINED}});
+
+/**
+ * global hashmap of yosys subckt types
+ * Technically, Yosys only outputs the following hard blocks
+ * as (.subckt) record in its output BLIF file
+ *
+ *  FIRST_ELEMENT: Yosys model names showing in a blif file
+ *  SECOND_ELEMENT: corresponding Odin-II cell type
+ *
+ * NOTE: to add support for a new type, first you would find a
+ * corresponding Odin-II cell type or create a new one matching
+ * with the new type corresponding model (.subckt) in BLIF file,
+ * and add it to the following typemap. Then, you would need to
+ * specify the model input-output order in the Odin-II BLIF Reader.
+ * At the end, a resolve_XXX_node function needs to be implemented
+ * in the BLIF Elaboration phase to make the new node compatible
+ * with the Odin-II partial mapper.
+ */
+strmap<operation_list> yosys_subckt_strmap({
+  {"$_ANDNOT_", SKIP},       // (A, B, Y)
+  {"$_AND_", SKIP},          // (A, B, Y)
+  {"$_AOI3_", SKIP},         // (A, B, C, Y)
+  {"$_AOI4_", SKIP},         // (A, B, C, D, Y)
+  {"$_BUF_", SKIP},          // (A, Y)
+  {"$_DFFE_NN0N_", SKIP},    // (D, C, R, E, Q)
+  {"$_DFFE_NN0P_", SKIP},    // (D, C, R, E, Q)
+  {"$_DFFE_NN1N_", SKIP},    // (D, C, R, E, Q)
+  {"$_DFFE_NN1P_", SKIP},    // (D, C, R, E, Q)
+  {"$_DFFE_NN_", SKIP},      // (D, C, E, Q)
+  {"$_DFFE_NP0N_", SKIP},    // (D, C, R, E, Q)
+  {"$_DFFE_NP0P_", SKIP},    // (D, C, R, E, Q)
+  {"$_DFFE_NP1N_", SKIP},    // (D, C, R, E, Q)
+  {"$_DFFE_NP1P_", SKIP},    // (D, C, R, E, Q)
+  {"$_DFFE_NP_", SKIP},      // (D, C, E, Q)
+  {"$_DFFE_PN0N_", SKIP},    // (D, C, R, E, Q)
+  {"$_DFFE_PN0P_", SKIP},    // (D, C, R, E, Q)
+  {"$_DFFE_PN1N_", SKIP},    // (D, C, R, E, Q)
+  {"$_DFFE_PN1P_", SKIP},    // (D, C, R, E, Q)
+  {"$_DFFE_PN_", SKIP},      // (D, C, E, Q)
+  {"$_DFFE_PP0N_", SKIP},    // (D, C, R, E, Q)
+  {"$_DFFE_PP0P_", SKIP},    // (D, C, R, E, Q)
+  {"$_DFFE_PP1N_", SKIP},    // (D, C, R, E, Q)
+  {"$_DFFE_PP1P_", SKIP},    // (D, C, R, E, Q)
+  {"$_DFFE_PP_", SKIP},      // (D, C, E, Q)
+  {"$_DFFSRE_NNNN_", SKIP},  // (C, S, R, E, D, Q)
+  {"$_DFFSRE_NNNP_", SKIP},  // (C, S, R, E, D, Q)
+  {"$_DFFSRE_NNPN_", SKIP},  // (C, S, R, E, D, Q)
+  {"$_DFFSRE_NNPP_", SKIP},  // (C, S, R, E, D, Q)
+  {"$_DFFSRE_NPNN_", SKIP},  // (C, S, R, E, D, Q)
+  {"$_DFFSRE_NPNP_", SKIP},  // (C, S, R, E, D, Q)
+  {"$_DFFSRE_NPPN_", SKIP},  // (C, S, R, E, D, Q)
+  {"$_DFFSRE_NPPP_", SKIP},  // (C, S, R, E, D, Q)
+  {"$_DFFSRE_PNNN_", SKIP},  // (C, S, R, E, D, Q)
+  {"$_DFFSRE_PNNP_", SKIP},  // (C, S, R, E, D, Q)
+  {"$_DFFSRE_PNPN_", SKIP},  // (C, S, R, E, D, Q)
+  {"$_DFFSRE_PNPP_", SKIP},  // (C, S, R, E, D, Q)
+  {"$_DFFSRE_PPNN_", SKIP},  // (C, S, R, E, D, Q)
+  {"$_DFFSRE_PPNP_", SKIP},  // (C, S, R, E, D, Q)
+  {"$_DFFSRE_PPPN_", SKIP},  // (C, S, R, E, D, Q)
+  {"$_DFFSRE_PPPP_", SKIP},  // (C, S, R, E, D, Q)
+  {"$_DFFSR_NNN_", SKIP},    // (C, S, R, D, Q)
+  {"$_DFFSR_NNP_", SKIP},    // (C, S, R, D, Q)
+  {"$_DFFSR_NPN_", SKIP},    // (C, S, R, D, Q)
+  {"$_DFFSR_NPP_", SKIP},    // (C, S, R, D, Q)
+  {"$_DFFSR_PNN_", SKIP},    // (C, S, R, D, Q)
+  {"$_DFFSR_PNP_", SKIP},    // (C, S, R, D, Q)
+  {"$_DFFSR_PPN_", SKIP},    // (C, S, R, D, Q)
+  {"$_DFFSR_PPP_", SKIP},    // (C, S, R, D, Q)
+  {"$_DFF_NN0_", SKIP},      // (D, C, R, Q)
+  {"$_DFF_NN1_", SKIP},      // (D, C, R, Q)
+  {"$_DFF_NP0_", SKIP},      // (D, C, R, Q)
+  {"$_DFF_NP1_", SKIP},      // (D, C, R, Q)
+  {"$_DFF_N_", SKIP},        // (D, C, Q)
+  {"$_DFF_PN0_", SKIP},      // (D, C, R, Q)
+  {"$_DFF_PN1_", SKIP},      // (D, C, R, Q)
+  {"$_DFF_PP0_", SKIP},      // (D, C, R, Q)
+  {"$_DFF_PP1_", SKIP},      // (D, C, R, Q)
+  {"$_DFF_P_", SKIP},        // (D, C, Q)
+  {"$_DLATCHSR_NNN_", SKIP}, // (E, S, R, D, Q)
+  {"$_DLATCHSR_NNP_", SKIP}, // (E, S, R, D, Q)
+  {"$_DLATCHSR_NPN_", SKIP}, // (E, S, R, D, Q)
+  {"$_DLATCHSR_NPP_", SKIP}, // (E, S, R, D, Q)
+  {"$_DLATCHSR_PNN_", SKIP}, // (E, S, R, D, Q)
+  {"$_DLATCHSR_PNP_", SKIP}, // (E, S, R, D, Q)
+  {"$_DLATCHSR_PPN_", SKIP}, // (E, S, R, D, Q)
+  {"$_DLATCHSR_PPP_", SKIP}, // (E, S, R, D, Q)
+  {"$_DLATCH_NN0_", SKIP},   // (E, R, D, Q)
+  {"$_DLATCH_NN1_", SKIP},   // (E, R, D, Q)
+  {"$_DLATCH_NP0_", SKIP},   // (E, R, D, Q)
+  {"$_DLATCH_NP1_", SKIP},   // (E, R, D, Q)
+  {"$_DLATCH_N_", SKIP},     // (E, D, Q)
+  {"$_DLATCH_PN0_", SKIP},   // (E, R, D, Q)
+  {"$_DLATCH_PN1_", SKIP},   // (E, R, D, Q)
+  {"$_DLATCH_PP0_", SKIP},   // (E, R, D, Q)
+  {"$_DLATCH_PP1_", SKIP},   // (E, R, D, Q)
+  {"$_DLATCH_P_", SKIP},     // (E, D, Q)
+  {"$_FF_", SKIP},           // (D, Q)
+  {"$_MUX16_", SKIP},        // (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, S, T, U, V, Y)
+  {"$_MUX4_", SKIP},         // (A, B, C, D, S, T, Y)
+  {"$_MUX8_", SKIP},         // (A, B, C, D, E, F, G, H, S, T, U, Y)
+  {"$_MUX_", SKIP},          // (A, B, S, Y)
+  {"$_NAND_", SKIP},         // (A, B, Y)
+  {"$_NMUX_", SKIP},         // (A, B, S, Y)
+  {"$_NOR_", SKIP},          // (A, B, Y)
+  {"$_NOT_", SKIP},          // (A, Y)
+  {"$_OAI3_", SKIP},         // (A, B, C, Y)
+  {"$_OAI4_", SKIP},         // (A, B, C, D, Y)
+  {"$_ORNOT_", SKIP},        // (A, B, Y)
+  {"$_OR_", SKIP},           // (A, B, Y)
+  {"$_SDFFCE_NN0N_", SKIP},  // (D, C, R, E, Q)
+  {"$_SDFFCE_NN0P_", SKIP},  // (D, C, R, E, Q)
+  {"$_SDFFCE_NN1N_", SKIP},  // (D, C, R, E, Q)
+  {"$_SDFFCE_NN1P_", SKIP},  // (D, C, R, E, Q)
+  {"$_SDFFCE_NP0N_", SKIP},  // (D, C, R, E, Q)
+  {"$_SDFFCE_NP0P_", SKIP},  // (D, C, R, E, Q)
+  {"$_SDFFCE_NP1N_", SKIP},  // (D, C, R, E, Q)
+  {"$_SDFFCE_NP1P_", SKIP},  // (D, C, R, E, Q)
+  {"$_SDFFCE_PN0N_", SKIP},  // (D, C, R, E, Q)
+  {"$_SDFFCE_PN0P_", SKIP},  // (D, C, R, E, Q)
+  {"$_SDFFCE_PN1N_", SKIP},  // (D, C, R, E, Q)
+  {"$_SDFFCE_PN1P_", SKIP},  // (D, C, R, E, Q)
+  {"$_SDFFCE_PP0N_", SKIP},  // (D, C, R, E, Q)
+  {"$_SDFFCE_PP0P_", SKIP},  // (D, C, R, E, Q)
+  {"$_SDFFCE_PP1N_", SKIP},  // (D, C, R, E, Q)
+  {"$_SDFFCE_PP1P_", SKIP},  // (D, C, R, E, Q)
+  {"$_SDFFE_NN0N_", SKIP},   // (D, C, R, E, Q)
+  {"$_SDFFE_NN0P_", SKIP},   // (D, C, R, E, Q)
+  {"$_SDFFE_NN1N_", SKIP},   // (D, C, R, E, Q)
+  {"$_SDFFE_NN1P_", SKIP},   // (D, C, R, E, Q)
+  {"$_SDFFE_NP0N_", SKIP},   // (D, C, R, E, Q)
+  {"$_SDFFE_NP0P_", SKIP},   // (D, C, R, E, Q)
+  {"$_SDFFE_NP1N_", SKIP},   // (D, C, R, E, Q)
+  {"$_SDFFE_NP1P_", SKIP},   // (D, C, R, E, Q)
+  {"$_SDFFE_PN0N_", SKIP},   // (D, C, R, E, Q)
+  {"$_SDFFE_PN0P_", SKIP},   // (D, C, R, E, Q)
+  {"$_SDFFE_PN1N_", SKIP},   // (D, C, R, E, Q)
+  {"$_SDFFE_PN1P_", SKIP},   // (D, C, R, E, Q)
+  {"$_SDFFE_PP0N_", SKIP},   // (D, C, R, E, Q)
+  {"$_SDFFE_PP0P_", SKIP},   // (D, C, R, E, Q)
+  {"$_SDFFE_PP1N_", SKIP},   // (D, C, R, E, Q)
+  {"$_SDFFE_PP1P_", SKIP},   // (D, C, R, E, Q)
+  {"$_SDFF_NN0_", SKIP},     // (D, C, R, Q)
+  {"$_SDFF_NN1_", SKIP},     // (D, C, R, Q)
+  {"$_SDFF_NP0_", SKIP},     // (D, C, R, Q)
+  {"$_SDFF_NP1_", SKIP},     // (D, C, R, Q)
+  {"$_SDFF_PN0_", SKIP},     // (D, C, R, Q)
+  {"$_SDFF_PN1_", SKIP},     // (D, C, R, Q)
+  {"$_SDFF_PP0_", SKIP},     // (D, C, R, Q)
+  {"$_SDFF_PP1_", SKIP},     // (D, C, R, Q)
+  {"$_SR_NN_", SKIP},        // (S, R, Q)
+  {"$_SR_NP_", SKIP},        // (S, R, Q)
+  {"$_SR_PN_", SKIP},        // (S, R, Q)
+  {"$_SR_PP_", SKIP},        // (S, R, Q)
+  {"$_TBUF_", SKIP},         // (A, E, Y)
+  {"$_XNOR_", SKIP},         // (A, B, Y)
+  {"$_XOR_", SKIP},          // (A, B, Y)
+  {"$add", ADD},             // (A, B, Y)
+  {"$adff", SKIP},           // (CLK, ARST, D, Q)
+  {"$adffe", SKIP},          // (CLK, ARST, EN, D, Q)
+  {"$adlatch", SKIP},        // (EN, ARST, D, Q)
+  {"$allconst", SKIP},       // (Y)
+  {"$allseq", SKIP},         // (Y)
+  {"$alu", SKIP},            // (A, B, CI, BI, X, Y, CO)
+  {"$and", SKIP},            // (A, B, Y)
+  {"$anyconst", SKIP},       // (Y)
+  {"$anyseq", SKIP},         // (Y)
+  {"$assert", SKIP},         // (A, EN)
+  {"$assume", SKIP},         // (A, EN)
+  {"$concat", SKIP},         // (A, B, Y)
+  {"$cover", SKIP},          // (A, EN)
+  {"$dff", SKIP},            // (CLK, D, Q)
+  {"$dffe", SKIP},           // (CLK, EN, D, Q)
+  {"$dffsr", SKIP},          // (CLK, SET, CLR, D, Q)
+  {"$dffsre", SKIP},         // (CLK, SET, CLR, EN, D, Q)
+  {"$div", SKIP},            // (A, B, Y)
+  {"$divfloor", SKIP},       // (A, B, Y)
+  {"$dlatch", SKIP},         // (EN, D, Q)
+  {"$dlatchsr", SKIP},       // (EN, SET, CLR, D, Q)
+  {"$eq", SKIP},             // (A, B, Y)
+  {"$equiv", SKIP},          // (A, B, Y)
+  {"$eqx", SKIP},            // (A, B, Y)
+  {"$fa", SKIP},             // (A, B, C, X, Y)
+  {"$fair", SKIP},           // (A, EN)
+  {"$ff", SKIP},             // (D, Q)
+  {"$fsm", SKIP},            // (CLK, ARST, CTRL_IN, CTRL_OUT)
+  {"$ge", SKIP},             // (A, B, Y)
+  {"$gt", SKIP},             // (A, B, Y)
+  {"$initstate", SKIP},      // (Y)
+  {"$lcu", SKIP},            // (P, G, CI, CO)
+  {"$le", SKIP},             // (A, B, Y)
+  {"$live", SKIP},           // (A, EN)
+  {"$logic_and", SKIP},      // (A, B, Y)
+  {"$logic_not", SKIP},      // (A, Y)
+  {"$logic_or", SKIP},       // (A, B, Y)
+  {"$lt", SKIP},             // (A, B, Y)
+  {"$lut", SKIP},            // (A, Y)
+  {"$mem", YMEM},
+  {"$mem_v2", YMEM2},
+  {"$macc", SKIP},        // (A, B, Y)
+  {"$meminit", SKIP},     // (ADDR, DATA)
+  {"$memrd", SKIP},       // (CLK, EN, ADDR, DATA)
+  {"$memwr", SKIP},       // (CLK, EN, ADDR, DATA)
+  {"$mod", SKIP},         // (A, B, Y)
+  {"$modfloor", SKIP},    // (A, B, Y)
+  {"$mul", MULTIPLY},     // (A, B, Y)
+  {"$mux", SKIP},         // (A, B, S, Y)
+  {"$ne", SKIP},          // (A, B, Y)
+  {"$neg", SKIP},         // (A, Y)
+  {"$nex", SKIP},         // (A, B, Y)
+  {"$not", SKIP},         // (A, Y)
+  {"$or", SKIP},          // (A, B, Y)
+  {"$pmux", SKIP},        // (A, B, S, Y)
+  {"$pos", SKIP},         // (A, Y)
+  {"$pow", SKIP},         // (A, B, Y)
+  {"$reduce_and", SKIP},  // (A, Y)
+  {"$reduce_bool", SKIP}, // (A, Y)
+  {"$reduce_or", SKIP},   // (A, Y)
+  {"$reduce_xnor", SKIP}, // (A, Y)
+  {"$reduce_xor", SKIP},  // (A, Y)
+  {"$sdff", SKIP},        // (CLK, SRST, D, Q)
+  {"$sdffce", SKIP},      // (CLK, SRST, EN, D, Q)
+  {"$sdffe", SKIP},       // (CLK, SRST, EN, D, Q)
+  {"$shift", SKIP},       // (A, B, Y)
+  {"$shiftx", SKIP},      // (A, B, Y)
+  {"$shl", SKIP},         // (A, B, Y)
+  {"$shr", SKIP},         // (A, B, Y)
+  {"$slice", SKIP},       // (A, Y)
+  {"$sop", SKIP},         // (A, Y)
+  {"$specify2", SKIP},    // (EN, SRC, DST)
+  {"$specify3", SKIP},    // (EN, SRC, DST, DAT)
+  {"$specrule", SKIP},    // (EN_SRC, EN_DST, SRC, DST)
+  {"$sr", SKIP},          // (SET, CLR, Q)
+  {"$sshl", SKIP},        // (A, B, Y)
+  {"$sshr", SKIP},        // (A, B, Y)
+  {"$sub", MINUS},        // (A, B, Y)
+  {"$tribuf", SKIP},      // (A, EN, Y)
+  {"$xnor", SKIP},        // (A, B, Y)
+  {"$xor", SKIP},         // (A, B, Y)
+
+  /*********** VTR Primitive modules START ***********/
+  {"LUT_K", SKIP},                           // (in, out)
+  {"DFF", FF_NODE},                          // (clock, D, Q)
+  {"fpga_interconnect", operation_list_END}, // (datain, dataout)
+  {"mux", SMUX_2},                           // (select, x, y, z)
+  {"adder", ADD},                            // (a, b, cin, cout, sumout)
+  {"multiply", MULTIPLY},                    // (a, b, out)
+  {"single_port_ram", SPRAM},                // (clock, addr, data, we, out)
+  {"dual_port_ram", DPRAM}                   // (clock, addr1, addr2, data1, data2, we1, we2, out1, out2)
+});
diff --git a/parmys-plugin/src/hard_blocks.cc b/parmys-plugin/src/hard_blocks.cc
new file mode 100644
index 000000000..2d1bb2253
--- /dev/null
+++ b/parmys-plugin/src/hard_blocks.cc
@@ -0,0 +1,314 @@
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdlib.h>
+
+#include "hard_blocks.h"
+#include "memories.h"
+#include "netlist_utils.h"
+#include "odin_globals.h"
+#include "odin_types.h"
+#include "odin_util.h"
+
+#include "kernel/yosys.h"
+
+#include "../parmys_utils.hpp"
+
+STRING_CACHE *hard_block_names = NULL; /* model-name cache; (re)created by cache_hard_block_names() */
+
+void cache_hard_block_names();                                 /* file-local helper, defined below */
+void register_hb_port_size(t_model_ports *hb_ports, int size); /* file-local helper, defined below */
+
+void register_hb_port_size(t_model_ports *hb_ports, int size)
+{
+    /* Overwrite the width of a model port; a NULL port is skipped without any diagnostic. */
+    if (hb_ports == NULL) {
+        return; /* TODO: report an error for a missing port */
+    }
+
+    hb_ports->size = size;
+}
+
+t_model_ports *get_model_port(t_model_ports *ports, const char *name)
+{
+    /* linear scan of the port list; falls out with ports == NULL when no port is called `name` */
+    for (; ports != NULL && strcmp(ports->name, name) != 0; ports = ports->next) {
+    }
+    return ports;
+}
+
+void cache_hard_block_names()
+{
+    /* Start a fresh global cache, then add one entry per architecture model; */
+    /* each entry's data slot points back at the model it was created from.   */
+    hard_block_names = sc_new_string_cache();
+
+    for (t_model *model = Arch.models; model != NULL; model = model->next) {
+        int slot = sc_add_string(hard_block_names, model->name);
+
+        hard_block_names->data[slot] = (void *)model;
+    }
+}
+
+void register_hard_blocks()
+{
+    /* index all architecture models by name, then look up the two RAM models */
+    cache_hard_block_names();
+    single_port_rams = find_hard_block(SINGLE_PORT_RAM_string);
+    dual_port_rams = find_hard_block(DUAL_PORT_RAM_string);
+
+    if (single_port_rams != NULL) {
+        t_model_ports *sp_inputs = single_port_rams->inputs;
+        /* width splitting is on: the data and out ports are registered as 1 bit wide */
+        if (configuration.split_memory_width) {
+            register_hb_port_size(get_model_port(sp_inputs, "data"), 1);
+            register_hb_port_size(get_model_port(single_port_rams->outputs, "out"), 1);
+        }
+        register_hb_port_size(get_model_port(sp_inputs, "addr"), get_sp_ram_split_depth());
+    }
+
+    if (dual_port_rams != NULL) {
+        t_model_ports *dp_inputs = dual_port_rams->inputs;
+        t_model_ports *dp_outputs = dual_port_rams->outputs;
+        if (configuration.split_memory_width) {
+            register_hb_port_size(get_model_port(dp_inputs, "data1"), 1);
+            register_hb_port_size(get_model_port(dp_inputs, "data2"), 1);
+            register_hb_port_size(get_model_port(dp_outputs, "out1"), 1);
+            register_hb_port_size(get_model_port(dp_outputs, "out2"), 1);
+        }
+        const int dp_depth = get_dp_ram_split_depth(); /* same depth for both address ports */
+        register_hb_port_size(get_model_port(dp_inputs, "addr1"), dp_depth);
+        register_hb_port_size(get_model_port(dp_inputs, "addr2"), dp_depth);
+    }
+}
+
+t_model *find_hard_block(const char *name)
+{
+    /*
+     * Linear search of the architecture's model list for an exact,
+     * case-sensitive (strcmp) name match.
+     */
+    for (t_model *model = Arch.models; model != NULL; model = model->next) {
+        if (strcmp(model->name, name) == 0)
+            return model;
+    }
+
+    return NULL; /* no model with that name */
+}
+/* Emits hard-block `node` as a new cell in `module`, connecting its pins to wires and copying its cell parameters. */
+void cell_hard_block(nnode_t *node, Yosys::Module *module, netlist_t *netlist, Yosys::Design *design)
+{
+    int index, port;
+
+    /* Assert that every hard block has at least an input and output */
+    oassert(node->input_port_sizes[0] > 0);
+    oassert(node->output_port_sizes[0] > 0);
+    /* the cell type is the identifier recorded on the node's related AST node */
+    Yosys::IdString celltype = Yosys::RTLIL::escape_id(node->related_ast_node->identifier_node->types.identifier);
+    Yosys::RTLIL::Cell *cell = module->addCell(NEW_ID, celltype);
+    /* port bits that wideports_split() recognises are collected here and handed to handle_cell_wideports_cache() */
+    Yosys::hashlib::dict<Yosys::RTLIL::IdString, Yosys::hashlib::dict<int, Yosys::SigBit>> cell_wideports_cache;
+
+    /* connect the input pins; `port` is the current input port and `index` the bit position inside it */
+    port = index = 0;
+    for (int i = 0; i < node->num_input_pins; i++) {
+        /* Check that the input pin is driven */
+        if (node->input_pins[i]->net->num_driver_pins == 0 && node->input_pins[i]->net != netlist->zero_net &&
+            node->input_pins[i]->net != netlist->one_net && node->input_pins[i]->net != netlist->pad_net) {
+            warning_message(NETLIST, node->loc, "Signal %s is not driven. padding with ground\n", node->input_pins[i]->name);
+            add_fanout_pin_to_net(netlist->zero_net, node->input_pins[i]);
+        } else if (node->input_pins[i]->net->num_driver_pins > 1) {
+            error_message(NETLIST, node->loc, "Multiple (%d) driver pins not supported in hard block definition\n",
+                          node->input_pins[i]->net->num_driver_pins);
+        }
+        std::string p, q; // p: port (bit) name on the cell, q: name of the driver pin (or of its node when the pin is unnamed)
+
+        if (node->input_port_sizes[port] == 1) {
+            p = node->input_pins[i]->mapping;
+            if (node->input_pins[i]->net->driver_pins[0]->name != NULL)
+                q = node->input_pins[i]->net->driver_pins[0]->name;
+            else
+                q = node->input_pins[i]->net->driver_pins[0]->node->name;
+        } else {
+            p = Yosys::stringf("%s[%d]", node->input_pins[i]->mapping, index);
+            if (node->input_pins[i]->net->driver_pins[0]->name != NULL)
+                q = node->input_pins[i]->net->driver_pins[0]->name;
+            else
+                q = node->input_pins[i]->net->driver_pins[0]->node->name;
+        }
+        /* NOTE(review): wideports_split presumably yields an empty name for scalar ports — confirm in parmys_utils */
+        std::pair<Yosys::RTLIL::IdString, int> wp = wideports_split(p);
+        if (wp.first.empty())
+            cell->setPort(Yosys::RTLIL::escape_id(p), to_wire(q, module));
+        else
+            cell_wideports_cache[wp.first][wp.second] = to_wire(q, module);
+
+        index++;
+        if (node->input_port_sizes[port] == index) {
+            index = 0;
+            port++;
+        }
+    }
+
+    /* connect the output pins the same way; the wire name is the output pin's own name */
+    port = index = 0;
+    for (int i = 0; i < node->num_output_pins; i++) {
+        std::string p, q;
+        if (node->output_port_sizes[port] != 1) {
+            p = Yosys::stringf("%s[%d]", node->output_pins[i]->mapping, index);
+            q = node->output_pins[i]->name;
+        } else {
+            p = node->output_pins[i]->mapping;
+            q = node->output_pins[i]->name;
+        }
+
+        std::pair<Yosys::RTLIL::IdString, int> wp = wideports_split(p);
+        if (wp.first.empty())
+            cell->setPort(Yosys::RTLIL::escape_id(p), to_wire(q, module));
+        else
+            cell_wideports_cache[wp.first][wp.second] = to_wire(q, module);
+
+        index++;
+        if (node->output_port_sizes[port] == index) {
+            index = 0;
+            port++;
+        }
+    }
+
+    handle_cell_wideports_cache(&cell_wideports_cache, design, module, cell);
+    /* copy every (name, value) pair of the node's cell parameters onto the new cell */
+    for (auto &param : node->cell_parameters) {
+        cell->parameters[Yosys::RTLIL::IdString(param.first)] = Yosys::Const(param.second);
+    }
+
+    return;
+}
+/* Adds to `design` one blackbox module, with its input and output ports declared, for every used hard block. */
+void output_hard_blocks_yosys(Yosys::Design *design)
+{
+    t_model_ports *hb_ports;
+    t_model *hard_blocks;
+
+    hard_blocks = Arch.models;
+    while (hard_blocks != NULL) {
+        if (hard_blocks->used == 1) /* Hard Block is utilized */
+        {
+            // Adders and multipliers are skipped here (already emitted by add_the_blackbox_for_adds and
+            // add_the_blackbox_for_mults); `continue`, not `break`, so the models after them are still visited.
+            if (strcmp(hard_blocks->name, "adder") == 0 || strcmp(hard_blocks->name, "multiply") == 0) {
+                hard_blocks = hard_blocks->next;
+                continue;
+            }
+
+            Yosys::RTLIL::Module *module = nullptr;
+
+            Yosys::hashlib::dict<Yosys::RTLIL::IdString, std::pair<int, bool>> wideports_cache;
+
+            module = new Yosys::RTLIL::Module;
+            module->name = Yosys::RTLIL::escape_id(hard_blocks->name);
+
+            if (design->module(module->name))
+                Yosys::log_error("Duplicate definition of module %s!\n", Yosys::log_id(module->name));
+            design->add(module);
+
+            hb_ports = hard_blocks->inputs;
+            while (hb_ports != NULL) {
+                for (int i = 0; i < hb_ports->size; i++) {
+                    std::string w_name;
+                    if (hb_ports->size == 1)
+                        w_name = hb_ports->name;
+                    else
+                        w_name = Yosys::stringf("%s[%d]", hb_ports->name, i);
+
+                    Yosys::RTLIL::Wire *wire = to_wire(w_name, module);
+                    wire->port_input = true;
+
+                    std::pair<Yosys::RTLIL::IdString, int> wp = wideports_split(w_name);
+                    if (!wp.first.empty() && wp.second >= 0) {
+                        wideports_cache[wp.first].first = std::max(wideports_cache[wp.first].first, wp.second + 1);
+                        wideports_cache[wp.first].second = true;
+                    }
+                }
+
+                hb_ports = hb_ports->next;
+            }
+
+            // same walk for the output ports; their wideports_cache entries get `second = false`
+            hb_ports = hard_blocks->outputs;
+            while (hb_ports != NULL) {
+                for (int i = 0; i < hb_ports->size; i++) {
+                    std::string w_name;
+                    if (hb_ports->size == 1)
+                        w_name = hb_ports->name;
+                    else
+                        w_name = Yosys::stringf("%s[%d]", hb_ports->name, i);
+
+                    Yosys::RTLIL::Wire *wire = to_wire(w_name, module);
+                    wire->port_output = true;
+
+                    std::pair<Yosys::RTLIL::IdString, int> wp = wideports_split(w_name);
+                    if (!wp.first.empty() && wp.second >= 0) {
+                        wideports_cache[wp.first].first = std::max(wideports_cache[wp.first].first, wp.second + 1);
+                        wideports_cache[wp.first].second = false;
+                    }
+                }
+
+                hb_ports = hb_ports->next;
+            }
+
+            handle_wideports_cache(&wideports_cache, module);
+
+            module->fixup_ports();
+            wideports_cache.clear();
+
+            module->attributes[Yosys::ID::blackbox] = Yosys::RTLIL::Const(1);
+        }
+
+        hard_blocks = hard_blocks->next;
+    }
+
+    return;
+}
+
+void instantiate_hard_block(nnode_t *node, short mark, netlist_t * /*netlist*/)
+{
+    int port_idx = 0; /* output port the walk is currently in */
+    int bit_idx = 0;  /* bit position inside that port */
+
+    /* name every output pin that is still anonymous, then mark the node as visited */
+    for (int pin_idx = 0; pin_idx < node->num_output_pins; pin_idx++) {
+        npin_t *pin = node->output_pins[pin_idx];
+        if (!pin->name) {
+            /* @TODO: the suffix is the flat pin index; an elaborator-dependent suffix (i or -1) is still open */
+            pin->name = make_full_ref_name(node->name, NULL, NULL, pin->mapping, pin_idx);
+        }
+        /* advance the (port, bit) cursor; note that it does not feed into the name above */
+        bit_idx++;
+        if (bit_idx == node->output_port_sizes[port_idx]) {
+            bit_idx = 0;
+            port_idx++;
+        }
+    }
+
+    node->traverse_visited = mark;
+}
diff --git a/parmys-plugin/src/memories.cc b/parmys-plugin/src/memories.cc
new file mode 100644
index 000000000..675da7b18
--- /dev/null
+++ b/parmys-plugin/src/memories.cc
@@ -0,0 +1,2273 @@
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "ast_util.h"
+#include "odin_globals.h"
+#include "odin_types.h"
+#include "odin_util.h"
+#include <math.h>
+#include <string.h>
+
+#include "hard_blocks.h"
+#include "memories.h"
+#include "netlist_utils.h"
+#include "node_creation_library.h"
+#include "partial_map.h"
+#include "vtr_memory.h"
+#include "vtr_util.h"
+
+using vtr::t_linked_vptr;
+
+t_model *single_port_rams = NULL;
+t_model *dual_port_rams = NULL;
+
+t_linked_vptr *sp_memory_list;
+t_linked_vptr *dp_memory_list;
+
+void copy_input_port_to_memory(nnode_t *node, signal_list_t *signals, const char *port_name);
+void pad_dp_memory_width(nnode_t *node, netlist_t *netlist);
+void pad_sp_memory_width(nnode_t *node, netlist_t *netlist);
+void pad_memory_output_port(nnode_t *node, netlist_t *netlist, t_model *model, const char *port_name);
+void pad_memory_input_port(nnode_t *node, netlist_t *netlist, t_model *model, const char *port_name);
+
+int get_sp_ram_split_width();
+int get_dp_ram_split_width();
+void filter_memories_by_soft_logic_cutoff();
+
+/**
+ * (function: init_sp_ram_signals)
+ *
+ * @brief allocate a zero-filled sp_ram_signals and give it fresh addr, data and out signal lists
+ */
+sp_ram_signals *init_sp_ram_signals()
+{
+    sp_ram_signals *signals = (sp_ram_signals *)vtr::calloc(1, sizeof(sp_ram_signals)); // zero-fill: clk/we start as NULL
+
+    signals->addr = init_signal_list();
+    signals->data = init_signal_list();
+    signals->out = init_signal_list();
+
+    return (signals);
+}
+
+/**
+ * (function: init_dp_ram_signals)
+ *
+ * @brief allocate a zero-filled dp_ram_signals and give it fresh addr1/2, data1/2 and out1/2 signal lists
+ */
+dp_ram_signals *init_dp_ram_signals()
+{
+    dp_ram_signals *signals = (dp_ram_signals *)vtr::calloc(1, sizeof(dp_ram_signals)); // zero-fill: clk/we1/we2 start as NULL
+
+    signals->addr1 = init_signal_list();
+    signals->addr2 = init_signal_list();
+    signals->data1 = init_signal_list();
+    signals->data2 = init_signal_list();
+    signals->out1 = init_signal_list();
+    signals->out2 = init_signal_list();
+
+    return (signals);
+}
+
+long get_sp_ram_depth(nnode_t *node)
+{
+    sp_ram_signals *ram = get_sp_ram_signals(node);
+    const long num_words = shift_left_value_with_overflow_check(0x1, ram->addr->count, node->loc); // value 1 shifted by the addr pin count
+    free_sp_ram_signals(ram);
+    return num_words;
+}
+
+long get_dp_ram_depth(nnode_t *node)
+{
+    dp_ram_signals *signals = get_dp_ram_signals(node);
+    oassert(signals->addr1->count == signals->addr2->count); // both ports must have the same number of address pins
+    long depth = shift_left_value_with_overflow_check(0x1, signals->addr1->count, node->loc); // value 1 shifted by the addr1 pin count
+    free_dp_ram_signals(signals); // the signals struct was only needed for this query
+    return depth;
+}
+
+long get_sp_ram_width(nnode_t *node)
+{
+    sp_ram_signals *ram = get_sp_ram_signals(node);
+    const long num_data_pins = ram->data->count; // the width is the pin count of the data list
+    free_sp_ram_signals(ram);
+    return num_data_pins;
+}
+
+long get_dp_ram_width(nnode_t *node)
+{
+    dp_ram_signals *signals = get_dp_ram_signals(node);
+    oassert(signals->data1->count == signals->data2->count); // both data ports must have the same pin count
+    long width = signals->data1->count; // the width is the pin count of the data1 list
+    free_dp_ram_signals(signals); // the signals struct was only needed for this query
+    return width;
+}
+
+void copy_input_port_to_memory(nnode_t *node, signal_list_t *signalsvar, const char *port_name)
+{
+    signal_list_t *duplicates = copy_input_signals(signalsvar); // the port is built from copies, not from signalsvar itself
+    add_input_port_to_memory(node, duplicates, port_name);
+    free_signal_list(duplicates);
+}
+
+/*
+ * Re-maps (moves) the given input signals onto a new port with the given name on the given memory node.
+ */
+void remap_input_port_to_memory(nnode_t *node, signal_list_t *signals, const char *port_name)
+{
+    int i;
+    int j = node->num_input_pins; // first pin slot of the new port (taken before the pin array grows)
+
+    // Make sure the port is not already assigned.
+    for (i = 0; i < j; i++) {
+        npin_t *pin = node->input_pins[i];
+        if (!strcmp(pin->mapping, port_name)) {
+            error_message(NETLIST, node->loc, "Attempted to reassign input port %s to memory %s.", port_name, node->name);
+        }
+    }
+
+    // Make room for the new port.
+    allocate_more_input_pins(node, signals->count);
+    add_input_port_information(node, signals->count);
+
+    // Add the new port.
+    for (i = 0; i < signals->count; i++, j++) {
+        npin_t *pin = signals->pins[i];
+        if (!pin->mapping || strcmp(pin->mapping, port_name)) { // relabel unmapped pins too; never pass NULL to strcmp
+            if (pin->mapping)
+                vtr::free(pin->mapping);
+            pin->mapping = vtr::strdup(port_name);
+        }
+        remap_pin_to_new_node(pin, node, j);
+    }
+}
+
+/*
+ * Appends a new input port called port_name to the memory node and attaches the given pins to it.
+ *
+ * Reports an error if the node already has an input pin mapped to port_name.
+ */
+void add_input_port_to_memory(nnode_t *node, signal_list_t *signalsvar, const char *port_name)
+{
+    const int old_pin_count = node->num_input_pins;
+    int slot = old_pin_count;
+
+    // Report an error if an existing input pin already carries this port name.
+    for (int p = 0; p < old_pin_count; p++) {
+        npin_t *existing = node->input_pins[p];
+        if (strcmp(existing->mapping, port_name) == 0) {
+            error_message(NETLIST, node->loc, "Attempted to reassign input port %s to memory %s.", port_name, node->name);
+        }
+    }
+
+    // Grow the pin array and record the size of the new port.
+    allocate_more_input_pins(node, signalsvar->count);
+    add_input_port_information(node, signalsvar->count);
+
+    // Label every incoming pin with the port name and attach it in order.
+    for (int s = 0; s < signalsvar->count; s++, slot++) {
+        npin_t *incoming = signalsvar->pins[s];
+        if (incoming->mapping != NULL) {
+            vtr::free(incoming->mapping);
+        }
+        incoming->mapping = vtr::strdup(port_name);
+        add_input_pin_to_node(node, incoming, slot);
+    }
+}
+
+/*
+ * Adds an output port with the given name and the given
+ * signals to the given memory node. Only allows the same port
+ * to be added once.
+ */
+void add_output_port_to_memory(nnode_t *node, signal_list_t *signals, const char *port_name)
+{
+    int i;
+    int j = node->num_output_pins; // first pin slot of the new port (taken before the pin array grows)
+
+    // Make sure the port is not already assigned.
+    // TODO: more complicated logic needs to be implemented; this is a temporary solution
+    for (i = 0; i < j; i++) {
+        npin_t *pin = node->output_pins[i];
+        if (!strcmp(pin->mapping, port_name)) {
+            error_message(NETLIST, node->loc, "Attempted to reassign output port %s to node %s.", port_name, node->name);
+            return; // leave the node untouched when the port name is already in use
+        }
+    }
+
+    // Make room for the new port.
+    allocate_more_output_pins(node, signals->count);
+    add_output_port_information(node, signals->count);
+
+    // Add the new port.
+    for (i = 0; i < signals->count; i++, j++) {
+        npin_t *pin = signals->pins[i];
+        if (pin->mapping) {
+            vtr::free(pin->mapping); // drop the previous label before replacing it
+        }
+        pin->mapping = vtr::strdup(port_name);
+        add_output_pin_to_node(node, pin, j);
+    }
+}
+
+/*
+ * Checks every memory in sp_memory_list and dp_memory_list against the depth bound 2^HARD_RAM_ADDR_LIMIT.
+ *
+ * Also prints each memory's width and depth, followed by the totals and maxima over all memories.
+ */
+void check_memories_and_report_distribution()
+{
+    if ((sp_memory_list == NULL) && (dp_memory_list == NULL))
+        return;
+
+    printf("\nHard Logical Memory Distribution\n");
+    printf("============================\n");
+
+    long total_memory_bits = 0;
+    int total_memory_block_counter = 0;
+    long memory_max_width = 0;
+    long memory_max_depth = 0;
+
+    t_linked_vptr *temp = sp_memory_list;
+    while (temp != NULL) {
+        nnode_t *node = (nnode_t *)temp->data_vptr;
+
+        long width = get_sp_ram_width(node);
+        long depth = get_sp_ram_depth(node);
+
+        if (depth > shift_left_value_with_overflow_check(0x1, HARD_RAM_ADDR_LIMIT, node->loc))
+            error_message(NETLIST, node->loc, "Memory %s of depth %ld exceeds ODIN depth bound of 2^%d.", node->name, depth, HARD_RAM_ADDR_LIMIT);
+
+        printf("SPRAM: %ld width %ld depth\n", width, depth); // width and depth are long, so %ld (not %zu)
+
+        total_memory_bits += width * depth;
+
+        total_memory_block_counter++;
+
+        if (width > memory_max_width) {
+            memory_max_width = width;
+        }
+        if (depth > memory_max_depth) {
+            memory_max_depth = depth;
+        }
+
+        temp = temp->next;
+    }
+
+    temp = dp_memory_list;
+    while (temp != NULL) {
+        nnode_t *node = (nnode_t *)temp->data_vptr;
+
+        long width = get_dp_ram_width(node);
+        long depth = get_dp_ram_depth(node);
+        if (depth > shift_left_value_with_overflow_check(0x1, HARD_RAM_ADDR_LIMIT, node->loc))
+            error_message(NETLIST, node->loc, "Memory %s of depth %ld exceeds ODIN depth bound of 2^%d.", node->name, depth, HARD_RAM_ADDR_LIMIT);
+
+        printf("DPRAM: %ld width %ld depth\n", width, depth); // width and depth are long, so %ld (not %zu)
+        total_memory_bits += width * depth;
+
+        total_memory_block_counter++;
+        if (width > memory_max_width) {
+            memory_max_width = width;
+        }
+        if (depth > memory_max_depth) {
+            memory_max_depth = depth;
+        }
+
+        temp = temp->next;
+    }
+
+    printf("\nTotal Logical Memory Blocks = %d \n", total_memory_block_counter);
+    printf("Total Logical Memory bits = %ld \n", total_memory_bits);
+    printf("Max Memory Width = %ld \n", memory_max_width);
+    printf("Max Memory Depth = %ld \n", memory_max_depth);
+    printf("\n");
+
+    return;
+}
+
+/*-------------------------------------------------------------------------
+ * (function: split_sp_memory_depth)
+ *
+ * This function works to split the depth of a single port memory into
+ *   several smaller memories.
+ *
+ *   split_size: the number of address bits in the resulting memory.
+ *------------------------------------------------------------------------
+ */
+void split_sp_memory_depth(nnode_t *node, int split_size)
+{
+    sp_ram_signals *signals = get_sp_ram_signals(node);
+
+    int logical_size = signals->addr->count; // number of address pins on the memory being split
+
+    /* Check that the memory needs to be split */
+    if (logical_size <= split_size) {
+        free_sp_ram_signals(signals);
+        sp_memory_list = insert_in_vptr_list(sp_memory_list, node); // small enough: register the node as it is
+        return;
+    }
+
+    int i;
+    signal_list_t *new_addr = init_signal_list();
+    for (i = 1; i < signals->addr->count; i++) // address pin 0 is left out: it is used below for the write enables and muxes
+        add_pin_to_signal_list(new_addr, signals->addr->pins[i]);
+
+    /* Create the two new memory nodes */
+    nnode_t *new_mem_node1 = allocate_nnode(node->loc);
+    nnode_t *new_mem_node2 = allocate_nnode(node->loc);
+
+    // Append the new name with an __S or __H
+    new_mem_node1->name = append_string(node->name, "__S");
+    new_mem_node2->name = append_string(node->name, "__H");
+
+    /* Copy properties from the original memory node */
+    new_mem_node1->type = node->type;
+    new_mem_node1->related_ast_node = node->related_ast_node;
+    new_mem_node1->traverse_visited = node->traverse_visited;
+    new_mem_node2->type = node->type;
+    new_mem_node2->related_ast_node = node->related_ast_node;
+    new_mem_node2->traverse_visited = node->traverse_visited;
+
+    // Move over the original pins to the first memory node.
+    signal_list_t *clk = init_signal_list();
+    add_pin_to_signal_list(clk, signals->clk);
+    remap_input_port_to_memory(new_mem_node1, new_addr, "addr");
+    remap_input_port_to_memory(new_mem_node1, signals->data, "data");
+
+    // Copy the inputs to the second memory node.
+    copy_input_port_to_memory(new_mem_node2, new_addr, "addr");
+    copy_input_port_to_memory(new_mem_node2, signals->data, "data");
+
+    // Hook up addresses and write enables.
+    {
+        signal_list_t *we;
+        nnode_t *and_g = make_2port_gate(LOGICAL_AND, 1, 1, 1, new_mem_node1, new_mem_node1->traverse_visited);
+        remap_pin_to_new_node(signals->we, and_g, 1); // first node's write enable = addr[0] AND we
+        remap_pin_to_new_node(signals->addr->pins[0], and_g, 0);
+
+        we = make_output_pins_for_existing_node(and_g, 1);
+        add_input_port_to_memory(new_mem_node1, we, "we");
+        free_signal_list(we);
+
+        nnode_t *not_g = make_not_gate_with_input(copy_input_npin(signals->addr->pins[0]), new_mem_node2, new_mem_node2->traverse_visited);
+        and_g = make_2port_gate(LOGICAL_AND, 1, 1, 1, new_mem_node2, new_mem_node2->traverse_visited);
+        connect_nodes(not_g, 0, and_g, 0); // second node's write enable = (NOT addr[0]) AND we
+
+        add_input_pin_to_node(and_g, copy_input_npin(signals->we), 1);
+
+        we = make_output_pins_for_existing_node(and_g, 1);
+        add_input_port_to_memory(new_mem_node2, we, "we");
+        free_signal_list(we);
+    }
+
+    // Add the clock signals.
+    remap_input_port_to_memory(new_mem_node1, clk, "clk");
+    copy_input_port_to_memory(new_mem_node2, clk, "clk");
+    free_signal_list(clk);
+
+    // Setup output ports on both nodes.
+    allocate_more_output_pins(new_mem_node1, signals->out->count);
+    add_output_port_information(new_mem_node1, signals->out->count);
+
+    allocate_more_output_pins(new_mem_node2, signals->out->count);
+    add_output_port_information(new_mem_node2, signals->out->count);
+
+    /* Copy over the output pins for the new memory */
+    for (i = 0; i < signals->data->count; i++) { // NOTE(review): bounded by data->count but indexes out->pins; assumes equal widths - confirm
+        nnode_t *mux = make_2port_gate(MUX_2, 2, 2, 1, new_mem_node1, new_mem_node1->traverse_visited);
+        nnode_t *not_g = make_not_gate(new_mem_node1, new_mem_node1->traverse_visited);
+        add_input_pin_to_node(mux, copy_input_npin(signals->addr->pins[0]), 0); // mux pin 0 <- addr[0]
+        add_input_pin_to_node(not_g, copy_input_npin(signals->addr->pins[0]), 0);
+        connect_nodes(not_g, 0, mux, 1); // mux pin 1 <- NOT addr[0]
+
+        npin_t *pin = signals->out->pins[i];
+        if (pin->name)
+            vtr::free(pin->name);
+        pin->name = mux->name; // NOTE(review): the pin now shares the mux's name pointer (no copy) - confirm ownership
+
+        if (pin->mapping)
+            vtr::free(pin->mapping);
+        pin->mapping = NULL;
+
+        remap_pin_to_new_node(pin, mux, 0); // the original output pin becomes the mux output
+
+        connect_nodes(new_mem_node1, i, mux, 2); // mux pin 2 <- output i of the first node
+        if (new_mem_node1->output_pins[i]->mapping) {
+            vtr::free(new_mem_node1->output_pins[i]->mapping);
+        }
+        new_mem_node1->output_pins[i]->mapping = vtr::strdup("out");
+
+        connect_nodes(new_mem_node2, i, mux, 3); // mux pin 3 <- output i of the second node
+        if (new_mem_node2->output_pins[i]->mapping) {
+            vtr::free(new_mem_node2->output_pins[i]->mapping);
+        }
+        new_mem_node2->output_pins[i]->mapping = vtr::strdup("out");
+    }
+
+    free_sp_ram_signals(signals);
+    free_signal_list(new_addr);
+
+    free_nnode(node); // the original node is replaced by the two new nodes
+
+    split_sp_memory_depth(new_mem_node1, split_size); // recurse: each new node has one address pin fewer and may need further splitting
+    split_sp_memory_depth(new_mem_node2, split_size);
+}
+
+/*-------------------------------------------------------------------------
+ * (function: split_dp_memory_depth)
+ *
+ * This function works to split the depth of a dual port memory into
+ *   several smaller memories.
+ *------------------------------------------------------------------------
+ */
+void split_dp_memory_depth(nnode_t *node, int split_size)
+{
+    dp_ram_signals *signals = get_dp_ram_signals(node);
+
+    int logical_size = signals->addr1->count; // number of addr1 pins on the memory being split
+
+    /* Check that the memory needs to be split */
+    if (logical_size <= split_size) {
+        free_dp_ram_signals(signals);
+        dp_memory_list = insert_in_vptr_list(dp_memory_list, node); // small enough: register the node as it is
+        return;
+    }
+
+    signal_list_t *new_addr1 = init_signal_list();
+
+    int i;
+    for (i = 1; i < signals->addr1->count; i++) // addr1 pin 0 is left out: it is used below for the write enables and muxes
+        add_pin_to_signal_list(new_addr1, signals->addr1->pins[i]);
+
+    signal_list_t *new_addr2 = init_signal_list();
+    for (i = 1; i < signals->addr2->count; i++) // addr2 pin 0 is left out for the same reason
+        add_pin_to_signal_list(new_addr2, signals->addr2->pins[i]);
+
+    /* Create the two new memory nodes */
+    nnode_t *new_mem_node1 = allocate_nnode(node->loc);
+    nnode_t *new_mem_node2 = allocate_nnode(node->loc);
+
+    // Append the new name with an __S or __H
+    new_mem_node1->name = append_string(node->name, "__S");
+    new_mem_node2->name = append_string(node->name, "__H");
+
+    /* Copy properties from the original memory node */
+    new_mem_node1->type = node->type;
+    new_mem_node1->related_ast_node = node->related_ast_node;
+    new_mem_node1->traverse_visited = node->traverse_visited;
+    new_mem_node2->type = node->type;
+    new_mem_node2->related_ast_node = node->related_ast_node;
+    new_mem_node2->traverse_visited = node->traverse_visited;
+
+    // Move over the original pins to the first memory node.
+    signal_list_t *clk = init_signal_list();
+    add_pin_to_signal_list(clk, signals->clk);
+    remap_input_port_to_memory(new_mem_node1, new_addr1, "addr1");
+    remap_input_port_to_memory(new_mem_node1, new_addr2, "addr2");
+    remap_input_port_to_memory(new_mem_node1, signals->data1, "data1");
+    remap_input_port_to_memory(new_mem_node1, signals->data2, "data2");
+
+    // Copy the inputs to the second memory node.
+    copy_input_port_to_memory(new_mem_node2, new_addr1, "addr1");
+    copy_input_port_to_memory(new_mem_node2, new_addr2, "addr2");
+    copy_input_port_to_memory(new_mem_node2, signals->data1, "data1");
+    copy_input_port_to_memory(new_mem_node2, signals->data2, "data2");
+
+    // Hook up addresses and write enables.
+    {
+        signal_list_t *we;
+        nnode_t *and_node = make_2port_gate(LOGICAL_AND, 1, 1, 1, new_mem_node1, new_mem_node1->traverse_visited);
+        remap_pin_to_new_node(signals->we1, and_node, 1); // first node's we1 = addr1[0] AND we1
+        remap_pin_to_new_node(signals->addr1->pins[0], and_node, 0);
+
+        we = make_output_pins_for_existing_node(and_node, 1);
+        add_input_port_to_memory(new_mem_node1, we, "we1");
+        free_signal_list(we);
+
+        and_node = make_2port_gate(LOGICAL_AND, 1, 1, 1, new_mem_node1, new_mem_node1->traverse_visited);
+        remap_pin_to_new_node(signals->we2, and_node, 1); // first node's we2 = addr2[0] AND we2
+        remap_pin_to_new_node(signals->addr2->pins[0], and_node, 0);
+
+        we = make_output_pins_for_existing_node(and_node, 1);
+        add_input_port_to_memory(new_mem_node1, we, "we2");
+        free_signal_list(we);
+
+        nnode_t *not_g = make_not_gate_with_input(copy_input_npin(signals->addr1->pins[0]), new_mem_node2, new_mem_node2->traverse_visited);
+        and_node = make_2port_gate(LOGICAL_AND, 1, 1, 1, new_mem_node2, new_mem_node2->traverse_visited);
+        connect_nodes(not_g, 0, and_node, 0); // second node's we1 = (NOT addr1[0]) AND we1
+        add_input_pin_to_node(and_node, copy_input_npin(signals->we1), 1);
+
+        we = make_output_pins_for_existing_node(and_node, 1);
+        add_input_port_to_memory(new_mem_node2, we, "we1");
+        free_signal_list(we);
+
+        not_g = make_not_gate_with_input(copy_input_npin(signals->addr2->pins[0]), new_mem_node2, new_mem_node2->traverse_visited);
+        and_node = make_2port_gate(LOGICAL_AND, 1, 1, 1, new_mem_node2, new_mem_node2->traverse_visited);
+        connect_nodes(not_g, 0, and_node, 0); // second node's we2 = (NOT addr2[0]) AND we2
+
+        add_input_pin_to_node(and_node, copy_input_npin(signals->we2), 1);
+
+        we = make_output_pins_for_existing_node(and_node, 1);
+        add_input_port_to_memory(new_mem_node2, we, "we2");
+        free_signal_list(we);
+    }
+
+    // Add the clock signals.
+    remap_input_port_to_memory(new_mem_node1, clk, "clk");
+    copy_input_port_to_memory(new_mem_node2, clk, "clk");
+    free_signal_list(clk);
+
+    // Setup output ports on both nodes.
+    allocate_more_output_pins(new_mem_node1, signals->out1->count + signals->out2->count);
+    add_output_port_information(new_mem_node1, signals->out1->count);
+    add_output_port_information(new_mem_node1, signals->out2->count);
+
+    allocate_more_output_pins(new_mem_node2, signals->out1->count + signals->out2->count);
+    add_output_port_information(new_mem_node2, signals->out1->count);
+    add_output_port_information(new_mem_node2, signals->out2->count);
+
+    /* Copy over the out1 output pins for the new memory */
+    for (i = 0; i < signals->data1->count; i++) { // NOTE(review): bounded by data1->count but indexes out1->pins; assumes equal widths - confirm
+        nnode_t *mux = make_2port_gate(MUX_2, 2, 2, 1, new_mem_node1, new_mem_node1->traverse_visited);
+        nnode_t *not_g = make_not_gate(new_mem_node1, new_mem_node1->traverse_visited);
+        add_input_pin_to_node(mux, copy_input_npin(signals->addr1->pins[0]), 0); // mux pin 0 <- addr1[0]
+        add_input_pin_to_node(not_g, copy_input_npin(signals->addr1->pins[0]), 0);
+        connect_nodes(not_g, 0, mux, 1); // mux pin 1 <- NOT addr1[0]
+
+        npin_t *pin = signals->out1->pins[i];
+        if (pin->name) {
+            vtr::free(pin->name);
+        }
+        pin->name = mux->name; // NOTE(review): the pin now shares the mux's name pointer (no copy) - confirm ownership
+        if (pin->mapping) {
+            vtr::free(pin->mapping);
+        }
+        pin->mapping = NULL;
+
+        remap_pin_to_new_node(pin, mux, 0); // the original out1 pin becomes the mux output
+
+        connect_nodes(new_mem_node1, i, mux, 2); // mux pin 2 <- out1 bit i of the first node
+        if (new_mem_node1->output_pins[i]->mapping) {
+            vtr::free(new_mem_node1->output_pins[i]->mapping);
+        }
+        new_mem_node1->output_pins[i]->mapping = vtr::strdup("out1");
+
+        connect_nodes(new_mem_node2, i, mux, 3); // mux pin 3 <- out1 bit i of the second node
+        if (new_mem_node2->output_pins[i]->mapping) {
+            vtr::free(new_mem_node2->output_pins[i]->mapping);
+        }
+        new_mem_node2->output_pins[i]->mapping = vtr::strdup("out1");
+    }
+
+    /* Copy over the out2 output pins for the new memory */
+    for (i = 0; i < signals->data1->count; i++) { // NOTE(review): bounded by data1->count but indexes out2->pins; assumes equal widths - confirm
+        nnode_t *mux = make_2port_gate(MUX_2, 2, 2, 1, new_mem_node1, new_mem_node1->traverse_visited);
+        nnode_t *not_g = make_not_gate(new_mem_node1, new_mem_node1->traverse_visited);
+        add_input_pin_to_node(mux, copy_input_npin(signals->addr2->pins[0]), 0); // mux pin 0 <- addr2[0]
+        add_input_pin_to_node(not_g, copy_input_npin(signals->addr2->pins[0]), 0);
+        connect_nodes(not_g, 0, mux, 1); // mux pin 1 <- NOT addr2[0]
+
+        int pin_index = new_mem_node1->output_port_sizes[0] + i; // out2 pins sit after the first output port's pins
+
+        npin_t *pin = signals->out2->pins[i];
+        if (pin->name) {
+            vtr::free(pin->name);
+        }
+        pin->name = mux->name; // NOTE(review): the pin now shares the mux's name pointer (no copy) - confirm ownership
+        if (pin->mapping) {
+            vtr::free(pin->mapping);
+        }
+        pin->mapping = NULL;
+
+        remap_pin_to_new_node(pin, mux, 0); // the original out2 pin becomes the mux output
+
+        connect_nodes(new_mem_node1, pin_index, mux, 2); // mux pin 2 <- out2 bit i of the first node
+        if (new_mem_node1->output_pins[pin_index]->mapping) {
+            vtr::free(new_mem_node1->output_pins[pin_index]->mapping);
+        }
+        new_mem_node1->output_pins[pin_index]->mapping = vtr::strdup("out2");
+
+        connect_nodes(new_mem_node2, pin_index, mux, 3); // mux pin 3 <- out2 bit i of the second node
+        if (new_mem_node2->output_pins[pin_index]->mapping) {
+            vtr::free(new_mem_node2->output_pins[pin_index]->mapping);
+        }
+        new_mem_node2->output_pins[pin_index]->mapping = vtr::strdup("out2");
+    }
+
+    free_dp_ram_signals(signals);
+    free_signal_list(new_addr1);
+    free_signal_list(new_addr2);
+    free_nnode(node); // the original node is replaced by the two new nodes
+
+    split_dp_memory_depth(new_mem_node1, split_size); // recurse: each new node has one address pin fewer per port
+    split_dp_memory_depth(new_mem_node2, split_size);
+}
+
+/*
+ * Width-splits the given memory up into chunks of the
+ * width specified in the arch file.
+ */
+void split_sp_memory_width(nnode_t *node, int target_size)
+{
+    char port_name[] = "data";
+    int data_port_number = get_input_port_index_from_mapping(node, port_name);
+
+    oassert(data_port_number != -1);
+
+    int data_port_size = node->input_port_sizes[data_port_number];
+
+    int num_memories = ceil((double)data_port_size / (double)target_size); // number of slices, rounded up
+
+    if (data_port_size <= target_size) {
+        // If we don't need to split, put the original node back.
+        sp_memory_list = insert_in_vptr_list(sp_memory_list, node);
+    } else {
+        int i;
+        int data_pins_moved = 0;   // data pins handed out to slices so far
+        int output_pins_moved = 0; // output pins handed out to slices so far
+        for (i = 0; i < num_memories; i++) {
+            nnode_t *new_node = allocate_nnode(node->loc);
+            new_node->name = append_string(node->name, "-%d", i);
+            sp_memory_list = insert_in_vptr_list(sp_memory_list, new_node);
+
+            /* Copy properties from the original node */
+            new_node->type = node->type;
+            new_node->related_ast_node = node->related_ast_node;
+            new_node->traverse_visited = node->traverse_visited;
+            new_node->node_data = NULL;
+
+            int j;
+            for (j = 0; j < node->num_input_port_sizes; j++) // one input port per original port, size 0 for now
+                add_input_port_information(new_node, 0);
+
+            add_output_port_information(new_node, 0); // a single output port, grown pin by pin below
+
+            int index = 0;     // next input pin slot on new_node
+            int old_index = 0; // current input pin position on the original node
+            for (j = 0; j < node->num_input_port_sizes; j++) {
+                // Move this node's share of data pins out of the data port of the original node.
+                if (j == data_port_number) {
+                    // Skip over data pins we've already moved.
+                    old_index += data_pins_moved;
+                    int k;
+                    for (k = 0; k < target_size && data_pins_moved < data_port_size; k++) {
+                        allocate_more_input_pins(new_node, 1);
+                        new_node->input_port_sizes[j]++;
+                        remap_pin_to_new_node(node->input_pins[old_index], new_node, index);
+                        index++;
+                        old_index++;
+                        data_pins_moved++;
+                    }
+                    int remaining_data_pins = data_port_size - data_pins_moved;
+                    // Skip over pins we have yet to copy.
+                    old_index += remaining_data_pins;
+                } else {
+                    int k;
+                    for (k = 0; k < node->input_port_sizes[j]; k++) {
+                        allocate_more_input_pins(new_node, 1);
+                        new_node->input_port_sizes[j]++;
+                        // Copy pins for all but the last memory. The last one gets the original pins moved to it.
+                        if (i < num_memories - 1)
+                            add_input_pin_to_node(new_node, copy_input_npin(node->input_pins[old_index]), index);
+                        else
+                            remap_pin_to_new_node(node->input_pins[old_index], new_node, index);
+                        index++;
+                        old_index++;
+                    }
+                }
+            }
+
+            index = 0;
+            old_index = 0;
+            old_index += output_pins_moved; // start after the output pins already given to earlier slices
+
+            int k;
+            for (k = 0; k < target_size && output_pins_moved < data_port_size; k++) { // NOTE(review): bounded by data_port_size; assumes outputs are as wide as data - confirm
+                allocate_more_output_pins(new_node, 1);
+                new_node->output_port_sizes[0]++;
+                remap_pin_to_new_node(node->output_pins[old_index], new_node, index);
+                index++;
+                old_index++;
+                output_pins_moved++;
+            }
+        }
+        // Free the original node.
+        free_nnode(node);
+    }
+}
+
+/*
+ * Splits the given dual port memory width into one or more memories with
+ * width less than or equal to target_size.
+ */
+void split_dp_memory_width(nnode_t *node, int target_size)
+{
+    char data1_name[] = "data1";
+    char data2_name[] = "data2";
+    char out1_name[] = "out1";
+    char out2_name[] = "out2";
+
+    int data1_port_number = get_input_port_index_from_mapping(node, data1_name);
+    int data2_port_number = get_input_port_index_from_mapping(node, data2_name);
+
+    int out1_port_number = get_output_port_index_from_mapping(node, out1_name);
+    int out2_port_number = get_output_port_index_from_mapping(node, out2_name);
+
+    oassert(data1_port_number != -1);
+    oassert(data2_port_number != -1);
+    oassert(out1_port_number != -1);
+    oassert(out2_port_number != -1);
+
+    int data1_port_size = node->input_port_sizes[data1_port_number];
+    int data2_port_size = node->input_port_sizes[data2_port_number];
+
+    int out1_port_size = node->output_port_sizes[out1_port_number];
+    int out2_port_size = node->output_port_sizes[out2_port_number];
+
+    oassert(data1_port_size == data2_port_size);
+    oassert(out1_port_size == out2_port_size);
+    oassert(data1_port_size == out1_port_size);
+
+    int num_memories = ceil((double)data1_port_size / (double)target_size);
+
+    if (data1_port_size <= target_size) {
+        // If we're not splitting, put the original memory node back.
+        dp_memory_list = insert_in_vptr_list(dp_memory_list, node);
+    } else {
+        int i;
+        int data1_pins_moved = 0;
+        int data2_pins_moved = 0;
+        int out1_pins_moved = 0;
+        int out2_pins_moved = 0;
+        for (i = 0; i < num_memories; i++) {
+            nnode_t *new_node = allocate_nnode(node->loc);
+            new_node->name = append_string(node->name, "-%d", i);
+            dp_memory_list = insert_in_vptr_list(dp_memory_list, new_node);
+
+            /* Copy properties from the original node */
+            new_node->type = node->type;
+            new_node->related_ast_node = node->related_ast_node;
+            new_node->traverse_visited = node->traverse_visited;
+            new_node->node_data = NULL;
+
+            int j;
+            for (j = 0; j < node->num_input_port_sizes; j++)
+                add_input_port_information(new_node, 0);
+
+            int index = 0;
+            int old_index = 0;
+            for (j = 0; j < node->num_input_port_sizes; j++) {
+                // Move this node's share of data pins out of the data port of the original node.
+                if (j == data1_port_number) {
+                    // Skip over data pins we've already moved.
+                    old_index += data1_pins_moved;
+                    int k;
+                    for (k = 0; k < target_size && data1_pins_moved < data1_port_size; k++) {
+                        allocate_more_input_pins(new_node, 1);
+                        new_node->input_port_sizes[j]++;
+                        remap_pin_to_new_node(node->input_pins[old_index], new_node, index);
+                        index++;
+                        old_index++;
+                        data1_pins_moved++;
+                    }
+                    int remaining_data_pins = data1_port_size - data1_pins_moved;
+                    // Skip over pins we have yet to copy.
+                    old_index += remaining_data_pins;
+                } else if (j == data2_port_number) {
+                    // Skip over data pins we've already moved.
+                    old_index += data2_pins_moved;
+                    int k;
+                    for (k = 0; k < target_size && data2_pins_moved < data2_port_size; k++) {
+                        allocate_more_input_pins(new_node, 1);
+                        new_node->input_port_sizes[j]++;
+                        remap_pin_to_new_node(node->input_pins[old_index], new_node, index);
+                        index++;
+                        old_index++;
+                        data2_pins_moved++;
+                    }
+                    int remaining_data_pins = data2_port_size - data2_pins_moved;
+                    // Skip over pins we have yet to copy.
+                    old_index += remaining_data_pins;
+                } else {
+                    int k;
+                    for (k = 0; k < node->input_port_sizes[j]; k++) {
+                        allocate_more_input_pins(new_node, 1);
+                        new_node->input_port_sizes[j]++;
+                        // Copy pins for all but the last memory. the last one get the original pins moved to it.
+                        if (i < num_memories - 1)
+                            add_input_pin_to_node(new_node, copy_input_npin(node->input_pins[old_index]), index);
+                        else
+                            remap_pin_to_new_node(node->input_pins[old_index], new_node, index);
+                        index++;
+                        old_index++;
+                    }
+                }
+            }
+
+            for (j = 0; j < node->num_output_port_sizes; j++)
+                add_output_port_information(new_node, 0);
+
+            index = 0;
+            old_index = 0;
+            for (j = 0; j < node->num_output_port_sizes; j++) {
+                // Move this node's share of data pins out of the data port of the original node.
+                if (j == out1_port_number) {
+                    // Skip over data pins we've already moved.
+                    old_index += out1_pins_moved;
+                    int k;
+                    for (k = 0; k < target_size && out1_pins_moved < out1_port_size; k++) {
+                        allocate_more_output_pins(new_node, 1);
+                        new_node->output_port_sizes[j]++;
+                        remap_pin_to_new_node(node->output_pins[old_index], new_node, index);
+                        index++;
+                        old_index++;
+                        out1_pins_moved++;
+                    }
+                    int remaining_pins = out1_port_size - out1_pins_moved;
+                    // Skip over pins we have yet to copy.
+                    old_index += remaining_pins;
+                } else if (j == out2_port_number) {
+                    // Skip over data pins we've already moved.
+                    old_index += out2_pins_moved;
+                    int k;
+                    for (k = 0; k < target_size && out2_pins_moved < out2_port_size; k++) {
+                        allocate_more_output_pins(new_node, 1);
+                        new_node->output_port_sizes[j]++;
+                        remap_pin_to_new_node(node->output_pins[old_index], new_node, index);
+                        index++;
+                        old_index++;
+                        out2_pins_moved++;
+                    }
+                    int remaining_pins = out2_port_size - out2_pins_moved;
+                    // Skip over pins we have yet to copy.
+                    old_index += remaining_pins;
+                } else {
+                    oassert(false);
+                }
+            }
+        }
+        // Free the original node.
+        free_nnode(node);
+    }
+}
+
+/*
+ * Determines the single port ram split depth from
+ * configuration.split_memory_depth and the "addr" port of the architecture
+ * model: -1 selects the port's min_size, a positive value is used as is, and
+ * anything else (including -2) selects the port's size.
+ */
+long get_sp_ram_split_depth()
+{
+    // Fail loudly instead of dereferencing a missing model or "addr" port.
+    oassert(single_port_rams != NULL);
+    t_model_ports *hb_ports = get_model_port(single_port_rams->inputs, "addr");
+    oassert(hb_ports != NULL);
+
+    long split_size = hb_ports->size; /* -2 and every other value */
+    if (configuration.split_memory_depth == -1) /* MIN */
+        split_size = hb_ports->min_size;
+    else if (configuration.split_memory_depth > 0)
+        split_size = configuration.split_memory_depth;
+    oassert(split_size > 0);
+    return split_size;
+}
+
+/*
+ * Determines the dual port ram split depth from
+ * configuration.split_memory_depth and the "addr1" port of the architecture
+ * model: -1 selects the port's min_size, a positive value is used as is, and
+ * anything else (including -2) selects the port's size.
+ */
+long get_dp_ram_split_depth()
+{
+    // Fail loudly instead of dereferencing a missing model or "addr1" port.
+    oassert(dual_port_rams != NULL);
+    t_model_ports *hb_ports = get_model_port(dual_port_rams->inputs, "addr1");
+    oassert(hb_ports != NULL);
+
+    long split_depth = hb_ports->size; /* -2 and every other value */
+    if (configuration.split_memory_depth == -1) /* MIN */
+        split_depth = hb_ports->min_size;
+    else if (configuration.split_memory_depth > 0)
+        split_depth = configuration.split_memory_depth;
+    oassert(split_depth > 0);
+    return split_depth;
+}
+
+/*
+ * Determines the single port ram split width: 1 when
+ * configuration.split_memory_width is set, otherwise the size of the "data"
+ * input port of the single_port_rams architecture model.
+ */
+int get_sp_ram_split_width()
+{
+    if (configuration.split_memory_width)
+        return 1;
+    // Fail loudly instead of dereferencing a missing model or "data" port.
+    oassert(single_port_rams != NULL);
+    t_model_ports *ports = get_model_port(single_port_rams->inputs, "data");
+    oassert(ports != NULL);
+    return ports->size;
+}
+
+/*
+ * Determines the dual port ram split width: 1 when
+ * configuration.split_memory_width is set, otherwise the size of the "data1"
+ * input port of the dual_port_rams architecture model.
+ */
+int get_dp_ram_split_width()
+{
+    if (configuration.split_memory_width)
+        return 1;
+    // Fail loudly instead of dereferencing a missing model or "data1" port.
+    oassert(dual_port_rams != NULL);
+    t_model_ports *ports = get_model_port(dual_port_rams->inputs, "data1");
+    oassert(ports != NULL);
+    return ports->size;
+}
+
+/*
+ * Rebuilds sp_memory_list and dp_memory_list so that they hold only the
+ * memories whose depth or width exceeds its soft logic threshold.
+ */
+void filter_memories_by_soft_logic_cutoff()
+{
+    if (single_port_rams) {
+        t_linked_vptr *pending = sp_memory_list;
+        sp_memory_list = NULL;
+        while (pending != NULL) {
+            nnode_t *node = (nnode_t *)pending->data_vptr;
+            oassert(node != NULL);
+            oassert(node->type == MEMORY);
+            pending = delete_in_vptr_list(pending);
+            const long depth = get_sp_ram_depth(node);
+            const long width = get_sp_ram_width(node);
+            const bool within_cutoff = depth <= configuration.soft_logic_memory_depth_threshold && width <= configuration.soft_logic_memory_width_threshold;
+            if (!within_cutoff)
+                sp_memory_list = insert_in_vptr_list(sp_memory_list, node);
+        }
+    }
+
+    if (dual_port_rams) {
+        t_linked_vptr *pending = dp_memory_list;
+        dp_memory_list = NULL;
+        while (pending != NULL) {
+            nnode_t *node = (nnode_t *)pending->data_vptr;
+            oassert(node != NULL);
+            oassert(node->type == MEMORY);
+            pending = delete_in_vptr_list(pending);
+            const long depth = get_dp_ram_depth(node);
+            const long width = get_dp_ram_width(node);
+            const bool within_cutoff = depth <= configuration.soft_logic_memory_depth_threshold && width <= configuration.soft_logic_memory_width_threshold;
+            if (!within_cutoff)
+                dp_memory_list = insert_in_vptr_list(dp_memory_list, node);
+        }
+    }
+}
+
+/*-------------------------------------------------------------------------
+ * (function: iterate_memories)
+ *
+ * Reports on logical memory usage and then, for each of the single and
+ *      dual port ram models that is defined, splits the listed memories by
+ *      depth and by width and pads what is left to the model's port sizes.
+ *
+ * Memories that do not exceed the soft logic thresholds are dropped from the
+ * lists before the splitting and again before the padding.
+ *-----------------------------------------------------------------------*/
+void iterate_memories(netlist_t *netlist)
+{
+    /* Report on Logical Memory usage */
+    check_memories_and_report_distribution();
+
+    // Remove memories that don't meet the soft logic cutoff.
+    filter_memories_by_soft_logic_cutoff();
+
+    if (single_port_rams) {
+        // Depth split: detach the list and hand every node to split_sp_memory_depth.
+        int split_depth = get_sp_ram_split_depth(); // the long result is narrowed to int here
+        t_linked_vptr *temp = sp_memory_list;
+        sp_memory_list = NULL; // NOTE(review): presumably refilled by the split calls - confirm
+        while (temp != NULL) {
+            nnode_t *node = (nnode_t *)temp->data_vptr;
+            oassert(node != NULL);
+            oassert(node->type == MEMORY);
+            temp = delete_in_vptr_list(temp);
+            split_sp_memory_depth(node, split_depth);
+        }
+
+        // Width split: same detach-and-process pattern over the current list.
+        int split_width = get_sp_ram_split_width();
+        temp = sp_memory_list;
+        sp_memory_list = NULL;
+        while (temp != NULL) {
+            nnode_t *node = (nnode_t *)temp->data_vptr;
+            oassert(node != NULL);
+            oassert(node->type == MEMORY);
+            temp = delete_in_vptr_list(temp);
+            split_sp_memory_width(node, split_width);
+        }
+
+        // Remove memories that are too small to use hard blocks (filters both lists).
+        filter_memories_by_soft_logic_cutoff();
+
+        // Pad the rest; pad_sp_memory_width puts each node back on sp_memory_list.
+        temp = sp_memory_list;
+        sp_memory_list = NULL;
+        while (temp != NULL) {
+            nnode_t *node = (nnode_t *)temp->data_vptr;
+            oassert(node != NULL);
+            oassert(node->type == MEMORY);
+            temp = delete_in_vptr_list(temp);
+            pad_sp_memory_width(node, netlist);
+            pad_memory_input_port(node, netlist, single_port_rams, "addr");
+        }
+    }
+
+    if (dual_port_rams) {
+        // Depth split: detach the list and hand every node to split_dp_memory_depth.
+        int split_depth = get_dp_ram_split_depth(); // the long result is narrowed to int here
+        t_linked_vptr *temp = dp_memory_list;
+        dp_memory_list = NULL; // NOTE(review): presumably refilled by the split calls - confirm
+        while (temp != NULL) {
+            nnode_t *node = (nnode_t *)temp->data_vptr;
+            oassert(node != NULL);
+            oassert(node->type == MEMORY);
+            temp = delete_in_vptr_list(temp);
+            split_dp_memory_depth(node, split_depth);
+        }
+
+        // Width split: same detach-and-process pattern over the current list.
+        int split_width = get_dp_ram_split_width();
+        temp = dp_memory_list;
+        dp_memory_list = NULL;
+        while (temp != NULL) {
+            nnode_t *node = (nnode_t *)temp->data_vptr;
+            oassert(node != NULL);
+            oassert(node->type == MEMORY);
+            temp = delete_in_vptr_list(temp);
+            split_dp_memory_width(node, split_width);
+        }
+
+        // Remove memories that are too small to use hard blocks (filters both lists).
+        filter_memories_by_soft_logic_cutoff();
+
+        // Pad the rest; pad_dp_memory_width puts each node back on dp_memory_list.
+        temp = dp_memory_list;
+        dp_memory_list = NULL;
+        while (temp != NULL) {
+            nnode_t *node = (nnode_t *)temp->data_vptr;
+            oassert(node != NULL);
+            oassert(node->type == MEMORY);
+            temp = delete_in_vptr_list(temp);
+            pad_dp_memory_width(node, netlist);
+            pad_memory_input_port(node, netlist, dual_port_rams, "addr1");
+            pad_memory_input_port(node, netlist, dual_port_rams, "addr2");
+        }
+    }
+}
+
+/*-------------------------------------------------------------------------
+ * (function: free_memory_lists)
+ *
+ * Deletes every entry of sp_memory_list and then of dp_memory_list,
+ *      leaving both lists NULL.
+ *-----------------------------------------------------------------------*/
+void free_memory_lists()
+{
+    t_linked_vptr **lists[] = {&sp_memory_list, &dp_memory_list};
+    for (t_linked_vptr **list : lists)
+        while (*list != NULL)
+            *list = delete_in_vptr_list(*list);
+}
+
+/*
+ * Pads both data inputs and both outputs of a dual port memory to the sizes
+ * given by the dual_port_rams model, then adds the node to dp_memory_list.
+ */
+void pad_dp_memory_width(nnode_t *node, netlist_t *netlist)
+{
+    oassert(node->type == MEMORY);
+    oassert(dual_port_rams != NULL);
+    const char *const data_ports[] = {"data1", "data2"};
+    for (const char *port : data_ports)
+        pad_memory_input_port(node, netlist, dual_port_rams, port);
+    const char *const out_ports[] = {"out1", "out2"};
+    for (const char *port : out_ports)
+        pad_memory_output_port(node, netlist, dual_port_rams, port);
+    dp_memory_list = insert_in_vptr_list(dp_memory_list, node);
+}
+
+/*
+ * Pads the "data" input and the "out" output of a single port memory to the
+ * sizes given by the single_port_rams model, then adds it to sp_memory_list.
+ */
+void pad_sp_memory_width(nnode_t *node, netlist_t *netlist)
+{
+    oassert(node->type == MEMORY);
+    oassert(single_port_rams != NULL);
+    t_model *const model = single_port_rams;
+
+    pad_memory_input_port(node, netlist, model, "data");
+    pad_memory_output_port(node, netlist, model, "out");
+    sp_memory_list = insert_in_vptr_list(sp_memory_list, node);
+}
+
+/*
+ * Pads the given output port up to the width the given model declares for it,
+ * adding uniquely named pins and shifting the pins of later ports up.
+ */
+void pad_memory_output_port(nnode_t *node, netlist_t * /*netlist*/, t_model *model, const char *port_name)
+{
+    static int pad_pin_number = 0;
+
+    oassert(model != NULL);
+    int port_number = get_output_port_index_from_mapping(node, port_name);
+    int port_index = get_output_pin_index_from_mapping(node, port_name);
+    // An unknown port would otherwise index output_port_sizes out of bounds.
+    oassert(port_number != -1);
+    oassert(port_index != -1);
+    int port_size = node->output_port_sizes[port_number];
+    t_model_ports *ports = get_model_port(model->outputs, port_name);
+    oassert(ports != NULL);
+
+    int target_size = ports->size;
+    int diff = target_size - port_size;
+    if (diff > 0) {
+        allocate_more_output_pins(node, diff);
+        // Shift other pins to the right, if any, starting from the highest index.
+        int i;
+        for (i = node->num_output_pins - 1; i >= port_index + target_size; i--)
+            move_output_pin(node, i - diff, i);
+
+        for (i = port_index + port_size; i < port_index + target_size; i++) {
+            // Add new pins to the higher order spots.
+            npin_t *new_pin = allocate_npin();
+            // Pad outputs with a unique and descriptive name to avoid collisions.
+            new_pin->name = append_string("", "unconnected_memory_output~%d", pad_pin_number++);
+            new_pin->mapping = vtr::strdup(port_name);
+            add_output_pin_to_node(node, new_pin, i);
+        }
+        node->output_port_sizes[port_number] = target_size;
+    }
+}
+
+/*
+ * Pads the given input port of a memory node to the width specified in the
+ * given model. The missing high-order positions are filled with pad pins
+ * mapped to port_name; a port that is already wide enough is left alone.
+ */
+void pad_memory_input_port(nnode_t *node, netlist_t *netlist, t_model *model, const char *port_name)
+{
+    oassert(node->type == MEMORY);
+    oassert(model != NULL);
+
+    const int port_number = get_input_port_index_from_mapping(node, port_name);
+    const int port_index = get_input_pin_index_from_mapping(node, port_name);
+    oassert(port_number != -1);
+    oassert(port_index != -1);
+
+    const int current_size = node->input_port_sizes[port_number];
+    t_model_ports *ports = get_model_port(model->inputs, port_name);
+    oassert(ports != NULL);
+
+    const int wanted_size = ports->size;
+    const int missing = wanted_size - current_size;
+    if (missing <= 0)
+        return; // nothing to pad
+
+    // Expand the inputs
+    allocate_more_input_pins(node, missing);
+
+    // Move the pins of later ports up by the added amount, highest index first.
+    const int port_end = port_index + wanted_size;
+    for (int dst = node->num_input_pins - 1; dst >= port_end; dst--)
+        move_input_pin(node, dst - missing, dst);
+
+    // Fill the new positions of this port with pad pins carrying its mapping.
+    for (int slot = port_index + current_size; slot < port_end; slot++) {
+        add_input_pin_to_node(node, get_pad_pin(netlist), slot);
+        npin_t *pad = node->input_pins[slot];
+        if (pad->mapping)
+            vtr::free(pad->mapping);
+        pad->mapping = vtr::strdup(port_name);
+    }
+
+    node->input_port_sizes[port_number] = wanted_size;
+}
+
+bool is_sp_ram(nnode_t *node) // true when the related AST identifier is SINGLE_PORT_RAM_string
+{
+    oassert(node != NULL);
+    oassert(node->type == MEMORY);
+    return strcmp(node->related_ast_node->identifier_node->types.identifier, SINGLE_PORT_RAM_string) == 0;
+}
+
+bool is_dp_ram(nnode_t *node) // true when the related AST identifier is DUAL_PORT_RAM_string
+{
+    oassert(node != NULL);
+    oassert(node->type == MEMORY);
+    return strcmp(node->related_ast_node->identifier_node->types.identifier, DUAL_PORT_RAM_string) == 0;
+}
+
+/**
+ * (function: is_blif_sp_ram)
+ *
+ * @brief checks whether the given node is a valid single port ram based on
+ * the VTR primitive definition: its name contains SINGLE_PORT_RAM_string, it
+ * has 4 input ports and 1 output port, and every pin mapping is one of the
+ * single_port_ram port names
+ *
+ * @param node the node to check; its name must be set
+ */
+bool is_blif_sp_ram(nnode_t *node)
+{
+    oassert(node->name);
+
+    /* the node name has to contain the single port ram identifier */
+    if (std::string(node->name).find(SINGLE_PORT_RAM_string) == std::string::npos)
+        return false;
+
+    /* check the num input/output ports */
+    if (node->num_input_port_sizes != 4 || node->num_output_port_sizes != 1)
+        return false;
+
+    /* port connections were passed by name; verify input port names */
+    static const char *const input_port_names[] = {"we", "clk", "addr", "data"};
+    for (int i = 0; i < node->num_input_pins; i++) {
+        oassert(node->input_pins[i]->mapping);
+        const char *port_id = node->input_pins[i]->mapping;
+
+        /* comparison of mapping ports with single_port_ram port names */
+        bool known_port = false;
+        for (const char *candidate : input_port_names)
+            known_port = known_port || (strcmp(port_id, candidate) == 0);
+        if (!known_port)
+            return false;
+    }
+
+    /* port connections were passed by name; verify output port names */
+    for (int i = 0; i < node->num_output_pins; i++) {
+        oassert(node->output_pins[i]->mapping);
+        /* "out" is the only output port name accepted */
+        if (strcmp(node->output_pins[i]->mapping, "out") != 0)
+            return false;
+    }
+
+    /* every check passed */
+    return true;
+}
+
+/**
+ * (function: is_blif_dp_ram)
+ *
+ * @brief checks whether the given node is a valid dual port ram based on
+ * the VTR primitive definition: its name contains DUAL_PORT_RAM_string, it
+ * has 7 input ports and 2 output ports, and every pin mapping is one of the
+ * dual_port_ram port names
+ *
+ * @param node the node to check; its name must be set
+ */
+bool is_blif_dp_ram(nnode_t *node)
+{
+    oassert(node->name);
+
+    /* the node name has to contain the dual port ram identifier */
+    if (std::string(node->name).find(DUAL_PORT_RAM_string) == std::string::npos)
+        return false;
+
+    /* check the num input/output ports */
+    if (node->num_input_port_sizes != 7 || node->num_output_port_sizes != 2)
+        return false;
+
+    /* port connections were passed by name; verify input port names */
+    static const char *const input_port_names[] = {"clk", "we1", "we2", "addr1", "addr2", "data1", "data2"};
+    for (int i = 0; i < node->num_input_pins; i++) {
+        oassert(node->input_pins[i]->mapping);
+        const char *port_id = node->input_pins[i]->mapping;
+
+        /* comparison of mapping ports with dual_port_ram port names */
+        bool known_port = false;
+        for (const char *candidate : input_port_names)
+            known_port = known_port || (strcmp(port_id, candidate) == 0);
+        if (!known_port)
+            return false;
+    }
+
+    /* port connections were passed by name; verify output port names */
+    for (int i = 0; i < node->num_output_pins; i++) {
+        oassert(node->output_pins[i]->mapping);
+        const char *port_id = node->output_pins[i]->mapping;
+        /* "out1" and "out2" are the only output port names accepted */
+        if (strcmp(port_id, "out1") != 0 && strcmp(port_id, "out2") != 0)
+            return false;
+    }
+
+    /* every check passed */
+    return true;
+}
+
+/*
+ * Groups the pins of a single port ram node by their mapping into a newly
+ * allocated sp_ram_signals; release it with free_sp_ram_signals().
+ */
+sp_ram_signals *get_sp_ram_signals(nnode_t *node)
+{
+    oassert(is_sp_ram(node));
+    ast_node_t *ast_node = node->related_ast_node;
+    sp_ram_signals *signals = (sp_ram_signals *)vtr::malloc(sizeof(sp_ram_signals));
+    signals->addr = init_signal_list();
+    signals->data = init_signal_list();
+    signals->out = init_signal_list();
+    signals->we = NULL;
+    signals->clk = NULL;
+
+    // Separate the input signals according to their mapping.
+    for (int i = 0; i < node->num_input_pins; i++) {
+        npin_t *pin = node->input_pins[i];
+        const char *mapping = pin->mapping;
+        if (strcmp(mapping, "addr") == 0)
+            add_pin_to_signal_list(signals->addr, pin);
+        else if (strcmp(mapping, "data") == 0)
+            add_pin_to_signal_list(signals->data, pin);
+        else if (strcmp(mapping, "we") == 0)
+            signals->we = pin;
+        else if (strcmp(mapping, "clk") == 0)
+            signals->clk = pin;
+        else
+            error_message(NETLIST, ast_node->loc, "Unexpected input pin mapping \"%s\" on memory node: %s\n", pin->mapping, node->name);
+    }
+
+    oassert(signals->clk != NULL);
+    oassert(signals->we != NULL);
+    oassert(signals->addr->count >= 1);
+    oassert(signals->data->count >= 1);
+    oassert(signals->data->count == node->num_output_pins);
+
+    for (int i = 0; i < node->num_output_pins; i++) {
+        npin_t *pin = node->output_pins[i];
+        if (strcmp(pin->mapping, "out") == 0)
+            add_pin_to_signal_list(signals->out, pin);
+        else
+            error_message(NETLIST, ast_node->loc, "Unexpected output pin mapping \"%s\" on memory node: %s\n", pin->mapping, node->name);
+    }
+    oassert(signals->out->count == signals->data->count);
+    return signals;
+}
+
+void free_sp_ram_signals(sp_ram_signals *signalsvar)
+{
+    free_signal_list(signalsvar->data); // the three lists that get_sp_ram_signals created
+    free_signal_list(signalsvar->addr);
+    free_signal_list(signalsvar->out);
+    // The we and clk members are not touched; only the struct itself is released.
+    vtr::free(signalsvar);
+}
+
+/*
+ * Groups the pins of a dual port ram node by their mapping into a newly
+ * allocated dp_ram_signals; release it with free_dp_ram_signals().
+ */
+dp_ram_signals *get_dp_ram_signals(nnode_t *node)
+{
+    oassert(is_dp_ram(node));
+    ast_node_t *ast_node = node->related_ast_node;
+    dp_ram_signals *signals = (dp_ram_signals *)vtr::malloc(sizeof(dp_ram_signals));
+    signals->addr1 = init_signal_list();
+    signals->addr2 = init_signal_list();
+    signals->data1 = init_signal_list();
+    signals->data2 = init_signal_list();
+    signals->out1 = init_signal_list();
+    signals->out2 = init_signal_list();
+    signals->we1 = NULL;
+    signals->we2 = NULL;
+    signals->clk = NULL;
+
+    // Separate the input signals according to their mapping.
+    for (int i = 0; i < node->num_input_pins; i++) {
+        npin_t *pin = node->input_pins[i];
+        const char *mapping = pin->mapping;
+        if (strcmp(mapping, "addr1") == 0)
+            add_pin_to_signal_list(signals->addr1, pin);
+        else if (strcmp(mapping, "addr2") == 0)
+            add_pin_to_signal_list(signals->addr2, pin);
+        else if (strcmp(mapping, "data1") == 0)
+            add_pin_to_signal_list(signals->data1, pin);
+        else if (strcmp(mapping, "data2") == 0)
+            add_pin_to_signal_list(signals->data2, pin);
+        else if (strcmp(mapping, "we1") == 0)
+            signals->we1 = pin;
+        else if (strcmp(mapping, "we2") == 0)
+            signals->we2 = pin;
+        else if (strcmp(mapping, "clk") == 0)
+            signals->clk = pin;
+        else
+            error_message(NETLIST, ast_node->loc, "Unexpected input pin mapping \"%s\" on memory node: %s\n", pin->mapping, node->name);
+    }
+
+    // Sanity checks.
+    oassert(signals->clk != NULL);
+    oassert(signals->we1 != NULL && signals->we2 != NULL);
+    oassert(signals->addr1->count >= 1 && signals->data1->count >= 1);
+    oassert(signals->addr2->count >= 1 && signals->data2->count >= 1);
+    oassert(signals->addr1->count == signals->addr2->count);
+    oassert(signals->data1->count == signals->data2->count);
+    oassert(signals->data1->count + signals->data2->count == node->num_output_pins);
+
+    // Separate output signals according to mapping.
+    for (int i = 0; i < node->num_output_pins; i++) {
+        npin_t *pin = node->output_pins[i];
+        if (strcmp(pin->mapping, "out1") == 0)
+            add_pin_to_signal_list(signals->out1, pin);
+        else if (strcmp(pin->mapping, "out2") == 0)
+            add_pin_to_signal_list(signals->out2, pin);
+        else
+            error_message(NETLIST, ast_node->loc, "Unexpected output pin mapping \"%s\" on memory node: %s\n", pin->mapping, node->name);
+    }
+    oassert(signals->out1->count == signals->out2->count);
+    oassert(signals->out1->count == signals->data1->count);
+    return signals;
+}
+
+void free_dp_ram_signals(dp_ram_signals *signalsvar)
+{
+    free_signal_list(signalsvar->data1); // the six lists that get_dp_ram_signals created
+    free_signal_list(signalsvar->data2);
+    free_signal_list(signalsvar->addr1);
+    free_signal_list(signalsvar->addr2);
+    free_signal_list(signalsvar->out1);
+    free_signal_list(signalsvar->out2);
+    // The we1, we2 and clk members are not touched; only the struct itself is released.
+    vtr::free(signalsvar);
+}
+
+/*
+ * Expands the given single port ram block into soft logic: a write-enable AND
+ * gate per address, a flipflop per (data bit, address) pair and a multi port
+ * multiplexer per data bit. The original node is freed before returning.
+ */
+void instantiate_soft_single_port_ram(nnode_t *node, short mark, netlist_t *netlist)
+{
+    oassert(is_sp_ram(node));
+    sp_ram_signals *signals = get_sp_ram_signals(node);
+
+    // Construct an address decoder.
+    signal_list_t *decoder = create_decoder(node, mark, signals->addr, netlist);
+
+    // The total number of memory addresses. (2^address_bits)
+    long num_addr = decoder->count;
+
+    nnode_t **and_gates = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * num_addr);
+
+    for (long i = 0; i < num_addr; i++) {
+        npin_t *address_pin = decoder->pins[i];
+        /* Check that the input pin is driven */
+        oassert(address_pin->net->num_driver_pins || address_pin->net == netlist->zero_net || address_pin->net == netlist->one_net ||
+                address_pin->net == netlist->pad_net);
+
+        // An AND gate to enable and disable writing.
+        nnode_t *and_g = make_1port_logic_gate(LOGICAL_AND, 2, node, mark);
+        add_input_pin_to_node(and_g, address_pin, 0);
+
+        // The original we pin is remapped onto the first gate; later gates get copies.
+        if (!i)
+            remap_pin_to_new_node(signals->we, and_g, 1);
+        else
+            add_input_pin_to_node(and_g, copy_input_npin(signals->we), 1);
+
+        and_gates[i] = and_g;
+    }
+
+    for (long i = 0; i < signals->data->count; i++) {
+        npin_t *data_pin = signals->data->pins[i];
+
+        // The output multiplexer determines which memory cell is connected to the output register.
+        nnode_t *output_mux = make_2port_gate(MULTI_PORT_MUX, num_addr, num_addr, 1, node, mark);
+
+        int j;
+        for (j = 0; j < num_addr; j++) {
+            npin_t *address_pin = decoder->pins[j];
+            /* Check that the input pin is driven */
+            oassert(address_pin->net->num_driver_pins || address_pin->net == netlist->zero_net || address_pin->net == netlist->one_net ||
+                    address_pin->net == netlist->pad_net);
+
+            // A multiplexer switches between accepting incoming data and keeping existing data.
+            nnode_t *mux = make_2port_gate(MUX_2, 2, 2, 1, node, mark);
+            nnode_t *not_g = make_not_gate(node, mark);
+            connect_nodes(and_gates[j], 0, not_g, 0);
+            connect_nodes(and_gates[j], 0, mux, 0); // pin 0: write enable for this address
+            connect_nodes(not_g, 0, mux, 1);        // pin 1: its inverse
+            // Pin 2 takes the data bit: the original pin for address 0, copies afterwards.
+            if (!j)
+                remap_pin_to_new_node(data_pin, mux, 2);
+            else
+                add_input_pin_to_node(mux, copy_input_npin(data_pin), 2);
+
+            // A flipflop holds the value of each memory cell.
+            nnode_t *ff = make_2port_gate(FF_NODE, 1, 1, 1, node, mark);
+            connect_nodes(mux, 0, ff, 0);
+            // Only the very first flipflop takes the original clk pin; the rest get copies.
+            if (!i && !j)
+                remap_pin_to_new_node(signals->clk, ff, 1);
+            else
+                add_input_pin_to_node(ff, copy_input_npin(signals->clk), 1);
+
+            // The output of the flipflop connects back to the multiplexer (to hold the value.)
+            connect_nodes(ff, 0, mux, 3);
+
+            // The flipflop connects to the output multiplexer.
+            connect_nodes(ff, 0, output_mux, num_addr + j);
+
+            // Hook the address pin up to the output mux.
+            add_input_pin_to_node(output_mux, copy_input_npin(address_pin), j);
+            ff->attributes->clk_edge_type = RISING_EDGE_SENSITIVITY;
+        }
+
+        // get_sp_ram_signals asserted that there are as many output pins as data pins.
+        npin_t *output_pin = node->output_pins[i];
+
+        // Make sure the BLIF name comes directly from the MUX.
+        if (output_pin->name)
+            vtr::free(output_pin->name);
+        output_pin->name = NULL;
+
+        remap_pin_to_new_node(output_pin, output_mux, 0);
+        instantiate_multi_port_mux(output_mux, mark, netlist);
+    }
+
+    vtr::free(and_gates); // only the array of gate pointers is released here
+    // Free signal lists.
+    free_sp_ram_signals(signals);
+    free_signal_list(decoder);
+
+    // Free the original hard block memory.
+    free_nnode(node);
+}
+
+/*
+ * Expands the given dual port ram block into soft logic.
+ */
+void instantiate_soft_dual_port_ram(nnode_t *node, short mark, netlist_t *netlist)
+{
+    oassert(is_dp_ram(node));
+
+    dp_ram_signals *signals = get_dp_ram_signals(node);
+
+    // Construct the address decoders.
+    signal_list_t *decoder1 = create_decoder(node, mark, signals->addr1, netlist);
+    signal_list_t *decoder2 = create_decoder(node, mark, signals->addr2, netlist);
+
+    oassert(decoder1->count == decoder2->count);
+
+    // The total number of memory addresses. (2^address_bits)
+    int num_addr = decoder1->count;
+    int data_width = signals->data1->count;
+
+    // Arrays of common gates, one per address.
+    nnode_t **and1_gates = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * num_addr);
+    nnode_t **and2_gates = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * num_addr);
+    nnode_t **or_gates = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * num_addr);
+
+    int i;
+    for (i = 0; i < num_addr; i++) {
+        npin_t *addr1_pin = decoder1->pins[i];
+        npin_t *addr2_pin = decoder2->pins[i];
+
+        oassert(addr1_pin->net->num_driver_pins || addr1_pin->net == netlist->zero_net || addr1_pin->net == netlist->one_net ||
+                addr1_pin->net == netlist->pad_net);
+        oassert(addr2_pin->net->num_driver_pins || addr2_pin->net == netlist->zero_net || addr2_pin->net == netlist->one_net ||
+                addr2_pin->net == netlist->pad_net);
+
+        // Write enable and gate for address 1.
+        nnode_t *and1 = make_1port_logic_gate(LOGICAL_AND, 2, node, mark);
+        add_input_pin_to_node(and1, addr1_pin, 0);
+
+        if (!i)
+            remap_pin_to_new_node(signals->we1, and1, 1);
+        else
+            add_input_pin_to_node(and1, copy_input_npin(signals->we1), 1);
+
+        // Write enable and gate for address 2.
+        nnode_t *and2 = make_1port_logic_gate(LOGICAL_AND, 2, node, mark);
+        add_input_pin_to_node(and2, addr2_pin, 0);
+
+        if (!i)
+            remap_pin_to_new_node(signals->we2, and2, 1);
+        else
+            add_input_pin_to_node(and2, copy_input_npin(signals->we2), 1);
+
+        and1_gates[i] = and1;
+        and2_gates[i] = and2;
+
+        // OR, to enable writing to this address when either port selects it for writing.
+        nnode_t *or_g = make_1port_logic_gate(LOGICAL_OR, 2, node, mark);
+        connect_nodes(and1, 0, or_g, 0);
+        connect_nodes(and2, 0, or_g, 1);
+
+        or_gates[i] = or_g;
+    }
+
+    for (i = 0; i < data_width; i++) {
+        npin_t *data1_pin = signals->data1->pins[i];
+        npin_t *data2_pin = signals->data2->pins[i];
+
+        // The output multiplexer determines which memory cell is connected to the output register.
+        nnode_t *output_mux1 = make_2port_gate(MULTI_PORT_MUX, num_addr, num_addr, 1, node, mark);
+        nnode_t *output_mux2 = make_2port_gate(MULTI_PORT_MUX, num_addr, num_addr, 1, node, mark);
+
+        int j;
+        for (j = 0; j < num_addr; j++) {
+            npin_t *addr1_pin = decoder1->pins[j];
+            npin_t *addr2_pin = decoder2->pins[j];
+
+            oassert(addr1_pin->net->num_driver_pins || addr1_pin->net == netlist->zero_net || addr1_pin->net == netlist->one_net ||
+                    addr1_pin->net == netlist->pad_net);
+            oassert(addr2_pin->net->num_driver_pins || addr2_pin->net == netlist->zero_net || addr2_pin->net == netlist->one_net ||
+                    addr2_pin->net == netlist->pad_net);
+
+            // The data mux selects between the two data lines for this address.
+            nnode_t *data_mux = make_2port_gate(MUX_2, 2, 2, 1, node, mark);
+            // Port 2 before 1 to mimic the simulator's behaviour when the addresses are the same.
+            connect_nodes(and2_gates[j], 0, data_mux, 0);
+            connect_nodes(and1_gates[j], 0, data_mux, 1);
+            if (!j)
+                remap_pin_to_new_node(data2_pin, data_mux, 2);
+            else
+                add_input_pin_to_node(data_mux, copy_input_npin(data2_pin), 2);
+            if (!j)
+                remap_pin_to_new_node(data1_pin, data_mux, 3);
+            else
+                add_input_pin_to_node(data_mux, copy_input_npin(data1_pin), 3);
+
+            nnode_t *not_g = make_not_gate(node, mark);
+            connect_nodes(or_gates[j], 0, not_g, 0);
+
+            // A multiplexer switches between accepting incoming data and keeping existing data.
+            nnode_t *mux = make_2port_gate(MUX_2, 2, 2, 1, node, mark);
+            connect_nodes(or_gates[j], 0, mux, 0);
+            connect_nodes(not_g, 0, mux, 1);
+            connect_nodes(data_mux, 0, mux, 2);
+
+            // A flipflop holds the value of each memory cell.
+            nnode_t *ff = make_2port_gate(FF_NODE, 1, 1, 1, node, mark);
+            connect_nodes(mux, 0, ff, 0);
+            if (!i && !j)
+                remap_pin_to_new_node(signals->clk, ff, 1);
+            else
+                add_input_pin_to_node(ff, copy_input_npin(signals->clk), 1);
+
+            // The output of the flipflop connects back to the multiplexer (to hold the value.)
+            connect_nodes(ff, 0, mux, 3);
+
+            // Connect the flipflop to both output muxes.
+            connect_nodes(ff, 0, output_mux1, num_addr + j);
+            connect_nodes(ff, 0, output_mux2, num_addr + j);
+
+            // Connect address lines to the output muxes for this address.
+            add_input_pin_to_node(output_mux1, copy_input_npin(addr1_pin), j);
+            add_input_pin_to_node(output_mux2, copy_input_npin(addr2_pin), j);
+            ff->attributes->clk_edge_type = RISING_EDGE_SENSITIVITY;
+        }
+
+        npin_t *out1_pin = signals->out1->pins[i];
+        npin_t *out2_pin = signals->out2->pins[i];
+
+        // Make sure the BLIF name comes directly from the MUX.
+        if (out1_pin->name)
+            vtr::free(out1_pin->name);
+        out1_pin->name = NULL;
+
+        if (out2_pin->name)
+            vtr::free(out2_pin->name);
+        out2_pin->name = NULL;
+
+        remap_pin_to_new_node(out1_pin, output_mux1, 0);
+        remap_pin_to_new_node(out2_pin, output_mux2, 0);
+
+        // Convert the output muxes to MUX_2 nodes.
+        instantiate_multi_port_mux(output_mux1, mark, netlist);
+        instantiate_multi_port_mux(output_mux2, mark, netlist);
+    }
+
+    vtr::free(and1_gates);
+    vtr::free(and2_gates);
+    vtr::free(or_gates);
+
+    // Free signal lists.
+    free_dp_ram_signals(signals);
+    free_signal_list(decoder1);
+    free_signal_list(decoder2);
+
+    // Free the original hard block memory.
+    free_nnode(node);
+}
+
+/*
+ * Creates an n to 2^n decoder from the input signal list: one NOT gate per input and one n-input AND gate per output.
+ */
+signal_list_t *create_decoder(nnode_t *node, short mark, signal_list_t *input_list, netlist_t *netlist)
+{
+    long num_inputs = input_list->count;
+    if (num_inputs > SOFT_RAM_ADDR_LIMIT)
+        error_message(
+          NETLIST, node->loc,
+          "Memory %s of depth 2^%ld exceeds ODIN bound of 2^%d.\nMust use an FPGA architecture that contains embedded hard block memories",
+          node->name, num_inputs, SOFT_RAM_ADDR_LIMIT);
+
+    // Number of outputs is 2^num_inputs
+    long num_outputs = shift_left_value_with_overflow_check(0x1, num_inputs, node->loc);
+
+    // Create NOT gates for all inputs and put a fanout pin of each NOT output net in its own signal list.
+    signal_list_t *not_gates = init_signal_list();
+    for (long i = 0; i < num_inputs; i++) {
+        if (!input_list->pins[i]->net->num_driver_pins && input_list->pins[i]->net != netlist->zero_net &&
+            input_list->pins[i]->net != netlist->one_net && input_list->pins[i]->net != netlist->pad_net) {
+            warning_message(NETLIST, node->loc, "Signal %s is not driven. padding with ground\n", input_list->pins[i]->name);
+            add_fanout_pin_to_net(netlist->zero_net, input_list->pins[i]);
+        }
+        // The input pin becomes input 0 of a new NOT gate; the gate's output pin drives a new net named from the gate.
+        nnode_t *not_g = make_not_gate(node, mark);
+        remap_pin_to_new_node(input_list->pins[i], not_g, 0);
+        npin_t *not_output = allocate_npin();
+        add_output_pin_to_node(not_g, not_output, 0);
+        nnet_t *net = allocate_nnet();
+        net->name = make_full_ref_name(NULL, NULL, NULL, not_g->name, 0);
+        add_driver_pin_to_net(net, not_output);
+        not_output = allocate_npin();
+        add_fanout_pin_to_net(net, not_output);
+        add_pin_to_signal_list(not_gates, not_output);
+        // The old list entry was remapped to the NOT gate, so replace it with a new fanout pin on the same net.
+        npin_t *pin = allocate_npin();
+        net = input_list->pins[i]->net;
+
+        add_fanout_pin_to_net(net, pin);
+
+        input_list->pins[i] = pin;
+    }
+
+    // Create AND gates and assign signals.
+    signal_list_t *return_list = init_signal_list();
+    for (long i = 0; i < num_outputs; i++) {
+        // Output i is an AND gate over every input: plain where the matching bit of i is 1, negated where it is 0.
+        nnode_t *and_g = make_1port_logic_gate(LOGICAL_AND, num_inputs, node, mark);
+
+        for (long j = 0; j < num_inputs; j++) {
+            // Look at the jth bit of i. If it's 0, take the negated signal.
+            long value = shift_left_value_with_overflow_check(0x1, j, and_g->loc);
+            value &= i;
+            value >>= j;
+
+            npin_t *pin = value ? input_list->pins[j] : not_gates->pins[j];
+
+            // Row 0 selects only negated pins and the last row only plain pins: those rows use the originals, all others use copies.
+            if (i > 0 && i < num_outputs - 1)
+                pin = copy_input_npin(pin);
+
+            // Connect the signal to input j of the AND gate for this output.
+            add_input_pin_to_node(and_g, pin, j);
+        }
+
+        // Add output pin, net, and fanout pin.
+        npin_t *output = allocate_npin();
+        nnet_t *net = allocate_nnet();
+        add_output_pin_to_node(and_g, output, 0);
+        net->name = make_full_ref_name(NULL, NULL, NULL, and_g->name, 0);
+        add_driver_pin_to_net(net, output);
+        output = allocate_npin();
+        add_fanout_pin_to_net(net, output);
+
+        // Add the fanout pin (decoder output) to the return list.
+        add_pin_to_signal_list(return_list, output);
+    }
+
+    free_signal_list(not_gates);
+    return return_list;
+}
+
+/**
+ * (function: create_single_port_ram)
+ *
+ * @brief build a MEMORY node wired as a single port ram from the given signals
+ *
+ * @param spram_signals addr/data/we/clk pins; the out list is created here when NULL
+ * @param node corresponding netlist node (supplies location and memory_id)
+ *
+ * @return the newly allocated single port ram node
+ */
+nnode_t *create_single_port_ram(sp_ram_signals *spram_signals, nnode_t *node)
+{
+    /* clk, we, at least one addr bit and one data bit are mandatory */
+    oassert(spram_signals->clk != NULL);
+    oassert(spram_signals->we != NULL);
+    oassert(spram_signals->addr->count >= 1);
+    oassert(spram_signals->data->count >= 1);
+    if (spram_signals->out != NULL) {
+        oassert(spram_signals->data->count == spram_signals->out->count);
+    }
+
+    /* the new node is allocated at the location of the given node */
+    nnode_t *ram = allocate_nnode(node->loc);
+
+    ram->type = MEMORY;
+    /* the hard block name feeds both the node name and the ast identifier below */
+    char *hard_block_name = vtr::strdup(SINGLE_PORT_RAM_string);
+    ram->name = node_name(ram, hard_block_name);
+    ram->attributes->memory_id = vtr::strdup(node->attributes->memory_id);
+
+    /* partial mapping needs some ast information, so a fake ast node is attached */
+    ram->related_ast_node = create_node_w_type(RAM, node->loc);
+    ram->related_ast_node->identifier_node = create_tree_node_id(hard_block_name, node->loc);
+
+    /* INPUTS: ports are added in the order addr, data, we, clk */
+    /* address port */
+    add_input_port_to_memory(ram, spram_signals->addr, "addr");
+
+    /* data port */
+    add_input_port_to_memory(ram, spram_signals->data, "data");
+
+    /* write enable: the single pin is wrapped in a temporary signal list */
+    signal_list_t *we_port = init_signal_list();
+    add_pin_to_signal_list(we_port, spram_signals->we);
+    add_input_port_to_memory(ram, we_port, "we");
+
+    /* clock: the single pin is wrapped in a temporary signal list */
+    signal_list_t *clk_port = init_signal_list();
+    add_pin_to_signal_list(clk_port, spram_signals->clk);
+    add_input_port_to_memory(ram, clk_port, "clk");
+
+    /* OUTPUT */
+    if (!spram_signals->out) {
+        /* no output pins were supplied: create one per data bit */
+        spram_signals->out = init_signal_list();
+        for (int bit = 0; bit < spram_signals->data->count; ++bit) {
+            npin_t *driver = allocate_npin();
+            nnet_t *driven_net = allocate_nnet();
+            /* each new pin drives its own new net */
+            add_driver_pin_to_net(driven_net, driver);
+            /* keep the pin in the spram signals */
+            add_pin_to_signal_list(spram_signals->out, driver);
+        }
+    }
+    add_output_port_to_memory(ram, spram_signals->out, "out");
+
+    /* record the node in sp_memory_list; its ports already follow the SPRAM layout */
+    sp_memory_list = insert_in_vptr_list(sp_memory_list, ram);
+    /* mark the matching hard block model (if any) as used, for the BLIF output */
+    register_memory_model(ram);
+
+    /* the temporary one-pin lists are no longer needed */
+    free_signal_list(clk_port);
+    free_signal_list(we_port);
+
+    return ram;
+}
+
+/**
+ * (function: create_dual_port_ram)
+ *
+ * @brief create a dual port ram with the given dpram signals
+ *
+ * @param dpram_signals dpram signals; out1/out2 are created here when NULL
+ * @param node corresponding netlist node (supplies location and memory_id)
+ *
+ * @return a new dual port ram
+ */
+nnode_t *create_dual_port_ram(dp_ram_signals *dpram_signals, nnode_t *node)
+{
+    /* sanity checks */
+    oassert(dpram_signals->clk);
+    oassert((dpram_signals->we1) && (dpram_signals->we2));
+    oassert((dpram_signals->addr1) && (dpram_signals->addr2));
+    oassert((dpram_signals->data1) && (dpram_signals->data2)); /* data2->count is read below, so it must be checked too */
+    oassert(dpram_signals->addr1->count == dpram_signals->addr2->count);
+    oassert(dpram_signals->data1->count == dpram_signals->data2->count);
+    oassert(dpram_signals->addr1->count >= 1 && dpram_signals->data1->count >= 1);
+    oassert(dpram_signals->addr2->count >= 1 && dpram_signals->data2->count >= 1);
+
+    if (dpram_signals->out1 != NULL) {
+        oassert(dpram_signals->data1->count == dpram_signals->out1->count);
+    }
+    if (dpram_signals->out2 != NULL) {
+        oassert(dpram_signals->data2->count == dpram_signals->out2->count);
+    }
+
+    /* create a dual port ram node */
+    nnode_t *dpram = allocate_nnode(node->loc);
+
+    dpram->type = MEMORY;
+    /* some information from ast node is needed in partial mapping */
+    char *hb_name = vtr::strdup(DUAL_PORT_RAM_string);
+    dpram->name = node_name(dpram, hb_name);
+    dpram->attributes->memory_id = vtr::strdup(node->attributes->memory_id);
+
+    /* Create a fake ast node. */
+    dpram->related_ast_node = create_node_w_type(RAM, node->loc);
+    dpram->related_ast_node->identifier_node = create_tree_node_id(hb_name, node->loc);
+
+    /* INPUTS: ports are added in the order addr1, addr2, data1, data2, we1, we2, clk */
+    /* hook address ports into dpram */
+    add_input_port_to_memory(dpram, dpram_signals->addr1, "addr1");
+    add_input_port_to_memory(dpram, dpram_signals->addr2, "addr2");
+
+    /* hook data ports into dpram */
+    add_input_port_to_memory(dpram, dpram_signals->data1, "data1");
+    add_input_port_to_memory(dpram, dpram_signals->data2, "data2");
+
+    /* hook enable pins to dpram (each single pin is wrapped in a temporary signal list) */
+    signal_list_t *we1 = init_signal_list();
+    add_pin_to_signal_list(we1, dpram_signals->we1);
+    add_input_port_to_memory(dpram, we1, "we1");
+
+    signal_list_t *we2 = init_signal_list();
+    add_pin_to_signal_list(we2, dpram_signals->we2);
+    add_input_port_to_memory(dpram, we2, "we2");
+
+    /* hook clk pin into dpram */
+    signal_list_t *clk = init_signal_list();
+    add_pin_to_signal_list(clk, dpram_signals->clk);
+    add_input_port_to_memory(dpram, clk, "clk");
+
+    /* OUTPUT */
+    if (dpram_signals->out1 == NULL) {
+        /* no out1 pins were supplied: create one per data1 bit */
+        dpram_signals->out1 = init_signal_list();
+        for (int i = 0; i < dpram_signals->data1->count; i++) {
+            npin_t *new_pin = allocate_npin();
+            nnet_t *new_net = allocate_nnet();
+            /* add pin as the net driver */
+            add_driver_pin_to_net(new_net, new_pin);
+            /* store them into dpram signals */
+            add_pin_to_signal_list(dpram_signals->out1, new_pin);
+        }
+    }
+    add_output_port_to_memory(dpram, dpram_signals->out1, "out1");
+    if (dpram_signals->out2 == NULL) {
+        /* no out2 pins were supplied: create one per data2 bit */
+        dpram_signals->out2 = init_signal_list();
+        for (int i = 0; i < dpram_signals->data2->count; i++) {
+            npin_t *new_pin = allocate_npin();
+            nnet_t *new_net = allocate_nnet();
+            /* add pin as the net driver */
+            add_driver_pin_to_net(new_net, new_pin);
+            /* store them into dpram signals */
+            add_pin_to_signal_list(dpram_signals->out2, new_pin);
+        }
+    }
+    add_output_port_to_memory(dpram, dpram_signals->out2, "out2");
+
+    /* already compatible with DPRAM config so we leave it as is */
+    dp_memory_list = insert_in_vptr_list(dp_memory_list, dpram);
+    /* register the DPRAM in arch model to have the related model in BLIF for simulation */
+    register_memory_model(dpram);
+
+    // CLEAN UP: only the temporary one-pin lists created above are freed here
+    free_signal_list(we1);
+    free_signal_list(we2);
+    free_signal_list(clk);
+
+    return (dpram);
+}
+
+/**
+ * (function: register_memory_model)
+ *
+ * @brief flag the hard block model matching this memory as used,
+ * when the given architecture provides one
+ *
+ * @param mem memory node whose ast identifier names the hard block
+ */
+void register_memory_model(nnode_t *mem)
+{
+    /* look the hard block up by the identifier stored on the related ast node */
+    t_model *model = find_hard_block(mem->related_ast_node->identifier_node->types.identifier);
+
+    /* not supported by the FPGA architecture: nothing to declare */
+    if (!model)
+        return;
+    model->used = 1; /* declared as used for the blif generation */
+}
+
+/**
+ * (function: resolve_single_port_ram)
+ *
+ * @brief resolve the spram block by regrouping its pins into spram signals
+ * and rebuilding it in the port order Odin's partial mapping phase expects
+ *
+ * @param node pointing to a spram node
+ * @param traverse_mark_number unique traversal mark for blif elaboration pass
+ * @param netlist pointer to the current netlist file (unused here)
+ */
+void resolve_single_port_ram(nnode_t *node, uintptr_t traverse_mark_number, netlist_t * /* netlist */)
+{
+    oassert(node->traverse_visited == traverse_mark_number);
+    oassert(node->num_input_port_sizes == 4);
+    oassert(node->num_output_port_sizes == 1);
+
+    /* reject nodes that is_blif_sp_ram does not accept */ //@TODO
+    if (!is_blif_sp_ram(node))
+        error_message(RESOLVE, node->loc, "SPRAM (%s) ports mismatch with VTR single_port_ram hard block ports\n", node->name);
+
+    /**
+     * port layout of the incoming blif single port ram
+     *
+     * ADDR:    input port [0]
+     * CLOCK:   input port [1]
+     * DATAIN:  input port [2]
+     * WE:      input port [3]
+     *
+     * DATAOUT: output port [0]
+     */
+
+    int bit;
+    int SP_ADDR_width = node->input_port_sizes[0];
+    int SP_CLK_width = node->input_port_sizes[1]; // should be 1
+    int SP_DATA_width = node->input_port_sizes[2];
+    int SP_WE_width = node->input_port_sizes[3]; // should be 1
+    int SP_OUT_width = node->output_port_sizes[0];
+
+    /* clk and we are single pins; data in and data out have the same width */
+    oassert(SP_CLK_width == 1 && SP_WE_width == 1);
+    oassert(SP_DATA_width == node->output_port_sizes[0]);
+
+    /* index of the first pin of the port currently being read */
+    int base = 0;
+    /* container for the pins taken off the old node */
+    sp_ram_signals *ram_ports = init_sp_ram_signals();
+
+    /* INPUTS */
+    /* address pins */
+    for (bit = 0; bit < SP_ADDR_width; ++bit) {
+        npin_t *addr_pin = node->input_pins[base + bit];
+        /* clear the slot on the old node; the pin will be connected to the new spram */
+        addr_pin->node->input_pins[addr_pin->pin_node_idx] = NULL;
+
+        add_pin_to_signal_list(ram_ports->addr, addr_pin);
+    }
+    base += SP_ADDR_width;
+
+    /* clock pin */
+    npin_t *clk_pin = node->input_pins[base];
+    /* clear the slot on the old node; the pin will be connected to the new spram */
+    clk_pin->node->input_pins[clk_pin->pin_node_idx] = NULL;
+    ram_ports->clk = clk_pin;
+    /* step over the single clock pin */
+    base++;
+
+    /* data-in pins */
+    for (bit = 0; bit < SP_DATA_width; ++bit) {
+        /* next data pin of the old node */
+        npin_t *data_pin = node->input_pins[base + bit];
+        /* clear the slot on the old node; the pin will be connected to the new spram */
+        data_pin->node->input_pins[data_pin->pin_node_idx] = NULL;
+
+        add_pin_to_signal_list(ram_ports->data, data_pin);
+    }
+    base += SP_DATA_width;
+
+    /* write enable pin */
+    npin_t *we_pin = node->input_pins[base];
+    /* clear the slot on the old node; the pin will be connected to the new spram */
+    we_pin->node->input_pins[we_pin->pin_node_idx] = NULL;
+    ram_ports->we = we_pin;
+
+    /* OUTPUT */
+    /* data-out pins, indexed from the start of the output pin array */
+    base = 0;
+    for (bit = 0; bit < SP_OUT_width; ++bit) {
+        /* next output pin of the old node */
+        npin_t *out_pin = node->output_pins[base + bit];
+        /* clear the slot on the old node; the pin will be connected to the new spram */
+        out_pin->node->output_pins[out_pin->pin_node_idx] = NULL;
+
+        add_pin_to_signal_list(ram_ports->out, out_pin);
+    }
+
+    /* build the replacement spram from the collected pins */
+    create_single_port_ram(ram_ports, node);
+
+    // CLEAN UP: the old node and the temporary signal container
+    free_nnode(node);
+    free_sp_ram_signals(ram_ports);
+}
+
+/**
+ * (function: resolve_dual_port_ram)
+ *
+ * @brief resolve the dpram block by regrouping its pins into dpram signals
+ * and rebuilding it in the port order Odin's partial mapping phase expects
+ *
+ * @param node pointing to a dual port ram node
+ * @param traverse_mark_number unique traversal mark for blif elaboration pass
+ * @param netlist pointer to the current netlist file (source of pad pins)
+ */
+void resolve_dual_port_ram(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist)
+{
+    oassert(node->traverse_visited == traverse_mark_number);
+    oassert(node->num_input_port_sizes == 7);
+    oassert(node->num_output_port_sizes == 2);
+
+    /* reject nodes that is_blif_dp_ram does not accept */ //@TODO
+    if (!is_blif_dp_ram(node))
+        error_message(RESOLVE, node->loc, "DPRAM (%s) ports mismatch with VTR dual_port_ram hard block ports\n", node->name);
+
+    /**
+     * port layout of the incoming blif dual port ram
+     *
+     * ADDR1:    input port [0]
+     * ADDR2:    input port [1]
+     * CLOCK:    input port [2]
+     * DATAIN1:  input port [3]
+     * DATAIN2:  input port [4]
+     * WE1:      input port [5]
+     * WE2:      input port [6]
+     *
+     * DATAOUT1: output port [0]
+     * DATAOUT2: output port [1]
+     */
+    int bit;
+    int DP_ADDR1_width = node->input_port_sizes[0];
+    int DP_ADDR2_width = node->input_port_sizes[1];
+    int DP_CLK_width = node->input_port_sizes[2]; // should be 1
+    int DP_DATA1_width = node->input_port_sizes[3];
+    int DP_DATA2_width = node->input_port_sizes[4];
+    int DP_WE1_width = node->input_port_sizes[5]; // should be 1
+    int DP_WE2_width = node->input_port_sizes[6]; // should be 1
+    int DP_OUT1_width = node->output_port_sizes[0];
+    int DP_OUT2_width = node->output_port_sizes[1];
+
+    /* clk/we1/we2 are single pins; both data inputs and both outputs share one width */
+    oassert((DP_CLK_width == 1) && (DP_WE1_width == 1) && (DP_WE2_width == 1));
+    oassert(DP_DATA1_width == DP_DATA2_width);
+    oassert(DP_DATA1_width == node->output_port_sizes[0]);
+    oassert(DP_DATA1_width == node->output_port_sizes[1]);
+
+    int max_addr_width = std::max(DP_ADDR1_width, DP_ADDR2_width);
+
+    /* index of the first pin of the port currently being read */
+    int base = 0;
+    /* container for the pins taken off the old node */
+    dp_ram_signals *ram_ports = init_dp_ram_signals();
+
+    /* INPUTS */
+    /* addr1 pins, extended to the wider of the two address ports */
+    for (bit = 0; bit < max_addr_width; ++bit) {
+        /* bits beyond the real addr1 width are filled with pad pins */
+        if (bit >= DP_ADDR1_width) {
+            /* addr1 is the narrower port: pad it */
+            add_pin_to_signal_list(ram_ports->addr1, get_pad_pin(netlist));
+        } else {
+            npin_t *addr1_pin = node->input_pins[base + bit];
+            /* clear the slot on the old node; the pin will be connected to the new dpram */
+            addr1_pin->node->input_pins[addr1_pin->pin_node_idx] = NULL;
+            add_pin_to_signal_list(ram_ports->addr1, addr1_pin);
+        }
+    }
+    base += DP_ADDR1_width;
+
+    /* addr2 pins, extended to the wider of the two address ports */
+    for (bit = 0; bit < max_addr_width; ++bit) {
+        /* bits beyond the real addr2 width are filled with pad pins */
+        if (bit >= DP_ADDR2_width) {
+            /* addr2 is the narrower port: pad it */
+            add_pin_to_signal_list(ram_ports->addr2, get_pad_pin(netlist));
+        } else {
+            npin_t *addr2_pin = node->input_pins[base + bit];
+            /* clear the slot on the old node; the pin will be connected to the new dpram */
+            addr2_pin->node->input_pins[addr2_pin->pin_node_idx] = NULL;
+            add_pin_to_signal_list(ram_ports->addr2, addr2_pin);
+        }
+    }
+    base += DP_ADDR2_width;
+
+    /* clock pin */
+    npin_t *clk_pin = node->input_pins[base];
+    /* clear the slot on the old node; the pin will be connected to the new dpram */
+    clk_pin->node->input_pins[clk_pin->pin_node_idx] = NULL;
+    ram_ports->clk = clk_pin;
+    /* step over the single clock pin */
+    base++;
+
+    /* data1 pins */
+    for (bit = 0; bit < DP_DATA1_width; ++bit) {
+        /* next data1 pin of the old node */
+        npin_t *data1_pin = node->input_pins[base + bit];
+        /* clear the slot on the old node; the pin will be connected to the new dpram */
+        data1_pin->node->input_pins[data1_pin->pin_node_idx] = NULL;
+
+        add_pin_to_signal_list(ram_ports->data1, data1_pin);
+    }
+    base += DP_DATA1_width;
+
+    /* data2 pins */
+    for (bit = 0; bit < DP_DATA2_width; ++bit) {
+        /* next data2 pin of the old node */
+        npin_t *data2_pin = node->input_pins[base + bit];
+        /* clear the slot on the old node; the pin will be connected to the new dpram */
+        data2_pin->node->input_pins[data2_pin->pin_node_idx] = NULL;
+
+        add_pin_to_signal_list(ram_ports->data2, data2_pin);
+    }
+    base += DP_DATA2_width;
+
+    /* we1 pin */
+    npin_t *we1_pin = node->input_pins[base];
+    /* clear the slot on the old node; the pin will be connected to the new dpram */
+    we1_pin->node->input_pins[we1_pin->pin_node_idx] = NULL;
+    ram_ports->we1 = we1_pin;
+    /* step over the single we1 pin */
+    base++;
+
+    /* we2 pin */
+    npin_t *we2_pin = node->input_pins[base];
+    /* clear the slot on the old node; the pin will be connected to the new dpram */
+    we2_pin->node->input_pins[we2_pin->pin_node_idx] = NULL;
+    ram_ports->we2 = we2_pin;
+
+    /* OUTPUT */
+    base = 0;
+    /* out1 pins, indexed from the start of the output pin array */
+    for (bit = 0; bit < DP_OUT1_width; ++bit) {
+        /* next out1 pin of the old node */
+        npin_t *out1_pin = node->output_pins[base + bit];
+        /* clear the slot on the old node; the pin will be connected to the new dpram */
+        out1_pin->node->output_pins[out1_pin->pin_node_idx] = NULL;
+
+        add_pin_to_signal_list(ram_ports->out1, out1_pin);
+    }
+    base += DP_OUT1_width;
+
+    /* out2 pins, which follow the out1 pins */
+    for (bit = 0; bit < DP_OUT2_width; ++bit) {
+        /* next out2 pin of the old node */
+        npin_t *out2_pin = node->output_pins[base + bit];
+        /* clear the slot on the old node; the pin will be connected to the new dpram */
+        out2_pin->node->output_pins[out2_pin->pin_node_idx] = NULL;
+
+        add_pin_to_signal_list(ram_ports->out2, out2_pin);
+    }
+
+    /* build the replacement dpram from the collected (and padded) pins */
+    create_dual_port_ram(ram_ports, node);
+
+    // CLEAN UP: the old node and the temporary signal container
+    free_nnode(node);
+    free_dp_ram_signals(ram_ports);
+}
diff --git a/parmys-plugin/src/multipliers.cc b/parmys-plugin/src/multipliers.cc
new file mode 100644
index 000000000..424e92dda
--- /dev/null
+++ b/parmys-plugin/src/multipliers.cc
@@ -0,0 +1,1910 @@
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "multipliers.h"
+#include "netlist_utils.h"
+#include "node_creation_library.h"
+#include "odin_globals.h"
+#include "odin_types.h"
+#include "odin_util.h"
+#include "partial_map.h"
+#include "read_xml_arch_file.h"
+#include <algorithm>
+#include <cmath>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <string>
+
+#include "adders.h"
+
+#include "vtr_list.h"
+#include "vtr_memory.h"
+#include "vtr_util.h"
+
+#include "../parmys_utils.hpp"
+
+using vtr::insert_in_vptr_list;
+using vtr::t_linked_vptr;
+
+t_model *hard_multipliers = NULL; // NOTE(review): presumably the architecture's hard multiplier model -- confirm where it is set
+t_linked_vptr *mult_list = NULL;  // NOTE(review): presumably multiplier nodes added via insert_in_vptr_list -- confirm
+int min_mult = 0;
+int *mults = NULL;
+// Forward declarations; the static ones have internal linkage (this translation unit only).
+void record_mult_distribution(nnode_t *node);
+void terminate_mult_distribution();
+void init_split_multiplier(nnode_t *node, nnode_t *ptr, int offa, int a, int offb, int b, nnode_t *node_a, nnode_t *node_b);
+void init_multiplier_adder(nnode_t *node, nnode_t *parent, int a, int b);
+void split_multiplier_a(nnode_t *node, int a0, int a1, int b);
+void split_multiplier_b(nnode_t *node, int a, int b1, int b0);
+void pad_multiplier(nnode_t *node, netlist_t *netlist);
+void split_soft_multiplier(nnode_t *node, netlist_t *netlist);
+static mult_port_stat_e is_constant_multipication(nnode_t *node, netlist_t *netlist);
+static signal_list_t *implement_constant_multipication(nnode_t *node, mult_port_stat_e port_status, short mark, netlist_t *netlist);
+static nnode_t *perform_const_mult_optimization(mult_port_stat_e mult_port_stat, nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist);
+static void cleanup_mult_old_node(nnode_t *nodeo, netlist_t *netlist);
+
+// data structure representing one row of bits in an adder tree
+struct AdderTreeRow {
+    // the shift of this row from the least significant bit of the multiplier output
+    size_t shift;
+    // the bits in the row: each entry pairs a node pointer with the
+    // index of this bit in that node's output array.
+    std::vector<std::pair<nnode_t *, int>> bits;
+};
+
+/*---------------------------------------------------------------------------
+ * (function: instantiate_simple_soft_multiplier )
+ * Sample 4x4 multiplier to help understand logic.
+ *
+ * 					a3 	a2	a1	a0
+ *					b3 	b2 	b1 	b0
+ *					---------------------------
+ *					c03	c02	c01	c00
+ *			+	c13	c12	c11	c10
+ *			-----------------------------------
+ *			r14	r13	r12	r11	r10
+ *		+	c23	c22	c21	c20
+ *		-----------------------------------
+ *		r24	r23	r22	r21	r20
+ *	+	c33	c32	c31	c30
+ *	------------------------------------
+ *	o7	o6	o5	o4	o3	o2	o1	o0
+ *
+ *	In the first case will be c01
+ *-------------------------------------------------------------------------*/
+void instantiate_simple_soft_multiplier(nnode_t *node, short mark, netlist_t *netlist)
+{
+    int width_a;
+    int width_b;
+    int width;
+    int multiplier_width;
+    int multiplicand_width;
+    nnode_t **adders_for_partial_products;
+    nnode_t ***partial_products;
+    int multiplicand_offset_index;
+    int multiplier_offset_index;
+    int current_index;
+    int i, j;
+
+    /* need for an carry-ripple-adder for each of the bits of port B. */
+    /* good question of which is better to put on the bottom of multiplier.  Larger means more smaller adds, or small is
+     * less large adds */
+    oassert(node->num_output_pins > 0);
+    oassert(node->num_input_pins > 0);
+    oassert(node->num_input_port_sizes == 2);
+    oassert(node->num_output_port_sizes == 1);
+    width_a = node->input_port_sizes[0];
+    width_b = node->input_port_sizes[1];
+    width = node->output_port_sizes[0];
+    multiplicand_width = width_b;
+    multiplier_width = width_a;
+    /* offset is related to which multport is chosen as the multiplicand */
+    multiplicand_offset_index = width_a;
+    multiplier_offset_index = 0;
+
+    adders_for_partial_products = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * multiplicand_width - 1);
+
+    /* need to generate partial products for each bit in width B. */
+    partial_products = (nnode_t ***)vtr::malloc(sizeof(nnode_t **) * multiplicand_width);
+
+    /* generate the AND partial products */
+    for (i = 0; i < multiplicand_width; i++) {
+        /* create the memory for each AND gate needed for the levels of partial products */
+        partial_products[i] = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * multiplier_width);
+
+        if (i < multiplicand_width - 1) {
+            adders_for_partial_products[i] = make_2port_gate(ADD, multiplier_width + 1, multiplier_width + 1, multiplier_width + 1, node, mark);
+        }
+
+        for (j = 0; j < multiplier_width; j++) {
+            /* create each one of the partial products */
+            partial_products[i][j] = make_1port_logic_gate(LOGICAL_AND, 2, node, mark);
+        }
+    }
+
+    /* generate the connections to the AND gates */
+    for (i = 0; i < multiplicand_width; i++) {
+        for (j = 0; j < multiplier_width; j++) {
+            /* hookup the input of B to each AND gate */
+            if (j == 0) {
+                /* IF - this is the first time we are mapping multiplicand port then can remap */
+                remap_pin_to_new_node(node->input_pins[i + multiplicand_offset_index], partial_products[i][j], 0);
+            } else {
+                /* ELSE - this needs to be a new output of the multiplicand port */
+                add_input_pin_to_node(partial_products[i][j], copy_input_npin(partial_products[i][0]->input_pins[0]), 0);
+            }
+
+            /* hookup the input of the multiplier to each AND gate */
+            if (i == 0) {
+                /* IF - this is the first time we are mapping multiplier port then can remap */
+                remap_pin_to_new_node(node->input_pins[j + multiplier_offset_index], partial_products[i][j], 1);
+            } else {
+                /* ELSE - this needs to be a new output of the multiplier port */
+                add_input_pin_to_node(partial_products[i][j], copy_input_npin(partial_products[0][j]->input_pins[1]), 1);
+            }
+        }
+    }
+
+    /* hookup each of the adders */
+    for (i = 0; i < multiplicand_width - 1; i++) // -1 since the first stage is a combo of partial products while all others are part of tree
+    {
+        for (j = 0; j < multiplier_width + 1; j++) // +1 since adders are one greater than multwidth to pass carry
+        {
+            /* join to port 1 of the add one of the partial products.  */
+            if (i == 0) {
+                /* IF - this is the first addition row, then adding two sets of partial products and first set is from the c0* */
+                if (j < multiplier_width - 1) {
+                    /* IF - we just take an element of the first list c[0][j+1]. */
+                    connect_nodes(partial_products[i][j + 1], 0, adders_for_partial_products[i], j);
+                } else {
+                    /* ELSE - this is the last input to the first adder, then we pass in 0 since no carry yet */
+                    add_input_pin_to_node(adders_for_partial_products[i], get_zero_pin(netlist), j);
+                }
+            } else if (j < multiplier_width) {
+                /* ELSE - this is the standard situation when we need to hookup this adder with a previous adder, r[i-1][j+1] */
+                connect_nodes(adders_for_partial_products[i - 1], j + 1, adders_for_partial_products[i], j);
+            } else {
+                add_input_pin_to_node(adders_for_partial_products[i], get_zero_pin(netlist), j);
+            }
+
+            if (j < multiplier_width) {
+                /* IF - this is not most significant bit then just add current partial product */
+                connect_nodes(partial_products[i + 1][j], 0, adders_for_partial_products[i], j + multiplier_width + 1);
+            } else {
+                add_input_pin_to_node(adders_for_partial_products[i], get_zero_pin(netlist), j + multiplier_width + 1);
+            }
+        }
+    }
+
+    current_index = 0;
+    /* hookup the outputs */
+    for (i = 0; i < width; i++) {
+        if (multiplicand_width == 1) {
+            // this is undealt with
+            error_message(AST, node->loc, "%s", "Cannot create soft multiplier with multiplicand width of 1.\n");
+        } else if (i == 0) {
+            /* IF - this is the LSbit, then we use a pass through from the partial product */
+            remap_pin_to_new_node(node->output_pins[i], partial_products[0][0], 0);
+        } else if (i < multiplicand_width - 1) {
+            /* ELSE IF - these are the middle values that come from the LSbit of partial adders */
+            remap_pin_to_new_node(node->output_pins[i], adders_for_partial_products[i - 1], 0);
+        } else {
+            if (current_index > multiplier_width) {
+                /* output pins greater than 2X multiplier width will be driven with pad node */
+                nnode_t *buf_node = make_1port_gate(BUF_NODE, 1, 1, node, mark);
+                /* hook a pad pin into buf node */
+                add_input_pin_to_node(buf_node, get_pad_pin(netlist), 0);
+                /* hook the over size output pin to into the buf node */
+                remap_pin_to_new_node(node->output_pins[i], buf_node, 0);
+            } else {
+                /* ELSE - the final outputs are straight from the outputs of the last adder */
+                remap_pin_to_new_node(node->output_pins[i], adders_for_partial_products[multiplicand_width - 2], current_index);
+            }
+            current_index++;
+        }
+    }
+
+    /* soft map the adders if they need to be mapped */
+    for (i = 0; i < multiplicand_width - 1; i++) {
+        instantiate_add_w_carry(adders_for_partial_products[i], mark, netlist);
+    }
+
+    /* Cleanup everything */
+    if (adders_for_partial_products != NULL) {
+        for (i = 0; i < multiplicand_width - 1; i++) {
+            free_nnode(adders_for_partial_products[i]);
+        }
+        vtr::free(adders_for_partial_products);
+    }
+    /* generate the AND partial products */
+    for (i = 0; i < multiplicand_width; i++) {
+        /* create the memory for each AND gate needed for the levels of partial products */
+        if (partial_products[i] != NULL) {
+            vtr::free(partial_products[i]);
+        }
+    }
+    if (partial_products != NULL) {
+        vtr::free(partial_products);
+    }
+}
+
+/**
+ * --------------------------------------------------------------------------
+ * (function: implement_constant_multipication)
+ *
+ * @brief implements a multiplication by a constant operand using shift (SL) and ADD nodes
+ *
+ * @note this function should be called before the partial mapping phase
+ * since some of the created logic needs to be softened
+ *
+ * @param node pointer to the multiplication netlist node
+ * @param port_status showing which value is constant, which is variable
+ * @param mark a unique DFS traversal number
+ * @param netlist pointer to the current netlist
+ *
+ * @return a new signal list holding the pins of the last computed stage
+ * -------------------------------------------------------------------------*/
+static signal_list_t *implement_constant_multipication(nnode_t *node, mult_port_stat_e port_status, short mark, netlist_t *netlist)
+{
+    /* validate the port sizes */
+    oassert(node->num_input_port_sizes == 2);
+    oassert(node->num_output_port_sizes == 1);
+
+    signal_list_t *return_value = init_signal_list();
+
+    /**
+     * Multiply ports
+     * IN1: (n bits)        input_port[0]
+     * IN2: (m bits)        input_port[1]
+     * OUT: min(m, n) bits  output_port[0]
+     */
+
+    int IN1_width = node->input_port_sizes[0];
+
+    int i, j;
+    int const_operand_offset = (port_status == mult_port_stat_e::MULTIPICAND_CONSTANT) ? IN1_width : 0;
+    int const_operand_width = node->input_port_sizes[(port_status == mult_port_stat_e::MULTIPICAND_CONSTANT) ? 1 : 0];
+
+    int variable_operand_offset = (port_status == mult_port_stat_e::MULTIPICAND_CONSTANT) ? 0 : IN1_width;
+    int variable_operand_width = node->num_input_pins - const_operand_width;
+    operation_list variable_operand_signedness =
+      (port_status == mult_port_stat_e::MULTIPICAND_CONSTANT) ? node->attributes->port_a_signed : node->attributes->port_b_signed;
+
+    /* every stage is computed at the full output width of the node */
+    int width = node->num_output_pins;
+
+    /* container for constant operand */
+    signal_list_t *const_operand = init_signal_list();
+    for (i = 0; i < const_operand_width; i++) {
+        add_pin_to_signal_list(const_operand, node->input_pins[const_operand_offset + i]);
+    }
+    /* container for variable operand */
+    signal_list_t *variable_operand = init_signal_list();
+    for (i = 0; i < variable_operand_width; i++) {
+        add_pin_to_signal_list(variable_operand, node->input_pins[variable_operand_offset + i]);
+    }
+
+    /* netlist GND and VCC net */
+    nnet_t *gnd_net = netlist->zero_net;
+    nnet_t *vcc_net = netlist->one_net;
+
+    int internal_outputs_size = const_operand_width;
+    /* to keep the record of internal outputs for connection purposes */
+    signal_list_t **internal_outputs = (signal_list_t **)vtr::calloc(internal_outputs_size, sizeof(signal_list_t *));
+    /* implementing the multiplication using shift and add operation, one stage per constant bit */
+    for (i = 0; i < node->num_output_pins + 1; i++) {
+        npin_t *pin;
+        /* stop once every output bit or every constant bit has been processed */
+        if (i == node->num_output_pins || i == const_operand_width) {
+            internal_outputs_size = i;
+            /* the pins of the last stage form the return value */
+            for (j = 0; j < internal_outputs[i - 1]->count; j++) {
+                add_pin_to_signal_list(return_value, internal_outputs[i - 1]->pins[j]);
+            }
+            break;
+        } else {
+            pin = const_operand->pins[i];
+        }
+        /* init the internal outputs signal list */
+        internal_outputs[i] = init_signal_list();
+
+        /* the const_operand pin is connected to GND: forward the previous stage unchanged */
+        if (!strcmp(pin->net->name, gnd_net->name)) {
+            for (j = 0; j < width; j++) {
+                /* if the first bit of const_operand is zero we need to initiate the multiplication by zero pins */
+                npin_t *internal_output_pin = (i == 0) ? get_zero_pin(netlist) : internal_outputs[i - 1]->pins[j];
+                add_pin_to_signal_list(internal_outputs[i], internal_output_pin);
+            }
+        }
+        /* the const_operand pin is connected to VCC */
+        else if (!strcmp(pin->net->name, vcc_net->name)) {
+            /* for the first round we do not need to shift */
+            if (i == 0) {
+                for (j = 0; j < width; j++) {
+                    if (j < variable_operand_width) {
+                        add_pin_to_signal_list(internal_outputs[0], copy_input_npin(variable_operand->pins[j]));
+                    } else {
+                        add_pin_to_signal_list(internal_outputs[0], get_zero_pin(netlist));
+                    }
+                }
+            } else {
+                /*****************************************************************************************/
+                /*************************************** SHIFT_NODE **************************************/
+                /*****************************************************************************************/
+                /**
+                 * create a shift node to shift the variable port based on the i idx
+                 *
+                 * (shift node)
+                 * IN1: variable_operand of the multiplier
+                 * IN2: shift value (const_operand_width maximum size)
+                 * OUT: shifted IN1 (width)
+                 *
+                 */
+                nnode_t *shift_node = make_2port_gate(SL, width, width, width, node, mark);
+                /* connecting the shift value pins */
+                signal_list_t *shift_value = create_constant_signal(i, width, netlist);
+
+                /* keeping the shift output nodes for adding with the previous stage internal outputs */
+                signal_list_t *shift_outputs = init_signal_list();
+
+                int pad_pin = variable_operand->count - 1; // last pin of the variable operand, replicated below for SIGNED operands
+                for (j = 0; j < width; j++) {
+                    if (j < variable_operand_width) {
+                        /* connecting the first input of the shift node */
+                        add_input_pin_to_node(shift_node, copy_input_npin(variable_operand->pins[j]), j);
+                    } else {
+                        add_input_pin_to_node(
+                          shift_node,
+                          (variable_operand_signedness == SIGNED) ? copy_input_npin(variable_operand->pins[pad_pin]) : get_zero_pin(netlist), j);
+                    }
+
+                    /* hook shift value pins into the shift node */
+                    add_input_pin_to_node(shift_node, shift_value->pins[j], width + j);
+
+                    /* create the j-th output of the shift node */
+                    // Connect output pin to related input pin
+                    npin_t *var_op_out1 = allocate_npin();
+                    npin_t *var_op_out2 = allocate_npin();
+                    nnet_t *var_op_net = allocate_nnet();
+                    var_op_net->name = make_full_ref_name(NULL, NULL, NULL, shift_node->name, j);
+                    /* hook the output pin into the node */
+                    add_output_pin_to_node(shift_node, var_op_out1, j);
+                    /* hook up new pin 1 into the new net */
+                    add_driver_pin_to_net(var_op_net, var_op_out1);
+                    /* hook up the new pin 2 to this new net */
+                    add_fanout_pin_to_net(var_op_net, var_op_out2);
+
+                    /* adding the output pin to the shift output signal container */
+                    add_pin_to_signal_list(shift_outputs, var_op_out2);
+                }
+
+                /*****************************************************************************************/
+                /**************************************** ADD_NODE ***************************************/
+                /*****************************************************************************************/
+                nnode_t *add_node = make_2port_gate(ADD, width, width, width, node, mark);
+                add_list = insert_in_vptr_list(add_list, add_node);
+                /* connecting add node input pins */
+                for (j = 0; j < width; j++) {
+                    /* connecting the previous stage internal outputs as the first add inputs */
+                    add_input_pin_to_node(add_node, internal_outputs[i - 1]->pins[j], j);
+
+                    /* connecting the shift output pins as the second input */
+                    add_input_pin_to_node(add_node, shift_outputs->pins[j], width + j);
+
+                    /* creating new output pins and adding to the internal outputs for next stages */
+                    // Connect output pin to related input pin
+                    npin_t *add_op_out1 = allocate_npin();
+                    npin_t *add_op_out2 = allocate_npin();
+                    nnet_t *add_op_net = allocate_nnet();
+                    add_op_net->name = make_full_ref_name(NULL, NULL, NULL, add_node->name, j);
+                    /* hook the output pin into the node */
+                    add_output_pin_to_node(add_node, add_op_out1, j);
+                    /* hook up new pin 1 into the new net */
+                    add_driver_pin_to_net(add_op_net, add_op_out1);
+                    /* hook up the new pin 2 to this new net */
+                    add_fanout_pin_to_net(add_op_net, add_op_out2);
+
+                    /* adding the output pin to the internal outputs of this stage */
+                    add_pin_to_signal_list(internal_outputs[i], add_op_out2);
+                }
+
+                // CLEAN UP
+                free_signal_list(shift_value);
+                free_signal_list(shift_outputs);
+            }
+        }
+    }
+
+    // CLEAN UP
+    free_signal_list(const_operand);
+    free_signal_list(variable_operand);
+
+    for (i = 0; i < internal_outputs_size; i++) {
+        if (internal_outputs[i])
+            free_signal_list(internal_outputs[i]);
+    }
+    vtr::free(internal_outputs);
+
+    return (return_value);
+}
+
+/**
+ * --------------------------------------------------------------------------
+ * (function: connect_constant_mult_outputs)
+ *
+ * @brief drives every output of the mult node from the matching pin of
+ * output_signal_list through a buffer node, then disposes of the mult node
+ *
+ * @param node pointer to the multiplication netlist node (freed here)
+ * @param output_signal_list one pin per output of the node (list freed here)
+ * @note the input pins of the node are detached from their nets and freed
+ * -------------------------------------------------------------------------*/
+void connect_constant_mult_outputs(nnode_t *node, signal_list_t *output_signal_list)
+{
+    /* there must be exactly one computed signal per output pin */
+    const int output_width = node->num_output_pins;
+    oassert(output_width == output_signal_list->count);
+
+    /* drive each output of the mult node through its own buffer */
+    for (int idx = 0; idx < output_signal_list->count; idx++) {
+        nnode_t *buffer = make_1port_gate(BUF_NODE, 1, 1, node, node->traverse_visited);
+
+        /* the computed pin feeds the buffer ... */
+        add_input_pin_to_node(buffer, output_signal_list->pins[idx], 0);
+        /* ... and the mult output pin is remapped onto the buffer output */
+        remap_pin_to_new_node(node->output_pins[idx], buffer, 0);
+    }
+
+    /* the list itself is no longer needed */
+    free_signal_list(output_signal_list);
+
+    /* release every input pin of the mult node */
+    for (int idx = 0; idx < node->num_input_pins; idx++) {
+        npin_t *input_pin = node->input_pins[idx];
+
+        /* detach from input nets */
+        remove_fanout_pins_from_net(input_pin->net, input_pin, input_pin->pin_net_idx);
+
+        /* free pin */
+        free_npin(input_pin);
+        node->input_pins[idx] = NULL;
+    }
+
+    /* finally drop the mult node itself */
+    free_nnode(node);
+}
+
+/*---------------------------------------------------------------------------
+ * (function: init_mult_distribution) allocates the zeroed mults counter table
+ *-------------------------------------------------------------------------*/
+void init_mult_distribution()
+{
+    oassert(hard_multipliers != NULL);
+    const t_model_ports *first_input = hard_multipliers->inputs;
+    mults = (int *)vtr::calloc((1 + first_input->size) * (1 + first_input->next->size), sizeof(int));
+}
+
+/*---------------------------------------------------------------------------
+ * (function: record_mult_distribution)
+ *-------------------------------------------------------------------------*/
+void record_mult_distribution(nnode_t *node)
+{
+    oassert(hard_multipliers != NULL);
+    oassert(node != NULL);
+
+    /* widths of the two input ports of this multiplier */
+    const int width_a = node->input_port_sizes[0];
+    const int width_b = node->input_port_sizes[1];
+
+    /* bump the counter kept for this (width_a, width_b) combination */
+    const int slot = width_a * hard_multipliers->inputs->size + width_b;
+    mults[slot]++;
+}
+
+/*---------------------------------------------------------------------------
+ * (function: report_mult_distribution) prints the recorded counts, then frees the table
+ *-------------------------------------------------------------------------*/
+void report_mult_distribution()
+{
+    if (hard_multipliers == NULL || mults == NULL)
+        return;
+    /* number of counters allocated by init_mult_distribution() */
+    const long len = (1 + hard_multipliers->inputs->size) * (1 + hard_multipliers->inputs->next->size);
+    long num_total = 0;
+    printf("\nHard Multiplier Distribution\n============================\n");
+    for (long i = 0; i <= hard_multipliers->inputs->size; i++) {
+        for (long j = 1; j <= hard_multipliers->inputs->next->size; j++) {
+            const long idx = i * hard_multipliers->inputs->size + j; /* same layout as record_mult_distribution() */
+            if (idx < len && mults[idx] != 0) { /* never read past the end of the allocation */
+                num_total += mults[idx];
+                printf("%ld X %ld => %d\n", i, j, mults[idx]);
+            }
+        }
+    }
+    printf("\n\nTotal # of multipliers = %ld\n", num_total);
+    vtr::free(mults);
+    mults = NULL; /* a later call must neither read nor free the stale pointer */
+}
+
+/*---------------------------------------------------------------------------
+ * (function: find_hard_multipliers)
+ *-------------------------------------------------------------------------*/
+void find_hard_multipliers()
+{
+    /* cache the configured min_hard_multiplier value */
+    min_mult = configuration.min_hard_multiplier;
+
+    /* scan the architecture models for the one named "multiply" */
+    for (hard_multipliers = Arch.models; hard_multipliers != NULL; hard_multipliers = hard_multipliers->next) {
+        if (strcmp(hard_multipliers->name, "multiply") != 0)
+            continue;
+        /* found it: hard_multipliers keeps pointing at this model */
+        init_mult_distribution();
+        return;
+    }
+    /* no such model: hard_multipliers is left NULL */
+}
+
+/*---------------------------------------------------------------------------
+ * (function: declare_hard_multiplier)
+ *-------------------------------------------------------------------------*/
+void declare_hard_multiplier(nnode_t *node)
+{
+    t_multiplier *tmp;
+    int width_a, width_b, width, swap;
+
+    /* Without a hard multiplier model there is no instance list to search or extend */
+    if (hard_multipliers == NULL) {
+        warning_message(NETLIST, node->loc, "%s\n", "Instantiating Mulitpliers where hard multipliers do not exist");
+        return;
+    }
+
+    width_a = node->input_port_sizes[0];
+    width_b = node->input_port_sizes[1];
+    width = node->output_port_sizes[0];
+    if (width_a < width_b) /* Make sure a is bigger than b */
+    {
+        swap = width_b;
+        width_b = width_a;
+        width_a = swap;
+    }
+
+    /* See if this size instance of multiplier exists? */
+    for (tmp = (t_multiplier *)hard_multipliers->instances; tmp != NULL; tmp = tmp->next) {
+        if ((tmp->size_a == width_a) && (tmp->size_b == width_b) && (tmp->size_out == width))
+            return;
+    }
+
+    /* Does not exist - must create an instance and push it on the front of the list */
+    tmp = (t_multiplier *)vtr::malloc(sizeof(t_multiplier));
+    tmp->next = (t_multiplier *)hard_multipliers->instances;
+    hard_multipliers->instances = tmp;
+    tmp->size_a = width_a;
+    tmp->size_b = width_b;
+    tmp->size_out = width;
+}
+
+/*---------------------------------------------------------------------------
+ * (function: instantiate_hard_multiplier )
+ *-------------------------------------------------------------------------*/
+void instantiate_hard_multiplier(nnode_t *node, short mark, netlist_t * /*netlist*/)
+{
+    oassert(node && "node is NULL to instanciate hard multiplier");
+
+    declare_hard_multiplier(node);
+
+    /* keep a copy of the current name (if any) and release the original */
+    std::string base_name;
+    if (node->name) {
+        base_name = node->name;
+        vtr::free(node->name);
+        node->name = NULL;
+    }
+
+    if (node->num_output_pins > 0) {
+        /* Give names to the output pins */
+        for (int i = 0; i < node->num_output_pins; i++) {
+            npin_t *out_pin = node->output_pins[i];
+            if (out_pin->name) {
+                vtr::free(out_pin->name);
+            }
+            // build the output string
+            std::string pin_name = base_name + "[" + std::to_string(out_pin->pin_node_idx) + "]";
+            out_pin->name = vtr::strdup(pin_name.c_str());
+        }
+        /* the node takes the name of its last output pin */
+        node->name = vtr::strdup(node->output_pins[node->num_output_pins - 1]->name);
+    } else {
+        /* no output pins: name the node after its port widths, wide input first :) */
+        const bool second_is_wider = node->input_port_sizes[1] > node->input_port_sizes[0];
+        const int wide = node->input_port_sizes[second_is_wider ? 1 : 0];
+        const int narrow = node->input_port_sizes[second_is_wider ? 0 : 1];
+        std::string sized_name =
+          base_name + "_" + std::to_string(wide) + "_" + std::to_string(narrow) + "_" + std::to_string(node->output_port_sizes[0]);
+        node->name = vtr::strdup(sized_name.c_str());
+    }
+
+    node->traverse_visited = mark;
+}
+
+/*---------------------------------------------------------------------------
+ * (function: add_the_blackbox_for_mults_yosys)
+ *
+ * Adds one blackbox module to the design for every hard multiplier
+ * instance recorded on the "multiply" model, declaring the bits of both
+ * input ports and of the output port as module ports, and then calls
+ * free_multipliers().
+ *
+ * @param design Yosys design receiving the blackbox modules
+ *-------------------------------------------------------------------------*/
+void add_the_blackbox_for_mults_yosys(Yosys::Design *design)
+{
+    /* Check to make sure this target architecture has hard multipliers */
+    if (hard_multipliers == NULL)
+        return;
+
+    /* Get the names of the ports for the multiplier: the first model input
+     * names port b, the second one names port a */
+    const char *port_b_name = hard_multipliers->inputs->name;
+    const char *port_a_name = hard_multipliers->inputs->next->name;
+    const char *port_out_name = hard_multipliers->outputs->name;
+
+    /* find the multiplier devices in the tech library */
+    t_multiplier *muls = (t_multiplier *)(hard_multipliers->instances);
+    if (muls == NULL) /* No multipliers instantiated */
+        return;
+
+    /* emit one blackbox module for every recorded multiplier instance */
+    for (; muls != NULL; muls = muls->next) {
+        /* fixed mode uses the plain model name, otherwise the sizes are encoded in the name */
+        std::string mul_name = "multiply";
+        if (configuration.fixed_hard_multiplier == 0)
+            mul_name = Yosys::stringf("mult_%d_%d_%d", muls->size_a, muls->size_b, muls->size_out);
+
+        Yosys::RTLIL::Module *module = new Yosys::RTLIL::Module;
+        module->name = Yosys::RTLIL::escape_id(mul_name);
+
+        if (design->module(module->name))
+            Yosys::log_error("Duplicate definition of module %s!\n", Yosys::log_id(module->name));
+        design->add(module);
+
+        /* per port name: (highest bit index seen + 1, input flag) */
+        Yosys::hashlib::dict<Yosys::RTLIL::IdString, std::pair<int, bool>> wideports_cache;
+
+        /* add the inputs: the bits of port a come first, then the bits of port b */
+        const int total_inputs = muls->size_a + muls->size_b;
+        for (int bit = 0; bit < total_inputs; bit++) {
+            std::string w_name;
+            if (bit >= muls->size_a) {
+                w_name = Yosys::stringf("%s[%d]", port_b_name, bit - muls->size_a);
+            } else {
+                w_name = Yosys::stringf("%s[%d]", port_a_name, bit);
+            }
+
+            Yosys::RTLIL::Wire *in_wire = to_wire(w_name, module);
+            in_wire->port_input = true;
+
+            std::pair<Yosys::RTLIL::IdString, int> split = wideports_split(w_name);
+            if (!split.first.empty() && split.second >= 0) {
+                std::pair<int, bool> &entry = wideports_cache[split.first];
+                entry.first = std::max(entry.first, split.second + 1);
+                entry.second = true;
+            }
+        }
+
+        /* add the outputs */
+        for (int bit = 0; bit < muls->size_out; bit++) {
+            std::string w_name = Yosys::stringf("%s[%d]", port_out_name, bit);
+
+            Yosys::RTLIL::Wire *out_wire = to_wire(w_name, module);
+            out_wire->port_output = true;
+
+            std::pair<Yosys::RTLIL::IdString, int> split = wideports_split(w_name);
+            if (!split.first.empty() && split.second >= 0) {
+                std::pair<int, bool> &entry = wideports_cache[split.first];
+                entry.first = std::max(entry.first, split.second + 1);
+                entry.second = false;
+            }
+        }
+
+        handle_wideports_cache(&wideports_cache, module);
+
+        module->fixup_ports();
+        wideports_cache.clear();
+
+        /* mark the module as a blackbox */
+        module->attributes[Yosys::ID::blackbox] = Yosys::RTLIL::Const(1);
+    }
+
+    free_multipliers();
+}
+
+/*-------------------------------------------------------------------------
+ * (function: define_mult_function_yosys)
+ *-----------------------------------------------------------------------*/
+void define_mult_function_yosys(nnode_t *node, Yosys::Module *module, Yosys::Design *design)
+{
+    oassert(node->input_port_sizes[0] > 0);
+    oassert(node->input_port_sizes[1] > 0);
+    oassert(node->output_port_sizes[0] > 0);
+
+    const int size_0 = node->input_port_sizes[0];
+    const int size_1 = node->input_port_sizes[1];
+
+    /* Pick the cell type. The fixed model is always called "multiply"; otherwise
+     * the name carries both input widths followed by the output width. flip is
+     * set when the name lists port 1 of the node first, in which case port 1 of
+     * the node is also the one connected to the first cell port below. */
+    bool flip = false;
+    std::string cell_type_name = "multiply";
+    if (configuration.fixed_hard_multiplier == 0) {
+        /* equal widths count as flipped, exactly like a wider port 1 */
+        flip = (size_0 <= size_1);
+        const int first = flip ? size_1 : size_0;
+        const int second = flip ? size_0 : size_1;
+        cell_type_name = Yosys::stringf("mult_%d_%d_%d", first, second, node->output_port_sizes[0]);
+    }
+
+    /* instantiate the cell under an auto-generated name */
+    Yosys::IdString celltype = Yosys::RTLIL::escape_id(cell_type_name);
+    Yosys::RTLIL::Cell *cell = module->addCell(NEW_ID, celltype);
+
+    /* connections for which wideports_split() yields a port name are collected
+     * here and handed to handle_cell_wideports_cache() at the end */
+    Yosys::hashlib::dict<Yosys::RTLIL::IdString, Yosys::hashlib::dict<int, Yosys::SigBit>> cell_wideports_cache;
+
+    /* width of the node port that is wired to the first cell port */
+    const int first_width = flip ? size_1 : size_0;
+    for (int i = 0; i < node->num_input_pins; i++) {
+        std::string p, q;
+        int input_index;
+        if (i < first_width) {
+            /* first cell port: named after the second input of the model */
+            input_index = flip ? i + size_0 : i;
+            p = Yosys::stringf("%s[%d]", hard_multipliers->inputs->next->name, i);
+        } else {
+            /* second cell port: named after the first input of the model */
+            input_index = flip ? i - size_1 : i;
+            long index = flip ? i - size_1 : i - size_0;
+            p = Yosys::stringf("%s[%ld]", hard_multipliers->inputs->name, index);
+        }
+
+        /* the wire is named after the single driver of the input net */
+        nnet_t *net = node->input_pins[input_index]->net;
+        oassert(net->num_driver_pins == 1);
+        npin_t *driver_pin = net->driver_pins[0];
+        if (driver_pin->name)
+            q = driver_pin->name;
+        else
+            q = driver_pin->node->name;
+
+        /* cache the bit when a port name was split off, otherwise set the port directly */
+        std::pair<Yosys::RTLIL::IdString, int> wp = wideports_split(p);
+        if (!wp.first.empty()) {
+            cell_wideports_cache[wp.first][wp.second] = to_wire(q, module);
+        } else {
+            cell->setPort(Yosys::RTLIL::escape_id(p), to_wire(q, module));
+        }
+    }
+
+    /* outputs are wired to the names of the node output pins */
+    for (int i = 0; i < node->num_output_pins; i++) {
+        std::string p = Yosys::stringf("%s[%d]", hard_multipliers->outputs->name, i);
+        std::string q = node->output_pins[i]->name;
+
+        /* same handling as for the inputs */
+        std::pair<Yosys::RTLIL::IdString, int> wp = wideports_split(p);
+        if (!wp.first.empty()) {
+            cell_wideports_cache[wp.first][wp.second] = to_wire(q, module);
+        } else {
+            cell->setPort(Yosys::RTLIL::escape_id(p), to_wire(q, module));
+        }
+    }
+
+    /* pass the collected connections on together with the cell */
+    handle_cell_wideports_cache(&cell_wideports_cache, design, module, cell);
+}
+
+/*-----------------------------------------------------------------------
+ * (function: init_split_multiplier)
+ *	Set up ptr as an a x b multiplier created while splitting node.
+ *	Inputs are taken from node (or copied from node_a / node_b when
+ *	given); output pins are set to NULL so they can be connected later.
+ *---------------------------------------------------------------------*/
+void init_split_multiplier(nnode_t *node, nnode_t *ptr, int offa, int a, int offb, int b, nnode_t *node_a, nnode_t *node_b)
+{
+    const int total = a + b;
+
+    /* Copy properties from original node */
+    ptr->type = node->type;
+    ptr->related_ast_node = node->related_ast_node;
+    ptr->traverse_visited = node->traverse_visited;
+    ptr->node_data = NULL;
+
+    /* Set new port sizes and parameters */
+    ptr->num_input_port_sizes = 2;
+    ptr->input_port_sizes = (int *)vtr::malloc(2 * sizeof(int));
+    ptr->input_port_sizes[0] = a;
+    ptr->input_port_sizes[1] = b;
+    ptr->num_output_port_sizes = 1;
+    ptr->output_port_sizes = (int *)vtr::malloc(sizeof(int));
+    ptr->output_port_sizes[0] = total;
+
+    /* First operand: copy the pins of node_a when given, else remap the pins of node starting at offa */
+    ptr->num_input_pins = total;
+    ptr->input_pins = (npin_t **)vtr::malloc(sizeof(void *) * total);
+    for (int i = 0; i < a; i++) {
+        if (node_a == NULL)
+            remap_pin_to_new_node(node->input_pins[i + offa], ptr, i);
+        else
+            add_input_pin_to_node(ptr, copy_input_npin(node_a->input_pins[i]), i);
+    }
+
+    /* Second operand: same scheme, reading from the second port of node_b or of node (at offb) */
+    for (int i = 0; i < b; i++) {
+        const int slot = a + i;
+        if (node_b == NULL)
+            remap_pin_to_new_node(node->input_pins[i + node->input_port_sizes[0] + offb], ptr, slot);
+        else
+            add_input_pin_to_node(ptr, copy_input_npin(node_b->input_pins[i + node_b->input_port_sizes[0]]), slot);
+    }
+
+    /* Prep output pins for connecting to cascaded multipliers */
+    ptr->num_output_pins = total;
+    ptr->output_pins = (npin_t **)vtr::malloc(sizeof(void *) * total);
+    for (int i = 0; i < total; i++)
+        ptr->output_pins[i] = NULL;
+}
+
+/*-------------------------------------------------------------------------
+ * (function: init_multiplier_adder)
+ *
+ * Initializes node as an ADD with input ports of a and b bits, for use
+ * inside a split multiplier or a multiplier addition tree.
+ *-----------------------------------------------------------------------*/
+void init_multiplier_adder(nnode_t *node, nnode_t *parent, int a, int b)
+{
+    /* The output is as wide as the wider of the two inputs */
+    const int out_width = (a > b) ? a : b;
+    const int in_width = a + b;
+
+    /* The adder takes its AST node and traversal mark from the parent */
+    node->type = ADD;
+    node->related_ast_node = parent->related_ast_node;
+    node->traverse_visited = parent->traverse_visited;
+    node->node_data = NULL;
+
+    /* Two input ports of a and b bits */
+    node->num_input_port_sizes = 2;
+    node->input_port_sizes = (int *)vtr::malloc(2 * sizeof(int));
+    node->input_port_sizes[0] = a;
+    node->input_port_sizes[1] = b;
+    /* One output port */
+    node->num_output_port_sizes = 1;
+    node->output_port_sizes = (int *)vtr::malloc(sizeof(int));
+    node->output_port_sizes[0] = out_width;
+
+    /* Input pins start out as NULL entries */
+    node->num_input_pins = in_width;
+    node->input_pins = (npin_t **)vtr::malloc(sizeof(void *) * in_width);
+    for (int pin = 0; pin < in_width; pin++)
+        node->input_pins[pin] = NULL;
+
+    /* Output pins start out as NULL entries */
+    node->num_output_pins = out_width;
+    node->output_pins = (npin_t **)vtr::malloc(sizeof(void *) * out_width);
+    for (int pin = 0; pin < out_width; pin++)
+        node->output_pins[pin] = NULL;
+
+    /* Record the new adder in the global add_list */
+    add_list = insert_in_vptr_list(add_list, node);
+}
+
+/*-------------------------------------------------------------------------
+ * (function: split_multiplier)
+ *
+ * Splits a multiplier on BOTH inputs into four smaller multipliers and two
+ *  adders so that the pieces better "fit" the multiplier resources of the
+ *  targeted FPGA architecture. The caller dictates the split:
+ *
+ *  a1a0 * b1b0 => a0 * b0 + a0 * b1 + a1 * b0 + a1 * b1 => c1c0 => c
+ *
+ * a0 * b0 and a1 * b1 do not overlap in bits, so instead of adding them
+ * they are simply concatenated, which saves one addition:
+ *
+ * ((a1 * b1) . (a0 * b0)) + ((a0 * b1) + (a1 * b0)) ==> Result
+ *
+ * node:    multiplier to split; it is freed before returning
+ * a0, a1:  sizes of the two slices of input port 0 (asserted: a0 + a1 == port size)
+ * b0, b1:  sizes of the two slices of input port 1 (asserted: b0 + b1 == port size)
+ *          NOTE: the argument order is (a0, b0, a1, b1)
+ * netlist: supplies the zero pin that pads both inputs of the first adder
+ *
+ * The four new multipliers are inserted into mult_list.
+ * Note that for some of the additions we need to perform sign extensions,
+ * but this should not be a problem since the sign extension is always
+ * extending NOT contracting.
+ *-----------------------------------------------------------------------*/
+void split_multiplier(nnode_t *node, int a0, int b0, int a1, int b1, netlist_t *netlist)
+{
+    nnode_t *a0b0, *a0b1, *a1b0, *a1b1, *addsmall, *addbig;
+    int i, size;
+
+    /* Check for a legitimate split: the slices must cover each input port exactly */
+    oassert(node->input_port_sizes[0] == (a0 + a1));
+    oassert(node->input_port_sizes[1] == (b0 + b1));
+
+    /* New node for small multiply: a0 * b0, named "<node name>-0" */
+    a0b0 = allocate_nnode(node->loc);
+    a0b0->name = (char *)vtr::malloc(strlen(node->name) + 3); // room for "-0" and the terminator
+    strcpy(a0b0->name, node->name);
+    strcat(a0b0->name, "-0");
+    init_split_multiplier(node, a0b0, 0, a0, 0, b0, nullptr, nullptr);
+    mult_list = insert_in_vptr_list(mult_list, a0b0);
+
+    /* New node for big multiply: a1 * b1, named "<node name>-3" */
+    a1b1 = allocate_nnode(node->loc);
+    a1b1->name = (char *)vtr::malloc(strlen(node->name) + 3);
+    strcpy(a1b1->name, node->name);
+    strcat(a1b1->name, "-3");
+    init_split_multiplier(node, a1b1, a0, a1, b0, b1, nullptr, nullptr);
+    mult_list = insert_in_vptr_list(mult_list, a1b1);
+
+    /* New node for 2nd multiply: a0 * b1, named "<node name>-1" */
+    a0b1 = allocate_nnode(node->loc);
+    a0b1->name = (char *)vtr::malloc(strlen(node->name) + 3);
+    strcpy(a0b1->name, node->name);
+    strcat(a0b1->name, "-1");
+    init_split_multiplier(node, a0b1, 0, a0, b0, b1, a0b0, a1b1);
+    mult_list = insert_in_vptr_list(mult_list, a0b1);
+
+    /* New node for 3rd multiply: a1 * b0, named "<node name>-2" */
+    a1b0 = allocate_nnode(node->loc);
+    a1b0->name = (char *)vtr::malloc(strlen(node->name) + 3);
+    strcpy(a1b0->name, node->name);
+    strcat(a1b0->name, "-2");
+    init_split_multiplier(node, a1b0, a0, a1, 0, b0, a1b1, a0b0);
+    mult_list = insert_in_vptr_list(mult_list, a1b0);
+
+    /* New node for the initial add (a1*b0 + a0*b1). NOTE(review): both cross terms are wired in at the same bit offset, which looks correct only when a0 == b0 -- confirm */
+    addsmall = allocate_nnode(node->loc);
+    addsmall->name = (char *)vtr::malloc(strlen(node->name) + 6); // room for "-add0" and the terminator
+    strcpy(addsmall->name, node->name);
+    strcat(addsmall->name, "-add0");
+    // this addition will have a carry out in the worst case, add one extra pin to each input port and connect them to gnd
+    init_multiplier_adder(addsmall, a1b0, a1b0->num_output_pins + 1, a0b1->num_output_pins + 1);
+
+    /* New node for the BIG add: adds the cross-term sum to the concatenation of a1*b1 and the bits of a0*b0 from bit b0 upwards */
+    addbig = allocate_nnode(node->loc);
+    addbig->name = (char *)vtr::malloc(strlen(node->name) + 6);
+    strcpy(addbig->name, node->name);
+    strcat(addbig->name, "-add1");
+    init_multiplier_adder(addbig, addsmall, addsmall->num_output_pins, a0b0->num_output_pins - b0 + a1b1->num_output_pins);
+
+    // connect inputs to port a of addsmall: all outputs of a1b0, then one zero pin on top
+    for (i = 0; i < a1b0->num_output_pins; i++)
+        connect_nodes(a1b0, i, addsmall, i);
+    add_input_pin_to_node(addsmall, get_zero_pin(netlist), a1b0->num_output_pins);
+    // connect inputs to port b of addsmall: all outputs of a0b1, then one zero pin on top
+    for (i = 0; i < a0b1->num_output_pins; i++)
+        connect_nodes(a0b1, i, addsmall, i + addsmall->input_port_sizes[0]);
+    add_input_pin_to_node(addsmall, get_zero_pin(netlist), a0b1->num_output_pins + addsmall->input_port_sizes[0]);
+
+    // connect inputs to port a of addbig: every output of addsmall
+    size = addsmall->num_output_pins;
+    for (i = 0; i < size; i++)
+        connect_nodes(addsmall, i, addbig, i);
+
+    // connect inputs to port b of addbig: outputs of a0b0 from bit b0 upwards, followed by all outputs of a1b1
+    for (i = b0; i < a0b0->output_port_sizes[0]; i++)
+        connect_nodes(a0b0, i, addbig, i - b0 + size);
+    size = size + a0b0->output_port_sizes[0] - b0; // input index of addbig where the a1b1 bits start
+    for (i = 0; i < a1b1->output_port_sizes[0]; i++)
+        connect_nodes(a1b1, i, addbig, i + size);
+
+    // remap the multiplier outputs coming directly from a0b0 (the lowest b0 bits of the result)
+    for (i = 0; i < b0; i++) {
+        remap_pin_to_new_node(node->output_pins[i], a0b0, i);
+    }
+
+    // remap the multiplier outputs coming from addbig (result bits b0 and above)
+    for (i = 0; i < addbig->num_output_pins; i++) {
+        remap_pin_to_new_node(node->output_pins[i + b0], addbig, i);
+    }
+
+    // CLEAN UP: the original multiplier node is released once its outputs are remapped
+    free_nnode(node);
+
+    return;
+}
+
+/*-------------------------------------------------------------------------
+ * (function: split_multiplier_a)
+ *
+ * Breaks a multiplier that is too wide on its "a" input into two smaller
+ *  multipliers plus one adder, so that each piece better "fits" the
+ *  multiplier resources of the targeted FPGA architecture:
+ *
+ *  a1a0 * b => a0 * b + a1 * b => c
+ *
+ * node:   multiplier to split; it is freed before returning
+ * a0, a1: sizes of the two slices of input port 0 (asserted: a0 + a1 == port size)
+ * b:      size of input port 1 (asserted)
+ *
+ * Both new multipliers are inserted into mult_list. As for the sign
+ * extension needed by the addition: it is always extending, NOT
+ * contracting, so it should not be a problem.
+ *-----------------------------------------------------------------------*/
+void split_multiplier_a(nnode_t *node, int a0, int a1, int b)
+{
+    /* Builds "<node name><suffix>" in a freshly allocated buffer */
+    auto suffixed_name = [node](const char *suffix) -> char * {
+        char *name = (char *)vtr::malloc(strlen(node->name) + strlen(suffix) + 1);
+        strcpy(name, node->name);
+        strcat(name, suffix);
+        return name;
+    };
+
+    /* The requested split has to cover both input ports exactly */
+    oassert(node->input_port_sizes[0] == (a0 + a1));
+    oassert(node->input_port_sizes[1] == b);
+
+    /* First partial multiplier: a0 * b */
+    nnode_t *low_product = allocate_nnode(node->loc);
+    low_product->name = suffixed_name("-0");
+    init_split_multiplier(node, low_product, 0, a0, 0, b, nullptr, nullptr);
+    mult_list = insert_in_vptr_list(mult_list, low_product);
+
+    /* Second partial multiplier: a1 * b */
+    nnode_t *high_product = allocate_nnode(node->loc);
+    high_product->name = suffixed_name("-1");
+    init_split_multiplier(node, high_product, a0, a1, 0, b, nullptr, low_product);
+    mult_list = insert_in_vptr_list(mult_list, high_product);
+
+    /* Adder that combines the two partial products */
+    nnode_t *sum = allocate_nnode(node->loc);
+    sum->name = suffixed_name("-add0");
+    init_multiplier_adder(sum, low_product, b, high_product->num_output_pins);
+
+    /* First adder port: outputs of a0 * b from bit a0 upwards */
+    for (int src = a0; src < low_product->num_output_pins; src++)
+        connect_nodes(low_product, src, sum, src - a0);
+    /* Second adder port: every output of a1 * b */
+    for (int src = 0; src < high_product->num_output_pins; src++)
+        connect_nodes(high_product, src, sum, sum->input_port_sizes[0] + src);
+
+    /* The lowest a0 result bits come straight from a0 * b ... */
+    for (int bit = 0; bit < a0; bit++) {
+        remap_pin_to_new_node(node->output_pins[bit], low_product, bit);
+    }
+    /* ... and the remaining result bits are produced by the adder */
+    for (int bit = 0; bit < sum->num_output_pins; bit++) {
+        remap_pin_to_new_node(node->output_pins[a0 + bit], sum, bit);
+    }
+
+    /* The original multiplier has been replaced and can be released */
+    free_nnode(node);
+}
+
+/*-------------------------------------------------------------------------
+ * (function: split_multiplier_b)
+ *
+ * Breaks a multiplier that is too wide on its "b" input into two smaller
+ *  multipliers plus one adder, so that each piece better "fits" the
+ *  multiplier resources of the targeted FPGA architecture:
+ *
+ *  a * b1b0 => a * b1 + a * b0 => c
+ *
+ * node:   multiplier to split; it is freed before returning
+ * a:      size of input port 0 (asserted)
+ * b1, b0: sizes of the two slices of input port 1 (asserted: b0 + b1 == port
+ *         size); NOTE: b1 is passed BEFORE b0
+ *
+ * Both new multipliers are inserted into mult_list. The a * b0 operand of
+ * the adder is widened by repeating its top output bit (sign extension).
+ *-----------------------------------------------------------------------*/
+void split_multiplier_b(nnode_t *node, int a, int b1, int b0)
+{
+    /* Builds "<node name><suffix>" in a freshly allocated buffer */
+    auto suffixed_name = [node](const char *suffix) -> char * {
+        char *name = (char *)vtr::malloc(strlen(node->name) + strlen(suffix) + 1);
+        strcpy(name, node->name);
+        strcat(name, suffix);
+        return name;
+    };
+
+    /* The requested split has to cover both input ports exactly */
+    oassert(node->input_port_sizes[0] == a);
+    oassert(node->input_port_sizes[1] == (b0 + b1));
+
+    /* First partial multiplier: a * b0 */
+    nnode_t *low_product = allocate_nnode(node->loc);
+    low_product->name = suffixed_name("-0");
+    init_split_multiplier(node, low_product, 0, a, 0, b0, nullptr, nullptr);
+    mult_list = insert_in_vptr_list(mult_list, low_product);
+
+    /* Second partial multiplier: a * b1 */
+    nnode_t *high_product = allocate_nnode(node->loc);
+    high_product->name = suffixed_name("-1");
+    init_split_multiplier(node, high_product, 0, a, b0, b1, low_product, nullptr);
+    mult_list = insert_in_vptr_list(mult_list, high_product);
+
+    /* Adder that combines the two partial products */
+    nnode_t *sum = allocate_nnode(node->loc);
+    sum->name = suffixed_name("-add0");
+    init_multiplier_adder(sum, high_product, high_product->num_output_pins, a + b1);
+
+    /* First adder operand, low part: outputs of a * b0 from bit b0 upwards */
+    for (int src = b0; src < low_product->output_port_sizes[0]; src++)
+        connect_nodes(low_product, src, sum, src - b0);
+    /* First adder operand, high part: repeat the top bit of a * b0 (sign extend) */
+    for (int dst = low_product->output_port_sizes[0] - b0; dst < a + b1; dst++)
+        connect_nodes(low_product, low_product->output_port_sizes[0] - 1, sum, dst);
+
+    /* Second adder operand: the first a + b1 outputs of a * b1, placed after the first operand */
+    for (int src = 0; src < a + b1; src++)
+        connect_nodes(high_product, src, sum, src + (b1 + a));
+
+    /* The lowest b0 result bits come straight from a * b0 ... */
+    for (int bit = 0; bit < b0; bit++)
+        remap_pin_to_new_node(node->output_pins[bit], low_product, bit);
+    /* ... and the remaining result bits are produced by the adder */
+    for (int bit = b0; bit < node->num_output_pins; bit++)
+        remap_pin_to_new_node(node->output_pins[bit], sum, bit - b0);
+
+    /* The original multiplier has been replaced and can be released */
+    free_nnode(node);
+}
+
+/*-------------------------------------------------------------------------
+ * (function: pad_multiplier)
+ *
+ * Fill out a MULTIPLY node to a fixed size. Size is retrieved from global
+ *	hard_multipliers data (or from one of its pb_types, see below).
+ * NOTE: The added input pins are tied to the zero pin when configuration.mult_padding
+ *	is 0 and to the pad pin otherwise; added output pins get fresh, uniquely named pins.
+ *-----------------------------------------------------------------------*/
+void pad_multiplier(nnode_t *node, netlist_t *netlist)
+{
+    int diffa, diffb, diffout, i;
+    int sizea, sizeb, sizeout;
+    int ina, inb;
+
+    int testa, testb;
+
+    static int pad_pin_number = 0; // running counter shared by all calls; keeps the padded output pin names unique
+
+    oassert(node->type == MULTIPLY);
+    oassert(hard_multipliers != NULL);
+
+    sizea = node->input_port_sizes[0];
+    sizeb = node->input_port_sizes[1];
+    sizeout = node->output_port_sizes[0];
+    record_mult_distribution(node);
+
+    /* Calculate the BEST fit hard multiplier to use (ina is made the larger of the two hard input sizes) */
+    ina = hard_multipliers->inputs->size;
+    inb = hard_multipliers->inputs->next->size;
+    if (ina < inb) {
+        ina = hard_multipliers->inputs->next->size;
+        inb = hard_multipliers->inputs->size;
+    }
+    diffa = ina - sizea; // number of pins each port has to grow by
+    diffb = inb - sizeb;
+    diffout = hard_multipliers->outputs->size - sizeout;
+
+    if (configuration.split_hard_multiplier == 1) { // look through the pb_types for a candidate that needs less input padding
+        t_linked_vptr *plist = hard_multipliers->pb_types;
+        while ((diffa + diffb) && plist) { // stops early once the total input padding reaches zero
+            t_pb_type *physical = (t_pb_type *)(plist->data_vptr);
+            plist = plist->next;
+            testa = physical->ports[0].num_pins;
+            testb = physical->ports[1].num_pins;
+            if ((testa >= sizea) && (testb >= sizeb) && ((testa - sizea + testb - sizeb) < (diffa + diffb))) { // big enough and tighter than the best so far
+                diffa = testa - sizea;
+                diffb = testb - sizeb;
+                diffout = physical->ports[2].num_pins - sizeout;
+            }
+        }
+    }
+
+    /* Expand the inputs */
+    if ((diffa != 0) || (diffb != 0)) {
+        allocate_more_input_pins(node, diffa + diffb);
+
+        /* Shift the second port's pins to make room for the expansion of the first input pins */
+        if (diffa != 0) {
+            for (i = 1; i <= sizeb; i++) { // walks the second port from its last pin down to its first
+                move_input_pin(node, sizea + sizeb - i, node->num_input_pins - diffb - i);
+            }
+
+            /* Connect unused first input pins to zero/pad pin */
+            for (i = 0; i < diffa; i++) {
+                if (configuration.mult_padding == 0)
+                    add_input_pin_to_node(node, get_zero_pin(netlist), i + sizea);
+                else
+                    add_input_pin_to_node(node, get_pad_pin(netlist), i + sizea);
+            }
+
+            node->input_port_sizes[0] = sizea + diffa;
+        }
+
+        if (diffb != 0) {
+            /* Connect unused second input pins (the last diffb pins of the node) to zero/pad pin */
+            for (i = 1; i <= diffb; i++) {
+                if (configuration.mult_padding == 0)
+                    add_input_pin_to_node(node, get_zero_pin(netlist), node->num_input_pins - i);
+                else
+                    add_input_pin_to_node(node, get_pad_pin(netlist), node->num_input_pins - i);
+            }
+
+            node->input_port_sizes[1] = sizeb + diffb;
+        }
+    }
+
+    /* Expand the outputs */
+    if (diffout != 0) {
+        allocate_more_output_pins(node, diffout);
+        for (i = 0; i < diffout; i++) {
+            // Add new pins to the higher order spots.
+            npin_t *new_pin = allocate_npin();
+            // Pad outputs with a unique and descriptive name to avoid collisions.
+            new_pin->name = append_string("", "unconnected_multiplier_output~%d", pad_pin_number++);
+            add_output_pin_to_node(node, new_pin, i + sizeout);
+        }
+        node->output_port_sizes[0] = sizeout + diffout;
+    }
+
+    return;
+}
+
+/*-------------------------------------------------------------------------
+ * (function: iterate_multipliers)
+ *
+ * Drains mult_list: every multiplier that is wider than the hard multiplier
+ *	block is split (the pieces are queued again), the ones that fit are
+ *	padded to the fixed hard multiplier size or just recorded, and small
+ *	ones may be turned into soft logic when hard adders exist. The hard
+ *	block is oriented per node so its wider input faces the wider operand.
+ *-----------------------------------------------------------------------*/
+void iterate_multipliers(netlist_t *netlist)
+{
+    /* Can only perform the optimisation if hard multipliers exist! */
+    if (hard_multipliers == NULL)
+        return;
+
+    /* Input widths of the hard multiplier block, ordered so that hard_wide >= hard_narrow */
+    int hard_wide = hard_multipliers->inputs->size;
+    int hard_narrow = hard_multipliers->inputs->next->size;
+    if (hard_wide < hard_narrow) {
+        int swap = hard_wide;
+        hard_wide = hard_narrow;
+        hard_narrow = swap;
+    }
+
+    while (mult_list != NULL) {
+        nnode_t *node = (nnode_t *)mult_list->data_vptr;
+        mult_list = delete_in_vptr_list(mult_list);
+
+        oassert(node != NULL);
+
+        if (node->type == HARD_IP)
+            node->type = MULTIPLY;
+
+        oassert(node->type == MULTIPLY);
+
+        const int mula = node->input_port_sizes[0];
+        const int mulb = node->input_port_sizes[1];
+        const int mult_size = std::max<int>(mula, mulb);
+
+        /*
+         * Orient the hard block for THIS node only: its wider input faces the
+         * wider operand. The orientation is derived from the fixed block
+         * dimensions on every iteration, so a node with mula < mulb cannot
+         * leave the block flipped for the nodes processed after it.
+         */
+        const int sizea = (mula < mulb) ? hard_narrow : hard_wide;
+        const int sizeb = (mula < mulb) ? hard_wide : hard_narrow;
+
+        /* Do I need to split the multiplier on both inputs? */
+        if ((mula > sizea) && (mulb > sizeb)) {
+            const int a0 = sizea;
+            const int a1 = mula - sizea;
+            const int b0 = sizeb;
+            const int b1 = mulb - sizeb;
+            split_multiplier(node, a0, b0, a1, b1, netlist);
+        } else if (mula > sizea) /* split multiplier on a input? */
+        {
+            const int a0 = sizea;
+            const int a1 = mula - sizea;
+            split_multiplier_a(node, a0, a1, mulb);
+        } else if (mulb > sizeb) /* split multiplier on b input? */
+        {
+            const int b1 = sizeb;
+            const int b0 = mulb - sizeb;
+            split_multiplier_b(node, mula, b1, b0);
+        }
+        // if either of the multiplicands is larger than the
+        // minimum hard multiplier size, use hard multiplier
+        // TODO: implement multipliers where one of the operands is
+        // 1 bit wide using soft logic
+        else if (mult_size >= min_mult || mula == 1 || mulb == 1) {
+            /* Check to ensure IF mult needs to be exact size */
+            if (configuration.fixed_hard_multiplier != 0)
+                pad_multiplier(node, netlist);
+
+            /* Otherwise, we still want to record the multiplier node for
+             * reporting later on (the pad_multiplier function does this for the
+             * other case) */
+            else {
+                record_mult_distribution(node);
+            }
+        } else if (hard_adders) {
+            /* below the minimum hard multiplier size: build it from AND gates and adders */
+            if (configuration.fixed_hard_multiplier != 0) {
+                split_soft_multiplier(node, netlist);
+            }
+        }
+    }
+}
+
+/*---------------------------------------------------------------------------
+ * (function: split_soft_multiplier)
+ *
+ * This function splits the input multiplier (node) into partial products (AND gates) and
+ * adders, as shown below. The partial products starts with "I", and all the partial products
+ * generated are added together by implementing a balanced adder tree to produce the final product
+ * Sample 4x4 multiplier to help understand logic:
+ *
+ * 	    				A3 	A2	A1	A0
+ *	    				B3 	B2 	B1 	B0
+ *	   	-------------------------------
+ *	    				I03	I02 I01 I00
+ *	   	+         	I13	I12	I11	I10
+ *	    		I23	I22	I21	I20             Level 0
+  	+	I33	I32	I31 I30
+ *      -------------------------------
+ *  		    C4	C3	C2	C1  C0
+ * 	+	D4  D3	D2	D1  D0	I20             Level 1
+ *  	-------------------------------
+ *  	E5	E4  E3  E2	E1	E0	C0  I00     Level 2
+ *
+ *-------------------------------------------------------------------------*/
+void split_soft_multiplier(nnode_t *node, netlist_t *netlist)
+{
+    oassert(node->num_output_pins > 0);
+    oassert(node->num_input_pins > 0);
+    oassert(node->num_input_port_sizes == 2);
+    oassert(node->num_output_port_sizes == 1);
+
+    size_t multiplier_width = static_cast<size_t>(node->input_port_sizes[0]);
+    size_t multiplicand_width = static_cast<size_t>(node->input_port_sizes[1]);
+
+    // ODIN II doesn't work with multiplicand sizes of 1 since it assumes that the
+    // output of the multiplier is still the sum of the operands sizes. However, it
+    // should only be equal to the long operand since its an AND operation in this case.
+    // If this is fixed, this assert statement should be removed and the code will work properly
+    oassert(multiplicand_width > 1);
+
+    // number of adder levels in a balanced tree over the partial product rows: ceil(log2(multiplicand_width))
+    const int add_levels = std::ceil(std::log((double)multiplicand_width) / std::log(2.));
+
+    // data structure holding the rows of output pins to be added in each addition stage
+    // as well as the shift of each row from the position of the first output
+    std::vector<std::vector<AdderTreeRow>> addition_stages(add_levels + 1);
+    // 2-D array of adders, indexed by the level of the adder in the tree and the adder id within the level
+    std::vector<std::vector<nnode_t *>> adders(add_levels);
+    // array holding the adder width at each level in the adder tree
+    std::vector<std::vector<size_t>> adder_widths(add_levels);
+
+    // 2-D array of partial products. [0..multiplicand_width][0..multiplier_width]
+    std::vector<std::vector<nnode_t *>> partial_products(multiplicand_width);
+
+    addition_stages[0].resize(multiplicand_width);
+    // initialize all the AND gates needed for the partial products
+    for (size_t i = 0; i < multiplicand_width; i++) {
+        std::vector<std::pair<nnode_t *, int>> pp_bits(multiplier_width);
+        // resize the ith row of the partial products
+        partial_products[i].resize(multiplier_width);
+        for (size_t j = 0; j < multiplier_width; j++) {
+            // create each one of the partial products
+            partial_products[i][j] = make_1port_logic_gate(LOGICAL_AND, 2, node, node->traverse_visited);
+            pp_bits[j] = {partial_products[i][j], 0};
+        }
+        // add the partial product row to the addition stages data structure; row i is shifted by i bit positions
+        addition_stages[0][i] = {i, pp_bits};
+    }
+
+    // generate the connections to the AND gates that generates the partial products of the multiplication
+    for (size_t i = 0; i < multiplicand_width; i++) {
+        for (size_t j = 0; j < multiplier_width; j++) {
+            // hookup the multiplier bits to the AND gates (input pin 1 of each gate)
+            if (i == 0) {
+                // when connecting the input to an AND gate for the first time, remap the input
+                remap_pin_to_new_node(node->input_pins[j], partial_products[i][j], 1);
+            } else {
+                // this input was remapped before, copy from the AND gate input instead
+                add_input_pin_to_node(partial_products[i][j], copy_input_npin(partial_products[0][j]->input_pins[1]), 1);
+            }
+            // hookup the input multiplicand bits to the AND gates (input pin 0 of each gate)
+            if (j == 0) {
+                // when connecting the input to an AND gate for the first time, remap the input
+                remap_pin_to_new_node(node->input_pins[i + node->input_port_sizes[0]], partial_products[i][j], 0);
+            } else {
+                // this input was remapped before, copy from the AND gate input instead
+                add_input_pin_to_node(partial_products[i][j], copy_input_npin(partial_products[i][0]->input_pins[0]), 0);
+            }
+        }
+    }
+
+    // iterate over all the levels of addition
+    for (size_t level = 0; level < adders.size(); level++) {
+        // the number of rows in the next stage is the ceiling of number of rows in this stage divided by 2
+        addition_stages[level + 1].resize(std::ceil(addition_stages[level].size() / 2.));
+        // the number of adders in this stage is the integer division of the number of rows in this stage
+        adder_widths[level].resize(addition_stages[level].size() / 2);
+        adders[level].resize(addition_stages[level].size() / 2);
+
+        // iterate over every two rows
+        for (size_t row = 0; row < addition_stages[level].size() - 1; row += 2) {
+            auto &first_row = addition_stages[level][row];
+            auto &second_row = addition_stages[level][row + 1];
+            long shift_difference = second_row.shift - first_row.shift;
+            auto add_id = row / 2;
+
+            // get the widths of the adder, by finding the larger operand size
+            adder_widths[level][add_id] = std::max<size_t>(first_row.bits.size() - shift_difference, second_row.bits.size());
+            // first level of addition has a carry out that needs to be generated, so increase adder size by 1
+            if (level == 0)
+                adder_widths[level][add_id]++;
+            // add one bit for carry out if that last bit of the addition is fed by both levels
+            // (was found to be the only case where a carry out will be needed in this multiplier adder tree)
+            if (first_row.bits.size() - shift_difference == second_row.bits.size())
+                adder_widths[level][add_id]++;
+
+            // initialize this adder
+            adders[level][add_id] = allocate_nnode(node->loc);
+            init_multiplier_adder(adders[level][add_id], node, adder_widths[level][add_id], adder_widths[level][add_id]);
+            adders[level][add_id]->name = node_name(adders[level][add_id], node->name);
+
+            // initialize the output of this adder in the next stage
+            addition_stages[level + 1][add_id].shift = first_row.shift;
+            addition_stages[level + 1][add_id].bits.resize(shift_difference + adder_widths[level][add_id]);
+            // copy the bits that weren't fed to adders in the previous stage
+            for (size_t i = 0; (long)i < shift_difference; i++) {
+                addition_stages[level + 1][add_id].bits[i] = first_row.bits[i];
+            }
+            // copy adder output bits to their row in next stage
+            for (size_t i = 0; i < adder_widths[level][add_id]; i++) {
+                addition_stages[level + 1][add_id].bits[i + shift_difference] = {adders[level][add_id], i};
+            }
+
+            // connect the bits in the rows to the adder inputs.
+            for (size_t bit = 0; bit < adder_widths[level][add_id]; bit++) {
+                // input port a of the adder
+                if (bit < first_row.bits.size() - shift_difference) {
+                    auto bit_a = first_row.bits[bit + shift_difference];
+                    connect_nodes(bit_a.first, bit_a.second, adders[level][add_id], bit);
+                } else {
+                    // connect additional inputs to gnd
+                    add_input_pin_to_node(adders[level][add_id], get_zero_pin(netlist), bit);
+                }
+                // input port b of the adder
+                if (bit < second_row.bits.size()) {
+                    connect_nodes(second_row.bits[bit].first, second_row.bits[bit].second, adders[level][add_id], bit + adder_widths[level][add_id]);
+                } else {
+                    // connect additional inputs to gnd
+                    add_input_pin_to_node(adders[level][add_id], get_zero_pin(netlist), bit + adder_widths[level][add_id]);
+                }
+            }
+        }
+
+        // if this level has an odd number of rows, copy the last row to the next level to be added later
+        if (addition_stages[level].size() % 2 == 1) {
+            addition_stages[level + 1].back() = addition_stages[level].back();
+        }
+    }
+
+    // the size of the last stage of the adder tree should match the output size of the multiplier
+    oassert((long)addition_stages[add_levels][0].bits.size() == node->num_output_pins);
+
+    // Remap the outputs of the multiplier
+    for (size_t i = 0; i < addition_stages[add_levels][0].bits.size(); i++) {
+        auto output_bit = addition_stages[add_levels][0].bits[i];
+        remap_pin_to_new_node(node->output_pins[i], output_bit.first, output_bit.second);
+    }
+
+    // check that all connections and input/output remapping is done right
+    // meaning all the inputs and outputs of the multiplier that was split are nullptrs
+    // and all inputs and outputs of the AND gates and adders are not nullptrs
+
+    // check that all the inputs/outputs of the multiplier are remapped
+    for (long i = 0; i < node->num_input_pins; i++) {
+        oassert(!node->input_pins[i]);
+    }
+    for (long i = 0; i < node->num_output_pins; i++) {
+        oassert(!node->output_pins[i]);
+    }
+
+    // check that all the partial product gates have nets connected to their inputs/outputs
+    for (size_t ilevel = 0; ilevel < partial_products.size(); ilevel++) {
+        for (size_t depth = 0; depth < partial_products[ilevel].size(); depth++) {
+            for (int i = 0; i < partial_products[ilevel][depth]->num_input_pins; i++) {
+                oassert(partial_products[ilevel][depth]->input_pins[i]);
+            }
+            for (int i = 0; i < partial_products[ilevel][depth]->num_output_pins; i++) {
+                oassert(partial_products[ilevel][depth]->output_pins[i]);
+            }
+        }
+    }
+
+    // check that all adders have nets connected to their inputs/outputs
+    for (size_t ilevel = 0; ilevel < adders.size(); ilevel++) {
+        for (size_t iadd = 0; iadd < adders[ilevel].size(); iadd++) {
+            for (int i = 0; i < adders[ilevel][iadd]->num_input_pins; i++) {
+                oassert(adders[ilevel][iadd]->input_pins[i]);
+            }
+            for (int i = 0; i < adders[ilevel][iadd]->num_output_pins; i++) {
+                oassert(adders[ilevel][iadd]->output_pins[i]);
+            }
+        }
+    }
+
+    // CLEAN UP: hand the now fully disconnected multiplier node to cleanup_mult_old_node
+    cleanup_mult_old_node(node, netlist);
+}
+
+/**
+ * --------------------------------------------------------------------------
+ * (function: is_constant_multipication)
+ *
+ * @brief classify a multiplication node by which of its input ports are
+ * driven exclusively by the netlist's constant zero/one nets
+ *
+ * @param node pointer to the multiplication netlist node
+ * @param netlist netlist providing the zero_net and one_net to compare against
+ * @return CONSTANT, MULTIPLIER_CONSTANT, MULTIPICAND_CONSTANT or NOT_CONSTANT
+ * -------------------------------------------------------------------------*/
+mult_port_stat_e is_constant_multipication(nnode_t *node, netlist_t *netlist)
+{
+    /**
+     * Input pin layout of a multiply node
+     * IN1: (n bits)  input_port[0]  ->  input_pins[0 .. n-1]
+     * IN2: (m bits)  input_port[1]  ->  input_pins[n .. n+m-1]
+     */
+    const int in1_width = node->input_port_sizes[0];
+    const int in2_width = node->input_port_sizes[1];
+
+    /*
+     * Tells whether each of the `width` pins starting at `first_pin` is
+     * driven by the zero net or by the one net. Nets are identified by
+     * name, and a port without any pins counts as constant.
+     */
+    auto port_is_constant = [node, netlist](int first_pin, int width) -> bool {
+        for (int offset = 0; offset < width; offset++) {
+            /* name of the net driving the current pin */
+            const char *driver = node->input_pins[first_pin + offset]->net->name;
+
+            /* tied to GND? */
+            if (strcmp(driver, netlist->zero_net->name) == 0)
+                continue;
+
+            /* tied to VCC? */
+            if (strcmp(driver, netlist->one_net->name) == 0)
+                continue;
+
+            /* driven by a regular signal: the port is not constant */
+            return false;
+        }
+        return true;
+    };
+
+    /* IN1 is the multiplier port, IN2 the multiplicand port */
+    const bool multiplier_const = port_is_constant(0, in1_width);
+    const bool multiplicand_const = port_is_constant(in1_width, in2_width);
+
+    /* fold the two flags into the port status */
+    mult_port_stat_e status;
+    if (multiplier_const) {
+        if (multiplicand_const)
+            status = mult_port_stat_e::CONSTANT;
+        else
+            status = mult_port_stat_e::MULTIPLIER_CONSTANT;
+    } else {
+        if (multiplicand_const)
+            status = mult_port_stat_e::MULTIPICAND_CONSTANT;
+        else
+            status = mult_port_stat_e::NOT_CONSTANT;
+    }
+
+    return status;
+}
+
+/**
+ *-------------------------------------------------------------------------------------------
+ * (function: check_constant_multipication )
+ *
+ * @brief looks for a multiplication with a constant port. When one is found, the
+ * constant port is optimized and the constant multiplication is implemented
+ * @param node pointing to the mul node
+ * @param traverse_mark_number unique traversal mark for blif elaboration pass
+ * @param netlist pointer to the current netlist file
+ * @return true if the node had a constant port (and was handled), false otherwise
+ *-----------------------------------------------------------------------------------------*/
+bool check_constant_multipication(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist)
+{
+    oassert(node->traverse_visited == traverse_mark_number);
+
+    /* find out which input ports (if any) are tied to constants */
+    const mult_port_stat_e port_status = is_constant_multipication(node, netlist);
+
+    /* nothing to do when neither port is constant */
+    if (port_status == mult_port_stat_e::NOT_CONSTANT)
+        return false;
+
+    /* optimize the constant port first; the returned node is the one to implement */
+    nnode_t *optimized_node = perform_const_mult_optimization(port_status, node, traverse_mark_number, netlist);
+
+    /* implement the constant multiplication and connect its output pins */
+    signal_list_t *product_signals = implement_constant_multipication(optimized_node, port_status, static_cast<short>(traverse_mark_number), netlist);
+    connect_constant_mult_outputs(optimized_node, product_signals);
+
+    return true;
+}
+
+/**
+ *-------------------------------------------------------------------------------------------
+ * (function: perform_const_mult_optimization )
+ *
+ * @brief checks the size of the constant port of a constant multiplication.
+ * If possible, the extra unnecessary pins of the constant port will
+ * be removed
+ *
+ * @param node pointing to the mul node
+ * @param traverse_mark_number unique traversal mark for blif elaboration pass
+ * @param netlist pointer to the current netlist file
+ *-----------------------------------------------------------------------------------------*/
+static nnode_t *perform_const_mult_optimization(mult_port_stat_e mult_port_stat, nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist)
+{
+    oassert(node->traverse_visited == traverse_mark_number);
+
+    int i;
+    /* constatnt and variable port of the given multipication */
+    signal_list_t *const_port = init_signal_list();
+    signal_list_t *var_port = init_signal_list();
+    operation_list const_signedness = UNSIGNED;
+    operation_list var_signedness = UNSIGNED;
+
+    /* initialize const and var port signals */
+    if (mult_port_stat == mult_port_stat_e::MULTIPICAND_CONSTANT) {
+        /* adding var port pins to signal list */
+        for (i = 0; i < node->input_port_sizes[0]; i++) {
+            add_pin_to_signal_list(var_port, node->input_pins[i]);
+        }
+        var_signedness = node->attributes->port_a_signed;
+        /* adding const port pins to signal list */
+        for (i = node->input_port_sizes[0]; i < node->num_input_pins; i++) {
+            add_pin_to_signal_list(const_port, node->input_pins[i]);
+        }
+        const_signedness = node->attributes->port_b_signed;
+    } else if (mult_port_stat == mult_port_stat_e::MULTIPLIER_CONSTANT) {
+        /* adding var port pins to signal list */
+        for (i = 0; i < node->input_port_sizes[0]; i++) {
+            add_pin_to_signal_list(const_port, node->input_pins[i]);
+        }
+        const_signedness = node->attributes->port_a_signed;
+        /* adding const port pins to signal list */
+        for (i = node->input_port_sizes[0]; i < node->num_input_pins; i++) {
+            add_pin_to_signal_list(var_port, node->input_pins[i]);
+        }
+        var_signedness = node->attributes->port_b_signed;
+    }
+
+    int idx = -1;
+    signal_list_t *new_const_port = init_signal_list();
+    /* iterating over const port to determine useless ports */
+    for (i = const_port->count; i > 0; i--) {
+        npin_t *pin = const_port->pins[i - 1];
+        /* starting from the end and prune pins connected to GND */
+        if (!strcmp(pin->net->name, netlist->one_net->name)) {
+            idx = i;
+            break;
+        } else {
+            /* detach from the old mult node and free pin */
+            delete_npin(pin);
+        }
+    }
+    /* initializing new const port */
+    for (i = 0; i < idx; i++) {
+        npin_t *pin = const_port->pins[i];
+        add_pin_to_signal_list(new_const_port, pin);
+    }
+
+    signal_list_t *first_port = (mult_port_stat == mult_port_stat_e::MULTIPLIER_CONSTANT) ? new_const_port : var_port;
+    signal_list_t *second_port = (mult_port_stat == mult_port_stat_e::MULTIPLIER_CONSTANT) ? var_port : new_const_port;
+    /* creating new mult node */
+    int offset = 0;
+    nnode_t *new_node = make_2port_gate(node->type, first_port->count, second_port->count, node->num_output_pins, node, traverse_mark_number);
+    /* copy attributes */
+    if (mult_port_stat == mult_port_stat_e::MULTIPLIER_CONSTANT) {
+        new_node->attributes->port_a_signed = const_signedness;
+        new_node->attributes->port_b_signed = var_signedness;
+    } else {
+        new_node->attributes->port_a_signed = var_signedness;
+        new_node->attributes->port_b_signed = const_signedness;
+    }
+    /* adding first port */
+    for (i = 0; i < first_port->count; i++) {
+        remap_pin_to_new_node(first_port->pins[i], new_node, offset + i);
+    }
+    offset += first_port->count;
+    /* adding second port */
+    for (i = 0; i < second_port->count; i++) {
+        remap_pin_to_new_node(second_port->pins[i], new_node, offset + i);
+    }
+    /* remap output ports */
+    for (i = 0; i < node->num_output_pins; i++) {
+        remap_pin_to_new_node(node->output_pins[i], new_node, i);
+    }
+
+    // CLEAN UP
+    free_signal_list(const_port);
+    free_signal_list(var_port);
+    free_signal_list(new_const_port);
+    free_nnode(node);
+    node = NULL;
+
+    return (new_node);
+}
+
+/**
+ * -------------------------------------------------------------------------
+ * (function: check_multiplier_port_size)
+ *
+ * If output size is less than the sum of input sizes,
+ * we need to expand output pins with pad pins
+ *
+ * @param node pointer to the multiplication node
+ * -----------------------------------------------------------------------
+ */
+void check_multiplier_port_size(nnode_t *node)
+{
+    /* Can only perform the optimisation if hard multipliers exist! */
+    if (hard_multipliers == NULL)
+        return;
+
+    int mula = node->input_port_sizes[0];
+    int mulb = node->input_port_sizes[1];
+    int sizeout = node->num_output_pins;
+    int limit = mula + mulb;
+
+    /* check the output port size: it is widened to the sum of both input widths when narrower */
+    if (node->num_output_pins < limit) {
+        // Set the limit value as the number of output pins
+        node->num_output_pins = limit;
+        node->output_port_sizes[0] = limit;
+        // Keep record of old output pins pointer for cleaning up later
+        npin_t **old_output_pins = node->output_pins;
+        node->output_pins = (npin_t **)vtr::calloc(node->num_output_pins, sizeof(npin_t *));
+
+        // Move output pins to new array and adding pad pins in extra spots
+        for (int i = 0; i < node->num_output_pins; i++) {
+            if (i < sizeout)
+                node->output_pins[i] = old_output_pins[i];
+            else {
+                npin_t *new_pin = allocate_npin();
+                new_pin->name = append_string("", "%s~dummy_output~%d", node->name, i - sizeout);
+                nnet_t *new_net = allocate_nnet();
+
+                // hook the output pin into the node
+                add_output_pin_to_node(node, new_pin, i);
+                // make the new pin the driver of its own fresh net
+                add_driver_pin_to_net(new_net, new_pin);
+            }
+        }
+        // CLEAN UP: only the old pointer array is released, the pins themselves were moved over
+        vtr::free(old_output_pins);
+    }
+}
+/*-------------------------------------------------------------------------
+ * (function: clean_multipliers)
+ *
+ * Empties the mult_list that is used during optimization by deleting its
+ *	entries one after another until the list pointer is NULL
+ *-----------------------------------------------------------------------*/
+void clean_multipliers()
+{
+    while (mult_list) {
+        mult_list = delete_in_vptr_list(mult_list);
+    }
+}
+
+/**
+ * -------------------------------------------------------------------------
+ * (function: cleanup_mult_old_node)
+ *
+ * @brief <clean up nodeo, a high level MULT node>
+ * In split_soft_multplier function, nodeo is split into small multipliers,
+ * while because of the complexity of input pin connections they have not been
+ * remapped to new nodes, they were just copied and added to new nodes. This function
+ * will detach input pins from the nodeo. Moreover, it will connect the net of
+ * unconnected output signals to the GND node, detach the pin from nodeo and
+ * free the output pins to avoid memory leak.
+ *
+ * @param nodeo representing the old multiplier node (freed by this function)
+ * @param netlist representing the current netlist
+ *-----------------------------------------------------------------------*/
+static void cleanup_mult_old_node(nnode_t *nodeo, netlist_t *netlist)
+{
+    int i;
+    /* Disconnecting input pins from the old node side */
+    for (i = 0; i < nodeo->num_input_pins; i++) {
+        nodeo->input_pins[i] = NULL;
+    }
+
+    /* connecting the extra output pins to the gnd node */
+    for (i = 0; i < nodeo->num_output_pins; i++) {
+        npin_t *output_pin = nodeo->output_pins[i];
+
+        if (output_pin && output_pin->node) {
+            /* for now we just pass the signals directly through */
+            npin_t *zero_pin = get_zero_pin(netlist);
+            int idx_2_buffer = zero_pin->pin_net_idx;
+
+            // Dont eliminate the buffer if there are multiple drivers or the AST included it
+            if (output_pin->net->num_driver_pins <= 1) {
+                /* join all fanouts of the output net with the input pins net */
+                join_nets(zero_pin->net, output_pin->net);
+            }
+
+            /* erase the zero net's pointer to the temporary pin on both paths, so the
+             * pin freed just below is never left dangling in that fanout list */
+            zero_pin->net->fanout_pins[idx_2_buffer] = NULL;
+            free_npin(zero_pin);
+            free_npin(output_pin);
+            /* NOTE(review): output_pin is freed even when its multi-driver net was kept - confirm that net no longer refers to it */
+            /* Disconnecting output pins from the old node side */
+            nodeo->output_pins[i] = NULL;
+        }
+    }
+
+    // CLEAN UP
+    free_nnode(nodeo);
+}
+
+void free_multipliers()
+{
+    /* nothing to release unless a hard multiplier model with instances exists */
+    if (!hard_multipliers || !hard_multipliers->instances)
+        return;
+
+    /* walk the chain, remembering the successor before each entry is freed */
+    for (t_multiplier *entry = (t_multiplier *)hard_multipliers->instances, *successor = NULL; entry != NULL; entry = successor) {
+        successor = entry->next;
+        vtr::free(entry);
+    }
+
+    hard_multipliers->instances = NULL;
+}
diff --git a/parmys-plugin/src/netlist_check.cc b/parmys-plugin/src/netlist_check.cc
new file mode 100644
index 000000000..92e945feb
--- /dev/null
+++ b/parmys-plugin/src/netlist_check.cc
@@ -0,0 +1,741 @@
+/*
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "odin_globals.h"
+#include "odin_types.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "netlist_utils.h"
+#include "odin_util.h"
+// #include "ast_util.h"
+#include "netlist_check.h"
+#include "netlist_visualizer.h"
+#include "string_cache.h"
+#include "vtr_memory.h"
+
+void levelize_backwards(netlist_t *netlist);
+void levelize_backwards_clean_checking_for_liveness(netlist_t *netlist);
+void levelize_forwards(netlist_t *netlist);
+void levelize_forwards_clean_checking_for_combo_loop_and_liveness(netlist_t *netlist);
+nnode_t *find_node_at_top_of_combo_loop(nnode_t *start_node);
+void depth_first_traversal_check_if_forward_leveled(short marker_value, netlist_t *netlist);
+void depth_first_traverse_check_if_forward_leveled(nnode_t *node, uintptr_t traverse_mark_number);
+/* helpers that split the netlist into sequential levels (both are defined further down in this file) */
+void sequential_levelized_dfs(short marker_value, netlist_t *netlist);
+void depth_first_traverse_until_next_ff_or_output(nnode_t *node, nnode_t *calling_node, uintptr_t traverse_mark_number, int seq_level,
+                                                  netlist_t *netlist);
+
+/*---------------------------------------------------------------------------------------------
+ * (function: check_netlist)
+ * Note: netlist passed in needs to be initialized by allocate_netlist() to make sure correctly initialized.
+ *-------------------------------------------------------------------------------------------*/
+void check_netlist(netlist_t *netlist)
+{
+    /* the only work done here is the optional graph dump of the netlist */
+    if (!configuration.output_netlist_graphs)
+        return;
+
+    graphVizOutputNetlist(configuration.debug_output_path, "net", 1, netlist);
+}
+
+void depth_traverse_check_combinational_loop(nnode_t *node, short start, STRING_CACHE *in_path);
+
+/*---------------------------------------------------------------------------------------------
+ * (function: sequential_levelized_dfs)
+ *-------------------------------------------------------------------------------------------*/
+void sequential_levelized_dfs(short marker_value, netlist_t *netlist)
+{
+    int i;
+    /* level 0 = primary inputs plus gnd/vcc; the traversal below fills every next level with the flip-flops it reaches */
+    int sequential_level = 0;
+    netlist->num_sequential_levels = 1;
+    netlist->num_at_sequential_level = (int *)vtr::realloc(netlist->num_at_sequential_level, sizeof(int) * netlist->num_sequential_levels);
+    netlist->sequential_level_nodes =
+      (nnode_t ***)vtr::realloc(netlist->sequential_level_nodes, sizeof(nnode_t **) * (netlist->num_sequential_levels));
+    netlist->sequential_level_nodes[netlist->num_sequential_levels - 1] = NULL;
+    netlist->num_at_sequential_level[netlist->num_sequential_levels - 1] = 0;
+
+    /* allocate the first list with room for every primary input plus gnd and vcc */
+    nnode_t **first_level =
+      (nnode_t **)vtr::realloc(netlist->sequential_level_nodes[sequential_level], sizeof(nnode_t *) * (netlist->num_top_input_nodes + 2));
+    netlist->sequential_level_nodes[sequential_level] = first_level;
+    int num_first_level = 0;
+
+    /* add all the primary nodes to the first level, packed without gaps so the count always matches the filled slots */
+    for (i = 0; i < netlist->num_top_input_nodes; i++) {
+        if (netlist->top_input_nodes[i] != NULL) {
+            first_level[num_first_level++] = netlist->top_input_nodes[i];
+            /* record the level */
+            netlist->top_input_nodes[i]->sequential_level = sequential_level;
+        }
+    }
+
+    /* now add the ground and vcc nodes right behind the inputs */
+    if (netlist->gnd_node != NULL) {
+        first_level[num_first_level++] = netlist->gnd_node;
+        /* record the level */
+        netlist->gnd_node->sequential_level = sequential_level;
+    }
+    if (netlist->vcc_node != NULL) {
+        first_level[num_first_level++] = netlist->vcc_node;
+        /* record the level */
+        netlist->vcc_node->sequential_level = sequential_level;
+    }
+    netlist->num_at_sequential_level[sequential_level] = num_first_level;
+
+    while (netlist->num_at_sequential_level[sequential_level] > 0) {
+        /* WHILE there are start nodes (PIs/constants at level 0, flip-flops afterwards) at this level */
+
+        /* Allocate the next level of storage since this part is a forward thing of the next flip-flops at the level */
+        /* add another sequential level.  Note, needs to be done before we depth first the current combinational level. */
+        netlist->num_sequential_levels++;
+        netlist->sequential_level_nodes =
+          (nnode_t ***)vtr::realloc(netlist->sequential_level_nodes, sizeof(nnode_t **) * (netlist->num_sequential_levels));
+        netlist->num_at_sequential_level = (int *)vtr::realloc(netlist->num_at_sequential_level, sizeof(int) * netlist->num_sequential_levels);
+        netlist->sequential_level_nodes[netlist->num_sequential_levels - 1] = NULL;
+        netlist->num_at_sequential_level[netlist->num_sequential_levels - 1] = 0;
+
+        /* deals with recording the combinational nodes that terminate this level */
+        netlist->num_sequential_level_combinational_termination_nodes++;
+        netlist->sequential_level_combinational_termination_node =
+          (nnode_t ***)vtr::realloc(netlist->sequential_level_combinational_termination_node,
+                                    sizeof(nnode_t **) * (netlist->num_sequential_level_combinational_termination_nodes));
+        netlist->num_at_sequential_level_combinational_termination_node =
+          (int *)vtr::realloc(netlist->num_at_sequential_level_combinational_termination_node,
+                              sizeof(int) * netlist->num_sequential_level_combinational_termination_nodes);
+        netlist->sequential_level_combinational_termination_node[netlist->num_sequential_level_combinational_termination_nodes - 1] = NULL;
+        netlist->num_at_sequential_level_combinational_termination_node[netlist->num_sequential_level_combinational_termination_nodes - 1] = 0;
+
+        /* go through the entire list, mark with sequential level, and build the next list */
+        for (i = 0; i < netlist->num_at_sequential_level[sequential_level]; i++) {
+            depth_first_traverse_until_next_ff_or_output(netlist->sequential_level_nodes[sequential_level][i], NULL, marker_value, sequential_level,
+                                                         netlist);
+        }
+
+        /* now potentially do next sequential level */
+        sequential_level++;
+    }
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: depth_first_traverse_until_next_ff_or_output)
+ *-------------------------------------------------------------------------------------------*/
+void depth_first_traverse_until_next_ff_or_output(nnode_t *node, nnode_t *calling_node, uintptr_t traverse_mark_number, int seq_level,
+                                                  netlist_t *netlist)
+{
+    int i, j;
+    nnode_t *next_node;
+    nnet_t *next_net;
+
+    /* first, check if the calling node should be recorded */
+    if ((calling_node != NULL) && ((node->type == FF_NODE) || (node->type == OUTPUT_NODE))) {
+        /* IF - this node is the end of a sequential level then the node before needs to be stored */
+        if (calling_node->sequential_terminator == false) {
+            /* IF - it hasn't been stored before, append it to the termination list
+             * of the most recently opened sequential level */
+            int last = netlist->num_sequential_level_combinational_termination_nodes - 1;
+            int num_stored = netlist->num_at_sequential_level_combinational_termination_node[last] + 1;
+
+            /* grow the list by one slot and put the calling node into it */
+            netlist->sequential_level_combinational_termination_node[last] =
+              (nnode_t **)vtr::realloc(netlist->sequential_level_combinational_termination_node[last], sizeof(nnode_t *) * num_stored);
+            netlist->sequential_level_combinational_termination_node[last][num_stored - 1] = calling_node;
+            netlist->num_at_sequential_level_combinational_termination_node[last] = num_stored;
+
+            /* mark the node locally so that it is never stored twice */
+            calling_node->sequential_terminator = true;
+        }
+    }
+
+    if (node->traverse_visited == traverse_mark_number) {
+        /* if already visited then nothing to do */
+        return;
+    } else if (node->type == CLOCK_NODE) {
+        /* since this is a node that touches all flip flops, don't analyze for sequential level */
+        return;
+    } else if (node->type == FF_NODE) {
+        /* ELSE IF - this is a ff_node, so add it to the list for the next sequential level */
+        /* mark as traversed */
+        node->traverse_visited = traverse_mark_number;
+        node->sequential_level = seq_level + 1;
+
+        /* add to the next sequential list */
+        netlist->num_at_sequential_level[seq_level + 1]++;
+        netlist->sequential_level_nodes[seq_level + 1] = (nnode_t **)vtr::realloc(
+          netlist->sequential_level_nodes[seq_level + 1], sizeof(nnode_t *) * netlist->num_at_sequential_level[seq_level + 1]);
+        netlist->sequential_level_nodes[seq_level + 1][netlist->num_at_sequential_level[seq_level + 1] - 1] = node;
+
+        return;
+    } else {
+        /* ELSE - this is a node so depth visit it */
+
+        node->traverse_visited = traverse_mark_number;
+        node->sequential_level = seq_level;
+        /* follow every connected output pin; a NULL pin or a pin without a net is skipped */
+        for (i = 0; i < node->num_output_pins; i++) {
+            if ((node->output_pins[i] == NULL) || (node->output_pins[i]->net == NULL))
+                continue;
+
+            next_net = node->output_pins[i]->net;
+            for (j = 0; j < next_net->num_fanout_pins; j++) {
+                if (next_net->fanout_pins[j] == NULL)
+                    continue;
+
+                next_node = next_net->fanout_pins[j]->node;
+                if (next_node == NULL)
+                    continue;
+
+                depth_first_traverse_until_next_ff_or_output(next_node, node, traverse_mark_number, seq_level, netlist);
+            }
+        }
+    }
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: depth_first_traversal_check_if_forward_leveled)
+ *-------------------------------------------------------------------------------------------*/
+void depth_first_traversal_check_if_forward_leveled(short marker_value, netlist_t *netlist)
+{
+    /* every primary input that is present acts as a root of the check */
+    for (int idx = 0; idx < netlist->num_top_input_nodes; idx++) {
+        nnode_t *root = netlist->top_input_nodes[idx];
+        if (root)
+            depth_first_traverse_check_if_forward_leveled(root, marker_value);
+    }
+    /* the ground node and then the vcc node are roots as well */
+    if (netlist->gnd_node) {
+        depth_first_traverse_check_if_forward_leveled(netlist->gnd_node, marker_value);
+    }
+    if (netlist->vcc_node) {
+        depth_first_traverse_check_if_forward_leveled(netlist->vcc_node, marker_value);
+    }
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: depth_first_traverse_check_if_forward_leveled)
+ *-------------------------------------------------------------------------------------------*/
+void depth_first_traverse_check_if_forward_leveled(nnode_t *node, uintptr_t traverse_mark_number)
+{
+    int i, j;
+    nnode_t *next_node;
+    nnet_t *next_net;
+
+    if (node->traverse_visited == traverse_mark_number) {
+        return;
+    } else {
+        /* ELSE - this is a new node so depth visit it */
+
+        node->traverse_visited = traverse_mark_number;
+        /* follow every connected output pin; a NULL pin or a pin without a net is skipped */
+        for (i = 0; i < node->num_output_pins; i++) {
+            if ((node->output_pins[i] == NULL) || (node->output_pins[i]->net == NULL))
+                continue;
+
+            next_net = node->output_pins[i]->net;
+            for (j = 0; j < next_net->num_fanout_pins; j++) {
+                if (next_net->fanout_pins[j] == NULL)
+                    continue;
+
+                next_node = next_net->fanout_pins[j]->node;
+                if (next_node == NULL)
+                    continue;
+                /* a non-FF fanout still at forward level -1 is reported as a combinational loop: dump it, then trip the assert */
+                if ((next_node->forward_level == -1) && (next_node->type != FF_NODE)) {
+                    graphVizOutputCombinationalNet(configuration.debug_output_path, "combo_loop", COMBO_LOOP_ERROR,
+                                                   /*next_node);*/ find_node_at_top_of_combo_loop(next_node));
+                    oassert(false);
+                }
+
+                depth_first_traverse_check_if_forward_leveled(next_node, traverse_mark_number);
+            }
+        }
+    }
+}
+/*---------------------------------------------------------------------------------------------
+ * (function: levelize_forwards)
+ * Note that this levelizing is combinational delay levels where the assumption is that
+ * each node has a unit delay.
+ *-------------------------------------------------------------------------------------------*/
+void levelize_forwards(netlist_t *netlist)
+{
+    int i, j, k;
+    int cur_for_level;
+    short more_levels = true;
+    short all_visited = true;
+
+    /* add all the PIs, the constant nodes (vcc, gnd, pad) and the FFs as forward level 0 */
+    cur_for_level = 0;
+    netlist->num_forward_levels = 1;
+    netlist->num_at_forward_level = (int *)vtr::realloc(netlist->num_at_forward_level, sizeof(int) * netlist->num_forward_levels);
+    netlist->forward_levels = (nnode_t ***)vtr::realloc(netlist->forward_levels, sizeof(nnode_t **) * (netlist->num_forward_levels));
+    netlist->forward_levels[netlist->num_forward_levels - 1] = NULL;
+    netlist->num_at_forward_level[netlist->num_forward_levels - 1] = 0;
+    for (i = 0; i < netlist->num_top_input_nodes + 3; i++) {
+        if ((i == netlist->num_top_input_nodes) && (netlist->vcc_node != NULL)) {
+            /* vcc */
+            netlist->forward_levels[cur_for_level] = (nnode_t **)vtr::realloc(netlist->forward_levels[cur_for_level],
+                                                                              sizeof(nnode_t *) * (netlist->num_at_forward_level[cur_for_level] + 1));
+            netlist->forward_levels[cur_for_level][netlist->num_at_forward_level[cur_for_level]] = netlist->vcc_node;
+            netlist->num_at_forward_level[cur_for_level]++;
+            netlist->vcc_node->forward_level = 0;
+        } else if ((i == netlist->num_top_input_nodes + 1) && (netlist->gnd_node != NULL)) {
+            /* gnd */
+            netlist->forward_levels[cur_for_level] = (nnode_t **)vtr::realloc(netlist->forward_levels[cur_for_level],
+                                                                              sizeof(nnode_t *) * (netlist->num_at_forward_level[cur_for_level] + 1));
+            netlist->forward_levels[cur_for_level][netlist->num_at_forward_level[cur_for_level]] = netlist->gnd_node;
+            netlist->num_at_forward_level[cur_for_level]++;
+            netlist->gnd_node->forward_level = 0;
+        } else if ((i == netlist->num_top_input_nodes + 2) && (netlist->pad_node != NULL)) {
+            /* pad */
+            netlist->forward_levels[cur_for_level] = (nnode_t **)vtr::realloc(netlist->forward_levels[cur_for_level],
+                                                                              sizeof(nnode_t *) * (netlist->num_at_forward_level[cur_for_level] + 1));
+            netlist->forward_levels[cur_for_level][netlist->num_at_forward_level[cur_for_level]] = netlist->pad_node;
+            netlist->num_at_forward_level[cur_for_level]++;
+            netlist->pad_node->forward_level = 0;
+        } else if (i >= netlist->num_top_input_nodes) {
+            continue;
+        } else if (netlist->top_input_nodes[i] != NULL) {
+            netlist->forward_levels[cur_for_level] = (nnode_t **)vtr::realloc(netlist->forward_levels[cur_for_level],
+                                                                              sizeof(nnode_t *) * (netlist->num_at_forward_level[cur_for_level] + 1));
+            netlist->forward_levels[cur_for_level][netlist->num_at_forward_level[cur_for_level]] = netlist->top_input_nodes[i];
+            netlist->num_at_forward_level[cur_for_level]++;
+            netlist->top_input_nodes[i]->forward_level = 0;
+        }
+    }
+    for (i = 0; i < netlist->num_ff_nodes; i++) {
+        if (netlist->ff_nodes[i] != NULL) {
+            netlist->forward_levels[cur_for_level] = (nnode_t **)vtr::realloc(netlist->forward_levels[cur_for_level],
+                                                                              sizeof(nnode_t *) * (netlist->num_at_forward_level[cur_for_level] + 1));
+            netlist->forward_levels[cur_for_level][netlist->num_at_forward_level[cur_for_level]] = netlist->ff_nodes[i];
+            netlist->num_at_forward_level[cur_for_level]++;
+            netlist->ff_nodes[i]->forward_level = 0;
+        }
+    }
+
+    while (more_levels) {
+        /* another level so add space */
+        netlist->num_forward_levels++;
+        netlist->num_at_forward_level = (int *)vtr::realloc(netlist->num_at_forward_level, sizeof(int) * netlist->num_forward_levels);
+        netlist->forward_levels = (nnode_t ***)vtr::realloc(netlist->forward_levels, sizeof(nnode_t **) * (netlist->num_forward_levels));
+        netlist->forward_levels[netlist->num_forward_levels - 1] = NULL;
+        netlist->num_at_forward_level[netlist->num_forward_levels - 1] = 0;
+
+        /* go through each element at this level */
+        for (i = 0; i < netlist->num_at_forward_level[cur_for_level]; i++) {
+            nnode_t *current_node = netlist->forward_levels[cur_for_level][i];
+            if (current_node == NULL)
+                continue;
+
+            /* at each node visit every output pin and follow its net to the fanout nodes */
+            for (j = 0; j < current_node->num_output_pins; j++) {
+                int *fanouts_visited;
+                /* skip pins without a net here: the loop condition below dereferences the net */
+                if ((current_node->output_pins[j] == NULL) || (current_node->output_pins[j]->net == NULL))
+                    continue;
+                for (k = 0; k < current_node->output_pins[j]->net->num_fanout_pins; k++) {
+                    int idx;
+                    /* visit the fanout point */
+                    if (current_node->output_pins[j]->net->fanout_pins[k] == NULL)
+                        continue;
+
+                    /* the node whose input pin hangs on this fanout slot */
+                    nnode_t *output_node = current_node->output_pins[j]->net->fanout_pins[k]->node;
+
+                    if (output_node == NULL)
+                        continue;
+
+                    if (output_node->node_data == NULL) {
+                        /* if this fanout hasn't been visited yet this will be null */
+                        fanouts_visited = (int *)vtr::malloc(sizeof(int) * (output_node->num_input_pins));
+
+                        for (idx = 0; idx < output_node->num_input_pins; idx++) {
+                            fanouts_visited[idx] = -1;
+                        }
+
+                        output_node->node_data = (void *)fanouts_visited;
+                        output_node->unique_node_data_id = LEVELIZE;
+                    } else {
+                        /* ELSE - get the list */
+                        oassert(output_node->unique_node_data_id == LEVELIZE);
+                        fanouts_visited = (int *)output_node->node_data;
+                    }
+
+                    /* mark this entry as visited */
+                    fanouts_visited[current_node->output_pins[j]->net->fanout_pins[k]->pin_node_idx] = cur_for_level;
+
+                    /* check if they've all been marked */
+                    all_visited = true;
+                    for (idx = 0; idx < output_node->num_input_pins; idx++) {
+                        if (fanouts_visited[idx] == -1) {
+                            all_visited = false;
+                            break;
+                        }
+                    }
+
+                    if ((all_visited == true) && (output_node->type != FF_NODE)) {
+                        /* This one has been visited by everyone */
+                        netlist->forward_levels[cur_for_level + 1] = (nnode_t **)vtr::realloc(
+                          netlist->forward_levels[cur_for_level + 1], sizeof(nnode_t *) * (netlist->num_at_forward_level[cur_for_level + 1] + 1));
+                        netlist->forward_levels[cur_for_level + 1][netlist->num_at_forward_level[cur_for_level + 1]] = output_node;
+                        netlist->num_at_forward_level[cur_for_level + 1]++;
+
+                        output_node->forward_level = cur_for_level + 1;
+                    }
+                }
+            }
+        }
+
+        /* check if there are more elements to process at the next level */
+        if (netlist->num_at_forward_level[cur_for_level + 1] > 0) {
+            /* there are elements in the next set then process */
+            cur_for_level++;
+        } else {
+            /* ELSE - we've levelized forwards */
+            more_levels = false;
+        }
+    }
+}
+/*---------------------------------------------------------------------------------------------
+ * (function: levelize_forwards_clean_checking_for_combo_loop_and_liveness)
+ * Walks the forward levels and, for every fanout node, frees the LEVELIZE list kept in node_data, reports an error
+ * when one of that list's input slots is still -1, and warns about non-FF nodes whose backward_level is -1.
+ *-------------------------------------------------------------------------------------------*/
+void levelize_forwards_clean_checking_for_combo_loop_and_liveness(netlist_t *netlist)
+{
+    int i, j, k;
+    int cur_for_level = 0;
+    short more_levels = true;
+    short all_visited = true;
+
+    while (more_levels) {
+        /* go through each element at this level */
+        for (i = 0; i < netlist->num_at_forward_level[cur_for_level]; i++) {
+            nnode_t *current_node = netlist->forward_levels[cur_for_level][i];
+            if (current_node == NULL)
+                continue;
+
+            /* at each node visit every node fed by its output pins */
+            for (j = 0; j < current_node->num_output_pins; j++) {
+                int *fanouts_visited;
+                /* skip pins without a net BEFORE the inner loop reads net->num_fanout_pins */
+                if (current_node->output_pins[j] == NULL || current_node->output_pins[j]->net == NULL)
+                    continue;
+                nnet_t *out_net = current_node->output_pins[j]->net;
+                for (k = 0; k < out_net->num_fanout_pins; k++) {
+                    if (out_net->fanout_pins[k] == NULL)
+                        continue;
+
+                    /* visit the fanout point */
+                    nnode_t *output_node = out_net->fanout_pins[k]->node;
+
+                    if (output_node == NULL)
+                        continue;
+
+                    if (output_node->node_data == NULL) {
+                        oassert(output_node->unique_node_data_id == RESET);
+                    } else {
+                        int idx;
+                        /* ELSE - take the list over from the node */
+                        oassert(output_node->unique_node_data_id == LEVELIZE);
+                        fanouts_visited = (int *)output_node->node_data;
+                        output_node->node_data = NULL;
+
+                        /* check if they've all been marked; idx stops on the first unmarked input slot */
+                        all_visited = true;
+                        for (idx = 0; idx < output_node->num_input_pins; idx++) {
+                            if (fanouts_visited[idx] == -1) {
+                                all_visited = false;
+                                break;
+                            }
+                        }
+
+                        if (all_visited == false) {
+                            /* Combo loop suspect: one of this node's input slots was never marked. */
+                            error_message(
+                              NETLIST, output_node->loc,
+                              "!!!Combinational loop on forward pass.  Node %s is missing a driven pin idx %d.  Isn't neccessarily the culprit of "
+                              "the combinational loop.  Odin only detects combinational loops, but currently doesn't pinpoint.\n",
+                              output_node->name, idx);
+                        }
+                        /* free the data and reset to be used elsewhere */
+                        vtr::free(fanouts_visited);
+                        output_node->unique_node_data_id = RESET;
+                    }
+
+                    if ((output_node->backward_level == -1) && (output_node->type != FF_NODE)) {
+                        warning_message(
+                          NETLIST, output_node->loc,
+                          "Node does not connect to a primary output or FF...DEAD NODE!!!.  Node %s is not connected to a primary output.\n",
+                          output_node->name);
+                    }
+                }
+            }
+        }
+
+        /* check if there are more elements to process at the next level */
+        if (netlist->num_at_forward_level[cur_for_level + 1] > 0) {
+            /* there are elements in the next set then process */
+            cur_for_level++;
+        } else {
+            /* ELSE - we've levelized forwards */
+            more_levels = false;
+        }
+    }
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: levelize_backwards)
+ * Note this levelizing is a reverse combinational delay count (level 0 = top-level output nodes plus FF nodes)
+ *-------------------------------------------------------------------------------------------*/
+void levelize_backwards(netlist_t *netlist)
+{
+    int i, j, k;
+    int cur_back_level;
+    short more_levels = true;
+    short all_visited = true;
+
+    /* seed backward level 0 with every non-NULL top-level output node, then every non-NULL FF node */
+    cur_back_level = 0;
+    netlist->num_backward_levels = 1;
+    netlist->num_at_backward_level = (int *)vtr::realloc(netlist->num_at_backward_level, sizeof(int) * netlist->num_backward_levels);
+    netlist->backward_levels = (nnode_t ***)vtr::realloc(netlist->backward_levels, sizeof(nnode_t **) * (netlist->num_backward_levels));
+    netlist->backward_levels[netlist->num_backward_levels - 1] = NULL;
+    netlist->num_at_backward_level[netlist->num_backward_levels - 1] = 0;
+    for (i = 0; i < netlist->num_top_output_nodes; i++) {
+        if (netlist->top_output_nodes[i] != NULL) {
+            netlist->backward_levels[cur_back_level] = (nnode_t **)vtr::realloc(
+              netlist->backward_levels[cur_back_level], sizeof(nnode_t *) * (netlist->num_at_backward_level[cur_back_level] + 1));
+            netlist->backward_levels[cur_back_level][netlist->num_at_backward_level[cur_back_level]] = netlist->top_output_nodes[i];
+            netlist->num_at_backward_level[cur_back_level]++;
+            netlist->top_output_nodes[i]->backward_level = 0;
+        }
+    }
+    for (i = 0; i < netlist->num_ff_nodes; i++) {
+        if (netlist->ff_nodes[i] != NULL) {
+            netlist->backward_levels[cur_back_level] = (nnode_t **)vtr::realloc(
+              netlist->backward_levels[cur_back_level], sizeof(nnode_t *) * (netlist->num_at_backward_level[cur_back_level] + 1));
+            netlist->backward_levels[cur_back_level][netlist->num_at_backward_level[cur_back_level]] = netlist->ff_nodes[i];
+            netlist->num_at_backward_level[cur_back_level]++;
+            netlist->ff_nodes[i]->backward_level = 0;
+        }
+    }
+
+    while (more_levels) {
+        /* another level so add space: an empty slot for level cur_back_level + 1 */
+        netlist->num_backward_levels++;
+        netlist->num_at_backward_level = (int *)vtr::realloc(netlist->num_at_backward_level, sizeof(int) * netlist->num_backward_levels);
+        netlist->backward_levels = (nnode_t ***)vtr::realloc(netlist->backward_levels, sizeof(nnode_t **) * (netlist->num_backward_levels));
+        netlist->backward_levels[netlist->num_backward_levels - 1] = NULL;
+        netlist->num_at_backward_level[netlist->num_backward_levels - 1] = 0;
+
+        /* go through each element at this level */
+        for (i = 0; i < netlist->num_at_backward_level[cur_back_level]; i++) {
+            nnode_t *current_node = netlist->backward_levels[cur_back_level][i];
+            if (current_node) {
+                /* at each node visit all the inputs */
+                for (j = 0; j < current_node->num_input_pins; j++) {
+                    int *fanouts_visited = NULL;
+                    if (current_node->input_pins[j]) {
+                        /* the net that feeds this input pin (this pin is one of the net's fanout pins) */
+                        nnet_t *fanout_net = current_node->input_pins[j]->net;
+                        if (fanout_net) {
+                            if (fanout_net->net_data == NULL) {
+                                int idx;
+                                /* first visit to this net: allocate one slot per fanout pin, all unmarked (-1) */
+                                fanouts_visited = (int *)vtr::malloc(sizeof(int) * (fanout_net->num_fanout_pins));
+
+                                for (idx = 0; idx < fanout_net->num_fanout_pins; idx++) {
+                                    fanouts_visited[idx] = -1;
+                                }
+
+                                fanout_net->net_data = (void *)fanouts_visited;
+                                fanout_net->unique_net_data_id = LEVELIZE;
+                            } else {
+                                /* ELSE - get the list created on an earlier visit */
+                                fanouts_visited = (int *)fanout_net->net_data;
+                                oassert(fanout_net->unique_net_data_id == LEVELIZE);
+                            }
+
+                            /* mark this pin's slot with the current level (skipped for nets without any driver) */
+                            if (fanout_net->num_driver_pins != 0) {
+                                fanouts_visited[current_node->input_pins[j]->pin_net_idx] = cur_back_level;
+                            }
+
+                            /* all_visited stays true unless a fanout pin that still has a node attached is unmarked */
+                            all_visited = true;
+                            for (k = 0; k < fanout_net->num_fanout_pins && all_visited; k++) {
+                                all_visited = (!(fanout_net->fanout_pins[k] && fanout_net->fanout_pins[k]->node && fanouts_visited[k] == -1));
+                            }
+
+                            if (all_visited) {
+                                for (k = 0; k < fanout_net->num_driver_pins; k++) {
+                                    if (!fanout_net->driver_pins[k]->node || fanout_net->driver_pins[k]->node->type == FF_NODE)
+                                        continue;
+                                    /* This one has been visited by everyone */
+                                    if (fanout_net->driver_pins[k]->node->backward_level == -1) {
+                                        /* level still -1: the driver is on no level list yet, so append it to the next level */
+                                        netlist->backward_levels[cur_back_level + 1] =
+                                          (nnode_t **)vtr::realloc(netlist->backward_levels[cur_back_level + 1],
+                                                                   sizeof(nnode_t *) * (netlist->num_at_backward_level[cur_back_level + 1] + 1));
+                                        netlist->backward_levels[cur_back_level + 1][netlist->num_at_backward_level[cur_back_level + 1]] =
+                                          fanout_net->driver_pins[k]->node;
+                                        netlist->num_at_backward_level[cur_back_level + 1]++;
+                                    }
+
+                                    fanout_net->driver_pins[k]->node->backward_level = cur_back_level + 1; // overwritten even if already listed
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        /* check if there are more elements to process at the next level */
+        if (netlist->num_at_backward_level[cur_back_level + 1] > 0) {
+            /* there are elements in the next set then process */
+            cur_back_level++;
+        } else {
+            /* ELSE - we've levelized backwards */
+            more_levels = false;
+        }
+    }
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: levelize_backwards_clean_checking_for_liveness)
+ * Walks the backward levels, frees the LEVELIZE list attached to each input net (net_data) and warns when a
+ * fanout pin of that net that still has a node attached was never marked (slot still -1).
+ *-------------------------------------------------------------------------------------------*/
+void levelize_backwards_clean_checking_for_liveness(netlist_t *netlist)
+{
+    int i, j, k;
+    int cur_back_level = 0;
+    short more_levels = true;
+    short all_visited = true;
+
+    while (more_levels) {
+        /* go through each element at this level */
+        for (i = 0; i < netlist->num_at_backward_level[cur_back_level]; i++) {
+            nnode_t *current_node = netlist->backward_levels[cur_back_level][i];
+            if (current_node == NULL)
+                continue;
+
+            /* at each node visit all the inputs */
+            for (j = 0; j < current_node->num_input_pins; j++) {
+                int *fanouts_visited;
+                if (current_node->input_pins[j] == NULL || current_node->input_pins[j]->net == NULL)
+                    continue; /* pin without a net: levelize_backwards skips these too, so nothing to clean */
+
+                /* the net that feeds this input pin */
+                nnet_t *fanout_net = current_node->input_pins[j]->net;
+
+                if (fanout_net->net_data == NULL) {
+                    /* IF - already cleaned */
+                    oassert(fanout_net->unique_net_data_id == -1);
+                } else {
+                    /* ELSE - take the list over from the net */
+                    oassert(fanout_net->unique_net_data_id == LEVELIZE);
+                    fanouts_visited = (int *)fanout_net->net_data;
+                    fanout_net->net_data = NULL;
+
+                    /* check if they've all been marked; k stops on the first live fanout pin left unmarked */
+                    all_visited = true;
+                    for (k = 0; k < fanout_net->num_fanout_pins; k++) {
+                        if ((fanout_net->fanout_pins[k] != NULL) && (fanout_net->fanout_pins[k]->node != NULL) && (fanouts_visited[k] == -1)) {
+                            all_visited = false;
+                            break;
+                        }
+                    }
+
+                    if (all_visited == false) {
+                        /* one of these nodes was not visited on the backward analysis */
+                        warning_message(NETLIST, current_node->loc, "Liveness check on backward pass.  Node %s is missing a driving pin idx %d\n",
+                                        current_node->name, k);
+                    }
+
+                    /* free the data and reset to be used elsewhere */
+                    vtr::free(fanouts_visited);
+                    fanout_net->unique_net_data_id = -1;
+                }
+            }
+        }
+
+        /* check if there are more elements to process at the next level */
+        if (netlist->num_at_backward_level[cur_back_level + 1] > 0) {
+            /* there are elements in the next set then process */
+            cur_back_level++;
+        } else {
+            /* ELSE - we've levelized backwards */
+            more_levels = false;
+        }
+    }
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: find_node_at_top_of_combo_loop)
+ * Walks driver-wards from start_node through input slots left unmarked (-1) and returns the first node with no
+ * such slot, or whose unmarked pin has a driver of lower backward_level. Frees each node_data list it detaches.
+ *-------------------------------------------------------------------------------------------*/
+nnode_t *find_node_at_top_of_combo_loop(nnode_t *start_node)
+{
+    int stack_size = 1;
+    nnode_t **stack = (nnode_t **)vtr::calloc(stack_size, sizeof(nnode_t *));
+    stack[0] = start_node;
+    nnode_t *next_node = start_node;
+
+    while (stack_size > 0) {
+        next_node = stack[--stack_size];
+        oassert(next_node->unique_node_data_id == LEVELIZE);
+        int *fanouts_visited = (int *)next_node->node_data;
+        next_node->node_data = NULL;
+
+        /* first unmarked input slot; node_data can already be NULL for a node this walk detached earlier */
+        int idx_missed = -1;
+        for (int i = 0; fanouts_visited != NULL && i < next_node->num_input_pins && idx_missed == -1; i++) {
+            if (fanouts_visited[i] == -1)
+                idx_missed = i;
+        }
+        vtr::free(fanouts_visited); /* detached from the node above, so it must be released here */
+        if (idx_missed == -1)
+            break; /* nothing unmarked: this node is the result */
+
+        nnet_t *missed_net = next_node->input_pins[idx_missed]->net;
+        for (int i = 0; i < missed_net->num_driver_pins; i++) {
+            /* IF - the driver has a lower backward level than this node, it is closer to the
+             * primary outputs and not in the combo loop */
+            if (missed_net->driver_pins[i]->node->backward_level < next_node->backward_level) {
+                vtr::free(stack);
+                return next_node;
+            }
+            stack_size++;
+            stack = (nnode_t **)vtr::realloc(stack, sizeof(nnode_t *) * stack_size);
+            stack[stack_size - 1] = missed_net->driver_pins[i]->node;
+        }
+    }
+    /* reached via break, or with the stack exhausted (unmarked pin on a net without drivers) */
+    vtr::free(stack);
+    return next_node;
+}
diff --git a/parmys-plugin/src/netlist_cleanup.cc b/parmys-plugin/src/netlist_cleanup.cc
new file mode 100644
index 000000000..d9458b479
--- /dev/null
+++ b/parmys-plugin/src/netlist_cleanup.cc
@@ -0,0 +1,340 @@
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "odin_globals.h"
+#include "odin_types.h"
+#include <algorithm> // std::fill
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "netlist_utils.h"
+#include "odin_ii.h"
+#include "vtr_memory.h"
+#include "vtr_util.h"
+
+bool coarsen_cleanup; // NOTE(review): not read or written anywhere else in this file - confirm who uses it
+
+/* Sentinel markers for nnode_t.node_data: the address of each dummy int below is a unique pointer value that
+ * tags a node as visited by the forward sweep, by the backward sweep, or as handled by the removal phase */
+int _visited_forward, _visited_backward, _visited_removal;
+#define VISITED_FORWARD ((void *)&_visited_forward)
+#define VISITED_BACKWARD ((void *)&_visited_backward)
+#define VISITED_REMOVAL ((void *)&_visited_removal)
+
+/* Singly linked list of nodes; insert_node_list fills the last slot and appends a new zeroed one after it */
+struct node_list_t {
+    nnode_t *node;            // payload; NULL in the trailing empty slot
+    struct node_list_t *next; // following slot, or NULL
+};
+
+node_list_t useless_nodes;                       // Head slot of the list of nodes to be removed
+node_list_t *removal_list_next = &useless_nodes; // Empty tail slot of that list (where the next node is stored)
+
+node_list_t addsub_nodes;                              // Head slot of the list of adder/subtractor chain heads
+node_list_t *addsub_list_next = &addsub_nodes;         // Empty tail slot of the adder/subtractor list
+long long num_removed_nodes[operation_list_END] = {0}; // Removed-node counters, indexed by node type
+
+/* Function declarations */
+node_list_t *insert_node_list(node_list_t *node_list, nnode_t *node);
+void traverse_backward(nnode_t *node);
+void traverse_forward(nnode_t *node, int toplevel, int remove_me);
+void mark_output_dependencies(netlist_t *netlist);
+void identify_unused_nodes(netlist_t *netlist);
+void remove_unused_nodes(node_list_t *remove);
+void calculate_addsub_statistics(node_list_t *addsub);
+void remove_unused_logic(netlist_t *netlist);
+void count_node_type(nnode_t *node);
+void report_removed_nodes(long long *node_list);
+
+node_list_t *insert_node_list(node_list_t *node_list, nnode_t *node)
+{
+    /* fill the slot handed in, then hang a zeroed slot behind it and return that new slot */
+    node_list->node = node;
+    return node_list->next = (node_list_t *)vtr::calloc(1, sizeof(node_list_t));
+}
+
+/* Mark `node` and everything that drives it as VISITED_BACKWARD (depth first, outputs -> inputs) */
+void traverse_backward(nnode_t *node)
+{
+    if (node == NULL || node->node_data == VISITED_BACKWARD)
+        return;                         // Nothing to visit, or already visited
+    node->node_data = VISITED_BACKWARD; // Mark before recursing so a cycle cannot recurse forever
+    for (int i = 0; i < node->num_input_pins; i++) {
+        // A missing pin or net has no drivers to follow (same tolerance as remove_unused_nodes)
+        nnet_t *net = (node->input_pins[i] != NULL) ? node->input_pins[i]->net : NULL;
+        for (int j = 0; net != NULL && j < net->num_driver_pins; j++) {
+            // An undriven net has zero driver pins, so this body simply never runs for it
+            if (net->driver_pins[j] != NULL && net->driver_pins[j]->node != NULL)
+                traverse_backward(net->driver_pins[j]->node);
+        }
+    }
+}
+
+/* Forward sweep (inputs -> outputs), depth first.
+ * Queues every node that must go on the useless_nodes list and records the heads
+ * of adder/subtractor chains on the addsub_nodes list.
+ * Arguments:
+ * 	node: the current node in the netlist
+ * 	toplevel: are we at one of the top-level nodes? (GND, VCC, PAD or INPUT)
+ * 	remove_me: should the current node be removed?
+ * */
+void traverse_forward(nnode_t *node, int toplevel, int remove_me)
+{
+    /* Neither case should happen, but tolerate a missing node or a repeat visit */
+    if (node == NULL || node->node_data == VISITED_FORWARD)
+        return;
+
+    /* Removal is inherited from the parent; failing that, a node below the top level
+     * is removed when the backward sweep never reached it */
+    const bool reached_backward = (node->node_data == VISITED_BACKWARD);
+    remove_me = remove_me || (!reached_backward && (toplevel == false));
+
+    /* From here on the node counts as visited by the forward sweep */
+    node->node_data = VISITED_FORWARD;
+
+    if (remove_me) {
+        /* Queue the node for removal and account for it in the per-type totals */
+        removal_list_next = insert_node_list(removal_list_next, node);
+        count_node_type(node);
+    }
+
+    /* Adders and subtractors: remember the node when it heads a chain */
+    const bool is_add = (node->type == ADD);
+    if (is_add || node->type == MINUS) {
+        /* With adder_cin_global set, a chain starts at the global GND node (add) or
+         * VCC node (subtract); otherwise it starts at the PAD node */
+        auto chain_start_type = PAD_NODE;
+        if (configuration.adder_cin_global)
+            chain_start_type = is_add ? GND_NODE : VCC_NODE;
+
+        /* The net on the last input pin decides; it must have exactly one driver */
+        nnet_t *last_input_net = node->input_pins[node->num_input_pins - 1]->net;
+        oassert(last_input_net->num_driver_pins == 1);
+
+        /* Check if we've found the head of an adder or subtractor chain */
+        if (last_input_net->driver_pins[0]->node->type == chain_start_type)
+            addsub_list_next = insert_node_list(addsub_list_next, node);
+    }
+
+    /* Recurse into every fanout node that has not been visited yet */
+    for (int out = 0; out < node->num_output_pins; out++) {
+        npin_t *out_pin = node->output_pins[out];
+        if (out_pin == NULL || out_pin->net == NULL)
+            continue;
+
+        for (int sink = 0; sink < out_pin->net->num_fanout_pins; sink++) {
+            npin_t *sink_pin = out_pin->net->fanout_pins[sink];
+            nnode_t *child = (sink_pin != NULL) ? sink_pin->node : NULL;
+            /* The callee repeats the visited test; checking here only saves the call */
+            if (child != NULL && child->node_data != VISITED_FORWARD)
+                traverse_forward(child, false, remove_me);
+        }
+    }
+}
+
+/* Run the backward sweep from every top-level output node, so that each node
+ * reachable through driver pins ends up marked VISITED_BACKWARD */
+void mark_output_dependencies(netlist_t *netlist)
+{
+    const int output_count = netlist->num_top_output_nodes;
+    for (int out = 0; out < output_count; ++out) {
+        traverse_backward(netlist->top_output_nodes[out]);
+    }
+}
+
+/* Forward sweep from the special nodes (GND, VCC, PAD) and every top-level input.
+ * Both work lists are restarted first: heads AND tail cursors, so a repeated call
+ * does not keep appending to the chain left behind by the previous one. */
+void identify_unused_nodes(netlist_t *netlist)
+{
+    useless_nodes.node = NULL;
+    useless_nodes.next = NULL;
+    removal_list_next = &useless_nodes;
+    addsub_nodes.node = NULL;
+    addsub_nodes.next = NULL;
+    addsub_list_next = &addsub_nodes;
+
+    traverse_forward(netlist->gnd_node, true, false);
+    traverse_forward(netlist->vcc_node, true, false);
+    traverse_forward(netlist->pad_node, true, false);
+    for (int i = 0; i < netlist->num_top_input_nodes; i++)
+        traverse_forward(netlist->top_input_nodes[i], true, false);
+}
+
+/* Detach every listed node from the nets feeding it and tag it VISITED_REMOVAL.
+ * Note: nothing is freed here, the unused logic is only cut off from the circuit */
+void remove_unused_nodes(node_list_t *remove)
+{
+    for (node_list_t *entry = remove; entry != NULL && entry->node != NULL; entry = entry->next) {
+        nnode_t *doomed = entry->node;
+        for (int pin = 0; pin < doomed->num_input_pins; pin++) {
+            npin_t *input_pin = doomed->input_pins[pin];
+            if (input_pin == NULL)
+                continue;
+            /* clear this pin's slot in the fanout table of its net */
+            input_pin->net->fanout_pins[input_pin->pin_net_idx] = NULL;
+        }
+        doomed->node_data = VISITED_REMOVAL;
+    }
+}
+
+/* Since we are traversing the entire netlist anyway, we can use this
+ * opportunity to keep track of the heads of adder/subtractors chains
+ * and then compute statistics on them (all updated by calculate_addsub_statistics) */
+long adder_chain_count = 0;   // Chains counted as ADD
+long longest_adder_chain = 0; // Largest chain depth seen among those
+long total_adders = 0;        // Sum of the depths of those chains
+
+long subtractor_chain_count = 0;   // Chains counted as MINUS
+long longest_subtractor_chain = 0; // Largest chain depth seen among those
+long total_subtractors = 0;        // Sum of the depths of those chains
+
+double geomean_addsub_length = 0.0; // Geometric mean of add/sub chain length
+double sum_of_addsub_logs = 0.0;    // Sum of the logarithms of the add/sub chain lengths; used for geomean
+double total_addsub_chain_count = 0.0; // Number of chains with depth > 0 (the geomean divisor)
+
+void calculate_addsub_statistics(node_list_t *addsub)
+{
+    while (addsub != NULL && addsub->node != NULL) {
+        int found_tail = false;
+        nnode_t *node = addsub->node;
+        int chain_depth = 0;
+        while (!found_tail) {
+            if (node->node_data == VISITED_REMOVAL)
+                break; /* a removed node ends the chain and is not counted in its depth */
+            chain_depth += 1;
+
+            /* Carry out is always output pin 0 */
+            nnet_t *carry_out_net = node->output_pins[0]->net;
+            if (carry_out_net == NULL || carry_out_net->fanout_pins[0] == NULL)
+                found_tail = true;
+            else
+                node = carry_out_net->fanout_pins[0]->node;
+        }
+        if (chain_depth > 0) {
+            /* NOTE(review): `node` is where the walk stopped, not the chain head - confirm that is the type meant here */
+            if (node->type == ADD) {
+                adder_chain_count += 1;
+                total_adders += chain_depth;
+                if (chain_depth > longest_adder_chain)
+                    longest_adder_chain = chain_depth;
+            } else if (node->type == MINUS) {
+                subtractor_chain_count += 1;
+                total_subtractors += chain_depth;
+                if (chain_depth > longest_subtractor_chain)
+                    longest_subtractor_chain = chain_depth;
+            }
+
+            sum_of_addsub_logs += log(chain_depth);
+            total_addsub_chain_count += 1.0;
+        }
+
+        addsub = addsub->next;
+    }
+    /* Geometric mean chain length. Without any chain the quotient is 0/0 (NaN), so the value is left untouched */
+    if (total_addsub_chain_count > 0.0)
+        geomean_addsub_length = exp(sum_of_addsub_logs / total_addsub_chain_count);
+}
+void count_node_type(nnode_t *node)
+{
+    /* Tally a removed node in num_removed_nodes: `slot` is the per-type counter to bump
+     * (-1 for none) and `in_generic_total` says whether the GENERIC counter is bumped on top */
+    int slot = -1;
+    bool in_generic_total = false;
+
+    switch (node->type) {
+    case LOGICAL_OR:
+    case LOGICAL_AND:
+    case LOGICAL_NOR:
+    case LOGICAL_NAND:
+    case LOGICAL_XOR:
+    case LOGICAL_XNOR:
+    case LOGICAL_NOT:
+        /* logic gates: own counter plus the generic total */
+        slot = node->type;
+        in_generic_total = true;
+        break;
+    case MUX_2:
+    case SMUX_2:
+        /* both mux types are counted as MUX_2, plus the generic total */
+        slot = MUX_2;
+        in_generic_total = true;
+        break;
+    case MINUS:
+        /* Minus nodes are built of Add nodes */
+        slot = ADD;
+        break;
+    case PAD_NODE:
+    case GND_NODE:
+    case VCC_NODE:
+        /* These are irrelevant so we don't count them */
+        break;
+    case GENERIC:     /* its own counter is the generic total */
+    case INPUT_NODE:  /* these stay untouched but are not added to the total */
+    case OUTPUT_NODE:
+    case CLOCK_NODE:  /* these stay untouched */
+    case FF_NODE:
+    case MULTIPLY:
+    case ADD:
+    case MEMORY:
+    case HARD_IP:
+        slot = node->type;
+        break;
+
+    default:
+        /* everything else is generic */
+        slot = GENERIC;
+        break;
+    }
+
+    if (slot >= 0)
+        num_removed_nodes[slot]++;
+    if (in_generic_total)
+        num_removed_nodes[GENERIC]++;
+}
+
+/* Print one "Number of removed <type> node(s)" line per node type whose count exceeds UNUSED_NODE_TYPE */
+void report_removed_nodes(long long *node_list)
+{
+    // the removal list is empty when its head slot holds no node: nothing to report
+    if (useless_nodes.node == NULL)
+        return;
+    warning_message(NETLIST, unknown_location, "%s", "Following unused node(s) removed from the netlist:\n");
+    for (int type_idx = 0; type_idx < operation_list_END; type_idx++) {
+        if (node_list[type_idx] <= UNUSED_NODE_TYPE)
+            continue;
+        const std::string label = std::string("Number of removed <") + operation_list_STR[type_idx][ODIN_LONG_STRING] + "> node(s): ";
+        printf("%-42s%lld\n", label.c_str(), node_list[type_idx]);
+    }
+}
+
+/* Perform the backwards and forward sweeps, detach the unused nodes, then gather adder/subtractor chain statistics */
+void remove_unused_logic(netlist_t *netlist)
+{
+    mark_output_dependencies(netlist);   // backward sweep from the top-level outputs
+    identify_unused_nodes(netlist);      // forward sweep: fills useless_nodes and addsub_nodes
+    remove_unused_nodes(&useless_nodes); // detaches the listed nodes and tags them VISITED_REMOVAL
+    if (global_args.all_warnings)
+        report_removed_nodes(num_removed_nodes);
+    calculate_addsub_statistics(&addsub_nodes); // run after removal: its chain walk stops at VISITED_REMOVAL nodes
+}
\ No newline at end of file
diff --git a/parmys-plugin/src/netlist_statistic.cc b/parmys-plugin/src/netlist_statistic.cc
new file mode 100644
index 000000000..b65f3f004
--- /dev/null
+++ b/parmys-plugin/src/netlist_statistic.cc
@@ -0,0 +1,371 @@
+#include <algorithm>
+
+/* for hb */
+#include "multipliers.h"
+
+#include "netlist_statistic.h"
+#include "odin_globals.h"
+#include "odin_types.h"
+#include "vtr_memory.h"
+
+static void init(metric_t *m); // zeroes a metric
+static void print_stats(metric_t *m); // debugging dump of a metric
+static void copy(metric_t *dest, metric_t *src); // NULL-tolerant metric copy
+
+static void add_to_stat(metric_t *dest, long long branching_factor); // adds one level of depth plus the given width
+static void count_node_type(nnode_t *node, netlist_t *netlist); // tallies a node under its own type
+
+static metric_t *get_upward_stat(nnet_t *net, netlist_t *netlist, uintptr_t traverse_mark_number); // net -> its driver pins
+static metric_t *get_downward_stat(nnet_t *net, netlist_t *netlist, uintptr_t traverse_mark_number); // net -> its fanout pins
+static metric_t *get_upward_stat(nnode_t *node, netlist_t *netlist, uintptr_t traverse_mark_number); // node -> its input pins
+static metric_t *get_downward_stat(nnode_t *node, netlist_t *netlist, uintptr_t traverse_mark_number); // node -> its output pins
+
+static metric_t *get_upward_stat(metric_t *destination, nnode_t **node_list, long long node_count, netlist_t *netlist,
+                                 uintptr_t traverse_mark_number); // aggregates the upward metrics of a node list into destination
+
+/* Zero the four metric fields (both depth bounds, average depth, average width). */
+static void init(metric_t *m)
+{
+    m->min_depth = m->max_depth = 0;
+    m->avg_depth = m->avg_width = 0;
+    /* note: aggregate() relies on min_depth == 0 meaning "nothing folded in yet" */
+}
+
+/* Reset the netlist-wide counters ahead of a fresh statistics traversal. */
+void init_stat(netlist_t *netlist)
+{
+    netlist->num_logic_element = 0;
+    netlist->num_of_node = 0;
+    init(&netlist->output_node_stat);
+
+    /* per-type counters start at the UNUSED_NODE_TYPE sentinel so that untouched types can be skipped later */
+    for (int op = 0; op < operation_list_END; op++)
+        netlist->num_of_type[op] = UNUSED_NODE_TYPE;
+}
+
+/* Recompute the statistics starting at `node` and store its downward max depth as the node weight. */
+void mixing_optimization_stats(nnode_t *node, netlist_t *netlist)
+{
+    // start from clean counters so earlier traversals do not interfere
+    init_stat(netlist);
+
+    // only multipliers are handled as a starting point; anything else is reported as unimplemented
+    if (node->type != MULTIPLY) {
+        error_message(NETLIST, unknown_location, "Counting weights for mixing optimization for %i: Hard block type is unimplemented", node->type);
+        return;
+    }
+
+    // the traversal is tagged with the multiplier-optimization mark value
+    stat_t *collected = get_stats(node, netlist, mult_optimization_traverse_value);
+    // weight := deepest path found below the node
+    node->weight = collected->downward.max_depth;
+    vtr::free(collected);
+}
+
+static void print_stats(metric_t *m) // debugging aid: prints the four fields of one metric to stdout
+{
+    printf("\n\t%s:%0.4lf\n\t%s: %0.4lf\n\t%s: %0.4lf\n\t%s: %0.4lf\n", "shortest path", m->min_depth, "critical path", m->max_depth, "average path",
+           m->avg_depth, "overall fan-out", m->avg_width);
+}
+_static_unused(print_stats) // quiet warning (print_stats has no caller in this file)
+
+static void copy(metric_t *dest, metric_t *src)
+{
+    if (!dest)
+        return; /* no destination: nothing to do */
+    init(dest);
+    if (!src)
+        return; /* no source: dest stays zeroed */
+    dest->avg_width = src->avg_width;
+    dest->avg_depth = src->avg_depth;
+    dest->max_depth = src->max_depth;
+    dest->min_depth = src->min_depth;
+}
+
+/* Fold the non-NULL entries of `sources` into dest: min/max of the depths, mean of avg_depth and avg_width. */
+static void aggregate(metric_t *dest, metric_t **sources, long long source_count)
+{
+    init(dest);
+    if (!sources)
+        return;
+
+    long long used = 0; /* how many entries actually contributed */
+    for (long long idx = 0; idx < source_count; ++idx) {
+        const metric_t *cur = sources[idx];
+        if (!cur)
+            continue;
+
+        used++;
+        /* NOTE(review): 0 is read as "min not set yet", so a source whose min_depth is 0 makes the result order dependent */
+        dest->min_depth = (dest->min_depth == 0) ? cur->min_depth : std::min(cur->min_depth, dest->min_depth);
+        dest->max_depth = std::max(cur->max_depth, dest->max_depth);
+        dest->avg_depth += cur->avg_depth;
+        dest->avg_width += cur->avg_width;
+    }
+
+    if (used == 0)
+        return;
+    dest->avg_depth /= used;
+    dest->avg_width /= used;
+}
+
+static void add_to_stat(metric_t *dest, long long branching_factor) /* one more level of depth, `branching_factor` more width */
+{
+    dest->avg_width += branching_factor;
+    ++dest->avg_depth;
+    ++dest->max_depth;
+    ++dest->min_depth;
+}
+
+static bool traverse(nnode_t *node, uintptr_t traverse_mark_number)
+{
+    const bool first_visit = !(node->traverse_visited == traverse_mark_number); /* not stamped with this mark yet? */
+    node->traverse_visited = traverse_mark_number;
+    return first_visit;
+}
+
+static bool traverse(nnet_t *net, uintptr_t traverse_mark_number)
+{
+    const bool already_seen = (net->traverse_visited == traverse_mark_number); /* the net is stamped either way */
+    net->traverse_visited = traverse_mark_number;
+    return !already_seen;
+}
+
+static void increment_type_count(operation_list op, netlist_t *netlist)
+{
+    /* a negative counter (the "unused" sentinel) restarts from zero before the
+     * increment, so the first node of a type always yields a count of 1 */
+    auto &counter = netlist->num_of_type[op];
+    counter = (counter < 0) ? 1 : counter + 1;
+}
+static void count_node_type(operation_list op, nnode_t *node, netlist_t *netlist)
+{
+    /* Tally `node` under operation `op`; several operations are re-counted as GENERIC, ADD or MUX_2 (see the cases). */
+    switch (op) {
+    case LOGICAL_OR:
+    case LOGICAL_AND:
+    case LOGICAL_NOR:
+    case LOGICAL_NAND:
+    case LOGICAL_XOR:
+    case LOGICAL_XNOR:
+    case LOGICAL_NOT:
+        /* basic gates: counted under their own type and also as generic logic */
+        increment_type_count(op, netlist);
+        count_node_type(GENERIC, node, netlist);
+        return;
+
+    case MUX_2:
+    case SMUX_2:
+        /* both mux flavours are reported as MUX_2, plus the generic estimate */
+        increment_type_count(MUX_2, netlist);
+        count_node_type(GENERIC, node, netlist);
+        return;
+
+    case GENERIC: {
+        /* generic logic is packed into LUTs, so only a **rough** LUT count is
+         * estimated from the input pin count (used to give predictive placement) */
+        long long estimated_luts = 1;
+        if (physical_lut_size > 1 && node->num_input_pins > physical_lut_size) {
+            /* too wide for one LUT: split level by level until one signal is left, glue logic included */
+            estimated_luts = 0;
+            for (long long width = node->num_input_pins; width > 1;) {
+                /* ceil(width / physical_lut_size) LUTs are needed on this level */
+                const long long level_luts = width / physical_lut_size + ((width % physical_lut_size) != 0);
+                estimated_luts += level_luts;
+                width = level_luts;
+            }
+        }
+        netlist->num_logic_element += estimated_luts;
+        increment_type_count(op, netlist);
+        netlist->num_of_node += 1;
+        return;
+    }
+
+    case MINUS:
+        /* Minus nodes are built of Add nodes */
+        count_node_type(ADD, node, netlist);
+        return;
+
+    case PAD_NODE:
+    case GND_NODE:
+    case VCC_NODE:
+        /* irrelevant to the statistics: not counted at all */
+        return;
+
+    case INPUT_NODE:
+    case OUTPUT_NODE:
+        /* counted per type but kept out of the node total */
+        increment_type_count(op, netlist);
+        return;
+
+    case CLOCK_NODE:
+    case FF_NODE:
+    case MULTIPLY:
+    case ADD:
+    case MEMORY:
+    case HARD_IP:
+        /* counted per type and in the node total */
+        increment_type_count(op, netlist);
+        netlist->num_of_node += 1;
+        return;
+
+    default:
+        /* everything else is generic */
+        count_node_type(GENERIC, node, netlist);
+        return;
+    }
+}
+
+static void count_node_type(nnode_t *node, netlist_t *netlist) { count_node_type(node->type, node, netlist); } // convenience overload: counts the node under its own type
+
+/* Upward metric of a net: the aggregate of its drivers' node metrics. Returns NULL for a NULL net. */
+static metric_t *get_upward_stat(nnet_t *net, netlist_t *netlist, uintptr_t traverse_mark_number)
+{
+    if (!net)
+        return NULL;
+    metric_t *destination = &(net->stat.upward);
+    if (traverse(net, traverse_mark_number)) {
+        init(destination);
+        if (net->num_driver_pins) {
+            metric_t **parent_stat = (metric_t **)vtr::calloc(net->num_driver_pins, sizeof(metric_t *));
+            /* skip empty driver slots like the other pin walks in this file do; aggregate() ignores NULL entries */
+            for (int i = 0; i < net->num_driver_pins; i++)
+                if (net->driver_pins[i])
+                    parent_stat[i] = get_upward_stat(net->driver_pins[i]->node, netlist, traverse_mark_number);
+            aggregate(destination, parent_stat, net->num_driver_pins);
+            vtr::free(parent_stat);
+        }
+    }
+    return destination;
+}
+
+/* Upward metric of a node: aggregate over its input nets plus one level for the node itself. Returns NULL for a NULL node. */
+static metric_t *get_upward_stat(nnode_t *node, netlist_t *netlist, uintptr_t traverse_mark_number)
+{
+    if (!node)
+        return NULL;
+    metric_t *destination = &(node->stat.upward);
+    if (!traverse(node, traverse_mark_number))
+        return destination; /* already stamped with this mark: the stored metric is returned untouched */
+
+    /* first visit with this mark: tally the node, then rebuild its metric */
+    count_node_type(node, netlist);
+    init(destination);
+    const long long fan_in = node->num_input_pins;
+    if (fan_in) {
+        metric_t **from_inputs = (metric_t **)vtr::calloc(fan_in, sizeof(metric_t *));
+        for (long long pin = 0; pin < fan_in; pin++) {
+            if (node->input_pins[pin])
+                from_inputs[pin] = get_upward_stat(node->input_pins[pin]->net, netlist, traverse_mark_number);
+        }
+        aggregate(destination, from_inputs, fan_in);
+        vtr::free(from_inputs);
+    }
+    add_to_stat(destination, fan_in);
+    return destination;
+}
+
+/* Downward metric of a net: the aggregate of the node metrics reached through its fanout pins. Returns NULL for a NULL net. */
+static metric_t *get_downward_stat(nnet_t *net, netlist_t *netlist, uintptr_t traverse_mark_number)
+{
+    if (net == NULL)
+        return NULL;
+
+    metric_t *result = &(net->stat.downward);
+    if (traverse(net, traverse_mark_number)) {
+        init(result);
+        if (net->num_fanout_pins) {
+            metric_t **sink_stats = (metric_t **)vtr::calloc(net->num_fanout_pins, sizeof(metric_t *));
+            for (long long slot = 0; slot < net->num_fanout_pins; slot++) {
+                npin_t *sink = net->fanout_pins[slot];
+                if (sink)
+                    sink_stats[slot] = get_downward_stat(sink->node, netlist, traverse_mark_number);
+            }
+            aggregate(result, sink_stats, net->num_fanout_pins);
+            vtr::free(sink_stats);
+        }
+    }
+    return result;
+}
+
+/* Downward metric of a node: aggregate over its output nets plus one level for the node itself. Returns NULL for a NULL node. */
+static metric_t *get_downward_stat(nnode_t *node, netlist_t *netlist, uintptr_t traverse_mark_number)
+{
+    if (!node)
+        return NULL;
+    metric_t *destination = &(node->stat.downward);
+    if (!traverse(node, traverse_mark_number))
+        return destination; /* already stamped with this mark: the stored metric is returned untouched */
+
+    /* first visit with this mark: tally the node, then rebuild its metric */
+    count_node_type(node, netlist);
+    init(destination);
+    const long long fan_out = node->num_output_pins;
+    if (fan_out) {
+        metric_t **from_outputs = (metric_t **)vtr::calloc(fan_out, sizeof(metric_t *));
+        for (long long pin = 0; pin < fan_out; pin++) {
+            if (node->output_pins[pin])
+                from_outputs[pin] = get_downward_stat(node->output_pins[pin]->net, netlist, traverse_mark_number);
+        }
+        aggregate(destination, from_outputs, fan_out);
+        vtr::free(from_outputs);
+    }
+    add_to_stat(destination, fan_out);
+    return destination;
+}
+
+/* Aggregate the upward metrics of every node in node_list into destination (left untouched when the list is NULL or empty). */
+static metric_t *get_upward_stat(metric_t *destination, nnode_t **node_list, long long node_count, netlist_t *netlist, uintptr_t traverse_mark_number)
+{
+    if (!node_list || !node_count)
+        return destination;
+
+    metric_t **per_node = (metric_t **)vtr::calloc(node_count, sizeof(metric_t *));
+    /* a NULL node yields a NULL entry, which aggregate() skips */
+    for (long long n = 0; n < node_count; ++n)
+        per_node[n] = get_upward_stat(node_list[n], netlist, traverse_mark_number);
+
+    aggregate(destination, per_node, node_count);
+    vtr::free(per_node);
+    return destination;
+}
+
+stat_t *get_stats(nnode_t *node, netlist_t *netlist, uintptr_t traverse_mark_number) // returns a vtr::malloc'ed snapshot owned by the caller
+{
+    stat_t *stat = (stat_t *)vtr::malloc(sizeof(stat_t));
+    copy(&stat->downward, get_downward_stat(node, netlist, traverse_mark_number)); // this pass stamps `node` with the mark
+    copy(&stat->upward, get_upward_stat(node, netlist, traverse_mark_number)); // NOTE(review): `node` is already stamped, so its upward metric is returned without being recomputed
+    return stat;
+}
+
+static const char _travelsal_id = 0; // only the address of this object is used
+static const uintptr_t travelsal_id = (uintptr_t)&_travelsal_id; // address-derived base mark; compute_statistics() traverses with travelsal_id + 1
+
+/*---------------------------------------------------------------------------------------------
+ * (function: compute_statistics) walks from the top output nodes towards the inputs to gather node counts and path depths; prints a summary when `display` is set
+ *-------------------------------------------------------------------------------------------*/
+void compute_statistics(netlist_t *netlist, bool display)
+{
+    if (!netlist)
+        return;
+
+    // start from clean counters, then traverse upward from every top-level output node
+    init_stat(netlist);
+    get_upward_stat(&netlist->output_node_stat, netlist->top_output_nodes, netlist->num_top_output_nodes, netlist, travelsal_id + 1);
+    if (!display)
+        return;
+
+    printf("\n\t==== Stats ====\n");
+    for (long long op = 0; op < operation_list_END; op += 1) {
+        // types still at or below the UNUSED_NODE_TYPE sentinel are left out of the report
+        if (netlist->num_of_type[op] <= UNUSED_NODE_TYPE)
+            continue;
+        std::string row_label = std::string("Number of <") + operation_list_STR[op][ODIN_LONG_STRING] + "> node: ";
+        printf("%-42s%lld\n", row_label.c_str(), netlist->num_of_type[op]);
+    }
+    printf("%-42s%lld\n", "Total estimated number of lut: ", netlist->num_logic_element);
+    printf("%-42s%lld\n", "Total number of node: ", netlist->num_of_node);
+    printf("%-42s%0.0f\n", "Longest path: ", netlist->output_node_stat.max_depth);
+    printf("%-42s%0.0f\n", "Average path: ", netlist->output_node_stat.avg_depth);
+    printf("\n");
+}
\ No newline at end of file
diff --git a/parmys-plugin/src/netlist_utils.cc b/parmys-plugin/src/netlist_utils.cc
new file mode 100644
index 000000000..2ac86ef22
--- /dev/null
+++ b/parmys-plugin/src/netlist_utils.cc
@@ -0,0 +1,1502 @@
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "odin_globals.h"
+#include "odin_types.h"
+#include <math.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "netlist_utils.h"
+#include "node_creation_library.h"
+#include "odin_util.h"
+#include "vtr_memory.h"
+#include "vtr_util.h"
+
+/*---------------------------------------------------------------------------------------------
+ * (function: allocate_nnode)
+ *  Builds a blank node for location `loc`: unnamed, NO_OP typed, without pins or port sizes.
+ *  The matching release routine in this file is free_nnode.
+ *-------------------------------------------------------------------------------------------*/
+nnode_t *allocate_nnode(loc_t loc)
+{
+    nnode_t *node = (nnode_t *)my_malloc_struct(sizeof(nnode_t));
+
+    /* identity */
+    node->loc = loc;
+    node->name = NULL;
+    node->type = NO_OP;
+    node->bit_width = 0;
+    node->related_ast_node = NULL;
+
+    /* connectivity: no pins yet */
+    node->num_input_pins = 0;
+    node->input_pins = NULL;
+    node->num_output_pins = 0;
+    node->output_pins = NULL;
+
+    /* port grouping: no port sizes recorded yet */
+    node->num_input_port_sizes = 0;
+    node->input_port_sizes = NULL;
+    node->num_output_port_sizes = 0;
+    node->output_port_sizes = NULL;
+
+    /* attached data */
+    node->node_data = NULL;
+    node->unique_node_data_id = -1;
+    node->attributes = init_attribute();
+    node->initial_value = init_value_e::undefined;
+
+    /* traversal bookkeeping: the mark and every level start at -1 */
+    node->traverse_visited = -1;
+    node->forward_level = -1;
+    node->backward_level = -1;
+    node->sequential_level = -1;
+    node->sequential_terminator = false;
+
+    /* the legacy in_queue / undriven_pins / ratio initialisation stays disabled */
+
+    return node;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: free_nnode)
+ *  Releases a node together with what it owns: its input/output pins, both pin arrays,
+ *  the port size arrays, the attributes and the name. Nets are not touched.
+ *  Accepts NULL. Returns the result of vtr::free so callers can clear their pointer.
+ *-------------------------------------------------------------------------------------------*/
+nnode_t *free_nnode(nnode_t *to_free)
+{
+    if (to_free) {
+        /* TODO: node_data is still not released here */
+
+        /*
+         * Pins are released through free_npin() so that both strings a pin owns
+         * (name and mapping) go away with it; freeing only the name leaked the mapping.
+         */
+        for (int i = 0; i < to_free->num_input_pins; i++) {
+            to_free->input_pins[i] = free_npin(to_free->input_pins[i]);
+        }
+
+        to_free->input_pins = (npin_t **)vtr::free(to_free->input_pins);
+
+        /* output pins get the same treatment */
+        for (int i = 0; i < to_free->num_output_pins; i++) {
+            to_free->output_pins[i] = free_npin(to_free->output_pins[i]);
+        }
+
+        to_free->output_pins = (npin_t **)vtr::free(to_free->output_pins);
+
+        /* plain arrays and the attribute block */
+        vtr::free(to_free->input_port_sizes);
+        vtr::free(to_free->output_port_sizes);
+
+        free_attribute(to_free->attributes);
+
+        if (to_free->name) {
+            vtr::free(to_free->name);
+            to_free->name = NULL;
+        }
+
+        /* now free the node */
+    }
+    return (nnode_t *)vtr::free(to_free);
+}
+
+/*-------------------------------------------------------------------------
+ * (function: allocate_more_input_pins)
+ *  Appends `width` empty (NULL) slots to the node's input pin array.
+ *  A width <= 0 is reported through error_message and changes nothing.
+ *-----------------------------------------------------------------------*/
+void allocate_more_input_pins(nnode_t *node, int width)
+{
+    if (width <= 0) {
+        error_message(NETLIST, node->loc, "tried adding input pins for width %d <= 0 %s\n", width, node->name);
+        return;
+    }
+
+    /* grow the array first, then clear the new tail while bumping the pin count */
+    node->input_pins = (npin_t **)vtr::realloc(node->input_pins, sizeof(npin_t *) * (node->num_input_pins + width));
+    for (int added = 0; added < width; added++) {
+        node->input_pins[node->num_input_pins] = NULL;
+        node->num_input_pins += 1;
+    }
+}
+
+/*-------------------------------------------------------------------------
+ * (function: allocate_more_output_pins)
+ *  Appends `width` empty (NULL) slots to the node's output pin array.
+ *  A width <= 0 is reported through error_message and changes nothing.
+ *-----------------------------------------------------------------------*/
+void allocate_more_output_pins(nnode_t *node, int width)
+{
+    if (width <= 0) {
+        error_message(NETLIST, node->loc, "tried adding output pins for width %d <= 0 %s\n", width, node->name);
+        return;
+    }
+
+    node->output_pins = (npin_t **)vtr::realloc(node->output_pins, sizeof(npin_t *) * (node->num_output_pins + width));
+    /* NULL out only the slots that were just added */
+    npin_t **fresh_slot = node->output_pins + node->num_output_pins;
+    node->num_output_pins += width;
+    for (int remaining = width; remaining > 0; remaining--)
+        *fresh_slot++ = NULL;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: add_output_port_information) appends port_width to the node's list of output port sizes
+ *-------------------------------------------------------------------------------------------*/
+void add_output_port_information(nnode_t *node, int port_width)
+{
+    node->num_output_port_sizes += 1;
+    node->output_port_sizes = (int *)vtr::realloc(node->output_port_sizes, sizeof(int) * node->num_output_port_sizes);
+    node->output_port_sizes[node->num_output_port_sizes - 1] = port_width;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: add_input_port_information) appends port_width to the node's list of input port sizes
+ *-------------------------------------------------------------------------------------------*/
+void add_input_port_information(nnode_t *node, int port_width)
+{
+    /* one more slot at the end, filled with the new width */
+    node->input_port_sizes = (int *)vtr::realloc(node->input_port_sizes, sizeof(int) * (node->num_input_port_sizes + 1));
+    node->input_port_sizes[node->num_input_port_sizes++] = port_width;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: allocate_npin)
+ *  Creates a blank pin: unnamed, NO_ID typed, attached to no net and no node
+ *-------------------------------------------------------------------------------------------*/
+npin_t *allocate_npin()
+{
+    npin_t *pin = (npin_t *)my_malloc_struct(sizeof(npin_t));
+
+    /* identity */
+    pin->name = NULL;
+    pin->type = NO_ID;
+    pin->mapping = NULL;
+
+    /* not attached to anything yet: -1 is used for both indices */
+    pin->net = NULL;
+    pin->pin_net_idx = -1;
+    pin->node = NULL;
+    pin->pin_node_idx = -1;
+
+    /* counters and flags */
+    pin->coverage = 0;
+    pin->is_default = false;
+    pin->is_implied = false;
+
+    return pin;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: free_npin)
+ *  Releases a pin along with its name and mapping strings; accepts NULL
+ *-------------------------------------------------------------------------------------------*/
+npin_t *free_npin(npin_t *to_free)
+{
+    if (to_free) {
+        /* the two strings go first */
+        if (to_free->name) {
+            vtr::free(to_free->name);
+            to_free->name = NULL;
+        }
+        if (to_free->mapping) {
+            vtr::free(to_free->mapping);
+            to_free->mapping = NULL;
+        }
+    }
+    /* now free the pin itself and hand vtr::free's result back */
+    return (npin_t *)vtr::free(to_free);
+}
+
+/*-------------------------------------------------------------------------
+ * (function: copy_npin)
+ *  Copies name, type, mapping, is_default and sensitivity; net/node links are not set here. Should only be called by copy_input_npin & copy_output_npin
+ *-----------------------------------------------------------------------*/
+static npin_t *copy_npin(npin_t *copy_pin)
+{
+    npin_t *duplicate = allocate_npin(); /* name and mapping start out NULL */
+    if (copy_pin->name)
+        duplicate->name = vtr::strdup(copy_pin->name);
+    if (copy_pin->mapping)
+        duplicate->mapping = vtr::strdup(copy_pin->mapping);
+
+    duplicate->type = copy_pin->type;
+    duplicate->sensitivity = copy_pin->sensitivity;
+    duplicate->is_default = copy_pin->is_default;
+    return duplicate;
+}
+
+/*-------------------------------------------------------------------------
+ * (function: copy_output_npin)
+ *  Copies an output pin; the copy points at the same net but is not added to that net's driver list
+ *-----------------------------------------------------------------------*/
+npin_t *copy_output_npin(npin_t *copy_pin)
+{
+    npin_t *clone = copy_npin(copy_pin);
+    oassert(copy_pin->type == OUTPUT);
+    /* share the net pointer only */
+    clone->net = copy_pin->net;
+    return clone;
+}
+
+/*-------------------------------------------------------------------------
+ * (function: copy_input_npin)
+ *  Copies an input pin and, when the original is on a net, registers the copy as one more fanout of that net
+ *-----------------------------------------------------------------------*/
+npin_t *copy_input_npin(npin_t *copy_pin)
+{
+    npin_t *clone = copy_npin(copy_pin);
+    oassert(copy_pin->type == INPUT);
+
+    nnet_t *shared_net = copy_pin->net;
+    if (shared_net)
+        add_fanout_pin_to_net(shared_net, clone);
+    return clone;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: allocate_nnet) creates a blank net: unnamed, without driver or fanout pins
+ *-------------------------------------------------------------------------------------------*/
+nnet_t *allocate_nnet()
+{
+    nnet_t *net = (nnet_t *)my_malloc_struct(sizeof(nnet_t));
+
+    net->name = NULL;
+    net->combined = false;
+
+    /* both pin lists start empty */
+    net->num_driver_pins = 0;
+    net->driver_pins = NULL;
+    net->num_fanout_pins = 0;
+    net->fanout_pins = NULL;
+    net->net_data = NULL;
+    net->unique_net_data_id = -1;
+    return net;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: free_nnet)
+ *  Releases a net, its name and both pin arrays; the pins themselves are not freed. Accepts NULL.
+ *-------------------------------------------------------------------------------------------*/
+nnet_t *free_nnet(nnet_t *to_free)
+{
+    if (!to_free)
+        return (nnet_t *)vtr::free(to_free);
+
+    if (to_free->num_driver_pins)
+        vtr::free(to_free->driver_pins); /* only released when at least one driver is recorded */
+    if (to_free->name)
+        vtr::free(to_free->name);
+    to_free->fanout_pins = (npin_t **)vtr::free(to_free->fanout_pins);
+
+    /* now free the net */
+    return (nnet_t *)vtr::free(to_free);
+}
+
+/*---------------------------------------------------------------------------
+ * (function: move_output_pin) moves the pin in output slot old_idx to slot new_idx of the same node
+ *-------------------------------------------------------------------------*/
+void move_output_pin(nnode_t *node, int old_idx, int new_idx)
+{
+    oassert(node != NULL);
+    oassert(((old_idx >= 0) && (old_idx < node->num_output_pins)));
+    oassert(((new_idx >= 0) && (new_idx < node->num_output_pins)));
+    npin_t *pin = node->output_pins[old_idx];
+    oassert(pin != NULL); /* the pin spots must be allocated and the source slot must hold a pin */
+    if (old_idx != new_idx) { /* a move onto itself must not wipe the pin's own slot */
+        node->output_pins[new_idx] = pin;
+        node->output_pins[old_idx] = NULL;
+    }
+    /* record the node and pin spot in the pin */
+    pin->type = OUTPUT;
+    pin->node = node;
+    pin->pin_node_idx = new_idx;
+}
+
+/*---------------------------------------------------------------------------
+ * (function: move_input_pin) moves the pin in input slot old_idx to slot new_idx of the same node
+ *-------------------------------------------------------------------------*/
+void move_input_pin(nnode_t *node, int old_idx, int new_idx)
+{
+    oassert(node != NULL);
+    oassert(((old_idx >= 0) && (old_idx < node->num_input_pins)));
+    oassert(((new_idx >= 0) && (new_idx < node->num_input_pins)));
+    npin_t *pin = node->input_pins[old_idx];
+    oassert(pin != NULL); /* the pin spots must be allocated and the source slot must hold a pin */
+    if (old_idx != new_idx) { /* a move onto itself must not wipe the pin's own slot */
+        node->input_pins[new_idx] = pin;
+        node->input_pins[old_idx] = NULL;
+    }
+    /* record the node and pin spot in the pin */
+    pin->type = INPUT;
+    pin->node = node;
+    pin->pin_node_idx = new_idx;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: add_input_pin_to_node) stores `pin` in input slot pin_idx of `node` and links the pin back to the node
+ *-------------------------------------------------------------------------------------------*/
+void add_input_pin_to_node(nnode_t *node, npin_t *pin, int pin_idx)
+{
+    oassert(node != NULL);
+    oassert(pin != NULL);
+    oassert(((pin_idx >= 0) && (pin_idx < node->num_input_pins))); /* lower bound checked too, as in move_input_pin */
+    /* assumes the pin spots have been allocated; whatever was in the slot is overwritten */
+    node->input_pins[pin_idx] = pin;
+    /* record the node and pin spot in the pin */
+    pin->type = INPUT;
+    pin->node = node;
+    pin->pin_node_idx = pin_idx;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: add_fanout_pin_to_net) appends `pin` to the net's fanout list and marks it as an INPUT pin of that net
+ *-------------------------------------------------------------------------------------------*/
+void add_fanout_pin_to_net(nnet_t *net, npin_t *pin)
+{
+    oassert(net != NULL);
+    oassert(pin != NULL);
+    oassert(pin->type != OUTPUT);
+    /* make room for one more entry and put the pin in the last slot */
+    net->num_fanout_pins += 1;
+    net->fanout_pins = (npin_t **)vtr::realloc(net->fanout_pins, sizeof(npin_t *) * net->num_fanout_pins);
+    net->fanout_pins[net->num_fanout_pins - 1] = pin;
+    /* back-links stored in the pin */
+    pin->type = INPUT;
+    pin->net = net;
+    pin->pin_net_idx = net->num_fanout_pins - 1;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: add_output_pin_to_node) stores `pin` in output slot pin_idx of `node` and links the pin back to the node
+ *-------------------------------------------------------------------------------------------*/
+void add_output_pin_to_node(nnode_t *node, npin_t *pin, int pin_idx)
+{
+    oassert(node != NULL);
+    oassert(pin != NULL);
+    oassert(((pin_idx >= 0) && (pin_idx < node->num_output_pins))); /* lower bound checked too, as in move_output_pin */
+    /* assumes the pin spots have been allocated; whatever was in the slot is overwritten */
+    node->output_pins[pin_idx] = pin;
+    /* record the node and pin spot in the pin */
+    pin->type = OUTPUT;
+    pin->node = node;
+    pin->pin_node_idx = pin_idx;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: add_driver_pin_to_net) appends `pin` to the net's driver list and marks it as an OUTPUT pin of that net
+ *-------------------------------------------------------------------------------------------*/
+void add_driver_pin_to_net(nnet_t *net, npin_t *pin)
+{
+    oassert(net != NULL);
+    oassert(pin != NULL);
+    oassert(pin->type != INPUT);
+    /* grow the driver array by one slot; the pin lands at the old count */
+    net->driver_pins = (npin_t **)vtr::realloc(net->driver_pins, (net->num_driver_pins + 1) * sizeof(npin_t *));
+    net->driver_pins[net->num_driver_pins] = pin;
+    /* back-links stored in the pin */
+    pin->pin_net_idx = net->num_driver_pins;
+    pin->type = OUTPUT;
+    pin->net = net;
+    net->num_driver_pins++;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: join_nets)
+ *  Moves the fanout pins of other_net onto join_to_net, then frees other_net.
+ *  TODO: improve error message
+ *-------------------------------------------------------------------------------------------*/
+void join_nets(nnet_t *join_to_net, nnet_t *other_net)
+{
+    if (join_to_net == other_net) {
+        for (int i = 0; i < join_to_net->num_driver_pins; i++) {
+            npin_t *driver = join_to_net->driver_pins[i];
+            const char *pin_name = (driver && driver->name) ? driver->name : "unknown";
+            if (driver && driver->node)
+                warning_message(NETLIST, driver->node->loc, "%s %s\n", "Combinational loop with driver pin", pin_name);
+            else
+                warning_message(NETLIST, unknown_location, "%s %s\n", "Combinational loop with driver pin", pin_name);
+        }
+        for (int i = 0; i < join_to_net->num_fanout_pins; i++) {
+            /* fanout slots can be NULL (remap_pin_to_new_net clears them), so test the pin before reading its name */
+            npin_t *fanout = join_to_net->fanout_pins[i];
+            const char *pin_name = (fanout && fanout->name) ? fanout->name : "unknown";
+            if (fanout && fanout->node)
+                warning_message(NETLIST, fanout->node->loc, "%s %s\n", "Combinational loop with fanout pin", pin_name);
+            else
+                warning_message(NETLIST, unknown_location, "%s %s\n", "Combinational loop with fanout pin", pin_name);
+        }
+        error_message(NETLIST, unknown_location, "%s", "Found a combinational loop");
+        return; /* a self-join would walk a fanout list that grows while it is read and then free the live net */
+    }
+    if (other_net->num_driver_pins > 1) {
+        if (other_net->name && join_to_net->name)
+            error_message(NETLIST, unknown_location, "Tried to join net %s to %s but this would lose %d drivers for net %s", other_net->name,
+                          join_to_net->name, other_net->num_driver_pins - 1, other_net->name);
+        else
+            error_message(NETLIST, unknown_location, "Tried to join nets but this would lose %d drivers", other_net->num_driver_pins - 1);
+    }
+    /* move every non-NULL fanout of other_net over to join_to_net */
+    for (int i = 0; i < other_net->num_fanout_pins; i++)
+        if (other_net->fanout_pins[i])
+            add_fanout_pin_to_net(join_to_net, other_net->fanout_pins[i]);
+    // CLEAN UP
+    free_nnet(other_net);
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: remap_pin_to_new_net) detaches `pin` from its current net and attaches it to new_net
+ *-------------------------------------------------------------------------------------------*/
+void remap_pin_to_new_net(npin_t *pin, nnet_t *new_net)
+{
+    nnet_t *old_net = pin->net;
+
+    if (pin->type == OUTPUT) {
+        /* NOTE(review): the whole driver list of the old net is dropped here, not just this pin */
+        if (old_net->num_driver_pins)
+            vtr::free(old_net->driver_pins);
+        old_net->driver_pins = NULL;
+        old_net->num_driver_pins = 0;
+        add_driver_pin_to_net(new_net, pin);
+    } else if (pin->type == INPUT) {
+        /* leave a NULL hole in the old fanout list, then append to the new net */
+        old_net->fanout_pins[pin->pin_net_idx] = NULL;
+        add_fanout_pin_to_net(new_net, pin);
+    }
+}
+
+/*-------------------------------------------------------------------------
+ * (function: remap_pin_to_new_node)
+ * 	Takes a pin off its current node and places it in slot pin_idx of new_node
+ *-----------------------------------------------------------------------*/
+void remap_pin_to_new_node(npin_t *pin, nnode_t *new_node, int pin_idx)
+{
+    nnode_t *old_node = pin->node;
+    if (pin->type == OUTPUT) {
+        /* vacate the output slot on the old node, then attach to the new one */
+        old_node->output_pins[pin->pin_node_idx] = NULL;
+        add_output_pin_to_node(new_node, pin, pin_idx);
+    } else if (pin->type == INPUT) {
+        /* vacate the input slot on the old node, then attach to the new one */
+        old_node->input_pins[pin->pin_node_idx] = NULL;
+        add_input_pin_to_node(new_node, pin, pin_idx);
+    }
+}
+
+/*------------------------------------------------------------------------
+ * (function: connect_nodes)
+ * 	Wires output slot out_idx of out_node to input slot in_idx of in_node;
+ * 	the output pin and its net are created first when the slot is still empty
+ *----------------------------------------------------------------------*/
+void connect_nodes(nnode_t *out_node, int out_idx, nnode_t *in_node, int in_idx)
+{
+    /* both slots have to exist on their nodes */
+    oassert(out_node->num_output_pins > out_idx);
+    oassert(in_node->num_input_pins > in_idx);
+
+    /* the sink side: a fresh pin placed on the requested input slot */
+    npin_t *sink_pin = allocate_npin();
+    add_input_pin_to_node(in_node, sink_pin, in_idx);
+
+    /* look at what currently occupies the output slot */
+    npin_t *existing_driver = out_node->output_pins[out_idx];
+    if (existing_driver != NULL) {
+        /* the output already has a pin, so the sink just becomes one more fanout of its net */
+        add_fanout_pin_to_net(existing_driver->net, sink_pin);
+        return;
+    }
+
+    /* nothing on this output yet: build the net and the pin that drives it */
+    nnet_t *fresh_net = allocate_nnet();
+    npin_t *driver_pin = allocate_npin();
+    /* the net is named after the driving node (it gets its own copy of the string) */
+    fresh_net->name = vtr::strdup(out_node->name);
+
+    /* place the driver pin on the output slot */
+    add_output_pin_to_node(out_node, driver_pin, out_idx);
+    /* the new net feeds the sink pin ... */
+    add_fanout_pin_to_net(fresh_net, sink_pin);
+    /* ... and is driven by the new output pin */
+    add_driver_pin_to_net(fresh_net, driver_pin);
+}
+
+/**
+ * -------------------------------------------------------------------------------------------
+ * (function: init_attribute)
+ *
+ * @brief Allocates a netlist node attribute record and resets all of its fields:
+ * edge sensitivities, reset values, port signedness and the memory node settings
+ *-------------------------------------------------------------------------------------------*/
+attr_t *init_attribute()
+{
+    attr_t *attr = (attr_t *)vtr::malloc(sizeof(attr_t));
+
+    /* every control signal sensitivity starts out undefined */
+    attr->clk_edge_type = UNDEFINED_SENSITIVITY;
+    attr->clr_polarity = UNDEFINED_SENSITIVITY;
+    attr->set_polarity = UNDEFINED_SENSITIVITY;
+    attr->areset_polarity = UNDEFINED_SENSITIVITY;
+    attr->sreset_polarity = UNDEFINED_SENSITIVITY;
+    attr->enable_polarity = UNDEFINED_SENSITIVITY;
+
+    attr->areset_value = 0;
+    attr->sreset_value = 0;
+
+    attr->port_a_signed = UNSIGNED;
+    attr->port_b_signed = UNSIGNED;
+
+    /* memory node attributes */
+    attr->size = 0;
+    attr->offset = 0;
+    attr->memory_id = NULL;
+
+    attr->RD_CLK_ENABLE = UNDEFINED_SENSITIVITY;
+    attr->WR_CLK_ENABLE = UNDEFINED_SENSITIVITY;
+    attr->RD_CLK_POLARITY = UNDEFINED_SENSITIVITY;
+    attr->WR_CLK_POLARITY = UNDEFINED_SENSITIVITY;
+
+    attr->RD_PORTS = 0;
+    attr->WR_PORTS = 0;
+    attr->DBITS = 0;
+    attr->ABITS = 0;
+
+    return attr;
+}
+
+/**
+ * -------------------------------------------------------------------------------------------
+ * (function: copy_attribute)
+ *
+ * @brief copy every field of an attribute to another one (memory_id is duplicated)
+ *
+ * @param to will copy to this attr; nothing is done when it is NULL or is `copy` itself
+ * @param copy the attr that will be copied
+ *-------------------------------------------------------------------------------------------*/
+void copy_attribute(attr_t *to, attr_t *copy)
+{
+    if (to == NULL || to == copy)
+        return; /* `to` is passed by value: an attr allocated here could never reach the caller */
+
+    to->clk_edge_type = copy->clk_edge_type;
+    to->clr_polarity = copy->clr_polarity;
+    to->set_polarity = copy->set_polarity;
+    to->areset_polarity = copy->areset_polarity;
+    to->sreset_polarity = copy->sreset_polarity;
+    to->enable_polarity = copy->enable_polarity;
+
+    to->areset_value = copy->areset_value;
+    to->sreset_value = copy->sreset_value;
+    to->port_a_signed = copy->port_a_signed;
+    to->port_b_signed = copy->port_b_signed;
+
+    /* memory node attributes */
+    to->size = copy->size;
+    to->offset = copy->offset;
+    if (to->memory_id != copy->memory_id)
+        vtr::free(to->memory_id); /* the id being replaced belongs to `to`; do not leak it */
+    to->memory_id = vtr::strdup(copy->memory_id);
+
+    to->RD_CLK_ENABLE = copy->RD_CLK_ENABLE;
+    to->WR_CLK_ENABLE = copy->WR_CLK_ENABLE;
+    to->RD_CLK_POLARITY = copy->RD_CLK_POLARITY;
+    to->WR_CLK_POLARITY = copy->WR_CLK_POLARITY;
+
+    to->RD_PORTS = copy->RD_PORTS;
+    to->WR_PORTS = copy->WR_PORTS;
+    to->DBITS = copy->DBITS;
+    to->ABITS = copy->ABITS;
+}
+
+/**
+ * -------------------------------------------------------------------------------------------
+ * (function: copy_signedness)
+ *
+ * @brief copy the signedness variables of an attribute to another
+ * one. A NULL destination is ignored (see @param to)
+ *
+ * @param to will copy to this attr; it is passed by value, so nothing can be created for a NULL
+ * @param copy the attr that will be copied
+ *-------------------------------------------------------------------------------------------*/
+void copy_signedness(attr_t *to, attr_t *copy)
+{
+    if (to == NULL)
+        return;
+
+    to->port_a_signed = copy->port_a_signed;
+    to->port_b_signed = copy->port_b_signed;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: init_signal_list)
+ * 	Allocates an empty signal list: the structure that describes the inputs and outputs
+ * 	of elements as they connect to other elements in the graph.
+ *-------------------------------------------------------------------------------------------*/
+signal_list_t *init_signal_list()
+{
+    signal_list_t *fresh = (signal_list_t *)vtr::malloc(sizeof(signal_list_t));
+
+    /* no pins yet, both flags cleared */
+    fresh->pins = NULL;
+    fresh->count = 0;
+    fresh->is_adder = false;
+    fresh->is_memory = false;
+
+    return fresh;
+}
+
+/**
+ *---------------------------------------------------------------------------------------------
+ * (function: is_constant_signal)
+ *
+ * @brief tells whether every pin of a signal list sits on the zero (GND) or one (VCC) net
+ *
+ * @param signal list of pins
+ * @param netlist pointer to the current netlist file
+ *
+ * @return true when the whole list is constant (nets are matched by name)
+ *-------------------------------------------------------------------------------------------*/
+bool is_constant_signal(signal_list_t *signal, netlist_t *netlist)
+{
+    int idx;
+
+    for (idx = 0; idx < signal->count; idx++) {
+        const char *net_name = signal->pins[idx]->net->name;
+
+        /* a single pin on a net named differently from both the zero net
+         * and the one net is enough to make the signal non-constant */
+        if (strcmp(net_name, netlist->zero_net->name) != 0 && strcmp(net_name, netlist->one_net->name) != 0)
+            return (false);
+    }
+
+    /* no offending pin was found (an empty list counts as constant) */
+    return (true);
+}
+
+/**
+ *---------------------------------------------------------------------------------------------
+ * (function: constant_signal_value)
+ *
+ * @brief computes the value of a constant signal list, pin i being bit i
+ *
+ * @param signal list of pins
+ * @param netlist pointer to the current netlist file
+ *
+ * @return the integer value of the constant signal
+ *-------------------------------------------------------------------------------------------*/
+long constant_signal_value(signal_list_t *signal, netlist_t *netlist)
+{
+    oassert(is_constant_signal(signal, netlist));
+
+    long value = 0;
+
+    /* a pin on the one (VCC) net sets its bit, every other pin leaves
+     * the bit cleared */
+    for (int bit = 0; bit < signal->count; bit++) {
+        const char *net_name = signal->pins[bit]->net->name;
+        if (strcmp(net_name, netlist->one_net->name) != 0)
+            continue;
+        value += shift_left_value_with_overflow_check(0X1, bit, unknown_location);
+    }
+
+    return value;
+}
+
+/**
+ *---------------------------------------------------------------------------------------------
+ * (function: create_constant_signal)
+ *
+ * @brief create the signal_list of the given constant value
+ *
+ * @param value the constant, as a long long
+ * @param desired_width the size of return signal list
+ * @param netlist pointer to the netlist
+ *
+ * @return desired_width pins, taken from the end of the bit string backwards
+ *-------------------------------------------------------------------------------------------*/
+signal_list_t *create_constant_signal(const long long value, const int desired_width, netlist_t *netlist)
+{
+    signal_list_t *list = init_signal_list();
+
+    std::string binary_value_str = string_of_radix_to_bitstring(std::to_string(value), 10);
+    long width = binary_value_str.length();
+
+    while (desired_width > width) {
+        if (value < 0)
+            binary_value_str = "1" + binary_value_str;
+        else
+            binary_value_str = "0" + binary_value_str;
+
+        width = binary_value_str.length();
+    }
+
+    long start = width;
+    long end = width - desired_width;
+
+    /* create vcc/gnd signal pins; any character other than '1' maps to the zero pin */
+    for (long i = start; i > end; i--) {
+        if (binary_value_str[i - 1] == '1') {
+            add_pin_to_signal_list(list, get_one_pin(netlist));
+        } else {
+            add_pin_to_signal_list(list, get_zero_pin(netlist));
+        }
+    }
+
+    return (list);
+}
+
+/**
+ * (function: prune_signal)
+ *
+ * @brief to prune extra pins: only the first prune_size pins of each signal are kept.
+ * Usually this happens when a memory address (less than 32 bits) comes from an
+ * arithmetic operation that makes it 32 bits (a lot of useless pins).
+ *
+ * @param signalsvar signal list of pins (may include one signal or two signals)
+ * @param signal_width the width of each signal in signalsvar
+ * @param prune_size the desired size of each signal
+ * @param num_of_signals the number of signals signalsvar contains
+ *
+ * @return signalsvar itself when prune_size >= signal_width, otherwise the pruned list
+ */
+signal_list_t *prune_signal(signal_list_t *signalsvar, long signal_width, long prune_size, int num_of_signals)
+{
+    /* validation */
+    oassert(prune_size);
+    oassert(num_of_signals);
+    oassert(signalsvar->count % signal_width == 0);
+
+    /* no need to prune */
+    if (prune_size >= signal_width)
+        return (signalsvar);
+
+    int i, j;
+    /* new signal list */
+    signal_list_t *new_signals = NULL;
+    signal_list_t **splitted_signals = split_signal_list(signalsvar, signal_width);
+
+    /* iterating over signals to prune them (assumes num_of_signals chunks exist - TODO confirm with callers) */
+    for (i = 0; i < num_of_signals; i++) {
+        /* init pruned signal list */
+        signal_list_t *new_signal = init_signal_list();
+        for (j = 0; j < signal_width; j++) {
+            npin_t *pin = splitted_signals[i]->pins[j];
+            /* adding pin to new signal list */
+            if (j < prune_size) {
+                add_pin_to_signal_list(new_signal, pin);
+            }
+            /* pruning the extra pins */
+            else {
+                /* detach from the node only: the pin stays on its net and is not freed */
+                pin->node->input_pins[pin->pin_node_idx] = NULL;
+                pin->node = NULL;
+                warning_message(NETLIST, unknown_location, "Input pin (%s) exceeds the size of its connected port, will be left unconnected",
+                                pin->net->name);
+            }
+        }
+
+        free_signal_list(splitted_signals[i]); /* NOTE(review): this is signalsvar itself when no split was needed */
+        splitted_signals[i] = new_signal;
+    }
+
+    /* combining pruned signals into the first one (combine_lists frees the others) */
+    new_signals = combine_lists(splitted_signals, num_of_signals);
+
+    // CLEAN UP: only the array of chunk pointers is left to release
+    vtr::free(splitted_signals);
+
+    return (new_signals);
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: add_pin_to_signal_list)
+ * 	Appends a pin to the end of the signal list, growing its pin array by one slot
+ *-------------------------------------------------------------------------------------------*/
+void add_pin_to_signal_list(signal_list_t *list, npin_t *pin)
+{
+    const size_t grown_bytes = sizeof(npin_t *) * (list->count + 1);
+    list->pins = (npin_t **)vtr::realloc(list->pins, grown_bytes);
+    list->pins[list->count++] = pin;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: combine_lists)
+ * 	Appends the pins of signal_lists[1..] to signal_lists[0], skipping pins whose name is
+ * 	already present there; the appended lists are freed and signal_lists[0] is returned
+ *-------------------------------------------------------------------------------------------*/
+signal_list_t *combine_lists(signal_list_t **signal_lists, int num_signal_lists)
+{
+    signal_list_t *merged = signal_lists[0];
+    for (int i = 1; i < num_signal_lists; i++) {
+        if (!signal_lists[i])
+            continue;
+        for (int j = 0; j < signal_lists[i]->count; j++) {
+            npin_t *candidate = signal_lists[i]->pins[j];
+            bool pin_already_added = false;
+            /* one match is enough, so stop scanning as soon as the name is found */
+            for (int k = 0; k < merged->count && !pin_already_added; k++)
+                pin_already_added = !strcmp(merged->pins[k]->name, candidate->name);
+
+            if (!pin_already_added)
+                add_pin_to_signal_list(merged, candidate);
+        }
+
+        free_signal_list(signal_lists[i]);
+    }
+
+    return merged;
+}
+
+/**
+ * (function: split_signal_list)
+ *
+ * @brief split signals list to a list of signal list with requested width
+ *
+ * NOTE: when signalsvar is already exactly `width` pins wide the returned array
+ * holds signalsvar itself; otherwise it holds newly created lists that share
+ * the pins of signalsvar
+ *
+ * @param signalsvar signal list of pins (may include one signal or two signals)
+ * @param width the width of each signal in signalsvar
+ *
+ * @return splitted signal list
+ */
+signal_list_t **split_signal_list(signal_list_t *signalsvar, const int width)
+{
+    /* a list that is exactly one chunk wide is not copied: it becomes the only entry */
+    if (signalsvar->count == width) {
+        signal_list_t **single = (signal_list_t **)vtr::calloc(1, sizeof(signal_list_t *));
+        single[0] = signalsvar;
+        return (single);
+    }
+
+    /* the list has to divide evenly into chunks of the requested width */
+    oassert(width != 0);
+    oassert(signalsvar->count % width == 0);
+
+    const int num_chunk = signalsvar->count / width;
+    signal_list_t **chunks = (signal_list_t **)vtr::calloc(num_chunk, sizeof(signal_list_t *));
+
+    /* fill the chunks in order, walking the input pins with a running cursor */
+    int cursor = 0;
+    for (int chunk = 0; chunk < num_chunk; chunk++) {
+        signal_list_t *piece = init_signal_list();
+        for (int taken = 0; taken < width; taken++) {
+            add_pin_to_signal_list(piece, signalsvar->pins[cursor]);
+            cursor++;
+        }
+        chunks[chunk] = piece;
+    }
+
+    return (chunks);
+}
+
+/**
+ * (function: sigcmp)
+ *
+ * @brief to check if sig is the same as be_checked, pin by pin, by comparing nets
+ *
+ * @param sig first signals
+ * @param be_checked second signals
+ * @return true when every pair of corresponding pins sits on the same net
+ */
+bool sigcmp(signal_list_t *sig, signal_list_t *be_checked)
+{
+    /* only lists of equal size can be compared */
+    oassert(sig->count == be_checked->count);
+
+    bool same = true;
+
+    /* give up at the first position whose two pins sit on different nets */
+    for (int pos = 0; same && pos < sig->count; pos++)
+        same = (sig->pins[pos]->net == be_checked->pins[pos]->net);
+
+    return (same);
+}
+
+/* Returns a new signal list that holds, in the same order, a copy_input_npin()
+ * copy of every pin of signalsvar */
+signal_list_t *copy_input_signals(signal_list_t *signalsvar)
+{
+    signal_list_t *clones = init_signal_list();
+
+    for (int idx = 0; idx < signalsvar->count; idx++)
+        add_pin_to_signal_list(clones, copy_input_npin(signalsvar->pins[idx]));
+
+    return clones;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: make_output_pins_for_existing_node)
+ * 	Gives every output slot of the node a new driver pin on a new net, and returns a
+ * 	signal list holding one new fanout pin of each of those nets, in slot order
+ *-------------------------------------------------------------------------------------------*/
+signal_list_t *make_output_pins_for_existing_node(nnode_t *node, int width)
+{
+    signal_list_t *return_list = init_signal_list();
+    int i;
+
+    oassert(node->num_output_pins == width);
+
+    for (i = 0; i < width; i++) {
+        npin_t *new_pin1 = allocate_npin();
+        npin_t *new_pin2 = allocate_npin();
+        nnet_t *new_net = allocate_nnet();
+        /* give each net its own copy of the name (as connect_nodes does): with one pointer
+         * shared by the node and all of these nets, whichever of them frees its name
+         * first would leave the others with a dangling one */
+        new_net->name = vtr::strdup(node->name);
+        /* hook the output pin into the node */
+        add_output_pin_to_node(node, new_pin1, i);
+        /* hook up new pin 1 into the new net */
+        add_driver_pin_to_net(new_net, new_pin1);
+        /* hook up the new pin 2 to this new net */
+        add_fanout_pin_to_net(new_net, new_pin2);
+
+        /* add the new_pin2 to the list of pins */
+        add_pin_to_signal_list(return_list, new_pin2);
+    }
+
+    return return_list;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: free_signal_list)
+ * 	Frees the list and its pin array; the pins themselves are left alone. A NULL list is
+ * 	accepted. The pointer is passed by value, so the caller's copy is NOT reset
+ *-------------------------------------------------------------------------------------------*/
+void free_signal_list(signal_list_t *list)
+{
+    if (!list)
+        return;
+
+    vtr::free(list->pins);
+    vtr::free(list);
+}
+
+/**
+ * -------------------------------------------------------------------------------------------
+ * (function: free_attribute)
+ *
+ * @brief clean the given attribute structure to avoid mem leaks: frees
+ * its memory_id string and then the structure itself; NULL is accepted
+ *
+ * @param attribute the given attribute structure; it is passed by value, so
+ * the caller's pointer is not reset and must not be used afterwards
+ *-------------------------------------------------------------------------------------------*/
+void free_attribute(attr_t *attribute)
+{
+    if (!attribute)
+        return;
+
+    vtr::free(attribute->memory_id);
+    vtr::free(attribute);
+}
+
+void depth_traverse_count(nnode_t *node, int *count, uintptr_t traverse_mark_number);
+
+/*---------------------------------------------------------------------------------------------
+ * (function: depth_traverse_count)
+ * 	Counts the nodes reachable from node (through output nets) that do not carry
+ * 	traverse_mark_number yet, marking each node as it is counted
+ *-------------------------------------------------------------------------------------------*/
+void depth_traverse_count(nnode_t *node, int *count, uintptr_t traverse_mark_number)
+{
+    /* already counted during this traversal */
+    if (node->traverse_visited == traverse_mark_number)
+        return;
+
+    /* this is a new node so count it and depth visit its fanout */
+    (*count)++;
+    node->traverse_visited = traverse_mark_number;
+
+    for (int i = 0; i < node->num_output_pins; i++) {
+        /* an output slot can be empty (remapping/deleting a pin NULLs it), or its pin netless */
+        npin_t *out_pin = node->output_pins[i];
+        if (out_pin == NULL || out_pin->net == NULL)
+            continue;
+
+        nnet_t *next_net = out_pin->net;
+        for (int j = 0; j < next_net->num_fanout_pins; j++) {
+            if (next_net->fanout_pins[j] == NULL)
+                continue;
+            nnode_t *next_node = next_net->fanout_pins[j]->node;
+            if (next_node == NULL)
+                continue;
+
+            /* recurse into the node fed by this fanout pin */
+            depth_traverse_count(next_node, count, traverse_mark_number);
+        }
+    }
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function:  allocate_netlist)
+ * 	Allocates a netlist with every table empty and gives it three new string caches
+ *-------------------------------------------------------------------------------------------*/
+netlist_t *allocate_netlist()
+{
+    netlist_t *nl = (netlist_t *)my_malloc_struct(sizeof(netlist_t));
+
+    nl->gnd_node = NULL;
+    nl->vcc_node = NULL;
+    nl->pad_node = NULL;
+    nl->zero_net = NULL;
+    nl->one_net = NULL;
+    nl->pad_net = NULL;
+
+    nl->top_input_nodes = NULL;
+    nl->num_top_input_nodes = 0;
+    nl->top_output_nodes = NULL;
+    nl->num_top_output_nodes = 0;
+    nl->ff_nodes = NULL;
+    nl->num_ff_nodes = 0;
+    nl->internal_nodes = NULL;
+    nl->num_internal_nodes = 0;
+    nl->clocks = NULL;
+    nl->num_clocks = 0;
+
+    nl->forward_levels = NULL;
+    nl->num_forward_levels = 0;
+    nl->num_at_forward_level = NULL;
+    nl->backward_levels = NULL;
+    nl->num_backward_levels = 0;
+    nl->num_at_backward_level = NULL;
+    nl->sequential_level_nodes = NULL;
+    nl->num_sequential_levels = 0;
+    nl->num_at_sequential_level = NULL;
+    nl->sequential_level_combinational_termination_node = NULL;
+    nl->num_sequential_level_combinational_termination_nodes = 0;
+    nl->num_at_sequential_level_combinational_termination_node = NULL;
+
+    /* initialize the string caches */
+    nl->nets_sc = sc_new_string_cache();
+    nl->out_pins_sc = sc_new_string_cache();
+    nl->nodes_sc = sc_new_string_cache();
+
+    return nl;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function:  free_netlist)
+ * 	Releases the three string caches of the netlist; nothing else is freed here
+ *-------------------------------------------------------------------------------------------*/
+void free_netlist(netlist_t *to_free)
+{
+    if (to_free) {
+        sc_free_string_cache(to_free->nets_sc);
+        sc_free_string_cache(to_free->out_pins_sc);
+        sc_free_string_cache(to_free->nodes_sc);
+    }
+}
+
+/*
+ * Gets the index of the first output pin with the given mapping
+ * on the given node, or -1 when no output pin carries it.
+ */
+int get_output_pin_index_from_mapping(nnode_t *node, const char *name)
+{
+    for (int i = 0; i < node->num_output_pins; i++) {
+        npin_t *pin = node->output_pins[i];
+        /* empty slots and pins without a mapping can never match */
+        if (pin && pin->mapping && !strcmp(pin->mapping, name))
+            return i;
+    }
+
+    return -1;
+}
+
+/*
+ * Gets the index of the first output port containing a pin with the given
+ * mapping, or -1 when there is none.
+ */
+int get_output_port_index_from_mapping(nnode_t *node, const char *name)
+{
+    int pin_number = 0; /* flat index into output_pins, running across the ports */
+    for (int i = 0; i < node->num_output_port_sizes; i++) {
+        for (int j = 0; j < node->output_port_sizes[i]; j++, pin_number++) {
+            npin_t *pin = node->output_pins[pin_number];
+            /* empty slots and pins without a mapping can never match */
+            if (pin && pin->mapping && !strcmp(pin->mapping, name))
+                return i;
+        }
+    }
+
+    return -1;
+}
+
+/*
+ * Gets the index of the first input pin with the given mapping, or -1 when there is none.
+ */
+int get_input_pin_index_from_mapping(nnode_t *node, const char *name)
+{
+    for (int i = 0; i < node->num_input_pins; i++) {
+        npin_t *pin = node->input_pins[i];
+        /* empty slots and pins without a mapping can never match */
+        if (pin && pin->mapping && !strcmp(pin->mapping, name))
+            return i;
+    }
+
+    return -1;
+}
+
+/*
+ * Gets the port index of the first input port containing a pin with
+ * the given mapping, or -1 when there is none.
+ */
+int get_input_port_index_from_mapping(nnode_t *node, const char *name)
+{
+    int pin_number = 0; /* flat index into input_pins, running across the ports */
+    for (int i = 0; i < node->num_input_port_sizes; i++) {
+        for (int j = 0; j < node->input_port_sizes[i]; j++, pin_number++) {
+            npin_t *pin = node->input_pins[pin_number];
+            /* empty slots and pins without a mapping can never match */
+            if (pin && pin->mapping && !strcmp(pin->mapping, name))
+                return i;
+        }
+    }
+
+    return -1;
+}
+
+/**
+ * (function: legalize_polarity)
+ *
+ * @brief legalize pin polarity to RE (an inverted pin is routed through a new NOT gate)
+ *
+ * @param pin first pin
+ * @param pin_polarity first pin polarity
+ * @param node pointer to pins node for tracking purpose
+ *
+ * @return the pin itself, or a new pin fed by the NOT gate, with RISING_EDGE_SENSITIVITY
+ */
+npin_t *legalize_polarity(npin_t *pin, edge_type_e pin_polarity, nnode_t *node)
+{
+    /* validate pins */
+    oassert(pin && node);
+    oassert(pin->type == INPUT);
+
+    /* the pin handed back; stays the input pin unless an inverter is inserted below */
+    npin_t *pin_out = pin;
+
+    /* detach pin from its node (only the node's slot is cleared, pin->node is not reset here) */
+    if (pin->node)
+        pin->node->input_pins[pin->pin_node_idx] = NULL;
+
+    if (pin_polarity == FALLING_EDGE_SENSITIVITY || pin_polarity == ACTIVE_LOW_SENSITIVITY) {
+        /* create a not gate */
+        nnode_t *not_node = make_1port_gate(LOGICAL_NOT, 1, 1, node, node->traverse_visited);
+        /* hook the pin into not node */
+        add_input_pin_to_node(not_node, pin, 0);
+        /* create output pins */
+        pin_out = allocate_npin();
+        npin_t *not_out = allocate_npin();
+        nnet_t *not_out_net = allocate_nnet();
+        not_out_net->name = make_full_ref_name(NULL, NULL, NULL, not_node->name, 0);
+        /* hook the output pin into the node */
+        add_output_pin_to_node(not_node, not_out, 0);
+        /* the not gate output pin drives the new net */
+        add_driver_pin_to_net(not_out_net, not_out);
+        /* the returned pin is a fanout of that net (it is not attached to any node here) */
+        add_fanout_pin_to_net(not_out_net, pin_out);
+    }
+
+    /* set new pin polarity */
+    pin_out->sensitivity = RISING_EDGE_SENSITIVITY;
+
+    return (pin_out);
+}
+
+/**
+ * (function: reduce_input_ports)
+ *
+ * @brief reduce the input ports size by removing extra pad pins
+ *
+ * @param node reference to the node pointer; the old node is freed and replaced
+ * @param netlist pointer to the current netlist
+ *
+ * @return nothing, but set the node to a new node with reduced equalized
+ * input port sizes (if more than one input port exist)
+ */
+void reduce_input_ports(nnode_t *&node, netlist_t *netlist)
+{
+    oassert(node->num_input_port_sizes == 1 || node->num_input_port_sizes == 2);
+
+    int i, j;
+    int offset = 0;
+    nnode_t *new_node;
+
+    signal_list_t **input_ports = (signal_list_t **)vtr::calloc(node->num_input_port_sizes, sizeof(signal_list_t *));
+    /* add pins to signals lists, one list per input port */
+    for (i = 0; i < node->num_input_port_sizes; i++) {
+        /* initialize signal list */
+        input_ports[i] = init_signal_list();
+        for (j = 0; j < node->input_port_sizes[i]; j++) {
+            add_pin_to_signal_list(input_ports[i], node->input_pins[j + offset]);
+        }
+        offset += node->input_port_sizes[i];
+    }
+
+    /* reduce the first port (reduce_signal_list frees the list passed in and returns a new one) */
+    input_ports[0] = reduce_signal_list(input_ports[0], node->attributes->port_a_signed, netlist);
+    /* reduce the second port if exist */
+    if (node->num_input_port_sizes == 2) {
+        input_ports[1] = reduce_signal_list(input_ports[1], node->attributes->port_b_signed, netlist);
+
+        /* equalize port sizes by padding the shorter port with pad pins */
+        int max = std::max(input_ports[0]->count, input_ports[1]->count);
+
+        while (input_ports[0]->count < max)
+            add_pin_to_signal_list(input_ports[0], get_pad_pin(netlist));
+        while (input_ports[1]->count < max)
+            add_pin_to_signal_list(input_ports[1], get_pad_pin(netlist));
+    }
+
+    /* creating a new node of the same type with the reduced input widths */
+    new_node = (node->num_input_port_sizes == 1)
+                 ? make_1port_gate(node->type, input_ports[0]->count, node->num_output_pins, node, node->traverse_visited)
+                 : make_2port_gate(node->type, input_ports[0]->count, input_ports[1]->count, node->num_output_pins, node, node->traverse_visited);
+
+    /* copy signedness attributes */
+    copy_signedness(new_node->attributes, node->attributes);
+
+    /* hook the input pins of the first port */
+    for (i = 0; i < input_ports[0]->count; i++) {
+        npin_t *pin = input_ports[0]->pins[i];
+        if (pin->node) {
+            /* remap pins to new node */
+            remap_pin_to_new_node(input_ports[0]->pins[i], new_node, i);
+        } else {
+            /* add pins to new node */
+            add_input_pin_to_node(new_node, input_ports[0]->pins[i], i);
+        }
+    }
+    offset = input_ports[0]->count;
+
+    if (node->num_input_port_sizes == 2) {
+        for (i = 0; i < input_ports[1]->count; i++) {
+            npin_t *pin = input_ports[1]->pins[i];
+            if (pin->node) {
+                /* remap pins to new node */
+                remap_pin_to_new_node(input_ports[1]->pins[i], new_node, i + offset);
+            } else {
+                /* add pins to new node */
+                add_input_pin_to_node(new_node, input_ports[1]->pins[i], i + offset);
+            }
+        }
+    }
+
+    /* hook the output pins */
+    for (i = 0; i < node->num_output_pins; i++) {
+        remap_pin_to_new_node(node->output_pins[i], new_node, i);
+    }
+
+    // CLEAN UP
+    for (i = 0; i < node->num_input_port_sizes; i++) {
+        free_signal_list(input_ports[i]);
+    }
+    vtr::free(input_ports);
+    free_nnode(node);
+
+    node = new_node;
+}
+
+/**
+ * (function: reduce_signal_list)
+ *
+ * @brief reduce the size of signal list by removing extra pad pins from its top end
+ *
+ * @param signalvar list of signals (consumed: it is freed before returning)
+ * @param signedness the signedness of a port corresponding to the signal list
+ * @param netlist pointer to the current netlist
+ *
+ * @return a pruned signal list
+ */
+signal_list_t *reduce_signal_list(signal_list_t *signalvar, operation_list signedness, netlist_t *netlist)
+{
+    /* validate signedness */
+    oassert(signedness == operation_list::SIGNED || signedness == operation_list::UNSIGNED);
+
+    signal_list_t *kept = init_signal_list();
+    /* specify the extension net: the one net for a signed port, the zero net otherwise */
+    nnet_t *extended_net = netlist->zero_net;
+    if (signedness == operation_list::SIGNED)
+        extended_net = netlist->one_net;
+
+    /* walk down from the last pin, deleting pins for as long as they sit on the
+     * extension net; the first pin on any other net ends the pruning */
+    int top = signalvar->count - 1;
+    while (top > -1 && signalvar->pins[top]->net == extended_net) {
+        delete_npin(signalvar->pins[top]);
+        signalvar->pins[top] = NULL;
+        top--;
+    }
+
+    /* the surviving pins move to the new list in their original order;
+     * emptied (NULL) slots are skipped */
+    for (int idx = 0; idx < signalvar->count; idx++) {
+        npin_t *survivor = signalvar->pins[idx];
+        if (survivor != NULL)
+            add_pin_to_signal_list(kept, survivor);
+    }
+
+    // CLEAN UP
+    free_signal_list(signalvar);
+
+    return (kept);
+}
+
+/* Allocates a chain_information_t record and resets it:
+ * no name, zero count */
+chain_information_t *allocate_chain_info()
+{
+    chain_information_t *info = (chain_information_t *)my_malloc_struct(sizeof(chain_information_t));
+
+    info->count = 0;
+    info->name = NULL;
+
+    return info;
+}
+
+/**
+ * (function: equalize_ports_size)
+ *
+ * @brief equalizing the input and output ports for the given node (output width := port A width)
+ *
+ * NOTE: at max TWO input ports is supported
+ *
+ * @param node reference to the node pointer; replaced by a new node when a resize is needed
+ * @param traverse_mark_number unique traversal mark for blif elaboration pass
+ * @param netlist pointer to the current netlist file
+ */
+void equalize_ports_size(nnode_t *&node, uintptr_t traverse_mark_number, netlist_t *netlist)
+{
+    oassert(node->traverse_visited == traverse_mark_number);
+    oassert(node->num_input_port_sizes > 0 && node->num_input_port_sizes <= 2);
+
+    /**
+     * INPUTS
+     *  A: (width_a)
+     *  B: (width_b) [optional]
+     * OUTPUT
+     *  Y: width_y
+     */
+    /* removing extra pad pins based on the signedness of ports (this already replaces node) */
+    reduce_input_ports(node, netlist);
+
+    int port_a_size = node->input_port_sizes[0];
+    int port_b_size = -1;
+    if (node->num_input_port_sizes == 2) {
+        port_b_size = node->input_port_sizes[1];
+        /* validate inputport sizes */
+        oassert(port_a_size == port_b_size);
+    }
+
+    int port_y_size = node->output_port_sizes[0];
+
+    /* no change is needed */
+    if (port_a_size == port_y_size)
+        return;
+
+    /* new port size: the output takes the width of input port A */
+    int new_out_size = port_a_size;
+
+    /* creating the new node (port_b_size == -1 means there is a single input port) */
+    nnode_t *new_node = (port_b_size == -1) ? make_1port_gate(node->type, port_a_size, new_out_size, node, traverse_mark_number)
+                                            : make_2port_gate(node->type, port_a_size, port_b_size, new_out_size, node, traverse_mark_number);
+
+    /* copy signedness attributes */
+    copy_signedness(new_node->attributes, node->attributes);
+
+    int i;
+    for (i = 0; i < node->num_input_pins; i++) {
+        /* remapping all input pins (both ports) to the same index on the new node */
+        remap_pin_to_new_node(node->input_pins[i], new_node, i);
+    }
+
+    /* Connecting output pins */
+    for (i = 0; i < new_out_size; i++) {
+        if (i < port_y_size) {
+            remap_pin_to_new_node(node->output_pins[i], new_node, i);
+        } else {
+            /* need to create a new output pin, with its own net and one fanout pin */
+            npin_t *new_pin1 = allocate_npin();
+            npin_t *new_pin2 = allocate_npin();
+            nnet_t *new_net = allocate_nnet();
+            new_net->name = make_full_ref_name(NULL, NULL, NULL, new_node->name, i);
+            /* hook the output pin into the node */
+            add_output_pin_to_node(new_node, new_pin1, i);
+            /* hook up new pin 1 into the new net */
+            add_driver_pin_to_net(new_net, new_pin1);
+            /* hook up the new pin 2 to this new net (it is not attached to any node here) */
+            add_fanout_pin_to_net(new_net, new_pin2);
+        }
+    }
+
+    if (new_out_size < port_y_size) {
+        for (i = new_out_size; i < port_y_size; i++) {
+            /* need to drive extra output pins with PAD */
+            nnode_t *buf_node = make_1port_gate(BUF_NODE, 1, 1, node, traverse_mark_number);
+            /* hook a pin from PAD node into the buf node */
+            add_input_pin_to_node(buf_node, get_pad_pin(netlist), 0);
+            /* remap the extra output pin to buf node */
+            remap_pin_to_new_node(node->output_pins[i], buf_node, 0);
+        }
+    }
+
+    // CLEAN UP
+    free_nnode(node);
+
+    node = new_node;
+}
+
+void remove_fanout_pins_from_net(nnet_t *net, npin_t * /*pin*/, int id)
+{
+    if (id < 0 || id >= net->num_fanout_pins)
+        return; /* no such slot: shifting/clearing would write outside fanout_pins */
+    for (int i = id; i < net->num_fanout_pins - 1; i++) { /* close the gap left by slot id */
+        net->fanout_pins[i] = net->fanout_pins[i + 1];
+        if (net->fanout_pins[i] != NULL)
+            net->fanout_pins[i]->pin_net_idx = i; /* keep the moved pin's back-index in sync */
+    }
+    net->fanout_pins[--net->num_fanout_pins] = NULL; /* clear the slot freed at the end */
+}
+
+void delete_npin(npin_t *pin)
+{
+    /* a pin may already be detached (prune_signal leaves node == NULL): only clear live links */
+    if (pin->type == INPUT) {
+        if (pin->node)
+            pin->node->input_pins[pin->pin_node_idx] = NULL;
+        if (pin->net)
+            pin->net->fanout_pins[pin->pin_net_idx] = NULL;
+    } else if (pin->type == OUTPUT) {
+        if (pin->node)
+            pin->node->output_pins[pin->pin_node_idx] = NULL;
+        if (pin->net)
+            pin->net->driver_pins[pin->pin_net_idx] = NULL;
+    }
+    free_npin(pin); // CLEAN UP
+}
\ No newline at end of file
diff --git a/parmys-plugin/src/netlist_visualizer.cc b/parmys-plugin/src/netlist_visualizer.cc
new file mode 100644
index 000000000..c7e36d2b3
--- /dev/null
+++ b/parmys-plugin/src/netlist_visualizer.cc
@@ -0,0 +1,361 @@
+/*
+ * Copyright (c) 2009 Peter Andrew Jamieson (jamieson.peter@gmail.com)
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "netlist_utils.h"
+#include "netlist_visualizer.h"
+#include "odin_globals.h"
+#include "odin_types.h"
+#include "odin_util.h"
+#include "vtr_memory.h"
+#include "vtr_util.h"
+
+void depth_first_traverse_visualize(nnode_t *node, FILE *fp, uintptr_t traverse_mark_number);
+void depth_first_traversal_graph_display(FILE *out, uintptr_t marker_value, netlist_t *netllist);
+
+void forward_traversal_net_graph_display(FILE *out, uintptr_t marker_value, nnode_t *node);
+void backward_traversal_net_graph_display(FILE *out, uintptr_t marker_value, nnode_t *node);
+
+/*---------------------------------------------------------------------------------------------
+ * (function: graphVizOutputNetlist)
+ * 	Writes the netlist to "<path>/<name>.dot" as a GraphViz digraph. If the file cannot be
+ * 	opened, a message goes to stderr and nothing is written.
+ *-------------------------------------------------------------------------------------------*/
+void graphVizOutputNetlist(std::string path, const char *name, uintptr_t marker_value, netlist_t *netlist)
+{
+    char path_and_file[4096];
+
+    odin_sprintf(path_and_file, "%s/%s.dot", path.c_str(), name);
+    FILE *fp = fopen(path_and_file, "w");
+    if (fp == NULL) { /* e.g. missing directory or no write permission */
+        fprintf(stderr, "graphVizOutputNetlist: cannot open %s for writing\n", path_and_file);
+        return;
+    }
+
+    fprintf(fp, "digraph G {\n\tranksep=.25;\n");
+    depth_first_traversal_graph_display(fp, marker_value, netlist);
+    fprintf(fp, "}\n");
+    fclose(fp);
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: depth_first_traversal_graph_display)
+ * 	starts the visualizing walk at every non-NULL primary input, then at gnd, then at vcc
+ *-------------------------------------------------------------------------------------------*/
+void depth_first_traversal_graph_display(FILE *out, uintptr_t marker_value, netlist_t *netlist)
+{
+    for (int idx = 0; idx < netlist->num_top_input_nodes; idx++) {
+        nnode_t *input_node = netlist->top_input_nodes[idx];
+        if (input_node != NULL)
+            depth_first_traverse_visualize(input_node, out, marker_value);
+    }
+
+    /* the constant drivers come after the primary inputs: ground first, vcc second */
+    nnode_t *constant_nodes[] = {netlist->gnd_node, netlist->vcc_node};
+    for (nnode_t *constant_node : constant_nodes) {
+        if (constant_node != NULL)
+            depth_first_traverse_visualize(constant_node, out, marker_value);
+    }
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: graphviz_node_name)
+ * 	name of `node` as it appears in the .dot output: make_simple_name() of the node name,
+ * 	with "_O" appended for output nodes
+ *-------------------------------------------------------------------------------------------*/
+static std::string graphviz_node_name(nnode_t *node)
+{
+    std::string simple_name = make_simple_name(node->name, "^-+.", '_').c_str();
+    if (node->type == OUTPUT_NODE)
+        simple_name += "_O";
+    return simple_name;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: depth_first_traverse_visualize)
+ * 	Writes `node` and everything reachable through its output nets to `fp` in GraphViz
+ * 	syntax.
+ *
+ * 	A node is handled at most once per traversal: it is stamped with traverse_mark_number on
+ * 	entry and any later visit carrying the same mark returns immediately.
+ *
+ * 	Each node produces one declaration line (its quoted name, plus a shape for FF, BUF,
+ * 	input, clock and output nodes) followed by one edge line per fanout pin of every output
+ * 	net. An edge carries the fanout pin's name as label when the pin has one.
+ *
+ * 	Empty output slots, outputs without a net, empty fanout slots and fanout pins without a
+ * 	node are skipped.
+ *
+ * 	The walk recurses once per node along the current path.
+ *
+ * 	node: node to emit, must not be NULL
+ * 	fp: stream the statements are written to
+ * 	traverse_mark_number: value identifying this traversal
+ *-------------------------------------------------------------------------------------------*/
+void depth_first_traverse_visualize(nnode_t *node, FILE *fp, uintptr_t traverse_mark_number)
+{
+    if (node->traverse_visited == traverse_mark_number)
+        return;
+
+    /* this is a new node so depth visit it */
+    node->traverse_visited = traverse_mark_number;
+
+    /* the same name is used by the declaration and by every outgoing edge, so build it once */
+    const std::string node_label = graphviz_node_name(node);
+
+    if ((node->type == FF_NODE) || (node->type == BUF_NODE)) {
+        fprintf(fp, "\t\"%s\" [shape=box];\n", node_label.c_str());
+    } else if ((node->type == INPUT_NODE) || (node->type == CLOCK_NODE) || (node->type == OUTPUT_NODE)) {
+        fprintf(fp, "\t\"%s\" [shape=triangle];\n", node_label.c_str());
+    } else {
+        fprintf(fp, "\t\"%s\"\n", node_label.c_str());
+    }
+
+    for (int i = 0; i < node->num_output_pins; i++) {
+        /* an output slot can be empty: delete_npin() leaves NULL in the slot it vacates */
+        npin_t *output_pin = node->output_pins[i];
+        if ((output_pin == NULL) || (output_pin->net == NULL))
+            continue;
+
+        nnet_t *next_net = output_pin->net;
+        for (int j = 0; j < next_net->num_fanout_pins; j++) {
+            npin_t *pin = next_net->fanout_pins[j];
+            if ((pin == NULL) || (pin->node == NULL))
+                continue;
+
+            // To see just combinational stuff, skip edges whose source or target is an FF,
+            // input or output node here, and drop the triangle and box shapes above.
+
+            nnode_t *next_node = pin->node;
+            const std::string next_label = graphviz_node_name(next_node);
+
+            fprintf(fp, "\t\"%s\" -> \"%s\"", node_label.c_str(), next_label.c_str());
+            if (pin->name)
+                fprintf(fp, "[label=\"%s\"]", pin->name);
+            fprintf(fp, ";\n");
+
+            /* descend before moving on to the next fanout pin */
+            depth_first_traverse_visualize(next_node, fp, traverse_mark_number);
+        }
+    }
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: graphVizOutputCombinationalNet)
+ * 	Writes the fanout side and then the fanin side of current_node to "<path>/<name>.dot" as
+ * 	a GraphViz digraph. If the file cannot be opened, a message goes to stderr and nothing is written.
+ *-------------------------------------------------------------------------------------------*/
+void graphVizOutputCombinationalNet(std::string path, const char *name, uintptr_t marker_value, nnode_t *current_node)
+{
+    char path_and_file[4096];
+
+    odin_sprintf(path_and_file, "%s/%s.dot", path.c_str(), name);
+    FILE *fp = fopen(path_and_file, "w");
+    if (fp == NULL) { /* e.g. missing directory or no write permission */
+        fprintf(stderr, "graphVizOutputCombinationalNet: cannot open %s for writing\n", path_and_file);
+        return;
+    }
+
+    fprintf(fp, "digraph G {\n\tranksep=.25;\n");
+    forward_traversal_net_graph_display(fp, marker_value, current_node);
+    backward_traversal_net_graph_display(fp, marker_value, current_node);
+    fprintf(fp, "}\n");
+    fclose(fp);
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: forward_traversal_net_graph_display)
+ * 	Prints `node` and its fanout cone to `fp` as GraphViz statements: one declaration per
+ * 	node taken off the work list and one edge per fanout pin. The seed is drawn as a red
+ * 	box. Fanout nodes of type FF_NODE appear as edge targets but are not expanded further.
+ *
+ * 	Nodes are stamped with marker_value when they are taken off the work list, not when
+ * 	they are queued, so a node reachable along several paths can be queued, and therefore
+ * 	printed, more than once.
+ *
+ * 	The work list only borrows the nodes: they stay part of the netlist and must not be
+ * 	freed here (graphVizOutputCombinationalNet runs the backward pass on the same seed next).
+ *
+ * 	fp: stream the statements are written to
+ * 	marker_value: value identifying this traversal
+ * 	node: seed of the traversal, must not be NULL
+ *-------------------------------------------------------------------------------------------*/
+void forward_traversal_net_graph_display(FILE *fp, uintptr_t marker_value, nnode_t *node)
+{
+    int index_in_stack = 0;
+    int num_stack_of_nodes = 1;
+
+    nnode_t **stack_of_nodes = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * 1);
+    stack_of_nodes[0] = node;
+
+    while (index_in_stack != num_stack_of_nodes) {
+        nnode_t *current_node = stack_of_nodes[index_in_stack];
+
+        /* mark it */
+        current_node->traverse_visited = marker_value;
+
+        /* printout the details of it */
+        const std::string current_name = make_simple_name(current_node->name, "^-+.", '_').c_str();
+        if (index_in_stack == 0) {
+            fprintf(fp, "\t%s [shape=box,color=red];\n", current_name.c_str());
+        } else if ((current_node->type == FF_NODE) || (current_node->type == BUF_NODE)) {
+            fprintf(fp, "\t%s [shape=box];\n", current_name.c_str());
+        } else if (current_node->type == INPUT_NODE) {
+            fprintf(fp, "\t%s [shape=triangle];\n", current_name.c_str());
+        } else if (current_node->type == CLOCK_NODE) {
+            fprintf(fp, "\t%s [shape=triangle];\n", current_name.c_str());
+        } else if (current_node->type == OUTPUT_NODE) {
+            fprintf(fp, "\t%s_O [shape=triangle];\n", current_name.c_str());
+        } else {
+            fprintf(fp, "\t%s [label=\"%d:%d\"];\n", current_name.c_str(), current_node->forward_level, current_node->backward_level);
+        }
+
+        /* edges leaving an output node use the "_O" form of its name */
+        const std::string edge_source = (current_node->type == OUTPUT_NODE) ? current_name + "_O" : current_name;
+
+        /* at each node visit all the outputs */
+        for (int j = 0; j < current_node->num_output_pins; j++) {
+            npin_t *output_pin = current_node->output_pins[j];
+            /* both checks must pass before the inner loop reads the net's fanout count */
+            if ((output_pin == NULL) || (output_pin->net == NULL))
+                continue;
+
+            nnet_t *net = output_pin->net;
+            for (int k = 0; k < net->num_fanout_pins; k++) {
+                npin_t *fanout_pin = net->fanout_pins[k];
+                if ((fanout_pin == NULL) || (fanout_pin->node == NULL))
+                    continue;
+
+                /* visit the fanout point */
+                nnode_t *next_node = fanout_pin->node;
+                std::string edge_target = make_simple_name(next_node->name, "^-+.", '_').c_str();
+                if (next_node->type == OUTPUT_NODE)
+                    edge_target += "_O";
+
+                /* an unnamed pin gets an unlabelled edge instead of handing NULL to %s */
+                if (fanout_pin->name != NULL)
+                    fprintf(fp, "\t%s -> %s [label=\"%s\"];\n", edge_source.c_str(), edge_target.c_str(), fanout_pin->name);
+                else
+                    fprintf(fp, "\t%s -> %s;\n", edge_source.c_str(), edge_target.c_str());
+
+                if ((next_node->traverse_visited != marker_value) && (next_node->type != FF_NODE)) {
+                    /* IF - not visited yet then add to list */
+                    stack_of_nodes = (nnode_t **)vtr::realloc(stack_of_nodes, sizeof(nnode_t *) * (num_stack_of_nodes + 1));
+                    stack_of_nodes[num_stack_of_nodes] = next_node;
+                    num_stack_of_nodes++;
+                }
+            }
+        }
+
+        /* process next element in net */
+        index_in_stack++;
+    }
+
+    /* release the work list only; the nodes it points at are still part of the netlist */
+    vtr::free(stack_of_nodes);
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: backward_traversal_net_graph_display)
+ * 	Prints the fanin cone of `node` to `fp` as GraphViz statements: one declaration per node
+ * 	taken off the work list (the seed itself is not declared here) and one edge per driver
+ * 	pin of every input net. Driver nodes of type FF_NODE appear as edge sources but are not
+ * 	expanded further. Nodes are stamped with marker_value when taken off the work list.
+ *
+ * 	The work list only borrows the nodes: they stay part of the netlist and must not be
+ * 	freed here.
+ *-------------------------------------------------------------------------------------------*/
+void backward_traversal_net_graph_display(FILE *fp, uintptr_t marker_value, nnode_t *node)
+{
+    int index_in_stack = 0;
+    int num_stack_of_nodes = 1;
+
+    nnode_t **stack_of_nodes = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * 1);
+    stack_of_nodes[0] = node;
+
+    while (index_in_stack != num_stack_of_nodes) {
+        nnode_t *current_node = stack_of_nodes[index_in_stack];
+
+        /* mark it */
+        current_node->traverse_visited = marker_value;
+
+        /* printout the details of it */
+        const std::string current_name = make_simple_name(current_node->name, "^-+.", '_').c_str();
+        if (index_in_stack != 0) {
+            if ((current_node->type == FF_NODE) || (current_node->type == BUF_NODE)) {
+                fprintf(fp, "\t%s [shape=box];\n", current_name.c_str());
+            } else if (current_node->type == INPUT_NODE) {
+                fprintf(fp, "\t%s [shape=triangle];\n", current_name.c_str());
+            } else if (current_node->type == CLOCK_NODE) {
+                fprintf(fp, "\t%s [shape=triangle];\n", current_name.c_str());
+            } else if (current_node->type == OUTPUT_NODE) {
+                fprintf(fp, "\t%s_O [shape=triangle];\n", current_name.c_str());
+            } else {
+                fprintf(fp, "\t%s [label=\"%d:%d\"];\n", current_name.c_str(), current_node->forward_level, current_node->backward_level);
+            }
+        }
+
+        /* at each node visit all the inputs */
+        for (int j = 0; j < current_node->num_input_pins; j++) {
+            npin_t *input_pin = current_node->input_pins[j];
+            if ((input_pin == NULL) || (input_pin->net == NULL))
+                continue;
+
+            nnet_t *net = input_pin->net;
+            for (int k = 0; k < net->num_driver_pins; k++) {
+                /* a driver slot can be empty: delete_npin() leaves NULL in the slot it vacates */
+                npin_t *driver_pin = net->driver_pins[k];
+                if ((driver_pin == NULL) || (driver_pin->node == NULL))
+                    continue;
+
+                /* visit the driver */
+                nnode_t *next_node = driver_pin->node;
+                const std::string driver_name = make_simple_name(next_node->name, "^-+.", '_').c_str();
+
+                /* an unnamed pin gets an unlabelled edge instead of handing NULL to %s */
+                if (input_pin->name != NULL)
+                    fprintf(fp, "\t%s -> %s [label=\"%s\"];\n", driver_name.c_str(), current_name.c_str(), input_pin->name);
+                else
+                    fprintf(fp, "\t%s -> %s;\n", driver_name.c_str(), current_name.c_str());
+
+                if ((next_node->traverse_visited != marker_value) && (next_node->type != FF_NODE)) {
+                    /* IF - not visited yet then add to list */
+                    stack_of_nodes = (nnode_t **)vtr::realloc(stack_of_nodes, sizeof(nnode_t *) * (num_stack_of_nodes + 1));
+                    stack_of_nodes[num_stack_of_nodes] = next_node;
+                    num_stack_of_nodes++;
+                }
+            }
+        }
+
+        /* process next element in net */
+        index_in_stack++;
+    }
+
+    /* release the work list only; the nodes it points at are still part of the netlist */
+    vtr::free(stack_of_nodes);
+}
diff --git a/parmys-plugin/src/node_creation_library.cc b/parmys-plugin/src/node_creation_library.cc
new file mode 100644
index 000000000..45a0b62df
--- /dev/null
+++ b/parmys-plugin/src/node_creation_library.cc
@@ -0,0 +1,412 @@
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "node_creation_library.h"
+#include "netlist_utils.h"
+#include "odin_globals.h"
+#include "odin_types.h"
+#include "odin_util.h"
+#include "vtr_memory.h"
+#include "vtr_util.h"
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+long unique_node_name_id = 0; /* id passed to make_full_ref_name() by op_node_name(), which increments it on every call */
+
+/*-----------------------------------------------------------------------
+ * (function: get_pad_pin)
+ * 	returns a newly allocated pin, named pad_string, added as a fanout of netlist->pad_net
+ *---------------------------------------------------------------------*/
+npin_t *get_pad_pin(netlist_t *netlist)
+{
+    npin_t *new_pin = allocate_npin();
+    new_pin->name = vtr::strdup(pad_string);
+    add_fanout_pin_to_net(netlist->pad_net, new_pin);
+    return new_pin;
+}
+
+/*-----------------------------------------------------------------------
+ * (function: get_zero_pin)
+ * 	returns a newly allocated pin, named zero_string, added as a fanout of netlist->zero_net
+ *---------------------------------------------------------------------*/
+npin_t *get_zero_pin(netlist_t *netlist)
+{
+    npin_t *new_pin = allocate_npin();
+    new_pin->name = vtr::strdup(zero_string);
+    add_fanout_pin_to_net(netlist->zero_net, new_pin);
+    return new_pin;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: get_one_pin)
+ * 	returns a newly allocated pin, named one_string, added as a fanout of netlist->one_net
+ *-------------------------------------------------------------------------------------------*/
+npin_t *get_one_pin(netlist_t *netlist)
+{
+    npin_t *new_pin = allocate_npin();
+    new_pin->name = vtr::strdup(one_string);
+    add_fanout_pin_to_net(netlist->one_net, new_pin);
+    return new_pin;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: make_not_gate_with_input)
+ * 	builds a not gate with make_not_gate() and hooks input_pin into its input slot 0; the
+ * 	output slot is not touched here
+ *-------------------------------------------------------------------------------------------*/
+nnode_t *make_not_gate_with_input(npin_t *input_pin, nnode_t *node, short mark)
+{
+    /* allocate the bare gate first */
+    nnode_t *not_node = make_not_gate(node, mark);
+
+    /* then attach the supplied pin as its only input */
+    add_input_pin_to_node(not_node, input_pin, 0);
+
+    return not_node;
+}
+
+/*-------------------------------------------------------------------------
+ * (function: make_not_gate)
+ * 	allocates a LOGICAL_NOT node with one input slot and one output slot and attaches no
+ * 	pins; location and related_ast_node are taken from `node`, the name from node->name
+ *-----------------------------------------------------------------------*/
+nnode_t *make_not_gate(nnode_t *node, short mark)
+{
+    nnode_t *not_node = allocate_nnode(node->loc);
+    not_node->traverse_visited = mark;
+    not_node->type = LOGICAL_NOT;
+    not_node->name = node_name(not_node, node->name);
+    not_node->related_ast_node = node->related_ast_node;
+
+    /* one slot on each side */
+    allocate_more_input_pins(not_node, 1);
+    allocate_more_output_pins(not_node, 1);
+
+    return not_node;
+}
+
+/**
+ * -------------------------------------------------------------------------
+ * (function: make_inverter)
+ * @brief Build a LOGICAL_NOT node fed by the given input pin and give it a
+ * newly allocated output net with one driver pin and one fanout pin
+ *
+ * @param pin input pin to invert; it is moved onto the new node
+ * @param node netlist node supplying location, name prefix and AST link
+ * @param mark netlist traversal number stored in the new node
+ *
+ * @return the new not node
+ *-----------------------------------------------------------------------*/
+nnode_t *make_inverter(npin_t *pin, nnode_t *node, short mark)
+{
+    /* the pin being inverted must be an input pin */
+    oassert(pin->type == INPUT);
+
+    nnode_t *not_node = allocate_nnode(node->loc);
+    not_node->traverse_visited = mark;
+    not_node->type = LOGICAL_NOT;
+    not_node->name = node_name(not_node, node->name);
+    not_node->related_ast_node = node->related_ast_node;
+
+    /* one slot on each side */
+    allocate_more_input_pins(not_node, 1);
+    allocate_more_output_pins(not_node, 1);
+
+    /* clear the slot of the node the pin used to feed, then move the pin onto the inverter */
+    if (pin->node)
+        pin->node->input_pins[pin->pin_node_idx] = NULL;
+    add_input_pin_to_node(not_node, pin, 0);
+
+    /* output side: a new net named after the inverter */
+    npin_t *driver_pin = allocate_npin();
+    npin_t *fanout_pin = allocate_npin();
+    nnet_t *output_net = allocate_nnet();
+    output_net->name = make_full_ref_name(NULL, NULL, NULL, not_node->name, 0);
+
+    /* the inverter's output 0 drives the net */
+    add_output_pin_to_node(not_node, driver_pin, 0);
+    add_driver_pin_to_net(output_net, driver_pin);
+
+    /* the fanout pin is attached to the net only, not yet to any node */
+    add_fanout_pin_to_net(output_net, fanout_pin);
+
+    return not_node;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: make_1port_gate)
+ * 	allocates a node of the given type with one input port of width_input pins and one
+ * 	output port of width_output pins; no pins are attached
+ *-------------------------------------------------------------------------------------------*/
+nnode_t *make_1port_gate(operation_list type, int width_input, int width_output, nnode_t *node, short mark)
+{
+    nnode_t *gate = allocate_nnode(node->loc);
+    gate->traverse_visited = mark;
+    gate->type = type;
+    gate->name = node_name(gate, node->name);
+    gate->related_ast_node = node->related_ast_node;
+
+    /* the input port */
+    allocate_more_input_pins(gate, width_input);
+    add_input_port_information(gate, width_input);
+
+    /* the output port */
+    allocate_more_output_pins(gate, width_output);
+    add_output_port_information(gate, width_output);
+
+    return gate;
+}
+/*---------------------------------------------------------------------------------------------
+ * (function: make_1port_logic_gate)
+ * 	make_1port_gate() with the output width fixed to 1: one input port of `width` pins and
+ * 	a single output pin
+ *-------------------------------------------------------------------------------------------*/
+nnode_t *make_1port_logic_gate(operation_list type, int width, nnode_t *node, short mark)
+{
+    /* only the output width is decided here, everything else is forwarded unchanged */
+    return make_1port_gate(type, width, 1, node, mark);
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: make_3port_gate)
+ * 	allocates a node of the given type with three input ports and one output port, all of variable width
+ *-------------------------------------------------------------------------------------------*/
+nnode_t *make_3port_gate(operation_list type, int width_port1, int width_port2, int width_port3, int width_output, nnode_t *node, short mark)
+{
+    nnode_t *gate = allocate_nnode(node->loc);
+    gate->traverse_visited = mark;
+    gate->type = type;
+    gate->name = node_name(gate, node->name);
+    gate->related_ast_node = node->related_ast_node;
+
+    /* input ports, added in argument order */
+    const int input_widths[] = {width_port1, width_port2, width_port3};
+    for (int port_width : input_widths) {
+        allocate_more_input_pins(gate, port_width);
+        add_input_port_information(gate, port_width);
+    }
+
+    /* output port */
+    allocate_more_output_pins(gate, width_output);
+    add_output_port_information(gate, width_output);
+
+    return gate;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: make_2port_gate)
+ * 	allocates a node of the given type with two input ports and one output port, all of
+ * 	variable width. The first port will be input_pins index 0..width_port1.
+ *-------------------------------------------------------------------------------------------*/
+nnode_t *make_2port_gate(operation_list type, int width_port1, int width_port2, int width_output, nnode_t *node, short mark)
+{
+    nnode_t *gate = allocate_nnode(node->loc);
+    gate->traverse_visited = mark;
+    gate->type = type;
+    gate->name = node_name(gate, node->name);
+    gate->related_ast_node = node->related_ast_node;
+
+    /* input ports, added in argument order; the output port follows */
+    const int input_widths[] = {width_port1, width_port2};
+    for (int port_width : input_widths) {
+        allocate_more_input_pins(gate, port_width);
+        add_input_port_information(gate, port_width);
+    }
+    allocate_more_output_pins(gate, width_output);
+    add_output_port_information(gate, width_output);
+    return gate;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: make_nport_gate)
+ * 	allocates a node of the given type with port_sizes input ports, each `width` pins wide,
+ * 	and one output port of width_output pins; no pins are attached. Despite its name,
+ * 	port_sizes is the number of input ports.
+ *-------------------------------------------------------------------------------------------*/
+nnode_t *make_nport_gate(operation_list type, int port_sizes, int width, int width_output, nnode_t *node, short mark)
+{
+    nnode_t *gate = allocate_nnode(node->loc);
+    gate->traverse_visited = mark;
+    gate->type = type;
+    gate->name = node_name(gate, node->name);
+    gate->related_ast_node = node->related_ast_node;
+
+    /* one equally wide input port per requested port */
+    for (int port = 0; port < port_sizes; port++) {
+        allocate_more_input_pins(gate, width);
+        add_input_port_information(gate, width);
+    }
+
+    /* output port */
+    allocate_more_output_pins(gate, width_output);
+    add_output_port_information(gate, width_output);
+
+    return gate;
+}
+
+const char *edge_type_blif_str(edge_type_e edge_type, loc_t loc)
+{
+    /* Maps a sensitivity kind to its two-letter string. A kind that has none is reported
+     * through error_message() at `loc`, naming it via edge_type_e_STR; if that call returns,
+     * the result is NULL. */
+    if (edge_type == FALLING_EDGE_SENSITIVITY)
+        return "fe";
+    if (edge_type == RISING_EDGE_SENSITIVITY)
+        return "re";
+    if (edge_type == ACTIVE_HIGH_SENSITIVITY)
+        return "ah";
+    if (edge_type == ACTIVE_LOW_SENSITIVITY)
+        return "al";
+    if (edge_type == ASYNCHRONOUS_SENSITIVITY)
+        return "as";
+
+    error_message(NETLIST, loc, "undefined sensitivity kind for flip flop %s", edge_type_e_STR[edge_type]);
+    return NULL;
+}
+
+/* (function: node_name) unique name for `node`: op_node_name() applied to the node's type and the given prefix */
+char *node_name(nnode_t *node, char *instance_name_prefix)
+{
+    return op_node_name(node->type, instance_name_prefix);
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: op_node_name)
+ * 	creates the unique name for an operation by passing the instance prefix, the name of
+ * 	the operation and the current unique_node_name_id to make_full_ref_name()
+ *-------------------------------------------------------------------------------------------*/
+char *op_node_name(operation_list op, char *instance_prefix_name)
+{
+    /* the id that goes into this name */
+    const long name_id = unique_node_name_id;
+    char *unique_name = make_full_ref_name(instance_prefix_name, NULL, NULL, name_based_on_op(op), name_id);
+
+    /* the next call gets the following id */
+    unique_node_name_id++;
+    return unique_name;
+}
+
+/**
+ * (function: make_multiport_smux)
+ * @brief make a multiport mux with given selector and signals lists. Every data port and the
+ * output port are as wide as the widest input: narrower inputs are padded with PAD pins, and
+ * output slots not covered by outs get a newly allocated pin driving a new net.
+ *
+ * @param inputs list of input signal lists
+ * @param selector signal list of selector pins
+ * @param num_muxed_inputs num of inputs to be muxed
+ * @param outs list of outputs signals (no wider than the widest input), or NULL
+ * @param node pointing to the src node
+ * @param netlist pointer to the netlist
+ * @return mux node
+ */
+nnode_t *make_multiport_smux(signal_list_t **inputs, signal_list_t *selector, int num_muxed_inputs, signal_list_t *outs, nnode_t *node,
+                             netlist_t *netlist)
+{
+    /* validation */
+    int valid_num_mux_inputs = shift_left_value_with_overflow_check(0X1, selector->count, node->loc);
+    oassert(valid_num_mux_inputs >= num_muxed_inputs);
+
+    int i, j;
+    int offset = 0;
+
+    nnode_t *mux = allocate_nnode(node->loc);
+    mux->type = MULTIPORT_nBIT_SMUX;
+    mux->name = node_name(mux, node->name);
+    mux->traverse_visited = node->traverse_visited;
+
+    /* add selector signal */
+    add_input_port_information(mux, selector->count);
+    allocate_more_input_pins(mux, selector->count);
+    for (i = 0; i < selector->count; i++) {
+        npin_t *sel = selector->pins[i];
+        /* hook selector into mux node as first port */
+        if (sel->node)
+            remap_pin_to_new_node(sel, mux, i);
+        else
+            add_input_pin_to_node(mux, sel, i);
+    }
+    offset += selector->count;
+
+    int max_width = 0;
+    for (i = 0; i < num_muxed_inputs; i++) {
+        /* keep the size of max input to allocate equal output */
+        if (inputs[i]->count > max_width)
+            max_width = inputs[i]->count;
+    }
+
+    for (i = 0; i < num_muxed_inputs; i++) {
+        /* add input port data */
+        add_input_port_information(mux, max_width);
+        allocate_more_input_pins(mux, max_width);
+
+        for (j = 0; j < max_width; j++) {
+            if (j < inputs[i]->count) {
+                npin_t *pin = inputs[i]->pins[j];
+                /* hook inputs into mux node */
+                if (pin->node)
+                    remap_pin_to_new_node(pin, mux, j + offset);
+                else
+                    add_input_pin_to_node(mux, pin, j + offset);
+            } else {
+                /* this input is narrower than the port: pad with PAD node */
+                add_input_pin_to_node(mux, get_pad_pin(netlist), j + offset);
+            }
+        }
+        /* record offset to have correct idx for adding pins */
+        offset += max_width;
+    }
+
+    /* add output info */
+    add_output_port_information(mux, max_width);
+    allocate_more_output_pins(mux, max_width);
+
+    // specify output pin
+    if (outs != NULL) {
+        /* the mux has exactly max_width output slots, a wider outs list cannot be hooked up */
+        oassert(outs->count <= max_width);
+        for (i = 0; i < max_width; i++) {
+            npin_t *output_pin;
+            if (i < outs->count) {
+                output_pin = outs->pins[i];
+                if (output_pin->node)
+                    remap_pin_to_new_node(output_pin, mux, i);
+                else
+                    add_output_pin_to_node(mux, output_pin, i);
+            } else {
+                /* no pin supplied for this slot: give it a new pin driving a new net */
+                output_pin = allocate_npin();
+                nnet_t *output_net = allocate_nnet();
+                add_driver_pin_to_net(output_net, output_pin);
+                add_output_pin_to_node(mux, output_pin, i);
+            }
+        }
+    } else {
+        signal_list_t *outputs = make_output_pins_for_existing_node(mux, max_width);
+        // CLEAN UP
+        free_signal_list(outputs);
+    }
+
+    return (mux);
+}
diff --git a/parmys-plugin/src/odin_error.cc b/parmys-plugin/src/odin_error.cc
new file mode 100644
index 000000000..ff4365baa
--- /dev/null
+++ b/parmys-plugin/src/odin_error.cc
@@ -0,0 +1,142 @@
+#include "odin_error.h"
+#include "config_t.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+std::vector<std::pair<std::string, int>> include_file_names; // indexed by loc.file in _log_message; .first is the path that gets printed
+
+#define NUMBER_OF_LINES_DIGIT 5 // minimum width of the line-number column printed before each source line
+int delayed_errors = 0; // a non-zero count makes verify_delayed_error() raise an error that reports it
+const loc_t unknown_location = {-1, -1, -1}; // all fields -1: _log_message then skips the location prefix and the culprit lines
+
+const char *odin_error_STR[] = { // tag printed in brackets after "warning"/"error", indexed by the odin_error value
+  "", "UTIL", "PARSE_ARGS", "PARSE_TO_AST", "AST", "BLIF ELABORATION", "NETLIST", "PARSE_BLIF", "OUTPUT_BLIF", "SIMULATION",
+};
+
+void verify_delayed_error(odin_error error_type)
+{
+    if (delayed_errors != 0)
+        error_message(error_type, unknown_location, "Parser found (%d) errors in your syntax, exiting", delayed_errors);
+    // a zero count means nothing was deferred, so there is nothing to report
+}
+
+static std::string make_marker_from_str(std::string str, int column)
+{
+    // Builds the "^~~~" marker, padded with up to `column` characters mirrored from str: blanks and tabs are kept
+    // as they are, anything else becomes a space. With a negative column only the leading blanks of str are kept.
+    std::string marker;
+    const bool column_known = (column >= 0);
+
+    for (size_t pos = 0; pos < str.size(); ++pos) {
+        const char ch = str[pos];
+        const bool is_blank = (ch == ' ' || ch == '\t');
+        if (column_known && pos >= (size_t)column)
+            break;
+        if (!is_blank && column <= 0)
+            break;
+        marker += is_blank ? ch : ' ';
+    }
+
+    return marker + "^~~~";
+}
+
+static std::string get_culprit_line(int line_number, const char *file)
+{
+    // Returns the text of zero-based line `line_number` of `file`, without its newline;
+    // the result is empty when the file cannot be opened or has no such line.
+    std::string culprit_line = "";
+    FILE *input_file = fopen(file, "r");
+    if (input_file) {
+        // Line 0 has no preceding '\n' to switch copying on, so copying must start enabled for it;
+        // without this the first line of a file always came back empty.
+        bool copy_characters = (line_number == 0);
+        int current_line_number = 0;
+        for (int c = fgetc(input_file); c != EOF; c = fgetc(input_file)) {
+            if ('\n' == c) {
+                ++current_line_number;
+                if (line_number == current_line_number) {
+                    copy_characters = true;
+                } else if (copy_characters) {
+                    break; // the requested line just ended
+                }
+            } else if (copy_characters) {
+                culprit_line.push_back(c);
+            }
+        }
+        fclose(input_file);
+    }
+    return culprit_line;
+}
+
+static void print_culprit_line_with_context(int column, int target_line, const char *file, int num_context_lines)
+{
+    // Print target_line with num_context_lines lines of context on each side, plus a caret marker under the target line.
+    for (int curr_line = std::max(target_line - num_context_lines, 0); curr_line <= target_line + num_context_lines; curr_line++) {
+        std::string culprit_line = get_culprit_line(curr_line, file);
+        int num_printed = fprintf(stderr, " %*d | ", NUMBER_OF_LINES_DIGIT, curr_line + 1); // gutter width, without relying on "%n"
+        fprintf(stderr, "%s\n", culprit_line.c_str());
+        if (curr_line == target_line)
+            fprintf(stderr, "%s\n", make_marker_from_str(culprit_line, num_printed + column).c_str());
+    }
+}
+
+void _log_message(odin_error error_type, loc_t loc, bool fatal_error, const char *function_file_name, int function_line, const char *function_name,
+                  const char *message, ...)
+{
+    fflush(stdout);
+    // Writes one warning or error to stderr: location prefix, severity, category tag, message, culprit lines; fatal errors then abort.
+    va_list ap;
+    // realpath() returns NULL when the path cannot be resolved; fall back to the recorded name rather than passing NULL to %s.
+    if (loc.file >= 0 && (size_t)loc.file < include_file_names.size()) {
+        char *path = realpath(include_file_names[loc.file].first.c_str(), NULL);
+        fprintf(stderr, "\n%s:%d:%d: ", path ? path : include_file_names[loc.file].first.c_str(), loc.line + 1, loc.col);
+        free(path);
+    }
+
+    if (!fatal_error) {
+        fprintf(stderr, "warning");
+    } else {
+        fprintf(stderr, "error");
+    }
+
+    fprintf(stderr, "[%s]: ", odin_error_STR[error_type]);
+
+    if (message != NULL) {
+        fprintf(stderr, " ");
+        va_start(ap, message);
+        vfprintf(stderr, message, ap);
+        va_end(ap);
+        // terminate the line ourselves when the format does not; an empty format must not index message[-1]
+        if (message[0] == '\0' || message[strlen(message) - 1] != '\n')
+            fprintf(stderr, "\n");
+    }
+
+    if (loc.file >= 0 && (size_t)loc.file < include_file_names.size() && loc.line >= 0) {
+        print_culprit_line_with_context(loc.col, loc.line, include_file_names[loc.file].first.c_str(), 2);
+    }
+
+    fflush(stderr);
+    if (fatal_error) {
+        _verbose_abort(NULL, function_file_name, function_line, function_name);
+    }
+}
+
+void _verbose_abort(const char *condition_str, const char *odin_file_name, int odin_line_number, const char *odin_function_name)
+{
+    fflush(stdout);
+    fprintf(stderr, "\n%s:%d: %s: ", odin_file_name, odin_line_number, odin_function_name);
+    if (!condition_str) {
+        // No condition text: this is a parsing error, so the culprit line is not printed
+        fprintf(stderr, "Fatal error\n");
+    } else {
+        // A failed assertion: print the condition, then the source lines around it
+        fprintf(stderr, "Assertion %s failed\n", condition_str);
+        // __LINE__ counts from 1, hence odin_line_number - 1
+        print_culprit_line_with_context(-1, odin_line_number - 1, odin_file_name, 2);
+    }
+    fflush(stderr);
+    std::abort();
+}
diff --git a/parmys-plugin/src/odin_ii.cc b/parmys-plugin/src/odin_ii.cc
new file mode 100644
index 000000000..7a7679bb4
--- /dev/null
+++ b/parmys-plugin/src/odin_ii.cc
@@ -0,0 +1,76 @@
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <sstream>
+
+#include "argparse.hpp"
+#include "odin_ii.h"
+
+#include "odin_globals.h"
+#include "odin_types.h"
+
+#include "HardSoftLogicMixer.hpp"
+#include "vtr_path.h"
+
+#define DEFAULT_OUTPUT "." // value set_default_config() stores in configuration.debug_output_path
+
+loc_t my_location;
+
+t_arch Arch;
+global_args_t global_args;
+short physical_lut_size = -1;
+HardSoftLogicMixer *mixer; // NOTE(review): presumably the mixer that partial_map_node() hands MULTIPLY nodes to - confirm
+
+/* CONSTANT NET ELEMENTS */
+char *one_string; // not assigned in this file
+char *zero_string; // not assigned in this file
+char *pad_string; // not assigned in this file
+
+/*---------------------------------------------------------------------------
+ * (function: set_default_config)
+ *-------------------------------------------------------------------------*/
+void set_default_config()
+{
+    /* File names and output locations of the global configuration. */
+    configuration.arch_file = "";
+    configuration.tcl_file = "";
+    configuration.dsp_verilog = "arch_dsp.v";
+    // TODO: unused
+    configuration.debug_output_path = std::string(DEFAULT_OUTPUT);
+    configuration.output_netlist_graphs = 0;
+
+    /* Boolean switches. */
+    configuration.coarsen = false;
+    configuration.adder_cin_global = false;
+
+    /* Hard multiplier options. */
+    configuration.fixed_hard_multiplier = 0;
+    configuration.split_hard_multiplier = 0;
+
+    /* Memory split options. */
+    configuration.split_memory_width = 0;
+    configuration.split_memory_depth = 0;
+
+    /* Soft logic cutoffs: a memory (or a piece of a split memory) whose width AND depth do not exceed these becomes soft logic. */
+    configuration.soft_logic_memory_depth_threshold = 0;
+    configuration.soft_logic_memory_width_threshold = 0;
+}
diff --git a/parmys-plugin/src/odin_util.cc b/parmys-plugin/src/odin_util.cc
new file mode 100644
index 000000000..dd29c3d27
--- /dev/null
+++ b/parmys-plugin/src/odin_util.cc
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2009 Peter Andrew Jamieson (jamieson.peter@gmail.com)
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "odin_globals.h"
+#include "odin_types.h"
+#include <cstdarg>
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <sstream>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <string>
+
+#include "odin_util.h"
+#include "vtr_memory.h"
+#include "vtr_path.h"
+#include "vtr_util.h"
+#include <regex>
+#include <stdbool.h>
+
+// for mkdir
+#ifdef WIN32
+#include <direct.h>
+#define getcwd _getcwd
+#else
+#include <sys/stat.h>
+#include <unistd.h>
+#endif
+
+long shift_left_value_with_overflow_check(long input_value, long shift_by, loc_t loc)
+{
+    if (shift_by < 0)
+        error_message(NETLIST, loc, "requesting a shift left that is negative [%ld]\n", shift_by);
+    else if (shift_by >= (long)ODIN_STD_BITWIDTH - 1)
+        warning_message(NETLIST, loc, "requesting a shift left that will overflow the maximum size of %ld [%ld]\n", (long)ODIN_STD_BITWIDTH - 1, shift_by);
+    // Returns input_value << shift_by; the warning prints the limit first and the requested amount in brackets, as its format expects.
+    return input_value << shift_by;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: name_based_on_op)
+ * 	Returns operation_list_STR[op][ODIN_STRING_TYPE]; asserts that op is below operation_list_END
+ *-------------------------------------------------------------------------------------------*/
+const char *name_based_on_op(operation_list op)
+{
+    oassert(op < operation_list_END && "OUT OF BOUND operation_list!");
+
+    return operation_list_STR[op][ODIN_STRING_TYPE];
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: node_name_based_on_op)
+ * 	Returns the operation name for node->type by forwarding it to name_based_on_op; node must not be NULL
+ *-------------------------------------------------------------------------------------------*/
+const char *node_name_based_on_op(nnode_t *node) { return name_based_on_op(node->type); }
+
+/*---------------------------------------------------------------------------------------------
+ * (function: make_full_ref_name)
+ * // {previous_string}.instance_name
+ * // {previous_string}.instance_name^signal_name
+ * // {previous_string}.instance_name^signal_name~bit
+ *-------------------------------------------------------------------------------------------*/
+char *make_full_ref_name(const char *previous, const char * /*module_name*/, const char *module_instance_name, const char *signal_name, long bit)
+{
+    std::stringstream full_name;
+    const bool has_prefix = (previous != NULL) || (module_instance_name != NULL);
+    const bool has_signal = (signal_name != NULL);
+
+    if (previous)
+        full_name << previous;
+    if (module_instance_name)
+        full_name << "." << module_instance_name;
+    if (has_signal) {
+        // '^' is only written when something precedes the signal name
+        full_name << (has_prefix ? "^" : "") << signal_name;
+    }
+
+    if (bit != -1) {
+        oassert(signal_name);
+        full_name << "~" << std::dec << bit;
+    }
+    return vtr::strdup(full_name.str().c_str());
+}
+
+/*---------------------------------------------------------------------------------------------
+ *  (function: make_simple_name )
+ *-------------------------------------------------------------------------------------------*/
+std::string make_simple_name(char *input, const char *flatten_string, char flatten_char)
+{
+    oassert(input);
+    oassert(flatten_string);
+
+    const std::string to_flatten = flatten_string;
+    std::string simple_name = input;
+    // every character of input that appears in flatten_string is overwritten with flatten_char
+    for (char &ch : simple_name)
+        if (to_flatten.find(ch) != std::string::npos)
+            ch = flatten_char;
+    return simple_name;
+}
+
+/*-----------------------------------------------------------------------
+ * (function: my_malloc_struct ) - allocates bytes_to_alloc bytes and stamps the first long with a running id
+ *-----------------------------------------------------------------*/
+void *my_malloc_struct(long bytes_to_alloc)
+{
+    void *allocated = vtr::calloc(1, bytes_to_alloc); // presumably zero-filled, as with calloc - confirm against vtr_memory
+    static long int m_id = 0; // counter shared by every call; never reset in this function
+
+    // ways to stop the execution at the point when a specific structure is built...note it needs to be m_id - 1 ... it's unique_id in most data
+    // structures
+    // oassert(m_id != 193);
+
+    if (allocated == NULL) {
+        fprintf(stderr, "MEMORY FAILURE\n");
+        oassert(0);
+    }
+
+    /* store the counter in the first long of the block, then advance it; NOTE(review): assumes bytes_to_alloc >= sizeof(long) - confirm */
+    *((long int *)allocated) = m_id++;
+
+    return allocated;
+}
+
+/*
+ * Returns a new string consisting of the original string
+ * plus the appendage. Leaves the original string
+ * intact.
+ *
+ * Handles format strings as well.
+ */
+char *append_string(const char *string, const char *appendage, ...)
+{
+    char formatted[vtr::bufsize];
+    va_list args;
+    va_start(args, appendage);
+    vsnprintf(formatted, sizeof(formatted), appendage, args);
+    va_end(args);
+
+    // string is only read: the result is a fresh vtr::strdup copy of string followed by the formatted appendage
+    std::string combined(string);
+    combined += formatted;
+    return vtr::strdup(combined.c_str());
+}
+
+void passed_verify_i_o_availabilty(nnode_t *node, int expected_input_size, int expected_output_size, const char *current_src, int line_src)
+{
+    if (!node)
+        error_message(UTIL, unknown_location, "node unavailable @%s::%d", current_src, line_src);
+    // Compares the node's pin counts with the expected sizes (-1 skips a check) and, on a mismatch, reports the count and every pin name.
+    std::stringstream err_message;
+    int error = 0;
+
+    if (expected_input_size != -1 && node->num_input_pins != expected_input_size) {
+        err_message << " input size is " << std::to_string(node->num_input_pins) << " expected " << std::to_string(expected_input_size) << ":\n";
+        for (int i = 0; i < node->num_input_pins; i++) // list pin i (not pin 0 each time); a missing pin or name is shown as (unnamed)
+            err_message << "\t" << ((node->input_pins[i] && node->input_pins[i]->name) ? node->input_pins[i]->name : "(unnamed)") << "\n";
+
+        error = 1;
+    }
+
+    if (expected_output_size != -1 && node->num_output_pins != expected_output_size) {
+        err_message << " output size is " << std::to_string(node->num_output_pins) << " expected " << std::to_string(expected_output_size) << ":\n";
+        for (int i = 0; i < node->num_output_pins; i++) // same listing for the output pins
+            err_message << "\t" << ((node->output_pins[i] && node->output_pins[i]->name) ? node->output_pins[i]->name : "(unnamed)") << "\n";
+
+        error = 1;
+    }
+
+    if (error)
+        error_message(UTIL, node->loc, "failed for %s:%s %s\n", node_name_based_on_op(node), node->name, err_message.str().c_str());
+}
+
+/*
+ * Gets the current time in seconds.
+ */
+double wall_time()
+{
+    using fractional_seconds = std::chrono::duration<double>;
+    const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
+
+    return std::chrono::duration_cast<fractional_seconds>(since_epoch).count();
+}
+
+/**
+ * Replacement for sprintf: Odin uses sprintf to concatenate strings, which is undefined when the
+ * destination is also passed as an argument. The text is formatted into a temporary string first
+ * and only then copied into s, so s may also appear among the inputs.
+ */
+int odin_sprintf(char *s, const char *format, ...)
+{
+    va_list args, args_copy;
+    va_start(args, format);
+    va_copy(args_copy, args); // the list is consumed twice: once to measure, once to format
+
+    const auto sz = std::vsnprintf(nullptr, 0, format, args) + 1; // measuring pass: formatted length plus the terminating NUL
+
+    try {
+        std::string temp(sz, ' ');
+        std::vsnprintf(&temp.front(), sz, format, args_copy);
+        va_end(args_copy);
+        va_end(args);
+        // NOTE(review): nothing here bounds the capacity of s; the caller must provide at least sz bytes - confirm
+        s = strncpy(s, temp.c_str(), temp.length());
+        // NOTE(review): temp.length() is sz, i.e. one more than sprintf would return - confirm callers expect that
+        return temp.length();
+
+    } catch (const std::bad_alloc &) {
+        va_end(args_copy);
+        va_end(args);
+        return -1;
+    }
+}
diff --git a/parmys-plugin/src/partial_map.cc b/parmys-plugin/src/partial_map.cc
new file mode 100644
index 000000000..15bf7bd40
--- /dev/null
+++ b/parmys-plugin/src/partial_map.cc
@@ -0,0 +1,1232 @@
+/**
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * @file: this file includes the instantiation process of Odin-II
+ * supported cells. Technically, the Odin-II partial mapper transforms the
+ * netlist into target device technology dependent cells. The partial mapper
+ * decides the hard/soft logic inference of logic blocks according to
+ * the target architecture and specified threshold in command arguments.
+ */
+#include "odin_globals.h"
+#include "odin_types.h"
+#include <stdio.h>
+#include <string.h>
+
+#include "netlist_utils.h"
+#include "node_creation_library.h"
+#include "odin_util.h"
+
+#include "adders.h"
+#include "hard_blocks.h"
+
+#include "memories.h"
+
+#include "partial_map.h"
+#include "subtractions.h"
+#include "vtr_memory.h"
+#include "vtr_util.h"
+
+void depth_first_traverse_partial_map(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist);
+
+void partial_map_node(nnode_t *node, short traverse_number, netlist_t *netlist);
+
+void instantiate_not_logic(nnode_t *node, short mark, netlist_t *netlist);
+bool eliminate_buffer(nnode_t *node, short, netlist_t *);
+void instantiate_bitwise_logic(nnode_t *node, operation_list op, short mark, netlist_t *netlist);
+void instantiate_bitwise_reduction(nnode_t *node, operation_list op, short mark);
+void instantiate_logical_logic(nnode_t *node, operation_list op, short mark);
+void instantiate_EQUAL(nnode_t *node, operation_list type, short mark, netlist_t *netlist);
+void instantiate_GE(nnode_t *node, operation_list type, short mark, netlist_t *netlist);
+void instantiate_GT(nnode_t *node, operation_list type, short mark, netlist_t *netlist);
+void instantiate_shift(nnode_t *node, short mark, netlist_t *netlist);
+void instantiate_unary_sub(nnode_t *node, short mark, netlist_t *netlist);
+void instantiate_sub_w_carry(nnode_t *node, short mark, netlist_t *netlist);
+void instantiate_sub_w_borrow(nnode_t *node, short mark, netlist_t *netlist);
+
+void instantiate_soft_logic_ram(nnode_t *node, short mark, netlist_t *netlist);
+
+static void instantiate_constant_shift(nnode_t *node, operation_list type, short mark, netlist_t *netlist);
+static void instantiate_variable_shift(nnode_t *node, operation_list type, short mark, netlist_t *netlist);
+
+/*-------------------------------------------------------------------------
+ * (function: partial_map_top) - runs the partial-map traversal of the netlist, marked with PARTIAL_MAP_TRAVERSE_VALUE
+ *-----------------------------------------------------------------------*/
+void partial_map_top(netlist_t *netlist) { depth_first_traversal_to_partial_map(PARTIAL_MAP_TRAVERSE_VALUE, netlist); }
+
+/*---------------------------------------------------------------------------------------------
+ * (function: depth_first_traversal_to_partial_map)
+ *-------------------------------------------------------------------------------------------*/
+void depth_first_traversal_to_partial_map(short marker_value, netlist_t *netlist)
+{
+    for (int idx = 0; idx < netlist->num_top_input_nodes; idx++) {
+        nnode_t *top_input = netlist->top_input_nodes[idx];
+        if (top_input != NULL)
+            depth_first_traverse_partial_map(top_input, marker_value, netlist);
+    }
+    /* after the top-level inputs, start from the netlist's gnd, vcc and pad nodes as well */
+    depth_first_traverse_partial_map(netlist->gnd_node, marker_value, netlist);
+    depth_first_traverse_partial_map(netlist->vcc_node, marker_value, netlist);
+    depth_first_traverse_partial_map(netlist->pad_node, marker_value, netlist);
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: depth_first_traverse_partial_map)
+ *-------------------------------------------------------------------------------------------*/
+void depth_first_traverse_partial_map(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist)
+{
+    /* a node already stamped with this mark has been visited: nothing more to do */
+    if (node->traverse_visited == traverse_mark_number)
+        return;
+
+    node->traverse_visited = traverse_mark_number;
+
+    /* walk every node driven by this one first, so that the node itself is mapped last */
+    for (int out_idx = 0; out_idx < node->num_output_pins; out_idx++) {
+        nnet_t *next_net = node->output_pins[out_idx]->net;
+        if (!next_net || !next_net->fanout_pins)
+            continue;
+
+        /* the fanout count and the fanout slot are both re-read from the net
+         * on every pass of this loop */
+        for (int fan_idx = 0; fan_idx < next_net->num_fanout_pins; fan_idx++) {
+            npin_t *fanout_pin = next_net->fanout_pins[fan_idx];
+            if (fanout_pin && fanout_pin->node)
+                depth_first_traverse_partial_map(fanout_pin->node, traverse_mark_number, netlist);
+        }
+    }
+
+    /* everything downstream has been handled: map this node */
+    partial_map_node(node, traverse_mark_number, netlist);
+}
+
+/*----------------------------------------------------------------------
+ * (function: partial_map_node)
+ *--------------------------------------------------------------------*/
+void partial_map_node(nnode_t *node, short traverse_number, netlist_t *netlist)
+{
+    switch (node->type) { // dispatch on the node type to the routine that instantiates it
+    case BITWISE_NOT:
+        instantiate_not_logic(node, traverse_number, netlist);
+        break;
+    case BUF_NODE:
+        eliminate_buffer(node, traverse_number, netlist);
+        break;
+    case BITWISE_AND:
+    case BITWISE_OR:
+    case BITWISE_NAND:
+    case BITWISE_NOR:
+    case BITWISE_XNOR:
+    case BITWISE_XOR:
+        if (node->num_input_port_sizes >= 2) {
+            instantiate_bitwise_logic(node, node->type, traverse_number, netlist);
+        } else if (node->num_input_port_sizes == 1) {
+            instantiate_bitwise_reduction(node, node->type, traverse_number);
+        } else
+            oassert(false);
+        break;
+
+    case LOGICAL_OR:
+    case LOGICAL_AND:
+    case LOGICAL_NOR:
+    case LOGICAL_NAND:
+    case LOGICAL_XOR:
+    case LOGICAL_XNOR:
+        if (node->num_input_port_sizes == 2) {
+            instantiate_logical_logic(node, node->type, traverse_number);
+        }
+        break;
+
+    case LOGICAL_NOT:
+        /* don't need to do anything since this is a reduction */
+        break;
+
+    case ADD:
+        if (hard_adders && node->bit_width >= min_threshold_adder) {
+            // Check if the size of this adder is greater than the hard vs soft logic threshold
+            instantiate_hard_adder(node, traverse_number, netlist);
+        } else {
+            instantiate_add_w_carry(node, traverse_number, netlist);
+        }
+        break;
+    case MINUS:
+        if (hard_adders) {
+            if (node->num_input_port_sizes == 3) {
+                int max_num = (node->input_port_sizes[0] >= node->input_port_sizes[1]) ? node->input_port_sizes[0] : node->input_port_sizes[1];
+                if (max_num >= min_add)
+                    instantiate_hard_adder_subtraction(node, traverse_number, netlist);
+                else // NOTE(review): a subtraction narrower than min_add goes to the adder routine - confirm this is intended
+                    instantiate_add_w_carry(node, traverse_number, netlist);
+            } else if (node->num_input_port_sizes == 2) {
+                instantiate_sub_w_carry(node, traverse_number, netlist);
+            } else if (node->num_input_port_sizes == 1) {
+                instantiate_unary_sub(node, traverse_number, netlist);
+            } else
+                oassert(false);
+        } else {
+            if (node->num_input_port_sizes == 3) {
+                instantiate_sub_w_borrow(node, traverse_number, netlist);
+            } else if (node->num_input_port_sizes == 2) {
+                instantiate_sub_w_carry(node, traverse_number, netlist);
+            } else if (node->num_input_port_sizes == 1) {
+                instantiate_unary_sub(node, traverse_number, netlist);
+            } else
+                oassert(false);
+        }
+
+        break;
+    case LOGICAL_EQUAL:
+    case NOT_EQUAL:
+        instantiate_EQUAL(node, node->type, traverse_number, netlist);
+        break;
+    case GTE:
+    case LTE:
+        instantiate_GE(node, node->type, traverse_number, netlist);
+        break;
+    case GT:
+    case LT:
+        instantiate_GT(node, node->type, traverse_number, netlist);
+        break;
+    case SL:
+    case ASL:
+    case SR:
+    case ASR:
+        instantiate_shift(node, traverse_number, netlist);
+        break;
+    case MULTI_PORT_MUX:
+        instantiate_multi_port_mux(node, traverse_number, netlist);
+        break;
+    // case MULTIPORT_nBIT_SMUX:
+    //     instantiate_multi_port_n_bits_mux(node, traverse_number, netlist);
+    //     break;
+    case MULTIPLY: {
+        mixer->partial_map_node(node, traverse_number, netlist);
+        break;
+    }
+    case MEMORY: {
+        ast_node_t *ast_node = node->related_ast_node;
+        char *identifier = ast_node->identifier_node->types.identifier;
+        if (find_hard_block(identifier)) {
+            long depth = is_sp_ram(node) ? get_sp_ram_depth(node) : get_dp_ram_depth(node);
+            long width = is_sp_ram(node) ? get_sp_ram_width(node) : get_dp_ram_width(node);
+
+            // If the memory satisfies the threshold for the use of a hard logic block, use one.
+            if (depth > configuration.soft_logic_memory_depth_threshold || width > configuration.soft_logic_memory_width_threshold) {
+                instantiate_hard_block(node, traverse_number, netlist);
+            } else {
+                printf("\tInferring soft logic ram: %ldx%ld\n", width, depth); // width and depth are long, so %ld rather than %zu
+                instantiate_soft_logic_ram(node, traverse_number, netlist);
+            }
+        } else {
+            instantiate_soft_logic_ram(node, traverse_number, netlist);
+        }
+        break;
+    }
+    case HARD_IP:
+        instantiate_hard_block(node, traverse_number, netlist);
+
+        break;
+    case ADDER_FUNC:
+    case CARRY_FUNC:
+    case MUX_2:
+    case SMUX_2:
+    case FF_NODE:
+    case INPUT_NODE:
+    case CLOCK_NODE:
+    case OUTPUT_NODE:
+    case GND_NODE:
+    case VCC_NODE:
+    case PAD_NODE:
+        /* some nodes already in the form that is mapable */
+        break;
+    case SKIP: // should skip
+        instantiate_hard_block(node, traverse_number, netlist);
+        break;
+    case CASE_EQUAL:
+    case CASE_NOT_EQUAL:
+    case DIVIDE:
+    case MODULO:
+    case MULTIPORT_nBIT_SMUX:
+    default:
+        error_message(NETLIST, node->loc, "%s", "Partial map: node should have been converted to softer version.");
+        break;
+    }
+}
+
+void instantiate_soft_logic_ram(nnode_t *node, short mark, netlist_t *netlist)
+{ // picks the soft RAM routine that matches the port kind of the memory node
+    if (is_sp_ram(node)) // single port is tested first
+        instantiate_soft_single_port_ram(node, mark, netlist);
+    else if (is_dp_ram(node))
+        instantiate_soft_dual_port_ram(node, mark, netlist);
+    else // neither single nor dual port
+        oassert(false);
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: instantiate_multi_port_mux )
+ * 	Replaces the node with one MUX_2 per input port after port 0; port 0 carries the one hot control
+ *-------------------------------------------------------------------------------------------*/
+void instantiate_multi_port_mux(nnode_t *node, short mark, netlist_t * /*netlist*/)
+{
+    int i, j;
+    int width_of_one_hot_logic;
+    int num_ports;
+    int port_offset;
+    nnode_t **muxes;
+
+    /* port 0 gives the width of the control; NOTE(review): port 1 is read unconditionally, so at least two ports are assumed - confirm */
+    width_of_one_hot_logic = node->input_port_sizes[0];
+    num_ports = node->num_input_port_sizes;
+    port_offset = node->input_port_sizes[1];
+
+    muxes = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * (num_ports - 1)); // temporary array, released below
+    for (i = 0; i < num_ports - 1; i++) {
+        muxes[i] = make_2port_gate(MUX_2, width_of_one_hot_logic, width_of_one_hot_logic, 1, node, mark);
+    }
+
+    for (j = 0; j < num_ports - 1; j++) {
+        for (i = 0; i < width_of_one_hot_logic; i++) {
+            /* move pin i of port j + 1 onto the second input port of mux j */
+            remap_pin_to_new_node(node->input_pins[i + (j + 1) * port_offset], muxes[j], width_of_one_hot_logic + i);
+
+            /* the control pins are moved onto mux 0 and copied from mux 0 for every later mux */
+            if (j == 0)
+                remap_pin_to_new_node(node->input_pins[i], muxes[j], i);
+            else
+                add_input_pin_to_node(muxes[j], copy_input_npin(muxes[0]->input_pins[i]), i);
+        }
+
+        /* output pin j of the original node becomes the single output of mux j */
+        remap_pin_to_new_node(node->output_pins[j], muxes[j], 0);
+    }
+    vtr::free(muxes);
+    free_nnode(node);
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: instantiate_not_logic ) - replaces the node with one NOT gate per input pin
+ *-------------------------------------------------------------------------------------------*/
+void instantiate_not_logic(nnode_t *node, short mark, netlist_t * /*netlist*/)
+{
+    int width = node->num_input_pins;
+    nnode_t **new_not_cells;
+    int i;
+
+    new_not_cells = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * width); // temporary array, released below
+
+    for (i = 0; i < width; i++) {
+        new_not_cells[i] = make_not_gate(node, mark);
+    }
+
+    /* connect inputs and outputs */
+    for (i = 0; i < width; i++) {
+        /* input pin i and output pin i of the original node both move to pin 0 of gate i */
+        remap_pin_to_new_node(node->input_pins[i], new_not_cells[i], 0);
+        remap_pin_to_new_node(node->output_pins[i], new_not_cells[i], 0);
+    }
+
+    vtr::free(new_not_cells);
+    free_nnode(node);
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: eliminate_buffer )
+ * 	Buffers just pass through signals
+ * 	Returns true if every pin of the buffer was bypassed and the node was freed
+ *-------------------------------------------------------------------------------------------*/
+bool eliminate_buffer(nnode_t *node, short, netlist_t *)
+{
+    bool buffer_is_removed = true;
+    /* for now we just pass the signals directly through */
+    for (int i = 0; i < node->num_input_pins; i++) {
+        int idx_2_buffer = node->input_pins[i]->pin_net_idx; // slot of this input pin in its net's fanout list
+
+        // Only bypass the buffer when its output net has at most one driver
+        if (node->output_pins[i]->net->num_driver_pins <= 1) {
+            /* join all fanouts of the output net with the input pins net */
+            join_nets(node->input_pins[i]->net, node->output_pins[i]->net);
+
+            /* erase the pointer to this buffer */
+            node->input_pins[i]->net->fanout_pins[idx_2_buffer] = NULL;
+            delete_npin(node->input_pins[i]);
+        } else {
+            buffer_is_removed = false; // NOTE(review): pins bypassed on other iterations stay deleted while the node is kept - confirm this is safe
+        }
+    }
+
+    // CLEAN UP
+    if (buffer_is_removed) {
+        /* detach output pins from the node */
+        for (int i = 0; i < node->num_output_pins; i++)
+            node->output_pins[i]->node = NULL;
+        free_nnode(node);
+    }
+
+    return buffer_is_removed;
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: instantiate_logical_logic ) - OR-reduces each of the two input ports, then applies op to the two results
+ *-------------------------------------------------------------------------------------------*/
+void instantiate_logical_logic(nnode_t *node, operation_list op, short mark)
+{
+    int i;
+    int port_B_offset;
+    int width_a;
+    int width_b;
+    nnode_t *new_logic_cell;
+    nnode_t *reduction1;
+    nnode_t *reduction2;
+
+    oassert(node->num_input_pins > 0);
+    oassert(node->num_input_port_sizes == 2);
+    oassert(node->num_output_pins == 1);
+    /* port A occupies input pins [0, width_a); port B starts right after it */
+    width_a = node->input_port_sizes[0];
+    width_b = node->input_port_sizes[1];
+    port_B_offset = width_a;
+
+    /* instantiate the cells */
+    new_logic_cell = make_1port_logic_gate(op, 2, node, mark);
+    reduction1 = make_1port_logic_gate(BITWISE_OR, width_a, node, mark);
+    reduction2 = make_1port_logic_gate(BITWISE_OR, width_b, node, mark);
+
+    /* connect inputs: each port feeds its own OR gate, so the two widths may differ */
+    for (i = 0; i < width_a; i++) {
+        /* pin i of port A becomes input i of the first OR gate */
+        remap_pin_to_new_node(node->input_pins[i], reduction1, i);
+    }
+    for (i = 0; i < width_b; i++) {
+        /* pin i of port B becomes input i of the second OR gate */
+        remap_pin_to_new_node(node->input_pins[i + port_B_offset], reduction2, i);
+    }
+
+    connect_nodes(reduction1, 0, new_logic_cell, 0);
+    connect_nodes(reduction2, 0, new_logic_cell, 1);
+
+    instantiate_bitwise_reduction(reduction1, BITWISE_OR, mark);
+    instantiate_bitwise_reduction(reduction2, BITWISE_OR, mark);
+
+    remap_pin_to_new_node(node->output_pins[0], new_logic_cell, 0);
+    free_nnode(node);
+}
+/*---------------------------------------------------------------------------------------------
+ * (function: instantiate_bitwise_reduction )
+ * 	Replaces a single-port reduction node by one logic gate that takes every bit of the port
+ * 	as an input and takes over the node's output pin. The original node is freed.
+ *-------------------------------------------------------------------------------------------*/
+void instantiate_bitwise_reduction(nnode_t *node, operation_list op, short mark)
+{
+    oassert(node->num_input_pins > 0);
+    oassert(node->num_input_port_sizes == 1);
+    oassert(node->output_port_sizes[0] == 1);
+
+    /* number of bits being reduced */
+    const int num_bits = node->input_port_sizes[0];
+
+    /* both the BITWISE_* and the LOGICAL_* spelling of an operator select the LOGICAL_* gate type */
+    operation_list gate_type = NO_OP;
+    switch (op) {
+    case BITWISE_AND:
+    case LOGICAL_AND:
+        gate_type = LOGICAL_AND;
+        break;
+    case BITWISE_OR:
+    case LOGICAL_OR:
+        gate_type = LOGICAL_OR;
+        break;
+    case BITWISE_NAND:
+    case LOGICAL_NAND:
+        gate_type = LOGICAL_NAND;
+        break;
+    case BITWISE_NOR:
+    case LOGICAL_NOR:
+        gate_type = LOGICAL_NOR;
+        break;
+    case BITWISE_XNOR:
+    case LOGICAL_XNOR:
+        gate_type = LOGICAL_XNOR;
+        break;
+    case BITWISE_XOR:
+    case LOGICAL_XOR:
+        gate_type = LOGICAL_XOR;
+        break;
+    default:
+        /* any other operator is rejected by the assert; gate_type stays NO_OP */
+        oassert(false);
+        break;
+    }
+
+    /* the replacement gate has one input per reduced bit */
+    nnode_t *gate = make_1port_logic_gate(gate_type, num_bits, node, mark);
+
+    /* move input pin k of the old node to input k of the gate */
+    for (int k = 0; k < num_bits; k++) {
+        remap_pin_to_new_node(node->input_pins[k], gate, k);
+    }
+
+    /* hand over the output pin, then release the old node */
+    remap_pin_to_new_node(node->output_pins[0], gate, 0);
+    free_nnode(node);
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: instantiate_bitwise_logic )
+ * 	Builds one gate per output bit with one input per input port (a zero pin where a port has no such bit), then frees node
+ *-------------------------------------------------------------------------------------------*/
+void instantiate_bitwise_logic(nnode_t *node, operation_list op, short mark, netlist_t *netlist)
+{
+    int i, j;
+
+    operation_list cell_op;
+    if (!node) // a NULL node is silently accepted and nothing is built
+        return;
+    oassert(node->num_input_pins > 0);
+    oassert(node->num_input_port_sizes >= 2);
+
+    switch (op) {
+    case BITWISE_AND:
+        cell_op = LOGICAL_AND;
+        break;
+    case BITWISE_OR:
+        cell_op = LOGICAL_OR;
+        break;
+    case BITWISE_NAND:
+        cell_op = LOGICAL_NAND;
+        break;
+    case BITWISE_NOR:
+        cell_op = LOGICAL_NOR;
+        break;
+    case BITWISE_XNOR:
+        cell_op = LOGICAL_XNOR;
+        break;
+    case BITWISE_XOR:
+        cell_op = LOGICAL_XOR;
+        break;
+    default:
+        cell_op = NO_OP;
+        oassert(false);
+        break;
+    }
+
+    /* one gate per bit i of the first output port; input j of that gate is bit i of input port j */
+    for (i = 0; i < node->output_port_sizes[0]; i++) {
+        nnode_t *new_logic_cells = make_nport_gate(cell_op, node->num_input_port_sizes, 1, 1, node, mark);
+        int current_port_offset = 0;
+        /* current_port_offset is the index in node->input_pins at which port j begins */
+        for (j = 0; j < node->num_input_port_sizes; j++) {
+            /* IF - port j has a bit i, so move that pin onto input j of the gate */
+            if (i < node->input_port_sizes[j])
+                remap_pin_to_new_node(node->input_pins[i + current_port_offset], new_logic_cells, j);
+
+            /* ELSE - port j is narrower than the output, so input j is fed with a zero pin */
+            else
+                add_input_pin_to_node(new_logic_cells, get_zero_pin(netlist), j);
+
+            current_port_offset += node->input_port_sizes[j];
+        }
+
+        remap_pin_to_new_node(node->output_pins[i], new_logic_cells, 0);
+    }
+
+    free_nnode(node);
+}
+
+/*--------------------------------------------------------------------------
+ * (function: instantiate_add_w_carry )
+ * 	This is for soft addition in output formats that don't handle
+ *	multi-output logic functions (BLIF).  We use one function for the
+ *	add, and one for the carry.
+ *------------------------------------------------------------------------*/
+void instantiate_add_w_carry(nnode_t *node, short mark, netlist_t *netlist)
+{
+    // slots of the width array handed to instantiate_add_w_carry_block
+    const int out = 0, input_a = 1, input_b = 2, pinout_count = 3;
+
+    oassert(node->num_input_pins > 0);
+    // both operand widths are read below, so at least two input ports must exist
+    oassert(node->num_input_port_sizes >= 2);
+
+    // fixed-size scratch array kept on the stack, so there is nothing to free afterwards
+    int width[pinout_count];
+    // exactly two input ports: width of the first output port; otherwise: total number of output pins
+    if (node->num_input_port_sizes == 2)
+        width[out] = node->output_port_sizes[0];
+    else
+        width[out] = node->num_output_pins;
+    width[input_a] = node->input_port_sizes[0];
+    width[input_b] = node->input_port_sizes[1];
+
+    instantiate_add_w_carry_block(width, node, mark, netlist, 0);
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: instantiate_sub_w_carry )
+ * 	This subtraction is intended for soft subtraction with output formats that can't handle
+ * 	multi output logic functions.  We split the add and the carry over two logic functions.
+ *-------------------------------------------------------------------------------------------*/
+void instantiate_sub_w_carry(nnode_t *node, short mark, netlist_t *netlist)
+{
+    // slots of the width array handed to instantiate_add_w_carry_block
+    const int out = 0, input_a = 1, input_b = 2, pinout_count = 3;
+
+    oassert(node->num_input_pins > 0);
+    // only the one-port and two-port forms are handled; any other count would leave the operand widths unset
+    oassert(node->num_input_port_sizes == 1 || node->num_input_port_sizes == 2);
+
+    // fixed-size scratch array kept on the stack and zero-initialized, so no entry is ever read uninitialized
+    int width[pinout_count] = {0, 0, 0};
+    width[out] = node->output_port_sizes[0];
+    if (node->num_input_port_sizes == 1) {
+        width[input_a] = 0; // single operand: operand A has width 0 and the only port becomes operand B
+        width[input_b] = node->input_port_sizes[0];
+    } else if (node->num_input_port_sizes == 2) {
+        width[input_a] = node->input_port_sizes[0];
+        width[input_b] = node->input_port_sizes[1];
+    }
+
+    instantiate_add_w_carry_block(width, node, mark, netlist, 1);
+}
+
+/**
+ *---------------------------------------------------------------------------------------------
+ * (function: instantiate_sub_w_borrow )
+ *
+ * @brief instantiating a single bit subtraction circuit with borrow_in and borrow_out
+ *
+ * @param node node to lower; the asserts below require more than one input port, at least one output port and 1-bit wide first ports
+ * @param mark unique traversal mark for blif elaboration pass
+ * @param netlist pointer to the current netlist file
+ *-------------------------------------------------------------------------------------------*/
+void instantiate_sub_w_borrow(nnode_t *node, short mark, netlist_t *netlist)
+{
+    /* to validate the input output ports */
+    oassert(node->num_input_port_sizes > 1);
+    oassert(node->num_output_port_sizes > 0);
+    /* to validate the size of input output pins (only the first two input ports and the first output port are checked) */
+    oassert(node->input_port_sizes[0] == 1);
+    oassert(node->input_port_sizes[1] == 1);
+    oassert(node->output_port_sizes[0] == 1);
+
+    /* the circuit itself is built by instantiate_sub_w_borrow_block */
+    instantiate_sub_w_borrow_block(node, mark, netlist);
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function:  instantiate_unary_sub )
+ *	Unary minus (2's complement): forwards the node unchanged to instantiate_sub_w_carry, whose one-port case uses an operand A of width 0.
+ *-------------------------------------------------------------------------------------------*/
+void instantiate_unary_sub(nnode_t *node, short mark, netlist_t *netlist) { instantiate_sub_w_carry(node, mark, netlist); }
+
+/*---------------------------------------------------------------------------------------------
+ * (function: instantiate_EQUAL )
+ *	Builds the hardware for an equality test of two input ports that drives one output pin.
+ *
+ *	type == LOGICAL_EQUAL: bitwise XNOR of the two ports, then an AND over all result bits.
+ *	any other type:        bitwise XOR of the two ports, then an OR over all result bits.
+ *
+ *	When the ports differ in width, the missing high bits of the narrower port are taken as
+ *	constant zero (instantiate_bitwise_logic supplies the zero pins), so a set high bit of
+ *	the wider operand makes the two operands compare as different.
+ *	NOTE(review): this is zero extension; nothing here sign-extends signed operands of
+ *	different widths - confirm that such ports are equalized before this function runs.
+ *-------------------------------------------------------------------------------------------*/
+void instantiate_EQUAL(nnode_t *node, operation_list type, short mark, netlist_t *netlist)
+{
+    int width_a;
+    int width_b;
+    int width_max;
+    int i;
+    int port_B_offset;
+    nnode_t *compare;
+    nnode_t *combine;
+
+    oassert(node->num_output_pins == 1);
+    oassert(node->num_input_pins > 0);
+    oassert(node->num_input_port_sizes == 2);
+    width_a = node->input_port_sizes[0];
+    width_b = node->input_port_sizes[1];
+    width_max = width_a > width_b ? width_a : width_b;
+    /* port B's pins follow port A's pins in the input pin array */
+    port_B_offset = width_a;
+
+    /* compare: per-bit comparison of the two ports; combine: folds the per-bit results into one bit */
+    if (type == LOGICAL_EQUAL) {
+        compare = make_2port_gate(LOGICAL_XNOR, width_a, width_b, width_max, node, mark);
+        combine = make_1port_logic_gate(LOGICAL_AND, width_max, node, mark);
+    } else {
+        compare = make_2port_gate(LOGICAL_XOR, width_a, width_b, width_max, node, mark);
+        combine = make_1port_logic_gate(LOGICAL_OR, width_max, node, mark);
+    }
+
+    /* connect inputs and chain every comparison bit into the combining gate */
+    for (i = 0; i < width_max; i++) {
+        /*
+         * Move every existing operand pin onto the compare gate, keeping its position.
+         * A pin of the wider port must be kept even when the other port has no bit i:
+         * putting a zero pin in its place would drop that bit from the comparison.
+         * The absent bit of the narrower port is zero-padded by instantiate_bitwise_logic.
+         */
+        if (i < width_a)
+            remap_pin_to_new_node(node->input_pins[i], compare, i);
+
+        if (i < width_b)
+            remap_pin_to_new_node(node->input_pins[i + port_B_offset], compare, i + port_B_offset);
+
+        /* result bit i of the comparison feeds input i of the combining gate */
+        connect_nodes(compare, i, combine, i);
+    }
+
+    /* join that gate to the output */
+    remap_pin_to_new_node(node->output_pins[0], combine, 0);
+
+    /* split the wide compare gate into one gate per bit */
+    if (type == LOGICAL_EQUAL)
+        instantiate_bitwise_logic(compare, BITWISE_XNOR, mark, netlist);
+    else
+        instantiate_bitwise_logic(compare, BITWISE_XOR, mark, netlist);
+    /* Don't need to instantiate a Logic and gate since it is a function itself */
+
+    oassert(combine->num_output_pins == 1);
+
+    /* every pin has been moved off the original node by now */
+    free_nnode(node);
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: instantiate_GT )
+ *	Defines the HW needed for a strict comparison (GT, or LT by swapping the two ports): one GT
+ *	cell per bit, an XOR/OR chain flagging differences in higher bits, and a final OR gate.
+ *-------------------------------------------------------------------------------------------*/
+void instantiate_GT(nnode_t *node, operation_list type, short mark, netlist_t *netlist)
+{
+    int width_a;
+    int width_b;
+    int width_max;
+    int i;
+    int port_A_offset;
+    int port_B_offset;
+    int port_A_index;
+    int port_B_index;
+    int index = 0;
+    nnode_t *xor_gate = NULL;
+    nnode_t *logical_or_gate;
+    nnode_t **or_cells;
+    nnode_t **gt_cells;
+
+    oassert(node->num_output_pins == 1);
+    oassert(node->num_input_pins > 0);
+    oassert(node->num_input_port_sizes == 2);
+    oassert(node->input_port_sizes[0] == node->input_port_sizes[1]);
+    width_a = node->input_port_sizes[0];
+    width_b = node->input_port_sizes[1];
+    width_max = width_a > width_b ? width_a : width_b;
+
+    /* LT is built as GT with ports A and B swapped; *_offset index node->input_pins, *_index index the XOR gate inputs */
+    if (type == GT) {
+        port_A_offset = 0;
+        port_B_offset = width_a;
+        port_A_index = 0;
+        port_B_index = width_a - 1;
+    } else if (type == LT) {
+        port_A_offset = width_b;
+        port_B_offset = 0;
+        port_A_index = width_b - 1;
+        port_B_index = 0;
+    } else {
+        port_A_offset = 0;
+        port_B_offset = 0;
+        port_A_index = 0;
+        port_B_index = 0;
+        error_message(NETLIST, node->loc, "Invalid node type %s in instantiate_GT\n", node_name_based_on_op(node));
+    }
+
+    if (width_max > 1) {
+        /* xor gate identifies if any bits above bit 0 don't match (it is one bit narrower than the operands) */
+        xor_gate = make_2port_gate(LOGICAL_XOR, width_a - 1, width_b - 1, width_max - 1, node, mark);
+    }
+
+    /* collects all the GT signals and determines if gt */
+    logical_or_gate = make_1port_logic_gate(LOGICAL_OR, width_max, node, mark);
+    /* width_max - 1 chained OR cells (none for a 1-bit compare); the element count is parenthesized so the size is counted in pointers, not bytes */
+    or_cells = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * (width_max - 1));
+    /* each cell checks if A > B and sends out a 1 if history has no 1s (3rd input) */
+    gt_cells = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * width_max);
+
+    for (i = 0; i < width_max; i++) {
+        gt_cells[i] = make_3port_gate(GT, 1, 1, 1, 1, node, mark);
+        if (i < width_max - 1) {
+            or_cells[i] = make_2port_gate(LOGICAL_OR, 1, 1, 1, node, mark);
+        }
+    }
+
+    /* connect inputs.  In the case that a signal is smaller than the other then zero pad */
+    for (i = 0; i < width_max; i++) {
+        /* Joining the inputs to the input 1 of that gate */
+        if (i < width_a) {
+            /* IF - port A has a bit i: move it to input 0 of the GT cell and, above bit 0, copy it to the XOR gate */
+            remap_pin_to_new_node(node->input_pins[i + port_A_offset], gt_cells[i], 0);
+            if (i > 0)
+                add_input_pin_to_node(xor_gate, copy_input_npin(gt_cells[i]->input_pins[0]), index + port_A_index);
+        } else {
+            /* ELSE - the A input does not exist, so a zero pin is used in its place */
+            add_input_pin_to_node(gt_cells[i], get_zero_pin(netlist), 0);
+            if (i > 0)
+                add_input_pin_to_node(xor_gate, get_zero_pin(netlist), index + port_A_index);
+        }
+
+        if (i < width_b) {
+            /* IF - port B has a bit i: move it to input 1 of the GT cell and, above bit 0, copy it to the XOR gate */
+            /* Joining the inputs to the input 2 of that gate */
+            remap_pin_to_new_node(node->input_pins[i + port_B_offset], gt_cells[i], 1);
+            if (i > 0)
+                add_input_pin_to_node(xor_gate, copy_input_npin(gt_cells[i]->input_pins[1]), index + port_B_index);
+        } else {
+            /* ELSE - the B input does not exist, so a zero pin is used in its place */
+            add_input_pin_to_node(gt_cells[i], get_zero_pin(netlist), 1);
+            if (i > 0)
+                add_input_pin_to_node(xor_gate, get_zero_pin(netlist), index + port_B_index);
+        }
+
+        if (i < width_max - 1) {
+            /* number of OR gates */
+            if (i < width_max - 2) {
+                /* input 1 of this OR cell is the output of the OR cell one bit higher */
+                connect_nodes(or_cells[i + 1], 0, or_cells[i], 1);
+            } else {
+                /* the highest OR cell has no predecessor in the chain, so its input 1 gets a zero */
+                add_input_pin_to_node(or_cells[i], get_zero_pin(netlist), 1);
+            }
+            if (width_max > 1) {
+                /* XOR output i (operand bit i + 1) is input 0 of OR cell i */
+                connect_nodes(xor_gate, i, or_cells[i], 0);
+            }
+
+            connect_nodes(or_cells[i], 0, gt_cells[i], 2);
+
+        } else {
+            /* deal with the first greater than test which autom gets a zero */
+            add_input_pin_to_node(gt_cells[i], get_zero_pin(netlist), 2);
+        }
+
+        /* hook it up to the final logical OR */
+        connect_nodes(gt_cells[i], 0, logical_or_gate, i);
+
+        if (i > 0) {
+            index++;
+        }
+    }
+
+    /* join that gate to the output */
+    remap_pin_to_new_node(node->output_pins[0], logical_or_gate, 0);
+    oassert(logical_or_gate->num_output_pins == 1);
+    if (xor_gate != NULL) {
+        instantiate_bitwise_logic(xor_gate, BITWISE_XOR, mark, netlist);
+    }
+
+    vtr::free(gt_cells);
+    vtr::free(or_cells);
+    free_nnode(node);
+}
+
+/*---------------------------------------------------------------------------------------------
+ * (function: instantiate_GE )
+ *	Defines the HW needed for greater than or equal (type GTE; any other type uses LT instead of GT):
+ *	a LOGICAL_EQUAL gate and a GT/LT gate on the same operands, ORed together by a 2-input gate.
+ *-------------------------------------------------------------------------------------------*/
+void instantiate_GE(nnode_t *node, operation_list type, short mark, netlist_t *netlist)
+{
+    int width_a;
+    int width_b;
+    int width_max;
+    int i;
+    int port_B_offset;
+    int port_A_offset;
+    nnode_t *equal;
+    nnode_t *compare;
+    nnode_t *logical_or_final_gate;
+
+    oassert(node->num_output_pins == 1);
+    oassert(node->num_input_pins > 0);
+    oassert(node->num_input_port_sizes == 2);
+    oassert(node->input_port_sizes[0] == node->input_port_sizes[1]);
+    width_a = node->input_port_sizes[0];
+    width_b = node->input_port_sizes[1];
+    oassert(width_a == width_b);
+    width_max = width_a > width_b ? width_a : width_b;
+
+    port_A_offset = 0;
+    port_B_offset = width_a;
+
+    /* equality gate over both ports; it is lowered by instantiate_EQUAL further down */
+    equal = make_2port_gate(LOGICAL_EQUAL, width_a, width_b, 1, node, mark);
+
+    if (type == GTE)
+        compare = make_2port_gate(GT, width_a, width_b, 1, node, mark);
+    else
+        compare = make_2port_gate(LT, width_a, width_b, 1, node, mark);
+
+    logical_or_final_gate = make_1port_logic_gate(LOGICAL_OR, 2, node, mark);
+
+    /* connect inputs.  In the case that a signal is smaller than the other then zero pad */
+    for (i = 0; i < width_max; i++) {
+        /* port A: the original pin moves to the equal gate, the compare gate gets a copy of it */
+        if (i < width_a) {
+            /* IF - port A has a bit i (the copy is taken from the equal gate after the pin was moved there) */
+            remap_pin_to_new_node(node->input_pins[i + port_A_offset], equal, i + port_A_offset);
+            add_input_pin_to_node(compare, copy_input_npin(equal->input_pins[i + port_A_offset]), i + port_A_offset);
+        } else {
+            /* ELSE - the A input does not exist, so both gates get a zero pin */
+            add_input_pin_to_node(equal, get_zero_pin(netlist), i + port_A_offset);
+            add_input_pin_to_node(compare, get_zero_pin(netlist), i + port_A_offset);
+        }
+
+        if (i < width_b) {
+            /* IF - port B has a bit i (the copy is taken from the equal gate after the pin was moved there) */
+            /* port B: the original pin moves to the equal gate, the compare gate gets a copy of it */
+            remap_pin_to_new_node(node->input_pins[i + port_B_offset], equal, i + port_B_offset);
+            add_input_pin_to_node(compare, copy_input_npin(equal->input_pins[i + port_B_offset]), i + port_B_offset);
+        } else {
+            /* ELSE - the B input does not exist, so both gates get a zero pin */
+            add_input_pin_to_node(equal, get_zero_pin(netlist), i + port_B_offset);
+            add_input_pin_to_node(compare, get_zero_pin(netlist), i + port_B_offset);
+        }
+    }
+    connect_nodes(equal, 0, logical_or_final_gate, 0);
+    connect_nodes(compare, 0, logical_or_final_gate, 1);
+
+    /* join that gate to the output */
+    remap_pin_to_new_node(node->output_pins[0], logical_or_final_gate, 0);
+    oassert(logical_or_final_gate->num_output_pins == 1);
+
+    /* lower the two intermediate gates into their own hardware */
+    instantiate_EQUAL(equal, LOGICAL_EQUAL, mark, netlist);
+
+    if (type == GTE)
+        instantiate_GT(compare, GT, mark, netlist);
+    else
+        instantiate_GT(compare, LT, mark, netlist);
+
+    free_nnode(node);
+}
+
+/**
+ * --------------------------------------------------------------------------
+ * (function: instantiate_shift)
+ *
+ * @brief lower a shift node, choosing between the constant-amount
+ * and the variable-amount implementation.
+ *
+ * @note input port 0 is the operand, input port 1 the shift amount
+ * @note both input ports must have the same width (asserted below)
+ *
+ * @param node pointer to the shift netlist node
+ * @param mark traversal number forwarded to the chosen implementation
+ * @param netlist pointer to the current netlist
+ * -------------------------------------------------------------------------*/
+void instantiate_shift(nnode_t *node, short mark, netlist_t *netlist)
+{
+    /* exactly two input ports of identical width are expected */
+    oassert(node->num_input_port_sizes == 2);
+    oassert(node->input_port_sizes[0] == node->input_port_sizes[1]);
+
+    const int amount_offset = node->input_port_sizes[0];
+    const int amount_width = node->input_port_sizes[1];
+
+    /* gather the pins of the shift-amount port, which follow the operand pins */
+    signal_list_t *amount_pins = init_signal_list();
+    for (int bit = 0; bit < amount_width; bit++) {
+        add_pin_to_signal_list(amount_pins, node->input_pins[amount_offset + bit]);
+    }
+
+    /* constant amount: instantiate_constant_shift; anything else: instantiate_variable_shift */
+    const bool amount_is_constant = is_constant_signal(amount_pins, netlist);
+    if (!amount_is_constant) {
+        instantiate_variable_shift(node, node->type, mark, netlist);
+    } else {
+        instantiate_constant_shift(node, node->type, mark, netlist);
+    }
+
+    /* CLEAN UP: the temporary pin list is released after the chosen implementation has run */
+    free_signal_list(amount_pins);
+}
+
+/**
+ *---------------------------------------------------------------------------------------------
+ * (function: instantiate_constant_shift )
+ *
+ * @brief instantiate shift node that the shift signals
+ * is a CONSTANT signals. The shift is done purely by rewiring: operand
+ * pins are moved to their shifted output position, the rest is padded
+ *
+ * @param node shift node
+ * @param type shift type [SL,SR, ASL, and ASR]
+ * @param mark traversal number
+ * @param netlist pointer to the current netlist
+ *-------------------------------------------------------------------------------------------*/
+static void instantiate_constant_shift(nnode_t *node, operation_list type, short mark, netlist_t *netlist)
+{
+    /* validate number and size of input ports */
+    oassert(node->num_input_port_sizes == 2);
+    oassert(node->input_port_sizes[0] == node->input_port_sizes[1]);
+
+    int i;
+    int operand_width = node->input_port_sizes[0];
+    int shift_width = node->input_port_sizes[1];
+    int output_width = node->output_port_sizes[0];
+
+    /* operand signal */
+    signal_list_t *operand_signal = init_signal_list();
+    for (i = 0; i < operand_width; i++) {
+        add_pin_to_signal_list(operand_signal, node->input_pins[i]);
+    }
+    /* shift signal */
+    signal_list_t *shift_signal = init_signal_list();
+    for (i = 0; i < shift_width; i++) {
+        add_pin_to_signal_list(shift_signal, node->input_pins[operand_width + i]);
+    }
+
+    /* validate constant shift signal */
+    oassert(is_constant_signal(shift_signal, netlist));
+
+    /* result collects one pin per output bit, lowest output bit first */
+    signal_list_t *result = init_signal_list();
+    /* index of the operand's most significant pin, copied as padding for signed ASR */
+    int pad_bit = operand_width - 1;
+    /* calculate the value of shift signal */
+    long shift_size = constant_signal_value(shift_signal, netlist);
+    /* a shift beyond the full width equals a shift by the full width; clamping keeps the loops below from adding surplus pins */
+    const long max_shift = (type == SL || type == ASL) ? output_width : operand_width;
+    if (shift_size > max_shift)
+        shift_size = max_shift;
+
+    switch (type) {
+    case SL:
+    case ASL: {
+        /* connect ZERO to the shift_size lowest outputs (shift_size <= output_width after clamping) */
+        for (i = 0; i < shift_size; i++) {
+            add_pin_to_signal_list(result, get_zero_pin(netlist));
+        }
+
+        /* connect inputs to outputs */
+        for (i = 0; i < output_width - shift_size; i++) {
+            if (i < operand_width) {
+                npin_t *pin = operand_signal->pins[i];
+                // connect higher output pin to lower input pin
+                add_pin_to_signal_list(result, pin);
+                /* detach from the old node */
+                pin->node->input_pins[pin->pin_node_idx] = NULL;
+            } else {
+                /* pad with zero pins */
+                add_pin_to_signal_list(result, get_zero_pin(netlist));
+            }
+        }
+        break;
+    }
+    case SR: // fallthrough
+    case ASR: {
+        for (i = shift_size; i < operand_width; i++) {
+            npin_t *pin = operand_signal->pins[i];
+            // connect lower output pin to higher input pin, as long as it still fits in the output
+            if (i - shift_size < output_width) {
+                add_pin_to_signal_list(result, pin);
+                pin->node->input_pins[pin->pin_node_idx] = NULL;
+            }
+        }
+
+        /* Extend pad_bit to outputs that don't have inputs connected (the lower bound is >= 0 after clamping) */
+        for (i = output_width - 1; i >= operand_width - shift_size; i--) {
+            npin_t *extension_pin = NULL;
+            if (node->related_ast_node && node->attributes->port_a_signed == SIGNED && node->type == ASR) {
+                /* for signed values padding will be with last pin */
+                extension_pin = copy_input_npin(operand_signal->pins[pad_bit]);
+            } else {
+                /* otherwise result will be padded with zero pins */
+                extension_pin = get_zero_pin(netlist);
+            }
+
+            add_pin_to_signal_list(result, extension_pin);
+        }
+        break;
+    }
+    default:
+        error_message(NETLIST, node->loc, "%s", "Operation not supported by Odin\n");
+        break;
+    }
+
+    for (i = 0; i < output_width; i++) {
+        /* create a buf node to drive output pins */
+        nnode_t *buf_node = make_1port_gate(BUF_NODE, 1, 1, node, mark);
+        /* result pin i becomes the input of the buf node */
+        add_input_pin_to_node(buf_node, result->pins[i], 0);
+
+        /* remap output signals to buf node */
+        remap_pin_to_new_node(node->output_pins[i], buf_node, 0);
+    }
+
+    // CLEAN UP
+    for (i = 0; i < operand_signal->count; i++) {
+        /* delete unused operand pins (those still attached to the old node) */
+        if (operand_signal->pins[i]->node == node)
+            delete_npin(operand_signal->pins[i]);
+    }
+    free_signal_list(operand_signal);
+    for (i = 0; i < shift_signal->count; i++) {
+        /* delete shift pins */
+        delete_npin(shift_signal->pins[i]);
+    }
+    free_signal_list(shift_signal);
+    free_signal_list(result);
+    free_nnode(node);
+}
+
+/**
+ *---------------------------------------------------------------------------------------------
+ * (function: instantiate_variable_shift )
+ *
+ * @brief instantiate shift node that the shift signals
+ * is not a CONSTANT signals. The variable shift implements
+ * the shift operation using Barrel Shift design
+ *
+ * @param node shift node
+ * @param type shift type [SL,SR, ASL, and ASR]
+ * @param mark traversal number
+ * @param netlist pointer to the current netlist
+ *-------------------------------------------------------------------------------------------*/
+static void instantiate_variable_shift(nnode_t *node, operation_list type, short mark, netlist_t *netlist)
+{
+    /*
+     *   Stage i is a row of 2:1 muxes (SMUX_2), one per operand bit:
+     *   data1 (input 1) = output of the previous stage, or the operand itself for stage 0
+     *   data2 (input 2) = the same signal shifted right or left by pow(2,i), capped at the operand width
+     *   selector (input 0) = bit i of the shift-amount port
+     */
+
+    nnode_t ***muxes;
+    signal_list_t *input_pins = init_signal_list();
+    signal_list_t *output_pins;
+    int input_port_width = 0;
+    int output_port_width = 0;
+    int pow_2_by_i = 0;
+
+    for (int i = 0; i < node->num_input_pins; i++) {
+        add_pin_to_signal_list(input_pins, node->input_pins[i]);
+    }
+
+    output_pins = input_pins;
+    output_port_width = node->output_port_sizes[0];
+    input_port_width = node->input_port_sizes[0];
+    muxes = (nnode_t ***)vtr::malloc(sizeof(nnode_t **) * (input_port_width));
+
+    for (int i = 0; i < input_port_width; i++) {
+        muxes[i] = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * (input_port_width));
+        for (int j = 0; j < input_port_width; j++) {
+            muxes[i][j] = make_2port_gate(SMUX_2, 1, 2, 1, node, mark);
+            /* selector pin: a copy of shift-amount bit i, shared by every mux of stage i */
+            add_input_pin_to_node(muxes[i][j], copy_input_npin(node->input_pins[input_port_width + i]), 0);
+        }
+    }
+
+    for (int i = 0; i < input_port_width; i++) {
+        pow_2_by_i = shift_left_value_with_overflow_check(0x1, i, node->loc);
+        /*
+         * Limit shift value of barrel design to max at input_port_width,
+         * since after this value we should extend based on extension bit.
+         * Also, checking the overflow of pow_2_by_i
+         */
+        int shift_size = (pow_2_by_i > input_port_width || pow_2_by_i < 0) ? input_port_width : pow_2_by_i;
+
+        input_pins = output_pins;
+
+        if (type == SR || type == ASR) {
+            /*
+             * Logical variable shift right
+             * or
+             * arithmetic variable shift right
+             */
+
+            // shift by pow(2,i): a copy of previous-stage bit j becomes data2 of mux j - shift_size
+            for (int j = shift_size; j < input_port_width; j++)
+                add_input_pin_to_node(muxes[i][j - shift_size], copy_input_npin(input_pins->pins[j]), 2);
+            // the top shift_size muxes get a copy of the sign bit (signed ASR) or a zero pin as data2
+            int pad_bit = input_port_width - 1;
+            for (int j = 0; j < shift_size; j++) {
+                if (node->attributes->port_a_signed == SIGNED && type == ASR) {
+                    add_input_pin_to_node(muxes[i][j + input_port_width - shift_size], copy_input_npin(input_pins->pins[pad_bit]), 2);
+                } else {
+                    add_input_pin_to_node(muxes[i][j + input_port_width - shift_size], get_zero_pin(netlist), 2);
+                }
+            }
+
+        } else {
+            /*
+             * Logical variable shift left
+             * or
+             * arithmetic variable shift left
+             */
+
+            // shift by pow(2,i): a copy of previous-stage bit j becomes data2 of mux j + shift_size
+            for (int j = 0; j < input_port_width - shift_size; j++)
+                add_input_pin_to_node(muxes[i][j + shift_size], copy_input_npin(input_pins->pins[j]), 2);
+
+            // the lowest shift_size muxes get a zero pin as data2
+            for (int j = 0; j < shift_size; j++)
+                add_input_pin_to_node(muxes[i][j], get_zero_pin(netlist), 2);
+        }
+
+        for (int j = 0; j < input_port_width; j++) {
+            // the unshifted previous-stage pin itself (not a copy) becomes data1 of mux j
+            add_input_pin_to_node(muxes[i][j], input_pins->pins[j], 1);
+        }
+
+        free_signal_list(input_pins);
+        output_pins = init_signal_list();
+        // every stage but the last gets fresh output nets; the last stage takes over the node's output pins
+        for (int j = 0; j < input_port_width; j++) {
+            if (i != input_port_width - 1) {
+                npin_t *new_pin1 = allocate_npin();
+                npin_t *new_pin2 = allocate_npin();
+                nnet_t *new_net = allocate_nnet();
+                new_net->name = make_full_ref_name(NULL, NULL, NULL, muxes[i][j]->name, j);
+                /* hook the output pin into the node */
+                add_output_pin_to_node(muxes[i][j], new_pin1, 0);
+                /* hook up new pin 1 into the new net */
+                add_driver_pin_to_net(new_net, new_pin1);
+                /* hook up the new pin 2 to this new net */
+                add_fanout_pin_to_net(new_net, new_pin2);
+
+                // Storing the output pins of the current mux stage as the input of the next one
+                add_pin_to_signal_list(output_pins, new_pin2);
+
+            } else {
+                if (j < output_port_width)
+                    remap_pin_to_new_node(node->output_pins[j], muxes[i][j], 0);
+            }
+        }
+    }
+    // NOTE(review): unlike instantiate_constant_shift, node is not passed to free_nnode here - confirm it is released elsewhere
+    free_signal_list(output_pins);
+    for (int i = 0; i < input_port_width; i++) {
+        vtr::free(muxes[i]);
+    }
+    vtr::free(muxes);
+}
diff --git a/parmys-plugin/src/read_xml_config_file.cc b/parmys-plugin/src/read_xml_config_file.cc
new file mode 100644
index 000000000..eba55eb9c
--- /dev/null
+++ b/parmys-plugin/src/read_xml_config_file.cc
@@ -0,0 +1,285 @@
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "read_xml_config_file.h"
+#include "odin_globals.h"
+#include "odin_types.h"
+#include "pugixml.hpp"
+#include "read_xml_util.h"
+#include "vtr_memory.h"
+#include "vtr_util.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+using namespace pugiutil;
+
+config_t configuration;
+
+void read_inputs(pugi::xml_node a_node, config_t *config, const pugiutil::loc_data &loc_data);
+void read_outputs(pugi::xml_node a_node, const pugiutil::loc_data &loc_data);
+void read_debug_switches(pugi::xml_node a_node, config_t *config, const pugiutil::loc_data &loc_data);
+void read_optimizations(pugi::xml_node a_node, config_t *config, const pugiutil::loc_data &loc_data);
+void set_default_optimization_settings(config_t *config);
+
+extern HardSoftLogicMixer *mixer;
+
+/*-------------------------------------------------------------------------
+ * (function: read_config_file)
+ * Parses the XML configuration file `file_name` and applies its <inputs>,
+ * <output>, optional <optimizations> and <debug_outputs> sections, in that
+ * order. An XmlError is reported with printf and stops the parse.
+ *
+ * The settings land in the global `configuration` (type config_t).
+ *-----------------------------------------------------------------------*/
+void read_config_file(const char *file_name)
+{
+    oassert(file_name != NULL);
+
+    /* Locals: both are destroyed when this function returns */
+    pugi::xml_document doc;
+    pugiutil::loc_data loc_data;
+
+    try {
+        loc_data = pugiutil::load_xml(doc, file_name);
+
+        /* The root element should be <config> */
+        pugi::xml_node root = get_single_child(doc, "config", loc_data);
+
+        /* Input files */
+        pugi::xml_node inputs = get_single_child(root, "inputs", loc_data);
+        read_inputs(inputs, &configuration, loc_data);
+
+        /* Output file and target architecture */
+        pugi::xml_node output = get_single_child(root, "output", loc_data);
+        read_outputs(output, loc_data);
+
+        /* Optimizations: install the defaults first, then let the optional
+         * <optimizations> section override them */
+        set_default_optimization_settings(&configuration);
+        pugi::xml_node optimizations = get_single_child(root, "optimizations", loc_data, OPTIONAL);
+        if (optimizations) {
+            read_optimizations(optimizations, &configuration, loc_data);
+        }
+
+        /* Debug switches */
+        pugi::xml_node debug_outputs = get_single_child(root, "debug_outputs", loc_data);
+        read_debug_switches(debug_outputs, &configuration, loc_data);
+
+    } catch (XmlError &e) {
+        /* Settings applied before the failing step are not rolled back */
+        printf("error: could not parse xml configuration file '%s': %s\n", file_name, e.what());
+        return;
+    }
+}
+
+/*-------------------------------------------------------------------------
+ * (function: read_inputs)
+ * Reads the <inputs> node `a_node`: every <input_path_and_name> child is
+ * appended to config->list_of_file_names.
+ *-----------------------------------------------------------------------*/
+void read_inputs(pugi::xml_node a_node, config_t *config, const pugiutil::loc_data &loc_data)
+{
+    pugi::xml_node child = get_single_child(a_node, "input_type", loc_data, OPTIONAL);
+    if (child != NULL) {
+        /* NOTE(review): the looked-up value is discarded - confirm this is intended */
+        file_type_strmap[child.child_value()];
+    }
+
+    /* Visit the first match and then each following sibling with the same tag name */
+    child = get_first_child(a_node, "input_path_and_name", loc_data, OPTIONAL);
+    while (child != NULL) {
+        config->list_of_file_names.push_back(child.child_value());
+        child = child.next_sibling(child.name());
+    }
+}
+
+/*--------------------------------------------------------------------------
+ * (function: read_outputs)
+ * Reads the <output> node `a_node` into global_args: output file and arch file.
+ *------------------------------------------------------------------------*/
+void read_outputs(pugi::xml_node a_node, const pugiutil::loc_data &loc_data)
+{
+    pugi::xml_node type_node = get_single_child(a_node, "output_type", loc_data, OPTIONAL);
+    if (type_node) {
+        file_type_strmap[type_node.child_value()];
+    }
+
+    pugi::xml_node path_node = get_single_child(a_node, "output_path_and_name", loc_data, OPTIONAL);
+    if (path_node) {
+        global_args.output_file.set(path_node.child_value(), argparse::Provenance::SPECIFIED);
+    }
+
+    pugi::xml_node target_node = get_single_child(a_node, "target", loc_data, OPTIONAL);
+    if (!target_node)
+        return;
+
+    pugi::xml_node arch_node = get_single_child(target_node, "arch_file", loc_data, OPTIONAL);
+    if (!arch_node)
+        return;
+
+    /* An arch file is already set: report the clash, then use the config file's value */
+    if (global_args.arch_file.value() != "") {
+        error_message(PARSE_ARGS, unknown_location, "%s", "Error: Arch file specified in config file AND command line\n");
+    }
+    global_args.arch_file.set(arch_node.child_value(), argparse::Provenance::SPECIFIED);
+}
+
+/*--------------------------------------------------------------------------
+ * (function: read_debug_switches)
+ * Reads the <debug_outputs> node `a_node`: stores output_netlist_graphs
+ * (parsed with atoi) and debug_output_path into config when present.
+ *------------------------------------------------------------------------*/
+void read_debug_switches(pugi::xml_node a_node, config_t *config, const pugiutil::loc_data &loc_data)
+{
+    /* Looked up only; the returned node is not used */
+    (void)get_single_child(a_node, "output_ast_graphs", loc_data, OPTIONAL);
+
+    pugi::xml_node graphs = get_single_child(a_node, "output_netlist_graphs", loc_data, OPTIONAL);
+    if (graphs) {
+        config->output_netlist_graphs = atoi(graphs.child_value());
+    }
+
+    pugi::xml_node path = get_single_child(a_node, "debug_output_path", loc_data, OPTIONAL);
+    if (path) {
+        config->debug_output_path = path.child_value();
+    }
+
+    /* Looked up only; the returned node is not used */
+    (void)get_single_child(a_node, "print_parse_tokens", loc_data, OPTIONAL);
+}
+
+/*--------------------------------------------------------------------------
+ * (function: set_default_optimization_settings)
+ * Writes the default value of each optimization setting below into config.
+ *------------------------------------------------------------------------*/
+void set_default_optimization_settings(config_t *config)
+{
+    config->min_hard_multiplier = 0;    /* hard multipliers */
+    config->fixed_hard_multiplier = 0;
+    config->split_hard_multiplier = 1;
+    config->mult_padding = -1;          /* unconn */
+    config->split_memory_width = false; /* memories */
+    config->split_memory_depth = false;
+    config->min_threshold_adder = 0;    /* hard adders */
+    config->fixed_hard_adder = 0;
+}
+
+/*--------------------------------------------------------------------------
+ * (function: read_optimizations)
+ * Applies the optional <multiply>, <mix_soft_hard_blocks>, <memory> and
+ * <adder> children of `a_node` to config and to the global mixer.
+ *
+ * For <multiply>, <memory> and <adder>, a child that is present overwrites
+ * each of its settings: with the attribute value when the attribute exists,
+ * otherwise with the default noted next to the assignment.
+ *------------------------------------------------------------------------*/
+void read_optimizations(pugi::xml_node a_node, config_t *config, const pugiutil::loc_data &loc_data)
+{
+    pugi::xml_node child;
+
+    /* Text of an optional attribute of the current `child`, NULL when absent */
+    auto attr_text = [&child, &loc_data](const char *attr_name) -> const char * {
+        return get_attribute(child, attr_name, loc_data, OPTIONAL).as_string(NULL);
+    };
+    /* atoi() of an optional attribute of `child`, `fallback` when absent */
+    auto attr_int = [&attr_text](const char *attr_name, int fallback) -> int {
+        const char *text = attr_text(attr_name);
+        return (text != NULL) ? atoi(text) : fallback;
+    };
+
+    /*
+     * <multiply size= padding= fixed= fracture=>
+     */
+    child = get_single_child(a_node, "multiply", loc_data, OPTIONAL);
+    if (child) {
+        /* Default: No minimum hard multiply size */
+        config->min_hard_multiplier = attr_int("size", 0);
+        /* Default: Pad to hbpad pins */
+        config->mult_padding = attr_int("padding", -1);
+        /* Default: No fixed hard multiply size */
+        config->fixed_hard_multiplier = attr_int("fixed", 0);
+        /* Default: use fractured hard multiply size */
+        config->split_hard_multiplier = attr_int("fracture", 1);
+    }
+
+    /*
+     * <mix_soft_hard_blocks mults_ratio= exact_mults=>
+     * A valid value replaces the mixer's MULTIPLY option; when both
+     * attributes are valid, exact_mults is applied last.
+     */
+    child = get_single_child(a_node, "mix_soft_hard_blocks", loc_data, OPTIONAL);
+    if (child) {
+        const char *ratio_text = attr_text("mults_ratio");
+        if (ratio_text != NULL) {
+            float ratio = atof(ratio_text);
+            /* Ratios outside [0, 1] are ignored */
+            if (ratio >= 0.0 && ratio <= 1.0) {
+                delete mixer->_opts[MULTIPLY];
+                mixer->_opts[MULTIPLY] = new MultsOpt(ratio);
+            }
+        }
+
+        const char *exact_text = attr_text("exact_mults");
+        if (exact_text != NULL) {
+            int exact = atoi(exact_text);
+            /* Negative counts are ignored */
+            if (exact >= 0) {
+                delete mixer->_opts[MULTIPLY];
+                mixer->_opts[MULTIPLY] = new MultsOpt(exact);
+            }
+        }
+    }
+
+    /*
+     * <memory split_memory_width= split_memory_depth=>
+     */
+    child = get_single_child(a_node, "memory", loc_data, OPTIONAL);
+    if (child) {
+        /* Default: Do not split memory width! */
+        config->split_memory_width = attr_int("split_memory_width", 0);
+
+        /* "min" and "max" are stored as -1 and -2; anything else goes through atoi */
+        const char *depth_text = attr_text("split_memory_depth");
+        if (depth_text == NULL)
+            config->split_memory_depth = 0; /* Default: Do not split memory depth! */
+        else if (strcmp(depth_text, "min") == 0)
+            config->split_memory_depth = -1;
+        else if (strcmp(depth_text, "max") == 0)
+            config->split_memory_depth = -2;
+        else
+            config->split_memory_depth = atoi(depth_text);
+    }
+
+    /*
+     * <adder threshold_size= fixed=>; size, padding and fracture are looked
+     * up as well but their values are not used.
+     */
+    child = get_single_child(a_node, "adder", loc_data, OPTIONAL);
+    if (child) {
+        (void)attr_text("size");
+        config->min_threshold_adder = attr_int("threshold_size", 0); /* Default: No minimum hard adder size */
+        (void)attr_text("padding");
+        config->fixed_hard_adder = attr_int("fixed", 1); /* Default: Fixed hard adder size */
+        (void)attr_text("fracture");
+    }
+}
\ No newline at end of file
diff --git a/parmys-plugin/src/string_cache.cc b/parmys-plugin/src/string_cache.cc
new file mode 100644
index 000000000..7a81fec0b
--- /dev/null
+++ b/parmys-plugin/src/string_cache.cc
@@ -0,0 +1,166 @@
+// Not PJ's code, but very useful and used everywhere
+#include "string_cache.h"
+#include "vtr_memory.h"
+#include "vtr_util.h"
+#include <stdio.h>
+#include <string.h>
+
+unsigned long string_hash(STRING_CACHE *sc, const char *string);
+void generate_sc_hash(STRING_CACHE *sc);
+
+unsigned long string_hash(STRING_CACHE *sc, const char *string)
+{
+    /* Per byte: h = (h * sc->mul + byte) % sc->mod. Accumulated in unsigned long long because,
+     * with the mod/mul set in sc_new_string_cache, h * mul overflows a 32-bit long (UB). */
+    const unsigned long long mod = (unsigned long long)sc->mod;
+    const unsigned long long mul = (unsigned long long)sc->mul;
+    unsigned long long h = 0;
+    for (const char *p = string; *p; p++)
+        h = (h * mul + (unsigned char)*p) % mod;
+    return (unsigned long)h;
+}
+
+void generate_sc_hash(STRING_CACHE *sc)
+{
+    /* Throw away the previous bucket and chain tables, if any */
+    if (sc->string_hash != NULL)
+        vtr::free(sc->string_hash);
+    if (sc->next_string != NULL)
+        vtr::free(sc->next_string);
+
+    /* Fresh tables filled with 0xff bytes, so every long slot starts out negative ("empty") */
+    sc->string_hash_size = sc->size * 2 + 11;
+    sc->string_hash = (long *)memset(sc_do_alloc(sc->string_hash_size, sizeof(long)), 0xff, sc->string_hash_size * sizeof(long));
+    sc->next_string = (long *)memset(sc_do_alloc(sc->size, sizeof(long)), 0xff, sc->size * sizeof(long));
+
+    /* Re-insert every stored string at the head of its bucket's chain */
+    for (long idx = 0; idx < sc->free; idx++) {
+        const long bucket = string_hash(sc, sc->string[idx]) % sc->string_hash_size;
+        sc->next_string[idx] = sc->string_hash[bucket];
+        sc->string_hash[bucket] = idx;
+    }
+}
+
+STRING_CACHE *sc_new_string_cache(void)
+{
+    STRING_CACHE *cache = (STRING_CACHE *)sc_do_alloc(1, sizeof(STRING_CACHE));
+    /* Initial capacity: 100 entries, none of them in use yet */
+    cache->size = 100;
+    cache->free = 0;
+    cache->string = (char **)sc_do_alloc(cache->size, sizeof(char *));
+    cache->data = (void **)sc_do_alloc(cache->size, sizeof(void *));
+    /* Hash parameters, then build the (still empty) lookup tables */
+    cache->mod = 834535547;
+    cache->mul = 247999;
+    cache->string_hash_size = 0;
+    cache->string_hash = NULL;
+    cache->next_string = NULL;
+    generate_sc_hash(cache);
+    return cache;
+}
+
+long sc_lookup_string(STRING_CACHE *sc, const char *string)
+{
+    /* Returns the index of `string` in the cache, or -1 when it is not there
+     * (a NULL cache contains nothing). */
+    if (sc == NULL)
+        return -1;
+
+    /* Follow the chain of the bucket the string hashes to; a negative
+     * index marks the end of the chain */
+    const long bucket = string_hash(sc, string) % sc->string_hash_size;
+    for (long idx = sc->string_hash[bucket]; idx >= 0; idx = sc->next_string[idx]) {
+        if (strcmp(sc->string[idx], string) == 0)
+            return idx;
+    }
+
+    return -1;
+}
+
+long sc_add_string(STRING_CACHE *sc, const char *string)
+{
+    long i, hash;
+    void *a;
+    /* Returns the index of `string`, adding it first when it is not cached yet.
+     * sc_lookup_string() accepts a NULL cache, so reject one here too (-1). */
+    if (sc == NULL)
+        return -1;
+    i = sc_lookup_string(sc, string);
+    if (i >= 0)
+        return i;
+    if (sc->free >= sc->size) {
+        /* Full: grow both parallel arrays, copy the entries in use, rebuild the hash */
+        sc->size = sc->size * 2 + 10;
+        a = sc_do_alloc(sc->size, sizeof(char *));
+        if (sc->free > 0)
+            memcpy(a, sc->string, sc->free * sizeof(char *));
+        vtr::free(sc->string);
+        sc->string = (char **)a;
+        a = sc_do_alloc(sc->size, sizeof(void *));
+        if (sc->free > 0)
+            memcpy(a, sc->data, sc->free * sizeof(void *));
+        vtr::free(sc->data);
+        sc->data = (void **)a;
+        generate_sc_hash(sc);
+    }
+    /* Store a vtr::strdup copy in the next free slot and link it into its bucket */
+    i = sc->free++;
+    sc->string[i] = vtr::strdup(string);
+    sc->data[i] = NULL;
+    hash = string_hash(sc, string) % sc->string_hash_size;
+    sc->next_string[i] = sc->string_hash[hash];
+    sc->string_hash[hash] = i;
+    return i;
+}
+
+void *sc_do_alloc(long a, long b)
+{
+    /* Counts or sizes below 1 are bumped up to 1 */
+    if (a < 1)
+        a = 1;
+    if (b < 1)
+        b = 1;
+    /* NOTE(review): retries without bound for as long as vtr::calloc returns NULL */
+    for (;;) {
+        void *r = vtr::calloc(a, b);
+        if (r != NULL)
+            return r;
+        fprintf(stderr, "Failed to allocated %ld chunks of %ld bytes (%ld bytes total)\n", a, b, a * b);
+    }
+}
+
+STRING_CACHE *sc_free_string_cache(STRING_CACHE *sc)
+{
+    /* Frees the cache together with its arrays and stored strings; NULL is
+     * accepted. Always returns NULL. */
+    if (sc == NULL)
+        return NULL;
+
+    if (sc->string != NULL) {
+        /* Only the first sc->free slots are visited */
+        for (long idx = 0; idx < sc->free; idx++) {
+            if (sc->string[idx] != NULL)
+                vtr::free(sc->string[idx]);
+            sc->string[idx] = NULL;
+        }
+        vtr::free(sc->string);
+        sc->string = NULL;
+    }
+
+    /* The data[] entries themselves are not freed here, only the array */
+    if (sc->data != NULL) {
+        vtr::free(sc->data);
+        sc->data = NULL;
+    }
+    if (sc->string_hash != NULL) {
+        vtr::free(sc->string_hash);
+        sc->string_hash = NULL;
+    }
+    if (sc->next_string != NULL) {
+        vtr::free(sc->next_string);
+        sc->next_string = NULL;
+    }
+
+    vtr::free(sc);
+    return NULL;
+}
diff --git a/parmys-plugin/src/subtractions.cc b/parmys-plugin/src/subtractions.cc
new file mode 100644
index 000000000..b9dcf6b85
--- /dev/null
+++ b/parmys-plugin/src/subtractions.cc
@@ -0,0 +1,891 @@
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "subtractions.h"
+#include "adders.h"
+#include "netlist_utils.h"
+#include "node_creation_library.h"
+#include "odin_globals.h"
+#include "odin_types.h"
+#include "odin_util.h"
+#include <stdio.h>
+#include <string.h>
+
+#include "vtr_memory.h"
+
+using vtr::t_linked_vptr;
+
+t_linked_vptr *sub_list = NULL;
+t_linked_vptr *sub_chain_list = NULL;
+int subchaintotal = 0;
+int *sub = NULL;
+
+void init_split_adder_for_sub(nnode_t *node, nnode_t *ptr, int a, int sizea, int b, int sizeb, int cin, int cout, int index, int flag);
+static void cleanup_sub_old_node(nnode_t *nodeo, netlist_t *netlist);
+
+/*---------------------------------------------------------------------------
+ * (function: report_sub_distribution)
+ *-------------------------------------------------------------------------*/
+
+/* These values are collected during the unused logic removal sweep */
+extern long subtractor_chain_count;
+extern long longest_subtractor_chain;
+extern long total_subtractors;
+
+void report_sub_distribution()
+{
+    /* Prints the subtractor chain statistics held in the extern counters
+     * subtractor_chain_count, longest_subtractor_chain and total_subtractors.
+     * Nothing is printed when hard_adders is NULL. */
+    if (hard_adders != NULL) {
+        printf("\nHard MINUS Distribution\n");
+        printf("============================\n");
+        printf("\n");
+        printf("\nTotal # of chains = %ld\n", subtractor_chain_count);
+
+        printf("\nHard sub chain Details\n");
+        printf("============================\n");
+
+        printf("\n");
+        printf("\nThe Number of Hard Block subs in the Longest Chain: %ld\n", longest_subtractor_chain);
+
+        printf("\n");
+        printf("\nThe Total Number of Hard Block subs: %ld\n", total_subtractors);
+    }
+}
+
+/*---------------------------------------------------------------------------
+ * (function: declare_hard_adder_for_sub)
+ * Makes sure hard_adders->instances lists an adder whose a, b and sumout
+ * widths equal this node's port widths, adding a new entry at the front of
+ * the list when none matches. Only warns when hard_adders is NULL.
+ *-------------------------------------------------------------------------*/
+void declare_hard_adder_for_sub(nnode_t *node)
+{
+    /* Without a hard adder model there is no instance list to update: warn
+     * and stop instead of dereferencing the NULL hard_adders below */
+    if (hard_adders == NULL) {
+        warning_message(NETLIST, node->loc, "%s\n", "Instantiating Substraction where hard adders do not exist");
+        return;
+    }
+
+    const int width_a = node->input_port_sizes[0];
+    const int width_b = node->input_port_sizes[1];
+    const int width_sumout = node->output_port_sizes[1];
+
+    /* See if this size instance of adder exists? */
+    for (t_adder *tmp = (t_adder *)hard_adders->instances; tmp != NULL; tmp = tmp->next) {
+        if ((tmp->size_a == width_a) && (tmp->size_b == width_b) && (tmp->size_sumout == width_sumout))
+            return;
+    }
+
+    /* Does not exist - create an instance and push it on the front of the list */
+    t_adder *fresh = (t_adder *)vtr::malloc(sizeof(t_adder));
+    fresh->next = (t_adder *)hard_adders->instances;
+    hard_adders->instances = fresh;
+    fresh->size_a = width_a;
+    fresh->size_b = width_b;
+    fresh->size_cin = 1;
+    fresh->size_cout = 1;
+    fresh->size_sumout = width_sumout;
+}
+
+/*---------------------------------------------------------------------------
+ * (function: instantiate_hard_adder_subtraction )
+ * Prepares `node` for mapping onto a hard adder:
+ *   - declares the adder instance matching its port widths,
+ *   - names each output pin that has no name yet "<node name>[<pin_node_idx>]",
+ *   - stamps the node with the traversal marker `mark`.
+ * The netlist argument is unused.
+ *-------------------------------------------------------------------------*/
+void instantiate_hard_adder_subtraction(nnode_t *node, short mark, netlist_t * /*netlist*/)
+{
+    int i;
+
+    /* Make sure an adder instance with this node's port widths is declared */
+    declare_hard_adder_for_sub(node);
+
+    /*
+     * The node keeps the name it already has, so no new node name is built
+     * here. Only output pins that are still unnamed receive a name.
+     */
+    for (i = 0; i < node->num_output_pins; i++) {
+        npin_t *pin = node->output_pins[i];
+
+        if (pin->name != NULL) {
+            continue;
+        }
+
+        /*
+         * Room for the node name plus "[<int>]" and the terminating NUL:
+         * a decimal 32-bit int needs at most 11 characters, so 20 extra
+         * bytes are enough. The pin keeps the allocated buffer as its name.
+         */
+        size_t len = strlen(node->name) + 20;
+        char *pin_name = (char *)vtr::malloc(len);
+
+        odin_sprintf(pin_name, "%s[%d]", node->name, pin->pin_node_idx);
+        pin->name = pin_name;
+    }
+
+    /* Stamp the node with the caller's traversal marker */
+    node->traverse_visited = mark;
+}
+
+/*-----------------------------------------------------------------------
+ * (function: init_split_adder_for_sub)
+ * Initialises `ptr` as slice `index` of the adder chain that replaces the
+ * sub node `node`. Port-a pins are moved over from `node` where available;
+ * port-b, carry-in and all output pins are set to NULL for later connecting.
+ *---------------------------------------------------------------------*/
+void init_split_adder_for_sub(nnode_t *node, nnode_t *ptr, int a, int sizea, int b, int sizeb, int cin, int cout, int index, int flag)
+{
+    int i;
+    int flaga = 0;
+    int current_sizea, current_sizeb;
+    int aa = 0;
+    int num = 0;
+
+    // if the input of the first cin is generated by a dummy adder added
+    // to the start of the chain, then an offset is needed to compensate
+    // for that in various positions in the code, otherwise the offset is 0
+    const int offset = (configuration.adder_cin_global) ? 0 : 1;
+
+    /* Copy properties from original node */
+    ptr->type = node->type;
+    ptr->related_ast_node = node->related_ast_node;
+    ptr->traverse_visited = node->traverse_visited;
+    ptr->node_data = NULL;
+
+    /* decide the current size of input a and b */
+    if (flag == 0) {
+        current_sizea = (a + offset) - sizea * index;
+        current_sizeb = (b + offset) - sizeb * index;
+        if (current_sizea >= sizea)
+            current_sizea = sizea;
+        else if (current_sizea <= 0) {
+            current_sizea = sizea;
+            flaga = 1;
+        } else {
+            aa = current_sizea;
+            current_sizea = sizea;
+            flaga = 2;
+        }
+        current_sizeb = sizeb;
+    } else {
+        if (sizea != 0)
+            current_sizea = sizea;
+        else
+            current_sizea = 1;
+        if (sizeb != 0)
+            current_sizeb = sizeb;
+        else
+            current_sizeb = 1;
+    }
+
+    /* Set new port sizes and parameters */
+    ptr->num_input_port_sizes = 3;
+    ptr->input_port_sizes = (int *)vtr::malloc(3 * sizeof(int));
+    ptr->input_port_sizes[0] = current_sizea;
+    ptr->input_port_sizes[1] = current_sizeb;
+    ptr->input_port_sizes[2] = cin;
+    ptr->num_output_port_sizes = 2;
+    ptr->output_port_sizes = (int *)vtr::malloc(2 * sizeof(int));
+    ptr->output_port_sizes[0] = cout;
+
+    /* The size of output port sumout equals the maximum size of a and b */
+    if (current_sizea > current_sizeb)
+        ptr->output_port_sizes[1] = current_sizea;
+    else
+        ptr->output_port_sizes[1] = current_sizeb;
+
+    /* Set the number of pins and re-locate previous pin entries */
+    ptr->num_input_pins = current_sizea + current_sizeb + cin;
+    ptr->input_pins = (npin_t **)vtr::malloc(sizeof(void *) * (current_sizea + current_sizeb + cin));
+    // the normal sub: if flaga or flagb = 1, the input pins should be empty.
+    // the unary sub: all input pins for a should be null, input pins for b should be connected to node
+    if (node->num_input_port_sizes == 1) {
+        for (i = 0; i < current_sizea; i++)
+            ptr->input_pins[i] = NULL;
+    } else if ((flaga == 1) && (node->num_input_port_sizes == 2)) {
+        for (i = 0; i < current_sizea; i++)
+            ptr->input_pins[i] = NULL;
+    } else if ((flaga == 2) && (node->num_input_port_sizes == 2)) {
+        if (index == 0) {
+            ptr->input_pins[0] = NULL;
+            if (sizea > 1) {
+                for (i = 1; i < aa; i++) {
+                    ptr->input_pins[i] = node->input_pins[i + index * sizea - 1];
+                    ptr->input_pins[i]->node = ptr;
+                    ptr->input_pins[i]->pin_node_idx = i;
+                }
+                for (i = 0; i < (sizea - aa); i++)
+                    ptr->input_pins[i + aa] = NULL;
+            }
+        } else {
+            for (i = 0; i < aa; i++) {
+                ptr->input_pins[i] = node->input_pins[i + index * sizea - 1];
+                ptr->input_pins[i]->node = ptr;
+                ptr->input_pins[i]->pin_node_idx = i;
+            }
+            for (i = 0; i < (sizea - aa); i++)
+                ptr->input_pins[i + aa] = NULL;
+        }
+    } else {
+        if (index == 0 && !configuration.adder_cin_global) {
+            if (flag == 0) {
+                ptr->input_pins[0] = NULL;
+                if (sizea > 1) {
+                    for (i = 1; i < sizea; i++) {
+                        ptr->input_pins[i] = node->input_pins[i + index * sizea - 1];
+                        ptr->input_pins[i]->node = ptr;
+                        ptr->input_pins[i]->pin_node_idx = i;
+                    }
+                }
+            } else {
+                for (i = 0; i < current_sizea; i++) {
+                    ptr->input_pins[i] = node->input_pins[i];
+                    ptr->input_pins[i]->node = ptr;
+                    ptr->input_pins[i]->pin_node_idx = i;
+                }
+            }
+        } else {
+            if (flag == 0) {
+                for (i = 0; i < sizea; i++) {
+                    ptr->input_pins[i] = node->input_pins[i + index * sizea - offset];
+                    ptr->input_pins[i]->node = ptr;
+                    ptr->input_pins[i]->pin_node_idx = i;
+                }
+            } else {
+                num = node->input_port_sizes[0];
+                for (i = 0; i < current_sizea; i++) {
+                    ptr->input_pins[i] = node->input_pins[i + num - current_sizea];
+                    ptr->input_pins[i]->node = ptr;
+                    ptr->input_pins[i]->pin_node_idx = i;
+                }
+            }
+        }
+    }
+    /* Port b sits right after port a, i.e. at offset current_sizea; it is left unconnected here */
+    for (i = 0; i < current_sizeb; i++)
+        ptr->input_pins[i + current_sizea] = NULL;
+
+    /* Carry_in should be NULL*/
+    for (i = 0; i < cin; i++) {
+        ptr->input_pins[i + current_sizea + current_sizeb] = NULL;
+    }
+
+    /* output pins */
+    int output;
+    if (current_sizea > current_sizeb)
+        output = current_sizea + cout;
+    else
+        output = current_sizeb + cout;
+
+    ptr->num_output_pins = output;
+    ptr->output_pins = (npin_t **)vtr::malloc(sizeof(void *) * output);
+    for (i = 0; i < output; i++)
+        ptr->output_pins[i] = NULL;
+
+    return;
+}
+
+/*-------------------------------------------------------------------------
+ * (function: split_adder)
+ *
+ * This function works to split an adder into several smaller
+ *  adders to better "fit" with the available resources in a
+ *  targeted FPGA architecture.
+ *
+ * This function is at the lowest level since it simply receives
+ *  an adder and is told how to split it.
+ *
+ * Note that for some of the additions we need to perform sign extensions,
+ * but this should not be a problem since the sign extension is always
+ * extending NOT contracting.
+ *-----------------------------------------------------------------------*/
+
+void split_adder_for_sub(nnode_t *nodeo, int a, int b, int sizea, int sizeb, int cin, int cout, int count, netlist_t *netlist)
+{
+    nnode_t **node;
+    nnode_t **not_node;
+    int i, j;
+    int num;
+    int max_num = 0;
+    int flag = 0, lefta = 0, leftb = 0;
+
+    // if the input of the first cin is generated by a dummy adder added
+    // to the start of the chain, then an offset is needed to compensate
+    // for that in various positions in the code, otherwise the offset is 0
+    const int offset = (configuration.adder_cin_global) ? 0 : 1;
+
+    /* Check for a legitimate split */
+    if (nodeo->num_input_port_sizes == 2) {
+        oassert(nodeo->input_port_sizes[0] == a);
+        oassert(nodeo->input_port_sizes[1] == b);
+    } else {
+        oassert(nodeo->input_port_sizes[0] == a);
+        oassert(nodeo->input_port_sizes[0] == b);
+    }
+
+    node = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * (count));
+    not_node = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * (b));
+
+    for (i = 0; i < b; i++) {
+        not_node[i] = allocate_nnode(nodeo->loc);
+        nnode_t *temp = not_node[i];
+        if (nodeo->num_input_port_sizes == 2)
+            not_node[i] = make_not_gate_with_input(copy_input_npin(nodeo->input_pins[a + i]), not_node[i], -1);
+        else
+            not_node[i] = make_not_gate_with_input(copy_input_npin(nodeo->input_pins[i]), not_node[i], -1);
+        free_nnode(temp);
+    }
+
+    for (i = 0; i < count; i++) {
+        node[i] = allocate_nnode(nodeo->loc);
+        node[i]->name = (char *)vtr::malloc(strlen(nodeo->name) + 20);
+        odin_sprintf(node[i]->name, "%s-%d", nodeo->name, i);
+        if (i == count - 1) {
+            if (configuration.fixed_hard_adder == 1)
+                init_split_adder_for_sub(nodeo, node[i], a, sizea, b, sizeb, cin, cout, i, flag);
+            else {
+                if (count == 1) {
+                    lefta = a;
+                    leftb = b;
+                } else {
+                    lefta = (a + 1) % sizea;
+                    leftb = (b + 1) % sizeb;
+                }
+
+                max_num = (lefta >= leftb) ? lefta : leftb;
+                // if fixed_hard_adder = 0, and the left of a and b is more than min_add, then adder need to be remain the same size.
+                if (max_num >= min_add || lefta + leftb == 0)
+                    init_split_adder_for_sub(nodeo, node[i], a, sizea, b, sizeb, cin, cout, i, flag);
+                else {
+                    // Using soft logic to do the addition, No need to pad as the same size
+                    flag = 1;
+                    init_split_adder_for_sub(nodeo, node[i], a, lefta, b, leftb, cin, cout, i, flag);
+                }
+            }
+        } else
+            init_split_adder_for_sub(nodeo, node[i], a, sizea, b, sizeb, cin, cout, i, flag);
+
+        // store the processed hard adder node for optimization
+        processed_adder_list = insert_in_vptr_list(processed_adder_list, node[i]);
+    }
+
+    chain_information_t *adder_chain = allocate_chain_info();
+    // if flag = 0, the last adder use soft logic, so the count of the chain should be one less
+    if (flag == 0)
+        adder_chain->count = count;
+    else
+        adder_chain->count = count - 1;
+    adder_chain->num_bits = a + b;
+    adder_chain->name = nodeo->name;
+    sub_chain_list = insert_in_vptr_list(sub_chain_list, adder_chain);
+
+    if (flag == 1 && count == 1) {
+        for (i = 0; i < b; i++) {
+            /* If the input pin of not gate connects to gnd, replacing the input pin and the not gate with vcc;
+             * if the input pin of not gate connects to vcc, replacing the input pin and the not gate with gnd.*/
+            /* connecting untouched nets in the netlist creation to the pad node */
+            if (not_node[i]->input_pins[0]->net->num_driver_pins == 0) {
+                /* join untouched net with pad net */
+                join_nets(netlist->pad_net, not_node[i]->input_pins[0]->net);
+            }
+            oassert(not_node[i]->input_pins[0]->net->num_driver_pins == 1);
+            if (not_node[i]->input_pins[0]->net->driver_pins[0]->node->type == GND_NODE) {
+                connect_nodes(netlist->vcc_node, 0, node[0], (lefta + i));
+                remove_fanout_pins_from_net(not_node[i]->input_pins[0]->net, not_node[i]->input_pins[0], not_node[i]->input_pins[0]->pin_net_idx);
+                free_nnode(not_node[i]);
+            } else if (not_node[i]->input_pins[0]->net->driver_pins[0]->node->type == VCC_NODE) {
+                connect_nodes(netlist->gnd_node, 0, node[0], (lefta + i));
+                remove_fanout_pins_from_net(not_node[i]->input_pins[0]->net, not_node[i]->input_pins[0], not_node[i]->input_pins[0]->pin_net_idx);
+                free_nnode(not_node[i]);
+            } else
+                connect_nodes(not_node[i], 0, node[0], (lefta + i));
+        }
+    } else {
+        if (sizeb > 1) {
+            if ((b + 1) < sizeb)
+                num = b;
+            else
+                num = sizeb - 1;
+            for (i = 0; i < num; i++) {
+                /* If the input pin of not gate connects to gnd, replacing the input pin and the not gate with vcc;
+                 * if the input pin of not gate connects to vcc, replacing the input pin and the not gate with gnd.*/
+                /* connecting untouched nets in the netlist creation to the pad node */
+                if (not_node[i]->input_pins[0]->net->num_driver_pins == 0) {
+                    /* join untouched net with pad net */
+                    join_nets(netlist->pad_net, not_node[i]->input_pins[0]->net);
+                }
+                oassert(not_node[i]->input_pins[0]->net->num_driver_pins == 1);
+                if (not_node[i]->input_pins[0]->net->driver_pins[0]->node->type == GND_NODE) {
+                    connect_nodes(netlist->vcc_node, 0, node[0], (sizea + i + 1));
+                    remove_fanout_pins_from_net(not_node[i]->input_pins[0]->net, not_node[i]->input_pins[0], not_node[i]->input_pins[0]->pin_net_idx);
+                    free_nnode(not_node[i]);
+                } else if (not_node[i]->input_pins[0]->net->driver_pins[0]->node->type == VCC_NODE) {
+                    connect_nodes(netlist->gnd_node, 0, node[0], (sizea + i + 1));
+                    remove_fanout_pins_from_net(not_node[i]->input_pins[0]->net, not_node[i]->input_pins[0], not_node[i]->input_pins[0]->pin_net_idx);
+                    free_nnode(not_node[i]);
+                } else
+                    connect_nodes(not_node[i], 0, node[0], (sizea + i + 1));
+            }
+        }
+
+        for (i = offset; i < count; i++) {
+            num = (b + 1) - i * sizeb;
+            if (num > sizeb)
+                num = sizeb;
+
+            for (j = 0; j < num; j++) {
+                if (i == count - 1 && flag == 1) {
+                    /* If the input pin of not gate connects to gnd, replacing the input pin and the not gate with vcc;
+                     * if the input pin of not gate connects to vcc, replacing the input pin and the not gate with gnd.*/
+                    /* connecting untouched nets in the netlist creation to the pad node */
+                    if (not_node[(i * sizeb + j - 1)]->input_pins[0]->net->num_driver_pins == 0) {
+                        join_nets(netlist->pad_net, not_node[(i * sizeb + j - 1)]->input_pins[0]->net);
+                    }
+                    oassert(not_node[(i * sizeb + j - 1)]->input_pins[0]->net->num_driver_pins == 1);
+                    if (not_node[(i * sizeb + j - 1)]->input_pins[0]->net->driver_pins[0]->node->type == GND_NODE) {
+                        connect_nodes(netlist->vcc_node, 0, node[i], (lefta + j));
+                        remove_fanout_pins_from_net(not_node[(i * sizeb + j - 1)]->input_pins[0]->net, not_node[(i * sizeb + j - 1)]->input_pins[0],
+                                                    not_node[(i * sizeb + j - 1)]->input_pins[0]->pin_net_idx);
+                        free_nnode(not_node[(i * sizeb + j - 1)]);
+                    } else if (not_node[(i * sizeb + j - 1)]->input_pins[0]->net->driver_pins[0]->node->type == VCC_NODE) {
+                        connect_nodes(netlist->gnd_node, 0, node[i], (lefta + j));
+                        remove_fanout_pins_from_net(not_node[(i * sizeb + j - 1)]->input_pins[0]->net, not_node[(i * sizeb + j - 1)]->input_pins[0],
+                                                    not_node[(i * sizeb + j - 1)]->input_pins[0]->pin_net_idx);
+                        free_nnode(not_node[(i * sizeb + j - 1)]);
+                    } else
+                        connect_nodes(not_node[(i * sizeb + j - 1)], 0, node[i], (lefta + j));
+                } else {
+                    /* If the input pin of not gate connects to gnd, replacing the input pin and the not gate with vcc;
+                     * if the input pin of not gate connects to vcc, replacing the input pin and the not gate with gnd.*/
+                    const int index = i * sizeb + j - offset;
+                    /* connecting untouched nets in the netlist creation to the pad node */
+                    if (not_node[index]->input_pins[0]->net->num_driver_pins == 0) {
+                        /* join untouched net with pad net */
+                        join_nets(netlist->pad_net, not_node[index]->input_pins[0]->net);
+                    }
+                    oassert(not_node[index]->input_pins[0]->net->num_driver_pins == 1);
+                    if (not_node[index]->input_pins[0]->net->driver_pins[0]->node->type == GND_NODE) {
+                        connect_nodes(netlist->vcc_node, 0, node[i], (sizea + j));
+                        remove_fanout_pins_from_net(not_node[index]->input_pins[0]->net, not_node[index]->input_pins[0],
+                                                    not_node[index]->input_pins[0]->pin_net_idx);
+                        free_nnode(not_node[index]);
+                    } else if (not_node[index]->input_pins[0]->net->driver_pins[0]->node->type == VCC_NODE) {
+                        connect_nodes(netlist->gnd_node, 0, node[i], (sizea + j));
+                        remove_fanout_pins_from_net(not_node[index]->input_pins[0]->net, not_node[index]->input_pins[0],
+                                                    not_node[index]->input_pins[0]->pin_net_idx);
+                        free_nnode(not_node[index]);
+                    } else
+                        connect_nodes(not_node[index], 0, node[i], (sizea + j));
+                }
+            }
+        }
+    }
+
+    if ((flag == 0 || count > 1) && !configuration.adder_cin_global) {
+        // connect the a[0] of first adder node to ground, and b[0] of first adder node to vcc
+        connect_nodes(netlist->gnd_node, 0, node[0], 0);
+        connect_nodes(netlist->vcc_node, 0, node[0], sizea);
+        // hang the first sumout
+        node[0]->output_pins[1] = allocate_npin();
+        node[0]->output_pins[1]->name = append_string("", "%s~dummy_output~%d~%d", node[0]->name, 0, 1);
+    }
+
+    // connect the first cin pin to vcc or unconn depending on configuration
+    if ((flag == 1 && count == 1) || configuration.adder_cin_global)
+        connect_nodes(netlist->vcc_node, 0, node[0], node[0]->num_input_pins - 1);
+    else
+        connect_nodes(netlist->pad_node, 0, node[0], node[0]->num_input_pins - 1);
+
+    // for normal subtraction: if any input pins beside intial cin is NULL, it should connect to unconn
+    // for unary subtraction: the first number should has the number of a input pins connected to gnd. The others are as same as normal subtraction
+    for (i = 0; i < count; i++) {
+        num = node[i]->num_input_pins;
+        for (j = 0; j < num - 1; j++) {
+            if (node[i]->input_pins[j] == NULL) {
+                if (nodeo->num_input_port_sizes != 3 && i * sizea + j < a)
+                    connect_nodes(netlist->gnd_node, 0, node[i], j);
+                else
+                    connect_nodes(netlist->pad_node, 0, node[i], j);
+            }
+        }
+    }
+
+    // connect cout to next node's cin
+    for (i = 1; i < count; i++)
+        connect_nodes(node[i - 1], 0, node[i], (node[i]->num_input_pins - 1));
+
+    if (flag == 1 && count == 1) {
+        for (j = 0; j < node[0]->num_output_pins - 1; j++) {
+            if (j < nodeo->num_output_pins)
+                remap_pin_to_new_node(nodeo->output_pins[j], node[0], j + 1);
+            else {
+                node[0]->output_pins[j + 1] = allocate_npin();
+                // Pad outputs with a unique and descriptive name to avoid collisions.
+                node[0]->output_pins[j + 1]->name = append_string("", "%s~dummy_output~%d~%d", node[0]->name, 0, j + 1);
+            }
+        }
+    } else {
+        for (j = 0; j < node[0]->num_output_pins - 2; j++) {
+            if (j < nodeo->num_output_pins)
+                remap_pin_to_new_node(nodeo->output_pins[j], node[0], j + 2);
+            else {
+                node[0]->output_pins[j + 2] = allocate_npin();
+                // Pad outputs with a unique and descriptive name to avoid collisions.
+                node[0]->output_pins[j + 2]->name = append_string("", "%s~dummy_output~%d~%d", node[0]->name, 0, j + 2);
+            }
+        }
+    }
+
+    if (count > 1 || configuration.adder_cin_global) {
+        // remap the output pins of each adder to nodeo
+        for (i = offset; i < count; i++) {
+            for (j = 0; j < node[i]->num_output_pins - 1; j++) {
+                if ((i * sizea + j - offset) < nodeo->num_output_pins)
+                    remap_pin_to_new_node(nodeo->output_pins[i * sizea + j - offset], node[i], j + 1);
+                else {
+                    node[i]->output_pins[j + 1] = allocate_npin();
+                    // Pad outputs with a unique and descriptive name to avoid collisions.
+                    node[i]->output_pins[j + 1]->name = append_string("", "%s~dummy_output~%d~%d", node[i]->name, i, j + 2);
+                }
+            }
+        }
+    }
+    node[count - 1]->output_pins[0] = allocate_npin();
+    // Pad outputs with a unique and descriptive name to avoid collisions.
+    node[count - 1]->output_pins[0]->name = append_string("", "%s~dummy_output~%d~%d", node[(count - 1)]->name, (count - 1), 0);
+    // connect_nodes(node[count - 1], (node[(count - 1)]->num_output_pins - 1), netlist->gnd_node, 0);
+    // }
+
+    /* Freeing the old node! */
+    cleanup_sub_old_node(nodeo, netlist);
+
+    vtr::free(node);
+    vtr::free(not_node);
+    return;
+}
+
+/*-------------------------------------------------------------------------
+ * (function: iterate_adders_for_sub)
+ *
+ * Drains sub_list. Every MINUS node whose widest operand reaches
+ *	min_threshold_adder is handed to split_adder_for_sub() together
+ *	with the number of chained adders computed from the operand
+ *	widths and the hard adder port size. Every other node is moved
+ *	to processed_adder_list without being split.
+ *	Nothing is done (sub_list is left as is) when no hard adder
+ *	model exists.
+ *-----------------------------------------------------------------------*/
+void iterate_adders_for_sub(netlist_t *netlist)
+{
+    /* Can only perform the optimisation if hard adders exist! */
+    if (hard_adders == NULL)
+        return;
+
+    const int offset = configuration.adder_cin_global ? 0 : 1;
+
+    // In hard block adder, the summand and addend are same size:
+    // both widths are read from the second input port of the model.
+    const int sizecin = hard_adders->inputs->size;
+    const int sizea = hard_adders->inputs->next->size;
+    const int sizeb = hard_adders->inputs->next->size;
+
+    oassert(sizecin == 1);
+
+    while (sub_list != NULL) {
+        // pop the head of the list
+        nnode_t *node = (nnode_t *)sub_list->data_vptr;
+        sub_list = delete_in_vptr_list(sub_list);
+
+        oassert(node != NULL);
+        oassert(node->type == MINUS);
+
+        // the second operand width is only read from port 1 when the node has exactly two input ports
+        const int a = node->input_port_sizes[0];
+        const int b = (node->num_input_port_sizes == 2) ? node->input_port_sizes[1] : node->input_port_sizes[0];
+        const int widest = (a < b) ? b : a;
+
+        // Store the node into processed_adder_list if the threshold is bigger than the widest operand
+        if (widest < min_threshold_adder) {
+            processed_adder_list = insert_in_vptr_list(processed_adder_list, node);
+            continue;
+        }
+
+        // how many subtractors base on a can split
+        const int counta = ((a + 1) % sizea == 0) ? (a + offset) / sizea : (a + 1) / sizea + 1;
+        // how many subtractors base on b can split
+        const int countb = ((b + 1) % sizeb == 0) ? (b + offset) / sizeb : (b + 1) / sizeb + 1;
+        // how many subtractors need to be split
+        const int count = (countb > counta) ? countb : counta;
+
+        subchaintotal++;
+        split_adder_for_sub(node, a, b, sizea, sizeb, 1, 1, count, netlist);
+    }
+}
+
+/**
+ *---------------------------------------------------------------------------------------------
+ * (function: instantiate_sub_w_borrow_block )
+ *
+ * @brief soft logic implementation of single bit subtraction
+ * with borrow_in and borrow_out
+ *
+ *   DIFF = (IN1 ^ IN2) ^ BIN
+ *   BOUT = (~IN1 & IN2) | (~(IN1 ^ IN2) & BIN)
+ *
+ * When the node has no borrow-in port, BIN is tied to the zero pin.
+ * The pins of the given node are moved onto the newly created gates and
+ * the node itself is freed before returning.
+ *
+ * @param node pointing to the single bit subtraction node to be replaced
+ * @param traverse_mark_number unique traversal mark for blif elaboration pass
+ * @param netlist pointer to the current netlist file
+ *-------------------------------------------------------------------------------------------*/
+void instantiate_sub_w_borrow_block(nnode_t *node, short traverse_mark_number, netlist_t *netlist)
+{
+    /* validate input port sizes */
+    oassert(node->input_port_sizes[0] == 1);
+    oassert(node->input_port_sizes[1] == 1);
+    /* validate output port sizes */
+    oassert(node->num_output_port_sizes == 2);
+    oassert(node->output_port_sizes[0] == 1);
+    oassert(node->output_port_sizes[1] == 1);
+
+    /*                                                                                                         *
+     * <SUB INTERNAL DESIGN>                                                                                   *
+     *                                                                                                         *
+     *       IN1 ----- \\‾‾``                                                                                  *
+     *           |      ||   ``                                                                                *
+     *           |      ||   '' --------------------------------------------------  \\‾‾``                     *
+     *           |      ||   ,,                |                                     ||   ``                   *
+     * IN2 ------0----  //__,,                 |                                     ||   '' ----- DIFF        *
+     *     |     |   (first_xor)               |                                     ||   ,,                   *
+     *     |     |                             |                       -----------  //__,,                     *
+     *     |     |                             |                       |          (second_xor)                 *
+     *     |     |                             |                       |                                       *
+     *     |     |                             |                       |                                       *
+     *     |     |                             |                       |                                       *
+     *     |     |      BIN -------------------0-----------------------|                                       *
+     *     |     |                             |   |                                                           *
+     *     |     |                             |   |                                                           *
+     *     |     |                             |   |     ___                                                   *
+     *     |     |                             |   |____|   ⎞                                                  *
+     *     |     |                             |        |    ⎞                                                 *
+     *     |     |                             |        |     )----------------                                *
+     *     |     |                             |__|\____|    ⎠                |            ____                *
+     *     |     |         ___                    |/    |___⎠                 |-----------⎞    \               *
+     *     |     |__|\____|   ⎞                     (first_xor_not)                        ⎞    ⎞              *
+     *     |        |/    |    ⎞                                                            |    )--- BOUT     *
+     *     |     (IN_not) |     )--------------------------------------------------------- ⎠    ⎠              *
+     *     |______________|    ⎠                                                          ⎠____/               *
+     *                    |___⎠                                                         (first_or)             *
+     *                  (first_and)                                                                            *
+     *                                                                                                         *
+     */
+
+    /**
+     * SUB ports:
+     *
+     * IN1:  1 bit input_port[0]
+     * IN2:  1 bit input_port[1]
+     * BIN:  1 bit input_port[2] (optional)
+     *
+     * DIFF: 1 bit output_port[0]
+     * BOUT: 1 bit output_port[1]
+     */
+
+    npin_t *IN1 = node->input_pins[0];
+    npin_t *IN2 = node->input_pins[1];
+    npin_t *BIN = (node->num_input_port_sizes == 3) ? node->input_pins[2] : NULL;
+
+    npin_t *BOUT = (node->num_output_port_sizes == 2) ? node->output_pins[1] : NULL;
+    npin_t *DIFF = node->output_pins[0];
+
+    /*******************************************************************************
+     ********************************** DIFFERENCE *********************************
+     *******************************************************************************/
+    /* creating the first xor */
+    nnode_t *xor1 = make_2port_gate(LOGICAL_XOR, 1, 1, 1, node, traverse_mark_number);
+    /* remapping IN1 as the first input to XOR */
+    remap_pin_to_new_node(IN1, xor1, 0);
+    /* remapping IN2 as the second input to XOR */
+    remap_pin_to_new_node(IN2, xor1, 1);
+    /* create the first xor output pin */
+    signal_list_t *xor1_outs = make_output_pins_for_existing_node(xor1, 1);
+    npin_t *xor1_out = xor1_outs->pins[0]->net->fanout_pins[0];
+
+    /* creating the second xor */
+    nnode_t *xor2 = make_2port_gate(LOGICAL_XOR, 1, 1, 1, node, traverse_mark_number);
+    /* hooking the xor1 output as the first input to the second XOR */
+    add_input_pin_to_node(xor2, xor1_out, 0);
+    /* remapping BIN as the second input to XOR (tied to zero when there is no borrow-in) */
+    if (BIN == NULL) {
+        add_input_pin_to_node(xor2, get_zero_pin(netlist), 1);
+    } else {
+        remap_pin_to_new_node(BIN, xor2, 1);
+    }
+    /* need to remap the DIFF as second xor output pin */
+    remap_pin_to_new_node(DIFF, xor2, 0);
+
+    /*******************************************************************************
+     ************************************* BORROW **********************************
+     *******************************************************************************/
+    /* creating not IN1 */
+    nnode_t *IN1_not = make_inverter(copy_input_npin(IN1), node, traverse_mark_number);
+    npin_t *IN1_not_out = IN1_not->output_pins[0]->net->fanout_pins[0];
+
+    /* creating the first and: (~IN1 & IN2) */
+    nnode_t *and1 = make_2port_gate(LOGICAL_AND, 1, 1, 1, node, traverse_mark_number);
+    /* hooking ~IN1 as the first input to the first AND */
+    add_input_pin_to_node(and1, IN1_not_out, 0);
+    /* IN2 itself already belongs to xor1, so the AND gets its own copy on the same net */
+    add_input_pin_to_node(and1, copy_input_npin(IN2), 1);
+    /* create the first and output pin */
+    signal_list_t *and1_outs = make_output_pins_for_existing_node(and1, 1);
+    npin_t *and1_out = and1_outs->pins[0]->net->fanout_pins[0];
+
+    /* creating not first_xor */
+    nnode_t *xor1_not = make_inverter(copy_input_npin(xor1_out), xor1, traverse_mark_number);
+    npin_t *xor1_not_out = xor1_not->output_pins[0]->net->fanout_pins[0];
+
+    /* creating the second_and: (BIN & ~(IN1 ^ IN2)) */
+    nnode_t *and2 = make_2port_gate(LOGICAL_AND, 1, 1, 1, node, traverse_mark_number);
+    /* BIN itself belongs to xor2, so the AND gets a copy (or a zero pin when there is no borrow-in) */
+    if (BIN == NULL) {
+        add_input_pin_to_node(and2, get_zero_pin(netlist), 0);
+    } else {
+        add_input_pin_to_node(and2, copy_input_npin(BIN), 0);
+    }
+    /* hooking ~(IN1 ^ IN2) as the second input to the second AND */
+    add_input_pin_to_node(and2, xor1_not_out, 1);
+    /* create the second_and output pin (on and2 itself; xor1_not already owns its output) */
+    signal_list_t *and2_outs = make_output_pins_for_existing_node(and2, 1);
+    npin_t *and2_out = and2_outs->pins[0]->net->fanout_pins[0];
+
+    /* creating the first_or: BOUT = and2 | and1 */
+    nnode_t *or1 = make_2port_gate(LOGICAL_OR, 1, 1, 1, node, traverse_mark_number);
+    /* hooking the second_and output as the first input to OR */
+    add_input_pin_to_node(or1, and2_out, 0);
+    /* hooking the first_and output as the second input to OR */
+    add_input_pin_to_node(or1, and1_out, 1);
+    if (BOUT == NULL) {
+        /* create the first_or output pin */
+        npin_t *or1_out1 = allocate_npin();
+        npin_t *or1_out2 = allocate_npin();
+        nnet_t *or1_net = allocate_nnet();
+        or1_net->name = make_full_ref_name(NULL, NULL, NULL, or1->name, 0);
+        /* hook the output pin into the node */
+        add_output_pin_to_node(or1, or1_out1, 0);
+        /* hook up new pin 1 into the new net */
+        add_driver_pin_to_net(or1_net, or1_out1);
+        /* hook up the new pin 2 to this new net */
+        add_fanout_pin_to_net(or1_net, or1_out2);
+    } else {
+        remap_pin_to_new_node(BOUT, or1, 0);
+    }
+
+    // CLEAN UP
+    free_signal_list(xor1_outs);
+    free_signal_list(and1_outs);
+    free_signal_list(and2_outs);
+    free_nnode(node);
+}
+
+/*-------------------------------------------------------------------------
+ * (function: clean_adders_for_sub)
+ *
+ * Empties the sub_list and processed_adder_list linked lists by
+ *	deleting their entries one by one until both are NULL.
+ *-----------------------------------------------------------------------*/
+void clean_adders_for_sub()
+{
+    // each step removes the current head and continues with whatever delete_in_vptr_list returns
+    for (; sub_list != NULL; sub_list = delete_in_vptr_list(sub_list)) {
+    }
+
+    for (; processed_adder_list != NULL; processed_adder_list = delete_in_vptr_list(processed_adder_list)) {
+    }
+}
+
+/**
+ * -------------------------------------------------------------------------
+ * (function: cleanup_sub_old_node)
+ *
+ * @brief <clean up nodeo, a high level MINUS node>
+ * Releases what is left of the old subtraction node once it has been split:
+ *  - every input pin slot of nodeo is cleared; a pin that still points back
+ *    to nodeo is deleted first
+ *  - every output pin that is still attached to a node gets its net joined
+ *    with the net of a zero pin (only when that net has at most one driver),
+ *    then the pin is freed and its slot in nodeo is cleared
+ *  - finally nodeo itself is freed
+ *
+ * @param nodeo representing the old subtraction node
+ * @param netlist representing the current netlist
+ *-----------------------------------------------------------------------*/
+static void cleanup_sub_old_node(nnode_t *nodeo, netlist_t *netlist)
+{
+    /* Disconnecting input pins from the old node side */
+    for (int in_idx = 0; in_idx < nodeo->num_input_pins; in_idx++) {
+        npin_t *old_input = nodeo->input_pins[in_idx];
+        // only pins still owned by nodeo are deleted here
+        if (old_input->node == nodeo)
+            delete_npin(old_input);
+
+        nodeo->input_pins[in_idx] = NULL;
+    }
+
+    /* connecting the extra output pins to the gnd node */
+    for (int out_idx = 0; out_idx < nodeo->num_output_pins; out_idx++) {
+        npin_t *leftover = nodeo->output_pins[out_idx];
+
+        // nothing to do for empty slots or pins without a node
+        if (leftover == NULL || leftover->node == NULL)
+            continue;
+
+        npin_t *gnd_pin = get_zero_pin(netlist);
+        // remember the zero pin's slot in its net before the nets are joined
+        const int gnd_slot = gnd_pin->pin_net_idx;
+
+        // the nets are only merged when the output net has at most one driver
+        if (leftover->net->num_driver_pins <= 1) {
+            /* join all fanouts of the output net with the zero pin's net */
+            join_nets(gnd_pin->net, leftover->net);
+
+            /* erase the net's pointer to the temporary zero pin */
+            gnd_pin->net->fanout_pins[gnd_slot] = NULL;
+        }
+
+        free_npin(gnd_pin);
+        free_npin(leftover);
+
+        /* Disconnecting output pins from the old node side */
+        nodeo->output_pins[out_idx] = NULL;
+    }
+
+    // CLEAN UP
+    free_nnode(nodeo);
+}
diff --git a/parmys-plugin/techlibs/adff2dff.v b/parmys-plugin/techlibs/adff2dff.v
new file mode 100644
index 000000000..bf34a02be
--- /dev/null
+++ b/parmys-plugin/techlibs/adff2dff.v
@@ -0,0 +1,45 @@
+// yosys -- Yosys Open SYnthesis Suite
+
+// Copyright (C) 2012  Claire Xenia Wolf <claire@yosyshq.com>
+
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+// Techmap replacement for "$adff" cells (selected by the techmap_celltype
+// attribute below). The reset value is muxed into the data path (NEXT_Q) and
+// Q is only written on the selected CLK edge, so the reset takes effect at the
+// next active clock edge instead of asynchronously.
+//
+// Parameters:
+//   WIDTH         - bit width of D and Q
+//   CLK_POLARITY  - non-zero: Q updates on posedge CLK, zero: on negedge CLK
+//   ARST_POLARITY - level of ARST that selects ARST_VALUE
+//   ARST_VALUE    - value loaded into Q while ARST is at ARST_POLARITY
+(* techmap_celltype = "$adff" *)
+module adff2dff (CLK, ARST, D, Q);
+	parameter WIDTH = 1;
+	parameter CLK_POLARITY = 1;
+	parameter ARST_POLARITY = 1;
+	parameter ARST_VALUE = 0;
+
+	input CLK, ARST;
+	(* force_downto *)
+	input [WIDTH-1:0] D;
+	(* force_downto *)
+	output reg [WIDTH-1:0] Q;
+	(* force_downto *)
+	reg [WIDTH-1:0] NEXT_Q;
+
+	// Yosys commands for techmap to run on this module after mapping
+	wire [1023:0] _TECHMAP_DO_ = "proc;;";
+
+	// Data-path mux: reset value when ARST is active, D otherwise
+	always @*
+		if (ARST == ARST_POLARITY)
+			NEXT_Q <= ARST_VALUE;
+		else
+			NEXT_Q <= D;
+
+	// The clock edge is chosen at elaboration time from CLK_POLARITY
+	if (CLK_POLARITY)
+		always @(posedge CLK)
+			Q <= NEXT_Q;
+	else
+		always @(negedge CLK)
+			Q <= NEXT_Q;
+endmodule
diff --git a/parmys-plugin/techlibs/adffe2dff.v b/parmys-plugin/techlibs/adffe2dff.v
new file mode 100644
index 000000000..266b546ab
--- /dev/null
+++ b/parmys-plugin/techlibs/adffe2dff.v
@@ -0,0 +1,51 @@
+// yosys -- Yosys Open SYnthesis Suite
+
+// Copyright (C) 2012  Claire Xenia Wolf <claire@yosyshq.com>
+
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+// Modified version of adff2dff for adffe
+//
+// Techmap replacement for "$adffe" cells (selected by the techmap_celltype
+// attribute below). The reset value is muxed into the data path (NEXT_Q) and
+// Q is only written on the selected CLK edge, so the reset takes effect at the
+// next active clock edge instead of asynchronously.
+//
+// Parameters:
+//   WIDTH         - bit width of D and Q
+//   CLK_POLARITY  - non-zero: Q updates on posedge CLK, zero: on negedge CLK
+//   ARST_POLARITY - level of ARST that selects ARST_VALUE
+//   EN_POLARITY   - level of EN that allows D to be stored
+//   ARST_VALUE    - value loaded into Q while ARST is at ARST_POLARITY
+(* techmap_celltype = "$adffe" *)
+module adffe2dff (CLK, ARST, EN, D, Q);
+	parameter WIDTH = 1;
+	parameter CLK_POLARITY = 1;
+	parameter ARST_POLARITY = 1;
+	parameter EN_POLARITY = 1;
+	parameter ARST_VALUE = 0;
+
+	input CLK, ARST, EN;
+	(* force_downto *)
+	input [WIDTH-1:0] D;
+	(* force_downto *)
+	output reg [WIDTH-1:0] Q;
+	(* force_downto *)
+	reg [WIDTH-1:0] NEXT_Q;
+
+	// Yosys commands for techmap to run on this module after mapping
+	wire [1023:0] _TECHMAP_DO_ = "proc;;";
+
+	// Data-path mux: reset value when ARST is active, D otherwise
+	always @*
+		if (ARST == ARST_POLARITY)
+			NEXT_Q <= ARST_VALUE;
+		else
+			NEXT_Q <= D;
+
+	// In $adffe the reset has priority over the enable, so Q must also be
+	// written while ARST is active even if EN is inactive; otherwise a reset
+	// issued with EN inactive would be lost.
+	if (CLK_POLARITY) begin
+		always @(posedge CLK)
+			if (EN == EN_POLARITY || ARST == ARST_POLARITY)
+				Q <= NEXT_Q;
+	end else begin
+		always @(negedge CLK)
+			if (EN == EN_POLARITY || ARST == ARST_POLARITY)
+				Q <= NEXT_Q;
+	end
+endmodule
diff --git a/parmys-plugin/techlibs/aldff2dff.v b/parmys-plugin/techlibs/aldff2dff.v
new file mode 100644
index 000000000..683c3ef86
--- /dev/null
+++ b/parmys-plugin/techlibs/aldff2dff.v
@@ -0,0 +1,47 @@
+// yosys -- Yosys Open SYnthesis Suite
+
+// Copyright (C) 2012  Claire Xenia Wolf <claire@yosyshq.com>
+// Copyright (C) 2022  Daniel Khadivi
+
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+// Techmap replacement for "$aldff" cells (selected by the techmap_celltype
+// attribute below). The load value AD is muxed into the data path (NEXT_Q) and
+// Q is only written on the selected CLK edge, so the load takes effect at the
+// next active clock edge instead of asynchronously.
+//
+// Parameters:
+//   WIDTH          - bit width of AD, D and Q
+//   CLK_POLARITY   - non-zero: Q updates on posedge CLK, zero: on negedge CLK
+//   ALOAD_POLARITY - level of ALOAD that selects AD
+(* techmap_celltype = "$aldff" *)
+module aldff2dff (CLK, ALOAD, AD, D, Q);
+	parameter WIDTH = 1;
+	parameter CLK_POLARITY = 1;
+	parameter ALOAD_POLARITY = 1;
+
+	input CLK, ALOAD;
+    (* force_downto *)
+	input [WIDTH-1:0] AD;
+	(* force_downto *)
+	input [WIDTH-1:0] D;
+	(* force_downto *)
+	output reg [WIDTH-1:0] Q;
+	(* force_downto *)
+	reg [WIDTH-1:0] NEXT_Q;
+
+	// Yosys commands for techmap to run on this module after mapping
+	wire [1023:0] _TECHMAP_DO_ = "proc;;";
+
+	// Data-path mux: AD when ALOAD is active, D otherwise
+	always @*
+		if (ALOAD == ALOAD_POLARITY)
+			NEXT_Q <= AD;
+		else
+			NEXT_Q <= D;
+
+	// The clock edge is chosen at elaboration time from CLK_POLARITY
+	if (CLK_POLARITY)
+		always @(posedge CLK)
+			Q <= NEXT_Q;
+	else
+		always @(negedge CLK)
+			Q <= NEXT_Q;
+endmodule
diff --git a/parmys-plugin/techlibs/aldffe2dff.v b/parmys-plugin/techlibs/aldffe2dff.v
new file mode 100644
index 000000000..e8842c630
--- /dev/null
+++ b/parmys-plugin/techlibs/aldffe2dff.v
@@ -0,0 +1,51 @@
+// yosys -- Yosys Open SYnthesis Suite
+
+// Copyright (C) 2012  Claire Xenia Wolf <claire@yosyshq.com>
+// Copyright (C) 2022  Daniel Khadivi
+
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+// Techmap replacement for "$aldffe" cells (selected by the techmap_celltype
+// attribute below). The load value AD is muxed into the data path (NEXT_Q) and
+// Q is only written on the selected CLK edge, so the load takes effect at the
+// next active clock edge instead of asynchronously.
+//
+// Parameters:
+//   WIDTH          - bit width of AD, D and Q
+//   CLK_POLARITY   - non-zero: Q updates on posedge CLK, zero: on negedge CLK
+//   ALOAD_POLARITY - level of ALOAD that selects AD
+//   EN_POLARITY    - level of EN that allows D to be stored
+(* techmap_celltype = "$aldffe" *)
+module aldffe2dff (CLK, ALOAD, AD, D, EN, Q);
+	parameter WIDTH = 1;
+	parameter CLK_POLARITY = 1;
+	parameter ALOAD_POLARITY = 1;
+	parameter EN_POLARITY = 1;
+
+	input CLK, ALOAD, EN;
+	(* force_downto *)
+	input [WIDTH-1:0] AD;
+	(* force_downto *)
+	input [WIDTH-1:0] D;
+	(* force_downto *)
+	output reg [WIDTH-1:0] Q;
+	(* force_downto *)
+	reg [WIDTH-1:0] NEXT_Q;
+
+	// Yosys commands for techmap to run on this module after mapping
+	wire [1023:0] _TECHMAP_DO_ = "proc;;";
+
+	// Data-path mux: AD when ALOAD is active, D otherwise
+	always @*
+		if (ALOAD == ALOAD_POLARITY)
+			NEXT_Q <= AD;
+		else
+			NEXT_Q <= D;
+
+	// In $aldffe the load has priority over the enable, so Q must also be
+	// written while ALOAD is active even if EN is inactive; otherwise a load
+	// issued with EN inactive would be lost.
+	if (CLK_POLARITY) begin
+		always @(posedge CLK)
+			if (EN == EN_POLARITY || ALOAD == ALOAD_POLARITY)
+				Q <= NEXT_Q;
+	end else begin
+		always @(negedge CLK)
+			if (EN == EN_POLARITY || ALOAD == ALOAD_POLARITY)
+				Q <= NEXT_Q;
+	end
+endmodule
diff --git a/parmys-plugin/techlibs/vtr_primitives.v b/parmys-plugin/techlibs/vtr_primitives.v
new file mode 100644
index 000000000..678af1ccd
--- /dev/null
+++ b/parmys-plugin/techlibs/vtr_primitives.v
@@ -0,0 +1,329 @@
+`timescale 1ps/1ps
+//Overview
+//========
+//This file contains the verilog primitives produced by VPR's
+//post-synthesis netlist writer.
+//
+//If you wish to do back-annotated timing simulation you will need
+//to link with this file during simulation.
+//
+//To ensure correct results when performing back-annotation with
+//Modelsim see the notes at the end of this comment.
+//
+//Specifying Timing Edges
+//=======================
+//To perform timing back-annotation the simulator must know the delay
+//dependencies (timing edges) between the ports on each primitive.
+//
+//During back-annotation the simulator will attempt to annotate SDF delay
+//values onto the timing edges.  It should give a warning if it was unable
+//to find a matching edge.
+//
+//
+//In Verilog timing edges are specified using a specify block (delimited by the
+//'specify' and 'endspecify' keywords).
+//
+//Inside the specify block a set of specify statements are used to describe
+//the timing edges.  For example consider:
+//
+//  input [1:0] in;
+//  output [1:0] out;
+//  specify
+//      (in[0] => out[0]) = "";
+//      (in[1] => out[1]) = "";
+//  endspecify
+//
+//This states that there are the following timing edges (dependencies):
+//  * from in[0] to out[0]
+//  * from in[1] to out[1]
+//
+//We could (according to the Verilog standard) equivalently have used:
+//
+//  input [1:0] in;
+//  output [1:0] out;
+//  specify
+//      (in => out) = "";
+//  endspecify
+//
+//However NOT ALL SIMULATORS TREAT MULTIBIT SPECIFY STATEMENTS CORRECTLY,
+//at least by default (in particular ModelSim, see notes below).
+//
+//The previous examples use the 'parallel connection' operator '=>', which
+//creates parallel edges between the two operands (i.e. bit 0 to bit 0, bit
+//1 to bit 1 etc.).  Note that both operands must have the same bit-width.
+//
+//Verilog also supports the 'full connection' operator '*>' which will create
+//a fully connected set of edges (e.g. from all-to-all). It does not require
+//both operands to have the same bit-width. For example:
+//
+//  input [1:0] in;
+//  output [2:0] out;
+//  specify
+//      (in *> out) = "";
+//  endspecify
+//
+//states that there are the following timing edges (dependencies):
+//  * from in[0] to out[0]
+//  * from in[0] to out[1]
+//  * from in[0] to out[2]
+//  * from in[1] to out[0]
+//  * from in[1] to out[1]
+//  * from in[1] to out[2]
+//
+//For more details on specify blocks see Section 14 "Specify Blocks" of the
+//Verilog standard (IEEE 1364-2005).
+//
+//Back-annotation with Modelsim
+//=============================
+//
+//Ensuring Multi-bit Specifies are Handled Correctly: Bit-blasting
+//----------------------------------------------------------------
+//
+//ModelSim (tested on Modelsim SE 10.4c) ignores multi-bit specify statements
+//by default.
+//
+//This causes SDF annotation errors such as:
+//
+//  vsim-SDF-3261: Failed to find matching specify module path
+//
+//To force Modelsim to correctly interpret multi-bit specify statements you
+//should provide the '+bitblast' option to the vsim executable.
+//This forces it to apply specify statements using multi-bit operands to
+//each bit of the operand (i.e. according to the Verilog standard).
+//
+//Confirming back-annotation is occurring correctly
+//-------------------------------------------------
+//
+//Another useful option is '+sdf_verbose' which produces extra output about
+//SDF annotation, which can be used to verify annotation occurred correctly.
+//
+//For example:
+//
+//      Summary of Verilog design objects annotated: 
+//      
+//           Module path delays =          5
+//      
+//       ******************************************************************************
+//      
+//       Summary of constructs read: 
+//      
+//                 IOPATH =          5
+//
+//shows that all 5 IOPATH constructs in the SDF were annotated to the verilog
+//design.
+//
+//Example vsim Command Line
+//--------------------------
+//The following is an example command-line to vsim (where 'tb' is the name of your
+//testbench):
+//
+//  vsim -t 1ps -L rtl_work -L work -voptargs="+acc" +sdf_verbose +bitblast tb
+
+
+
+
+//K-input Look-Up Table: 'out' is the bit of LUT_MASK selected by the value of 'in'
+module LUT_K #(
+    //The Look-up Table size (number of inputs)
+    parameter K = 1, 
+
+    //The lut mask.  
+    //Left-most (MSB) bit corresponds to all inputs logic one. 
+    //Defaults to always false.
+    parameter LUT_MASK={2**K{1'b0}} 
+) (
+    input [K-1:0] in, //LUT inputs, used together as the bit index into LUT_MASK
+    output out        //The selected mask bit
+);
+    //Timing edges from every bit of 'in' to 'out' (full connection); the delay is left empty here
+    specify
+        (in *> out) = "";
+    endspecify
+
+    assign out = LUT_MASK[in];
+
+endmodule
+
+//D-FlipFlop module: rising-edge triggered, no reset or enable, Q starts at INITIAL_VALUE
+module DFF #(
+    parameter INITIAL_VALUE=1'b0    
+) (
+    input clk,
+    input D,
+    output reg Q
+);
+
+    specify
+        (clk => Q) = "";            //clock-to-Q timing edge
+        $setup(D, posedge clk, ""); //setup check of D against the rising clk edge
+        $hold(posedge clk, D, "");  //hold check of D against the rising clk edge
+    endspecify
+    //Value of Q at simulation start
+    initial begin
+        Q <= INITIAL_VALUE;
+    end
+    //Capture D on every rising clk edge
+    always@(posedge clk) begin
+        Q <= D;
+    end
+endmodule
+
+//Routing fpga_interconnect module: passes datain to dataout and declares the timing edge between them
+module fpga_interconnect(
+    input datain,
+    output dataout
+);
+
+    specify
+        (datain=>dataout)=""; //timing edge from datain to dataout
+    endspecify
+
+    assign dataout = datain;
+
+endmodule
+
+
+//2-to-1 mux module: z = x when select is 0, z = y when select is 1 (no specify block, so no timing edges)
+module mux(
+    input select, //0 selects x, 1 selects y
+    input x,
+    input y,
+    output z
+);
+
+    assign z = (x & ~select) | (y & select);
+
+endmodule
+
+//n-bit adder with carry in and carry out: {cout, sumout} = a + b + cin
+module adder #(
+    parameter WIDTH = 1   
+) (
+    input [WIDTH-1:0] a, 
+    input [WIDTH-1:0] b, 
+    input cin, 
+    output cout, 
+    output [WIDTH-1:0] sumout);
+   //Timing edges from every input (a, b, cin) to both outputs (sumout, cout)
+   specify
+      (a*>sumout)="";
+      (b*>sumout)="";
+      (cin*>sumout)="";
+      (a*>cout)="";
+      (b*>cout)="";
+      (cin=>cout)="";
+   endspecify
+   
+   assign {cout, sumout} = a + b + cin; //cout receives bit WIDTH of the sum, sumout the low WIDTH bits
+   
+endmodule
+   
+//nxn multiplier module: out = a * b, with out twice as wide as each operand
+module multiply #(
+    //The width of input signals
+    parameter WIDTH = 1
+) (
+    input [WIDTH-1:0] a,
+    input [WIDTH-1:0] b,
+    output [2*WIDTH-1:0] out
+);
+    //Timing edges from every bit of a and b to every bit of out
+    specify
+        (a *> out) = "";
+        (b *> out) = "";
+    endspecify
+
+    assign out = a * b;
+
+endmodule // multiply
+
+//single_port_ram module: synchronous RAM with 2**ADDR_WIDTH words of DATA_WIDTH bits and one read/write port
+(* keep_hierarchy *)
+module single_port_ram #(
+    parameter ADDR_WIDTH = 1,
+    parameter DATA_WIDTH = 1
+) (
+    input clk,
+    input [ADDR_WIDTH-1:0] addr,    //word address used for both the write and the read
+    input [DATA_WIDTH-1:0] data,    //write data
+    input we,                       //write enable, active high
+    output reg [DATA_WIDTH-1:0] out //read data, updated on the rising clk edge
+);
+
+    localparam MEM_DEPTH = 2 ** ADDR_WIDTH;
+
+    reg [DATA_WIDTH-1:0] Mem[MEM_DEPTH-1:0];
+    //Clock-to-out timing edge plus setup/hold checks of all inputs against the rising clk edge
+    specify
+        (clk*>out)="";
+        $setup(addr, posedge clk, "");
+        $setup(data, posedge clk, "");
+        $setup(we, posedge clk, "");
+        $hold(posedge clk, addr, "");
+        $hold(posedge clk, data, "");
+        $hold(posedge clk, we, "");
+    endspecify
+   
+    always@(posedge clk) begin
+        if(we) begin
+            Mem[addr] = data;
+        end
+    	out = Mem[addr]; //New data read-during write behaviour (blocking assignments)
+    end
+   
+endmodule // single_port_RAM
+
+//dual_port_ram module: synchronous RAM with 2**ADDR_WIDTH words of DATA_WIDTH bits and two read/write ports on one clock
+(* keep_hierarchy *)
+module dual_port_ram #(
+    parameter ADDR_WIDTH = 1,
+    parameter DATA_WIDTH = 1
+) (
+    input clk,
+
+    input [ADDR_WIDTH-1:0] addr1,     //port 1 word address
+    input [ADDR_WIDTH-1:0] addr2,     //port 2 word address
+    input [DATA_WIDTH-1:0] data1,     //port 1 write data
+    input [DATA_WIDTH-1:0] data2,     //port 2 write data
+    input we1,                        //port 1 write enable, active high
+    input we2,                        //port 2 write enable, active high
+    output reg [DATA_WIDTH-1:0] out1, //port 1 read data, updated on the rising clk edge
+    output reg [DATA_WIDTH-1:0] out2  //port 2 read data, updated on the rising clk edge
+);
+
+    localparam MEM_DEPTH = 2 ** ADDR_WIDTH;
+
+    reg [DATA_WIDTH-1:0] Mem[MEM_DEPTH-1:0];
+    //Clock-to-out timing edges plus setup/hold checks of all inputs against the rising clk edge
+    specify
+        (clk*>out1)="";
+        (clk*>out2)="";
+        $setup(addr1, posedge clk, "");
+        $setup(addr2, posedge clk, "");
+        $setup(data1, posedge clk, "");
+        $setup(data2, posedge clk, "");
+        $setup(we1, posedge clk, "");
+        $setup(we2, posedge clk, "");
+        $hold(posedge clk, addr1, "");
+        $hold(posedge clk, addr2, "");
+        $hold(posedge clk, data1, "");
+        $hold(posedge clk, data2, "");
+        $hold(posedge clk, we1, "");
+        $hold(posedge clk, we2, "");
+    endspecify
+   
+    always@(posedge clk) begin //Port 1
+        if(we1) begin
+            Mem[addr1] = data1;
+        end
+        out1 = Mem[addr1]; //New data read-during write behaviour (blocking assignments)
+    end
+    //NOTE(review): both ports write the shared Mem from separate always blocks, so same-edge accesses to one address depend on simulator scheduling order
+    always@(posedge clk) begin //Port 2
+        if(we2) begin
+            Mem[addr2] = data2;
+        end
+        out2 = Mem[addr2]; //New data read-during write behaviour (blocking assignments)
+    end
+   
+endmodule // dual_port_ram
diff --git a/parmys-plugin/tests/Makefile b/parmys-plugin/tests/Makefile
new file mode 100644
index 000000000..7d1e20505
--- /dev/null
+++ b/parmys-plugin/tests/Makefile
@@ -0,0 +1,28 @@
+# Copyright 2020-2022 F4PGA Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+TESTS = raygentop \
+        eltwise_layer
+# Shared test rules; each <test>_verify value below is presumably the check command that file runs for the test.
+include $(shell pwd)/../../Makefile_test.common
+
+raygentop_verify = true
+eltwise_layer_verify = true
+
+.PHONY: clean clean_modules
+clean_modules: # delete files generated by the test runs; -exec ... + also copes with whitespace in paths
+	@find . \( -name "net.dot" -or -name "*.yosys.blif" \) -exec rm -rf {} +
+clean: clean_modules
diff --git a/parmys-plugin/tests/eltwise_layer/eltwise_layer.tcl b/parmys-plugin/tests/eltwise_layer/eltwise_layer.tcl
new file mode 100644
index 000000000..b9b22ef10
--- /dev/null
+++ b/parmys-plugin/tests/eltwise_layer/eltwise_layer.tcl
@@ -0,0 +1,87 @@
+yosys -import
+# Load the parmys plugin; the second import presumably exposes the plugin's commands (parmys_arch, parmys) to Tcl.
+plugin -i parmys
+
+yosys -import
+# Read the VTR primitive models; keep_hierarchy keeps the RAM primitives from being flattened later.
+read_verilog -nomem2reg +/parmys/vtr_primitives.v
+
+setattr -mod -set keep_hierarchy 1 single_port_ram
+
+setattr -mod -set keep_hierarchy 1 dual_port_ram
+
+puts "Using parmys as partial mapper"
+# Architecture file handed to the plugin (the same file is passed to the parmys command below).
+parmys_arch -a k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
+# Read the design under test.
+read_verilog -sv -nolatches hard_block_include.v eltwise_layer.v
+
+
+# Check that there are no combinational loops
+
+scc -select
+
+select -assert-none %
+
+select -clear
+# Elaborate the design hierarchy and let Yosys pick the top module.
+hierarchy -check -auto-top -purge_lib
+# Coarse-grain synthesis: clean up, convert processes ("procs" is the Tcl name of Yosys "proc"), extract FSMs, optimize.
+opt_expr
+
+opt_clean
+
+check
+
+opt -nodffe -nosdff
+
+procs -norom
+
+fsm
+
+opt
+
+wreduce
+
+peepopt
+
+opt_clean
+
+share
+
+opt -full
+# Collect memories without mapping them to logic, then flatten the hierarchy.
+memory -nomap
+
+flatten
+
+opt -full
+# Replace flip-flops with asynchronous reset/load using the techmap rules shipped with parmys.
+techmap -map +/parmys/adff2dff.v
+
+techmap -map +/parmys/adffe2dff.v
+
+techmap -map +/parmys/aldff2dff.v
+
+techmap -map +/parmys/aldffe2dff.v
+
+opt -full
+# Partial mapping with parmys; NOTE(review): the meaning of -nopass and the contents of odin_config.xml are not visible here.
+parmys -a k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml -nopass -c odin_config.xml
+
+opt -full
+# Map the remaining cells with the default techmap rules and unmap enable/sync-reset from flip-flops.
+techmap 
+
+opt -fast
+
+dffunmap
+
+opt -fast -noff
+# Print design statistics to stdout and write the result as BLIF.
+tee -o /dev/stdout stat
+
+hierarchy -check -auto-top -purge_lib
+
+write_blif -true + vcc -false + gnd -undef + unconn -blackbox eltwise_layer.yosys.blif
+
diff --git a/parmys-plugin/tests/eltwise_layer/eltwise_layer.v b/parmys-plugin/tests/eltwise_layer/eltwise_layer.v
new file mode 100644
index 000000000..11199fb90
--- /dev/null
+++ b/parmys-plugin/tests/eltwise_layer/eltwise_layer.v
@@ -0,0 +1,3057 @@
+//////////////////////////////////////////////////////////////////////////////
+// Author: Aman Arora
+//////////////////////////////////////////////////////////////////////////////
+
+`timescale 1ns/1ns
+///////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////
+// Eltwise layer
+///////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////
+// Precision BF16
+//Each PE has 1 multiplier, an adder and a subtractor.
+//There are 4 PEs in each compute unit. 
+//There are 6 such compute units in the whole layer.
+//So, total compute throughput is 24 ops per cycle.
+//The "per cycle" is because the adder/sub/mul are
+//pipelined. Although they may take more than 1 cycle,
+//in the steady state one result will come out every cycle.
+//
+//There are 6 BRAMs for each input operand. Each location in a BRAM
+//stores 4 inputs. So, the read bandwidth is 24 elements
+//per cycle. This matches the compute throughput. So, we
+//utilize each PE every cycle. There are 6 BRAMs for output.
+//We can write 4 elements per cycle.
+//
+//There are two modes of operation: 
+// 1. Vector/Matrix mode
+//    In this mode, both operands are matrices/vectors.
+//    They are read from BRAMs (A and B). The operation 
+//    selected (using the op input) is performed. This mode
+//    can be used for operations such as residual add, or 
+//    dropout.
+// 2. Scalar mode
+//    In this mode, one operand is a matrix/vector and the
+//    other operand is a scalar. It could be the mean or 
+//    variance of a normalization layer for example. The 
+//    scalar input is provided from the top-level of the design
+//    so it can be easily modified at runtime.
+//
+//Important inputs:
+//   mode: 
+//      0 -> Both operands (A and B) are matrices/vectors. Result is a matrix/vector.
+//      1 -> Operand A is matrix/vector. Operand B is scalar. Result is a matrix/vector.
+//   op:
+//      00 -> Addition
+//      01 -> Subtraction
+//      10 -> Multiplication (NOTE(review): the comment on the 'op' port says 11 -> Mul; confirm which is right)
+//
+//The whole design can operate on 24xN matrices.  
+//Typically, to use this design, we'd break a large input
+//matrix into 24 column sections and process the matrix 
+//section by section. The number of rows will be programmed
+//in the "iterations" register in the design.
+
+
+`define BFLOAT16 
+// BFLOAT16 is defined above, so the ifdef below selects the bfloat16 field widths.
+// IEEE Half Precision => EXPONENT = 5, MANTISSA = 10
+// BFLOAT16 => EXPONENT = 8, MANTISSA = 7 
+
+`ifdef BFLOAT16
+`define EXPONENT 8
+`define MANTISSA 7
+`else // for ieee half precision fp16
+`define EXPONENT 5
+`define MANTISSA 10
+`endif
+// Element width = sign + exponent + mantissa, which is 16 bits for either format.
+`define SIGN 1
+`define DWIDTH (`SIGN+`EXPONENT+`MANTISSA)
+// Memory and datapath sizing: MEM_SIZE equals 2**AWIDTH; each BRAM word is CU_SIZE*DWIDTH bits wide.
+`define AWIDTH 10
+`define MEM_SIZE 1024
+`define DESIGN_SIZE 12
+`define CU_SIZE 4
+`define MASK_WIDTH 4
+`define MEM_ACCESS_LATENCY 1
+// Widths of the configuration register interface (PADDR, PWDATA/PRDATA) and of the iterations register.
+`define REG_DATAWIDTH 32
+`define REG_ADDRWIDTH 8
+`define ITERATIONS_WIDTH 32
+// Addresses of the configuration registers.
+`define REG_STDN_ADDR 32'h4
+`define REG_MATRIX_A_ADDR 32'he
+`define REG_MATRIX_B_ADDR 32'h12
+`define REG_MATRIX_C_ADDR 32'h16
+`define REG_VALID_MASK_A_ADDR 32'h20
+`define REG_VALID_MASK_B_ADDR 32'h5c
+
+`define REG_ITERATIONS_ADDR 32'h40
+
+//This is the pipeline depth of the PEs (adder/mult)
+`define PE_PIPELINE_DEPTH 5
+
+module eltwise_layer(
+  input clk,
+  input clk_mem,
+  input resetn,
+  input pe_resetn,
+  input        [`REG_ADDRWIDTH-1:0] PADDR,
+  input                             PWRITE,
+  input                             PSEL,
+  input                             PENABLE,
+  input        [`REG_DATAWIDTH-1:0] PWDATA,
+  output reg   [`REG_DATAWIDTH-1:0] PRDATA,
+  output reg                        PREADY,
+  input [`DWIDTH-1:0] scalar_inp,
+  input mode, // mode==0 -> vector/matrix, mode==1 -> scalar
+  input  [1:0] op, //op==11 -> Mul, op==01 -> Sub, op==00 -> Add
+  input  [7:0] bram_select,
+  input  [`AWIDTH-1:0] bram_addr_ext,
+  output reg [`CU_SIZE*`DWIDTH-1:0] bram_rdata_ext,
+  input  [`CU_SIZE*`DWIDTH-1:0] bram_wdata_ext,
+  input  [`CU_SIZE-1:0] bram_we_ext
+);
+
+
+  wire PCLK;
+  assign PCLK = clk;
+  wire PRESETn;
+  assign PRESETn = resetn;
+  reg start_reg;
+  reg clear_done_reg;
+
+  //Dummy register to sync all other invalid/unimplemented addresses
+  reg [`REG_DATAWIDTH-1:0] reg_dummy;
+  
+  reg [`AWIDTH-1:0] bram_addr_a_0_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_0_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_0_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_a_0_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_a_2_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_2_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_2_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_a_2_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_a_4_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_4_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_4_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_a_4_ext;
+
+  reg [`AWIDTH-1:0] bram_addr_a_1_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_1_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_1_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_a_1_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_a_3_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_3_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_3_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_a_3_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_a_5_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_5_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_5_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_a_5_ext;
+
+    
+  reg [`AWIDTH-1:0] bram_addr_b_0_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_0_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_0_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_b_0_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_b_1_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_1_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_1_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_b_1_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_b_2_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_2_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_2_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_b_2_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_b_3_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_3_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_3_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_b_3_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_b_4_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_4_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_4_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_b_4_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_b_5_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_5_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_5_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_b_5_ext;
+
+  reg [`AWIDTH-1:0] bram_addr_c_0_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_0_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_0_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_c_0_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_c_1_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_1_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_1_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_c_1_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_c_2_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_2_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_2_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_c_2_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_c_3_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_3_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_3_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_c_3_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_c_4_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_4_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_4_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_c_4_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_c_5_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_5_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_5_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_c_5_ext;
+    
+	wire [`AWIDTH-1:0] bram_addr_a_0;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_0;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_0;
+	wire [`MASK_WIDTH-1:0] bram_we_a_0;
+	wire bram_en_a_0;
+    
+	wire [`AWIDTH-1:0] bram_addr_a_2;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_2;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_2;
+	wire [`MASK_WIDTH-1:0] bram_we_a_2;
+	wire bram_en_a_2;
+    
+	wire [`AWIDTH-1:0] bram_addr_a_4;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_4;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_4;
+	wire [`MASK_WIDTH-1:0] bram_we_a_4;
+	wire bram_en_a_4;
+
+	wire [`AWIDTH-1:0] bram_addr_a_1;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_1;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_1;
+	wire [`MASK_WIDTH-1:0] bram_we_a_1;
+	wire bram_en_a_1;
+    
+	wire [`AWIDTH-1:0] bram_addr_a_3;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_3;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_3;
+	wire [`MASK_WIDTH-1:0] bram_we_a_3;
+	wire bram_en_a_3;
+    
+	wire [`AWIDTH-1:0] bram_addr_a_5;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_5;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_5;
+	wire [`MASK_WIDTH-1:0] bram_we_a_5;
+	wire bram_en_a_5;
+    
+	wire [`AWIDTH-1:0] bram_addr_b_0;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_0;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_0;
+	wire [`MASK_WIDTH-1:0] bram_we_b_0;
+	wire bram_en_b_0;
+    
+	wire [`AWIDTH-1:0] bram_addr_b_1;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_1;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_1;
+	wire [`MASK_WIDTH-1:0] bram_we_b_1;
+	wire bram_en_b_1;
+    
+	wire [`AWIDTH-1:0] bram_addr_b_2;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_2;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_2;
+	wire [`MASK_WIDTH-1:0] bram_we_b_2;
+	wire bram_en_b_2;
+    
+	wire [`AWIDTH-1:0] bram_addr_b_3;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_3;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_3;
+	wire [`MASK_WIDTH-1:0] bram_we_b_3;
+	wire bram_en_b_3;
+
+  wire [`AWIDTH-1:0] bram_addr_b_4;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_4;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_4;
+	wire [`MASK_WIDTH-1:0] bram_we_b_4;
+	wire bram_en_b_4;
+    
+	wire [`AWIDTH-1:0] bram_addr_b_5;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_5;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_5;
+	wire [`MASK_WIDTH-1:0] bram_we_b_5;
+	wire bram_en_b_5;
+
+	wire [`AWIDTH-1:0] bram_addr_c_0;
+	wire [`AWIDTH-1:0] bram_addr_c_1;
+	wire [`AWIDTH-1:0] bram_addr_c_2;
+	wire [`AWIDTH-1:0] bram_addr_c_3;
+	wire [`AWIDTH-1:0] bram_addr_c_4;
+	wire [`AWIDTH-1:0] bram_addr_c_5;
+
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_0;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_1;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_2;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_3;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_4;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_5;
+
+	wire [`MASK_WIDTH-1:0] bram_we_c_0;
+	wire [`MASK_WIDTH-1:0] bram_we_c_1;
+	wire [`MASK_WIDTH-1:0] bram_we_c_2;
+	wire [`MASK_WIDTH-1:0] bram_we_c_3;
+	wire [`MASK_WIDTH-1:0] bram_we_c_4;
+	wire [`MASK_WIDTH-1:0] bram_we_c_5;
+    
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_0;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_1;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_2;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_3;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_4;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_5;
+
+  always @ (posedge clk) begin // external BRAM port: route addr/wdata/we to, and rdata from, the bank chosen by bram_select
+    case (bram_select)
+      // 0-5: matrix A banks in the order 0,2,4,1,3,5. Banks that are not selected keep their last registered values.
+      0: begin
+      bram_addr_a_0_ext <= bram_addr_ext;
+      bram_wdata_a_0_ext <= bram_wdata_ext;
+      bram_we_a_0_ext <= bram_we_ext;
+      bram_rdata_ext <= bram_rdata_a_0_ext;
+      end
+
+      1: begin
+      bram_addr_a_2_ext <= bram_addr_ext;
+      bram_wdata_a_2_ext <= bram_wdata_ext;
+      bram_we_a_2_ext <= bram_we_ext;
+      bram_rdata_ext <= bram_rdata_a_2_ext;
+      end
+
+      2: begin
+      bram_addr_a_4_ext <= bram_addr_ext;
+      bram_wdata_a_4_ext <= bram_wdata_ext;
+      bram_we_a_4_ext <= bram_we_ext;
+      bram_rdata_ext <= bram_rdata_a_4_ext;
+      end
+
+      3: begin
+      bram_addr_a_1_ext <= bram_addr_ext;
+      bram_wdata_a_1_ext <= bram_wdata_ext;
+      bram_we_a_1_ext <= bram_we_ext;
+      bram_rdata_ext <= bram_rdata_a_1_ext;
+      end
+
+      4: begin
+      bram_addr_a_3_ext <= bram_addr_ext;
+      bram_wdata_a_3_ext <= bram_wdata_ext;
+      bram_we_a_3_ext <= bram_we_ext;
+      bram_rdata_ext <= bram_rdata_a_3_ext;
+      end
+
+      5: begin
+      bram_addr_a_5_ext <= bram_addr_ext;
+      bram_wdata_a_5_ext <= bram_wdata_ext;
+      bram_we_a_5_ext <= bram_we_ext;
+      bram_rdata_ext <= bram_rdata_a_5_ext;
+      end
+      // 6-11: matrix B banks 0-5
+      6: begin // non-blocking like every other bank, so all *_ext registers update at the same clk edge without races
+      bram_addr_b_0_ext <= bram_addr_ext;
+      bram_wdata_b_0_ext <= bram_wdata_ext;
+      bram_we_b_0_ext <= bram_we_ext;
+      bram_rdata_ext <= bram_rdata_b_0_ext;
+      end
+
+      7: begin
+      bram_addr_b_1_ext <= bram_addr_ext;
+      bram_wdata_b_1_ext <= bram_wdata_ext;
+      bram_we_b_1_ext <= bram_we_ext;
+      bram_rdata_ext <= bram_rdata_b_1_ext;
+      end
+
+      8: begin
+      bram_addr_b_2_ext <= bram_addr_ext;
+      bram_wdata_b_2_ext <= bram_wdata_ext;
+      bram_we_b_2_ext <= bram_we_ext;
+      bram_rdata_ext <= bram_rdata_b_2_ext;
+      end
+
+      9: begin
+      bram_addr_b_3_ext <= bram_addr_ext;
+      bram_wdata_b_3_ext <= bram_wdata_ext;
+      bram_we_b_3_ext <= bram_we_ext;
+      bram_rdata_ext <= bram_rdata_b_3_ext;
+      end
+
+      10: begin
+      bram_addr_b_4_ext <= bram_addr_ext;
+      bram_wdata_b_4_ext <= bram_wdata_ext;
+      bram_we_b_4_ext <= bram_we_ext;
+      bram_rdata_ext <= bram_rdata_b_4_ext;
+      end
+
+      11: begin
+      bram_addr_b_5_ext <= bram_addr_ext;
+      bram_wdata_b_5_ext <= bram_wdata_ext;
+      bram_we_b_5_ext <= bram_we_ext;
+      bram_rdata_ext <= bram_rdata_b_5_ext;
+      end
+      // 12-17: matrix C banks 0-5
+      12: begin
+      bram_addr_c_0_ext <= bram_addr_ext;
+      bram_wdata_c_0_ext <= bram_wdata_ext;
+      bram_we_c_0_ext <= bram_we_ext;
+      bram_rdata_ext <= bram_rdata_c_0_ext;
+      end
+
+      13: begin
+      bram_addr_c_1_ext <= bram_addr_ext;
+      bram_wdata_c_1_ext <= bram_wdata_ext;
+      bram_we_c_1_ext <= bram_we_ext;
+      bram_rdata_ext <= bram_rdata_c_1_ext;
+      end
+
+      14: begin
+      bram_addr_c_2_ext <= bram_addr_ext;
+      bram_wdata_c_2_ext <= bram_wdata_ext;
+      bram_we_c_2_ext <= bram_we_ext;
+      bram_rdata_ext <= bram_rdata_c_2_ext;
+      end
+
+      15: begin
+      bram_addr_c_3_ext <= bram_addr_ext;
+      bram_wdata_c_3_ext <= bram_wdata_ext;
+      bram_we_c_3_ext <= bram_we_ext;
+      bram_rdata_ext <= bram_rdata_c_3_ext;
+      end
+
+      16: begin
+      bram_addr_c_4_ext <= bram_addr_ext;
+      bram_wdata_c_4_ext <= bram_wdata_ext;
+      bram_we_c_4_ext <= bram_we_ext;
+      bram_rdata_ext <= bram_rdata_c_4_ext;
+      end
+
+      17: begin
+      bram_addr_c_5_ext <= bram_addr_ext;
+      bram_wdata_c_5_ext <= bram_wdata_ext;
+      bram_we_c_5_ext <= bram_we_ext;
+      bram_rdata_ext <= bram_rdata_c_5_ext;
+      end
+      // NOTE(review): any other select value still drives the C bank 5 port (write enable included) and reads back 0; confirm intended.
+      default: begin
+      bram_addr_c_5_ext <= bram_addr_ext;
+      bram_wdata_c_5_ext <= bram_wdata_ext;
+      bram_we_c_5_ext <= bram_we_ext;
+      bram_rdata_ext <= 0;
+      end
+    endcase
+  end
+  
+/////////////////////////////////////////////////
+// BRAMs to store matrix A
+/////////////////////////////////////////////////
+
+
+  // BRAM matrix A 0
+ram matrix_A_0(
+  .addr0(bram_addr_a_0),
+  .d0(bram_wdata_a_0), 
+  .we0(bram_we_a_0), 
+  .q0(bram_rdata_a_0), 
+  .addr1(bram_addr_a_0_ext),
+  .d1(bram_wdata_a_0_ext), 
+  .we1(bram_we_a_0_ext), 
+  .q1(bram_rdata_a_0_ext), 
+  .clk(clk_mem));
+  	
+  // BRAM matrix A 2
+ram matrix_A_2(
+  .addr0(bram_addr_a_2),
+  .d0(bram_wdata_a_2), 
+  .we0(bram_we_a_2), 
+  .q0(bram_rdata_a_2), 
+  .addr1(bram_addr_a_2_ext),
+  .d1(bram_wdata_a_2_ext), 
+  .we1(bram_we_a_2_ext), 
+  .q1(bram_rdata_a_2_ext), 
+  .clk(clk_mem));
+  	
+  // BRAM matrix A 4
+ram matrix_A_4(
+  .addr0(bram_addr_a_4),
+  .d0(bram_wdata_a_4), 
+  .we0(bram_we_a_4), 
+  .q0(bram_rdata_a_4), 
+  .addr1(bram_addr_a_4_ext),
+  .d1(bram_wdata_a_4_ext), 
+  .we1(bram_we_a_4_ext), 
+  .q1(bram_rdata_a_4_ext), 
+  .clk(clk_mem));
+
+
+    // BRAM matrix A 1
+ram matrix_A_1(
+  .addr0(bram_addr_a_1),
+  .d0(bram_wdata_a_1), 
+  .we0(bram_we_a_1), 
+  .q0(bram_rdata_a_1), 
+  .addr1(bram_addr_a_1_ext),
+  .d1(bram_wdata_a_1_ext), 
+  .we1(bram_we_a_1_ext), 
+  .q1(bram_rdata_a_1_ext), 
+  .clk(clk_mem));
+  	
+  // BRAM matrix A 3
+ram matrix_A_3(
+  .addr0(bram_addr_a_3),
+  .d0(bram_wdata_a_3), 
+  .we0(bram_we_a_3), 
+  .q0(bram_rdata_a_3), 
+  .addr1(bram_addr_a_3_ext),
+  .d1(bram_wdata_a_3_ext), 
+  .we1(bram_we_a_3_ext), 
+  .q1(bram_rdata_a_3_ext), 
+  .clk(clk_mem));
+  	
+  // BRAM matrix A 5
+ram matrix_A_5(
+  .addr0(bram_addr_a_5),
+  .d0(bram_wdata_a_5), 
+  .we0(bram_we_a_5), 
+  .q0(bram_rdata_a_5), 
+  .addr1(bram_addr_a_5_ext),
+  .d1(bram_wdata_a_5_ext), 
+  .we1(bram_we_a_5_ext), 
+  .q1(bram_rdata_a_5_ext), 
+  .clk(clk_mem));
+
+////////////////////////////////////////////////
+// BRAMs to store matrix B
+/////////////////////////////////////////////////
+
+
+  // BRAM matrix B 0
+ram matrix_B_0(
+  .addr0(bram_addr_b_0),
+  .d0(bram_wdata_b_0), 
+  .we0(bram_we_b_0), 
+  .q0(bram_rdata_b_0), 
+  .addr1(bram_addr_b_0_ext),
+  .d1(bram_wdata_b_0_ext), 
+  .we1(bram_we_b_0_ext), 
+  .q1(bram_rdata_b_0_ext), 
+  .clk(clk_mem));
+  	
+  // BRAM matrix B 1
+ram matrix_B_1(
+  .addr0(bram_addr_b_1),
+  .d0(bram_wdata_b_1), 
+  .we0(bram_we_b_1), 
+  .q0(bram_rdata_b_1), 
+  .addr1(bram_addr_b_1_ext),
+  .d1(bram_wdata_b_1_ext), 
+  .we1(bram_we_b_1_ext), 
+  .q1(bram_rdata_b_1_ext), 
+  .clk(clk_mem));
+  	
+  // BRAM matrix B 2
+ram matrix_B_2(
+  .addr0(bram_addr_b_2),
+  .d0(bram_wdata_b_2), 
+  .we0(bram_we_b_2), 
+  .q0(bram_rdata_b_2), 
+  .addr1(bram_addr_b_2_ext),
+  .d1(bram_wdata_b_2_ext), 
+  .we1(bram_we_b_2_ext), 
+  .q1(bram_rdata_b_2_ext), 
+  .clk(clk_mem));
+
+  	
+  // BRAM matrix B 3
+ram matrix_B_3(
+  .addr0(bram_addr_b_3),
+  .d0(bram_wdata_b_3), 
+  .we0(bram_we_b_3), 
+  .q0(bram_rdata_b_3), 
+  .addr1(bram_addr_b_3_ext),
+  .d1(bram_wdata_b_3_ext), 
+  .we1(bram_we_b_3_ext), 
+  .q1(bram_rdata_b_3_ext), 
+  .clk(clk_mem));
+  	
+  // BRAM matrix B 4
+ram matrix_B_4(
+  .addr0(bram_addr_b_4),
+  .d0(bram_wdata_b_4), 
+  .we0(bram_we_b_4), 
+  .q0(bram_rdata_b_4), 
+  .addr1(bram_addr_b_4_ext),
+  .d1(bram_wdata_b_4_ext), 
+  .we1(bram_we_b_4_ext), 
+  .q1(bram_rdata_b_4_ext), 
+  .clk(clk_mem));
+
+
+  // BRAM matrix B 5
+ram matrix_B_5(
+  .addr0(bram_addr_b_5),
+  .d0(bram_wdata_b_5), 
+  .we0(bram_we_b_5), 
+  .q0(bram_rdata_b_5), 
+  .addr1(bram_addr_b_5_ext),
+  .d1(bram_wdata_b_5_ext), 
+  .we1(bram_we_b_5_ext), 
+  .q1(bram_rdata_b_5_ext), 
+  .clk(clk_mem));
+
+////////////////////////////////////////////////
+// BRAMs to store matrix C
+/////////////////////////////////////////////////
+
+
+  // BRAM matrix C 0
+ram matrix_C_0(
+  .addr0(bram_addr_c_0),
+  .d0(bram_wdata_c_0), 
+  .we0(bram_we_c_0), 
+  .q0(bram_rdata_c_0), 
+  .addr1(bram_addr_c_0_ext),
+  .d1(bram_wdata_c_0_ext), 
+  .we1(bram_we_c_0_ext), 
+  .q1(bram_rdata_c_0_ext), 
+  .clk(clk_mem));
+  	
+  // BRAM matrix C 1
+ram matrix_C_1(
+  .addr0(bram_addr_c_1),
+  .d0(bram_wdata_c_1), 
+  .we0(bram_we_c_1), 
+  .q0(bram_rdata_c_1), 
+  .addr1(bram_addr_c_1_ext),
+  .d1(bram_wdata_c_1_ext), 
+  .we1(bram_we_c_1_ext), 
+  .q1(bram_rdata_c_1_ext), 
+  .clk(clk_mem));
+  	
+  // BRAM matrix C 2
+ram matrix_C_2(
+  .addr0(bram_addr_c_2),
+  .d0(bram_wdata_c_2), 
+  .we0(bram_we_c_2), 
+  .q0(bram_rdata_c_2), 
+  .addr1(bram_addr_c_2_ext),
+  .d1(bram_wdata_c_2_ext), 
+  .we1(bram_we_c_2_ext), 
+  .q1(bram_rdata_c_2_ext), 
+  .clk(clk_mem));
+
+  	
+  // BRAM matrix C 3
+ram matrix_C_3(
+  .addr0(bram_addr_c_3),
+  .d0(bram_wdata_c_3), 
+  .we0(bram_we_c_3), 
+  .q0(bram_rdata_c_3), 
+  .addr1(bram_addr_c_3_ext),
+  .d1(bram_wdata_c_3_ext), 
+  .we1(bram_we_c_3_ext), 
+  .q1(bram_rdata_c_3_ext), 
+  .clk(clk_mem));
+  	
+  // BRAM matrix C 4
+ram matrix_C_4(
+  .addr0(bram_addr_c_4),
+  .d0(bram_wdata_c_4), 
+  .we0(bram_we_c_4), 
+  .q0(bram_rdata_c_4), 
+  .addr1(bram_addr_c_4_ext),
+  .d1(bram_wdata_c_4_ext), 
+  .we1(bram_we_c_4_ext), 
+  .q1(bram_rdata_c_4_ext), 
+  .clk(clk_mem));
+
+
+  // BRAM matrix C 5
+ram matrix_C_5(
+  .addr0(bram_addr_c_5),
+  .d0(bram_wdata_c_5), 
+  .we0(bram_we_c_5), 
+  .q0(bram_rdata_c_5), 
+  .addr1(bram_addr_c_5_ext),
+  .d1(bram_wdata_c_5_ext), 
+  .we1(bram_we_c_5_ext), 
+  .q1(bram_rdata_c_5_ext), 
+  .clk(clk_mem));
+  	
+reg start_eltwise_op;
+wire done_eltwise_op;
+
+reg [3:0] state;
+	
+////////////////////////////////////////////////////////////////
+// Control logic
+////////////////////////////////////////////////////////////////
+	always @(posedge clk) begin
+      // Control FSM in the clk domain; resetn is a synchronous, active-low reset.
+      //   4'b0000 (idle): hold start_eltwise_op low until start_reg is set.
+      //   4'b0001 (kick): raise start_eltwise_op for the compute units.
+      //   4'b1010 (busy): wait for done_eltwise_op, then drop start_eltwise_op.
+      //   4'b1000 (done): wait for clear_done_reg before returning to idle.
+      if (resetn == 1'b0) begin
+        state <= 4'b0000;
+        start_eltwise_op <= 1'b0;
+      end
+      else begin
+        case (state)
+
+        4'b0000: begin
+          start_eltwise_op <= 1'b0;
+          if (start_reg == 1'b1) begin
+            state <= 4'b0001;
+          end
+        end
+
+        4'b0001: begin
+          start_eltwise_op <= 1'b1;
+          state <= 4'b1010;
+        end
+
+        4'b1010: begin
+          if (done_eltwise_op == 1'b1) begin
+            start_eltwise_op <= 1'b0;
+            state <= 4'b1000;
+          end
+        end
+
+        4'b1000: begin
+          if (clear_done_reg == 1'b1) begin
+            state <= 4'b0000;
+          end
+        end
+
+        // Any encoding outside the four above is illegal: recover to idle.
+        default: begin state <= 4'b0000; start_eltwise_op <= 1'b0; end
+        endcase
+      end
+  end
+
+reg [1:0] state_apb;
+`define IDLE     2'b00
+`define W_ENABLE  2'b01
+`define R_ENABLE  2'b10
+
+reg [`AWIDTH-1:0] address_mat_a;
+reg [`AWIDTH-1:0] address_mat_b;
+reg [`AWIDTH-1:0] address_mat_c;
+reg [`MASK_WIDTH-1:0] validity_mask_a;
+reg [`MASK_WIDTH-1:0] validity_mask_b;
+reg [`ITERATIONS_WIDTH-1:0] iterations;
+
+////////////////////////////////////////////////////////////////
+// Configuration logic
+////////////////////////////////////////////////////////////////
+always @(posedge PCLK) begin
+  // APB register file in the PCLK domain; PRESETn is a synchronous, active-low reset.
+  if (PRESETn == 0) begin
+    state_apb <= `IDLE;
+    PRDATA <= 0;
+    PREADY <= 0;
+    address_mat_a <= 0;
+    address_mat_b <= 0;
+    address_mat_c <= 0;
+    validity_mask_a <= {`MASK_WIDTH{1'b1}};
+    validity_mask_b <= {`MASK_WIDTH{1'b1}};
+    // Also clear the start/clear-done bits and the iteration count so they are defined before the first APB write.
+    start_reg <= 1'b0;
+    clear_done_reg <= 1'b0;
+    iterations <= 0;
+  end
+  else begin
+    case (state_apb)
+      `IDLE : begin
+        PRDATA <= 0;
+        if (PSEL) begin
+          if (PWRITE) begin
+            state_apb <= `W_ENABLE;
+          end
+          else begin
+            state_apb <= `R_ENABLE;
+          end
+        end
+        PREADY <= 0;
+      end
+      // Write phase: one cycle; the register is updated only if PENABLE is high now.
+      `W_ENABLE : begin
+        if (PSEL && PWRITE && PENABLE) begin
+          case (PADDR)
+          `REG_STDN_ADDR       : begin
+                                 start_reg <= PWDATA[0];
+                                 clear_done_reg <= PWDATA[31];
+                                 end
+          `REG_MATRIX_A_ADDR   : address_mat_a <= PWDATA[`AWIDTH-1:0];
+          `REG_MATRIX_B_ADDR   : address_mat_b <= PWDATA[`AWIDTH-1:0];
+          `REG_MATRIX_C_ADDR   : address_mat_c <= PWDATA[`AWIDTH-1:0];
+          `REG_VALID_MASK_A_ADDR: validity_mask_a <= PWDATA[`MASK_WIDTH-1:0];
+          `REG_VALID_MASK_B_ADDR: validity_mask_b <= PWDATA[`MASK_WIDTH-1:0];
+          `REG_ITERATIONS_ADDR: iterations <= PWDATA[`ITERATIONS_WIDTH-1:0];
+          default : reg_dummy <= PWDATA; //sink writes to a dummy register
+          endcase
+          PREADY <=1;
+        end
+        state_apb <= `IDLE;
+      end
+      // Read phase: one cycle; PRDATA is loaded only if PENABLE is high now.
+      `R_ENABLE : begin
+        if (PSEL && !PWRITE && PENABLE) begin
+          PREADY <= 1;
+          case (PADDR)
+          `REG_STDN_ADDR        : PRDATA <= {done_eltwise_op, 30'b0, start_eltwise_op};
+          `REG_MATRIX_A_ADDR    : PRDATA <= address_mat_a;
+          `REG_MATRIX_B_ADDR    : PRDATA <= address_mat_b;
+          `REG_MATRIX_C_ADDR    : PRDATA <= address_mat_c;
+          `REG_VALID_MASK_A_ADDR: PRDATA <= validity_mask_a;
+          `REG_VALID_MASK_B_ADDR: PRDATA <= validity_mask_b;
+          `REG_ITERATIONS_ADDR: PRDATA <= iterations;
+          default : PRDATA <= reg_dummy; //read the dummy register for undefined addresses
+          endcase
+        end
+        state_apb <= `IDLE;
+      end
+      default: begin
+        state_apb <= `IDLE;
+      end
+    endcase
+  end
+end
+  
+wire reset;
+assign reset = ~resetn;
+wire pe_reset;
+assign pe_reset = ~pe_resetn;
+
+  wire c_data_0_available;
+  wire c_data_1_available;
+  wire c_data_2_available;
+  wire c_data_3_available;
+  wire c_data_4_available;
+  wire c_data_5_available;
+
+  assign bram_wdata_a_0 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_a_0 = 1'b1;
+  assign bram_we_a_0 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_wdata_a_1 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_a_1 = 1'b1;
+  assign bram_we_a_1 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_wdata_a_2 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_a_2 = 1'b1;
+  assign bram_we_a_2 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_wdata_a_3 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_a_3 = 1'b1;
+  assign bram_we_a_3 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_wdata_a_4 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_a_4 = 1'b1;
+  assign bram_we_a_4 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_wdata_a_5 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_a_5 = 1'b1;
+  assign bram_we_a_5 = {`MASK_WIDTH{1'b0}};
+  	
+  assign bram_wdata_b_0 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_b_0 = 1'b1;
+  assign bram_we_b_0 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_wdata_b_1 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_b_1 = 1'b1;
+  assign bram_we_b_1 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_wdata_b_2 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_b_2 = 1'b1;
+  assign bram_we_b_2 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_wdata_b_3 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_b_3 = 1'b1;
+  assign bram_we_b_3 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_wdata_b_4 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_b_4 = 1'b1;
+  assign bram_we_b_4 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_wdata_b_5 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_b_5 = 1'b1;
+  assign bram_we_b_5 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_we_c_0 = (c_data_0_available) ? {`MASK_WIDTH{1'b1}} : {`MASK_WIDTH{1'b0}};  
+  assign bram_we_c_2 = (c_data_2_available) ? {`MASK_WIDTH{1'b1}} : {`MASK_WIDTH{1'b0}};  
+  assign bram_we_c_4 = (c_data_4_available) ? {`MASK_WIDTH{1'b1}} : {`MASK_WIDTH{1'b0}};  
+  assign bram_we_c_1 = (c_data_1_available) ? {`MASK_WIDTH{1'b1}} : {`MASK_WIDTH{1'b0}};  
+  assign bram_we_c_3 = (c_data_3_available) ? {`MASK_WIDTH{1'b1}} : {`MASK_WIDTH{1'b0}};  
+  assign bram_we_c_5 = (c_data_5_available) ? {`MASK_WIDTH{1'b1}} : {`MASK_WIDTH{1'b0}};  
+
+  /////////////////////////////////////////////////
+  // ORing all done signals
+  /////////////////////////////////////////////////
+  wire done_eltwise_op_0;
+  wire done_eltwise_op_1;
+  wire done_eltwise_op_2;
+  wire done_eltwise_op_3;
+  wire done_eltwise_op_4;
+  wire done_eltwise_op_5;
+
+  assign done_eltwise_op = 
+  done_eltwise_op_0 | 
+  done_eltwise_op_1 | 
+  done_eltwise_op_2 | 
+  done_eltwise_op_3 | 
+  done_eltwise_op_4 | 
+  done_eltwise_op_5 ;
+
+  /////////////////////////////////////////////////
+  // Code to allow for scalar mode
+  /////////////////////////////////////////////////
+  
+	wire [`CU_SIZE*`DWIDTH-1:0] b_data_0;
+	wire [`CU_SIZE*`DWIDTH-1:0] b_data_1;
+	wire [`CU_SIZE*`DWIDTH-1:0] b_data_2;
+	wire [`CU_SIZE*`DWIDTH-1:0] b_data_3;
+	wire [`CU_SIZE*`DWIDTH-1:0] b_data_4;
+	wire [`CU_SIZE*`DWIDTH-1:0] b_data_5;
+
+  assign b_data_0 = mode ? bram_rdata_b_0 : {`CU_SIZE{scalar_inp}};
+  assign b_data_1 = mode ? bram_rdata_b_1 : {`CU_SIZE{scalar_inp}};
+  assign b_data_2 = mode ? bram_rdata_b_2 : {`CU_SIZE{scalar_inp}};
+  assign b_data_3 = mode ? bram_rdata_b_3 : {`CU_SIZE{scalar_inp}};
+  assign b_data_4 = mode ? bram_rdata_b_4 : {`CU_SIZE{scalar_inp}};
+  assign b_data_5 = mode ? bram_rdata_b_5 : {`CU_SIZE{scalar_inp}};
+
+  /////////////////////////////////////////////////
+  // Compute Unit 0
+  /////////////////////////////////////////////////
+
+eltwise_cu u_eltwise_cu_0(
+  .clk(clk),
+  .reset(reset),
+  .pe_reset(pe_reset),
+  .start_eltwise_op(start_eltwise_op),
+  .done_eltwise_op(done_eltwise_op_0),
+  .count(iterations),
+  .op(op),
+  .address_mat_a(address_mat_a),
+  .address_mat_b(address_mat_b),
+  .address_mat_c(address_mat_c),
+  .a_data(bram_rdata_a_0),
+  .b_data(b_data_0),
+  .c_data_out(bram_wdata_c_0),
+  .a_addr(bram_addr_a_0),
+  .b_addr(bram_addr_b_0),
+  .c_addr(bram_addr_c_0),
+  .c_data_available(c_data_0_available),
+  .validity_mask_a(4'b1111),
+  .validity_mask_b(4'b1111)
+);
+
+  /////////////////////////////////////////////////
+  // Compute Unit 1
+  /////////////////////////////////////////////////
+
+eltwise_cu u_eltwise_cu_1(
+  .clk(clk),
+  .reset(reset),
+  .pe_reset(pe_reset),
+  .start_eltwise_op(start_eltwise_op),
+  .done_eltwise_op(done_eltwise_op_1),
+  .count(iterations),
+  .op(op),
+  .address_mat_a(address_mat_a),
+  .address_mat_b(address_mat_b),
+  .address_mat_c(address_mat_c),
+  .a_data(bram_rdata_a_1),
+  .b_data(b_data_1),
+  .c_data_out(bram_wdata_c_1),
+  .a_addr(bram_addr_a_1),
+  .b_addr(bram_addr_b_1),
+  .c_addr(bram_addr_c_1),
+  .c_data_available(c_data_1_available),
+  .validity_mask_a(4'b1111),
+  .validity_mask_b(4'b1111)
+);
+
+  /////////////////////////////////////////////////
+  // Compute Unit 2
+  /////////////////////////////////////////////////
+
+eltwise_cu u_eltwise_cu_2(
+  .clk(clk),
+  .reset(reset),
+  .pe_reset(pe_reset),
+  .start_eltwise_op(start_eltwise_op),
+  .done_eltwise_op(done_eltwise_op_2),
+  .count(iterations),
+  .op(op),
+  .address_mat_a(address_mat_a),
+  .address_mat_b(address_mat_b),
+  .address_mat_c(address_mat_c),
+  .a_data(bram_rdata_a_2),
+  .b_data(b_data_2),
+  .c_data_out(bram_wdata_c_2),
+  .a_addr(bram_addr_a_2),
+  .b_addr(bram_addr_b_2),
+  .c_addr(bram_addr_c_2),
+  .c_data_available(c_data_2_available),
+  .validity_mask_a(4'b1111),
+  .validity_mask_b(4'b1111)
+);
+
+  /////////////////////////////////////////////////
+  // Compute Unit 3
+  /////////////////////////////////////////////////
+
+eltwise_cu u_eltwise_cu_3(
+  .clk(clk),
+  .reset(reset),
+  .pe_reset(pe_reset),
+  .start_eltwise_op(start_eltwise_op),
+  .done_eltwise_op(done_eltwise_op_3),
+  .count(iterations),
+  .op(op),
+  .address_mat_a(address_mat_a),
+  .address_mat_b(address_mat_b),
+  .address_mat_c(address_mat_c),
+  .a_data(bram_rdata_a_3),
+  .b_data(b_data_3),
+  .c_data_out(bram_wdata_c_3),
+  .a_addr(bram_addr_a_3),
+  .b_addr(bram_addr_b_3),
+  .c_addr(bram_addr_c_3),
+  .c_data_available(c_data_3_available),
+  .validity_mask_a(4'b1111),
+  .validity_mask_b(4'b1111)
+);
+
+  /////////////////////////////////////////////////
+  // Compute Unit 4
+  /////////////////////////////////////////////////
+
+eltwise_cu u_eltwise_cu_4(
+  .clk(clk),
+  .reset(reset),
+  .pe_reset(pe_reset),
+  .start_eltwise_op(start_eltwise_op),
+  .done_eltwise_op(done_eltwise_op_4),
+  .count(iterations),
+  .op(op),
+  .address_mat_a(address_mat_a),
+  .address_mat_b(address_mat_b),
+  .address_mat_c(address_mat_c),
+  .a_data(bram_rdata_a_4),
+  .b_data(b_data_4),
+  .c_data_out(bram_wdata_c_4),
+  .a_addr(bram_addr_a_4),
+  .b_addr(bram_addr_b_4),
+  .c_addr(bram_addr_c_4),
+  .c_data_available(c_data_4_available),
+  .validity_mask_a(4'b1111),
+  .validity_mask_b(4'b1111)
+);
+
+  /////////////////////////////////////////////////
+  // Compute Unit 5
+  /////////////////////////////////////////////////
+
+eltwise_cu u_eltwise_cu_5(
+  .clk(clk),
+  .reset(reset),
+  .pe_reset(pe_reset),
+  .start_eltwise_op(start_eltwise_op),
+  .done_eltwise_op(done_eltwise_op_5),
+  .count(iterations),
+  .op(op),
+  .address_mat_a(address_mat_a),
+  .address_mat_b(address_mat_b),
+  .address_mat_c(address_mat_c),
+  .a_data(bram_rdata_a_5),
+  .b_data(b_data_5),
+  .c_data_out(bram_wdata_c_5),
+  .a_addr(bram_addr_a_5),
+  .b_addr(bram_addr_b_5),
+  .c_addr(bram_addr_c_5),
+  .c_data_available(c_data_5_available),
+  .validity_mask_a(4'b0011),
+  .validity_mask_b(4'b0011)
+);
+
+endmodule
+
+
+//////////////////////////////////
+//////////////////////////////////
+//Dual port RAM
+//////////////////////////////////
+//////////////////////////////////
+module ram (addr0, d0, we0, q0, addr1, d1, we1, q1, clk);
+// Two-port memory assembled from CU_SIZE independent dpram_original slices.
+// Slice k holds bits [k*DWIDTH +: DWIDTH] of each data port and is written
+// only when bit k of the matching write-enable vector is set. Every slice
+// sees the same addr0/addr1 and the same clk.
+
+input clk;
+input  [`AWIDTH-1:0] addr0, addr1;          // word address, port 0 / port 1
+input  [`CU_SIZE*`DWIDTH-1:0] d0, d1;       // write data, port 0 / port 1
+input  [`CU_SIZE-1:0] we0, we1;             // per-slice write enables
+output [`CU_SIZE*`DWIDTH-1:0] q0, q1;       // read data, port 0 / port 1
+
+genvar lane;
+
+generate
+`ifdef QUARTUS
+   for (lane=0; lane<`CU_SIZE; lane=lane+1) begin: gen_dpram
+`else
+   for (lane=0; lane<`CU_SIZE; lane=lane+1) begin
+`endif
+     dpram_original #(
+       .AWIDTH(`AWIDTH), .DWIDTH(`DWIDTH), .NUM_WORDS(1<<`AWIDTH)
+     ) dp1 (
+       .clk(clk),
+       .address_a(addr0), .address_b(addr1),
+       .wren_a(we0[lane]), .wren_b(we1[lane]),
+       .data_a(d0[lane*`DWIDTH +: `DWIDTH]),
+       .data_b(d1[lane*`DWIDTH +: `DWIDTH]),
+       .out_a(q0[lane*`DWIDTH +: `DWIDTH]),
+       .out_b(q1[lane*`DWIDTH +: `DWIDTH])
+     );
+   end
+endgenerate
+endmodule
+
+module dpram_original (
+    clk,
+    address_a,
+    address_b,
+    wren_a,
+    wren_b,
+    data_a,
+    data_b,
+    out_a,
+    out_b
+);
+parameter AWIDTH=10;       // address width in bits
+parameter NUM_WORDS=1024;  // depth of the behavioral array; not used in the hard_mem branch
+parameter DWIDTH=32;       // data width in bits
+input clk;
+input [(AWIDTH-1):0] address_a;
+input [(AWIDTH-1):0] address_b;
+input  wren_a;
+input  wren_b;
+input [(DWIDTH-1):0] data_a;
+input [(DWIDTH-1):0] data_b;
+output reg [(DWIDTH-1):0] out_a;
+output reg [(DWIDTH-1):0] out_b;
+// Dual-port RAM on a single clock: each port has its own address, write enable, write data and read output.
+`ifndef hard_mem
+// Behavioral model: both ports access the same array; reads are registered on posedge clk.
+reg [DWIDTH-1:0] ram[NUM_WORDS-1:0];
+always @ (posedge clk) begin 
+  if (wren_a) begin
+      ram[address_a] <= data_a;
+  end
+  out_a <= ram[address_a]; // non-blocking: a same-cycle write is not visible on out_a until the next read
+end
+// Port B mirrors port A. NOTE(review): simultaneous writes from both ports to one address are not arbitrated here.
+always @ (posedge clk) begin 
+  if (wren_b) begin
+      ram[address_b] <= data_b;
+  end 
+  out_b <= ram[address_b];
+end
+// hard_mem defined: delegate to the dual_port_ram module, which is not defined in this part of the file.
+`else
+// NOTE(review): out_a/out_b are declared reg yet driven by an instance port below; strict Verilog tools may reject this - confirm.
+defparam u_dual_port_ram.ADDR_WIDTH = AWIDTH;
+defparam u_dual_port_ram.DATA_WIDTH = DWIDTH;
+// Port A maps to addr1/we1/data1/out1, port B to addr2/we2/data2/out2.
+dual_port_ram u_dual_port_ram(
+.addr1(address_a),
+.we1(wren_a),
+.data1(data_a),
+.out1(out_a),
+.addr2(address_b),
+.we2(wren_b),
+.data2(data_b),
+.out2(out_b),
+.clk(clk)
+);
+
+`endif
+endmodule
+
+  
+//////////////////////////////////
+//////////////////////////////////
+// Elementwise compute unit
+//////////////////////////////////
+//////////////////////////////////
+module eltwise_cu(
+ clk,
+ reset,
+ pe_reset,
+ start_eltwise_op,
+ done_eltwise_op,
+ count,
+ op,
+ address_mat_a,
+ address_mat_b,
+ address_mat_c,
+ a_data,
+ b_data,
+ c_data_out,
+ a_addr,
+ b_addr,
+ c_addr,
+ c_data_available,
+ validity_mask_a,
+ validity_mask_b
+);
+// One compute unit: input_logic fetches operands, pe_array processes them, output_logic writes results back.
+ input clk;
+ input reset;
+ input pe_reset;
+ input start_eltwise_op;
+ output done_eltwise_op;
+ input [`ITERATIONS_WIDTH-1:0] count;
+ input [1:0] op;
+ input [`AWIDTH-1:0] address_mat_a;
+ input [`AWIDTH-1:0] address_mat_b;
+ input [`AWIDTH-1:0] address_mat_c;
+ input [`CU_SIZE*`DWIDTH-1:0] a_data;
+ input [`CU_SIZE*`DWIDTH-1:0] b_data;
+ output [`CU_SIZE*`DWIDTH-1:0] c_data_out;
+ output [`AWIDTH-1:0] a_addr;
+ output [`AWIDTH-1:0] b_addr;
+ output [`AWIDTH-1:0] c_addr;
+ output c_data_available;
+ input [`MASK_WIDTH-1:0] validity_mask_a;
+ input [`MASK_WIDTH-1:0] validity_mask_b;
+// Per-lane PE results, packed into c_data_out by output_logic.
+wire [`DWIDTH-1:0] out0;
+wire [`DWIDTH-1:0] out1;
+wire [`DWIDTH-1:0] out2;
+wire [`DWIDTH-1:0] out3;
+// Per-lane operands produced by input_logic.
+wire [`DWIDTH-1:0] a0_data;
+wire [`DWIDTH-1:0] a1_data;
+wire [`DWIDTH-1:0] a2_data;
+wire [`DWIDTH-1:0] a3_data;
+wire [`DWIDTH-1:0] b0_data;
+wire [`DWIDTH-1:0] b1_data;
+wire [`DWIDTH-1:0] b2_data;
+wire [`DWIDTH-1:0] b3_data;
+
+//////////////////////////////////////////////////////////////////////////
+// Logic for done
+//////////////////////////////////////////////////////////////////////////
+wire [31:0] clk_cnt_for_done; // same width as clk_cnt so the target is not truncated to 8 bits for large count
+reg [31:0] clk_cnt;
+reg done_eltwise_op;
+
+assign clk_cnt_for_done = 
+                  `PE_PIPELINE_DEPTH + //This is dependent on the pipeline depth of the PEs
+                  count //The number of iterations asked for this compute unit
+                  ;
+// done_eltwise_op is high for one cycle after clk_cnt hits the target; reset or start_eltwise_op low clears both.
+always @(posedge clk) begin
+  if (reset || ~start_eltwise_op) begin
+    clk_cnt <= 0;
+    done_eltwise_op <= 0;
+  end
+  else if (clk_cnt == clk_cnt_for_done) begin
+    done_eltwise_op <= 1;
+    clk_cnt <= clk_cnt + 1;
+  end
+  else if (done_eltwise_op == 0) begin
+    clk_cnt <= clk_cnt + 1;
+  end    
+  else begin
+    done_eltwise_op <= 0;
+    clk_cnt <= clk_cnt + 1;
+  end
+end
+
+//////////////////////////////////////////////////////////////////////////
+// Instantiation of input logic
+//////////////////////////////////////////////////////////////////////////
+input_logic u_input_logic(
+.clk(clk),
+.reset(reset),
+.start_eltwise_op(start_eltwise_op),
+.count(count),
+.a_addr(a_addr),
+.b_addr(b_addr),
+.address_mat_a(address_mat_a),
+.address_mat_b(address_mat_b),
+.a_data(a_data),
+.b_data(b_data),
+.a0_data(a0_data),
+.a1_data(a1_data),
+.a2_data(a2_data),
+.a3_data(a3_data),
+.b0_data(b0_data),
+.b1_data(b1_data),
+.b2_data(b2_data),
+.b3_data(b3_data),
+.validity_mask_a(validity_mask_a),
+.validity_mask_b(validity_mask_b)
+);
+
+//////////////////////////////////////////////////////////////////////////
+// Instantiation of the output logic
+//////////////////////////////////////////////////////////////////////////
+output_logic u_output_logic(
+.clk(clk),
+.reset(reset),
+.start_eltwise_op(start_eltwise_op),
+.done_eltwise_op(done_eltwise_op),
+.address_mat_c(address_mat_c),
+.c_data_out(c_data_out),
+.c_addr(c_addr),
+.c_data_available(c_data_available),
+.out0(out0),
+.out1(out1),
+.out2(out2),
+.out3(out3)
+);
+
+//////////////////////////////////////////////////////////////////////////
+// Instantiations of the actual PEs
+//////////////////////////////////////////////////////////////////////////
+pe_array u_pe_array(
+.reset(reset),
+.clk(clk),
+.pe_reset(pe_reset),
+.op(op),
+.a0(a0_data), 
+.a1(a1_data), 
+.a2(a2_data), 
+.a3(a3_data),
+.b0(b0_data), 
+.b1(b1_data), 
+.b2(b2_data), 
+.b3(b3_data),
+.out0(out0),
+.out1(out1),
+.out2(out2),
+.out3(out3)
+);
+
+endmodule
+
+//////////////////////////////////////////////////////////////////////////
+// Output logic
+//////////////////////////////////////////////////////////////////////////
+module output_logic(
+clk,
+reset,
+start_eltwise_op,
+done_eltwise_op,
+address_mat_c,
+c_data_out, 
+c_addr,
+c_data_available,
+out0,
+out1,
+out2,
+out3
+);
+// Packs the four PE results into one word and generates the write address/strobe for the result memory.
+input clk;
+input reset;
+input start_eltwise_op;
+input done_eltwise_op; // not referenced inside this module
+input [`AWIDTH-1:0] address_mat_c;
+output [`CU_SIZE*`DWIDTH-1:0] c_data_out;
+output [`AWIDTH-1:0] c_addr;
+output c_data_available;
+input [`DWIDTH-1:0] out0;
+input [`DWIDTH-1:0] out1;
+input [`DWIDTH-1:0] out2;
+input [`DWIDTH-1:0] out3;
+
+reg c_data_available;
+reg [`CU_SIZE*`DWIDTH-1:0] c_data_out;
+
+//////////////////////////////////////////////////////////////////////////
+// Logic to capture matrix C data from the PEs and send to RAM
+//////////////////////////////////////////////////////////////////////////
+// cnt counts cycles since start; once it exceeds PE_PIPELINE_DEPTH a result word is captured every cycle.
+reg [`AWIDTH-1:0] c_addr;
+reg [7:0] cnt;
+// NOTE(review): c_addr is incremented in the cycle c_data_available first rises, so the first word lands at address_mat_c + 1 - confirm intended.
+always @(posedge clk) begin
+  if (reset | ~start_eltwise_op) begin
+    c_data_available <= 1'b0;
+    c_addr <= address_mat_c;
+    c_data_out <= 0;
+    cnt <= 0;
+  end
+  else if (cnt>`PE_PIPELINE_DEPTH) begin
+    c_data_available <= 1'b1;
+    c_addr <= c_addr+1;
+    c_data_out <= {out3, out2, out1, out0};
+    // cnt is held here: letting the 8-bit counter run on would wrap it to 0 and stall write-back on long runs.
+  end else begin
+    cnt <= cnt + 1;
+  end 
+end
+
+endmodule
+
+//////////////////////////////////////////////////////////////////////////
+// Data setup
+//////////////////////////////////////////////////////////////////////////
+module input_logic(
+clk,
+reset,
+start_eltwise_op,
+count,
+a_addr,
+b_addr,
+address_mat_a,
+address_mat_b,
+a_data,
+b_data,
+a0_data,
+a1_data,
+a2_data,
+a3_data,
+b0_data,
+b1_data,
+b2_data,
+b3_data,
+validity_mask_a,
+validity_mask_b
+);
+// Generates read addresses for the A and B memories and splits the returned words into four gated lanes.
+input clk;
+input reset;
+input start_eltwise_op;
+input [`ITERATIONS_WIDTH-1:0] count;
+output [`AWIDTH-1:0] a_addr;
+output [`AWIDTH-1:0] b_addr;
+input [`AWIDTH-1:0] address_mat_a;
+input [`AWIDTH-1:0] address_mat_b;
+input [`CU_SIZE*`DWIDTH-1:0] a_data;
+input [`CU_SIZE*`DWIDTH-1:0] b_data;
+output [`DWIDTH-1:0] a0_data;
+output [`DWIDTH-1:0] a1_data;
+output [`DWIDTH-1:0] a2_data;
+output [`DWIDTH-1:0] a3_data;
+output [`DWIDTH-1:0] b0_data;
+output [`DWIDTH-1:0] b1_data;
+output [`DWIDTH-1:0] b2_data;
+output [`DWIDTH-1:0] b3_data;
+input [`MASK_WIDTH-1:0] validity_mask_a;
+input [`MASK_WIDTH-1:0] validity_mask_b;
+// One bit wider than count so "iterations <= count" eventually fails for every count value instead of wrapping.
+reg [`ITERATIONS_WIDTH:0] iterations;
+
+wire [`DWIDTH-1:0] a0_data;
+wire [`DWIDTH-1:0] a1_data;
+wire [`DWIDTH-1:0] a2_data;
+wire [`DWIDTH-1:0] a3_data;
+wire [`DWIDTH-1:0] b0_data;
+wire [`DWIDTH-1:0] b1_data;
+wire [`DWIDTH-1:0] b2_data;
+wire [`DWIDTH-1:0] b3_data;
+
+//////////////////////////////////////////////////////////////////////////
+// Logic to generate addresses to BRAM A
+//////////////////////////////////////////////////////////////////////////
+reg [`AWIDTH-1:0] a_addr;
+reg a_mem_access; //flag that tells whether the compute unit is trying to access memory or not
+
+always @(posedge clk) begin
+  // Idle (reset or start low): park the address at the base of matrix A
+  // and clear the access flag and the iteration counter.
+  if (reset || ~start_eltwise_op) begin
+    a_addr <= address_mat_a;
+    a_mem_access <= 0;
+    iterations <= 0;
+  end
+
+  // Running: step the address once per cycle for count+1 cycles. The access
+  // flag is set on the first step and stays set until the next idle period.
+  else if (iterations <= count) begin
+    a_addr <= a_addr + 1;
+    a_mem_access <= 1;
+    iterations <= iterations + 1;
+  end
+end  
+
+//////////////////////////////////////////////////////////////////////////
+// Logic to generate valid signals for data coming from BRAM A
+//////////////////////////////////////////////////////////////////////////
+reg [7:0] a_mem_access_counter;
+always @(posedge clk) begin
+  if (reset || ~start_eltwise_op) begin
+    a_mem_access_counter <= 0;
+  end
+  else if (a_mem_access == 1) begin
+    // Saturate at 255 instead of wrapping to 0, which would drop the valid flag again on long runs.
+    if (a_mem_access_counter != 8'hFF) a_mem_access_counter <= a_mem_access_counter + 1;
+  end
+  else begin
+    a_mem_access_counter <= 0;
+  end
+end
+
+wire bram_rdata_a_valid; //flag that tells whether the data from memory is valid
+assign bram_rdata_a_valid = 
+       ((validity_mask_a[0]==1'b0 && a_mem_access_counter==1) ||
+        (validity_mask_a[1]==1'b0 && a_mem_access_counter==2) ||
+        (validity_mask_a[2]==1'b0 && a_mem_access_counter==3) ||
+        (validity_mask_a[3]==1'b0 && a_mem_access_counter==4)) ?
+        1'b0 : (a_mem_access_counter >= `MEM_ACCESS_LATENCY);
+
+//////////////////////////////////////////////////////////////////////////
+// Lane split for the data received from BRAM A
+//////////////////////////////////////////////////////////////////////////
+//Slice data into chunks and qualify it with whether it is valid or not (a masked or invalid lane reads as 0)
+assign a0_data = a_data[1*`DWIDTH-1:0*`DWIDTH] & {`DWIDTH{bram_rdata_a_valid}} & {`DWIDTH{validity_mask_a[0]}};
+assign a1_data = a_data[2*`DWIDTH-1:1*`DWIDTH] & {`DWIDTH{bram_rdata_a_valid}} & {`DWIDTH{validity_mask_a[1]}};
+assign a2_data = a_data[3*`DWIDTH-1:2*`DWIDTH] & {`DWIDTH{bram_rdata_a_valid}} & {`DWIDTH{validity_mask_a[2]}};
+assign a3_data = a_data[4*`DWIDTH-1:3*`DWIDTH] & {`DWIDTH{bram_rdata_a_valid}} & {`DWIDTH{validity_mask_a[3]}};
+
+
+//////////////////////////////////////////////////////////////////////////
+// Logic to generate addresses to BRAM B
+//////////////////////////////////////////////////////////////////////////
+reg [`AWIDTH-1:0] b_addr;
+reg b_mem_access; //flag that tells whether the compute unit is trying to access memory or not
+
+always @(posedge clk) begin
+  // Idle (reset or start low): park the address at the base of matrix B
+  // and clear the access flag.
+  if (reset || ~start_eltwise_op) begin
+    b_addr <= address_mat_b ;
+    b_mem_access <= 0;
+  end
+  // Running: advance in lockstep with the A side; the iteration counter
+  // itself is maintained by the BRAM A address process.
+  else if (iterations <= count) begin
+    b_addr <= b_addr + 1;
+    b_mem_access <= 1;
+  end
+end  
+
+//////////////////////////////////////////////////////////////////////////
+// Logic to generate valid signals for data coming from BRAM B
+//////////////////////////////////////////////////////////////////////////
+reg [7:0] b_mem_access_counter;
+always @(posedge clk) begin
+  if (reset || ~start_eltwise_op) begin
+    b_mem_access_counter <= 0;
+  end
+  else if (b_mem_access == 1) begin
+    if (b_mem_access_counter != 8'hFF) b_mem_access_counter <= b_mem_access_counter + 1; // saturate, do not wrap
+  end
+  else begin
+    b_mem_access_counter <= 0;
+  end
+end
+
+wire bram_rdata_b_valid; //flag that tells whether the data from memory is valid
+assign bram_rdata_b_valid = 
+       ((validity_mask_b[0]==1'b0 && b_mem_access_counter==1) ||
+        (validity_mask_b[1]==1'b0 && b_mem_access_counter==2) ||
+        (validity_mask_b[2]==1'b0 && b_mem_access_counter==3) ||
+        (validity_mask_b[3]==1'b0 && b_mem_access_counter==4)) ?
+        1'b0 : (b_mem_access_counter >= `MEM_ACCESS_LATENCY);
+
+//Slice data into chunks and qualify it with whether it is valid or not
+assign b0_data = b_data[1*`DWIDTH-1:0*`DWIDTH] & {`DWIDTH{bram_rdata_b_valid}} & {`DWIDTH{validity_mask_b[0]}};
+assign b1_data = b_data[2*`DWIDTH-1:1*`DWIDTH] & {`DWIDTH{bram_rdata_b_valid}} & {`DWIDTH{validity_mask_b[1]}};
+assign b2_data = b_data[3*`DWIDTH-1:2*`DWIDTH] & {`DWIDTH{bram_rdata_b_valid}} & {`DWIDTH{validity_mask_b[2]}};
+assign b3_data = b_data[4*`DWIDTH-1:3*`DWIDTH] & {`DWIDTH{bram_rdata_b_valid}} & {`DWIDTH{validity_mask_b[3]}};
+
+
+endmodule
+
+
+
+//////////////////////////////////////////////////////////////////////////
+// Array of processing elements
+//////////////////////////////////////////////////////////////////////////
+module pe_array(
+reset, clk, pe_reset, op,
+a0, a1, a2, a3,
+b0, b1, b2, b3,
+out0, out1, out2, out3
+);
+// Four processing_element lanes side by side. Lane k combines a<k> with
+// b<k> under the shared op select and drives out<k>.
+
+input clk;
+input reset;
+input pe_reset;
+input [1:0] op;                          // forwarded unchanged to every lane
+input  [`DWIDTH-1:0] a0, a1, a2, a3;      // first operand of each lane
+input  [`DWIDTH-1:0] b0, b1, b2, b3;      // second operand of each lane
+output [`DWIDTH-1:0] out0, out1, out2, out3;
+
+wire [`DWIDTH-1:0] out0, out1, out2, out3;
+
+// Either reset input resets all four lanes.
+wire lane_rst;
+assign lane_rst = pe_reset | reset;
+
+// Lanes 0 to 3.
+processing_element pe0(.clk(clk), .reset(lane_rst), .op(op),
+                       .in_a(a0), .in_b(b0),
+                       .out(out0));
+processing_element pe1(.clk(clk), .reset(lane_rst), .op(op),
+                       .in_a(a1), .in_b(b1),
+                       .out(out1));
+processing_element pe2(.clk(clk), .reset(lane_rst), .op(op),
+                       .in_a(a2), .in_b(b2),
+                       .out(out2));
+processing_element pe3(.clk(clk), .reset(lane_rst), .op(op),
+                       .in_a(a3), .in_b(b3),
+                       .out(out3));
+
+endmodule
+
+
+//////////////////////////////////////////////////////////////////////////
+// Processing element (PE)
+//////////////////////////////////////////////////////////////////////////
+module processing_element(
+ reset, clk,
+ in_a, in_b,
+ op,
+ out
+ );
+ // One lane: the add, subtract and multiply blocks all receive the same
+ // operands; op selects which of their outputs is driven onto out.
+ //   2'b00 -> seq_add, 2'b01 -> seq_sub, any other value -> seq_mul
+
+ input clk;
+ input reset;
+ input  [1:0] op;
+ input  [`DWIDTH-1:0] in_a, in_b;
+ output [`DWIDTH-1:0] out;
+
+ wire [`DWIDTH-1:0] out_sum, out_sub, out_mul;
+
+ seq_add u_add(.clk(clk), .reset(reset), .a(in_a), .b(in_b), .out(out_sum));
+ seq_sub u_sub(.clk(clk), .reset(reset), .a(in_a), .b(in_b), .out(out_sub));
+ seq_mul u_mul(.clk(clk), .reset(reset), .a(in_a), .b(in_b), .out(out_mul));
+
+ // Result mux.
+ wire pick_sum, pick_sub;
+ assign pick_sum = (op == 2'b00);
+ assign pick_sub = (op == 2'b01);
+ assign out = pick_sum ? out_sum : (pick_sub ? out_sub : out_mul);
+
+endmodule
+
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+// Multiply block
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+module seq_mul(a, b, out, reset, clk);
+// Multiplier lane with registered inputs and a registered output.
+//   a, b  : operands, captured on every rising clk edge
+//   out   : registered copy of the multiplier core's result
+//   reset : synchronous; clears only the registers in this wrapper
+input clk;
+input reset;
+input  [`DWIDTH-1:0] a, b;
+output [`DWIDTH-1:0] out;
+
+reg  [`DWIDTH-1:0] a_q, b_q;   // operand registers feeding the core
+wire [`DWIDTH-1:0] prod;       // raw core result
+reg  [`DWIDTH-1:0] prod_q;     // result register driving out
+
+assign out = prod_q;
+
+// Core selection: mult_fp_clk_16 when complex_dsp is defined, otherwise
+// FPMult_16 with its rst input tied low and its flags left unconnected.
+`ifdef complex_dsp
+mult_fp_clk_16 mul_u1(.clk(clk), .a(a_q), .b(b_q), .out(prod));
+`else
+FPMult_16 u_FPMult (.clk(clk), .rst(1'b0), .a(a_q), .b(b_q), .result(prod), .flags());
+`endif
+
+// Operand stage.
+always @(posedge clk) begin
+  if (reset) begin
+    a_q <= 0;
+    b_q <= 0;
+  end else begin
+    a_q <= a;
+    b_q <= b;
+  end
+end
+
+// Result stage.
+always @(posedge clk) begin
+  if (reset) prod_q <= 0;
+  else       prod_q <= prod;
+end
+endmodule
+
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+// Addition block
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+module seq_add(a, b, out, reset, clk);
+// Adder lane with registered inputs and a registered output.
+//   a, b  : operands, captured on every rising clk edge
+//   out   : registered copy of the adder core's result
+//   reset : synchronous; clears only the registers in this wrapper
+input clk;
+input reset;
+input  [`DWIDTH-1:0] a, b;
+output [`DWIDTH-1:0] out;
+
+reg  [`DWIDTH-1:0] a_q, b_q;   // operand registers feeding the core
+wire [`DWIDTH-1:0] sum;        // raw core result
+reg  [`DWIDTH-1:0] sum_q;      // result register driving out
+
+assign out = sum_q;
+
+// Core selection: addition_fp_clk_16 when complex_dsp is defined, otherwise
+// FPAddSub with operation = 1'b0, rst tied low and flags left unconnected.
+`ifdef complex_dsp
+addition_fp_clk_16 add_u1(.clk(clk), .a(a_q), .b(b_q), .out(sum));
+`else
+FPAddSub u_FPAddSub (.clk(clk), .rst(1'b0), .a(a_q), .b(b_q), .operation(1'b0), .result(sum), .flags());
+`endif
+
+// Operand stage.
+always @(posedge clk) begin
+  if (reset) begin
+    a_q <= 0;
+    b_q <= 0;
+  end else begin
+    a_q <= a;
+    b_q <= b;
+  end
+end
+
+// Result stage.
+always @(posedge clk) begin
+  if (reset) sum_q <= 0;
+  else       sum_q <= sum;
+end
+endmodule
+
+
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+// Subtraction block
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+module seq_sub(a, b, out, reset, clk);
+// Subtraction lane with registered inputs and a registered output.
+//   a, b  : operands, captured on every rising clk edge
+//   out   : registered copy of the core's result
+//   reset : synchronous; clears only the registers in this wrapper
+input clk;
+input reset;
+input  [`DWIDTH-1:0] a, b;
+output [`DWIDTH-1:0] out;
+
+reg  [`DWIDTH-1:0] a_q, b_q;   // operand registers feeding the core
+wire [`DWIDTH-1:0] diff;       // raw core result
+reg  [`DWIDTH-1:0] diff_q;     // result register driving out
+
+assign out = diff_q;
+
+// The floating point adder has both modes (add and sub); the mode is not
+// named here. NOTE(review): both branches below are wired exactly like
+// seq_add (addition_fp_clk_16 / operation = 1'b0) - confirm that this
+// block is really meant to produce a - b with these settings.
+`ifdef complex_dsp
+addition_fp_clk_16 sub_u1(.clk(clk), .a(a_q), .b(b_q), .out(diff));
+`else
+FPAddSub u_FPAddSub2(.clk(clk), .rst(1'b0), .a(a_q), .b(b_q), .operation(1'b0), .result(diff), .flags());
+`endif
+
+// Operand stage.
+always @(posedge clk) begin
+  if (reset) begin
+    a_q <= 0;
+    b_q <= 0;
+  end else begin
+    a_q <= a;
+    b_q <= b;
+  end
+end
+
+// Result stage.
+always @(posedge clk) begin
+  if (reset) diff_q <= 0;
+  else       diff_q <= diff;
+end
+
+endmodule
+
+
+`ifndef complex_dsp
+
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+// Floating point 16-bit multiplier
+// This is a heavily modified version of:
+// https://github.com/fbrosser/DSP48E1-FP/tree/master/src/FPMult
+// Original author: Fredrik Brosser
+// Abridged by: Samidh Mehta
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+
+// FPMult_16: pipelined floating-point multiplier for `DWIDTH-bit operands.
+//
+// Data moves through five register stages, all updated on the rising edge of
+// clk and cleared synchronously while rst is high:
+//   pipe_0 : registered inputs {a, b}
+//   pipe_1 : outputs of FPMult_PrepModule (signs, exponents, mantissa product, input exceptions)
+//   pipe_2 : outputs of FPMult_ExecuteModule (sign, exponent, mantissa, GRS) plus input exceptions
+//   pipe_3 : outputs of FPMult_NormalizeModule plus the carried sign/GRS/exceptions
+//   pipe_4 : outputs of FPMult_RoundModule {Z, Flags}
+// result and flags are read directly from pipe_4.
+module FPMult_16(
+		clk,
+		rst,
+		a,
+		b,
+		result,
+		flags
+    );
+	
+	// Input Ports
+	input clk ;							// Clock
+	input rst ;							// Reset signal (synchronous, active high)
+	input [`DWIDTH-1:0] a;						// Input A, a `DWIDTH-bit floating point number
+	input [`DWIDTH-1:0] b;						// Input B, a `DWIDTH-bit floating point number
+	
+	// Output ports
+	output [`DWIDTH-1:0] result ;					// Product, result of the operation, `DWIDTH-bit FP number
+	output [4:0] flags ;						// Exception flags (forwarded input-exception vector)
+	
+	// Internal signals
+	wire [`DWIDTH-1:0] Z_int ;					// Product as assembled by the round module
+	wire [4:0] Flags_int ;						// Flags as produced by the round module
+	
+	wire Sa ;							// A's sign
+	wire Sb ;							// B's sign
+	wire Sp ;							// Product sign
+	wire [`EXPONENT-1:0] Ea ;					// A's exponent
+	wire [`EXPONENT-1:0] Eb ;					// B's exponent
+	wire [2*`MANTISSA+1:0] Mp ;					// Product mantissa
+	wire [4:0] InputExc ;						// Exceptions in inputs
+	wire [`MANTISSA-1:0] NormM ;					// Normalized mantissa
+	wire [`EXPONENT:0] NormE ;					// Normalized exponent
+	wire [`MANTISSA:0] RoundM ;					// Mantissa candidate used when not rounding up
+	wire [`EXPONENT:0] RoundE ;					// Exponent candidate used when no post-round shift occurs
+	wire [`MANTISSA:0] RoundMP ;					// Mantissa candidate used when GRS is set
+	wire [`EXPONENT:0] RoundEP ;					// Exponent candidate used when a post-round shift occurs
+	wire GRS ;							// Round-up indication computed by the execute module
+
+	//reg [63:0] pipe_0;						// Pipeline register Input->Prep
+	reg [2*`DWIDTH-1:0] pipe_0;					// Pipeline register Input->Prep
+
+	//reg [92:0] pipe_1;						// Pipeline register Prep->Execute
+	//reg [3*`MANTISSA+2*`EXPONENT+7:0] pipe_1;			// Pipeline register Prep->Execute
+	reg [3*`MANTISSA+2*`EXPONENT+18:0] pipe_1;			// Pipeline register Prep->Execute
+
+	//reg [38:0] pipe_2;						// Pipeline register Execute->Normalize
+	reg [`MANTISSA+`EXPONENT+7:0] pipe_2;				// Pipeline register Execute->Normalize
+	
+	//reg [72:0] pipe_3;						// Pipeline register Normalize->Round
+	reg [2*`MANTISSA+2*`EXPONENT+10:0] pipe_3;			// Pipeline register Normalize->Round
+
+	//reg [36:0] pipe_4;						// Pipeline register Round->Output
+	reg [`DWIDTH+4:0] pipe_4;					// Pipeline register Round->Output
+	
+	assign result = pipe_4[`DWIDTH+4:5] ;
+	assign flags = pipe_4[4:0] ;
+	
+	// Split the operands into sign/exponent, multiply the mantissas and check for exceptions
+	FPMult_PrepModule PrepModule(clk, rst, pipe_0[2*`DWIDTH-1:`DWIDTH], pipe_0[`DWIDTH-1:0], Sa, Sb, Ea[`EXPONENT-1:0], Eb[`EXPONENT-1:0], Mp[2*`MANTISSA+1:0], InputExc[4:0]) ;
+
+	// Product sign, exponent sum and first normalization of the mantissa product.
+	// NOTE(review): the first two arguments (ports a and b) are slices that do not line up
+	// with the PIPE 1 layout documented below; FPMult_ExecuteModule does not use a or b,
+	// so this has no effect on the result.
+	FPMult_ExecuteModule ExecuteModule(pipe_1[3*`MANTISSA+`EXPONENT*2+7:2*`MANTISSA+2*`EXPONENT+8], pipe_1[2*`MANTISSA+2*`EXPONENT+7:2*`MANTISSA+7], pipe_1[2*`MANTISSA+6:5], pipe_1[2*`MANTISSA+2*`EXPONENT+6:2*`MANTISSA+`EXPONENT+7], pipe_1[2*`MANTISSA+`EXPONENT+6:2*`MANTISSA+7], pipe_1[2*`MANTISSA+2*`EXPONENT+8], pipe_1[2*`MANTISSA+2*`EXPONENT+7], Sp, NormE[`EXPONENT:0], NormM[`MANTISSA-1:0], GRS) ;
+
+	// Remove the exponent bias and prepare the rounding candidates
+	FPMult_NormalizeModule NormalizeModule(pipe_2[`MANTISSA-1:0], pipe_2[`MANTISSA+`EXPONENT:`MANTISSA], RoundE[`EXPONENT:0], RoundEP[`EXPONENT:0], RoundM[`MANTISSA:0], RoundMP[`MANTISSA:0]) ;		
+
+	// Round result and if necessary, perform a second (post-rounding) normalization step
+	//FPMult_RoundModule RoundModule(pipe_3[47:24], pipe_3[23:0], pipe_3[65:57], pipe_3[56:48], pipe_3[66], pipe_3[67], pipe_3[72:68], Z_int[31:0], Flags_int[4:0]) ;		
+	FPMult_RoundModule RoundModule(pipe_3[2*`MANTISSA+1:`MANTISSA+1], pipe_3[`MANTISSA:0], pipe_3[2*`MANTISSA+2*`EXPONENT+3:2*`MANTISSA+`EXPONENT+3], pipe_3[2*`MANTISSA+`EXPONENT+2:2*`MANTISSA+2], pipe_3[2*`MANTISSA+2*`EXPONENT+4], pipe_3[2*`MANTISSA+2*`EXPONENT+5], pipe_3[2*`MANTISSA+2*`EXPONENT+10:2*`MANTISSA+2*`EXPONENT+6], Z_int[`DWIDTH-1:0], Flags_int[4:0]) ;		
+
+	// Pipeline registers. (Replacing the sensitivity list with always @(*) would make
+	// the design combinational; as written it is clocked.)
+	always @ (posedge clk) begin	
+		if(rst) begin
+			pipe_0 <= 0;
+			pipe_1 <= 0;
+			pipe_2 <= 0; 
+			pipe_3 <= 0;
+			pipe_4 <= 0;
+		end 
+		else begin		
+			/* PIPE 0
+				[2*`DWIDTH-1:`DWIDTH] A
+				[`DWIDTH-1:0] B
+			*/
+                       pipe_0 <= {a, b} ;
+
+
+			/* PIPE 1
+				[2*`EXPONENT+3*`MANTISSA + 18] unused (the concatenation is one bit narrower than the register, so this bit is zero)
+				[2*`EXPONENT+3*`MANTISSA + 17: 2*`EXPONENT+2*`MANTISSA + 18] //pipe_0[`DWIDTH+`MANTISSA-1:`DWIDTH] , mantissa of A
+				[2*`EXPONENT+2*`MANTISSA + 17 :2*`EXPONENT+2*`MANTISSA + 9] // pipe_0[8:0]
+				[2*`EXPONENT+2*`MANTISSA + 8] Sa
+				[2*`EXPONENT+2*`MANTISSA + 7] Sb
+				[2*`EXPONENT+2*`MANTISSA + 6:`EXPONENT+2*`MANTISSA+7] Ea
+				[`EXPONENT +2*`MANTISSA+6:2*`MANTISSA+7] Eb
+				[2*`MANTISSA+1+5:5] Mp
+				[4:0] InputExc
+			*/
+			//pipe_1 <= {pipe_0[`DWIDTH+`MANTISSA-1:`DWIDTH], pipe_0[`MANTISSA_MUL_SPLIT_LSB-1:0], Sa, Sb, Ea[`EXPONENT-1:0], Eb[`EXPONENT-1:0], Mp[2*`MANTISSA-1:0], InputExc[4:0]} ;
+			pipe_1 <= {pipe_0[`DWIDTH+`MANTISSA-1:`DWIDTH], pipe_0[8:0], Sa, Sb, Ea[`EXPONENT-1:0], Eb[`EXPONENT-1:0], Mp[2*`MANTISSA+1:0], InputExc[4:0]} ;
+			
+			/* PIPE 2
+				[`EXPONENT + `MANTISSA + 7:`EXPONENT + `MANTISSA + 3] InputExc
+				[`EXPONENT + `MANTISSA + 2] GRS
+				[`EXPONENT + `MANTISSA + 1] Sp
+				[`EXPONENT + `MANTISSA:`MANTISSA] NormE
+				[`MANTISSA-1:0] NormM
+			*/
+			pipe_2 <= {pipe_1[4:0], GRS, Sp, NormE[`EXPONENT:0], NormM[`MANTISSA-1:0]} ;
+			/* PIPE 3
+				[2*`EXPONENT+2*`MANTISSA+10:2*`EXPONENT+2*`MANTISSA+6] InputExc
+				[2*`EXPONENT+2*`MANTISSA+5] GRS
+				[2*`EXPONENT+2*`MANTISSA+4] Sp	
+				[2*`EXPONENT+2*`MANTISSA+3:`EXPONENT+2*`MANTISSA+3] RoundE
+				[`EXPONENT+2*`MANTISSA+2:2*`MANTISSA+2] RoundEP
+				[2*`MANTISSA+1:`MANTISSA+1] RoundM
+				[`MANTISSA:0] RoundMP
+			*/
+			pipe_3 <= {pipe_2[`EXPONENT+`MANTISSA+7:`EXPONENT+`MANTISSA+1], RoundE[`EXPONENT:0], RoundEP[`EXPONENT:0], RoundM[`MANTISSA:0], RoundMP[`MANTISSA:0]} ;
+			/* PIPE 4
+				[`DWIDTH+4:5] Z
+				[4:0] Flags
+			*/				
+			pipe_4 <= {Z_int[`DWIDTH-1:0], Flags_int[4:0]} ;
+		end
+	end
+		
+endmodule
+
+
+
+// FPMult_PrepModule: first (combinational) step of the multiplier.
+//
+// Splits both operands into sign and exponent, multiplies the two mantissas
+// (each with the hidden leading 1 prepended) and classifies the inputs as
+// NaN / infinity.
+//
+// Ports:
+//   clk, rst : present in the port list but not used by this module
+//   a, b     : `DWIDTH-bit floating-point operands
+//   Sa, Sb   : sign bits of a and b
+//   Ea, Eb   : biased exponent fields of a and b
+//   Mp       : (2*`MANTISSA+2)-bit product {1,Ma} * {1,Mb}
+//   InputExc : {any exception, ANaN, BNaN, AInf, BInf}
+module FPMult_PrepModule (
+		clk,
+		rst,
+		a,
+		b,
+		Sa,
+		Sb,
+		Ea,
+		Eb,
+		Mp,
+		InputExc
+	);
+	
+	// Input ports
+	input clk ;
+	input rst ;
+	input [`DWIDTH-1:0] a ;								// Input A, a `DWIDTH-bit floating point number
+	input [`DWIDTH-1:0] b ;								// Input B, a `DWIDTH-bit floating point number
+	
+	// Output ports
+	output Sa ;										// A's sign
+	output Sb ;										// B's sign
+	output [`EXPONENT-1:0] Ea ;								// A's exponent
+	output [`EXPONENT-1:0] Eb ;								// B's exponent
+	output [2*`MANTISSA+1:0] Mp ;							// Mantissa product
+	output [4:0] InputExc ;						// Input numbers are exceptions
+	
+	// Internal signals							// If signal is high...
+	wire ANaN ;										// A is NaN
+	wire BNaN ;										// B is NaN
+	wire AInf ;										// A is infinity
+	wire BInf ;										// B is infinity
+	
+	// The exponent test is the same for NaN and infinity; the two are told apart
+	// by the mantissa field. (The mantissa test previously re-used the exponent
+	// slice for ANaN/AInf/BInf, which made AInf/BInf constant 0 and made ANaN
+	// fire for infinities.)
+	assign ANaN = &(a[`DWIDTH-2:`MANTISSA]) &  |(a[`MANTISSA-1:0]) ;		// All one exponent and not all zero mantissa - NaN
+	assign BNaN = &(b[`DWIDTH-2:`MANTISSA]) &  |(b[`MANTISSA-1:0]) ;		// All one exponent and not all zero mantissa - NaN
+	assign AInf = &(a[`DWIDTH-2:`MANTISSA]) & ~|(a[`MANTISSA-1:0]) ;		// All one exponent and all zero mantissa - Infinity
+	assign BInf = &(b[`DWIDTH-2:`MANTISSA]) & ~|(b[`MANTISSA-1:0]) ;		// All one exponent and all zero mantissa - Infinity
+	
+	// Check for any exceptions and put all flags into exception vector
+	assign InputExc = {(ANaN | BNaN | AInf | BInf), ANaN, BNaN, AInf, BInf} ;
+	
+	// Take input numbers apart
+	assign Sa = a[`DWIDTH-1] ;							// A's sign
+	assign Sb = b[`DWIDTH-1] ;							// B's sign
+	assign Ea = a[`DWIDTH-2:`MANTISSA];						// A's exponent field, passed through unmodified
+	assign Eb = b[`DWIDTH-2:`MANTISSA];						// B's exponent field, passed through unmodified
+
+	// Full mantissa multiplication, hidden 1 prepended to both operands
+	assign Mp = ({1'b1,a[`MANTISSA-1:0]}*{1'b1, b[`MANTISSA-1:0]}) ;
+
+endmodule
+
+
+// FPMult_ExecuteModule: derives the product sign, the (still biased-twice)
+// exponent sum and a first-normalized mantissa from the mantissa product.
+//
+// Ports:
+//   a, b   : kept for interface compatibility; not used in the computation
+//   MpC    : mantissa product from the prep stage
+//   Ea, Eb : operand exponents
+//   Sa, Sb : operand signs
+//   Sp     : product sign
+//   NormE  : Ea + Eb, plus one when the product overflowed into its top bit
+//   NormM  : `MANTISSA bits taken below the leading one of the product
+//   GRS    : round-up indication derived from the discarded product bits
+module FPMult_ExecuteModule(
+		a, b, MpC, Ea, Eb, Sa, Sb,
+		Sp, NormE, NormM, GRS
+    );
+
+	// Inputs
+	input [`MANTISSA-1:0] a ;
+	input [2*`EXPONENT:0] b ;
+	input [2*`MANTISSA+1:0] MpC ;
+	input [`EXPONENT-1:0] Ea ;
+	input [`EXPONENT-1:0] Eb ;
+	input Sa ;
+	input Sb ;
+
+	// Outputs
+	output Sp ;
+	output [`EXPONENT:0] NormE ;
+	output [`MANTISSA-1:0] NormM ;
+	output GRS ;
+
+	// Top bit of the product: set when the mantissa product needs one extra
+	// position, in which case the exponent is bumped by one.
+	wire prod_msb ;
+	assign prod_msb = MpC[2*`MANTISSA+1] ;
+
+	// Signs differ -> negative product
+	assign Sp = (Sa ^ Sb) ;
+
+	// Exponent sum, incremented on product overflow
+	assign NormE = (Ea + Eb + prod_msb) ;
+
+	// Pick the mantissa window according to where the leading one sits
+	assign NormM = (prod_msb ? MpC[2*`MANTISSA:`MANTISSA+1] : MpC[2*`MANTISSA-1:`MANTISSA]) ;
+
+	assign GRS = ((MpC[`MANTISSA]&(MpC[`MANTISSA+1]))|(|MpC[`MANTISSA-1:0])) ;
+
+endmodule
+
+// FPMult_NormalizeModule: removes the exponent bias from the summed exponent
+// and prepares the two exponent / mantissa candidates used by the round stage.
+//
+// Ports:
+//   NormM   : normalized mantissa from the execute stage
+//   NormE   : Ea + Eb (+1 on product overflow); contains the bias twice
+//   RoundE  : NormE - bias, used when no post-rounding shift is needed
+//   RoundEP : NormE - bias + 1, used when rounding overflowed the mantissa and
+//             it had to be shifted right by one
+//   RoundM  : mantissa candidate without round-up (zero-extended NormM)
+//   RoundMP : mantissa candidate for the round-up case
+module FPMult_NormalizeModule(
+		NormM,
+		NormE,
+		RoundE,
+		RoundEP,
+		RoundM,
+		RoundMP
+    );
+
+	// Input Ports
+	input [`MANTISSA-1:0] NormM ;									// Normalized mantissa
+	input [`EXPONENT:0] NormE ;									// Normalized exponent
+
+	// Output Ports
+	output [`EXPONENT:0] RoundE ;
+	output [`EXPONENT:0] RoundEP ;
+	output [`MANTISSA:0] RoundM ;
+	output [`MANTISSA:0] RoundMP ; 
+	
+	// bias = 2^(`EXPONENT-1) - 1, e.g. `EXPONENT = 5 -> 2^4 - 1 = 15
+	wire [`EXPONENT-1 : 0] bias;
+
+	assign bias =  ((1<< (`EXPONENT -1)) -1);
+
+	assign RoundE = NormE - bias ;
+	// A one-bit right shift of the mantissa must *raise* the exponent by one.
+	// This was previously "- bias - 1", i.e. off by two in the wrong direction.
+	assign RoundEP = NormE - bias + 1 ;
+	assign RoundM = NormM ;
+	// NOTE(review): the round-up candidate is the un-incremented mantissa, so the
+	// result is effectively truncated and RoundEP is never selected downstream.
+	// Left unchanged here; confirm whether NormM + 1 was intended.
+	assign RoundMP = NormM ;
+
+endmodule
+
+// FPMult_RoundModule: selects the rounded mantissa / exponent pair, performs
+// the optional post-rounding one-bit normalization and packs the result.
+//
+// Ports:
+//   RoundM, RoundMP : mantissa candidates (GRS low / GRS high)
+//   RoundE, RoundEP : exponent candidates (no shift / shift)
+//   Sp              : product sign
+//   GRS             : selects RoundMP over RoundM
+//   InputExc        : input exception vector, forwarded unchanged as Flags
+//   Z               : packed result {sign, exponent, mantissa}
+//   Flags           : copy of InputExc
+module FPMult_RoundModule(
+		RoundM, RoundMP, RoundE, RoundEP,
+		Sp, GRS, InputExc,
+		Z, Flags
+    );
+
+	// Inputs
+	input [`MANTISSA:0] RoundM ;									// Mantissa candidate, no round-up
+	input [`MANTISSA:0] RoundMP ;									// Mantissa candidate, round-up
+	input [`EXPONENT:0] RoundE ;									// Exponent candidate, no shift
+	input [`EXPONENT:0] RoundEP ;									// Exponent candidate, after shift
+	input Sp ;
+	input GRS ;
+	input [4:0] InputExc ;
+
+	// Outputs
+	output [`DWIDTH-1:0] Z ;
+	output [4:0] Flags ;
+
+	// Internal nets
+	wire [`MANTISSA:0] picked_m ;									// Mantissa chosen by GRS
+	wire               m_overflow ;								// Top bit of the chosen mantissa
+	wire [`MANTISSA:0] out_m ;										// Mantissa after optional shift
+	wire [`EXPONENT:0] out_e ;										// Exponent matching out_m
+
+	assign picked_m   = GRS ? RoundMP : RoundM ;
+	assign m_overflow = picked_m[`MANTISSA] ;
+
+	// When the top bit is set, drop the LSB and move everything down by one,
+	// and take the exponent candidate that goes with the shifted mantissa.
+	assign out_m = (m_overflow ? {1'b0, picked_m[`MANTISSA:1]} : picked_m[`MANTISSA:0]) ;
+	assign out_e = (m_overflow ? RoundEP : RoundE) ;
+
+	// Pack sign, exponent and mantissa; exceptions pass straight through
+	assign Z     = {Sp, out_e[`EXPONENT-1:0], out_m[`MANTISSA-1:0]} ;
+	assign Flags = InputExc[4:0];
+
+endmodule
+`endif
+
+///////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////
+// Floating point 16-bit adder
+// This is a heavily modified version of:
+// https://github.com/fbrosser/DSP48E1-FP/tree/master/src/FP_AddSub
+// Original author: Fredrik Brosser
+// Abridged by: Samidh Mehta
+///////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////
+`ifndef complex_dsp
+
+// FPAddSub: pipelined floating-point adder/subtractor for `DWIDTH-bit operands.
+//
+// The datapath is a chain of sub-modules connected through the vectors
+// pipe_1 .. pipe_9. Only pipe_1, pipe_3, pipe_5 and pipe_8 are registers
+// (four clocked stages, cleared synchronously while rst is high); pipe_2,
+// pipe_4, pipe_6, pipe_7 and pipe_9 are plain wires, so the stages they
+// connect are combinationally merged. The bit layout of every pipe vector is
+// documented next to its assignment below.
+module FPAddSub(
+		//bf16,
+		clk,
+		rst,
+		a,
+		b,
+		operation,			// 0 add, 1 sub
+		result,
+		flags
+	);
+	//input bf16; //1 for Bfloat16, 0 for IEEE half precision
+
+	// Clock and reset
+	input clk ;										// Clock signal
+	input rst ;										// Reset (active high, resets pipeline registers)
+	
+	// Input ports
+	input [`DWIDTH-1:0] a ;								// Input A, a `DWIDTH-bit floating point number
+	input [`DWIDTH-1:0] b ;								// Input B, a `DWIDTH-bit floating point number
+	input operation ;								// Operation select signal (0 add, 1 sub)
+	
+	// Output ports
+	output [`DWIDTH-1:0] result ;						// Result of the operation
+	output [4:0] flags ;							// Flags indicating exceptions according to IEEE754
+	
+	// Pipeline Registers
+	//reg [79:0] pipe_1;							// Pipeline register PreAlign->Align1
+	reg [2*`EXPONENT + 2*`DWIDTH + 5:0] pipe_1;							// Pipeline register PreAlign->Align1
+
+	//reg [67:0] pipe_2;							// Pipeline register Align1->Align3
+	//reg [2*`EXPONENT+ 2*`MANTISSA + 8:0] pipe_2;							// Pipeline register Align1->Align3
+	wire [2*`EXPONENT+ 2*`MANTISSA + 8:0] pipe_2;							// Align -> AlignShift1 (combinational)
+
+	//reg [76:0] pipe_3;	68						// Pipeline register Align1->Align3
+	reg [2*`EXPONENT+ 2*`MANTISSA + 9:0] pipe_3;							// Pipeline register AlignShift1->AlignShift2
+
+	//reg [69:0] pipe_4;							// Pipeline register Align3->Execute
+	//reg [2*`EXPONENT+ 2*`MANTISSA + 9:0] pipe_4;							// Pipeline register Align3->Execute
+	wire [2*`EXPONENT+ 2*`MANTISSA + 9:0] pipe_4;							// AlignShift2 -> Execute (combinational)
+	
+	//reg [51:0] pipe_5;							// Pipeline register Execute->Normalize
+	reg [`DWIDTH+`EXPONENT+11:0] pipe_5;							// Pipeline register Execute->Normalize
+
+	//reg [56:0] pipe_6;							// Pipeline register Nomalize->NormalizeShift1
+	//reg [`DWIDTH+`EXPONENT+16:0] pipe_6;							// Pipeline register Nomalize->NormalizeShift1
+	wire [`DWIDTH+`EXPONENT+16:0] pipe_6;							// Normalize -> NormalizeShift1 (combinational)
+
+	//reg [56:0] pipe_7;							// Pipeline register NormalizeShift2->NormalizeShift3
+	//reg [`DWIDTH+`EXPONENT+16:0] pipe_7;							// Pipeline register NormalizeShift2->NormalizeShift3
+	wire [`DWIDTH+`EXPONENT+16:0] pipe_7;							// NormalizeShift1 -> NormalizeShift2 (combinational)
+	//reg [54:0] pipe_8;							// Pipeline register NormalizeShift3->Round
+	reg [`EXPONENT*2+`MANTISSA+15:0] pipe_8;							// Pipeline register NormalizeShift2->Round
+
+	//reg [40:0] pipe_9;							// Pipeline register NormalizeShift3->Round
+	//reg [`DWIDTH+8:0] pipe_9;							// Pipeline register NormalizeShift3->Round
+	wire [`DWIDTH+8:0] pipe_9;							// Round -> Exception (combinational)
+
+	// Internal wires between modules
+	wire [`DWIDTH-2:0] Aout_0 ;							// A without its sign bit
+	wire [`DWIDTH-2:0] Bout_0 ;							// B without its sign bit
+	wire Opout_0 ;									// Operation select, passed through the prealign module
+	wire Sa_0 ;										// A's sign
+	wire Sb_0 ;										// B's sign
+	wire MaxAB_1 ;									// Indicates the larger of A and B(0/A, 1/B)
+	wire [`EXPONENT-1:0] CExp_1 ;							// Common Exponent
+	wire [`EXPONENT-1:0] Shift_1 ;							// Number of steps to smaller mantissa shift right (align)
+	wire [`MANTISSA-1:0] Mmax_1 ;							// Larger mantissa
+	wire [4:0] InputExc_0 ;						// Input numbers are exceptions
+	wire [2*`EXPONENT-1:0] ShiftDet_0 ;						// {ExpB - ExpA, ExpA - ExpB}
+	wire [`MANTISSA-1:0] MminS_1 ;						// Smaller mantissa, before alignment shifting
+	wire [`MANTISSA:0] MminS_2 ;						// Smaller mantissa after alignment shift stage 1
+	wire [`MANTISSA:0] Mmin_3 ;							// Smaller mantissa after alignment shift stage 2
+	wire [`DWIDTH:0] Sum_4 ;							// Mantissa sum / difference from the execution module
+	wire PSgn_4 ;									// Sign of the result
+	wire Opr_4 ;									// Effective (performed) operation
+	wire [`EXPONENT-1:0] Shift_5 ;							// Number of steps to shift sum left (normalize); only bits [4:0] are connected below
+	wire [`DWIDTH:0] SumS_5 ;							// Sum as output by the normalize module
+	wire [`DWIDTH:0] SumS_6 ;							// Declared but not referenced in this module
+	wire [`DWIDTH:0] SumS_7 ;							// Sum as output by normalization shift stage 1
+	wire [`MANTISSA-1:0] NormM_8 ;						// Normalized mantissa
+	wire [`EXPONENT:0] NormE_8;							// Adjusted exponent
+	wire ZeroSum_8 ;								// Zero flag
+	wire NegE_8 ;									// Flag indicating negative exponent
+	wire R_8 ;										// Round bit
+	wire S_8 ;										// Final sticky bit
+	wire FG_8 ;										// Presumably the final guard bit (named "final sticky" originally) - TODO confirm in NormalizeShift2
+	wire [`DWIDTH-1:0] P_int ;							// Result from the round module, before exception handling
+	wire EOF ;										// Second output of the round module, forwarded to the exception module
+	
+	// Prepare the operands for alignment and check for exceptions
+	FPAddSub_PrealignModule PrealignModule
+	(	// Inputs
+		a, b, operation,
+		// Outputs
+		Sa_0, Sb_0, ShiftDet_0[2*`EXPONENT-1:0], InputExc_0[4:0], Aout_0[`DWIDTH-2:0], Bout_0[`DWIDTH-2:0], Opout_0) ;
+		
+	// Determine the larger operand, the common exponent and the alignment shift
+	FPAddSub_AlignModule AlignModule
+	(	// Inputs
+		pipe_1[2*`EXPONENT + 2*`DWIDTH + 4: 2*`EXPONENT +`DWIDTH + 6], pipe_1[2*`EXPONENT +`DWIDTH + 5 :  2*`EXPONENT +7], pipe_1[2*`EXPONENT+4:5],
+		// Outputs
+		CExp_1[`EXPONENT-1:0], MaxAB_1, Shift_1[`EXPONENT-1:0], MminS_1[`MANTISSA-1:0], Mmax_1[`MANTISSA-1:0]) ;	
+
+	// Alignment Shift Stage 1 (receives the shift amount without its two low bits)
+	FPAddSub_AlignShift1 AlignShift1
+	(  // Inputs
+		//bf16, 
+		pipe_2[`MANTISSA-1:0], pipe_2[`EXPONENT+ 2*`MANTISSA + 4 : 2*`MANTISSA + 7],
+		// Outputs
+		MminS_2[`MANTISSA:0]) ;
+
+	// Alignment Shift Stage 2 (receives the two low bits of the shift amount)
+	FPAddSub_AlignShift2 AlignShift2  
+	(  // Inputs
+		pipe_3[`MANTISSA:0], pipe_3[2*`MANTISSA+7:2*`MANTISSA+6],
+		// Outputs
+		Mmin_3[`MANTISSA:0]) ;
+						
+	// Perform mantissa addition
+	FPAddSub_ExecutionModule ExecutionModule
+	(  // Inputs
+		pipe_4[`MANTISSA*2+5:`MANTISSA+6], pipe_4[`MANTISSA:0], pipe_4[2*`EXPONENT+ 2*`MANTISSA + 8], pipe_4[2*`EXPONENT+ 2*`MANTISSA + 7], pipe_4[2*`EXPONENT+ 2*`MANTISSA + 6], pipe_4[2*`EXPONENT+ 2*`MANTISSA + 9],
+		// Outputs
+		Sum_4[`DWIDTH:0], PSgn_4, Opr_4) ;
+	
+	// Prepare normalization of result
+	FPAddSub_NormalizeModule NormalizeModule
+	(  // Inputs
+		pipe_5[`DWIDTH:0], 
+		// Outputs
+		SumS_5[`DWIDTH:0], Shift_5[4:0]) ;
+					
+	// Normalization Shift Stage 1 (receives Shift_5[3:0])
+	FPAddSub_NormalizeShift1 NormalizeShift1
+	(  // Inputs
+		pipe_6[`DWIDTH:0], pipe_6[`DWIDTH+`EXPONENT+14:`DWIDTH+`EXPONENT+11],
+		// Outputs
+		SumS_7[`DWIDTH:0]) ;
+		
+	// Normalization Shift Stage 2 and final guard, sticky and round bits
+	FPAddSub_NormalizeShift2 NormalizeShift2
+	(  // Inputs
+		pipe_7[`DWIDTH:0], pipe_7[`DWIDTH+`EXPONENT+5:`DWIDTH+6], pipe_7[`DWIDTH+`EXPONENT+15:`DWIDTH+`EXPONENT+11],
+		// Outputs
+		NormM_8[`MANTISSA-1:0], NormE_8[`EXPONENT:0], ZeroSum_8, NegE_8, R_8, S_8, FG_8) ;
+
+	// Round and put result together
+	// Inputs, in order: ZeroSum, NormE, NormM, R, S, FG, Sa, Sb, operation, MaxAB
+	FPAddSub_RoundModule RoundModule
+	(  // Inputs
+		 pipe_8[3], pipe_8[4+`EXPONENT:4], pipe_8[`EXPONENT+`MANTISSA+4:5+`EXPONENT], pipe_8[1], pipe_8[0], pipe_8[`EXPONENT*2+`MANTISSA+15], pipe_8[`EXPONENT*2+`MANTISSA+12], pipe_8[`EXPONENT*2+`MANTISSA+11], pipe_8[`EXPONENT*2+`MANTISSA+14], pipe_8[`EXPONENT*2+`MANTISSA+10], 
+		// Outputs
+		P_int[`DWIDTH-1:0], EOF) ;
+	
+	// Check for exceptions
+	// Inputs, in order: P_int, NegE, R, S, InputExc, EOF
+	FPAddSub_ExceptionModule Exceptionmodule
+	(  // Inputs
+		pipe_9[8+`DWIDTH:9], pipe_9[8], pipe_9[7], pipe_9[6], pipe_9[5:1], pipe_9[0], 
+		// Outputs
+		result[`DWIDTH-1:0], flags[4:0]) ;			
+	
+
+// Combinational "pipe" vectors (layouts are documented in the always block below)
+assign pipe_2 = {pipe_1[2*`EXPONENT + 2*`DWIDTH + 5], pipe_1[2*`EXPONENT +6:2*`EXPONENT +5], MaxAB_1, CExp_1[`EXPONENT-1:0], Shift_1[`EXPONENT-1:0], Mmax_1[`MANTISSA-1:0], pipe_1[4:0], MminS_1[`MANTISSA-1:0]} ;
+assign pipe_4 = {pipe_3[2*`EXPONENT+ 2*`MANTISSA + 9:`MANTISSA+1], Mmin_3[`MANTISSA:0]} ;
+assign pipe_6 = {pipe_5[`DWIDTH+`EXPONENT+11], Shift_5[4:0], pipe_5[`DWIDTH+`EXPONENT+10:`DWIDTH+1], SumS_5[`DWIDTH:0]} ;
+assign pipe_7 = {pipe_6[`DWIDTH+`EXPONENT+16:`DWIDTH+1], SumS_7[`DWIDTH:0]} ;
+assign pipe_9 = {P_int[`DWIDTH-1:0], pipe_8[2], pipe_8[1], pipe_8[0], pipe_8[`EXPONENT+`MANTISSA+9:`EXPONENT+`MANTISSA+5], EOF} ;
+
+	// Registered pipeline stages: pipe_1, pipe_3, pipe_5 and pipe_8
+	always @ (posedge clk) begin	
+		if(rst) begin
+			pipe_1 <= 0;
+			//pipe_2 <= 0;
+			pipe_3 <= 0;
+			//pipe_4 <= 0;
+			pipe_5 <= 0;
+			//pipe_6 <= 0;
+			//pipe_7 <= 0;
+			pipe_8 <= 0;
+			//pipe_9 <= 0;
+		end 
+		else begin
+/* PIPE_1:
+	[2*`EXPONENT + 2*`DWIDTH + 5]  Opout_0
+	[2*`EXPONENT + 2*`DWIDTH + 4: 2*`EXPONENT +`DWIDTH + 6] A_out0
+	[2*`EXPONENT +`DWIDTH + 5 :  2*`EXPONENT +7] Bout_0
+	[2*`EXPONENT +6] Sa_0
+	[2*`EXPONENT +5] Sb_0
+	[2*`EXPONENT +4 : 5] ShiftDet_0
+	[4:0] Input Exc
+*/
+			pipe_1 <= {Opout_0, Aout_0[`DWIDTH-2:0], Bout_0[`DWIDTH-2:0], Sa_0, Sb_0, ShiftDet_0[2*`EXPONENT -1:0], InputExc_0[4:0]} ;	
+/* PIPE_2 (wire, assigned above)
+[2*`EXPONENT+ 2*`MANTISSA + 8] operation
+[2*`EXPONENT+ 2*`MANTISSA + 7] Sa_0
+[2*`EXPONENT+ 2*`MANTISSA + 6] Sb_0
+[2*`EXPONENT+ 2*`MANTISSA + 5] MaxAB_0
+[2*`EXPONENT+ 2*`MANTISSA + 4:`EXPONENT+ 2*`MANTISSA + 5] CExp_0
+[`EXPONENT+ 2*`MANTISSA + 4 : 2*`MANTISSA + 5] Shift_0
+[2*`MANTISSA + 4:`MANTISSA + 5] Mmax_0
+[`MANTISSA + 4 : `MANTISSA] InputExc_0
+[`MANTISSA-1:0] MminS_1
+*/
+			//pipe_2 <= {pipe_1[2*`EXPONENT + 2*`DWIDTH + 5], pipe_1[2*`EXPONENT +6:2*`EXPONENT +5], MaxAB_1, CExp_1[`EXPONENT-1:0], Shift_1[`EXPONENT-1:0], Mmax_1[`MANTISSA-1:0], pipe_1[4:0], MminS_1[`MANTISSA-1:0]} ;	
+/* PIPE_3
+[2*`EXPONENT+ 2*`MANTISSA + 9] operation
+[2*`EXPONENT+ 2*`MANTISSA + 8] Sa_0
+[2*`EXPONENT+ 2*`MANTISSA + 7] Sb_0
+[2*`EXPONENT+ 2*`MANTISSA + 6] MaxAB_0
+[2*`EXPONENT+ 2*`MANTISSA + 5:`EXPONENT+ 2*`MANTISSA + 6] CExp_0
+[`EXPONENT+ 2*`MANTISSA + 5 : 2*`MANTISSA + 6] Shift_0
+[2*`MANTISSA + 5:`MANTISSA + 6] Mmax_0
+[`MANTISSA + 5 : `MANTISSA + 1] InputExc_0
+[`MANTISSA:0] MminS_2
+*/
+			pipe_3 <= {pipe_2[2*`EXPONENT+ 2*`MANTISSA + 8:`MANTISSA], MminS_2[`MANTISSA:0]} ;	
+/* PIPE_4 (wire, assigned above)
+[2*`EXPONENT+ 2*`MANTISSA + 9] operation
+[2*`EXPONENT+ 2*`MANTISSA + 8] Sa_0
+[2*`EXPONENT+ 2*`MANTISSA + 7] Sb_0
+[2*`EXPONENT+ 2*`MANTISSA + 6] MaxAB_0
+[2*`EXPONENT+ 2*`MANTISSA + 5:`EXPONENT+ 2*`MANTISSA + 6] CExp_0
+[`EXPONENT+ 2*`MANTISSA + 5 : 2*`MANTISSA + 6] Shift_0
+[2*`MANTISSA + 5:`MANTISSA + 6] Mmax_0
+[`MANTISSA + 5 : `MANTISSA + 1] InputExc_0
+[`MANTISSA:0] Mmin_3
+*/				
+			//pipe_4 <= {pipe_3[2*`EXPONENT+ 2*`MANTISSA + 9:`MANTISSA+1], Mmin_3[`MANTISSA:0]} ;	
+/* PIPE_5 :
+[`DWIDTH+ `EXPONENT + 11] operation
+[`DWIDTH+ `EXPONENT + 10] PSgn_4
+[`DWIDTH+ `EXPONENT + 9] Opr_4
+[`DWIDTH+ `EXPONENT + 8] Sa_0
+[`DWIDTH+ `EXPONENT + 7] Sb_0
+[`DWIDTH+ `EXPONENT + 6] MaxAB_0
+[`DWIDTH+ `EXPONENT + 5 :`DWIDTH+6] CExp_0
+[`DWIDTH+5:`DWIDTH+1] InputExc_0
+[`DWIDTH:0] Sum_4
+*/					
+			pipe_5 <= {pipe_4[2*`EXPONENT+ 2*`MANTISSA + 9], PSgn_4, Opr_4, pipe_4[2*`EXPONENT+ 2*`MANTISSA + 8:`EXPONENT+ 2*`MANTISSA + 6], pipe_4[`MANTISSA+5:`MANTISSA+1], Sum_4[`DWIDTH:0]} ;
+/* PIPE_6 (wire, assigned above) :
+[`DWIDTH+ `EXPONENT + 16] operation
+[`DWIDTH+ `EXPONENT + 15:`DWIDTH+ `EXPONENT + 11] Shift_5
+[`DWIDTH+ `EXPONENT + 10] PSgn_4
+[`DWIDTH+ `EXPONENT + 9] Opr_4
+[`DWIDTH+ `EXPONENT + 8] Sa_0
+[`DWIDTH+ `EXPONENT + 7] Sb_0
+[`DWIDTH+ `EXPONENT + 6] MaxAB_0
+[`DWIDTH+ `EXPONENT + 5 :`DWIDTH+6] CExp_0
+[`DWIDTH+5:`DWIDTH+1] InputExc_0
+[`DWIDTH:0] SumS_5
+*/				
+			//pipe_6 <= {pipe_5[`DWIDTH+`EXPONENT+11], Shift_5[4:0], pipe_5[`DWIDTH+`EXPONENT+10:`DWIDTH+1], SumS_5[`DWIDTH:0]} ;	
+/* PIPE_7 (wire, assigned above) :
+[`DWIDTH+ `EXPONENT + 16] operation
+[`DWIDTH+ `EXPONENT + 15:`DWIDTH+ `EXPONENT + 11] Shift_5
+[`DWIDTH+ `EXPONENT + 10] PSgn_4
+[`DWIDTH+ `EXPONENT + 9] Opr_4
+[`DWIDTH+ `EXPONENT + 8] Sa_0
+[`DWIDTH+ `EXPONENT + 7] Sb_0
+[`DWIDTH+ `EXPONENT + 6] MaxAB_0
+[`DWIDTH+ `EXPONENT + 5 :`DWIDTH+6] CExp_0
+[`DWIDTH+5:`DWIDTH+1] InputExc_0
+[`DWIDTH:0] SumS_7
+*/						
+			//pipe_7 <= {pipe_6[`DWIDTH+`EXPONENT+16:`DWIDTH+1], SumS_7[`DWIDTH:0]} ;	
+/* PIPE_8:
+[2*`EXPONENT + `MANTISSA + 15] FG_8 
+[2*`EXPONENT + `MANTISSA + 14] operation
+[2*`EXPONENT + `MANTISSA + 13] PSgn_4
+[2*`EXPONENT + `MANTISSA + 12] Sa_0
+[2*`EXPONENT + `MANTISSA + 11] Sb_0
+[2*`EXPONENT + `MANTISSA + 10] MaxAB_0
+[2*`EXPONENT + `MANTISSA + 9:`EXPONENT + `MANTISSA + 10] CExp_0
+[`EXPONENT + `MANTISSA + 9:`EXPONENT + `MANTISSA + 5] InputExc_8
+[`EXPONENT + `MANTISSA + 4 :`EXPONENT + 5] NormM_8 
+[`EXPONENT + 4 :4] NormE_8
+[3] ZeroSum_8
+[2] NegE_8
+[1] R_8
+[0] S_8
+(Opr_4, bit `DWIDTH+`EXPONENT+9 of pipe_7, is dropped at this point.)
+*/				
+			pipe_8 <= {FG_8, pipe_7[`DWIDTH+`EXPONENT+16], pipe_7[`DWIDTH+`EXPONENT+10], pipe_7[`DWIDTH+`EXPONENT+8:`DWIDTH+1], NormM_8[`MANTISSA-1:0], NormE_8[`EXPONENT:0], ZeroSum_8, NegE_8, R_8, S_8} ;	
+/* pipe_9 (wire, assigned above):
+[`DWIDTH + 8 :9] P_int
+[8] NegE_8
+[7] R_8
+[6] S_8
+[5:1] InputExc_8
+[0] EOF
+*/				
+			//pipe_9 <= {P_int[`DWIDTH-1:0], pipe_8[2], pipe_8[1], pipe_8[0], pipe_8[`EXPONENT+`MANTISSA+9:`EXPONENT+`MANTISSA+5], EOF} ;	
+		end
+	end		
+	
+endmodule
+
+
+//
+// Description:	 	The pre-alignment module is responsible for taking the inputs
+//							apart and checking the parts for exceptions.
+//							The exponent difference is also calculated in this module.
+//
+
+
+// FPAddSub_PrealignModule: purely combinational front end of the adder.
+//
+// Ports:
+//   A, B       : `DWIDTH-bit floating-point operands
+//   operation  : operation select, forwarded unchanged on Opout
+//   Sa, Sb     : sign bits of A and B
+//   ShiftDet   : {ExpB - ExpA, ExpA - ExpB}, each `EXPONENT bits wide
+//   InputExc   : {any exception, ANaN, BNaN, AInf, BInf}
+//   Aout, Bout : A and B with the sign bit removed
+//   Opout      : copy of operation
+module FPAddSub_PrealignModule(
+		A, B, operation,
+		Sa, Sb, ShiftDet, InputExc, Aout, Bout, Opout
+	);
+
+	// Inputs
+	input [`DWIDTH-1:0] A ;
+	input [`DWIDTH-1:0] B ;
+	input operation ;
+
+	// Outputs
+	output Sa ;
+	output Sb ;
+	output [2*`EXPONENT-1:0] ShiftDet ;
+	output [4:0] InputExc ;
+	output [`DWIDTH-2:0] Aout ;
+	output [`DWIDTH-2:0] Bout ;
+	output Opout ;
+
+	// Field classification helpers
+	wire a_exp_ones ;										// A's exponent field is all ones
+	wire b_exp_ones ;										// B's exponent field is all ones
+	wire a_frac_any ;										// A's mantissa field is non-zero
+	wire b_frac_any ;										// B's mantissa field is non-zero
+
+	// Exception indicators
+	wire a_is_nan ;
+	wire b_is_nan ;
+	wire a_is_inf ;
+	wire b_is_inf ;
+
+	// Exponent differences (two's complement, truncated to `EXPONENT bits)
+	wire [`EXPONENT-1:0] exp_a_minus_b ;
+	wire [`EXPONENT-1:0] exp_b_minus_a ;
+
+	assign a_exp_ones = &(A[`DWIDTH-2:`DWIDTH-1-`EXPONENT]) ;
+	assign b_exp_ones = &(B[`DWIDTH-2:`DWIDTH-1-`EXPONENT]) ;
+	assign a_frac_any = |(A[`MANTISSA-1:0]) ;
+	assign b_frac_any = |(B[`MANTISSA-1:0]) ;
+
+	// All-ones exponent: non-zero mantissa -> NaN, zero mantissa -> infinity
+	assign a_is_nan = a_exp_ones &  a_frac_any ;
+	assign b_is_nan = b_exp_ones &  b_frac_any ;
+	assign a_is_inf = a_exp_ones & ~a_frac_any ;
+	assign b_is_inf = b_exp_ones & ~b_frac_any ;
+
+	assign InputExc = {(a_is_nan | b_is_nan | a_is_inf | b_is_inf), a_is_nan, b_is_nan, a_is_inf, b_is_inf} ;
+
+	// Subtraction written as add-the-complement-plus-one
+	assign exp_a_minus_b = (A[`DWIDTH-2:`MANTISSA] + ~(B[`DWIDTH-2:`MANTISSA]) + 1) ;
+	assign exp_b_minus_a = (B[`DWIDTH-2:`MANTISSA] + ~(A[`DWIDTH-2:`MANTISSA]) + 1) ;
+	assign ShiftDet = {exp_b_minus_a[`EXPONENT-1:0], exp_a_minus_b[`EXPONENT-1:0]} ;
+
+	// Straight pass-throughs
+	assign Opout = operation ;
+	assign Sa    = A[`DWIDTH-1] ;
+	assign Sb    = B[`DWIDTH-1] ;
+	assign Aout  = A[`DWIDTH-2:0] ;
+	assign Bout  = B[`DWIDTH-2:0] ;
+
+endmodule
+
+
+//
+// Description:	 	The alignment module determines the larger input operand and
+//							sets the mantissas, shift and common exponent accordingly.
+//
+
+
+// FPAddSub_AlignModule: picks the larger operand (by magnitude bits) and
+// routes mantissas, exponent and shift amount accordingly.
+//
+// Ports:
+//   A, B     : operands without their sign bit
+//   ShiftDet : {shift to use when B is larger, shift to use when A is larger}
+//   CExp     : exponent field of the larger operand
+//   MaxAB    : 0 when A is selected as larger, 1 when B is (A < B)
+//   Shift    : selected alignment shift amount
+//   Mmin     : mantissa field of the smaller operand
+//   Mmax     : mantissa field of the larger operand
+module FPAddSub_AlignModule (
+		A, B, ShiftDet,
+		CExp, MaxAB, Shift, Mmin, Mmax
+	);
+
+	// Inputs
+	input [`DWIDTH-2:0] A ;
+	input [`DWIDTH-2:0] B ;
+	input [2*`EXPONENT-1:0] ShiftDet ;
+
+	// Outputs
+	output [`EXPONENT-1:0] CExp ;
+	output MaxAB ;
+	output [`EXPONENT-1:0] Shift ;
+	output [`MANTISSA-1:0] Mmin ;
+	output [`MANTISSA-1:0] Mmax ;
+
+	// Field views of the two operands
+	wire [`EXPONENT-1:0] a_exp ;
+	wire [`EXPONENT-1:0] b_exp ;
+	wire [`MANTISSA-1:0] a_frac ;
+	wire [`MANTISSA-1:0] b_frac ;
+
+	// The two pre-computed shift amounts packed into ShiftDet
+	wire [`EXPONENT-1:0] shift_a_larger ;
+	wire [`EXPONENT-1:0] shift_b_larger ;
+
+	// High when B's magnitude bits compare greater than A's
+	wire pick_b ;
+
+	assign a_exp  = A[`MANTISSA+`EXPONENT-1:`MANTISSA] ;
+	assign b_exp  = B[`MANTISSA+`EXPONENT-1:`MANTISSA] ;
+	assign a_frac = A[`MANTISSA-1:0] ;
+	assign b_frac = B[`MANTISSA-1:0] ;
+
+	assign shift_a_larger = ShiftDet[`EXPONENT-1:0] ;
+	assign shift_b_larger = ShiftDet[2*`EXPONENT-1:`EXPONENT] ;
+
+	assign pick_b = (A[`DWIDTH-2:0] < B[`DWIDTH-2:0]) ;
+	assign MaxAB  = pick_b ;
+
+	// Two-way selects driven by the comparison result
+	assign Shift = pick_b ? shift_b_larger : shift_a_larger ;
+	assign Mmax  = pick_b ? b_frac : a_frac ;
+	assign Mmin  = pick_b ? a_frac : b_frac ;
+	assign CExp  = (pick_b ? b_exp : a_exp) ;
+
+endmodule
+
+
+// Description:	 Alignment shift stage 1, performs 16|12|8|4 shift
+//
+
+
+// ONLY THIS MODULE IS HARDCODED for half precision fp16 and bfloat16
+// FPAddSub_AlignShift1: coarse alignment shift of the smaller mantissa.
+//
+// Prepends the hidden 1 to MminP and shifts the result right by a multiple of
+// four, filling with zeros from the left. Shift amounts that would move every
+// bit out of the (`MANTISSA+1)-bit value produce zero.
+//
+// Ports:
+//   MminP : smaller mantissa (without hidden bit)
+//   Shift : alignment shift amount without its two least significant bits,
+//           i.e. Shift[0] has weight 4, Shift[1] weight 8, ...
+//   Mmin  : {1, MminP} after the coarse right shift
+//
+// The previous implementation selected windows out of {Lvl1, Lvl1}, which is a
+// rotate: the bits shifted out at the bottom re-entered at the top of the
+// mantissa. An alignment shift must zero-fill instead.
+module FPAddSub_AlignShift1(
+		//bf16,
+		MminP,
+		Shift,
+		Mmin
+	);
+	
+	// Input ports
+	//input bf16;
+	input [`MANTISSA-1:0] MminP ;						// Smaller mantissa
+	input [`EXPONENT-3:0] Shift ;						// Shift amount. Last 2 bits of shifting are done in next stage. Hence, we have [`EXPONENT - 2] bits
+	
+	// Output ports
+	output [`MANTISSA:0] Mmin ;						// The smaller mantissa after the 0|4|8|... shift
+	
+
+	wire bf16;
+	assign bf16 = 1'b1; //hardcoding to 1, to avoid ODIN issue. a `ifdef here wasn't working. apparently, nested `ifdefs don't work
+
+	// Internal signals
+	reg	  [`MANTISSA:0]		Lvl1;					// {1, MminP}, or zero when the shift is too large
+	reg	  [`MANTISSA:0]		Lvl2;					// Lvl1 after the remaining coarse shift
+
+	// Blocking assignments are used because both always blocks are combinational.
+	always @(*) begin
+		if (bf16 == 1'b1) begin
+			// bfloat16: mantissa plus hidden bit is 8 bits wide, so any shift of
+			// 8 or more (any of Shift[`EXPONENT-3:1] set) leaves nothing.
+			Lvl1 = (|Shift[`EXPONENT-3:1]) ? 'd0 : {1'b1, MminP};
+		end
+		else begin
+			// fp16: a shift of 16 or more (Shift[2]) leaves nothing.
+			Lvl1 = Shift[2] ? 'd0 : {1'b1, MminP};
+		end
+	end
+
+	always @(*) begin
+		if (bf16 == 1'b1) begin
+			case (Shift[0])					// Shift right by {0 | 4}
+				1'b0: Lvl2 = Lvl1;
+				1'b1: Lvl2 = Lvl1 >> 4;
+				default: Lvl2 = Lvl1 >> 4;
+			endcase
+		end
+		else begin
+			case (Shift[1:0])				// Shift right by {0 | 4 | 8}; 12 clears the value
+				2'b00: Lvl2 = Lvl1;
+				2'b01: Lvl2 = Lvl1 >> 4;
+				2'b10: Lvl2 = Lvl1 >> 8;
+				2'b11: Lvl2 = 0;
+				default: Lvl2 = 0;
+			endcase
+		end
+	end
+
+	// Assign output to next shift stage
+	assign Mmin = Lvl2;
+	
+endmodule
+
+
+// Description:	 Alignment shift stage 2, performs 3|2|1 shift
+//
+
+
+// FPAddSub_AlignShift2: fine alignment shift of the smaller mantissa.
+//
+// Shifts MminP right by 0..3 positions, filling with zeros from the left.
+//
+// Ports:
+//   MminP : smaller mantissa (with hidden bit) after the coarse shift stage
+//   Shift : the two least significant bits of the alignment shift amount
+//   Mmin  : MminP after the 0|1|2|3 right shift
+//
+// The previous implementation selected windows out of {MminP, MminP}, which is
+// a rotate: the bits shifted out at the bottom re-entered at the top of the
+// mantissa. An alignment shift must zero-fill instead.
+module FPAddSub_AlignShift2(
+		MminP,
+		Shift,
+		Mmin
+	);
+	
+	// Input ports
+	input [`MANTISSA:0] MminP ;						// Smaller mantissa after the coarse shift
+	input [1:0] Shift ;						// Shift amount. Last 2 bits
+	
+	// Output ports
+	output [`MANTISSA:0] Mmin ;						// The smaller mantissa
+	
+	// Internal Signal
+	reg	  [`MANTISSA:0]		Lvl3;
+
+	// Combinational block, hence blocking assignments.
+	always @(*) begin    // Shift right by {0 | 1 | 2 | 3} bits
+	  case (Shift[1:0])
+			2'b00: Lvl3 = MminP;
+			2'b01: Lvl3 = MminP >> 1;
+			2'b10: Lvl3 = MminP >> 2;
+			2'b11: Lvl3 = MminP >> 3;
+			default: Lvl3 = MminP;
+	  endcase
+	end
+	
+	// Assign output
+	assign Mmin = Lvl3;
+
+endmodule
+
+
+// Description:	 Mantissa execution stage. Adds the aligned smaller mantissa to, or
+//				 subtracts it from, the larger mantissa (with a leading 1 prepended),
+//				 and outputs the effective operation and the sign selected by MaxAB.
+module FPAddSub_ExecutionModule(
+		Mmax,
+		Mmin,
+		Sa,
+		Sb,
+		MaxAB,
+		OpMode,
+		Sum,
+		PSgn,
+		Opr
+    );
+
+	// Input ports
+	input [`MANTISSA-1:0] Mmax ;	// The larger mantissa (leading 1 not included)
+	input [`MANTISSA:0] Mmin ;		// The smaller mantissa
+	input Sa ;						// Sign bit used for the result when MaxAB = 0
+	input Sb ;						// Sign bit used for the result when MaxAB = 1
+	input MaxAB ;					// Indicates the larger number (0/A, 1/B)
+	input OpMode ;					// Operation requested (0/Add, 1/Sub)
+	
+	// Output ports
+	output [`DWIDTH:0] Sum ;		// The result of the operation
+	output PSgn ;					// The sign for the result
+	output Opr ;					// The effective (performed) operation, 1 = subtract
+
+	// Both operands get `EXPONENT zero bits appended below the mantissa bits.
+	// (Original author's marker, kept for reference: SAMIDH_UNSURE 5--> 8)
+	wire [`EXPONENT-1:0] LowZeros;
+	wire [`MANTISSA+`EXPONENT:0] BigOperand;
+	wire [`MANTISSA+`EXPONENT:0] SmallOperand;
+	assign LowZeros = 0;
+	assign BigOperand = {1'b1, Mmax, LowZeros};
+	assign SmallOperand = {Mmin, LowZeros};
+
+	// Resolve the requested operation and both signs into the effective one
+	assign Opr = OpMode ^ Sa ^ Sb;
+	assign Sum = Opr ? (BigOperand - SmallOperand) : (BigOperand + SmallOperand);
+	assign PSgn = MaxAB ? Sb : Sa;	// Result sign is picked by MaxAB
+
+endmodule
+
+
+// Description:	 Determines the normalization shift amount by locating the first set
+//				 bit of Sum, scanning down from Sum[16], and applies the coarse
+//				 16-position stage selected by Shift[4].
+// NOTE(review): the bit indices below (Sum[16] .. Sum[4]) are hard-coded rather than
+//				 derived from `DWIDTH - confirm they match the configured width.
+module FPAddSub_NormalizeModule(
+		Sum,
+		Mmin,
+		Shift
+    );
+
+	// Input ports
+	input [`DWIDTH:0] Sum ;					// Mantissa sum including hidden 1 and GRS
+	
+	// Output ports
+	output [`DWIDTH:0] Mmin ;					// Mantissa after 16|0 shift
+	output [4:0] Shift ;					// Shift amount
+	//Changes in this doesn't matter since even Bfloat16 can't go beyond 7 shift to the mantissa (only 3 bits valid here)  
+	// Priority chain: Shift is the number of zero bits above the first set bit (13 if none of Sum[16:4] is set)
+	assign Shift =  ( 
+		Sum[16] ? 5'b00000 :	 
+		Sum[15] ? 5'b00001 : 
+		Sum[14] ? 5'b00010 : 
+		Sum[13] ? 5'b00011 : 
+		Sum[12] ? 5'b00100 : 
+		Sum[11] ? 5'b00101 : 
+		Sum[10] ? 5'b00110 : 
+		Sum[9] ? 5'b00111 :
+		Sum[8] ? 5'b01000 :
+		Sum[7] ? 5'b01001 :
+		Sum[6] ? 5'b01010 :
+		Sum[5] ? 5'b01011 :
+		Sum[4] ? 5'b01100 : 5'b01101
+	//	Sum[19] ? 5'b01101 :
+	//	Sum[18] ? 5'b01110 :
+	//	Sum[17] ? 5'b01111 :
+	//	Sum[16] ? 5'b10000 :
+	//	Sum[15] ? 5'b10001 :
+	//	Sum[14] ? 5'b10010 :
+	//	Sum[13] ? 5'b10011 :
+	//	Sum[12] ? 5'b10100 :
+	//	Sum[11] ? 5'b10101 :
+	//	Sum[10] ? 5'b10110 :
+	//	Sum[9] ? 5'b10111 :
+	//	Sum[8] ? 5'b11000 :
+	//	Sum[7] ? 5'b11001 : 5'b11010
+	);
+	
+	reg	  [`DWIDTH:0]		Lvl1;
+	// Combinational logic, so a blocking assignment is used
+	always @(*) begin
+		// Shift[4] is never set by the chain above (its largest value is 5'b01101), so Sum passes through
+		Lvl1 = Shift[4] ? {Sum[8:0], 8'b00000000} : Sum; 
+	end
+	
+	// Assign outputs
+	assign Mmin = Lvl1;						// Sum after the coarse stage
+
+endmodule
+
+
+// Description:	 Normalization shift stage 1. Applies Shift[3:0] in two cascaded
+//				 rotate stages: Shift[3:2] selects 0|4|8|12 positions and
+//				 Shift[1:0] selects 0|1|2|3 positions.
+// The slice bounds are hard-coded numbers to avoid ODIN limitations (i=`DWIDTH*2+1 wasn't working).
+
+module FPAddSub_NormalizeShift1(
+		MminP,
+		Shift,
+		Mmin
+	);
+	
+	// Input ports
+	input [`DWIDTH:0] MminP ;		// Value to normalize
+	input [3:0] Shift ;				// Shift amount
+	
+	// Output ports
+	output [`DWIDTH:0] Mmin ;		// Value after both rotate stages
+	
+	// Internal signals
+	reg	  [`DWIDTH:0]		Lvl2;		// Output of the coarse stage
+	wire    [2*`DWIDTH+1:0]    Stage1;	// MminP concatenated with itself
+	reg	  [`DWIDTH:0]		Lvl3;		// Output of the fine stage
+	wire    [2*`DWIDTH+1:0]    Stage2;	// Lvl2 concatenated with itself
+	
+	assign Stage1 = {MminP, MminP};
+
+	// Both stages are combinational, so blocking assignments are used.
+	// Coarse stage: rotate by 0 | 4 | 8 | 12, selected by Shift[3:2].
+	// NOTE(review): the non-zero cases are 16-bit part-selects assigned to a
+	// (`DWIDTH+1)-bit reg; if that reg is wider than 16 bits its top bits are
+	// zero-filled instead of rotated in - confirm this is intended.
+	always @(*) begin
+	  case (Shift[3:2])
+			2'b00: Lvl2 = Stage1[`DWIDTH:0];	// Rotate by 0
+			2'b01: Lvl2 = Stage1[28:13];		// Rotate by 4
+			2'b10: Lvl2 = Stage1[24:9];			// Rotate by 8
+			2'b11: Lvl2 = Stage1[20:5];			// Rotate by 12
+			default: Lvl2 = Stage1[`DWIDTH:0];
+	  endcase
+	end
+	
+	assign Stage2 = {Lvl2, Lvl2};
+
+	// Fine stage: rotate by 0 | 1 | 2 | 3, selected by Shift[1:0].
+	// NOTE(review): same 16-bit part-select width as in the coarse stage.
+	always @(*) begin
+	  case (Shift[1:0])
+			2'b00: Lvl3 = Stage2[`DWIDTH:0];	// Rotate by 0
+			2'b01: Lvl3 = Stage2[31:16];		// Rotate by 1
+			2'b10: Lvl3 = Stage2[30:15];		// Rotate by 2
+			2'b11: Lvl3 = Stage2[29:14];		// Rotate by 3
+			default: Lvl3 = Stage2[`DWIDTH:0];
+	  endcase
+	end
+	
+	// Assign outputs
+	assign Mmin = Lvl3;
+
+endmodule
+
+
+// Description:	 Normalization stage 2. Extracts the normalized mantissa and the
+//				 rounding bits from the pre-shifted sum and adjusts the exponent by
+//				 the shift amount. No shifting is performed in this module.
+
+module FPAddSub_NormalizeShift2(
+		PSSum,
+		CExp,
+		Shift,
+		NormM,
+		NormE,
+		ZeroSum,
+		NegE,
+		R,
+		S,
+		FG
+	);
+	
+	// Input ports
+	input [`DWIDTH:0] PSSum ;			// The Pre-Shift-Sum
+	input [`EXPONENT-1:0] CExp ;		// Exponent before adjustment
+	input [4:0] Shift ;					// Shift amount, subtracted from the exponent
+
+	// Output ports
+	output [`MANTISSA-1:0] NormM ;		// Normalized mantissa
+	output [`EXPONENT:0] NormE ;		// Adjusted exponent (one extra MSB)
+	output ZeroSum ;					// Zero flag
+	output NegE ;						// Flag indicating negative exponent
+	output R ;							// Round bit
+	output S ;							// Final sticky bit
+	output FG ;							// Bit just above R (PSSum[`EXPONENT])
+
+	// Internal signals
+	wire SumMsbSet ;					// MSB of PSSum is set
+	wire [`EXPONENT:0] ExpPlain ;		// CExp - Shift
+	wire [`EXPONENT:0] ExpPlusOne ;		// CExp - Shift + 1
+	
+	// Exponent: subtract the shift amount; add one back when the MSB of the sum is set
+	assign SumMsbSet = PSSum[`DWIDTH] ;
+	assign ExpPlain = CExp - Shift ;
+	assign ExpPlusOne = CExp - Shift + 1'b1 ;
+	assign NormE = SumMsbSet ? ExpPlusOne : ExpPlain ;
+	assign NegE = ExpPlain[`EXPONENT] ;			// Extra MSB of CExp - Shift, used as the negative flag
+	// Mantissa and zero detection
+	assign ZeroSum = ~|PSSum ;
+	assign NormM = PSSum[`DWIDTH-1:`EXPONENT+1] ;
+	
+	// Bits below the mantissa, used by the rounding stage
+	assign FG = PSSum[`EXPONENT] ; 
+	assign R = PSSum[`EXPONENT-1] ;
+	assign S = |PSSum[`EXPONENT-2:0] ;			// OR of everything below R
+	
+endmodule
+
+
+// Description:	 Performs 'Round to nearest, tie to even'-rounding on the
+//				 normalized mantissa according to the G, R, S bits. Calculates
+//				 the final result and reports exponent overflow on EOF.
+//
+
+module FPAddSub_RoundModule(
+		ZeroSum,
+		NormE,
+		NormM,
+		R,
+		S,
+		G,
+		Sa,
+		Sb,
+		Ctrl,
+		MaxAB,
+		Z,
+		EOF
+    );
+
+	// Input ports
+	input ZeroSum ;					// Sum is zero
+	input [`EXPONENT:0] NormE ;		// Normalized exponent
+	input [`MANTISSA-1:0] NormM ;	// Normalized mantissa
+	input R ;						// Round bit
+	input S ;						// Sticky bit
+	input G ;						// G bit of the G, R, S rounding bits
+	input Sa ;						// A's sign bit
+	input Sb ;						// B's sign bit
+	input Ctrl ;					// Control bit (operation)
+	input MaxAB ;					// Used in the sign computation below
+	
+	// Output ports
+	output [`DWIDTH-1:0] Z ;		// Final result
+	output EOF ;					// MSB of the rounded exponent
+	
+	// Internal signals
+	wire [`MANTISSA:0] RoundUpM ;	// Mantissa plus one, with room for the carry
+	wire [`MANTISSA-1:0] RoundM ;	// The final rounded mantissa
+	wire [`EXPONENT:0] RoundE ;		// Rounded exponent (extra bit for potential overflow)
+	wire RoundUp ;					// The mantissa should be rounded up
+	wire FSgn ;						// Final sign bit
+	wire ExpAdd ;					// Increment applied to the exponent
+	wire RoundOF ;					// Rounding carried out of the mantissa
+	wire [`EXPONENT:0] ZeroExp ;	// All-zero exponent used for a zero sum
+	wire SgnIfZero ;				// Sign term selected when ZeroSum is set
+	wire SgnIfNonZero ;				// Sign term selected when ZeroSum is clear
+
+	// Round up (= add one) when G is set and any of R, S or the mantissa LSB is set
+	assign RoundUp = G & (R | S | NormM[0]) ;
+	
+	// When not rounding up, the mantissa is used as it is
+	assign RoundUpM = (NormM + 1) ;
+	assign RoundOF = RoundUp & RoundUpM[`MANTISSA] ;
+	assign RoundM = RoundUp ? RoundUpM[`MANTISSA-1:0] : NormM ;
+
+	// Exponent: forced to zero for a zero sum, otherwise incremented on a rounding carry
+	assign ZeroExp = 0 ;
+	assign ExpAdd = RoundOF ;
+	assign RoundE = ZeroSum ? ZeroExp : (NormE + ExpAdd) ;
+
+	// Sign: separate terms for the zero-sum and the non-zero case
+	assign SgnIfZero = Sa & Sb & ~Ctrl ;
+	assign SgnIfNonZero = (~MaxAB & Sa) | ((Ctrl ^ Sb) & (MaxAB | Sa)) ;
+	assign FSgn = (ZeroSum & (Sa ^ Sb)) | (ZeroSum ? SgnIfZero : SgnIfNonZero) ;
+
+	// Pack {sign, exponent, mantissa}; EOF is the extra exponent MSB
+	assign Z = {FSgn, RoundE[`EXPONENT-1:0], RoundM[`MANTISSA-1:0]} ;
+	assign EOF = RoundE[`EXPONENT] ;
+endmodule
+
+
+//
+// Description:	 Derives the exception flags from the result, the rounding bits
+//				 and the input exception bits, and passes the result through.
+//
+
+module FPAddSub_ExceptionModule(
+		Z,
+		NegE,
+		R,
+		S,
+		InputExc,
+		EOF,
+		P,
+		Flags
+    );
+	 
+	// Input ports
+	input [`DWIDTH-1:0] Z	;		// Result to be checked
+	input NegE ;					// Negative exponent?
+	input R ;						// Round bit
+	input S ;						// Sticky bit
+	input [4:0] InputExc ;			// Exceptions in inputs A and B
+	input EOF ;						// Exponent overflow indication
+	
+	// Output ports
+	output [`DWIDTH-1:0] P ;		// Final result (copy of Z)
+	output [4:0] Flags ;			// {Overflow, Underflow, DivideByZero, Invalid, Inexact}
+	
+	// Internal signals
+	wire Overflow ;					// Overflow flag
+	wire Underflow ;				// Underflow flag
+	wire DivideByZero ;				// Divide-by-Zero flag (always 0 in Add/Sub)
+	wire Invalid ;					// Invalid inputs or result
+	wire Inexact ;					// Result is inexact because of rounding
+	wire [`EXPONENT-1:0] ExpField ;	// Exponent field of Z
+	wire InExcLow ;					// Either of InputExc[1:0] is set
+	
+	assign ExpField = Z[`MANTISSA+`EXPONENT-1:`MANTISSA] ;
+	assign InExcLow = InputExc[1] | InputExc[0] ;
+	
+	// Result is too big to be represented
+	assign Overflow = EOF | InExcLow ;
+	
+	// Result is too small to be represented
+	assign Underflow = NegE & (R | S) ;
+	
+	// Requires the exponent field to be all ones and all zeros at the same time,
+	// which cannot hold, so this flag stays 0 (Divide-by-Zero does not occur in Add/Sub)
+	assign DivideByZero = (&ExpField) & (~|ExpField) & ~InExcLow ;
+	
+	// Invalid inputs or operation
+	assign Invalid = |InputExc[4:2] ;
+	
+	// Inexact answer due to rounding, overflow or underflow
+	assign Inexact = (R | S) | Overflow | Underflow ;
+	
+	// Pass the result through and collect the exception flags
+	assign P = Z ;
+	assign Flags = {Overflow, Underflow, DivideByZero, Invalid, Inexact} ; 	
+endmodule
+
+`endif
+
+
diff --git a/parmys-plugin/tests/eltwise_layer/hard_block_include.v b/parmys-plugin/tests/eltwise_layer/hard_block_include.v
new file mode 100644
index 000000000..cc4d502c5
--- /dev/null
+++ b/parmys-plugin/tests/eltwise_layer/hard_block_include.v
@@ -0,0 +1,3 @@
+`define complex_dsp
+`define hard_mem
+
diff --git a/parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml b/parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
new file mode 100644
index 000000000..8170d72b0
--- /dev/null
+++ b/parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
@@ -0,0 +1,3246 @@
+<!--
+    This is the architecture file for a modern Intel FPGA. The blocks (logic, RAM, DSP)
+    are Agilex-like, but the routing architecture is similar to Stratix IV. It is based
+    off the Stratix-10-like Architecture discussed in [1], the Agilex-like Architecture
+    mentioned in [6] and Stratix-IV-like Architecture mentioned in [5].
+
+    The delays and areas of various components in this arch come from COFFE [2]
+    runs using a 22nm technology node [3].
+
+    ##############################
+    Parameters
+    ##############################
+    Parameter | Value | Definition
+    __________|_______|______________________________
+    N         |    10 | Number of BLEs per cluster
+    W         |   300 | Channel width
+    L         |  4,16 | Wire segment length
+    I         |    60 | Number of cluster inputs
+    O         |    40 | Number of cluster outputs
+    K         |     6 | LUT size
+    Fs        |     3 | Switch block flexibility
+    Fcin      |  0.15 | Cluster input flexibility
+    Fcout     |   0.1 | Cluster output flexibility
+    Fclocal   |   0.5 | Local input crossbar population
+
+    ##############################
+    Logic Cluster
+    ##############################
+    This architecture has 10 ALMs (or FLEs: Fracturable Logic Elements) per Logic Cluster
+    (or LAB or CLB), where each ALM is a 6-LUT fracturable into
+    two 5-LUTs. The ALM has 8 inputs and 4 optionally registered outputs. The two 5-LUTs should
+    share at least two inputs. Each two ALM outputs are logically equivalent, which means any
+    output signal that can reach ALM.out[0] can reach ALM.out[1] and the same thing for
+    ALM.out[2] and ALM.out[3]. The ALMs in this architecture have an arithmetic mode
+    where each 5-LUT is fractured into two 4-LUTs, resulting in a total of four 4-LUTs and two
+    bits of addition per ALM. This architecture has a single carry chain that spans the 10 ALMs
+    in the LAB.
+
+    The LAB (or Logic Cluster or CLB) has 60 inputs and 40 outputs. Two outputs of each ALM are fed 
+    to the right and left LAB using direct links and are also fed back to the LAB as feedback connections 
+    sharing the 60 input ports with the signals coming from the routing channels.
+
+    The LAB has a 50% sparsely populated input crossbar.
+    
+    ##############################
+    DSP Slice
+    ##############################
+    This architecture has a DSP block that supports the following modes:
+
+    Fixed point modes:
+    _________________
+    1. 27x27 fixed point multiplier (multiply)
+    2. 27x27 fixed point mac (mac_int_27x27)
+    3. Two 18x19 fixed point multipliers (multiply)
+    4. Two 18x19 fixed point macs (mac_int_18x19)
+    5. Four 9x9 fixed point multipliers (multiply)
+    6. Four 9x9 fixed point macs (mac_int_9x9)
+    7. 27x27 plus 64 mode (mult_add_mode_27_27_64/mult_add_int_27x27). 27 * 27 + 64 -> 64. result = ax * ay + bx + chainin. chainout = result 
+    8. 18x19 sum-of-2 mode (sop_2_mode/int_sop_2) result = (bx * by) + (ax * ay) + chainin. chainout = result    
+    9. 18x19 plus 36 mode (mult_add_mode_18_19_36/mult_add_int_18x19). 18 * 19 + 36 -> 64. result = ax * ay + bx + chainin. chainout = result 
+    10. 9x9 sum-of-4 mode (sop_4_mode/int_sop_4) result = (dx * dy) + (cx * cy) + (bx * by) + (ax * ay) + chainin. chainout = result 
+    11. 9x9 sum-of-4 accum mode (sop_4_accum_mode/int_sop_accum_4) result = (dx * dy) + (cx * cy) + (bx * by) + (ax * ay) + chainin + accumulator. chainout = result 
+
+    Floating point modes:
+    ____________________
+
+    IMPORTANT:
+    The precisions supported are IEEE floating point 32-bit, IEEE floating point 16-bit and
+    Brain floating point (BF16). In the 16-bit mode descriptions, wherever "fp16" is used, it
+    refers to either IEEE floating point 16-bit or BF16. There are mode bits on the DSP slice
+    that can be used to differentiate between them. Doing this saves the effort of explicitly
+    specifying all the 16-bit modes twice in this file. 
+    Since the goal is architectural exploration and not functional simulation, the mode bits 
+    can be specified to any random value while instantiating the DSP slice in a Verilog benchmark.
+
+    1A. One fp32 multiplier (mult_fp_32)
+    1B. One fp32 multiplier, clocked (mult_fp_clk_32)
+    2A. One fp32 adder/subtractor (addition_fp_32)
+    2B. One fp32 adder/subtractor, clocked (addition_fp_clk_32)
+    3. One fp32 mac (mac_fp_32)
+    4A. Two fp16 multipliers (mult_fp_16)
+    4B. Two fp16 multipliers, clocked (mult_fp_clk_16)
+    5A. Two fp16 adders/subtractors (addition_fp_16)
+    5B. Two fp16 adders/subtractors, clocked (addition_fp_clk_16)
+    6. Two fp16 macs (mac_fp_16)
+    7. floating point fp16 sum-of-products mode (result = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b. chainout = third_inp or result) (fp16_sum_of_products_mode/fp16_mult_add)
+    8. floating point fp16 sum-of-2 mult mode (result = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b + fp32 chainin or third inp. chainout = third_inp or result) (fp16_sum_of_products_2_mult_mode/fp16_sop2_mult)
+    9. floating point fp16 sum-of-2 accum mode (result = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b + accumulator. chainout = result) (fp16_sum_of_products_2_accum_mode/fp16_sop2_accum)
+    10. floating point fp16 mult, fp32 add mode (chainout = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b. result = chainin + third_inp) (fp16_mult_fp32_add/fp16_mult_fp32_add)
+    11. floating point fp16 mult, fp32 accum mode (chainout = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b. result = third_inp + accumulator) (fp16_mult_fp32_accum/fp16_mult_fp32_accum)
+    12. floating point fp32 mult_then_add mode (result = fp32_mult_a * fp32_mult_b + chainin. chainout = third_inp or result) (fp32_mult_then_add/fp32_mult_then_add)
+    13. floating point fp32 mult_add mode (chainout = fp32_mult_a * fp32_mult_b. result = third_inp + chainin) (fp32_mult_add/fp32_mult_add)
+
+    The DSP block was designed in Verilog and COFFE's [2] hybrid flow was used to generate
+    area and delay results. The standard cell library used was Cadence GPDK 45nm (gsclib045_svt_v4.4)
+    and area/delay scaling equations from [4] were used.
+
+    A 50% sparsely populated input crossbar was added to the DSP block but is commented out.
+    It was leading to a failure in VPR. See the discussion on this commit: 
+    https://github.com/verilog-to-routing/vtr-verilog-to-routing/commit/ea7acf1582ece35e892c26b756aa302d2e12ddb2
+
+    Once this is fixed, the input crossbar code can be enabled.
+
+    ##############################
+    Memory Blocks
+    ##############################
+    The architecture also has 20Kb memory blocks (M20K or BRAM) that have true and simple dual port modes. 
+    In simple dual port mode the memory can be configured in the following modes: 512x40, 1024x20 and 2048x10,
+    while in true dual port mode it can be configured as: 1024x20 and 2048x10.
+
+    The BRAM has registered inputs and outputs. See details on how the delays for this block were 
+    obtained, in the comments before the specification of the BRAM primitive, towards the end of this file.
+
+    The BRAM doesn't have an input crossbar. Adding an input crossbar was leading to a 
+    seg fault in VPR, likely because of https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1475
+
+    Once this is fixed, an input crossbar can be added. The input crossbar delay from COFFE was: 29.47ps
+
+    ##############################
+    Routing/Interconnect
+    ##############################
+    The routing channel width is 300. Note that the channel width isn't specified directly in this arch file. 
+    Switch pattern calculations assume that value. During experiments, channel width can be specified using 
+    the command line switch `route_chan_width`.
+    The architecture uses unidirectional routing with wire segments of length 4 (260 out of 300 wires) and 
+    length 16 (40 out of 300 wires). The length 16 wires do not directly connect to block pins and are only 
+    accessible from the length 4 wires. Switches appear after every 4 blocks on the length 16 wires. 
+    The switch blocks use a custom switching pattern based on the Stratix-IV-like architecture used in the 
+    Titan flow [5]. 
+
+    ##############################
+    I/Os
+    ##############################
+    I/O pads are arranged along the perimeter of the FPGA. No area values provided for the I/Os.
+
+    ##############################
+    Comments on similarities and differences with Intel FPGA architecture.
+    ##############################
+    The main parameters of the logic blocks, DSPs and RAMs are similar to Intel FPGAs. But here are
+    some important points:
+    1. The DSP slice supports lower precision modes - int8 (actually 9x9) and 16-bit floating point
+       (IEEE half-precision and bfloat16). These modes are present in Intel Agilex FPGA DSPs.
+    2. DSPs are chained in vertical direction (chainin-chainout connections for output cascading
+       and scanin-scanout connections for input cascading). This is a common feature
+       in modern FPGAs.
+    3. There are no registers on the interconnect/routing wires in this architecture. That is a main
+       feature in the Stratix10 and Agilex families of Intel FPGA (it's called HyperFlex by Intel).
+    4. The architecture doesn't have sectors. All blocks are laid out in columns on the entire chip.
+       Most modern Intel FPGAs have sector based layout.
+    5. The IOs are on the perimeter, instead of being arranged in columns. Modern FPGAs arrange I/Os in
+       columns.
+    6. The routing architecture is similar to Stratix IV. There are wire segments of L=4 
+       and L=16. And a custom switch pattern (not a standard wilton switch) is used. 
+
+    [1] M. Eldafrawy, A. Boutros, S. Yazdanshenas, and V. Betz, "FPGA Logic Block Architectures for
+        Efficient Deep Learning Inference" in ACM TRETS, 2020
+    [2] S. Yazdanshenas, and V. Betz, "COFFE 2: Automatic Modelling and Optimization of
+        Complex and Heterogeneous FPGA Architectures" in ACM TRETS, 2019. 
+    [3] PTM High Performance 22nm Metal Gate / High-K / Strained-Si 22NM_BULK_HP, from http://ptm.asu.edu/
+        See: https://github.com/vaughnbetz/COFFE/blob/master/spice_models/ptm_22nm_bulk_hp.l
+    [4] A. Stillmaker and B. Baas, "Scaling equations for the accurate prediction of CMOS device 
+        performance from 180 nm to 7 nm" in Integration, the VLSI Journal (2017)
+    [5] K. E. Murray et al., “Timing-Driven Titan: Enabling Large Benchmarks and Exploring the Gap between 
+        Academic and Commercial CAD,” TRETS 2015.
+    [6] A. Arora et al., "Tensor Slices to the Rescue: Supercharging ML Acceleration on FPGAs", ISFPGA 2020.
+-->
+
+<architecture>
+  <!-- 
+         ODIN II specific config begins 
+         Describes the types of user-specified netlist blocks (in blif, this corresponds to 
+         ".model [type_of_block]") that this architecture supports.
+
+         Note: Basic LUTs, I/Os, and flip-flops are not included here as there are 
+         already special structures in blif (.names, .input, .output, and .latch) 
+         that describe them.
+    -->
+  <models>
+    <model name="single_port_ram">
+      <input_ports>
+        <port name="we" clock="clk" combinational_sink_ports="out"/>
+        <!-- write enable (control) -->
+        <port name="addr" clock="clk" combinational_sink_ports="out"/>
+        <!-- address lines -->
+        <port name="data" clock="clk" combinational_sink_ports="out"/>
+        <!-- data lines; can be broken down into smaller bit widths, minimum size 1 -->
+        <port name="clk" is_clock="1"/>
+        <!-- clock port referenced by the other ports of this model -->
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+        <!-- output; can be broken down into smaller bit widths, minimum size 1 -->
+      </output_ports>
+    </model>
+    <model name="dual_port_ram">
+      <input_ports>
+        <port name="we1" clock="clk" combinational_sink_ports="out1"/>
+        <!-- write enable, port 1 -->
+        <port name="we2" clock="clk" combinational_sink_ports="out2"/>
+        <!-- write enable, port 2 -->
+        <port name="addr1" clock="clk" combinational_sink_ports="out1"/>
+        <!-- address lines, port 1 -->
+        <port name="addr2" clock="clk" combinational_sink_ports="out2"/>
+        <!-- address lines, port 2 -->
+        <port name="data1" clock="clk" combinational_sink_ports="out1"/>
+        <!-- data lines, port 1; can be broken down into smaller bit widths, minimum size 1 -->
+        <port name="data2" clock="clk" combinational_sink_ports="out2"/>
+        <!-- data lines, port 2; can be broken down into smaller bit widths, minimum size 1 -->
+        <port name="clk" is_clock="1"/>
+        <!-- single clock port referenced by both sets of ports -->
+      </input_ports>
+      <output_ports>
+        <port name="out1" clock="clk"/>
+        <!-- output, port 1; can be broken down into smaller bit widths, minimum size 1 -->
+        <port name="out2" clock="clk"/>
+        <!-- output, port 2; can be broken down into smaller bit widths, minimum size 1 -->
+      </output_ports>
+    </model>
+    <!-- Used inside DSPs.
+         Fixed point multiplication: a and b both list out as their combinational sink; no clock port.
+         ODIN infers these when the * operator appears in RTL. -->
+    <model name="multiply">
+      <input_ports>
+        <port name="a" combinational_sink_ports="out"/>
+        <port name="b" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out"/>
+      </output_ports>
+    </model>
+    <!-- Used inside DSPs.
+         Floating point multiplication: fp16 variant without a clock port (floating point mode 4A in the header). -->
+    <model name="mult_fp_16">
+      <input_ports>
+        <port name="a" combinational_sink_ports="out"/>
+        <port name="b" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out"/>
+      </output_ports>
+    </model>   
+    <model name="mult_fp_32"> <!-- fp32 multiplier without a clock port (floating point mode 1A in the header) -->
+      <input_ports>
+        <port name="a" combinational_sink_ports="out"/>
+        <port name="b" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out"/>
+      </output_ports>
+    </model>   
+    <model name="mult_fp_clk_16"> <!-- clocked fp16 multiplier: a, b and out are declared against clk (floating point mode 4B in the header) -->
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="a" clock="clk" combinational_sink_ports="out"/>
+        <port name="b" clock="clk" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+      </output_ports>
+    </model>
+    <model name="mult_fp_clk_32"> <!-- clocked fp32 multiplier: a, b and out are declared against clk (floating point mode 1B in the header) -->
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="a" clock="clk" combinational_sink_ports="out"/>
+        <port name="b" clock="clk" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+      </output_ports>
+    </model>
+    <!-- Only used inside CLBs, as a 1-bit adder.
+         ODIN infers these when the + operator appears in RTL.
+         This model can't be used inside the DSP slice,
+         because ODIN gets confused and starts to connect multi-bit
+         adders and single-bit adders in different PBs. -->
+    <model name="adder">
+      <input_ports>
+        <port name="a" combinational_sink_ports="cout sumout"/>
+        <port name="b" combinational_sink_ports="cout sumout"/>
+        <port name="cin" combinational_sink_ports="cout sumout"/>
+      </input_ports>
+      <output_ports>
+        <port name="cout"/>
+        <port name="sumout"/>
+      </output_ports>
+    </model>
+    <!-- Multi-bit floating point adders inside DSP slices. addition_fp_16: fp16 variant without a clock port (floating point mode 5A in the header) -->
+    <model name="addition_fp_16">
+      <input_ports>
+        <port name="a" combinational_sink_ports="out"/>
+        <port name="b" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out"/>
+      </output_ports>
+    </model>
+    <model name="addition_fp_32"> <!-- fp32 adder/subtractor without a clock port (floating point mode 2A in the header) -->
+      <input_ports>
+        <port name="a" combinational_sink_ports="out"/>
+        <port name="b" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out"/>
+      </output_ports>
+    </model>
+    <model name="addition_fp_clk_16"> <!-- clocked fp16 adder/subtractor: a, b and out are declared against clk (floating point mode 5B in the header) -->
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="a" clock="clk" combinational_sink_ports="out"/>
+        <port name="b" clock="clk" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+      </output_ports>
+    </model>
+    <model name="addition_fp_clk_32"> <!-- clocked fp32 adder/subtractor: a, b and out are declared against clk (floating point mode 2B in the header) -->
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="a" clock="clk" combinational_sink_ports="out"/>
+        <port name="b" clock="clk" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+      </output_ports>
+    </model>
+    <!-- DSP slice mode: 18x19 sum-of-2. result = (bx * by) + (ax * ay) + chainin; chainout = result (fixed point mode 8 in the header). Only result carries a clock attribute. -->
+    <model name="int_sop_2">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="ax" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="ay" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="by" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model>
+    <!-- DSP slice mode: 27x27 plus 64. result = ax * ay + bx + chainin; chainout = result (fixed point mode 7 in the header). scanin/scanout are the input cascade ports. -->
+    <model name="mult_add_int_27x27">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="ax" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="ay" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="bx" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="scanin" clock="clk" combinational_sink_ports="result scanout chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+        <port name="scanout"/>
+      </output_ports>
+    </model>
+    <model name="mult_add_int_18x19"> <!-- DSP slice mode: 18x19 plus 36. result = ax * ay + bx + chainin; chainout = result (fixed point mode 9 in the header) -->
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="ax" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="ay" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="bx" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="scanin" clock="clk" combinational_sink_ports="result scanout chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+        <port name="scanout"/>
+      </output_ports>
+    </model>
+    <!-- DSP slice mode: 9x9 sum-of-4. result = (dx * dy) + (cx * cy) + (bx * by) + (ax * ay) + chainin; chainout = result (fixed point mode 10 in the header) -->
+    <model name="int_sop_4">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="ax" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="ay" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="by" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="cx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="cy" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="dx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="dy" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model>
+    <!--A mode in DSP slice-->
+    <model name="int_sop_accum_4">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="ax" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="ay" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="by" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="cx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="cy" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="dx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="dy" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model>
+    <!-- Floating point MAC inside DSP slices -->
+    <model name="mac_fp_16">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="out"/>
+        <port name="a" clock="clk" combinational_sink_ports="out"/>
+        <port name="b" clock="clk" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+      </output_ports>
+    </model>
+    <model name="mac_fp_32">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="out"/>
+        <port name="a" clock="clk" combinational_sink_ports="out"/>
+        <port name="b" clock="clk" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+      </output_ports>
+    </model>
+    <!-- Fixed point MAC inside DSP slices -->
+    <model name="mac_int_27x27">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="out"/>
+        <port name="a" clock="clk" combinational_sink_ports="out"/>
+        <port name="b" clock="clk" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+      </output_ports>
+    </model>
+    <model name="mac_int_18x19">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="out"/>
+        <port name="a" clock="clk" combinational_sink_ports="out"/>
+        <port name="b" clock="clk" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+      </output_ports>
+    </model>
+    <model name="mac_int_9x9">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="out"/>
+        <port name="a" clock="clk" combinational_sink_ports="out"/>
+        <port name="b" clock="clk" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+      </output_ports>
+    </model>
+    <!--A mode in DSP slice-->
+    <model name="fp16_mult_add">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="top_a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="top_b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bot_a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bot_b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="fp32_in" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model> 
+    <!--A mode in DSP slice-->
+    <model name="fp16_sop2_mult">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="top_a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="top_b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bot_a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bot_b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="fp32_in" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model> 
+    <!--A mode in DSP slice-->
+    <model name="fp16_sop2_accum">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="top_a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="top_b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bot_a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bot_b" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model>
+    <!--A mode in DSP slice-->
+    <model name="fp16_mult_fp32_add">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="top_a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="top_b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bot_a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bot_b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="fp32_in" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model>    
+    <!--A mode in DSP slice-->
+    <model name="fp16_mult_fp32_accum">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="top_a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="top_b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bot_a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bot_b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="fp32_in" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model>
+    <!--A mode in DSP slice-->
+    <model name="fp32_mult_add">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="fp32_in" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model> 
+    <!--A mode in DSP slice-->
+    <model name="fp32_mult_then_add">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="fp32_in" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model> 
+  </models>
+  <tiles>
+    <tile name="io" area="0">
+      <sub_tile name="io" capacity="8">
+        <equivalent_sites>
+          <site pb_type="io" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="outpad" num_pins="1"/>
+        <output name="inpad" num_pins="1"/>
+        <clock name="clock" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
+        <pinlocations pattern="custom">
+          <loc side="left">io.outpad io.inpad io.clock</loc>
+          <loc side="top">io.outpad io.inpad io.clock</loc>
+          <loc side="right">io.outpad io.inpad io.clock</loc>
+          <loc side="bottom">io.outpad io.inpad io.clock</loc>
+        </pinlocations>
+      </sub_tile>
+    </tile>
+    <tile name="clb" height="1" width="1" area="27905">
+      <sub_tile name="clb">
+        <equivalent_sites>
+          <site pb_type="clb" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="I1" num_pins="15" equivalent="full"/>
+        <input name="I2" num_pins="15" equivalent="full"/>
+        <input name="I3" num_pins="15" equivalent="full"/>
+        <input name="I4" num_pins="15" equivalent="full"/>
+        <input name="cin" num_pins="1"/>
+        <output name="O" num_pins="40" equivalent="none"/>
+        <output name="cout" num_pins="1"/>
+        <clock name="clk" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
+          <fc_override port_name="cin" fc_type="frac" fc_val="0"/>
+          <fc_override port_name="cout" fc_type="frac" fc_val="0"/>
+          <!-- clock pins do not connect to local routing -->
+          <fc_override fc_type="frac" fc_val="0" port_name="clk"/>
+        </fc>
+        <pinlocations pattern="spread"/>
+      </sub_tile>
+    </tile>
+    <tile name="dsp_top" height="4" width="1" area="253779">
+      <sub_tile name="dsp_top">
+        <equivalent_sites>
+          <site pb_type="dsp_top" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="reset" num_pins="1" is_non_clock_global="true"/>
+        <input name="dsp_I1" num_pins="64" />
+        <input name="dsp_I2" num_pins="64" />
+        <input name="chainin" num_pins="64"/>
+        <input name="scanin" num_pins="27"/>
+        <output name="result" num_pins="74"/>
+        <output name="chainout" num_pins="64"/>
+        <output name="scanout" num_pins="27"/>
+        <clock name="clk" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
+          <!-- clock pins and chain ports do not connect to local routing -->
+          <fc_override port_name="clk" fc_type="frac" fc_val="0"/>
+          <fc_override port_name="chainin" fc_type="frac" fc_val="0"/>
+          <fc_override port_name="chainout" fc_type="frac" fc_val="0"/>
+          <fc_override port_name="scanin" fc_type="frac" fc_val="0"/>
+          <fc_override port_name="scanout" fc_type="frac" fc_val="0"/>
+        </fc>
+        <pinlocations pattern="custom">
+        	  <loc side="left" yoffset="0">dsp_top.dsp_I1[31:0]</loc>
+	          <loc side="right" yoffset="1">dsp_top.dsp_I1[63:32]</loc>
+	          <loc side="left" yoffset="2">dsp_top.dsp_I2[31:0]</loc>
+	          <loc side="right" yoffset="3">dsp_top.dsp_I2[63:32]</loc>
+	          <loc side="top">dsp_top.chainin dsp_top.scanin</loc>
+	          <loc side="bottom">dsp_top.chainout dsp_top.scanout</loc>
+	          <loc side="right" yoffset="0">dsp_top.result[17:0] dsp_top.clk</loc>
+	          <loc side="left" yoffset="1">dsp_top.result[36:18]</loc>
+	          <loc side="right" yoffset="2">dsp_top.result[55:37] </loc>
+	          <loc side="left" yoffset="3">dsp_top.result[73:56] dsp_top.reset</loc>
+        </pinlocations>
+      </sub_tile>
+    </tile>
+    <tile name="memory" height="2" width="1" area="137668">
+      <sub_tile name="memory">
+        <equivalent_sites>
+          <site pb_type="memory" pin_mapping="direct"/>
+        </equivalent_sites>
+      <input name="addr1" num_pins="11"/>
+      <input name="addr2" num_pins="11"/>
+      <input name="data" num_pins="40"/>
+      <input name="we1" num_pins="1"/>
+      <input name="we2" num_pins="1"/>
+      <output name="out" num_pins="40"/>
+      <clock name="clk" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
+          <fc_override fc_type="frac" fc_val="0" port_name="clk"/>
+        </fc>  
+        <pinlocations pattern="spread"/>
+      </sub_tile>
+    </tile>
+  </tiles>
+  <!-- ODIN II specific config ends -->
+  <layout>
+    <!-- Physical descriptions begin -->
+    <auto_layout aspect_ratio="1.0">
+      <perimeter type="io" priority="101"/>
+      <corners type="EMPTY" priority="102"/>
+      <fill type="clb" priority="10"/>
+      <col type="dsp_top" startx="6" starty="1" repeatx="16" priority="20"/>
+      <col type="memory" startx="2" starty="1" repeatx="16" priority="20"/>
+    </auto_layout>
+    <!--
+    <fixed_layout name="mylayout" width="178" height="82">
+      <perimeter type="io" priority="101"/>
+      <corners type="EMPTY" priority="102"/>
+      
+      <col type="dsp_top"  startx="1"  starty="1"  priority="100"/>
+      <col type="clb"  startx="2"  starty="1"  priority="100"/>
+      <col type="clb"  startx="3"  starty="1"  priority="100"/>
+      <col type="dsp_top"  startx="4"  starty="1"  priority="100"/>
+      <col type="clb"  startx="5"  starty="1"  priority="100"/>
+      <col type="clb"  startx="6"  starty="1"  priority="100"/>
+      <col type="dsp_top"  startx="7"  starty="1"  priority="100"/>
+      <col type="clb"  startx="8"  starty="1"  priority="100"/>
+      <col type="clb"  startx="9"  starty="1"  priority="100"/>
+      <col type="dsp_top"  startx="10"  starty="1"  priority="100"/>
+      <col type="clb"  startx="11"  starty="1"  priority="100"/>
+      <col type="clb"  startx="12"  starty="1"  priority="100"/>
+      <col type="dsp_top"  startx="13"  starty="1"  priority="100"/>
+
+      <region type="clb" startx="14"   endx="88"   starty="1" incrx="5"  priority="20"/>
+      <region type="clb" startx="15"   endx="88"   starty="1" incrx="5"  priority="20"/>
+      <region type="clb" startx="16"   endx="88"   starty="1" incrx="5"  priority="20"/>
+      <region type="dsp_top" startx="17"   endx="88"   starty="1" incrx="5"  priority="20"/>
+      <region type="memory" startx="18"   endx="88"   starty="1" incrx="5"  priority="20"/>
+
+      <region type="memory" startx="89"   endx="163"   starty="1" incrx="5"  priority="20"/>
+      <region type="dsp_top" startx="90"   endx="163"   starty="1" incrx="5"  priority="20"/>
+      <region type="clb" startx="91"   endx="163"   starty="1" incrx="5"  priority="20"/>
+      <region type="clb" startx="92"   endx="163"   starty="1" incrx="5"  priority="20"/>
+      <region type="clb" startx="93"   endx="163"   starty="1" incrx="5"  priority="20"/>
+
+      <col type="dsp_top"  startx="164"  starty="1"  priority="20"/>
+      <col type="clb"  startx="165"  starty="1"  priority="1"/>
+      <col type="clb"  startx="166"  starty="1"  priority="1"/>
+      <col type="dsp_top"  startx="167"  starty="1"  priority="20"/>
+      <col type="clb"  startx="168"  starty="1"  priority="1"/>
+      <col type="clb"  startx="169"  starty="1"  priority="1"/>
+      <col type="dsp_top"  startx="170"  starty="1"  priority="20"/>
+      <col type="clb"  startx="171"  starty="1"  priority="1"/>
+      <col type="clb"  startx="172"  starty="1"  priority="1"/>
+      <col type="dsp_top"  startx="173"  starty="1"  priority="20"/>
+      <col type="clb"  startx="174"  starty="1"  priority="1"/>
+      <col type="clb"  startx="175"  starty="1"  priority="1"/>
+      <col type="dsp_top"  startx="176"  starty="1"  priority="20"/>
+    </fixed_layout> 
+    -->
+  </layout>
+  <device>
+    <sizing R_minW_nmos="13090" R_minW_pmos="19086.83"/>
+    <area grid_logic_tile_area="0"/>
+    <chan_width_distr>
+      <x distr="uniform" peak="1.000000"/>
+      <y distr="uniform" peak="1.000000"/>
+    </chan_width_distr>
+    <switch_block type="custom"/>
+    <connection_block input_switch_name="ipin_cblock"/>
+  </device>
+  <switchlist>
+    <switch type="mux" name="L4_driver" R="0.0" Cin="0.0" Cout="0.0" Tdel="207.9e-12" mux_trans_size="2.377" buf_size="35.69"/>
+    <!-- Delay of L16 driver is scaled from L4 by a factor of 1.5x (based on numbers from the Titan Stratix IV architecture file)
+	 Area numbers will not be totally accurate because of the same buf_size -->
+    <switch type="mux" name="L16_driver" R="0.0" Cin="0.0" Cout="0.0" Tdel="312.9e-12" mux_trans_size="2.377" buf_size="35.69"/> 
+    <switch type="mux" name="ipin_cblock" R="0.0" Cout="0.0" Cin="0.0" Tdel="130e-12" mux_trans_size="1.508" buf_size="11.71"/>
+  </switchlist>
+  <segmentlist>
+    <segment name="L4" freq="260" length="4" type="unidir" Rmetal="0.0" Cmetal="0.0">
+      <mux name="L4_driver"/>
+      <sb type="pattern">1 1 1 1 1</sb>
+      <cb type="pattern">1 1 1 1</cb>
+    </segment>
+    <segment name="L16" freq="40" length="16" type="unidir" Rmetal="0.0" Cmetal="0.0">
+      <mux name="L16_driver"/>
+      <!-- Vias from the top of the metal stack (global layers, where the long wires are 
+           implemented) down to the middle/bottom of the metal stack (semi-global layers, 
+           where the short wires are implemented) are expensive and restrictive.
+           As a result Stratix IV only places long wire switch blocks every 4 LABs -->
+      <sb type="pattern">1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1</sb>
+      <!-- For the same reasons, long wires do not connect to block pins in Stratix IV -->
+      <cb type="pattern">0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0</cb>
+    </segment>
+  </segmentlist>
+  <directlist>
+    <!-- Direct connect from one LAB to the LAB directly below it (carry chain) -->
+    <direct name="adder_carry" from_pin="clb.cout" to_pin="clb.cin" x_offset="0" y_offset="-1" z_offset="0"/>
+    <!-- Direct connect from one DSP to the DSP directly below it -->
+    <direct name="dsp_out_chain" from_pin="dsp_top.chainout" from_side="bottom" to_pin="dsp_top.chainin" to_side="top" x_offset="0" y_offset="-4" z_offset="0"/>
+    <direct name="dsp_in_chain" from_pin="dsp_top.scanout" from_side="bottom" to_pin="dsp_top.scanin" to_side="top" x_offset="0" y_offset="-4" z_offset="0"/>
+  </directlist>
+  <complexblocklist>
+    <!-- Define I/O pads begin -->
+    <!-- Not sure of the area of an I/O (varies widely), and it's not relevant to the design of the FPGA core, so we're setting it to 0. -->
+    <pb_type name="io">
+      <input name="outpad" num_pins="1"/>
+      <output name="inpad" num_pins="1"/>
+      <clock name="clock" num_pins="1"/>
+      <!-- IOs can operate as either inputs or outputs.
+	     Delays below come from Ian Kuon. They are small, so they should be interpreted as
+	     the delays to and from registers in the I/O (I/Os are generally registered
+	     today, and that is the point at which they are timing-analyzed).
+	     -->
+      <mode name="inpad">
+        <pb_type name="inpad" blif_model=".input" num_pb="1">
+          <output name="inpad" num_pins="1"/>
+        </pb_type>
+        <interconnect>
+          <direct name="inpad" input="inpad.inpad" output="io.inpad">
+            <delay_constant max="4.243e-11" in_port="inpad.inpad" out_port="io.inpad"/>
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="outpad">
+        <pb_type name="outpad" blif_model=".output" num_pb="1">
+          <input name="outpad" num_pins="1"/>
+        </pb_type>
+        <interconnect>
+          <direct name="outpad" input="io.outpad" output="outpad.outpad">
+            <delay_constant max="1.394e-11" in_port="io.outpad" out_port="outpad.outpad"/>
+          </direct>
+        </interconnect>
+      </mode>
+      <!-- Every input pin is driven by 15% of the tracks in a channel, every output pin drives 10% of the tracks in a channel -->
+      <!-- IOs go on the periphery of the FPGA, for consistency, 
+          make it physically equivalent on all sides so that only one definition of I/Os is needed.
+          If I do not make a physically equivalent definition, then I need to define 4 different I/Os, one for each side of the FPGA
+        -->
+      <!-- Place I/Os on the sides of the FPGA -->
+    </pb_type>
+    <!-- Define I/O pads ends -->
+    <!-- Define general purpose logic block (CLB) begin -->
+    <pb_type name="clb">
+      <input name="I1" num_pins="15" equivalent="full"/>
+      <input name="I2" num_pins="15" equivalent="full"/>
+      <input name="I3" num_pins="15" equivalent="full"/>
+      <input name="I4" num_pins="15" equivalent="full"/>
+      <input name="cin" num_pins="1"/>
+      <output name="O" num_pins="40" equivalent="none"/>
+      <output name="cout" num_pins="1"/>
+      <clock name="clk" num_pins="1"/>
+      <pb_type name="lab" num_pb="1">
+        <input name="I1" num_pins="15"/>
+        <input name="I2" num_pins="15"/>
+        <input name="I3" num_pins="15"/>
+        <input name="I4" num_pins="15"/>
+        <input name="cin" num_pins="1"/>
+        <output name="O" num_pins="40"/>
+        <output name="cout" num_pins="1"/>
+        <clock name="clk" num_pins="1"/>
+        <!-- Describe fracturable logic element.  
+                 Each fracturable logic element has a 6-LUT that can alternatively operate as two 5-LUTs with shared inputs. 
+                 The outputs of the fracturable logic element can be optionally registered
+            -->
+        <pb_type name="fle" num_pb="10">
+          <input name="in" num_pins="8"/>
+          <input name="cin" num_pins="1"/>
+          <output name="out" num_pins="4"/>
+          <output name="cout" num_pins="1"/>
+          <clock name="clk" num_pins="1"/>
+          <!-- 
+                    The ALM inputs map onto the fle input pins as follows:
+                            A -> fle.in[0]
+                            B -> fle.in[1]
+                            C -> fle.in[2]
+                            D -> fle.in[3]
+                            E -> fle.in[4]
+                            F -> fle.in[5]
+                            G -> fle.in[6]
+                            H -> fle.in[7]
+              -->
+          <mode name="n2_lut5">
+            <pb_type name="ble5" num_pb="2">
+              <input name="in" num_pins="5"/>
+              <input name="cin" num_pins="1"/>
+              <output name="out" num_pins="2"/>
+              <output name="cout" num_pins="1"/>
+              <clock name="clk" num_pins="1"/>
+              <mode name="blut5">
+                <pb_type name="flut5" num_pb="1">
+                  <input name="in" num_pins="5"/>
+                  <output name="out" num_pins="2"/>
+                  <clock name="clk" num_pins="1"/>
+                  <!-- Regular LUT mode -->
+                  <pb_type name="lut5" blif_model=".names" num_pb="1" class="lut">
+                    <input name="in" num_pins="5" port_class="lut_in"/>
+                    <output name="out" num_pins="1" port_class="lut_out"/>
+                    <!-- LUT timing using delay matrix -->
+                    <!-- The values listed at the end of this comment are the physical per-input delays of a Stratix 10 LUT, but because VPR cannot do LUT rebalancing,
+                             we instead take the average of these numbers to get more stable results
+                             note that those are the same delays for inputs A - E as the ones used for the 6-LUT, however, we have 
+                             subtracted the delay of the last mux stage to get the delay of inputs A - E till the 5-LUT output
+                             210.96e-12
+                             206.85e-12
+                             143.46e-12
+                             136.94e-12
+                             68.12e-12
+                          -->
+                    <delay_matrix type="max" in_port="lut5.in" out_port="lut5.out">
+                            153.27e-12
+                            153.27e-12
+                            153.27e-12
+                            153.27e-12
+                            153.27e-12
+                        </delay_matrix>
+                  </pb_type>
+                  <pb_type name="ff" blif_model=".latch" num_pb="2" class="flipflop">
+                    <input name="D" num_pins="1" port_class="D"/>
+                    <output name="Q" num_pins="1" port_class="Q"/>
+                    <clock name="clk" num_pins="1" port_class="clock"/>
+                    <T_setup value="18.91e-12" port="ff.D" clock="clk"/>
+                    <T_clock_to_Q max="60.32e-12" port="ff.Q" clock="clk"/>
+                  </pb_type>
+                  <interconnect>
+                    <direct name="lut5_in" input="flut5.in" output="lut5.in"/>
+                    <direct name="reg_in" input="flut5.in[0]" output="ff[0].D"/>
+                    <direct name="lut5_ff" input="lut5.out" output="ff[1].D">
+                      <delay_constant max="18.96e-12" in_port="lut5.out" out_port="ff[1].D"/>
+                      <pack_pattern name="ble5" in_port="lut5.out" out_port="ff[1].D"/>
+                    </direct>
+                    <complete name="clock" input="flut5.clk" output="ff.clk"/>
+                    <complete name="out_mux" input="ff.Q lut5.out" output="flut5.out">
+                      <delay_constant max="39.85e-12" in_port="lut5.out" out_port="flut5.out"/>
+                      <delay_constant max="39.85e-12" in_port="ff.Q" out_port="flut5.out"/>
+                    </complete>
+                  </interconnect>
+                </pb_type>
+                <interconnect>
+                  <direct name="direct1" input="ble5.in" output="flut5.in"/>
+                  <direct name="direct2" input="ble5.clk" output="flut5.clk"/>
+                  <direct name="direct3" input="flut5.out" output="ble5.out"/>
+                </interconnect>
+              </mode>
+              <mode name="arithmetic">
+                <pb_type name="arithmetic" num_pb="1">
+                  <input name="in" num_pins="4"/>
+                  <input name="cin" num_pins="1"/>
+                  <output name="out" num_pins="2"/>
+                  <output name="cout" num_pins="1"/>
+                  <clock name="clk" num_pins="1"/>
+                  <!-- Special dual-LUT mode that drives adder only -->
+                  <pb_type name="lut4" blif_model=".names" num_pb="2" class="lut">
+                    <input name="in" num_pins="4" port_class="lut_in"/>
+                    <output name="out" num_pins="1" port_class="lut_out"/>
+                    <!-- LUT timing using delay matrix -->
+                    <!-- The values listed at the end of this comment are the physical per-input delays of a Stratix 10 LUT, but because VPR cannot do LUT rebalancing,
+                           we instead take the average of these numbers to get more stable results
+                           note that these are derived from the delays for inputs A - D used for the 6-LUT; the delay of the
+                           trailing mux stages has been subtracted to get the delay of inputs A - D to the 4-LUT output
+                             168.12e-12
+                             164.02e-12
+                             100.63e-12
+                             94.11e-12
+                          -->
+                    <delay_matrix type="max" in_port="lut4.in" out_port="lut4.out">
+                            131.72e-12
+                            131.72e-12
+                            131.72e-12
+                            131.72e-12
+                        </delay_matrix>
+                  </pb_type>
+                  <pb_type name="adder" blif_model=".subckt adder" num_pb="1">
+                    <input name="a" num_pins="1"/>
+                    <input name="b" num_pins="1"/>
+                    <input name="cin" num_pins="1"/>
+                    <output name="cout" num_pins="1"/>
+                    <output name="sumout" num_pins="1"/>
+                    <delay_constant max="68.74e-12" in_port="adder.a" out_port="adder.sumout"/>
+                    <delay_constant max="68.74e-12" in_port="adder.b" out_port="adder.sumout"/>
+                    <delay_constant max="35.46e-12" in_port="adder.cin" out_port="adder.sumout"/>
+                    <delay_constant max="49.32e-12" in_port="adder.a" out_port="adder.cout"/>
+                    <delay_constant max="49.32e-12" in_port="adder.b" out_port="adder.cout"/>
+                    <delay_constant max="25.56e-12" in_port="adder.cin" out_port="adder.cout"/>
+                  </pb_type>
+                  <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
+                    <input name="D" num_pins="1" port_class="D"/>
+                    <output name="Q" num_pins="1" port_class="Q"/>
+                    <clock name="clk" num_pins="1" port_class="clock"/>
+                    <T_setup value="18.91e-12" port="ff.D" clock="clk"/>
+                    <T_clock_to_Q max="60.32e-12" port="ff.Q" clock="clk"/>
+                  </pb_type>
+                  <interconnect>
+                    <direct name="clock" input="arithmetic.clk" output="ff.clk"/>
+                    <direct name="lut4_in1" input="arithmetic.in" output="lut4[0].in"/>
+                    <direct name="lut4_in2" input="arithmetic.in" output="lut4[1].in"/>
+                    <direct name="lut_to_add1" input="lut4[0:0].out" output="adder.a"/>
+                    <direct name="lut_to_add2" input="lut4[1:1].out" output="adder.b"/>
+                    <direct name="add_to_ff" input="adder.sumout" output="ff.D">
+                      <delay_constant max="18.96e-12" in_port="adder.sumout" out_port="ff.D"/>
+                      <!--pack_pattern name="chain" in_port="adder.sumout" out_port="ff.D"/-->
+                    </direct>
+                    <direct name="carry_in" input="arithmetic.cin" output="adder.cin">
+                      <pack_pattern name="chain" in_port="arithmetic.cin" out_port="adder.cin"/>
+                    </direct>
+                    <direct name="carry_out" input="adder.cout" output="arithmetic.cout">
+                      <pack_pattern name="chain" in_port="adder.cout" out_port="arithmetic.cout"/>
+                    </direct>
+                    <complete name="sumout" input="ff.Q adder.sumout" output="arithmetic.out">
+                      <delay_constant max="39.85e-12" in_port="adder.sumout" out_port="arithmetic.out"/>
+                      <delay_constant max="39.85e-12" in_port="ff.Q" out_port="arithmetic.out"/>
+                    </complete>
+                  </interconnect>
+                </pb_type>
+                <interconnect>
+                  <direct name="direct1" input="ble5.in[3:0]" output="arithmetic.in"/>
+                  <direct name="carry_in" input="ble5.cin" output="arithmetic.cin">
+                    <pack_pattern name="chain" in_port="ble5.cin" out_port="arithmetic.cin"/>
+                  </direct>
+                  <direct name="carry_out" input="arithmetic.cout" output="ble5.cout">
+                    <pack_pattern name="chain" in_port="arithmetic.cout" out_port="ble5.cout"/>
+                  </direct>
+                  <direct name="direct2" input="ble5.clk" output="arithmetic.clk"/>
+                  <direct name="direct3" input="arithmetic.out" output="ble5.out"/>
+                </interconnect>
+              </mode>
+            </pb_type>
+            <interconnect>
+              <!-- fle.in[0] and fle.in[1] are shared by both 5-LUTs; the two pins are swapped on ble5[1] -->
+              <complete name="lut5_reg1" input="fle.in[0]" output="ble5[0].in[0] ble5[1].in[1]"/>
+              <complete name="lut5_reg2" input="fle.in[1]" output="ble5[0].in[1] ble5[1].in[0]"/>
+              <!-- Private 5-LUT inputs, outputs and carry chain; each pack_pattern must name the ports of its enclosing direct -->
+              <direct name="lut5_inputs_1" input="fle.in[4:2]" output="ble5[0].in[4:2]"/>
+              <direct name="lut5_inputs_22" input="fle.in[7:5]" output="ble5[1].in[4:2]"/>
+              <direct name="lut5_outputs_1" input="ble5[0].out" output="fle.out[1:0]"/>
+              <direct name="lut5_outputs_2" input="ble5[1].out" output="fle.out[3:2]"/>
+              <direct name="carry_in" input="fle.cin" output="ble5[0].cin">
+                <pack_pattern name="chain" in_port="fle.cin" out_port="ble5[0].cin"/>
+              </direct>
+              <direct name="carry_out" input="ble5[1].cout" output="fle.cout">
+                <pack_pattern name="chain" in_port="ble5[1].cout" out_port="fle.cout"/>
+              </direct>
+              <direct name="carry_link" input="ble5[0].cout" output="ble5[1].cin">
+                <pack_pattern name="chain" in_port="ble5[0].cout" out_port="ble5[1].cin"/>
+              </direct>
+              <complete name="clock" input="fle.clk" output="ble5[1:0].clk"/>
+            </interconnect>
+          </mode>
+          <!-- n2_lut5 -->
+          <mode name="n1_lut6">
+            <pb_type name="ble6" num_pb="1">
+              <input name="in" num_pins="6"/>
+              <output name="out" num_pins="4"/>
+              <clock name="clk" num_pins="1"/>
+              <pb_type name="lut6" blif_model=".names" num_pb="1" class="lut">
+                <input name="in" num_pins="6" port_class="lut_in"/>
+                <output name="out" num_pins="1" port_class="lut_out"/>
+                <!-- LUT timing using delay matrix -->
+                <!-- These are the physical per-input delays of a Stratix 10 LUT, but because VPR cannot do LUT rebalancing
+                           we instead use their average (179.6e-12) for every input to get more stable results:
+                           257.8e-12
+                           253.69e-12
+                           190.3e-12
+                           183.78e-12
+                           114.96e-12
+                           77.18e-12
+                      -->
+                <delay_matrix type="max" in_port="lut6.in" out_port="lut6.out">
+                        179.6e-12
+                        179.6e-12
+                        179.6e-12
+                        179.6e-12
+                        179.6e-12
+                        179.6e-12
+                    </delay_matrix>
+              </pb_type>
+              <pb_type name="ff" blif_model=".latch" num_pb="2" class="flipflop">
+                <input name="D" num_pins="1" port_class="D"/>
+                <output name="Q" num_pins="1" port_class="Q"/>
+                <clock name="clk" num_pins="1" port_class="clock"/>
+                <T_setup value="18.91e-12" port="ff.D" clock="clk"/>
+                <T_clock_to_Q max="60.32e-12" port="ff.Q" clock="clk"/>
+              </pb_type>
+              <interconnect>
+                <direct name="lut6_inputs" input="ble6.in" output="lut6.in"/>
+                <direct name="lut6_ff" input="lut6.out" output="ff[1].D">
+                  <delay_constant max="18.96e-12" in_port="lut6.out" out_port="ff[1].D"/>
+                  <pack_pattern name="ble6" in_port="lut6.out" out_port="ff[1].D"/>
+                </direct>
+                <complete name="clock" input="ble6.clk" output="ff.clk"/>
+                <direct name="input_to_ff" input="ble6.in[0]" output="ff[0].D"/>
+                <mux name="mux1" input="ff[0].Q lut6.out" output="ble6.out[0]">
+                  <delay_constant max="39.85e-12" in_port="lut6.out" out_port="ble6.out[0]"/>
+                  <delay_constant max="39.85e-12" in_port="ff[0].Q" out_port="ble6.out[0]"/>
+                </mux>
+                <!-- This mux has the same inputs as mux1 but drives ble6.out[1] -->
+                <mux name="mux2" input="ff[0].Q lut6.out" output="ble6.out[1]">
+                  <delay_constant max="39.85e-12" in_port="lut6.out" out_port="ble6.out[1]"/>
+                  <delay_constant max="39.85e-12" in_port="ff[0].Q" out_port="ble6.out[1]"/>
+                </mux>
+                <mux name="mux3" input="ff[1].Q lut6.out" output="ble6.out[2]">
+                  <delay_constant max="39.85e-12" in_port="lut6.out" out_port="ble6.out[2]"/>
+                  <delay_constant max="39.85e-12" in_port="ff[1].Q" out_port="ble6.out[2]"/>
+                </mux>
+                <!-- This mux has the same inputs as mux3 but drives ble6.out[3] -->
+                <mux name="mux4" input="ff[1].Q lut6.out" output="ble6.out[3]">
+                  <delay_constant max="39.85e-12" in_port="lut6.out" out_port="ble6.out[3]"/>
+                  <delay_constant max="39.85e-12" in_port="ff[1].Q" out_port="ble6.out[3]"/>
+                </mux>
+              </interconnect>
+            </pb_type>
+            <interconnect>
+              <!-- ble6 takes inputs A, B, C, D, E, & F: A-E are fle.in[4:0] and F is fle.in[7] -->
+              <direct name="lut6_inputs1" input="fle.in[4:0]" output="ble6.in[4:0]"/>
+              <direct name="lut6_inputs2" input="fle.in[7]" output="ble6.in[5]"/>
+              <direct name="direct2" input="ble6.out" output="fle.out"/>
+              <direct name="direct4" input="fle.clk" output="ble6.clk"/>
+            </interconnect>
+          </mode>
+          <!-- n1_lut6 -->
+        </pb_type>
+        <interconnect>
+          <!-- 50% sparsely populated local routing -->
+          <!-- This 50% sparsity pattern divides the cluster inputs and local feedbacks into four groups (lab.I1-I4),
+               and then selects two of the four groups to feed each LUT input. This means half of the cluster
+               inputs and local feedbacks can feed each LUT input. There is partial overlap in the inputs that
+               feed the various LUT inputs, which helps routability vs. simply having half the cluster inputs
+               feed one set of half the LUT inputs and the other half of cluster inputs feed the other set of
+               LUT inputs. This pattern is used by Stratix (I - 10) architectures. -->
+          <complete name="lutA" input="lab.I4 lab.I3" output="fle[9:0].in[0:0]">
+            <delay_constant max="74.71e-12" in_port="lab.I4" out_port="fle.in[0:0]"/>
+            <delay_constant max="74.71e-12" in_port="lab.I3" out_port="fle.in[0:0]"/>
+          </complete>
+          <complete name="lutB" input="lab.I3 lab.I2" output="fle[9:0].in[1:1]">
+            <delay_constant max="74.71e-12" in_port="lab.I3" out_port="fle.in[1:1]"/>
+            <delay_constant max="74.71e-12" in_port="lab.I2" out_port="fle.in[1:1]"/>
+          </complete>
+          <complete name="lutC" input="lab.I2 lab.I1" output="fle[9:0].in[2:2]">
+            <delay_constant max="74.71e-12" in_port="lab.I2" out_port="fle.in[2:2]"/>
+            <delay_constant max="74.71e-12" in_port="lab.I1" out_port="fle.in[2:2]"/>
+          </complete>
+          <complete name="lutD" input="lab.I4 lab.I2" output="fle[9:0].in[3:3]">
+            <delay_constant max="74.71e-12" in_port="lab.I4" out_port="fle.in[3:3]"/>
+            <delay_constant max="74.71e-12" in_port="lab.I2" out_port="fle.in[3:3]"/>
+          </complete>
+          <complete name="lutE" input="lab.I3 lab.I1" output="fle[9:0].in[4:4]">
+            <delay_constant max="74.71e-12" in_port="lab.I3" out_port="fle.in[4:4]"/>
+            <delay_constant max="74.71e-12" in_port="lab.I1" out_port="fle.in[4:4]"/>
+          </complete>
+          <complete name="lutF" input="lab.I4 lab.I1" output="fle[9:0].in[5:5]">
+            <delay_constant max="74.71e-12" in_port="lab.I4" out_port="fle.in[5:5]"/>
+            <delay_constant max="74.71e-12" in_port="lab.I1" out_port="fle.in[5:5]"/>
+          </complete>
+          <complete name="lutG" input="lab.I4 lab.I3" output="fle[9:0].in[6:6]">
+            <delay_constant max="74.71e-12" in_port="lab.I4" out_port="fle.in[6:6]"/>
+            <delay_constant max="74.71e-12" in_port="lab.I3" out_port="fle.in[6:6]"/>
+          </complete>
+          <complete name="lutH" input="lab.I3 lab.I2" output="fle[9:0].in[7:7]">
+            <delay_constant max="74.71e-12" in_port="lab.I3" out_port="fle.in[7:7]"/>
+            <delay_constant max="74.71e-12" in_port="lab.I2" out_port="fle.in[7:7]"/>
+          </complete>
+          <complete name="clks" input="lab.clk" output="fle[9:0].clk"/>
+          <!-- This way of specifying the direct connections to the clb outputs is important because this architecture uses automatic spreading of opins.
+                     By grouping the output pins in this fashion, if a logic block is completely filled by 6-LUTs,
+                     then the outputs those 6-LUTs use are evenly distributed across all four sides of the CLB instead of clumped on two sides (which is what happens with a more
+                     naive specification).
+              -->
+          <direct name="labouts1" input="fle[9:0].out[0]" output="lab.O[9:0]"/>
+          <direct name="labouts2" input="fle[9:0].out[1]" output="lab.O[19:10]"/>
+          <direct name="labouts3" input="fle[9:0].out[2]" output="lab.O[29:20]"/>
+          <direct name="labouts4" input="fle[9:0].out[3]" output="lab.O[39:30]"/>
+          <!-- Carry chain links: lab.cin enters fle[0], each fle[i].cout feeds fle[i+1].cin, fle[9].cout leaves on lab.cout -->
+          <direct name="carry_in" input="lab.cin" output="fle[0:0].cin">
+            <!-- Put all inter-block carry chain delay on this one edge -->
+            <delay_constant max="18.47e-12" in_port="lab.cin" out_port="fle[0:0].cin"/>
+            <pack_pattern name="chain" in_port="lab.cin" out_port="fle[0:0].cin"/>
+          </direct>
+          <direct name="carry_out" input="fle[9:9].cout" output="lab.cout">
+            <pack_pattern name="chain" in_port="fle[9:9].cout" out_port="lab.cout"/>
+          </direct>
+          <direct name="carry_link" input="fle[8:0].cout" output="fle[9:1].cin">
+            <pack_pattern name="chain" in_port="fle[8:0].cout" out_port="fle[9:1].cin"/>
+          </direct>
+        </interconnect>
+      </pb_type>
+      <interconnect>
+        <direct name="carry_in" input="clb.cin" output="lab.cin"/>
+        <direct name="carry_out" input="lab.cout" output="clb.cout"/>
+        <direct name="clock" input="clb.clk" output="lab.clk"/>
+        <complete name="Input_feedback_I1" input="clb.I1 lab.O[4:0]" output="lab.I1"/>
+        <complete name="Input_feedback_I2" input="clb.I2 lab.O[24:20]" output="lab.I2"/>
+        <complete name="Input_feedback_I3" input="clb.I3 lab.O[9:5]" output="lab.I3"/>
+        <complete name="Input_feedback_I4" input="clb.I4 lab.O[29:25]" output="lab.I4"/>
+        <!-- Disabled alternative: drive lab.I1-I4 from the clb inputs only, without the lab.O feedback used above
+        <direct name="Input_I1" input="clb.I1" output="lab.I1"/>
+        <direct name="Input_I2" input="clb.I2" output="lab.I2"/>
+        <direct name="Input_I3" input="clb.I3" output="lab.I3"/>
+        <direct name="Input_I4" input="clb.I4" output="lab.I4"/>
+        -->
+        <direct name="output" input="lab.O" output="clb.O"/>
+      </interconnect>
+    </pb_type>
+    <!-- Define general purpose logic block (CLB) ends -->
+
+    <!-- Define DSP slice begin -->
+    <pb_type name="dsp_top">
+      <input name="reset" num_pins="1" is_non_clock_global="true"/>
+      <input name="dsp_I1" num_pins="64" />
+      <input name="dsp_I2" num_pins="64" />
+      <input name="chainin" num_pins="64"/>
+      <input name="scanin" num_pins="27"/>
+      <output name="result" num_pins="74"/>
+      <output name="chainout" num_pins="64"/>
+      <output name="scanout" num_pins="27"/>
+      <clock name="clk" num_pins="1"/>
+
+    <pb_type name="dsp" num_pb="1">
+      <input name="reset" num_pins="1"/>
+      <input name="dsp_I1" num_pins="64"/>
+      <input name="dsp_I2" num_pins="64"/>
+      <input name="chainin" num_pins="64"/>
+      <input name="scanin" num_pins="27"/>
+      <output name="result" num_pins="74"/>
+      <output name="chainout" num_pins="64"/>
+      <output name="scanout" num_pins="27"/>
+      <clock name="clk" num_pins="1"/>
+
+      <pb_type name="dsp_pb" num_pb="1">
+        <input name="reset" num_pins="1"/>
+        <input name="mode_sigs" num_pins="12"/>
+        <input name="datain" num_pins="116"/>
+        <input name="chainin" num_pins="64"/>
+        <input name="scanin" num_pins="27"/>
+        <output name="result" num_pins="74"/>
+        <output name="chainout" num_pins="64"/>
+        <output name="scanout" num_pins="27"/>
+        <clock name="clk" num_pins="1"/>
+
+        <!-- Fixed-point multiplier mode (one 27x27 multiplier): a = datain[26:0], b = datain[53:27], out drives result[53:0] -->
+        <mode name="one_mult_27x27">
+          <pb_type name="one_mult_27x27" num_pb="1">
+            <input name="a" num_pins="27"/>
+            <input name="b" num_pins="27"/>
+            <output name="out" num_pins="54"/>
+            <pb_type name="mult_27x27" blif_model=".subckt multiply" num_pb="1">
+              <input name="a" num_pins="27"/>
+              <input name="b" num_pins="27"/>
+              <output name="out" num_pins="54"/>
+              <delay_constant max="2.14e-9" in_port="mult_27x27.a" out_port="mult_27x27.out"/>
+              <delay_constant max="2.14e-9" in_port="mult_27x27.b" out_port="mult_27x27.out"/>
+            </pb_type>
+            <interconnect>
+              <direct name="a2a" input="one_mult_27x27.a" output="mult_27x27.a">
+              </direct>
+              <direct name="b2b" input="one_mult_27x27.b" output="mult_27x27.b">
+              </direct>
+              <direct name="out2out" input="mult_27x27.out" output="one_mult_27x27.out">
+              </direct>
+            </interconnect>
+          </pb_type>
+          <interconnect>
+            <direct name="datain2a" input="dsp_pb.datain[26:0]" output="one_mult_27x27.a">
+            </direct>
+            <direct name="datain2b" input="dsp_pb.datain[53:27]" output="one_mult_27x27.b">
+            </direct>
+            <direct name="out2dataout" input="one_mult_27x27.out" output="dsp_pb.result[53:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- Fixed-point multiplier mode (two 18x19 multipliers): instance 0 takes a/b from datain[36:0], instance 1 from datain[73:37]; the two 37-bit outputs together drive result[73:0] -->
+        <mode name="two_mult_18x19">
+          <pb_type name="two_mult_18x19" num_pb="2">
+            <input name="a" num_pins="18"/>
+            <input name="b" num_pins="19"/>
+            <output name="out" num_pins="37"/>
+            <pb_type name="mult_18x19" blif_model=".subckt multiply" num_pb="1">
+              <input name="a" num_pins="18"/>
+              <input name="b" num_pins="19"/>
+              <output name="out" num_pins="37"/>
+              <delay_constant max="2.14e-9" in_port="mult_18x19.a" out_port="mult_18x19.out"/>
+              <delay_constant max="2.14e-9" in_port="mult_18x19.b" out_port="mult_18x19.out"/>
+            </pb_type>
+            <interconnect>
+              <direct name="a2a" input="two_mult_18x19.a" output="mult_18x19.a">
+                 </direct>
+              <direct name="b2b" input="two_mult_18x19.b" output="mult_18x19.b">
+                 </direct>
+              <direct name="out2out" input="mult_18x19.out" output="two_mult_18x19.out">
+                 </direct>
+            </interconnect>
+          </pb_type>
+          <interconnect>
+            <direct name="datain2a1" input="dsp_pb.datain[17:0]" output="two_mult_18x19[0].a">
+            </direct>
+            <direct name="datain2b1" input="dsp_pb.datain[36:18]" output="two_mult_18x19[0].b">
+            </direct>
+            <direct name="datain2a2" input="dsp_pb.datain[54:37]" output="two_mult_18x19[1].a">
+            </direct>
+            <direct name="datain2b2" input="dsp_pb.datain[73:55]" output="two_mult_18x19[1].b">
+            </direct>
+            <direct name="out2result" input="two_mult_18x19.out" output="dsp_pb.result[73:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- Fixed-point multiplier mode (four 9x9 multipliers): instance i takes a = datain[18i+8:18i], b = datain[18i+17:18i+9] and drives result[18i+17:18i] -->
+        <mode name="mult_9x9_fixed_pt_mode">
+          <pb_type name="mult_9x9_fixed_pt" blif_model=".subckt multiply" num_pb="4">
+            <input name="a" num_pins="9"/>
+            <input name="b" num_pins="9"/>
+            <output name="out" num_pins="18"/>
+
+            <delay_constant max="2.14e-9" in_port="mult_9x9_fixed_pt.a" out_port="mult_9x9_fixed_pt.out"/>
+            <delay_constant max="2.14e-9" in_port="mult_9x9_fixed_pt.b" out_port="mult_9x9_fixed_pt.out"/>
+          </pb_type>
+          <interconnect>
+            <direct name="atoa0" input="dsp_pb.datain[8:0]" output="mult_9x9_fixed_pt[0].a"/>
+            <direct name="btob0" input="dsp_pb.datain[17:9]" output="mult_9x9_fixed_pt[0].b"/>
+            <direct name="atoa1" input="dsp_pb.datain[26:18]" output="mult_9x9_fixed_pt[1].a"/>
+            <direct name="btob1" input="dsp_pb.datain[35:27]" output="mult_9x9_fixed_pt[1].b"/>
+            <direct name="atoa2" input="dsp_pb.datain[44:36]" output="mult_9x9_fixed_pt[2].a"/>
+            <direct name="btob2" input="dsp_pb.datain[53:45]" output="mult_9x9_fixed_pt[2].b"/>
+            <direct name="atoa3" input="dsp_pb.datain[62:54]" output="mult_9x9_fixed_pt[3].a"/>
+            <direct name="btob3" input="dsp_pb.datain[71:63]" output="mult_9x9_fixed_pt[3].b"/>
+            <direct name="sumouttosumout0" input="mult_9x9_fixed_pt[0].out" output="dsp_pb.result[17:0]"/>
+            <direct name="sumouttosumout1" input="mult_9x9_fixed_pt[1].out" output="dsp_pb.result[35:18]"/>
+            <direct name="sumouttosumout2" input="mult_9x9_fixed_pt[2].out" output="dsp_pb.result[53:36]"/>
+            <direct name="sumouttosumout3" input="mult_9x9_fixed_pt[3].out" output="dsp_pb.result[71:54]"/>
+          </interconnect>
+        </mode>
+
+        <!-- Fixed-point multiplier-add-sum mode: result = (bx * by) + (ax * ay) + chainin, chainout = result. ax/ay/bx/by come from datain[73:0]; only the low 37 bits of chainin, result and chainout are wired -->
+        <mode name="sop_2_mode">
+          <pb_type name="sop_2" num_pb="1" blif_model=".subckt int_sop_2">
+            <input name="reset" num_pins="1"/>
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="ax" num_pins="18"/>
+            <input name="ay" num_pins="19"/>
+            <input name="bx" num_pins="18"/>
+            <input name="by" num_pins="19"/>
+            <input name="chainin" num_pins="37"/>
+            <output name="result" num_pins="37"/>
+            <output name="chainout" num_pins="37"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.14e-9" in_port="sop_2.reset" out_port="sop_2.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.mode_sigs" out_port="sop_2.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.ax" out_port="sop_2.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.ay" out_port="sop_2.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.bx" out_port="sop_2.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.by" out_port="sop_2.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.chainin" out_port="sop_2.result"/>
+
+            <delay_constant max="2.14e-9" in_port="sop_2.reset" out_port="sop_2.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.mode_sigs" out_port="sop_2.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.ax" out_port="sop_2.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.ay" out_port="sop_2.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.bx" out_port="sop_2.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.by" out_port="sop_2.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.chainin" out_port="sop_2.chainout"/>
+
+            <T_setup value="18.91e-12" port="sop_2.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_2.ax" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_2.ay" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_2.bx" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_2.by" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_2.chainin" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_2.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_2.result" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.ax" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.ay" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.bx" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.by" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.chainin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="clk" input="dsp_pb.clk" output="sop_2.clk">
+            </direct>
+            <direct name="reset" input="dsp_pb.reset" output="sop_2.reset">
+            </direct>
+            <direct name="modesigs" input="dsp_pb.mode_sigs" output="sop_2.mode_sigs">
+            </direct>
+            <direct name="datain2ax" input="dsp_pb.datain[17:0]" output="sop_2.ax">
+            </direct>
+            <direct name="datain2ay" input="dsp_pb.datain[36:18]" output="sop_2.ay">
+            </direct>
+            <direct name="datain2bx" input="dsp_pb.datain[54:37]" output="sop_2.bx">
+            </direct>
+            <direct name="datain2by" input="dsp_pb.datain[73:55]" output="sop_2.by">
+            </direct>
+            <direct name="chainin"   input="dsp_pb.chainin[36:0]" output="sop_2.chainin">
+            </direct>
+            <direct name="dataout2result" input="sop_2.result" output="dsp_pb.result[36:0]">
+            </direct>
+            <direct name="chainout" input="sop_2.chainout" output="dsp_pb.chainout[36:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- Fixed-point multiplier-add-sum mode (18x19 operands, 36-bit addend): result = (ax * ay) + bx + chainin, chainout = result, with scanin-scanout support. ax/ay/bx come from datain[72:0]; the scan ports use bits [18:0] -->
+        <mode name="mult_add_mode_18_19_36">
+          <pb_type name="mult_add" num_pb="1" blif_model=".subckt mult_add_int_18x19">
+            <input name="reset" num_pins="1"/>
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="ax" num_pins="18"/>
+            <input name="ay" num_pins="19"/>
+            <input name="bx" num_pins="36"/>
+            <input name="chainin" num_pins="64"/>
+            <input name="scanin" num_pins="19"/>
+            <output name="result" num_pins="64"/>
+            <output name="chainout" num_pins="64"/>
+            <output name="scanout" num_pins="19"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.14e-9" in_port="mult_add.reset" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.mode_sigs" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ax" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ay" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.bx" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.chainin" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.scanin" out_port="mult_add.result"/>
+
+            <delay_constant max="2.14e-9" in_port="mult_add.reset" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.mode_sigs" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ax" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ay" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.bx" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.chainin" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.scanin" out_port="mult_add.chainout"/>
+
+            <delay_constant max="2.14e-9" in_port="mult_add.reset" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.mode_sigs" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ax" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ay" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.bx" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.chainin" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.scanin" out_port="mult_add.scanout"/>
+
+            <T_setup value="18.91e-12" port="mult_add.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.ax" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.ay" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.bx" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.chainin" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.scanin" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.result" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.ax" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.ay" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.bx" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.chainin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.scanin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="clk" input="dsp_pb.clk" output="mult_add.clk">
+            </direct>
+            <direct name="reset" input="dsp_pb.reset" output="mult_add.reset">
+            </direct>
+            <direct name="modesigs" input="dsp_pb.mode_sigs" output="mult_add.mode_sigs">
+            </direct>
+            <direct name="datain2ax" input="dsp_pb.datain[17:0]" output="mult_add.ax">
+            </direct>
+            <direct name="datain2ay" input="dsp_pb.datain[36:18]" output="mult_add.ay">
+            </direct>
+            <direct name="datain2bx" input="dsp_pb.datain[72:37]" output="mult_add.bx">
+            </direct>
+            <direct name="chainin"   input="dsp_pb.chainin[63:0]" output="mult_add.chainin">
+            </direct>
+            <direct name="scanin"   input="dsp_pb.scanin[18:0]" output="mult_add.scanin">
+            </direct>
+            <direct name="dataout2result" input="mult_add.result" output="dsp_pb.result[63:0]">
+            </direct>
+            <direct name="chainout" input="mult_add.chainout" output="dsp_pb.chainout[63:0]">
+            </direct>
+            <direct name="scanout" input="mult_add.scanout" output="dsp_pb.scanout[18:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- fixed-point multiplier-add-sum mode result = (ax * ay) + bx + chainin. chainout = result. with scanin-scanout support -->
+        <mode name="mult_add_mode_27_27_64">
+          <pb_type name="mult_add" num_pb="1" blif_model=".subckt mult_add_int_27x27">
+            <input name="reset" num_pins="1"/>
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="ax" num_pins="27"/>
+            <input name="ay" num_pins="27"/>
+            <input name="bx" num_pins="36"/>
+            <input name="chainin" num_pins="64"/>
+            <input name="scanin" num_pins="27"/>
+            <output name="result" num_pins="64"/>
+            <output name="chainout" num_pins="64"/>
+            <output name="scanout" num_pins="27"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.14e-9" in_port="mult_add.reset" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.mode_sigs" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ax" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ay" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.bx" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.chainin" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.scanin" out_port="mult_add.result"/>
+
+            <delay_constant max="2.14e-9" in_port="mult_add.reset" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.mode_sigs" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ax" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ay" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.bx" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.chainin" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.scanin" out_port="mult_add.chainout"/>
+
+            <delay_constant max="2.14e-9" in_port="mult_add.reset" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.mode_sigs" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ax" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ay" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.bx" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.chainin" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.scanin" out_port="mult_add.scanout"/>
+
+            <T_setup value="18.91e-12" port="mult_add.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.ax" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.ay" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.bx" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.chainin" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.scanin" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.result" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.ax" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.ay" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.bx" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.chainin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.scanin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="clk" input="dsp_pb.clk" output="mult_add.clk">
+            </direct>
+            <direct name="reset" input="dsp_pb.reset" output="mult_add.reset">
+            </direct>
+            <direct name="modesigs" input="dsp_pb.mode_sigs" output="mult_add.mode_sigs">
+            </direct>
+            <direct name="datain2ax" input="dsp_pb.datain[26:0]" output="mult_add.ax">
+            </direct>
+            <direct name="datain2ay" input="dsp_pb.datain[53:27]" output="mult_add.ay">
+            </direct>
+            <direct name="datain2bx" input="dsp_pb.datain[89:54]" output="mult_add.bx">
+            </direct>
+            <direct name="chainin"   input="dsp_pb.chainin[63:0]" output="mult_add.chainin">
+            </direct>
+            <direct name="scanin"   input="dsp_pb.scanin[26:0]" output="mult_add.scanin">
+            </direct>
+            <direct name="dataout2result" input="mult_add.result" output="dsp_pb.result[63:0]">
+            </direct>
+            <direct name="chainout" input="mult_add.chainout" output="dsp_pb.chainout[63:0]">
+            </direct>
+            <direct name="scanout" input="mult_add.scanout" output="dsp_pb.scanout[26:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- Fixed-point sum-of-4 mode: result = (dx * dy) + (cx * cy) + (bx * by) + (ax * ay) + chainin; chainout = result. Primitive is .subckt int_sop_4 with eight 9-bit operands, a 64-bit chainin and 64-bit result/chainout outputs. -->
+        <mode name="sop_4_mode">
+          <pb_type name="sop_4" num_pb="1" blif_model=".subckt int_sop_4">
+            <input name="reset" num_pins="1"/>
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="ax" num_pins="9"/>
+            <input name="ay" num_pins="9"/>
+            <input name="bx" num_pins="9"/>
+            <input name="by" num_pins="9"/>
+            <input name="cx" num_pins="9"/>
+            <input name="cy" num_pins="9"/>
+            <input name="dx" num_pins="9"/>
+            <input name="dy" num_pins="9"/>
+            <input name="chainin" num_pins="64"/>
+            <output name="result" num_pins="64"/>
+            <output name="chainout" num_pins="64"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.14e-9" in_port="sop_4.reset" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.mode_sigs" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.ax" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.ay" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.bx" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.by" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.cx" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.cy" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.dx" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.dy" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.chainin" out_port="sop_4.result"/>
+
+            <delay_constant max="2.14e-9" in_port="sop_4.reset" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.mode_sigs" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.ax" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.ay" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.bx" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.by" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.cx" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.cy" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.dx" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.dy" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.chainin" out_port="sop_4.chainout"/>
+
+            <T_setup value="18.91e-12" port="sop_4.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.ax" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.ay" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.bx" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.by" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.cx" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.cy" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.dx" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.dy" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.chainin" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.result" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.ax" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.ay" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.bx" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.by" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.cx" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.cy" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.dx" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.dy" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.chainin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.result" clock="clk"/>
+          </pb_type>
+          <interconnect> <!-- dsp_pb.datain[71:0] is split into consecutive 9-bit slices in the order ax, ay, bx, by, cx, cy, dx, dy -->
+            <direct name="clk" input="dsp_pb.clk" output="sop_4.clk">
+            </direct>
+            <direct name="reset" input="dsp_pb.reset" output="sop_4.reset">
+            </direct>
+            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="sop_4.mode_sigs">
+            </direct>
+            <direct name="datain2ax" input="dsp_pb.datain[8:0]" output="sop_4.ax">
+            </direct>
+            <direct name="datain2ay" input="dsp_pb.datain[17:9]" output="sop_4.ay">
+            </direct>
+            <direct name="datain2bx" input="dsp_pb.datain[26:18]" output="sop_4.bx">
+            </direct>
+            <direct name="datain2by" input="dsp_pb.datain[35:27]" output="sop_4.by">
+            </direct>
+            <direct name="datain2cx" input="dsp_pb.datain[44:36]" output="sop_4.cx">
+            </direct>
+            <direct name="datain2cy" input="dsp_pb.datain[53:45]" output="sop_4.cy">
+            </direct>
+            <direct name="datain2dx" input="dsp_pb.datain[62:54]" output="sop_4.dx">
+            </direct>
+            <direct name="datain2dy" input="dsp_pb.datain[71:63]" output="sop_4.dy">
+            </direct>
+            <direct name="chainin"   input="dsp_pb.chainin[63:0]" output="sop_4.chainin">
+            </direct>
+            <direct name="dataout2result" input="sop_4.result" output="dsp_pb.result[63:0]">
+            </direct>
+            <direct name="chainout" input="sop_4.chainout" output="dsp_pb.chainout[63:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- Fixed-point sum-of-4 accumulate mode: result = (dx * dy) + (cx * cy) + (bx * by) + (ax * ay) + chainin + accumulator; chainout = result. Primitive is .subckt int_sop_accum_4; note that the pb_type name sop_4 is reused from sop_4_mode. -->
+        <mode name="sop_4_accum_mode">
+          <pb_type name="sop_4" num_pb="1" blif_model=".subckt int_sop_accum_4">
+            <input name="reset" num_pins="1"/>
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="ax" num_pins="9"/>
+            <input name="ay" num_pins="9"/>
+            <input name="bx" num_pins="9"/>
+            <input name="by" num_pins="9"/>
+            <input name="cx" num_pins="9"/>
+            <input name="cy" num_pins="9"/>
+            <input name="dx" num_pins="9"/>
+            <input name="dy" num_pins="9"/>
+            <input name="chainin" num_pins="64"/>
+            <output name="result" num_pins="64"/>
+            <output name="chainout" num_pins="64"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.14e-9" in_port="sop_4.mode_sigs" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.ax" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.ay" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.bx" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.by" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.cx" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.cy" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.dx" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.dy" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.chainin" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.reset" out_port="sop_4.result"/>
+
+            <delay_constant max="2.14e-9" in_port="sop_4.mode_sigs" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.ax" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.ay" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.bx" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.by" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.cx" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.cy" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.dx" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.dy" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.chainin" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.reset" out_port="sop_4.chainout"/>
+
+            <T_setup value="18.91e-12" port="sop_4.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.ax" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.ay" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.bx" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.by" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.cx" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.cy" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.dx" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.dy" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.chainin" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.result" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.ax" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.ay" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.bx" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.by" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.cx" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.cy" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.dx" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.dy" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.chainin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.result" clock="clk"/>
+          </pb_type>
+          <interconnect> <!-- dsp_pb.datain[71:0] is split into consecutive 9-bit slices in the order ax, ay, bx, by, cx, cy, dx, dy -->
+            <direct name="clk" input="dsp_pb.clk" output="sop_4.clk">
+            </direct>
+            <direct name="reset" input="dsp_pb.reset" output="sop_4.reset">
+            </direct>
+            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="sop_4.mode_sigs">
+            </direct>
+            <direct name="datain2ax" input="dsp_pb.datain[8:0]" output="sop_4.ax">
+            </direct>
+            <direct name="datain2ay" input="dsp_pb.datain[17:9]" output="sop_4.ay">
+            </direct>
+            <direct name="datain2bx" input="dsp_pb.datain[26:18]" output="sop_4.bx">
+            </direct>
+            <direct name="datain2by" input="dsp_pb.datain[35:27]" output="sop_4.by">
+            </direct>
+            <direct name="datain2cx" input="dsp_pb.datain[44:36]" output="sop_4.cx">
+            </direct>
+            <direct name="datain2cy" input="dsp_pb.datain[53:45]" output="sop_4.cy">
+            </direct>
+            <direct name="datain2dx" input="dsp_pb.datain[62:54]" output="sop_4.dx">
+            </direct>
+            <direct name="datain2dy" input="dsp_pb.datain[71:63]" output="sop_4.dy">
+            </direct>
+            <direct name="chainin"   input="dsp_pb.chainin[63:0]" output="sop_4.chainin">
+            </direct>
+            <direct name="dataout2result" input="sop_4.result" output="dsp_pb.result[63:0]">
+            </direct>
+            <direct name="chainout" input="sop_4.chainout" output="dsp_pb.chainout[63:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- Fixed-point MAC mode (result = a*b + accumulated value): a single 27x27 .subckt mac_int_27x27 primitive with a 54-bit output. -->
+        <mode name="mac_27x27_fixed_pt_mode">
+          <pb_type name="mac_27x27_fixed_pt" blif_model=".subckt mac_int_27x27" num_pb="1">
+            <input name="reset" num_pins="1"/>
+            <input name="a" num_pins="27"/>
+            <input name="b" num_pins="27"/>
+            <output name="out" num_pins="54"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.14e-9" in_port="mac_27x27_fixed_pt.a" out_port="mac_27x27_fixed_pt.out"/>
+            <delay_constant max="2.14e-9" in_port="mac_27x27_fixed_pt.b" out_port="mac_27x27_fixed_pt.out"/>
+            <delay_constant max="2.14e-9" in_port="mac_27x27_fixed_pt.reset" out_port="mac_27x27_fixed_pt.out"/>
+
+            <T_setup value="18.91e-12" port="mac_27x27_fixed_pt.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_27x27_fixed_pt.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_27x27_fixed_pt.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_27x27_fixed_pt.out" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_27x27_fixed_pt.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_27x27_fixed_pt.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_27x27_fixed_pt.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_27x27_fixed_pt.out" clock="clk"/>
+          </pb_type>
+          <interconnect> <!-- a = dsp_pb.datain[26:0], b = dsp_pb.datain[53:27]; out drives dsp_pb.result[53:0] -->
+            <direct name="reset" input="dsp_pb.reset" output="mac_27x27_fixed_pt.reset"/>
+            <direct name="clk" input="dsp_pb.clk" output="mac_27x27_fixed_pt.clk"/>
+            <direct name="atoa" input="dsp_pb.datain[26:0]" output="mac_27x27_fixed_pt.a"/>
+            <direct name="btob" input="dsp_pb.datain[53:27]" output="mac_27x27_fixed_pt.b"/>
+            <direct name="sumouttosumout" input="mac_27x27_fixed_pt.out" output="dsp_pb.result[53:0]"/>
+          </interconnect>
+        </mode>
+
+        <!-- Fixed-point MAC mode (result = a*b + accumulated value): two 18x19 .subckt mac_int_18x19 instances, each with a 37-bit output, sharing dsp_pb.reset and dsp_pb.clk. -->
+        <mode name="mac_18x19_fixed_pt_mode">
+          <pb_type name="mac_fixed_pt" blif_model=".subckt mac_int_18x19" num_pb="2">
+            <input name="reset" num_pins="1"/>
+            <input name="a" num_pins="18"/>
+            <input name="b" num_pins="19"/>
+            <output name="out" num_pins="37"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.14e-9" in_port="mac_fixed_pt.a" out_port="mac_fixed_pt.out"/>
+            <delay_constant max="2.14e-9" in_port="mac_fixed_pt.b" out_port="mac_fixed_pt.out"/>
+            <delay_constant max="2.14e-9" in_port="mac_fixed_pt.reset" out_port="mac_fixed_pt.out"/>
+
+            <T_setup value="18.91e-12" port="mac_fixed_pt.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_fixed_pt.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_fixed_pt.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_fixed_pt.out" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fixed_pt.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fixed_pt.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fixed_pt.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fixed_pt.out" clock="clk"/>
+          </pb_type>
+          <interconnect> <!-- instance 0 reads datain[36:0] and drives result[36:0]; instance 1 reads datain[73:37] and drives result[73:37] -->
+            <direct name="reset0" input="dsp_pb.reset" output="mac_fixed_pt[0].reset"/>
+            <direct name="reset1" input="dsp_pb.reset" output="mac_fixed_pt[1].reset"/>
+            <direct name="clk0" input="dsp_pb.clk" output="mac_fixed_pt[0].clk"/>
+            <direct name="clk1" input="dsp_pb.clk" output="mac_fixed_pt[1].clk"/>
+            <direct name="atoa0" input="dsp_pb.datain[17:0]" output="mac_fixed_pt[0].a"/>
+            <direct name="btob0" input="dsp_pb.datain[36:18]" output="mac_fixed_pt[0].b"/>
+            <direct name="atoa1" input="dsp_pb.datain[54:37]" output="mac_fixed_pt[1].a"/>
+            <direct name="btob1" input="dsp_pb.datain[73:55]" output="mac_fixed_pt[1].b"/>
+            <direct name="sumouttosumout0" input="mac_fixed_pt[0].out" output="dsp_pb.result[36:0]"/>
+            <direct name="sumouttosumout1" input="mac_fixed_pt[1].out" output="dsp_pb.result[73:37]"/>
+          </interconnect>
+        </mode>
+
+        <!-- Fixed-point MAC mode (result = a*b + accumulated value): four 9x9 .subckt mac_int_9x9 instances, each with an 18-bit output, sharing dsp_pb.reset and dsp_pb.clk. -->
+        <mode name="mac_9x9_fixed_pt_mode">
+          <pb_type name="mac_9x9_fixed_pt" blif_model=".subckt mac_int_9x9" num_pb="4">
+            <input name="reset" num_pins="1"/>
+            <input name="a" num_pins="9"/>
+            <input name="b" num_pins="9"/>
+            <output name="out" num_pins="18"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.14e-9" in_port="mac_9x9_fixed_pt.a" out_port="mac_9x9_fixed_pt.out"/>
+            <delay_constant max="2.14e-9" in_port="mac_9x9_fixed_pt.b" out_port="mac_9x9_fixed_pt.out"/>
+            <delay_constant max="2.14e-9" in_port="mac_9x9_fixed_pt.reset" out_port="mac_9x9_fixed_pt.out"/>
+
+            <T_setup value="18.91e-12" port="mac_9x9_fixed_pt.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_9x9_fixed_pt.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_9x9_fixed_pt.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_9x9_fixed_pt.out" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_9x9_fixed_pt.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_9x9_fixed_pt.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_9x9_fixed_pt.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_9x9_fixed_pt.out" clock="clk"/>
+          </pb_type>
+          <interconnect> <!-- instance k (k = 0..3) reads a then b from datain[18k+17:18k] and drives result[18k+17:18k] -->
+            <direct name="reset0" input="dsp_pb.reset" output="mac_9x9_fixed_pt[0].reset"/>
+            <direct name="reset1" input="dsp_pb.reset" output="mac_9x9_fixed_pt[1].reset"/>
+            <direct name="reset2" input="dsp_pb.reset" output="mac_9x9_fixed_pt[2].reset"/>
+            <direct name="reset3" input="dsp_pb.reset" output="mac_9x9_fixed_pt[3].reset"/>
+            <direct name="clk0" input="dsp_pb.clk" output="mac_9x9_fixed_pt[0].clk"/>
+            <direct name="clk1" input="dsp_pb.clk" output="mac_9x9_fixed_pt[1].clk"/>
+            <direct name="clk2" input="dsp_pb.clk" output="mac_9x9_fixed_pt[2].clk"/>
+            <direct name="clk3" input="dsp_pb.clk" output="mac_9x9_fixed_pt[3].clk"/>
+            <direct name="atoa0" input="dsp_pb.datain[8:0]" output="mac_9x9_fixed_pt[0].a"/>
+            <direct name="btob0" input="dsp_pb.datain[17:9]" output="mac_9x9_fixed_pt[0].b"/>
+            <direct name="atoa1" input="dsp_pb.datain[26:18]" output="mac_9x9_fixed_pt[1].a"/>
+            <direct name="btob1" input="dsp_pb.datain[35:27]" output="mac_9x9_fixed_pt[1].b"/>
+            <direct name="atoa2" input="dsp_pb.datain[44:36]" output="mac_9x9_fixed_pt[2].a"/>
+            <direct name="btob2" input="dsp_pb.datain[53:45]" output="mac_9x9_fixed_pt[2].b"/>
+            <direct name="atoa3" input="dsp_pb.datain[62:54]" output="mac_9x9_fixed_pt[3].a"/>
+            <direct name="btob3" input="dsp_pb.datain[71:63]" output="mac_9x9_fixed_pt[3].b"/>
+            <direct name="sumouttosumout0" input="mac_9x9_fixed_pt[0].out" output="dsp_pb.result[17:0]"/>
+            <direct name="sumouttosumout1" input="mac_9x9_fixed_pt[1].out" output="dsp_pb.result[35:18]"/>
+            <direct name="sumouttosumout2" input="mac_9x9_fixed_pt[2].out" output="dsp_pb.result[53:36]"/>
+            <direct name="sumouttosumout3" input="mac_9x9_fixed_pt[3].out" output="dsp_pb.result[71:54]"/>
+          </interconnect>
+        </mode>
+
+        <!-- Floating-point multiplier mode (result = a * b): one 32-bit .subckt mult_fp_32 with no clock port; a = datain[31:0], b = datain[63:32], out drives result[31:0]. -->
+        <mode name="mult_fp32_mode">
+          <pb_type name="mult_fp32" blif_model=".subckt mult_fp_32" num_pb="1">
+            <input name="a" num_pins="32"/>
+            <input name="b" num_pins="32"/>
+            <output name="out" num_pins="32"/>
+
+            <delay_constant max="2.56e-9" in_port="mult_fp32.a" out_port="mult_fp32.out"/>
+            <delay_constant max="2.56e-9" in_port="mult_fp32.b" out_port="mult_fp32.out"/>
+          </pb_type>
+          <interconnect>
+            <direct name="a2a" input="dsp_pb.datain[31:0]" output="mult_fp32.a">
+            </direct>
+            <direct name="b2b" input="dsp_pb.datain[63:32]" output="mult_fp32.b">
+            </direct>
+            <direct name="out2out" input="mult_fp32.out" output="dsp_pb.result[31:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- Floating-point multiplier mode (result = a * b): two 16-bit .subckt mult_fp_16 instances with no clock port; both a operands sit in datain[31:0], both b operands in datain[63:32], outputs fill result[31:0]. -->
+        <mode name="mult_fp16_mode">
+          <pb_type name="mult_fp16" blif_model=".subckt mult_fp_16" num_pb="2">
+            <input name="a" num_pins="16"/>
+            <input name="b" num_pins="16"/>
+            <output name="out" num_pins="16"/>
+
+            <delay_constant max="2.56e-9" in_port="mult_fp16.a" out_port="mult_fp16.out"/>
+            <delay_constant max="2.56e-9" in_port="mult_fp16.b" out_port="mult_fp16.out"/>
+          </pb_type>
+          <interconnect>
+            <direct name="a2a0" input="dsp_pb.datain[15:0]" output="mult_fp16[0].a"></direct>
+            <direct name="a2a1" input="dsp_pb.datain[31:16]" output="mult_fp16[1].a"></direct>
+            <direct name="b2b0" input="dsp_pb.datain[47:32]" output="mult_fp16[0].b"></direct>
+            <direct name="b2b1" input="dsp_pb.datain[63:48]" output="mult_fp16[1].b"></direct>
+            <direct name="out2out0" input="mult_fp16[0].out" output="dsp_pb.result[15:0]"></direct>
+            <direct name="out2out1" input="mult_fp16[1].out" output="dsp_pb.result[31:16]"></direct>
+          </interconnect>
+        </mode>
+
+        <!-- Floating-point adder mode (result = a + b): one 32-bit .subckt addition_fp_32 with no clock port; a = datain[31:0], b = datain[63:32], out drives result[31:0]. -->
+        <mode name="adder_fp32_mode"> 
+          <pb_type name="adder_fp32" blif_model=".subckt addition_fp_32" num_pb="1">
+            <input name="a" num_pins="32"/>
+            <input name="b" num_pins="32"/>
+            <output name="out" num_pins="32"/>
+
+            <delay_constant max="2.56e-9" in_port="adder_fp32.a" out_port="adder_fp32.out"/>
+            <delay_constant max="2.56e-9" in_port="adder_fp32.b" out_port="adder_fp32.out"/>
+          </pb_type>
+          <interconnect>
+            <direct name="atoa" input="dsp_pb.datain[31:0]" output="adder_fp32.a">
+            </direct>
+            <direct name="btob" input="dsp_pb.datain[63:32]" output="adder_fp32.b">
+            </direct>
+            <direct name="sumouttosumout" input="adder_fp32.out" output="dsp_pb.result[31:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- Floating-point adder mode (result = a + b): two 16-bit .subckt addition_fp_16 instances with no clock port; both a operands sit in datain[31:0], both b operands in datain[63:32], outputs fill result[31:0]. -->
+        <mode name="adder_fp16_mode">
+          <pb_type name="adder_fp16" blif_model=".subckt addition_fp_16" num_pb="2">
+            <input name="a" num_pins="16"/>
+            <input name="b" num_pins="16"/>
+            <output name="out" num_pins="16"/>
+
+            <delay_constant max="2.56e-9" in_port="adder_fp16.a" out_port="adder_fp16.out"/>
+            <delay_constant max="2.56e-9" in_port="adder_fp16.b" out_port="adder_fp16.out"/>
+          </pb_type>
+          <interconnect>
+            <direct name="a2a0" input="dsp_pb.datain[15:0]" output="adder_fp16[0].a"></direct>
+            <direct name="a2a1" input="dsp_pb.datain[31:16]" output="adder_fp16[1].a"></direct>
+            <direct name="b2b0" input="dsp_pb.datain[47:32]" output="adder_fp16[0].b"></direct>
+            <direct name="b2b1" input="dsp_pb.datain[63:48]" output="adder_fp16[1].b"></direct>
+            <direct name="out2out0" input="adder_fp16[0].out" output="dsp_pb.result[15:0]"></direct>
+            <direct name="out2out1" input="adder_fp16[1].out" output="dsp_pb.result[31:16]"></direct>
+          </interconnect>
+        </mode>
+
+        <!-- Clocked floating-point multiplier mode (result = a * b): one 32-bit .subckt mult_fp_clk_32 driven by dsp_pb.clk; the pb_type name mult_fp32 is reused from mult_fp32_mode, with the same datain/result slices. -->
+        <mode name="mult_fp32_clocked_mode">
+          <pb_type name="mult_fp32" blif_model=".subckt mult_fp_clk_32" num_pb="1">
+            <input name="a" num_pins="32"/>
+            <input name="b" num_pins="32"/>
+            <output name="out" num_pins="32"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.56e-9" in_port="mult_fp32.a" out_port="mult_fp32.out"/>
+            <delay_constant max="2.56e-9" in_port="mult_fp32.b" out_port="mult_fp32.out"/>
+
+            <T_setup value="18.91e-12" port="mult_fp32.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_fp32.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_fp32.out" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_fp32.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_fp32.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_fp32.out" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="clk" input="dsp_pb.clk" output="mult_fp32.clk"/>
+            <direct name="a2a" input="dsp_pb.datain[31:0]" output="mult_fp32.a">
+            </direct>
+            <direct name="b2b" input="dsp_pb.datain[63:32]" output="mult_fp32.b">
+            </direct>
+            <direct name="out2out" input="mult_fp32.out" output="dsp_pb.result[31:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- Clocked floating-point multiplier mode (result = a * b): two 16-bit .subckt mult_fp_clk_16 instances sharing dsp_pb.clk; both a operands sit in datain[31:0], both b operands in datain[63:32], outputs fill result[31:0]. -->
+        <mode name="mult_fp16_clocked_mode">
+          <pb_type name="mult_fp16" blif_model=".subckt mult_fp_clk_16" num_pb="2">
+            <input name="a" num_pins="16"/>
+            <input name="b" num_pins="16"/>
+            <output name="out" num_pins="16"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.56e-9" in_port="mult_fp16.a" out_port="mult_fp16.out"/>
+            <delay_constant max="2.56e-9" in_port="mult_fp16.b" out_port="mult_fp16.out"/>
+
+            <T_setup value="18.91e-12" port="mult_fp16.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_fp16.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_fp16.out" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_fp16.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_fp16.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_fp16.out" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="clk0" input="dsp_pb.clk" output="mult_fp16[0].clk"/>
+            <direct name="clk1" input="dsp_pb.clk" output="mult_fp16[1].clk"/>
+            <direct name="a2a0" input="dsp_pb.datain[15:0]" output="mult_fp16[0].a"></direct>
+            <direct name="a2a1" input="dsp_pb.datain[31:16]" output="mult_fp16[1].a"></direct>
+            <direct name="b2b0" input="dsp_pb.datain[47:32]" output="mult_fp16[0].b"></direct>
+            <direct name="b2b1" input="dsp_pb.datain[63:48]" output="mult_fp16[1].b"></direct>
+            <direct name="out2out0" input="mult_fp16[0].out" output="dsp_pb.result[15:0]"></direct>
+            <direct name="out2out1" input="mult_fp16[1].out" output="dsp_pb.result[31:16]"></direct>
+          </interconnect>
+        </mode>
+
+        <!-- Clocked floating-point adder mode (result = a + b): one 32-bit .subckt addition_fp_clk_32 driven by dsp_pb.clk; the pb_type name adder_fp32 is reused from adder_fp32_mode, with the same datain/result slices. -->
+        <mode name="adder_fp32_clocked_mode"> 
+          <pb_type name="adder_fp32" blif_model=".subckt addition_fp_clk_32" num_pb="1">
+            <input name="a" num_pins="32"/>
+            <input name="b" num_pins="32"/>
+            <output name="out" num_pins="32"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.56e-9" in_port="adder_fp32.a" out_port="adder_fp32.out"/>
+            <delay_constant max="2.56e-9" in_port="adder_fp32.b" out_port="adder_fp32.out"/>
+
+            <T_setup value="18.91e-12" port="adder_fp32.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="adder_fp32.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="adder_fp32.out" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="adder_fp32.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="adder_fp32.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="adder_fp32.out" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="clk" input="dsp_pb.clk" output="adder_fp32.clk"/>
+            <direct name="atoa" input="dsp_pb.datain[31:0]" output="adder_fp32.a">
+            </direct>
+            <direct name="btob" input="dsp_pb.datain[63:32]" output="adder_fp32.b">
+            </direct>
+            <direct name="sumouttosumout" input="adder_fp32.out" output="dsp_pb.result[31:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- clocked 16-bit floating point adder mode, two adders sharing dsp_pb.clk (result = a + b each). adder 0: a = datain[15:0], b = datain[47:32], out = result[15:0]; adder 1: a = datain[31:16], b = datain[63:48], out = result[31:16] -->
+        <mode name="adder_fp16_clocked_mode">
+          <pb_type name="adder_fp16" blif_model=".subckt addition_fp_clk_16" num_pb="2">
+            <input name="a" num_pins="16"/>
+            <input name="b" num_pins="16"/>
+            <output name="out" num_pins="16"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.56e-9" in_port="adder_fp16.a" out_port="adder_fp16.out"/>
+            <delay_constant max="2.56e-9" in_port="adder_fp16.b" out_port="adder_fp16.out"/>
+
+            <T_setup value="18.91e-12" port="adder_fp16.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="adder_fp16.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="adder_fp16.out" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="adder_fp16.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="adder_fp16.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="adder_fp16.out" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="clk0" input="dsp_pb.clk" output="adder_fp16[0].clk"/>
+            <direct name="clk1" input="dsp_pb.clk" output="adder_fp16[1].clk"/>
+            <direct name="a2a0" input="dsp_pb.datain[15:0]" output="adder_fp16[0].a"></direct>
+            <direct name="a2a1" input="dsp_pb.datain[31:16]" output="adder_fp16[1].a"></direct>
+            <direct name="b2b0" input="dsp_pb.datain[47:32]" output="adder_fp16[0].b"></direct>
+            <direct name="b2b1" input="dsp_pb.datain[63:48]" output="adder_fp16[1].b"></direct>
+            <direct name="out2out0" input="adder_fp16[0].out" output="dsp_pb.result[15:0]"></direct>
+            <direct name="out2out1" input="adder_fp16[1].out" output="dsp_pb.result[31:16]"></direct>
+          </interconnect>
+        </mode>
+
+        <!-- 32-bit floating point mac mode (result[31:0] = a*b + accumulated value), where a = datain[31:0] and b = datain[63:32]; reset is wired from dsp_pb.reset -->
+        <mode name="mac_fp32_mode">
+          <pb_type name="mac_fp32" blif_model=".subckt mac_fp_32" num_pb="1">
+            <input name="reset" num_pins="1"/>
+            <input name="a" num_pins="32"/>
+            <input name="b" num_pins="32"/>
+            <output name="out" num_pins="32"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.56e-9" in_port="mac_fp32.a" out_port="mac_fp32.out"/>
+            <delay_constant max="2.56e-9" in_port="mac_fp32.b" out_port="mac_fp32.out"/>
+            <delay_constant max="2.56e-9" in_port="mac_fp32.reset" out_port="mac_fp32.out"/>
+
+            <T_setup value="18.91e-12" port="mac_fp32.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_fp32.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_fp32.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_fp32.out" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp32.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp32.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp32.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp32.out" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="reset" input="dsp_pb.reset" output="mac_fp32.reset"/>
+            <direct name="clk" input="dsp_pb.clk" output="mac_fp32.clk"/>
+            <direct name="atoa" input="dsp_pb.datain[31:0]" output="mac_fp32.a">
+            </direct>
+            <direct name="btob" input="dsp_pb.datain[63:32]" output="mac_fp32.b">
+            </direct>
+            <direct name="sumouttosumout" input="mac_fp32.out" output="dsp_pb.result[31:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- floating point 16-bit mac mode, two macs sharing dsp_pb.clk and dsp_pb.reset (result = a*b + accumulated value each). mac 0: a = datain[15:0], b = datain[47:32], out = result[15:0]; mac 1: a = datain[31:16], b = datain[63:48], out = result[31:16] -->
+        <mode name="mac_fp16_mode">
+          <pb_type name="mac_fp16" blif_model=".subckt mac_fp_16" num_pb="2">
+            <input name="reset" num_pins="1"/>
+            <input name="a" num_pins="16"/>
+            <input name="b" num_pins="16"/>
+            <output name="out" num_pins="16"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.56e-9" in_port="mac_fp16.a" out_port="mac_fp16.out"/>
+            <delay_constant max="2.56e-9" in_port="mac_fp16.b" out_port="mac_fp16.out"/>
+            <delay_constant max="2.56e-9" in_port="mac_fp16.reset" out_port="mac_fp16.out"/>
+
+            <T_setup value="18.91e-12" port="mac_fp16.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_fp16.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_fp16.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_fp16.out" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp16.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp16.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp16.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp16.out" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="reset0" input="dsp_pb.reset" output="mac_fp16[0].reset"></direct>
+            <direct name="reset1" input="dsp_pb.reset" output="mac_fp16[1].reset"></direct>
+            <direct name="clk0" input="dsp_pb.clk" output="mac_fp16[0].clk"></direct>
+            <direct name="clk1" input="dsp_pb.clk" output="mac_fp16[1].clk"></direct>
+            <direct name="atoa0" input="dsp_pb.datain[15:0]"  output="mac_fp16[0].a"></direct>
+            <direct name="atoa1" input="dsp_pb.datain[31:16]" output="mac_fp16[1].a"></direct>
+            <direct name="btob0" input="dsp_pb.datain[47:32]" output="mac_fp16[0].b"></direct>
+            <direct name="btob1" input="dsp_pb.datain[63:48]" output="mac_fp16[1].b"></direct>
+            <direct name="sumouttosumout0" input="mac_fp16[0].out" output="dsp_pb.result[15:0]"></direct>
+            <direct name="sumouttosumout1" input="mac_fp16[1].out" output="dsp_pb.result[31:16]"></direct>
+          </interconnect>
+        </mode>
+
+        <!-- floating point fp16 sum-of-2 mult mode (result = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b; chainout = third_inp or result). third_inp is presumably fp32_in = datain[95:64] (TODO confirm). The 32-bit result drives both result[31:0] and result[63:32]. -->
+        <mode name="fp16_sum_of_products_mode"> 
+          <pb_type name="fp16_sum_of_2_mult" blif_model=".subckt fp16_mult_add" num_pb="1">
+            <input name="reset" num_pins="1"/>
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="top_a" num_pins="16"/>
+            <input name="top_b" num_pins="16"/>
+            <input name="bot_a" num_pins="16"/>
+            <input name="bot_b" num_pins="16"/>
+            <input name="fp32_in" num_pins="32"/>
+            <output name="result" num_pins="32"/>
+            <output name="chainout" num_pins="32"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.reset" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.mode_sigs" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_a" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_b" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_a" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_b" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.fp32_in" out_port="fp16_sum_of_2_mult.result"/>
+
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.reset" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.mode_sigs" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_a" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_b" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_a" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_b" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.fp32_in" out_port="fp16_sum_of_2_mult.chainout"/>
+
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.top_a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.top_b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.bot_a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.bot_b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.fp32_in" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.result" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.top_a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.top_b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.bot_a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.bot_b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.fp32_in" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="reset" input="dsp_pb.reset" output="fp16_sum_of_2_mult.reset"/>
+            <direct name="clk" input="dsp_pb.clk" output="fp16_sum_of_2_mult.clk"/>
+            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="fp16_sum_of_2_mult.mode_sigs">
+            </direct>
+            <direct name="atoa_top" input="dsp_pb.datain[15:0]" output="fp16_sum_of_2_mult.top_a">
+            </direct>
+            <direct name="btob_top" input="dsp_pb.datain[31:16]" output="fp16_sum_of_2_mult.top_b">
+            </direct>
+            <direct name="result_top" input="fp16_sum_of_2_mult.result" output="dsp_pb.result[31:0]">
+            </direct>
+            <direct name="atoa_bot" input="dsp_pb.datain[47:32]" output="fp16_sum_of_2_mult.bot_a">
+            </direct>
+            <direct name="btob_bot" input="dsp_pb.datain[63:48]" output="fp16_sum_of_2_mult.bot_b">
+            </direct>
+            <direct name="result_bot" input="fp16_sum_of_2_mult.result" output="dsp_pb.result[63:32]">
+            </direct>
+            <direct name="fp32in" input="dsp_pb.datain[95:64]" output="fp16_sum_of_2_mult.fp32_in">
+            </direct>
+            <direct name="chainout" input="fp16_sum_of_2_mult.chainout" output="dsp_pb.chainout[31:0]">
+            </direct>
+          </interconnect>
+        </mode>  
+
+        <!-- floating point fp16 sum-of-2 mult mode with chain input (result = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b + fp32 chainin or third inp; chainout = third_inp or result). Reuses the pb_type name fp16_sum_of_2_mult with blif model fp16_sop2_mult. The 32-bit result drives both result[31:0] and result[63:32]. -->
+        <mode name="fp16_sum_of_products_2_mult_mode"> 
+          <pb_type name="fp16_sum_of_2_mult" blif_model=".subckt fp16_sop2_mult" num_pb="1">
+            <input name="reset" num_pins="1"/>
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="top_a" num_pins="16"/>
+            <input name="top_b" num_pins="16"/>
+            <input name="bot_a" num_pins="16"/>
+            <input name="bot_b" num_pins="16"/>
+            <input name="fp32_in" num_pins="32"/>
+            <input name="chainin" num_pins="32"/>
+            <output name="result" num_pins="32"/>
+            <output name="chainout" num_pins="32"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.reset" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.mode_sigs" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_a" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_b" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_a" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_b" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.chainin" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.fp32_in" out_port="fp16_sum_of_2_mult.result"/>
+
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.reset" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.mode_sigs" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_a" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_b" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_a" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_b" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.chainin" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.fp32_in" out_port="fp16_sum_of_2_mult.chainout"/>
+
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.top_a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.top_b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.bot_a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.bot_b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.fp32_in" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.chainin" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.result" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.top_a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.top_b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.bot_a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.bot_b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.fp32_in" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.chainin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="reset" input="dsp_pb.reset" output="fp16_sum_of_2_mult.reset"/>
+            <direct name="clk" input="dsp_pb.clk" output="fp16_sum_of_2_mult.clk"/>
+            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="fp16_sum_of_2_mult.mode_sigs">
+            </direct>
+            <direct name="atoa_top" input="dsp_pb.datain[15:0]" output="fp16_sum_of_2_mult.top_a">
+            </direct>
+            <direct name="btob_top" input="dsp_pb.datain[31:16]" output="fp16_sum_of_2_mult.top_b">
+            </direct>
+            <direct name="result_top" input="fp16_sum_of_2_mult.result" output="dsp_pb.result[31:0]">
+            </direct>
+            <direct name="atoa_bot" input="dsp_pb.datain[47:32]" output="fp16_sum_of_2_mult.bot_a">
+            </direct>
+            <direct name="btob_bot" input="dsp_pb.datain[63:48]" output="fp16_sum_of_2_mult.bot_b">
+            </direct>
+            <direct name="result_bot" input="fp16_sum_of_2_mult.result" output="dsp_pb.result[63:32]">
+            </direct>
+            <direct name="chainin" input="dsp_pb.chainin[31:0]" output="fp16_sum_of_2_mult.chainin">
+            </direct>
+            <direct name="fp32in" input="dsp_pb.datain[95:64]" output="fp16_sum_of_2_mult.fp32_in">
+            </direct>
+            <direct name="chainout" input="fp16_sum_of_2_mult.chainout" output="dsp_pb.chainout[31:0]">
+            </direct>
+          </interconnect>
+        </mode>        
+
+        <!-- floating point fp16 sum-of-2 accum mode (result = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b + accumulator; chainout = result). The 32-bit result drives both result[31:0] and result[63:32]. -->
+        <mode name="fp16_sum_of_products_2_accum_mode"> 
+          <pb_type name="fp16_sum_of_2_accum" blif_model=".subckt fp16_sop2_accum" num_pb="1">
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="reset" num_pins="1"/>
+            <input name="top_a" num_pins="16"/>
+            <input name="top_b" num_pins="16"/>
+            <input name="bot_a" num_pins="16"/>
+            <input name="bot_b" num_pins="16"/>
+            <output name="result" num_pins="32"/>
+            <output name="chainout" num_pins="32"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.mode_sigs" out_port="fp16_sum_of_2_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.top_a" out_port="fp16_sum_of_2_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.top_b" out_port="fp16_sum_of_2_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.bot_a" out_port="fp16_sum_of_2_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.bot_b" out_port="fp16_sum_of_2_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.reset" out_port="fp16_sum_of_2_accum.result"/>
+
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.mode_sigs" out_port="fp16_sum_of_2_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.top_a" out_port="fp16_sum_of_2_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.top_b" out_port="fp16_sum_of_2_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.bot_a" out_port="fp16_sum_of_2_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.bot_b" out_port="fp16_sum_of_2_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.reset" out_port="fp16_sum_of_2_accum.chainout"/>
+
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_accum.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_accum.top_a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_accum.top_b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_accum.bot_a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_accum.bot_b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_accum.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_accum.result" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_accum.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_accum.top_a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_accum.top_b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_accum.bot_a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_accum.bot_b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_accum.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_accum.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="fp16_sum_of_2_accum.mode_sigs"/>
+            <direct name="reset" input="dsp_pb.reset" output="fp16_sum_of_2_accum.reset"/>
+            <direct name="clk" input="dsp_pb.clk" output="fp16_sum_of_2_accum.clk"/>
+            <direct name="atoa_top" input="dsp_pb.datain[15:0]" output="fp16_sum_of_2_accum.top_a">
+            </direct>
+            <direct name="btob_top" input="dsp_pb.datain[31:16]" output="fp16_sum_of_2_accum.top_b">
+            </direct>
+            <direct name="result_top" input="fp16_sum_of_2_accum.result" output="dsp_pb.result[31:0]">
+            </direct>
+            <direct name="atoa_bot" input="dsp_pb.datain[47:32]" output="fp16_sum_of_2_accum.bot_a">
+            </direct>
+            <direct name="btob_bot" input="dsp_pb.datain[63:48]" output="fp16_sum_of_2_accum.bot_b">
+            </direct>
+            <direct name="result_bot" input="fp16_sum_of_2_accum.result" output="dsp_pb.result[63:32]">
+            </direct>
+            <direct name="chainout" input="fp16_sum_of_2_accum.chainout" output="dsp_pb.chainout[31:0]">
+            </direct>
+          </interconnect>
+        </mode>        
+
+        <!-- floating point fp16 mult, fp32 add mode (chainout = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b; result = chainin + third_inp). third_inp is presumably fp32_in = datain[95:64] (TODO confirm). The 32-bit result drives both result[31:0] and result[63:32]. -->
+        <mode name="fp16_mult_fp32_add"> 
+          <pb_type name="fp16_mult_fp32_add" blif_model=".subckt fp16_mult_fp32_add" num_pb="1">
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="reset" num_pins="1"/>
+            <input name="top_a" num_pins="16"/>
+            <input name="top_b" num_pins="16"/>
+            <input name="bot_a" num_pins="16"/>
+            <input name="bot_b" num_pins="16"/>
+            <input name="fp32_in" num_pins="32"/>
+            <input name="chainin" num_pins="32"/>
+            <output name="result" num_pins="32"/>
+            <output name="chainout" num_pins="32"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.reset" out_port="fp16_mult_fp32_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.mode_sigs" out_port="fp16_mult_fp32_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.top_a" out_port="fp16_mult_fp32_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.top_b" out_port="fp16_mult_fp32_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.bot_a" out_port="fp16_mult_fp32_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.bot_b" out_port="fp16_mult_fp32_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.chainin" out_port="fp16_mult_fp32_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.fp32_in" out_port="fp16_mult_fp32_add.result"/>
+
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.reset" out_port="fp16_mult_fp32_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.mode_sigs" out_port="fp16_mult_fp32_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.top_a" out_port="fp16_mult_fp32_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.top_b" out_port="fp16_mult_fp32_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.bot_a" out_port="fp16_mult_fp32_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.bot_b" out_port="fp16_mult_fp32_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.chainin" out_port="fp16_mult_fp32_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.fp32_in" out_port="fp16_mult_fp32_add.chainout"/>
+
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.top_a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.top_b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.bot_a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.bot_b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.chainin" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.fp32_in" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.result" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.top_a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.top_b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.bot_a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.bot_b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.chainin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.fp32_in" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="clk" input="dsp_pb.clk" output="fp16_mult_fp32_add.clk"/>
+            <direct name="reset" input="dsp_pb.reset" output="fp16_mult_fp32_add.reset"/>
+            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="fp16_mult_fp32_add.mode_sigs">
+            </direct>
+            <direct name="atoa_top" input="dsp_pb.datain[15:0]" output="fp16_mult_fp32_add.top_a">
+            </direct>
+            <direct name="btob_top" input="dsp_pb.datain[31:16]" output="fp16_mult_fp32_add.top_b">
+            </direct>
+            <direct name="result_top" input="fp16_mult_fp32_add.result" output="dsp_pb.result[31:0]">
+            </direct>
+            <direct name="atoa_bot" input="dsp_pb.datain[47:32]" output="fp16_mult_fp32_add.bot_a">
+            </direct>
+            <direct name="btob_bot" input="dsp_pb.datain[63:48]" output="fp16_mult_fp32_add.bot_b">
+            </direct>
+            <direct name="result_bot" input="fp16_mult_fp32_add.result" output="dsp_pb.result[63:32]">
+            </direct>
+            <direct name="chainin" input="dsp_pb.chainin[31:0]" output="fp16_mult_fp32_add.chainin">
+            </direct>
+            <direct name="fp32in" input="dsp_pb.datain[95:64]" output="fp16_mult_fp32_add.fp32_in">
+            </direct>
+            <direct name="chainout" input="fp16_mult_fp32_add.chainout" output="dsp_pb.chainout[31:0]">
+            </direct>
+          </interconnect>
+        </mode>      
+
+        <!-- floating point fp16 mult, fp32 accum mode: chainout = top_a * top_b + bot_a * bot_b; result = third_inp + accumulator ("third_inp" is presumably the fp32_in port; TODO confirm) -->
+        <mode name="fp16_mult_fp32_accum"> 
+          <pb_type name="fp16_mult_fp32_accum" blif_model=".subckt fp16_mult_fp32_accum" num_pb="1">
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="reset" num_pins="1"/>
+            <input name="top_a" num_pins="16"/>
+            <input name="top_b" num_pins="16"/>
+            <input name="bot_a" num_pins="16"/>
+            <input name="bot_b" num_pins="16"/>
+            <input name="fp32_in" num_pins="32"/>
+            <output name="result" num_pins="32"/>
+            <output name="chainout" num_pins="32"/>
+            <clock name="clk" num_pins="1"/>
+            <!-- Constant delay (max 2.56 ns) from every non-clock input to the result output -->
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.mode_sigs" out_port="fp16_mult_fp32_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.top_a" out_port="fp16_mult_fp32_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.top_b" out_port="fp16_mult_fp32_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.bot_a" out_port="fp16_mult_fp32_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.bot_b" out_port="fp16_mult_fp32_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.fp32_in" out_port="fp16_mult_fp32_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.reset" out_port="fp16_mult_fp32_accum.result"/>
+            <!-- Same constant delay from every non-clock input to the chainout output -->
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.mode_sigs" out_port="fp16_mult_fp32_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.top_a" out_port="fp16_mult_fp32_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.top_b" out_port="fp16_mult_fp32_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.bot_a" out_port="fp16_mult_fp32_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.bot_b" out_port="fp16_mult_fp32_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.fp32_in" out_port="fp16_mult_fp32_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.reset" out_port="fp16_mult_fp32_accum.chainout"/>
+            <!-- Setup time relative to clk for every input and for the result output (none is given for chainout) -->
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.top_a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.top_b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.bot_a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.bot_b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.fp32_in" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.result" clock="clk"/>
+            <!-- Clock-to-Q relative to clk for the same ports; max and min are set to the same value -->
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.top_a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.top_b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.bot_a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.bot_b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.fp32_in" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.result" clock="clk"/>
+          </pb_type>
+          <interconnect> <!-- Maps dsp_pb pins onto this mode's primitive: datain[63:0] carries the four fp16 operands, datain[95:64] the fp32 input -->
+            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="fp16_mult_fp32_accum.mode_sigs"/>
+            <direct name="reset" input="dsp_pb.reset" output="fp16_mult_fp32_accum.reset"/>
+            <direct name="clk" input="dsp_pb.clk" output="fp16_mult_fp32_accum.clk"/>
+            <direct name="atoa_top" input="dsp_pb.datain[15:0]" output="fp16_mult_fp32_accum.top_a">
+            </direct>
+            <direct name="btob_top" input="dsp_pb.datain[31:16]" output="fp16_mult_fp32_accum.top_b">
+            </direct>
+            <direct name="result_top" input="fp16_mult_fp32_accum.result" output="dsp_pb.result[31:0]">
+            </direct>
+            <direct name="atoa_bot" input="dsp_pb.datain[47:32]" output="fp16_mult_fp32_accum.bot_a">
+            </direct>
+            <direct name="btob_bot" input="dsp_pb.datain[63:48]" output="fp16_mult_fp32_accum.bot_b">
+            </direct>
+            <direct name="result_bot" input="fp16_mult_fp32_accum.result" output="dsp_pb.result[63:32]"> <!-- NOTE(review): same 32-bit result port as result_top, so it fans out to both halves of dsp_pb.result; confirm this is intended -->
+            </direct>
+            <direct name="fp32in" input="dsp_pb.datain[95:64]" output="fp16_mult_fp32_accum.fp32_in">
+            </direct>
+            <direct name="chainout" input="fp16_mult_fp32_accum.chainout" output="dsp_pb.chainout[31:0]">
+            </direct>
+          </interconnect>
+        </mode>      
+
+        <!-- floating point fp32 mult_then_add mode (result = a * b + chainin; chainout = third_inp or result). "third_inp" is presumably the fp32_in port; TODO confirm -->
+        <mode name="fp32_mult_then_add">
+          <pb_type name="fp32_mult_then_add" blif_model=".subckt fp32_mult_then_add" num_pb="1">
+            <input name="reset" num_pins="1"/>
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="a" num_pins="32"/>
+            <input name="b" num_pins="32"/>
+            <input name="fp32_in" num_pins="32"/>
+            <input name="chainin" num_pins="32"/>
+            <output name="result" num_pins="32"/>
+            <output name="chainout" num_pins="32"/>
+            <clock name="clk" num_pins="1"/>
+            <!-- Constant delay (max 2.56 ns) from every non-clock input to the result output -->
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.reset" out_port="fp32_mult_then_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.mode_sigs" out_port="fp32_mult_then_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.a" out_port="fp32_mult_then_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.b" out_port="fp32_mult_then_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.chainin" out_port="fp32_mult_then_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.fp32_in" out_port="fp32_mult_then_add.result"/>
+            <!-- Same constant delay from every non-clock input to the chainout output -->
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.reset" out_port="fp32_mult_then_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.mode_sigs" out_port="fp32_mult_then_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.a" out_port="fp32_mult_then_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.b" out_port="fp32_mult_then_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.chainin" out_port="fp32_mult_then_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.fp32_in" out_port="fp32_mult_then_add.chainout"/>
+            <!-- Setup time relative to clk for every input and for the result output (none is given for chainout) -->
+            <T_setup value="18.91e-12" port="fp32_mult_then_add.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_then_add.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_then_add.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_then_add.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_then_add.chainin" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_then_add.fp32_in" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_then_add.result" clock="clk"/>
+            <!-- Clock-to-Q relative to clk for the same ports; max and min are set to the same value -->
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_then_add.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_then_add.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_then_add.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_then_add.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_then_add.chainin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_then_add.fp32_in" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_then_add.result" clock="clk"/>
+          </pb_type>
+          <interconnect> <!-- Maps dsp_pb pins onto this mode's primitive: datain[31:0] is a, datain[63:32] is b, datain[95:64] is fp32_in -->
+            <direct name="reset" input="dsp_pb.reset" output="fp32_mult_then_add.reset"/>
+            <direct name="clk" input="dsp_pb.clk" output="fp32_mult_then_add.clk"/>
+            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="fp32_mult_then_add.mode_sigs">
+            </direct>
+            <direct name="atoa" input="dsp_pb.datain[31:0]" output="fp32_mult_then_add.a">
+            </direct>
+            <direct name="btob" input="dsp_pb.datain[63:32]" output="fp32_mult_then_add.b">
+            </direct>
+            <direct name="result" input="fp32_mult_then_add.result" output="dsp_pb.result[31:0]">
+            </direct>
+            <direct name="chainin" input="dsp_pb.chainin[31:0]" output="fp32_mult_then_add.chainin">
+            </direct>
+            <direct name="fp32in" input="dsp_pb.datain[95:64]" output="fp32_mult_then_add.fp32_in">
+            </direct>
+            <direct name="chainout" input="fp32_mult_then_add.chainout" output="dsp_pb.chainout[31:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- floating point fp32 mult_add mode (chainout = a * b; result = third_inp + chainin). NOTE(review): this comment used to say "chainout" for both expressions; the second is presumably result, and "third_inp" presumably the fp32_in port. Confirm. -->
+        <mode name="fp32_mult_add">
+          <pb_type name="fp32_mult_add" blif_model=".subckt fp32_mult_add" num_pb="1">
+            <input name="reset" num_pins="1"/>
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="a" num_pins="32"/>
+            <input name="b" num_pins="32"/>
+            <input name="fp32_in" num_pins="32"/>
+            <input name="chainin" num_pins="32"/>
+            <output name="result" num_pins="32"/>
+            <output name="chainout" num_pins="32"/>
+            <clock name="clk" num_pins="1"/>
+            <!-- Constant delay (max 2.56 ns) from every non-clock input to the result output -->
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.reset" out_port="fp32_mult_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.mode_sigs" out_port="fp32_mult_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.a" out_port="fp32_mult_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.b" out_port="fp32_mult_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.chainin" out_port="fp32_mult_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.fp32_in" out_port="fp32_mult_add.result"/>
+            <!-- Same constant delay from every non-clock input to the chainout output -->
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.reset" out_port="fp32_mult_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.mode_sigs" out_port="fp32_mult_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.a" out_port="fp32_mult_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.b" out_port="fp32_mult_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.chainin" out_port="fp32_mult_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.fp32_in" out_port="fp32_mult_add.chainout"/>
+            <!-- Setup time relative to clk for every input and for the result output (none is given for chainout) -->
+            <T_setup value="18.91e-12" port="fp32_mult_add.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_add.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_add.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_add.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_add.chainin" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_add.fp32_in" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_add.result" clock="clk"/>
+            <!-- Clock-to-Q relative to clk for the same ports; max and min are set to the same value -->
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_add.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_add.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_add.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_add.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_add.chainin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_add.fp32_in" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_add.result" clock="clk"/>
+          </pb_type>
+          <interconnect> <!-- Maps dsp_pb pins onto this mode's primitive: datain[31:0] is a, datain[63:32] is b, datain[95:64] is fp32_in -->
+            <direct name="reset" input="dsp_pb.reset" output="fp32_mult_add.reset"/>
+            <direct name="clk" input="dsp_pb.clk" output="fp32_mult_add.clk"/>
+            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="fp32_mult_add.mode_sigs">
+            </direct>
+            <direct name="atoa" input="dsp_pb.datain[31:0]" output="fp32_mult_add.a">
+            </direct>
+            <direct name="btob" input="dsp_pb.datain[63:32]" output="fp32_mult_add.b">
+            </direct>
+            <direct name="result" input="fp32_mult_add.result" output="dsp_pb.result[31:0]">
+            </direct>
+            <direct name="chainin" input="dsp_pb.chainin[31:0]" output="fp32_mult_add.chainin">
+            </direct>
+            <direct name="fp32in" input="dsp_pb.datain[95:64]" output="fp32_mult_add.fp32_in">
+            </direct>
+            <direct name="chainout" input="fp32_mult_add.chainout" output="dsp_pb.chainout[31:0]">
+            </direct>
+          </interconnect>
+        </mode>
+      </pb_type>
+
+      <interconnect> <!-- dsp to dsp_pb wiring: dsp_I1[11:0] feeds mode_sigs; dsp_I1[63:12] and all of dsp_I2 form the 116-bit datain -->
+        <direct name="mode_sigs" input="dsp.dsp_I1[11:0]" output="dsp_pb.mode_sigs"/>
+        <direct name="datain1" input="dsp.dsp_I1[63:12]" output="dsp_pb.datain[51:0]"/>
+        <direct name="datain2" input="dsp.dsp_I2" output="dsp_pb.datain[115:52]"/>
+        <direct name="reset" input="dsp.reset" output="dsp_pb.reset"/>
+        <direct name="chainin" input="dsp.chainin" output="dsp_pb.chainin"/>
+        <direct name="chainout" input="dsp_pb.chainout" output="dsp.chainout"/>
+        <direct name="scanin" input="dsp.scanin" output="dsp_pb.scanin"/>
+        <direct name="scanout" input="dsp_pb.scanout" output="dsp.scanout"/>
+        <direct name="result" input="dsp_pb.result" output="dsp.result"/>
+        <direct name="clk" input="dsp.clk" output="dsp_pb.clk"/>
+      </interconnect>
+
+    </pb_type>
+    <interconnect>
+        <!-- A 50% sparse crossbar means 50% of the lines can reach an actual input of the dsp.
+        It is modelled by splitting the inputs into two buckets with a full crossbar each (disabled below; direct connections are used instead). -->
+        <!--
+       <complete name="first_half" input="dsp_top.dsp_I1" output="dsp.dsp_I1">
+            <delay_constant max="333e-12" in_port="dsp_top.dsp_I1" out_port="dsp.dsp_I1"/>
+       </complete>
+
+        <complete name="second_half" input="dsp_top.dsp_I2" output="dsp.dsp_I2">
+            <delay_constant max="333e-12" in_port="dsp_top.dsp_I2" out_port="dsp.dsp_I2"/>
+        </complete>
+        -->
+        <direct name="enable" input="dsp_top.dsp_I1[0]" output="dsp.dsp_I1[0]"/>
+        <direct name="loadconst" input="dsp_top.dsp_I1[1]" output="dsp.dsp_I1[1]"/>
+        <direct name="accumulate" input="dsp_top.dsp_I1[2]" output="dsp.dsp_I1[2]"/>
+        <direct name="negate" input="dsp_top.dsp_I1[3]" output="dsp.dsp_I1[3]"/>
+        <direct name="sub" input="dsp_top.dsp_I1[4]" output="dsp.dsp_I1[4]"/>
+        <direct name="mode" input="dsp_top.dsp_I1[7:5]" output="dsp.dsp_I1[7:5]"/>
+        <direct name="mux9_select" input="dsp_top.dsp_I1[8]" output="dsp.dsp_I1[8]"/>
+        <direct name="internal_coeffa" input="dsp_top.dsp_I1[9]" output="dsp.dsp_I1[9]"/>
+        <direct name="internal_coeffb" input="dsp_top.dsp_I1[10]" output="dsp.dsp_I1[10]"/>
+        <direct name="datain1" input="dsp_top.dsp_I1[63:11]" output="dsp.dsp_I1[63:11]"/>
+        <direct name="datain2" input="dsp_top.dsp_I2" output="dsp.dsp_I2"/>
+
+        <direct name="reset" input="dsp_top.reset" output="dsp.reset"/>
+        <direct name="chainin" input="dsp_top.chainin" output="dsp.chainin">
+            <delay_constant max="1179e-12" in_port="dsp_top.chainin" out_port="dsp.chainin"/>
+        </direct>
+        <direct name="chainout" input="dsp.chainout" output="dsp_top.chainout">
+            <delay_constant max="1179e-12" in_port="dsp.chainout" out_port="dsp_top.chainout"/>
+        </direct>
+        <direct name="scanin" input="dsp_top.scanin" output="dsp.scanin">
+            <delay_constant max="1179e-12" in_port="dsp_top.scanin" out_port="dsp.scanin"/>
+        </direct>
+        <direct name="scanout" input="dsp.scanout" output="dsp_top.scanout">
+            <delay_constant max="1179e-12" in_port="dsp.scanout" out_port="dsp_top.scanout"/>
+        </direct>
+        <direct name="result" input="dsp.result" output="dsp_top.result"/>
+        <direct name="clk" input="dsp_top.clk" output="dsp.clk"/>
+    </interconnect>
+    </pb_type>
+    <!-- Define DSP slice end -->
+
+
+    <!-- Define fracturable memory begin -->
+    <!-- 
+    RAM blocks always have registered inputs. The input FFs appear before the address decoder & wordline driver,
+    and after the local input crossbar & level shifter.
+    RAM blocks optionally have registered outputs. The output FFs (if present) appear after the output crossbar.
+    If BRAM doesn't have registered outputs, then T_clk_to_q is the whole delay of the read/write operation.
+    If BRAM does have registered output, then T_clk_to_q is just the FF clk_to_q and then delay_constant
+    can be used to specify the whole delay of the read/write operation.
+
+    This RAM block has registered outputs.
+
+    The area and delay values of this RAM block were obtained (indirectly) from COFFE simulations.
+    COFFE only supports widths and depths that are powers of 2. For M20K (20 Kilobit BRAM), we need
+    the width to be 40 bits and depth to be 512 (for the logically widest mode: 512x40). We can't
+    simulate these dimensions directly in COFFE. So, we simulated and obtained the results for a
+    32 Kilobit BRAM (M32K) and a 16 Kilobit BRAM, and then interpolated between the two.
+    For delay, a linear interpolation was used, based on the size of the Memory (16K->20K->32K).
+    For area, the value was calculated using two interpolations: (1) port based (change in number of 
+    ports in going from 16K->20K->32K) and (2) number of bits based (change in number of bits in
+    going from 16K->20K->32K). The interpolation that resulted in the larger area was picked.
+    
+
+    Here are the equations used to calculate the delays based on COFFE results:
+    T_setup (inputs) = T_level_shifter + T_register_micro_setup = 32.3ps + 18.91ps = 51.21ps
+    T_clk_to_q (inputs) = T_register_micro_clk_to_q = 60.32ps
+    T_setup (outputs) = T_register_micro_setup = 18.91ps 
+    T_clk_to_q (outputs) = T_register_micro_clk_to_q = 60.32ps
+
+    (Register setup and clk_to_q timings are actually from the FF used in the logic cluster.)
+
+    T_read = T1 + T2 + T3
+    = max (Row decoder, Pre-charge time) + (Wordline driver + Bit line delay) + (Sense amp + Output crossbar)
+
+    * Bit line delay is included in self.RAM.samp.delay time in COFFE. The Sense amp delay is actually
+    self.RAM.samp_part2.delay
+
+    T_write = T1 + T2 + T3
+    = max (Row decoder, Pre-charge time) + (Wordline driver) + (Write driver)
+
+    delay_constant values model the internal limits of a block (the combinatorial delay).
+    delay_constant = max (T_read, T_write) 
+
+    Overall internal delay of the RAM is T_clk_to_q (inputs) + delay_constant + T_setup (outputs)
+    -->
+    <pb_type name="memory"> <!-- Fracturable 20 Kilobit RAM: each mode below maps a single- or dual-port primitive onto these shared pins -->
+      <input name="addr1" num_pins="11"/>
+      <input name="addr2" num_pins="11"/>
+      <input name="data" num_pins="40"/>
+      <input name="we1" num_pins="1"/>
+      <input name="we2" num_pins="1"/>
+      <output name="out" num_pins="40"/>
+      <clock name="clk" num_pins="1"/>
+      <!-- Specify single port mode first -->
+      <mode name="mem_512x40_sp">
+        <pb_type name="mem_512x40_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="9" port_class="address"/>
+          <input name="data" num_pins="40" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="40" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <!-- Input setup = 32.3 ps level shifter + 18.91 ps register setup = 51.21 ps; output setup = 18.91 ps (see the derivation above this pb_type) -->
+          <T_setup value="51.21e-12" port="mem_512x40_sp.addr" clock="clk"/>
+          <T_setup value="51.21e-12" port="mem_512x40_sp.data" clock="clk"/>
+          <T_setup value="51.21e-12" port="mem_512x40_sp.we" clock="clk"/>
+          <T_setup value="18.91e-12" port="mem_512x40_sp.out" clock="clk"/>
+          <!-- Clock-to-Q of the input and output registers -->
+          <T_clock_to_Q max="60.32e-12" port="mem_512x40_sp.addr" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_512x40_sp.data" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_512x40_sp.we" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_512x40_sp.out" clock="clk"/>
+          <!-- Internal combinational delay of the block from each input to the output -->
+          <delay_constant max="0.852e-9" in_port="mem_512x40_sp.addr" out_port="mem_512x40_sp.out"/>
+          <delay_constant max="0.852e-9" in_port="mem_512x40_sp.data" out_port="mem_512x40_sp.out"/>
+          <delay_constant max="0.852e-9" in_port="mem_512x40_sp.we"   out_port="mem_512x40_sp.out"/>
+
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[8:0]" output="mem_512x40_sp.addr">
+          </direct>
+          <direct name="data1" input="memory.data" output="mem_512x40_sp.data">
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_512x40_sp.we">
+          </direct>
+          <direct name="dataout1" input="mem_512x40_sp.out" output="memory.out">
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_512x40_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+
+      <mode name="mem_1024x20_sp">
+        <pb_type name="mem_1024x20_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="10" port_class="address"/>
+          <input name="data" num_pins="20" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="20" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <!-- Input setup = 51.21 ps (level shifter + register setup); output setup = 18.91 ps -->
+          <T_setup value="51.21e-12" port="mem_1024x20_sp.addr" clock="clk"/>
+          <T_setup value="51.21e-12" port="mem_1024x20_sp.data" clock="clk"/>
+          <T_setup value="51.21e-12" port="mem_1024x20_sp.we" clock="clk"/>
+          <T_setup value="18.91e-12" port="mem_1024x20_sp.out" clock="clk"/>
+          <!-- Clock-to-Q of the input and output registers -->
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_sp.addr" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_sp.data" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_sp.we" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_sp.out" clock="clk"/>
+          <!-- Internal combinational delay of the block from each input to the output -->
+          <delay_constant max="0.852e-9" in_port="mem_1024x20_sp.addr" out_port="mem_1024x20_sp.out"/>
+          <delay_constant max="0.852e-9" in_port="mem_1024x20_sp.data" out_port="mem_1024x20_sp.out"/>
+          <delay_constant max="0.852e-9" in_port="mem_1024x20_sp.we"   out_port="mem_1024x20_sp.out"/>
+
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x20_sp.addr">
+          </direct>
+          <direct name="data1" input="memory.data[19:0]" output="mem_1024x20_sp.data">
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_1024x20_sp.we">
+          </direct>
+          <direct name="dataout1" input="mem_1024x20_sp.out" output="memory.out[19:0]">
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_1024x20_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+
+      <mode name="mem_2048x10_sp">
+        <pb_type name="mem_2048x10_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="11" port_class="address"/>
+          <input name="data" num_pins="10" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="10" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <!-- Input setup = 51.21 ps (level shifter + register setup); output setup = 18.91 ps -->
+          <T_setup value="51.21e-12" port="mem_2048x10_sp.addr" clock="clk"/>
+          <T_setup value="51.21e-12" port="mem_2048x10_sp.data" clock="clk"/>
+          <T_setup value="51.21e-12" port="mem_2048x10_sp.we" clock="clk"/>
+          <T_setup value="18.91e-12" port="mem_2048x10_sp.out" clock="clk"/>
+          <!-- Clock-to-Q of the input and output registers -->
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_sp.addr" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_sp.data" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_sp.we" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_sp.out" clock="clk"/>
+          <!-- Internal combinational delay of the block from each input to the output -->
+          <delay_constant max="0.852e-9" in_port="mem_2048x10_sp.addr" out_port="mem_2048x10_sp.out"/>
+          <delay_constant max="0.852e-9" in_port="mem_2048x10_sp.data" out_port="mem_2048x10_sp.out"/>
+          <delay_constant max="0.852e-9" in_port="mem_2048x10_sp.we"   out_port="mem_2048x10_sp.out"/>
+
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x10_sp.addr">
+          </direct>
+          <direct name="data1" input="memory.data[9:0]" output="mem_2048x10_sp.data">
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_2048x10_sp.we">
+          </direct>
+          <direct name="dataout1" input="mem_2048x10_sp.out" output="memory.out[9:0]">
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_2048x10_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+
+      <!-- Specify true dual port mode next; the two ports split the shared data and out buses between them -->
+      <mode name="mem_1024x20_dp">
+        <pb_type name="mem_1024x20_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="10" port_class="address1"/>
+          <input name="addr2" num_pins="10" port_class="address2"/>
+          <input name="data1" num_pins="20" port_class="data_in1"/>
+          <input name="data2" num_pins="20" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="20" port_class="data_out1"/>
+          <output name="out2" num_pins="20" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <!-- Input setup = 51.21 ps (level shifter + register setup); output setup = 18.91 ps -->
+          <T_setup value="51.21e-12" port="mem_1024x20_dp.addr1" clock="clk"/>
+          <T_setup value="51.21e-12" port="mem_1024x20_dp.data1" clock="clk"/>
+          <T_setup value="51.21e-12" port="mem_1024x20_dp.we1" clock="clk"/>
+          <T_setup value="51.21e-12" port="mem_1024x20_dp.addr2" clock="clk"/>
+          <T_setup value="51.21e-12" port="mem_1024x20_dp.data2" clock="clk"/>
+          <T_setup value="51.21e-12" port="mem_1024x20_dp.we2" clock="clk"/>
+          <T_setup value="18.91e-12" port="mem_1024x20_dp.out1" clock="clk"/>
+          <T_setup value="18.91e-12" port="mem_1024x20_dp.out2" clock="clk"/>
+          <!-- Clock-to-Q of the input and output registers -->
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.addr1" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.data1" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.we1" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.addr2" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.data2" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.out2" clock="clk"/>
+          <!-- Internal combinational delay; each port's inputs only reach that port's own output -->
+          <delay_constant max="0.852e-9" in_port="mem_1024x20_dp.addr1" out_port="mem_1024x20_dp.out1"/>
+          <delay_constant max="0.852e-9" in_port="mem_1024x20_dp.data1" out_port="mem_1024x20_dp.out1"/>
+          <delay_constant max="0.852e-9" in_port="mem_1024x20_dp.we1" out_port="mem_1024x20_dp.out1"/>
+          <delay_constant max="0.852e-9" in_port="mem_1024x20_dp.addr2" out_port="mem_1024x20_dp.out2"/>
+          <delay_constant max="0.852e-9" in_port="mem_1024x20_dp.data2" out_port="mem_1024x20_dp.out2"/>
+          <delay_constant max="0.852e-9" in_port="mem_1024x20_dp.we2" out_port="mem_1024x20_dp.out2"/>
+
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x20_dp.addr1">
+          </direct>
+          <direct name="address2" input="memory.addr2[9:0]" output="mem_1024x20_dp.addr2">
+          </direct>
+          <direct name="data1" input="memory.data[19:0]" output="mem_1024x20_dp.data1">
+          </direct>
+          <direct name="data2" input="memory.data[39:20]" output="mem_1024x20_dp.data2">
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_1024x20_dp.we1">
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_1024x20_dp.we2">
+          </direct>
+          <direct name="dataout1" input="mem_1024x20_dp.out1" output="memory.out[19:0]">
+          </direct>
+          <direct name="dataout2" input="mem_1024x20_dp.out2" output="memory.out[39:20]">
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_1024x20_dp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+
+      <mode name="mem_2048x10_dp">
+        <pb_type name="mem_2048x10_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="11" port_class="address1"/>
+          <input name="addr2" num_pins="11" port_class="address2"/>
+          <input name="data1" num_pins="10" port_class="data_in1"/>
+          <input name="data2" num_pins="10" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="10" port_class="data_out1"/>
+          <output name="out2" num_pins="10" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <!-- Input setup = 51.21 ps (level shifter + register setup); output setup = 18.91 ps -->
+          <T_setup value="51.21e-12" port="mem_2048x10_dp.addr1" clock="clk"/>
+          <T_setup value="51.21e-12" port="mem_2048x10_dp.data1" clock="clk"/>
+          <T_setup value="51.21e-12" port="mem_2048x10_dp.we1" clock="clk"/>
+          <T_setup value="51.21e-12" port="mem_2048x10_dp.addr2" clock="clk"/>
+          <T_setup value="51.21e-12" port="mem_2048x10_dp.data2" clock="clk"/>
+          <T_setup value="51.21e-12" port="mem_2048x10_dp.we2" clock="clk"/>
+          <T_setup value="18.91e-12" port="mem_2048x10_dp.out1" clock="clk"/>
+          <T_setup value="18.91e-12" port="mem_2048x10_dp.out2" clock="clk"/>
+          <!-- Clock-to-Q of the input and output registers -->
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.addr1" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.data1" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.we1" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.addr2" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.data2" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.out2" clock="clk"/>
+          <!-- Internal combinational delay; each port's inputs only reach that port's own output -->
+          <delay_constant max="0.852e-9" in_port="mem_2048x10_dp.addr1" out_port="mem_2048x10_dp.out1"/>
+          <delay_constant max="0.852e-9" in_port="mem_2048x10_dp.data1" out_port="mem_2048x10_dp.out1"/>
+          <delay_constant max="0.852e-9" in_port="mem_2048x10_dp.we1" out_port="mem_2048x10_dp.out1"/>
+          <delay_constant max="0.852e-9" in_port="mem_2048x10_dp.addr2" out_port="mem_2048x10_dp.out2"/>
+          <delay_constant max="0.852e-9" in_port="mem_2048x10_dp.data2" out_port="mem_2048x10_dp.out2"/>
+          <delay_constant max="0.852e-9" in_port="mem_2048x10_dp.we2" out_port="mem_2048x10_dp.out2"/>
+
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x10_dp.addr1">
+          </direct>
+          <direct name="address2" input="memory.addr2[10:0]" output="mem_2048x10_dp.addr2">
+          </direct>
+          <direct name="data1" input="memory.data[9:0]" output="mem_2048x10_dp.data1">
+          </direct>
+          <direct name="data2" input="memory.data[19:10]" output="mem_2048x10_dp.data2">
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_2048x10_dp.we1">
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_2048x10_dp.we2">
+          </direct>
+          <direct name="dataout1" input="mem_2048x10_dp.out1" output="memory.out[9:0]">
+          </direct>
+          <direct name="dataout2" input="mem_2048x10_dp.out2" output="memory.out[19:10]">
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_2048x10_dp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+    </pb_type>
+    <!-- Define fracturable memory end -->
+  </complexblocklist>
+
+  <switchblocklist>
+    <!-- Stratix IV uses a uni-directional routing architecture with a Driver Input Mux (DIM) size of 12 (i.e.
+           each wire can be driven by one of 12 block/outputs or wires) for the L4s.
+           
+           In the Stratix IV architecture the long wires (L16 here) are accessible only from the short wires, 
+           and are not connected to the block pins (i.e. connection blocks). Furthermore, they only connect 
+           to switch blocks every 4 LABs (to avoid expensive deep via stacks).
+           We approximate the L16 DIM size as 40:1 (in reality it is a pair of 20:1 (?) muxes with a 2:1 swap mux
+           in front, which has nearly the same connectivity as a full 40:1).
+
+           L4 wires
+           ================
+           At a channel width of 300 there are 260 L4/L4prime wires. At an effective Fc_out of 0.075 
+           and 40 LAB outputs this yields:
+
+                40 * 2 = 80 outputs per channel  [2 LABs per-channel]
+
+                80 * 0.075 = 6 outputs drive each L4 wire [output connection block]
+
+           This leaves:
+
+                12 - 6 = 6 inputs to the DIMs from other routing wires [switch block]
+
+           Since L4s connect at every switch block, there are:
+
+                260 L4 wires per channel + direction which can drive wires at a particular switchblock
+                (via switchpoints 0, 1, 2, 3)
+
+           And for each direction (260 wires) only:
+
+               260 / 4 = 65 wires starting/ending per channel + direction at each switch block
+               (i.e. from each direction, north/south/east/west, there are 32 L4s starting, and 32 L4s ending; + 1 wire for the 65th)
+
+           Which we allocate as follows:
+
+                L4
+                =====
+                straight-through connection: 2 (from L4 or L16)
+                clock-wise turn            : 2 (from L4 or L16)
+                counter-clock-wise turn    : 2 (from L4 or L16)
+
+           L16 wires
+           =========
+           At a channel width of 300 there are 40 L16 wires (20 in each direction), which do not connect to the input/output connection blocks.
+           This leaves 40 inputs to the DIM to select from routing wires (long wires use larger DIMs to improve reachability,
+           the area cost is relatively small since they are so rare).
+
+           Since L16s only connect at every 4th switch block there are:
+
+                40 / 4 = 10 L16 wires per channel (5 in each direction) which can drive wires at a particular switchblock
+                (via switchpoints 0, 4, 8, 12)
+
+           And for each direction (20 wires) only:
+
+               40 / 16 = 2.5 => 2 wires starting/ending per channel + direction at each switch block
+               (i.e. from each direction, north/south/east/west, there is one L16 starting, and one L16 ending)
+           
+           We assign the 40 DIM inputs as follows:
+
+                L16
+                =====
+                straight-through connection:  3 (from L16)
+                straight-through connection: 11 (from L4)
+                clock-wise turn            :  3 (from L16)
+                clock-wise turn            : 10 (from L4)
+                counter clock-wise turn    :  3 (from L16)
+                counter clock-wise turn    : 10 (from L4)
+
+           Switch pattern
+           ==============
+           This switch block is based on the Wilton switch block (see Page 103 of Steve Wilton's PhD Thesis 
+           "Architecture and Algorithms for Field-Programmable Gate Arrays with Embedded Memory", 1997):
+
+                left-to-top: W - t
+                top-to-right: t + 1
+                right-to-bottom: 2*W - 2 - t
+                bottom-to-left: t + 1
+                left-to-right: t
+                top-to-bottom: t
+
+           Since Wilton assumed bidirectional routing (while we use unidirectional routing),
+           we mirror the clock-wise turns to match the counter-clock-wise specification.
+           -->
+    <switchblock name="wilton_turn_clockwise_core" type="unidir">
+      <switchblock_location type="CORE"/>
+      <switchfuncs>
+        <!-- Clock-wise turns -->
+        <func type="tl" formula="W-t"/>
+        <!-- top to left -->
+        <func type="rt" formula="t+1"/>
+        <!-- right to top -->
+        <func type="br" formula="2*W-2-t"/>
+        <!-- bottom to right -->
+        <func type="lb" formula="t+1"/>
+        <!-- left to bottom -->
+      </switchfuncs>
+      <!-- L16 drivers -->
+      <wireconn num_conns="3*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
+      <wireconn num_conns="10*to" from_type="L4" from_switchpoint="0" to_type="L16" to_switchpoint="0"/>
+      <!-- L4 drivers 
+
+               Driving from L16 (few) to L4 (many) prefer driving from end-point of L16, although since there are many they will
+               all be multiply connected.
+               
+               Driving from L4 (many) to L4 (many) shuffle the switchpoints so the L4's are driven from a variety of switchpoints.
+               Since the actual number L4s starting/ending are equal, using 'fixed' from_order would mean only switchpoint 0 -> 0
+               connections. A 'shuffled' order will mix-up the from switchpoints for more diversity.
+               -->
+      <wireconn num_conns="2*to" from_order="shuffled">
+        <from type="L16" switchpoint="0,12,8,4"/>
+        <from type="L4" switchpoint="0,1,2,3"/>
+        <to type="L4" switchpoint="0"/>
+      </wireconn>
+    </switchblock>
+    <switchblock name="wilton_turn_counter_clockwise_core" type="unidir">
+      <switchblock_location type="CORE"/>
+      <switchfuncs>
+        <!-- Counter-clock-wise turns -->
+        <func type="lt" formula="W-t"/>
+        <!-- left to top -->
+        <func type="tr" formula="t+1"/>
+        <!-- top to right -->
+        <func type="rb" formula="2*W-2-t"/>
+        <!-- right to bottom -->
+        <func type="bl" formula="t+1"/>
+        <!-- bottom to left -->
+      </switchfuncs>
+      <!-- L16 drivers -->
+      <wireconn num_conns="3*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
+      <wireconn num_conns="10*to" from_type="L4" from_switchpoint="0" to_type="L16" to_switchpoint="0"/>
+      <!-- L4 drivers 
+
+               Driving from L16 (few) to L4 (many) prefer driving from end-point of L16, although since there are many they will
+               all be multiply connected.
+               
+               Driving from L4 (many) to L4 (many) shuffle the switchpoints so the L4's are driven from a variety of switchpoints.
+               Since the actual number L4s starting/ending are equal, using 'fixed' from_order would mean only switchpoint 0 -> 0
+               connections. A 'shuffled' order will mix-up the from switchpoints for more diversity.
+
+               Note that a different from_switchpoints ordering is intended here to ensure a different shuffling occurs compared to
+               wilton_turn_clockwise_core. NOTE(review): the orderings below currently match that block exactly; confirm.
+               -->
+      <wireconn num_conns="2*to" from_order="shuffled">
+        <from type="L16" switchpoint="0,12,8,4"/>
+        <from type="L4" switchpoint="0,1,2,3"/>
+        <to type="L4" switchpoint="0"/>
+      </wireconn>
+    </switchblock>
+    <switchblock name="wilton_straight" type="unidir">
+      <switchblock_location type="EVERYWHERE"/>
+      <switchfuncs>
+        <!-- Straight -->
+        <func type="lr" formula="t"/>
+        <!-- left to right -->
+        <func type="tb" formula="t"/>
+        <!-- top to bottom -->
+        <func type="rl" formula="t"/>
+        <!-- right to left -->
+        <func type="bt" formula="t"/>
+        <!-- bottom to top -->
+      </switchfuncs>
+      <!-- L16 Drivers 
+                Note that we order the switchpoints in order of preference, since VPR currently
+                iterates through the source sets in order, such that we connect first to wires
+                ending at the switchblock (switchpoint 0), and then fallback to switchpoints
+                in decreasing distance from the drive point (if we have more to's than from's
+                it then wraps around).
+
+                Note also that we multiply the number of expected connections by 'to', since while usually
+                there is only one 'to' wire, occasionally there may be more, and we want to ensure they all
+                get the same number of connections.
+
+                For L16->L16:
+                  We allow any valid switchpoint to be used as the 'from' point.
+                  Allowing 'low' switchpoints like '4' may seem counter-intuitive (i.e. why not use a cheaper L4), but
+                  this makes it easier to bypass once on the L16 network (e.g. to get around congestion).
+           -->
+      <wireconn num_conns="3*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
+      <wireconn num_conns="11*to" from_type="L4" from_switchpoint="0,3,2,1" to_type="L16" to_switchpoint="0"/>
+      <!-- L4 Drivers -->
+      <wireconn num_conns="2*to" from_order="shuffled">
+        <from type="L16" switchpoint="0,12,8,4"/>
+        <from type="L4" switchpoint="0"/>
+        <to type="L4" switchpoint="0"/>
+      </wireconn>
+      <!--<wireconn num_conns="1*to" from_type="L4" from_switchpoint="0" to_type="L4" to_switchpoint="0"/>-->
+      <!--<wireconn num_conns="1*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L4" to_switchpoint="0"/>-->
+    </switchblock>
+    <switchblock name="wilton_straight_corner" type="unidir">
+      <!-- Same as wilton straight, but turning around a corner -->
+      <switchblock_location type="CORNER"/>
+      <switchfuncs>
+        <!-- Counter-clock-wise turns -->
+        <func type="lt" formula="t"/>
+        <!-- left to top -->
+        <func type="tr" formula="t"/>
+        <!-- top to right -->
+        <func type="rb" formula="t"/>
+        <!-- right to bottom -->
+        <func type="bl" formula="t"/>
+        <!-- bottom to left -->
+        <!-- Clock-wise turns -->
+        <func type="tl" formula="t"/>
+        <!-- top to left -->
+        <func type="rt" formula="t"/>
+        <!-- right to top -->
+        <func type="br" formula="t"/>
+        <!-- bottom to right -->
+        <func type="lb" formula="t"/>
+        <!-- left to bottom -->
+      </switchfuncs>
+      <!-- L16 Drivers -->
+      <wireconn num_conns="3*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
+      <wireconn num_conns="11*to" from_type="L4" from_switchpoint="0,3,2,1" to_type="L16" to_switchpoint="0"/>
+      <!-- L4 Drivers -->
+      <wireconn num_conns="2*to" from_order="shuffled">
+        <from type="L16" switchpoint="0,12,8,4"/>
+        <from type="L4" switchpoint="0"/>
+        <to type="L4" switchpoint="0"/>
+      </wireconn>
+      <!--<wireconn num_conns="1*to" from_type="L4" from_switchpoint="0" to_type="L4" to_switchpoint="0"/>-->
+      <!--<wireconn num_conns="1*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L4" to_switchpoint="0"/>-->
+    </switchblock>
+    <switchblock name="wilton_turn_fringe" type="unidir">
+      <!-- Non-corner perimeter SBs -->
+      <switchblock_location type="FRINGE"/>
+      <switchfuncs>
+        <!-- Counter-clock-wise turns -->
+        <func type="lt" formula="W-t"/>
+        <!-- left to top -->
+        <func type="tr" formula="t+1"/>
+        <!-- top to right -->
+        <func type="rb" formula="2*W-2-t"/>
+        <!-- right to bottom -->
+        <func type="bl" formula="t+1"/>
+        <!-- bottom to left -->
+        <!-- Clock-wise turns -->
+        <func type="tl" formula="W-t"/>
+        <!-- top to left -->
+        <func type="rt" formula="t+1"/>
+        <!-- right to top -->
+        <func type="br" formula="2*W-2-t"/>
+        <!-- bottom to right -->
+        <func type="lb" formula="t+1"/>
+        <!-- left to bottom -->
+      </switchfuncs>
+      <!-- We use 'max' style connections here to ensure there are no dangling wires, otherwise like core turns -->
+      <!-- L16 drivers -->
+      <wireconn num_conns="3*max(from,to)" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
+      <wireconn num_conns="21*max(from,to)" from_type="L4" from_switchpoint="0" to_type="L16" to_switchpoint="0"/>
+      <!-- L4 drivers -->
+      <wireconn num_conns="1*max(from,to)" from_type="L16" from_switchpoint="0,12,8,4" from_order="fixed" to_type="L4" to_switchpoint="0"/>
+      <wireconn num_conns="1*max(from,to)" from_type="L4" from_switchpoint="0,1,2,3" from_order="shuffled" to_type="L4" to_switchpoint="0"/>
+    </switchblock>
+  </switchblocklist>
+
+  <clocks>
+    <clock buffer_size="auto" C_wire="2.5e-10"/>
+  </clocks>
+</architecture>
+
+
diff --git a/parmys-plugin/tests/eltwise_layer/odin_config.xml b/parmys-plugin/tests/eltwise_layer/odin_config.xml
new file mode 100644
index 000000000..07a54fea6
--- /dev/null
+++ b/parmys-plugin/tests/eltwise_layer/odin_config.xml
@@ -0,0 +1,41 @@
+<!--
+# Copyright 2022 Daniel Khadivi
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+-->
+<config>
+	<inputs>
+		<input_type>Verilog</input_type>
+		<input_path_and_name>hard_block_include.v</input_path_and_name>
+		<input_path_and_name>eltwise_layer.v</input_path_and_name>
+	</inputs>
+	<output>
+		<output_type>blif</output_type>
+		<output_path_and_name>eltwise_layer.yosys.blif</output_path_and_name>
+		<target>
+			<arch_file>k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml</arch_file>
+		</target>
+	</output>
+	<optimizations>
+		<multiply size="3" fixed="1" fracture="0" padding="-1" />
+		<memory split_memory_width="1" split_memory_depth="11" />
+		<adder size="0" threshold_size="1" />
+	</optimizations>
+	<debug_outputs>
+		<debug_output_path>.</debug_output_path>
+		<output_ast_graphs>0</output_ast_graphs>
+		<output_netlist_graphs>0</output_netlist_graphs>
+	</debug_outputs>
+</config>
\ No newline at end of file
diff --git a/parmys-plugin/tests/raygentop/k6_frac_N10_frac_chain_mem32K_40nm.xml b/parmys-plugin/tests/raygentop/k6_frac_N10_frac_chain_mem32K_40nm.xml
new file mode 100644
index 000000000..b8d26348a
--- /dev/null
+++ b/parmys-plugin/tests/raygentop/k6_frac_N10_frac_chain_mem32K_40nm.xml
@@ -0,0 +1,1505 @@
+<!-- 
+  Flagship Heterogeneous Architecture with Carry Chains for VTR 7.0.
+
+  - 40 nm technology
+  - General purpose logic block: 
+    K = 6, N = 10, fracturable 6 LUTs (can operate as one 6-LUT or two 5-LUTs with all 5 inputs shared) 
+    with optionally registered outputs
+    Each 5-LUT has an arithmetic mode that converts it to a single-bit adder with both inputs driven by 4-LUTs (both 4-LUTs share all 4 inputs)
+    Carry chain links to vertically adjacent logic blocks
+  - Memory size 32 Kbits, memory aspect ratios vary from a data width of 1 to data width of 64.  
+    Height = 6, found on every (8n+2)th column
+  - Multiplier modes: one 36x36, two 18x18, each 18x18 can also operate as two 9x9.  
+    Height = 4, found on every (8n+6)th column
+  - Routing architecture: L = 4, fc_in = 0.15, Fc_out = 0.1
+
+  Details on Modelling:
+
+  The electrical design of the architecture described here is NOT from an 
+  optimized, SPICED architecture.  Instead, we attempt to create a reasonable 
+  architecture file by using an existing commercial FPGA to approximate the area, 
+  delay, and power of the underlying components. This is combined with a reasonable 40 nm 
+  model of wiring and circuit design for low-level routing components, where available.
+  The resulting architecture has delays that roughly match a commercial 40 nm FPGA, but also 
+  has wiring electrical parameters that allow the wire lengths and switch patterns to be 
+  modified and you will still get reasonable delay results for the new architecture.
+  The following describes, in detail, how we obtained the various electrical values for this 
+  architecture.
+
+  Rmin for nmos and pmos, routing buffer sizes, and I/O pad delays are from the ifar 
+  architecture created by Ian Kuon: K06 N10 45nm fc 0.15 area-delay optimized architecture. 
+  (n10k06l04.fc15.area1delay1.cmos45nm.bptm.cmos45nm.xml)      
+  This routing architecture was optimized for 45 nm, and we have scaled it linearly to 40 nm to 
+  match the overall target (a 40 nm FPGA).
+
+  We obtain maximum delay numbers by measuring delays of routing, soft logic blocks, 
+  memories, and multipliers from test circuits on a Stratix IV GX device 
+  (EP4SGX230DF29C2X, i.e. fastest speed grade). Minimum delay values are calculated based on the
+  ratios between maximum and minimum values in Stratix IV GX device. For routing, we took the 
+  average delay of H4 and V4 wires.  Rmetal and Cmetal values for the routing wires were obtained 
+  from work done by Charles Chiasson. We use a 96 nm half-pitch (corresponding to mid-level metal 
+  stack 40 nm routing) and take the R and C data from the ITRS roadmap. 
+
+  For the general purpose logic block, we assume that the area and delays of the Stratix IV 
+  crossbar is close enough to the crossbar modelled here.  We use 40 inputs and 20 feedback lines in 
+  the cluster and a full crossbar, leading to 53:1 multiplexers in front of each BLE input.
+  Stratix IV uses 52 inputs and 20 feedback lines, but only a half-populated crossbar, leading to 
+  36:1 multiplexers.  We require 60 such multiplexers, while Stratix IV requires 88 for its more
+  complex fracturable BLEs + the extra control signals. We justify this rough approximation as follows: 
+  The Stratix IV crossbar has more inputs (72 vs. 60) and 
+  outputs (88 vs. 60) than our full crossbar which should increase its area and delay, but the 
+  Stratix IV crossbar is also 50% sparse (each mux is 36:1 instead of 53:1) which should reduce its 
+  area and delay.  The total number of crossbar switch points is roughly similar between the two 
+  architectures (3160 for SIV and 3600 for the academic architecture below), so we use the area 
+  & delay of the Stratix IV crossbar as a rough approximation of our crossbar.
+
+  For LUTs, we include LUT 
+  delays measured from Stratix IV which are dependent on the input used (i.e. some 
+  LUT inputs are faster than others).  The CAD tools at the time of VTR 7 do 
+  not consider differences in LUT input delays.
+
+  Adder delays obtained as approximate values from a Stratix IV EP4SE230F29C3 device.  
+  Delay obtained by compiling a 256 bit adder (registered inputs and outputs, 
+  all pins except clock virtual) then measuring the delays in chip-planner, 
+  sumout delay = 0.271ns to 0.348 ns, intra-block carry delay = 0.011 ns, 
+  inter-block carry delay = 0.327 ns.  Given this data, I will approximate 
+  sumout 0.3 ns, intra-block carry-delay = 0.01 ns, and 
+  inter-block carry-delay = 0.16 ns (since Altera inter-block carry delay has 
+  overhead that we don't have, I'll approximate the delay of a simpler chain at 
+  one half what they have.  This is very rough, anything from 0.01ns to 0.327ns 
+  can be justified).
+
+  Logic block area numbers obtained by scaling overall tile area of a 65nm 
+  Stratix III device, (as given in Wong, Betz and Rose, FPGA 2011) to 40 nm, then subtracting out 
+  routing area at a channel width of 300. We use a channel width of 300 because it can route 
+  all the VTR 6.0 benchmark circuits with an approximately 20% safety margin, and is also close to the
+  total channel width of Stratix IV. Hence this channel width is close to the commercial practice of
+  choosing a width that provides high routability. The architecture can be routed at different channel
+  widths, but we estimate the tile size and hence the physical length of routing wires assuming
+  a channel width of 300.
+
+  Sanity checks employed:
+    1.  We confirmed the routing buffer delay is ~1/3rd of total routing delay at L = 4. This matches 
+        common electrical design.
+
+
+  Authors: Jason Luu, Jeff Goeders, Vaughn Betz
+-->
+<architecture>
+  <!-- 
+       ODIN II specific config begins 
+       Describes the types of user-specified netlist blocks (in blif, this corresponds to 
+       ".model [type_of_block]") that this architecture supports.
+
+       Note: Basic LUTs, I/Os, and flip-flops are not included here as there are 
+       already special structures in blif (.names, .input, .output, and .latch) 
+       that describe them.
+  -->
+  <models>
+    <model name="multiply">
+      <input_ports>
+        <port name="a" combinational_sink_ports="out"/>
+        <port name="b" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out"/>
+      </output_ports>
+    </model>
+    <model name="single_port_ram">
+      <input_ports>
+        <port name="we" clock="clk"/>
+        <!-- control -->
+        <port name="addr" clock="clk"/>
+        <!-- address lines -->
+        <port name="data" clock="clk"/>
+        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
+        <port name="clk" is_clock="1"/>
+        <!-- memories are often clocked -->
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+        <!-- output can be broken down into smaller bit widths minimum size 1 -->
+      </output_ports>
+    </model>
+    <model name="dual_port_ram">
+      <input_ports>
+        <port name="we1" clock="clk"/>
+        <!-- write enable -->
+        <port name="we2" clock="clk"/>
+        <!-- write enable -->
+        <port name="addr1" clock="clk"/>
+        <!-- address lines -->
+        <port name="addr2" clock="clk"/>
+        <!-- address lines -->
+        <port name="data1" clock="clk"/>
+        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
+        <port name="data2" clock="clk"/>
+        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
+        <port name="clk" is_clock="1"/>
+        <!-- memories are often clocked -->
+      </input_ports>
+      <output_ports>
+        <port name="out1" clock="clk"/>
+        <!-- output can be broken down into smaller bit widths minimum size 1 -->
+        <port name="out2" clock="clk"/>
+        <!-- output can be broken down into smaller bit widths minimum size 1 -->
+      </output_ports>
+    </model>
+    <model name="adder">
+      <input_ports>
+        <port name="a" combinational_sink_ports="sumout cout"/>
+        <port name="b" combinational_sink_ports="sumout cout"/>
+        <port name="cin" combinational_sink_ports="sumout cout"/>
+      </input_ports>
+      <output_ports>
+        <port name="cout"/>
+        <port name="sumout"/>
+      </output_ports>
+    </model>
+  </models>
+  <tiles>
+    <tile name="io" area="0">
+      <sub_tile name="io" capacity="8">
+        <equivalent_sites>
+          <site pb_type="io" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="outpad" num_pins="1"/>
+        <output name="inpad" num_pins="1"/>
+        <clock name="clock" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
+        <pinlocations pattern="custom">
+          <loc side="left">io.outpad io.inpad io.clock</loc>
+          <loc side="top">io.outpad io.inpad io.clock</loc>
+          <loc side="right">io.outpad io.inpad io.clock</loc>
+          <loc side="bottom">io.outpad io.inpad io.clock</loc>
+        </pinlocations>
+      </sub_tile>
+    </tile>
+    <tile name="clb" area="53894">
+      <sub_tile name="clb">
+        <equivalent_sites>
+          <site pb_type="clb" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="I" num_pins="40" equivalent="full"/>
+        <input name="cin" num_pins="1"/>
+        <output name="O" num_pins="20" equivalent="none"/>
+        <output name="cout" num_pins="1"/>
+        <clock name="clk" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
+          <fc_override port_name="cin" fc_type="frac" fc_val="0"/>
+          <fc_override port_name="cout" fc_type="frac" fc_val="0"/>
+        </fc>
+        <pinlocations pattern="spread"/>
+      </sub_tile>
+    </tile>
+    <tile name="mult_36" height="4" area="396000">
+      <sub_tile name="mult_36">
+        <equivalent_sites>
+          <site pb_type="mult_36" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="a" num_pins="36"/>
+        <input name="b" num_pins="36"/>
+        <output name="out" num_pins="72"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
+        <pinlocations pattern="spread"/>
+      </sub_tile>
+    </tile>
+    <tile name="memory" height="6" area="548000">
+      <sub_tile name="memory">
+        <equivalent_sites>
+          <site pb_type="memory" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="addr1" num_pins="15"/>
+        <input name="addr2" num_pins="15"/>
+        <input name="data" num_pins="64"/>
+        <input name="we1" num_pins="1"/>
+        <input name="we2" num_pins="1"/>
+        <output name="out" num_pins="64"/>
+        <clock name="clk" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
+        <pinlocations pattern="spread"/>
+      </sub_tile>
+    </tile>
+  </tiles>
+  <!-- ODIN II specific config ends -->
+  <!-- Physical descriptions begin -->
+  <layout>
+    <auto_layout aspect_ratio="1.0">
+      <!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
+      <perimeter type="io" priority="100"/>
+      <corners type="EMPTY" priority="101"/>
+      <!--Fill with 'clb'-->
+      <fill type="clb" priority="10"/>
+      <!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
+      <col type="mult_36" startx="6" starty="1" repeatx="8" priority="20"/>
+      <col type="EMPTY" startx="6" repeatx="8" starty="1" priority="19"/>
+      <!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
+      <col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
+      <col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
+    </auto_layout>
+  </layout>
+  <device>
+    <!-- VB & JL: Using Ian Kuon's transistor sizing and drive strength data for routing, at 40 nm. Ian used BPTM 
+			     models. We are modifying the delay values however, to include metal C and R, which allows more architecture
+			     experimentation. We are also modifying the relative resistance of PMOS to be 1.8x that of NMOS
+			     (vs. Ian's 3x) as 1.8x lines up with Jeff G's data from a 45 nm process (and is more typical of 
+			     45 nm in general). I'm upping the Rmin_nmos from Ian's just over 6k to nearly 9k, and dropping 
+			     RminW_pmos from 18k to 16k to hit this 1.8x ratio, while keeping the delays of buffers approximately
+			     lined up with Stratix IV. 
+			     We are using Jeff G.'s capacitance data for 45 nm (in tech/ptm_45nm).
+			     Jeff's tables list C in for transistors with widths in multiples of the minimum feature size (45 nm).
+			     The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply drive strength sizes in this file
+	                     by 2.5x when looking up in Jeff's tables.
+			     The delay values are lined up with Stratix IV, which has an architecture similar to this
+			     proposed FPGA, and which is also 40 nm 
+			     C_ipin_cblock: input capacitance of a track buffer, which VPR assumes is a single-stage
+			     4x minimum drive strength buffer. -->
+    <sizing R_minW_nmos="8926" R_minW_pmos="16067"/>
+    <!-- The grid_logic_tile_area below will be used for all blocks that do not explicitly set their own (non-routing)
+     	  area; set to 0 since we explicitly set the area of all blocks currently in this architecture file.
+	    -->
+    <area grid_logic_tile_area="0"/>
+    <chan_width_distr>
+      <x distr="uniform" peak="1.000000"/>
+      <y distr="uniform" peak="1.000000"/>
+    </chan_width_distr>
+    <switch_block type="wilton" fs="3"/>
+    <connection_block input_switch_name="ipin_cblock"/>
+  </device>
+  <switchlist>
+    <!-- VB: the mux_trans_size and buf_size data below is in minimum width transistor *areas*, assuming the purple
+           book area formula. This means the mux transistors are about 5x minimum drive strength.
+           We assume the first stage of the buffer is 3x min drive strength to be reasonable given the large 
+           mux transistors, and this gives a reasonable stage ratio of a bit over 5x to the second stage. We assume
+           the n and p transistors in the first stage are equal-sized to lower the buffer trip point, since it's fed
+           by a pass transistor mux. We can then reverse engineer the buffer second stage to hit the specified 
+           buf_size (really buffer area) - 16.2x minimum drive nmos and 1.8*16.2 = 29.2x minimum drive pmos.
+           I then took the data from Jeff G.'s PTM modeling of 45 nm to get the Cin (gate of first stage) and Cout 
+           (diff of second stage) listed below.  Jeff's models are in tech/ptm_45nm, and are in min feature multiples.
+           The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply the drive strength sizes above by 
+           2.5x when looking up in Jeff's tables.
+           Finally, we choose a switch delay (58 ps) that leads to length 4 wires having a delay equal to that of SIV of 126 ps.
+           This also leads to the switch being 46% of the total wire delay, which is reasonable. -->
+    <switch type="mux" name="0" R="551" Cin=".77e-15" Cout="4e-15" Tdel="58e-12" mux_trans_size="2.630740" buf_size="27.645901"/>
+    <!--switch ipin_cblock resistance set to yield for 4x minimum drive strength buffer-->
+    <switch type="mux" name="ipin_cblock" R="2231.5" Cout="0." Cin="1.47e-15" Tdel="7.247000e-11" mux_trans_size="1.222260" buf_size="auto"/>
+  </switchlist>
+  <segmentlist>
+    <!--- VB & JL: using ITRS metal stack data, 96 nm half pitch wires, which are intermediate metal width/space.  
+             With the 96 nm half pitch, such wires would take 60 um of height, vs. a 90 um high (approximated as square) Stratix IV tile so this seems
+             reasonable. Using a tile length of 90 um, corresponding to the length of a Stratix IV tile if it were square. -->
+    <segment freq="1.000000" length="4" type="unidir" Rmetal="101" Cmetal="22.5e-15">
+      <mux name="0"/>
+      <sb type="pattern">1 1 1 1 1</sb>
+      <cb type="pattern">1 1 1 1</cb>
+    </segment>
+  </segmentlist>
+  <directlist>
+    <direct name="adder_carry" from_pin="clb.cout" to_pin="clb.cin" x_offset="0" y_offset="-1" z_offset="0"/>
+  </directlist>
+  <complexblocklist>
+    <!-- Define I/O pads begin -->
+    <!-- Capacity is a unique property of I/Os, it is the maximum number of I/Os that can be placed at the same (X,Y) location on the FPGA -->
+    <!-- Not sure of the area of an I/O (varies widely), and it's not relevant to the design of the FPGA core, so we're setting it to 0. -->
+    <pb_type name="io">
+      <input name="outpad" num_pins="1"/>
+      <output name="inpad" num_pins="1"/>
+      <clock name="clock" num_pins="1"/>
+      <!-- IOs can operate as either inputs or outputs.
+	     Maximum delays below come from Ian Kuon. They are small, so they should be interpreted as
+	     the delays to and from registers in the I/O (generally I/Os are registered
+	     today, and that is how they are timing analyzed).
+
+		 Minimum delays are derived from the ratio of minimum to maximum times as seen in Quartus II
+		 in Stratix IV. The ratio of minimum value/maximum value is as follows:
+			inpad delay:  0.9239
+			outpad delay: 0.9545
+
+	     -->
+      <mode name="inpad">
+        <pb_type name="inpad" blif_model=".input" num_pb="1">
+          <output name="inpad" num_pins="1"/>
+        </pb_type>
+        <interconnect>
+          <direct name="inpad" input="inpad.inpad" output="io.inpad">
+            <delay_constant max="4.243e-11" min="3.92e-11" in_port="inpad.inpad" out_port="io.inpad"/>
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="outpad">
+        <pb_type name="outpad" blif_model=".output" num_pb="1">
+          <input name="outpad" num_pins="1"/>
+        </pb_type>
+        <interconnect>
+          <direct name="outpad" input="io.outpad" output="outpad.outpad">
+            <delay_constant max="1.394e-11" min="1.331e-11" in_port="io.outpad" out_port="outpad.outpad"/>
+          </direct>
+        </interconnect>
+      </mode>
+      <!-- Every input pin is driven by 15% of the tracks in a channel, every output pin drives 10% of the tracks in a channel -->
+      <!-- IOs go on the periphery of the FPGA. For consistency,
+          make the I/O physically equivalent on all sides so that only one definition of I/Os is needed.
+          Without a physically equivalent definition, 4 different I/Os would have to be defined, one for each side of the FPGA
+        -->
+      <!-- Place I/Os on the sides of the FPGA -->
+      <power method="ignore"/>
+    </pb_type>
+    <!-- Define I/O pads ends -->
+    <!-- Define general purpose logic block (CLB) begin -->
+    <!--- Area calculation: Total Stratix IV tile area is about 8100 um^2, and a minimum width transistor 
+	   area is 60 L^2 yields a tile area of 84375 MWTAs.
+	   Routing at W=300 is 30481 MWTAs, leaving us with a total of 53000 MWTAs for logic block area 
+	   This means that only 37% of our area is in the general routing, and 63% is inside the logic
+	   block. Note that the crossbar / local interconnect is considered part of the logic block
+	   area in this analysis. That is a lower proportion of routing area than most academics
+	   assume, but note that the total routing area really includes the crossbar, which would push
+	   routing area up significantly, we estimate into the ~70% range. 
+	   -->
+    <pb_type name="clb">
+      <input name="I" num_pins="40" equivalent="full"/>
+      <input name="cin" num_pins="1"/>
+      <output name="O" num_pins="20" equivalent="none"/>
+      <output name="cout" num_pins="1"/>
+      <clock name="clk" num_pins="1"/>
+      <!-- Describe fracturable logic element.  
+             Each fracturable logic element has a 6-LUT that can alternatively operate as two 5-LUTs with shared inputs. 
+             The outputs of the fracturable logic element can be optionally registered
+        -->
+      <pb_type name="fle" num_pb="10">
+        <input name="in" num_pins="6"/>
+        <input name="cin" num_pins="1"/>
+        <output name="out" num_pins="2"/>
+        <output name="cout" num_pins="1"/>
+        <clock name="clk" num_pins="1"/>
+        <mode name="n2_lut5">
+          <pb_type name="lut5inter" num_pb="1">
+            <input name="in" num_pins="5"/>
+            <input name="cin" num_pins="1"/>
+            <output name="out" num_pins="2"/>
+            <output name="cout" num_pins="1"/>
+            <clock name="clk" num_pins="1"/>
+            <pb_type name="ble5" num_pb="2">
+              <input name="in" num_pins="5"/>
+              <input name="cin" num_pins="1"/>
+              <output name="out" num_pins="1"/>
+              <output name="cout" num_pins="1"/>
+              <clock name="clk" num_pins="1"/>
+              <mode name="blut5">
+                <pb_type name="flut5" num_pb="1">
+                  <input name="in" num_pins="5"/>
+                  <output name="out" num_pins="1"/>
+                  <clock name="clk" num_pins="1"/>
+                  <!-- Regular LUT mode -->
+                  <pb_type name="lut5" blif_model=".names" num_pb="1" class="lut">
+                    <input name="in" num_pins="5" port_class="lut_in"/>
+                    <output name="out" num_pins="1" port_class="lut_out"/>
+                    <!-- LUT timing using delay matrix -->
+                    <!-- These are the physical maximum delays of the inputs on a Stratix IV LUT, but because VPR cannot do LUT rebalancing,
+                           we instead take the average of these numbers to get more stable results
+                        82e-12
+                        173e-12
+                        261e-12
+                        263e-12
+                        398e-12
+                        The minimum delay/maximum delay ratio is 0.7395 in QII on Stratix IV. Hence, the minimum delay
+                        is 0.7395 * the average of the maximum numbers
+                        -->
+                    <delay_matrix type="max" in_port="lut5.in" out_port="lut5.out">
+                        235e-12
+                        235e-12
+                        235e-12
+                        235e-12
+                        235e-12
+                      </delay_matrix>
+                    <delay_matrix type="min" in_port="lut5.in" out_port="lut5.out">
+                        174e-12
+                        174e-12
+                        174e-12
+                        174e-12
+                        174e-12
+                      </delay_matrix>
+                  </pb_type>
+                  <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
+                    <input name="D" num_pins="1" port_class="D"/>
+                    <output name="Q" num_pins="1" port_class="Q"/>
+                    <clock name="clk" num_pins="1" port_class="clock"/>
+                    <T_setup value="66e-12" port="ff.D" clock="clk"/>
+                    <T_hold value="37e-12" port="ff.D" clock="clk"/>
+                    <T_clock_to_Q max="124e-12" min="60e-12" port="ff.Q" clock="clk"/>
+                  </pb_type>
+                  <interconnect>
+                    <direct name="direct1" input="flut5.in" output="lut5.in"/>
+                    <direct name="direct2" input="lut5.out" output="ff.D">
+                      <pack_pattern name="ble5" in_port="lut5.out" out_port="ff.D"/>
+                    </direct>
+                    <direct name="direct3" input="flut5.clk" output="ff.clk"/>
+                    <mux name="mux1" input="ff.Q lut5.out" output="flut5.out">
+                      <delay_constant max="25e-12" min="24e-12" in_port="lut5.out" out_port="flut5.out"/>
+                      <delay_constant max="45e-12" min="27e-12" in_port="ff.Q" out_port="flut5.out"/>
+                    </mux>
+                  </interconnect>
+                </pb_type>
+                <interconnect>
+                  <direct name="direct1" input="ble5.in" output="flut5.in"/>
+                  <direct name="direct2" input="ble5.clk" output="flut5.clk"/>
+                  <direct name="direct3" input="flut5.out" output="ble5.out"/>
+                </interconnect>
+              </mode>
+              <mode name="arithmetic">
+                <pb_type name="arithmetic" num_pb="1">
+                  <input name="in" num_pins="4"/>
+                  <input name="cin" num_pins="1"/>
+                  <output name="out" num_pins="1"/>
+                  <output name="cout" num_pins="1"/>
+                  <clock name="clk" num_pins="1"/>
+                  <!-- Special dual-LUT mode that drives adder only -->
+                  <pb_type name="lut4" blif_model=".names" num_pb="2" class="lut">
+                    <input name="in" num_pins="4" port_class="lut_in"/>
+                    <output name="out" num_pins="1" port_class="lut_out"/>
+                    <!-- LUT timing using delay matrix -->
+                    <!-- These are the physical maximum delays of the inputs on a Stratix IV LUT, but because VPR cannot do LUT rebalancing,
+                             we instead take the average of these numbers to get more stable results
+                        82e-12
+                        173e-12
+                        261e-12
+                        263e-12
+                        The minimum delay/maximum delay ratio is 0.7395 in QII on Stratix IV. Hence, the minimum delay
+                        is 0.7395 * the average of the maximum numbers
+                        -->
+                    <delay_matrix type="max" in_port="lut4.in" out_port="lut4.out">
+                        195e-12
+                        195e-12
+                        195e-12
+                        195e-12
+                      </delay_matrix>
+                    <delay_matrix type="min" in_port="lut4.in" out_port="lut4.out">
+                        144e-12
+                        144e-12
+                        144e-12
+                        144e-12
+                      </delay_matrix>
+                  </pb_type>
+                  <!-- The ratio between minimum and maximum delays in StratixIV for data ports to sumout
+							is 0.6809 and cin to sumout is 0.6969-->
+                  <pb_type name="adder" blif_model=".subckt adder" num_pb="1">
+                    <input name="a" num_pins="1"/>
+                    <input name="b" num_pins="1"/>
+                    <input name="cin" num_pins="1"/>
+                    <output name="cout" num_pins="1"/>
+                    <output name="sumout" num_pins="1"/>
+                    <delay_constant max="0.3e-9" min="0.2043e-9" in_port="adder.a" out_port="adder.sumout"/>
+                    <delay_constant max="0.3e-9" min="0.2043e-9" in_port="adder.b" out_port="adder.sumout"/>
+                    <delay_constant max="0.3e-9" min="0.2043e-9" in_port="adder.cin" out_port="adder.sumout"/>
+                    <delay_constant max="0.3e-9" min="0.2043e-9" in_port="adder.a" out_port="adder.cout"/>
+                    <delay_constant max="0.3e-9" min="0.2043e-9" in_port="adder.b" out_port="adder.cout"/>
+                    <delay_constant max="0.01e-9" min="6.9797e-12" in_port="adder.cin" out_port="adder.cout"/>
+                  </pb_type>
+                  <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
+                    <input name="D" num_pins="1" port_class="D"/>
+                    <output name="Q" num_pins="1" port_class="Q"/>
+                    <clock name="clk" num_pins="1" port_class="clock"/>
+                    <T_setup value="66e-12" port="ff.D" clock="clk"/>
+                    <T_hold value="37e-12" port="ff.D" clock="clk"/>
+                    <T_clock_to_Q max="124e-12" min="60e-12" port="ff.Q" clock="clk"/>
+                  </pb_type>
+                  <interconnect>
+                    <direct name="clock" input="arithmetic.clk" output="ff.clk"/>
+                    <direct name="lut_in1" input="arithmetic.in[3:0]" output="lut4[0:0].in[3:0]"/>
+                    <direct name="lut_in2" input="arithmetic.in[3:0]" output="lut4[1:1].in[3:0]"/>
+                    <direct name="lut_to_add1" input="lut4[0:0].out" output="adder.a">
+                      </direct>
+                    <direct name="lut_to_add2" input="lut4[1:1].out" output="adder.b">
+                      </direct>
+                    <direct name="add_to_ff" input="adder.sumout" output="ff.D">
+                      <pack_pattern name="chain" in_port="adder.sumout" out_port="ff.D"/>
+                    </direct>
+                    <direct name="carry_in" input="arithmetic.cin" output="adder.cin">
+                      <pack_pattern name="chain" in_port="arithmetic.cin" out_port="adder.cin"/>
+                    </direct>
+                    <direct name="carry_out" input="adder.cout" output="arithmetic.cout">
+                      <pack_pattern name="chain" in_port="adder.cout" out_port="arithmetic.cout"/>
+                    </direct>
+                    <mux name="sumout" input="ff.Q adder.sumout" output="arithmetic.out">
+                      <delay_constant max="25e-12" min="24e-12" in_port="adder.sumout" out_port="arithmetic.out"/>
+                      <delay_constant max="45e-12" min="27e-12" in_port="ff.Q" out_port="arithmetic.out"/>
+                    </mux>
+                  </interconnect>
+                </pb_type>
+                <interconnect>
+                  <direct name="direct1" input="ble5.in[3:0]" output="arithmetic.in"/>
+                  <direct name="carry_in" input="ble5.cin" output="arithmetic.cin">
+                    <pack_pattern name="chain" in_port="ble5.cin" out_port="arithmetic.cin"/>
+                  </direct>
+                  <direct name="carry_out" input="arithmetic.cout" output="ble5.cout">
+                    <pack_pattern name="chain" in_port="arithmetic.cout" out_port="ble5.cout"/>
+                  </direct>
+                  <direct name="direct2" input="ble5.clk" output="arithmetic.clk"/>
+                  <direct name="direct3" input="arithmetic.out" output="ble5.out"/>
+                </interconnect>
+              </mode>
+            </pb_type>
+            <interconnect> <!-- lut5inter wiring: inputs and clock fan out to both ble5 slices; carry runs cin -> ble5[0] -> ble5[1] -> cout -->
+              <direct name="direct1" input="lut5inter.in" output="ble5[0:0].in"/>
+              <direct name="direct2" input="lut5inter.in" output="ble5[1:1].in"/>
+              <direct name="direct3" input="ble5[1:0].out" output="lut5inter.out"/>
+              <direct name="carry_in" input="lut5inter.cin" output="ble5[0:0].cin">
+                <pack_pattern name="chain" in_port="lut5inter.cin" out_port="ble5[0:0].cin"/>
+              </direct>
+              <direct name="carry_out" input="ble5[1:1].cout" output="lut5inter.cout">
+                <pack_pattern name="chain" in_port="ble5[1:1].cout" out_port="lut5inter.cout"/>
+              </direct>
+              <direct name="carry_link" input="ble5[0:0].cout" output="ble5[1:1].cin">
+                <pack_pattern name="chain" in_port="ble5[0:0].cout" out_port="ble5[1:1].cin"/> <!-- out_port names this edge's sink (cin), matching the direct's output -->
+              </direct>
+              <complete name="complete1" input="lut5inter.clk" output="ble5[1:0].clk"/>
+            </interconnect>
+          </pb_type>
+          <interconnect>
+            <direct name="direct1" input="fle.in[4:0]" output="lut5inter.in"/>
+            <direct name="direct2" input="lut5inter.out" output="fle.out"/>
+            <direct name="direct3" input="fle.clk" output="lut5inter.clk"/>
+            <direct name="carry_in" input="fle.cin" output="lut5inter.cin">
+              <pack_pattern name="chain" in_port="fle.cin" out_port="lut5inter.cin"/>
+            </direct>
+            <direct name="carry_out" input="lut5inter.cout" output="fle.cout">
+              <pack_pattern name="chain" in_port="lut5inter.cout" out_port="fle.cout"/>
+            </direct>
+          </interconnect>
+        </mode>
+        <!-- n2_lut5 -->
+        <mode name="n1_lut6">
+          <pb_type name="ble6" num_pb="1">
+            <input name="in" num_pins="6"/>
+            <output name="out" num_pins="1"/>
+            <clock name="clk" num_pins="1"/>
+            <pb_type name="lut6" blif_model=".names" num_pb="1" class="lut">
+              <input name="in" num_pins="6" port_class="lut_in"/>
+              <output name="out" num_pins="1" port_class="lut_out"/>
+              <!-- LUT timing using delay matrix -->
+              <!-- These are the physical maximum delays of the inputs on a Stratix IV LUT, but because VPR cannot do LUT rebalancing,
+                       we instead take the average of these numbers to get more stable results
+                  82e-12
+                  173e-12
+                  261e-12
+                  263e-12
+                  398e-12
+                  397e-12
+                  NOTE(review): 0.7395 * 261 ps is about 193 ps, yet the min matrix below uses 174 ps (0.7395 * the 235 ps 5-LUT average) - confirm which is intended.
+                  The minimum delay/maximum delay ratio is 0.7395 in QII on Stratix IV. Hence, the minimum delay
+                  is 0.7395 * the average of the maximum numbers
+                  -->
+              <delay_matrix type="max" in_port="lut6.in" out_port="lut6.out">
+                  261e-12
+                  261e-12
+                  261e-12
+                  261e-12
+                  261e-12
+                  261e-12
+                </delay_matrix>
+              <delay_matrix type="min" in_port="lut6.in" out_port="lut6.out">
+                  174e-12
+                  174e-12
+                  174e-12
+                  174e-12
+                  174e-12
+                  174e-12
+                </delay_matrix>
+            </pb_type>
+            <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
+              <input name="D" num_pins="1" port_class="D"/>
+              <output name="Q" num_pins="1" port_class="Q"/>
+              <clock name="clk" num_pins="1" port_class="clock"/>
+              <T_setup value="66e-12" port="ff.D" clock="clk"/>
+              <T_hold value="37e-12" port="ff.D" clock="clk"/>
+              <T_clock_to_Q max="124e-12" min="60e-12" port="ff.Q" clock="clk"/>
+            </pb_type>
+            <interconnect>
+              <direct name="direct1" input="ble6.in" output="lut6[0:0].in"/>
+              <direct name="direct2" input="lut6.out" output="ff.D">
+                <pack_pattern name="ble6" in_port="lut6.out" out_port="ff.D"/>
+              </direct>
+              <direct name="direct3" input="ble6.clk" output="ff.clk"/>
+              <mux name="mux1" input="ff.Q lut6.out" output="ble6.out">
+                <delay_constant max="25e-12" min="24e-12" in_port="lut6.out" out_port="ble6.out"/>
+                <delay_constant max="45e-12" min="27e-12" in_port="ff.Q" out_port="ble6.out"/>
+              </mux>
+            </interconnect>
+          </pb_type>
+          <interconnect>
+            <direct name="direct1" input="fle.in" output="ble6.in"/>
+            <direct name="direct2" input="ble6.out" output="fle.out[0:0]"/>
+            <direct name="direct3" input="fle.clk" output="ble6.clk"/>
+          </interconnect>
+        </mode>
+        <!-- n1_lut6 -->
+      </pb_type>
+      <interconnect>
+        <!-- We use a full crossbar to get logical equivalence at inputs of CLB.
+           The delays below come from Stratix IV. The delay through a connection block
+           input mux + the crossbar in Stratix IV is 167 ps. We already have a 72 ps
+           delay on the connection block input mux (modeled by Ian Kuon), so the remaining
+           delay within the crossbar is 95 ps.
+           For the minimum delays, the delay through a connection block input mux +
+           the crossbar in Stratix IV is 144 ps. Subtracting the 72 ps leaves 72 ps remaining.
+           The max delay of cluster feedbacks in Stratix IV is 100 ps, when driven by a LUT.
+           Since all our LUT outputs go to a BLE output, and have a delay of
+           25 ps to do so, we subtract 25 ps from the 100 ps delay of a feedback
+           to get the part that should be marked on the crossbar. For the minimum delay,
+           the value in Stratix IV is 93 ps; subtracting the 24 ps leaves 69 ps.-->
+        <complete name="crossbar" input="clb.I fle[9:0].out" output="fle[9:0].in">
+          <delay_constant max="95e-12" min="72e-12" in_port="clb.I" out_port="fle[9:0].in"/>
+          <delay_constant max="75e-12" min="69e-12" in_port="fle[9:0].out" out_port="fle[9:0].in"/>
+        </complete>
+        <complete name="clks" input="clb.clk" output="fle[9:0].clk">
+          </complete>
+        <!-- This way of specifying direct connection to clb outputs is important because this architecture uses automatic spreading of opins.
+                 By grouping the output pins in this fashion, if a logic block is completely filled by 6-LUTs,
+                 then the outputs those 6-LUTs take get evenly distributed across all four sides of the CLB instead of clumped on two sides (which is what happens with a more
+                 naive specification).
+          -->
+        <direct name="clbouts1" input="fle[9:0].out[0:0]" output="clb.O[9:0]"/>
+        <direct name="clbouts2" input="fle[9:0].out[1:1]" output="clb.O[19:10]"/>
+        <!-- Carry chain links -->
+        <direct name="carry_in" input="clb.cin" output="fle[0:0].cin">
+          <!-- Put all inter-block carry chain delay on this one edge -->
+          <delay_constant max="0.16e-9" min="0.11e-9" in_port="clb.cin" out_port="fle[0:0].cin"/>
+          <pack_pattern name="chain" in_port="clb.cin" out_port="fle[0:0].cin"/>
+        </direct>
+        <direct name="carry_out" input="fle[9:9].cout" output="clb.cout">
+          <pack_pattern name="chain" in_port="fle[9:9].cout" out_port="clb.cout"/>
+        </direct>
+        <direct name="carry_link" input="fle[8:0].cout" output="fle[9:1].cin">
+          <pack_pattern name="chain" in_port="fle[8:0].cout" out_port="fle[9:1].cin"/>
+        </direct>
+      </interconnect>
+    </pb_type>
+    <!-- Define general purpose logic block (CLB) ends -->
+    <!-- Define fracturable multiplier begin -->
+    <!-- This multiplier can operate as a 36x36 multiplier that can fracture to two 18x18 multipliers each of which can further fracture to two 9x9 multipliers 
+	   For delay modelling, the 36x36 DSP multiplier in Stratix IV has a maximum delay of 1.523 ns + 1.93 ns
+	    = 3.45 ns. The average ratio of minimum to maximum delay of a DSP mac out in 36 bit multiply
+		mode is 0.51. Hence, the minimum delay is modeled as 0.776 ns + 0.984 ns = 1.760 ns.
+ 		The 18x18 mode doesn't need to sum four 18x18 multipliers, so it is a bit
+	   faster: 1.523 ns for the multiplier, and 1.09 ns for the multiplier output block.
+	    For the input and output interconnect delays, unlike Stratix IV, we don't
+	   have any routing/logic flexibility (crossbars) at the inputs.  There is some output muxing
+	   in Stratix IV and this architecture to select which multiplier outputs should go out (e.g.
+	   9x9 outputs, 18x18 or 36x36) so those are very close between the two architectures. 
+	   We take the conservative (slightly pessimistic)
+           approach modelling the input as the same as the Stratix IV input delay and the output delay the same as the Stratix IV DSP out delay.
+		   
+	   We estimate block area by using the published Stratix III data (which is architecturally identical to Stratix IV)
+	      (H. Wong, V. Betz and J. Rose, "Comparing FPGA vs. Custom CMOS and the Impact on Processor Microarchitecture", FPGA 2011) of 0.2623 
+		  mm^2 and scaling from 65 to 40 nm to obtain 0.0993 mm^2. That area is for a DSP block with approximately 2x the functionality of 
+		  the block we use (can implement two 36x36 multiplies instead of our 1, eight 18x18 multiplies instead of our 4, etc.). Hence we 
+		  divide the area by 2 to obtain 0.0497 mm^2. One minimum-width transistor unit (MWTU) = 60 L^2 (where L = 40 nm), so this is 518,000 MWTUs.
+		  That area includes routing and the connection block input muxes.  Our DSP block is four 
+		  rows high, and hence includes four horizontal routing channel segments and four vertical ones, which is 4x the routing of a logic 
+		  block (single tile). It also includes 3.6x the outputs of a logic block, and 1.8x the inputs. Hence a slight overestimate of the routing
+		  area associated with our DSP block is four times that of a logic tile, where the routing area of a logic tile was calculated above (at W = 300)
+		  as 30481 MWTAs. Hence the (core, non-routing) area of our DSP block is approximately 518,000 - 4 * 30,481 = 396,000 MWTUs.
+      -->
+    <pb_type name="mult_36">
+      <input name="a" num_pins="36"/>
+      <input name="b" num_pins="36"/>
+      <output name="out" num_pins="72"/>
+      <mode name="two_divisible_mult_18x18">
+        <pb_type name="divisible_mult_18x18" num_pb="2">
+          <input name="a" num_pins="18"/>
+          <input name="b" num_pins="18"/>
+          <output name="out" num_pins="36"/>
+          <!-- Model 9x9 delay and 18x18 delay as the same.  9x9 could be faster, but in Stratix IV
+	          isn't, presumably because the multiplier layout is really optimized for 18x18.
+		-->
+          <mode name="two_mult_9x9">
+            <pb_type name="mult_9x9_slice" num_pb="2">
+              <input name="A_cfg" num_pins="9"/>
+              <input name="B_cfg" num_pins="9"/>
+              <output name="OUT_cfg" num_pins="18"/>
+              <pb_type name="mult_9x9" blif_model=".subckt multiply" num_pb="1">
+                <input name="a" num_pins="9"/>
+                <input name="b" num_pins="9"/>
+                <output name="out" num_pins="18"/>
+                <delay_constant max="1.523e-9" min="0.776e-9" in_port="mult_9x9.a" out_port="mult_9x9.out"/>
+                <delay_constant max="1.523e-9" min="0.776e-9" in_port="mult_9x9.b" out_port="mult_9x9.out"/>
+              </pb_type>
+              <interconnect>
+                <direct name="a2a" input="mult_9x9_slice.A_cfg" output="mult_9x9.a">
+                </direct>
+                <direct name="b2b" input="mult_9x9_slice.B_cfg" output="mult_9x9.b">
+                </direct>
+                <direct name="out2out" input="mult_9x9.out" output="mult_9x9_slice.OUT_cfg">
+                </direct>
+              </interconnect>
+              <power method="pin-toggle">
+                <port name="A_cfg" energy_per_toggle="1.45e-12"/>
+                <port name="B_cfg" energy_per_toggle="1.45e-12"/>
+                <static_power power_per_instance="0.0"/>
+              </power>
+            </pb_type>
+            <interconnect>
+              <direct name="a2a" input="divisible_mult_18x18.a" output="mult_9x9_slice[1:0].A_cfg">
+              </direct>
+              <direct name="b2b" input="divisible_mult_18x18.b" output="mult_9x9_slice[1:0].B_cfg">
+              </direct>
+              <direct name="out2out" input="mult_9x9_slice[1:0].OUT_cfg" output="divisible_mult_18x18.out">
+              </direct>
+            </interconnect>
+          </mode>
+          <mode name="mult_18x18">
+            <pb_type name="mult_18x18_slice" num_pb="1">
+              <input name="A_cfg" num_pins="18"/>
+              <input name="B_cfg" num_pins="18"/>
+              <output name="OUT_cfg" num_pins="36"/>
+              <pb_type name="mult_18x18" blif_model=".subckt multiply" num_pb="1">
+                <input name="a" num_pins="18"/>
+                <input name="b" num_pins="18"/>
+                <output name="out" num_pins="36"/>
+                <delay_constant max="1.523e-9" min="0.776e-9" in_port="mult_18x18.a" out_port="mult_18x18.out"/>
+                <delay_constant max="1.523e-9" min="0.776e-9" in_port="mult_18x18.b" out_port="mult_18x18.out"/>
+              </pb_type>
+              <interconnect>
+                <direct name="a2a" input="mult_18x18_slice.A_cfg" output="mult_18x18.a">
+                </direct>
+                <direct name="b2b" input="mult_18x18_slice.B_cfg" output="mult_18x18.b">
+                </direct>
+                <direct name="out2out" input="mult_18x18.out" output="mult_18x18_slice.OUT_cfg">
+                </direct>
+              </interconnect>
+              <power method="pin-toggle">
+                <port name="A_cfg" energy_per_toggle="1.09e-12"/>
+                <port name="B_cfg" energy_per_toggle="1.09e-12"/>
+                <static_power power_per_instance="0.0"/>
+              </power>
+            </pb_type>
+            <interconnect>
+              <direct name="a2a" input="divisible_mult_18x18.a" output="mult_18x18_slice.A_cfg">
+              </direct>
+              <direct name="b2b" input="divisible_mult_18x18.b" output="mult_18x18_slice.B_cfg">
+              </direct>
+              <direct name="out2out" input="mult_18x18_slice.OUT_cfg" output="divisible_mult_18x18.out">
+              </direct>
+            </interconnect>
+          </mode>
+          <power method="sum-of-children"/>
+        </pb_type>
+        <interconnect>
+          <!-- Stratix IV input delay of 207ps is conservative for this architecture because this architecture does not have an input crossbar in the multiplier.
+		   Subtract 72.5 ps delay, which is already in the connection block input mux, leading to a 134 ps delay.
+				The interconnect min/max delay ratio for DSP blocks is 0.5523, which leads to a minimum delay of 74 ps
+              -->
+          <direct name="a2a" input="mult_36.a" output="divisible_mult_18x18[1:0].a">
+            <delay_constant max="134e-12" min="74e-12" in_port="mult_36.a" out_port="divisible_mult_18x18[1:0].a"/>
+          </direct>
+          <direct name="b2b" input="mult_36.b" output="divisible_mult_18x18[1:0].b">
+            <delay_constant max="134e-12" min="74e-12" in_port="mult_36.b" out_port="divisible_mult_18x18[1:0].b"/>
+          </direct>
+          <direct name="out2out" input="divisible_mult_18x18[1:0].out" output="mult_36.out">
+            <delay_constant max="1.09e-9" min="74e-12" in_port="divisible_mult_18x18[1:0].out" out_port="mult_36.out"/>
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mult_36x36">
+        <pb_type name="mult_36x36_slice" num_pb="1">
+          <input name="A_cfg" num_pins="36"/>
+          <input name="B_cfg" num_pins="36"/>
+          <output name="OUT_cfg" num_pins="72"/>
+          <pb_type name="mult_36x36" blif_model=".subckt multiply" num_pb="1">
+            <input name="a" num_pins="36"/>
+            <input name="b" num_pins="36"/>
+            <output name="out" num_pins="72"/>
+            <delay_constant max="1.523e-9" min="0.776e-9" in_port="mult_36x36.a" out_port="mult_36x36.out"/>
+            <delay_constant max="1.523e-9" min="0.776e-9" in_port="mult_36x36.b" out_port="mult_36x36.out"/>
+          </pb_type>
+          <interconnect>
+            <direct name="a2a" input="mult_36x36_slice.A_cfg" output="mult_36x36.a">
+            </direct>
+            <direct name="b2b" input="mult_36x36_slice.B_cfg" output="mult_36x36.b">
+            </direct>
+            <direct name="out2out" input="mult_36x36.out" output="mult_36x36_slice.OUT_cfg">
+            </direct>
+          </interconnect>
+          <power method="pin-toggle">
+            <port name="A_cfg" energy_per_toggle="2.13e-12"/>
+            <port name="B_cfg" energy_per_toggle="2.13e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <!-- Stratix IV input delay of 207ps is conservative for this architecture because this architecture does not have an input crossbar in the multiplier.
+		   Subtract 72.5 ps delay, which is already in the connection block input mux, leading
+		   to a 134 ps delay.
+				The interconnect min/max delay ratio for DSP blocks is 0.5523, which leads to a minimum delay of 74 ps.
+              NOTE(review): the mult_36 header comment models the 36x36 output-stage minimum as 0.984 ns, but out2out below uses min=74e-12 - confirm which is intended. -->
+          <direct name="a2a" input="mult_36.a" output="mult_36x36_slice.A_cfg">
+            <delay_constant max="134e-12" min="74e-12" in_port="mult_36.a" out_port="mult_36x36_slice.A_cfg"/>
+          </direct>
+          <direct name="b2b" input="mult_36.b" output="mult_36x36_slice.B_cfg">
+            <delay_constant max="134e-12" min="74e-12" in_port="mult_36.b" out_port="mult_36x36_slice.B_cfg"/>
+          </direct>
+          <direct name="out2out" input="mult_36x36_slice.OUT_cfg" output="mult_36.out">
+            <delay_constant max="1.93e-9" min="74e-12" in_port="mult_36x36_slice.OUT_cfg" out_port="mult_36.out"/>
+          </direct>
+        </interconnect>
+      </mode>
+      <!-- Place this multiplier block every 8 columns from (and including) the sixth column -->
+      <power method="sum-of-children"/>
+    </pb_type>
+    <!-- Define fracturable multiplier end -->
+    <!-- Define fracturable memory begin -->
+    <!-- 32 Kb Memory that can operate from 512x64 to 32Kx1 for single-port mode and 1024x32 to 32Kx1 for dual-port mode.  
+           Area and max delay based off Stratix IV 9K and 144K memories (delay from linear interpolation, Tsu(483 ps, 636 ps) Tco(1084ps, 1969ps)).  
+
+		   uTh/Tsu ratio = 0.468, uTco min/max ratio = 0.97
+           uTh = 226ps, 298ps
+           Min uTco = 1051ps, 1909ps
+           Max input delay = 204ps (from Stratix IV LAB line) - 72ps (this architecture does not lump connection box delay in internal delay)
+           Max output delay = M9K buffer 50ps. 
+		   Min input delay = 160ps (from Stratix IV LAB line) - 72ps
+		   Min output delay = 46ps (M9K buffer min/max ratio = 0.9286)
+		   
+		   Area is obtained by appropriately scaling and adjusting the published Stratix III (which is architecturally identical to Stratix IV)
+		   data from H. Wong, V. Betz and J. Rose, "Comparing FPGA vs. Custom CMOS and the Impact on Processor Microarchitecture", FPGA 2011.
+		   Linearly interpolating (by bit count) between the M9k and M144k areas to obtain an M32k (our RAM size) point yields a 65 nm area of
+		   0.153 mm^2. Interpolating based on port count between the RAMs would instead yield an area of 0.209 mm^2 for our 32 kb RAM; since 
+		   bit count accounts for more area than ports for a RAM this size we choose the bit count interpolation; however, since the port interpolation
+		   is not radically different this also gives us confidence that interpolating based on bits is OK, but slightly underpredicts area.
+		   Scaling to 40 nm yields 0.0579 mm^2, and converting to MWTUs at 60 L^2 / MWTU yields 604,000 MWTUs. This includes routing. A Stratix IV
+		   M9K RAM is one row high and hence has one routing tile (one horizontal and one vertical routing segment area). An M144k RAM has 8 such tiles.
+		   Linearly interpolating on
+		   bits to 32 kb yields 2.2 routing tiles incorporated in the area number above. The inter-block routing represents 30% of the area of a logic 
+		   tile according to D. Lewis et al, "Architectural Enhancements in Stratix V," FPGA 2013. Hence we should subtract 0.3 * 2.2 * 84,375 MWTUs to
+		   obtain a RAM core area (not including inter-block routing) of 548,000 MWTUs for our 32 kb RAM in a 40 nm process.
+      -->
+    <pb_type name="memory">
+      <input name="addr1" num_pins="15"/>
+      <input name="addr2" num_pins="15"/>
+      <input name="data" num_pins="64"/>
+      <input name="we1" num_pins="1"/>
+      <input name="we2" num_pins="1"/>
+      <output name="out" num_pins="64"/>
+      <clock name="clk" num_pins="1"/>
+      <!-- Specify single port mode first -->
+      <mode name="mem_512x64_sp">
+        <pb_type name="mem_512x64_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="9" port_class="address"/>
+          <input name="data" num_pins="64" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="64" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_512x64_sp.addr" clock="clk"/>
+          <T_setup value="509e-12" port="mem_512x64_sp.data" clock="clk"/>
+          <T_setup value="509e-12" port="mem_512x64_sp.we" clock="clk"/>
+          <T_hold value="238e-12" port="mem_512x64_sp.addr" clock="clk"/>
+          <T_hold value="238e-12" port="mem_512x64_sp.data" clock="clk"/>
+          <T_hold value="238e-12" port="mem_512x64_sp.we" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_512x64_sp.out" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="9.0e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[8:0]" output="mem_512x64_sp.addr">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[8:0]" out_port="mem_512x64_sp.addr"/>
+          </direct>
+          <direct name="data1" input="memory.data[63:0]" output="mem_512x64_sp.data">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[63:0]" out_port="mem_512x64_sp.data"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_512x64_sp.we">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_512x64_sp.we"/>
+          </direct>
+          <direct name="dataout1" input="mem_512x64_sp.out" output="memory.out[63:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_512x64_sp.out" out_port="memory.out[63:0]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_512x64_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_1024x32_sp">
+        <pb_type name="mem_1024x32_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="10" port_class="address"/>
+          <input name="data" num_pins="32" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="32" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_1024x32_sp.addr" clock="clk"/>
+          <T_setup value="509e-12" port="mem_1024x32_sp.data" clock="clk"/>
+          <T_setup value="509e-12" port="mem_1024x32_sp.we" clock="clk"/>
+          <T_hold value="238e-12" port="mem_1024x32_sp.addr" clock="clk"/>
+          <T_hold value="238e-12" port="mem_1024x32_sp.data" clock="clk"/>
+          <T_hold value="238e-12" port="mem_1024x32_sp.we" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_1024x32_sp.out" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="9.0e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x32_sp.addr">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[9:0]" out_port="mem_1024x32_sp.addr"/>
+          </direct>
+          <direct name="data1" input="memory.data[31:0]" output="mem_1024x32_sp.data">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[31:0]" out_port="mem_1024x32_sp.data"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_1024x32_sp.we">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_1024x32_sp.we"/>
+          </direct>
+          <direct name="dataout1" input="mem_1024x32_sp.out" output="memory.out[31:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_1024x32_sp.out" out_port="memory.out[31:0]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_1024x32_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_2048x16_sp">
+        <pb_type name="mem_2048x16_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="11" port_class="address"/>
+          <input name="data" num_pins="16" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="16" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_2048x16_sp.addr" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x16_sp.data" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x16_sp.we" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x16_sp.addr" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x16_sp.data" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x16_sp.we" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_2048x16_sp.out" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="9.0e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x16_sp.addr">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[10:0]" out_port="mem_2048x16_sp.addr"/>
+          </direct>
+          <direct name="data1" input="memory.data[15:0]" output="mem_2048x16_sp.data">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[15:0]" out_port="mem_2048x16_sp.data"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_2048x16_sp.we">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_2048x16_sp.we"/>
+          </direct>
+          <direct name="dataout1" input="mem_2048x16_sp.out" output="memory.out[15:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_2048x16_sp.out" out_port="memory.out[15:0]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_2048x16_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_4096x8_sp">
+        <pb_type name="mem_4096x8_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="12" port_class="address"/>
+          <input name="data" num_pins="8" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="8" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_4096x8_sp.addr" clock="clk"/>
+          <T_setup value="509e-12" port="mem_4096x8_sp.data" clock="clk"/>
+          <T_setup value="509e-12" port="mem_4096x8_sp.we" clock="clk"/>
+          <T_hold value="238e-12" port="mem_4096x8_sp.addr" clock="clk"/>
+          <T_hold value="238e-12" port="mem_4096x8_sp.data" clock="clk"/>
+          <T_hold value="238e-12" port="mem_4096x8_sp.we" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_4096x8_sp.out" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="9.0e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[11:0]" output="mem_4096x8_sp.addr">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[11:0]" out_port="mem_4096x8_sp.addr"/>
+          </direct>
+          <direct name="data1" input="memory.data[7:0]" output="mem_4096x8_sp.data">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[7:0]" out_port="mem_4096x8_sp.data"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_4096x8_sp.we">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_4096x8_sp.we"/>
+          </direct>
+          <direct name="dataout1" input="mem_4096x8_sp.out" output="memory.out[7:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_4096x8_sp.out" out_port="memory.out[7:0]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_4096x8_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_8192x4_sp">
+        <pb_type name="mem_8192x4_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="13" port_class="address"/>
+          <input name="data" num_pins="4" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="4" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_8192x4_sp.addr" clock="clk"/>
+          <T_setup value="509e-12" port="mem_8192x4_sp.data" clock="clk"/>
+          <T_setup value="509e-12" port="mem_8192x4_sp.we" clock="clk"/>
+          <T_hold value="238e-12" port="mem_8192x4_sp.addr" clock="clk"/>
+          <T_hold value="238e-12" port="mem_8192x4_sp.data" clock="clk"/>
+          <T_hold value="238e-12" port="mem_8192x4_sp.we" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_8192x4_sp.out" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="9.0e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[12:0]" output="mem_8192x4_sp.addr">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[12:0]" out_port="mem_8192x4_sp.addr"/>
+          </direct>
+          <direct name="data1" input="memory.data[3:0]" output="mem_8192x4_sp.data">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[3:0]" out_port="mem_8192x4_sp.data"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_8192x4_sp.we">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_8192x4_sp.we"/>
+          </direct>
+          <direct name="dataout1" input="mem_8192x4_sp.out" output="memory.out[3:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_8192x4_sp.out" out_port="memory.out[3:0]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_8192x4_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_16384x2_sp">
+        <pb_type name="mem_16384x2_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="14" port_class="address"/>
+          <input name="data" num_pins="2" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="2" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_16384x2_sp.addr" clock="clk"/>
+          <T_setup value="509e-12" port="mem_16384x2_sp.data" clock="clk"/>
+          <T_setup value="509e-12" port="mem_16384x2_sp.we" clock="clk"/>
+          <T_hold value="238e-12" port="mem_16384x2_sp.addr" clock="clk"/>
+          <T_hold value="238e-12" port="mem_16384x2_sp.data" clock="clk"/>
+          <T_hold value="238e-12" port="mem_16384x2_sp.we" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_16384x2_sp.out" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="9.0e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[13:0]" output="mem_16384x2_sp.addr">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[13:0]" out_port="mem_16384x2_sp.addr"/>
+          </direct>
+          <direct name="data1" input="memory.data[1:0]" output="mem_16384x2_sp.data">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[1:0]" out_port="mem_16384x2_sp.data"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_16384x2_sp.we">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_16384x2_sp.we"/>
+          </direct>
+          <direct name="dataout1" input="mem_16384x2_sp.out" output="memory.out[1:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_16384x2_sp.out" out_port="memory.out[1:0]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_16384x2_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_32768x1_sp">
+        <pb_type name="mem_32768x1_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="15" port_class="address"/>
+          <input name="data" num_pins="1" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="1" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_32768x1_sp.addr" clock="clk"/>
+          <T_setup value="509e-12" port="mem_32768x1_sp.data" clock="clk"/>
+          <T_setup value="509e-12" port="mem_32768x1_sp.we" clock="clk"/>
+          <T_hold value="238e-12" port="mem_32768x1_sp.addr" clock="clk"/>
+          <T_hold value="238e-12" port="mem_32768x1_sp.data" clock="clk"/>
+          <T_hold value="238e-12" port="mem_32768x1_sp.we" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_32768x1_sp.out" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="9.0e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[14:0]" output="mem_32768x1_sp.addr">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[14:0]" out_port="mem_32768x1_sp.addr"/>
+          </direct>
+          <direct name="data1" input="memory.data[0:0]" output="mem_32768x1_sp.data">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[0:0]" out_port="mem_32768x1_sp.data"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_32768x1_sp.we">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_32768x1_sp.we"/>
+          </direct>
+          <direct name="dataout1" input="mem_32768x1_sp.out" output="memory.out[0:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_32768x1_sp.out" out_port="memory.out[0:0]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_32768x1_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <!-- Specify true dual port mode next -->
+      <mode name="mem_1024x32_dp">
+        <pb_type name="mem_1024x32_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="10" port_class="address1"/>
+          <input name="addr2" num_pins="10" port_class="address2"/>
+          <input name="data1" num_pins="32" port_class="data_in1"/>
+          <input name="data2" num_pins="32" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="32" port_class="data_out1"/>
+          <output name="out2" num_pins="32" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_1024x32_dp.addr1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_1024x32_dp.data1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_1024x32_dp.we1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_1024x32_dp.addr2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_1024x32_dp.data2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_1024x32_dp.we2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_1024x32_dp.addr1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_1024x32_dp.data1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_1024x32_dp.we1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_1024x32_dp.addr2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_1024x32_dp.data2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_1024x32_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_1024x32_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_1024x32_dp.out2" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="17.9e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x32_dp.addr1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[9:0]" out_port="mem_1024x32_dp.addr1"/>
+          </direct>
+          <direct name="address2" input="memory.addr2[9:0]" output="mem_1024x32_dp.addr2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr2[9:0]" out_port="mem_1024x32_dp.addr2"/>
+          </direct>
+          <direct name="data1" input="memory.data[31:0]" output="mem_1024x32_dp.data1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[31:0]" out_port="mem_1024x32_dp.data1"/>
+          </direct>
+          <direct name="data2" input="memory.data[63:32]" output="mem_1024x32_dp.data2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[63:32]" out_port="mem_1024x32_dp.data2"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_1024x32_dp.we1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_1024x32_dp.we1"/>
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_1024x32_dp.we2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we2" out_port="mem_1024x32_dp.we2"/>
+          </direct>
+          <direct name="dataout1" input="mem_1024x32_dp.out1" output="memory.out[31:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_1024x32_dp.out1" out_port="memory.out[31:0]"/>
+          </direct>
+          <direct name="dataout2" input="mem_1024x32_dp.out2" output="memory.out[63:32]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_1024x32_dp.out2" out_port="memory.out[63:32]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_1024x32_dp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_2048x16_dp">
+        <pb_type name="mem_2048x16_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="11" port_class="address1"/>
+          <input name="addr2" num_pins="11" port_class="address2"/>
+          <input name="data1" num_pins="16" port_class="data_in1"/>
+          <input name="data2" num_pins="16" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="16" port_class="data_out1"/>
+          <output name="out2" num_pins="16" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_2048x16_dp.addr1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x16_dp.data1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x16_dp.we1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x16_dp.addr2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x16_dp.data2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x16_dp.we2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x16_dp.addr1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x16_dp.data1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x16_dp.we1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x16_dp.addr2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x16_dp.data2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x16_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_2048x16_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_2048x16_dp.out2" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="17.9e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x16_dp.addr1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[10:0]" out_port="mem_2048x16_dp.addr1"/>
+          </direct>
+          <direct name="address2" input="memory.addr2[10:0]" output="mem_2048x16_dp.addr2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr2[10:0]" out_port="mem_2048x16_dp.addr2"/>
+          </direct>
+          <direct name="data1" input="memory.data[15:0]" output="mem_2048x16_dp.data1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[15:0]" out_port="mem_2048x16_dp.data1"/>
+          </direct>
+          <direct name="data2" input="memory.data[31:16]" output="mem_2048x16_dp.data2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[31:16]" out_port="mem_2048x16_dp.data2"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_2048x16_dp.we1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_2048x16_dp.we1"/>
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_2048x16_dp.we2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we2" out_port="mem_2048x16_dp.we2"/>
+          </direct>
+          <direct name="dataout1" input="mem_2048x16_dp.out1" output="memory.out[15:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_2048x16_dp.out1" out_port="memory.out[15:0]"/>
+          </direct>
+          <direct name="dataout2" input="mem_2048x16_dp.out2" output="memory.out[31:16]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_2048x16_dp.out2" out_port="memory.out[31:16]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_2048x16_dp.clk">
+          </direct>
+        </interconnect>
+      </mode>
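+      <mode name="mem_2048x8_dp"> <!-- NOTE(review): name says 2048x8, but 12 address pins x 8 data pins is 4096x8 (cf. mem_4096x8_sp); name left unchanged in case it is referenced elsewhere - confirm before renaming -->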
+        <pb_type name="mem_2048x8_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="12" port_class="address1"/>
+          <input name="addr2" num_pins="12" port_class="address2"/>
+          <input name="data1" num_pins="8" port_class="data_in1"/>
+          <input name="data2" num_pins="8" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="8" port_class="data_out1"/>
+          <output name="out2" num_pins="8" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_2048x8_dp.addr1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x8_dp.data1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x8_dp.we1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x8_dp.addr2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x8_dp.data2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x8_dp.we2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x8_dp.addr1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x8_dp.data1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x8_dp.we1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x8_dp.addr2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x8_dp.data2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x8_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_2048x8_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_2048x8_dp.out2" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="17.9e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[11:0]" output="mem_2048x8_dp.addr1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[11:0]" out_port="mem_2048x8_dp.addr1"/>
+          </direct>
+          <direct name="address2" input="memory.addr2[11:0]" output="mem_2048x8_dp.addr2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr2[11:0]" out_port="mem_2048x8_dp.addr2"/>
+          </direct>
+          <direct name="data1" input="memory.data[7:0]" output="mem_2048x8_dp.data1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[7:0]" out_port="mem_2048x8_dp.data1"/>
+          </direct>
+          <direct name="data2" input="memory.data[15:8]" output="mem_2048x8_dp.data2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[15:8]" out_port="mem_2048x8_dp.data2"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_2048x8_dp.we1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_2048x8_dp.we1"/>
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_2048x8_dp.we2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we2" out_port="mem_2048x8_dp.we2"/>
+          </direct>
+          <direct name="dataout1" input="mem_2048x8_dp.out1" output="memory.out[7:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_2048x8_dp.out1" out_port="memory.out[7:0]"/>
+          </direct>
+          <direct name="dataout2" input="mem_2048x8_dp.out2" output="memory.out[15:8]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_2048x8_dp.out2" out_port="memory.out[15:8]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_2048x8_dp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_8192x4_dp"> <!-- Mode: the block acts as one dual-port RAM with 13 address pins (2^13 = 8192 words) and 4 data pins per port -->
+        <pb_type name="mem_8192x4_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="13" port_class="address1"/>
+          <input name="addr2" num_pins="13" port_class="address2"/>
+          <input name="data1" num_pins="4" port_class="data_in1"/>
+          <input name="data2" num_pins="4" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="4" port_class="data_out1"/>
+          <output name="out2" num_pins="4" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_8192x4_dp.addr1" clock="clk"/> <!-- Setup and hold entries below cover every address, data and write-enable input of both ports, all relative to clk -->
+          <T_setup value="509e-12" port="mem_8192x4_dp.data1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_8192x4_dp.we1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_8192x4_dp.addr2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_8192x4_dp.data2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_8192x4_dp.we2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_8192x4_dp.addr1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_8192x4_dp.data1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_8192x4_dp.we1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_8192x4_dp.addr2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_8192x4_dp.data2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_8192x4_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_8192x4_dp.out1" clock="clk"/> <!-- Both read outputs are timed from clk -->
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_8192x4_dp.out2" clock="clk"/>
+          <power method="pin-toggle"> <!-- Only the clk pin is given a toggle energy; static power is set to 0.0 -->
+            <port name="clk" energy_per_toggle="17.9e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect> <!-- Connects slices of the parent "memory" ports to this mode's primitive -->
+          <direct name="address1" input="memory.addr1[12:0]" output="mem_8192x4_dp.addr1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[12:0]" out_port="mem_8192x4_dp.addr1"/>
+          </direct>
+          <direct name="address2" input="memory.addr2[12:0]" output="mem_8192x4_dp.addr2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr2[12:0]" out_port="mem_8192x4_dp.addr2"/>
+          </direct>
+          <direct name="data1" input="memory.data[3:0]" output="mem_8192x4_dp.data1"> <!-- Port 1 write data comes from memory.data[3:0] -->
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[3:0]" out_port="mem_8192x4_dp.data1"/>
+          </direct>
+          <direct name="data2" input="memory.data[7:4]" output="mem_8192x4_dp.data2"> <!-- Port 2 write data comes from memory.data[7:4] -->
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[7:4]" out_port="mem_8192x4_dp.data2"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_8192x4_dp.we1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_8192x4_dp.we1"/>
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_8192x4_dp.we2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we2" out_port="mem_8192x4_dp.we2"/>
+          </direct>
+          <direct name="dataout1" input="mem_8192x4_dp.out1" output="memory.out[3:0]"> <!-- Port 1 read data drives memory.out[3:0] -->
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_8192x4_dp.out1" out_port="memory.out[3:0]"/>
+          </direct>
+          <direct name="dataout2" input="mem_8192x4_dp.out2" output="memory.out[7:4]"> <!-- Port 2 read data drives memory.out[7:4] -->
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_8192x4_dp.out2" out_port="memory.out[7:4]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_8192x4_dp.clk"> <!-- The clock connection carries no delay_constant -->
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_16384x2_dp"> <!-- Mode: the block acts as one dual-port RAM with 14 address pins (2^14 = 16384 words) and 2 data pins per port -->
+        <pb_type name="mem_16384x2_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="14" port_class="address1"/>
+          <input name="addr2" num_pins="14" port_class="address2"/>
+          <input name="data1" num_pins="2" port_class="data_in1"/>
+          <input name="data2" num_pins="2" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="2" port_class="data_out1"/>
+          <output name="out2" num_pins="2" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_16384x2_dp.addr1" clock="clk"/> <!-- Setup and hold entries below cover every address, data and write-enable input of both ports, all relative to clk -->
+          <T_setup value="509e-12" port="mem_16384x2_dp.data1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_16384x2_dp.we1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_16384x2_dp.addr2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_16384x2_dp.data2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_16384x2_dp.we2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_16384x2_dp.addr1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_16384x2_dp.data1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_16384x2_dp.we1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_16384x2_dp.addr2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_16384x2_dp.data2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_16384x2_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_16384x2_dp.out1" clock="clk"/> <!-- Both read outputs are timed from clk -->
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_16384x2_dp.out2" clock="clk"/>
+          <power method="pin-toggle"> <!-- Only the clk pin is given a toggle energy; static power is set to 0.0 -->
+            <port name="clk" energy_per_toggle="17.9e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect> <!-- Connects slices of the parent "memory" ports to this mode's primitive -->
+          <direct name="address1" input="memory.addr1[13:0]" output="mem_16384x2_dp.addr1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[13:0]" out_port="mem_16384x2_dp.addr1"/>
+          </direct>
+          <direct name="address2" input="memory.addr2[13:0]" output="mem_16384x2_dp.addr2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr2[13:0]" out_port="mem_16384x2_dp.addr2"/>
+          </direct>
+          <direct name="data1" input="memory.data[1:0]" output="mem_16384x2_dp.data1"> <!-- Port 1 write data comes from memory.data[1:0] -->
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[1:0]" out_port="mem_16384x2_dp.data1"/>
+          </direct>
+          <direct name="data2" input="memory.data[3:2]" output="mem_16384x2_dp.data2"> <!-- Port 2 write data comes from memory.data[3:2] -->
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[3:2]" out_port="mem_16384x2_dp.data2"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_16384x2_dp.we1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_16384x2_dp.we1"/>
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_16384x2_dp.we2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we2" out_port="mem_16384x2_dp.we2"/>
+          </direct>
+          <direct name="dataout1" input="mem_16384x2_dp.out1" output="memory.out[1:0]"> <!-- Port 1 read data drives memory.out[1:0] -->
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_16384x2_dp.out1" out_port="memory.out[1:0]"/>
+          </direct>
+          <direct name="dataout2" input="mem_16384x2_dp.out2" output="memory.out[3:2]"> <!-- Port 2 read data drives memory.out[3:2] -->
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_16384x2_dp.out2" out_port="memory.out[3:2]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_16384x2_dp.clk"> <!-- The clock connection carries no delay_constant -->
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_32768x1_dp"> <!-- Mode: the block acts as one dual-port RAM with 15 address pins (2^15 = 32768 words) and 1 data pin per port -->
+        <pb_type name="mem_32768x1_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="15" port_class="address1"/>
+          <input name="addr2" num_pins="15" port_class="address2"/>
+          <input name="data1" num_pins="1" port_class="data_in1"/>
+          <input name="data2" num_pins="1" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="1" port_class="data_out1"/>
+          <output name="out2" num_pins="1" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_32768x1_dp.addr1" clock="clk"/> <!-- Setup and hold entries below cover every address, data and write-enable input of both ports, all relative to clk -->
+          <T_setup value="509e-12" port="mem_32768x1_dp.data1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_32768x1_dp.we1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_32768x1_dp.addr2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_32768x1_dp.data2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_32768x1_dp.we2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_32768x1_dp.addr1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_32768x1_dp.data1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_32768x1_dp.we1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_32768x1_dp.addr2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_32768x1_dp.data2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_32768x1_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_32768x1_dp.out1" clock="clk"/> <!-- Both read outputs are timed from clk -->
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_32768x1_dp.out2" clock="clk"/>
+          <power method="pin-toggle"> <!-- Only the clk pin is given a toggle energy; static power is set to 0.0 -->
+            <port name="clk" energy_per_toggle="17.9e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect> <!-- Connects slices of the parent "memory" ports to this mode's primitive -->
+          <direct name="address1" input="memory.addr1[14:0]" output="mem_32768x1_dp.addr1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[14:0]" out_port="mem_32768x1_dp.addr1"/>
+          </direct>
+          <direct name="address2" input="memory.addr2[14:0]" output="mem_32768x1_dp.addr2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr2[14:0]" out_port="mem_32768x1_dp.addr2"/>
+          </direct>
+          <direct name="data1" input="memory.data[0:0]" output="mem_32768x1_dp.data1"> <!-- Port 1 write data comes from memory.data[0] -->
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[0:0]" out_port="mem_32768x1_dp.data1"/>
+          </direct>
+          <direct name="data2" input="memory.data[1:1]" output="mem_32768x1_dp.data2"> <!-- Port 2 write data comes from memory.data[1] -->
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[1:1]" out_port="mem_32768x1_dp.data2"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_32768x1_dp.we1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_32768x1_dp.we1"/>
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_32768x1_dp.we2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we2" out_port="mem_32768x1_dp.we2"/>
+          </direct>
+          <direct name="dataout1" input="mem_32768x1_dp.out1" output="memory.out[0:0]"> <!-- Port 1 read data drives memory.out[0] -->
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_32768x1_dp.out1" out_port="memory.out[0:0]"/>
+          </direct>
+          <direct name="dataout2" input="mem_32768x1_dp.out2" output="memory.out[1:1]"> <!-- Port 2 read data drives memory.out[1] -->
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_32768x1_dp.out2" out_port="memory.out[1:1]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_32768x1_dp.clk"> <!-- The clock connection carries no delay_constant -->
+          </direct>
+        </interconnect>
+      </mode>
+      <!-- Every input pin is driven by 15% of the tracks in a channel, every output pin is driven by 10% of the tracks in a channel -->
+      <!-- Place this memory block every 8 columns from (and including) the second column -->
+      <power method="sum-of-children"/>
+    </pb_type>
+    <!-- Define fracturable memory end -->
+  </complexblocklist>
+  <power> <!-- Architecture-wide parameters for power estimation. NOTE(review): the units of the values below are not stated in this file; confirm against the VTR architecture reference -->
+    <local_interconnect C_wire="2.5e-10"/>
+    <mux_transistor_size mux_transistor_size="3"/>
+    <FF_size FF_size="4"/>
+    <LUT_transistor_size LUT_transistor_size="4"/>
+  </power>
+  <clocks> <!-- A single clock entry; its C_wire equals the local_interconnect C_wire above. NOTE(review): buffer_size="auto" presumably leaves buffer sizing to the tool; confirm -->
+    <clock buffer_size="auto" C_wire="2.5e-10"/>
+  </clocks>
+</architecture>
diff --git a/parmys-plugin/tests/raygentop/odin_config.xml b/parmys-plugin/tests/raygentop/odin_config.xml
new file mode 100644
index 000000000..2edfb591c
--- /dev/null
+++ b/parmys-plugin/tests/raygentop/odin_config.xml
@@ -0,0 +1,40 @@
+<!--
+# Copyright 2022 Daniel Khadivi
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+-->
+<config> <!-- Configuration file that raygentop.tcl hands to the parmys pass with "-c odin_config.xml" -->
+	<inputs>
+		<input_type>Verilog</input_type>
+		<input_path_and_name>raygentop.v</input_path_and_name>
+	</inputs>
+	<output>
+		<output_type>blif</output_type>
+		<output_path_and_name>raygentop.yosys.blif</output_path_and_name> <!-- Same file name that raygentop.tcl passes to write_blif -->
+		<target>
+			<arch_file>k6_frac_N10_frac_chain_mem32K_40nm.xml</arch_file> <!-- Same architecture file that raygentop.tcl passes to parmys_arch and parmys -->
+		</target>
+	</output>
+	<optimizations> <!-- NOTE(review): presumably thresholds steering hard-block mapping of multipliers, memories and adders; confirm each attribute against the Odin II configuration documentation -->
+		<multiply size="3" fixed="1" fracture="0" padding="-1" />
+		<memory split_memory_width="1" split_memory_depth="15" /> <!-- NOTE(review): 15 equals the address width of the deepest memory mode (mem_32768x1_dp) in the architecture file; confirm this is the intended relation -->
+		<adder size="0" threshold_size="1" />
+	</optimizations>
+	<debug_outputs> <!-- Debug output path is the current directory; AST and netlist graph dumps are both set to 0 -->
+		<debug_output_path>.</debug_output_path>
+		<output_ast_graphs>0</output_ast_graphs>
+		<output_netlist_graphs>0</output_netlist_graphs>
+	</debug_outputs>
+</config>
\ No newline at end of file
diff --git a/parmys-plugin/tests/raygentop/raygentop.tcl b/parmys-plugin/tests/raygentop/raygentop.tcl
new file mode 100644
index 000000000..fbbe78568
--- /dev/null
+++ b/parmys-plugin/tests/raygentop/raygentop.tcl
@@ -0,0 +1,93 @@
+yosys -import
+# Load the parmys plugin. NOTE(review): the second "yosys -import" below presumably makes the commands registered by the plugin (parmys_arch, parmys) callable from Tcl; confirm.
+plugin -i parmys
+
+yosys -import
+# Read the VTR primitive definitions from +/parmys and set keep_hierarchy on both RAM primitives.
+read_verilog -nomem2reg +/parmys/vtr_primitives.v
+
+setattr -mod -set keep_hierarchy 1 single_port_ram
+
+setattr -mod -set keep_hierarchy 1 dual_port_ram
+
+
+puts "Using parmys as partial mapper"
+
+# Give parmys the target architecture description.
+parmys_arch -a k6_frac_N10_frac_chain_mem32K_40nm.xml
+
+# Read the benchmark design as SystemVerilog with -nolatches.
+read_verilog -sv -nolatches raygentop.v
+
+
+# Check that there are no combinational loops
+# (scc -select selects what it finds; select -assert-none % then fails if that selection is not empty)
+scc -select
+
+select -assert-none %
+
+select -clear
+
+# Resolve the hierarchy and let Yosys pick the top module.
+hierarchy -check -auto-top -purge_lib
+
+# Generic clean-up and coarse-grain optimisation ahead of partial mapping. The pass order is significant; do not reorder.
+opt_expr
+
+opt_clean
+
+check
+
+opt -nodffe -nosdff
+
+procs -norom
+
+fsm
+
+opt
+
+wreduce
+
+peepopt
+
+opt_clean
+
+share
+
+opt -full
+
+memory -nomap
+
+flatten
+
+opt -full
+# Apply the flip-flop techmap files from +/parmys (adff, adffe, aldff and aldffe to dff, judging by the file names).
+techmap -map +/parmys/adff2dff.v
+
+techmap -map +/parmys/adffe2dff.v
+
+techmap -map +/parmys/aldff2dff.v
+
+techmap -map +/parmys/aldffe2dff.v
+
+opt -full
+# Run the parmys partial mapper with the same architecture file and the configuration in odin_config.xml.
+parmys -a k6_frac_N10_frac_chain_mem32K_40nm.xml -nopass -c odin_config.xml
+
+opt -full
+# Default technology mapping and clean-up after parmys.
+techmap 
+
+opt -fast
+
+dffunmap
+
+opt -fast -noff
+
+# Print design statistics to stdout, re-check the hierarchy and write the netlist as BLIF.
+tee -o /dev/stdout stat
+
+hierarchy -check -auto-top -purge_lib
+# NOTE(review): "-true + vcc", "-false + gnd" and "-undef + unconn" presumably name the nets used for constant 1, constant 0 and undefined signals; confirm against the write_blif help.
+write_blif -true + vcc -false + gnd -undef + unconn -blackbox raygentop.yosys.blif
+
diff --git a/parmys-plugin/tests/raygentop/raygentop.v b/parmys-plugin/tests/raygentop/raygentop.v
new file mode 100644
index 000000000..256b3aead
--- /dev/null
+++ b/parmys-plugin/tests/raygentop/raygentop.v
@@ -0,0 +1,2978 @@
+ module paj_raygentop_hierarchy_no_mem (rgwant_addr, rgwant_data, rgread_ready, rgaddr_ready, rgdata_ready, rgwant_read, rgdatain, rgdataout, rgaddrin, rgCont, rgStat, rgCfgData, rgwant_CfgData, rgCfgData_ready, tm3_sram_data_in, tm3_sram_data_out, tm3_sram_addr, tm3_sram_we, tm3_sram_oe, tm3_sram_adsp, clk, fbdata, fbdatavalid, fbnextscanline, raygroup01, raygroupvalid01, busy01, raygroup10, raygroupvalid10, busy10, globalreset, rgData, rgAddr, rgWE, rgAddrValid, rgDone, rgResultData, rgResultReady, rgResultSource);
+    // Top level of the design: declares the external ports and wires the instantiated sub-blocks together.
+    output rgwant_addr; 
+    wire rgwant_addr;
+    output rgwant_data; 
+    wire rgwant_data;
+    output rgread_ready; 
+    wire rgread_ready;
+    input rgaddr_ready; 
+    input rgdata_ready; 
+
+    input rgwant_read; 
+    input[63:0] rgdatain; 
+    output[63:0] rgdataout; 
+    wire[63:0] rgdataout;
+    input[17:0] rgaddrin; 
+    input[31:0] rgCont; 
+    output[31:0] rgStat; 
+    wire[31:0] rgStat;
+    input[31:0] rgCfgData; 
+    output rgwant_CfgData; 
+    wire rgwant_CfgData;
+    input rgCfgData_ready; 
+
+    input[63:0] tm3_sram_data_in; 
+    wire[63:0] tm3_sram_data_in;
+    output[63:0] tm3_sram_data_out; 
+    wire[63:0] tm3_sram_data_out;
+    wire[63:0] tm3_sram_data_xhdl0;
+    output[18:0] tm3_sram_addr; 
+    wire[18:0] tm3_sram_addr;
+    output[7:0] tm3_sram_we; 
+    wire[7:0] tm3_sram_we;
+    output[1:0] tm3_sram_oe; 
+    wire[1:0] tm3_sram_oe;
+    output tm3_sram_adsp; 
+    wire tm3_sram_adsp;
+    input clk; 
+
+    output[63:0] fbdata; 
+    wire[63:0] fbdata;
+    output fbdatavalid; 
+    wire fbdatavalid;
+    input fbnextscanline; 
+    output[1:0] raygroup01; 
+    wire[1:0] raygroup01;
+    output raygroupvalid01; 
+    wire raygroupvalid01;
+    input busy01; 
+    output[1:0] raygroup10; 
+    wire[1:0] raygroup10;
+
+    output raygroupvalid10; 
+    wire raygroupvalid10;
+    input busy10; 
+    input globalreset; 
+    output[31:0] rgData; 
+    wire[31:0] rgData;
+    output[3:0] rgAddr; 
+    wire[3:0] rgAddr;
+    output[2:0] rgWE; 
+    wire[2:0] rgWE;
+    output rgAddrValid; 
+    wire rgAddrValid;
+
+    input rgDone; 
+    input[31:0] rgResultData; 
+    input rgResultReady; 
+    input[1:0] rgResultSource; 
+    // Internal nets connecting the instantiated sub-blocks
+    wire[2:0] statepeek2; 
+    wire as01; 
+    wire ack01; 
+
+    wire[3:0] addr01; 
+    wire[47:0] dir01; 
+    wire[47:0] dir; 
+    wire[47:0] sramdatal; 
+    wire wantDir; 
+    wire dirReady; 
+    wire dirReadyl; 
+    wire[14:0] address; 
+    wire[30:0] cyclecounter; 
+
+    wire nas01; 
+    wire nas10; 
+    wire go; 
+    reg page; 
+    wire[2:0] statepeekct; 
+    // result Signals
+    wire valid01; 
+    wire valid10; 
+    wire[15:0] id01a; 
+    wire[15:0] id01b; 
+    wire[15:0] id01c; 
+    wire[15:0] id10a; 
+
+    wire[15:0] id10b; 
+    wire[15:0] id10c; 
+    wire hit01a; 
+    wire hit01b; 
+    wire hit01c; 
+    wire hit10a; 
+    wire hit10b; 
+    wire hit10c; 
+    wire[7:0] u01a; 
+    wire[7:0] u01b; 
+    wire[7:0] u01c; 
+    wire[7:0] v01a; 
+
+    wire[7:0] v01b; 
+    wire[7:0] v01c; 
+    wire[7:0] u10a; 
+    wire[7:0] u10b; 
+    wire[7:0] u10c; 
+    wire[7:0] v10a; 
+    wire[7:0] v10b; 
+    wire[7:0] v10c; 
+    wire wantwriteback; 
+    wire writebackack; 
+    wire[63:0] writebackdata; 
+    wire[17:0] writebackaddr; 
+
+    wire[17:0] nextaddr01; 
+    // Shading Signals
+    wire[63:0] shadedata; 
+    wire[15:0] triID; 
+    wire wantshadedata; 
+    wire shadedataready; 
+    // CfgData Signals
+    wire[27:0] origx; 
+    wire[27:0] origy; 
+    wire[27:0] origz; 
+    wire[15:0] m11; 
+    wire[15:0] m12; 
+
+    wire[15:0] m13; 
+    wire[15:0] m21; 
+    wire[15:0] m22; 
+    wire[15:0] m23; 
+    wire[15:0] m31; 
+    wire[15:0] m32; 
+    wire[15:0] m33; 
+    wire[20:0] bkcolour; 
+    // Texture signals
+    wire[20:0] texinfo; 
+    wire[3:0] texaddr; 
+    wire[63:0] texel; 
+
+    wire[17:0] texeladdr; 
+    wire wanttexel; 
+    wire texelready; 
+    // Frame Buffer Read Signals
+    wire fbpage; 
+    // debug signals
+    wire wantcfg; 
+    wire debugglobalreset; 
+    // The configuration memory's request (wantcfg) is exported unchanged as rgwant_CfgData
+    assign rgwant_CfgData = wantcfg ;
+    // go is the output of onlyonecycle, triggered by rgCont[0]
+    onlyonecycle onlyeonecycleinst (rgCont[0], go, globalreset, clk); 
+    // page is set to 1 by globalreset and inverts on every other rising clock edge; fbpage is its complement
+    always @(posedge clk)
+    begin
+       if (globalreset == 1'b1)
+       begin
+          page <= 1'b1 ; // Reset to 1 such that first flip sets to 0
+       end
+       else
+
+       begin
+          page <= ~page ; 
+       end 
+    end 
+    assign fbpage = ~page ;
+    // matmult takes the three 16-bit lanes of sramdatal and the coefficients m11..m33 and drives the three lanes of dir
+    matmult matmultinst(sramdatal[47:32], sramdatal[31:16], sramdatal[15:0], m11, m12, m13, m21, m22, m23, m31, m32, m33, dir[47:32], dir[31:16], dir[15:0], clk); 
+    // dirReadyl is dirReady passed through delay1x3
+    delay1x3 dir01delay(dirReady, dirReadyl, clk); 
+    rgconfigmemory ConfigMemoryInst (rgCfgData[31:28], rgCfgData[27:0], rgCfgData_ready, wantcfg, origx, origy, origz, m11, m12, m13, m21, m22, m23, m31, m32, m33, bkcolour, texinfo, globalreset, clk); 
+
+    rgsramcontroller sramcont (rgwant_addr, rgaddr_ready, rgaddrin, rgwant_data, rgdata_ready, rgdatain, rgwant_read, rgread_ready, rgdataout, dirReady, wantDir, sramdatal, address, wantwriteback, writebackack, writebackdata, writebackaddr, fbdata, fbnextscanline, fbdatavalid, fbpage, shadedata, triID, wantshadedata, shadedataready, texeladdr, texel, wanttexel, texelready, tm3_sram_data_in, tm3_sram_data_out, tm3_sram_addr, tm3_sram_we, tm3_sram_oe, tm3_sram_adsp, globalreset, clk);
+    raysend raysendinst (as01, ack01, addr01, dir01, origx, origy, origz, rgData, rgAddr, rgWE, rgAddrValid, rgDone, globalreset, clk, statepeek2); 
+
+    raygencont  raygencontinst(go, rgCont[15:1], rgStat[31], cyclecounter, nextaddr01, nas01, nas10, page, dirReadyl, wantDir, dir, address, as01, addr01, ack01, dir01, raygroup01, raygroupvalid01, busy01, raygroup10, raygroupvalid10, busy10, globalreset, clk, statepeekct); 
+    resultrecieve resultrecieveinst (valid01, valid10, id01a, id01b, id01c, id10a, id10b, id10c, hit01a, hit01b, hit01c, hit10a, hit10b, hit10c, u01a, u01b, u01c, v01a, v01b, v01c, u10a, u10b, u10c, v10a, v10b, v10c, rgResultData, rgResultReady, rgResultSource, globalreset, clk); 
+    assign debugglobalreset = globalreset | go ; // the reset input of resultwriter is asserted by globalreset or by go
+    resultwriter resultwriteinst (valid01, valid10, id01a, id01b, id01c, id10a, id10b, id10c, hit01a, hit01b, hit01c, hit10a, hit10b, hit10c, u01a, u01b, u01c, v01a, v01b, v01c, u10a, u10b, u10c, v10a, v10b, v10c, nextaddr01, nas01, nas10, bkcolour, shadedata, triID, wantshadedata, shadedataready, texinfo, texaddr, texeladdr, texel, wanttexel, texelready, writebackdata, writebackaddr, wantwriteback, writebackack, debugglobalreset, clk);
+    assign rgStat[30:0] = cyclecounter ; // rgStat[31] is connected to the raygencont instance above
+ endmodule
+
+
+// delay1x3: delays the 1-bit input `datain` by three rising edges of `clk`.
+// The three stages are written out explicitly rather than generated by a loop.
+module delay1x3 (datain, dataout, clk);
+
+    input  datain;
+    input  clk;
+    output dataout;
+
+    // Pipeline stages; buff2 holds the oldest sample.
+    reg buff0, buff1, buff2;
+
+    // Non-blocking assignments sample every right-hand side before any
+    // register is updated, so the order of the three statements is free.
+    always @(posedge clk)
+    begin
+       buff2 <= buff1;
+       buff1 <= buff0;
+       buff0 <= datain;
+    end
+    assign dataout = buff2;
+endmodule
+
+
+    
+
+    
+    
+ // onlyonecycle: debugging helper that turns a high level on `trigger` into a
+ // pulse on `output_xhdl0` lasting one clock cycle.
+ //   state 0: idle until trigger is 1      state 1: the pulse is driven
+ //   state 2: wait for trigger to return to 0 before re-arming
+ // globalreset is synchronous and returns the circuit to state 0.
+ module onlyonecycle (trigger, output_xhdl0, globalreset, clk);
+
+    input trigger; 
+    output output_xhdl0; 
+    reg output_xhdl0;
+    input globalreset; 
+    input clk; 
+
+    reg[1:0] state; 
+    reg[1:0] next_state; 
+    reg count; 
+    reg temp_count; 
+
+    always @(posedge clk)
+    begin
+       if (globalreset == 1'b1)
+       begin
+          state <= 0 ; 
+          count <= 0 ; 
+       end
+       else
+       begin
+          state <= next_state ; 
+          count <= temp_count ;
+       end 
+    end 
+
+    always @(state or trigger or count)
+    begin
+       // Defaults so that every signal driven here has a value on every path.
+       // temp_count used to be unassigned in state 2 and nothing was assigned
+       // for state 3, which infers latches (or 'x' under read_verilog
+       // -nolatches). The unused state 3 now returns to idle.
+       output_xhdl0 = 1'b0 ;
+       next_state = 0 ;
+       temp_count = count ;
+       case (state)
+          0 :
+                   begin
+                      if (trigger == 1'b1)
+                      begin
+                         next_state = 1 ; 
+                      end
+                      temp_count = 1 - 1 ; 
+                   end
+          1 :
+                   begin
+                      output_xhdl0 = 1'b1 ; 
+                      if (count == 0)
+                      begin
+                         next_state = 2 ; 
+                      end
+                      else
+                      begin
+                         next_state = 1 ; 
+                      end 
+                      temp_count = count - 1 ; 
+                   end
+          2 :
+                   begin
+                      if (trigger == 1'b0)
+                      begin
+                         next_state = 0 ; 
+                      end
+                      else
+                      begin
+                         next_state = 2 ; 
+                      end 
+                   end
+       endcase 
+    end 
+ endmodule
+
+// matmult: registered 3x3 matrix-by-vector multiply, C = M * A.
+//   Clock 1: the nine 16x16-bit products are stored in 32-bit registers.
+//   Clock 2: bits [30:15] of the three products of each row are added and
+//            stored in the 16-bit outputs Cx, Cy and Cz.
+// The slice is taken from each product before the addition, so the row sum
+// is a 16-bit addition of three already truncated terms.
+module matmult (Ax, Ay, Az, m11, m12, m13, m21, m22, m23, m31, m32, m33, Cx, Cy, Cz, clk);
+
+    // Input vector A
+    input[15:0] Ax, Ay, Az;
+
+    // Matrix coefficients: mRC multiplies component C of A for output row R
+    input[15:0] m11, m12, m13;
+    input[15:0] m21, m22, m23;
+    input[15:0] m31, m32, m33;
+
+    // Result vector C (registered)
+    output[15:0] Cx, Cy, Cz;
+    reg[15:0] Cx, Cy, Cz;
+    input clk;
+
+    // Registered products, one per matrix coefficient
+    reg[31:0] am11, am12, am13;
+    reg[31:0] am21, am22, am23;
+    reg[31:0] am31, am32, am33;
+
+    always @(posedge clk)
+    begin
+       // Products feeding Cx
+       am11 <= Ax * m11;
+       am12 <= Ay * m12;
+       am13 <= Az * m13;
+
+       // Products feeding Cy
+       am21 <= Ax * m21;
+       am22 <= Ay * m22;
+       am23 <= Az * m23;
+
+       // Products feeding Cz
+       am31 <= Ax * m31;
+       am32 <= Ay * m32;
+       am33 <= Az * m33;
+
+       // Row sums use the product registers as they were before this edge.
+       Cx <= am11[30:15]
+           + am12[30:15]
+           + am13[30:15];
+       Cy <= am21[30:15]
+           + am22[30:15]
+           + am23[30:15];
+       Cz <= am31[30:15]
+           + am32[30:15]
+           + am33[30:15];
+    end
+endmodule
+
+    
+    
+
+// rgconfigmemory: configuration registers of the ray generator.
+// A two-state handshake requests data with want_CfgData and accepts one word
+// when CfgData_Ready is high. CfgAddr selects the destination of CfgData:
+//   0001, 0010, 0011 : origx, origy, origz (all 28 bits)
+//   0100 .. 1100     : m11, m12, m13, m21, m22, m23, m31, m32, m33 (low 16 bits)
+//   1101             : bkcolour (low 21 bits)
+//   1110             : write strobe `we` of the spram21x4 instance driving texinfo
+// Synchronous reset (globalreset) clears the registers and sets m11, m22 and
+// m33 to 1.
+module rgconfigmemory (CfgAddr, CfgData, CfgData_Ready, want_CfgData, origx, origy, origz, m11, m12, m13, m21, m22, m23, m31, m32, m33, bkcolour, texinfo, globalreset, clk);
+
+    input[3:0] CfgAddr; 
+    input[27:0] CfgData; 
+    input CfgData_Ready; 
+    output want_CfgData; 
+    reg want_CfgData;
+    // Configuration registers, one output/reg pair each
+    output[27:0] origx; 
+    reg[27:0] origx;
+    output[27:0] origy; 
+    reg[27:0] origy;
+    output[27:0] origz; 
+    reg[27:0] origz;
+    output[15:0] m11; 
+    reg[15:0] m11;
+    output[15:0] m12; 
+    reg[15:0] m12;
+    output[15:0] m13; 
+    reg[15:0] m13;
+    output[15:0] m21; 
+    reg[15:0] m21;
+    output[15:0] m22; 
+    reg[15:0] m22;
+    output[15:0] m23; 
+    reg[15:0] m23;
+    output[15:0] m31; 
+    reg[15:0] m31;
+    output[15:0] m32; 
+    reg[15:0] m32;
+    output[15:0] m33; 
+    reg[15:0] m33;
+    output[20:0] bkcolour; 
+    reg[20:0] bkcolour;
+    output[20:0] texinfo; 
+    wire[20:0] texinfo;
+    input globalreset; 
+    input clk; 
+
+    // Handshake state
+    reg state; 
+    reg next_state; 
+    wire we; 
+
+    // Next values of the configuration registers, computed combinationally.
+    reg[27:0] temp_origx;
+    reg[27:0] temp_origy;
+    reg[27:0] temp_origz;
+    reg[15:0] temp_m11;
+    reg[15:0] temp_m12;
+    reg[15:0] temp_m13;
+    reg[15:0] temp_m21;
+    reg[15:0] temp_m22;
+    reg[15:0] temp_m23;
+    reg[15:0] temp_m31;
+    reg[15:0] temp_m32;
+    reg[15:0] temp_m33;
+    reg[20:0] temp_bkcolour;
+
+    // <<X-HDL>> Can't find translated component 'spram'. Module name may not match
+    spram21x4 spraminst(we, texinfo, CfgData[20:0], clk); 
+    assign we = ((CfgData_Ready == 1'b1) & (CfgAddr == 4'b1110)) ? 1'b1 : 1'b0 ;
+
+    always @(posedge clk)
+    begin
+       if (globalreset == 1'b1)
+       begin
+          // Reset values: everything 0 except m11, m22 and m33, which are 1.
+          state <= 0 ; 
+          origx <= 0;
+          origy <= 0;
+          origz <= 0;
+          m11 <= 1;
+          m12 <= 0;
+          m13 <= 0;
+          m21 <= 0;
+          m22 <= 1;
+          m23 <= 0;
+          m31 <= 0;
+          m32 <= 0;
+          m33 <= 1;
+          bkcolour <= 0;
+       end
+       else
+       begin
+          // Commit the values staged by the combinational block below.
+          state <= next_state ; 
+          origx <= temp_origx;
+          origy <= temp_origy;
+          origz <= temp_origz;
+          m11 <= temp_m11;
+          m12 <= temp_m12;
+          m13 <= temp_m13;
+          m21 <= temp_m21;
+          m22 <= temp_m22;
+          m23 <= temp_m23;
+          m31 <= temp_m31;
+          m32 <= temp_m32;
+          m33 <= temp_m33;
+          // Fixed: this was "bkcolour <= bkcolour", so a write to address
+          // 1101 never reached the register and temp_bkcolour was unused.
+          bkcolour <= temp_bkcolour;
+       end 
+    end 
+
+    // Next-state and next-value logic. @(*) replaces "state or CfgData_Ready":
+    // the block also reads CfgAddr, CfgData and the current register values.
+    always @(*)
+    begin
+       // Default: every register keeps its current value. Without these
+       // assignments the temp_* signals were only written for one address in
+       // state 0, i.e. inferred latches (undefined under read_verilog
+       // -nolatches), and the reset values above were overwritten by them on
+       // the first clock after reset.
+       temp_origx = origx ;
+       temp_origy = origy ;
+       temp_origz = origz ;
+       temp_m11 = m11 ;
+       temp_m12 = m12 ;
+       temp_m13 = m13 ;
+       temp_m21 = m21 ;
+       temp_m22 = m22 ;
+       temp_m23 = m23 ;
+       temp_m31 = m31 ;
+       temp_m32 = m32 ;
+       temp_m33 = m33 ;
+       temp_bkcolour = bkcolour ;
+
+       case (state)
+          0 :
+                   // Requesting: a word is accepted when CfgData_Ready is high.
+                   begin
+                      want_CfgData = 1'b1 ; 
+                      if (CfgData_Ready == 1'b1)
+                      begin
+                         next_state = 1 ; 
+                         case (CfgAddr)
+                            // Ray origin
+                            4'b0001 : temp_origx = CfgData ;
+                            4'b0010 : temp_origy = CfgData ;
+                            4'b0011 : temp_origz = CfgData ;
+                            // Matrix coefficients
+                            4'b0100 : temp_m11 = CfgData[15:0] ;
+                            4'b0101 : temp_m12 = CfgData[15:0] ;
+                            4'b0110 : temp_m13 = CfgData[15:0] ;
+                            4'b0111 : temp_m21 = CfgData[15:0] ;
+                            4'b1000 : temp_m22 = CfgData[15:0] ;
+                            4'b1001 : temp_m23 = CfgData[15:0] ;
+                            4'b1010 : temp_m31 = CfgData[15:0] ;
+                            4'b1011 : temp_m32 = CfgData[15:0] ;
+                            4'b1100 : temp_m33 = CfgData[15:0] ;
+                            // Background colour
+                            4'b1101 : temp_bkcolour = CfgData[20:0] ;
+                            default : ; // 4'b1110 is handled by `we`; other addresses are ignored
+                         endcase
+                      end
+                      else
+                      begin
+                         next_state = 0 ; 
+                      end 
+                   end
+          1 :
+                   // Wait for CfgData_Ready to drop before requesting again.
+                   begin
+                      want_CfgData = 1'b0 ; 
+                      if (CfgData_Ready == 1'b0)
+                      begin
+                         next_state = 0 ; 
+                      end
+                      else
+                      begin
+                         next_state = 1 ; 
+                      end 
+                   end
+       endcase 
+    end 
+ endmodule
+
+    
+    
+ // spram21x4: wrapper around a single_port_ram instance whose ADDR_WIDTH is
+ // set to 8 and whose DATA_WIDTH is set to 21.
+ // The wrapper has no address input: the address presented to the RAM is an
+ // 8-bit shift register that records the last eight samples of `we`.
+ // `we` is also connected to the write enable of the RAM.
+ module spram21x4 (we, dataout, datain, clk);
+
+    input clk;
+    input we;
+    input[20:0] datain;
+    output[20:0] dataout;
+
+    // History of `we`: bit 0 holds the most recent sample, bit 7 the oldest.
+    reg [7:0] addr;
+
+    always @ (posedge clk)
+    begin
+       // Shift the history up by one bit and insert the current `we` at bit 0.
+       addr <= {addr[6:0], we};
+    end
+
+    // changed to odin 2 ram specifications
+    defparam new_ram.ADDR_WIDTH = 8;
+    defparam new_ram.DATA_WIDTH = 21;
+
+    // Ports are connected by name, so their order here is not significant.
+    single_port_ram new_ram(
+       .addr(addr),
+       .data(datain),
+       .we(we),
+       .clk(clk),
+       .out(dataout)
+    );
+
+ endmodule
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+
+module rgsramcontroller (want_addr, addr_ready, addrin, want_data, data_ready, datain, want_read, read_ready, dataout, dirReady, wantDir, sramdatal, addr, wantwriteback, writebackack, writebackdata, writebackaddr, fbdata, fbnextscanline, fbdatavalid, fbpage, shadedata, triID, wantshadedata, shadedataready, texeladdr, texel, wanttexel, texelready, tm3_sram_data_in, tm3_sram_data_out, tm3_sram_addr, tm3_sram_we, tm3_sram_oe, tm3_sram_adsp, globalreset, clk);
+    // State machine that shares the external SRAM port (tm3_sram_*) between several requesters; state 0 picks one request per visit.
+    output want_addr; 
+    reg want_addr;
+    input addr_ready; 
+    input[17:0] addrin; 
+    output want_data; 
+    reg want_data;
+    input data_ready; 
+    input[63:0] datain; 
+    input want_read; 
+    output read_ready; 
+
+    reg read_ready;
+    output[63:0] dataout; 
+    wire[63:0] dataout;
+    output dirReady; 
+    reg dirReady;
+    input wantDir; 
+    output[47:0] sramdatal; 
+    reg[47:0] sramdatal;
+    output[14:0] addr; 
+    wire[14:0] addr;
+    input wantwriteback; 
+    output writebackack; 
+
+    reg writebackack;
+    input[63:0] writebackdata; 
+    input[17:0] writebackaddr; 
+    output[63:0] fbdata; 
+    reg[63:0] fbdata;
+    input fbnextscanline; 
+    output fbdatavalid; 
+    reg fbdatavalid;
+    input fbpage; 
+    output[63:0] shadedata; 
+    wire[63:0] shadedata;
+    input[15:0] triID; 
+
+    input wantshadedata; 
+    output shadedataready; 
+    reg shadedataready;
+    input[17:0] texeladdr; 
+    output[63:0] texel; 
+    wire[63:0] texel;
+    input wanttexel; 
+    output texelready; 
+    reg texelready;
+    input[63:0] tm3_sram_data_in; 
+    wire[63:0] tm3_sram_data_in;
+    output[63:0] tm3_sram_data_out; 
+    wire[63:0] tm3_sram_data_out;
+    reg[63:0] tm3_sram_data_xhdl0;
+
+    output[18:0] tm3_sram_addr; 
+    reg[18:0] tm3_sram_addr;
+    output[7:0] tm3_sram_we; 
+    reg[7:0] tm3_sram_we;
+    output[1:0] tm3_sram_oe; 
+    reg[1:0] tm3_sram_oe;
+    output tm3_sram_adsp; 
+    reg tm3_sram_adsp;
+    input globalreset; 
+    input clk; 
+    // state / next_state: FSM state register and its combinational successor (values 0..10 are decoded below).
+    reg[3:0] state; 
+    reg[3:0] next_state; 
+    reg[17:0] waddress; 
+    reg[14:0] faddress; 
+    reg[6:0] fcount; 
+    reg fbdatavalidl; 
+    // temp_*: combinational next values for the registers with the same base name.
+    reg[17:0] temp_waddress; 
+    reg[14:0] temp_faddress; 
+    reg[6:0] temp_fcount; 
+    reg temp_fbdatavalidl; 
+    reg temp_texelready;
+    reg temp_shadedataready;
+    // Data written to the SRAM comes from tm3_sram_data_xhdl0, which the state decoder drives.
+    assign tm3_sram_data_out = tm3_sram_data_xhdl0;
+    // Every read client sees the raw SRAM input bus; addr is bits [62:48] of that word.
+    assign dataout = tm3_sram_data_in ;
+    assign addr = tm3_sram_data_in[62:48] ;
+    assign shadedata = tm3_sram_data_in ;
+    assign texel = tm3_sram_data_in ;
+    // Sequential part: synchronous reset on globalreset, otherwise load the next values computed by the block below.
+    always @(posedge clk)
+    begin
+       if (globalreset == 1'b1)
+       begin
+
+          state <= 0 ; 
+          waddress <= 0;
+          faddress <= 0;
+          fcount <= 7'b1101011 ; // 107 decimal; state 8 reloads the same value
+          fbdatavalid <= 1'b0 ; 
+          fbdatavalidl <= 1'b0 ; 
+          shadedataready <= 1'b0 ; 
+          texelready <= 1'b0 ; 
+          sramdatal <= 0;
+          fbdata <= 0;
+       end
+       else
+
+       begin
+          state <= next_state ; 
+          sramdatal <= tm3_sram_data_in[47:0] ; 
+          fbdata <= tm3_sram_data_in ; 
+          fbdatavalid <= fbdatavalidl ; 
+          // fbdatavalid is fbdatavalidl delayed by one clock; the registers below load their temp_* values.
+fbdatavalidl <= temp_fbdatavalidl;
+texelready <= temp_texelready;
+shadedataready <= temp_shadedataready;
+fcount <= temp_fcount;
+faddress <= temp_faddress;
+waddress <= temp_waddress;
+
+       end 
+    end 
+    // Combinational part: SRAM pin decode, handshake outputs, next_state and the temp_* next values.
+    always @(state or addr_ready or data_ready or waddress or datain or wantDir or 
+             want_read or wantwriteback or writebackdata or writebackaddr or 
+             fcount or fbpage or faddress or fbnextscanline or triID or wantshadedata or 
+             wanttexel or texeladdr)
+    // NOTE(review): addrin is read below but is not in this sensitivity list; several temp_* signals are not assigned in every state and the case has no default, so they hold their value.
+    begin
+       case (state)
+          // State 0: idle/dispatch -- advertise readiness and pick the next state from the requests in the fixed order of the if/else chain.
+          0 :
+                   begin
+				       tm3_sram_we = 8'b11111111 ; 
+				       tm3_sram_oe = 2'b01 ; 
+				       tm3_sram_adsp = 1'b0 ; 
+				       tm3_sram_data_xhdl0 = 0;
+				       tm3_sram_addr = {1'b0, waddress} ; 
+				       want_addr = 1'b1 ; 
+				       want_data = 1'b1 ; 
+				       read_ready = 1'b1 ; 
+				       dirReady = 1'b0 ; 
+				       writebackack = 1'b0 ; 
+                      if (addr_ready == 1'b1)
+                      begin
+                         next_state = 1 ; 
+                      end
+                      else if (want_read == 1'b1)
+                      begin
+                         next_state = 2 ; 
+                      end
+                      else if (data_ready == 1'b1)
+                      begin
+
+                         next_state = 3 ; 
+                      end
+                      else if (wantDir == 1'b1)
+                      begin
+                         next_state = 5 ; 
+                      end
+                      else if (wantwriteback == 1'b1)
+                      begin
+                         next_state = 6 ; 
+                      end
+                      else if (wantshadedata == 1'b1)
+                      begin
+
+                         next_state = 9 ; 
+                      end
+                      else if (wanttexel == 1'b1)
+                      begin
+                         next_state = 10 ; 
+                      end
+                      else if (fcount != 0)
+                      begin
+                         next_state = 7 ; 
+                      end
+                      else if (fbnextscanline == 1'b1)
+                      begin
+
+                         next_state = 8 ; 
+                      end
+                      else
+                      begin
+                         next_state = 0 ; 
+                      end 
+				          temp_fbdatavalidl = 1'b0 ; 
+				          temp_shadedataready = 1'b0 ; 
+				          temp_texelready = 1'b0 ; 
+                         if (addr_ready == 1'b1)
+
+                         begin
+                            temp_waddress = addrin ; 
+                         end 
+
+                   end
+          1 : // address handshake: want_addr is low until addr_ready drops
+                   begin
+				       tm3_sram_we = 8'b11111111 ; 
+				       tm3_sram_oe = 2'b01 ; 
+				       tm3_sram_adsp = 1'b0 ; 
+				       tm3_sram_data_xhdl0 = 0;
+				       tm3_sram_addr = {1'b0, waddress} ; 
+				       want_data = 1'b1 ; 
+				       read_ready = 1'b1 ; 
+				       dirReady = 1'b0 ; 
+				       writebackack = 1'b0 ; 
+                      want_addr = 1'b0 ; 
+                      if (addr_ready == 1'b0)
+                      begin
+                         next_state = 0 ; 
+
+                      end
+                      else
+                      begin
+                         next_state = 1 ; 
+                      end 
+                   end
+          2 : // read handshake: read_ready is low until want_read drops, then waddress advances by one
+                   begin
+				       tm3_sram_we = 8'b11111111 ; 
+				       tm3_sram_oe = 2'b01 ; 
+				       tm3_sram_adsp = 1'b0 ; 
+				       tm3_sram_data_xhdl0 = 0;
+				       tm3_sram_addr = {1'b0, waddress} ; 
+				       want_addr = 1'b1 ; 
+				       want_data = 1'b1 ; 
+				       dirReady = 1'b0 ; 
+				       writebackack = 1'b0 ; 
+
+                      read_ready = 1'b0 ; 
+                      if (want_read == 1'b0)
+                      begin
+                         next_state = 0 ; 
+                      end
+                      else
+                      begin
+                         next_state = 2 ; 
+                      end 
+
+				          temp_fbdatavalidl = 1'b0 ; 
+				          temp_shadedataready = 1'b0 ; 
+				          temp_texelready = 1'b0 ; 
+                         if (want_read == 1'b0)
+                         begin
+
+                            temp_waddress = waddress + 1 ; 
+                         end 
+
+                   end
+          3 : // write datain to the SRAM at waddress, advance waddress, then go to state 4
+                   begin
+				       tm3_sram_addr = {1'b0, waddress} ; 
+				       want_addr = 1'b1 ; 
+				       read_ready = 1'b1 ; 
+				       dirReady = 1'b0 ; 
+				       writebackack = 1'b0 ; 
+                      tm3_sram_data_xhdl0 = datain ; 
+                      tm3_sram_we = 8'b00000000 ; 
+                      // tm3_sram_we is all zeros only in the two write states (3 and 6) -- presumably active low; confirm against the SRAM interface.
+
+                   tm3_sram_oe = 2'b11 ; 
+                      tm3_sram_adsp = 1'b0 ; 
+                      want_data = 1'b0 ; 
+                      next_state = 4 ; 
+
+				          temp_fbdatavalidl = 1'b0 ; 
+				          temp_shadedataready = 1'b0 ; 
+				          temp_texelready = 1'b0 ; 
+                         temp_waddress = waddress + 1 ; 
+
+                   end
+          4 : // data handshake: want_data is low until data_ready drops
+                   begin
+				       tm3_sram_we = 8'b11111111 ; 
+				       tm3_sram_oe = 2'b01 ; 
+				       tm3_sram_adsp = 1'b0 ; 
+				       tm3_sram_data_xhdl0 = 0;
+				       tm3_sram_addr = {1'b0, waddress} ; 
+				       want_addr = 1'b1 ; 
+				       read_ready = 1'b1 ; 
+				       dirReady = 1'b0 ; 
+				       writebackack = 1'b0 ; 
+                      if (data_ready == 1'b0)
+                      begin
+
+                         next_state = 0 ; 
+                      end
+                      else
+                      begin
+                         next_state = 4 ; 
+                      end 
+                      want_data = 1'b0 ; 
+                   end
+
+          5 : // direction handshake: dirReady is high until wantDir drops, then waddress advances by one
+                   begin
+				       tm3_sram_we = 8'b11111111 ; 
+				       tm3_sram_oe = 2'b01 ; 
+				       tm3_sram_adsp = 1'b0 ; 
+				       tm3_sram_data_xhdl0 = 0;
+				       tm3_sram_addr = {1'b0, waddress} ; 
+				       want_addr = 1'b1 ; 
+				       want_data = 1'b1 ; 
+				       read_ready = 1'b1 ; 
+				       writebackack = 1'b0 ; 
+
+                     dirReady = 1'b1 ; 
+                      if (wantDir == 1'b0)
+                      begin
+                         next_state = 0 ; 
+
+                      end
+                      else
+                      begin
+                         next_state = 5 ; 
+                      end 
+
+				          temp_fbdatavalidl = 1'b0 ; 
+				          temp_shadedataready = 1'b0 ; 
+				          temp_texelready = 1'b0 ; 
+                         if (wantDir == 1'b0)
+                         begin
+                            temp_waddress = waddress + 1 ; 
+                         end 
+
+                   end
+          6 : // writeback: write writebackdata at writebackaddr and raise writebackack for this state
+                   begin
+				       want_addr = 1'b1 ; 
+				       want_data = 1'b1 ; 
+				       read_ready = 1'b1 ; 
+				       dirReady = 1'b0 ; 
+
+                      tm3_sram_data_xhdl0 = writebackdata ; 
+                      tm3_sram_we = 8'b00000000 ; 
+                      tm3_sram_oe = 2'b11 ; 
+                      tm3_sram_adsp = 1'b0 ; 
+                      tm3_sram_addr = {1'b0, writebackaddr} ; 
+                      writebackack = 1'b1 ; 
+                      next_state = 0 ; 
+                   end
+
+          7 : // frame-buffer read at {3'b011, fbpage, faddress}; leaves when fcount is 1 or one of the listed requests is pending
+                   begin
+				       tm3_sram_we = 8'b11111111 ; 
+				       tm3_sram_oe = 2'b01 ; 
+				       tm3_sram_adsp = 1'b0 ; 
+				       tm3_sram_data_xhdl0 = 0;
+				       want_addr = 1'b1 ; 
+				       want_data = 1'b1 ; 
+				       read_ready = 1'b1 ; 
+				       dirReady = 1'b0 ; 
+				       writebackack = 1'b0 ; 
+                      tm3_sram_addr = {3'b011, fbpage, faddress} ; 
+                      if ((fcount == 1) | (addr_ready == 1'b1) | (want_read == 1'b1) | (data_ready == 1'b1) | (wantDir == 1'b1) | (wantwriteback == 1'b1))
+                      begin
+                         next_state = 0 ; 
+
+                      end
+                      else
+                      begin
+                         next_state = 7 ; 
+                      end 
+                      // While fcount is non-zero, step faddress up and fcount down; temp_fbdatavalidl marks the access as valid.
+
+				          temp_shadedataready = 1'b0 ; 
+				          temp_texelready = 1'b0 ; 
+                         temp_fbdatavalidl = 1'b1 ; 
+                         if (fcount != 0)
+                         begin
+                            temp_faddress = faddress + 1 ; 
+                            temp_fcount = fcount - 1 ; 
+                         end 
+
+                   end
+          8 : // entered from state 0 on fbnextscanline: reload fcount, wrap faddress to 0 when it equals 25680, go to state 7
+                   begin
+				       tm3_sram_we = 8'b11111111 ; 
+				       tm3_sram_oe = 2'b01 ; 
+				       tm3_sram_adsp = 1'b0 ; 
+				       tm3_sram_data_xhdl0 = 0;
+				       tm3_sram_addr = {1'b0, waddress} ; 
+				       want_addr = 1'b1 ; 
+				       want_data = 1'b1 ; 
+				       read_ready = 1'b1 ; 
+				       dirReady = 1'b0 ; 
+				       writebackack = 1'b0 ; 
+                      next_state = 7 ; 
+
+				   				          temp_fbdatavalidl = 1'b0 ; 
+				          temp_shadedataready = 1'b0 ; 
+				          temp_texelready = 1'b0 ; 
+                         temp_fcount = 7'b1101011 ; 
+                         if (faddress == 25680)
+                         begin
+                            temp_faddress = 0;
+                         end 
+                   end
+          9 : // shade-data read at {3'b010, triID}; temp_shadedataready is set, then back to state 0
+                   begin
+				       tm3_sram_we = 8'b11111111 ; 
+				       tm3_sram_oe = 2'b01 ; 
+				       tm3_sram_adsp = 1'b0 ; 
+				       tm3_sram_data_xhdl0 = 0;
+				       want_addr = 1'b1 ; 
+				       want_data = 1'b1 ; 
+				       read_ready = 1'b1 ; 
+				       dirReady = 1'b0 ; 
+				       writebackack = 1'b0 ; 
+                      tm3_sram_addr = {3'b010, triID} ; 
+                      next_state = 0 ; 
+
+				          temp_fbdatavalidl = 1'b0 ; 
+				          temp_texelready = 1'b0 ; 
+                         temp_shadedataready = 1'b1 ; 
+                   end
+
+          10 : // texel read at {1'b0, texeladdr}; temp_texelready is set, then back to state 0
+                   begin
+				       tm3_sram_we = 8'b11111111 ; 
+				       tm3_sram_oe = 2'b01 ; 
+				       tm3_sram_adsp = 1'b0 ; 
+				       tm3_sram_data_xhdl0 = 0;
+				       want_addr = 1'b1 ; 
+				       want_data = 1'b1 ; 
+				       read_ready = 1'b1 ; 
+				       dirReady = 1'b0 ; 
+				       writebackack = 1'b0 ; 
+                      tm3_sram_addr = {1'b0, texeladdr} ; 
+                      next_state = 0 ; 
+
+				          temp_fbdatavalidl = 1'b0 ; 
+				          temp_shadedataready = 1'b0 ; 
+                         temp_texelready = 1'b1 ; 
+                   end
+       endcase 
+    end 
+ endmodule
+
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+
+ module raysend (as, ack, addr, dir, origx, origy, origz, rgData, rgAddr, rgWE, rgAddrValid, rgDone, globalreset, clk, statepeek);
+    // Sends one ray as five 32-bit words on rgData (origx, origy, origz, then two direction words), waiting for rgDone after each word.
+    input as; 
+    output ack; 
+    reg ack;
+    input[3:0] addr; 
+    input[47:0] dir; 
+    input[27:0] origx; 
+    input[27:0] origy; 
+    input[27:0] origz; 
+    output[31:0] rgData; 
+    reg[31:0] rgData;
+
+    output[3:0] rgAddr; 
+    reg[3:0] rgAddr;
+    output[2:0] rgWE; 
+    reg[2:0] rgWE;
+    output rgAddrValid; 
+    reg rgAddrValid;
+    input rgDone; 
+    input globalreset; 
+    input clk; 
+    output[2:0] statepeek; 
+    reg[2:0] statepeek;
+    // state / next_state: FSM state register and its combinational successor (values 0..9 are decoded below).
+    reg[3:0] state; 
+    reg[3:0] next_state; 
+
+
+    // temp_*: combinational next values for the output registers of the same name.
+    reg[31:0] temp_rgData;
+    reg[2:0] temp_rgWE; 
+    reg temp_rgAddrValid;
+    reg temp_ack;
+    reg[3:0] temp_rgAddr; 
+    // Sequential part: synchronous reset on globalreset, otherwise load next_state and the temp_* values.
+    always @(posedge clk)
+    begin
+       if (globalreset == 1'b1)
+       begin
+          state <= 0 ; 
+          ack <= 1'b0 ; 
+          rgWE <= 3'b000 ; 
+          rgData <= 0;
+          rgAddrValid <= 1'b0 ; 
+          rgAddr <= 0;
+       end
+       else
+       begin
+          state <= next_state ; 
+
+rgData <= temp_rgData;
+rgWE <= temp_rgWE;
+rgAddrValid <= temp_rgAddrValid;
+ack <= temp_ack;
+rgAddr <= temp_rgAddr;
+
+       end 
+    end 
+    // Combinational part. States are visited in the order 0 -> 1 -> 6 -> 2 -> 7 -> 3 -> 8 -> 4 -> 9 -> 5 -> 0.
+    always @(state or ack or as or rgDone)
+    begin
+       // NOTE(review): addr, origx, origy, origz and dir are read below but are not in the sensitivity list; statepeek is assigned only in states 0-5.
+       case (state)
+          0 : // idle: on as high with ack low, load the origx word (rgWE = 3'b001) and rgAddr; clear ack once as is low
+                   begin
+                      if ((as == 1'b1) & (ack == 1'b0))
+                      begin
+                         next_state = 1 ; 
+                      end
+                      else
+                      begin
+                         next_state = 0 ; 
+                      end 
+                      statepeek = 3'b001 ; 
+
+                         if ((as == 1'b1) & (ack == 1'b0))
+                         begin
+                            temp_rgData = {4'b0000, origx} ; 
+                            temp_rgWE = 3'b001 ; 
+                            temp_rgAddrValid = 1'b1 ; 
+                            temp_rgAddr = addr ; 
+                         end 
+                         if (as == 1'b0 & ack == 1'b1)
+                         begin
+                            temp_ack = 1'b0 ; 
+                         end 
+
+                   end
+          1 : // wait for rgDone after the origx word, then drop rgAddrValid
+                   begin
+                      if (rgDone == 1'b1)
+                      begin
+                         next_state = 6 ; 
+                      end
+                      else
+                      begin
+                         next_state = 1 ; 
+                      end 
+                      statepeek = 3'b010 ; 
+
+                         if (rgDone == 1'b1)
+                         begin
+                            temp_rgAddrValid = 1'b0 ; 
+                         end 
+
+                   end
+          2 : // wait for rgDone after the origy word, then drop rgAddrValid
+                   begin
+                      if (rgDone == 1'b1)
+                      begin
+                         next_state = 7 ; 
+                      end
+                      else
+                      begin
+                         next_state = 2 ; 
+                      end 
+                      statepeek = 3'b011 ; 
+
+                         if (rgDone == 1'b1)
+                         begin
+                            temp_rgAddrValid = 1'b0 ; 
+                         end 
+
+                   end
+           3 : // wait for rgDone after the origz word, then drop rgAddrValid
+                   begin
+                      if (rgDone == 1'b1)
+                      begin
+                         next_state = 8 ; 
+                      end
+                      else
+                      begin
+                         next_state = 3 ; 
+                      end 
+                      statepeek = 3'b100 ; 
+
+                         if (rgDone == 1'b1)
+                         begin
+                            temp_rgAddrValid = 1'b0 ; 
+                         end 
+
+                   end
+         4 : // wait for rgDone after the first direction word, then drop rgAddrValid
+                   begin
+                      if (rgDone == 1'b1)
+                       begin
+                         next_state = 9 ; 
+                      end
+                      else
+                      begin
+                         next_state = 4 ; 
+                      end 
+                      statepeek = 3'b101 ; 
+
+                         if (rgDone == 1'b1)
+                         begin
+                            temp_rgAddrValid = 1'b0 ; 
+                         end 
+                   end
+
+          5 : // wait for rgDone after the last word; temp_ack is driven high throughout this state
+                   begin
+                      if (rgDone == 1'b1)
+                      begin
+                         next_state = 0 ; 
+                      end
+                      else
+                      begin
+                         next_state = 5 ; 
+                      end 
+                      statepeek = 3'b110 ; 
+
+                         temp_ack = 1'b1 ; 
+                         if (rgDone == 1'b1)
+                         begin
+                            temp_rgAddrValid = 1'b0 ; 
+                         end 
+
+                   end
+
+          6 : // issue the origy word (rgWE = 3'b010)
+                   begin
+                      next_state = 2 ; 
+
+                         temp_rgData = {4'b0000, origy} ; 
+                         temp_rgWE = 3'b010 ; 
+                         temp_rgAddrValid = 1'b1 ; 
+
+                   end
+          7 : // issue the origz word (rgWE = 3'b011)
+                   begin
+                      next_state = 3 ; 
+
+                         temp_rgData = {4'b0000, origz} ; 
+                         temp_rgWE = 3'b011 ; 
+                         temp_rgAddrValid = 1'b1 ; 
+                   end
+          8 : // issue the first direction word, {dir[31:16], dir[47:32]} (rgWE = 3'b100)
+                   begin
+                      next_state = 4 ; 
+
+                         temp_rgData = {dir[31:16], dir[47:32]} ; 
+                         temp_rgWE = 3'b100 ; 
+                         temp_rgAddrValid = 1'b1 ; 
+                   end
+           9 : // issue the second direction word, dir[15:0] zero-extended to 32 bits (rgWE = 3'b101)
+                   begin
+                      next_state = 5 ; 
+
+                         temp_rgData = {16'b0000000000000000, dir[15:0]} ; 
+                          temp_rgWE = 3'b101 ; 
+                         temp_rgAddrValid = 1'b1 ; 
+                   end
+       endcase 
+    end 
+ endmodule
+
+    
+    
+    
+    
+    
+
+ module raygencont (go, initcount, busyout, cycles, nextaddr, nas0, nas1, page, dirReady, wantDir, dirIn, addrIn, as, addr, ack, dir, raygroup0, raygroupvalid0, busy0, raygroup1, raygroupvalid1, busy1, globalreset, clk, statepeek);
+    // Control FSM: requests directions (wantDir/dirReady), forwards them with as/ack, and hands ray groups 0 and 1 over via raygroupvalid0/1 and busy0/1.
+    input go; 
+    input[14:0] initcount; 
+    output busyout; 
+    wire busyout;
+    reg temp_busyout;
+    output[30:0] cycles; 
+    reg[30:0] cycles;
+    output[17:0] nextaddr; 
+    wire[17:0] nextaddr;
+    output nas0; 
+
+    wire nas0;
+    reg temp_nas0;
+    output nas1; 
+    wire nas1;
+    reg temp_nas1;
+    input page; 
+    input dirReady; 
+    output wantDir; 
+    reg wantDir;
+    input[47:0] dirIn; 
+    input[14:0] addrIn; 
+    output as; 
+    reg as;
+    output[3:0] addr; 
+
+    reg[3:0] addr;
+    input ack; 
+    output[47:0] dir; 
+    reg[47:0] dir;
+    output[1:0] raygroup0; 
+    wire[1:0] raygroup0;
+    output raygroupvalid0; 
+    reg raygroupvalid0;
+    input busy0; 
+    output[1:0] raygroup1; 
+    wire[1:0] raygroup1;
+    output raygroupvalid1; 
+
+    reg raygroupvalid1;
+    input busy1; 
+    input globalreset; 
+    input clk; 
+    output[2:0] statepeek; 
+    reg[2:0] statepeek;
+
+    // state / next_state: FSM state register and its combinational successor (values 0..4 are decoded below).
+    reg[2:0] state; 
+    reg[2:0] next_state; 
+    reg[14:0] count; 
+    reg first; 
+    reg[17:0] destaddr; 
+    wire[1:0] busy; 
+    reg[1:0] loaded; 
+    reg[1:0] groupID; 
+    reg active; 
+    // temp_*: combinational next values for the registers/outputs with the same base name.
+    reg[47:0] temp_dir;
+    reg[30:0] temp_cycles;
+    reg[1:0] temp_addr;
+    reg[1:0] temp_loaded; 
+    reg[1:0] temp_groupID; 
+    reg[14:0] temp_count; 
+    reg temp_active; 
+    reg temp_raygroupvalid1;
+    reg temp_raygroupvalid0;
+
+    assign busy = {busy1, busy0} ;
+    // Sequential part: synchronous reset on globalreset, otherwise load next_state and the temp_* values.
+    always @(posedge clk)
+    begin
+
+       if (globalreset == 1'b1)
+
+       begin
+          state <= 0 ; 
+          cycles <= 0;
+          dir <= 0;
+          addr[1:0] <= 2'b00 ; 
+          groupID <= 2'b00 ; 
+          count <= 0;
+          first <= 1'b0 ; 
+          destaddr <= 0;
+          raygroupvalid0 <= 1'b0 ; 
+          raygroupvalid1 <= 1'b0 ; 
+          loaded <= 2'b00 ; 
+          // NOTE(review): addr[3:2] is not reset here; first and destaddr are written only by this reset and are not read in this module.
+          active <= 1'b0 ; 
+       end
+       else
+       begin
+    	addr[3:2] <= (active == 1'b0) ? {1'b0, groupID[0]} : {1'b1, groupID[1]} ;
+	addr[1:0] <= temp_addr[1:0];
+        state <= next_state ; 
+
+	dir <= temp_dir;
+	cycles <= temp_cycles;
+	loaded <= temp_loaded;	
+	groupID <= temp_groupID;
+	count <= temp_count;
+	active <= temp_active;
+	raygroupvalid0 <= temp_raygroupvalid0;
+	raygroupvalid1 <= temp_raygroupvalid1;
+
+       end 
+    end 
+    // Continuous outputs: group tags are {group number, groupID bit}; busyout/nas0/nas1 come straight from the state decoder.
+    assign raygroup0 = {1'b0, groupID[0]} ;
+    assign raygroup1 = {1'b1, groupID[1]} ;
+    assign nextaddr = {2'b11, page, addrIn} ;
+    assign busyout = temp_busyout;
+    assign nas0 = temp_nas0;
+    assign nas1 = temp_nas1;
+    // Combinational part. NOTE(review): active, cycles, groupID, initcount and dirIn are read below but are not in the sensitivity list.
+    always @(state or go or ack or busy or dirReady or addr or count or loaded)
+    begin
+       case (state)
+          0 : // idle: wait for go; clear addr/loaded/groupID/active next values and load count from initcount
+                   begin
+       				as = 1'b0 ; 
+       				wantDir = 1'b0 ; 
+                      if (go == 1'b1)
+                      begin
+                         next_state = 1 ; 
+                      end
+                      else
+                      begin
+                         next_state = 0 ; 
+                      end 
+                      statepeek = 3'b001 ; 
+						temp_busyout = 1'b0;
+						temp_nas0 = 1'b0;
+						temp_nas1 = 1'b0;
+
+
+                         if (go == 1'b1)
+                         begin
+                            temp_cycles = 0;
+                         end 
+                         temp_addr[1:0] = 2'b00 ; 
+                         temp_loaded = 2'b00 ; 
+                         temp_groupID = 2'b00 ; 
+                         temp_count = initcount ; 
+                         temp_active = 1'b0 ; 
+
+                   end
+          1 : // request a direction (wantDir high) and wait for dirReady; as follows dirReady
+                   begin
+                      as = dirReady ; 
+                      wantDir = 1'b1 ; 
+                      if (dirReady == 1'b1)
+                      begin
+                         next_state = 2 ; 
+                      end
+                      else
+                      begin
+                         next_state = 1 ; 
+                      end 
+                     statepeek = 3'b010 ; 
+						temp_busyout = 1'b1;
+    				if (addr[1:0] == 2'b00 & dirReady == 1'b1 & active == 1'b0) 
+					begin
+						 temp_nas0 = 1'b1;
+						 temp_nas1 = 1'b1;
+					end
+                         // When the direction for addr[1:0] == 2'b10 arrives, mark the group selected by active as loaded.
+                         temp_dir = dirIn ; 
+                         if (dirReady == 1'b1 & addr[1:0] == 2'b10)
+                         begin
+                            if (active == 1'b0)
+                            begin
+                               temp_loaded[0] = 1'b1 ; 
+                            end
+                            else
+                            begin
+                               temp_loaded[1] = 1'b1 ; 
+                            end 
+                         end 
+             temp_cycles = cycles + 1 ; 
+
+
+                   end
+          2 : // present the direction (as high) and wait for ack
+                   begin
+                      wantDir = 1'b0 ; 
+                      as = 1'b1 ; 
+                      if ((ack == 1'b1) & (addr[1:0] != 2'b10))
+                      begin
+                         next_state = 1 ; 
+                      end
+                      else if (ack == 1'b1)
+                      begin
+                         if ((loaded[0]) == 1'b1 & (busy[0]) == 1'b0)
+                         begin
+                            next_state = 3 ; 
+                         end
+                         else if ((loaded[1]) == 1'b1 & (busy[1]) == 1'b0)
+                         begin
+                            next_state = 4 ; 
+                         end
+                         else if (loaded != 2'b11)
+                         begin
+
+                            next_state = 1 ; 
+                         end
+                         else
+                         begin
+                            next_state = 2 ; 
+                         end 
+                      end
+                      else
+                      begin
+                         next_state = 2 ; 
+                      end 
+                      statepeek = 3'b011 ; 
+						temp_busyout = 1'b1;
+						temp_nas0 = 1'b0;
+						temp_nas1 = 1'b0;
+                         // Before 2'b10, ack steps addr[1:0] up by one; at 2'b10 it validates a loaded, non-busy group or switches loading to an unloaded one.
+                         if ((ack == 1'b1) & (addr[1:0] != 2'b10))
+                         begin
+                            temp_addr[1:0] = addr[1:0] + 2'b01 ; 
+                         end 
+                         else if ((ack == 1'b1) & addr[1:0] == 2'b10)
+                         begin
+                            if ((loaded[0]) == 1'b1 & (busy[0]) == 1'b0)
+                            begin
+                               temp_raygroupvalid0 = 1'b1 ; 
+                            end
+                            else if ((loaded[1]) == 1'b1 & (busy[1]) == 1'b0)
+                            begin
+
+                               temp_raygroupvalid1 = 1'b1 ; 
+                            end
+                            else if ((loaded[0]) == 1'b0)
+                            begin
+                               temp_active = 1'b0 ; 
+                               temp_addr[1:0] = 2'b00 ; 
+                            end
+                            else if ((loaded[1]) == 1'b0)
+                            begin
+                               temp_active = 1'b1 ; 
+                               temp_addr[1:0] = 2'b00 ; 
+                            end 
+                         end 
+
+             temp_cycles = cycles + 1 ; 
+                   end
+          4 : // group 1 hand-over: stay until busy[1] is high, then flip groupID[1], clear raygroupvalid1 and decrement count
+                   begin
+                      if ((busy[1]) == 1'b0)
+                      begin
+                         next_state = 4 ; 
+                      end
+                      else if ((loaded[0]) == 1'b1 & (busy[0]) == 1'b0)
+                      begin
+                         next_state = 3 ; 
+                      end
+                      else if (count > 0)
+                      begin
+
+                         next_state = 1 ; 
+                      end
+                      else
+                      begin
+                         next_state = 0 ; 
+                      end 
+                      statepeek = 3'b101 ; 
+						temp_busyout = 1'b1;
+						temp_nas0 = 1'b0;
+						temp_nas1 = 1'b0;
+
+                     if ((busy[1]) == 1'b1)
+                         begin
+                            temp_groupID[1] = ~groupID[1] ; 
+                            temp_raygroupvalid1 = 1'b0 ; 
+                            temp_count = count - 1 ; 
+                            if ((loaded[0]) == 1'b1 & (busy[0]) == 1'b0)
+                            begin
+                               temp_raygroupvalid0 = 1'b1 ; 
+                            end
+
+                            else if ((loaded[0]) == 1'b0)
+                            begin
+                               temp_active = 1'b0 ; 
+                            end
+                            else
+                            begin
+                               temp_active = 1'b1 ; 
+                            end 
+                         end 
+                         temp_loaded[1] = 1'b0 ; 
+                         temp_addr[1:0] = 2'b00 ; 
+
+             temp_cycles = cycles + 1 ; 
+                   end
+          3 : // group 0 hand-over: stay until busy[0] is high, then flip groupID[0], clear raygroupvalid0 and decrement count
+                   begin
+                      if ((busy[0]) == 1'b0)
+                      begin
+                         next_state = 3 ; 
+
+                      end
+                      else if ((loaded[1]) == 1'b1 & (busy[1]) == 1'b0)
+                      begin
+                         next_state = 4 ; 
+                      end
+                      else if (count > 0)
+                      begin
+                         next_state = 1 ; 
+                      end
+                      else
+                      begin
+                         next_state = 0 ; 
+
+                      end 
+                      statepeek = 3'b100 ; 
+						temp_busyout = 1'b1;
+						temp_nas0 = 1'b0;
+						temp_nas1 = 1'b0;
+
+                         if ((busy[0]) == 1'b1)
+                         begin
+                            temp_groupID[0] = ~groupID[0] ; 
+                            temp_raygroupvalid0 = 1'b0 ; 
+                            temp_count = count - 1 ; 
+                            if ((loaded[1]) == 1'b1 & (busy[1]) == 1'b0)
+                            begin
+                               temp_raygroupvalid1 = 1'b1 ; 
+
+                            end
+                            else if ((loaded[1]) == 1'b0)
+                            begin
+                               temp_active = 1'b1 ; 
+                            end
+                            else
+                            begin
+                               temp_active = 1'b0 ; 
+                            end 
+                         end 
+                         temp_loaded[0] = 1'b0 ; 
+                         temp_addr[1:0] = 2'b00 ; 
+
+
+             temp_cycles = cycles + 1 ; 
+                   end
+       endcase 
+    end 
+ endmodule
+    
+    
+    
+    
+    
+    
+    
+
+ module resultrecieve (valid01, valid10, id01a, id01b, id01c, id10a, id10b, id10c, hit01a, hit01b, hit01c, hit10a, hit10b, hit10c, u01a, u01b, u01c, v01a, v01b, v01c, u10a, u10b, u10c, v10a, v10b, v10c, rgResultData, rgResultReady, rgResultSource, globalreset, clk);
+    // Unpacks four-word result packets from rgResultData into the registered "01" or "10" output set and pulses that set's valid flag when the last word has been taken.
+    output valid01;          // 1 for one clock after the last word of a source-01 packet (state 3)
+    reg valid01;
+    output valid10;          // 1 for one clock after the last word of a source-10 packet (state 6)
+    reg valid10;
+    output[15:0] id01a;      // source 01, word 0, bits [31:16]
+    reg[15:0] id01a;
+    output[15:0] id01b;      // source 01, word 0, bits [15:0]
+    reg[15:0] id01b;
+    output[15:0] id01c;      // source 01, word 1, bits [15:0]
+    reg[15:0] id01c;
+
+    output[15:0] id10a;      // source 10, word 0, bits [31:16]
+    reg[15:0] id10a;
+    output[15:0] id10b;      // source 10, word 0, bits [15:0]
+    reg[15:0] id10b;
+    output[15:0] id10c;      // source 10, word 1, bits [15:0]
+    reg[15:0] id10c;
+    output hit01a;           // source 01, word 1, bit 18
+    reg hit01a;
+    output hit01b;           // source 01, word 1, bit 17
+    reg hit01b;
+    output hit01c;           // source 01, word 1, bit 16
+    reg hit01c;
+
+    output hit10a;           // source 10, word 1, bit 18
+    reg hit10a;
+    output hit10b;           // source 10, word 1, bit 17
+    reg hit10b;
+    output hit10c;           // source 10, word 1, bit 16
+    reg hit10c;
+    output[7:0] u01a;        // source 01, word 2, bits [23:16]
+    reg[7:0] u01a;
+    output[7:0] u01b;        // source 01, word 2, bits [15:8]
+    reg[7:0] u01b;
+    output[7:0] u01c;        // source 01, word 2, bits [7:0]
+    reg[7:0] u01c;
+
+    output[7:0] v01a;        // source 01, word 3, bits [23:16]
+    reg[7:0] v01a;
+    output[7:0] v01b;        // source 01, word 3, bits [15:8]
+    reg[7:0] v01b;
+    output[7:0] v01c;        // source 01, word 3, bits [7:0]
+    reg[7:0] v01c;
+    output[7:0] u10a;        // source 10, word 2, bits [23:16]
+    reg[7:0] u10a;
+    output[7:0] u10b;        // source 10, word 2, bits [15:8]
+    reg[7:0] u10b;
+    output[7:0] u10c;        // source 10, word 2, bits [7:0]
+    reg[7:0] u10c;
+
+    output[7:0] v10a;        // source 10, word 3, bits [23:16]
+    reg[7:0] v10a;
+    output[7:0] v10b;        // source 10, word 3, bits [15:8]
+    reg[7:0] v10b;
+    output[7:0] v10c;        // source 10, word 3, bits [7:0]
+    reg[7:0] v10c;
+    input[31:0] rgResultData;    // packet word currently being presented
+    input rgResultReady;         // sampled in state 0 only: starts a packet
+    input[1:0] rgResultSource;   // sampled in state 0 only: 2'b01 or 2'b10 picks the output set
+    input globalreset;           // synchronous, active-high reset
+    input clk;
+    // Next-value copies of the outputs, written by the combinational block and registered on the clock edge.
+    reg temp_valid01;
+    reg temp_valid10;
+    reg[15:0] temp_id01a;
+    reg[15:0] temp_id01b;
+    reg[15:0] temp_id01c;
+    reg[15:0] temp_id10a;
+    reg[15:0] temp_id10b;
+    reg[15:0] temp_id10c;
+    reg temp_hit01a;
+    reg temp_hit01b;
+    reg temp_hit01c;
+    reg temp_hit10a;
+    reg temp_hit10b;
+    reg temp_hit10c;
+    reg[7:0] temp_u01a;
+    reg[7:0] temp_u01b;
+    reg[7:0] temp_u01c;
+    reg[7:0] temp_v01a;
+    reg[7:0] temp_v01b;
+    reg[7:0] temp_v01c;
+    reg[7:0] temp_u10a;
+    reg[7:0] temp_u10b;
+    reg[7:0] temp_u10c;
+    reg[7:0] temp_v10a;
+    reg[7:0] temp_v10b;
+    reg[7:0] temp_v10c;
+
+    // State encoding: 0 = idle / word 0, 1..3 = words 1..3 of a source-01 packet, 4..6 = words 1..3 of a source-10 packet.
+    reg[2:0] state;
+    reg[2:0] next_state;
+    // Registers: state and every output are updated on the rising clock edge.
+    always @(posedge clk)
+    begin
+       if (globalreset == 1'b1)
+       begin
+          state <= 0 ;
+          valid01 <= 1'b0 ;
+          valid10 <= 1'b0 ;
+          hit01a <= 1'b0 ;
+          hit01b <= 1'b0 ;
+          hit01c <= 1'b0 ;
+          hit10a <= 1'b0 ;
+          hit10b <= 1'b0 ;
+          hit10c <= 1'b0 ;
+          id01a <= 0;
+
+          id01b <= 0;
+          id01c <= 0;
+          id10a <= 0;
+          id10b <= 0;
+          id10c <= 0;
+          u01a <= 0;
+          u01b <= 0;
+          u01c <= 0;
+          v01a <= 0;
+          v01b <= 0;
+          v01c <= 0;
+          u10a <= 0;
+
+          u10b <= 0;
+          u10c <= 0;
+          v10a <= 0;
+          v10b <= 0;
+          v10c <= 0;
+       end
+       else
+       begin
+          state <= next_state ;
+          // Every output follows its temp_* value; the temp_* signals are not reset.
+          valid01 <= temp_valid01;
+          valid10 <= temp_valid10;
+          id01a <= temp_id01a;
+          id01b <= temp_id01b;
+          id01c <= temp_id01c;
+          hit01a <= temp_hit01a;
+          hit01b <= temp_hit01b;
+          hit01c <= temp_hit01c;
+          u01a <= temp_u01a;
+          u01b <= temp_u01b;
+          u01c <= temp_u01c;
+          u10a <= temp_u10a;
+          u10b <= temp_u10b;
+          u10c <= temp_u10c;
+          v01a <= temp_v01a;
+          v01b <= temp_v01b;
+          v01c <= temp_v01c;
+          v10a <= temp_v10a;
+          v10b <= temp_v10b;
+          v10c <= temp_v10c;
+          hit10a <= temp_hit10a;
+          hit10b <= temp_hit10b;
+          hit10c <= temp_hit10c;
+       end
+    end
+    // Next-state and unpack logic. rgResultData is read in every state, so it must be in the sensitivity list; without it a simulator keeps stale bus values.
+    // Each temp_* signal is assigned in only some states and keeps its value elsewhere (level-sensitive storage); state 7 has no arm, so nothing changes there.
+    always @(state or rgResultReady or rgResultSource or rgResultData)
+    begin
+       case (state)
+          0 :
+                   begin
+                      if (rgResultReady == 1'b1 & rgResultSource == 2'b01)
+                      begin
+                         next_state = 1 ;
+                      end
+                      else if (rgResultReady == 1'b1 & rgResultSource == 2'b10)
+                      begin
+
+                         next_state = 4 ;
+                      end
+                      else
+                      begin
+                         next_state = 0 ;
+                      end
+
+                      // Word 0 carries the first two IDs of whichever source is announced.
+                      temp_valid01 = 1'b0 ;
+                      temp_valid10 = 1'b0 ;
+                      if (rgResultReady == 1'b1 & rgResultSource == 2'b01)
+                      begin
+                         temp_id01a = rgResultData[31:16] ;
+                         temp_id01b = rgResultData[15:0] ;
+                      end
+                      else if (rgResultReady == 1'b1 & rgResultSource == 2'b10)
+                      begin
+                         temp_id10a = rgResultData[31:16] ;
+                         temp_id10b = rgResultData[15:0] ;
+                      end
+
+                   end
+
+          1 :
+                   begin
+                      next_state = 2 ;
+                      // Source 01, word 1: third ID and the three hit flags.
+                      temp_valid01 = 1'b0 ;
+                      temp_valid10 = 1'b0 ;
+                      temp_id01c = rgResultData[15:0] ;
+                      temp_hit01a = rgResultData[18] ;
+                      temp_hit01b = rgResultData[17] ;
+                      temp_hit01c = rgResultData[16] ;
+
+                   end
+          2 :
+
+                   begin
+                      next_state = 3 ;
+                      // Source 01, word 2: the three u bytes.
+                      temp_valid01 = 1'b0 ;
+                      temp_valid10 = 1'b0 ;
+                      temp_u01a = rgResultData[23:16] ;
+                      temp_u01b = rgResultData[15:8] ;
+                      temp_u01c = rgResultData[7:0] ;
+
+                   end
+          3 :
+                   begin
+                      next_state = 0 ;
+                      // Source 01, word 3: the three v bytes; the packet is complete, so valid01 is raised.
+                      temp_valid10 = 1'b0 ;
+                      temp_v01a = rgResultData[23:16] ;
+                      temp_v01b = rgResultData[15:8] ;
+                      temp_v01c = rgResultData[7:0] ;
+                      temp_valid01 = 1'b1 ;
+
+                   end
+          4 :
+                   begin
+                      next_state = 5 ;
+                      // Source 10, word 1: third ID and the three hit flags.
+                      temp_valid01 = 1'b0 ;
+                      temp_valid10 = 1'b0 ;
+                      temp_id10c = rgResultData[15:0] ;
+
+                      temp_hit10a = rgResultData[18] ;
+                      temp_hit10b = rgResultData[17] ;
+                      temp_hit10c = rgResultData[16] ;
+
+                   end
+          5 :
+
+                   begin
+                      next_state = 6 ;
+                      // Source 10, word 2: the three u bytes.
+                      temp_valid01 = 1'b0 ;
+                      temp_valid10 = 1'b0 ;
+                      temp_u10a = rgResultData[23:16] ;
+                      temp_u10b = rgResultData[15:8] ;
+                      temp_u10c = rgResultData[7:0] ;
+
+                   end
+          6 :
+                   begin
+                      next_state = 0 ;
+                      // Source 10, word 3: the three v bytes; the packet is complete, so valid10 is raised.
+                      temp_valid01 = 1'b0 ;
+                      temp_v10a = rgResultData[23:16] ;
+                      temp_v10b = rgResultData[15:8] ;
+                      temp_v10c = rgResultData[7:0] ;
+                      temp_valid10 = 1'b1 ;
+
+                   end
+       endcase
+    end
+ endmodule
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+
+ module resultwriter (valid01, valid10, id01a, id01b, id01c, id10a, id10b, id10c, hit01a, hit01b, hit01c, hit10a, hit10b, hit10c, u01a, u01b, u01c, v01a, v01b, v01c, u10a, u10b, u10c, v10a, v10b, v10c, addr, as01, as10, bkcolour, shadedata, triID, wantshadedata, shadedataready, texinfo, texaddr, texeladdr, texel, wanttexel, texelready, dataout, addrout, write, ack, globalreset, clk);
+    // For each pending result set (01 or 10): obtains a 21-bit value for slots a, b and c, packs them into dataout and holds write high until ack.
+    input valid01;             // sets the pending01 request bit
+    input valid10;             // sets the pending10 request bit
+    input[15:0] id01a;         // driven onto triID for slot a/b/c when the 01 set is processed
+    input[15:0] id01b;
+    input[15:0] id01c;
+    input[15:0] id10a;         // driven onto triID for slot a/b/c when the 10 set is processed
+    input[15:0] id10b;
+    input[15:0] id10c;
+
+    input hit01a;              // a slot whose hit flag is 0 receives bkcolour
+    input hit01b;
+    input hit01c;
+    input hit10a;
+    input hit10b;
+    input hit10c;
+    input[7:0] u01a;           // the u/v inputs are only forwarded to the bilinearintrp instance
+    input[7:0] u01b;
+    input[7:0] u01c;
+    input[7:0] v01a;
+    input[7:0] v01b;
+    input[7:0] v01c;
+
+    input[7:0] u10a;
+    input[7:0] u10b;
+    input[7:0] u10c;
+    input[7:0] v10a;
+    input[7:0] v10b;
+    input[7:0] v10c;
+    input[17:0] addr;          // data input of both fifo3 instances
+    input as01;                // passed to the 01 fifo3 instance
+    input as10;                // passed to the 10 fifo3 instance
+    input[20:0] bkcolour;      // value stored for a slot without a hit
+    input[63:0] shadedata;     // reply to a triID request; [20:0] is stored directly, [63:62] select further processing
+    output[15:0] triID;
+
+    reg[15:0] triID;
+    output wantshadedata;
+    reg wantshadedata;
+    input shadedataready;
+    input[20:0] texinfo;       // latched into texinfol in states 3, 6 and 7
+    output[3:0] texaddr;
+    wire[3:0] texaddr;
+    output[17:0] texeladdr;
+    wire[17:0] texeladdr;
+    input[63:0] texel;
+    output wanttexel;
+    reg wanttexel;
+
+    input texelready;
+    output[63:0] dataout;
+    // PAJ see lower note wire[63:0] dataout;
+    reg[63:0] dataout;
+    output[17:0] addrout;
+    wire[17:0] addrout;
+    output write;
+    wire write;
+    reg temp_write;
+    input ack;
+    input globalreset;         // synchronous, active-high reset
+    input clk;
+    // State order for one set: 0 -> 2 [-> 3 -> 8 [-> 11]] -> 4 [-> 6 -> 9 [-> 12]] -> 5 [-> 7 -> 10 [-> 13]] -> 1 -> 0.
+    reg[3:0] state;
+    reg[3:0] next_state;
+    reg pending01;
+    reg pending10;
+    reg process01;             // 1 while the 01 set is being handled, 0 for the 10 set
+    wire[17:0] addrout01;
+    wire[17:0] addrout10;
+    wire shiften01;
+    wire shiften10;
+    reg temp_shiften01;
+    reg temp_shiften10;
+    reg[20:0] shadedataa;      // values collected for slots a, b and c
+    reg[20:0] shadedatab;
+    reg[20:0] shadedatac;
+    wire hita;                 // hit flags of the set selected by process01
+    wire hitb;
+    wire hitc;
+
+    reg[2:0] selectuv;         // bit 2 = ~process01, bits [1:0] = slot index (0 = a, 1 = b, 2 = c)
+    wire[6:0] blr;
+    wire[6:0] blg;
+    wire[6:0] blb;
+    reg texmap;                // set from shadedata[63:62] == 2'b01 when a slot has a hit
+    reg lmenable;
+    wire[1:0] texelselect;
+    wire[6:0] texelr;
+    wire[6:0] texelg;
+    wire[6:0] texelb;
+    reg[20:0] texinfol;
+    // Next-value copies written by the combinational block and registered on the clock edge.
+    reg temp_pending01;
+    reg temp_pending10;
+    reg temp_process01;
+    reg temp_texmap;
+    reg[20:0] temp_texinfol;
+    reg[20:0] temp_shadedataa;
+    reg[20:0] temp_shadedatab;
+    reg[20:0] temp_shadedatac;
+
+    col16to21 col16to21inst (texel, texelselect, texelr, texelg, texelb);
+    linearmap linearmapinst (blb, blg, texinfol[17:0], texeladdr, texelselect, texinfol[20:18], lmenable, clk);
+    bilinearintrp bilinearimp (u01a, u01b, u01c, v01a, v01b, v01c, u10a, u10b, u10c, v10a, v10b, v10c, selectuv, shadedata[41:35], shadedata[62:56], shadedata[20:14], shadedata[34:28], shadedata[55:49], shadedata[13:7], shadedata[27:21], shadedata[48:42], shadedata[6:0], blr, blg, blb, clk);
+    fifo3 fifo3insta (addr, as01, addrout01, shiften01, globalreset, clk);
+    fifo3 fifo3instb (addr, as10, addrout10, shiften10, globalreset, clk);
+    assign hita = (hit01a & process01) | (hit10a & ~process01) ;
+    assign hitb = (hit01b & process01) | (hit10b & ~process01) ;
+    assign hitc = (hit01c & process01) | (hit10c & ~process01) ;
+    assign texaddr = shadedata[59:56] ;
+    assign shiften01 = temp_shiften01;
+    assign shiften10 = temp_shiften10;
+    assign write = temp_write;
+
+    // Registers. dataout is not cleared by globalreset and is rebuilt every clock from the slot registers.
+    always @(posedge clk)
+    begin
+       if (globalreset == 1'b1)
+       begin
+          state <= 0 ;
+          pending01 <= 1'b0 ;
+          pending10 <= 1'b0 ;
+          shadedataa <= 0;
+          shadedatab <= 0;
+          shadedatac <= 0;
+          process01 <= 1'b0 ;
+          texmap <= 1'b0 ;
+
+          texinfol <= 0;
+       end
+       else
+       begin
+          state <= next_state ;
+
+          process01 <= temp_process01;
+          pending01 <= temp_pending01;
+          pending10 <= temp_pending10;
+          texmap <= temp_texmap;
+          texinfol <= temp_texinfol;
+          shadedataa <= temp_shadedataa;
+          shadedatab <= temp_shadedatab;
+          shadedatac <= temp_shadedatac;
+          // dataout = {1'b0, shadedataa, shadedatab, shadedatac}, written out bit by bit.
+          dataout <= {1'b0,
+                      shadedataa[20],
+                      shadedataa[19],
+                      shadedataa[18],
+                      shadedataa[17],
+                      shadedataa[16],
+                      shadedataa[15],
+                      shadedataa[14],
+                      shadedataa[13],
+                      shadedataa[12],
+                      shadedataa[11],
+                      shadedataa[10],
+                      shadedataa[9],
+                      shadedataa[8],
+                      shadedataa[7],
+                      shadedataa[6],
+                      shadedataa[5],
+                      shadedataa[4],
+                      shadedataa[3],
+                      shadedataa[2],
+                      shadedataa[1],
+                      shadedataa[0],
+                      shadedatab[20],
+                      shadedatab[19],
+                      shadedatab[18],
+                      shadedatab[17],
+                      shadedatab[16],
+                      shadedatab[15],
+                      shadedatab[14],
+                      shadedatab[13],
+                      shadedatab[12],
+                      shadedatab[11],
+                      shadedatab[10],
+                      shadedatab[9],
+                      shadedatab[8],
+                      shadedatab[7],
+                      shadedatab[6],
+                      shadedatab[5],
+                      shadedatab[4],
+                      shadedatab[3],
+                      shadedatab[2],
+                      shadedatab[1],
+                      shadedatab[0],
+                      shadedatac[20],
+                      shadedatac[19],
+                      shadedatac[18],
+                      shadedatac[17],
+                      shadedatac[16],
+                      shadedatac[15],
+                      shadedatac[14],
+                      shadedatac[13],
+                      shadedatac[12],
+                      shadedatac[11],
+                      shadedatac[10],
+                      shadedatac[9],
+                      shadedatac[8],
+                      shadedatac[7],
+                      shadedatac[6],
+                      shadedatac[5],
+                      shadedatac[4],
+                      shadedatac[3],
+                      shadedatac[2],
+                      shadedatac[1],
+                      shadedatac[0]} ;
+       end
+    // The concatenation above is registered rather than a continuous assignment:
+    // PAJ used to be assign, but weird error, so added as register   assign dataout = {1'b0,
+    end
+    assign addrout = (process01 == 1'b1) ? addrout01 : addrout10 ;
+    // Control logic. The sensitivity list names every signal read below; valid01/valid10, bkcolour, texinfo, bl* and texel* were missing, so simulators missed their changes.
+    always @(state or process01 or pending01 or pending10 or valid01 or valid10 or ack or shadedataready or
+             id01a or id01b or id01c or id10a or id10b or id10c or selectuv or hita or hitb or hitc or
+             shadedata or bkcolour or texinfo or texmap or texelready or blr or blg or blb or texelr or texelg or texelb)
+    begin
+       case (state)
+          0 :
+          begin
+             wantshadedata = 1'b0 ;
+             triID = 0;
+             selectuv = 0;
+             lmenable = 1'b0 ;
+             wanttexel = 1'b0 ;
+             if (pending01 == 1'b1 | pending10 == 1'b1)
+             begin
+                next_state = 2 ;
+             end
+             else
+             // Idle: nothing pending yet.
+             begin
+                next_state = 0 ;
+             end
+             if (valid01 == 1'b1)
+             begin
+                temp_pending01 = 1'b1 ;
+             end
+             if (valid10 == 1'b1)
+             begin
+                temp_pending10 = 1'b1 ;
+             end
+             temp_process01 = pending01 ;   // the 01 set is chosen whenever it is pending
+
+             temp_shiften01 = 1'b0;
+             temp_shiften10 = 1'b0;
+             temp_write = 1'b0;
+          end
+          2 :
+          begin
+             lmenable = 1'b0 ;
+             wanttexel = 1'b0 ;
+             wantshadedata = 1'b1 ;
+             selectuv[2] = ~process01 ;
+             selectuv[1:0] = 2'b00 ;
+             if (process01 == 1'b1)
+             begin
+                triID = id01a ;
+
+             end
+             else
+             begin
+                triID = id10a ;
+             end
+             if (shadedataready == 1'b1)
+             begin
+                if (hita == 1'b1 & ((shadedata[63]) == 1'b1 | shadedata[63:62] == 2'b01))
+                begin
+                   next_state = 3 ;
+                end
+                else
+                // Slot a needs no further processing: continue with slot b.
+                begin
+                   next_state = 4 ;
+                end
+             end
+             else
+             begin
+                next_state = 2 ;
+             end
+
+             if (valid01 == 1'b1)
+             begin
+                temp_pending01 = 1'b1 ;
+             end
+             if (valid10 == 1'b1)
+             begin
+                temp_pending10 = 1'b1 ;
+             end
+
+             if (hita == 1'b1)
+             begin
+                temp_shadedataa = shadedata[20:0] ;
+                temp_texmap = (~shadedata[63]) & shadedata[62] ;
+             end
+             else
+             begin
+                temp_shadedataa = bkcolour ;
+             end
+
+             temp_shiften01 = 1'b0;
+             temp_shiften10 = 1'b0;
+             temp_write = 1'b0;
+          end
+          3 :
+          begin
+             wantshadedata = 1'b0 ;
+             triID = 0;
+             lmenable = 1'b0 ;
+             wanttexel = 1'b0 ;
+             selectuv[2] = ~process01 ;
+
+             selectuv[1:0] = 2'b00 ;
+             next_state = 8 ;
+
+             if (valid01 == 1'b1)
+             begin
+                temp_pending01 = 1'b1 ;
+             end
+             if (valid10 == 1'b1)
+             begin
+                temp_pending10 = 1'b1 ;
+             end
+             temp_texinfol = texinfo ;
+
+             temp_shiften01 = 1'b0;
+             temp_shiften10 = 1'b0;
+             temp_write = 1'b0;
+
+          end
+          8 :
+          begin
+             wantshadedata = 1'b0 ;
+             triID = 0;
+             wanttexel = 1'b0 ;
+             selectuv[2] = ~process01 ;
+             selectuv[1:0] = 2'b00 ;
+             lmenable = 1'b1 ;
+             if (texmap == 1'b1)
+             begin
+
+                next_state = 11 ;
+             end
+             else
+             begin
+                next_state = 4 ;
+             end
+
+             if (valid01 == 1'b1)
+             begin
+                temp_pending01 = 1'b1 ;
+             end
+             if (valid10 == 1'b1)
+             begin
+                temp_pending10 = 1'b1 ;
+             end
+             temp_shadedataa[6:0] = blb ;
+             temp_shadedataa[13:7] = blg ;
+             temp_shadedataa[20:14] = blr ;
+
+             temp_shiften01 = 1'b0;
+             temp_shiften10 = 1'b0;
+             temp_write = 1'b0;
+          end
+          11 :
+          begin
+             wantshadedata = 1'b0 ;
+             triID = 0;
+             selectuv = 0;
+             lmenable = 1'b0 ;
+
+             wanttexel = 1'b1 ;
+             if (texelready == 1'b1)
+             begin
+                next_state = 4 ;
+             end
+             else
+             begin
+                next_state = 11 ;
+             end
+
+             if (valid01 == 1'b1)
+             begin
+                temp_pending01 = 1'b1 ;
+             end
+             if (valid10 == 1'b1)
+             begin
+                temp_pending10 = 1'b1 ;
+             end
+
+             temp_shadedataa[6:0] = texelb ;
+             temp_shadedataa[13:7] = texelg ;
+             temp_shadedataa[20:14] = texelr ;
+
+             temp_shiften01 = 1'b0;
+             temp_shiften10 = 1'b0;
+             temp_write = 1'b0;
+          end
+          12 :
+          begin
+             wantshadedata = 1'b0 ;
+             triID = 0;
+             selectuv = 0;
+             lmenable = 1'b0 ;
+
+             wanttexel = 1'b1 ;
+             if (texelready == 1'b1)
+             begin
+                next_state = 5 ;
+             end
+             else
+             begin
+                next_state = 12 ;
+             end
+
+             if (valid01 == 1'b1)
+             begin
+                temp_pending01 = 1'b1 ;
+             end
+             if (valid10 == 1'b1)
+             begin
+                temp_pending10 = 1'b1 ;
+             end
+             temp_shadedatab[6:0] = texelb ;
+             temp_shadedatab[13:7] = texelg ;
+             temp_shadedatab[20:14] = texelr ;
+
+             temp_shiften01 = 1'b0;
+             temp_shiften10 = 1'b0;
+             temp_write = 1'b0;
+          end
+          13 :
+          begin
+             wantshadedata = 1'b0 ;
+             triID = 0;
+             selectuv = 0;
+             lmenable = 1'b0 ;
+
+             wanttexel = 1'b1 ;
+             if (texelready == 1'b1)
+             begin
+                next_state = 1 ;
+             end
+             else
+             begin
+                next_state = 13 ;
+             end
+
+             if (valid01 == 1'b1)
+             begin
+                temp_pending01 = 1'b1 ;
+             end
+             if (valid10 == 1'b1)
+             begin
+                temp_pending10 = 1'b1 ;
+             end
+
+             temp_shadedatac[6:0] = texelb ;
+             temp_shadedatac[13:7] = texelg ;
+             temp_shadedatac[20:14] = texelr ;
+             // NOTE(review): unlike states 11 and 12, temp_shiften01/10 and temp_write are not assigned here and keep their previous values - confirm this is intended.
+          end
+          6 :
+          begin
+             wantshadedata = 1'b0 ;
+             triID = 0;
+             lmenable = 1'b0 ;
+             wanttexel = 1'b0 ;
+
+             selectuv[2] = ~process01 ;
+             selectuv[1:0] = 2'b01 ;
+             next_state = 9 ;
+
+             if (valid01 == 1'b1)
+             begin
+                temp_pending01 = 1'b1 ;
+             end
+             if (valid10 == 1'b1)
+             begin
+                temp_pending10 = 1'b1 ;
+             end
+             temp_texinfol = texinfo ;
+
+             temp_shiften01 = 1'b0;
+             temp_shiften10 = 1'b0;
+             temp_write = 1'b0;
+          end
+          9 :
+          begin
+             wantshadedata = 1'b0 ;
+             triID = 0;
+             wanttexel = 1'b0 ;
+             selectuv[2] = ~process01 ;
+             selectuv[1:0] = 2'b01 ;
+             lmenable = 1'b1 ;
+             if (texmap == 1'b1)
+             begin
+                next_state = 12 ;
+
+             end
+             else
+             begin
+                next_state = 5 ;
+             end
+
+             if (valid01 == 1'b1)
+             begin
+                temp_pending01 = 1'b1 ;
+             end
+             if (valid10 == 1'b1)
+             begin
+                temp_pending10 = 1'b1 ;
+             end
+
+             temp_shadedatab[6:0] = blb ;
+             temp_shadedatab[13:7] = blg ;
+             temp_shadedatab[20:14] = blr ;
+
+             temp_shiften01 = 1'b0;
+             temp_shiften10 = 1'b0;
+             temp_write = 1'b0;
+          end
+          7 :
+          begin
+             wantshadedata = 1'b0 ;
+             triID = 0;
+             lmenable = 1'b0 ;
+             wanttexel = 1'b0 ;
+             selectuv[2] = ~process01 ;
+             selectuv[1:0] = 2'b10 ;
+             next_state = 10 ;
+
+             if (valid01 == 1'b1)
+             begin
+                temp_pending01 = 1'b1 ;
+             end
+             if (valid10 == 1'b1)
+             begin
+                temp_pending10 = 1'b1 ;
+             end
+             temp_texinfol = texinfo ;
+
+             temp_shiften01 = 1'b0;
+             temp_shiften10 = 1'b0;
+             temp_write = 1'b0;
+          end
+
+          10 :
+          begin
+             wantshadedata = 1'b0 ;
+             triID = 0;
+             wanttexel = 1'b0 ;
+             selectuv[2] = ~process01 ;
+             selectuv[1:0] = 2'b10 ;
+             if (texmap == 1'b1)
+             begin
+                next_state = 13 ;
+             end
+             else
+             begin
+                next_state = 1 ;
+             end
+
+             lmenable = 1'b1 ;
+
+             if (valid01 == 1'b1)
+             begin
+                temp_pending01 = 1'b1 ;
+             end
+             if (valid10 == 1'b1)
+             begin
+                temp_pending10 = 1'b1 ;
+             end
+             temp_shadedatac[6:0] = blb ;
+             temp_shadedatac[13:7] = blg ;
+             temp_shadedatac[20:14] = blr ;
+
+             temp_shiften01 = 1'b0;
+             temp_shiften10 = 1'b0;
+             temp_write = 1'b0;
+          end
+          4 :
+          begin
+             wantshadedata = 1'b0 ;   // NOTE(review): states 2 and 5 raise wantshadedata while waiting for shadedataready; this one does not - confirm
+             lmenable = 1'b0 ;
+             wanttexel = 1'b0 ;
+             selectuv[2] = ~process01 ;
+             selectuv[1:0] = 2'b01 ;
+             if (process01 == 1'b1)
+             begin
+                triID = id01b ;
+             end
+             else
+             begin
+
+                triID = id10b ;
+             end
+             if (shadedataready == 1'b1)
+             begin
+                if (hitb == 1'b1 & ((shadedata[63]) == 1'b1 | shadedata[63:62] == 2'b01))
+                begin
+                   next_state = 6 ;
+                end
+                else
+                begin
+                   next_state = 5 ;
+                end
+
+             end
+             else
+             begin
+                next_state = 4 ;
+             end
+
+             if (valid01 == 1'b1)
+             begin
+                temp_pending01 = 1'b1 ;
+             end
+             if (valid10 == 1'b1)
+             begin
+                temp_pending10 = 1'b1 ;
+             end
+
+             if (hitb == 1'b1)
+             begin
+                temp_shadedatab = shadedata[20:0] ;
+                temp_texmap = (~shadedata[63]) & shadedata[62] ;
+             end
+             else
+             begin
+                temp_shadedatab = bkcolour ;
+             end
+
+             temp_shiften01 = 1'b0;
+             temp_shiften10 = 1'b0;
+             temp_write = 1'b0;
+          end
+          5 :
+          begin
+             lmenable = 1'b0 ;
+             wanttexel = 1'b0 ;
+             wantshadedata = 1'b1 ;
+             selectuv[2] = ~process01 ;
+             selectuv[1:0] = 2'b10 ;
+             if (process01 == 1'b1)
+
+             begin
+                triID = id01c ;
+             end
+             else
+             begin
+                triID = id10c ;
+             end
+             if (shadedataready == 1'b1)
+             begin
+                if (hitc == 1'b1 & ((shadedata[63]) == 1'b1 | shadedata[63:62] == 2'b01))
+                begin
+                   next_state = 7 ;
+
+                end
+                else
+                begin
+                   next_state = 1 ;
+                end
+             end
+             else
+             begin
+                next_state = 5 ;
+             end
+
+             if (valid01 == 1'b1)
+             begin
+                temp_pending01 = 1'b1 ;
+             end
+             if (valid10 == 1'b1)
+             begin
+                temp_pending10 = 1'b1 ;
+             end
+
+             if (hitc == 1'b1)
+             begin
+                temp_shadedatac = shadedata[20:0] ;
+                temp_texmap = (~shadedata[63]) & shadedata[62] ;
+             end
+             else
+             begin
+                temp_shadedatac = bkcolour ;
+             end
+
+             temp_shiften01 = 1'b0;
+             temp_shiften10 = 1'b0;
+             temp_write = 1'b0;
+          end
+          1 :
+          // Write state: write stays high until ack; the pending bit of the set just handled is cleared on ack.
+          begin
+             wantshadedata = 1'b0 ;
+             triID = 0;
+             selectuv = 0;
+             lmenable = 1'b0 ;
+             wanttexel = 1'b0 ;
+             if (ack == 1'b1)
+             begin
+                next_state = 0 ;
+             end
+             else
+             begin
+                next_state = 1 ;
+             end
+
+             if (ack == 1'b1 & process01 == 1'b1)
+             begin
+                temp_pending01 = 1'b0 ;
+             end
+
+             else if (ack == 1'b1 & process01 == 1'b0)
+             begin
+                temp_pending10 = 1'b0 ;
+             end
+             // NOTE(review): both shift enables rise together and only for the 01 set; an ack for the 10 set shifts neither FIFO - confirm against fifo3.
+             if (process01 == 1'b1 &  ack == 1'b1)
+             begin
+                temp_shiften01 = 1'b1;
+                temp_shiften10 = 1'b1;
+             end
+             temp_write = 1'b1;
+          end
+       endcase
+    end
+ endmodule
+ //////////////////////////////////////////////////////////////////////////////////////////////
+ //
+ // Verilog file generated by X-HDL - Revision 3.2.38  Jan. 9, 2004 
+ // Sun Feb  8 14:14:35 2004
+ //
+ //      Input file         : G:/jamieson/VERILOG_BENCHMARKS/RAYTRACE/col16to21.vhd
+ //      Design name        : col16to21
+ //      Author             : 
+ //      Company            : 
+ //
+ //      Description        : 
+ //
+ //
+ //////////////////////////////////////////////////////////////////////////////////////////////
+ //
+ module col16to21 (dataline, texelselect, r, g, b);
+    // Picks one 16-bit word out of the 64-bit data line and widens it to
+    // three 7-bit channels by appending zero low-order bits.
+    //   dataline    : four packed 16-bit words, word 0 in bits [15:0]
+    //   texelselect : index (0..3) of the word to unpack
+    //   r, g, b     : 7-bit channels built from the selected word
+
+    input  [63:0] dataline;
+    input  [1:0]  texelselect;
+    output [6:0]  r;
+    output [6:0]  g;
+    output [6:0]  b;
+
+    wire   [6:0]  r;
+    wire   [6:0]  g;
+    wire   [6:0]  b;
+
+    // The 16-bit word chosen by texelselect.
+    reg    [15:0] col16;
+
+    // Fields of the word: r = [15:10], g = [9:5], b = [4:0].
+    // r is padded with one zero bit, g and b with two, giving 7 bits each.
+    assign b = {col16[4:0], 2'b00};
+    assign g = {col16[9:5], 2'b00};
+    assign r = {col16[15:10], 1'b0};
+
+    // Combinational 4:1 selection; every texelselect value has an arm.
+    always @(texelselect or dataline)
+    begin
+       case (texelselect)
+          2'b00 : col16 = dataline[15:0];
+          2'b01 : col16 = dataline[31:16];
+          2'b10 : col16 = dataline[47:32];
+          2'b11 : col16 = dataline[63:48];
+       endcase
+    end
+
+ endmodule
+ module linearmap (u, v, start, addr, texelselect, factor, enable, clk);
+    // Registered address generator:
+    //    addr <= start + ul[6:2] + (vl << factor)
+    // where ul and vl are copies of u and v captured while enable is high.
+    //   u, v        : 7-bit inputs, loaded into ul / vl on a clock edge with enable = 1
+    //   start       : 18-bit base value added into every address
+    //   factor      : number of zero bits (0..7) appended below vl before the add
+    //   addr        : 18-bit registered sum, refreshed on every clock edge
+    //   texelselect : the two low bits of ul
+    //   enable      : load strobe for ul and vl
+    //   clk         : clock for ul, vl and addr; the module has no reset
+
+    input  [6:0]  u;
+    input  [6:0]  v;
+    input  [17:0] start;
+    input  [2:0]  factor;
+    input         enable;
+    input         clk;
+    output [17:0] addr;
+    output [1:0]  texelselect;
+
+    reg    [17:0] addr;
+    wire   [1:0]  texelselect;
+
+    // Captured copies of u and v.
+    reg    [6:0]  ul;
+    reg    [6:0]  vl;
+
+    assign texelselect = ul[1:0];
+
+    // Input capture. With enable low the registers simply keep their
+    // contents, so no explicit hold branch is written.
+    always @(posedge clk)
+    begin
+       if (enable == 1'b1)
+       begin
+          ul <= u;
+          vl <= v;
+       end
+    end
+
+    // Address register. The right-hand sides read ul and vl as they were
+    // before this clock edge, so a newly captured pair reaches addr one
+    // edge later. Every operand is zero-extended to 18 bits.
+    always @(posedge clk)
+    begin
+       case (factor)
+          3'b000 :
+             addr <= start + {13'b0000000000000, ul[6:2]} + {11'b00000000000, vl};
+          3'b001 :
+             addr <= start + {13'b0000000000000, ul[6:2]} + {10'b0000000000, vl, 1'b0};
+          3'b010 :
+             addr <= start + {13'b0000000000000, ul[6:2]} + {9'b000000000, vl, 2'b00};
+          3'b011 :
+             addr <= start + {13'b0000000000000, ul[6:2]} + {8'b00000000, vl, 3'b000};
+          3'b100 :
+             addr <= start + {13'b0000000000000, ul[6:2]} + {7'b0000000, vl, 4'b0000};
+          3'b101 :
+             addr <= start + {13'b0000000000000, ul[6:2]} + {6'b000000, vl, 5'b00000};
+          3'b110 :
+             addr <= start + {13'b0000000000000, ul[6:2]} + {5'b00000, vl, 6'b000000};
+          3'b111 :
+             addr <= start + {13'b0000000000000, ul[6:2]} + {4'b0000, vl, 7'b0000000};
+       endcase
+    end
+
+    // Note: addr is recomputed on every clock edge, independent of enable,
+    // so changes on start or factor show up after the next edge.
+
+ endmodule
+     module bilinearintrp (u01a, u01b, u01c, v01a, v01b, v01c, u10a, u10b, u10c, v10a, v10b, v10c, selectuv, ru, rv, rw, gu, gv, gw, bu, bv, bw, r, g, b, clk);
+        // Selects one (u, v) weight pair, then forms r/g/b as sums of three weight*colour products.
+        input[7:0] u01a; 
+        input[7:0] u01b; 
+        input[7:0] u01c; 
+        input[7:0] v01a; 
+        input[7:0] v01b; 
+        input[7:0] v01c; 
+        input[7:0] u10a; 
+        input[7:0] u10b; 
+        input[7:0] u10c; 
+        input[7:0] v10a; 
+        input[7:0] v10b; 
+        input[7:0] v10c; 
+        input[2:0] selectuv; 
+        input[6:0] ru; 
+        input[6:0] rv; 
+        input[6:0] rw; 
+        input[6:0] gu; 
+        input[6:0] gv; 
+        input[6:0] gw; 
+        input[6:0] bu; 
+        input[6:0] bv; 
+        input[6:0] bw; 
+        output[6:0] r; 
+        wire[6:0] r;
+        output[6:0] g; 
+        wire[6:0] g;
+        output[6:0] b; 
+        wire[6:0] b;
+        input clk; 
+        // u/v: selected weights; ul/vl/wl: registered weights; i*: products; *ul/*vl/*wl: registered colours.
+        reg[7:0] u; 
+        reg[7:0] v; 
+        reg[7:0] ul; 
+        reg[7:0] vl; 
+        reg[7:0] wl; 
+        reg[14:0] i1b; 
+        reg[14:0] i2b; 
+        reg[14:0] i3b; 
+        reg[14:0] i1g; 
+        reg[14:0] i2g; 
+        reg[14:0] i3g; 
+        reg[14:0] i1r; 
+        reg[14:0] i2r; 
+        reg[14:0] i3r; 
+        reg[6:0] rul; 
+        reg[6:0] rvl; 
+        reg[6:0] rwl; 
+        reg[6:0] gul; 
+        reg[6:0] gvl; 
+        reg[6:0] gwl; 
+        reg[6:0] bul; 
+        reg[6:0] bvl; 
+        reg[6:0] bwl; 
+        // Combinational mux: selectuv picks the (u, v) pair; 3'b011 and 3'b111 take the default (0, 0).
+        always @(selectuv or u01a or u01b or u01c or v01a or v01b or v01c or u10a or 
+                 u10b or u10c or v10a or v10b or v10c)
+        begin
+           case (selectuv)
+              3'b000 :
+                       begin
+                          u = u01a ; 
+                          v = v01a ; 
+                       end
+              3'b001 :
+                       begin
+                          u = u01b ; 
+						 v = v01b ; 
+                       end
+              3'b010 :
+                       begin
+                          u = u01c ; 
+                          v = v01c ; 
+                       end
+              3'b100 :
+                       begin
+                          u = u10a ; 
+                          v = v10a ; 
+                       end
+              3'b101 :
+                       begin
+                          u = u10b ; 
+                          v = v10b ; 
+                       end
+              3'b110 :
+                       begin
+                          u = u10c ; 
+                          v = v10c ; 
+                       end
+              default :
+                       begin
+                          u = 0;
+                          v = 0;
+                       end
+           endcase 
+        end 
+        // Non-blocking stage: each product uses the weight and colour values held before this clock edge.
+        always @(posedge clk)
+        begin
+           wl <= 8'b11111111 - u - v ; // third weight: 255 - u - v
+           ul <= u ; 
+           vl <= v ; 
+           rul <= ru ; 
+           rvl <= rv ; 
+           rwl <= rw ; 
+           gul <= gu ; 
+           gvl <= gv ; 
+           gwl <= gw ; 
+           bul <= bu ; 
+           bvl <= bv ; 
+           bwl <= bw ; 
+           i1r <= ul * rul ; 
+           i2r <= vl * rvl ; 
+           i3r <= wl * rwl ; 
+           i1g <= ul * gul ; 
+           i2g <= vl * gvl ; 
+           i3g <= wl * gwl ; 
+           i1b <= ul * bul ; 
+           i2b <= vl * bvl ; 
+           i3b <= wl * bwl ;  
+        end 
+        assign r = (i1r + i2r + i3r) ; // NOTE(review): 15-bit products drive a 7-bit output, so only bits [6:0] are kept - confirm intended
+        assign g = (i1g + i2g + i3g) ; // NOTE(review): same 15-bit to 7-bit truncation as r
+        assign b = (i1b + i2b + i3b) ; // NOTE(review): same 15-bit to 7-bit truncation as r
+     endmodule
+
+
+
+module fifo3 (datain, writeen, dataout, shiften, globalreset, clk);
+    // Three-slot, 18-bit-wide FIFO; dataout always shows slot data0.
+    input[18 - 1:0] datain; 
+    input writeen; 
+    output[18 - 1:0] dataout; 
+    wire[18 - 1:0] dataout;
+    input shiften; 
+    input globalreset; 
+    input clk; 
+    // Storage slots; shifting moves data2 -> data1 -> data0.
+    reg[18 - 1:0] data0; 
+    reg[18 - 1:0] data1; 
+    reg[18 - 1:0] data2; 
+    // pos is incremented on a write-only cycle and decremented on a shift-only cycle.
+    reg[1:0] pos; 
+
+    assign dataout = data0 ;
+    // globalreset is sampled on the clock edge and clears pos and all three slots.
+    always @(posedge clk)
+    begin
+       if (globalreset == 1'b1)
+       begin
+          pos <= 2'b00 ; 
+          data0 <= 0 ; 
+          data1 <= 0 ; 
+          data2 <= 0 ; 
+       end
+       else
+       begin
+          if (writeen == 1'b1 & shiften == 1'b1) // simultaneous write and shift: pos is left unchanged
+          begin
+             case (pos)
+                2'b00 : // pos == 0: all slots are cleared and datain is not stored
+                         begin
+                            data0 <= 0 ; 
+                            data1 <= 0 ; 
+                            data2 <= 0 ; 
+                         end
+
+                2'b01 :
+                         begin
+                            data0 <= datain ; 
+                            data1 <= 0 ; 
+                            data2 <= 0 ; 
+                         end
+                2'b10 :
+                         begin
+                            data0 <= data1 ; 
+                            data1 <= datain ; 
+                            data2 <= 0 ; 
+                         end
+
+                2'b11 :
+                         begin
+                            data0 <= data1 ; 
+                            data1 <= data2 ; 
+                            data2 <= datain ; 
+                         end
+             endcase 
+          end
+          else if (shiften == 1'b1) // shift only: data2 keeps its value
+          begin
+             data0 <= data1 ; 
+             data1 <= data2 ; 
+             pos <= pos - 1 ; // NOTE(review): no guard for pos == 0; the 2-bit counter would wrap to 3
+          end
+          else if (writeen == 1'b1)
+          begin
+             case (pos) // NOTE(review): no 2'b11 branch, so nothing is stored when pos == 3, yet pos still increments below
+                2'b00 :
+                         begin
+                            data0 <= datain ; 
+                         end
+                2'b01 :
+    					begin
+                            data1 <= datain ; 
+                         end
+                2'b10 :
+                         begin
+                            data2 <= datain ; 
+                         end
+             endcase 
+             pos <= pos + 1 ; 
+          end 
+       end 
+    end 
+ endmodule
+
diff --git a/third_party/pugixml b/third_party/pugixml
index 95683943b..a0e064336 160000
--- a/third_party/pugixml
+++ b/third_party/pugixml
@@ -1 +1 @@
-Subproject commit 95683943bba726729079886d0967112a60fa71aa
+Subproject commit a0e064336317c9347a91224112af9933598714e9
diff --git a/third_party/vtr/LICENSE.md b/third_party/vtr/LICENSE.md
new file mode 100644
index 000000000..01332da43
--- /dev/null
+++ b/third_party/vtr/LICENSE.md
@@ -0,0 +1,69 @@
+# VTR License
+
+The software package "VTR" includes the software tools ODIN II, ABC, and VPR as
+well as additional benchmarks, documentation, libraries and scripts. The authors
+of the various components of VTR retain their ownership of their tools.
+
+* Unless otherwise noted (in particular ABC, the benchmark circuits and some libraries),
+all software, documents, and scripts in VTR, follows the standard MIT license described
+[here](http://www.opensource.org/licenses/mit-license.php) copied below for
+your convenience:
+
+> The MIT License (MIT)
+>
+> Copyright 2012 VTR Developers
+>
+> Permission is hereby granted, free of charge, to any person obtaining a copy of
+> this software and associated documentation files (the "Software"), to deal in
+> the Software without restriction, including without limitation the rights to
+> use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+> of the Software, and to permit persons to whom the Software is furnished to do
+> so, subject to the following conditions:
+>
+> The above copyright notice and this permission notice shall be included in all
+> copies or substantial portions of the Software.
+>
+> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+> SOFTWARE.
+
+* Terms and conditions for ABC is found
+[here](http://www.eecs.berkeley.edu/~alanmi/abc/copyright.htm) copied below
+for your convenience:
+
+> Copyright (c) The Regents of the University of California. All rights reserved.
+>
+> Permission is hereby granted, without written agreement and without license or
+> royalty fees, to use, copy, modify, and distribute this software and its
+> documentation for any purpose, provided that the above copyright notice and the
+> following two paragraphs appear in all copies of this software.
+>
+> IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
+> DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
+> THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
+> CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+>
+> THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+> BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+> A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS,
+> AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
+> SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+
+The benchmark circuits are all open source but each have their own
+individual terms and conditions which are listed in the source code of each
+benchmark.
+
+Subject to these conditions, the software is provided free of charge to all
+interested parties.
+
+If you do decide to use this tool, please reference our work as references are
+important in academia.
+
+Donations in the form of research grants to promote further research and
+development on the tools will be gladly accepted, either anonymously or with
+attribution on our future publications.
+
diff --git a/third_party/vtr/arch/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml b/third_party/vtr/arch/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
new file mode 100644
index 000000000..8170d72b0
--- /dev/null
+++ b/third_party/vtr/arch/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
@@ -0,0 +1,3246 @@
+<!--
+    This is the architecture file for a modern Intel FPGA. The blocks (logic, RAM, DSP)
+    are Agilex-like, but the routing architecture is similar to Stratix IV. It is based
+    off the Stratix-10-like Architecture discussed in [1], the Agilex-like Architecture
+    mentioned in [6] and Stratix-IV-like Architecture mentioned in [5].
+
+    The delays and areas of various components in this arch come from COFFE [2]
+    runs using a 22nm technology node [3].
+
+    ##############################
+    Parameters
+    ##############################
+    Parameter | Value | Definition
+    __________|_______|______________________________
+    N         |    10 | Number of BLEs per cluster
+    W         |   300 | Channel width
+    L         |  4,16 | Wire segment length
+    I         |    60 | Number of cluster inputs
+    O         |    40 | Number of cluster outputs
+    K         |     6 | LUT size
+    Fs        |     3 | Switch block flexibility
+    Fcin      |  0.15 | Cluster input flexibility
+    Fcout     |   0.1 | Cluster output flexibility
+    Fclocal   |   0.5 | Local input crossbar population
+
+    ##############################
+    Logic Cluster
+    ##############################
+    This architecture has 10 ALMs (or FLEs: Fracturable Logic Elements) per Logic Cluster
+    (or LAB or CLB), where each ALM is a 6-LUT fracturable into
+    two 5-LUTs. The ALM has 8 inputs and 4 optionally registered outputs. The two 5-LUTs should
+    share at least two inputs. Each two ALM outputs are logically equivalent, which means any
+    output signal that can reach ALM.out[0] can reach ALM.out[1] and the same thing for
+    ALM.out[2] and ALM.out[3]. The ALMs in this architecture have an arithmetic mode
+    where each 5-LUT is fractured into two 4-LUTs, resulting in a total of four 4-LUTs and two
+    bits of addition per ALM. This architecture has a single carry chain that spans the 10 ALMs
+    in the LAB.
+
+    The LAB (or Logic Cluster or CLB) has 60 inputs and 40 outputs. Two outputs of each ALM are fed 
+    to the right and left LAB using direct links and are also fed back to the LAB as feedback connections 
+    sharing the 60 input ports with the signals coming from the routing channels.
+
+    The LAB has a 50% sparsely populated input crossbar.
+    
+    ##############################
+    DSP Slice
+    ##############################
+    This architecture has a DSP block that supports the following modes:
+
+    Fixed point modes:
+    _________________
+    1. 27x27 fixed point multiplier (multiply)
+    2. 27x27 fixed point mac (mac_int_27x27)
+    3. Two 18x19 fixed point multipliers (multiply)
+    4. Two 18x19 fixed point macs (mac_int_18x19)
+    5. Four 9x9 fixed point multipliers (multiply)
+    6. Four 9x9 fixed point macs (mac_int_9x9)
+    7. 27x27 plus 64 mode (mult_add_mode_27_27_64/mult_add_int_27x27). 27 * 27 + 64 -> 64. result = ax * ay + bx + chainin. chainout = result 
+    8. 18x19 sum-of-2 mode (sop_2_mode/int_sop_2) result = (bx * by) + (ax * ay) + chainin. chainout = result    
+    9. 18x19 plus 36 mode (mult_add_mode_18_19_36/mult_add_int_18x19). 18 * 19 + 36 -> 64. result = ax * ay + bx + chainin. chainout = result 
+    10. 9x9 sum-of-4 mode (sop_4_mode/int_sop_4) result = (dx * dy) + (cx * cy) + (bx * by) + (ax * ay) + chainin. chainout = result 
+    11. 9x9 sum-of-4 accum mode (sop_4_accum_mode/int_sop_accum_4) result = (dx * dy) + (cx * cy) + (bx * by) + (ax * ay) + chainin + accumulator. chainout = result 
+
+    Floating point modes:
+    ____________________
+
+    IMPORTANT:
+    The precisions supported are IEEE floating point 32-bit, IEEE floating point 16-bit and
+    Brain floating point (BF16). In the 16-bit mode descriptions, wherever "fp16" is used, it
+    refers to either IEEE floating point 16-bit or BF16. There are mode bits on the DSP slice
+    that can be used to differentiate between them. Doing this saves the effort of explicitly
+    specifying all the 16-bit modes twice in this file. 
+    Since the goal is architectural exploration and not functional simulation, the mode bits 
+    can be specified to any random value while instantiating the DSP slice in a Verilog benchmark.
+
+    1A. One fp32 multiplier (mult_fp_32)
+    1B. One fp32 multiplier, clocked (mult_fp_clk_32)
+    2A. One fp32 adder/subtractor (addition_fp_32)
+    2B. One fp32 adder/subtractor, clocked (addition_fp_clk_32)
+    3. One fp32 mac (mac_fp_32)
+    4A. Two fp16 multipliers (mult_fp_16)
+    4B. Two fp16 multipliers, clocked (mult_fp_clk_16)
+    5A. Two fp16 adders/subtractors (addition_fp_16)
+    5B. Two fp16 adders/subtractors, clocked (addition_fp_clk_16)
+    6. Two fp16 macs (mac_fp_16)
+    7. floating point fp16 sum-of-products mode (result = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b. chainout = third_inp or result) (fp16_sum_of_products_mode/fp16_mult_add)
+    8. floating point fp16 sum-of-2 mult mode (result = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b + fp32 chainin or third inp. chainout = third_inp or result) (fp16_sum_of_products_2_mult_mode/fp16_sop2_mult)
+    9. floating point fp16 sum-of-2 accum mode (result = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b + accumulator. chainout = result) (fp16_sum_of_products_2_accum_mode/fp16_sop2_accum)
+    10. floating point fp16 mult, fp32 add mode (chainout = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b. result = chainin + third_inp) (fp16_mult_fp32_add/fp16_mult_fp32_add)
+    11. floating point fp16 mult, fp32 accum mode (chainout = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b. result = third_inp + accumulator) (fp16_mult_fp32_accum/fp16_mult_fp32_accum)
+    12. floating point fp32 mult_then_add mode (result = fp32_mult_a * fp32_mult_b + chainin. chainout = third_inp or result) (fp32_mult_then_add/fp32_mult_then_add)
+    13. floating point fp32 mult_add mode (chainout = fp32_mult_a * fp32_mult_b. result = third_inp + chainin) (fp32_mult_add/fp32_mult_add)
+
+    The DSP block was designed in Verilog and COFFE's [2] hybrid flow was used to generate
+    area and delay results. The standard cell library used was Cadence GPDK 45nm (gsclib045_svt_v4.4)
+    and area/delay scaling equations from [4] were used.
+
+    A 50% sparsely populated input crossbar was added to the DSP block but is commented out.
+    It was leading to a failure in VPR. See the discussion on this commit: 
+    https://github.com/verilog-to-routing/vtr-verilog-to-routing/commit/ea7acf1582ece35e892c26b756aa302d2e12ddb2
+
+    Once this is fixed, the input crossbar code can be enabled.
+
+    ##############################
+    Memory Blocks
+    ##############################
+    The architecture also has 20Kb memory blocks (or M20k or BRAM) that have true and simple dual port modes. 
+    In simple dual port mode the memory can be configured in the following modes: 512x40, 1024x20 and 2048x10,
+    while in true dual port mode it can be configured as: 1024x20 and 2048x10.
+
+    The BRAM has registered inputs and outputs. See details on how the delays for this block were 
+    obtained, in the comments before the specification of the BRAM primitive, towards the end of this file.
+
+    The BRAM doesn't have an input crossbar. Adding an input crossbar was leading to a 
+    seg fault in VPR, likely because of https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1475
+
+    Once this is fixed, an input crossbar can be added. The input crossbar delay from COFFE was: 29.47ps
+
+    ##############################
+    Routing/Interconnect
+    ##############################
+    The routing channel width is 300. Note that the channel width isn't specified directly in this arch file. 
+    Switch pattern calculations assume that value. During experiments, channel width can be specified using 
+    the command line switch `route_chan_width`.
+    The architecture uses unidirectional routing with wire segments of length 4 (260 out of 300 wires) and 
+    length 16 (40 out of 300 wires). The length 16 wires do not directly connect to block pins and are only 
+    accessible from the length 4 wires. Switches appear after every 4 blocks on the length 16 wires. 
+    The switch blocks use a custom switching pattern based on the Stratix-IV-like architecture used in the 
+    Titan flow [5]. 
+
+    ##############################
+    I/Os
+    ##############################
+    I/O pads are arranged along the perimeter of the FPGA. No area values provided for the I/Os.
+
+    ##############################
+    Comments on similarities and differences with Intel FPGA architecture.
+    ##############################
+    The main parameters of the logic blocks, DSPs and RAMs are similar to Intel FPGAs. But here are
+    some important points:
+    1. The DSP slice supports lower precision modes - int8 (actually 9x9) and 16-bit floating point
+       (IEEE half-precision and bfloat16). These modes are present in Intel Agilex FPGA DSPs.
+    2. DSPs are chained in vertical direction (chainin-chainout connections for output cascading
+       and scanin-scanout connections for input cascading). This is a common feature
+       in modern FPGAs.
+    3. There are no registers on the interconnect/routing wires in this architecture. That is a main
+       feature in the Stratix10 and Agilex families of Intel FPGA (it's called HyperFlex by Intel).
+    4. The architecture doesn't have sectors. All blocks are laid out in columns on the entire chip.
+       Most modern Intel FPGAs have sector based layout.
+    5. The IOs are on the perimeter, instead of being arranged in columns. Modern FPGAs arrange I/Os in
+       columns.
+    6. The routing architecture is similar to Stratix IV. There are wire segments of L=4 
+       and L=16. And a custom switch pattern (not a standard wilton switch) is used. 
+
+    [1] M. Eldafrawy, A. Boutros, S. Yazdanshenas, and V. Betz, "FPGA Logic Block Architectures for
+        Efficient Deep Learning Inference" in ACM TRETS, 2020
+    [2] S. Yazdanshenas, and V. Betz, "COFFE 2: Automatic Modelling and Optimization of
+        Complex and Heterogeneous FPGA Architectures" in ACM TRETS, 2019. 
+    [3] PTM High Performance 22nm Metal Gate / High-K / Strained-Si 22NM_BULK_HP, from http://ptm.asu.edu/
+        See: https://github.com/vaughnbetz/COFFE/blob/master/spice_models/ptm_22nm_bulk_hp.l
+    [4] A. Stillmaker and B. Baas, "Scaling equations for the accurate prediction of CMOS device 
+        performance from 180 nm to 7 nm" in Integration, the VLSI Journal (2017)
+    [5] K. E. Murray et al., “Timing-Driven Titan: Enabling Large Benchmarks and Exploring the Gap between 
+        Academic and Commercial CAD,” TRETS 2015.
+    [6] A. Arora et al., "Tensor Slices to the Rescue: Supercharging ML Acceleration on FPGAs", ISFPGA 2020.
+-->
+
+<architecture>
+  <!-- 
+         ODIN II specific config begins 
+         Describes the types of user-specified netlist blocks (in blif, this corresponds to 
+         ".model [type_of_block]") that this architecture supports.
+
+         Note: Basic LUTs, I/Os, and flip-flops are not included here as there are 
+         already special structures in blif (.names, .input, .output, and .latch) 
+         that describe them.
+    -->
+  <models>
+    <model name="single_port_ram">
+      <input_ports>
+        <port name="we" clock="clk" combinational_sink_ports="out"/>
+        <!-- control -->
+        <port name="addr" clock="clk" combinational_sink_ports="out"/>
+        <!-- address lines -->
+        <port name="data" clock="clk" combinational_sink_ports="out"/>
+        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
+        <port name="clk" is_clock="1"/>
+        <!-- memories are often clocked -->
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+        <!-- output can be broken down into smaller bit widths minimum size 1 -->
+      </output_ports>
+    </model>
+    <model name="dual_port_ram">
+      <input_ports>
+        <port name="we1" clock="clk" combinational_sink_ports="out1"/>
+        <!-- write enable -->
+        <port name="we2" clock="clk" combinational_sink_ports="out2"/>
+        <!-- write enable -->
+        <port name="addr1" clock="clk" combinational_sink_ports="out1"/>
+        <!-- address lines -->
+        <port name="addr2" clock="clk" combinational_sink_ports="out2"/>
+        <!-- address lines -->
+        <port name="data1" clock="clk" combinational_sink_ports="out1"/>
+        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
+        <port name="data2" clock="clk" combinational_sink_ports="out2"/>
+        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
+        <port name="clk" is_clock="1"/>
+        <!-- memories are often clocked -->
+      </input_ports>
+      <output_ports>
+        <port name="out1" clock="clk"/>
+        <!-- output can be broken down into smaller bit widths minimum size 1 -->
+        <port name="out2" clock="clk"/>
+        <!-- output can be broken down into smaller bit widths minimum size 1 -->
+      </output_ports>
+    </model>
+    <!-- Used inside DSPs. 
+         Fixed point multiplication.
+         ODIN infers these when * sign appears in RTL. -->
+    <model name="multiply">
+      <input_ports>
+        <port name="a" combinational_sink_ports="out"/>
+        <port name="b" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out"/>
+      </output_ports>
+    </model>
+    <!-- Used inside DSPs. 
+         Floating point multiplication. -->
+    <model name="mult_fp_16">
+      <input_ports>
+        <port name="a" combinational_sink_ports="out"/>
+        <port name="b" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out"/>
+      </output_ports>
+    </model>   
+    <model name="mult_fp_32">
+      <input_ports>
+        <port name="a" combinational_sink_ports="out"/>
+        <port name="b" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out"/>
+      </output_ports>
+    </model>   
+    <model name="mult_fp_clk_16">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="a" clock="clk" combinational_sink_ports="out"/>
+        <port name="b" clock="clk" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+      </output_ports>
+    </model>
+    <model name="mult_fp_clk_32">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="a" clock="clk" combinational_sink_ports="out"/>
+        <port name="b" clock="clk" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+      </output_ports>
+    </model>
+    <!-- Only used inside CLBs for 1-bit adder.
+         ODIN infers these when + sign appears in RTL.
+         Can't use this inside DSP slice
+         because ODIN gets confused and starts to connect multi
+         bit adders and single bit adders in different PBs -->
+    <model name="adder">
+      <input_ports>
+        <port name="a" combinational_sink_ports="cout sumout"/>
+        <port name="b" combinational_sink_ports="cout sumout"/>
+        <port name="cin" combinational_sink_ports="cout sumout"/>
+      </input_ports>
+      <output_ports>
+        <port name="cout"/>
+        <port name="sumout"/>
+      </output_ports>
+    </model>
+    <!-- Multi bit floating point adder inside DSP slices -->
+    <model name="addition_fp_16">
+      <input_ports>
+        <port name="a" combinational_sink_ports="out"/>
+        <port name="b" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out"/>
+      </output_ports>
+    </model>
+    <model name="addition_fp_32">
+      <input_ports>
+        <port name="a" combinational_sink_ports="out"/>
+        <port name="b" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out"/>
+      </output_ports>
+    </model>
+    <model name="addition_fp_clk_16">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="a" clock="clk" combinational_sink_ports="out"/>
+        <port name="b" clock="clk" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+      </output_ports>
+    </model>
+    <model name="addition_fp_clk_32">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="a" clock="clk" combinational_sink_ports="out"/>
+        <port name="b" clock="clk" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+      </output_ports>
+    </model>
+    <!--A mode in DSP slice-->
+    <model name="int_sop_2">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="ax" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="ay" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="by" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model>
+    <!--A mode in DSP slice-->
+    <model name="mult_add_int_27x27">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="ax" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="ay" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="bx" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="scanin" clock="clk" combinational_sink_ports="result scanout chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+        <port name="scanout"/>
+      </output_ports>
+    </model>
+    <model name="mult_add_int_18x19">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="ax" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="ay" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="bx" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="scanin" clock="clk" combinational_sink_ports="result scanout chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+        <port name="scanout"/>
+      </output_ports>
+    </model>
+    <!--A mode in DSP slice-->
+    <model name="int_sop_4">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="ax" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="ay" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="by" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="cx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="cy" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="dx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="dy" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model>
+    <!--A mode in DSP slice-->
+    <model name="int_sop_accum_4">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="ax" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="ay" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="by" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="cx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="cy" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="dx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="dy" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model>
+    <!-- Floating point MAC inside DSP slices -->
+    <model name="mac_fp_16">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="out"/>
+        <port name="a" clock="clk" combinational_sink_ports="out"/>
+        <port name="b" clock="clk" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+      </output_ports>
+    </model>
+    <model name="mac_fp_32">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="out"/>
+        <port name="a" clock="clk" combinational_sink_ports="out"/>
+        <port name="b" clock="clk" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+      </output_ports>
+    </model>
+    <!-- Fixed point MAC inside DSP slices -->
+    <model name="mac_int_27x27">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="out"/>
+        <port name="a" clock="clk" combinational_sink_ports="out"/>
+        <port name="b" clock="clk" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+      </output_ports>
+    </model>
+    <model name="mac_int_18x19">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="out"/>
+        <port name="a" clock="clk" combinational_sink_ports="out"/>
+        <port name="b" clock="clk" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+      </output_ports>
+    </model>
+    <model name="mac_int_9x9">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="out"/>
+        <port name="a" clock="clk" combinational_sink_ports="out"/>
+        <port name="b" clock="clk" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+      </output_ports>
+    </model>
+    <!--A mode in DSP slice-->
+    <model name="fp16_mult_add">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="top_a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="top_b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bot_a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bot_b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="fp32_in" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model> 
+    <!--A mode in DSP slice-->
+    <model name="fp16_sop2_mult">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="top_a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="top_b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bot_a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bot_b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="fp32_in" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model> 
+    <!--A mode in DSP slice-->
+    <model name="fp16_sop2_accum">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="top_a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="top_b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bot_a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bot_b" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model>
+    <!--A mode in DSP slice-->
+    <model name="fp16_mult_fp32_add">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="top_a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="top_b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bot_a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bot_b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="fp32_in" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model>    
+    <!--A mode in DSP slice-->
+    <model name="fp16_mult_fp32_accum">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="top_a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="top_b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bot_a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bot_b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="fp32_in" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model>
+    <!--A mode in DSP slice-->
+    <model name="fp32_mult_add">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="fp32_in" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model> 
+    <!--A mode in DSP slice-->
+    <model name="fp32_mult_then_add">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="a" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="b" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="fp32_in" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model> 
+  </models>
+  <tiles>
+    <tile name="io" area="0">
+      <sub_tile name="io" capacity="8">
+        <equivalent_sites>
+          <site pb_type="io" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="outpad" num_pins="1"/>
+        <output name="inpad" num_pins="1"/>
+        <clock name="clock" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
+        <pinlocations pattern="custom">
+          <loc side="left">io.outpad io.inpad io.clock</loc>
+          <loc side="top">io.outpad io.inpad io.clock</loc>
+          <loc side="right">io.outpad io.inpad io.clock</loc>
+          <loc side="bottom">io.outpad io.inpad io.clock</loc>
+        </pinlocations>
+      </sub_tile>
+    </tile>
+    <tile name="clb" height="1" width="1" area="27905">
+      <sub_tile name="clb">
+        <equivalent_sites>
+          <site pb_type="clb" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="I1" num_pins="15" equivalent="full"/>
+        <input name="I2" num_pins="15" equivalent="full"/>
+        <input name="I3" num_pins="15" equivalent="full"/>
+        <input name="I4" num_pins="15" equivalent="full"/>
+        <input name="cin" num_pins="1"/>
+        <output name="O" num_pins="40" equivalent="none"/>
+        <output name="cout" num_pins="1"/>
+        <clock name="clk" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
+          <fc_override port_name="cin" fc_type="frac" fc_val="0"/>
+          <fc_override port_name="cout" fc_type="frac" fc_val="0"/>
+          <!-- clock pins do not connect to local routing -->
+          <fc_override fc_type="frac" fc_val="0" port_name="clk"/>
+        </fc>
+        <pinlocations pattern="spread"/>
+      </sub_tile>
+    </tile>
+    <tile name="dsp_top" height="4" width="1" area="253779">
+      <sub_tile name="dsp_top">
+        <equivalent_sites>
+          <site pb_type="dsp_top" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="reset" num_pins="1" is_non_clock_global="true"/>
+        <input name="dsp_I1" num_pins="64" />
+        <input name="dsp_I2" num_pins="64" />
+        <input name="chainin" num_pins="64"/>
+        <input name="scanin" num_pins="27"/>
+        <output name="result" num_pins="74"/>
+        <output name="chainout" num_pins="64"/>
+        <output name="scanout" num_pins="27"/>
+        <clock name="clk" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
+          <!-- clock pins and chain ports do not connect to local routing -->
+          <fc_override port_name="clk" fc_type="frac" fc_val="0"/>
+          <fc_override port_name="chainin" fc_type="frac" fc_val="0"/>
+          <fc_override port_name="chainout" fc_type="frac" fc_val="0"/>
+          <fc_override port_name="scanin" fc_type="frac" fc_val="0"/>
+          <fc_override port_name="scanout" fc_type="frac" fc_val="0"/>
+        </fc>
+        <pinlocations pattern="custom">
+        	  <loc side="left" yoffset="0">dsp_top.dsp_I1[31:0]</loc>
+	          <loc side="right" yoffset="1">dsp_top.dsp_I1[63:32]</loc>
+	          <loc side="left" yoffset="2">dsp_top.dsp_I2[31:0]</loc>
+	          <loc side="right" yoffset="3">dsp_top.dsp_I2[63:32]</loc>
+	          <loc side="top">dsp_top.chainin dsp_top.scanin</loc>
+	          <loc side="bottom">dsp_top.chainout dsp_top.scanout</loc>
+	          <loc side="right" yoffset="0">dsp_top.result[17:0] dsp_top.clk</loc>
+	          <loc side="left" yoffset="1">dsp_top.result[36:18]</loc>
+	          <loc side="right" yoffset="2">dsp_top.result[55:37] </loc>
+	          <loc side="left" yoffset="3">dsp_top.result[73:56] dsp_top.reset</loc>
+        </pinlocations>
+      </sub_tile>
+    </tile>
+    <tile name="memory" height="2" width="1" area="137668">
+      <sub_tile name="memory">
+        <equivalent_sites>
+          <site pb_type="memory" pin_mapping="direct"/>
+        </equivalent_sites>
+      <input name="addr1" num_pins="11"/>
+      <input name="addr2" num_pins="11"/>
+      <input name="data" num_pins="40"/>
+      <input name="we1" num_pins="1"/>
+      <input name="we2" num_pins="1"/>
+      <output name="out" num_pins="40"/>
+      <clock name="clk" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
+          <fc_override fc_type="frac" fc_val="0" port_name="clk"/>
+        </fc>  
+        <pinlocations pattern="spread"/>
+      </sub_tile>
+    </tile>
+  </tiles>
+  <!-- ODIN II specific config ends -->
+  <layout>
+    <!-- Physical descriptions begin -->
+    <auto_layout aspect_ratio="1.0">
+      <perimeter type="io" priority="101"/>
+      <corners type="EMPTY" priority="102"/>
+      <fill type="clb" priority="10"/>
+      <col type="dsp_top" startx="6" starty="1" repeatx="16" priority="20"/>
+      <col type="memory" startx="2" starty="1" repeatx="16" priority="20"/>
+    </auto_layout>
+    <!--
+    <fixed_layout name="mylayout" width="178" height="82">
+      <perimeter type="io" priority="101"/>
+      <corners type="EMPTY" priority="102"/>
+      
+      <col type="dsp_top"  startx="1"  starty="1"  priority="100"/>
+      <col type="clb"  startx="2"  starty="1"  priority="100"/>
+      <col type="clb"  startx="3"  starty="1"  priority="100"/>
+      <col type="dsp_top"  startx="4"  starty="1"  priority="100"/>
+      <col type="clb"  startx="5"  starty="1"  priority="100"/>
+      <col type="clb"  startx="6"  starty="1"  priority="100"/>
+      <col type="dsp_top"  startx="7"  starty="1"  priority="100"/>
+      <col type="clb"  startx="8"  starty="1"  priority="100"/>
+      <col type="clb"  startx="9"  starty="1"  priority="100"/>
+      <col type="dsp_top"  startx="10"  starty="1"  priority="100"/>
+      <col type="clb"  startx="11"  starty="1"  priority="100"/>
+      <col type="clb"  startx="12"  starty="1"  priority="100"/>
+      <col type="dsp_top"  startx="13"  starty="1"  priority="100"/>
+
+      <region type="clb" startx="14"   endx="88"   starty="1" incrx="5"  priority="20"/>
+      <region type="clb" startx="15"   endx="88"   starty="1" incrx="5"  priority="20"/>
+      <region type="clb" startx="16"   endx="88"   starty="1" incrx="5"  priority="20"/>
+      <region type="dsp_top" startx="17"   endx="88"   starty="1" incrx="5"  priority="20"/>
+      <region type="memory" startx="18"   endx="88"   starty="1" incrx="5"  priority="20"/>
+
+      <region type="memory" startx="89"   endx="163"   starty="1" incrx="5"  priority="20"/>
+      <region type="dsp_top" startx="90"   endx="163"   starty="1" incrx="5"  priority="20"/>
+      <region type="clb" startx="91"   endx="163"   starty="1" incrx="5"  priority="20"/>
+      <region type="clb" startx="92"   endx="163"   starty="1" incrx="5"  priority="20"/>
+      <region type="clb" startx="93"   endx="163"   starty="1" incrx="5"  priority="20"/>
+
+      <col type="dsp_top"  startx="164"  starty="1"  priority="20"/>
+      <col type="clb"  startx="165"  starty="1"  priority="1"/>
+      <col type="clb"  startx="166"  starty="1"  priority="1"/>
+      <col type="dsp_top"  startx="167"  starty="1"  priority="20"/>
+      <col type="clb"  startx="168"  starty="1"  priority="1"/>
+      <col type="clb"  startx="169"  starty="1"  priority="1"/>
+      <col type="dsp_top"  startx="170"  starty="1"  priority="20"/>
+      <col type="clb"  startx="171"  starty="1"  priority="1"/>
+      <col type="clb"  startx="172"  starty="1"  priority="1"/>
+      <col type="dsp_top"  startx="173"  starty="1"  priority="20"/>
+      <col type="clb"  startx="174"  starty="1"  priority="1"/>
+      <col type="clb"  startx="175"  starty="1"  priority="1"/>
+      <col type="dsp_top"  startx="176"  starty="1"  priority="20"/>
+    </fixed_layout> 
+    -->
+  </layout>
+  <device>
+    <sizing R_minW_nmos="13090" R_minW_pmos="19086.83"/>
+    <area grid_logic_tile_area="0"/>
+    <chan_width_distr>
+      <x distr="uniform" peak="1.000000"/>
+      <y distr="uniform" peak="1.000000"/>
+    </chan_width_distr>
+    <switch_block type="custom"/>
+    <connection_block input_switch_name="ipin_cblock"/>
+  </device>
+  <switchlist>
+    <switch type="mux" name="L4_driver" R="0.0" Cin="0.0" Cout="0.0" Tdel="207.9e-12" mux_trans_size="2.377" buf_size="35.69"/>
+    <!-- Delay of L16 driver is scaled from L4 by a factor of 1.5x (based on numbers from the Titan Stratix IV architecture file)
+	 Area numbers will not be totally accurate because of the same buf_size -->
+    <switch type="mux" name="L16_driver" R="0.0" Cin="0.0" Cout="0.0" Tdel="312.9e-12" mux_trans_size="2.377" buf_size="35.69"/> 
+    <switch type="mux" name="ipin_cblock" R="0.0" Cout="0.0" Cin="0.0" Tdel="130e-12" mux_trans_size="1.508" buf_size="11.71"/>
+  </switchlist>
+  <segmentlist>
+    <segment name="L4" freq="260" length="4" type="unidir" Rmetal="0.0" Cmetal="0.0">
+      <mux name="L4_driver"/>
+      <sb type="pattern">1 1 1 1 1</sb>
+      <cb type="pattern">1 1 1 1</cb>
+    </segment>
+    <segment name="L16" freq="40" length="16" type="unidir" Rmetal="0.0" Cmetal="0.0">
+      <mux name="L16_driver"/>
+      <!-- Vias from the top of the metal stack (global layers, where the long wires are 
+           implemented) down to the middle/bottom of the metal stack (semi-global layers, 
+           where the short wires are implemented) are expensive and restrictive.
+           As a result Stratix IV only places long wire switch blocks every 4 LABs -->
+      <sb type="pattern">1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1</sb>
+      <!-- For the same reasons, long wires do not connect to block pins in Stratix IV -->
+      <cb type="pattern">0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0</cb>
+    </segment>
+  </segmentlist>
+  <directlist>
+    <!-- Direct connect from one LAB to the LAB directly below it (carry chain) -->
+    <direct name="adder_carry" from_pin="clb.cout" to_pin="clb.cin" x_offset="0" y_offset="-1" z_offset="0"/>
+    <!-- Direct connect from one DSP to the DSP directly below it -->
+    <direct name="dsp_out_chain" from_pin="dsp_top.chainout" from_side="bottom" to_pin="dsp_top.chainin" to_side="top" x_offset="0" y_offset="-4" z_offset="0"/>
+    <direct name="dsp_in_chain" from_pin="dsp_top.scanout" from_side="bottom" to_pin="dsp_top.scanin" to_side="top" x_offset="0" y_offset="-4" z_offset="0"/>
+  </directlist>
+  <complexblocklist>
+    <!-- Define I/O pads begin -->
+    <!-- Not sure of the area of an I/O (varies widely), and it's not relevant to the design of the FPGA core, so we're setting it to 0. -->
+    <pb_type name="io">
+      <input name="outpad" num_pins="1"/>
+      <output name="inpad" num_pins="1"/>
+      <clock name="clock" num_pins="1"/>
+      <!-- IOs can operate as either inputs or outputs.
+	     Delays below come from Ian Kuon. They are small, so they should be interpreted as
+	     the delays to and from registers in the I/O (and generally I/Os are registered 
+	     today, and that is the point at which they are timing analyzed).
+	     -->
+      <mode name="inpad">
+        <pb_type name="inpad" blif_model=".input" num_pb="1">
+          <output name="inpad" num_pins="1"/>
+        </pb_type>
+        <interconnect>
+          <direct name="inpad" input="inpad.inpad" output="io.inpad">
+            <delay_constant max="4.243e-11" in_port="inpad.inpad" out_port="io.inpad"/>
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="outpad">
+        <pb_type name="outpad" blif_model=".output" num_pb="1">
+          <input name="outpad" num_pins="1"/>
+        </pb_type>
+        <interconnect>
+          <direct name="outpad" input="io.outpad" output="outpad.outpad">
+            <delay_constant max="1.394e-11" in_port="io.outpad" out_port="outpad.outpad"/>
+          </direct>
+        </interconnect>
+      </mode>
+      <!-- Every input pin is driven by 15% of the tracks in a channel; every output pin drives 10% of the tracks in a channel -->
+      <!-- IOs go on the periphery of the FPGA, for consistency, 
+          make it physically equivalent on all sides so that only one definition of I/Os is needed.
+          If I do not make a physically equivalent definition, then I need to define 4 different I/Os, one for each side of the FPGA
+        -->
+      <!-- Place I/Os on the sides of the FPGA -->
+    </pb_type>
+    <!-- Define I/O pads ends -->
+    <!-- Define general purpose logic block (CLB) begin -->
+    <pb_type name="clb">
+      <input name="I1" num_pins="15" equivalent="full"/>
+      <input name="I2" num_pins="15" equivalent="full"/>
+      <input name="I3" num_pins="15" equivalent="full"/>
+      <input name="I4" num_pins="15" equivalent="full"/>
+      <input name="cin" num_pins="1"/>
+      <output name="O" num_pins="40" equivalent="none"/>
+      <output name="cout" num_pins="1"/>
+      <clock name="clk" num_pins="1"/>
+      <pb_type name="lab" num_pb="1">
+        <input name="I1" num_pins="15"/>
+        <input name="I2" num_pins="15"/>
+        <input name="I3" num_pins="15"/>
+        <input name="I4" num_pins="15"/>
+        <input name="cin" num_pins="1"/>
+        <output name="O" num_pins="40"/>
+        <output name="cout" num_pins="1"/>
+        <clock name="clk" num_pins="1"/>
+        <!-- Describe fracturable logic element.  
+                 Each fracturable logic element has a 6-LUT that can alternatively operate as two 5-LUTs with shared inputs. 
+                 The outputs of the fracturable logic element can be optionally registered
+            -->
+        <pb_type name="fle" num_pb="10">
+          <input name="in" num_pins="8"/>
+          <input name="cin" num_pins="1"/>
+          <output name="out" num_pins="4"/>
+          <output name="cout" num_pins="1"/>
+          <clock name="clk" num_pins="1"/>
+          <!-- 
+                    The ALM inputs are as follows:
+                            A -> fle[0]
+                            B -> fle[1]
+                            C -> fle[2]
+                            D -> fle[3]
+                            E -> fle[4]
+                            F -> fle[5]
+                            G -> fle[6]
+                            H -> fle[7]
+              -->
+          <mode name="n2_lut5">
+            <pb_type name="ble5" num_pb="2">
+              <input name="in" num_pins="5"/>
+              <input name="cin" num_pins="1"/>
+              <output name="out" num_pins="2"/>
+              <output name="cout" num_pins="1"/>
+              <clock name="clk" num_pins="1"/>
+              <mode name="blut5">
+                <pb_type name="flut5" num_pb="1">
+                  <input name="in" num_pins="5"/>
+                  <output name="out" num_pins="2"/>
+                  <clock name="clk" num_pins="1"/>
+                  <!-- Regular LUT mode -->
+                  <pb_type name="lut5" blif_model=".names" num_pb="1" class="lut">
+                    <input name="in" num_pins="5" port_class="lut_in"/>
+                    <output name="out" num_pins="1" port_class="lut_out"/>
+                    <!-- LUT timing using delay matrix -->
+                    <!-- These are the physical input delays of a Stratix 10 LUT, but because VPR cannot do LUT rebalancing,
+                             we instead take the average of these numbers to get more stable results.
+                             Note that these are the same delays for inputs A - E as the ones used for the 6-LUT; however, we have
+                             subtracted the delay of the last mux stage to get the delay of inputs A - E till the 5-LUT output
+                             210.96e-12
+                             206.85e-12
+                             143.46e-12
+                             136.94e-12
+                             68.12e-12
+                          -->
+                    <delay_matrix type="max" in_port="lut5.in" out_port="lut5.out">
+                            153.27e-12
+                            153.27e-12
+                            153.27e-12
+                            153.27e-12
+                            153.27e-12
+                        </delay_matrix>
+                  </pb_type>
+                  <pb_type name="ff" blif_model=".latch" num_pb="2" class="flipflop">
+                    <input name="D" num_pins="1" port_class="D"/>
+                    <output name="Q" num_pins="1" port_class="Q"/>
+                    <clock name="clk" num_pins="1" port_class="clock"/>
+                    <T_setup value="18.91e-12" port="ff.D" clock="clk"/>
+                    <T_clock_to_Q max="60.32e-12" port="ff.Q" clock="clk"/>
+                  </pb_type>
+                  <interconnect>
+                    <direct name="lut5_in" input="flut5.in" output="lut5.in"/>
+                    <direct name="reg_in" input="flut5.in[0]" output="ff[0].D"/>
+                    <direct name="lut5_ff" input="lut5.out" output="ff[1].D">
+                      <delay_constant max="18.96e-12" in_port="lut5.out" out_port="ff[1].D"/>
+                      <pack_pattern name="ble5" in_port="lut5.out" out_port="ff[1].D"/>
+                    </direct>
+                    <complete name="clock" input="flut5.clk" output="ff.clk"/>
+                    <complete name="out_mux" input="ff.Q lut5.out" output="flut5.out">
+                      <delay_constant max="39.85e-12" in_port="lut5.out" out_port="flut5.out"/>
+                      <delay_constant max="39.85e-12" in_port="ff.Q" out_port="flut5.out"/>
+                    </complete>
+                  </interconnect>
+                </pb_type>
+                <interconnect>
+                  <direct name="direct1" input="ble5.in" output="flut5.in"/>
+                  <direct name="direct2" input="ble5.clk" output="flut5.clk"/>
+                  <direct name="direct3" input="flut5.out" output="ble5.out"/>
+                </interconnect>
+              </mode>
+              <mode name="arithmetic">
+                <pb_type name="arithmetic" num_pb="1">
+                  <input name="in" num_pins="4"/>
+                  <input name="cin" num_pins="1"/>
+                  <output name="out" num_pins="2"/>
+                  <output name="cout" num_pins="1"/>
+                  <clock name="clk" num_pins="1"/>
+                  <!-- Special dual-LUT mode that drives adder only -->
+                  <pb_type name="lut4" blif_model=".names" num_pb="2" class="lut">
+                    <input name="in" num_pins="4" port_class="lut_in"/>
+                    <output name="out" num_pins="1" port_class="lut_out"/>
+                    <!-- LUT timing using delay matrix -->
+                    <!-- These are the physical input delays of a Stratix 10 LUT, but because VPR cannot do LUT rebalancing,
+                           we instead take the average of these numbers to get more stable results.
+                           Note that these are derived from the delays of inputs A - D used for the 6-LUT; however, we have
+                           subtracted the delay of the final mux stages to get the delay of inputs A - D till the 4-LUT output
+                             168.12e-12
+                             164.02e-12
+                             100.63e-12
+                             94.11e-12
+                          -->
+                    <delay_matrix type="max" in_port="lut4.in" out_port="lut4.out">
+                            131.72e-12
+                            131.72e-12
+                            131.72e-12
+                            131.72e-12
+                        </delay_matrix>
+                  </pb_type>
+                  <pb_type name="adder" blif_model=".subckt adder" num_pb="1">
+                    <input name="a" num_pins="1"/>
+                    <input name="b" num_pins="1"/>
+                    <input name="cin" num_pins="1"/>
+                    <output name="cout" num_pins="1"/>
+                    <output name="sumout" num_pins="1"/>
+                    <delay_constant max="68.74e-12" in_port="adder.a" out_port="adder.sumout"/>
+                    <delay_constant max="68.74e-12" in_port="adder.b" out_port="adder.sumout"/>
+                    <delay_constant max="35.46e-12" in_port="adder.cin" out_port="adder.sumout"/>
+                    <delay_constant max="49.32e-12" in_port="adder.a" out_port="adder.cout"/>
+                    <delay_constant max="49.32e-12" in_port="adder.b" out_port="adder.cout"/>
+                    <delay_constant max="25.56e-12" in_port="adder.cin" out_port="adder.cout"/>
+                  </pb_type>
+                  <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
+                    <input name="D" num_pins="1" port_class="D"/>
+                    <output name="Q" num_pins="1" port_class="Q"/>
+                    <clock name="clk" num_pins="1" port_class="clock"/>
+                    <T_setup value="18.91e-12" port="ff.D" clock="clk"/>
+                    <T_clock_to_Q max="60.32e-12" port="ff.Q" clock="clk"/>
+                  </pb_type>
+                  <interconnect>
+                    <direct name="clock" input="arithmetic.clk" output="ff.clk"/>
+                    <direct name="lut4_in1" input="arithmetic.in" output="lut4[0].in"/>
+                    <direct name="lut4_in2" input="arithmetic.in" output="lut4[1].in"/>
+                    <direct name="lut_to_add1" input="lut4[0:0].out" output="adder.a"/>
+                    <direct name="lut_to_add2" input="lut4[1:1].out" output="adder.b"/>
+                    <direct name="add_to_ff" input="adder.sumout" output="ff.D">
+                      <delay_constant max="18.96e-12" in_port="adder.sumout" out_port="ff.D"/>
+                      <!--pack_pattern name="chain" in_port="adder.sumout" out_port="ff.D"/-->
+                    </direct>
+                    <direct name="carry_in" input="arithmetic.cin" output="adder.cin">
+                      <pack_pattern name="chain" in_port="arithmetic.cin" out_port="adder.cin"/>
+                    </direct>
+                    <direct name="carry_out" input="adder.cout" output="arithmetic.cout">
+                      <pack_pattern name="chain" in_port="adder.cout" out_port="arithmetic.cout"/>
+                    </direct>
+                    <complete name="sumout" input="ff.Q adder.sumout" output="arithmetic.out">
+                      <delay_constant max="39.85e-12" in_port="adder.sumout" out_port="arithmetic.out"/>
+                      <delay_constant max="39.85e-12" in_port="ff.Q" out_port="arithmetic.out"/>
+                    </complete>
+                  </interconnect>
+                </pb_type>
+                <interconnect>
+                  <direct name="direct1" input="ble5.in[3:0]" output="arithmetic.in"/>
+                  <direct name="carry_in" input="ble5.cin" output="arithmetic.cin">
+                    <pack_pattern name="chain" in_port="ble5.cin" out_port="arithmetic.cin"/>
+                  </direct>
+                  <direct name="carry_out" input="arithmetic.cout" output="ble5.cout">
+                    <pack_pattern name="chain" in_port="arithmetic.cout" out_port="ble5.cout"/>
+                  </direct>
+                  <direct name="direct2" input="ble5.clk" output="arithmetic.clk"/>
+                  <direct name="direct3" input="arithmetic.out" output="ble5.out"/>
+                </interconnect>
+              </mode>
+            </pb_type>
+            <interconnect>
+              <!-- Shared inputs between the two 5-LUTs -->
+              <complete name="lut5_reg1" input="fle.in[0]" output="ble5[0].in[0] ble5[1].in[1]"/>
+              <complete name="lut5_reg2" input="fle.in[1]" output="ble5[0].in[1] ble5[1].in[0]"/>
+              <!-- Rest of the 5-LUT inputs -->
+              <direct name="lut5_inputs_1" input="fle.in[4:2]" output="ble5[0].in[4:2]"/>
+              <direct name="lut5_inputs_22" input="fle.in[7:5]" output="ble5[1].in[4:2]"/>
+              <direct name="lut5_outputs_1" input="ble5[0].out" output="fle.out[1:0]"/>
+              <direct name="lut5_outputs_2" input="ble5[1].out" output="fle.out[3:2]"/>
+              <direct name="carry_in" input="fle.cin" output="ble5[0].cin">
+                <pack_pattern name="chain" in_port="fle.cin" out_port="ble5[0].cin"/>
+              </direct>
+              <direct name="carry_out" input="ble5[1].cout" output="fle.cout">
+                <pack_pattern name="chain" in_port="ble5[1].cout" out_port="fle.cout"/>
+              </direct>
+              <direct name="carry_link" input="ble5[0].cout" output="ble5[1].cin"> <!-- Carry link between the two ble5 instances of this fle -->
+                <pack_pattern name="chain" in_port="ble5[0].cout" out_port="ble5[1].cin"/> <!-- out_port names this direct's sink pin (cin), so the chain pattern lands on this edge -->
+              </direct>
+              <complete name="clock" input="fle.clk" output="ble5[1:0].clk"/>
+            </interconnect>
+          </mode>
+          <!-- n2_lut5 -->
+          <mode name="n1_lut6">
+            <pb_type name="ble6" num_pb="1">
+              <input name="in" num_pins="6"/>
+              <output name="out" num_pins="4"/>
+              <clock name="clk" num_pins="1"/>
+              <pb_type name="lut6" blif_model=".names" num_pb="1" class="lut">
+                <input name="in" num_pins="6" port_class="lut_in"/>
+                <output name="out" num_pins="1" port_class="lut_out"/>
+                <!-- LUT timing using delay matrix -->
+                <!-- These are the physical per-input delays of a Stratix 10 LUT, but because VPR cannot do LUT rebalancing,
+                           we instead use the average of these numbers (about 179.6e-12) for every input to get more stable results:
+                           257.8e-12
+                           253.69e-12
+                           190.3e-12
+                           183.78e-12
+                           114.96e-12
+                           77.18e-12
+                      -->
+                <delay_matrix type="max" in_port="lut6.in" out_port="lut6.out">
+                        179.6e-12
+                        179.6e-12
+                        179.6e-12
+                        179.6e-12
+                        179.6e-12
+                        179.6e-12
+                    </delay_matrix>
+              </pb_type>
+              <pb_type name="ff" blif_model=".latch" num_pb="2" class="flipflop">
+                <input name="D" num_pins="1" port_class="D"/>
+                <output name="Q" num_pins="1" port_class="Q"/>
+                <clock name="clk" num_pins="1" port_class="clock"/>
+                <T_setup value="18.91e-12" port="ff.D" clock="clk"/>
+                <T_clock_to_Q max="60.32e-12" port="ff.Q" clock="clk"/>
+              </pb_type>
+              <interconnect>
+                <direct name="lut6_inputs" input="ble6.in" output="lut6.in"/>
+                <direct name="lut6_ff" input="lut6.out" output="ff[1].D">
+                  <delay_constant max="18.96e-12" in_port="lut6.out" out_port="ff[1].D"/>
+                  <pack_pattern name="ble6" in_port="lut6.out" out_port="ff[1].D"/>
+                </direct>
+                <complete name="clock" input="ble6.clk" output="ff.clk"/>
+                <direct name="input_to_ff" input="ble6.in[0]" output="ff[0].D"/>
+                <mux name="mux1" input="ff[0].Q lut6.out" output="ble6.out[0]">
+                  <delay_constant max="39.85e-12" in_port="lut6.out" out_port="ble6.out[0]"/>
+                  <delay_constant max="39.85e-12" in_port="ff[0].Q" out_port="ble6.out[0]"/>
+                </mux>
+                <!-- This mux is the same as mux1 but drives ble6.out[1] -->
+                <mux name="mux2" input="ff[0].Q lut6.out" output="ble6.out[1]">
+                  <delay_constant max="39.85e-12" in_port="lut6.out" out_port="ble6.out[1]"/>
+                  <delay_constant max="39.85e-12" in_port="ff[0].Q" out_port="ble6.out[1]"/>
+                </mux>
+                <mux name="mux3" input="ff[1].Q lut6.out" output="ble6.out[2]">
+                  <delay_constant max="39.85e-12" in_port="lut6.out" out_port="ble6.out[2]"/>
+                  <delay_constant max="39.85e-12" in_port="ff[1].Q" out_port="ble6.out[2]"/>
+                </mux>
+                <!-- This mux is the same as mux3 but drives ble6.out[3] -->
+                <mux name="mux4" input="ff[1].Q lut6.out" output="ble6.out[3]">
+                  <delay_constant max="39.85e-12" in_port="lut6.out" out_port="ble6.out[3]"/>
+                  <delay_constant max="39.85e-12" in_port="ff[1].Q" out_port="ble6.out[3]"/>
+                </mux>
+              </interconnect>
+            </pb_type>
+            <interconnect>
+              <!-- ble6 takes inputs A, B, C, D, E, & F, where F is fle.in[7] -->
+              <direct name="lut6_inputs1" input="fle.in[4:0]" output="ble6.in[4:0]"/>
+              <direct name="lut6_inputs2" input="fle.in[7]" output="ble6.in[5]"/>
+              <direct name="direct2" input="ble6.out" output="fle.out"/>
+              <direct name="direct4" input="fle.clk" output="ble6.clk"/>
+            </interconnect>
+          </mode>
+          <!-- n1_lut6 -->
+        </pb_type>
+        <interconnect>
+          <!-- 50% sparsely populated local routing -->
+          <!-- This 50% sparsity pattern divides the cluster inputs and local feedbacks into four groups, 
+               and then selects two of the four groups to feed each LUT input. This means half of the cluster 
+               inputs and local feedbacks can feed each LUT input. There is partial overlap in the inputs that 
+               feed the various LUT inputs, which helps routability vs. simply having half the cluster inputs 
+               feed one set of half the LUT inputs and the other half of cluster inputs feed the other set of 
+               LUT inputs. This pattern is used by Stratix (I - 10) architectures. -->
+          <complete name="lutA" input="lab.I4 lab.I3" output="fle[9:0].in[0:0]">
+            <delay_constant max="74.71e-12" in_port="lab.I4" out_port="fle.in[0:0]"/>
+            <delay_constant max="74.71e-12" in_port="lab.I3" out_port="fle.in[0:0]"/>
+          </complete>
+          <complete name="lutB" input="lab.I3 lab.I2" output="fle[9:0].in[1:1]">
+            <delay_constant max="74.71e-12" in_port="lab.I3" out_port="fle.in[1:1]"/>
+            <delay_constant max="74.71e-12" in_port="lab.I2" out_port="fle.in[1:1]"/>
+          </complete>
+          <complete name="lutC" input="lab.I2 lab.I1" output="fle[9:0].in[2:2]">
+            <delay_constant max="74.71e-12" in_port="lab.I2" out_port="fle.in[2:2]"/>
+            <delay_constant max="74.71e-12" in_port="lab.I1" out_port="fle.in[2:2]"/>
+          </complete>
+          <complete name="lutD" input="lab.I4 lab.I2" output="fle[9:0].in[3:3]">
+            <delay_constant max="74.71e-12" in_port="lab.I4" out_port="fle.in[3:3]"/>
+            <delay_constant max="74.71e-12" in_port="lab.I2" out_port="fle.in[3:3]"/>
+          </complete>
+          <complete name="lutE" input="lab.I3 lab.I1" output="fle[9:0].in[4:4]">
+            <delay_constant max="74.71e-12" in_port="lab.I3" out_port="fle.in[4:4]"/>
+            <delay_constant max="74.71e-12" in_port="lab.I1" out_port="fle.in[4:4]"/>
+          </complete>
+          <complete name="lutF" input="lab.I4 lab.I1" output="fle[9:0].in[5:5]">
+            <delay_constant max="74.71e-12" in_port="lab.I4" out_port="fle.in[5:5]"/>
+            <delay_constant max="74.71e-12" in_port="lab.I1" out_port="fle.in[5:5]"/>
+          </complete>
+          <complete name="lutG" input="lab.I4 lab.I3" output="fle[9:0].in[6:6]">
+            <delay_constant max="74.71e-12" in_port="lab.I4" out_port="fle.in[6:6]"/>
+            <delay_constant max="74.71e-12" in_port="lab.I3" out_port="fle.in[6:6]"/>
+          </complete>
+          <complete name="lutH" input="lab.I3 lab.I2" output="fle[9:0].in[7:7]">
+            <delay_constant max="74.71e-12" in_port="lab.I3" out_port="fle.in[7:7]"/>
+            <delay_constant max="74.71e-12" in_port="lab.I2" out_port="fle.in[7:7]"/>
+          </complete>
+          <complete name="clks" input="lab.clk" output="fle[9:0].clk"/>
+          <!-- This way of specifying the direct connections to clb outputs is important because this architecture uses automatic spreading of opins.
+                     By grouping the output pins in this fashion, if a logic block is completely filled by 6-LUTs,
+                     then the outputs those 6-LUTs use get evenly distributed across all four sides of the CLB instead of being clumped on two sides (which is what happens with a more
+                     naive specification).
+              -->
+          <direct name="labouts1" input="fle[9:0].out[0]" output="lab.O[9:0]"/>
+          <direct name="labouts2" input="fle[9:0].out[1]" output="lab.O[19:10]"/>
+          <direct name="labouts3" input="fle[9:0].out[2]" output="lab.O[29:20]"/>
+          <direct name="labouts4" input="fle[9:0].out[3]" output="lab.O[39:30]"/>
+          <!-- Carry chain links -->
+          <direct name="carry_in" input="lab.cin" output="fle[0:0].cin">
+            <!-- Put all inter-block carry chain delay on this one edge -->
+            <delay_constant max="18.47e-12" in_port="lab.cin" out_port="fle[0:0].cin"/>
+            <pack_pattern name="chain" in_port="lab.cin" out_port="fle[0:0].cin"/>
+          </direct>
+          <direct name="carry_out" input="fle[9:9].cout" output="lab.cout">
+            <pack_pattern name="chain" in_port="fle[9:9].cout" out_port="lab.cout"/>
+          </direct>
+          <direct name="carry_link" input="fle[8:0].cout" output="fle[9:1].cin">
+            <pack_pattern name="chain" in_port="fle[8:0].cout" out_port="fle[9:1].cin"/>
+          </direct>
+        </interconnect>
+      </pb_type>
+      <interconnect> <!-- Wires the clb ports to the inner lab block, including local feedback from lab.O into the lab.I* inputs -->
+        <direct name="carry_in" input="clb.cin" output="lab.cin"/>
+        <direct name="carry_out" input="lab.cout" output="clb.cout"/>
+        <direct name="clock" input="clb.clk" output="lab.clk"/>
+        <complete name="Input_feedback_I1" input="clb.I1 lab.O[4:0]" output="lab.I1"/> <!-- Each lab.I* group is fed by its clb.I* port plus five lab.O feedback pins -->
+        <complete name="Input_feedback_I2" input="clb.I2 lab.O[24:20]" output="lab.I2"/>
+        <complete name="Input_feedback_I3" input="clb.I3 lab.O[9:5]" output="lab.I3"/>
+        <complete name="Input_feedback_I4" input="clb.I4 lab.O[29:25]" output="lab.I4"/>
+        <!-- Disabled alternative: plain clb.I* to lab.I* connections without the lab.O feedback
+        <direct name="Input_I1" input="clb.I1" output="lab.I1"/>
+        <direct name="Input_I2" input="clb.I2" output="lab.I2"/>
+        <direct name="Input_I3" input="clb.I3" output="lab.I3"/>
+        <direct name="Input_I4" input="clb.I4" output="lab.I4"/>
+        -->
+        <direct name="output" input="lab.O" output="clb.O"/>
+      </interconnect>
+    </pb_type>
+    <!-- Define general purpose logic block (CLB) ends -->
+
+    <!-- Define DSP slice begin -->
+    <pb_type name="dsp_top">
+      <input name="reset" num_pins="1" is_non_clock_global="true"/>
+      <input name="dsp_I1" num_pins="64" />
+      <input name="dsp_I2" num_pins="64" />
+      <input name="chainin" num_pins="64"/>
+      <input name="scanin" num_pins="27"/>
+      <output name="result" num_pins="74"/>
+      <output name="chainout" num_pins="64"/>
+      <output name="scanout" num_pins="27"/>
+      <clock name="clk" num_pins="1"/>
+
+    <pb_type name="dsp" num_pb="1">
+      <input name="reset" num_pins="1"/>
+      <input name="dsp_I1" num_pins="64"/>
+      <input name="dsp_I2" num_pins="64"/>
+      <input name="chainin" num_pins="64"/>
+      <input name="scanin" num_pins="27"/>
+      <output name="result" num_pins="74"/>
+      <output name="chainout" num_pins="64"/>
+      <output name="scanout" num_pins="27"/>
+      <clock name="clk" num_pins="1"/>
+
+      <pb_type name="dsp_pb" num_pb="1">
+        <input name="reset" num_pins="1"/>
+        <input name="mode_sigs" num_pins="12"/>
+        <input name="datain" num_pins="116"/>
+        <input name="chainin" num_pins="64"/>
+        <input name="scanin" num_pins="27"/>
+        <output name="result" num_pins="74"/>
+        <output name="chainout" num_pins="64"/>
+        <output name="scanout" num_pins="27"/>
+        <clock name="clk" num_pins="1"/>
+
+        <!-- fixed-point multiplier mode (1 27x27 multiplier) result = ax*ay -->
+        <mode name="one_mult_27x27">
+          <pb_type name="one_mult_27x27" num_pb="1">
+            <input name="a" num_pins="27"/>
+            <input name="b" num_pins="27"/>
+            <output name="out" num_pins="54"/>
+            <pb_type name="mult_27x27" blif_model=".subckt multiply" num_pb="1">
+              <input name="a" num_pins="27"/>
+              <input name="b" num_pins="27"/>
+              <output name="out" num_pins="54"/>
+              <delay_constant max="2.14e-9" in_port="mult_27x27.a" out_port="mult_27x27.out"/>
+              <delay_constant max="2.14e-9" in_port="mult_27x27.b" out_port="mult_27x27.out"/>
+            </pb_type>
+            <interconnect>
+              <direct name="a2a" input="one_mult_27x27.a" output="mult_27x27.a">
+              </direct>
+              <direct name="b2b" input="one_mult_27x27.b" output="mult_27x27.b">
+              </direct>
+              <direct name="out2out" input="mult_27x27.out" output="one_mult_27x27.out">
+              </direct>
+            </interconnect>
+          </pb_type>
+          <interconnect>
+            <direct name="datain2a" input="dsp_pb.datain[26:0]" output="one_mult_27x27.a">
+            </direct>
+            <direct name="datain2b" input="dsp_pb.datain[53:27]" output="one_mult_27x27.b">
+            </direct>
+            <direct name="out2dataout" input="one_mult_27x27.out" output="dsp_pb.result[53:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- fixed-point multiplier mode (2 18x19 multipliers): two independent 37-bit products (ax*ay and bx*by) that together fill result[73:0] -->
+        <mode name="two_mult_18x19">
+          <pb_type name="two_mult_18x19" num_pb="2">
+            <input name="a" num_pins="18"/>
+            <input name="b" num_pins="19"/>
+            <output name="out" num_pins="37"/>
+            <pb_type name="mult_18x19" blif_model=".subckt multiply" num_pb="1">
+              <input name="a" num_pins="18"/>
+              <input name="b" num_pins="19"/>
+              <output name="out" num_pins="37"/>
+              <delay_constant max="2.14e-9" in_port="mult_18x19.a" out_port="mult_18x19.out"/>
+              <delay_constant max="2.14e-9" in_port="mult_18x19.b" out_port="mult_18x19.out"/>
+            </pb_type>
+            <interconnect>
+              <direct name="a2a" input="two_mult_18x19.a" output="mult_18x19.a">
+                 </direct>
+              <direct name="b2b" input="two_mult_18x19.b" output="mult_18x19.b">
+                 </direct>
+              <direct name="out2out" input="mult_18x19.out" output="two_mult_18x19.out">
+                 </direct>
+            </interconnect>
+          </pb_type>
+          <interconnect>
+            <direct name="datain2a1" input="dsp_pb.datain[17:0]" output="two_mult_18x19[0].a">
+            </direct>
+            <direct name="datain2b1" input="dsp_pb.datain[36:18]" output="two_mult_18x19[0].b">
+            </direct>
+            <direct name="datain2a2" input="dsp_pb.datain[54:37]" output="two_mult_18x19[1].a">
+            </direct>
+            <direct name="datain2b2" input="dsp_pb.datain[73:55]" output="two_mult_18x19[1].b">
+            </direct>
+            <direct name="out2result" input="two_mult_18x19.out" output="dsp_pb.result[73:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- Fixed point multiplier mode (4 9x9 multipliers): multiplier i computes a*b into its own 18-bit slice of result[71:0] -->
+        <mode name="mult_9x9_fixed_pt_mode">
+          <pb_type name="mult_9x9_fixed_pt" blif_model=".subckt multiply" num_pb="4">
+            <input name="a" num_pins="9"/>
+            <input name="b" num_pins="9"/>
+            <output name="out" num_pins="18"/>
+
+            <delay_constant max="2.14e-9" in_port="mult_9x9_fixed_pt.a" out_port="mult_9x9_fixed_pt.out"/>
+            <delay_constant max="2.14e-9" in_port="mult_9x9_fixed_pt.b" out_port="mult_9x9_fixed_pt.out"/>
+          </pb_type>
+          <interconnect>
+            <direct name="atoa0" input="dsp_pb.datain[8:0]" output="mult_9x9_fixed_pt[0].a"/>
+            <direct name="btob0" input="dsp_pb.datain[17:9]" output="mult_9x9_fixed_pt[0].b"/>
+            <direct name="atoa1" input="dsp_pb.datain[26:18]" output="mult_9x9_fixed_pt[1].a"/>
+            <direct name="btob1" input="dsp_pb.datain[35:27]" output="mult_9x9_fixed_pt[1].b"/>
+            <direct name="atoa2" input="dsp_pb.datain[44:36]" output="mult_9x9_fixed_pt[2].a"/>
+            <direct name="btob2" input="dsp_pb.datain[53:45]" output="mult_9x9_fixed_pt[2].b"/>
+            <direct name="atoa3" input="dsp_pb.datain[62:54]" output="mult_9x9_fixed_pt[3].a"/>
+            <direct name="btob3" input="dsp_pb.datain[71:63]" output="mult_9x9_fixed_pt[3].b"/>
+            <direct name="sumouttosumout0" input="mult_9x9_fixed_pt[0].out" output="dsp_pb.result[17:0]"/>
+            <direct name="sumouttosumout1" input="mult_9x9_fixed_pt[1].out" output="dsp_pb.result[35:18]"/>
+            <direct name="sumouttosumout2" input="mult_9x9_fixed_pt[2].out" output="dsp_pb.result[53:36]"/>
+            <direct name="sumouttosumout3" input="mult_9x9_fixed_pt[3].out" output="dsp_pb.result[71:54]"/>
+          </interconnect>
+        </mode>
+
+        <!-- fixed-point multiplier-add-sum mode result = (bx * by) + (ax * ay) + chainin. chainout = result -->
+        <mode name="sop_2_mode">
+          <pb_type name="sop_2" num_pb="1" blif_model=".subckt int_sop_2">
+            <input name="reset" num_pins="1"/>
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="ax" num_pins="18"/>
+            <input name="ay" num_pins="19"/>
+            <input name="bx" num_pins="18"/>
+            <input name="by" num_pins="19"/>
+            <input name="chainin" num_pins="37"/>
+            <output name="result" num_pins="37"/>
+            <output name="chainout" num_pins="37"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.14e-9" in_port="sop_2.reset" out_port="sop_2.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.mode_sigs" out_port="sop_2.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.ax" out_port="sop_2.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.ay" out_port="sop_2.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.bx" out_port="sop_2.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.by" out_port="sop_2.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.chainin" out_port="sop_2.result"/>
+
+            <delay_constant max="2.14e-9" in_port="sop_2.reset" out_port="sop_2.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.mode_sigs" out_port="sop_2.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.ax" out_port="sop_2.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.ay" out_port="sop_2.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.bx" out_port="sop_2.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.by" out_port="sop_2.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_2.chainin" out_port="sop_2.chainout"/>
+
+            <T_setup value="18.91e-12" port="sop_2.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_2.ax" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_2.ay" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_2.bx" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_2.by" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_2.chainin" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_2.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_2.result" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.ax" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.ay" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.bx" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.by" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.chainin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="clk" input="dsp_pb.clk" output="sop_2.clk">
+            </direct>
+            <direct name="reset" input="dsp_pb.reset" output="sop_2.reset">
+            </direct>
+            <direct name="modesigs" input="dsp_pb.mode_sigs" output="sop_2.mode_sigs">
+            </direct>
+            <direct name="datain2ax" input="dsp_pb.datain[17:0]" output="sop_2.ax">
+            </direct>
+            <direct name="datain2ay" input="dsp_pb.datain[36:18]" output="sop_2.ay">
+            </direct>
+            <direct name="datain2bx" input="dsp_pb.datain[54:37]" output="sop_2.bx">
+            </direct>
+            <direct name="datain2by" input="dsp_pb.datain[73:55]" output="sop_2.by">
+            </direct>
+            <direct name="chainin"   input="dsp_pb.chainin[36:0]" output="sop_2.chainin">
+            </direct>
+            <direct name="dataout2result" input="sop_2.result" output="dsp_pb.result[36:0]">
+            </direct>
+            <direct name="chainout" input="sop_2.chainout" output="dsp_pb.chainout[36:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- fixed-point multiplier-add-sum mode (18x19 multiplier) result = (ax * ay) + bx + chainin. chainout = result. with scanin-scanout support -->
+        <mode name="mult_add_mode_18_19_36">
+          <pb_type name="mult_add" num_pb="1" blif_model=".subckt mult_add_int_18x19">
+            <input name="reset" num_pins="1"/>
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="ax" num_pins="18"/>
+            <input name="ay" num_pins="19"/>
+            <input name="bx" num_pins="36"/>
+            <input name="chainin" num_pins="64"/>
+            <input name="scanin" num_pins="19"/>
+            <output name="result" num_pins="64"/>
+            <output name="chainout" num_pins="64"/>
+            <output name="scanout" num_pins="19"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.14e-9" in_port="mult_add.reset" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.mode_sigs" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ax" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ay" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.bx" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.chainin" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.scanin" out_port="mult_add.result"/>
+
+            <delay_constant max="2.14e-9" in_port="mult_add.reset" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.mode_sigs" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ax" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ay" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.bx" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.chainin" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.scanin" out_port="mult_add.chainout"/>
+
+            <delay_constant max="2.14e-9" in_port="mult_add.reset" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.mode_sigs" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ax" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ay" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.bx" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.chainin" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.scanin" out_port="mult_add.scanout"/>
+
+            <T_setup value="18.91e-12" port="mult_add.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.ax" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.ay" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.bx" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.chainin" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.scanin" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.result" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.ax" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.ay" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.bx" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.chainin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.scanin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="clk" input="dsp_pb.clk" output="mult_add.clk">
+            </direct>
+            <direct name="reset" input="dsp_pb.reset" output="mult_add.reset">
+            </direct>
+            <direct name="modesigs" input="dsp_pb.mode_sigs" output="mult_add.mode_sigs">
+            </direct>
+            <direct name="datain2ax" input="dsp_pb.datain[17:0]" output="mult_add.ax">
+            </direct>
+            <direct name="datain2ay" input="dsp_pb.datain[36:18]" output="mult_add.ay">
+            </direct>
+            <direct name="datain2bx" input="dsp_pb.datain[72:37]" output="mult_add.bx">
+            </direct>
+            <direct name="chainin"   input="dsp_pb.chainin[63:0]" output="mult_add.chainin">
+            </direct>
+            <direct name="scanin"   input="dsp_pb.scanin[18:0]" output="mult_add.scanin">
+            </direct>
+            <direct name="dataout2result" input="mult_add.result" output="dsp_pb.result[63:0]">
+            </direct>
+            <direct name="chainout" input="mult_add.chainout" output="dsp_pb.chainout[63:0]">
+            </direct>
+            <direct name="scanout" input="mult_add.scanout" output="dsp_pb.scanout[18:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- fixed-point multiplier-add-sum mode (27x27 multiplier) result = (ax * ay) + bx + chainin. chainout = result. with scanin-scanout support -->
+        <mode name="mult_add_mode_27_27_64">
+          <pb_type name="mult_add" num_pb="1" blif_model=".subckt mult_add_int_27x27">
+            <input name="reset" num_pins="1"/>
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="ax" num_pins="27"/>
+            <input name="ay" num_pins="27"/>
+            <input name="bx" num_pins="36"/>
+            <input name="chainin" num_pins="64"/>
+            <input name="scanin" num_pins="27"/>
+            <output name="result" num_pins="64"/>
+            <output name="chainout" num_pins="64"/>
+            <output name="scanout" num_pins="27"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.14e-9" in_port="mult_add.reset" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.mode_sigs" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ax" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ay" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.bx" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.chainin" out_port="mult_add.result"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.scanin" out_port="mult_add.result"/>
+
+            <delay_constant max="2.14e-9" in_port="mult_add.reset" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.mode_sigs" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ax" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ay" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.bx" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.chainin" out_port="mult_add.chainout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.scanin" out_port="mult_add.chainout"/>
+
+            <delay_constant max="2.14e-9" in_port="mult_add.reset" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.mode_sigs" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ax" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.ay" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.bx" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.chainin" out_port="mult_add.scanout"/>
+            <delay_constant max="2.14e-9" in_port="mult_add.scanin" out_port="mult_add.scanout"/>
+
+            <T_setup value="18.91e-12" port="mult_add.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.ax" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.ay" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.bx" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.chainin" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.scanin" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_add.result" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.ax" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.ay" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.bx" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.chainin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.scanin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="clk" input="dsp_pb.clk" output="mult_add.clk">
+            </direct>
+            <direct name="reset" input="dsp_pb.reset" output="mult_add.reset">
+            </direct>
+            <direct name="modesigs" input="dsp_pb.mode_sigs" output="mult_add.mode_sigs">
+            </direct>
+            <direct name="datain2ax" input="dsp_pb.datain[26:0]" output="mult_add.ax">
+            </direct>
+            <direct name="datain2ay" input="dsp_pb.datain[53:27]" output="mult_add.ay">
+            </direct>
+            <direct name="datain2bx" input="dsp_pb.datain[89:54]" output="mult_add.bx">
+            </direct>
+            <direct name="chainin"   input="dsp_pb.chainin[63:0]" output="mult_add.chainin">
+            </direct>
+            <direct name="scanin"   input="dsp_pb.scanin[26:0]" output="mult_add.scanin">
+            </direct>
+            <direct name="dataout2result" input="mult_add.result" output="dsp_pb.result[63:0]">
+            </direct>
+            <direct name="chainout" input="mult_add.chainout" output="dsp_pb.chainout[63:0]">
+            </direct>
+            <direct name="scanout" input="mult_add.scanout" output="dsp_pb.scanout[26:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- fixed-point sum-of-4 mode result = (dx * dy) + (cx * cy) + (bx * by) + (ax * ay) + chainin. chainout = result -->
+        <mode name="sop_4_mode"> <!-- one sop_4 primitive (.subckt int_sop_4): eight 9-bit operands ax..dy plus a 64-bit chainin -->
+          <pb_type name="sop_4" num_pb="1" blif_model=".subckt int_sop_4">
+            <input name="reset" num_pins="1"/>
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="ax" num_pins="9"/>
+            <input name="ay" num_pins="9"/>
+            <input name="bx" num_pins="9"/>
+            <input name="by" num_pins="9"/>
+            <input name="cx" num_pins="9"/>
+            <input name="cy" num_pins="9"/>
+            <input name="dx" num_pins="9"/>
+            <input name="dy" num_pins="9"/>
+            <input name="chainin" num_pins="64"/>
+            <output name="result" num_pins="64"/>
+            <output name="chainout" num_pins="64"/>
+            <clock name="clk" num_pins="1"/>
+            <!-- delay_constant (max 2.14e-9) from every input port to sop_4.result -->
+            <delay_constant max="2.14e-9" in_port="sop_4.reset" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.mode_sigs" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.ax" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.ay" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.bx" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.by" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.cx" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.cy" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.dx" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.dy" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.chainin" out_port="sop_4.result"/>
+            <!-- the same delay_constant value from every input port to sop_4.chainout -->
+            <delay_constant max="2.14e-9" in_port="sop_4.reset" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.mode_sigs" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.ax" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.ay" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.bx" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.by" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.cx" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.cy" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.dx" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.dy" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.chainin" out_port="sop_4.chainout"/>
+            <!-- T_setup (18.91e-12) against clk for every input port and for result; chainout is not listed -->
+            <T_setup value="18.91e-12" port="sop_4.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.ax" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.ay" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.bx" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.by" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.cx" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.cy" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.dx" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.dy" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.chainin" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.result" clock="clk"/>
+            <!-- T_clock_to_Q (min = max = 60.32e-12) against clk for the same set of ports -->
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.ax" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.ay" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.bx" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.by" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.cx" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.cy" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.dx" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.dy" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.chainin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.result" clock="clk"/>
+          </pb_type>
+          <interconnect> <!-- dsp_pb.datain[71:0] is cut into eight consecutive 9-bit slices feeding ax, ay, bx, by, cx, cy, dx, dy in that order -->
+            <direct name="clk" input="dsp_pb.clk" output="sop_4.clk">
+            </direct>
+            <direct name="reset" input="dsp_pb.reset" output="sop_4.reset">
+            </direct>
+            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="sop_4.mode_sigs">
+            </direct>
+            <direct name="datain2ax" input="dsp_pb.datain[8:0]" output="sop_4.ax">
+            </direct>
+            <direct name="datain2ay" input="dsp_pb.datain[17:9]" output="sop_4.ay">
+            </direct>
+            <direct name="datain2bx" input="dsp_pb.datain[26:18]" output="sop_4.bx">
+            </direct>
+            <direct name="datain2by" input="dsp_pb.datain[35:27]" output="sop_4.by">
+            </direct>
+            <direct name="datain2cx" input="dsp_pb.datain[44:36]" output="sop_4.cx">
+            </direct>
+            <direct name="datain2cy" input="dsp_pb.datain[53:45]" output="sop_4.cy">
+            </direct>
+            <direct name="datain2dx" input="dsp_pb.datain[62:54]" output="sop_4.dx">
+            </direct>
+            <direct name="datain2dy" input="dsp_pb.datain[71:63]" output="sop_4.dy">
+            </direct>
+            <direct name="chainin"   input="dsp_pb.chainin[63:0]" output="sop_4.chainin">
+            </direct>
+            <direct name="dataout2result" input="sop_4.result" output="dsp_pb.result[63:0]">
+            </direct>
+            <direct name="chainout" input="sop_4.chainout" output="dsp_pb.chainout[63:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- fixed-point sum-of-4 accum mode result = (dx * dy) + (cx * cy) + (bx * by) + (ax * ay) + chainin + accumulator. chainout = result -->
+        <mode name="sop_4_accum_mode"> <!-- same port list as sop_4_mode, but the sop_4 primitive maps to .subckt int_sop_accum_4 -->
+          <pb_type name="sop_4" num_pb="1" blif_model=".subckt int_sop_accum_4">
+            <input name="reset" num_pins="1"/>
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="ax" num_pins="9"/>
+            <input name="ay" num_pins="9"/>
+            <input name="bx" num_pins="9"/>
+            <input name="by" num_pins="9"/>
+            <input name="cx" num_pins="9"/>
+            <input name="cy" num_pins="9"/>
+            <input name="dx" num_pins="9"/>
+            <input name="dy" num_pins="9"/>
+            <input name="chainin" num_pins="64"/>
+            <output name="result" num_pins="64"/>
+            <output name="chainout" num_pins="64"/>
+            <clock name="clk" num_pins="1"/>
+            <!-- delay_constant (max 2.14e-9) from every input port to sop_4.result -->
+            <delay_constant max="2.14e-9" in_port="sop_4.mode_sigs" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.ax" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.ay" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.bx" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.by" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.cx" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.cy" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.dx" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.dy" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.chainin" out_port="sop_4.result"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.reset" out_port="sop_4.result"/>
+            <!-- the same delay_constant value from every input port to sop_4.chainout -->
+            <delay_constant max="2.14e-9" in_port="sop_4.mode_sigs" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.ax" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.ay" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.bx" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.by" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.cx" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.cy" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.dx" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.dy" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.chainin" out_port="sop_4.chainout"/>
+            <delay_constant max="2.14e-9" in_port="sop_4.reset" out_port="sop_4.chainout"/>
+            <!-- T_setup (18.91e-12) against clk for every input port and for result; chainout is not listed -->
+            <T_setup value="18.91e-12" port="sop_4.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.ax" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.ay" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.bx" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.by" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.cx" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.cy" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.dx" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.dy" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.chainin" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="sop_4.result" clock="clk"/>
+            <!-- T_clock_to_Q (min = max = 60.32e-12) against clk for the same set of ports -->
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.ax" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.ay" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.bx" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.by" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.cx" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.cy" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.dx" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.dy" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.chainin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.result" clock="clk"/>
+          </pb_type>
+          <interconnect> <!-- dsp_pb.datain[71:0] is cut into eight consecutive 9-bit slices feeding ax, ay, bx, by, cx, cy, dx, dy in that order -->
+            <direct name="clk" input="dsp_pb.clk" output="sop_4.clk">
+            </direct>
+            <direct name="reset" input="dsp_pb.reset" output="sop_4.reset">
+            </direct>
+            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="sop_4.mode_sigs">
+            </direct>
+            <direct name="datain2ax" input="dsp_pb.datain[8:0]" output="sop_4.ax">
+            </direct>
+            <direct name="datain2ay" input="dsp_pb.datain[17:9]" output="sop_4.ay">
+            </direct>
+            <direct name="datain2bx" input="dsp_pb.datain[26:18]" output="sop_4.bx">
+            </direct>
+            <direct name="datain2by" input="dsp_pb.datain[35:27]" output="sop_4.by">
+            </direct>
+            <direct name="datain2cx" input="dsp_pb.datain[44:36]" output="sop_4.cx">
+            </direct>
+            <direct name="datain2cy" input="dsp_pb.datain[53:45]" output="sop_4.cy">
+            </direct>
+            <direct name="datain2dx" input="dsp_pb.datain[62:54]" output="sop_4.dx">
+            </direct>
+            <direct name="datain2dy" input="dsp_pb.datain[71:63]" output="sop_4.dy">
+            </direct>
+            <direct name="chainin"   input="dsp_pb.chainin[63:0]" output="sop_4.chainin">
+            </direct>
+            <direct name="dataout2result" input="sop_4.result" output="dsp_pb.result[63:0]">
+            </direct>
+            <direct name="chainout" input="sop_4.chainout" output="dsp_pb.chainout[63:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- Fixed-point 27x27 MAC mode (result = a*b + accumulated value) -->
+        <mode name="mac_27x27_fixed_pt_mode"> <!-- one mac_27x27_fixed_pt primitive (.subckt mac_int_27x27): 27-bit a and b, 54-bit out -->
+          <pb_type name="mac_27x27_fixed_pt" blif_model=".subckt mac_int_27x27" num_pb="1">
+            <input name="reset" num_pins="1"/>
+            <input name="a" num_pins="27"/>
+            <input name="b" num_pins="27"/>
+            <output name="out" num_pins="54"/>
+            <clock name="clk" num_pins="1"/>
+            <!-- delay_constant (max 2.14e-9) from a, b and reset to out -->
+            <delay_constant max="2.14e-9" in_port="mac_27x27_fixed_pt.a" out_port="mac_27x27_fixed_pt.out"/>
+            <delay_constant max="2.14e-9" in_port="mac_27x27_fixed_pt.b" out_port="mac_27x27_fixed_pt.out"/>
+            <delay_constant max="2.14e-9" in_port="mac_27x27_fixed_pt.reset" out_port="mac_27x27_fixed_pt.out"/>
+            <!-- T_setup (18.91e-12) against clk for reset, a, b and also for out -->
+            <T_setup value="18.91e-12" port="mac_27x27_fixed_pt.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_27x27_fixed_pt.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_27x27_fixed_pt.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_27x27_fixed_pt.out" clock="clk"/>
+            <!-- T_clock_to_Q (min = max = 60.32e-12) against clk for the same four ports -->
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_27x27_fixed_pt.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_27x27_fixed_pt.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_27x27_fixed_pt.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_27x27_fixed_pt.out" clock="clk"/>
+          </pb_type>
+          <interconnect> <!-- a = datain[26:0], b = datain[53:27], out drives result[53:0]; mode_sigs and the chain ports are not connected here -->
+            <direct name="reset" input="dsp_pb.reset" output="mac_27x27_fixed_pt.reset"/>
+            <direct name="clk" input="dsp_pb.clk" output="mac_27x27_fixed_pt.clk"/>
+            <direct name="atoa" input="dsp_pb.datain[26:0]" output="mac_27x27_fixed_pt.a"/>
+            <direct name="btob" input="dsp_pb.datain[53:27]" output="mac_27x27_fixed_pt.b"/>
+            <direct name="sumouttosumout" input="mac_27x27_fixed_pt.out" output="dsp_pb.result[53:0]"/>
+          </interconnect>
+        </mode>
+
+        <!-- Fixed-point 18x19 MAC mode, two instances (result = a*b + accumulated value) -->
+        <mode name="mac_18x19_fixed_pt_mode"> <!-- two mac_fixed_pt primitives (.subckt mac_int_18x19): 18-bit a, 19-bit b, 37-bit out each -->
+          <pb_type name="mac_fixed_pt" blif_model=".subckt mac_int_18x19" num_pb="2">
+            <input name="reset" num_pins="1"/>
+            <input name="a" num_pins="18"/>
+            <input name="b" num_pins="19"/>
+            <output name="out" num_pins="37"/>
+            <clock name="clk" num_pins="1"/>
+            <!-- delay_constant (max 2.14e-9) from a, b and reset to out -->
+            <delay_constant max="2.14e-9" in_port="mac_fixed_pt.a" out_port="mac_fixed_pt.out"/>
+            <delay_constant max="2.14e-9" in_port="mac_fixed_pt.b" out_port="mac_fixed_pt.out"/>
+            <delay_constant max="2.14e-9" in_port="mac_fixed_pt.reset" out_port="mac_fixed_pt.out"/>
+            <!-- T_setup (18.91e-12) against clk for reset, a, b and also for out -->
+            <T_setup value="18.91e-12" port="mac_fixed_pt.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_fixed_pt.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_fixed_pt.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_fixed_pt.out" clock="clk"/>
+            <!-- T_clock_to_Q (min = max = 60.32e-12) against clk for the same four ports -->
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fixed_pt.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fixed_pt.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fixed_pt.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fixed_pt.out" clock="clk"/>
+          </pb_type>
+          <interconnect> <!-- instance 0 uses datain[36:0], instance 1 uses datain[73:37]; NOTE(review): result[73:37] needs dsp_pb.result to be at least 74 pins wide, confirm against the dsp_pb declaration -->
+            <direct name="reset0" input="dsp_pb.reset" output="mac_fixed_pt[0].reset"/>
+            <direct name="reset1" input="dsp_pb.reset" output="mac_fixed_pt[1].reset"/>
+            <direct name="clk0" input="dsp_pb.clk" output="mac_fixed_pt[0].clk"/>
+            <direct name="clk1" input="dsp_pb.clk" output="mac_fixed_pt[1].clk"/>
+            <direct name="atoa0" input="dsp_pb.datain[17:0]" output="mac_fixed_pt[0].a"/>
+            <direct name="btob0" input="dsp_pb.datain[36:18]" output="mac_fixed_pt[0].b"/>
+            <direct name="atoa1" input="dsp_pb.datain[54:37]" output="mac_fixed_pt[1].a"/>
+            <direct name="btob1" input="dsp_pb.datain[73:55]" output="mac_fixed_pt[1].b"/>
+            <direct name="sumouttosumout0" input="mac_fixed_pt[0].out" output="dsp_pb.result[36:0]"/>
+            <direct name="sumouttosumout1" input="mac_fixed_pt[1].out" output="dsp_pb.result[73:37]"/>
+          </interconnect>
+        </mode>
+
+        <!-- Fixed-point 9x9 MAC mode, four instances (result = a*b + accumulated value) -->
+        <mode name="mac_9x9_fixed_pt_mode"> <!-- four mac_9x9_fixed_pt primitives (.subckt mac_int_9x9): 9-bit a and b, 18-bit out each -->
+          <pb_type name="mac_9x9_fixed_pt" blif_model=".subckt mac_int_9x9" num_pb="4">
+            <input name="reset" num_pins="1"/>
+            <input name="a" num_pins="9"/>
+            <input name="b" num_pins="9"/>
+            <output name="out" num_pins="18"/>
+            <clock name="clk" num_pins="1"/>
+            <!-- delay_constant (max 2.14e-9) from a, b and reset to out -->
+            <delay_constant max="2.14e-9" in_port="mac_9x9_fixed_pt.a" out_port="mac_9x9_fixed_pt.out"/>
+            <delay_constant max="2.14e-9" in_port="mac_9x9_fixed_pt.b" out_port="mac_9x9_fixed_pt.out"/>
+            <delay_constant max="2.14e-9" in_port="mac_9x9_fixed_pt.reset" out_port="mac_9x9_fixed_pt.out"/>
+            <!-- T_setup (18.91e-12) against clk for reset, a, b and also for out -->
+            <T_setup value="18.91e-12" port="mac_9x9_fixed_pt.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_9x9_fixed_pt.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_9x9_fixed_pt.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_9x9_fixed_pt.out" clock="clk"/>
+            <!-- T_clock_to_Q (min = max = 60.32e-12) against clk for the same four ports -->
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_9x9_fixed_pt.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_9x9_fixed_pt.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_9x9_fixed_pt.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_9x9_fixed_pt.out" clock="clk"/>
+          </pb_type>
+          <interconnect> <!-- instance k takes a from datain[18k+8:18k], b from datain[18k+17:18k+9] and drives result[18k+17:18k], for k = 0..3 -->
+            <direct name="reset0" input="dsp_pb.reset" output="mac_9x9_fixed_pt[0].reset"/>
+            <direct name="reset1" input="dsp_pb.reset" output="mac_9x9_fixed_pt[1].reset"/>
+            <direct name="reset2" input="dsp_pb.reset" output="mac_9x9_fixed_pt[2].reset"/>
+            <direct name="reset3" input="dsp_pb.reset" output="mac_9x9_fixed_pt[3].reset"/>
+            <direct name="clk0" input="dsp_pb.clk" output="mac_9x9_fixed_pt[0].clk"/>
+            <direct name="clk1" input="dsp_pb.clk" output="mac_9x9_fixed_pt[1].clk"/>
+            <direct name="clk2" input="dsp_pb.clk" output="mac_9x9_fixed_pt[2].clk"/>
+            <direct name="clk3" input="dsp_pb.clk" output="mac_9x9_fixed_pt[3].clk"/>
+            <direct name="atoa0" input="dsp_pb.datain[8:0]" output="mac_9x9_fixed_pt[0].a"/>
+            <direct name="btob0" input="dsp_pb.datain[17:9]" output="mac_9x9_fixed_pt[0].b"/>
+            <direct name="atoa1" input="dsp_pb.datain[26:18]" output="mac_9x9_fixed_pt[1].a"/>
+            <direct name="btob1" input="dsp_pb.datain[35:27]" output="mac_9x9_fixed_pt[1].b"/>
+            <direct name="atoa2" input="dsp_pb.datain[44:36]" output="mac_9x9_fixed_pt[2].a"/>
+            <direct name="btob2" input="dsp_pb.datain[53:45]" output="mac_9x9_fixed_pt[2].b"/>
+            <direct name="atoa3" input="dsp_pb.datain[62:54]" output="mac_9x9_fixed_pt[3].a"/>
+            <direct name="btob3" input="dsp_pb.datain[71:63]" output="mac_9x9_fixed_pt[3].b"/>
+            <direct name="sumouttosumout0" input="mac_9x9_fixed_pt[0].out" output="dsp_pb.result[17:0]"/>
+            <direct name="sumouttosumout1" input="mac_9x9_fixed_pt[1].out" output="dsp_pb.result[35:18]"/>
+            <direct name="sumouttosumout2" input="mac_9x9_fixed_pt[2].out" output="dsp_pb.result[53:36]"/>
+            <direct name="sumouttosumout3" input="mac_9x9_fixed_pt[3].out" output="dsp_pb.result[71:54]"/>
+          </interconnect>
+        </mode>
+
+        <!-- floating point multiplier mode (result = a * b)-->
+        <mode name="mult_fp32_mode"> <!-- one mult_fp32 primitive (.subckt mult_fp_32) with 32-bit a, b and out; no clock port is declared in this mode -->
+          <pb_type name="mult_fp32" blif_model=".subckt mult_fp_32" num_pb="1">
+            <input name="a" num_pins="32"/>
+            <input name="b" num_pins="32"/>
+            <output name="out" num_pins="32"/>
+            <!-- delay_constant (max 2.56e-9) from a and from b to out -->
+            <delay_constant max="2.56e-9" in_port="mult_fp32.a" out_port="mult_fp32.out"/>
+            <delay_constant max="2.56e-9" in_port="mult_fp32.b" out_port="mult_fp32.out"/>
+          </pb_type>
+          <interconnect> <!-- a = datain[31:0], b = datain[63:32], out drives result[31:0] -->
+            <direct name="a2a" input="dsp_pb.datain[31:0]" output="mult_fp32.a">
+            </direct>
+            <direct name="b2b" input="dsp_pb.datain[63:32]" output="mult_fp32.b">
+            </direct>
+            <direct name="out2out" input="mult_fp32.out" output="dsp_pb.result[31:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- 16-bit floating point multiplier mode, two instances (result = a * b each) -->
+        <mode name="mult_fp16_mode"> <!-- two mult_fp16 primitives (.subckt mult_fp_16) with 16-bit a, b and out; no clock port is declared in this mode -->
+          <pb_type name="mult_fp16" blif_model=".subckt mult_fp_16" num_pb="2">
+            <input name="a" num_pins="16"/>
+            <input name="b" num_pins="16"/>
+            <output name="out" num_pins="16"/>
+            <!-- delay_constant (max 2.56e-9) from a and from b to out -->
+            <delay_constant max="2.56e-9" in_port="mult_fp16.a" out_port="mult_fp16.out"/>
+            <delay_constant max="2.56e-9" in_port="mult_fp16.b" out_port="mult_fp16.out"/>
+          </pb_type>
+          <interconnect> <!-- both a operands sit in datain[31:0] and both b operands in datain[63:32]; instance 0 takes the lower 16 bits of each half -->
+            <direct name="a2a0" input="dsp_pb.datain[15:0]" output="mult_fp16[0].a"></direct>
+            <direct name="a2a1" input="dsp_pb.datain[31:16]" output="mult_fp16[1].a"></direct>
+            <direct name="b2b0" input="dsp_pb.datain[47:32]" output="mult_fp16[0].b"></direct>
+            <direct name="b2b1" input="dsp_pb.datain[63:48]" output="mult_fp16[1].b"></direct>
+            <direct name="out2out0" input="mult_fp16[0].out" output="dsp_pb.result[15:0]"></direct>
+            <direct name="out2out1" input="mult_fp16[1].out" output="dsp_pb.result[31:16]"></direct>
+          </interconnect>
+        </mode>
+
+        <!-- floating point adder mode (result = a + b)-->
+        <mode name="adder_fp32_mode"> <!-- one adder_fp32 primitive (.subckt addition_fp_32) with 32-bit a, b and out; no clock port is declared in this mode -->
+          <pb_type name="adder_fp32" blif_model=".subckt addition_fp_32" num_pb="1">
+            <input name="a" num_pins="32"/>
+            <input name="b" num_pins="32"/>
+            <output name="out" num_pins="32"/>
+            <!-- delay_constant (max 2.56e-9) from a and from b to out -->
+            <delay_constant max="2.56e-9" in_port="adder_fp32.a" out_port="adder_fp32.out"/>
+            <delay_constant max="2.56e-9" in_port="adder_fp32.b" out_port="adder_fp32.out"/>
+          </pb_type>
+          <interconnect> <!-- a = datain[31:0], b = datain[63:32], out drives result[31:0] -->
+            <direct name="atoa" input="dsp_pb.datain[31:0]" output="adder_fp32.a">
+            </direct>
+            <direct name="btob" input="dsp_pb.datain[63:32]" output="adder_fp32.b">
+            </direct>
+            <direct name="sumouttosumout" input="adder_fp32.out" output="dsp_pb.result[31:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- 16-bit floating point adder mode, two instances (result = a + b each) -->
+        <mode name="adder_fp16_mode"> <!-- two adder_fp16 primitives (.subckt addition_fp_16) with 16-bit a, b and out; no clock port is declared in this mode -->
+          <pb_type name="adder_fp16" blif_model=".subckt addition_fp_16" num_pb="2">
+            <input name="a" num_pins="16"/>
+            <input name="b" num_pins="16"/>
+            <output name="out" num_pins="16"/>
+            <!-- delay_constant (max 2.56e-9) from a and from b to out -->
+            <delay_constant max="2.56e-9" in_port="adder_fp16.a" out_port="adder_fp16.out"/>
+            <delay_constant max="2.56e-9" in_port="adder_fp16.b" out_port="adder_fp16.out"/>
+          </pb_type>
+          <interconnect> <!-- both a operands sit in datain[31:0] and both b operands in datain[63:32]; instance 0 takes the lower 16 bits of each half -->
+            <direct name="a2a0" input="dsp_pb.datain[15:0]" output="adder_fp16[0].a"></direct>
+            <direct name="a2a1" input="dsp_pb.datain[31:16]" output="adder_fp16[1].a"></direct>
+            <direct name="b2b0" input="dsp_pb.datain[47:32]" output="adder_fp16[0].b"></direct>
+            <direct name="b2b1" input="dsp_pb.datain[63:48]" output="adder_fp16[1].b"></direct>
+            <direct name="out2out0" input="adder_fp16[0].out" output="dsp_pb.result[15:0]"></direct>
+            <direct name="out2out1" input="adder_fp16[1].out" output="dsp_pb.result[31:16]"></direct>
+          </interconnect>
+        </mode>
+
+        <!-- clocked floating point multiplier mode (result = a * b)-->
+        <mode name="mult_fp32_clocked_mode"> <!-- one mult_fp32 primitive mapped to .subckt mult_fp_clk_32; adds a clk port and clocked timing to the 32-bit a, b, out ports -->
+          <pb_type name="mult_fp32" blif_model=".subckt mult_fp_clk_32" num_pb="1">
+            <input name="a" num_pins="32"/>
+            <input name="b" num_pins="32"/>
+            <output name="out" num_pins="32"/>
+            <clock name="clk" num_pins="1"/>
+            <!-- delay_constant (max 2.56e-9) from a and from b to out -->
+            <delay_constant max="2.56e-9" in_port="mult_fp32.a" out_port="mult_fp32.out"/>
+            <delay_constant max="2.56e-9" in_port="mult_fp32.b" out_port="mult_fp32.out"/>
+            <!-- T_setup (18.91e-12) against clk for a, b and also for out -->
+            <T_setup value="18.91e-12" port="mult_fp32.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_fp32.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_fp32.out" clock="clk"/>
+            <!-- T_clock_to_Q (min = max = 60.32e-12) against clk for the same three ports -->
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_fp32.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_fp32.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_fp32.out" clock="clk"/>
+          </pb_type>
+          <interconnect> <!-- clk comes from dsp_pb.clk; a = datain[31:0], b = datain[63:32], out drives result[31:0] -->
+            <direct name="clk" input="dsp_pb.clk" output="mult_fp32.clk"/>
+            <direct name="a2a" input="dsp_pb.datain[31:0]" output="mult_fp32.a">
+            </direct>
+            <direct name="b2b" input="dsp_pb.datain[63:32]" output="mult_fp32.b">
+            </direct>
+            <direct name="out2out" input="mult_fp32.out" output="dsp_pb.result[31:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- clocked 16-bit floating point multiplier mode, two instances (result = a * b each) -->
+        <mode name="mult_fp16_clocked_mode"> <!-- two mult_fp16 primitives mapped to .subckt mult_fp_clk_16; adds a clk port and clocked timing to the 16-bit a, b, out ports -->
+          <pb_type name="mult_fp16" blif_model=".subckt mult_fp_clk_16" num_pb="2">
+            <input name="a" num_pins="16"/>
+            <input name="b" num_pins="16"/>
+            <output name="out" num_pins="16"/>
+            <clock name="clk" num_pins="1"/>
+            <!-- delay_constant (max 2.56e-9) from a and from b to out -->
+            <delay_constant max="2.56e-9" in_port="mult_fp16.a" out_port="mult_fp16.out"/>
+            <delay_constant max="2.56e-9" in_port="mult_fp16.b" out_port="mult_fp16.out"/>
+            <!-- T_setup (18.91e-12) against clk for a, b and also for out -->
+            <T_setup value="18.91e-12" port="mult_fp16.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_fp16.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="mult_fp16.out" clock="clk"/>
+            <!-- T_clock_to_Q (min = max = 60.32e-12) against clk for the same three ports -->
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_fp16.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_fp16.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_fp16.out" clock="clk"/>
+          </pb_type>
+          <interconnect> <!-- dsp_pb.clk fans out to both instances; both a operands sit in datain[31:0] and both b operands in datain[63:32] -->
+            <direct name="clk0" input="dsp_pb.clk" output="mult_fp16[0].clk"/>
+            <direct name="clk1" input="dsp_pb.clk" output="mult_fp16[1].clk"/>
+            <direct name="a2a0" input="dsp_pb.datain[15:0]" output="mult_fp16[0].a"></direct>
+            <direct name="a2a1" input="dsp_pb.datain[31:16]" output="mult_fp16[1].a"></direct>
+            <direct name="b2b0" input="dsp_pb.datain[47:32]" output="mult_fp16[0].b"></direct>
+            <direct name="b2b1" input="dsp_pb.datain[63:48]" output="mult_fp16[1].b"></direct>
+            <direct name="out2out0" input="mult_fp16[0].out" output="dsp_pb.result[15:0]"></direct>
+            <direct name="out2out1" input="mult_fp16[1].out" output="dsp_pb.result[31:16]"></direct>
+          </interconnect>
+        </mode>
+        </mode>
+
+        <!-- clocked floating point adder mode (result = a + b)-->
+        <mode name="adder_fp32_clocked_mode"> <!-- one adder_fp32 primitive mapped to .subckt addition_fp_clk_32; adds a clk port and clocked timing to the 32-bit a, b, out ports -->
+          <pb_type name="adder_fp32" blif_model=".subckt addition_fp_clk_32" num_pb="1">
+            <input name="a" num_pins="32"/>
+            <input name="b" num_pins="32"/>
+            <output name="out" num_pins="32"/>
+            <clock name="clk" num_pins="1"/>
+            <!-- delay_constant (max 2.56e-9) from a and from b to out -->
+            <delay_constant max="2.56e-9" in_port="adder_fp32.a" out_port="adder_fp32.out"/>
+            <delay_constant max="2.56e-9" in_port="adder_fp32.b" out_port="adder_fp32.out"/>
+            <!-- T_setup (18.91e-12) against clk for a, b and also for out -->
+            <T_setup value="18.91e-12" port="adder_fp32.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="adder_fp32.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="adder_fp32.out" clock="clk"/>
+            <!-- T_clock_to_Q (min = max = 60.32e-12) against clk for the same three ports -->
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="adder_fp32.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="adder_fp32.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="adder_fp32.out" clock="clk"/>
+          </pb_type>
+          <interconnect> <!-- clk comes from dsp_pb.clk; a = datain[31:0], b = datain[63:32], out drives result[31:0] -->
+            <direct name="clk" input="dsp_pb.clk" output="adder_fp32.clk"/>
+            <direct name="atoa" input="dsp_pb.datain[31:0]" output="adder_fp32.a">
+            </direct>
+            <direct name="btob" input="dsp_pb.datain[63:32]" output="adder_fp32.b">
+            </direct>
+            <direct name="sumouttosumout" input="adder_fp32.out" output="dsp_pb.result[31:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- clocked 16-bit floating point adder mode, two adders per block (each result = a + b) -->
+        <mode name="adder_fp16_clocked_mode">
+          <pb_type name="adder_fp16" blif_model=".subckt addition_fp_clk_16" num_pb="2">
+            <input name="a" num_pins="16"/>
+            <input name="b" num_pins="16"/>
+            <output name="out" num_pins="16"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.56e-9" in_port="adder_fp16.a" out_port="adder_fp16.out"/>
+            <delay_constant max="2.56e-9" in_port="adder_fp16.b" out_port="adder_fp16.out"/>
+
+            <T_setup value="18.91e-12" port="adder_fp16.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="adder_fp16.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="adder_fp16.out" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="adder_fp16.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="adder_fp16.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="adder_fp16.out" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="clk0" input="dsp_pb.clk" output="adder_fp16[0].clk"/>
+            <direct name="clk1" input="dsp_pb.clk" output="adder_fp16[1].clk"/>
+            <direct name="a2a0" input="dsp_pb.datain[15:0]" output="adder_fp16[0].a"></direct>
+            <direct name="a2a1" input="dsp_pb.datain[31:16]" output="adder_fp16[1].a"></direct>
+            <direct name="b2b0" input="dsp_pb.datain[47:32]" output="adder_fp16[0].b"></direct>
+            <direct name="b2b1" input="dsp_pb.datain[63:48]" output="adder_fp16[1].b"></direct>
+            <direct name="out2out0" input="adder_fp16[0].out" output="dsp_pb.result[15:0]"></direct>
+            <direct name="out2out1" input="adder_fp16[1].out" output="dsp_pb.result[31:16]"></direct>
+          </interconnect>
+        </mode>
+
+        <!-- floating point mac mode (result = a*b + accumulated value) -->
+        <mode name="mac_fp32_mode">
+          <pb_type name="mac_fp32" blif_model=".subckt mac_fp_32" num_pb="1">
+            <input name="reset" num_pins="1"/>
+            <input name="a" num_pins="32"/>
+            <input name="b" num_pins="32"/>
+            <output name="out" num_pins="32"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.56e-9" in_port="mac_fp32.a" out_port="mac_fp32.out"/>
+            <delay_constant max="2.56e-9" in_port="mac_fp32.b" out_port="mac_fp32.out"/>
+            <delay_constant max="2.56e-9" in_port="mac_fp32.reset" out_port="mac_fp32.out"/>
+
+            <T_setup value="18.91e-12" port="mac_fp32.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_fp32.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_fp32.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_fp32.out" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp32.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp32.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp32.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp32.out" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="reset" input="dsp_pb.reset" output="mac_fp32.reset"/>
+            <direct name="clk" input="dsp_pb.clk" output="mac_fp32.clk"/>
+            <direct name="atoa" input="dsp_pb.datain[31:0]" output="mac_fp32.a">
+            </direct>
+            <direct name="btob" input="dsp_pb.datain[63:32]" output="mac_fp32.b">
+            </direct>
+            <direct name="sumouttosumout" input="mac_fp32.out" output="dsp_pb.result[31:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- floating point 16-bit mac mode (result = a*b + accumulated value) -->
+        <mode name="mac_fp16_mode">
+          <pb_type name="mac_fp16" blif_model=".subckt mac_fp_16" num_pb="2">
+            <input name="reset" num_pins="1"/>
+            <input name="a" num_pins="16"/>
+            <input name="b" num_pins="16"/>
+            <output name="out" num_pins="16"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.56e-9" in_port="mac_fp16.a" out_port="mac_fp16.out"/>
+            <delay_constant max="2.56e-9" in_port="mac_fp16.b" out_port="mac_fp16.out"/>
+            <delay_constant max="2.56e-9" in_port="mac_fp16.reset" out_port="mac_fp16.out"/>
+
+            <T_setup value="18.91e-12" port="mac_fp16.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_fp16.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_fp16.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="mac_fp16.out" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp16.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp16.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp16.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp16.out" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="reset0" input="dsp_pb.reset" output="mac_fp16[0].reset"></direct>
+            <direct name="reset1" input="dsp_pb.reset" output="mac_fp16[1].reset"></direct>
+            <direct name="clk0" input="dsp_pb.clk" output="mac_fp16[0].clk"></direct>
+            <direct name="clk1" input="dsp_pb.clk" output="mac_fp16[1].clk"></direct>
+            <direct name="atoa0" input="dsp_pb.datain[15:0]"  output="mac_fp16[0].a"></direct>
+            <direct name="atoa1" input="dsp_pb.datain[31:16]" output="mac_fp16[1].a"></direct>
+            <direct name="btob0" input="dsp_pb.datain[47:32]" output="mac_fp16[0].b"></direct>
+            <direct name="btob1" input="dsp_pb.datain[63:48]" output="mac_fp16[1].b"></direct>
+            <direct name="sumouttosumout0" input="mac_fp16[0].out" output="dsp_pb.result[15:0]"></direct>
+            <direct name="sumouttosumout1" input="mac_fp16[1].out" output="dsp_pb.result[31:16]"></direct>
+          </interconnect>
+        </mode>
+
+        <!-- floating point fp16 sum-of-2 mult mode (result = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b). chainout = third_inp or result-->
+        <mode name="fp16_sum_of_products_mode"> 
+          <pb_type name="fp16_sum_of_2_mult" blif_model=".subckt fp16_mult_add" num_pb="1">
+            <input name="reset" num_pins="1"/>
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="top_a" num_pins="16"/>
+            <input name="top_b" num_pins="16"/>
+            <input name="bot_a" num_pins="16"/>
+            <input name="bot_b" num_pins="16"/>
+            <input name="fp32_in" num_pins="32"/>
+            <output name="result" num_pins="32"/>
+            <output name="chainout" num_pins="32"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.reset" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.mode_sigs" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_a" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_b" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_a" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_b" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.fp32_in" out_port="fp16_sum_of_2_mult.result"/>
+
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.reset" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.mode_sigs" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_a" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_b" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_a" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_b" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.fp32_in" out_port="fp16_sum_of_2_mult.chainout"/>
+
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.top_a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.top_b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.bot_a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.bot_b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.fp32_in" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.result" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.top_a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.top_b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.bot_a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.bot_b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.fp32_in" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="reset" input="dsp_pb.reset" output="fp16_sum_of_2_mult.reset"/>
+            <direct name="clk" input="dsp_pb.clk" output="fp16_sum_of_2_mult.clk"/>
+            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="fp16_sum_of_2_mult.mode_sigs">
+            </direct>
+            <direct name="atoa_top" input="dsp_pb.datain[15:0]" output="fp16_sum_of_2_mult.top_a">
+            </direct>
+            <direct name="btob_top" input="dsp_pb.datain[31:16]" output="fp16_sum_of_2_mult.top_b">
+            </direct>
+            <direct name="result_top" input="fp16_sum_of_2_mult.result" output="dsp_pb.result[31:0]">
+            </direct>
+            <direct name="atoa_bot" input="dsp_pb.datain[47:32]" output="fp16_sum_of_2_mult.bot_a">
+            </direct>
+            <direct name="btob_bot" input="dsp_pb.datain[63:48]" output="fp16_sum_of_2_mult.bot_b">
+            </direct>
+            <direct name="result_bot" input="fp16_sum_of_2_mult.result" output="dsp_pb.result[63:32]">
+            </direct>
+            <direct name="fp32in" input="dsp_pb.datain[95:64]" output="fp16_sum_of_2_mult.fp32_in">
+            </direct>
+            <direct name="chainout" input="fp16_sum_of_2_mult.chainout" output="dsp_pb.chainout[31:0]">
+            </direct>
+          </interconnect>
+        </mode>  
+
+        <!-- floating point fp16 sum-of-2 mult mode (result = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b + fp32 chainin or third inp. chainout = third_inp or result)-->
+        <mode name="fp16_sum_of_products_2_mult_mode"> 
+          <pb_type name="fp16_sum_of_2_mult" blif_model=".subckt fp16_sop2_mult" num_pb="1">
+            <input name="reset" num_pins="1"/>
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="top_a" num_pins="16"/>
+            <input name="top_b" num_pins="16"/>
+            <input name="bot_a" num_pins="16"/>
+            <input name="bot_b" num_pins="16"/>
+            <input name="fp32_in" num_pins="32"/>
+            <input name="chainin" num_pins="32"/>
+            <output name="result" num_pins="32"/>
+            <output name="chainout" num_pins="32"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.reset" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.mode_sigs" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_a" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_b" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_a" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_b" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.chainin" out_port="fp16_sum_of_2_mult.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.fp32_in" out_port="fp16_sum_of_2_mult.result"/>
+
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.reset" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.mode_sigs" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_a" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_b" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_a" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_b" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.chainin" out_port="fp16_sum_of_2_mult.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.fp32_in" out_port="fp16_sum_of_2_mult.chainout"/>
+
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.top_a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.top_b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.bot_a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.bot_b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.fp32_in" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.chainin" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.result" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.top_a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.top_b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.bot_a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.bot_b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.fp32_in" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.chainin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="reset" input="dsp_pb.reset" output="fp16_sum_of_2_mult.reset"/>
+            <direct name="clk" input="dsp_pb.clk" output="fp16_sum_of_2_mult.clk"/>
+            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="fp16_sum_of_2_mult.mode_sigs">
+            </direct>
+            <direct name="atoa_top" input="dsp_pb.datain[15:0]" output="fp16_sum_of_2_mult.top_a">
+            </direct>
+            <direct name="btob_top" input="dsp_pb.datain[31:16]" output="fp16_sum_of_2_mult.top_b">
+            </direct>
+            <direct name="result_top" input="fp16_sum_of_2_mult.result" output="dsp_pb.result[31:0]">
+            </direct>
+            <direct name="atoa_bot" input="dsp_pb.datain[47:32]" output="fp16_sum_of_2_mult.bot_a">
+            </direct>
+            <direct name="btob_bot" input="dsp_pb.datain[63:48]" output="fp16_sum_of_2_mult.bot_b">
+            </direct>
+            <direct name="result_bot" input="fp16_sum_of_2_mult.result" output="dsp_pb.result[63:32]">
+            </direct>
+            <direct name="chainin" input="dsp_pb.chainin[31:0]" output="fp16_sum_of_2_mult.chainin">
+            </direct>
+            <direct name="fp32in" input="dsp_pb.datain[95:64]" output="fp16_sum_of_2_mult.fp32_in">
+            </direct>
+            <direct name="chainout" input="fp16_sum_of_2_mult.chainout" output="dsp_pb.chainout[31:0]">
+            </direct>
+          </interconnect>
+        </mode>        
+
+        <!-- floating point fp16 sum-of-2 accum mode (result = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b + accumulator. chainout = result)-->
+        <mode name="fp16_sum_of_products_2_accum_mode"> 
+          <pb_type name="fp16_sum_of_2_accum" blif_model=".subckt fp16_sop2_accum" num_pb="1">
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="reset" num_pins="1"/>
+            <input name="top_a" num_pins="16"/>
+            <input name="top_b" num_pins="16"/>
+            <input name="bot_a" num_pins="16"/>
+            <input name="bot_b" num_pins="16"/>
+            <output name="result" num_pins="32"/>
+            <output name="chainout" num_pins="32"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.mode_sigs" out_port="fp16_sum_of_2_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.top_a" out_port="fp16_sum_of_2_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.top_b" out_port="fp16_sum_of_2_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.bot_a" out_port="fp16_sum_of_2_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.bot_b" out_port="fp16_sum_of_2_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.reset" out_port="fp16_sum_of_2_accum.result"/>
+
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.mode_sigs" out_port="fp16_sum_of_2_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.top_a" out_port="fp16_sum_of_2_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.top_b" out_port="fp16_sum_of_2_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.bot_a" out_port="fp16_sum_of_2_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.bot_b" out_port="fp16_sum_of_2_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.reset" out_port="fp16_sum_of_2_accum.chainout"/>
+
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_accum.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_accum.top_a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_accum.top_b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_accum.bot_a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_accum.bot_b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_accum.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_sum_of_2_accum.result" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_accum.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_accum.top_a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_accum.top_b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_accum.bot_a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_accum.bot_b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_accum.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_accum.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="fp16_sum_of_2_accum.mode_sigs"/>
+            <direct name="reset" input="dsp_pb.reset" output="fp16_sum_of_2_accum.reset"/>
+            <direct name="clk" input="dsp_pb.clk" output="fp16_sum_of_2_accum.clk"/>
+            <direct name="atoa_top" input="dsp_pb.datain[15:0]" output="fp16_sum_of_2_accum.top_a">
+            </direct>
+            <direct name="btob_top" input="dsp_pb.datain[31:16]" output="fp16_sum_of_2_accum.top_b">
+            </direct>
+            <direct name="result_top" input="fp16_sum_of_2_accum.result" output="dsp_pb.result[31:0]">
+            </direct>
+            <direct name="atoa_bot" input="dsp_pb.datain[47:32]" output="fp16_sum_of_2_accum.bot_a">
+            </direct>
+            <direct name="btob_bot" input="dsp_pb.datain[63:48]" output="fp16_sum_of_2_accum.bot_b">
+            </direct>
+            <direct name="result_bot" input="fp16_sum_of_2_accum.result" output="dsp_pb.result[63:32]">
+            </direct>
+            <direct name="chainout" input="fp16_sum_of_2_accum.chainout" output="dsp_pb.chainout[31:0]">
+            </direct>
+          </interconnect>
+        </mode>        
+
+        <!-- floating point fp16 mult, fp32 add mode (chainout = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b. result = chainin + third_inp)-->
+        <mode name="fp16_mult_fp32_add"> 
+          <pb_type name="fp16_mult_fp32_add" blif_model=".subckt fp16_mult_fp32_add" num_pb="1">
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="reset" num_pins="1"/>
+            <input name="top_a" num_pins="16"/>
+            <input name="top_b" num_pins="16"/>
+            <input name="bot_a" num_pins="16"/>
+            <input name="bot_b" num_pins="16"/>
+            <input name="fp32_in" num_pins="32"/>
+            <input name="chainin" num_pins="32"/>
+            <output name="result" num_pins="32"/>
+            <output name="chainout" num_pins="32"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.reset" out_port="fp16_mult_fp32_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.mode_sigs" out_port="fp16_mult_fp32_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.top_a" out_port="fp16_mult_fp32_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.top_b" out_port="fp16_mult_fp32_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.bot_a" out_port="fp16_mult_fp32_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.bot_b" out_port="fp16_mult_fp32_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.chainin" out_port="fp16_mult_fp32_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.fp32_in" out_port="fp16_mult_fp32_add.result"/>
+
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.reset" out_port="fp16_mult_fp32_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.mode_sigs" out_port="fp16_mult_fp32_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.top_a" out_port="fp16_mult_fp32_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.top_b" out_port="fp16_mult_fp32_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.bot_a" out_port="fp16_mult_fp32_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.bot_b" out_port="fp16_mult_fp32_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.chainin" out_port="fp16_mult_fp32_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.fp32_in" out_port="fp16_mult_fp32_add.chainout"/>
+
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.top_a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.top_b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.bot_a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.bot_b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.chainin" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.fp32_in" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.result" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.top_a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.top_b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.bot_a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.bot_b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.chainin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.fp32_in" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="clk" input="dsp_pb.clk" output="fp16_mult_fp32_add.clk"/>
+            <direct name="reset" input="dsp_pb.reset" output="fp16_mult_fp32_add.reset"/>
+            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="fp16_mult_fp32_add.mode_sigs">
+            </direct>
+            <direct name="atoa_top" input="dsp_pb.datain[15:0]" output="fp16_mult_fp32_add.top_a">
+            </direct>
+            <direct name="btob_top" input="dsp_pb.datain[31:16]" output="fp16_mult_fp32_add.top_b">
+            </direct>
+            <direct name="result_top" input="fp16_mult_fp32_add.result" output="dsp_pb.result[31:0]">
+            </direct>
+            <direct name="atoa_bot" input="dsp_pb.datain[47:32]" output="fp16_mult_fp32_add.bot_a">
+            </direct>
+            <direct name="btob_bot" input="dsp_pb.datain[63:48]" output="fp16_mult_fp32_add.bot_b">
+            </direct>
+            <direct name="result_bot" input="fp16_mult_fp32_add.result" output="dsp_pb.result[63:32]">
+            </direct>
+            <direct name="chainin" input="dsp_pb.chainin[31:0]" output="fp16_mult_fp32_add.chainin">
+            </direct>
+            <direct name="fp32in" input="dsp_pb.datain[95:64]" output="fp16_mult_fp32_add.fp32_in">
+            </direct> <!-- routes datain[95:64] to the 32-pin fp32_in port -->
+            <direct name="chainout" input="fp16_mult_fp32_add.chainout" output="dsp_pb.chainout[31:0]">
+            </direct>
+          </interconnect>
+        </mode>      
+
+        <!-- floating point fp16 mult, fp32 accum mode (chainout = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b. result = third_inp + accumulator)-->
+        <mode name="fp16_mult_fp32_accum"> 
+          <pb_type name="fp16_mult_fp32_accum" blif_model=".subckt fp16_mult_fp32_accum" num_pb="1">
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="reset" num_pins="1"/>
+            <input name="top_a" num_pins="16"/>
+            <input name="top_b" num_pins="16"/>
+            <input name="bot_a" num_pins="16"/>
+            <input name="bot_b" num_pins="16"/>
+            <input name="fp32_in" num_pins="32"/>
+            <output name="result" num_pins="32"/>
+            <output name="chainout" num_pins="32"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.mode_sigs" out_port="fp16_mult_fp32_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.top_a" out_port="fp16_mult_fp32_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.top_b" out_port="fp16_mult_fp32_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.bot_a" out_port="fp16_mult_fp32_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.bot_b" out_port="fp16_mult_fp32_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.fp32_in" out_port="fp16_mult_fp32_accum.result"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.reset" out_port="fp16_mult_fp32_accum.result"/>
+
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.mode_sigs" out_port="fp16_mult_fp32_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.top_a" out_port="fp16_mult_fp32_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.top_b" out_port="fp16_mult_fp32_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.bot_a" out_port="fp16_mult_fp32_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.bot_b" out_port="fp16_mult_fp32_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.fp32_in" out_port="fp16_mult_fp32_accum.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.reset" out_port="fp16_mult_fp32_accum.chainout"/>
+
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.top_a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.top_b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.bot_a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.bot_b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.fp32_in" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.result" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.top_a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.top_b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.bot_a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.bot_b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.fp32_in" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="fp16_mult_fp32_accum.mode_sigs"/>
+            <direct name="reset" input="dsp_pb.reset" output="fp16_mult_fp32_accum.reset"/>
+            <direct name="clk" input="dsp_pb.clk" output="fp16_mult_fp32_accum.clk"/>
+            <direct name="atoa_top" input="dsp_pb.datain[15:0]" output="fp16_mult_fp32_accum.top_a">
+            </direct>
+            <direct name="btob_top" input="dsp_pb.datain[31:16]" output="fp16_mult_fp32_accum.top_b">
+            </direct>
+            <direct name="result_top" input="fp16_mult_fp32_accum.result" output="dsp_pb.result[31:0]">
+            </direct>
+            <direct name="atoa_bot" input="dsp_pb.datain[47:32]" output="fp16_mult_fp32_accum.bot_a">
+            </direct>
+            <direct name="btob_bot" input="dsp_pb.datain[63:48]" output="fp16_mult_fp32_accum.bot_b">
+            </direct>
+            <direct name="result_bot" input="fp16_mult_fp32_accum.result" output="dsp_pb.result[63:32]">
+            </direct>
+            <direct name="fp32in" input="dsp_pb.datain[95:64]" output="fp16_mult_fp32_accum.fp32_in">
+            </direct>
+            <direct name="chainout" input="fp16_mult_fp32_accum.chainout" output="dsp_pb.chainout[31:0]">
+            </direct>
+          </interconnect>
+        </mode>      
+
+        <!-- floating point fp32 mult_then_add mode (result = fp32_mult_a * fp32_mult_b + chainin. chainout = third_inp or result) -->
+        <mode name="fp32_mult_then_add"> 
+          <pb_type name="fp32_mult_then_add" blif_model=".subckt fp32_mult_then_add" num_pb="1">
+            <input name="reset" num_pins="1"/>
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="a" num_pins="32"/>
+            <input name="b" num_pins="32"/>
+            <input name="fp32_in" num_pins="32"/>
+            <input name="chainin" num_pins="32"/>
+            <output name="result" num_pins="32"/>
+            <output name="chainout" num_pins="32"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.reset" out_port="fp32_mult_then_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.mode_sigs" out_port="fp32_mult_then_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.a" out_port="fp32_mult_then_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.b" out_port="fp32_mult_then_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.chainin" out_port="fp32_mult_then_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.fp32_in" out_port="fp32_mult_then_add.result"/>
+
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.reset" out_port="fp32_mult_then_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.mode_sigs" out_port="fp32_mult_then_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.a" out_port="fp32_mult_then_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.b" out_port="fp32_mult_then_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.chainin" out_port="fp32_mult_then_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.fp32_in" out_port="fp32_mult_then_add.chainout"/>
+
+            <T_setup value="18.91e-12" port="fp32_mult_then_add.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_then_add.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_then_add.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_then_add.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_then_add.chainin" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_then_add.fp32_in" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_then_add.result" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_then_add.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_then_add.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_then_add.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_then_add.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_then_add.chainin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_then_add.fp32_in" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_then_add.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="reset" input="dsp_pb.reset" output="fp32_mult_then_add.reset"/>
+            <direct name="clk" input="dsp_pb.clk" output="fp32_mult_then_add.clk"/>
+            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="fp32_mult_then_add.mode_sigs">
+            </direct>
+            <direct name="atoa" input="dsp_pb.datain[31:0]" output="fp32_mult_then_add.a">
+            </direct>
+            <direct name="btob" input="dsp_pb.datain[63:32]" output="fp32_mult_then_add.b">
+            </direct>
+            <direct name="result" input="fp32_mult_then_add.result" output="dsp_pb.result[31:0]">
+            </direct>
+            <direct name="chainin" input="dsp_pb.chainin[31:0]" output="fp32_mult_then_add.chainin">
+            </direct>
+            <!-- Third operand: the 32-pin fp32_in port is fed from datain[95:64]. -->
+            <direct name="fp32in" input="dsp_pb.datain[95:64]" output="fp32_mult_then_add.fp32_in"/>
+            <direct name="chainout" input="fp32_mult_then_add.chainout" output="dsp_pb.chainout[31:0]">
+            </direct>
+          </interconnect>
+        </mode>      
+
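+        <!-- floating point fp32 mult_add mode (chainout = fp32_mult_a * fp32_mult_b. result = third_inp + chainin; NOTE(review): this comment previously named both outputs "chainout" - confirm the assignment against the DSP model) -->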
+        <mode name="fp32_mult_add"> 
+          <pb_type name="fp32_mult_add" blif_model=".subckt fp32_mult_add" num_pb="1">
+            <input name="reset" num_pins="1"/>
+            <input name="mode_sigs" num_pins="12"/>
+            <input name="a" num_pins="32"/>
+            <input name="b" num_pins="32"/>
+            <input name="fp32_in" num_pins="32"/>
+            <input name="chainin" num_pins="32"/>
+            <output name="result" num_pins="32"/>
+            <output name="chainout" num_pins="32"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.reset" out_port="fp32_mult_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.mode_sigs" out_port="fp32_mult_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.a" out_port="fp32_mult_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.b" out_port="fp32_mult_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.chainin" out_port="fp32_mult_add.result"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.fp32_in" out_port="fp32_mult_add.result"/>
+
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.reset" out_port="fp32_mult_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.mode_sigs" out_port="fp32_mult_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.a" out_port="fp32_mult_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.b" out_port="fp32_mult_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.chainin" out_port="fp32_mult_add.chainout"/>
+            <delay_constant max="2.56e-9" in_port="fp32_mult_add.fp32_in" out_port="fp32_mult_add.chainout"/>
+
+            <T_setup value="18.91e-12" port="fp32_mult_add.reset" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_add.mode_sigs" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_add.a" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_add.b" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_add.chainin" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_add.fp32_in" clock="clk"/>
+            <T_setup value="18.91e-12" port="fp32_mult_add.result" clock="clk"/>
+
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_add.reset" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_add.mode_sigs" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_add.a" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_add.b" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_add.chainin" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_add.fp32_in" clock="clk"/>
+            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_add.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="reset" input="dsp_pb.reset" output="fp32_mult_add.reset"/>
+            <direct name="clk" input="dsp_pb.clk" output="fp32_mult_add.clk"/>
+            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="fp32_mult_add.mode_sigs">
+            </direct>
+            <direct name="atoa" input="dsp_pb.datain[31:0]" output="fp32_mult_add.a">
+            </direct>
+            <direct name="btob" input="dsp_pb.datain[63:32]" output="fp32_mult_add.b">
+            </direct>
+            <direct name="result" input="fp32_mult_add.result" output="dsp_pb.result[31:0]">
+            </direct>
+            <direct name="chainin" input="dsp_pb.chainin[31:0]" output="fp32_mult_add.chainin">
+            </direct>
+            <!-- Third operand: the 32-pin fp32_in port is fed from datain[95:64]. -->
+            <direct name="fp32in" input="dsp_pb.datain[95:64]" output="fp32_mult_add.fp32_in"/>
+            <direct name="chainout" input="fp32_mult_add.chainout" output="dsp_pb.chainout[31:0]">
+            </direct>
+          </interconnect>
+        </mode>      
+      </pb_type>
+
+      <interconnect>
+        <direct name="mode_sigs" input="dsp.dsp_I1[11:0]" output="dsp_pb.mode_sigs"/>
+        <direct name="datain1" input="dsp.dsp_I1[63:12]" output="dsp_pb.datain[51:0]"/>
+        <direct name="datain2" input="dsp.dsp_I2" output="dsp_pb.datain[115:52]"/>
+        <direct name="reset" input="dsp.reset" output="dsp_pb.reset"/>
+        <direct name="chainin" input="dsp.chainin" output="dsp_pb.chainin"/>
+        <direct name="chainout" input="dsp_pb.chainout" output="dsp.chainout"/>
+        <direct name="scanin" input="dsp.scanin" output="dsp_pb.scanin"/>
+        <direct name="scanout" input="dsp_pb.scanout" output="dsp.scanout"/>
+        <direct name="result" input="dsp_pb.result" output="dsp.result"/>
+        <direct name="clk" input="dsp.clk" output="dsp_pb.clk"/>
+      </interconnect>
+
+    </pb_type>
+    <interconnect>
+        <!-- A 50% sparse crossbar means 50% of the lines can reach an actual input of the dsp.
+        It would be built by splitting the inputs into two buckets with one full crossbar each; those crossbars are commented out below and direct connections are used instead. -->
+        <!--
+       <complete name="first_half" input="dsp_top.dsp_I1" output="dsp.dsp_I1">
+            <delay_constant max="333e-12" in_port="dsp_top.dsp_I1" out_port="dsp.dsp_I1"/>
+       </complete>
+
+        <complete name="second_half" input="dsp_top.dsp_I2" output="dsp.dsp_I2">
+            <delay_constant max="333e-12" in_port="dsp_top.dsp_I2" out_port="dsp.dsp_I2"/>
+        </complete>
+        -->
+        <direct name="enable" input="dsp_top.dsp_I1[0]" output ="dsp.dsp_I1[0]"/>
+        <direct name="loadconst" input="dsp_top.dsp_I1[1]" output ="dsp.dsp_I1[1]"/>
+        <direct name="accumulate" input="dsp_top.dsp_I1[2]" output ="dsp.dsp_I1[2]"/>
+        <direct name="negate" input="dsp_top.dsp_I1[3]" output ="dsp.dsp_I1[3]"/>
+        <direct name="sub" input="dsp_top.dsp_I1[4]" output ="dsp.dsp_I1[4]"/>
+        <direct name="mode" input="dsp_top.dsp_I1[7:5]" output ="dsp.dsp_I1[7:5]"/>
+        <direct name="mux9_select" input="dsp_top.dsp_I1[8]" output ="dsp.dsp_I1[8]"/>
+        <direct name="internal_coeffa" input="dsp_top.dsp_I1[9]" output ="dsp.dsp_I1[9]"/>
+        <direct name="internal_coeffb" input="dsp_top.dsp_I1[10]" output ="dsp.dsp_I1[10]"/>
+        <direct name="datain1" input="dsp_top.dsp_I1[63:11]" output ="dsp.dsp_I1[63:11]"/>
+        <direct name="datain2" input="dsp_top.dsp_I2" output ="dsp.dsp_I2"/>
+ 
+        <direct name="reset" input="dsp_top.reset" output="dsp.reset"></direct>
+        <direct name="chainin" input="dsp_top.chainin" output="dsp.chainin">
+            <delay_constant max="1179e-12" in_port="dsp_top.chainin" out_port="dsp.chainin"/>
+        </direct>
+        <direct name="chainout" input="dsp.chainout" output="dsp_top.chainout">
+            <delay_constant max="1179e-12" in_port="dsp.chainout" out_port="dsp_top.chainout"/>
+        </direct>
+        <direct name="scanin" input="dsp_top.scanin" output="dsp.scanin">
+            <delay_constant max="1179e-12" in_port="dsp_top.scanin" out_port="dsp.scanin"/>
+        </direct>
+        <direct name="scanout" input="dsp.scanout" output="dsp_top.scanout">
+            <delay_constant max="1179e-12" in_port="dsp.scanout" out_port="dsp_top.scanout"/>
+        </direct>
+        <direct name="result" input="dsp.result" output="dsp_top.result"></direct>
+        <direct name="clk" input="dsp_top.clk" output="dsp.clk"></direct>
+    </interconnect>
+    </pb_type>
+    <!-- Define DSP slice end -->
+
+
+    <!-- Define fracturable memory begin -->
+    <!-- 
+    RAM blocks always have registered inputs. The input FFs appear before the address decoder & wordline driver,
+    and after the local input crossbar & level shifter.
+    RAM blocks optionally have registered outputs. The output FFs (if present) appear after the output crossbar.
+    If BRAM doesn't have registered outputs, then T_clk_to_q is the whole delay of the read/write operation.
+    If BRAM does have registered output, then T_clk_to_q is just the FF clk_to_q and then delay_constant
+    can be used to specify the whole delay of the read/write operation.
+
+    This RAM block has registered outputs.
+
+    The area and delay values of this RAM block were obtained (indirectly) from COFFE simulations.
+    COFFE only supports widths and depths that are powers of 2. For M20K (20 Kilobit BRAM), we need
+    the width to be 40 bits and depth to be 512 (for the logically widest mode: 512x40). We can't
+    simulate these dimensions directly in COFFE. So, we simulated and obtained the results for M32K
+    (32 Kilobits BRAM) and (16 Kilobits BRAM). Then we interpolated the results.
+    For delay, a linear interpolation was used, based on the size of the Memory (16K->20K->32K).
+    For area, the value was calculated using two interpolations: (1) port based (change in number of 
+    ports in going from 16K->20K->32K) and (2) number of bits based (change in number of bits in
+    going from 16K->20K->32K). The interpolation that resulted in the larger area was picked.
+    
+
+    Here are the equations used to calculate the delays based on COFFE results:
+    T_setup (inputs) = T_level_shifter + T_register_micro_setup = 32.3ps + 18.91ps = 51.21ps (NOTE: the input T_setup entries below use 51.12e-12; confirm which value is intended)
+    T_clk_to_q (inputs) = T_register_micro_clk_to_q = 60.32ps
+    T_setup (outputs) = T_register_micro_setup = 18.91ps 
+    T_clk_to_q (outputs) = T_register_micro_clk_to_q = 60.32ps
+
+    (Register setup and clk_to_q timings are actually from the FF used in the logic cluster.)
+
+    T_read = T1 + T2 + T3
+    = max (Row decoder, Pre-charge time) + (Wordline driver + Bit line delay) + (Sense amp + Output crossbar)
+
+    * Bit line delay is included in self.RAM.samp.delay time in COFFE. The Sense amp delay is actually
+    self.RAM.samp_part2.delay
+
+    T_write = T1 + T2 + T3
+    = max (Row decoder, Pre-charge time) + (Wordline driver) + (Write driver)
+
+    delay_constant values model the internal limits of a block (the combinatorial delay).
+    delay_constant = max (T_read, T_write) 
+
+	  Overall internal delay of the RAM is T_clk_to_q (inputs) + delay_constant + T_setup (outputs)
+    -->
+    <pb_type name="memory">
+      <input name="addr1" num_pins="11"/>
+      <input name="addr2" num_pins="11"/>
+      <input name="data" num_pins="40"/>
+      <input name="we1" num_pins="1"/>
+      <input name="we2" num_pins="1"/>
+      <output name="out" num_pins="40"/>
+      <clock name="clk" num_pins="1"/>
+      <!-- Specify single port mode first -->
+      <mode name="mem_512x40_sp">
+        <pb_type name="mem_512x40_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="9" port_class="address"/>
+          <input name="data" num_pins="40" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="40" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+
+          <T_setup value="51.12e-12" port="mem_512x40_sp.addr" clock="clk"/>
+          <T_setup value="51.12e-12" port="mem_512x40_sp.data" clock="clk"/>
+          <T_setup value="51.12e-12" port="mem_512x40_sp.we" clock="clk"/>
+          <T_setup value="18.91e-12" port="mem_512x40_sp.out" clock="clk"/>
+
+          <T_clock_to_Q max="60.32e-12" port="mem_512x40_sp.addr" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_512x40_sp.data" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_512x40_sp.we" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_512x40_sp.out" clock="clk"/>
+
+          <delay_constant max="0.852e-9" in_port="mem_512x40_sp.addr" out_port="mem_512x40_sp.out"/>
+          <delay_constant max="0.852e-9" in_port="mem_512x40_sp.data" out_port="mem_512x40_sp.out"/>
+          <delay_constant max="0.852e-9" in_port="mem_512x40_sp.we"   out_port="mem_512x40_sp.out"/>
+
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[8:0]" output="mem_512x40_sp.addr">
+          </direct>
+          <direct name="data1" input="memory.data" output="mem_512x40_sp.data">
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_512x40_sp.we">
+          </direct>
+          <direct name="dataout1" input="mem_512x40_sp.out" output="memory.out">
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_512x40_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+
+      <mode name="mem_1024x20_sp">
+        <pb_type name="mem_1024x20_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="10" port_class="address"/>
+          <input name="data" num_pins="20" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="20" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+
+          <T_setup value="51.12e-12" port="mem_1024x20_sp.addr" clock="clk"/>
+          <T_setup value="51.12e-12" port="mem_1024x20_sp.data" clock="clk"/>
+          <T_setup value="51.12e-12" port="mem_1024x20_sp.we" clock="clk"/>
+          <T_setup value="18.91e-12" port="mem_1024x20_sp.out" clock="clk"/>
+
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_sp.addr" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_sp.data" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_sp.we" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_sp.out" clock="clk"/>
+
+          <delay_constant max="0.852e-9" in_port="mem_1024x20_sp.addr" out_port="mem_1024x20_sp.out"/>
+          <delay_constant max="0.852e-9" in_port="mem_1024x20_sp.data" out_port="mem_1024x20_sp.out"/>
+          <delay_constant max="0.852e-9" in_port="mem_1024x20_sp.we"   out_port="mem_1024x20_sp.out"/>
+
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x20_sp.addr">
+          </direct>
+          <direct name="data1" input="memory.data[19:0]" output="mem_1024x20_sp.data">
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_1024x20_sp.we">
+          </direct>
+          <direct name="dataout1" input="mem_1024x20_sp.out" output="memory.out[19:0]">
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_1024x20_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+
+      <mode name="mem_2048x10_sp">
+        <pb_type name="mem_2048x10_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="11" port_class="address"/>
+          <input name="data" num_pins="10" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="10" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+
+          <T_setup value="51.12e-12" port="mem_2048x10_sp.addr" clock="clk"/>
+          <T_setup value="51.12e-12" port="mem_2048x10_sp.data" clock="clk"/>
+          <T_setup value="51.12e-12" port="mem_2048x10_sp.we" clock="clk"/>
+          <T_setup value="18.91e-12" port="mem_2048x10_sp.out" clock="clk"/>
+
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_sp.addr" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_sp.data" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_sp.we" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_sp.out" clock="clk"/>
+
+          <delay_constant max="0.852e-9" in_port="mem_2048x10_sp.addr" out_port="mem_2048x10_sp.out"/>
+          <delay_constant max="0.852e-9" in_port="mem_2048x10_sp.data" out_port="mem_2048x10_sp.out"/>
+          <delay_constant max="0.852e-9" in_port="mem_2048x10_sp.we"   out_port="mem_2048x10_sp.out"/>
+
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x10_sp.addr">
+          </direct>
+          <direct name="data1" input="memory.data[9:0]" output="mem_2048x10_sp.data">
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_2048x10_sp.we">
+          </direct>
+          <direct name="dataout1" input="mem_2048x10_sp.out" output="memory.out[9:0]">
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_2048x10_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+
+      <!-- Specify true dual port mode next -->
+      <mode name="mem_1024x20_dp">
+        <pb_type name="mem_1024x20_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="10" port_class="address1"/>
+          <input name="addr2" num_pins="10" port_class="address2"/>
+          <input name="data1" num_pins="20" port_class="data_in1"/>
+          <input name="data2" num_pins="20" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="20" port_class="data_out1"/>
+          <output name="out2" num_pins="20" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+
+          <T_setup value="51.12e-12" port="mem_1024x20_dp.addr1" clock="clk"/>
+          <T_setup value="51.12e-12" port="mem_1024x20_dp.data1" clock="clk"/>
+          <T_setup value="51.12e-12" port="mem_1024x20_dp.we1" clock="clk"/>
+          <T_setup value="51.12e-12" port="mem_1024x20_dp.addr2" clock="clk"/>
+          <T_setup value="51.12e-12" port="mem_1024x20_dp.data2" clock="clk"/>
+          <T_setup value="51.12e-12" port="mem_1024x20_dp.we2" clock="clk"/>
+          <T_setup value="18.91e-12" port="mem_1024x20_dp.out1" clock="clk"/>
+          <T_setup value="18.91e-12" port="mem_1024x20_dp.out2" clock="clk"/>
+
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.addr1" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.data1" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.we1" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.addr2" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.data2" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.out2" clock="clk"/>
+
+          <delay_constant max="0.852e-9" in_port="mem_1024x20_dp.addr1" out_port="mem_1024x20_dp.out1"/>
+          <delay_constant max="0.852e-9" in_port="mem_1024x20_dp.data1" out_port="mem_1024x20_dp.out1"/>
+          <delay_constant max="0.852e-9" in_port="mem_1024x20_dp.we1" out_port="mem_1024x20_dp.out1"/>
+          <delay_constant max="0.852e-9" in_port="mem_1024x20_dp.addr2" out_port="mem_1024x20_dp.out2"/>
+          <delay_constant max="0.852e-9" in_port="mem_1024x20_dp.data2" out_port="mem_1024x20_dp.out2"/>
+          <delay_constant max="0.852e-9" in_port="mem_1024x20_dp.we2" out_port="mem_1024x20_dp.out2"/>
+
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x20_dp.addr1">
+          </direct>
+          <direct name="address2" input="memory.addr2[9:0]" output="mem_1024x20_dp.addr2">
+          </direct>
+          <direct name="data1" input="memory.data[19:0]" output="mem_1024x20_dp.data1">
+          </direct>
+          <direct name="data2" input="memory.data[39:20]" output="mem_1024x20_dp.data2">
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_1024x20_dp.we1">
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_1024x20_dp.we2">
+          </direct>
+          <direct name="dataout1" input="mem_1024x20_dp.out1" output="memory.out[19:0]">
+          </direct>
+          <direct name="dataout2" input="mem_1024x20_dp.out2" output="memory.out[39:20]">
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_1024x20_dp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+
+      <mode name="mem_2048x10_dp">
+        <pb_type name="mem_2048x10_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="11" port_class="address1"/>
+          <input name="addr2" num_pins="11" port_class="address2"/>
+          <input name="data1" num_pins="10" port_class="data_in1"/>
+          <input name="data2" num_pins="10" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="10" port_class="data_out1"/>
+          <output name="out2" num_pins="10" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+
+          <T_setup value="51.12e-12" port="mem_2048x10_dp.addr1" clock="clk"/>
+          <T_setup value="51.12e-12" port="mem_2048x10_dp.data1" clock="clk"/>
+          <T_setup value="51.12e-12" port="mem_2048x10_dp.we1" clock="clk"/>
+          <T_setup value="51.12e-12" port="mem_2048x10_dp.addr2" clock="clk"/>
+          <T_setup value="51.12e-12" port="mem_2048x10_dp.data2" clock="clk"/>
+          <T_setup value="51.12e-12" port="mem_2048x10_dp.we2" clock="clk"/>
+          <T_setup value="18.91e-12" port="mem_2048x10_dp.out1" clock="clk"/>
+          <T_setup value="18.91e-12" port="mem_2048x10_dp.out2" clock="clk"/>
+
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.addr1" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.data1" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.we1" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.addr2" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.data2" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.out2" clock="clk"/>
+
+          <delay_constant max="0.852e-9" in_port="mem_2048x10_dp.addr1" out_port="mem_2048x10_dp.out1"/>
+          <delay_constant max="0.852e-9" in_port="mem_2048x10_dp.data1" out_port="mem_2048x10_dp.out1"/>
+          <delay_constant max="0.852e-9" in_port="mem_2048x10_dp.we1" out_port="mem_2048x10_dp.out1"/>
+          <delay_constant max="0.852e-9" in_port="mem_2048x10_dp.addr2" out_port="mem_2048x10_dp.out2"/>
+          <delay_constant max="0.852e-9" in_port="mem_2048x10_dp.data2" out_port="mem_2048x10_dp.out2"/>
+          <delay_constant max="0.852e-9" in_port="mem_2048x10_dp.we2" out_port="mem_2048x10_dp.out2"/>
+
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x10_dp.addr1">
+          </direct>
+          <direct name="address2" input="memory.addr2[10:0]" output="mem_2048x10_dp.addr2">
+          </direct>
+          <direct name="data1" input="memory.data[9:0]" output="mem_2048x10_dp.data1">
+          </direct>
+          <direct name="data2" input="memory.data[19:10]" output="mem_2048x10_dp.data2">
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_2048x10_dp.we1">
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_2048x10_dp.we2">
+          </direct>
+          <direct name="dataout1" input="mem_2048x10_dp.out1" output="memory.out[9:0]">
+          </direct>
+          <direct name="dataout2" input="mem_2048x10_dp.out2" output="memory.out[19:10]">
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_2048x10_dp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+    </pb_type>
+    <!-- Define fracturable memory end -->
+  </complexblocklist>
+
+  <switchblocklist>
+    <!-- Stratix IV uses a uni-directional routing architecture with a Driver Input Mux (DIM) size of 12 (i.e.
+           each wire can be driven by one of 12 block/outputs or wires) for the L4s.
+           
+           In the Stratix IV architecture the long wires (L16 here) are accessible only from the short wires, 
+           and are not connected to the block pins (i.e. connection blocks). Furthermore, they only connect 
+           to switch blocks every 4 LABs (to avoid expensive deep via stacks).
+           We approximate the L16 DIM size as 40:1 (in reality it is a pair of 20:1 (?) muxes with a 2:1 swap mux
+           in front, which has nearly the same connectivity as a full 40:1).
+
+           L4 wires
+           ================
+           At a channel width of 300 there are 260 L4/L4prime wires. At an effective Fc_out of 0.075 
+           and 40 LAB outputs this yields:
+
+                40 * 2 = 80 outputs per channel  [2 LABs per-channel]
+
+                80 * 0.075 = 6 outputs drive each L4 wire [output connection block]
+
+           This leaves:
+
+                12 - 6 = 6 inputs to the DIMs from other routing wires [switch block]
+
+           Since L4s connect at every switch block, there are:
+
+                260 L4 wires per channel + direction which can drive wires at a particular switchblock
+                (via switchpoints 0, 1, 2, 3)
+
+           And for each direction (260 wires) only:
+
+               260 / 4 = 65 wires starting/ending per channel + direction at each switch block
+               (i.e. from each direction, north/south/east/west, there are 32 L4s starting, and 32 L4s ending; + 1 wire for the 65th)
+
+           Which we allocate as follows:
+
+                L4
+                =====
+                straight-through connection: 2 (from L4 or L16)
+                clock-wise turn            : 2 (from L4 or L16)
+                counter-clock-wise turn    : 2 (from L4 or L16)
+
+           L16 wires
+           =========
+           At a channel width of 300 there are 40 L16 wires (20 in each direction), which do not connect to the input/output connection blocks.
+           This leaves 40 inputs to the DIM to select from routing wires (long wires use larger DIMs to improve reachability,
+           the area cost is relatively small since they are so rare).
+
+           Since L16s only connect at every 4th switch block there are:
+
+                40 / 4 = 10 L16 wires per channel (5 in each direction) which can drive wires at a particular switchblock
+                (via switchpoints 0, 4, 8, 12)
+
+           And for each direction (20 wires) only:
+
+               40 / 16 = 2.5 => 2 wires starting/ending per channel + direction at each switch block
+               (i.e. from each direction, north/south/east/west, there is one L16 starting, and one L16 ending)
+           
+           We assign the 40 DIM inputs as follows:
+
+                L16
+                =====
+                straight-through connection:  3 (from L16)
+                straight-through connection: 11 (from L4)
+                clock-wise turn            :  3 (from L16)
+                clock-wise turn            : 10 (from L4)
+                counter clock-wise turn    :  3 (from L16)
+                counter clock-wise turn    : 10 (from L4)
+
+           Switch pattern
+           ==============
+           This switch block is based on the Wilton switch block (see Page 103 of Steve Wilton's PhD Thesis 
+           "Architecture and Algorithms for Field-Programmable Gate Arrays with Embedded Memory", 1997):
+
+                left-to-top: W - t
+                top-to-right: t + 1
+                right-to-bottom: 2*W - 2 - t
+                bottom-to-left: t + 1
+                left-to-right: t
+                top-to-bottom: t
+
+           Since Wilton assumed bidirectional routing (while we use unidirectional routing),
+           we mirror the clock-wise turns to match the counter-clock-wise specification.
+           -->
+    <switchblock name="wilton_turn_clockwise_core" type="unidir">
+      <switchblock_location type="CORE"/>
+      <switchfuncs>
+        <!-- Clock-wise turns -->
+        <func type="tl" formula="W-t"/>
+        <!-- top to left -->
+        <func type="rt" formula="t+1"/>
+        <!-- right to top -->
+        <func type="br" formula="2*W-2-t"/>
+        <!-- bottom to right -->
+        <func type="lb" formula="t+1"/>
+        <!-- left to bottom -->
+      </switchfuncs>
+      <!-- L16 drivers -->
+      <wireconn num_conns="3*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
+      <wireconn num_conns="10*to" from_type="L4" from_switchpoint="0" to_type="L16" to_switchpoint="0"/>
+      <!-- L4 drivers 
+
+               Driving from L16 (few) to L4 (many) prefer driving from end-point of L16, although since there are many they will
+               all be multiply connected.
+               
+               Driving from L4 (many) to L4 (many) shuffle the switchpoints so the L4's are driven from a variety of switchpoints.
+               Since the actual number L4s starting/ending are equal, using 'fixed' from_order would mean only switchpoint 0 -> 0
+               connections. A 'shuffled' order will mix-up the from switchpoints for more diversity.
+               -->
+      <wireconn num_conns="2*to" from_order="shuffled">
+        <from type="L16" switchpoint="0,12,8,4"/>
+        <from type="L4" switchpoint="0,1,2,3"/>
+        <to type="L4" switchpoint="0"/>
+      </wireconn>
+    </switchblock>
+    <switchblock name="wilton_turn_counter_clockwise_core" type="unidir">
+      <switchblock_location type="CORE"/>
+      <switchfuncs>
+        <!-- Counter-clock-wise turns -->
+        <func type="lt" formula="W-t"/>
+        <!-- left to top -->
+        <func type="tr" formula="t+1"/>
+        <!-- top to right -->
+        <func type="rb" formula="2*W-2-t"/>
+        <!-- right to bottom -->
+        <func type="bl" formula="t+1"/>
+        <!-- bottom to left -->
+      </switchfuncs>
+      <!-- L16 drivers -->
+      <wireconn num_conns="3*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
+      <wireconn num_conns="10*to" from_type="L4" from_switchpoint="0" to_type="L16" to_switchpoint="0"/>
+      <!-- L4 drivers 
+
+               Driving from L16 (few) to L4 (many) prefer driving from end-point of L16, although since there are many they will
+               all be multiply connected.
+               
+               Driving from L4 (many) to L4 (many) shuffle the switchpoints so the L4's are driven from a variety of switchpoints.
+               Since the actual number L4s starting/ending are equal, using 'fixed' from_order would mean only switchpoint 0 -> 0
+               connections. A 'shuffled' order will mix-up the from switchpoints for more diversity.
+
+               Note that a different from_switchpoints ordering is intended here to ensure a different shuffling occurs compared to 
+               wilton_turn_clockwise_core (NOTE(review): the orderings below are currently identical to that block; confirm).
+               -->
+      <wireconn num_conns="2*to" from_order="shuffled">
+        <from type="L16" switchpoint="0,12,8,4"/>
+        <from type="L4" switchpoint="0,1,2,3"/>
+        <to type="L4" switchpoint="0"/>
+      </wireconn>
+    </switchblock>
+    <switchblock name="wilton_straight" type="unidir">
+      <switchblock_location type="EVERYWHERE"/>
+      <switchfuncs>
+        <!-- Straight -->
+        <func type="lr" formula="t"/>
+        <!-- left to right -->
+        <func type="tb" formula="t"/>
+        <!-- top to bottom -->
+        <func type="rl" formula="t"/>
+        <!-- right to left -->
+        <func type="bt" formula="t"/>
+        <!-- bottom to top -->
+      </switchfuncs>
+      <!-- L16 Drivers 
+                Note that we order the switchpoints in order of preference, since VPR currently
+                iterates through the source sets in order, such that we connect first to wires
+                ending at the switchblock (switchpoint 0), and then fallback to switchpoints
+                in decreasing distance from the drive point (if we have more to's than from's
+                it then wraps around).
+
+                Note also that we multiply the number of expected connections by 'to', since while usually
+                there is only one 'to' wire, occasionally there may be more, and we want to ensure they all
+                get the same number of connections.
+
+                For L16->L16:
+                  We allow any valid switchpoint to be used as the 'from' point.
+                  Allowing 'low' switchpoints like '4' may seem counter-intuitive (i.e. why not use a cheaper L4), but
+                  this makes it easier to bypass once on the L16 network (e.g. to get around congestion).
+           -->
+      <wireconn num_conns="3*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
+      <wireconn num_conns="11*to" from_type="L4" from_switchpoint="0,3,2,1" to_type="L16" to_switchpoint="0"/>
+      <!-- L4 Drivers -->
+      <wireconn num_conns="2*to" from_order="shuffled">
+        <from type="L16" switchpoint="0,12,8,4"/>
+        <from type="L4" switchpoint="0"/>
+        <to type="L4" switchpoint="0"/>
+      </wireconn>
+      <!--<wireconn num_conns="1*to" from_type="L4" from_switchpoint="0" to_type="L4" to_switchpoint="0"/>-->
+      <!--<wireconn num_conns="1*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L4" to_switchpoint="0"/>-->
+    </switchblock>
+    <switchblock name="wilton_straight_corner" type="unidir">
+      <!-- Same as wilton straight, but turning around a corner -->
+      <switchblock_location type="CORNER"/>
+      <switchfuncs>
+        <!-- Counter-clock-wise turns -->
+        <func type="lt" formula="t"/>
+        <!-- left to top -->
+        <func type="tr" formula="t"/>
+        <!-- top to right -->
+        <func type="rb" formula="t"/>
+        <!-- right to bottom -->
+        <func type="bl" formula="t"/>
+        <!-- bottom to left -->
+        <!-- Clock-wise turns -->
+        <func type="tl" formula="t"/>
+        <!-- top to left -->
+        <func type="rt" formula="t"/>
+        <!-- right to top -->
+        <func type="br" formula="t"/>
+        <!-- bottom to right -->
+        <func type="lb" formula="t"/>
+        <!-- left to bottom -->
+      </switchfuncs>
+      <!-- L16 Drivers -->
+      <wireconn num_conns="3*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
+      <wireconn num_conns="11*to" from_type="L4" from_switchpoint="0,3,2,1" to_type="L16" to_switchpoint="0"/>
+      <!-- L4 Drivers -->
+      <wireconn num_conns="2*to" from_order="shuffled">
+        <from type="L16" switchpoint="0,12,8,4"/>
+        <from type="L4" switchpoint="0"/>
+        <to type="L4" switchpoint="0"/>
+      </wireconn>
+      <!--<wireconn num_conns="1*to" from_type="L4" from_switchpoint="0" to_type="L4" to_switchpoint="0"/>-->
+      <!--<wireconn num_conns="1*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L4" to_switchpoint="0"/>-->
+    </switchblock>
+    <switchblock name="wilton_turn_fringe" type="unidir">
+      <!-- Non-corner perimeter SBs -->
+      <switchblock_location type="FRINGE"/>
+      <switchfuncs>
+        <!-- Counter-clock-wise turns -->
+        <func type="lt" formula="W-t"/>
+        <!-- left to top -->
+        <func type="tr" formula="t+1"/>
+        <!-- top to right -->
+        <func type="rb" formula="2*W-2-t"/>
+        <!-- right to bottom -->
+        <func type="bl" formula="t+1"/>
+        <!-- bottom to left -->
+        <!-- Clock-wise turns -->
+        <func type="tl" formula="W-t"/>
+        <!-- top to left -->
+        <func type="rt" formula="t+1"/>
+        <!-- right to top -->
+        <func type="br" formula="2*W-2-t"/>
+        <!-- bottom to right -->
+        <func type="lb" formula="t+1"/>
+        <!-- left to bottom -->
+      </switchfuncs>
+      <!-- We use 'max' style connections here to ensure there are no dangling wires, otherwise like core turns -->
+      <!-- L16 drivers -->
+      <wireconn num_conns="3*max(from,to)" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
+      <wireconn num_conns="21*max(from,to)" from_type="L4" from_switchpoint="0" to_type="L16" to_switchpoint="0"/>
+      <!-- L4 drivers -->
+      <wireconn num_conns="1*max(from,to)" from_type="L16" from_switchpoint="0,12,8,4" from_order="fixed" to_type="L4" to_switchpoint="0"/>
+      <wireconn num_conns="1*max(from,to)" from_type="L4" from_switchpoint="0,1,2,3" from_order="shuffled" to_type="L4" to_switchpoint="0"/>
+    </switchblock>
+  </switchblocklist>
+
+  <clocks>
+    <clock buffer_size="auto" C_wire="2.5e-10"/>
+  </clocks>
+</architecture>
+
+
diff --git a/third_party/vtr/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml b/third_party/vtr/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml
new file mode 100644
index 000000000..b8d26348a
--- /dev/null
+++ b/third_party/vtr/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml
@@ -0,0 +1,1505 @@
+<!-- 
+  Flagship Heterogeneous Architecture with Carry Chains for VTR 7.0.
+
+  - 40 nm technology
+  - General purpose logic block: 
+    K = 6, N = 10, fracturable 6 LUTs (can operate as one 6-LUT or two 5-LUTs with all 5 inputs shared) 
+    with optionally registered outputs
+    Each 5-LUT has an arithmetic mode that converts it to a single-bit adder with both inputs driven by 4-LUTs (both 4-LUTs share all 4 inputs)
+    Carry chain links to vertically adjacent logic blocks
+  - Memory size 32 Kbits, memory aspect ratios vary from a data width of 1 to data width of 64.  
+    Height = 6, found on every (8n+2)th column
+  - Multiplier modes: one 36x36, two 18x18, each 18x18 can also operate as two 9x9.  
+    Height = 4, found on every (8n+6)th column
+  - Routing architecture: L = 4, fc_in = 0.15, Fc_out = 0.1
+
+  Details on Modelling:
+
+  The electrical design of the architecture described here is NOT from an 
+  optimized, SPICED architecture.  Instead, we attempt to create a reasonable 
+  architecture file by using an existing commercial FPGA to approximate the area, 
+  delay, and power of the underlying components. This is combined with a reasonable 40 nm 
+  model of wiring and circuit design for low-level routing components, where available.
+  The resulting architecture has delays that roughly match a commercial 40 nm FPGA, but also 
+  has wiring electrical parameters that allow the wire lengths and switch patterns to be 
+  modified and you will still get reasonable delay results for the new architecture.
+  The following describes, in detail, how we obtained the various electrical values for this 
+  architecture.
+
+  Rmin for nmos and pmos, routing buffer sizes, and I/O pad delays are from the ifar 
+  architecture created by Ian Kuon: K06 N10 45nm fc 0.15 area-delay optimized architecture. 
+  (n10k06l04.fc15.area1delay1.cmos45nm.bptm.cmos45nm.xml)      
+  This routing architecture was optimized for 45 nm, and we have scaled it linearly to 40 nm to 
+  match the overall target (a 40 nm FPGA).
+
+  We obtain maximum delay numbers by measuring delays of routing, soft logic blocks, 
+  memories, and multipliers from test circuits on a Stratix IV GX device 
+  (EP4SGX230DF29C2X, i.e. fastest speed grade). Minimum delay values are calculated based on the
+  ratios between maximum and minimum values in Stratix IV GX device. For routing, we took the 
+  average delay of H4 and V4 wires.  Rmetal and Cmetal values for the routing wires were obtained 
+  from work done by Charles Chiasson. We use a 96 nm half-pitch (corresponding to mid-level metal 
+  stack 40 nm routing) and take the R and C data from the ITRS roadmap. 
+
+  For the general purpose logic block, we assume that the area and delays of the Stratix IV 
+  crossbar is close enough to the crossbar modelled here.  We use 40 inputs and 20 feedback lines in 
+  the cluster and a full crossbar, leading to 53:1 multiplexers in front of each BLE input.
+  Stratix IV uses 52 inputs and 20 feedback lines, but only a half-populated crossbar, leading to 
+  36:1 multiplexers.  We require 60 such multiplexers, while Stratix IV requires 88 for its more
+  complex fracturable BLEs + the extra control signals. We justify this rough approximation as follows: 
+  The Stratix IV crossbar has more inputs (72 vs. 60) and 
+  outputs (88 vs. 60) than our full crossbar which should increase its area and delay, but the 
+  Stratix IV crossbar is also 50% sparse (each mux is 36:1 instead of 53:1) which should reduce its 
+  area and delay.  The total number of crossbar switch points is roughly similar between the two 
+  architectures (3160 for SIV and 3600 for the academic architecture below), so we use the area 
+  & delay of the Stratix IV crossbar as a rough approximation of our crossbar.
+
+  For LUTs, we include LUT 
+  delays measured from Stratix IV which are dependent on the input used (i.e. some 
+  LUT inputs are faster than others).  The CAD tools at the time of VTR 7 do 
+  not consider differences in LUT input delays.
+
+  Adder delays obtained as approximate values from a Stratix IV EP4SE230F29C3 device.  
+  Delay obtained by compiling a 256 bit adder (registered inputs and outputs, 
+  all pins except clock virtual) then measuring the delays in chip-planner, 
+  sumout delay = 0.271ns to 0.348 ns, intra-block carry delay = 0.011 ns, 
+  inter-block carry delay = 0.327 ns.  Given this data, I will approximate 
+  sumout 0.3 ns, intra-block carry-delay = 0.01 ns, and 
+  inter-block carry-delay = 0.16 ns (since Altera inter-block carry delay has 
+  overhead that we don't have, I'll approximate the delay of a simpler chain at 
+  one half what they have.  This is very rough, anything from 0.01ns to 0.327ns 
+  can be justified).
+
+  Logic block area numbers obtained by scaling overall tile area of a 65nm 
+  Stratix III device, (as given in Wong, Betz and Rose, FPGA 2011) to 40 nm, then subtracting out 
+  routing area at a channel width of 300. We use a channel width of 300 because it can route 
+  all the VTR 6.0 benchmark circuits with an approximately 20% safety margin, and is also close to the
+  total channel width of Stratix IV. Hence this channel width is close to the commercial practice of
+  choosing a width that provides high routability. The architecture can be routed at different channel
+  widths, but we estimate the tile size and hence the physical length of routing wires assuming
+  a channel width of 300.
+
+  Sanity checks employed:
+    1.  We confirmed the routing buffer delay is ~1/3rd of total routing delay at L = 4. This matches 
+        common electrical design.
+
+
+  Authors: Jason Luu, Jeff Goeders, Vaughn Betz
+-->
+<architecture>
+  <!-- 
+       ODIN II specific config begins 
+       Describes the types of user-specified netlist blocks (in blif, this corresponds to 
+       ".model [type_of_block]") that this architecture supports.
+
+       Note: Basic LUTs, I/Os, and flip-flops are not included here as there are 
+       already special structures in blif (.names, .input, .output, and .latch) 
+       that describe them.
+  -->
+  <models>
+    <model name="multiply">
+      <input_ports>
+        <port name="a" combinational_sink_ports="out"/>
+        <port name="b" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out"/>
+      </output_ports>
+    </model>
+    <model name="single_port_ram">
+      <input_ports>
+        <port name="we" clock="clk"/>
+        <!-- control -->
+        <port name="addr" clock="clk"/>
+        <!-- address lines -->
+        <port name="data" clock="clk"/>
+        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
+        <port name="clk" is_clock="1"/>
+        <!-- memories are often clocked -->
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+        <!-- output can be broken down into smaller bit widths minimum size 1 -->
+      </output_ports>
+    </model>
+    <model name="dual_port_ram">
+      <input_ports>
+        <port name="we1" clock="clk"/>
+        <!-- write enable -->
+        <port name="we2" clock="clk"/>
+        <!-- write enable -->
+        <port name="addr1" clock="clk"/>
+        <!-- address lines -->
+        <port name="addr2" clock="clk"/>
+        <!-- address lines -->
+        <port name="data1" clock="clk"/>
+        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
+        <port name="data2" clock="clk"/>
+        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
+        <port name="clk" is_clock="1"/>
+        <!-- memories are often clocked -->
+      </input_ports>
+      <output_ports>
+        <port name="out1" clock="clk"/>
+        <!-- output can be broken down into smaller bit widths minimum size 1 -->
+        <port name="out2" clock="clk"/>
+        <!-- output can be broken down into smaller bit widths minimum size 1 -->
+      </output_ports>
+    </model>
+    <model name="adder">
+      <input_ports>
+        <port name="a" combinational_sink_ports="sumout cout"/>
+        <port name="b" combinational_sink_ports="sumout cout"/>
+        <port name="cin" combinational_sink_ports="sumout cout"/>
+      </input_ports>
+      <output_ports>
+        <port name="cout"/>
+        <port name="sumout"/>
+      </output_ports>
+    </model>
+  </models>
+  <tiles>
+    <tile name="io" area="0">
+      <sub_tile name="io" capacity="8">
+        <equivalent_sites>
+          <site pb_type="io" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="outpad" num_pins="1"/>
+        <output name="inpad" num_pins="1"/>
+        <clock name="clock" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
+        <pinlocations pattern="custom">
+          <loc side="left">io.outpad io.inpad io.clock</loc>
+          <loc side="top">io.outpad io.inpad io.clock</loc>
+          <loc side="right">io.outpad io.inpad io.clock</loc>
+          <loc side="bottom">io.outpad io.inpad io.clock</loc>
+        </pinlocations>
+      </sub_tile>
+    </tile>
+    <tile name="clb" area="53894">
+      <sub_tile name="clb">
+        <equivalent_sites>
+          <site pb_type="clb" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="I" num_pins="40" equivalent="full"/>
+        <input name="cin" num_pins="1"/>
+        <output name="O" num_pins="20" equivalent="none"/>
+        <output name="cout" num_pins="1"/>
+        <clock name="clk" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
+          <fc_override port_name="cin" fc_type="frac" fc_val="0"/>
+          <fc_override port_name="cout" fc_type="frac" fc_val="0"/>
+        </fc>
+        <pinlocations pattern="spread"/>
+      </sub_tile>
+    </tile>
+    <tile name="mult_36" height="4" area="396000">
+      <sub_tile name="mult_36">
+        <equivalent_sites>
+          <site pb_type="mult_36" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="a" num_pins="36"/>
+        <input name="b" num_pins="36"/>
+        <output name="out" num_pins="72"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
+        <pinlocations pattern="spread"/>
+      </sub_tile>
+    </tile>
+    <tile name="memory" height="6" area="548000">
+      <sub_tile name="memory">
+        <equivalent_sites>
+          <site pb_type="memory" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="addr1" num_pins="15"/>
+        <input name="addr2" num_pins="15"/>
+        <input name="data" num_pins="64"/>
+        <input name="we1" num_pins="1"/>
+        <input name="we2" num_pins="1"/>
+        <output name="out" num_pins="64"/>
+        <clock name="clk" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
+        <pinlocations pattern="spread"/>
+      </sub_tile>
+    </tile>
+  </tiles>
+  <!-- ODIN II specific config ends -->
+  <!-- Physical descriptions begin -->
+  <layout>
+    <auto_layout aspect_ratio="1.0">
+      <!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
+      <perimeter type="io" priority="100"/>
+      <corners type="EMPTY" priority="101"/>
+      <!--Fill with 'clb'-->
+      <fill type="clb" priority="10"/>
+      <!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
+      <col type="mult_36" startx="6" starty="1" repeatx="8" priority="20"/>
+      <col type="EMPTY" startx="6" repeatx="8" starty="1" priority="19"/>
+      <!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
+      <col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
+      <col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
+    </auto_layout>
+  </layout>
+  <device>
+    <!-- VB & JL: Using Ian Kuon's transistor sizing and drive strength data for routing, at 40 nm. Ian used BPTM 
+			     models. We are modifying the delay values however, to include metal C and R, which allows more architecture
+			     experimentation. We are also modifying the relative resistance of PMOS to be 1.8x that of NMOS
+			     (vs. Ian's 3x) as 1.8x lines up with Jeff G's data from a 45 nm process (and is more typical of 
+			     45 nm in general). I'm upping the Rmin_nmos from Ian's just over 6k to nearly 9k, and dropping 
+			     RminW_pmos from 18k to 16k to hit this 1.8x ratio, while keeping the delays of buffers approximately
+			     lined up with Stratix IV. 
+			     We are using Jeff G.'s capacitance data for 45 nm (in tech/ptm_45nm).
+			     Jeff's tables list C in for transistors with widths in multiples of the minimum feature size (45 nm).
+			     The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply drive strength sizes in this file
+	                     by 2.5x when looking up in Jeff's tables.
+			     The delay values are lined up with Stratix IV, which has an architecture similar to this
+			     proposed FPGA, and which is also 40 nm 
+			     C_ipin_cblock: input capacitance of a track buffer, which VPR assumes is a single-stage
+			     4x minimum drive strength buffer. -->
+    <sizing R_minW_nmos="8926" R_minW_pmos="16067"/>
+    <!-- The grid_logic_tile_area below will be used for all blocks that do not explicitly set their own (non-routing)
+     	  area; set to 0 since we explicitly set the area of all blocks currently in this architecture file.
+	    -->
+    <area grid_logic_tile_area="0"/>
+    <chan_width_distr>
+      <x distr="uniform" peak="1.000000"/>
+      <y distr="uniform" peak="1.000000"/>
+    </chan_width_distr>
+    <switch_block type="wilton" fs="3"/>
+    <connection_block input_switch_name="ipin_cblock"/>
+  </device>
+  <switchlist>
+    <!-- VB: the mux_trans_size and buf_size data below is in minimum width transistor *areas*, assuming the purple
+           book area formula. This means the mux transistors are about 5x minimum drive strength.
+           We assume the first stage of the buffer is 3x min drive strength to be reasonable given the large 
+           mux transistors, and this gives a reasonable stage ratio of a bit over 5x to the second stage. We assume
+           the n and p transistors in the first stage are equal-sized to lower the buffer trip point, since it's fed
+           by a pass transistor mux. We can then reverse engineer the buffer second stage to hit the specified 
+           buf_size (really buffer area) - 16.2x minimum drive nmos and 1.8*16.2 = 29.2x minimum drive pmos.
+           I then took the data from Jeff G.'s PTM modeling of 45 nm to get the Cin (gate of first stage) and Cout 
+           (diff of second stage) listed below.  Jeff's models are in tech/ptm_45nm, and are in min feature multiples.
+           The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply the drive strength sizes above by 
+           2.5x when looking up in Jeff's tables.
+           Finally, we choose a switch delay (58 ps) that leads to length 4 wires having a delay equal to that of SIV of 126 ps.
+           This also leads to the switch being 46% of the total wire delay, which is reasonable. -->
+    <switch type="mux" name="0" R="551" Cin=".77e-15" Cout="4e-15" Tdel="58e-12" mux_trans_size="2.630740" buf_size="27.645901"/>
+    <!--switch ipin_cblock resistance set to yield a 4x minimum drive strength buffer (R_minW_nmos / 4)-->
+    <switch type="mux" name="ipin_cblock" R="2231.5" Cout="0." Cin="1.47e-15" Tdel="7.247000e-11" mux_trans_size="1.222260" buf_size="auto"/>
+  </switchlist>
+  <segmentlist>
+    <!--- VB & JL: using ITRS metal stack data, 96 nm half pitch wires, which are intermediate metal width/space.  
+             With the 96 nm half pitch, such wires would take 60 um of height, vs. a 90 um high (approximated as square) Stratix IV tile so this seems
+             reasonable. Using a tile length of 90 um, corresponding to the length of a Stratix IV tile if it were square. -->
+    <segment freq="1.000000" length="4" type="unidir" Rmetal="101" Cmetal="22.5e-15">
+      <mux name="0"/>
+      <sb type="pattern">1 1 1 1 1</sb>
+      <cb type="pattern">1 1 1 1</cb>
+    </segment>
+  </segmentlist>
+  <directlist>
+    <direct name="adder_carry" from_pin="clb.cout" to_pin="clb.cin" x_offset="0" y_offset="-1" z_offset="0"/>
+  </directlist>
+  <complexblocklist>
+    <!-- Define I/O pads begin -->
+    <!-- Capacity is a unique property of I/Os, it is the maximum number of I/Os that can be placed at the same (X,Y) location on the FPGA -->
+    <!-- Not sure of the area of an I/O (varies widely), and it's not relevant to the design of the FPGA core, so we're setting it to 0. -->
+    <pb_type name="io">
+      <input name="outpad" num_pins="1"/>
+      <output name="inpad" num_pins="1"/>
+      <clock name="clock" num_pins="1"/>
+      <!-- IOs can operate as either inputs or outputs.
+	     Maximum delays below come from Ian Kuon. They are small, so they should be interpreted as
+	     the delays to and from registers in the I/O (and generally I/Os are registered 
+	     today and that is when you timing analyze them).
+
+		 Minimum delays are retrieved using a ratio of maximum and minimum times as seen in Quartus II
+		 in Stratix IV. The ratio of minimum value/maximum value is as follows:
+			inpad delay:  0.9239
+			outpad delay: 0.9545
+
+	     -->
+      <mode name="inpad">
+        <pb_type name="inpad" blif_model=".input" num_pb="1">
+          <output name="inpad" num_pins="1"/>
+        </pb_type>
+        <interconnect>
+          <direct name="inpad" input="inpad.inpad" output="io.inpad">
+            <delay_constant max="4.243e-11" min="3.92e-11" in_port="inpad.inpad" out_port="io.inpad"/>
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="outpad">
+        <pb_type name="outpad" blif_model=".output" num_pb="1">
+          <input name="outpad" num_pins="1"/>
+        </pb_type>
+        <interconnect>
+          <direct name="outpad" input="io.outpad" output="outpad.outpad">
+            <delay_constant max="1.394e-11" min="1.331e-11" in_port="io.outpad" out_port="outpad.outpad"/>
+          </direct>
+        </interconnect>
+      </mode>
+      <!-- Every input pin is driven by 15% of the tracks in a channel, every output pin drives 10% of the tracks in a channel -->
+      <!-- IOs go on the periphery of the FPGA, for consistency, 
+          make it physically equivalent on all sides so that only one definition of I/Os is needed.
+          If I do not make a physically equivalent definition, then I need to define 4 different I/Os, one for each side of the FPGA
+        -->
+      <!-- Place I/Os on the sides of the FPGA -->
+      <power method="ignore"/>
+    </pb_type>
+    <!-- Define I/O pads ends -->
+    <!-- Define general purpose logic block (CLB) begin -->
+    <!-- Area calculation: Total Stratix IV tile area is about 8100 um^2, and a minimum width transistor
+	   area of 60 L^2 yields a tile area of 84375 MWTAs.
+	   Routing at W=300 is 30481 MWTAs, leaving us with a total of 53000 MWTAs for logic block area.
+	   This means that only 37% of our area is in the general routing, and 63% is inside the logic
+	   block. Note that the crossbar / local interconnect is considered part of the logic block
+	   area in this analysis. That is a lower proportion of routing area than most academics
+	   assume, but note that the total routing area really includes the crossbar, which would push
+	   routing area up significantly; we estimate into the ~70% range.
+	   -->
+    <pb_type name="clb">
+      <input name="I" num_pins="40" equivalent="full"/>
+      <input name="cin" num_pins="1"/>
+      <output name="O" num_pins="20" equivalent="none"/>
+      <output name="cout" num_pins="1"/>
+      <clock name="clk" num_pins="1"/>
+      <!-- Describe fracturable logic element.  
+             Each fracturable logic element has a 6-LUT that can alternatively operate as two 5-LUTs with shared inputs. 
+             The outputs of the fracturable logic element can be optionally registered
+        -->
+      <pb_type name="fle" num_pb="10">
+        <input name="in" num_pins="6"/>
+        <input name="cin" num_pins="1"/>
+        <output name="out" num_pins="2"/>
+        <output name="cout" num_pins="1"/>
+        <clock name="clk" num_pins="1"/>
+        <mode name="n2_lut5">
+          <pb_type name="lut5inter" num_pb="1">
+            <input name="in" num_pins="5"/>
+            <input name="cin" num_pins="1"/>
+            <output name="out" num_pins="2"/>
+            <output name="cout" num_pins="1"/>
+            <clock name="clk" num_pins="1"/>
+            <pb_type name="ble5" num_pb="2">
+              <input name="in" num_pins="5"/>
+              <input name="cin" num_pins="1"/>
+              <output name="out" num_pins="1"/>
+              <output name="cout" num_pins="1"/>
+              <clock name="clk" num_pins="1"/>
+              <mode name="blut5">
+                <pb_type name="flut5" num_pb="1">
+                  <input name="in" num_pins="5"/>
+                  <output name="out" num_pins="1"/>
+                  <clock name="clk" num_pins="1"/>
+                  <!-- Regular LUT mode -->
+                  <pb_type name="lut5" blif_model=".names" num_pb="1" class="lut">
+                    <input name="in" num_pins="5" port_class="lut_in"/>
+                    <output name="out" num_pins="1" port_class="lut_out"/>
+                    <!-- LUT timing using delay matrix -->
+                    <!-- These are the physical maximum delay inputs on a Stratix IV LUT but because VPR cannot do LUT rebalancing,
+                           we instead take the average of these numbers to get more stable results
+                        82e-12
+                        173e-12
+                        261e-12
+                        263e-12
+                        398e-12
+							The minimum delay/maximum delay ratio is 0.7395 in QII on Stratix IV. Hence, the minimum delay
+						is 0.7395 * the average of the maximum numbers
+                        -->
+                    <delay_matrix type="max" in_port="lut5.in" out_port="lut5.out">
+                        235e-12
+                        235e-12
+                        235e-12
+                        235e-12
+                        235e-12
+                      </delay_matrix>
+                    <delay_matrix type="min" in_port="lut5.in" out_port="lut5.out">
+                        174e-12
+                        174e-12
+                        174e-12
+                        174e-12
+                        174e-12
+                      </delay_matrix>
+                  </pb_type>
+                  <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
+                    <input name="D" num_pins="1" port_class="D"/>
+                    <output name="Q" num_pins="1" port_class="Q"/>
+                    <clock name="clk" num_pins="1" port_class="clock"/>
+                    <T_setup value="66e-12" port="ff.D" clock="clk"/>
+                    <T_hold value="37e-12" port="ff.D" clock="clk"/>
+                    <T_clock_to_Q max="124e-12" min="60e-12" port="ff.Q" clock="clk"/>
+                  </pb_type>
+                  <interconnect>
+                    <direct name="direct1" input="flut5.in" output="lut5.in"/>
+                    <direct name="direct2" input="lut5.out" output="ff.D">
+                      <pack_pattern name="ble5" in_port="lut5.out" out_port="ff.D"/>
+                    </direct>
+                    <direct name="direct3" input="flut5.clk" output="ff.clk"/>
+                    <mux name="mux1" input="ff.Q lut5.out" output="flut5.out">
+                      <delay_constant max="25e-12" min="24e-12" in_port="lut5.out" out_port="flut5.out"/>
+                      <delay_constant max="45e-12" min="27e-12" in_port="ff.Q" out_port="flut5.out"/>
+                    </mux>
+                  </interconnect>
+                </pb_type>
+                <interconnect>
+                  <direct name="direct1" input="ble5.in" output="flut5.in"/>
+                  <direct name="direct2" input="ble5.clk" output="flut5.clk"/>
+                  <direct name="direct3" input="flut5.out" output="ble5.out"/>
+                </interconnect>
+              </mode>
+              <mode name="arithmetic">
+                <pb_type name="arithmetic" num_pb="1">
+                  <input name="in" num_pins="4"/>
+                  <input name="cin" num_pins="1"/>
+                  <output name="out" num_pins="1"/>
+                  <output name="cout" num_pins="1"/>
+                  <clock name="clk" num_pins="1"/>
+                  <!-- Special dual-LUT mode that drives adder only -->
+                  <pb_type name="lut4" blif_model=".names" num_pb="2" class="lut">
+                    <input name="in" num_pins="4" port_class="lut_in"/>
+                    <output name="out" num_pins="1" port_class="lut_out"/>
+                    <!-- LUT timing using delay matrix -->
+                    <!-- These are the physical delay inputs on a Stratix IV LUT but because VPR cannot do LUT rebalancing,
+                             we instead take the average of these numbers to get more stable results
+                        82e-12
+                        173e-12
+                        261e-12
+                        263e-12
+							The minimum delay/maximum delay ratio is 0.7395 in QII on Stratix IV. Hence, the minimum delay
+						is 0.7395 * the average of the maximum numbers
+                        -->
+                    <delay_matrix type="max" in_port="lut4.in" out_port="lut4.out">
+                        195e-12
+                        195e-12
+                        195e-12
+                        195e-12
+                      </delay_matrix>
+                    <delay_matrix type="min" in_port="lut4.in" out_port="lut4.out">
+                        144e-12
+                        144e-12
+                        144e-12
+                        144e-12
+                      </delay_matrix>
+                  </pb_type>
+                  <!-- The ratio between minimum and maximum delays in StratixIV for data ports to sumout
+							is 0.6809 and cin to sumout is 0.6969-->
+                  <pb_type name="adder" blif_model=".subckt adder" num_pb="1">
+                    <input name="a" num_pins="1"/>
+                    <input name="b" num_pins="1"/>
+                    <input name="cin" num_pins="1"/>
+                    <output name="cout" num_pins="1"/>
+                    <output name="sumout" num_pins="1"/>
+                    <delay_constant max="0.3e-9" min="0.2043e-9" in_port="adder.a" out_port="adder.sumout"/>
+                    <delay_constant max="0.3e-9" min="0.2043e-9" in_port="adder.b" out_port="adder.sumout"/>
+                    <delay_constant max="0.3e-9" min="0.2043e-9" in_port="adder.cin" out_port="adder.sumout"/>
+                    <delay_constant max="0.3e-9" min="0.2043e-9" in_port="adder.a" out_port="adder.cout"/>
+                    <delay_constant max="0.3e-9" min="0.2043e-9" in_port="adder.b" out_port="adder.cout"/>
+                    <delay_constant max="0.01e-9" min="6.9797e-12" in_port="adder.cin" out_port="adder.cout"/>
+                  </pb_type>
+                  <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
+                    <input name="D" num_pins="1" port_class="D"/>
+                    <output name="Q" num_pins="1" port_class="Q"/>
+                    <clock name="clk" num_pins="1" port_class="clock"/>
+                    <T_setup value="66e-12" port="ff.D" clock="clk"/>
+                    <T_hold value="37e-12" port="ff.D" clock="clk"/>
+                    <T_clock_to_Q max="124e-12" min="60e-12" port="ff.Q" clock="clk"/>
+                  </pb_type>
+                  <interconnect>
+                    <direct name="clock" input="arithmetic.clk" output="ff.clk"/>
+                    <direct name="lut_in1" input="arithmetic.in[3:0]" output="lut4[0:0].in[3:0]"/>
+                    <direct name="lut_in2" input="arithmetic.in[3:0]" output="lut4[1:1].in[3:0]"/>
+                    <direct name="lut_to_add1" input="lut4[0:0].out" output="adder.a">
+                      </direct>
+                    <direct name="lut_to_add2" input="lut4[1:1].out" output="adder.b">
+                      </direct>
+                    <direct name="add_to_ff" input="adder.sumout" output="ff.D">
+                      <pack_pattern name="chain" in_port="adder.sumout" out_port="ff.D"/>
+                    </direct>
+                    <direct name="carry_in" input="arithmetic.cin" output="adder.cin">
+                      <pack_pattern name="chain" in_port="arithmetic.cin" out_port="adder.cin"/>
+                    </direct>
+                    <direct name="carry_out" input="adder.cout" output="arithmetic.cout">
+                      <pack_pattern name="chain" in_port="adder.cout" out_port="arithmetic.cout"/>
+                    </direct>
+                    <mux name="sumout" input="ff.Q adder.sumout" output="arithmetic.out">
+                      <delay_constant max="25e-12" min="24e-12" in_port="adder.sumout" out_port="arithmetic.out"/>
+                      <delay_constant max="45e-12" min="27e-12" in_port="ff.Q" out_port="arithmetic.out"/>
+                    </mux>
+                  </interconnect>
+                </pb_type>
+                <interconnect>
+                  <direct name="direct1" input="ble5.in[3:0]" output="arithmetic.in"/>
+                  <direct name="carry_in" input="ble5.cin" output="arithmetic.cin">
+                    <pack_pattern name="chain" in_port="ble5.cin" out_port="arithmetic.cin"/>
+                  </direct>
+                  <direct name="carry_out" input="arithmetic.cout" output="ble5.cout">
+                    <pack_pattern name="chain" in_port="arithmetic.cout" out_port="ble5.cout"/>
+                  </direct>
+                  <direct name="direct2" input="ble5.clk" output="arithmetic.clk"/>
+                  <direct name="direct3" input="arithmetic.out" output="ble5.out"/>
+                </interconnect>
+              </mode>
+            </pb_type>
+            <interconnect>
+              <direct name="direct1" input="lut5inter.in" output="ble5[0:0].in"/>
+              <direct name="direct2" input="lut5inter.in" output="ble5[1:1].in"/>
+              <direct name="direct3" input="ble5[1:0].out" output="lut5inter.out"/>
+              <direct name="carry_in" input="lut5inter.cin" output="ble5[0:0].cin">
+                <pack_pattern name="chain" in_port="lut5inter.cin" out_port="ble5[0:0].cin"/>
+              </direct>
+              <direct name="carry_out" input="ble5[1:1].cout" output="lut5inter.cout">
+                <pack_pattern name="chain" in_port="ble5[1:1].cout" out_port="lut5inter.cout"/>
+              </direct>
+              <direct name="carry_link" input="ble5[0:0].cout" output="ble5[1:1].cin"> <!-- links ble5[0] carry-out to ble5[1] carry-in; the pack_pattern ports below must name this same edge -->
+                <pack_pattern name="chain" in_port="ble5[0:0].cout" out_port="ble5[1:1].cin"/>
+              </direct>
+              <complete name="complete1" input="lut5inter.clk" output="ble5[1:0].clk"/>
+            </interconnect>
+          </pb_type>
+          <interconnect>
+            <direct name="direct1" input="fle.in[4:0]" output="lut5inter.in"/>
+            <direct name="direct2" input="lut5inter.out" output="fle.out"/>
+            <direct name="direct3" input="fle.clk" output="lut5inter.clk"/>
+            <direct name="carry_in" input="fle.cin" output="lut5inter.cin">
+              <pack_pattern name="chain" in_port="fle.cin" out_port="lut5inter.cin"/>
+            </direct>
+            <direct name="carry_out" input="lut5inter.cout" output="fle.cout">
+              <pack_pattern name="chain" in_port="lut5inter.cout" out_port="fle.cout"/>
+            </direct>
+          </interconnect>
+        </mode>
+        <!-- n2_lut5 -->
+        <mode name="n1_lut6">
+          <pb_type name="ble6" num_pb="1">
+            <input name="in" num_pins="6"/>
+            <output name="out" num_pins="1"/>
+            <clock name="clk" num_pins="1"/>
+            <pb_type name="lut6" blif_model=".names" num_pb="1" class="lut">
+              <input name="in" num_pins="6" port_class="lut_in"/>
+              <output name="out" num_pins="1" port_class="lut_out"/>
+              <!-- LUT timing using delay matrix -->
+              <!-- These are the physical delay inputs on a Stratix IV LUT but because VPR cannot do LUT rebalancing,
+                       we instead take the average of these numbers to get more stable results
+                  82e-12
+                  173e-12
+                  261e-12
+                  263e-12
+                  398e-12
+                  397e-12
+
+					The minimum delay/maximum delay ratio is 0.7395 in QII on Stratix IV. Hence, the minimum delay
+						is 0.7395 * the average of the maximum numbers (0.7395 * 261 ps = 193 ps)
+                  -->
+              <delay_matrix type="max" in_port="lut6.in" out_port="lut6.out">
+                  261e-12
+                  261e-12
+                  261e-12
+                  261e-12
+                  261e-12
+                  261e-12
+                </delay_matrix>
+              <delay_matrix type="min" in_port="lut6.in" out_port="lut6.out">
+                  193e-12
+                  193e-12
+                  193e-12
+                  193e-12
+                  193e-12
+                  193e-12
+                </delay_matrix>
+            </pb_type>
+            <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
+              <input name="D" num_pins="1" port_class="D"/>
+              <output name="Q" num_pins="1" port_class="Q"/>
+              <clock name="clk" num_pins="1" port_class="clock"/>
+              <T_setup value="66e-12" port="ff.D" clock="clk"/>
+              <T_hold value="37e-12" port="ff.D" clock="clk"/>
+              <T_clock_to_Q max="124e-12" min="60e-12" port="ff.Q" clock="clk"/>
+            </pb_type>
+            <interconnect>
+              <direct name="direct1" input="ble6.in" output="lut6[0:0].in"/>
+              <direct name="direct2" input="lut6.out" output="ff.D">
+                <pack_pattern name="ble6" in_port="lut6.out" out_port="ff.D"/>
+              </direct>
+              <direct name="direct3" input="ble6.clk" output="ff.clk"/>
+              <mux name="mux1" input="ff.Q lut6.out" output="ble6.out">
+                <delay_constant max="25e-12" min="24e-12" in_port="lut6.out" out_port="ble6.out"/>
+                <delay_constant max="45e-12" min="27e-12" in_port="ff.Q" out_port="ble6.out"/>
+              </mux>
+            </interconnect>
+          </pb_type>
+          <interconnect>
+            <direct name="direct1" input="fle.in" output="ble6.in"/>
+            <direct name="direct2" input="ble6.out" output="fle.out[0:0]"/>
+            <direct name="direct3" input="fle.clk" output="ble6.clk"/>
+          </interconnect>
+        </mode>
+        <!-- n1_lut6 -->
+      </pb_type>
+      <interconnect>
+        <!-- We use a full crossbar to get logical equivalence at inputs of CLB 
+           The delays below come from Stratix IV. the delay through a connection block
+           input mux + the crossbar in Stratix IV is 167 ps. We already have a 72 ps 
+           delay on the connection block input mux (modeled by Ian Kuon), so the remaining
+           delay within the crossbar is 95 ps. 
+		   For the minimum delays, the delay through a connection block input mux plus
+		   the crossbar in Stratix IV is 144 ps. Subtracting the 72 ps leaves 72 ps remaining.
+           The max delay of cluster feedbacks in Stratix IV is 100 ps, when driven by a LUT.
+           Since all our LUT outputs go to a BLE output, and have a delay of
+           25 ps to do so, we subtract 25 ps from the 100 ps delay of a feedback
+           to get the part that should be marked on the crossbar. For the minimum delay,
+  		   the value in Stratix IV is 93 ps; subtracting the 24 ps leaves 69 ps.-->
+        <complete name="crossbar" input="clb.I fle[9:0].out" output="fle[9:0].in">
+          <delay_constant max="95e-12" min="72e-12" in_port="clb.I" out_port="fle[9:0].in"/>
+          <delay_constant max="75e-12" min="69e-12" in_port="fle[9:0].out" out_port="fle[9:0].in"/>
+        </complete>
+        <complete name="clks" input="clb.clk" output="fle[9:0].clk">
+          </complete>
+        <!-- This way of specifying direct connection to clb outputs is important because this architecture uses automatic spreading of opins.
+                 By grouping the output pins in this fashion, if a logic block is completely filled by 6-LUTs,
+                 then the outputs those 6-LUTs take get evenly distributed across all four sides of the CLB instead of clumped on two sides (which is what happens with a more
+                 naive specification).
+          -->
+        <direct name="clbouts1" input="fle[9:0].out[0:0]" output="clb.O[9:0]"/>
+        <direct name="clbouts2" input="fle[9:0].out[1:1]" output="clb.O[19:10]"/>
+        <!-- Carry chain links -->
+        <direct name="carry_in" input="clb.cin" output="fle[0:0].cin">
+          <!-- Put all inter-block carry chain delay on this one edge -->
+          <delay_constant max="0.16e-9" min="0.11e-9" in_port="clb.cin" out_port="fle[0:0].cin"/>
+          <pack_pattern name="chain" in_port="clb.cin" out_port="fle[0:0].cin"/>
+        </direct>
+        <direct name="carry_out" input="fle[9:9].cout" output="clb.cout">
+          <pack_pattern name="chain" in_port="fle[9:9].cout" out_port="clb.cout"/>
+        </direct>
+        <direct name="carry_link" input="fle[8:0].cout" output="fle[9:1].cin">
+          <pack_pattern name="chain" in_port="fle[8:0].cout" out_port="fle[9:1].cin"/>
+        </direct>
+      </interconnect>
+    </pb_type>
+    <!-- Define general purpose logic block (CLB) ends -->
+    <!-- Define fracturable multiplier begin -->
+    <!-- This multiplier can operate as a 36x36 multiplier that can fracture to two 18x18 multipliers each of which can further fracture to two 9x9 multipliers 
+	   For delay modelling, the 36x36 DSP multiplier in Stratix IV has a maximum delay of 1.523 ns + 1.93 ns
+	    = 3.45 ns. The average ratio of the minimum to the maximum value of a dsp mac out in 36 bit multiply
+		mode is 0.51. Hence, the minimum delay is modeled as 0.776 ns + 0.984 ns = 1.760 ns.
+ 		The 18x18 mode doesn't need to sum four 18x18 multipliers, so it is a bit
+	   faster: 1.523 ns for the multiplier, and 1.09 ns for the multiplier output block.
+	    For the input and output interconnect delays, unlike Stratix IV, we don't
+	   have any routing/logic flexibility (crossbars) at the inputs.  There is some output muxing
+	   in Stratix IV and this architecture to select which multiplier outputs should go out (e.g.
+	   9x9 outputs, 18x18 or 36x36) so those are very close between the two architectures. 
+	   We take the conservative (slightly pessimistic)
+           approach modelling the input as the same as the Stratix IV input delay and the output delay the same as the Stratix IV DSP out delay.
+		   
+	   We estimate block area by using the published Stratix III data (which is architecturally identical to Stratix IV)
+	      (H. Wong, V. Betz and J. Rose, "Comparing FPGA vs. Custom CMOS and the Impact on Processor Microarchitecture", FPGA 2011) of 0.2623 
+		  mm^2 and scaling from 65 to 40 nm to obtain 0.0993 mm^2. That area is for a DSP block with approximately 2x the functionality of 
+		  the block we use (can implement two 36x36 multiplies instead of our 1, eight 18x18 multiplies instead of our 4, etc.). Hence we 
+		  divide the area by 2 to obtain 0.0497 mm^2. One minimum-width transistor unit (MWTU) = 60 L^2 (where L = 40 nm), so this is 518,000 MWTUs.
+		  That area includes routing and the connection block input muxes.  Our DSP block is four
+		  rows high, and hence includes four horizontal routing channel segments and four vertical ones, which is 4x the routing of a logic
+		  block (single tile). It also includes 3.6x the outputs of a logic block, and 1.8x the inputs. Hence a slight overestimate of the routing
+		  area associated with our DSP block is four times that of a logic tile, where the routing area of a logic tile was calculated above (at W = 300)
+		  as 30481 MWTAs. Hence the (core, non-routing) area of our DSP block is approximately 518,000 - 4 * 30,481 = 396,000 MWTUs.
+      -->
+    <pb_type name="mult_36">
+      <input name="a" num_pins="36"/>
+      <input name="b" num_pins="36"/>
+      <output name="out" num_pins="72"/>
+      <mode name="two_divisible_mult_18x18">
+        <pb_type name="divisible_mult_18x18" num_pb="2">
+          <input name="a" num_pins="18"/>
+          <input name="b" num_pins="18"/>
+          <output name="out" num_pins="36"/>
+          <!-- Model 9x9 delay and 18x18 delay as the same.  9x9 could be faster, but in Stratix IV
+	          isn't, presumably because the multiplier layout is really optimized for 18x18.
+		-->
+          <mode name="two_mult_9x9">
+            <pb_type name="mult_9x9_slice" num_pb="2">
+              <input name="A_cfg" num_pins="9"/>
+              <input name="B_cfg" num_pins="9"/>
+              <output name="OUT_cfg" num_pins="18"/>
+              <pb_type name="mult_9x9" blif_model=".subckt multiply" num_pb="1">
+                <input name="a" num_pins="9"/>
+                <input name="b" num_pins="9"/>
+                <output name="out" num_pins="18"/>
+                <delay_constant max="1.523e-9" min="0.776e-9" in_port="mult_9x9.a" out_port="mult_9x9.out"/>
+                <delay_constant max="1.523e-9" min="0.776e-9" in_port="mult_9x9.b" out_port="mult_9x9.out"/>
+              </pb_type>
+              <interconnect>
+                <direct name="a2a" input="mult_9x9_slice.A_cfg" output="mult_9x9.a">
+                </direct>
+                <direct name="b2b" input="mult_9x9_slice.B_cfg" output="mult_9x9.b">
+                </direct>
+                <direct name="out2out" input="mult_9x9.out" output="mult_9x9_slice.OUT_cfg">
+                </direct>
+              </interconnect>
+              <power method="pin-toggle">
+                <port name="A_cfg" energy_per_toggle="1.45e-12"/>
+                <port name="B_cfg" energy_per_toggle="1.45e-12"/>
+                <static_power power_per_instance="0.0"/>
+              </power>
+            </pb_type>
+            <interconnect>
+              <direct name="a2a" input="divisible_mult_18x18.a" output="mult_9x9_slice[1:0].A_cfg">
+              </direct>
+              <direct name="b2b" input="divisible_mult_18x18.b" output="mult_9x9_slice[1:0].B_cfg">
+              </direct>
+              <direct name="out2out" input="mult_9x9_slice[1:0].OUT_cfg" output="divisible_mult_18x18.out">
+              </direct>
+            </interconnect>
+          </mode>
+          <mode name="mult_18x18">
+            <pb_type name="mult_18x18_slice" num_pb="1">
+              <input name="A_cfg" num_pins="18"/>
+              <input name="B_cfg" num_pins="18"/>
+              <output name="OUT_cfg" num_pins="36"/>
+              <pb_type name="mult_18x18" blif_model=".subckt multiply" num_pb="1">
+                <input name="a" num_pins="18"/>
+                <input name="b" num_pins="18"/>
+                <output name="out" num_pins="36"/>
+                <delay_constant max="1.523e-9" min="0.776e-9" in_port="mult_18x18.a" out_port="mult_18x18.out"/>
+                <delay_constant max="1.523e-9" min="0.776e-9" in_port="mult_18x18.b" out_port="mult_18x18.out"/>
+              </pb_type>
+              <interconnect>
+                <direct name="a2a" input="mult_18x18_slice.A_cfg" output="mult_18x18.a">
+                </direct>
+                <direct name="b2b" input="mult_18x18_slice.B_cfg" output="mult_18x18.b">
+                </direct>
+                <direct name="out2out" input="mult_18x18.out" output="mult_18x18_slice.OUT_cfg">
+                </direct>
+              </interconnect>
+              <power method="pin-toggle">
+                <port name="A_cfg" energy_per_toggle="1.09e-12"/>
+                <port name="B_cfg" energy_per_toggle="1.09e-12"/>
+                <static_power power_per_instance="0.0"/>
+              </power>
+            </pb_type>
+            <interconnect>
+              <direct name="a2a" input="divisible_mult_18x18.a" output="mult_18x18_slice.A_cfg">
+              </direct>
+              <direct name="b2b" input="divisible_mult_18x18.b" output="mult_18x18_slice.B_cfg">
+              </direct>
+              <direct name="out2out" input="mult_18x18_slice.OUT_cfg" output="divisible_mult_18x18.out">
+              </direct>
+            </interconnect>
+          </mode>
+          <power method="sum-of-children"/>
+        </pb_type>
+        <interconnect>
+          <!-- Stratix IV input delay of 207ps is conservative for this architecture because this architecture does not have an input crossbar in the multiplier. 
+		   Subtract 72.5 ps delay, which is already in the connection block input mux, leaving 134 ps
+				The interconnect difference for DSP blocks is 0.5523, which leads to a minimum delay of 74 ps
+              -->
+          <direct name="a2a" input="mult_36.a" output="divisible_mult_18x18[1:0].a">
+            <delay_constant max="134e-12" min="74e-12" in_port="mult_36.a" out_port="divisible_mult_18x18[1:0].a"/>
+          </direct>
+          <direct name="b2b" input="mult_36.b" output="divisible_mult_18x18[1:0].b">
+            <delay_constant max="134e-12" min="74e-12" in_port="mult_36.b" out_port="divisible_mult_18x18[1:0].b"/>
+          </direct>
+          <direct name="out2out" input="divisible_mult_18x18[1:0].out" output="mult_36.out">
+            <delay_constant max="1.09e-9" min="74e-12" in_port="divisible_mult_18x18[1:0].out" out_port="mult_36.out"/>
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mult_36x36">
+        <pb_type name="mult_36x36_slice" num_pb="1">
+          <input name="A_cfg" num_pins="36"/>
+          <input name="B_cfg" num_pins="36"/>
+          <output name="OUT_cfg" num_pins="72"/>
+          <pb_type name="mult_36x36" blif_model=".subckt multiply" num_pb="1">
+            <input name="a" num_pins="36"/>
+            <input name="b" num_pins="36"/>
+            <output name="out" num_pins="72"/>
+            <delay_constant max="1.523e-9" min="0.776e-9" in_port="mult_36x36.a" out_port="mult_36x36.out"/>
+            <delay_constant max="1.523e-9" min="0.776e-9" in_port="mult_36x36.b" out_port="mult_36x36.out"/>
+          </pb_type>
+          <interconnect>
+            <direct name="a2a" input="mult_36x36_slice.A_cfg" output="mult_36x36.a">
+            </direct>
+            <direct name="b2b" input="mult_36x36_slice.B_cfg" output="mult_36x36.b">
+            </direct>
+            <direct name="out2out" input="mult_36x36.out" output="mult_36x36_slice.OUT_cfg">
+            </direct>
+          </interconnect>
+          <power method="pin-toggle">
+            <port name="A_cfg" energy_per_toggle="2.13e-12"/>
+            <port name="B_cfg" energy_per_toggle="2.13e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <!-- Stratix IV input delay of 207ps is conservative for this architecture because this architecture does not have an input crossbar in the multiplier. 
+		   Subtract 72.5 ps delay, which is already in the connection block input mux, leading
+		   to a 134 ps delay.
+				The interconnect difference for DSP blocks is 0.5523, which leads to a minimum delay of 74 ps
+              -->
+          <direct name="a2a" input="mult_36.a" output="mult_36x36_slice.A_cfg">
+            <delay_constant max="134e-12" min="74e-12" in_port="mult_36.a" out_port="mult_36x36_slice.A_cfg"/>
+          </direct>
+          <direct name="b2b" input="mult_36.b" output="mult_36x36_slice.B_cfg">
+            <delay_constant max="134e-12" min="74e-12" in_port="mult_36.b" out_port="mult_36x36_slice.B_cfg"/>
+          </direct>
+          <direct name="out2out" input="mult_36x36_slice.OUT_cfg" output="mult_36.out"> <!-- multiplier output block: 1.93 ns max; min = 0.51 * 1.93 ns = 0.984 ns -->
+            <delay_constant max="1.93e-9" min="0.984e-9" in_port="mult_36x36_slice.OUT_cfg" out_port="mult_36.out"/>
+          </direct>
+        </interconnect>
+      </mode>
+      <!-- Place this multiplier block every 8 columns from (and including) the sixth column -->
+      <power method="sum-of-children"/>
+    </pb_type>
+    <!-- Define fracturable multiplier end -->
+    <!-- Define fracturable memory begin -->
+    <!-- 32 Kb Memory that can operate from 512x64 to 32Kx1 for single-port mode and 1024x32 to 32Kx1 for dual-port mode.  
+           Area and max delay based off Stratix IV 9K and 144K memories (delay from linear interpolation, Tsu(483 ps, 636 ps) Tco(1084ps, 1969ps)).  
+
+		   uTh/Tsu ratio = 0.468, uTco min/max ratio = 0.97
+           uTh = 226ps, 298ps
+           Min uTco = 1051ps, 1909ps
+           Max input delay = 204ps (from Stratix IV LAB line) - 72ps (this architecture does not lump connection box delay in internal delay)
+           Max output delay = M9K buffer 50ps. 
+		   Min input delay = 160ps (from Stratix IV Lab line) - 72ps
+		   Min output delay = 46ps (M9K buffer min/max ratio = 0.9286)
+		   
+		   Area is obtained by appropriately scaling and adjusting the published Stratix III (which is architecturally identical to Stratix IV)
+		   data from H. Wong, V. Betz and J. Rose, "Comparing FPGA vs. Custom CMOS and the Impact on Processor Microarchitecture", FPGA 2011.
+		   Linearly interpolating (by bit count) between the M9k and M144k areas to obtain an M32k (our RAM size) point yields a 65 nm area
+		   of 0.153 mm^2. Interpolating based on port count between the RAMs would instead yield an area of 0.209 mm^2 for our 32 kb RAM; since
+		   bit count accounts for more area than ports for a RAM this size we choose the bit count interpolation; however, since the port interpolation
+		   is not radically different this also gives us confidence that interpolating based on bits is OK, but slightly underpredicts area.
+		   Scaling to 40 nm yields 0.0579 mm^2, and converting to MWTUs at 60 L^2 / MWTU yields 604,000 MWTUs. This includes routing. A Stratix IV
+		   M9K RAM is one row high and hence has one routing tile (one horizontal and one vertical routing segment area). An M144k RAM has 8 such tiles.
+		   Linearly interpolating on
+		   bits to 32 kb yields 2.2 routing tiles incorporated in the area number above. The inter-block routing represents 30% of the area of a logic 
+		   tile according to D. Lewis et al, "Architectural Enhancements in Stratix V," FPGA 2013. Hence we should subtract 0.3 * 2.2 * 84,375 MWTUs to
+		   obtain a RAM core area (not including inter-block routing) of 548,000 MWTU areas for our 32 kb RAM in a 40 nm process.
+      -->
+    <pb_type name="memory">
+      <input name="addr1" num_pins="15"/>
+      <input name="addr2" num_pins="15"/>
+      <input name="data" num_pins="64"/>
+      <input name="we1" num_pins="1"/>
+      <input name="we2" num_pins="1"/>
+      <output name="out" num_pins="64"/>
+      <clock name="clk" num_pins="1"/>
+      <!-- Specify single port mode first -->
+      <mode name="mem_512x64_sp"> <!-- Single-port configuration: 512 words x 64 bits (9 address pins, 64 data pins), fed from memory.addr1[8:0], memory.data[63:0] and memory.we1. -->
+        <pb_type name="mem_512x64_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1"> <!-- One leaf primitive, tied to the single_port_ram .subckt model. -->
+          <input name="addr" num_pins="9" port_class="address"/>
+          <input name="data" num_pins="64" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="64" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_512x64_sp.addr" clock="clk"/> <!-- All timing below is relative to clk; every input port uses the same 509e-12 setup and 238e-12 hold value. -->
+          <T_setup value="509e-12" port="mem_512x64_sp.data" clock="clk"/>
+          <T_setup value="509e-12" port="mem_512x64_sp.we" clock="clk"/>
+          <T_hold value="238e-12" port="mem_512x64_sp.addr" clock="clk"/>
+          <T_hold value="238e-12" port="mem_512x64_sp.data" clock="clk"/>
+          <T_hold value="238e-12" port="mem_512x64_sp.we" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_512x64_sp.out" clock="clk"/>
+          <power method="pin-toggle"> <!-- Energy is specified per toggle of clk; static_power is set to 0.0. -->
+            <port name="clk" energy_per_toggle="9.0e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect> <!-- Input connections use max 132e-12 / min 88e-12 and the output connection max 50e-12 / min 46e-12; these match the 204 ps - 72 ps, 160 ps - 72 ps, 50 ps and 46 ps figures in the comment preceding the memory pb_type. -->
+          <direct name="address1" input="memory.addr1[8:0]" output="mem_512x64_sp.addr">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[8:0]" out_port="mem_512x64_sp.addr"/>
+          </direct>
+          <direct name="data1" input="memory.data[63:0]" output="mem_512x64_sp.data">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[63:0]" out_port="mem_512x64_sp.data"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_512x64_sp.we">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_512x64_sp.we"/>
+          </direct>
+          <direct name="dataout1" input="mem_512x64_sp.out" output="memory.out[63:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_512x64_sp.out" out_port="memory.out[63:0]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_512x64_sp.clk"> <!-- Clock connection; no delay_constant is given for it. -->
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_1024x32_sp"> <!-- Single-port configuration: 1024 words x 32 bits (10 address pins, 32 data pins); only memory.addr1[9:0], memory.data[31:0], memory.we1 and memory.out[31:0] are connected in this mode. -->
+        <pb_type name="mem_1024x32_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1"> <!-- One leaf primitive, tied to the single_port_ram .subckt model. -->
+          <input name="addr" num_pins="10" port_class="address"/>
+          <input name="data" num_pins="32" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="32" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_1024x32_sp.addr" clock="clk"/> <!-- All timing below is relative to clk; every input port uses the same 509e-12 setup and 238e-12 hold value. -->
+          <T_setup value="509e-12" port="mem_1024x32_sp.data" clock="clk"/>
+          <T_setup value="509e-12" port="mem_1024x32_sp.we" clock="clk"/>
+          <T_hold value="238e-12" port="mem_1024x32_sp.addr" clock="clk"/>
+          <T_hold value="238e-12" port="mem_1024x32_sp.data" clock="clk"/>
+          <T_hold value="238e-12" port="mem_1024x32_sp.we" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_1024x32_sp.out" clock="clk"/>
+          <power method="pin-toggle"> <!-- Energy is specified per toggle of clk; static_power is set to 0.0. -->
+            <port name="clk" energy_per_toggle="9.0e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect> <!-- Input connections use max 132e-12 / min 88e-12 and the output connection max 50e-12 / min 46e-12; these match the 204 ps - 72 ps, 160 ps - 72 ps, 50 ps and 46 ps figures in the comment preceding the memory pb_type. -->
+          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x32_sp.addr">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[9:0]" out_port="mem_1024x32_sp.addr"/>
+          </direct>
+          <direct name="data1" input="memory.data[31:0]" output="mem_1024x32_sp.data">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[31:0]" out_port="mem_1024x32_sp.data"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_1024x32_sp.we">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_1024x32_sp.we"/>
+          </direct>
+          <direct name="dataout1" input="mem_1024x32_sp.out" output="memory.out[31:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_1024x32_sp.out" out_port="memory.out[31:0]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_1024x32_sp.clk"> <!-- Clock connection; no delay_constant is given for it. -->
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_2048x16_sp"> <!-- Single-port configuration: 2048 words x 16 bits (11 address pins, 16 data pins); only memory.addr1[10:0], memory.data[15:0], memory.we1 and memory.out[15:0] are connected in this mode. -->
+        <pb_type name="mem_2048x16_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1"> <!-- One leaf primitive, tied to the single_port_ram .subckt model. -->
+          <input name="addr" num_pins="11" port_class="address"/>
+          <input name="data" num_pins="16" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="16" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_2048x16_sp.addr" clock="clk"/> <!-- All timing below is relative to clk; every input port uses the same 509e-12 setup and 238e-12 hold value. -->
+          <T_setup value="509e-12" port="mem_2048x16_sp.data" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x16_sp.we" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x16_sp.addr" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x16_sp.data" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x16_sp.we" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_2048x16_sp.out" clock="clk"/>
+          <power method="pin-toggle"> <!-- Energy is specified per toggle of clk; static_power is set to 0.0. -->
+            <port name="clk" energy_per_toggle="9.0e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect> <!-- Input connections use max 132e-12 / min 88e-12 and the output connection max 50e-12 / min 46e-12; these match the 204 ps - 72 ps, 160 ps - 72 ps, 50 ps and 46 ps figures in the comment preceding the memory pb_type. -->
+          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x16_sp.addr">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[10:0]" out_port="mem_2048x16_sp.addr"/>
+          </direct>
+          <direct name="data1" input="memory.data[15:0]" output="mem_2048x16_sp.data">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[15:0]" out_port="mem_2048x16_sp.data"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_2048x16_sp.we">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_2048x16_sp.we"/>
+          </direct>
+          <direct name="dataout1" input="mem_2048x16_sp.out" output="memory.out[15:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_2048x16_sp.out" out_port="memory.out[15:0]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_2048x16_sp.clk"> <!-- Clock connection; no delay_constant is given for it. -->
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_4096x8_sp"> <!-- Single-port configuration: 4096 words x 8 bits (12 address pins, 8 data pins); only memory.addr1[11:0], memory.data[7:0], memory.we1 and memory.out[7:0] are connected in this mode. -->
+        <pb_type name="mem_4096x8_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1"> <!-- One leaf primitive, tied to the single_port_ram .subckt model. -->
+          <input name="addr" num_pins="12" port_class="address"/>
+          <input name="data" num_pins="8" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="8" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_4096x8_sp.addr" clock="clk"/> <!-- All timing below is relative to clk; every input port uses the same 509e-12 setup and 238e-12 hold value. -->
+          <T_setup value="509e-12" port="mem_4096x8_sp.data" clock="clk"/>
+          <T_setup value="509e-12" port="mem_4096x8_sp.we" clock="clk"/>
+          <T_hold value="238e-12" port="mem_4096x8_sp.addr" clock="clk"/>
+          <T_hold value="238e-12" port="mem_4096x8_sp.data" clock="clk"/>
+          <T_hold value="238e-12" port="mem_4096x8_sp.we" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_4096x8_sp.out" clock="clk"/>
+          <power method="pin-toggle"> <!-- Energy is specified per toggle of clk; static_power is set to 0.0. -->
+            <port name="clk" energy_per_toggle="9.0e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect> <!-- Input connections use max 132e-12 / min 88e-12 and the output connection max 50e-12 / min 46e-12; these match the 204 ps - 72 ps, 160 ps - 72 ps, 50 ps and 46 ps figures in the comment preceding the memory pb_type. -->
+          <direct name="address1" input="memory.addr1[11:0]" output="mem_4096x8_sp.addr">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[11:0]" out_port="mem_4096x8_sp.addr"/>
+          </direct>
+          <direct name="data1" input="memory.data[7:0]" output="mem_4096x8_sp.data">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[7:0]" out_port="mem_4096x8_sp.data"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_4096x8_sp.we">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_4096x8_sp.we"/>
+          </direct>
+          <direct name="dataout1" input="mem_4096x8_sp.out" output="memory.out[7:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_4096x8_sp.out" out_port="memory.out[7:0]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_4096x8_sp.clk"> <!-- Clock connection; no delay_constant is given for it. -->
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_8192x4_sp"> <!-- Single-port configuration: 8192 words x 4 bits (13 address pins, 4 data pins); only memory.addr1[12:0], memory.data[3:0], memory.we1 and memory.out[3:0] are connected in this mode. -->
+        <pb_type name="mem_8192x4_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1"> <!-- One leaf primitive, tied to the single_port_ram .subckt model. -->
+          <input name="addr" num_pins="13" port_class="address"/>
+          <input name="data" num_pins="4" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="4" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_8192x4_sp.addr" clock="clk"/> <!-- All timing below is relative to clk; every input port uses the same 509e-12 setup and 238e-12 hold value. -->
+          <T_setup value="509e-12" port="mem_8192x4_sp.data" clock="clk"/>
+          <T_setup value="509e-12" port="mem_8192x4_sp.we" clock="clk"/>
+          <T_hold value="238e-12" port="mem_8192x4_sp.addr" clock="clk"/>
+          <T_hold value="238e-12" port="mem_8192x4_sp.data" clock="clk"/>
+          <T_hold value="238e-12" port="mem_8192x4_sp.we" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_8192x4_sp.out" clock="clk"/>
+          <power method="pin-toggle"> <!-- Energy is specified per toggle of clk; static_power is set to 0.0. -->
+            <port name="clk" energy_per_toggle="9.0e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect> <!-- Input connections use max 132e-12 / min 88e-12 and the output connection max 50e-12 / min 46e-12; these match the 204 ps - 72 ps, 160 ps - 72 ps, 50 ps and 46 ps figures in the comment preceding the memory pb_type. -->
+          <direct name="address1" input="memory.addr1[12:0]" output="mem_8192x4_sp.addr">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[12:0]" out_port="mem_8192x4_sp.addr"/>
+          </direct>
+          <direct name="data1" input="memory.data[3:0]" output="mem_8192x4_sp.data">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[3:0]" out_port="mem_8192x4_sp.data"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_8192x4_sp.we">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_8192x4_sp.we"/>
+          </direct>
+          <direct name="dataout1" input="mem_8192x4_sp.out" output="memory.out[3:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_8192x4_sp.out" out_port="memory.out[3:0]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_8192x4_sp.clk"> <!-- Clock connection; no delay_constant is given for it. -->
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_16384x2_sp"> <!-- Single-port configuration: 16384 words x 2 bits (14 address pins, 2 data pins); only memory.addr1[13:0], memory.data[1:0], memory.we1 and memory.out[1:0] are connected in this mode. -->
+        <pb_type name="mem_16384x2_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1"> <!-- One leaf primitive, tied to the single_port_ram .subckt model. -->
+          <input name="addr" num_pins="14" port_class="address"/>
+          <input name="data" num_pins="2" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="2" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_16384x2_sp.addr" clock="clk"/> <!-- All timing below is relative to clk; every input port uses the same 509e-12 setup and 238e-12 hold value. -->
+          <T_setup value="509e-12" port="mem_16384x2_sp.data" clock="clk"/>
+          <T_setup value="509e-12" port="mem_16384x2_sp.we" clock="clk"/>
+          <T_hold value="238e-12" port="mem_16384x2_sp.addr" clock="clk"/>
+          <T_hold value="238e-12" port="mem_16384x2_sp.data" clock="clk"/>
+          <T_hold value="238e-12" port="mem_16384x2_sp.we" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_16384x2_sp.out" clock="clk"/>
+          <power method="pin-toggle"> <!-- Energy is specified per toggle of clk; static_power is set to 0.0. -->
+            <port name="clk" energy_per_toggle="9.0e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect> <!-- Input connections use max 132e-12 / min 88e-12 and the output connection max 50e-12 / min 46e-12; these match the 204 ps - 72 ps, 160 ps - 72 ps, 50 ps and 46 ps figures in the comment preceding the memory pb_type. -->
+          <direct name="address1" input="memory.addr1[13:0]" output="mem_16384x2_sp.addr">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[13:0]" out_port="mem_16384x2_sp.addr"/>
+          </direct>
+          <direct name="data1" input="memory.data[1:0]" output="mem_16384x2_sp.data">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[1:0]" out_port="mem_16384x2_sp.data"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_16384x2_sp.we">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_16384x2_sp.we"/>
+          </direct>
+          <direct name="dataout1" input="mem_16384x2_sp.out" output="memory.out[1:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_16384x2_sp.out" out_port="memory.out[1:0]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_16384x2_sp.clk"> <!-- Clock connection; no delay_constant is given for it. -->
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_32768x1_sp"> <!-- Single-port configuration: 32768 words x 1 bit (15 address pins, 1 data pin); uses all of memory.addr1[14:0] but only memory.data[0:0], memory.we1 and memory.out[0:0]. -->
+        <pb_type name="mem_32768x1_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1"> <!-- One leaf primitive, tied to the single_port_ram .subckt model. -->
+          <input name="addr" num_pins="15" port_class="address"/>
+          <input name="data" num_pins="1" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="1" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_32768x1_sp.addr" clock="clk"/> <!-- All timing below is relative to clk; every input port uses the same 509e-12 setup and 238e-12 hold value. -->
+          <T_setup value="509e-12" port="mem_32768x1_sp.data" clock="clk"/>
+          <T_setup value="509e-12" port="mem_32768x1_sp.we" clock="clk"/>
+          <T_hold value="238e-12" port="mem_32768x1_sp.addr" clock="clk"/>
+          <T_hold value="238e-12" port="mem_32768x1_sp.data" clock="clk"/>
+          <T_hold value="238e-12" port="mem_32768x1_sp.we" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_32768x1_sp.out" clock="clk"/>
+          <power method="pin-toggle"> <!-- Energy is specified per toggle of clk; static_power is set to 0.0. -->
+            <port name="clk" energy_per_toggle="9.0e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect> <!-- Input connections use max 132e-12 / min 88e-12 and the output connection max 50e-12 / min 46e-12; these match the 204 ps - 72 ps, 160 ps - 72 ps, 50 ps and 46 ps figures in the comment preceding the memory pb_type. -->
+          <direct name="address1" input="memory.addr1[14:0]" output="mem_32768x1_sp.addr">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[14:0]" out_port="mem_32768x1_sp.addr"/>
+          </direct>
+          <direct name="data1" input="memory.data[0:0]" output="mem_32768x1_sp.data">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[0:0]" out_port="mem_32768x1_sp.data"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_32768x1_sp.we">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_32768x1_sp.we"/>
+          </direct>
+          <direct name="dataout1" input="mem_32768x1_sp.out" output="memory.out[0:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_32768x1_sp.out" out_port="memory.out[0:0]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_32768x1_sp.clk"> <!-- Clock connection; no delay_constant is given for it. -->
+          </direct>
+        </interconnect>
+      </mode>
+      <!-- Specify true dual port mode next -->
+      <mode name="mem_1024x32_dp"> <!-- Dual-port configuration: 1024 words x 32 bits per port (10 address pins, 32 data pins per port). Port 1 uses memory.addr1/we1 with data[31:0] and out[31:0]; port 2 uses memory.addr2/we2 with data[63:32] and out[63:32]. -->
+        <pb_type name="mem_1024x32_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1"> <!-- One leaf primitive, tied to the dual_port_ram .subckt model. -->
+          <input name="addr1" num_pins="10" port_class="address1"/>
+          <input name="addr2" num_pins="10" port_class="address2"/>
+          <input name="data1" num_pins="32" port_class="data_in1"/>
+          <input name="data2" num_pins="32" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="32" port_class="data_out1"/>
+          <output name="out2" num_pins="32" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_1024x32_dp.addr1" clock="clk"/> <!-- Both ports are timed against the single clk; every input port uses the same 509e-12 setup and 238e-12 hold value. -->
+          <T_setup value="509e-12" port="mem_1024x32_dp.data1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_1024x32_dp.we1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_1024x32_dp.addr2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_1024x32_dp.data2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_1024x32_dp.we2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_1024x32_dp.addr1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_1024x32_dp.data1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_1024x32_dp.we1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_1024x32_dp.addr2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_1024x32_dp.data2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_1024x32_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_1024x32_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_1024x32_dp.out2" clock="clk"/>
+          <power method="pin-toggle"> <!-- Energy is specified per toggle of clk (17.9e-12 here versus 9.0e-12 in the single-port modes); static_power is set to 0.0. -->
+            <port name="clk" energy_per_toggle="17.9e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect> <!-- Input connections use max 132e-12 / min 88e-12 and output connections max 50e-12 / min 46e-12; these match the 204 ps - 72 ps, 160 ps - 72 ps, 50 ps and 46 ps figures in the comment preceding the memory pb_type. -->
+          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x32_dp.addr1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[9:0]" out_port="mem_1024x32_dp.addr1"/>
+          </direct>
+          <direct name="address2" input="memory.addr2[9:0]" output="mem_1024x32_dp.addr2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr2[9:0]" out_port="mem_1024x32_dp.addr2"/>
+          </direct>
+          <direct name="data1" input="memory.data[31:0]" output="mem_1024x32_dp.data1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[31:0]" out_port="mem_1024x32_dp.data1"/>
+          </direct>
+          <direct name="data2" input="memory.data[63:32]" output="mem_1024x32_dp.data2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[63:32]" out_port="mem_1024x32_dp.data2"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_1024x32_dp.we1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_1024x32_dp.we1"/>
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_1024x32_dp.we2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we2" out_port="mem_1024x32_dp.we2"/>
+          </direct>
+          <direct name="dataout1" input="mem_1024x32_dp.out1" output="memory.out[31:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_1024x32_dp.out1" out_port="memory.out[31:0]"/>
+          </direct>
+          <direct name="dataout2" input="mem_1024x32_dp.out2" output="memory.out[63:32]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_1024x32_dp.out2" out_port="memory.out[63:32]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_1024x32_dp.clk"> <!-- Clock connection; no delay_constant is given for it. -->
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_2048x16_dp"> <!-- Dual-port configuration: 2048 words x 16 bits per port (11 address pins, 16 data pins per port). Port 1 uses memory.addr1/we1 with data[15:0] and out[15:0]; port 2 uses memory.addr2/we2 with data[31:16] and out[31:16]. -->
+        <pb_type name="mem_2048x16_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1"> <!-- One leaf primitive, tied to the dual_port_ram .subckt model. -->
+          <input name="addr1" num_pins="11" port_class="address1"/>
+          <input name="addr2" num_pins="11" port_class="address2"/>
+          <input name="data1" num_pins="16" port_class="data_in1"/>
+          <input name="data2" num_pins="16" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="16" port_class="data_out1"/>
+          <output name="out2" num_pins="16" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_2048x16_dp.addr1" clock="clk"/> <!-- Both ports are timed against the single clk; every input port uses the same 509e-12 setup and 238e-12 hold value. -->
+          <T_setup value="509e-12" port="mem_2048x16_dp.data1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x16_dp.we1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x16_dp.addr2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x16_dp.data2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x16_dp.we2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x16_dp.addr1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x16_dp.data1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x16_dp.we1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x16_dp.addr2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x16_dp.data2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x16_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_2048x16_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_2048x16_dp.out2" clock="clk"/>
+          <power method="pin-toggle"> <!-- Energy is specified per toggle of clk (17.9e-12 here versus 9.0e-12 in the single-port modes); static_power is set to 0.0. -->
+            <port name="clk" energy_per_toggle="17.9e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect> <!-- Input connections use max 132e-12 / min 88e-12 and output connections max 50e-12 / min 46e-12; these match the 204 ps - 72 ps, 160 ps - 72 ps, 50 ps and 46 ps figures in the comment preceding the memory pb_type. -->
+          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x16_dp.addr1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[10:0]" out_port="mem_2048x16_dp.addr1"/>
+          </direct>
+          <direct name="address2" input="memory.addr2[10:0]" output="mem_2048x16_dp.addr2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr2[10:0]" out_port="mem_2048x16_dp.addr2"/>
+          </direct>
+          <direct name="data1" input="memory.data[15:0]" output="mem_2048x16_dp.data1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[15:0]" out_port="mem_2048x16_dp.data1"/>
+          </direct>
+          <direct name="data2" input="memory.data[31:16]" output="mem_2048x16_dp.data2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[31:16]" out_port="mem_2048x16_dp.data2"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_2048x16_dp.we1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_2048x16_dp.we1"/>
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_2048x16_dp.we2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we2" out_port="mem_2048x16_dp.we2"/>
+          </direct>
+          <direct name="dataout1" input="mem_2048x16_dp.out1" output="memory.out[15:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_2048x16_dp.out1" out_port="memory.out[15:0]"/>
+          </direct>
+          <direct name="dataout2" input="mem_2048x16_dp.out2" output="memory.out[31:16]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_2048x16_dp.out2" out_port="memory.out[31:16]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_2048x16_dp.clk"> <!-- Clock connection; no delay_constant is given for it. -->
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_2048x8_dp"> <!-- Dual-port configuration with 12 address pins and 8 data pins per port, i.e. 4096 words x 8 bits per port. NOTE(review): the name says 2048x8, but the pin counts and the neighbouring 2048x16 / 8192x4 modes suggest mem_4096x8_dp was intended; the name is left unchanged because it may be referenced outside this file. TODO confirm. -->
+        <pb_type name="mem_2048x8_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1"> <!-- One leaf primitive, tied to the dual_port_ram .subckt model. -->
+          <input name="addr1" num_pins="12" port_class="address1"/>
+          <input name="addr2" num_pins="12" port_class="address2"/>
+          <input name="data1" num_pins="8" port_class="data_in1"/>
+          <input name="data2" num_pins="8" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="8" port_class="data_out1"/>
+          <output name="out2" num_pins="8" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_2048x8_dp.addr1" clock="clk"/> <!-- Both ports are timed against the single clk; every input port uses the same 509e-12 setup and 238e-12 hold value. -->
+          <T_setup value="509e-12" port="mem_2048x8_dp.data1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x8_dp.we1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x8_dp.addr2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x8_dp.data2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x8_dp.we2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x8_dp.addr1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x8_dp.data1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x8_dp.we1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x8_dp.addr2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x8_dp.data2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_2048x8_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_2048x8_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_2048x8_dp.out2" clock="clk"/>
+          <power method="pin-toggle"> <!-- Energy is specified per toggle of clk (17.9e-12 here versus 9.0e-12 in the single-port modes); static_power is set to 0.0. -->
+            <port name="clk" energy_per_toggle="17.9e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect> <!-- Port 1 uses memory.data[7:0] / out[7:0], port 2 uses memory.data[15:8] / out[15:8]. Input connections use max 132e-12 / min 88e-12 and output connections max 50e-12 / min 46e-12, matching the figures in the comment preceding the memory pb_type. -->
+          <direct name="address1" input="memory.addr1[11:0]" output="mem_2048x8_dp.addr1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[11:0]" out_port="mem_2048x8_dp.addr1"/>
+          </direct>
+          <direct name="address2" input="memory.addr2[11:0]" output="mem_2048x8_dp.addr2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr2[11:0]" out_port="mem_2048x8_dp.addr2"/>
+          </direct>
+          <direct name="data1" input="memory.data[7:0]" output="mem_2048x8_dp.data1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[7:0]" out_port="mem_2048x8_dp.data1"/>
+          </direct>
+          <direct name="data2" input="memory.data[15:8]" output="mem_2048x8_dp.data2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[15:8]" out_port="mem_2048x8_dp.data2"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_2048x8_dp.we1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_2048x8_dp.we1"/>
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_2048x8_dp.we2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we2" out_port="mem_2048x8_dp.we2"/>
+          </direct>
+          <direct name="dataout1" input="mem_2048x8_dp.out1" output="memory.out[7:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_2048x8_dp.out1" out_port="memory.out[7:0]"/>
+          </direct>
+          <direct name="dataout2" input="mem_2048x8_dp.out2" output="memory.out[15:8]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_2048x8_dp.out2" out_port="memory.out[15:8]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_2048x8_dp.clk"> <!-- Clock connection; no delay_constant is given for it. -->
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_8192x4_dp"> <!-- Dual-port configuration: 8192 words x 4 bits per port (13 address pins, 4 data pins per port). Port 1 uses memory.addr1/we1 with data[3:0] and out[3:0]; port 2 uses memory.addr2/we2 with data[7:4] and out[7:4]. -->
+        <pb_type name="mem_8192x4_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1"> <!-- One leaf primitive, tied to the dual_port_ram .subckt model. -->
+          <input name="addr1" num_pins="13" port_class="address1"/>
+          <input name="addr2" num_pins="13" port_class="address2"/>
+          <input name="data1" num_pins="4" port_class="data_in1"/>
+          <input name="data2" num_pins="4" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="4" port_class="data_out1"/>
+          <output name="out2" num_pins="4" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_8192x4_dp.addr1" clock="clk"/> <!-- Both ports are timed against the single clk; every input port uses the same 509e-12 setup and 238e-12 hold value. -->
+          <T_setup value="509e-12" port="mem_8192x4_dp.data1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_8192x4_dp.we1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_8192x4_dp.addr2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_8192x4_dp.data2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_8192x4_dp.we2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_8192x4_dp.addr1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_8192x4_dp.data1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_8192x4_dp.we1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_8192x4_dp.addr2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_8192x4_dp.data2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_8192x4_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_8192x4_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_8192x4_dp.out2" clock="clk"/>
+          <power method="pin-toggle"> <!-- Energy is specified per toggle of clk (17.9e-12 here versus 9.0e-12 in the single-port modes); static_power is set to 0.0. -->
+            <port name="clk" energy_per_toggle="17.9e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect> <!-- Input connections use max 132e-12 / min 88e-12 and output connections max 50e-12 / min 46e-12; these match the 204 ps - 72 ps, 160 ps - 72 ps, 50 ps and 46 ps figures in the comment preceding the memory pb_type. -->
+          <direct name="address1" input="memory.addr1[12:0]" output="mem_8192x4_dp.addr1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[12:0]" out_port="mem_8192x4_dp.addr1"/>
+          </direct>
+          <direct name="address2" input="memory.addr2[12:0]" output="mem_8192x4_dp.addr2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr2[12:0]" out_port="mem_8192x4_dp.addr2"/>
+          </direct>
+          <direct name="data1" input="memory.data[3:0]" output="mem_8192x4_dp.data1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[3:0]" out_port="mem_8192x4_dp.data1"/>
+          </direct>
+          <direct name="data2" input="memory.data[7:4]" output="mem_8192x4_dp.data2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[7:4]" out_port="mem_8192x4_dp.data2"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_8192x4_dp.we1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_8192x4_dp.we1"/>
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_8192x4_dp.we2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we2" out_port="mem_8192x4_dp.we2"/>
+          </direct>
+          <direct name="dataout1" input="mem_8192x4_dp.out1" output="memory.out[3:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_8192x4_dp.out1" out_port="memory.out[3:0]"/>
+          </direct>
+          <direct name="dataout2" input="mem_8192x4_dp.out2" output="memory.out[7:4]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_8192x4_dp.out2" out_port="memory.out[7:4]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_8192x4_dp.clk"> <!-- Clock connection; no delay_constant is given for it. -->
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_16384x2_dp">
+        <pb_type name="mem_16384x2_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="14" port_class="address1"/>
+          <input name="addr2" num_pins="14" port_class="address2"/>
+          <input name="data1" num_pins="2" port_class="data_in1"/>
+          <input name="data2" num_pins="2" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="2" port_class="data_out1"/>
+          <output name="out2" num_pins="2" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_16384x2_dp.addr1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_16384x2_dp.data1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_16384x2_dp.we1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_16384x2_dp.addr2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_16384x2_dp.data2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_16384x2_dp.we2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_16384x2_dp.addr1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_16384x2_dp.data1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_16384x2_dp.we1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_16384x2_dp.addr2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_16384x2_dp.data2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_16384x2_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_16384x2_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_16384x2_dp.out2" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="17.9e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[13:0]" output="mem_16384x2_dp.addr1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[13:0]" out_port="mem_16384x2_dp.addr1"/>
+          </direct>
+          <direct name="address2" input="memory.addr2[13:0]" output="mem_16384x2_dp.addr2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr2[13:0]" out_port="mem_16384x2_dp.addr2"/>
+          </direct>
+          <direct name="data1" input="memory.data[1:0]" output="mem_16384x2_dp.data1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[1:0]" out_port="mem_16384x2_dp.data1"/>
+          </direct>
+          <direct name="data2" input="memory.data[3:2]" output="mem_16384x2_dp.data2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[3:2]" out_port="mem_16384x2_dp.data2"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_16384x2_dp.we1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_16384x2_dp.we1"/>
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_16384x2_dp.we2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we2" out_port="mem_16384x2_dp.we2"/>
+          </direct>
+          <direct name="dataout1" input="mem_16384x2_dp.out1" output="memory.out[1:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_16384x2_dp.out1" out_port="memory.out[1:0]"/>
+          </direct>
+          <direct name="dataout2" input="mem_16384x2_dp.out2" output="memory.out[3:2]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_16384x2_dp.out2" out_port="memory.out[3:2]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_16384x2_dp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_32768x1_dp">
+        <pb_type name="mem_32768x1_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="15" port_class="address1"/>
+          <input name="addr2" num_pins="15" port_class="address2"/>
+          <input name="data1" num_pins="1" port_class="data_in1"/>
+          <input name="data2" num_pins="1" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="1" port_class="data_out1"/>
+          <output name="out2" num_pins="1" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_32768x1_dp.addr1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_32768x1_dp.data1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_32768x1_dp.we1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_32768x1_dp.addr2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_32768x1_dp.data2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_32768x1_dp.we2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_32768x1_dp.addr1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_32768x1_dp.data1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_32768x1_dp.we1" clock="clk"/>
+          <T_hold value="238e-12" port="mem_32768x1_dp.addr2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_32768x1_dp.data2" clock="clk"/>
+          <T_hold value="238e-12" port="mem_32768x1_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_32768x1_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_32768x1_dp.out2" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="17.9e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[14:0]" output="mem_32768x1_dp.addr1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[14:0]" out_port="mem_32768x1_dp.addr1"/>
+          </direct>
+          <direct name="address2" input="memory.addr2[14:0]" output="mem_32768x1_dp.addr2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr2[14:0]" out_port="mem_32768x1_dp.addr2"/>
+          </direct>
+          <direct name="data1" input="memory.data[0:0]" output="mem_32768x1_dp.data1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[0:0]" out_port="mem_32768x1_dp.data1"/>
+          </direct>
+          <direct name="data2" input="memory.data[1:1]" output="mem_32768x1_dp.data2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[1:1]" out_port="mem_32768x1_dp.data2"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_32768x1_dp.we1">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_32768x1_dp.we1"/>
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_32768x1_dp.we2">
+            <delay_constant max="132e-12" min="88e-12" in_port="memory.we2" out_port="mem_32768x1_dp.we2"/>
+          </direct>
+          <direct name="dataout1" input="mem_32768x1_dp.out1" output="memory.out[0:0]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_32768x1_dp.out1" out_port="memory.out[0:0]"/>
+          </direct>
+          <direct name="dataout2" input="mem_32768x1_dp.out2" output="memory.out[1:1]">
+            <delay_constant max="50e-12" min="46e-12" in_port="mem_32768x1_dp.out2" out_port="memory.out[1:1]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_32768x1_dp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <!-- Every input pin is driven by 15% of the tracks in a channel, every output pin is driven by 10% of the tracks in a channel -->
+      <!-- Place this memory block every 8 columns from (and including) the second column -->
+      <power method="sum-of-children"/>
+    </pb_type>
+    <!-- Define fracturable memory end -->
+  </complexblocklist>
+  <power>
+    <local_interconnect C_wire="2.5e-10"/>
+    <mux_transistor_size mux_transistor_size="3"/>
+    <FF_size FF_size="4"/>
+    <LUT_transistor_size LUT_transistor_size="4"/>
+  </power>
+  <clocks>
+    <clock buffer_size="auto" C_wire="2.5e-10"/>
+  </clocks>
+</architecture>
diff --git a/third_party/vtr/libs/archfpga/.gitignore b/third_party/vtr/libs/archfpga/.gitignore
new file mode 100644
index 000000000..72c5cad21
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/.gitignore
@@ -0,0 +1 @@
+read_arch
diff --git a/third_party/vtr/libs/archfpga/CMakeLists.txt b/third_party/vtr/libs/archfpga/CMakeLists.txt
new file mode 100644
index 000000000..e2a5ddadb
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/CMakeLists.txt
@@ -0,0 +1,75 @@
+cmake_minimum_required(VERSION 3.9)
+# Builds the libarchfpga static library plus the read_arch and write_arch_bb executables
+project("libarchfpga")
+
+file(GLOB_RECURSE READ_ARCH_EXEC_SRC src/main.cpp)
+file(GLOB_RECURSE WRITE_ARCH_BB_EXEC_SRC src/write_arch_bb.cpp)
+file(GLOB_RECURSE LIB_SOURCES src/*.cpp)
+file(GLOB_RECURSE LIB_HEADERS src/*.h)
+files_to_dirs(LIB_HEADERS LIB_INCLUDE_DIRS) #Helper not defined in this file; presumably maps the headers to their include dirs - TODO confirm
+
+#Remove the executables' sources (src/main.cpp, src/write_arch_bb.cpp) from the library sources
+list(REMOVE_ITEM LIB_SOURCES ${READ_ARCH_EXEC_SRC} ${WRITE_ARCH_BB_EXEC_SRC})
+
+#Create the static library from the remaining sources and the headers
+add_library(libarchfpga STATIC
+    ${LIB_HEADERS}
+    ${LIB_SOURCES}
+)
+
+target_include_directories(libarchfpga PUBLIC ${LIB_INCLUDE_DIRS})
+
+set_target_properties(libarchfpga PROPERTIES PREFIX "") #Avoid extra 'lib' prefix
+
+#Specify link-time dependencies
+target_link_libraries(libarchfpga
+                        libvtrutil
+                        libpugixml
+                        libpugiutil
+                        libvtrcapnproto
+)
+
+target_compile_definitions(libarchfpga PUBLIC ${INTERCHANGE_SCHEMA_HEADERS})
+
+#Create the read_arch and write_arch_bb executables and link them against the library
+add_executable(read_arch ${READ_ARCH_EXEC_SRC})
+add_executable(write_arch_bb ${WRITE_ARCH_BB_EXEC_SRC})
+target_link_libraries(read_arch libarchfpga)
+target_link_libraries(write_arch_bb libarchfpga)
+
+#Suppress IPO link warnings if IPO is enabled (the flags variable is set outside this file; keep its SUPRESS spelling)
+get_target_property(READ_ARCH_USES_IPO read_arch INTERPROCEDURAL_OPTIMIZATION)
+get_target_property(WRITE_ARCH_BB_USES_IPO write_arch_bb INTERPROCEDURAL_OPTIMIZATION)
+if (READ_ARCH_USES_IPO)
+    set_property(TARGET read_arch APPEND PROPERTY LINK_FLAGS ${IPO_LINK_WARN_SUPRESS_FLAGS})
+endif()
+if (WRITE_ARCH_BB_USES_IPO)
+    set_property(TARGET write_arch_bb APPEND PROPERTY LINK_FLAGS ${IPO_LINK_WARN_SUPRESS_FLAGS})
+endif()
+
+install(TARGETS libarchfpga read_arch write_arch_bb DESTINATION bin)
+
+#
+# Copy the built executables into the VTR source root directory
+# so that scripts running the VTR flow can use them
+#
+set(ARCHFPGA_DIR ${VTR_SOURCE_DIR}/ArchFPGA)
+# Custom target, part of the default build (ALL), that depends on read_arch and
+# write_arch_bb: it deletes any existing ArchFPGA dir (which may hold expired execs),
+# recreates it, and copies the contents of CMAKE_CURRENT_BINARY_DIR into it
+add_custom_target(archfpga-execs ALL 
+                    DEPENDS read_arch write_arch_bb
+                    COMMAND ${CMAKE_COMMAND} -E 
+                                            remove_directory ${ARCHFPGA_DIR}                     
+                    COMMAND ${CMAKE_COMMAND} -E 
+                                            make_directory ${ARCHFPGA_DIR}
+                    COMMAND ${CMAKE_COMMAND} -E 
+                                            copy_directory ${CMAKE_CURRENT_BINARY_DIR} ${ARCHFPGA_DIR}
+                    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
+
+#
+# Unit tests: build test_archfpga from every .cpp under test/, linked against Catch2 (with its main) and libarchfpga
+#
+file(GLOB_RECURSE TEST_SOURCES test/*.cpp)
+add_executable(test_archfpga ${TEST_SOURCES})
+target_link_libraries(test_archfpga Catch2::Catch2WithMain libarchfpga)
diff --git a/third_party/vtr/libs/archfpga/arch/README.txt b/third_party/vtr/libs/archfpga/arch/README.txt
new file mode 100644
index 000000000..88e5886d9
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/arch/README.txt
@@ -0,0 +1,11 @@
+This directory contains sample architecture files that are used in testing
+libarchfpga. In addition, the architecture files in this directory are used by
+the regression testing facilities of Odin II.
+
+Please be sure to retain sample_arch.xml and update it with any changes that
+are made to the libvpr library.
+
+Ken Kent
+ken@unb.ca
+06.18.2009
+
diff --git a/third_party/vtr/libs/archfpga/arch/mult_luts_arch.xml b/third_party/vtr/libs/archfpga/arch/mult_luts_arch.xml
new file mode 100644
index 000000000..9941d3a79
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/arch/mult_luts_arch.xml
@@ -0,0 +1,744 @@
+<architecture>
+  <!-- jluu and ken: ODIN II specific config -->
+  <models>
+    <model name="multiply">
+      <input_ports>
+        <port name="a" combinational_sink_ports="out"/>
+        <port name="b" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out"/>
+      </output_ports>
+    </model>
+    <model name="single_port_ram">
+      <input_ports>
+        <port name="we"/>
+        <!-- control -->
+        <port name="addr"/>
+        <!-- address lines -->
+        <port name="data"/>
+        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
+        <port name="clk" is_clock="1"/>
+        <!-- memories are often clocked -->
+      </input_ports>
+      <output_ports>
+        <port name="out"/>
+        <!-- output can be broken down into smaller bit widths minimum size 1 -->
+      </output_ports>
+    </model>
+    <model name="dual_port_ram">
+      <input_ports>
+        <port name="we1"/>
+        <!-- write enable -->
+        <port name="we2"/>
+        <!-- write enable -->
+        <port name="addr1"/>
+        <!-- address lines -->
+        <port name="addr2"/>
+        <!-- address lines -->
+        <port name="data1"/>
+        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
+        <port name="data2"/>
+        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
+        <port name="clk" is_clock="1"/>
+        <!-- memories are often clocked -->
+      </input_ports>
+      <output_ports>
+        <port name="out1"/>
+        <!-- output can be broken down into smaller bit widths minimum size 1 -->
+        <port name="out2"/>
+        <!-- output can be broken down into smaller bit widths minimum size 1 -->
+      </output_ports>
+    </model>
+  </models>
+  <tiles>
+    <tile name="io">
+      <sub_tile name="io" capacity="7">
+        <equivalent_sites>
+          <site pb_type="io" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="outpad" num_pins="1" equivalent="none"/>
+        <output name="inpad" num_pins="1"/>
+        <clock name="clock" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.125"/>
+        <pinlocations pattern="custom">
+          <loc side="left">io.outpad io.inpad io.clock</loc>
+          <loc side="top">io.outpad io.inpad io.clock</loc>
+          <loc side="right">io.outpad io.inpad io.clock</loc>
+          <loc side="bottom">io.outpad io.inpad io.clock</loc>
+        </pinlocations>
+      </sub_tile>
+    </tile>
+    <tile name="clb">
+      <sub_tile name="clb">
+        <equivalent_sites>
+          <site pb_type="clb" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="I" num_pins="56" equivalent="full"/>
+        <output name="O" num_pins="16"/>
+        <clock name="clk" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.125"/>
+        <pinlocations pattern="spread"/>
+      </sub_tile>
+    </tile>
+    <tile name="memory" height="4">
+      <sub_tile name="memory">
+        <equivalent_sites>
+          <site pb_type="memory" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="addr1" num_pins="16"/>
+        <input name="addr2" num_pins="16"/>
+        <input name="data" num_pins="64"/>
+        <input name="we1" num_pins="1"/>
+        <input name="we2" num_pins="1"/>
+        <output name="out" num_pins="64"/>
+        <clock name="clk" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.125"/>
+        <pinlocations pattern="spread"/>
+      </sub_tile>
+    </tile>
+    <tile name="mult_36" height="3">
+      <sub_tile name="mult_36">
+        <equivalent_sites>
+          <site pb_type="mult_36" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="a" num_pins="36"/>
+        <input name="b" num_pins="36"/>
+        <output name="out" num_pins="72"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.125"/>
+        <pinlocations pattern="spread"/>
+      </sub_tile>
+    </tile>
+  </tiles>
+  <!-- jluu and ken: ODIN II specific config ends -->
+  <!-- jluu and ken: Physical descriptions begin -->
+  <!-- <layout width="20" height="20"/> -->
+  <layout>
+    <auto_layout aspect_ratio="1.0">
+      <!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
+      <perimeter type="io" priority="100"/>
+      <corners type="EMPTY" priority="101"/>
+      <!--Fill with 'clb'-->
+      <fill type="clb" priority="10"/>
+      <!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
+      <col type="memory" startx="2" starty="1" repeatx="5" priority="20"/>
+      <col type="EMPTY" startx="2" repeatx="5" starty="1" priority="19"/>
+      <!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
+      <col type="mult_36" startx="4" starty="1" repeatx="5" priority="20"/>
+      <col type="EMPTY" startx="4" repeatx="5" starty="1" priority="19"/>
+    </auto_layout>
+  </layout>
+  <device>
+    <sizing R_minW_nmos="5726.870117" R_minW_pmos="15491.700195"/>
+    <area grid_logic_tile_area="30000.000000"/>
+    <chan_width_distr>
+      <x distr="uniform" peak="1.000000"/>
+      <y distr="uniform" peak="1.000000"/>
+    </chan_width_distr>
+    <switch_block type="wilton" fs="3"/>
+    <connection_block input_switch_name="ipin_cblock"/>
+  </device>
+  <switchlist>
+    <switch type="mux" name="0" R="94.841003" Cin="1.537000e-14" Cout="2.194000e-13" Tdel="6.562000e-11" mux_trans_size="10.000000" buf_size="1"/>
+    <!--switch ipin_cblock resistance set to yield a 4x minimum drive strength buffer (R_minW_nmos / 4)-->
+    <switch type="mux" name="ipin_cblock" R="1431.71752925" Cout="0." Cin="1.191000e-14" Tdel="1.482000e-10" mux_trans_size="1.000000" buf_size="auto"/>
+  </switchlist>
+  <segmentlist>
+    <segment freq="1.000000" length="4" type="unidir" Rmetal="11.064550" Cmetal="4.727860e-14">
+      <mux name="0"/>
+      <sb type="pattern">1 1 1 1 1</sb>
+      <cb type="pattern">1 1 1 1</cb>
+    </segment>
+  </segmentlist>
+  <complexblocklist>
+    <pb_type name="io">
+      <input name="outpad" num_pins="1" equivalent="none"/>
+      <output name="inpad" num_pins="1"/>
+      <clock name="clock" num_pins="1"/>
+      <!-- IOs can operate as either inputs or outputs -->
+      <mode name="inpad">
+        <pb_type name="inpad" blif_model=".input" num_pb="1">
+          <output name="inpad" num_pins="1"/>
+        </pb_type>
+        <interconnect>
+          <direct name="inpad" input="inpad.inpad" output="io.inpad"/>
+        </interconnect>
+      </mode>
+      <mode name="outpad">
+        <pb_type name="outpad" blif_model=".output" num_pb="1">
+          <input name="outpad" num_pins="1"/>
+        </pb_type>
+        <interconnect>
+          <direct name="outpad" input="io.outpad" output="outpad.outpad"/>
+        </interconnect>
+      </mode>
+      <!-- IOs go on the periphery of the FPGA, for consistency, 
+          make it physically equivalent on all sides so that only one definition of I/Os is needed.
+          If I do not make a physically equivalent definition, then I need to define 4 different I/Os, one for each side of the FPGA
+        -->
+    </pb_type>
+    <pb_type name="clb">
+      <input name="I" num_pins="56" equivalent="full"/>
+      <output name="O" num_pins="16"/>
+      <clock name="clk" num_pins="1"/>
+      <pb_type name="ble" num_pb="8">
+        <input name="in" num_pins="7"/>
+        <output name="out" num_pins="2"/>
+        <clock name="clk" num_pins="1"/>
+        <pb_type name="soft_logic" num_pb="1">
+          <input name="in" num_pins="7"/>
+          <output name="out" num_pins="2"/>
+          <mode name="n2_lut5">
+            <pb_type name="lut5" blif_model=".names" num_pb="2" class="lut">
+              <input name="in" num_pins="5" port_class="lut_in"/>
+              <output name="out" num_pins="1" port_class="lut_out"/>
+            </pb_type>
+            <interconnect>
+              <direct name="direct1" input="soft_logic.in[4:0]" output="lut5[0:0].in[4:0]"/>
+              <direct name="direct2" input="lut5[0:0].out" output="soft_logic.out[0:0]"/>
+              <direct name="direct3" input="soft_logic.in[6:2]" output="lut5[1:1].in[4:0]"/>
+              <direct name="direct4" input="lut5[1:1].out" output="soft_logic.out[1:1]"/>
+            </interconnect>
+          </mode>
+          <mode name="n1_lut6">
+            <pb_type name="lut6" blif_model=".names" num_pb="1" class="lut">
+              <input name="in" num_pins="6" port_class="lut_in"/>
+              <output name="out" num_pins="1" port_class="lut_out"/>
+            </pb_type>
+            <interconnect>
+              <direct name="direct1" input="soft_logic.in[5:0]" output="lut6[0:0].in[5:0]"/>
+              <direct name="direct2" input="lut6[0:0].out" output="soft_logic.out[0:0]"/>
+            </interconnect>
+          </mode>
+        </pb_type>
+        <pb_type name="ff" blif_model=".latch" num_pb="2" class="flipflop">
+          <input name="D" num_pins="1" port_class="D"/>
+          <output name="Q" num_pins="1" port_class="Q"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+        </pb_type>
+        <interconnect>
+          <!-- Two ff, make ff available to only corresponding luts -->
+          <direct name="direct1" input="ble.in" output="soft_logic.in"/>
+          <direct name="direct2" input="soft_logic.out[0:0]" output="ff[0:0].D"/>
+          <direct name="direct3" input="soft_logic.out[1:1]" output="ff[1:1].D"/>
+          <direct name="direct4" input="ble.clk" output="ff[0:0].clk"/>
+          <direct name="direct5" input="ble.clk" output="ff[1:1].clk"/>
+          <mux name="mux1" input="ff[0:0].Q soft_logic.out[0:0]" output="ble.out[0:0]"/>
+          <mux name="mux2" input="ff[1:1].Q soft_logic.out[1:1]" output="ble.out[1:1]"/>
+        </interconnect>
+      </pb_type>
+      <interconnect>
+        <complete name="complete1" input="clb.I ble[7:0].out" output="ble[7:0].in"/>
+        <complete name="complete2" input="clb.clk" output="ble[7:0].clk"/>
+        <direct name="direct1" input="ble[7:0].out" output="clb.O"/>
+      </interconnect>
+    </pb_type>
+    <pb_type name="memory">
+      <input name="addr1" num_pins="16"/>
+      <input name="addr2" num_pins="16"/>
+      <input name="data" num_pins="64"/>
+      <input name="we1" num_pins="1"/>
+      <input name="we2" num_pins="1"/>
+      <output name="out" num_pins="64"/>
+      <clock name="clk" num_pins="1"/>
+      <mode name="mem_1024x64_sp">
+        <pb_type name="mem_1024x64_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1" area="1000">
+          <input name="addr" num_pins="10" port_class="address"/>
+          <input name="data" num_pins="64" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="64" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x64_sp.addr">
+              </direct>
+          <direct name="data1" input="memory.data[63:0]" output="mem_1024x64_sp.data">
+              </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_1024x64_sp.we">
+              </direct>
+          <direct name="dataout1" input="mem_1024x64_sp.out" output="memory.out[63:0]">
+              </direct>
+          <direct name="clk" input="memory.clk" output="mem_1024x64_sp.clk">
+              </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_2048x32_dp">
+        <pb_type name="mem_2048x32_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1" area="1000">
+          <input name="addr1" num_pins="11" port_class="address1"/>
+          <input name="addr2" num_pins="11" port_class="address2"/>
+          <input name="data1" num_pins="32" port_class="data_in1"/>
+          <input name="data2" num_pins="32" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="32" port_class="data_out1"/>
+          <output name="out2" num_pins="32" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x32_dp.addr1">
+              </direct>
+          <direct name="address2" input="memory.addr2[10:0]" output="mem_2048x32_dp.addr2">
+              </direct>
+          <direct name="data1" input="memory.data[31:0]" output="mem_2048x32_dp.data1">
+              </direct>
+          <direct name="data2" input="memory.data[63:32]" output="mem_2048x32_dp.data2">
+              </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_2048x32_dp.we1">
+              </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_2048x32_dp.we2">
+              </direct>
+          <direct name="dataout1" input="mem_2048x32_dp.out1" output="memory.out[31:0]">
+              </direct>
+          <direct name="dataout2" input="mem_2048x32_dp.out2" output="memory.out[63:32]">
+              </direct>
+          <direct name="clk" input="memory.clk" output="mem_2048x32_dp.clk">
+              </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_2048x32_sp">
+        <pb_type name="mem_2048x32_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1" area="1000">
+          <input name="addr" num_pins="11" port_class="address"/>
+          <input name="data" num_pins="32" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="32" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x32_sp.addr">
+              </direct>
+          <direct name="data1" input="memory.data[31:0]" output="mem_2048x32_sp.data">
+              </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_2048x32_sp.we">
+              </direct>
+          <direct name="dataout1" input="mem_2048x32_sp.out" output="memory.out[31:0]">
+              </direct>
+          <direct name="clk" input="memory.clk" output="mem_2048x32_sp.clk">
+              </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_4096x16_dp">
+        <pb_type name="mem_4096x16_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1" area="1000">
+          <input name="addr1" num_pins="12" port_class="address1"/>
+          <input name="addr2" num_pins="12" port_class="address2"/>
+          <input name="data1" num_pins="16" port_class="data_in1"/>
+          <input name="data2" num_pins="16" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="16" port_class="data_out1"/>
+          <output name="out2" num_pins="16" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[11:0]" output="mem_4096x16_dp.addr1">
+              </direct>
+          <direct name="address2" input="memory.addr2[11:0]" output="mem_4096x16_dp.addr2">
+              </direct>
+          <direct name="data1" input="memory.data[15:0]" output="mem_4096x16_dp.data1">
+              </direct>
+          <direct name="data2" input="memory.data[31:16]" output="mem_4096x16_dp.data2">
+              </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_4096x16_dp.we1">
+              </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_4096x16_dp.we2">
+              </direct>
+          <direct name="dataout1" input="mem_4096x16_dp.out1" output="memory.out[15:0]">
+              </direct>
+          <direct name="dataout2" input="mem_4096x16_dp.out2" output="memory.out[31:16]">
+              </direct>
+          <direct name="clk" input="memory.clk" output="mem_4096x16_dp.clk">
+              </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_4096x16_sp">
+        <pb_type name="mem_4096x16_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1" area="1000">
+          <input name="addr" num_pins="12" port_class="address"/>
+          <input name="data" num_pins="16" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="16" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[11:0]" output="mem_4096x16_sp.addr">
+              </direct>
+          <direct name="data1" input="memory.data[15:0]" output="mem_4096x16_sp.data">
+              </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_4096x16_sp.we">
+              </direct>
+          <direct name="dataout1" input="mem_4096x16_sp.out" output="memory.out[15:0]">
+              </direct>
+          <direct name="clk" input="memory.clk" output="mem_4096x16_sp.clk">
+              </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_8192x8_dp">
+        <pb_type name="mem_8192x8_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1" area="1000">
+          <input name="addr1" num_pins="13" port_class="address1"/>
+          <input name="addr2" num_pins="13" port_class="address2"/>
+          <input name="data1" num_pins="8" port_class="data_in1"/>
+          <input name="data2" num_pins="8" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="8" port_class="data_out1"/>
+          <output name="out2" num_pins="8" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[12:0]" output="mem_8192x8_dp.addr1">
+              </direct>
+          <direct name="address2" input="memory.addr2[12:0]" output="mem_8192x8_dp.addr2">
+              </direct>
+          <direct name="data1" input="memory.data[7:0]" output="mem_8192x8_dp.data1">
+              </direct>
+          <direct name="data2" input="memory.data[15:8]" output="mem_8192x8_dp.data2">
+              </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_8192x8_dp.we1">
+              </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_8192x8_dp.we2">
+              </direct>
+          <direct name="dataout1" input="mem_8192x8_dp.out1" output="memory.out[7:0]">
+              </direct>
+          <direct name="dataout2" input="mem_8192x8_dp.out2" output="memory.out[15:8]">
+              </direct>
+          <direct name="clk" input="memory.clk" output="mem_8192x8_dp.clk">
+              </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_8192x8_sp">
+        <pb_type name="mem_8192x8_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1" area="1000">
+          <input name="addr" num_pins="13" port_class="address"/>
+          <input name="data" num_pins="8" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="8" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[12:0]" output="mem_8192x8_sp.addr">
+              </direct>
+          <direct name="data1" input="memory.data[7:0]" output="mem_8192x8_sp.data">
+              </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_8192x8_sp.we">
+              </direct>
+          <direct name="dataout1" input="mem_8192x8_sp.out" output="memory.out[7:0]">
+              </direct>
+          <direct name="clk" input="memory.clk" output="mem_8192x8_sp.clk">
+              </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_16384x4_dp">
+        <pb_type name="mem_16384x4_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1" area="1000">
+          <input name="addr1" num_pins="14" port_class="address1"/>
+          <input name="addr2" num_pins="14" port_class="address2"/>
+          <input name="data1" num_pins="4" port_class="data_in1"/>
+          <input name="data2" num_pins="4" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="4" port_class="data_out1"/>
+          <output name="out2" num_pins="4" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[13:0]" output="mem_16384x4_dp.addr1">
+              </direct>
+          <direct name="address2" input="memory.addr2[13:0]" output="mem_16384x4_dp.addr2">
+              </direct>
+          <direct name="data1" input="memory.data[3:0]" output="mem_16384x4_dp.data1">
+              </direct>
+          <direct name="data2" input="memory.data[7:4]" output="mem_16384x4_dp.data2">
+              </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_16384x4_dp.we1">
+              </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_16384x4_dp.we2">
+              </direct>
+          <direct name="dataout1" input="mem_16384x4_dp.out1" output="memory.out[3:0]">
+              </direct>
+          <direct name="dataout2" input="mem_16384x4_dp.out2" output="memory.out[7:4]">
+              </direct>
+          <direct name="clk" input="memory.clk" output="mem_16384x4_dp.clk">
+              </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_16384x4_sp">
+        <pb_type name="mem_16384x4_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1" area="1000">
+          <input name="addr" num_pins="14" port_class="address"/>
+          <input name="data" num_pins="4" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="4" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[13:0]" output="mem_16384x4_sp.addr">
+              </direct>
+          <direct name="data1" input="memory.data[3:0]" output="mem_16384x4_sp.data">
+              </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_16384x4_sp.we">
+              </direct>
+          <direct name="dataout1" input="mem_16384x4_sp.out" output="memory.out[3:0]">
+              </direct>
+          <direct name="clk" input="memory.clk" output="mem_16384x4_sp.clk">
+              </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_32768x2_dp">
+        <pb_type name="mem_32768x2_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1" area="1000">
+          <input name="addr1" num_pins="15" port_class="address1"/>
+          <input name="addr2" num_pins="15" port_class="address2"/>
+          <input name="data1" num_pins="2" port_class="data_in1"/>
+          <input name="data2" num_pins="2" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="2" port_class="data_out1"/>
+          <output name="out2" num_pins="2" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[14:0]" output="mem_32768x2_dp.addr1">
+              </direct>
+          <direct name="address2" input="memory.addr2[14:0]" output="mem_32768x2_dp.addr2">
+              </direct>
+          <direct name="data1" input="memory.data[1:0]" output="mem_32768x2_dp.data1">
+              </direct>
+          <direct name="data2" input="memory.data[3:2]" output="mem_32768x2_dp.data2">
+              </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_32768x2_dp.we1">
+              </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_32768x2_dp.we2">
+              </direct>
+          <direct name="dataout1" input="mem_32768x2_dp.out1" output="memory.out[1:0]">
+              </direct>
+          <direct name="dataout2" input="mem_32768x2_dp.out2" output="memory.out[3:2]">
+              </direct>
+          <direct name="clk" input="memory.clk" output="mem_32768x2_dp.clk">
+              </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_32768x2_sp">
+        <pb_type name="mem_32768x2_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1" area="1000">
+          <input name="addr" num_pins="15" port_class="address"/>
+          <input name="data" num_pins="2" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="2" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[14:0]" output="mem_32768x2_sp.addr">
+              </direct>
+          <direct name="data1" input="memory.data[1:0]" output="mem_32768x2_sp.data">
+              </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_32768x2_sp.we">
+              </direct>
+          <direct name="dataout1" input="mem_32768x2_sp.out" output="memory.out[1:0]">
+              </direct>
+          <direct name="clk" input="memory.clk" output="mem_32768x2_sp.clk">
+              </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_65536x1_dp">
+        <pb_type name="mem_65536x1_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1" area="1000">
+          <input name="addr1" num_pins="16" port_class="address1"/>
+          <input name="addr2" num_pins="16" port_class="address2"/>
+          <input name="data1" num_pins="1" port_class="data_in1"/>
+          <input name="data2" num_pins="1" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="1" port_class="data_out1"/>
+          <output name="out2" num_pins="1" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[15:0]" output="mem_65536x1_dp.addr1">
+              </direct>
+          <direct name="address2" input="memory.addr2[15:0]" output="mem_65536x1_dp.addr2">
+              </direct>
+          <direct name="data1" input="memory.data[0:0]" output="mem_65536x1_dp.data1">
+              </direct>
+          <direct name="data2" input="memory.data[1:1]" output="mem_65536x1_dp.data2">
+              </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_65536x1_dp.we1">
+              </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_65536x1_dp.we2">
+              </direct>
+          <direct name="dataout1" input="mem_65536x1_dp.out1" output="memory.out[0:0]">
+              </direct>
+          <direct name="dataout2" input="mem_65536x1_dp.out2" output="memory.out[1:1]">
+              </direct>
+          <direct name="clk" input="memory.clk" output="mem_65536x1_dp.clk">
+              </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_65536x1_sp">
+        <pb_type name="mem_65536x1_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1" area="1000">
+          <input name="addr" num_pins="16" port_class="address"/>
+          <input name="data" num_pins="1" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="1" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[15:0]" output="mem_65536x1_sp.addr">
+              </direct>
+          <direct name="data1" input="memory.data[0:0]" output="mem_65536x1_sp.data">
+              </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_65536x1_sp.we">
+              </direct>
+          <direct name="dataout1" input="mem_65536x1_sp.out" output="memory.out[0:0]">
+              </direct>
+          <direct name="clk" input="memory.clk" output="mem_65536x1_sp.clk">
+              </direct>
+        </interconnect>
+      </mode>
+    </pb_type>
+    <!-- This is the 36*36 uniform mult -->
+    <pb_type name="mult_36">
+      <input name="a" num_pins="36"/>
+      <input name="b" num_pins="36"/>
+      <output name="out" num_pins="72"/>
+      <mode name="two_divisible_mult_18x18">
+        <pb_type name="divisible_mult_18x18" num_pb="2">
+          <input name="a" num_pins="18"/>
+          <input name="b" num_pins="18"/>
+          <output name="out" num_pins="36"/>
+          <mode name="two_mult_9x9">
+            <pb_type name="mult_9x9_slice" num_pb="2">
+              <input name="A_cfg" num_pins="9"/>
+              <input name="B_cfg" num_pins="9"/>
+              <output name="OUT_cfg" num_pins="18"/>
+              <pb_type name="mult_9x9" blif_model=".subckt multiply" num_pb="1" area="300">
+                <input name="a" num_pins="9"/>
+                <input name="b" num_pins="9"/>
+                <output name="out" num_pins="18"/>
+                <delay_constant max="2.03e-13" min="1.89e-13" in_port="a" out_port="out"/>
+                <delay_constant max="2.03e-13" min="1.89e-13" in_port="b" out_port="out"/>
+              </pb_type>
+              <interconnect>
+                <direct name="a2a" input="mult_9x9_slice.A_cfg" output="mult_9x9.a">
+                  <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_9x9_slice.A_cfg" out_port="mult_9x9.a"/>
+                  <C_constant C="1.89e-13" in_port="mult_9x9_slice.A_cfg" out_port="mult_9x9.a"/>
+                </direct>
+                <direct name="b2b" input="mult_9x9_slice.B_cfg" output="mult_9x9.b">
+                  <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_9x9_slice.B_cfg" out_port="mult_9x9.b"/>
+                  <C_constant C="1.89e-13" in_port="mult_9x9_slice.B_cfg" out_port="mult_9x9.b"/>
+                </direct>
+                <direct name="out2out" input="mult_9x9.out" output="mult_9x9_slice.OUT_cfg">
+                  <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_9x9.out" out_port="mult_9x9_slice.OUT_cfg"/>
+                  <C_constant C="1.89e-13" in_port="mult_9x9.out" out_port="mult_9x9_slice.OUT_cfg"/>
+                </direct>
+              </interconnect>
+            </pb_type>
+            <interconnect>
+              <direct name="a2a" input="divisible_mult_18x18.a" output="mult_9x9_slice[1:0].A_cfg">
+                <delay_constant max="2.03e-13" min="1.89e-13" in_port="divisible_mult_18x18.a" out_port="mult_9x9_slice[1:0].A_cfg"/>
+                <C_constant C="1.89e-13" in_port="divisible_mult_18x18.a" out_port="mult_9x9_slice[1:0].A_cfg"/>
+              </direct>
+              <direct name="b2b" input="divisible_mult_18x18.b" output="mult_9x9_slice[1:0].B_cfg">
+                <delay_constant max="2.03e-13" min="1.89e-13" in_port="divisible_mult_18x18.b" out_port="mult_9x9_slice[1:0].B_cfg"/>
+                <C_constant C="1.89e-13" in_port="divisible_mult_18x18.b" out_port="mult_9x9_slice[1:0].B_cfg"/>
+              </direct>
+              <direct name="out2out" input="mult_9x9_slice[1:0].OUT_cfg" output="divisible_mult_18x18.out">
+                <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_9x9_slice[1:0].OUT_cfg" out_port="divisible_mult_18x18.out"/>
+                <C_constant C="1.89e-13" in_port="mult_9x9_slice[1:0].OUT_cfg" out_port="divisible_mult_18x18.out"/>
+              </direct>
+            </interconnect>
+          </mode>
+          <mode name="mult_18x18">
+            <pb_type name="mult_18x18_slice" num_pb="1">
+              <input name="A_cfg" num_pins="18"/>
+              <input name="B_cfg" num_pins="18"/>
+              <output name="OUT_cfg" num_pins="36"/>
+              <pb_type name="mult_18x18" blif_model=".subckt multiply" num_pb="1" area="1000">
+                <input name="a" num_pins="18"/>
+                <input name="b" num_pins="18"/>
+                <output name="out" num_pins="36"/>
+                <delay_constant max="2.03e-13" min="1.89e-13" in_port="a" out_port="out"/>
+                <delay_constant max="2.03e-13" min="1.89e-13" in_port="b" out_port="out"/>
+              </pb_type>
+              <interconnect>
+                <direct name="a2a" input="mult_18x18_slice.A_cfg" output="mult_18x18.a">
+                  <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_18x18_slice.A_cfg" out_port="mult_18x18.a"/>
+                  <C_constant C="1.89e-13" in_port="mult_18x18_slice.A_cfg" out_port="mult_18x18.a"/>
+                </direct>
+                <direct name="b2b" input="mult_18x18_slice.B_cfg" output="mult_18x18.b">
+                  <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_18x18_slice.B_cfg" out_port="mult_18x18.b"/>
+                  <C_constant C="1.89e-13" in_port="mult_18x18_slice.B_cfg" out_port="mult_18x18.b"/>
+                </direct>
+                <direct name="out2out" input="mult_18x18.out" output="mult_18x18_slice.OUT_cfg">
+                  <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_18x18.out" out_port="mult_18x18_slice.OUT_cfg"/>
+                  <C_constant C="1.89e-13" in_port="mult_18x18.out" out_port="mult_18x18_slice.OUT_cfg"/>
+                </direct>
+              </interconnect>
+            </pb_type>
+            <interconnect>
+              <direct name="a2a" input="divisible_mult_18x18.a" output="mult_18x18_slice.A_cfg">
+                <delay_constant max="2.03e-13" min="1.89e-13" in_port="divisible_mult_18x18.a" out_port="mult_18x18_slice.A_cfg"/>
+                <C_constant C="1.89e-13" in_port="divisible_mult_18x18.a" out_port="mult_18x18_slice.A_cfg"/>
+              </direct>
+              <direct name="b2b" input="divisible_mult_18x18.b" output="mult_18x18_slice.B_cfg">
+                <delay_constant max="2.03e-13" min="1.89e-13" in_port="divisible_mult_18x18.b" out_port="mult_18x18_slice.B_cfg"/>
+                <C_constant C="1.89e-13" in_port="divisible_mult_18x18.b" out_port="mult_18x18_slice.B_cfg"/>
+              </direct>
+              <direct name="out2out" input="mult_18x18_slice.OUT_cfg" output="divisible_mult_18x18.out">
+                <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_18x18_slice.OUT_cfg" out_port="divisible_mult_18x18.out"/>
+                <C_constant C="1.89e-13" in_port="mult_18x18_slice.OUT_cfg" out_port="divisible_mult_18x18.out"/>
+              </direct>
+            </interconnect>
+          </mode>
+        </pb_type>
+        <interconnect>
+          <direct name="a2a" input="mult_36.a" output="divisible_mult_18x18[1:0].a">
+            <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_36.a" out_port="divisible_mult_18x18[1:0].a"/>
+            <C_constant C="1.89e-13" in_port="mult_36.a" out_port="divisible_mult_18x18[1:0].a"/>
+          </direct>
+          <direct name="b2b" input="mult_36.b" output="divisible_mult_18x18[1:0].b"> <!-- b operand feeds the .b ports of both 18x18 multipliers; the .a ports are driven by a2a -->
+            <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_36.b" out_port="divisible_mult_18x18[1:0].b"/>
+            <C_constant C="1.89e-13" in_port="mult_36.b" out_port="divisible_mult_18x18[1:0].b"/>
+          </direct>
+          <direct name="out2out" input="divisible_mult_18x18[1:0].out" output="mult_36.out">
+            <delay_constant max="2.03e-13" min="1.89e-13" in_port="divisible_mult_18x18[1:0].out" out_port="mult_36.out"/>
+            <C_constant C="1.89e-13" in_port="divisible_mult_18x18[1:0].out" out_port="mult_36.out"/>
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mult_36x36">
+        <pb_type name="mult_36x36_slice" num_pb="1">
+          <input name="A_cfg" num_pins="36"/>
+          <input name="B_cfg" num_pins="36"/>
+          <output name="OUT_cfg" num_pins="72"/>
+          <pb_type name="mult_36x36" blif_model=".subckt multiply" num_pb="1" area="4000">
+            <input name="a" num_pins="36"/>
+            <input name="b" num_pins="36"/>
+            <output name="out" num_pins="72"/>
+            <delay_constant max="2.03e-13" min="1.89e-13" in_port="a" out_port="out"/>
+            <delay_constant max="2.03e-13" min="1.89e-13" in_port="b" out_port="out"/>
+          </pb_type>
+          <interconnect>
+            <direct name="a2a" input="mult_36x36_slice.A_cfg" output="mult_36x36.a">
+              <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_36x36_slice.A_cfg" out_port="mult_36x36.a"/>
+              <C_constant C="1.89e-13" in_port="mult_36x36_slice.A_cfg" out_port="mult_36x36.a"/>
+            </direct>
+            <direct name="b2b" input="mult_36x36_slice.B_cfg" output="mult_36x36.b">
+              <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_36x36_slice.B_cfg" out_port="mult_36x36.b"/>
+              <C_constant C="1.89e-13" in_port="mult_36x36_slice.B_cfg" out_port="mult_36x36.b"/>
+            </direct>
+            <direct name="out2out" input="mult_36x36.out" output="mult_36x36_slice.OUT_cfg">
+              <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_36x36.out" out_port="mult_36x36_slice.OUT_cfg"/>
+              <C_constant C="1.89e-13" in_port="mult_36x36.out" out_port="mult_36x36_slice.OUT_cfg"/>
+            </direct>
+          </interconnect>
+        </pb_type>
+        <interconnect>
+          <direct name="a2a" input="mult_36.a" output="mult_36x36_slice.A_cfg">
+            <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_36.a" out_port="mult_36x36_slice.A_cfg"/>
+            <C_constant C="1.89e-13" in_port="mult_36.a" out_port="mult_36x36_slice.A_cfg"/>
+          </direct>
+          <direct name="b2b" input="mult_36.b" output="mult_36x36_slice.B_cfg">
+            <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_36.b" out_port="mult_36x36_slice.B_cfg"/>
+            <C_constant C="1.89e-13" in_port="mult_36.b" out_port="mult_36x36_slice.B_cfg"/>
+          </direct>
+          <direct name="out2out" input="mult_36x36_slice.OUT_cfg" output="mult_36.out">
+            <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_36x36_slice.OUT_cfg" out_port="mult_36.out"/>
+            <C_constant C="1.89e-13" in_port="mult_36x36_slice.OUT_cfg" out_port="mult_36.out"/>
+          </direct>
+        </interconnect>
+      </mode>
+    </pb_type>
+  </complexblocklist>
+</architecture>
diff --git a/third_party/vtr/libs/archfpga/arch/sample_arch.xml b/third_party/vtr/libs/archfpga/arch/sample_arch.xml
new file mode 100755
index 000000000..f2f3e7173
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/arch/sample_arch.xml
@@ -0,0 +1,1215 @@
+<!-- 
+  Flagship Heterogeneous Architecture (No Carry Chains) for VTR 7.0.
+
+  - 40 nm technology
+  - General purpose logic block: 
+    K = 6, N = 10, fracturable 6 LUTs (can operate as one 6-LUT or two 5-LUTs with all 5 inputs shared) 
+    with optionally registered outputs
+  - Memory size 32 Kbits, memory aspect ratios vary from a data width of 1 to data width of 64.  
+    Height = 6, found on every (8n+2)th column
+  - Multiplier modes: one 36x36, two 18x18, each 18x18 can also operate as two 9x9.  
+    Height = 4, found on every (8n+6)th column
+  - Routing architecture: L = 4, Fc_in = 0.15, Fc_out = 0.1
+
+  Details on Modelling:
+
+  The electrical design of the architecture described here is NOT from an 
+  optimized, SPICED architecture.  Instead, we attempt to create a reasonable 
+  architecture file by using an existing commercial FPGA to approximate the area, 
+  delay, and power of the underlying components. This is combined with a reasonable 40 nm 
+  model of wiring and circuit design for low-level routing components, where available.
+  The resulting architecture has delays that roughly match a commercial 40 nm FPGA, but also 
+  has wiring electrical parameters that allow the wire lengths and switch patterns to be 
+  modified and you will still get reasonable delay results for the new architecture.
+  The following describes, in detail, how we obtained the various electrical values for this 
+  architecture.
+
+  Rmin for nmos and pmos, routing buffer sizes, and I/O pad delays are from the ifar 
+  architecture created by Ian Kuon: K06 N10 45nm fc 0.15 area-delay optimized architecture. 
+  (n10k06l04.fc15.area1delay1.cmos45nm.bptm.cmos45nm.xml)      
+  This routing architecture was optimized for 45 nm, and we have scaled it linearly to 40 nm to 
+  match the overall target (a 40 nm FPGA).
+
+  We obtain delay numbers by measuring delays of routing, soft logic blocks, 
+  memories, and multipliers from test circuits on a Stratix IV GX device 
+  (EP4SGX230DF29C2X, i.e. fastest speed grade). For routing, we took the average delay of H4 and V4 
+  wires.  Rmetal and Cmetal values for the routing wires were obtained from work done by Charles 
+  Chiasson. We use a 96 nm half-pitch (corresponding to mid-level metal stack 40 nm routing) and 
+  take the R and C data from the ITRS roadmap.  
+
+  For the general purpose logic block, we assume that the area and delays of the Stratix IV 
+  crossbar are close enough to those of the crossbar modelled here.  We use 33 inputs and 20 feedback lines in 
+  the cluster and a full crossbar, leading to 53:1 multiplexers in front of each BLE input.
+  Stratix IV uses 52 inputs and 20 feedback lines, but only a half-populated crossbar, leading to 
+  36:1 multiplexers.  We require 60 such multiplexers, while Stratix IV requires 88 for its more
+  complex fracturable BLEs + the extra control signals. We justify this rough approximation as follows: 
+  The Stratix IV crossbar has more inputs (72 vs. 53) and 
+  outputs (88 vs. 60) than our full crossbar which should increase its area and delay, but the 
+  Stratix IV crossbar is also 50% sparse (each mux is 36:1 instead of 53:1) which should reduce its 
+  area and delay.  The total number of crossbar switch points is very similar between the two 
+  architectures (3160 for SIV and 3180 for the academic architecture below), so we can use the area 
+  & delay of the Stratix IV crossbar as a good approximation of our crossbar.
+
+  For LUTs, we include LUT 
+  delays measured from Stratix IV, which are dependent on the input used (i.e. some 
+  LUT inputs are faster than others).  The CAD tools at the time of VTR 7 do 
+  not consider differences in LUT input delays.
+
+  Logic block area numbers were obtained by scaling the overall tile area of a 65 nm 
+  Stratix III device (as given in Wong, Betz and Rose, FPGA 2011) to 40 nm, then subtracting out 
+  routing area at a channel width of 300. We use a channel width of 300 because it can route 
+  all the VTR 6.0 benchmark circuits with an approximately 20% safety margin, and is also close to the
+  total channel width of Stratix IV. Hence this channel width is close to the commercial practice of
+  choosing a width that provides high routability. The architecture can be routed at different channel
+  widths, but we estimate the tile size and hence the physical length of routing wires assuming
+  a channel width of 300.
+
+  Sanity checks employed:
+    1.  We confirmed the routing buffer delay is ~1/3rd of total routing delay at L = 4. This matches 
+        common electrical design.
+
+
+  Authors: Jason Luu, Jeff Goeders, Vaughn Betz
+-->
+<architecture>
+  <!-- 
+       ODIN II specific config begins 
+       Describes the types of user-specified netlist blocks (in blif, this corresponds to 
+       ".model [type_of_block]") that this architecture supports.
+
+       Note: Basic LUTs, I/Os, and flip-flops are not included here as there are 
+       already special structures in blif (.names, .input, .output, and .latch) 
+       that describe them.
+  -->
+  <models>
+    <model name="multiply">
+      <input_ports>
+        <port name="a" combinational_sink_ports="out"/>
+        <port name="b" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out"/>
+      </output_ports>
+    </model>
+    <model name="single_port_ram">
+      <input_ports>
+        <port name="we" clock="clk"/>
+        <!-- control -->
+        <port name="addr" clock="clk"/>
+        <!-- address lines -->
+        <port name="data" clock="clk"/>
+        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
+        <port name="clk" is_clock="1"/>
+        <!-- memories are often clocked -->
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+        <!-- output can be broken down into smaller bit widths minimum size 1 -->
+      </output_ports>
+    </model>
+    <model name="dual_port_ram">
+      <input_ports>
+        <port name="we1" clock="clk"/>
+        <!-- write enable -->
+        <port name="we2" clock="clk"/>
+        <!-- write enable -->
+        <port name="addr1" clock="clk"/>
+        <!-- address lines -->
+        <port name="addr2" clock="clk"/>
+        <!-- address lines -->
+        <port name="data1" clock="clk"/>
+        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
+        <port name="data2" clock="clk"/>
+        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
+        <port name="clk" is_clock="1"/>
+        <!-- memories are often clocked -->
+      </input_ports>
+      <output_ports>
+        <port name="out1" clock="clk"/>
+        <!-- output can be broken down into smaller bit widths minimum size 1 -->
+        <port name="out2" clock="clk"/>
+        <!-- output can be broken down into smaller bit widths minimum size 1 -->
+      </output_ports>
+    </model>
+  </models>
+  <tiles>
+    <tile name="io">
+      <sub_tile name="io" capacity="8">
+        <equivalent_sites>
+          <site pb_type="io" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="outpad" num_pins="1"/>
+        <output name="inpad" num_pins="1"/>
+        <clock name="clock" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
+        <pinlocations pattern="custom">
+          <loc side="left">io.outpad io.inpad io.clock</loc>
+          <loc side="top">io.outpad io.inpad io.clock</loc>
+          <loc side="right">io.outpad io.inpad io.clock</loc>
+          <loc side="bottom">io.outpad io.inpad io.clock</loc>
+        </pinlocations>
+      </sub_tile>
+    </tile>
+    <tile name="clb">
+      <sub_tile name="clb">
+        <equivalent_sites>
+          <site pb_type="clb" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="I" num_pins="33" equivalent="full"/>
+        <output name="O" num_pins="20" equivalent="none"/>
+        <clock name="clk" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
+        <pinlocations pattern="spread"/>
+      </sub_tile>
+    </tile>
+    <tile name="mult_36" height="4">
+      <sub_tile name="mult_36">
+        <equivalent_sites>
+          <site pb_type="mult_36" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="a" num_pins="36"/>
+        <input name="b" num_pins="36"/>
+        <output name="out" num_pins="72"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
+        <pinlocations pattern="spread"/>
+      </sub_tile>
+    </tile>
+    <tile name="memory" height="6">
+      <sub_tile name="memory">
+        <equivalent_sites>
+          <site pb_type="memory" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="addr1" num_pins="15"/>
+        <input name="addr2" num_pins="15"/>
+        <input name="data" num_pins="64"/>
+        <input name="we1" num_pins="1"/>
+        <input name="we2" num_pins="1"/>
+        <output name="out" num_pins="64"/>
+        <clock name="clk" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
+        <pinlocations pattern="spread"/>
+      </sub_tile>
+    </tile>
+  </tiles>
+  <!-- ODIN II specific config ends -->
+  <!-- Physical descriptions begin -->
+  <layout>
+    <auto_layout aspect_ratio="1.0">
+      <!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
+      <perimeter type="io" priority="100"/>
+      <corners type="EMPTY" priority="101"/>
+      <!--Fill with 'clb'-->
+      <fill type="clb" priority="10"/>
+      <!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
+      <col type="mult_36" startx="6" starty="1" repeatx="8" priority="20"/>
+      <col type="EMPTY" startx="6" repeatx="8" starty="1" priority="19"/>
+      <!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
+      <col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
+      <col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
+    </auto_layout>
+  </layout>
+  <device>
+    <!-- VB & JL: Using Ian Kuon's transistor sizing and drive strength data for routing, at 40 nm. Ian used BPTM 
+			     models. We are modifying the delay values however, to include metal C and R, which allows more architecture
+			     experimentation. We are also modifying the relative resistance of PMOS to be 1.8x that of NMOS
+			     (vs. Ian's 3x) as 1.8x lines up with Jeff G's data from a 45 nm process (and is more typical of 
+			     45 nm in general). I'm upping the Rmin_nmos from Ian's just over 6k to nearly 9k, and dropping 
+			     RminW_pmos from 18k to 16k to hit this 1.8x ratio, while keeping the delays of buffers approximately
+			     lined up with Stratix IV. 
+			     We are using Jeff G.'s capacitance data for 45 nm (in tech/ptm_45nm).
+			     Jeff's tables list C in for transistors with widths in multiples of the minimum feature size (45 nm).
+			     The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply drive strength sizes in this file
+	                     by 2.5x when looking up in Jeff's tables.
+			     The delay values are lined up with Stratix IV, which has an architecture similar to this
+			     proposed FPGA, and which is also 40 nm 
+			     C_ipin_cblock: input capacitance of a track buffer, which VPR assumes is a single-stage
+			     4x minimum drive strength buffer. -->
+    <sizing R_minW_nmos="8926" R_minW_pmos="16067"/>
+    <!-- Total Stratix IV tile area is about 8100 um^2, minimum width transistor area is 60 L^2 yields a tile area of 84375 MWTAs,
+	   Routing at W=300 is 30481 MWTAs, leaving us with a total of 53000 MWTAs for logic block area 
+	   This means that only 37% of our area is in the general routing, and 63% is inside the logic
+	   block. Note that the crossbar / local interconnect is considered part of the logic block
+	   area in this analysis. That is a lower proportion of routing area than most academics
+	   assume, but note that the total routing area really includes the crossbar, which would push
+	   routing area up significantly, we estimate into the ~70% range.
+	   -->
+    <area grid_logic_tile_area="53894"/>
+    <chan_width_distr>
+      <x distr="uniform" peak="1.000000"/>
+      <y distr="uniform" peak="1.000000"/>
+    </chan_width_distr>
+    <switch_block type="wilton" fs="3"/>
+    <connection_block input_switch_name="ipin_cblock"/>
+  </device>
+  <switchlist>
+    <!-- VB: the mux_trans_size and buf_size data below is in minimum width transistor *areas*, assuming the purple
+	       book area formula. This means the mux transistors are about 5x minimum drive strength.
+	       We assume the first stage of the buffer is 3x min drive strength to be reasonable given the large 
+	       mux transistors, and this gives a reasonable stage ratio of a bit over 5x to the second stage. We assume
+	       the n and p transistors in the first stage are equal-sized to lower the buffer trip point, since it's fed
+	       by a pass transistor mux. We can then reverse engineer the buffer second stage to hit the specified 
+	       buf_size (really buffer area) - 16.2x minimum drive nmos and 1.8*16.2 = 29.2x minimum drive pmos.
+	       I then took the data from Jeff G.'s PTM modeling of 45 nm to get the Cin (gate of first stage) and Cout 
+	       (diff of second stage) listed below.  Jeff's models are in tech/ptm_45nm, and are in min feature multiples.
+	       The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply the drive strength sizes above by 
+	       2.5x when looking up in Jeff's tables.
+	       Finally, we choose a switch delay (58 ps) that leads to length 4 wires having a delay equal to that of SIV of 126 ps.
+	       This also leads to the switch being 46% of the total wire delay, which is reasonable. -->
+    <switch type="mux" name="0" R="551" Cin=".77e-15" Cout="4e-15" Tdel="58e-12" mux_trans_size="2.630740" buf_size="27.645901"/>
+    <!-- switch ipin_cblock resistance set to yield a 4x minimum drive strength buffer -->
+    <switch type="mux" name="ipin_cblock" R="2231.5" Cout="0." Cin="1.47e-15" Tdel="7.247000e-11" mux_trans_size="1.222260" buf_size="auto"/>
+  </switchlist>
+  <segmentlist>
+    <!-- VB & JL: using ITRS metal stack data, 96 nm half pitch wires, which are intermediate metal width/space.
+			     With the 96 nm half pitch, such wires would take 60 um of height, vs. a 90 um high (approximated as square) Stratix IV tile so this seems
+			     reasonable. Using a tile length of 90 um, corresponding to the length of a Stratix IV tile if it were square. -->
+    <segment freq="1.000000" length="4" type="unidir" Rmetal="101" Cmetal="22.5e-15">
+      <mux name="0"/>
+      <sb type="pattern">1 1 1 1 1</sb>
+      <cb type="pattern">1 1 1 1</cb>
+    </segment>
+  </segmentlist>
+  <complexblocklist>
+    <!-- Define I/O pads begin -->
+    <!-- Capacity is a unique property of I/Os, it is the maximum number of I/Os that can be placed at the same (X,Y) location on the FPGA -->
+    <pb_type name="io">
+      <input name="outpad" num_pins="1"/>
+      <output name="inpad" num_pins="1"/>
+      <clock name="clock" num_pins="1"/>
+      <!-- IOs can operate as either inputs or outputs.
+	     Delays below come from Ian Kuon. They are small, so they should be interpreted as
+	     the delays to and from registers in the I/O (generally I/Os are registered
+	     today, and that is where you timing-analyze them).
+	     -->
+      <mode name="inpad">
+        <pb_type name="inpad" blif_model=".input" num_pb="1">
+          <output name="inpad" num_pins="1"/>
+        </pb_type>
+        <interconnect>
+          <direct name="inpad" input="inpad.inpad" output="io.inpad">
+            <delay_constant max="4.243e-11" in_port="inpad.inpad" out_port="io.inpad"/>
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="outpad">
+        <pb_type name="outpad" blif_model=".output" num_pb="1">
+          <input name="outpad" num_pins="1"/>
+        </pb_type>
+        <interconnect>
+          <direct name="outpad" input="io.outpad" output="outpad.outpad">
+            <delay_constant max="1.394e-11" in_port="io.outpad" out_port="outpad.outpad"/>
+          </direct>
+        </interconnect>
+      </mode>
+      <!-- Every input pin is driven by 15% of the tracks in a channel, every output pin drives 10% of the tracks in a channel -->
+      <!-- IOs go on the periphery of the FPGA, for consistency, 
+          make it physically equivalent on all sides so that only one definition of I/Os is needed.
+          If I do not make a physically equivalent definition, then I need to define 4 different I/Os, one for each side of the FPGA
+        -->
+      <!-- Place I/Os on the sides of the FPGA -->
+      <power method="ignore"/>
+    </pb_type>
+    <!-- Define I/O pads ends -->
+    <!-- Define general purpose logic block (CLB) begin -->
+    <pb_type name="clb">
+      <input name="I" num_pins="33" equivalent="full"/>
+      <output name="O" num_pins="20" equivalent="none"/>
+      <clock name="clk" num_pins="1"/>
+      <!-- Describe fracturable logic element.  
+             Each fracturable logic element has a 6-LUT that can alternatively operate as two 5-LUTs with shared inputs. 
+             The outputs of the fracturable logic element can be optionally registered
+        -->
+      <pb_type name="fle" num_pb="10">
+        <input name="in" num_pins="6"/>
+        <output name="out" num_pins="2"/>
+        <clock name="clk" num_pins="1"/>
+        <!-- Dual 5-LUT mode definition begin -->
+        <mode name="n2_lut5">
+          <pb_type name="lut5inter" num_pb="1">
+            <input name="in" num_pins="5"/>
+            <output name="out" num_pins="2"/>
+            <clock name="clk" num_pins="1"/>
+            <pb_type name="ble5" num_pb="2">
+              <input name="in" num_pins="5"/>
+              <output name="out" num_pins="1"/>
+              <clock name="clk" num_pins="1"/>
+              <!-- Define the LUT -->
+              <pb_type name="lut5" blif_model=".names" num_pb="1" class="lut">
+                <input name="in" num_pins="5" port_class="lut_in"/>
+                <output name="out" num_pins="1" port_class="lut_out"/>
+                <!-- LUT timing using delay matrix -->
+                <!-- These are the physical delay inputs on a Stratix IV LUT but because VPR cannot do LUT rebalancing,
+                           we instead take the average of these numbers to get more stable results
+                      82e-12
+                      173e-12
+                      261e-12
+                      263e-12
+                      398e-12
+                      -->
+                <delay_matrix type="max" in_port="lut5.in" out_port="lut5.out">
+                  235e-12
+                  235e-12
+                  235e-12
+                  235e-12
+                  235e-12
+                </delay_matrix>
+              </pb_type>
+              <!-- Define the flip-flop -->
+              <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
+                <input name="D" num_pins="1" port_class="D"/>
+                <output name="Q" num_pins="1" port_class="Q"/>
+                <clock name="clk" num_pins="1" port_class="clock"/>
+                <T_setup value="66e-12" port="ff.D" clock="clk"/>
+                <T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
+              </pb_type>
+              <interconnect>
+                <direct name="direct1" input="ble5.in[4:0]" output="lut5[0:0].in[4:0]"/>
+                <direct name="direct2" input="lut5[0:0].out" output="ff[0:0].D">
+                  <!-- Advanced user option that tells CAD tool to find LUT+FF pairs in netlist -->
+                  <pack_pattern name="ble5" in_port="lut5[0:0].out" out_port="ff[0:0].D"/>
+                </direct>
+                <direct name="direct3" input="ble5.clk" output="ff[0:0].clk"/>
+                <mux name="mux1" input="ff[0:0].Q lut5.out[0:0]" output="ble5.out[0:0]">
+                  <!-- LUT to output is faster than FF to output on a Stratix IV -->
+                  <delay_constant max="25e-12" in_port="lut5.out[0:0]" out_port="ble5.out[0:0]"/>
+                  <delay_constant max="45e-12" in_port="ff[0:0].Q" out_port="ble5.out[0:0]"/>
+                </mux>
+              </interconnect>
+            </pb_type>
+            <interconnect>
+              <direct name="direct1" input="lut5inter.in" output="ble5[0:0].in"/>
+              <direct name="direct2" input="lut5inter.in" output="ble5[1:1].in"/>
+              <direct name="direct3" input="ble5[1:0].out" output="lut5inter.out"/>
+              <complete name="complete1" input="lut5inter.clk" output="ble5[1:0].clk"/>
+            </interconnect>
+          </pb_type>
+          <interconnect>
+            <direct name="direct1" input="fle.in[4:0]" output="lut5inter.in"/>
+            <direct name="direct2" input="lut5inter.out" output="fle.out"/>
+            <direct name="direct3" input="fle.clk" output="lut5inter.clk"/>
+          </interconnect>
+        </mode>
+        <!-- Dual 5-LUT mode definition end -->
+        <!-- 6-LUT mode definition begin -->
+        <mode name="n1_lut6">
+          <!-- Define 6-LUT mode -->
+          <pb_type name="ble6" num_pb="1">
+            <input name="in" num_pins="6"/>
+            <output name="out" num_pins="1"/>
+            <clock name="clk" num_pins="1"/>
+            <!-- Define LUT -->
+            <pb_type name="lut6" blif_model=".names" num_pb="1" class="lut">
+              <input name="in" num_pins="6" port_class="lut_in"/>
+              <output name="out" num_pins="1" port_class="lut_out"/>
+              <!-- LUT timing using delay matrix -->
+              <!-- These are the physical delay inputs on a Stratix IV LUT but because VPR cannot do LUT rebalancing,
+                       we instead take the average of these numbers to get more stable results
+                  82e-12
+                  173e-12
+                  261e-12
+                  263e-12
+                  398e-12
+                  397e-12
+                  -->
+              <delay_matrix type="max" in_port="lut6.in" out_port="lut6.out">
+                261e-12
+                261e-12
+                261e-12
+                261e-12
+                261e-12
+                261e-12
+              </delay_matrix>
+            </pb_type>
+            <!-- Define flip-flop -->
+            <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
+              <input name="D" num_pins="1" port_class="D"/>
+              <output name="Q" num_pins="1" port_class="Q"/>
+              <clock name="clk" num_pins="1" port_class="clock"/>
+              <T_setup value="66e-12" port="ff.D" clock="clk"/>
+              <T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
+            </pb_type>
+            <interconnect>
+              <direct name="direct1" input="ble6.in" output="lut6[0:0].in"/>
+              <direct name="direct2" input="lut6.out" output="ff.D">
+                <!-- Advanced user option that tells CAD tool to find LUT+FF pairs in netlist -->
+                <pack_pattern name="ble6" in_port="lut6.out" out_port="ff.D"/>
+              </direct>
+              <direct name="direct3" input="ble6.clk" output="ff.clk"/>
+              <mux name="mux1" input="ff.Q lut6.out" output="ble6.out">
+                <!-- LUT to output is faster than FF to output on a Stratix IV -->
+                <delay_constant max="25e-12" in_port="lut6.out" out_port="ble6.out"/>
+                <delay_constant max="45e-12" in_port="ff.Q" out_port="ble6.out"/>
+              </mux>
+            </interconnect>
+          </pb_type>
+          <interconnect>
+            <direct name="direct1" input="fle.in" output="ble6.in"/>
+            <direct name="direct2" input="ble6.out" output="fle.out[0:0]"/>
+            <direct name="direct3" input="fle.clk" output="ble6.clk"/>
+          </interconnect>
+        </mode>
+        <!-- 6-LUT mode definition end -->
+      </pb_type>
+      <interconnect>
+        <!-- We use a full crossbar to get logical equivalence at inputs of CLB 
+		     The delays below come from Stratix IV. the delay through a connection block
+		     input mux + the crossbar in Stratix IV is 167 ps. We already have a 72 ps 
+		     delay on the connection block input mux (modeled by Ian Kuon), so the remaining
+		     delay within the crossbar is 95 ps. 
+		     The delay of cluster feedbacks in Stratix IV is 100 ps, when driven by a LUT.
+		     Since all our LUT outputs go to a BLE output, and have a delay of
+		     25 ps to do so, we subtract 25 ps from the 100 ps delay of a feedback
+		     to get the part that should be marked on the crossbar.	 -->
+        <complete name="crossbar" input="clb.I fle[9:0].out" output="fle[9:0].in">
+          <delay_constant max="95e-12" in_port="clb.I" out_port="fle[9:0].in"/>
+          <delay_constant max="75e-12" in_port="fle[9:0].out" out_port="fle[9:0].in"/>
+        </complete>
+        <complete name="clks" input="clb.clk" output="fle[9:0].clk">
+        </complete>
+        <!-- This way of specifying direct connection to clb outputs is important because this architecture uses automatic spreading of opins.  
+               By grouping the output pins in this fashion, if a logic block is completely filled by 6-LUTs,
+               then the outputs used by those 6-LUTs get evenly distributed across all four sides of the CLB instead of clumped on two sides (which is what happens with a more
+               naive specification).
+          -->
+        <direct name="clbouts1" input="fle[9:0].out[0:0]" output="clb.O[9:0]"/>
+        <direct name="clbouts2" input="fle[9:0].out[1:1]" output="clb.O[19:10]"/>
+      </interconnect>
+      <!-- Every input pin is driven by 15% of the tracks in a channel, every output pin drives 10% of the tracks in a channel -->
+      <!-- Place this general purpose logic block in any unspecified column -->
+    </pb_type>
+    <!-- Define general purpose logic block (CLB) ends -->
+    <!-- Define fracturable multiplier begin -->
+    <!-- This multiplier can operate as a 36x36 multiplier that can fracture to two 18x18 multipliers each of which can further fracture to two 9x9 multipliers 
+	   For delay modelling, the 36x36 DSP multiplier in Stratix IV has a delay of 1.523 ns + 1.93 ns
+	    = 3.45 ns. The 18x18 mode doesn't need to sum four 18x18 multipliers, so it is a bit
+	   faster: 1.523 ns for the multiplier, and 1.09 ns for the multiplier output block.
+	    For the input and output interconnect delays, unlike Stratix IV, we don't
+	   have any routing/logic flexibility (crossbars) at the inputs.  There is some output muxing
+	   in Stratix IV and this architecture to select which multiplier outputs should go out (e.g.
+	   9x9 outputs, 18x18 or 36x36) so those are very close between the two architectures. 
+	   We take the conservative (slightly pessimistic)
+           approach modelling the input as the same as the Stratix IV input delay and the output delay the same as the Stratix IV DSP out delay.
+      -->
+    <pb_type name="mult_36">
+      <input name="a" num_pins="36"/>
+      <input name="b" num_pins="36"/>
+      <output name="out" num_pins="72"/>
+      <mode name="two_divisible_mult_18x18">
+        <pb_type name="divisible_mult_18x18" num_pb="2">
+          <input name="a" num_pins="18"/>
+          <input name="b" num_pins="18"/>
+          <output name="out" num_pins="36"/>
+          <!-- Model 9x9 delay and 18x18 delay as the same.  9x9 could be faster, but in Stratix IV
+	          isn't, presumably because the multiplier layout is really optimized for 18x18.
+		-->
+          <mode name="two_mult_9x9">
+            <pb_type name="mult_9x9_slice" num_pb="2">
+              <input name="A_cfg" num_pins="9"/>
+              <input name="B_cfg" num_pins="9"/>
+              <output name="OUT_cfg" num_pins="18"/>
+              <pb_type name="mult_9x9" blif_model=".subckt multiply" num_pb="1">
+                <input name="a" num_pins="9"/>
+                <input name="b" num_pins="9"/>
+                <output name="out" num_pins="18"/>
+                <delay_constant max="1.523e-9" in_port="mult_9x9.a" out_port="mult_9x9.out"/>
+                <delay_constant max="1.523e-9" in_port="mult_9x9.b" out_port="mult_9x9.out"/>
+              </pb_type>
+              <interconnect>
+                <direct name="a2a" input="mult_9x9_slice.A_cfg" output="mult_9x9.a">
+                </direct>
+                <direct name="b2b" input="mult_9x9_slice.B_cfg" output="mult_9x9.b">
+                </direct>
+                <direct name="out2out" input="mult_9x9.out" output="mult_9x9_slice.OUT_cfg">
+                </direct>
+              </interconnect>
+              <power method="pin-toggle">
+                <port name="A_cfg" energy_per_toggle="1.45e-12"/>
+                <port name="B_cfg" energy_per_toggle="1.45e-12"/>
+                <static_power power_per_instance="0.0"/>
+              </power>
+            </pb_type>
+            <interconnect>
+              <direct name="a2a" input="divisible_mult_18x18.a" output="mult_9x9_slice[1:0].A_cfg">
+              </direct>
+              <direct name="b2b" input="divisible_mult_18x18.b" output="mult_9x9_slice[1:0].B_cfg">
+              </direct>
+              <direct name="out2out" input="mult_9x9_slice[1:0].OUT_cfg" output="divisible_mult_18x18.out">
+              </direct>
+            </interconnect>
+          </mode>
+          <mode name="mult_18x18">
+            <pb_type name="mult_18x18_slice" num_pb="1">
+              <input name="A_cfg" num_pins="18"/>
+              <input name="B_cfg" num_pins="18"/>
+              <output name="OUT_cfg" num_pins="36"/>
+              <pb_type name="mult_18x18" blif_model=".subckt multiply" num_pb="1">
+                <input name="a" num_pins="18"/>
+                <input name="b" num_pins="18"/>
+                <output name="out" num_pins="36"/>
+                <delay_constant max="1.523e-9" in_port="mult_18x18.a" out_port="mult_18x18.out"/>
+                <delay_constant max="1.523e-9" in_port="mult_18x18.b" out_port="mult_18x18.out"/>
+              </pb_type>
+              <interconnect>
+                <direct name="a2a" input="mult_18x18_slice.A_cfg" output="mult_18x18.a">
+                </direct>
+                <direct name="b2b" input="mult_18x18_slice.B_cfg" output="mult_18x18.b">
+                </direct>
+                <direct name="out2out" input="mult_18x18.out" output="mult_18x18_slice.OUT_cfg">
+                </direct>
+              </interconnect>
+              <power method="pin-toggle">
+                <port name="A_cfg" energy_per_toggle="1.09e-12"/>
+                <port name="B_cfg" energy_per_toggle="1.09e-12"/>
+                <static_power power_per_instance="0.0"/>
+              </power>
+            </pb_type>
+            <interconnect>
+              <direct name="a2a" input="divisible_mult_18x18.a" output="mult_18x18_slice.A_cfg">
+              </direct>
+              <direct name="b2b" input="divisible_mult_18x18.b" output="mult_18x18_slice.B_cfg">
+              </direct>
+              <direct name="out2out" input="mult_18x18_slice.OUT_cfg" output="divisible_mult_18x18.out">
+              </direct>
+            </interconnect>
+          </mode>
+          <power method="sum-of-children"/>
+        </pb_type>
+        <interconnect>
+          <!-- Stratix IV input delay of 207ps is conservative for this architecture because this architecture does not have an input crossbar in the multiplier. 
+		   Subtract 72.5 ps delay, which is already in the connection block input mux, leading to a 134 ps delay.
+              -->
+          <direct name="a2a" input="mult_36.a" output="divisible_mult_18x18[1:0].a">
+            <delay_constant max="134e-12" in_port="mult_36.a" out_port="divisible_mult_18x18[1:0].a"/>
+          </direct>
+          <direct name="b2b" input="mult_36.b" output="divisible_mult_18x18[1:0].b">
+            <delay_constant max="134e-12" in_port="mult_36.b" out_port="divisible_mult_18x18[1:0].b"/>
+          </direct>
+          <direct name="out2out" input="divisible_mult_18x18[1:0].out" output="mult_36.out">
+            <delay_constant max="1.09e-9" in_port="divisible_mult_18x18[1:0].out" out_port="mult_36.out"/>
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mult_36x36">
+        <pb_type name="mult_36x36_slice" num_pb="1">
+          <input name="A_cfg" num_pins="36"/>
+          <input name="B_cfg" num_pins="36"/>
+          <output name="OUT_cfg" num_pins="72"/>
+          <pb_type name="mult_36x36" blif_model=".subckt multiply" num_pb="1">
+            <input name="a" num_pins="36"/>
+            <input name="b" num_pins="36"/>
+            <output name="out" num_pins="72"/>
+            <delay_constant max="1.523e-9" in_port="mult_36x36.a" out_port="mult_36x36.out"/>
+            <delay_constant max="1.523e-9" in_port="mult_36x36.b" out_port="mult_36x36.out"/>
+          </pb_type>
+          <interconnect>
+            <direct name="a2a" input="mult_36x36_slice.A_cfg" output="mult_36x36.a">
+            </direct>
+            <direct name="b2b" input="mult_36x36_slice.B_cfg" output="mult_36x36.b">
+            </direct>
+            <direct name="out2out" input="mult_36x36.out" output="mult_36x36_slice.OUT_cfg">
+            </direct>
+          </interconnect>
+          <power method="pin-toggle">
+            <port name="A_cfg" energy_per_toggle="2.13e-12"/>
+            <port name="B_cfg" energy_per_toggle="2.13e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <!-- Stratix IV input delay of 207ps is conservative for this architecture because this architecture does not have an input crossbar in the multiplier. 
+		   Subtract 72.5 ps delay, which is already in the connection block input mux, leading
+		   to a 134 ps delay.
+              -->
+          <direct name="a2a" input="mult_36.a" output="mult_36x36_slice.A_cfg">
+            <delay_constant max="134e-12" in_port="mult_36.a" out_port="mult_36x36_slice.A_cfg"/>
+          </direct>
+          <direct name="b2b" input="mult_36.b" output="mult_36x36_slice.B_cfg">
+            <delay_constant max="134e-12" in_port="mult_36.b" out_port="mult_36x36_slice.B_cfg"/>
+          </direct>
+          <direct name="out2out" input="mult_36x36_slice.OUT_cfg" output="mult_36.out">
+            <delay_constant max="1.93e-9" in_port="mult_36x36_slice.OUT_cfg" out_port="mult_36.out"/>
+          </direct>
+        </interconnect>
+      </mode>
+      <!-- Place this multiplier block every 8 columns from (and including) the sixth column -->
+      <power method="sum-of-children"/>
+    </pb_type>
+    <!-- Define fracturable multiplier end -->
+    <!-- Define fracturable memory begin -->
+    <!-- 32 Kb Memory that can operate from 512x64 to 32Kx1 for single-port mode and 1024x32 to 32Kx1 for dual-port mode.  
+           Area and delay based off Stratix IV 9K and 144K memories (delay from linear interpolation, Tsu(483 ps, 636 ps) Tco(1084ps, 1969ps)).  
+           Input delay = 204ps (from Stratix IV LAB line) - 72ps (this architecture does not lump connection box delay in internal delay)
+           Output delay = M4K buffer 50ps
+      -->
+    <pb_type name="memory">
+      <input name="addr1" num_pins="15"/>
+      <input name="addr2" num_pins="15"/>
+      <input name="data" num_pins="64"/>
+      <input name="we1" num_pins="1"/>
+      <input name="we2" num_pins="1"/>
+      <output name="out" num_pins="64"/>
+      <clock name="clk" num_pins="1"/>
+      <!-- Specify single port mode first -->
+      <mode name="mem_512x64_sp">
+        <pb_type name="mem_512x64_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="9" port_class="address"/>
+          <input name="data" num_pins="64" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="64" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_512x64_sp.addr" clock="clk"/>
+          <T_setup value="509e-12" port="mem_512x64_sp.data" clock="clk"/>
+          <T_setup value="509e-12" port="mem_512x64_sp.we" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" port="mem_512x64_sp.out" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="9.0e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[8:0]" output="mem_512x64_sp.addr">
+            <delay_constant max="132e-12" in_port="memory.addr1[8:0]" out_port="mem_512x64_sp.addr"/>
+          </direct>
+          <direct name="data1" input="memory.data[63:0]" output="mem_512x64_sp.data">
+            <delay_constant max="132e-12" in_port="memory.data[63:0]" out_port="mem_512x64_sp.data"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_512x64_sp.we">
+            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_512x64_sp.we"/>
+          </direct>
+          <direct name="dataout1" input="mem_512x64_sp.out" output="memory.out[63:0]">
+            <delay_constant max="40e-12" in_port="mem_512x64_sp.out" out_port="memory.out[63:0]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_512x64_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_1024x32_sp">
+        <pb_type name="mem_1024x32_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="10" port_class="address"/>
+          <input name="data" num_pins="32" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="32" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_1024x32_sp.addr" clock="clk"/>
+          <T_setup value="509e-12" port="mem_1024x32_sp.data" clock="clk"/>
+          <T_setup value="509e-12" port="mem_1024x32_sp.we" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" port="mem_1024x32_sp.out" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="9.0e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x32_sp.addr">
+            <delay_constant max="132e-12" in_port="memory.addr1[9:0]" out_port="mem_1024x32_sp.addr"/>
+          </direct>
+          <direct name="data1" input="memory.data[31:0]" output="mem_1024x32_sp.data">
+            <delay_constant max="132e-12" in_port="memory.data[31:0]" out_port="mem_1024x32_sp.data"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_1024x32_sp.we">
+            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_1024x32_sp.we"/>
+          </direct>
+          <direct name="dataout1" input="mem_1024x32_sp.out" output="memory.out[31:0]">
+            <delay_constant max="40e-12" in_port="mem_1024x32_sp.out" out_port="memory.out[31:0]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_1024x32_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_2048x16_sp">
+        <pb_type name="mem_2048x16_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="11" port_class="address"/>
+          <input name="data" num_pins="16" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="16" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_2048x16_sp.addr" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x16_sp.data" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x16_sp.we" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" port="mem_2048x16_sp.out" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="9.0e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x16_sp.addr">
+            <delay_constant max="132e-12" in_port="memory.addr1[10:0]" out_port="mem_2048x16_sp.addr"/>
+          </direct>
+          <direct name="data1" input="memory.data[15:0]" output="mem_2048x16_sp.data">
+            <delay_constant max="132e-12" in_port="memory.data[15:0]" out_port="mem_2048x16_sp.data"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_2048x16_sp.we">
+            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_2048x16_sp.we"/>
+          </direct>
+          <direct name="dataout1" input="mem_2048x16_sp.out" output="memory.out[15:0]">
+            <delay_constant max="40e-12" in_port="mem_2048x16_sp.out" out_port="memory.out[15:0]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_2048x16_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_4096x8_sp">
+        <pb_type name="mem_4096x8_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="12" port_class="address"/>
+          <input name="data" num_pins="8" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="8" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_4096x8_sp.addr" clock="clk"/>
+          <T_setup value="509e-12" port="mem_4096x8_sp.data" clock="clk"/>
+          <T_setup value="509e-12" port="mem_4096x8_sp.we" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" port="mem_4096x8_sp.out" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="9.0e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[11:0]" output="mem_4096x8_sp.addr">
+            <delay_constant max="132e-12" in_port="memory.addr1[11:0]" out_port="mem_4096x8_sp.addr"/>
+          </direct>
+          <direct name="data1" input="memory.data[7:0]" output="mem_4096x8_sp.data">
+            <delay_constant max="132e-12" in_port="memory.data[7:0]" out_port="mem_4096x8_sp.data"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_4096x8_sp.we">
+            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_4096x8_sp.we"/>
+          </direct>
+          <direct name="dataout1" input="mem_4096x8_sp.out" output="memory.out[7:0]">
+            <delay_constant max="40e-12" in_port="mem_4096x8_sp.out" out_port="memory.out[7:0]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_4096x8_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_8192x4_sp">
+        <pb_type name="mem_8192x4_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="13" port_class="address"/>
+          <input name="data" num_pins="4" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="4" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_8192x4_sp.addr" clock="clk"/>
+          <T_setup value="509e-12" port="mem_8192x4_sp.data" clock="clk"/>
+          <T_setup value="509e-12" port="mem_8192x4_sp.we" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" port="mem_8192x4_sp.out" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="9.0e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[12:0]" output="mem_8192x4_sp.addr">
+            <delay_constant max="132e-12" in_port="memory.addr1[12:0]" out_port="mem_8192x4_sp.addr"/>
+          </direct>
+          <direct name="data1" input="memory.data[3:0]" output="mem_8192x4_sp.data">
+            <delay_constant max="132e-12" in_port="memory.data[3:0]" out_port="mem_8192x4_sp.data"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_8192x4_sp.we">
+            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_8192x4_sp.we"/>
+          </direct>
+          <direct name="dataout1" input="mem_8192x4_sp.out" output="memory.out[3:0]">
+            <delay_constant max="40e-12" in_port="mem_8192x4_sp.out" out_port="memory.out[3:0]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_8192x4_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_16384x2_sp">
+        <pb_type name="mem_16384x2_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="14" port_class="address"/>
+          <input name="data" num_pins="2" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="2" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_16384x2_sp.addr" clock="clk"/>
+          <T_setup value="509e-12" port="mem_16384x2_sp.data" clock="clk"/>
+          <T_setup value="509e-12" port="mem_16384x2_sp.we" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" port="mem_16384x2_sp.out" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="9.0e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[13:0]" output="mem_16384x2_sp.addr">
+            <delay_constant max="132e-12" in_port="memory.addr1[13:0]" out_port="mem_16384x2_sp.addr"/>
+          </direct>
+          <direct name="data1" input="memory.data[1:0]" output="mem_16384x2_sp.data">
+            <delay_constant max="132e-12" in_port="memory.data[1:0]" out_port="mem_16384x2_sp.data"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_16384x2_sp.we">
+            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_16384x2_sp.we"/>
+          </direct>
+          <direct name="dataout1" input="mem_16384x2_sp.out" output="memory.out[1:0]">
+            <delay_constant max="40e-12" in_port="mem_16384x2_sp.out" out_port="memory.out[1:0]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_16384x2_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_32768x1_sp">
+        <pb_type name="mem_32768x1_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="15" port_class="address"/>
+          <input name="data" num_pins="1" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="1" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_32768x1_sp.addr" clock="clk"/>
+          <T_setup value="509e-12" port="mem_32768x1_sp.data" clock="clk"/>
+          <T_setup value="509e-12" port="mem_32768x1_sp.we" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" port="mem_32768x1_sp.out" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="9.0e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[14:0]" output="mem_32768x1_sp.addr">
+            <delay_constant max="132e-12" in_port="memory.addr1[14:0]" out_port="mem_32768x1_sp.addr"/>
+          </direct>
+          <direct name="data1" input="memory.data[0:0]" output="mem_32768x1_sp.data">
+            <delay_constant max="132e-12" in_port="memory.data[0:0]" out_port="mem_32768x1_sp.data"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_32768x1_sp.we">
+            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_32768x1_sp.we"/>
+          </direct>
+          <direct name="dataout1" input="mem_32768x1_sp.out" output="memory.out[0:0]">
+            <delay_constant max="40e-12" in_port="mem_32768x1_sp.out" out_port="memory.out[0:0]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_32768x1_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <!-- Specify true dual port mode next -->
+      <mode name="mem_1024x32_dp">
+        <pb_type name="mem_1024x32_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="10" port_class="address1"/>
+          <input name="addr2" num_pins="10" port_class="address2"/>
+          <input name="data1" num_pins="32" port_class="data_in1"/>
+          <input name="data2" num_pins="32" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="32" port_class="data_out1"/>
+          <output name="out2" num_pins="32" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_1024x32_dp.addr1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_1024x32_dp.data1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_1024x32_dp.we1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_1024x32_dp.addr2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_1024x32_dp.data2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_1024x32_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" port="mem_1024x32_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" port="mem_1024x32_dp.out2" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="17.9e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x32_dp.addr1">
+            <delay_constant max="132e-12" in_port="memory.addr1[9:0]" out_port="mem_1024x32_dp.addr1"/>
+          </direct>
+          <direct name="address2" input="memory.addr2[9:0]" output="mem_1024x32_dp.addr2">
+            <delay_constant max="132e-12" in_port="memory.addr2[9:0]" out_port="mem_1024x32_dp.addr2"/>
+          </direct>
+          <direct name="data1" input="memory.data[31:0]" output="mem_1024x32_dp.data1">
+            <delay_constant max="132e-12" in_port="memory.data[31:0]" out_port="mem_1024x32_dp.data1"/>
+          </direct>
+          <direct name="data2" input="memory.data[63:32]" output="mem_1024x32_dp.data2">
+            <delay_constant max="132e-12" in_port="memory.data[63:32]" out_port="mem_1024x32_dp.data2"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_1024x32_dp.we1">
+            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_1024x32_dp.we1"/>
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_1024x32_dp.we2">
+            <delay_constant max="132e-12" in_port="memory.we2" out_port="mem_1024x32_dp.we2"/>
+          </direct>
+          <direct name="dataout1" input="mem_1024x32_dp.out1" output="memory.out[31:0]">
+            <delay_constant max="40e-12" in_port="mem_1024x32_dp.out1" out_port="memory.out[31:0]"/>
+          </direct>
+          <direct name="dataout2" input="mem_1024x32_dp.out2" output="memory.out[63:32]">
+            <delay_constant max="40e-12" in_port="mem_1024x32_dp.out2" out_port="memory.out[63:32]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_1024x32_dp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_2048x16_dp">
+        <pb_type name="mem_2048x16_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="11" port_class="address1"/>
+          <input name="addr2" num_pins="11" port_class="address2"/>
+          <input name="data1" num_pins="16" port_class="data_in1"/>
+          <input name="data2" num_pins="16" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="16" port_class="data_out1"/>
+          <output name="out2" num_pins="16" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_2048x16_dp.addr1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x16_dp.data1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x16_dp.we1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x16_dp.addr2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x16_dp.data2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_2048x16_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" port="mem_2048x16_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" port="mem_2048x16_dp.out2" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="17.9e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x16_dp.addr1">
+            <delay_constant max="132e-12" in_port="memory.addr1[10:0]" out_port="mem_2048x16_dp.addr1"/>
+          </direct>
+          <direct name="address2" input="memory.addr2[10:0]" output="mem_2048x16_dp.addr2">
+            <delay_constant max="132e-12" in_port="memory.addr2[10:0]" out_port="mem_2048x16_dp.addr2"/>
+          </direct>
+          <direct name="data1" input="memory.data[15:0]" output="mem_2048x16_dp.data1">
+            <delay_constant max="132e-12" in_port="memory.data[15:0]" out_port="mem_2048x16_dp.data1"/>
+          </direct>
+          <direct name="data2" input="memory.data[31:16]" output="mem_2048x16_dp.data2">
+            <delay_constant max="132e-12" in_port="memory.data[31:16]" out_port="mem_2048x16_dp.data2"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_2048x16_dp.we1">
+            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_2048x16_dp.we1"/>
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_2048x16_dp.we2">
+            <delay_constant max="132e-12" in_port="memory.we2" out_port="mem_2048x16_dp.we2"/>
+          </direct>
+          <direct name="dataout1" input="mem_2048x16_dp.out1" output="memory.out[15:0]">
+            <delay_constant max="40e-12" in_port="mem_2048x16_dp.out1" out_port="memory.out[15:0]"/>
+          </direct>
+          <direct name="dataout2" input="mem_2048x16_dp.out2" output="memory.out[31:16]">
+            <delay_constant max="40e-12" in_port="mem_2048x16_dp.out2" out_port="memory.out[31:16]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_2048x16_dp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_4096x8_dp"> <!-- true dual port, 4096 words x 8 bits per port (12 address pins); was misnamed mem_2048x8_dp -->
+        <pb_type name="mem_4096x8_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="12" port_class="address1"/>
+          <input name="addr2" num_pins="12" port_class="address2"/>
+          <input name="data1" num_pins="8" port_class="data_in1"/>
+          <input name="data2" num_pins="8" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="8" port_class="data_out1"/>
+          <output name="out2" num_pins="8" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_4096x8_dp.addr1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_4096x8_dp.data1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_4096x8_dp.we1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_4096x8_dp.addr2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_4096x8_dp.data2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_4096x8_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" port="mem_4096x8_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" port="mem_4096x8_dp.out2" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="17.9e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[11:0]" output="mem_4096x8_dp.addr1">
+            <delay_constant max="132e-12" in_port="memory.addr1[11:0]" out_port="mem_4096x8_dp.addr1"/>
+          </direct>
+          <direct name="address2" input="memory.addr2[11:0]" output="mem_4096x8_dp.addr2">
+            <delay_constant max="132e-12" in_port="memory.addr2[11:0]" out_port="mem_4096x8_dp.addr2"/>
+          </direct>
+          <direct name="data1" input="memory.data[7:0]" output="mem_4096x8_dp.data1">
+            <delay_constant max="132e-12" in_port="memory.data[7:0]" out_port="mem_4096x8_dp.data1"/>
+          </direct>
+          <direct name="data2" input="memory.data[15:8]" output="mem_4096x8_dp.data2">
+            <delay_constant max="132e-12" in_port="memory.data[15:8]" out_port="mem_4096x8_dp.data2"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_4096x8_dp.we1">
+            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_4096x8_dp.we1"/>
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_4096x8_dp.we2">
+            <delay_constant max="132e-12" in_port="memory.we2" out_port="mem_4096x8_dp.we2"/>
+          </direct>
+          <direct name="dataout1" input="mem_4096x8_dp.out1" output="memory.out[7:0]">
+            <delay_constant max="40e-12" in_port="mem_4096x8_dp.out1" out_port="memory.out[7:0]"/>
+          </direct>
+          <direct name="dataout2" input="mem_4096x8_dp.out2" output="memory.out[15:8]">
+            <delay_constant max="40e-12" in_port="mem_4096x8_dp.out2" out_port="memory.out[15:8]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_4096x8_dp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_8192x4_dp">
+        <pb_type name="mem_8192x4_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="13" port_class="address1"/>
+          <input name="addr2" num_pins="13" port_class="address2"/>
+          <input name="data1" num_pins="4" port_class="data_in1"/>
+          <input name="data2" num_pins="4" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="4" port_class="data_out1"/>
+          <output name="out2" num_pins="4" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_8192x4_dp.addr1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_8192x4_dp.data1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_8192x4_dp.we1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_8192x4_dp.addr2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_8192x4_dp.data2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_8192x4_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" port="mem_8192x4_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" port="mem_8192x4_dp.out2" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="17.9e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[12:0]" output="mem_8192x4_dp.addr1">
+            <delay_constant max="132e-12" in_port="memory.addr1[12:0]" out_port="mem_8192x4_dp.addr1"/>
+          </direct>
+          <direct name="address2" input="memory.addr2[12:0]" output="mem_8192x4_dp.addr2">
+            <delay_constant max="132e-12" in_port="memory.addr2[12:0]" out_port="mem_8192x4_dp.addr2"/>
+          </direct>
+          <direct name="data1" input="memory.data[3:0]" output="mem_8192x4_dp.data1">
+            <delay_constant max="132e-12" in_port="memory.data[3:0]" out_port="mem_8192x4_dp.data1"/>
+          </direct>
+          <direct name="data2" input="memory.data[7:4]" output="mem_8192x4_dp.data2">
+            <delay_constant max="132e-12" in_port="memory.data[7:4]" out_port="mem_8192x4_dp.data2"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_8192x4_dp.we1">
+            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_8192x4_dp.we1"/>
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_8192x4_dp.we2">
+            <delay_constant max="132e-12" in_port="memory.we2" out_port="mem_8192x4_dp.we2"/>
+          </direct>
+          <direct name="dataout1" input="mem_8192x4_dp.out1" output="memory.out[3:0]">
+            <delay_constant max="40e-12" in_port="mem_8192x4_dp.out1" out_port="memory.out[3:0]"/>
+          </direct>
+          <direct name="dataout2" input="mem_8192x4_dp.out2" output="memory.out[7:4]">
+            <delay_constant max="40e-12" in_port="mem_8192x4_dp.out2" out_port="memory.out[7:4]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_8192x4_dp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_16384x2_dp">
+        <pb_type name="mem_16384x2_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="14" port_class="address1"/>
+          <input name="addr2" num_pins="14" port_class="address2"/>
+          <input name="data1" num_pins="2" port_class="data_in1"/>
+          <input name="data2" num_pins="2" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="2" port_class="data_out1"/>
+          <output name="out2" num_pins="2" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_16384x2_dp.addr1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_16384x2_dp.data1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_16384x2_dp.we1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_16384x2_dp.addr2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_16384x2_dp.data2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_16384x2_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" port="mem_16384x2_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" port="mem_16384x2_dp.out2" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="17.9e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[13:0]" output="mem_16384x2_dp.addr1">
+            <delay_constant max="132e-12" in_port="memory.addr1[13:0]" out_port="mem_16384x2_dp.addr1"/>
+          </direct>
+          <direct name="address2" input="memory.addr2[13:0]" output="mem_16384x2_dp.addr2">
+            <delay_constant max="132e-12" in_port="memory.addr2[13:0]" out_port="mem_16384x2_dp.addr2"/>
+          </direct>
+          <direct name="data1" input="memory.data[1:0]" output="mem_16384x2_dp.data1">
+            <delay_constant max="132e-12" in_port="memory.data[1:0]" out_port="mem_16384x2_dp.data1"/>
+          </direct>
+          <direct name="data2" input="memory.data[3:2]" output="mem_16384x2_dp.data2">
+            <delay_constant max="132e-12" in_port="memory.data[3:2]" out_port="mem_16384x2_dp.data2"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_16384x2_dp.we1">
+            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_16384x2_dp.we1"/>
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_16384x2_dp.we2">
+            <delay_constant max="132e-12" in_port="memory.we2" out_port="mem_16384x2_dp.we2"/>
+          </direct>
+          <direct name="dataout1" input="mem_16384x2_dp.out1" output="memory.out[1:0]">
+            <delay_constant max="40e-12" in_port="mem_16384x2_dp.out1" out_port="memory.out[1:0]"/>
+          </direct>
+          <direct name="dataout2" input="mem_16384x2_dp.out2" output="memory.out[3:2]">
+            <delay_constant max="40e-12" in_port="mem_16384x2_dp.out2" out_port="memory.out[3:2]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_16384x2_dp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="mem_32768x1_dp">
+        <pb_type name="mem_32768x1_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="15" port_class="address1"/>
+          <input name="addr2" num_pins="15" port_class="address2"/>
+          <input name="data1" num_pins="1" port_class="data_in1"/>
+          <input name="data2" num_pins="1" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="1" port_class="data_out1"/>
+          <output name="out2" num_pins="1" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+          <T_setup value="509e-12" port="mem_32768x1_dp.addr1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_32768x1_dp.data1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_32768x1_dp.we1" clock="clk"/>
+          <T_setup value="509e-12" port="mem_32768x1_dp.addr2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_32768x1_dp.data2" clock="clk"/>
+          <T_setup value="509e-12" port="mem_32768x1_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" port="mem_32768x1_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="1.234e-9" port="mem_32768x1_dp.out2" clock="clk"/>
+          <power method="pin-toggle">
+            <port name="clk" energy_per_toggle="17.9e-12"/>
+            <static_power power_per_instance="0.0"/>
+          </power>
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[14:0]" output="mem_32768x1_dp.addr1">
+            <delay_constant max="132e-12" in_port="memory.addr1[14:0]" out_port="mem_32768x1_dp.addr1"/>
+          </direct>
+          <direct name="address2" input="memory.addr2[14:0]" output="mem_32768x1_dp.addr2">
+            <delay_constant max="132e-12" in_port="memory.addr2[14:0]" out_port="mem_32768x1_dp.addr2"/>
+          </direct>
+          <direct name="data1" input="memory.data[0:0]" output="mem_32768x1_dp.data1">
+            <delay_constant max="132e-12" in_port="memory.data[0:0]" out_port="mem_32768x1_dp.data1"/>
+          </direct>
+          <direct name="data2" input="memory.data[1:1]" output="mem_32768x1_dp.data2">
+            <delay_constant max="132e-12" in_port="memory.data[1:1]" out_port="mem_32768x1_dp.data2"/>
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_32768x1_dp.we1">
+            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_32768x1_dp.we1"/>
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_32768x1_dp.we2">
+            <delay_constant max="132e-12" in_port="memory.we2" out_port="mem_32768x1_dp.we2"/>
+          </direct>
+          <direct name="dataout1" input="mem_32768x1_dp.out1" output="memory.out[0:0]">
+            <delay_constant max="40e-12" in_port="mem_32768x1_dp.out1" out_port="memory.out[0:0]"/>
+          </direct>
+          <direct name="dataout2" input="mem_32768x1_dp.out2" output="memory.out[1:1]">
+            <delay_constant max="40e-12" in_port="mem_32768x1_dp.out2" out_port="memory.out[1:1]"/>
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_32768x1_dp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+      <!-- Every input pin is driven by 15% of the tracks in a channel, every output pin drives 10% of the tracks in a channel -->
+      <!-- Place this memory block every 8 columns from (and including) the second column -->
+      <power method="sum-of-children"/>
+    </pb_type>
+    <!-- Define fracturable memory end -->
+  </complexblocklist>
+  <power>
+    <local_interconnect C_wire="2.5e-10"/>
+  </power>
+  <clocks>
+    <clock buffer_size="auto" C_wire="2.5e-10"/>
+  </clocks>
+</architecture>
diff --git a/third_party/vtr/libs/archfpga/src/arch_check.cc b/third_party/vtr/libs/archfpga/src/arch_check.cc
new file mode 100644
index 000000000..58e8ced5b
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/arch_check.cc
@@ -0,0 +1,412 @@
+#include <set>
+
+#include "vtr_log.h"
+#include "arch_error.h"
+#include "arch_check.h"
+
+bool check_model_clocks(t_model* model, const char* file, uint32_t line) {
+    //Pass 1: gather the name of every port (input or output) flagged as a clock
+    std::set<std::string> clock_names;
+    for (t_model_ports* head : {model->inputs, model->outputs}) {
+        for (t_model_ports* p = head; p != nullptr; p = p->next) {
+            if (p->is_clock) {
+                clock_names.insert(p->name);
+            }
+        }
+    }
+
+    //Pass 2: every port that names a clock must reference one of the names gathered above
+    for (t_model_ports* head : {model->inputs, model->outputs}) {
+        for (t_model_ports* p = head; p != nullptr; p = p->next) {
+            bool references_known_clock = p->clock.empty() || clock_names.count(p->clock) != 0;
+            if (references_known_clock) continue;
+            archfpga_throw(file, line,
+                           "No matching clock port '%s' on model '%s', required for port '%s'",
+                           p->clock.c_str(), model->name, p->name);
+        }
+    }
+    return true; //Only reached when no mismatch was thrown
+}
+
+bool check_model_combinational_sinks(const t_model* model, const char* file, uint32_t line) {
+    //An output port may never list combinational sinks of its own
+    for (t_model_ports* out = model->outputs; out != nullptr; out = out->next) {
+        if (!out->combinational_sink_ports.empty()) {
+            archfpga_throw(file, line,
+                           "Model '%s' output port '%s' can not have combinational sink ports",
+                           model->name, out->name);
+        }
+    }
+
+    //Index the output ports by name for the look-ups below
+    std::map<std::string, t_model_ports*> outputs_by_name;
+    for (t_model_ports* out = model->outputs; out != nullptr; out = out->next) {
+        outputs_by_name.insert({out->name, out});
+    }
+
+    for (t_model_ports* in = model->inputs; in != nullptr; in = in->next) {
+        for (const std::string& sink_name : in->combinational_sink_ports) {
+            //Every combinational sink of an input must name an output port of the model
+            auto found = outputs_by_name.find(sink_name);
+            if (found == outputs_by_name.end()) {
+                archfpga_throw(file, line,
+                               "Model '%s' input port '%s' can not be combinationally connected to '%s' (not an output port of the model)",
+                               model->name, in->name, sink_name.c_str());
+            }
+            //A combinationally driven output must not also be a clock source
+            t_model_ports* sink = found->second;
+            VTR_ASSERT(sink);
+            if (sink->is_clock) {
+                archfpga_throw(file, line,
+                               "Model '%s' output port '%s' can not be both: a clock source (is_clock=\"%d\"),"
+                               " and combinationally connected to input port '%s' (acting as a clock buffer).",
+                               model->name, sink->name, sink->is_clock, in->name);
+            }
+        }
+    }
+
+    return true;
+}
+
+void warn_model_missing_timing(const t_model* model, const char* file, uint32_t line) {
+    //Logs a warning for each port without any timing specification; nothing is thrown here
+    std::set<std::string> driven_outputs;
+    for (t_model_ports* in = model->inputs; in != nullptr; in = in->next) {
+        const auto& sinks = in->combinational_sink_ports;
+        bool is_sequential = !in->clock.empty();
+        bool untimed = !is_sequential && sinks.empty() && !in->is_clock;
+        if (untimed) {
+            VTR_LOGF_WARN(file, line,
+                          "Model '%s' input port '%s' has no timing specification (no clock specified to create a sequential input port, not combinationally connected to any outputs, not a clock input)\n", model->name, in->name);
+        }
+
+        driven_outputs.insert(sinks.begin(), sinks.end());
+    }
+
+    for (t_model_ports* out = model->outputs; out != nullptr; out = out->next) {
+        //An output is timed if it is sequential, combinationally driven by an input, or a clock
+        bool is_sequential = !out->clock.empty();
+        bool comb_driven = driven_outputs.count(out->name) != 0;
+        if (!is_sequential && !comb_driven && !out->is_clock) {
+            VTR_LOGF_WARN(file, line,
+                          "Model '%s' output port '%s' has no timing specification (no clock specified to create a sequential output port, not combinationally connected to any inputs, not a clock output)\n", model->name, out->name);
+        }
+    }
+}
+
+void check_port_direct_mappings(t_physical_tile_type_ptr physical_tile, t_sub_tile* sub_tile, t_logical_block_type_ptr logical_block) {
+    auto pb_type = logical_block->pb_type;
+    //Throws (via archfpga_throw) on the first inconsistency found between the logical block and this sub tile
+    if (pb_type->num_pins > (sub_tile->num_phy_pins / sub_tile->capacity.total())) {
+        archfpga_throw(__FILE__, __LINE__,
+                       "Logical Block (%s) has more pins than the Sub Tile (%s).\n",
+                       logical_block->name, sub_tile->name);
+    }
+    //Fetch the pin mapping recorded for this (logical block, sub tile) pair; bound by reference to avoid copying it
+    auto& pin_direct_maps = physical_tile->tile_block_pin_directs_map.at(logical_block->index);
+    const auto& pin_direct_map = pin_direct_maps.at(sub_tile->index);
+
+    if (pb_type->num_pins != (int)pin_direct_map.size()) {
+        archfpga_throw(__FILE__, __LINE__,
+                       "Logical block (%s) and Sub tile (%s) have a different number of ports.\n",
+                       logical_block->name, sub_tile->name);
+    }
+    //Each mapped pin pair must belong to ports with the same type, width and equivalence
+    for (const auto& pin_map : pin_direct_map) {
+        auto block_port = get_port_by_pin(logical_block, pin_map.first.pin);
+
+        auto sub_tile_port = get_port_by_pin(sub_tile, pin_map.second.pin);
+
+        VTR_ASSERT(block_port != nullptr);
+        VTR_ASSERT(sub_tile_port != nullptr);
+
+        if (sub_tile_port->type != block_port->type
+            || sub_tile_port->num_pins != block_port->num_pins
+            || sub_tile_port->equivalent != block_port->equivalent) {
+            archfpga_throw(__FILE__, __LINE__,
+                           "Logical block (%s) and Sub tile (%s) do not have equivalent port specifications. Sub tile port %s, logical block port %s\n",
+                           logical_block->name, sub_tile->name, sub_tile_port->name, block_port->name);
+        }
+    }
+}
+
+bool check_leaf_pb_model_timing_consistency(const t_pb_type* pb_type, const t_arch& arch) {
+    //Cross-checks the delay annotations of a leaf pb_type against its model (throws or warns on mismatch).
+    //First normalize the blif model name to match the model name by dropping everything up to and
+    // including a '.subckt ' prefix; other names (e.g. .latch, .names) are compared unchanged
+    VTR_ASSERT(pb_type->blif_model);
+    std::string blif_model = pb_type->blif_model;
+    std::string subckt = ".subckt ";
+    auto pos = blif_model.find(subckt);
+    if (pos != std::string::npos) {
+        blif_model = blif_model.substr(pos + subckt.size());
+    }
+
+    //Find the matching model
+    const t_model* model = nullptr;
+
+    for (const t_model* models : {arch.models, arch.model_library}) {
+        for (model = models; model != nullptr; model = model->next) {
+            if (std::string(model->name) == blif_model) {
+                break;
+            }
+        }
+        if (model != nullptr) {
+            break;
+        }
+    }
+    if (model == nullptr) {
+        archfpga_throw(get_arch_file_name(), -1,
+                       "Unable to find model for blif_model '%s' found on pb_type '%s'",
+                       blif_model.c_str(), pb_type->name);
+    }
+
+    //Now that we have the model we can compare the timing annotations
+
+    //Check from the pb_type's delay annotations match the model
+    //
+    //  This ensures that the pb_types' delay annotations are consistent with the model
+    for (int i = 0; i < pb_type->num_annotations; ++i) {
+        const t_pin_to_pin_annotation* annot = &pb_type->annotations[i];
+
+        if (annot->type == E_ANNOT_PIN_TO_PIN_DELAY) {
+            //Check that any combinational delays specified match the 'combinational_sinks_ports' in the model
+
+            if (annot->clock) {
+                //Sequential annotation, check that the clock on the specified port matches the model
+
+                //Annotations always put the pin in the input_pins field
+                VTR_ASSERT(annot->input_pins);
+                for (const std::string& input_pin : vtr::split(annot->input_pins)) {
+                    InstPort annot_port(input_pin);
+                    for (const std::string& clock : vtr::split(annot->clock)) {
+                        InstPort annot_clock(clock);
+
+                        //Find the model port
+                        const t_model_ports* model_port = nullptr;
+                        for (const t_model_ports* ports : {model->inputs, model->outputs}) {
+                            for (const t_model_ports* port = ports; port != nullptr; port = port->next) {
+                                if (port->name == annot_port.port_name()) {
+                                    model_port = port;
+                                    break;
+                                }
+                            }
+                            if (model_port != nullptr) break;
+                        }
+                        if (model_port == nullptr) {
+                            archfpga_throw(get_arch_file_name(), annot->line_num,
+                                           "Failed to find port '%s' on '%s' for sequential delay annotation",
+                                           annot_port.port_name().c_str(), annot_port.instance_name().c_str());
+                        }
+
+                        //Check that the clock matches the model definition
+                        std::string model_clock = model_port->clock;
+                        if (model_clock.empty()) {
+                            archfpga_throw(get_arch_file_name(), annot->line_num,
+                                           "<pb_type> timing-annotation/<model> mismatch on port '%s' of model '%s', model specifies"
+                                           " no clock but timing annotation specifies '%s'",
+                                           annot_port.port_name().c_str(), model->name, annot_clock.port_name().c_str());
+                        }
+                        if (model_clock != annot_clock.port_name()) {
+                            archfpga_throw(get_arch_file_name(), annot->line_num,
+                                           "<pb_type> timing-annotation/<model> mismatch on port '%s' of model '%s', model specifies"
+                                           " clock as '%s' but timing annotation specifies '%s'",
+                                           annot_port.port_name().c_str(), model->name, model_clock.c_str(), annot_clock.port_name().c_str());
+                        }
+                    }
+                }
+
+            } else if (annot->input_pins && annot->output_pins) {
+                //Combinational annotation
+                VTR_ASSERT_MSG(!annot->clock, "Combinational annotations should have no clock");
+                for (const std::string& input_pin : vtr::split(annot->input_pins)) {
+                    InstPort annot_in(input_pin);
+                    for (const std::string& output_pin : vtr::split(annot->output_pins)) {
+                        InstPort annot_out(output_pin);
+
+                        //Find the input model port
+                        const t_model_ports* model_port = nullptr;
+                        for (const t_model_ports* port = model->inputs; port != nullptr; port = port->next) {
+                            if (port->name == annot_in.port_name()) {
+                                model_port = port;
+                                break;
+                            }
+                        }
+
+                        if (model_port == nullptr) {
+                            archfpga_throw(get_arch_file_name(), annot->line_num,
+                                           "Failed to find port '%s' on '%s' for combinational delay annotation",
+                                           annot_in.port_name().c_str(), annot_in.instance_name().c_str());
+                        }
+
+                        //Check that the output port is listed in the model's combinational sinks
+                        auto b = model_port->combinational_sink_ports.begin();
+                        auto e = model_port->combinational_sink_ports.end();
+                        auto iter = std::find(b, e, annot_out.port_name());
+                        if (iter == e) {
+                            archfpga_throw(get_arch_file_name(), annot->line_num,
+                                           "<pb_type> timing-annotation/<model> mismatch on port '%s' of model '%s', timing annotation"
+                                           " specifies combinational connection to port '%s' but the connection does not exist in the model",
+                                           model_port->name, model->name, annot_out.port_name().c_str());
+                        }
+                    }
+                }
+            } else {
+                throw ArchFpgaError("Unrecognized delay annotation");
+            }
+        }
+    }
+
+    //Build a list of combinationally connected sinks
+    std::set<std::string> comb_connected_outputs;
+    for (t_model_ports* model_ports : {model->inputs, model->outputs}) {
+        for (t_model_ports* model_port = model_ports; model_port != nullptr; model_port = model_port->next) {
+            comb_connected_outputs.insert(model_port->combinational_sink_ports.begin(), model_port->combinational_sink_ports.end());
+        }
+    }
+
+    //Check from the model to pb_type's delay annotations
+    //
+    //  This ensures that the pb_type has annotations for all delays/values
+    //  required by the model
+    for (t_model_ports* model_ports : {model->inputs, model->outputs}) {
+        for (t_model_ports* model_port = model_ports; model_port != nullptr; model_port = model_port->next) {
+            //If the model port has no timing specification don't check anything (e.g. architectures with no timing info)
+            if (model_port->clock.empty()
+                && model_port->combinational_sink_ports.empty()
+                && !comb_connected_outputs.count(model_port->name)) {
+                continue;
+            }
+
+            if (!model_port->clock.empty()) {
+                //Sequential port
+
+                if (model_port->dir == IN_PORT) {
+                    //Sequential inputs must have a T_setup or T_hold
+                    if (find_sequential_annotation(pb_type, model_port, E_ANNOT_PIN_TO_PIN_DELAY_TSETUP) == nullptr
+                        && find_sequential_annotation(pb_type, model_port, E_ANNOT_PIN_TO_PIN_DELAY_THOLD) == nullptr) {
+                        std::stringstream msg;
+                        msg << "<pb_type> '" << pb_type->name << "' timing-annotation/<model> mismatch on";
+                        msg << " port '" << model_port->name << "' of model '" << model->name << "',";
+                        msg << " port is a sequential input but has neither T_setup nor T_hold specified";
+
+                        if (is_library_model(model)) {
+                            //Only warn if timing info is missing from a library model (e.g. .names/.latch on a non-timing architecture)
+                            VTR_LOGF_WARN(get_arch_file_name(), -1, "%s\n", msg.str().c_str());
+                        } else {
+                            //The message embeds user-supplied names, so never use it as the format string itself
+                            archfpga_throw(get_arch_file_name(), -1, "%s", msg.str().c_str());
+                        }
+                    }
+
+                    if (!model_port->combinational_sink_ports.empty()) {
+                        //Sequential input with internal combinational connections must also have T_clock_to_Q
+                        if (find_sequential_annotation(pb_type, model_port, E_ANNOT_PIN_TO_PIN_DELAY_CLOCK_TO_Q_MAX) == nullptr
+                            && find_sequential_annotation(pb_type, model_port, E_ANNOT_PIN_TO_PIN_DELAY_CLOCK_TO_Q_MIN) == nullptr) {
+                            std::stringstream msg;
+                            msg << "<pb_type> '" << pb_type->name << "' timing-annotation/<model> mismatch on";
+                            msg << " port '" << model_port->name << "' of model '" << model->name << "',";
+                            msg << " port is a sequential input with internal combinational connects but has neither";
+                            msg << " min nor max T_clock_to_Q specified";
+
+                            if (is_library_model(model)) {
+                                //Only warn if timing info is missing from a library model (e.g. .names/.latch on a non-timing architecture)
+                                VTR_LOGF_WARN(get_arch_file_name(), -1, "%s\n", msg.str().c_str());
+                            } else {
+                                archfpga_throw(get_arch_file_name(), -1, "%s", msg.str().c_str());
+                            }
+                        }
+                    }
+
+                } else {
+                    VTR_ASSERT(model_port->dir == OUT_PORT);
+                    //Sequential outputs must have T_clock_to_Q
+                    if (find_sequential_annotation(pb_type, model_port, E_ANNOT_PIN_TO_PIN_DELAY_CLOCK_TO_Q_MAX) == nullptr
+                        && find_sequential_annotation(pb_type, model_port, E_ANNOT_PIN_TO_PIN_DELAY_CLOCK_TO_Q_MIN) == nullptr) {
+                        std::stringstream msg;
+                        msg << "<pb_type> '" << pb_type->name << "' timing-annotation/<model> mismatch on";
+                        msg << " port '" << model_port->name << "' of model '" << model->name << "',";
+                        msg << " port is a sequential output but has neither min nor max T_clock_to_Q specified";
+
+                        if (is_library_model(model)) {
+                            //Only warn if timing info is missing from a library model (e.g. .names/.latch on a non-timing architecture)
+                            VTR_LOGF_WARN(get_arch_file_name(), -1, "%s\n", msg.str().c_str());
+                        } else {
+                            archfpga_throw(get_arch_file_name(), -1, "%s", msg.str().c_str());
+                        }
+                    }
+
+                    if (comb_connected_outputs.count(model_port->name)) {
+                        //Sequential output with internal combinational connections must have T_setup/T_hold
+                        if (find_sequential_annotation(pb_type, model_port, E_ANNOT_PIN_TO_PIN_DELAY_TSETUP) == nullptr
+                            && find_sequential_annotation(pb_type, model_port, E_ANNOT_PIN_TO_PIN_DELAY_THOLD) == nullptr) {
+                            std::stringstream msg;
+                            msg << "<pb_type> '" << pb_type->name << "' timing-annotation/<model> mismatch on";
+                            msg << " port '" << model_port->name << "' of model '" << model->name << "',";
+                            msg << " port is a sequential output with internal combinational connections but has";
+                            msg << " neither T_setup nor T_hold specified";
+
+                            if (is_library_model(model)) {
+                                //Only warn if timing info is missing from a library model (e.g. .names/.latch on a non-timing architecture)
+                                VTR_LOGF_WARN(get_arch_file_name(), -1, "%s\n", msg.str().c_str());
+                            } else {
+                                archfpga_throw(get_arch_file_name(), -1, "%s", msg.str().c_str());
+                            }
+                        }
+                    }
+                }
+            }
+
+            //Check that combinationally connected inputs/outputs have combinational delays between them
+            if (model_port->dir == IN_PORT) {
+                for (const auto& sink_port : model_port->combinational_sink_ports) {
+                    if (find_combinational_annotation(pb_type, model_port->name, sink_port) == nullptr) {
+                        std::stringstream msg;
+                        msg << "<pb_type> '" << pb_type->name << "' timing-annotation/<model> mismatch on";
+                        msg << " port '" << model_port->name << "' of model '" << model->name << "',";
+                        msg << " input port '" << model_port->name << "' has combinational connections to";
+                        msg << " port '" << sink_port.c_str() << "'; specified in model, but no combinational delays found on pb_type";
+
+                        if (is_library_model(model)) {
+                            //Only warn if timing info is missing from a library model (e.g. .names/.latch on a non-timing architecture)
+                            VTR_LOGF_WARN(get_arch_file_name(), -1, "%s\n", msg.str().c_str());
+                        } else {
+                            archfpga_throw(get_arch_file_name(), -1, "%s", msg.str().c_str());
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    return true; //Hard failures throw above, so reaching here means the checks passed (possibly with warnings)
+}
+
+void check_models(t_arch* arch) {
+    for (t_model* model = arch->models; model != nullptr; model = model->next) {
+        if (model->pb_types == nullptr) {
+            archfpga_throw(get_arch_file_name(), 0,
+                           "No pb_type found for model %s\n", model->name);
+        }
+        //Number the ports per category: clock inputs, other inputs and outputs each count up from zero
+        int clk_count, input_count, output_count;
+        clk_count = input_count = output_count = 0;
+        for (auto ports : {model->inputs, model->outputs}) {
+            for (auto port = ports; port != nullptr; port = port->next) {
+                int index;
+                switch (port->dir) {
+                    case IN_PORT:
+                        index = port->is_clock ? clk_count++ : input_count++;
+                        break;
+                    case OUT_PORT:
+                        index = output_count++;
+                        break;
+                    default: //The direction is printed numerically; every conversion now has a matching argument
+                        archfpga_throw(get_arch_file_name(), 0,
+                                       "Port %s of model %s, has an unrecognized type %d\n", port->name, model->name, (int)port->dir);
+                }
+                //index is always assigned here: the default case above throws and never falls through
+                port->index = index;
+            }
+        }
+    }
+}
diff --git a/third_party/vtr/libs/archfpga/src/arch_check.h b/third_party/vtr/libs/archfpga/src/arch_check.h
new file mode 100644
index 000000000..20b3ad30d
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/arch_check.h
@@ -0,0 +1,80 @@
+#ifndef ARCH_CHECK_H
+#define ARCH_CHECK_H
+
+/**
+ *  This file includes all the definitions of functions whose purpose is to
+ *  check the correctness of the architecture's internal data structures.
+ *
+ *  All new functions corresponding to the architecture checking should end up here.
+ */
+
+#include "arch_types.h"
+#include "arch_util.h"
+
+#include "physical_types_util.h"
+
+#include "vtr_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief Checks whether the model has correct clock port specifications
+ *
+ * @param model model definition
+ * @param file architecture file
+ * @param line line in the architecture file that generates the failure
+ */
+bool check_model_clocks(t_model* model, const char* file, uint32_t line);
+
+/**
+ * @brief Checks the correctness of the combinational sinks in the model inputs to outputs connections
+ *
+ * @param model model definition
+ * @param file architecture file
+ * @param line line in the architecture file that generates the failure
+ */
+bool check_model_combinational_sinks(const t_model* model, const char* file, uint32_t line);
+
+/**
+ * @brief Checks whether the I/O ports can have timing specifications based on their connectivity.
+ *        A port can have timing specs if it is either clocked or combinationally connected to a
+ *        corresponding I/O port.
+ *        If the check fails, a warning is printed in the output log.
+ *
+ * @param model model definition
+ * @param file architecture file
+ * @param line line in the architecture file that generates the failure
+ */
+void warn_model_missing_timing(const t_model* model, const char* file, uint32_t line);
+
+/**
+ * @brief Checks the consistency of the mappings between a logical block and the corresponding physical tile.
+ *
+ * @param physical_tile physical tile type
+ * @param sub_tile sub tile to check
+ * @param logical_block logical block type
+ */
+void check_port_direct_mappings(t_physical_tile_type_ptr physical_tile, t_sub_tile* sub_tile, t_logical_block_type_ptr logical_block);
+
+/**
+ * @brief Checks the timing consistency between the pb_type and the corresponding model.
+ *
+ * @param pb_type pb type to check
+ * @param arch architecture data structure
+ */
+bool check_leaf_pb_model_timing_consistency(const t_pb_type* pb_type, const t_arch& arch);
+
+/**
+ * @brief Checks that each model has at least one corresponding pb type. This function also updates the port indices of the models
+ *        based on their type: e.g. clock, input, output.
+ *
+ * @param arch architecture data structure
+ */
+void check_models(t_arch* arch);
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/third_party/vtr/libs/archfpga/src/arch_error.cc b/third_party/vtr/libs/archfpga/src/arch_error.cc
new file mode 100644
index 000000000..a9d502ae2
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/arch_error.cc
@@ -0,0 +1,16 @@
+#include <cstdarg>
+
+#include "vtr_util.h"
+#include "arch_error.h"
+
+void archfpga_throw(const char* filename, int line, const char* fmt, ...) {
+    //Expand the printf-style arguments into a single message
+    va_list args;
+
+    va_start(args, fmt);
+    auto message = vtr::vstring_fmt(fmt, args);
+    va_end(args);
+
+    //Raise the failure as an ArchFpgaError tagged with the given file name and line
+    throw ArchFpgaError(message, filename, line);
+}
diff --git a/third_party/vtr/libs/archfpga/src/arch_error.h b/third_party/vtr/libs/archfpga/src/arch_error.h
new file mode 100644
index 000000000..0dae1d859
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/arch_error.h
@@ -0,0 +1,18 @@
+#ifndef ARCH_ERROR_H
+#define ARCH_ERROR_H
+
+#include "vtr_error.h"
+#include <cstdarg>
+
+//Note that we mark this function with the C++11 attribute 'noreturn'
+//as it will throw exceptions and not return normally. This can help
+//reduce false-positive compiler warnings.
+[[noreturn]] void archfpga_throw(const char* filename, int line, const char* fmt, ...);
+
+class ArchFpgaError : public vtr::VtrError { //Exception type thrown by archfpga_throw()
+  public:
+    ArchFpgaError(std::string msg = "", std::string new_filename = "", size_t new_linenumber = -1) //default line is -1 converted to size_t
+        : vtr::VtrError(msg, new_filename, new_linenumber) {} //All three arguments are forwarded to the vtr::VtrError base
+};
+
+#endif
diff --git a/third_party/vtr/libs/archfpga/src/arch_types.h b/third_party/vtr/libs/archfpga/src/arch_types.h
new file mode 100644
index 000000000..9f88a6466
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/arch_types.h
@@ -0,0 +1,35 @@
+/*
+ * Data types describing the FPGA architecture.
+ *
+ * Date: February 19, 2009
+ * Authors: Jason Luu and Kenneth Kent
+ */
+
+#ifndef ARCH_TYPES_H
+#define ARCH_TYPES_H
+
+#include "logic_types.h"
+#include "physical_types.h"
+#include "cad_types.h"
+
+/* Input file parsing. */
+#define TOKENS " \t\n"
+
+/* Value for UNDEFINED data */
+constexpr int UNDEFINED = -1;
+
+/* Maximum value for minimum channel width to avoid overflows of short data type. */
+constexpr int MAX_CHANNEL_WIDTH = 8000;
+
+/* Built-in library models */
+constexpr const char* MODEL_NAMES = ".names";
+constexpr const char* MODEL_LATCH = ".latch";
+constexpr const char* MODEL_INPUT = ".input";
+constexpr const char* MODEL_OUTPUT = ".output";
+
+enum class e_arch_format { //Identifies the format of a device/architecture description
+    VTR,            ///<VTR-specific device XML format
+    FPGAInterchange ///<FPGA Interchange device format
+};
+
+#endif
diff --git a/third_party/vtr/libs/archfpga/src/arch_util.cc b/third_party/vtr/libs/archfpga/src/arch_util.cc
new file mode 100644
index 000000000..a8d89c91e
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/arch_util.cc
@@ -0,0 +1,1572 @@
+#include <cstring>
+#include <sstream>
+
+#include "vtr_assert.h"
+#include "vtr_memory.h"
+#include "vtr_util.h"
+
+#include "arch_types.h"
+#include "arch_util.h"
+#include "arch_error.h"
+
+#include "read_xml_arch_file.h"
+#include "read_xml_util.h"
+
+/******************** Subroutine declarations ********************************/
+
+static void free_all_pb_graph_nodes(std::vector<t_logical_block_type>& type_descriptors);
+static void free_pb_graph(t_pb_graph_node* pb_graph_node);
+static void free_pb_type(t_pb_type* pb_type);
+
+/******************** End Subroutine declarations ****************************/
+
+/* This gives access to the architecture file name to
+ * all architecture-parser functions */
+static const char* arch_file_name = nullptr;
+
+void set_arch_file_name(const char* arch) {
+    arch_file_name = arch; /* only the pointer is stored; no copy of the string is made */
+}
+
+/* Used by functions outside read_xml_util.c to gain access to arch filename.
+ * Returns the pointer stored by set_arch_file_name(); asserts that it is not null. */
+const char* get_arch_file_name() {
+    VTR_ASSERT(arch_file_name != nullptr);
+    return arch_file_name;
+}
+
+//Parses a "[instance.]port" specification; each '.'-separated part goes through parse_name_index()
+InstPort::InstPort(std::string str) {
+    const std::vector<std::string> fields = vtr::split(str, ".");
+    switch (fields.size()) {
+        case 1: //Port only: the instance keeps a default-constructed name_index
+            instance_ = name_index();
+            port_ = parse_name_index(fields[0]);
+            break;
+        case 2: //Instance followed by port
+            instance_ = parse_name_index(fields[0]);
+            port_ = parse_name_index(fields[1]);
+            break;
+        default: //No part at all, or more than one '.' separator
+            throw ArchFpgaError(vtr::string_fmt("Failed to parse instance port specification '%s'", str.c_str()));
+    }
+}
+
+//Parses one '<name>', '<name>[<idx>]' or '<name>[<first>:<second>]' token.
+//Returns the name plus the smaller/larger of the two indices as low_idx/high_idx
+//(both are UNSPECIFIED when the token has no brackets). Text following the ']' is
+//ignored. Throws ArchFpgaError if the brackets or colon are malformed, or if an
+//index cannot be parsed completely as an integer.
+InstPort::name_index InstPort::parse_name_index(const std::string& str) {
+    const auto open_bracket_pos = str.find('[');
+    const auto close_bracket_pos = str.find(']');
+    const auto colon_pos = str.find(':');
+    const bool has_open = (open_bracket_pos != std::string::npos);
+    const bool has_close = (close_bracket_pos != std::string::npos);
+    const bool has_colon = (colon_pos != std::string::npos);
+
+    //Parse checks
+    if (!has_open && has_close) {
+        //Close brace only
+        throw ArchFpgaError("near '" + str + "', missing '['");
+    }
+
+    if (has_open && !has_close) {
+        //Open brace only
+        throw ArchFpgaError("near '" + str + "', missing ']'");
+    }
+
+    if (has_open && has_close && open_bracket_pos > close_bracket_pos) {
+        //Have open and close braces, close must be after open
+        throw ArchFpgaError("near '" + str + "', '[' after ']'");
+    }
+
+    //A colon must lie between the braces. Without braces both positions are
+    //npos, so the second comparison rejects every colon.
+    if (has_colon && (colon_pos > close_bracket_pos || colon_pos < open_bracket_pos)) {
+        throw ArchFpgaError("near '" + str + "', found ':' but not between '[' and ']'");
+    }
+
+    //Converts the text of one index to an int. Surrounding whitespace is tolerated;
+    //an empty token or any other leftover character is reported as an error.
+    auto parse_index = [&str](const std::string& token) -> int {
+        std::stringstream ss(token);
+        size_t idx = 0;
+        bool ok = static_cast<bool>(ss >> idx);
+        if (ok) {
+            //Skip trailing whitespace so that eof() means "token fully consumed".
+            //good() must not be used for this: it is false after a successful
+            //read that reached the end of the token.
+            ss >> std::ws;
+            ok = ss.eof();
+        }
+        if (!ok) {
+            throw ArchFpgaError("near '" + str + "', expected positive integer");
+        }
+        return static_cast<int>(idx);
+    };
+
+    //Extract the name and index info
+    name_index value;
+    value.name = str.substr(0, open_bracket_pos);
+
+    int first_idx = UNSPECIFIED;
+    int second_idx = UNSPECIFIED;
+    if (has_open) {
+        //std::string::substr() takes (position, COUNT) rather than (begin, end),
+        //so each length is derived from the delimiter positions.
+        const auto first_begin = open_bracket_pos + 1;
+        if (!has_colon) {
+            //No colon, implies a single element
+            first_idx = parse_index(str.substr(first_begin, close_bracket_pos - first_begin));
+            second_idx = first_idx;
+        } else {
+            //Colon, implies a range
+            const auto second_begin = colon_pos + 1;
+            first_idx = parse_index(str.substr(first_begin, colon_pos - first_begin));
+            second_idx = parse_index(str.substr(second_begin, close_bracket_pos - second_begin));
+        }
+    }
+
+    value.low_idx = std::min(first_idx, second_idx);
+    value.high_idx = std::max(first_idx, second_idx);
+    return value;
+}
+
+int InstPort::num_instances() const { //Size of the inclusive instance index range; throws if a bound is UNSPECIFIED
+    const auto high = instance_high_index();
+    const auto low = instance_low_index();
+    if (high == UNSPECIFIED || low == UNSPECIFIED) throw ArchFpgaError("Unspecified instance indicies");
+    return high - low + 1;
+}
+
+int InstPort::num_pins() const { //Size of the inclusive port index range; throws if a bound is UNSPECIFIED
+    const auto high = port_high_index();
+    const auto low = port_low_index();
+    if (high == UNSPECIFIED || low == UNSPECIFIED) throw ArchFpgaError("Unspecified port indicies");
+    return high - low + 1;
+}
+
+//Frees the switches, models, directs, architecture id, model library, clock info and
+//NoC data reachable from 'arch' (not the t_arch object itself). No-op for a null 'arch'.
+void free_arch(t_arch* arch) {
+    if (arch == nullptr) return;
+    for (int i = 0; i < arch->num_switches; ++i) {
+        //Test the name of switch i itself (arch->Switches->name would always inspect switch 0)
+        if (arch->Switches[i].name != nullptr) {
+            vtr::free(arch->Switches[i].name);
+        }
+    }
+    delete[] arch->Switches;
+    arch->Switches = nullptr;
+
+    free_arch_models(arch->models);
+
+    for (int i = 0; i < arch->num_directs; ++i) {
+        vtr::free(arch->Directs[i].name);
+        vtr::free(arch->Directs[i].from_pin);
+        vtr::free(arch->Directs[i].to_pin);
+    }
+    vtr::free(arch->Directs);
+
+    vtr::free(arch->architecture_id);
+
+    if (arch->model_library) {
+        for (int i = 0; i < 4; ++i) { //NOTE(review): 4 is presumably the number of built-in library models - confirm
+            vtr::t_linked_vptr* vptr = arch->model_library[i].pb_types;
+            while (vptr) {
+                vtr::t_linked_vptr* vptr_prev = vptr;
+                vptr = vptr->next; //Advance before the current node is released
+                vtr::free(vptr_prev);
+            }
+        }
+
+        vtr::free(arch->model_library[0].name);
+        vtr::free(arch->model_library[0].outputs->name);
+        delete[] arch->model_library[0].outputs;
+        vtr::free(arch->model_library[1].inputs->name);
+        delete[] arch->model_library[1].inputs;
+        vtr::free(arch->model_library[1].name);
+        vtr::free(arch->model_library[2].name);
+        vtr::free(arch->model_library[2].inputs[0].name);
+        vtr::free(arch->model_library[2].inputs[1].name);
+        delete[] arch->model_library[2].inputs;
+        vtr::free(arch->model_library[2].outputs->name);
+        delete[] arch->model_library[2].outputs;
+        vtr::free(arch->model_library[3].name);
+        vtr::free(arch->model_library[3].inputs->name);
+        delete[] arch->model_library[3].inputs;
+        vtr::free(arch->model_library[3].outputs->name);
+        delete[] arch->model_library[3].outputs;
+        delete[] arch->model_library;
+    }
+
+    if (arch->clocks) {
+        vtr::free(arch->clocks->clock_inf);
+    }
+
+    delete (arch->noc);
+}
+
+//Releases every model of the linked list that starts at 'models'
+void free_arch_models(t_model* models) {
+    //free_arch_model() returns the successor of the model it released, which drives the walk
+    for (t_model* cursor = models; cursor != nullptr;) {
+        cursor = free_arch_model(cursor);
+    }
+}
+
+//Releases one model (its ports, pb_types list nodes, instances and name) and
+//returns its successor in the linked list, or nullptr if there is none
+t_model* free_arch_model(t_model* model) {
+    if (model == nullptr) {
+        return nullptr;
+    }
+    //Remember the successor now: 'model' is deleted before this function returns
+    t_model* const successor = model->next;
+
+    free_arch_model_ports(model->inputs);
+    free_arch_model_ports(model->outputs);
+
+    for (vtr::t_linked_vptr* node = model->pb_types; node != nullptr;) {
+        vtr::t_linked_vptr* const doomed = node;
+        node = node->next; //Advance before the node is released
+        vtr::free(doomed);
+    }
+
+    if (model->instances != nullptr) vtr::free(model->instances);
+    vtr::free(model->name);
+    delete model;
+    return successor;
+}
+
+//Releases every model port of the linked list that starts at 'model_ports'
+void free_arch_model_ports(t_model_ports* model_ports) {
+    //free_arch_model_port() returns the successor of the port it released
+    for (t_model_ports* cursor = model_ports; cursor != nullptr;) {
+        cursor = free_arch_model_port(cursor);
+    }
+}
+
+//Releases a single model port together with its name and returns the port that
+//followed it in the linked list (nullptr for a null input or at the end of the list)
+t_model_ports* free_arch_model_port(t_model_ports* model_port) {
+    if (model_port == nullptr) {
+        return nullptr;
+    }
+    t_model_ports* const successor = model_port->next; //Read before the port is deleted
+    vtr::free(model_port->name);
+    delete model_port;
+    return successor;
+}
+
+//Frees the name of every physical tile type and, except for the EMPTY type, the
+//names of its sub-tiles and of their ports; finally empties the vector.
+void free_type_descriptors(std::vector<t_physical_tile_type>& type_descriptors) {
+    for (auto& type : type_descriptors) {
+        vtr::free(type.name);
+        if (type.index == EMPTY_TYPE_INDEX) continue;
+
+        for (auto& sub_tile : type.sub_tiles) {
+            vtr::free(sub_tile.name);
+            //By reference: copying a whole port object just to free its name is wasted work
+            for (auto& port : sub_tile.ports) {
+                vtr::free(port.name);
+            }
+        }
+    }
+    type_descriptors.clear();
+}
+
+//Frees the pb_graphs, the pb_type hierarchies and the names of the logical block types, then empties the vector
+void free_type_descriptors(std::vector<t_logical_block_type>& type_descriptors) {
+    //The graphs go first: free_pb_graph() still reads the pb_type of each node
+    free_all_pb_graph_nodes(type_descriptors);
+
+    for (t_logical_block_type& descriptor : type_descriptors) {
+        vtr::free(descriptor.name);
+        if (descriptor.index != EMPTY_TYPE_INDEX) { //For the EMPTY type only the name is freed in this loop
+            free_pb_type(descriptor.pb_type);
+            delete descriptor.pb_type;
+        }
+    }
+    type_descriptors.clear();
+}
+
+//Frees the pb_graph of every logical block type that has both a pb_type and a graph head
+static void free_all_pb_graph_nodes(std::vector<t_logical_block_type>& type_descriptors) {
+    for (t_logical_block_type& descriptor : type_descriptors) {
+        if (!descriptor.pb_type || !descriptor.pb_graph_head) {
+            continue;
+        }
+        free_pb_graph(descriptor.pb_graph_head); //Releases what the head node owns...
+        delete descriptor.pb_graph_head;         //...and this releases the head node itself
+    }
+}
+
+/* Recursively frees everything owned by pb_graph_node: its pin arrays, interconnect pin
+ * tables, power data and all child graph nodes. The node object itself is not freed here. */
+static void free_pb_graph(t_pb_graph_node* pb_graph_node) {
+    int i, j, k;
+    const t_pb_type* pb_type;
+    pb_type = pb_graph_node->pb_type;
+    /*free all lists of connectable input pin pointer of pb_graph_node and its children*/
+    /*free_list_of_connectable_input_pin_ptrs (pb_graph_node);*/
+
+    /* Free ports for pb graph node */
+    for (i = 0; i < pb_graph_node->num_input_ports; i++) {
+        for (j = 0; j < pb_graph_node->num_input_pins[i]; j++) {
+            if (pb_graph_node->input_pins[i][j].parent_pin_class)
+                delete[] pb_graph_node->input_pins[i][j].parent_pin_class;
+        }
+        delete[] pb_graph_node->input_pins[i];
+    }
+    for (i = 0; i < pb_graph_node->num_output_ports; i++) {
+        for (j = 0; j < pb_graph_node->num_output_pins[i]; j++) {
+            if (pb_graph_node->output_pins[i][j].parent_pin_class)
+                delete[] pb_graph_node->output_pins[i][j].parent_pin_class;
+
+            if (pb_graph_node->output_pins[i][j].list_of_connectable_input_pin_ptrs) {
+                for (k = 0; k < pb_graph_node->pb_type->depth; k++) { /* one inner array per k in 0 .. depth-1 */
+                    delete[] pb_graph_node->output_pins[i][j].list_of_connectable_input_pin_ptrs[k];
+                }
+                delete[] pb_graph_node->output_pins[i][j].list_of_connectable_input_pin_ptrs;
+            }
+
+            if (pb_graph_node->output_pins[i][j].num_connectable_primitive_input_pins)
+                delete[] pb_graph_node->output_pins[i][j].num_connectable_primitive_input_pins;
+        }
+        delete[] pb_graph_node->output_pins[i];
+    }
+    for (i = 0; i < pb_graph_node->num_clock_ports; i++) {
+        for (j = 0; j < pb_graph_node->num_clock_pins[i]; j++) {
+            if (pb_graph_node->clock_pins[i][j].parent_pin_class)
+                delete[] pb_graph_node->clock_pins[i][j].parent_pin_class;
+        }
+        delete[] pb_graph_node->clock_pins[i];
+    }
+
+    delete[] pb_graph_node->input_pins;
+    delete[] pb_graph_node->output_pins;
+    delete[] pb_graph_node->clock_pins;
+
+    delete[] pb_graph_node->num_input_pins;
+    delete[] pb_graph_node->num_output_pins;
+    delete[] pb_graph_node->num_clock_pins;
+
+    delete[] pb_graph_node->input_pin_class_size;
+    delete[] pb_graph_node->output_pin_class_size;
+
+    if (pb_graph_node->interconnect_pins) {
+        for (i = 0; i < pb_graph_node->pb_type->num_modes; i++) {
+            if (pb_graph_node->interconnect_pins[i] == nullptr) continue; /* no table exists for this mode */
+
+            t_mode* mode = &pb_graph_node->pb_type->modes[i];
+
+            for (j = 0; j < mode->num_interconnect; ++j) {
+                //The interconnect_pins data structures are only initialized for power analysis and
+                //are bizarrely baroque...
+                t_interconnect* interconn = pb_graph_node->interconnect_pins[i][j].interconnect;
+                VTR_ASSERT(interconn == &mode->interconnect[j]);
+
+                t_interconnect_power* interconn_power = interconn->interconnect_power; /* NOTE(review): dereferenced below without a null check - confirm it is always set here */
+                for (int iport = 0; iport < interconn_power->num_input_ports; ++iport) {
+                    delete[] pb_graph_node->interconnect_pins[i][j].input_pins[iport];
+                }
+                for (int iport = 0; iport < interconn_power->num_output_ports; ++iport) {
+                    delete[] pb_graph_node->interconnect_pins[i][j].output_pins[iport];
+                }
+                delete[] pb_graph_node->interconnect_pins[i][j].input_pins;
+                delete[] pb_graph_node->interconnect_pins[i][j].output_pins;
+            }
+            delete[] pb_graph_node->interconnect_pins[i];
+        }
+    }
+    delete[] pb_graph_node->interconnect_pins;
+    delete pb_graph_node->pb_node_power;
+    /* Children: one node array per (mode, child pb_type) pair, holding num_pb nodes each */
+    for (i = 0; i < pb_type->num_modes; i++) {
+        for (j = 0; j < pb_type->modes[i].num_pb_type_children; j++) {
+            for (k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb; k++) {
+                free_pb_graph(&pb_graph_node->child_pb_graph_nodes[i][j][k]); /* recurse; the node storage is released just below */
+            }
+            vtr::free(pb_graph_node->child_pb_graph_nodes[i][j]);
+        }
+        vtr::free(pb_graph_node->child_pb_graph_nodes[i]);
+    }
+    vtr::free(pb_graph_node->child_pb_graph_nodes);
+}
+
+/* Recursively frees everything owned by pb_type (names, modes with their child pb_types and
+ * interconnect, annotations, power data, ports). The t_pb_type object itself is not freed. */
+static void free_pb_type(t_pb_type* pb_type) {
+    vtr::free(pb_type->name);
+    if (pb_type->blif_model)
+        vtr::free(pb_type->blif_model);
+    for (int i = 0; i < pb_type->num_modes; ++i) {
+        for (int j = 0; j < pb_type->modes[i].num_pb_type_children; ++j) {
+            free_pb_type(&pb_type->modes[i].pb_type_children[j]); /* recurse; the child array is deleted just below */
+        }
+        delete[] pb_type->modes[i].pb_type_children;
+        vtr::free(pb_type->modes[i].name);
+        for (int j = 0; j < pb_type->modes[i].num_interconnect; ++j) {
+            vtr::free(pb_type->modes[i].interconnect[j].input_string);
+            vtr::free(pb_type->modes[i].interconnect[j].output_string);
+            vtr::free(pb_type->modes[i].interconnect[j].name);
+            for (int k = 0; k < pb_type->modes[i].interconnect[j].num_annotations; ++k) {
+                if (pb_type->modes[i].interconnect[j].annotations[k].clock)
+                    vtr::free(pb_type->modes[i].interconnect[j].annotations[k].clock);
+                if (pb_type->modes[i].interconnect[j].annotations[k].input_pins) {
+                    vtr::free(pb_type->modes[i].interconnect[j].annotations[k].input_pins);
+                }
+                if (pb_type->modes[i].interconnect[j].annotations[k].output_pins) {
+                    vtr::free(pb_type->modes[i].interconnect[j].annotations[k].output_pins);
+                }
+                for (int m = 0; m < pb_type->modes[i].interconnect[j].annotations[k].num_value_prop_pairs; ++m) {
+                    vtr::free(pb_type->modes[i].interconnect[j].annotations[k].value[m]);
+                }
+                vtr::free(pb_type->modes[i].interconnect[j].annotations[k].prop);
+                vtr::free(pb_type->modes[i].interconnect[j].annotations[k].value);
+            }
+            vtr::free(pb_type->modes[i].interconnect[j].annotations);
+            if (pb_type->modes[i].interconnect[j].interconnect_power)
+                vtr::free(pb_type->modes[i].interconnect[j].interconnect_power);
+        }
+        if (pb_type->modes[i].interconnect)
+            delete[] pb_type->modes[i].interconnect;
+        if (pb_type->modes[i].mode_power)
+            vtr::free(pb_type->modes[i].mode_power);
+    }
+    if (pb_type->modes)
+        delete[] pb_type->modes;
+    /* Annotations attached directly to this pb_type */
+    for (int i = 0; i < pb_type->num_annotations; ++i) {
+        for (int j = 0; j < pb_type->annotations[i].num_value_prop_pairs; ++j) {
+            vtr::free(pb_type->annotations[i].value[j]);
+        }
+        vtr::free(pb_type->annotations[i].value);
+        vtr::free(pb_type->annotations[i].prop);
+        if (pb_type->annotations[i].input_pins) {
+            vtr::free(pb_type->annotations[i].input_pins);
+        }
+        if (pb_type->annotations[i].output_pins) {
+            vtr::free(pb_type->annotations[i].output_pins);
+        }
+        if (pb_type->annotations[i].clock) {
+            vtr::free(pb_type->annotations[i].clock);
+        }
+    }
+    if (pb_type->num_annotations > 0) {
+        vtr::free(pb_type->annotations);
+    }
+
+    if (pb_type->pb_type_power) {
+        vtr::free(pb_type->pb_type_power);
+    }
+
+    for (int i = 0; i < pb_type->num_ports; ++i) {
+        vtr::free(pb_type->ports[i].name);
+        if (pb_type->ports[i].port_class) {
+            vtr::free(pb_type->ports[i].port_class);
+        }
+        if (pb_type->ports[i].port_power) {
+            vtr::free(pb_type->ports[i].port_power);
+        }
+    }
+    vtr::free(pb_type->ports);
+}
+
+/* Finds the port of 'pb_type' whose name equals the part of 'name' before any '['.
+ *
+ * 'name' may look like "port", "port[idx]" or "port[high:low]". When 'high_index' and
+ * 'low_index' are both non-null they receive the parsed indices; for a name without
+ * brackets they receive num_pins - 1 and 0. Returns nullptr if no port matches. */
+t_port* findPortByName(const char* name, t_pb_type* pb_type, int* high_index, int* low_index) {
+    const unsigned int bracket_pos = strcspn(name, "[");
+
+    /* Find port by name */
+    t_port* port = nullptr;
+    for (int iport = 0; port == nullptr && iport < pb_type->num_ports; iport++) {
+        t_port* candidate = &pb_type->ports[iport];
+        const bool same_length = (strlen(candidate->name) == bracket_pos);
+        if (same_length && strncmp(name, candidate->name, bracket_pos) == 0) {
+            port = candidate;
+        }
+    }
+    if (port == nullptr) {
+        return nullptr;
+    }
+
+    /* Get indices */
+    unsigned int high;
+    unsigned int low;
+    const bool has_bracket = (strlen(name) > bracket_pos);
+    if (has_bracket) {
+        /* atoi() stops at the first non-digit, i.e. at ':' or ']' */
+        high = atoi(&name[bracket_pos + 1]);
+
+        const unsigned int colon_pos = strcspn(name, ":");
+        if (colon_pos < strlen(name)) {
+            low = atoi(&name[colon_pos + 1]);
+        } else {
+            low = high;
+        }
+    } else {
+        high = port->num_pins - 1;
+        low = 0;
+    }
+
+    /* The indices are only reported when both output pointers were supplied */
+    if (high_index != nullptr && low_index != nullptr) {
+        *high_index = high;
+        *low_index = low;
+    }
+
+    return port;
+}
+
+//Builds a 1x1 tile type named 'name' (duplicated with vtr::strdup) with no pins, capacity, drivers or receivers
+t_physical_tile_type get_empty_physical_type(const char* name) {
+    t_physical_tile_type type;
+    type.name = vtr::strdup(name);
+    type.is_input_type = false;
+    type.is_output_type = false;
+    type.area = UNDEFINED;
+    type.capacity = 0;
+    type.num_pins = 0;
+    type.num_drivers = 0;
+    type.num_receivers = 0;
+    type.width = 1; //The two matrices below are sized from width and height, so these are set first
+    type.height = 1;
+    type.switchblock_locations = vtr::Matrix<e_sb_type>({{size_t(type.width), size_t(type.height)}}, e_sb_type::FULL);
+    type.switchblock_switch_overrides = vtr::Matrix<int>({{size_t(type.width), size_t(type.height)}}, DEFAULT_SWITCH);
+    return type;
+}
+
+//Creates a logical block type that carries only a name (duplicated with vtr::strdup) and a null pb_type
+t_logical_block_type get_empty_logical_type(const char* name) {
+    t_logical_block_type empty_type;
+    empty_type.pb_type = nullptr;
+    empty_type.name = vtr::strdup(name);
+    return empty_type;
+}
+
+//Gathers the logical block types that appear as an equivalent site of any sub-tile of 'type'
+std::unordered_set<t_logical_block_type_ptr> get_equivalent_sites_set(t_physical_tile_type_ptr type) {
+    std::unordered_set<t_logical_block_type_ptr> sites;
+    for (const auto& sub_tile : type->sub_tiles) {
+        //Range insertion; a logical block listed by several sub-tiles is stored once
+        const auto& candidates = sub_tile.equivalent_sites;
+        sites.insert(candidates.begin(), candidates.end());
+    }
+
+    return sites;
+}
+
+//Returns a vtr::malloc'ed copy of 'pins' in which everything before the first '.' is replaced by 'new_name'
+static char* rename_annotation_pins(const char* pins, const char* new_name) {
+    const char* dot = strstr(pins, ".");
+    //A pin string without '.' would otherwise hand a null pointer to strlen()/strcat()
+    VTR_ASSERT(dot != nullptr);
+
+    char* renamed = (char*)vtr::malloc(sizeof(char) * (strlen(new_name) + strlen(dot) + 1));
+    renamed[0] = '\0';
+    strcat(renamed, new_name);
+    strcat(renamed, dot);
+    return renamed;
+}
+
+//Fills 'copy' with a mode-less, single-instance (num_pb == 1) duplicate of 'pb_type' named 'new_name'; annotation pin strings get the new name as prefix
+void alloc_and_load_default_child_for_pb_type(t_pb_type* pb_type, char* new_name, t_pb_type* copy) {
+    int i, j;
+
+    VTR_ASSERT(pb_type->blif_model != nullptr);
+
+    copy->name = vtr::strdup(new_name);
+    copy->blif_model = vtr::strdup(pb_type->blif_model);
+    copy->class_type = pb_type->class_type;
+    copy->depth = pb_type->depth;
+    copy->model = pb_type->model;
+    copy->modes = nullptr;
+    copy->num_modes = 0;
+    copy->num_clock_pins = pb_type->num_clock_pins;
+    copy->num_input_pins = pb_type->num_input_pins;
+    copy->num_output_pins = pb_type->num_output_pins;
+    copy->num_pins = pb_type->num_pins;
+    copy->num_pb = 1;
+
+    /* Power */
+    copy->pb_type_power = (t_pb_type_power*)vtr::calloc(1, sizeof(t_pb_type_power));
+    copy->pb_type_power->estimation_method = power_method_inherited(pb_type->pb_type_power->estimation_method);
+
+    /* Ports */
+    copy->num_ports = pb_type->num_ports;
+    copy->ports = (t_port*)vtr::calloc(pb_type->num_ports, sizeof(t_port));
+    for (i = 0; i < pb_type->num_ports; i++) {
+        copy->ports[i].is_clock = pb_type->ports[i].is_clock;
+        copy->ports[i].model_port = pb_type->ports[i].model_port;
+        copy->ports[i].type = pb_type->ports[i].type;
+        copy->ports[i].num_pins = pb_type->ports[i].num_pins;
+        copy->ports[i].parent_pb_type = copy;
+        copy->ports[i].name = vtr::strdup(pb_type->ports[i].name);
+        copy->ports[i].port_class = vtr::strdup(pb_type->ports[i].port_class);
+        copy->ports[i].port_index_by_type = pb_type->ports[i].port_index_by_type;
+        copy->ports[i].index = pb_type->ports[i].index;
+        copy->ports[i].absolute_first_pin_index = pb_type->ports[i].absolute_first_pin_index;
+
+        copy->ports[i].port_power = (t_port_power*)vtr::calloc(1, sizeof(t_port_power));
+        //Defaults
+        if (copy->pb_type_power->estimation_method == POWER_METHOD_AUTO_SIZES) {
+            copy->ports[i].port_power->wire_type = POWER_WIRE_TYPE_AUTO;
+            copy->ports[i].port_power->buffer_type = POWER_BUFFER_TYPE_AUTO;
+        } else if (copy->pb_type_power->estimation_method == POWER_METHOD_SPECIFY_SIZES) {
+            copy->ports[i].port_power->wire_type = POWER_WIRE_TYPE_IGNORED;
+            copy->ports[i].port_power->buffer_type = POWER_BUFFER_TYPE_NONE;
+        }
+    }
+
+    copy->annotations = (t_pin_to_pin_annotation*)vtr::calloc(pb_type->num_annotations, sizeof(t_pin_to_pin_annotation));
+    copy->num_annotations = pb_type->num_annotations;
+    for (i = 0; i < copy->num_annotations; i++) {
+        copy->annotations[i].clock = vtr::strdup(pb_type->annotations[i].clock);
+        copy->annotations[i].input_pins = rename_annotation_pins(pb_type->annotations[i].input_pins, new_name);
+        if (pb_type->annotations[i].output_pins != nullptr) {
+            copy->annotations[i].output_pins = rename_annotation_pins(pb_type->annotations[i].output_pins, new_name);
+        } else {
+            copy->annotations[i].output_pins = nullptr;
+        }
+        copy->annotations[i].line_num = pb_type->annotations[i].line_num;
+        copy->annotations[i].format = pb_type->annotations[i].format;
+        copy->annotations[i].type = pb_type->annotations[i].type;
+        copy->annotations[i].num_value_prop_pairs = pb_type->annotations[i].num_value_prop_pairs;
+        copy->annotations[i].prop = (int*)vtr::malloc(sizeof(int) * pb_type->annotations[i].num_value_prop_pairs);
+        copy->annotations[i].value = (char**)vtr::malloc(sizeof(char*) * pb_type->annotations[i].num_value_prop_pairs);
+        for (j = 0; j < pb_type->annotations[i].num_value_prop_pairs; j++) {
+            copy->annotations[i].prop[j] = pb_type->annotations[i].prop[j];
+            copy->annotations[i].value[j] = vtr::strdup(pb_type->annotations[i].value[j]);
+        }
+    }
+}
+
+/* populate special lut class */
+void ProcessLutClass(t_pb_type* lut_pb_type) {
+    char* default_name;
+    t_port* in_port;
+    t_port* out_port;
+    int i, j;
+
+    if (strcmp(lut_pb_type->name, "lut") != 0) {
+        default_name = vtr::strdup("lut");
+    } else {
+        default_name = vtr::strdup("lut_child");
+    }
+
+    lut_pb_type->num_modes = 2;
+    lut_pb_type->pb_type_power->leakage_default_mode = 1;
+    lut_pb_type->modes = new t_mode[lut_pb_type->num_modes];
+
+    /* First mode, route_through */
+    lut_pb_type->modes[0].name = vtr::strdup("wire");
+    lut_pb_type->modes[0].parent_pb_type = lut_pb_type;
+    lut_pb_type->modes[0].index = 0;
+    lut_pb_type->modes[0].num_pb_type_children = 0;
+    lut_pb_type->modes[0].mode_power = (t_mode_power*)vtr::calloc(1,
+                                                                  sizeof(t_mode_power));
+
+    /* Process interconnect */
+    /* TODO: add timing annotations to route-through */
+    VTR_ASSERT(lut_pb_type->num_ports == 2);
+    if (strcmp(lut_pb_type->ports[0].port_class, "lut_in") == 0) {
+        VTR_ASSERT(strcmp(lut_pb_type->ports[1].port_class, "lut_out") == 0);
+        in_port = &lut_pb_type->ports[0];
+        out_port = &lut_pb_type->ports[1];
+    } else {
+        VTR_ASSERT(strcmp(lut_pb_type->ports[0].port_class, "lut_out") == 0);
+        VTR_ASSERT(strcmp(lut_pb_type->ports[1].port_class, "lut_in") == 0);
+        out_port = &lut_pb_type->ports[0];
+        in_port = &lut_pb_type->ports[1];
+    }
+    lut_pb_type->modes[0].num_interconnect = 1;
+    lut_pb_type->modes[0].interconnect = new t_interconnect[1];
+    lut_pb_type->modes[0].interconnect[0].name = (char*)vtr::calloc(strlen(lut_pb_type->name) + 10, sizeof(char));
+    sprintf(lut_pb_type->modes[0].interconnect[0].name, "complete:%s",
+            lut_pb_type->name);
+    lut_pb_type->modes[0].interconnect[0].type = COMPLETE_INTERC;
+    lut_pb_type->modes[0].interconnect[0].input_string = (char*)vtr::calloc(strlen(lut_pb_type->name) + strlen(in_port->name) + 2,
+                                                                            sizeof(char));
+    sprintf(lut_pb_type->modes[0].interconnect[0].input_string, "%s.%s",
+            lut_pb_type->name, in_port->name);
+    lut_pb_type->modes[0].interconnect[0].output_string = (char*)vtr::calloc(strlen(lut_pb_type->name) + strlen(out_port->name) + 2,
+                                                                             sizeof(char));
+    sprintf(lut_pb_type->modes[0].interconnect[0].output_string, "%s.%s",
+            lut_pb_type->name, out_port->name);
+
+    lut_pb_type->modes[0].interconnect[0].parent_mode_index = 0;
+    lut_pb_type->modes[0].interconnect[0].parent_mode = &lut_pb_type->modes[0];
+    lut_pb_type->modes[0].interconnect[0].interconnect_power = (t_interconnect_power*)vtr::calloc(1, sizeof(t_interconnect_power));
+
+    lut_pb_type->modes[0].interconnect[0].annotations = (t_pin_to_pin_annotation*)vtr::calloc(lut_pb_type->num_annotations,
+                                                                                              sizeof(t_pin_to_pin_annotation));
+    lut_pb_type->modes[0].interconnect[0].num_annotations = lut_pb_type->num_annotations;
+    for (i = 0; i < lut_pb_type->modes[0].interconnect[0].num_annotations;
+         i++) {
+        lut_pb_type->modes[0].interconnect[0].annotations[i].clock = vtr::strdup(lut_pb_type->annotations[i].clock);
+        lut_pb_type->modes[0].interconnect[0].annotations[i].input_pins = vtr::strdup(lut_pb_type->annotations[i].input_pins);
+        lut_pb_type->modes[0].interconnect[0].annotations[i].output_pins = vtr::strdup(lut_pb_type->annotations[i].output_pins);
+        lut_pb_type->modes[0].interconnect[0].annotations[i].line_num = lut_pb_type->annotations[i].line_num;
+        lut_pb_type->modes[0].interconnect[0].annotations[i].format = lut_pb_type->annotations[i].format;
+        lut_pb_type->modes[0].interconnect[0].annotations[i].type = lut_pb_type->annotations[i].type;
+        lut_pb_type->modes[0].interconnect[0].annotations[i].num_value_prop_pairs = lut_pb_type->annotations[i].num_value_prop_pairs;
+        lut_pb_type->modes[0].interconnect[0].annotations[i].prop = (int*)vtr::malloc(sizeof(int)
+                                                                                      * lut_pb_type->annotations[i].num_value_prop_pairs);
+        lut_pb_type->modes[0].interconnect[0].annotations[i].value = (char**)vtr::malloc(sizeof(char*)
+                                                                                         * lut_pb_type->annotations[i].num_value_prop_pairs);
+        for (j = 0; j < lut_pb_type->annotations[i].num_value_prop_pairs; j++) {
+            lut_pb_type->modes[0].interconnect[0].annotations[i].prop[j] = lut_pb_type->annotations[i].prop[j];
+            lut_pb_type->modes[0].interconnect[0].annotations[i].value[j] = vtr::strdup(lut_pb_type->annotations[i].value[j]);
+        }
+    }
+
+    /* Second mode, LUT */
+
+    lut_pb_type->modes[1].name = vtr::strdup(lut_pb_type->name);
+    lut_pb_type->modes[1].parent_pb_type = lut_pb_type;
+    lut_pb_type->modes[1].index = 1;
+    lut_pb_type->modes[1].num_pb_type_children = 1;
+    lut_pb_type->modes[1].mode_power = (t_mode_power*)vtr::calloc(1,
+                                                                  sizeof(t_mode_power));
+    lut_pb_type->modes[1].pb_type_children = new t_pb_type[1];
+    alloc_and_load_default_child_for_pb_type(lut_pb_type, default_name,
+                                             lut_pb_type->modes[1].pb_type_children);
+    /* moved annotations to child so delete old annotations */
+    for (i = 0; i < lut_pb_type->num_annotations; i++) {
+        for (j = 0; j < lut_pb_type->annotations[i].num_value_prop_pairs; j++) {
+            free(lut_pb_type->annotations[i].value[j]);
+        }
+        free(lut_pb_type->annotations[i].value);
+        free(lut_pb_type->annotations[i].prop);
+        if (lut_pb_type->annotations[i].input_pins) {
+            free(lut_pb_type->annotations[i].input_pins);
+        }
+        if (lut_pb_type->annotations[i].output_pins) {
+            free(lut_pb_type->annotations[i].output_pins);
+        }
+        if (lut_pb_type->annotations[i].clock) {
+            free(lut_pb_type->annotations[i].clock);
+        }
+    }
+    lut_pb_type->num_annotations = 0;
+    free(lut_pb_type->annotations);
+    lut_pb_type->annotations = nullptr;
+    lut_pb_type->modes[1].pb_type_children[0].depth = lut_pb_type->depth + 1;
+    lut_pb_type->modes[1].pb_type_children[0].parent_mode = &lut_pb_type->modes[1];
+    for (i = 0; i < lut_pb_type->modes[1].pb_type_children[0].num_ports; i++) {
+        if (lut_pb_type->modes[1].pb_type_children[0].ports[i].type == IN_PORT) {
+            lut_pb_type->modes[1].pb_type_children[0].ports[i].equivalent = PortEquivalence::FULL;
+        }
+    }
+
+    /* Process interconnect */
+    lut_pb_type->modes[1].num_interconnect = 2;
+    lut_pb_type->modes[1].interconnect = new t_interconnect[lut_pb_type->modes[1].num_interconnect];
+    lut_pb_type->modes[1].interconnect[0].name = (char*)vtr::calloc(strlen(lut_pb_type->name) + 10, sizeof(char));
+    sprintf(lut_pb_type->modes[1].interconnect[0].name, "direct:%s",
+            lut_pb_type->name);
+    lut_pb_type->modes[1].interconnect[0].type = DIRECT_INTERC;
+    lut_pb_type->modes[1].interconnect[0].input_string = (char*)vtr::calloc(strlen(lut_pb_type->name) + strlen(in_port->name) + 2,
+                                                                            sizeof(char));
+    sprintf(lut_pb_type->modes[1].interconnect[0].input_string, "%s.%s",
+            lut_pb_type->name, in_port->name);
+    lut_pb_type->modes[1].interconnect[0].output_string = (char*)vtr::calloc(strlen(default_name) + strlen(in_port->name) + 2, sizeof(char));
+    sprintf(lut_pb_type->modes[1].interconnect[0].output_string, "%s.%s",
+            default_name, in_port->name);
+    lut_pb_type->modes[1].interconnect[0].infer_annotations = true;
+
+    lut_pb_type->modes[1].interconnect[0].parent_mode_index = 1;
+    lut_pb_type->modes[1].interconnect[0].parent_mode = &lut_pb_type->modes[1];
+    lut_pb_type->modes[1].interconnect[0].interconnect_power = (t_interconnect_power*)vtr::calloc(1, sizeof(t_interconnect_power));
+
+    lut_pb_type->modes[1].interconnect[1].name = (char*)vtr::calloc(strlen(lut_pb_type->name) + 11, sizeof(char));
+    sprintf(lut_pb_type->modes[1].interconnect[1].name, "direct:%s",
+            lut_pb_type->name);
+
+    lut_pb_type->modes[1].interconnect[1].type = DIRECT_INTERC;
+    lut_pb_type->modes[1].interconnect[1].input_string = (char*)vtr::calloc(strlen(default_name) + strlen(out_port->name) + 4, sizeof(char));
+    sprintf(lut_pb_type->modes[1].interconnect[1].input_string, "%s.%s",
+            default_name, out_port->name);
+    lut_pb_type->modes[1].interconnect[1].output_string = (char*)vtr::calloc(strlen(lut_pb_type->name) + strlen(out_port->name)
+                                                                                 + strlen(in_port->name) + 2,
+                                                                             sizeof(char));
+    sprintf(lut_pb_type->modes[1].interconnect[1].output_string, "%s.%s",
+            lut_pb_type->name, out_port->name);
+    lut_pb_type->modes[1].interconnect[1].infer_annotations = true;
+
+    lut_pb_type->modes[1].interconnect[1].parent_mode_index = 1;
+    lut_pb_type->modes[1].interconnect[1].parent_mode = &lut_pb_type->modes[1];
+    lut_pb_type->modes[1].interconnect[1].interconnect_power = (t_interconnect_power*)vtr::calloc(1, sizeof(t_interconnect_power));
+
+    free(default_name);
+
+    free(lut_pb_type->blif_model);
+    lut_pb_type->blif_model = nullptr;
+    lut_pb_type->model = nullptr;
+}
+
+/* Expand a memory-class pb_type into a single mode holding num_pb one-bit data slices joined by direct interconnect */
+void ProcessMemoryClass(t_pb_type* mem_pb_type) {
+    char* default_name;
+    char *input_name, *input_port_name, *output_name, *output_port_name;
+    int i, j, i_inter, num_pb;
+    /* Name the slice child "memory_slice", unless the parent already uses that name */
+    if (strcmp(mem_pb_type->name, "memory_slice") != 0) {
+        default_name = vtr::strdup("memory_slice");
+    } else {
+        default_name = vtr::strdup("memory_slice_1bit");
+    }
+
+    mem_pb_type->modes = new t_mode[1];
+    mem_pb_type->modes[0].name = vtr::strdup(default_name);
+    mem_pb_type->modes[0].parent_pb_type = mem_pb_type;
+    mem_pb_type->modes[0].index = 0;
+    mem_pb_type->modes[0].mode_power = (t_mode_power*)vtr::calloc(1, sizeof(t_mode_power));
+    num_pb = OPEN; /* becomes the common pin count of every port whose port_class starts with "data" */
+    for (i = 0; i < mem_pb_type->num_ports; i++) {
+        if (mem_pb_type->ports[i].port_class != nullptr
+            && strstr(mem_pb_type->ports[i].port_class, "data")
+                   == mem_pb_type->ports[i].port_class) {
+            if (num_pb == OPEN) {
+                num_pb = mem_pb_type->ports[i].num_pins;
+            } else if (num_pb != mem_pb_type->ports[i].num_pins) {
+                archfpga_throw(get_arch_file_name(), 0,
+                               "memory %s has inconsistent number of data bits %d and %d\n",
+                               mem_pb_type->name, num_pb,
+                               mem_pb_type->ports[i].num_pins);
+            }
+        }
+    }
+    /* Without a data port num_pb stays OPEN and would be used as a (negative) array size and slice count below */
+    if (num_pb == OPEN) archfpga_throw(get_arch_file_name(), 0, "memory %s has no data ports\n", mem_pb_type->name);
+    mem_pb_type->modes[0].num_pb_type_children = 1;
+    mem_pb_type->modes[0].pb_type_children = new t_pb_type[1];
+    alloc_and_load_default_child_for_pb_type(mem_pb_type, default_name,
+                                             &mem_pb_type->modes[0].pb_type_children[0]);
+    mem_pb_type->modes[0].pb_type_children[0].depth = mem_pb_type->depth + 1;
+    mem_pb_type->modes[0].pb_type_children[0].parent_mode = &mem_pb_type->modes[0];
+    mem_pb_type->modes[0].pb_type_children[0].num_pb = num_pb;
+
+    mem_pb_type->num_modes = 1;
+
+    free(mem_pb_type->blif_model);
+    mem_pb_type->blif_model = nullptr;
+    mem_pb_type->model = nullptr;
+    /* Upper bound: a data port uses one connection, any other port num_pb; trimmed to the real count at the end */
+    mem_pb_type->modes[0].num_interconnect = mem_pb_type->num_ports * num_pb;
+    mem_pb_type->modes[0].interconnect = new t_interconnect[mem_pb_type->modes[0].num_interconnect];
+
+    for (i = 0; i < mem_pb_type->modes[0].num_interconnect; i++) {
+        mem_pb_type->modes[0].interconnect[i].parent_mode_index = 0;
+        mem_pb_type->modes[0].interconnect[i].parent_mode = &mem_pb_type->modes[0];
+    }
+
+    /* Process interconnect */
+    i_inter = 0;
+    for (i = 0; i < mem_pb_type->num_ports; i++) {
+        mem_pb_type->modes[0].interconnect[i_inter].type = DIRECT_INTERC;
+        input_port_name = mem_pb_type->ports[i].name;
+        output_port_name = mem_pb_type->ports[i].name;
+        /* Inputs run parent -> slice, everything else slice -> parent */
+        if (mem_pb_type->ports[i].type == IN_PORT) {
+            input_name = mem_pb_type->name;
+            output_name = default_name;
+        } else {
+            input_name = default_name;
+            output_name = mem_pb_type->name;
+        }
+        /* Data port: one bus connection between the parent port and slices [num_pb-1:0] */
+        if (mem_pb_type->ports[i].port_class != nullptr
+            && strstr(mem_pb_type->ports[i].port_class, "data")
+                   == mem_pb_type->ports[i].port_class) {
+            mem_pb_type->modes[0].interconnect[i_inter].name = (char*)vtr::calloc(i_inter / 10 + 8, sizeof(char));
+            sprintf(mem_pb_type->modes[0].interconnect[i_inter].name,
+                    "direct%d", i_inter);
+            mem_pb_type->modes[0].interconnect[i_inter].infer_annotations = true;
+
+            if (mem_pb_type->ports[i].type == IN_PORT) {
+                /* force data pins to be one bit wide and update stats */
+                mem_pb_type->modes[0].pb_type_children[0].ports[i].num_pins = 1;
+                mem_pb_type->modes[0].pb_type_children[0].num_input_pins -= (mem_pb_type->ports[i].num_pins - 1);
+
+                mem_pb_type->modes[0].interconnect[i_inter].input_string = (char*)vtr::calloc(strlen(input_name) + strlen(input_port_name)
+                                                                                                  + 2,
+                                                                                              sizeof(char));
+                sprintf(mem_pb_type->modes[0].interconnect[i_inter].input_string,
+                        "%s.%s", input_name, input_port_name);
+                mem_pb_type->modes[0].interconnect[i_inter].output_string = (char*)vtr::calloc(strlen(output_name) + strlen(output_port_name)
+                                                                                                   + 2 * (6 + num_pb / 10),
+                                                                                               sizeof(char));
+                sprintf(mem_pb_type->modes[0].interconnect[i_inter].output_string,
+                        "%s[%d:0].%s", output_name, num_pb - 1,
+                        output_port_name);
+            } else {
+                /* force data pins to be one bit wide and update stats */
+                mem_pb_type->modes[0].pb_type_children[0].ports[i].num_pins = 1;
+                mem_pb_type->modes[0].pb_type_children[0].num_output_pins -= (mem_pb_type->ports[i].num_pins - 1);
+
+                mem_pb_type->modes[0].interconnect[i_inter].input_string = (char*)vtr::calloc(strlen(input_name) + strlen(input_port_name)
+                                                                                                  + 2 * (6 + num_pb / 10),
+                                                                                              sizeof(char));
+                sprintf(mem_pb_type->modes[0].interconnect[i_inter].input_string,
+                        "%s[%d:0].%s", input_name, num_pb - 1, input_port_name);
+                mem_pb_type->modes[0].interconnect[i_inter].output_string = (char*)vtr::calloc(strlen(output_name) + strlen(output_port_name)
+                                                                                                   + 2,
+                                                                                               sizeof(char));
+                sprintf(mem_pb_type->modes[0].interconnect[i_inter].output_string,
+                        "%s.%s", output_name, output_port_name);
+            }
+
+            /* Allocate interconnect power structures */
+            mem_pb_type->modes[0].interconnect[i_inter].interconnect_power = (t_interconnect_power*)vtr::calloc(1,
+                                                                                                                sizeof(t_interconnect_power));
+            i_inter++;
+        } else {
+            for (j = 0; j < num_pb; j++) {
+                /* Non-data port: one direct connection per slice j, in whichever direction the port has */
+                mem_pb_type->modes[0].interconnect[i_inter].name = (char*)vtr::calloc(i_inter / 10 + j / 10 + 10,
+                                                                                      sizeof(char));
+                sprintf(mem_pb_type->modes[0].interconnect[i_inter].name,
+                        "direct%d_%d", i_inter, j);
+                mem_pb_type->modes[0].interconnect[i_inter].infer_annotations = true;
+
+                if (mem_pb_type->ports[i].type == IN_PORT) {
+                    mem_pb_type->modes[0].interconnect[i_inter].type = DIRECT_INTERC;
+                    mem_pb_type->modes[0].interconnect[i_inter].input_string = (char*)vtr::calloc(strlen(input_name) + strlen(input_port_name)
+                                                                                                      + 2,
+                                                                                                  sizeof(char));
+                    sprintf(mem_pb_type->modes[0].interconnect[i_inter].input_string,
+                            "%s.%s", input_name, input_port_name);
+                    mem_pb_type->modes[0].interconnect[i_inter].output_string = (char*)vtr::calloc(strlen(output_name)
+                                                                                                       + strlen(output_port_name)
+                                                                                                       + 2 * (6 + num_pb / 10),
+                                                                                                   sizeof(char));
+                    sprintf(mem_pb_type->modes[0].interconnect[i_inter].output_string,
+                            "%s[%d:%d].%s", output_name, j, j,
+                            output_port_name);
+                } else {
+                    mem_pb_type->modes[0].interconnect[i_inter].type = DIRECT_INTERC;
+                    mem_pb_type->modes[0].interconnect[i_inter].input_string = (char*)vtr::calloc(strlen(input_name) + strlen(input_port_name)
+                                                                                                      + 2 * (6 + num_pb / 10),
+                                                                                                  sizeof(char));
+                    sprintf(mem_pb_type->modes[0].interconnect[i_inter].input_string,
+                            "%s[%d:%d].%s", input_name, j, j, input_port_name);
+                    mem_pb_type->modes[0].interconnect[i_inter].output_string = (char*)vtr::calloc(strlen(output_name)
+                                                                                                       + strlen(output_port_name) + 2,
+                                                                                                   sizeof(char));
+                    sprintf(mem_pb_type->modes[0].interconnect[i_inter].output_string,
+                            "%s.%s", output_name, output_port_name);
+                }
+
+                /* Allocate interconnect power structures */
+                mem_pb_type->modes[0].interconnect[i_inter].interconnect_power = (t_interconnect_power*)vtr::calloc(1,
+                                                                                                                    sizeof(t_interconnect_power));
+                i_inter++;
+            }
+        }
+    }
+    /* Record how many of the pre-allocated interconnect entries were actually filled in */
+    mem_pb_type->modes[0].num_interconnect = i_inter;
+
+    free(default_name);
+}
+
+e_power_estimation_method power_method_inherited(e_power_estimation_method parent_power_method) {
+    switch (parent_power_method) {
+        case POWER_METHOD_SUM_OF_CHILDREN:
+            /* A summing parent hands its children the default method */
+            return POWER_METHOD_AUTO_SIZES;
+        case POWER_METHOD_UNDEFINED:
+            return POWER_METHOD_UNDEFINED;
+        case POWER_METHOD_ABSOLUTE:
+        case POWER_METHOD_C_INTERNAL:
+            return POWER_METHOD_IGNORE;
+        case POWER_METHOD_TOGGLE_PINS:
+        case POWER_METHOD_SPECIFY_SIZES:
+        case POWER_METHOD_AUTO_SIZES:
+        case POWER_METHOD_IGNORE:
+            return parent_power_method; /* these methods pass down unchanged */
+        default:
+            VTR_ASSERT(0);
+            return POWER_METHOD_UNDEFINED; // Unreachable after the assert; keeps the compiler quiet.
+    }
+}
+
+void CreateModelLibrary(t_arch* arch) {
+    /* Builds the four built-in models as one array, chained through `next` in index order. */
+    t_model* const lib = new t_model[4];
+    t_model& inpad = lib[0];
+    t_model& outpad = lib[1];
+    t_model& latch = lib[2];
+    t_model& names = lib[3];
+    //INPAD: no inputs, one single-bit output port "inpad"
+    inpad.name = vtr::strdup(MODEL_INPUT);
+    inpad.index = 0;
+    inpad.next = &outpad;
+    inpad.instances = nullptr;
+    inpad.inputs = nullptr;
+    inpad.outputs = new t_model_ports[1];
+    inpad.outputs->name = vtr::strdup("inpad");
+    inpad.outputs->dir = OUT_PORT;
+    inpad.outputs->index = 0;
+    inpad.outputs->size = 1;
+    inpad.outputs->min_size = 1;
+    inpad.outputs->is_clock = false;
+    inpad.outputs->next = nullptr;
+
+    //OUTPAD: one single-bit input port "outpad", no outputs
+    outpad.name = vtr::strdup(MODEL_OUTPUT);
+    outpad.index = 1;
+    outpad.next = &latch;
+    outpad.instances = nullptr;
+    outpad.outputs = nullptr;
+    outpad.inputs = new t_model_ports[1];
+    outpad.inputs->name = vtr::strdup("outpad");
+    outpad.inputs->dir = IN_PORT;
+    outpad.inputs->index = 0;
+    outpad.inputs->size = 1;
+    outpad.inputs->min_size = 1;
+    outpad.inputs->is_clock = false;
+    outpad.inputs->next = nullptr;
+
+    //LATCH: data input "D" and clock input "clk", output "Q"
+    latch.name = vtr::strdup(MODEL_LATCH);
+    latch.index = 2;
+    latch.next = &names;
+    latch.instances = nullptr;
+    latch.inputs = new t_model_ports[2];
+    t_model_ports& latch_d = latch.inputs[0];
+    latch_d.name = vtr::strdup("D");
+    latch_d.dir = IN_PORT;
+    latch_d.index = 0;
+    latch_d.size = 1;
+    latch_d.min_size = 1;
+    latch_d.is_clock = false;
+    latch_d.clock = "clk";
+    latch_d.next = &latch.inputs[1];
+    t_model_ports& latch_clk = latch.inputs[1];
+    latch_clk.name = vtr::strdup("clk");
+    latch_clk.dir = IN_PORT;
+    latch_clk.index = 0;
+    latch_clk.size = 1;
+    latch_clk.min_size = 1;
+    latch_clk.is_clock = true;
+    latch_clk.next = nullptr;
+
+    latch.outputs = new t_model_ports[1];
+    t_model_ports& latch_q = latch.outputs[0];
+    latch_q.name = vtr::strdup("Q");
+    latch_q.dir = OUT_PORT;
+    latch_q.index = 0;
+    latch_q.size = 1;
+    latch_q.min_size = 1;
+    latch_q.is_clock = false;
+    latch_q.clock = "clk";
+    latch_q.next = nullptr;
+
+    //NAMES: input "in" feeding output "out" combinationally
+    names.name = vtr::strdup(MODEL_NAMES);
+    names.index = 3;
+    names.next = nullptr;
+    names.instances = nullptr;
+    names.inputs = new t_model_ports[1];
+    t_model_ports& names_in = names.inputs[0];
+    names_in.name = vtr::strdup("in");
+    names_in.dir = IN_PORT;
+    names_in.index = 0;
+    names_in.size = 1;
+    names_in.min_size = 1;
+    names_in.is_clock = false;
+    names_in.combinational_sink_ports = {"out"};
+    names_in.next = nullptr;
+    names.outputs = new t_model_ports[1];
+    t_model_ports& names_out = names.outputs[0];
+    names_out.name = vtr::strdup("out");
+    names_out.dir = OUT_PORT;
+    names_out.index = 0;
+    names_out.size = 1;
+    names_out.min_size = 1;
+    names_out.is_clock = false;
+    names_out.next = nullptr;
+    arch->model_library = lib;
+}
+
+void SyncModelsPbTypes(t_arch* arch,
+                       const std::vector<t_logical_block_type>& Types) {
+    /* Only block types that carry a pb_type hierarchy need syncing. */
+    for (const t_logical_block_type& block_type : Types) {
+        if (block_type.pb_type == nullptr) continue;
+        SyncModelsPbTypes_rec(arch, block_type.pb_type);
+    }
+}
+
+void SyncModelsPbTypes_rec(t_arch* arch,
+                           t_pb_type* pb_type) {
+    int i, j, p;
+    t_model *model_match_prim, *cur_model;
+    t_model_ports* model_port;
+    vtr::t_linked_vptr* old;
+    char* blif_model_name = nullptr;
+    /* A pb_type with a blif_model is bound to its model here; any other pb_type just recurses into its children */
+    bool found;
+
+    if (pb_type->blif_model != nullptr) {
+        /* get actual name of subckt: drop a leading ".subckt " if present */
+        blif_model_name = pb_type->blif_model;
+        if (strstr(blif_model_name, ".subckt ") == blif_model_name) {
+            blif_model_name = strchr(blif_model_name, ' ');
+            ++blif_model_name; //Advance past space
+        }
+        if (!blif_model_name) { // NOTE(review): looks unreachable, the pointer derives from a non-null blif_model
+            archfpga_throw(get_arch_file_name(), 0,
+                           "Unknown blif model %s in pb_type %s\n",
+                           pb_type->blif_model, pb_type->name);
+        }
+
+        /* There are two sets of models to consider, the standard library of models and the user defined models */
+        if (is_library_model(blif_model_name)) {
+            cur_model = arch->model_library;
+        } else {
+            cur_model = arch->models;
+        }
+
+        /* Determine the logical model to use: first entry in the chosen list with the same name */
+        found = false;
+        model_match_prim = nullptr;
+        while (cur_model && !found) {
+            /* blif_model_name already has any ".subckt " prefix removed, so compare it directly */
+            if (strcmp(blif_model_name, cur_model->name) == 0) {
+                found = true;
+                model_match_prim = cur_model;
+            }
+            cur_model = cur_model->next;
+        }
+        if (found != true) {
+            archfpga_throw(get_arch_file_name(), 0,
+                           "No matching model for pb_type %s\n", pb_type->blif_model);
+        }
+        /* Link both ways: pb_type -> model, and pb_type prepended to the model's pb_types list */
+        pb_type->model = model_match_prim;
+        old = model_match_prim->pb_types;
+        model_match_prim->pb_types = (vtr::t_linked_vptr*)vtr::malloc(sizeof(vtr::t_linked_vptr));
+        model_match_prim->pb_types->next = old;
+        model_match_prim->pb_types->data_vptr = pb_type;
+        /* Bind every pb_type port to the same-named model port, searching model inputs first, then outputs */
+        for (p = 0; p < pb_type->num_ports; p++) {
+            found = false;
+            /* TODO: Parse error checking - check if INPUT matches INPUT and OUTPUT matches OUTPUT (not yet done) */
+            model_port = model_match_prim->inputs;
+            while (model_port && !found) {
+                if (strcmp(model_port->name, pb_type->ports[p].name) == 0) {
+                    if (model_port->size < pb_type->ports[p].num_pins) { /* size tracks the widest user */
+                        model_port->size = pb_type->ports[p].num_pins;
+                    }
+                    if (model_port->min_size > pb_type->ports[p].num_pins
+                        || model_port->min_size == -1) { /* min_size tracks the narrowest user; -1 means unset */
+                        model_port->min_size = pb_type->ports[p].num_pins;
+                    }
+                    pb_type->ports[p].model_port = model_port;
+                    if (pb_type->ports[p].type != model_port->dir) {
+                        archfpga_throw(get_arch_file_name(), 0,
+                                       "Direction for port '%s' on model does not match port direction in pb_type '%s'\n",
+                                       pb_type->ports[p].name, pb_type->name);
+                    }
+                    if (pb_type->ports[p].is_clock != model_port->is_clock) {
+                        archfpga_throw(get_arch_file_name(), 0,
+                                       "Port '%s' on model does not match is_clock in pb_type '%s'\n",
+                                       pb_type->ports[p].name, pb_type->name);
+                    }
+                    found = true;
+                }
+                model_port = model_port->next;
+            }
+            model_port = model_match_prim->outputs;
+            while (model_port && !found) {
+                if (strcmp(model_port->name, pb_type->ports[p].name) == 0) {
+                    if (model_port->size < pb_type->ports[p].num_pins) {
+                        model_port->size = pb_type->ports[p].num_pins;
+                    }
+                    if (model_port->min_size > pb_type->ports[p].num_pins
+                        || model_port->min_size == -1) {
+                        model_port->min_size = pb_type->ports[p].num_pins;
+                    }
+                    /* NOTE: unlike the input loop above, is_clock is not compared for output ports */
+                    pb_type->ports[p].model_port = model_port;
+                    if (pb_type->ports[p].type != model_port->dir) {
+                        archfpga_throw(get_arch_file_name(), 0,
+                                       "Direction for port '%s' on model does not match port direction in pb_type '%s'\n",
+                                       pb_type->ports[p].name, pb_type->name);
+                    }
+                    found = true;
+                }
+                model_port = model_port->next;
+            }
+            if (found != true) {
+                archfpga_throw(get_arch_file_name(), 0,
+                               "No matching model port for port %s in pb_type %s\n",
+                               pb_type->ports[p].name, pb_type->name);
+            }
+        }
+    } else {
+        for (i = 0; i < pb_type->num_modes; i++) {
+            for (j = 0; j < pb_type->modes[i].num_pb_type_children; j++) {
+                SyncModelsPbTypes_rec(arch,
+                                      &(pb_type->modes[i].pb_type_children[j]));
+            }
+        }
+    }
+}
+
+/* Date:July 10th, 2013
+ * Author: Daniel Chen
+ * Purpose: Checks that the clock_name given in a timing
+ *			annotation (Tsetup, Thold, Tc_to_q) names one of the
+ *			clock ports of the primitive, and throws otherwise.
+ *			Applies to flipflop/memory right now.
+ */
+void primitives_annotation_clock_match(t_pin_to_pin_annotation* annotation,
+                                       t_pb_type* parent_pb_type) {
+    int i_port;
+    bool clock_valid = false; //Determine if annotation's clock is same as primitive's clock
+    //Reject null inputs up front: a null clock name must never reach strcmp() below
+    if (!parent_pb_type || !annotation || !annotation->clock) {
+        archfpga_throw(__FILE__, __LINE__,
+                       "Annotation_clock check encouters invalid annotation or primitive.\n");
+    }
+
+    for (i_port = 0; i_port < parent_pb_type->num_ports; i_port++) {
+        if (parent_pb_type->ports[i_port].is_clock) {
+            if (strcmp(parent_pb_type->ports[i_port].name, annotation->clock)
+                == 0) {
+                clock_valid = true;
+                break;
+            }
+        }
+    }
+
+    if (!clock_valid) {
+        archfpga_throw(get_arch_file_name(), annotation->line_num,
+                       "Clock '%s' does not match any clock defined in pb_type '%s'.\n",
+                       annotation->clock, parent_pb_type->name);
+    }
+}
+
+const t_segment_inf* find_segment(const t_arch* arch, std::string name) {
+    /* Linear scan: the first segment whose name equals `name` wins. */
+    for (const t_segment_inf& candidate : arch->Segments) {
+        if (candidate.name != name) {
+            continue;
+        }
+        return &candidate;
+    }
+    return nullptr; /* no segment carries that name */
+}
+
+bool segment_exists(const t_arch* arch, std::string name) {
+    return nullptr != find_segment(arch, name); /* true iff the lookup finds a segment */
+}
+
+bool is_library_model(const char* model_name) {
+    /* The library consists of exactly these four built-in model names. */
+    const std::string candidate(model_name);
+    const bool is_builtin = candidate == std::string(MODEL_NAMES)
+                            || candidate == std::string(MODEL_LATCH)
+                            || candidate == std::string(MODEL_INPUT)
+                            || candidate == std::string(MODEL_OUTPUT);
+    return is_builtin;
+}
+
+bool is_library_model(const t_model* model) {
+    return is_library_model(model->name); //defer to the name-based overload
+}
+
+//Returns true if the specified block type contains the specified blif model name
+//(the search is delegated to pb_type_contains_blif_model on the type's pb_type)
+// TODO: Remove block_type_contains_blif_model / pb_type_contains_blif_model
+// as part of
+// https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1193
+bool block_type_contains_blif_model(t_logical_block_type_ptr type, const std::string& blif_model_name) {
+    return pb_type_contains_blif_model(type->pb_type, blif_model_name);
+}
+
+//Returns true if a pb_type (or any pb_type below it) uses the specified blif model name
+bool pb_type_contains_blif_model(const t_pb_type* pb_type, const std::string& blif_model_name) {
+    //A missing pb_type contains nothing
+    if (pb_type == nullptr) {
+        return false;
+    }
+
+    const char* const leaf_model = pb_type->blif_model;
+    if (leaf_model != nullptr) {
+        //Leaf pb_type: accept the bare name as well as the ".subckt <name>" spelling
+        VTR_ASSERT(pb_type->num_modes == 0);
+        const bool bare_match = (blif_model_name == leaf_model);
+        return bare_match || (".subckt " + blif_model_name == leaf_model);
+    }
+
+    //Intermediate pb_type: search through every child of every mode
+    for (int mode_idx = 0; mode_idx < pb_type->num_modes; ++mode_idx) {
+        const t_mode& mode = pb_type->modes[mode_idx];
+
+        for (int child_idx = 0; child_idx < mode.num_pb_type_children; ++child_idx) {
+            const t_pb_type& child = mode.pb_type_children[child_idx];
+            if (pb_type_contains_blif_model(&child, blif_model_name)) {
+                return true;
+            }
+        }
+    }
+
+    return false; //no pb_type in this subtree uses the model
+}
+
+const t_pin_to_pin_annotation* find_sequential_annotation(const t_pb_type* pb_type, const t_model_ports* port, enum e_pin_to_pin_delay_annotations annot_type) {
+    VTR_ASSERT(annot_type == E_ANNOT_PIN_TO_PIN_DELAY_TSETUP
+               || annot_type == E_ANNOT_PIN_TO_PIN_DELAY_THOLD
+               || annot_type == E_ANNOT_PIN_TO_PIN_DELAY_CLOCK_TO_Q_MAX
+               || annot_type == E_ANNOT_PIN_TO_PIN_DELAY_CLOCK_TO_Q_MIN);
+
+    //Return the first annotation on `port` that carries a property of the requested kind
+    for (int idx = 0; idx < pb_type->num_annotations; ++idx) {
+        const t_pin_to_pin_annotation& candidate = pb_type->annotations[idx];
+        InstPort candidate_in(candidate.input_pins);
+        if (candidate_in.port_name() != port->name) {
+            continue;
+        }
+        for (int k = 0; k < candidate.num_value_prop_pairs; ++k) {
+            if (candidate.prop[k] == annot_type) return &candidate;
+        }
+    }
+
+    return nullptr;
+}
+
+const t_pin_to_pin_annotation* find_combinational_annotation(const t_pb_type* pb_type, std::string in_port, std::string out_port) {
+    for (int idx = 0; idx < pb_type->num_annotations; ++idx) {
+        const t_pin_to_pin_annotation& candidate = pb_type->annotations[idx];
+        for (const auto& in_str : vtr::split(candidate.input_pins)) {
+            InstPort in_pins(in_str);
+            for (const auto& out_str : vtr::split(candidate.output_pins)) {
+                InstPort out_pins(out_str);
+                //Skip pairs other than the requested input/output ports
+                if (in_pins.port_name() != in_port || out_pins.port_name() != out_port) continue;
+                for (int k = 0; k < candidate.num_value_prop_pairs; ++k) {
+                    const auto prop = candidate.prop[k];
+                    if (prop == E_ANNOT_PIN_TO_PIN_DELAY_MAX || prop == E_ANNOT_PIN_TO_PIN_DELAY_MIN) {
+                        return &candidate; //matching pair that carries a min or max delay
+                    }
+                }
+            }
+        }
+    }
+
+    return nullptr;
+}
+
+void link_physical_logical_types(std::vector<t_physical_tile_type>& PhysicalTileTypes,
+                                 std::vector<t_logical_block_type>& LogicalBlockTypes) {
+    /* Pass 1: attach each tile to every logical block named like the pb_type of one of the tile's equivalent sites */
+    for (auto& physical_tile : PhysicalTileTypes) {
+        if (physical_tile.index == EMPTY_TYPE_INDEX) continue;
+
+        auto eq_sites_set = get_equivalent_sites_set(&physical_tile);
+        auto equivalent_sites = std::vector<t_logical_block_type_ptr>(eq_sites_set.begin(), eq_sites_set.end());
+        /* Order the sites by ascending (tile pin count - site pin count) */
+        auto criteria = [&physical_tile](const t_logical_block_type* lhs, const t_logical_block_type* rhs) {
+            int num_pins = physical_tile.num_inst_pins;
+            int lhs_num_logical_pins = lhs->pb_type->num_pins;
+            int rhs_num_logical_pins = rhs->pb_type->num_pins;
+
+            int lhs_diff_num_pins = num_pins - lhs_num_logical_pins;
+            int rhs_diff_num_pins = num_pins - rhs_num_logical_pins;
+
+            return lhs_diff_num_pins < rhs_diff_num_pins;
+        };
+
+        std::sort(equivalent_sites.begin(), equivalent_sites.end(), criteria);
+
+        for (auto& logical_block : LogicalBlockTypes) {
+            for (auto site : equivalent_sites) {
+                if (0 == strcmp(logical_block.name, site->pb_type->name)) {
+                    logical_block.equivalent_tiles.push_back(&physical_tile);
+                    break;
+                }
+            }
+        }
+    }
+    /* Pass 2: every non-empty logical block needs a tile, and all its tiles must agree on each pin's flags */
+    for (auto& logical_block : LogicalBlockTypes) {
+        if (logical_block.index == EMPTY_TYPE_INDEX) continue;
+
+        auto& equivalent_tiles = logical_block.equivalent_tiles;
+
+        if (equivalent_tiles.empty()) {
+            archfpga_throw(__FILE__, __LINE__,
+                           "Logical Block %s does not have any equivalent tiles.\n", logical_block.name);
+        }
+        /* Logical pin -> first ignored/global value seen; later tiles and sub-tiles must match it */
+        std::unordered_map<int, bool> ignored_pins_check_map;
+        std::unordered_map<int, bool> global_pins_check_map;
+        /* Order the block's tiles in place by ascending (tile pin count - block pin count) */
+        auto criteria = [&logical_block](const t_physical_tile_type* lhs, const t_physical_tile_type* rhs) {
+            int num_logical_pins = logical_block.pb_type->num_pins;
+
+            int lhs_num_pins = lhs->num_inst_pins;
+            int rhs_num_pins = rhs->num_inst_pins;
+
+            int lhs_diff_num_pins = lhs_num_pins - num_logical_pins;
+            int rhs_diff_num_pins = rhs_num_pins - num_logical_pins;
+
+            return lhs_diff_num_pins < rhs_diff_num_pins;
+        };
+
+        std::sort(equivalent_tiles.begin(), equivalent_tiles.end(), criteria);
+
+        for (int pin = 0; pin < logical_block.pb_type->num_pins; pin++) {
+            for (auto& tile : equivalent_tiles) {
+                /* Bound by reference: the maps are only read, so copying them per pin is wasted work */
+                const auto& direct_maps = tile->tile_block_pin_directs_map.at(logical_block.index);
+                for (auto& sub_tile : tile->sub_tiles) {
+                    const auto& equiv_sites = sub_tile.equivalent_sites;
+                    if (std::find(equiv_sites.begin(), equiv_sites.end(), &logical_block) == equiv_sites.end()) {
+                        continue; /* this sub-tile cannot host the block */
+                    }
+
+                    const auto& direct_map = direct_maps.at(sub_tile.index);
+
+                    auto result = direct_map.find(t_logical_pin(pin));
+                    if (result == direct_map.end()) {
+                        archfpga_throw(__FILE__, __LINE__,
+                                       "Logical pin %d not present in pin mapping between Tile %s and Block %s.\n",
+                                       pin, tile->name, logical_block.name);
+                    }
+                    /* Translate logical pin -> sub-tile pin -> tile-level physical pin */
+                    int sub_tile_pin_index = result->second.pin;
+                    int phy_index = sub_tile.sub_tile_to_tile_pin_indices[sub_tile_pin_index];
+
+                    bool is_ignored = tile->is_ignored_pin[phy_index];
+                    bool is_global = tile->is_pin_global[phy_index];
+                    /* insert() leaves an existing entry untouched, so a failed insert exposes the earlier value */
+                    auto ignored_result = ignored_pins_check_map.insert(std::pair<int, bool>(pin, is_ignored));
+                    if (!ignored_result.second && ignored_result.first->second != is_ignored) {
+                        archfpga_throw(__FILE__, __LINE__,
+                                       "Physical Tile %s has a different value for the ignored pin (physical pin: %d, logical pin: %d) "
+                                       "different from the corresponding pins of the other equivalent site %s\n.",
+                                       tile->name, phy_index, pin, logical_block.name);
+                    }
+
+                    auto global_result = global_pins_check_map.insert(std::pair<int, bool>(pin, is_global));
+                    if (!global_result.second && global_result.first->second != is_global) {
+                        archfpga_throw(__FILE__, __LINE__,
+                                       "Physical Tile %s has a different value for the global pin (physical pin: %d, logical pin: %d) "
+                                       "different from the corresponding pins of the other equivalent sites\n.",
+                                       tile->name, phy_index, pin);
+                    }
+                }
+            }
+        }
+    }
+}
+
+/* Sets up the pin classes for the type: fills class_inf, pin_class, is_ignored_pin, is_pin_global and clock_pin_indices, and each sub tile's class_range. */
+void setup_pin_classes(t_physical_tile_type* type) {
+    int i, k;
+    int pin_count;
+    int num_class;
+
+    for (i = 0; i < type->num_pins; i++) { // one placeholder entry per pin, overwritten in the loops below
+        type->pin_class.push_back(OPEN);
+        type->is_ignored_pin.push_back(true);
+        type->is_pin_global.push_back(true);
+    }
+
+    pin_count = 0; // running pin index, shared across all sub tiles
+
+    t_class_range class_range;
+
+    /* Equivalent pins share the same class, non-equivalent pins belong to different pin classes */
+    for (auto& sub_tile : type->sub_tiles) {
+        int capacity = sub_tile.capacity.total();
+        class_range.low = type->class_inf.size(); // index of the first class created for this sub tile
+        class_range.high = class_range.low - 1; // empty range for now; incremented once per class added
+        for (i = 0; i < capacity; ++i) { // the ports are walked once per capacity instance
+            for (const auto& port : sub_tile.ports) {
+                if (port.equivalent != PortEquivalence::NONE) { // the whole port forms a single class
+                    t_class class_inf;
+                    num_class = (int)type->class_inf.size();
+                    class_inf.num_pins = port.num_pins;
+                    class_inf.equivalence = port.equivalent;
+
+                    if (port.type == IN_PORT) {
+                        class_inf.type = RECEIVER;
+                    } else {
+                        VTR_ASSERT(port.type == OUT_PORT);
+                        class_inf.type = DRIVER;
+                    }
+
+                    for (k = 0; k < port.num_pins; ++k) {
+                        class_inf.pinlist.push_back(pin_count);
+                        type->pin_class[pin_count] = num_class;
+                        // clock pins and other specified global ports are initially specified
+                        // as ignored pins (i.e. connections are not created in the rr_graph and
+                        // nets connected to the port are ignored as well).
+                        type->is_ignored_pin[pin_count] = port.is_clock || port.is_non_clock_global;
+                        // clock pins and other specified global ports are flagged as global
+                        type->is_pin_global[pin_count] = port.is_clock || port.is_non_clock_global;
+
+                        if (port.is_clock) {
+                            type->clock_pin_indices.push_back(pin_count);
+                        }
+
+                        pin_count++;
+                    }
+
+                    type->class_inf.push_back(class_inf);
+                    class_range.high++;
+                } else if (port.equivalent == PortEquivalence::NONE) { // one single-pin class per pin of the port
+                    for (k = 0; k < port.num_pins; ++k) {
+                        t_class class_inf;
+                        num_class = (int)type->class_inf.size();
+                        class_inf.num_pins = 1;
+                        class_inf.pinlist.push_back(pin_count);
+                        class_inf.equivalence = port.equivalent;
+
+                        if (port.type == IN_PORT) {
+                            class_inf.type = RECEIVER;
+                        } else {
+                            VTR_ASSERT(port.type == OUT_PORT);
+                            class_inf.type = DRIVER;
+                        }
+
+                        type->pin_class[pin_count] = num_class;
+                        // clock pins and other specified global ports are initially specified
+                        // as ignored pins (i.e. connections are not created in the rr_graph and
+                        // nets connected to the port are ignored as well).
+                        type->is_ignored_pin[pin_count] = port.is_clock || port.is_non_clock_global;
+                        // clock pins and other specified global ports are flagged as global
+                        type->is_pin_global[pin_count] = port.is_clock || port.is_non_clock_global;
+
+                        if (port.is_clock) {
+                            type->clock_pin_indices.push_back(pin_count);
+                        }
+
+                        pin_count++;
+
+                        type->class_inf.push_back(class_inf);
+                        class_range.high++;
+                    }
+                }
+            }
+        }
+
+        type->sub_tiles[sub_tile.index].class_range = class_range; // assumes sub_tile.index is this sub tile's position in type->sub_tiles - TODO confirm
+    }
+
+    VTR_ASSERT(pin_count == type->num_pins); // the sub tile ports (times capacity) must account for every pin
+}
diff --git a/third_party/vtr/libs/archfpga/src/arch_util.h b/third_party/vtr/libs/archfpga/src/arch_util.h
new file mode 100644
index 000000000..7d882450a
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/arch_util.h
@@ -0,0 +1,125 @@
+#ifndef ARCH_UTIL_H
+#define ARCH_UTIL_H
+
+#include <regex>
+#include <unordered_set>
+#include "physical_types.h"
+
+/**
+ * @brief sets the architecture file name to be retrieved by the various parser functions
+ */
+void set_arch_file_name(const char* arch);
+
+/**
+ * @brief returns the architecture file name, requires that it was previously set
+ */
+const char* get_arch_file_name();
+
+constexpr const char* EMPTY_BLOCK_NAME = "EMPTY"; ///< default name argument of get_empty_physical_type() and get_empty_logical_type()
+
+class InstPort { // a name plus low/high index pair for an instance, and the same for a port
+  public:
+    static constexpr int UNSPECIFIED = -1; // default value of every low/high index (see name_index)
+
+    InstPort() = default;
+    InstPort(std::string str); // not explicit: a std::string converts implicitly. NOTE(review): presumably parses an "instance.port" style reference - defined elsewhere, confirm
+    std::string instance_name() const { return instance_.name; } // returns a copy
+    std::string port_name() const { return port_.name; } // returns a copy
+
+    int instance_low_index() const { return instance_.low_idx; }
+    int instance_high_index() const { return instance_.high_idx; }
+    int port_low_index() const { return port_.low_idx; }
+    int port_high_index() const { return port_.high_idx; }
+
+    int num_instances() const; // defined outside this header
+    int num_pins() const; // defined outside this header
+
+  public: // mutators: only the port index bounds have setters
+    void set_port_low_index(int val) { port_.low_idx = val; }
+    void set_port_high_index(int val) { port_.high_idx = val; }
+
+  private:
+    struct name_index {
+        std::string name = "";
+        int low_idx = UNSPECIFIED;
+        int high_idx = UNSPECIFIED;
+    };
+
+    name_index parse_name_index(const std::string& str); // defined outside this header
+
+    name_index instance_;
+    name_index port_;
+};
+
+void free_arch(t_arch* arch);
+void free_arch_models(t_model* models);
+t_model* free_arch_model(t_model* model);
+void free_arch_model_ports(t_model_ports* model_ports);
+t_model_ports* free_arch_model_port(t_model_ports* model_port);
+
+void free_type_descriptors(std::vector<t_logical_block_type>& type_descriptors);
+void free_type_descriptors(std::vector<t_physical_tile_type>& type_descriptors);
+
+t_port* findPortByName(const char* name, t_pb_type* pb_type, int* high_index, int* low_index);
+
+/** @brief Returns an empty physical tile type, assigned with the given name argument.
+ *         EMPTY_BLOCK_NAME is used if no name is provided
+ */
+t_physical_tile_type get_empty_physical_type(const char* name = EMPTY_BLOCK_NAME);
+
+/** @brief Returns an empty logical block type, assigned with the given name argument.
+ *         EMPTY_BLOCK_NAME is used if no name is provided
+ */
+t_logical_block_type get_empty_logical_type(const char* name = EMPTY_BLOCK_NAME);
+
+std::unordered_set<t_logical_block_type_ptr> get_equivalent_sites_set(t_physical_tile_type_ptr type);
+
+void alloc_and_load_default_child_for_pb_type(t_pb_type* pb_type,
+                                              char* new_name,
+                                              t_pb_type* copy);
+
+void ProcessLutClass(t_pb_type* lut_pb_type);
+
+void ProcessMemoryClass(t_pb_type* mem_pb_type);
+
+e_power_estimation_method power_method_inherited(e_power_estimation_method parent_power_method);
+
+void CreateModelLibrary(t_arch* arch);
+
+void SyncModelsPbTypes(t_arch* arch,
+                       const std::vector<t_logical_block_type>& Types);
+
+void SyncModelsPbTypes_rec(t_arch* arch,
+                           t_pb_type* pb_type);
+
+void primitives_annotation_clock_match(t_pin_to_pin_annotation* annotation,
+                                       t_pb_type* parent_pb_type);
+
+bool segment_exists(const t_arch* arch, std::string name);
+const t_segment_inf* find_segment(const t_arch* arch, std::string name);
+bool is_library_model(const char* model_name);
+bool is_library_model(const t_model* model);
+
+//Returns true if the specified block type contains the specified blif model name
+bool block_type_contains_blif_model(t_logical_block_type_ptr type, const std::string& blif_model_name);
+
+//Returns true if a pb_type (or its children) contains the specified blif model name
+bool pb_type_contains_blif_model(const t_pb_type* pb_type, const std::string& blif_model_name);
+
+const t_pin_to_pin_annotation* find_sequential_annotation(const t_pb_type* pb_type, const t_model_ports* port, enum e_pin_to_pin_delay_annotations annot_type);
+const t_pin_to_pin_annotation* find_combinational_annotation(const t_pb_type* pb_type, std::string in_port, std::string out_port);
+
+/**
+ * @brief Updates the physical and logical types based on the equivalence between one and the other.
+ *
+ * This function is required to check and synchronize all the information to be able to use the logical block
+ * equivalence, and link all the logical block pins to the physical tile ones, given that multiple logical blocks (i.e. pb_types)
+ * can be placed at the same physical location if this is allowed in the architecture description.
+ *
+ * See https://docs.verilogtorouting.org/en/latest/tutorials/arch/equivalent_sites/ for reference
+ */
+void link_physical_logical_types(std::vector<t_physical_tile_type>& PhysicalTileTypes,
+                                 std::vector<t_logical_block_type>& LogicalBlockTypes);
+
+void setup_pin_classes(t_physical_tile_type* type);
+#endif
diff --git a/third_party/vtr/libs/archfpga/src/cad_types.h b/third_party/vtr/libs/archfpga/src/cad_types.h
new file mode 100644
index 000000000..d04845c47
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/cad_types.h
@@ -0,0 +1,130 @@
+/*
+ * Data types used to give architectural hints for the CAD algorithm
+ */
+#ifndef CAD_TYPES_H
+#define CAD_TYPES_H
+
+#include "logic_types.h"
+#include "physical_types.h"
+
+struct t_pack_pattern_connections;
+
+/**
+ * Data structure used to define the structure of a pack pattern that is defined in the architecture file
+ *
+ * For example: for a pack pattern of a 6-LUT and a FF, each of those primitives will be defined by
+ * a t_pack_pattern_block and each of them will have one t_pack_pattern_connections.
+ *
+ * Data members:
+ *      pattern_index : the id of the pattern this block is part of (matches "index" in t_pack_patterns)
+ *      pb_type       : the pb_type (primitive) that this block represents (Ex. LUT, Adder, FF, etc.)
+ *      connections   : linked list of connections between this t_pack_pattern_block and other
+ *                      t_pack_pattern_blocks in this pack pattern as defined in the architecture
+ *      block_id      : the id of this t_pack_pattern_block within its pack pattern, used to access
+ *                      is_block_optional array in t_pack_patterns and also to access the atom_block_ids
+ *                      vector in the t_pack_molecule data structure.
+ */
+struct t_pack_pattern_block {
+    int pattern_index;
+    const t_pb_type* pb_type;
+    t_pack_pattern_connections* connections; // head of the list; further entries are reached through t_pack_pattern_connections::next
+    int block_id;
+};
+
+/**
+ * Describes a linked list of connections of a t_pack_pattern_block
+ *
+ * Data members:
+ *      from_block : block driving this connection
+ *      from_pin   : specific pin in the from_block driving the connection
+ *      to_block   : block driven by this connection
+ *      to_pin     : specific pin in the to_block driven by this connection
+ *      next       : next connection in the linked list
+ */
+struct t_pack_pattern_connections {
+    t_pack_pattern_block* from_block;
+    t_pb_graph_pin* from_pin;
+
+    t_pack_pattern_block* to_block;
+    t_pb_graph_pin* to_pin;
+
+    t_pack_pattern_connections* next; // NOTE(review): presumably nullptr on the last connection of the list - confirm
+};
+
+/**
+ * Describes a pack pattern defined in the architecture. A pack pattern is an
+ * architectural concept that defines a pattern of highly constrained and/or desirable
+ * arrangement of primitives that exists within one logic cluster (Ex. CLB).
+ *
+ * For example: A pack pattern could be a 6-LUT and a FF. Where the architecture
+ * file gives a hint for the packer to pack a 6-LUT that is followed by a FF in the
+ * netlist together in one logic element. This helps the packer to achieve high
+ * packing density. Another example, is a carry chain where the adders in the netlist
+ * should be packed together to be able to route Cout to Cin connections using the
+ * dedicated wiring in the architecture.
+ *
+ * Data members:
+ *      name              : name given to the pack pattern in the architecture file
+ *      index             : id of the pack pattern in the list_of_pack_patterns array defined in the packer code
+ *      root_block        : the block defining the starting point of this pattern. For example: for
+ *                          a carry chain pattern, it is the primitive driven by a cluster input pin.
+ *      base_cost         : the sum of the primitive base costs of all the primitives in this pack pattern.
+ *                          The primitive base cost is defined by compute_primitive_base_cost in vpr_utils.cpp
+ *      num_blocks        : total number of primitives in this pack pattern
+ *      is_block_optional : [0..num_blocks-1] is true if the t_pack_pattern_block defined by block_id
+ *                          is not mandatory for this pack pattern to be formed. For example, in a carry
+ *                          chain pack pattern, the first adder primitive (root_block) is mandatory to
+ *                          form the pattern, but every adder primitive after that is optional as the case
+ *                          when forming a short adder chain.
+ *      is_chain          : does this pack pattern go across clusters. For example, carry chains can normally cross
+ *                          between logic blocks.
+ *      chain_root_pins   : this is only non-empty for pack_patterns with is_chain set. It points to a specific
+ *                          pin of the root_block primitive (Ex. cin of an adder primitive) that is directly
+ *                          connected to a cluster-level block pin that can be driven from the preceding cluster.
+ *                          The first dimension size is greater than one if the cluster has more than one chain
+ *                          of this type. For example, an architecture with two independent adder carry chains
+ *                          with different cluster level Cin and Cout pins. The second dimension size is greater
+ *                          than one if cin of the cluster can reach more than one adder. This means that there is
+ *                          a mux in front of the cin pin of one or more adders in the middle of this chain that
+ *                          chooses between the cout of the preceding adder and the cin pin of the cluster. Which will
+ *                          give more freedom to the packer when placing small adders that are driven by a constant
+ *                          net (gnd/vdd)  [0...num_of_chains][0...num_of_tie_offs]
+ */
+struct t_pack_patterns {
+    char* name;
+    int index;
+    float base_cost;
+
+    t_pack_pattern_block* root_block;
+
+    int num_blocks;
+    bool* is_block_optional;
+
+    bool is_chain;
+    std::vector<std::vector<t_pb_graph_pin*>> chain_root_pins;
+
+    // Starts out as an invalid pattern: null name/root/optional array, index -1, no blocks, not a chain
+    t_pack_patterns()
+        : name(nullptr)
+        , index(-1)
+        , base_cost(0)
+        , root_block(nullptr)
+        , num_blocks(0)
+        , is_block_optional(nullptr)
+        , is_chain(false) {
+    }
+};
+
+/**
+ * Keeps track of locations that a primitive can go to during packing
+ * Linked list for easy insertion/deletion
+ */
+struct t_cluster_placement_primitive {
+    t_pb_graph_node* pb_graph_node;
+    t_cluster_placement_primitive* next_primitive; /* next entry of the linked list */
+    bool valid;
+    float base_cost;        /* cost independent of current status of packing */
+    float incremental_cost; /* cost dependent on current status of packing */
+};
+
+#endif
diff --git a/third_party/vtr/libs/archfpga/src/clock_types.h b/third_party/vtr/libs/archfpga/src/clock_types.h
new file mode 100644
index 000000000..ac622d29a
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/clock_types.h
@@ -0,0 +1,63 @@
+#ifndef CLOCK_TYPES_H
+#define CLOCK_TYPES_H
+
+#include <string>
+#include <vector>
+
+enum class e_clock_type { // value type of t_clock_network_arch::type
+    SPINE,
+    RIB,
+    H_TREE
+};
+
+struct t_metal_layer { // neither member has a default initializer
+    float r_metal; // NOTE(review): presumably the layer's resistance; units are not visible here - confirm
+    float c_metal; // NOTE(review): presumably the layer's capacitance; units are not visible here - confirm
+};
+
+struct t_wire_repeat { // NOTE(review): values are kept as text, presumably unevaluated expressions - confirm against the parser
+    std::string x;
+    std::string y;
+};
+
+struct t_wire { // NOTE(review): values are kept as text, presumably unevaluated expressions - confirm against the parser
+    std::string start;
+    std::string end;
+    std::string position;
+};
+
+struct t_clock_drive {
+    std::string name;
+    std::string offset;
+    int arch_switch_idx; // no default initializer. NOTE(review): presumably an index into the architecture switch list - confirm
+};
+
+struct t_clock_taps { // NOTE(review): offset/increment are kept as text, presumably unevaluated expressions - confirm
+    std::string name;
+    std::string offset;
+    std::string increment;
+};
+
+struct t_clock_network_arch {
+    std::string name;
+    int num_inst; // no default initializer
+
+    e_clock_type type; // SPINE, RIB or H_TREE; no default initializer
+
+    std::string metal_layer; // NOTE(review): presumably the name of a metal layer described by a t_metal_layer - confirm
+    t_wire wire;
+    t_wire_repeat repeat;
+    t_clock_drive drive;
+    t_clock_taps tap;
+};
+
+struct t_clock_connection_arch {
+    std::string from;
+    std::string to;
+    int arch_switch_idx; // no default initializer. NOTE(review): presumably an index into the architecture switch list - confirm
+    std::string locationx; // NOTE(review): kept as text, presumably an unevaluated expression - confirm
+    std::string locationy; // NOTE(review): kept as text, presumably an unevaluated expression - confirm
+    float fc; // no default initializer
+};
+
+#endif
diff --git a/third_party/vtr/libs/archfpga/src/device_grid.cc b/third_party/vtr/libs/archfpga/src/device_grid.cc
new file mode 100644
index 000000000..ffe0cc50e
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/device_grid.cc
@@ -0,0 +1,42 @@
+#include "device_grid.h"
+
+DeviceGrid::DeviceGrid(std::string grid_name, vtr::Matrix<t_grid_tile> grid)
+    : name_(grid_name)
+    , grid_(grid) {
+    count_instances(); //populate instance_counts_ from the grid_ that was just stored
+}
+
+DeviceGrid::DeviceGrid(std::string grid_name, vtr::Matrix<t_grid_tile> grid, std::vector<t_logical_block_type_ptr> limiting_res)
+    : DeviceGrid(grid_name, grid) {
+    limiting_resources_ = limiting_res; //assigned in the body: a delegating constructor cannot also list member initializers
+}
+
+size_t DeviceGrid::num_instances(t_physical_tile_type_ptr type) const {
+    //find() rather than operator[]: a lookup in a const method must not insert
+    const auto entry = instance_counts_.find(type);
+    if (entry == instance_counts_.end()) {
+        return 0; //No count recorded for this type
+    }
+    return entry->second;
+}
+
+void DeviceGrid::clear() { //Empties grid_ and instance_counts_; name_ and limiting_resources_ are left untouched
+    grid_.clear();
+    instance_counts_.clear();
+}
+
+void DeviceGrid::count_instances() {
+    instance_counts_.clear();
+
+    //Rebuild instance_counts_: sum of type->capacity over the root locations in the grid, per tile type
+    for (size_t x = 0; x < width(); ++x) {
+        for (size_t y = 0; y < height(); ++y) {
+            auto type = grid_[x][y].type;
+            if (type == nullptr) continue; //t_grid_tile::type defaults to nullptr; skip it rather than dereference it
+            if (grid_[x][y].width_offset == 0 && grid_[x][y].height_offset == 0) {
+                //Add capacity only if this is the root location
+                instance_counts_[type] += type->capacity;
+            }
+        }
+    }
+}
diff --git a/third_party/vtr/libs/archfpga/src/device_grid.h b/third_party/vtr/libs/archfpga/src/device_grid.h
new file mode 100644
index 000000000..c4bfbd087
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/device_grid.h
@@ -0,0 +1,61 @@
+#ifndef DEVICE_GRID
+#define DEVICE_GRID
+
+#include <string>
+#include <vector>
+#include "vtr_ndmatrix.h"
+#include "physical_types.h"
+
+///@brief t_grid_tile is the minimum tile of the fpga
+struct t_grid_tile {
+    t_physical_tile_type_ptr type = nullptr; ///<Pointer to type descriptor, NULL for illegal
+    int width_offset = 0;                    ///<Number of grid tiles reserved based on width (right) of a block
+    int height_offset = 0;                   ///<Number of grid tiles reserved based on height (top) of a block
+    const t_metadata_dict* meta = nullptr;   ///<Metadata for this tile, nullptr by default (NOTE(review): ownership is not visible here)
+};
+
+class DeviceGrid {
+  public:
+    DeviceGrid() = default;
+    DeviceGrid(std::string grid_name, vtr::Matrix<t_grid_tile> grid);
+    DeviceGrid(std::string grid_name, vtr::Matrix<t_grid_tile> grid, std::vector<t_logical_block_type_ptr> limiting_res);
+
+    const std::string& name() const { return name_; }
+
+    size_t width() const { return grid_.dim_size(0); }  //size of the first index
+    size_t height() const { return grid_.dim_size(1); } //size of the second index
+
+    //Note: supports 2-d indexing [0..width()-1][0..height()-1] yielding a t_grid_tile
+    auto operator[](size_t index) const { return grid_[index]; }
+    auto operator[](size_t index) { return grid_[index]; }
+
+    const vtr::Matrix<t_grid_tile>& matrix() const {
+        return grid_;
+    }
+
+    void clear();
+
+    size_t num_instances(t_physical_tile_type_ptr type) const;
+
+    /**
+     * @brief Returns the block types which limits the device size (may be empty if
+     *        resource limits were not considered when selecting the device).
+     */
+    std::vector<t_logical_block_type_ptr> limiting_resources() const { return limiting_resources_; } //returns a copy
+
+  private:
+    void count_instances();
+
+    std::string name_;
+
+    //Note that vtr::Matrix operator[] returns an intermediate type
+    //which can be used for indexing in the second dimension, allowing
+    //traditional 2-d indexing to be used
+    vtr::Matrix<t_grid_tile> grid_;
+
+    std::map<t_physical_tile_type_ptr, size_t> instance_counts_; //read by num_instances(), emptied by clear()
+
+    std::vector<t_logical_block_type_ptr> limiting_resources_;
+};
+
+#endif
diff --git a/third_party/vtr/libs/archfpga/src/echo_arch.cc b/third_party/vtr/libs/archfpga/src/echo_arch.cc
new file mode 100644
index 000000000..3943f6e6a
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/echo_arch.cc
@@ -0,0 +1,632 @@
+#include <cstring>
+#include <cstdlib>
+#include <vector>
+#include <unordered_set>
+
+#include "echo_arch.h"
+#include "arch_types.h"
+#include "arch_util.h"
+#include "vtr_list.h"
+#include "vtr_util.h"
+#include "vtr_memory.h"
+#include "vtr_assert.h"
+
+using vtr::t_linked_vptr;
+
+void PrintArchInfo(FILE* Echo, const t_arch* arch);
+static void PrintPb_types_rec(FILE* Echo, const t_pb_type* pb_type, int level);
+static void PrintPb_types_recPower(FILE* Echo,
+                                   const t_pb_type* pb_type,
+                                   const char* tabs);
+
+/* Writes a dump of the loaded architecture data to EchoFile so the user can verify it
+ * was interpreted correctly. */
+void EchoArch(const char* EchoFile,
+              const std::vector<t_physical_tile_type>& PhysicalTileTypes,
+              const std::vector<t_logical_block_type>& LogicalBlockTypes,
+              const t_arch* arch) {
+    int i, j;
+    FILE* Echo;
+    t_model* cur_model;
+    t_model_ports* model_port;
+    t_linked_vptr* cur_vptr;
+
+    Echo = vtr::fopen(EchoFile, "w"); // NOTE(review): Echo is used below without a null check - confirm how vtr::fopen reports failure
+    cur_model = nullptr;
+
+    //Print all layout device switch/segment list info first
+    PrintArchInfo(Echo, arch);
+
+    //Models
+    fprintf(Echo, "*************************************************\n");
+    for (j = 0; j < 2; j++) { // two passes: j == 0 dumps arch->models, j == 1 dumps arch->model_library
+        if (j == 0) {
+            fprintf(Echo, "Printing user models \n");
+            cur_model = arch->models;
+        } else if (j == 1) {
+            fprintf(Echo, "Printing library models \n");
+            cur_model = arch->model_library;
+        }
+        while (cur_model) {
+            fprintf(Echo, "Model: \"%s\"\n", cur_model->name);
+            model_port = cur_model->inputs;
+            while (model_port) {
+                fprintf(Echo, "\tInput Ports: \"%s\" \"%d\" min_size=\"%d\"\n",
+                        model_port->name, model_port->size,
+                        model_port->min_size);
+                model_port = model_port->next;
+            }
+            model_port = cur_model->outputs;
+            while (model_port) {
+                fprintf(Echo, "\tOutput Ports: \"%s\" \"%d\" min_size=\"%d\"\n",
+                        model_port->name, model_port->size,
+                        model_port->min_size);
+                model_port = model_port->next;
+            }
+            cur_vptr = cur_model->pb_types; // each list node carries a t_pb_type* in data_vptr (see the cast below)
+            i = 0;
+            while (cur_vptr != nullptr) {
+                fprintf(Echo, "\tpb_type %d: \"%s\"\n", i,
+                        ((t_pb_type*)cur_vptr->data_vptr)->name);
+                cur_vptr = cur_vptr->next;
+                i++;
+            }
+
+            cur_model = cur_model->next;
+        }
+    }
+    fprintf(Echo, "*************************************************\n\n");
+    fprintf(Echo, "*************************************************\n");
+    for (auto& Type : PhysicalTileTypes) { // one section per physical tile type
+        fprintf(Echo, "Type: \"%s\"\n", Type.name);
+        fprintf(Echo, "\tcapacity: %d\n", Type.capacity);
+        fprintf(Echo, "\twidth: %d\n", Type.width);
+        fprintf(Echo, "\theight: %d\n", Type.height);
+        for (const t_fc_specification& fc_spec : Type.fc_specs) {
+            fprintf(Echo, "fc_value_type: ");
+            if (fc_spec.fc_value_type == e_fc_value_type::ABSOLUTE) {
+                fprintf(Echo, "ABSOLUTE");
+            } else if (fc_spec.fc_value_type == e_fc_value_type::FRACTIONAL) {
+                fprintf(Echo, "FRACTIONAL");
+            } else {
+                VTR_ASSERT(false);
+            }
+            fprintf(Echo, " fc_value: %f", fc_spec.fc_value);
+            fprintf(Echo, " segment: %s", arch->Segments[fc_spec.seg_index].name.c_str()); // seg_index is used unchecked as an index into arch->Segments
+            fprintf(Echo, " pins:");
+            for (int pin : fc_spec.pins) {
+                fprintf(Echo, " %d", pin);
+            }
+            fprintf(Echo, "\n");
+        }
+        fprintf(Echo, "\tnum_drivers: %d\n", Type.num_drivers);
+        fprintf(Echo, "\tnum_receivers: %d\n", Type.num_receivers);
+
+        int index = Type.index;
+        fprintf(Echo, "\tindex: %d\n", index);
+
+        auto equivalent_sites = get_equivalent_sites_set(&Type);
+
+        for (auto LogicalBlock : equivalent_sites) { // iterates an unordered_set, so the order of these lines is unspecified
+            fprintf(Echo, "\nEquivalent Site: %s\n", LogicalBlock->name);
+        }
+        fprintf(Echo, "\n");
+    }
+
+    fprintf(Echo, "*************************************************\n\n");
+    fprintf(Echo, "*************************************************\n");
+
+    for (auto& LogicalBlock : LogicalBlockTypes) {
+        if (LogicalBlock.pb_type) { // blocks without a pb_type print only the blank separator line
+            PrintPb_types_rec(Echo, LogicalBlock.pb_type, 2);
+        }
+        fprintf(Echo, "\n");
+    }
+
+    fclose(Echo);
+}
+
+//Added May 2013 Daniel Chen, help dump arch info after loading from XML
+void PrintArchInfo(FILE* Echo, const t_arch* arch) {
+    int i, j;
+
+    fprintf(Echo, "Printing architecture... \n\n");
+    //Layout
+    fprintf(Echo, "*************************************************\n");
+    for (const auto& grid_layout : arch->grid_layouts) {
+        if (grid_layout.grid_type == GridDefType::AUTO) {
+            fprintf(Echo, "Layout: '%s' Type: auto Aspect_Ratio: %f\n", grid_layout.name.c_str(), grid_layout.aspect_ratio);
+        } else {
+            VTR_ASSERT(grid_layout.grid_type == GridDefType::FIXED);
+            fprintf(Echo, "Layout: '%s' Type: fixed Width: %d Height %d\n", grid_layout.name.c_str(), grid_layout.width, grid_layout.height);
+        }
+    }
+    fprintf(Echo, "*************************************************\n\n");
+    //Device
+    fprintf(Echo, "*************************************************\n");
+    fprintf(Echo, "Device Info:\n");
+
+    fprintf(Echo,
+            "\tSizing: R_minW_nmos %e R_minW_pmos %e\n",
+            arch->R_minW_nmos, arch->R_minW_pmos);
+
+    fprintf(Echo, "\tArea: grid_logic_tile_area %e\n",
+            arch->grid_logic_tile_area);
+
+    fprintf(Echo, "\tChannel Width Distribution:\n");
+
+    switch (arch->Chans.chan_x_dist.type) {
+        case (UNIFORM):
+            fprintf(Echo, "\t\tx: type uniform peak %e\n",
+                    arch->Chans.chan_x_dist.peak);
+            break;
+        case (GAUSSIAN):
+            fprintf(Echo,
+                    "\t\tx: type gaussian peak %e \
+						  width %e Xpeak %e dc %e\n",
+                    arch->Chans.chan_x_dist.peak, arch->Chans.chan_x_dist.width,
+                    arch->Chans.chan_x_dist.xpeak, arch->Chans.chan_x_dist.dc);
+            break;
+        case (PULSE):
+            fprintf(Echo,
+                    "\t\tx: type pulse peak %e \
+						  width %e Xpeak %e dc %e\n",
+                    arch->Chans.chan_x_dist.peak, arch->Chans.chan_x_dist.width,
+                    arch->Chans.chan_x_dist.xpeak, arch->Chans.chan_x_dist.dc);
+            break;
+        case (DELTA):
+            fprintf(Echo,
+                    "\t\tx: distr dleta peak %e \
+						  Xpeak %e dc %e\n",
+                    arch->Chans.chan_x_dist.peak, arch->Chans.chan_x_dist.xpeak,
+                    arch->Chans.chan_x_dist.dc);
+            break;
+        default:
+            fprintf(Echo, "\t\tInvalid Distribution!\n");
+            break;
+    }
+
+    switch (arch->Chans.chan_y_dist.type) {
+        case (UNIFORM):
+            fprintf(Echo, "\t\ty: type uniform peak %e\n",
+                    arch->Chans.chan_y_dist.peak);
+            break;
+        case (GAUSSIAN):
+            fprintf(Echo,
+                    "\t\ty: type gaussian peak %e \
+						  width %e Xpeak %e dc %e\n",
+                    arch->Chans.chan_y_dist.peak, arch->Chans.chan_y_dist.width,
+                    arch->Chans.chan_y_dist.xpeak, arch->Chans.chan_y_dist.dc);
+            break;
+        case (PULSE):
+            fprintf(Echo,
+                    "\t\ty: type pulse peak %e \
+						  width %e Xpeak %e dc %e\n",
+                    arch->Chans.chan_y_dist.peak, arch->Chans.chan_y_dist.width,
+                    arch->Chans.chan_y_dist.xpeak, arch->Chans.chan_y_dist.dc);
+            break;
+        case (DELTA):
+            fprintf(Echo,
+                    "\t\ty: distr dleta peak %e \
+						  Xpeak %e dc %e\n",
+                    arch->Chans.chan_y_dist.peak, arch->Chans.chan_y_dist.xpeak,
+                    arch->Chans.chan_y_dist.dc);
+            break;
+        default:
+            fprintf(Echo, "\t\tInvalid Distribution!\n");
+            break;
+    }
+
+    switch (arch->SBType) {
+        case (WILTON):
+            fprintf(Echo, "\tSwitch Block: type wilton fs %d\n", arch->Fs);
+            break;
+        case (UNIVERSAL):
+            fprintf(Echo, "\tSwitch Block: type universal fs %d\n", arch->Fs);
+            break;
+        case (SUBSET):
+            fprintf(Echo, "\tSwitch Block: type subset fs %d\n", arch->Fs);
+            break;
+        default:
+            break;
+    }
+
+    fprintf(Echo, "\tInput Connect Block Switch Name: %s\n", arch->ipin_cblock_switch_name.c_str());
+
+    fprintf(Echo, "*************************************************\n\n");
+    //Switch list
+    fprintf(Echo, "*************************************************\n");
+    fprintf(Echo, "Switch List:\n");
+
+    //13 is hard coded because format of %e is always 1.123456e+12
+    //It always consists of 10 alphanumeric digits, a decimal
+    //and a sign
+    for (i = 0; i < arch->num_switches; i++) {
+        if (arch->Switches[i].type() == SwitchType::MUX) {
+            fprintf(Echo, "\tSwitch[%d]: name %s type mux\n", i + 1, arch->Switches[i].name);
+        } else if (arch->Switches[i].type() == SwitchType::TRISTATE) {
+            fprintf(Echo, "\tSwitch[%d]: name %s type tristate\n", i + 1, arch->Switches[i].name);
+        } else if (arch->Switches[i].type() == SwitchType::SHORT) {
+            fprintf(Echo, "\tSwitch[%d]: name %s type short\n", i + 1, arch->Switches[i].name);
+        } else if (arch->Switches[i].type() == SwitchType::BUFFER) {
+            fprintf(Echo, "\tSwitch[%d]: name %s type buffer\n", i + 1, arch->Switches[i].name);
+        } else {
+            VTR_ASSERT(arch->Switches[i].type() == SwitchType::PASS_GATE);
+            fprintf(Echo, "\tSwitch[%d]: name %s type pass_gate\n", i + 1, arch->Switches[i].name);
+        }
+        fprintf(Echo, "\t\t\t\tR %e Cin %e Cout %e\n", arch->Switches[i].R,
+                arch->Switches[i].Cin, arch->Switches[i].Cout);
+        fprintf(Echo, "\t\t\t\t#Tdel values %d buf_size %e mux_trans_size %e\n",
+                (int)arch->Switches[i].Tdel_map_.size(), arch->Switches[i].buf_size,
+                arch->Switches[i].mux_trans_size);
+        if (arch->Switches[i].power_buffer_type == POWER_BUFFER_TYPE_AUTO) {
+            fprintf(Echo, "\t\t\t\tpower_buffer_size auto\n");
+        } else {
+            fprintf(Echo, "\t\t\t\tpower_buffer_size %e\n",
+                    arch->Switches[i].power_buffer_size);
+        }
+    }
+
+    fprintf(Echo, "*************************************************\n\n");
+    //Segment List
+    fprintf(Echo, "*************************************************\n");
+    fprintf(Echo, "Segment List:\n");
+    for (i = 0; i < (int)(arch->Segments).size(); i++) {
+        const struct t_segment_inf& seg = arch->Segments[i];
+        fprintf(Echo,
+                "\tSegment[%d]: frequency %d length %d R_metal %e C_metal %e\n",
+                i + 1, seg.frequency, seg.length,
+                seg.Rmetal, seg.Cmetal);
+
+        if (seg.directionality == UNI_DIRECTIONAL) {
+            //wire_switch == arch_opin_switch
+            fprintf(Echo, "\t\t\t\ttype unidir mux_name %s\n",
+                    arch->Switches[seg.arch_wire_switch].name);
+        } else { //Should be bidir
+            fprintf(Echo, "\t\t\t\ttype bidir wire_switch %s arch_opin_switch %s\n",
+                    arch->Switches[seg.arch_wire_switch].name,
+                    arch->Switches[seg.arch_opin_switch].name);
+        }
+
+        fprintf(Echo, "\t\t\t\tcb ");
+        for (j = 0; j < (int)seg.cb.size(); j++) {
+            if (seg.cb[j]) {
+                fprintf(Echo, "1 ");
+            } else {
+                fprintf(Echo, "0 ");
+            }
+        }
+        fprintf(Echo, "\n");
+
+        fprintf(Echo, "\t\t\t\tsb ");
+        for (j = 0; j < (int)seg.sb.size(); j++) {
+            if (seg.sb[j]) {
+                fprintf(Echo, "1 ");
+            } else {
+                fprintf(Echo, "0 ");
+            }
+        }
+        fprintf(Echo, "\n");
+    }
+    fprintf(Echo, "*************************************************\n\n");
+    //Direct List
+    fprintf(Echo, "*************************************************\n");
+    fprintf(Echo, "Direct List:\n");
+    for (i = 0; i < arch->num_directs; i++) {
+        fprintf(Echo, "\tDirect[%d]: name %s from_pin %s to_pin %s\n", i + 1,
+                arch->Directs[i].name, arch->Directs[i].from_pin,
+                arch->Directs[i].to_pin);
+        fprintf(Echo, "\t\t\t\t x_offset %d y_offset %d z_offset %d\n",
+                arch->Directs[i].x_offset, arch->Directs[i].y_offset,
+                arch->Directs[i].sub_tile_offset);
+    }
+    fprintf(Echo, "*************************************************\n\n");
+
+    //Architecture Power
+    fprintf(Echo, "*************************************************\n");
+    fprintf(Echo, "Power:\n");
+    if (arch->power) {
+        fprintf(Echo, "\tlocal_interconnect C_wire %e factor %f\n",
+                arch->power->C_wire_local, arch->power->local_interc_factor);
+        fprintf(Echo, "\tlogical_effort_factor %f trans_per_sram_bit %f\n",
+                arch->power->logical_effort_factor,
+                arch->power->transistors_per_SRAM_bit);
+    }
+
+    fprintf(Echo, "*************************************************\n\n");
+    //Architecture Clock
+    fprintf(Echo, "*************************************************\n");
+    fprintf(Echo, "Clock:\n");
+    if (arch->clocks) {
+        for (i = 0; i < arch->clocks->num_global_clocks; i++) {
+            if (arch->clocks->clock_inf[i].autosize_buffer) {
+                fprintf(Echo, "\tClock[%d]: buffer_size auto C_wire %e", i + 1,
+                        arch->clocks->clock_inf->C_wire);
+            } else {
+                fprintf(Echo, "\tClock[%d]: buffer_size %e C_wire %e", i + 1,
+                        arch->clocks->clock_inf[i].buffer_size,
+                        arch->clocks->clock_inf[i].C_wire);
+            }
+            fprintf(Echo, "\t\t\t\tstat_prob %f switch_density %f period %e",
+                    arch->clocks->clock_inf[i].prob,
+                    arch->clocks->clock_inf[i].dens,
+                    arch->clocks->clock_inf[i].period);
+        }
+    }
+
+    fprintf(Echo, "*************************************************\n\n");
+}
+
+/*
+ * Recursively echoes one pb_type and everything nested beneath it.
+ *
+ *   Echo    - open stream the description is written to
+ *   pb_type - pb_type to print; its ports, modes, child pb_types,
+ *             interconnect and annotations are printed as well
+ *   level   - nesting depth: every line is prefixed with this many tabs
+ *             (children are printed two levels deeper)
+ *
+ * Strings that may be unset are routed through a null-safe helper, because
+ * handing a null pointer to a %s conversion is undefined behaviour.
+ */
+static void PrintPb_types_rec(FILE* Echo, const t_pb_type* pb_type, int level) {
+    //Substitutes the placeholder glibc prints for a null %s argument, so the
+    //echo output is unchanged where printing a null pointer used to "work"
+    auto text = [](const char* s) { return s ? s : "(null)"; };
+
+    //A negative depth would under-allocate the prefix buffer; treat it as zero
+    const int depth = (level > 0) ? level : 0;
+
+    //Indentation prefix shared by every line printed for this pb_type
+    char* tabs = (char*)vtr::malloc((depth + 1) * sizeof(char));
+    for (int i = 0; i < depth; i++) {
+        tabs[i] = '\t';
+    }
+    tabs[depth] = '\0';
+
+    fprintf(Echo, "%spb_type name: %s\n", tabs, pb_type->name);
+    //NOTE(review): blif_model is presumably only set on leaf pb_types - confirm
+    fprintf(Echo, "%s\tblif_model: %s\n", tabs, text(pb_type->blif_model));
+    fprintf(Echo, "%s\tclass_type: %d\n", tabs, pb_type->class_type);
+    fprintf(Echo, "%s\tnum_modes: %d\n", tabs, pb_type->num_modes);
+    fprintf(Echo, "%s\tnum_ports: %d\n", tabs, pb_type->num_ports);
+    for (int i = 0; i < pb_type->num_ports; i++) {
+        fprintf(Echo, "%s\tport %s type %d num_pins %d\n", tabs,
+                pb_type->ports[i].name, pb_type->ports[i].type,
+                pb_type->ports[i].num_pins);
+    }
+
+    if (pb_type->num_modes > 0) { /*one or more modes*/
+        for (int i = 0; i < pb_type->num_modes; i++) {
+            const auto& mode = pb_type->modes[i];
+
+            fprintf(Echo, "%s\tmode %s:\n", tabs, mode.name);
+            for (int j = 0; j < mode.num_pb_type_children; j++) {
+                PrintPb_types_rec(Echo, &mode.pb_type_children[j], depth + 2);
+            }
+            for (int j = 0; j < mode.num_interconnect; j++) {
+                const auto& interc = mode.interconnect[j];
+
+                fprintf(Echo, "%s\t\tinterconnect %d %s %s\n", tabs,
+                        interc.type, interc.input_string, interc.output_string);
+                for (int k = 0; k < interc.num_annotations; k++) {
+                    const auto& annot = interc.annotations[k];
+                    fprintf(Echo, "%s\t\t\tannotation %s %s %d: %s\n", tabs,
+                            text(annot.input_pins), text(annot.output_pins),
+                            annot.format, annot.value[0]);
+                }
+                //Print power info for interconnects, only when some value is non-zero
+                const auto* interc_power = interc.interconnect_power;
+                if (interc_power
+                    && (interc_power->power_usage.dynamic || interc_power->power_usage.leakage)) {
+                    fprintf(Echo, "%s\t\t\tpower %e %e\n", tabs,
+                            interc_power->power_usage.dynamic,
+                            interc_power->power_usage.leakage);
+                }
+            }
+        }
+    } else { /*leaf pb with unknown model*/
+        /*LUT(names) already handled, it naturally has 2 modes.
+         * I/O has no annotations to be displayed
+         * All other library or user models may have delays specified, e.g. Tsetup and Tcq
+         * Display the additional information*/
+        //strcmp() returns non-zero on mismatch: the model must be none of the three.
+        //A leaf without a resolved model is skipped rather than dereferenced.
+        if (pb_type->model != nullptr
+            && strcmp(pb_type->model->name, MODEL_NAMES)
+            && strcmp(pb_type->model->name, MODEL_INPUT)
+            && strcmp(pb_type->model->name, MODEL_OUTPUT)) {
+            for (int k = 0; k < pb_type->num_annotations; k++) {
+                const auto& annot = pb_type->annotations[k];
+                fprintf(Echo, "%s\t\t\tannotation %s %s %s %d: %s\n", tabs,
+                        text(annot.clock),
+                        text(annot.input_pins),
+                        text(annot.output_pins),
+                        annot.format,
+                        annot.value[0]);
+            }
+        }
+    }
+
+    if (pb_type->pb_type_power) {
+        PrintPb_types_recPower(Echo, pb_type, tabs);
+    }
+    free(tabs);
+}
+
+//Added May 2013 Daniel Chen, help dump arch info after loading from XML
+/*
+ * Echoes the power-estimation settings of one pb_type.
+ *
+ *   Echo    - open stream the description is written to
+ *   pb_type - pb_type whose pb_type_power is printed (must be non-null;
+ *             the caller checks this before calling)
+ *   tabs    - indentation prefix placed in front of every line
+ *
+ * Except for the 'undefined' and 'sum-of-children' methods, nothing is printed
+ * when the parent pb_type uses the same estimation method, so an inherited
+ * setting is only displayed once at the level where it was specified.
+ */
+static void PrintPb_types_recPower(FILE* Echo,
+                                   const t_pb_type* pb_type,
+                                   const char* tabs) {
+    const auto* pb_power = pb_type->pb_type_power;
+    const auto method = pb_power->estimation_method;
+
+    //True when this is NOT a top-level pb (top-level pbs have a NULL parent_mode)
+    //and the parent pb_type uses the very same estimation method
+    auto inherited_from_parent = [&]() {
+        return pb_type->parent_mode
+               && pb_type->parent_mode->parent_pb_type->pb_type_power->estimation_method == method;
+    };
+
+    //Instance-level static power, printed only when non-zero
+    auto print_static_power = [&]() {
+        if (pb_power->absolute_power_per_instance.leakage) {
+            fprintf(Echo, "%s\t\tstatic power_per_instance: %e \n", tabs,
+                    pb_power->absolute_power_per_instance.leakage);
+        }
+    };
+
+    //Instance-level dynamic power, printed only when non-zero
+    auto print_dynamic_power = [&]() {
+        if (pb_power->absolute_power_per_instance.dynamic) {
+            fprintf(Echo, "%s\t\tdynamic power_per_instance: %e \n", tabs,
+                    pb_power->absolute_power_per_instance.dynamic);
+        }
+    };
+
+    //One-line summary of the port at the given index
+    auto print_port_header = [&](int iport) {
+        fprintf(Echo, "%s\t\tport %s type %d num_pins %d\n", tabs,
+                pb_type->ports[iport].name, pb_type->ports[iport].type,
+                pb_type->ports[iport].num_pins);
+    };
+
+    //Methods that are inherited by children are displayed only once
+    switch (method) {
+        case POWER_METHOD_IGNORE:
+        case POWER_METHOD_AUTO_SIZES:
+        case POWER_METHOD_SPECIFY_SIZES:
+        case POWER_METHOD_TOGGLE_PINS:
+        case POWER_METHOD_C_INTERNAL:
+        case POWER_METHOD_ABSOLUTE:
+            if (inherited_from_parent()) {
+                return;
+            }
+            break;
+        default:
+            break;
+    }
+
+    /*Print power information for each pb if available*/
+    switch (method) {
+        case POWER_METHOD_UNDEFINED:
+            fprintf(Echo, "%s\tpower method: undefined\n", tabs);
+            break;
+
+        case POWER_METHOD_IGNORE:
+            fprintf(Echo, "%s\tpower method: ignore\n", tabs);
+            break;
+
+        case POWER_METHOD_SUM_OF_CHILDREN:
+            fprintf(Echo, "%s\tpower method: sum-of-children\n", tabs);
+            break;
+
+        case POWER_METHOD_AUTO_SIZES:
+            fprintf(Echo, "%s\tpower method: auto-size\n", tabs);
+            break;
+
+        case POWER_METHOD_SPECIFY_SIZES:
+            fprintf(Echo, "%s\tpower method: specify-size\n", tabs);
+            for (int iport = 0; iport < pb_type->num_ports; iport++) {
+                const auto* port_power = pb_type->ports[iport].port_power;
+
+                //A port is only listed when it has a buffer/wire setting or the
+                //pb itself carries a non-zero absolute power value
+                if (!port_power->buffer_type
+                    && !port_power->wire_type
+                    && !pb_power->absolute_power_per_instance.leakage
+                    && !pb_power->absolute_power_per_instance.dynamic) {
+                    continue;
+                }
+                print_port_header(iport);
+
+                //Buffer size (undefined/none print nothing)
+                if (port_power->buffer_type == POWER_BUFFER_TYPE_AUTO) {
+                    fprintf(Echo, "%s\t\t\tbuffer_size %s\n", tabs, "auto");
+                } else if (port_power->buffer_type == POWER_BUFFER_TYPE_ABSOLUTE_SIZE) {
+                    fprintf(Echo, "%s\t\t\tbuffer_size %f\n", tabs,
+                            port_power->buffer_size);
+                }
+
+                //Wire description (undefined/ignored print nothing)
+                switch (port_power->wire_type) {
+                    case (POWER_WIRE_TYPE_C):
+                        fprintf(Echo, "%s\t\t\twire_cap: %e\n", tabs,
+                                port_power->wire.C);
+                        break;
+                    case (POWER_WIRE_TYPE_ABSOLUTE_LENGTH):
+                        fprintf(Echo, "%s\t\t\twire_len(abs): %e\n", tabs,
+                                port_power->wire.absolute_length);
+                        break;
+                    case (POWER_WIRE_TYPE_RELATIVE_LENGTH):
+                        fprintf(Echo, "%s\t\t\twire_len(rel): %f\n", tabs,
+                                port_power->wire.relative_length);
+                        break;
+                    case (POWER_WIRE_TYPE_AUTO):
+                        fprintf(Echo, "%s\t\t\twire_len: %s\n", tabs, "auto");
+                        break;
+                    default:
+                        break;
+                }
+            }
+            print_static_power();
+            print_dynamic_power();
+            break;
+
+        case POWER_METHOD_TOGGLE_PINS:
+            fprintf(Echo, "%s\tpower method: pin-toggle\n", tabs);
+            for (int iport = 0; iport < pb_type->num_ports; iport++) {
+                const auto* port_power = pb_type->ports[iport].port_power;
+
+                //A port is only listed when it has toggle/scaling information or
+                //the pb itself carries a non-zero absolute power value
+                if (!port_power->energy_per_toggle
+                    && !port_power->scaled_by_port
+                    && !pb_power->absolute_power_per_instance.leakage
+                    && !pb_power->absolute_power_per_instance.dynamic) {
+                    continue;
+                }
+                print_port_header(iport);
+
+                //Toggle Energy
+                if (port_power->energy_per_toggle) {
+                    fprintf(Echo, "%s\t\t\tenergy_per_toggle %e\n", tabs,
+                            port_power->energy_per_toggle);
+                }
+
+                //Scaled by port (could be reversed); the pin index is only
+                //shown when the scaling port has more than one pin
+                if (port_power->scaled_by_port) {
+                    if (port_power->scaled_by_port->num_pins > 1) {
+                        fprintf(Echo,
+                                (port_power->reverse_scaled ? "%s\t\t\tscaled_by_static_prob_n: %s[%d]\n" : "%s\t\t\tscaled_by_static_prob: %s[%d]\n"),
+                                tabs,
+                                port_power->scaled_by_port->name,
+                                port_power->scaled_by_port_pin_idx);
+                    } else {
+                        fprintf(Echo,
+                                (port_power->reverse_scaled ? "%s\t\t\tscaled_by_static_prob_n: %s\n" : "%s\t\t\tscaled_by_static_prob: %s\n"),
+                                tabs,
+                                port_power->scaled_by_port->name);
+                    }
+                }
+            }
+            print_static_power();
+            print_dynamic_power();
+            break;
+
+        case POWER_METHOD_C_INTERNAL:
+            fprintf(Echo, "%s\tpower method: C-internal\n", tabs);
+            print_static_power();
+            if (pb_power->C_internal) {
+                fprintf(Echo, "%s\t\tdynamic c-internal: %e \n", tabs,
+                        pb_power->C_internal);
+            }
+            break;
+
+        case POWER_METHOD_ABSOLUTE:
+            fprintf(Echo, "%s\tpower method: absolute\n", tabs);
+            print_static_power();
+            print_dynamic_power();
+            break;
+
+        default:
+            fprintf(Echo, "%s\tpower method: error has occcured\n", tabs);
+            break;
+    }
+}
diff --git a/third_party/vtr/libs/archfpga/src/echo_arch.h b/third_party/vtr/libs/archfpga/src/echo_arch.h
new file mode 100644
index 000000000..4e9d23001
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/echo_arch.h
@@ -0,0 +1,11 @@
+#ifndef ECHO_ARCH_H
+#define ECHO_ARCH_H
+
+#include <vector>
+
+#include "arch_types.h"
+
+/*
+ * Dumps a loaded architecture description for inspection.
+ *
+ *   EchoFile          - name of the output file (the read_arch test driver
+ *                       passes its <output_file> argument here)
+ *   PhysicalTileTypes - physical tile types of the architecture
+ *   LogicalBlockTypes - logical block types of the architecture
+ *   arch              - the architecture itself
+ */
+void EchoArch(const char* EchoFile,
+              const std::vector<t_physical_tile_type>& PhysicalTileTypes,
+              const std::vector<t_logical_block_type>& LogicalBlockTypes,
+              const t_arch* arch);
+
+#endif
diff --git a/third_party/vtr/libs/archfpga/src/histogram.cc b/third_party/vtr/libs/archfpga/src/histogram.cc
new file mode 100644
index 000000000..aa00ec2ae
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/histogram.cc
@@ -0,0 +1,119 @@
+#include <algorithm>
+#include <string>
+#include <sstream>
+#include <cmath>
+
+#include "vtr_log.h"
+#include "vtr_assert.h"
+#include "vtr_util.h"
+
+#include "histogram.h"
+
+/*
+ * Bins 'values' into 'num_bins' equal-width buckets.
+ *
+ *   values    - samples to count
+ *   num_bins  - number of buckets to create
+ *   min_value - lower edge of the first bucket; NaN means "use the smallest sample"
+ *   max_value - upper edge of the last bucket; NaN means "use the largest sample"
+ *
+ * Returns the buckets in ascending order, or an empty vector when there are no
+ * samples or no bins. Each sample is counted in the first bucket whose upper
+ * edge is >= the sample, so samples below min_value land in the first bucket;
+ * a sample above max_value trips the VTR_ASSERT below.
+ */
+std::vector<HistogramBucket> build_histogram(std::vector<float> values, size_t num_bins, float min_value, float max_value) {
+    std::vector<HistogramBucket> histogram;
+
+    //Without samples or bins there is nothing to build (and with zero bins
+    //there would be neither a bin width nor a last bucket to fix up below)
+    if (values.empty() || num_bins == 0) return histogram;
+
+    if (std::isnan(min_value)) {
+        min_value = *std::min_element(values.begin(), values.end());
+    }
+    if (std::isnan(max_value)) {
+        max_value = *std::max_element(values.begin(), values.end());
+    }
+
+    //Determine the bin size
+    float range = max_value - min_value;
+    float bin_size = range / num_bins;
+
+    //Create the buckets
+    histogram.reserve(num_bins);
+    float bucket_min = min_value;
+    for (size_t ibucket = 0; ibucket < num_bins; ++ibucket) {
+        float bucket_max = bucket_min + bin_size;
+
+        histogram.emplace_back(bucket_min, bucket_max);
+
+        bucket_min = bucket_max;
+    }
+
+    //To avoid round-off errors we force the max value of the last bucket equal to the max value
+    histogram.back().max_value = max_value;
+
+    //Count the values into the buckets
+    auto comp = [](const HistogramBucket& bucket, float value) {
+        return bucket.max_value < value;
+    };
+    for (auto value : values) {
+        //Find the first bucket whose max is not less than the current value
+        auto iter = std::lower_bound(histogram.begin(), histogram.end(), value, comp);
+        VTR_ASSERT(iter != histogram.end());
+
+        iter->count++;
+    }
+
+    return histogram;
+}
+
+/*
+ * Logs a histogram through VTR_LOG, one formatted line per bucket, using a
+ * line width of 80 characters.
+ */
+void print_histogram(std::vector<HistogramBucket> histogram) {
+    const size_t char_width = 80;
+
+    //Iterate by reference: the formatted lines only need to be read
+    for (const std::string& line : format_histogram(histogram, char_width)) {
+        VTR_LOG("%s\n", line.c_str());
+    }
+}
+
+/*
+ * Returns the upper edge (max_value) of the bucket holding the most samples.
+ * When several buckets tie, the earliest one wins. Yields 0 if the histogram
+ * is empty or every bucket has a zero count.
+ */
+float get_histogram_mode(std::vector<HistogramBucket> histogram) {
+    //std::max_element returns the first of several equally large elements
+    auto fullest = std::max_element(histogram.begin(), histogram.end(),
+                                    [](const HistogramBucket& lhs, const HistogramBucket& rhs) {
+                                        return lhs.count < rhs.count;
+                                    });
+
+    if (fullest == histogram.end() || fullest->count == 0) {
+        return 0.0;
+    }
+    return fullest->max_value;
+}
+
+/*
+ * Renders a histogram as text, one line per bucket:
+ *
+ *     [min:max) count (percent%) |****
+ *
+ *   histogram - buckets to render
+ *   width     - target line width in characters; the bar of the fullest
+ *               bucket uses whatever is left after the fixed-size prefix
+ *
+ * Returns the lines in bucket order, or an empty vector when no bucket holds
+ * any sample. If 'width' is too small for the prefix, no bar is drawn.
+ */
+std::vector<std::string> format_histogram(std::vector<HistogramBucket> histogram, size_t width) {
+    std::vector<std::string> lines;
+
+    //Determine the maximum and total count
+    size_t max_count = 0;
+    size_t total_count = 0;
+    for (const HistogramBucket& bucket : histogram) {
+        max_count = std::max(max_count, bucket.count);
+        total_count += bucket.count;
+    }
+
+    if (max_count == 0) return lines; //Nothing to do
+
+    //Decimal digits needed to print the largest count. Counting them with
+    //integer division is exact, including for powers of ten (1, 10, 100, ...)
+    int count_digits = 0;
+    for (size_t remaining = max_count; remaining > 0; remaining /= 10) {
+        ++count_digits;
+    }
+
+    //Determine the maximum bar length
+    const size_t prefix_len = (18 + 3)             //bucket prefix
+                              + size_t(count_digits)
+                              + 7                  //percentage
+                              + 2;                 //" |" appended after count
+    //Clamp instead of letting the unsigned subtraction wrap around
+    const size_t bar_len = (width > prefix_len) ? width - prefix_len : 0;
+
+    lines.reserve(histogram.size());
+    for (const HistogramBucket& bucket : histogram) {
+        //total_count is non-zero here because max_count is non-zero
+        float pct = bucket.count / float(total_count) * 100;
+
+        std::string line = vtr::string_fmt("[% 9.2g:% 9.2g) %*zu (%5.1f%%) |", bucket.min_value, bucket.max_value, count_digits, bucket.count, pct);
+
+        //Bar length is proportional to the fullest bucket
+        size_t num_chars = std::round((double(bucket.count) / max_count) * bar_len);
+        line.append(num_chars, '*');
+
+        lines.push_back(line);
+    }
+
+    return lines;
+}
diff --git a/third_party/vtr/libs/archfpga/src/histogram.h b/third_party/vtr/libs/archfpga/src/histogram.h
new file mode 100644
index 000000000..29a0ae470
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/histogram.h
@@ -0,0 +1,26 @@
+#ifndef VPR_HISTOGRAM_H
+#define VPR_HISTOGRAM_H
+
+#include <cstddef>
+#include <limits>
+#include <string>
+#include <vector>
+
+/*
+ * One bin of a histogram: the value range it covers and the number of
+ * samples counted into it.
+ */
+struct HistogramBucket {
+    //The count is taken as an integer so large counts are stored exactly
+    HistogramBucket(float min_val, float max_val, size_t init_count = 0) noexcept
+        : min_value(min_val)
+        , max_value(max_val)
+        , count(init_count) {}
+
+    float min_value = std::numeric_limits<float>::quiet_NaN(); //lower edge of the bucket
+    float max_value = std::numeric_limits<float>::quiet_NaN(); //upper edge of the bucket
+    size_t count = 0;                                          //number of samples in the bucket
+};
+
+/*
+ * Bins 'values' into 'num_bins' equal-width buckets. A NaN min_value/max_value
+ * (the default) is replaced by the smallest/largest sample. Returns an empty
+ * vector when 'values' is empty.
+ */
+std::vector<HistogramBucket> build_histogram(std::vector<float> values, size_t num_bins, float min_value = std::numeric_limits<float>::quiet_NaN(), float max_value = std::numeric_limits<float>::quiet_NaN());
+
+/* Logs the formatted histogram, one line per bucket. */
+void print_histogram(std::vector<HistogramBucket> histogram);
+
+/*
+ * Returns the upper edge (max_value) of the bucket with the highest count,
+ * or 0 when no bucket has a non-zero count.
+ */
+float get_histogram_mode(std::vector<HistogramBucket> histogram);
+
+/*
+ * Renders the histogram as text lines (range, count, percentage and a bar of
+ * '*' scaled to the fullest bucket). Returns no lines when every count is zero.
+ */
+std::vector<std::string> format_histogram(std::vector<HistogramBucket> histogram, size_t width = 80);
+
+#endif
diff --git a/third_party/vtr/libs/archfpga/src/logic_types.h b/third_party/vtr/libs/archfpga/src/logic_types.h
new file mode 100644
index 000000000..4427b8501
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/logic_types.h
@@ -0,0 +1,57 @@
+/*
+ * Data types describing the logic (technology-mapped) models that the architecture can implement.
+ * Logic models include LUT (.names), flipflop (.latch), inpad, outpad, memory slice, etc.
+ * Logic models are from the internal VPR library, or can be user-defined (both defined in .blif)
+ *
+ * Date: February 19, 2009
+ * Authors: Jason Luu and Kenneth Kent
+ */
+
+#ifndef LOGIC_TYPES_H
+#define LOGIC_TYPES_H
+
+#include "vtr_list.h"
+#include <vector>
+#include <string>
+
+/*
+ * Logic model data types
+ * A logic model is described by its I/O ports and function name
+ *
+ * PORTS is the direction of a model port; it is the type of t_model_ports::dir.
+ */
+enum PORTS {
+    IN_PORT, /* input port */
+    OUT_PORT, /* output port */
+    INOUT_PORT, /* bidirectional port */
+    ERR_PORT /* no valid direction; the default value of t_model_ports::dir */
+};
+
+struct t_model_ports { /* One port of a logic model (t_model); ports are chained into singly linked lists through 'next' */
+    enum PORTS dir = ERR_PORT;                         /* port direction */
+    char* name = nullptr;                              /* name of this port */
+    int size = 0;                                      /* maximum number of pins */
+    int min_size = 0;                                  /* minimum number of pins */
+    bool is_clock = false;                             /* clock? */
+    bool is_non_clock_global = false;                  /* not a clock but is a special, global, control signal (eg global asynchronous reset, etc) */
+    std::string clock;                                 /* The clock associated with this pin (if the pin is sequential) */
+    std::vector<std::string> combinational_sink_ports; /* The other ports on this model which are combinationally driven by this port */
+
+    t_model_ports* next = nullptr; /* next port in the list (see t_model::inputs / t_model::outputs); nullptr at the end */
+
+    int index = -1; /* indexing for array look-up; -1 until assigned */
+};
+
+struct t_model { /* One logic model the architecture can implement; models are chained into a singly linked list through 'next' */
+    char* name = nullptr;             /* name of this logic model */
+    t_model_ports* inputs = nullptr;  /* linked list of input/clock ports */
+    t_model_ports* outputs = nullptr; /* linked list of output ports */
+    void* instances = nullptr; /* NOTE(review): opaque pointer, not interpreted in this header -- meaning to be confirmed against its users */
+    int used = 0; /* NOTE(review): presumably marks/counts use of this model -- confirm */
+    vtr::t_linked_vptr* pb_types = nullptr; /* Physical block types that implement this model */
+    t_model* next = nullptr;                /* next model (linked list) */
+
+    bool never_prune = false; /* Don't remove from the netlist even if a block of this type has no output ports used and, therefore, unconnected to the rest of the netlist */
+
+    int index = -1; /* NOTE(review): presumably an array look-up index like t_model_ports::index; -1 until assigned -- confirm */
+};
+
+#endif
diff --git a/third_party/vtr/libs/archfpga/src/main.cc b/third_party/vtr/libs/archfpga/src/main.cc
new file mode 100644
index 000000000..6a9e3f354
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/main.cc
@@ -0,0 +1,80 @@
+/*
+ * Test libarchfpga, try reading an architecture and print the results to a file
+ *
+ * Date: February 19, 2009
+ * Author: Jason Luu
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <vector>
+
+#include "vtr_error.h"
+#include "vtr_memory.h"
+
+#include "arch_util.h"
+#include "read_xml_arch_file.h"
+#include "echo_arch.h"
+
+void print_help();
+
+/*
+ * Entry point of the read_arch test driver.
+ *
+ * Expects exactly three arguments:
+ *   argv[1] - architecture XML file to read
+ *   argv[2] - timing_driven flag, parsed with atoi()
+ *   argv[3] - file the loaded data structures are echoed to
+ *
+ * Returns 0 on success, 1 on a usage error or when reading/echoing the
+ * architecture throws.
+ */
+int main(int argc, char** argv) {
+    //argv[0] is the program name, so only argc - 1 entries are user arguments
+    const int num_args = argc - 1;
+    if (num_args != 3) {
+        printf("Error: Unexpected # of arguments.  Expected 3 found %d arguments\n",
+               num_args);
+        print_help();
+        return 1;
+    }
+
+    try {
+        t_arch arch;
+        std::vector<t_physical_tile_type> physical_tile_types;
+        std::vector<t_logical_block_type> logical_block_types;
+
+        printf("------------------------------------------------------------------------------\n");
+        printf("- Read architecture file and print library data structures into an output file\n");
+        printf("------------------------------------------------------------------------------\n\n");
+
+        printf(
+            "Inputs: \n"
+            "architecture %s \n"
+            "timing_driven %d \n"
+            "output file %s\n",
+            argv[1], atoi(argv[2]), argv[3]);
+        printf("Reading in architecture\n");
+
+        /* load the architecture description from the XML file */
+        XmlReadArch(argv[1], atoi(argv[2]), &arch, physical_tile_types, logical_block_types);
+
+        printf("Printing Results\n");
+
+        EchoArch(argv[3], physical_tile_types, logical_block_types, &arch);
+
+        // CLEAN UP
+        free_arch(&arch);
+        free_type_descriptors(physical_tile_types);
+        free_type_descriptors(logical_block_types);
+
+    } catch (const vtr::VtrError& vtr_error) {
+        printf("Failed to process architecture %s: %s\n", argv[1], vtr_error.what());
+        return 1;
+    } catch (const std::exception& error) {
+        printf("Failed to process architecture %s: %s\n", argv[1], error.what());
+        return 1;
+    }
+
+    printf("Done\n");
+
+    return 0;
+}
+
+/* Prints the usage banner of the read_arch tool to stdout. */
+void print_help() {
+    //Adjacent string literals are concatenated by the compiler; the text
+    //contains no conversion specifiers, so it is emitted verbatim
+    printf(
+        "\n---------------------------------------------------------------------------------------\n"
+        "read_arch - Read a VPR architecture file and output internal data structures\n"
+        "\n"
+        "Usage: read_arch <arch_file.xml> <timing_driven (0|1)> <output_file>\n"
+        "\n"
+        "  ex: read_arch k4_n10.xml 1 arch_data.out\n"
+        "      Read timing-driven architecture k4_n10.xml and output the results to arch_data.out\n"
+        "\n---------------------------------------------------------------------------------------\n");
+}
diff --git a/third_party/vtr/libs/archfpga/src/parse_switchblocks.cc b/third_party/vtr/libs/archfpga/src/parse_switchblocks.cc
new file mode 100644
index 000000000..8587b1c56
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/parse_switchblocks.cc
@@ -0,0 +1,473 @@
+/*
+ * See vpr/SRC/route/build_switchblocks.c for a detailed description of how the new
+ * switch block format works and what files are involved.
+ *
+ *
+ * A large chunk of this file is dedicated to helping parse the initial switchblock
+ * specification in the XML arch file, providing error checking, etc.
+ *
+ * Another large chunk of this file is dedicated to parsing the actual formulas
+ * specified by the switch block permutation functions into their numeric counterparts.
+ */
+
+#include <string.h>
+#include <string>
+#include <sstream>
+#include <vector>
+#include <stack>
+#include <utility>
+#include <algorithm>
+
+#include "vtr_assert.h"
+#include "vtr_util.h"
+
+#include "pugixml.hpp"
+#include "pugixml_util.hpp"
+
+#include "arch_error.h"
+
+#include "read_xml_util.h"
+#include "arch_util.h"
+#include "arch_types.h"
+#include "physical_types.h"
+#include "parse_switchblocks.h"
+
+using pugiutil::ReqOpt;
+
+using vtr::FormulaParser;
+using vtr::t_formula_data;
+
+/**** Function Declarations ****/
+/*---- Functions for Parsing Switchblocks from Architecture ----*/
+
+//Load an XML wireconn specification into a t_wireconn_inf
+t_wireconn_inf parse_wireconn(pugi::xml_node node, const pugiutil::loc_data& loc_data);
+
+//Process the desired order of a wireconn
+static void parse_switchpoint_order(const char* order, SwitchPointOrder& switchpoint_order);
+
+//Process a wireconn defined in the inline style (using attributes)
+void parse_wireconn_inline(pugi::xml_node node, const pugiutil::loc_data& loc_data, t_wireconn_inf& wc);
+
+//Process a wireconn defined in the multinode style (more advanced specification)
+void parse_wireconn_multinode(pugi::xml_node node, const pugiutil::loc_data& loc_data, t_wireconn_inf& wc);
+
+//Process a <from> or <to> sub-node of a multinode wireconn
+t_wire_switchpoints parse_wireconn_from_to_node(pugi::xml_node node, const pugiutil::loc_data& loc_data);
+
+/* parses the wire types specified in the comma-separated 'ch' char array into the vector wire_switchpoints.
+ * Spaces are trimmed off */
+static void parse_comma_separated_wire_types(const char* ch, std::vector<t_wire_switchpoints>& wire_switchpoints);
+
+/* parses the wirepoints specified in ch into the vector wire_switchpoints */
+static void parse_comma_separated_wire_points(const char* ch, std::vector<t_wire_switchpoints>& wire_switchpoints);
+
+/* Parses the number of connections type */
+static void parse_num_conns(std::string num_conns, t_wireconn_inf& wireconn);
+
+/* checks for correctness of a unidir switchblock. */
+static void check_unidir_switchblock(const t_switchblock_inf* sb);
+
+/* checks for correctness of a bidir switchblock. */
+static void check_bidir_switchblock(const t_permutation_map* permutation_map);
+
+/* checks for correctness of a wireconn segment specification. */
+static void check_wireconn(const t_arch* arch, const t_wireconn_inf& wireconn);
+
+/**** Function Definitions ****/
+
+/*---- Functions for Parsing Switchblocks from Architecture ----*/
+
+/* Reads-in the wire connections specified for the switchblock in the xml arch file */
+void read_sb_wireconns(const t_arch_switch_inf* /*switches*/, int /*num_switches*/, pugi::xml_node Node, t_switchblock_inf* sb, const pugiutil::loc_data& loc_data) {
+    /* Node has to be the <switchblock> tag itself */
+    check_node(Node, "switchblock", loc_data);
+
+    /* <wireconn> children are counted as optional, so zero is acceptable */
+    const int wireconn_count = count_children(Node, "wireconn", loc_data, ReqOpt::OPTIONAL);
+    sb->wireconns.reserve(wireconn_count);
+
+    /* the first <wireconn> is only fetched when the count says one exists */
+    pugi::xml_node wireconn_node;
+    if (wireconn_count > 0) {
+        wireconn_node = get_first_child(Node, "wireconn", loc_data);
+    }
+
+    /* step through the same-named siblings, parsing one entry per tag */
+    int remaining = wireconn_count;
+    while (remaining > 0) {
+        sb->wireconns.push_back(parse_wireconn(wireconn_node, loc_data));
+        wireconn_node = wireconn_node.next_sibling(wireconn_node.name());
+        --remaining;
+    }
+}
+
+t_wireconn_inf parse_wireconn(pugi::xml_node node, const pugiutil::loc_data& loc_data) {
+    //A wireconn without any <from>/<to> children is parsed in the inline
+    //(attribute) style; the presence of such a child selects the multinode style
+    const size_t from_count = count_children(node, "from", loc_data, ReqOpt::OPTIONAL);
+    const size_t to_count = count_children(node, "to", loc_data, ReqOpt::OPTIONAL);
+    const bool has_from_or_to = (from_count + to_count) != 0;
+    t_wireconn_inf parsed;
+    if (has_from_or_to) {
+        VTR_ASSERT(from_count + to_count > 0);
+        parse_wireconn_multinode(node, loc_data, parsed);
+    } else {
+        parse_wireconn_inline(node, loc_data, parsed);
+    }
+    return parsed;
+}
+
+void parse_wireconn_inline(pugi::xml_node node, const pugiutil::loc_data& loc_data, t_wireconn_inf& wc) {
+    //Parse an inline wireconn definition, using attributes; the results are written into wc
+    expect_only_attributes(node, {"num_conns", "from_type", "to_type", "from_switchpoint", "to_switchpoint", "from_order", "to_order"}, loc_data);
+
+    /* get the connection style (kept as a formula string by parse_num_conns) */
+    const char* char_prop = get_attribute(node, "num_conns", loc_data).value();
+    parse_num_conns(char_prop, wc);
+
+    /* get from type: adds one from_switchpoint_set entry per listed wire type */
+    char_prop = get_attribute(node, "from_type", loc_data).value();
+    parse_comma_separated_wire_types(char_prop, wc.from_switchpoint_set);
+
+    /* get to type: adds one to_switchpoint_set entry per listed wire type */
+    char_prop = get_attribute(node, "to_type", loc_data).value();
+    parse_comma_separated_wire_types(char_prop, wc.to_switchpoint_set);
+
+    /* get the source wire point: the points are appended to every from entry created above, so the types must be parsed first */
+    char_prop = get_attribute(node, "from_switchpoint", loc_data).value();
+    parse_comma_separated_wire_points(char_prop, wc.from_switchpoint_set);
+
+    /* get the destination wire point: likewise appended to every to entry created above */
+    char_prop = get_attribute(node, "to_switchpoint", loc_data).value();
+    parse_comma_separated_wire_points(char_prop, wc.to_switchpoint_set);
+
+    char_prop = get_attribute(node, "from_order", loc_data, ReqOpt::OPTIONAL).value(); //optional attribute
+    parse_switchpoint_order(char_prop, wc.from_switchpoint_order);
+
+    char_prop = get_attribute(node, "to_order", loc_data, ReqOpt::OPTIONAL).value(); //optional attribute
+    parse_switchpoint_order(char_prop, wc.to_switchpoint_order);
+}
+
+void parse_wireconn_multinode(pugi::xml_node node, const pugiutil::loc_data& loc_data, t_wireconn_inf& wc) {
+    //Parse a multinode wireconn definition, using <from>/<to> sub-nodes; the results are written into wc
+    expect_only_children(node, {"from", "to"}, loc_data);
+
+    /* get the connection style */
+    const char* char_prop = get_attribute(node, "num_conns", loc_data).value();
+    parse_num_conns(char_prop, wc);
+
+    char_prop = get_attribute(node, "from_order", loc_data, ReqOpt::OPTIONAL).value(); //optional attribute
+    parse_switchpoint_order(char_prop, wc.from_switchpoint_order);
+
+    char_prop = get_attribute(node, "to_order", loc_data, ReqOpt::OPTIONAL).value(); //optional attribute
+    parse_switchpoint_order(char_prop, wc.to_switchpoint_order);
+
+    size_t num_from_children = count_children(node, "from", loc_data);
+    size_t num_to_children = count_children(node, "to", loc_data);
+    VTR_ASSERT(num_from_children > 0);
+    VTR_ASSERT(num_to_children > 0);
+
+    //Each <from>/<to> child contributes one entry to the matching switchpoint set
+    for (pugi::xml_node child : node.children()) {
+        if (child.name() == std::string("from")) {
+            t_wire_switchpoints from_switchpoints = parse_wireconn_from_to_node(child, loc_data);
+            wc.from_switchpoint_set.push_back(from_switchpoints);
+        } else if (child.name() == std::string("to")) {
+            t_wire_switchpoints to_switchpoints = parse_wireconn_from_to_node(child, loc_data);
+            wc.to_switchpoint_set.push_back(to_switchpoints);
+        } else {
+            //Report the offending child (and its line) first, then the parent, matching the message's argument order
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(child), "Unrecognized child node '%s' of parent node '%s'",
+                           child.name(), node.name());
+        }
+    }
+}
+
+t_wire_switchpoints parse_wireconn_from_to_node(pugi::xml_node node, const pugiutil::loc_data& loc_data) {
+    //Parse one <from>/<to> sub-node: 'type' gives the segment name, 'switchpoint' a comma-separated list of points
+    expect_only_attributes(node, {"type", "switchpoint"}, loc_data);
+    size_t attribute_count = count_attributes(node, loc_data);
+
+    if (attribute_count != 2) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(node), "Expected only 2 attributes on node '%s'",
+                       node.name());
+    }
+
+    t_wire_switchpoints wire_switchpoints;
+    wire_switchpoints.segment_name = get_attribute(node, "type", loc_data).value();
+
+    auto points_str = get_attribute(node, "switchpoint", loc_data).value();
+    for (const auto& point_str : vtr::split(points_str, ",")) {
+        int switchpoint = vtr::atoi(point_str);
+        wire_switchpoints.switchpoints.push_back(switchpoint);
+    }
+
+    if (wire_switchpoints.switchpoints.empty()) {
+        //The node name now has a matching '%s'; it used to be passed with no specifier and was never printed
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(node), "Empty switchpoint specification on node '%s'", node.name());
+    }
+
+    return wire_switchpoints;
+}
+
+static void parse_switchpoint_order(const char* order, SwitchPointOrder& switchpoint_order) {
+    //An empty string selects the default order, which is the same as "shuffled"
+    const std::string requested(order);
+    if (requested.empty() || requested == "shuffled") {
+        switchpoint_order = SwitchPointOrder::SHUFFLED;
+    } else if (requested == "fixed") {
+        switchpoint_order = SwitchPointOrder::FIXED;
+    } else {
+        archfpga_throw(__FILE__, __LINE__, "Unrecognized switchpoint order '%s'", order);
+    }
+}
+
+/* Splits the comma-separated 'ch' char array (via vtr::split) and appends one entry per
+ * resulting wire type name to wire_switchpoints; each new entry has no switchpoints yet */
+static void parse_comma_separated_wire_types(const char* ch, std::vector<t_wire_switchpoints>& wire_switchpoints) {
+    const auto segment_names = vtr::split(ch, ",");
+    //at least one wire type is required
+    if (segment_names.empty()) {
+        archfpga_throw(__FILE__, __LINE__, "parse_comma_separated_wire_types: found empty wireconn wire type entry\n");
+    }
+
+    for (const auto& segment_name : segment_names) {
+        //default-construct the entry directly inside the vector,
+        //then record which segment it refers to
+        wire_switchpoints.emplace_back();
+        wire_switchpoints.back().segment_name = segment_name;
+    }
+}
+
+/* Converts each comma-separated entry of 'ch' to an integer and appends it to every entry already present in wire_switchpoints */
+static void parse_comma_separated_wire_points(const char* ch, std::vector<t_wire_switchpoints>& wire_switchpoints) {
+    const auto point_strs = vtr::split(ch, ",");
+    if (point_strs.empty()) {
+        archfpga_throw(__FILE__, __LINE__, "parse_comma_separated_wire_points: found empty wireconn wire point entry\n");
+    }
+
+    for (const auto& point_str : point_strs) {
+        //each entry is converted on its own, right before it is stored
+        const int switchpoint = vtr::atoi(point_str);
+        for (t_wire_switchpoints& entry : wire_switchpoints) {
+            entry.switchpoints.push_back(switchpoint);
+        }
+    }
+}
+
+static void parse_num_conns(std::string num_conns, t_wireconn_inf& wireconn) {
+    //num_conns is now interpreted as a formula and processed in build_switchblocks; the by-value argument is moved, not copied again
+    wireconn.num_conns_formula = std::move(num_conns);
+}
+
+/* Loads permutation funcs specified under Node into t_switchblock_inf. Node should be
+ * <switchfuncs>. Each <func> child's 'formula' is appended to sb->permutation_map under the side pair named by its 'type' */
+void read_sb_switchfuncs(pugi::xml_node Node, t_switchblock_inf* sb, const pugiutil::loc_data& loc_data) {
+    /* Make sure the passed-in node is correct */
+    check_node(Node, "switchfuncs", loc_data);
+
+    pugi::xml_node SubElem;
+
+    /* get the number of specified permutation functions (<func> children are optional) */
+    int num_funcs = count_children(Node, "func", loc_data, ReqOpt::OPTIONAL);
+
+    const char* func_type;
+    const char* func_formula;
+    std::vector<std::string>* func_ptr;
+
+    /* used to index into permutation map of switchblock */
+    SB_Side_Connection conn;
+
+    /* now we iterate through all the specified permutation functions, and
+     * load them into the switchblock structure as appropriate */
+    if (num_funcs > 0) {
+        SubElem = get_first_child(Node, "func", loc_data);
+    }
+    for (int ifunc = 0; ifunc < num_funcs; ifunc++) {
+        /* get function type. NOTE(review): a nullptr result here would break the strcmp calls below - presumably get_attribute rejects a missing attribute first; confirm */
+        func_type = get_attribute(SubElem, "type", loc_data).as_string(nullptr);
+
+        /* get function formula (same nullptr default as above) */
+        func_formula = get_attribute(SubElem, "formula", loc_data).as_string(nullptr);
+
+        /* go through all the possible cases of func_type: two letters out of l/t/r/b, passed to set_sides in that order */
+        if (0 == strcmp(func_type, "lt")) {
+            conn.set_sides(LEFT, TOP);
+        } else if (0 == strcmp(func_type, "lr")) {
+            conn.set_sides(LEFT, RIGHT);
+        } else if (0 == strcmp(func_type, "lb")) {
+            conn.set_sides(LEFT, BOTTOM);
+        } else if (0 == strcmp(func_type, "tl")) {
+            conn.set_sides(TOP, LEFT);
+        } else if (0 == strcmp(func_type, "tb")) {
+            conn.set_sides(TOP, BOTTOM);
+        } else if (0 == strcmp(func_type, "tr")) {
+            conn.set_sides(TOP, RIGHT);
+        } else if (0 == strcmp(func_type, "rt")) {
+            conn.set_sides(RIGHT, TOP);
+        } else if (0 == strcmp(func_type, "rl")) {
+            conn.set_sides(RIGHT, LEFT);
+        } else if (0 == strcmp(func_type, "rb")) {
+            conn.set_sides(RIGHT, BOTTOM);
+        } else if (0 == strcmp(func_type, "bl")) {
+            conn.set_sides(BOTTOM, LEFT);
+        } else if (0 == strcmp(func_type, "bt")) {
+            conn.set_sides(BOTTOM, TOP);
+        } else if (0 == strcmp(func_type, "br")) {
+            conn.set_sides(BOTTOM, RIGHT);
+        } else {
+            /* unknown permutation function */
+            archfpga_throw(__FILE__, __LINE__, "Unknown permutation function specified: %s\n", func_type);
+        }
+        func_ptr = &(sb->permutation_map[conn]); /* formula list stored for this side pair */
+
+        /* Here we load the specified switch function(s); a repeated type appends to the same list */
+        func_ptr->push_back(std::string(func_formula));
+
+        func_ptr = nullptr;
+        /* get the next switchblock function (next sibling with the same tag name) */
+        SubElem = SubElem.next_sibling(SubElem.name());
+    }
+
+    return;
+}
+
+/* checks for correctness of switch block read-in from the XML architecture file; problems are reported by the helpers called below */
+void check_switchblock(const t_switchblock_inf* sb, const t_arch* arch) {
+    /* get directionality */
+    enum e_directionality directionality = sb->directionality;
+
+    /* Check for errors in the switchblock descriptions; each directionality has its own checker */
+    if (UNI_DIRECTIONAL == directionality) {
+        check_unidir_switchblock(sb);
+    } else {
+        VTR_ASSERT(BI_DIRECTIONAL == directionality); /* only two directionalities are handled here */
+        check_bidir_switchblock(&(sb->permutation_map));
+    }
+
+    /* check that specified wires exist (check_wireconn also validates their switchpoints) */
+    for (const auto& wireconn : sb->wireconns) {
+        check_wireconn(arch, wireconn);
+    }
+
+    //TODO: the checks listed below are not implemented yet
+    /* check that the wire segment directionality matches the specified switch block directionality */
+    /* check for duplicate names */
+    /* check that specified switches exist */
+    /* check that type of switchblock matches type of switch specified */
+}
+
+/* checks for correctness of a unidirectional switchblock; errors are reported through archfpga_throw */
+static void check_unidir_switchblock(const t_switchblock_inf* sb) {
+    /* Each destination entry must hold exactly one switchpoint, the start point 0; size() != 1 is tested first so an empty list is never indexed */
+    for (const t_wireconn_inf& wireconn : sb->wireconns) {
+        for (const t_wire_switchpoints& wire_to_points : wireconn.to_switchpoint_set) {
+            if (wire_to_points.switchpoints.size() != 1 || wire_to_points.switchpoints[0] != 0) {
+                archfpga_throw(__FILE__, __LINE__, "Unidirectional switch blocks are currently only allowed to drive the start points of wire segments\n");
+            }
+        }
+    }
+}
+
+/* checks for correctness of a bidirectional switchblock */
+static void check_bidir_switchblock(const t_permutation_map* permutation_map) {
+    /* rule enforced: side1->side2 and side2->side1 may not both be specified, since the reverse is implicit */
+    const t_permutation_map& sides_map = *permutation_map;
+
+    /* key object reused for every lookup */
+    SB_Side_Connection conn;
+
+    for (e_side from_side : {TOP, RIGHT, BOTTOM, LEFT}) {
+        for (e_side to_side : {TOP, RIGHT, BOTTOM, LEFT}) {
+            /* a side is never paired with itself */
+            if (from_side == to_side) {
+                continue;
+            }
+
+            /* nothing to verify unless from_side -> to_side is specified */
+            conn.set_sides(from_side, to_side);
+            if (sides_map.find(conn) == sides_map.end()) {
+                continue;
+            }
+
+            /* the forward direction exists, so look up the reverse
+             * direction, which must then be absent */
+            conn.set_sides(to_side, from_side);
+            const bool reverse_specified = sides_map.find(conn) != sides_map.end();
+            if (reverse_specified) {
+                archfpga_throw(__FILE__, __LINE__, "If a bidirectional switch block specifies a connection from side1->side2, no connection should be specified from side2->side1 as it is implicit.\n");
+            }
+        }
+    }
+
+    /* no conflicting pair was found */
+    return;
+}
+
+static void check_wireconn(const t_arch* arch, const t_wireconn_inf& wireconn) {
+    //Checks every from/to entry of a wireconn: its segment must exist and each switchpoint must lie in [0, segment length)
+    for (const t_wire_switchpoints& wire_switchpoints : wireconn.from_switchpoint_set) {
+        const auto& seg_name = wire_switchpoints.segment_name; //referenced rather than copied
+        //Make sure the segment exists
+        const t_segment_inf* seg_info = find_segment(arch, seg_name);
+        if (!seg_info) {
+            archfpga_throw(__FILE__, __LINE__, "Failed to find segment '%s' for <wireconn> from type specification\n", seg_name.c_str());
+        }
+
+        //Check that the specified switch points are valid
+        for (int switchpoint : wire_switchpoints.switchpoints) {
+            if (switchpoint < 0) {
+                archfpga_throw(__FILE__, __LINE__, "Invalid <wireconn> from_switchpoint '%d' on segment '%s' (must be >= 0)\n", switchpoint, seg_name.c_str());
+            }
+            if (switchpoint >= seg_info->length) {
+                archfpga_throw(__FILE__, __LINE__, "Invalid <wireconn> from_switchpoints '%d' (must be < %d)\n", switchpoint, seg_info->length);
+            }
+            //TODO: check that points correspond to valid sb locations
+        }
+    }
+
+    for (const t_wire_switchpoints& wire_switchpoints : wireconn.to_switchpoint_set) {
+        const auto& seg_name = wire_switchpoints.segment_name; //referenced rather than copied
+
+        //Make sure the segment exists
+        const t_segment_inf* seg_info = find_segment(arch, seg_name);
+        if (!seg_info) {
+            archfpga_throw(__FILE__, __LINE__, "Failed to find segment '%s' for <wireconn> to type specification\n", seg_name.c_str());
+        }
+
+        //Check that the specified switch points are valid
+        for (int switchpoint : wire_switchpoints.switchpoints) {
+            if (switchpoint < 0) {
+                archfpga_throw(__FILE__, __LINE__, "Invalid <wireconn> to_switchpoint '%d' on segment '%s' (must be >= 0)\n", switchpoint, seg_name.c_str());
+            }
+            if (switchpoint >= seg_info->length) {
+                archfpga_throw(__FILE__, __LINE__, "Invalid <wireconn> to_switchpoints '%d' (must be < %d)\n", switchpoint, seg_info->length);
+            }
+            //TODO: check that points correspond to valid sb locations
+        }
+    }
+}
+
+/*---- Functions for Parsing the Symbolic Switchblock Formulas ----*/
+
+/* Evaluates the switchblock formula string (plain or piece-wise) against mydata and returns the parser's integer result */
+int get_sb_formula_raw_result(FormulaParser& formula_parser, const char* formula, const t_formula_data& mydata) {
+    /* a formula must be present and non-empty before it is handed to the
+     * parser; either failure is reported through archfpga_throw */
+    const bool is_missing = (nullptr == formula);
+    const bool is_blank = !is_missing && ('\0' == formula[0]);
+    if (is_missing) {
+        archfpga_throw(__FILE__, __LINE__, "in get_sb_formula_result: SB formula pointer NULL\n");
+    } else if (is_blank) {
+        archfpga_throw(__FILE__, __LINE__, "in get_sb_formula_result: SB formula empty\n");
+    }
+
+    /* the parser itself decides whether the text is piece-wise; each kind
+     * has its own entry point, and whichever one runs supplies the value
+     * returned to the caller */
+    if (!formula_parser.is_piecewise_formula(formula)) {
+        return formula_parser.parse_formula(formula, mydata);
+    }
+
+    //EXPERIMENTAL
+    return formula_parser.parse_piecewise_formula(formula, mydata);
+}
diff --git a/third_party/vtr/libs/archfpga/src/parse_switchblocks.h b/third_party/vtr/libs/archfpga/src/parse_switchblocks.h
new file mode 100644
index 000000000..a76860318
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/parse_switchblocks.h
@@ -0,0 +1,22 @@
+#ifndef PARSE_SWITCHBLOCKS_H
+#define PARSE_SWITCHBLOCKS_H
+
+#include <vector>
+#include "pugixml.hpp"
+#include "pugixml_util.hpp"
+#include "vtr_expr_eval.h"
+
+/**** Function Declarations ****/
+/* Loads permutation funcs specified under Node into t_switchblock_inf */
+void read_sb_switchfuncs(pugi::xml_node Node, t_switchblock_inf* sb, const pugiutil::loc_data& loc_data);
+
+/* Reads-in the wire connections specified for the switchblock in the xml arch file */
+void read_sb_wireconns(const t_arch_switch_inf* switches, int num_switches, pugi::xml_node Node, t_switchblock_inf* sb, const pugiutil::loc_data& loc_data);
+
+/* checks for correctness of switch block read-in from the XML architecture file */
+void check_switchblock(const t_switchblock_inf* sb, const t_arch* arch);
+
+/* returns integer result according to the specified formula and data */
+int get_sb_formula_raw_result(vtr::FormulaParser& formula_parser, const char* formula, const vtr::t_formula_data& mydata);
+
+#endif /* PARSE_SWITCHBLOCKS_H */
diff --git a/third_party/vtr/libs/archfpga/src/physical_types.cc b/third_party/vtr/libs/archfpga/src/physical_types.cc
new file mode 100644
index 000000000..dfa110f39
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/physical_types.cc
@@ -0,0 +1,255 @@
+#include "physical_types.h"
+#include "vtr_math.h"
+#include "vtr_util.h"
+#include "vtr_log.h"
+
+#include "arch_util.h"
+
+static bool switch_type_is_buffered(SwitchType type);
+static bool switch_type_is_configurable(SwitchType type);
+static e_directionality switch_type_directionaity(SwitchType type);
+
+//Ensure the constant has external linkage to avoid linking errors
+constexpr int t_arch_switch_inf::UNDEFINED_FANIN;
+
+/*
+ * t_arch_switch_inf
+ */
+
+SwitchType t_arch_switch_inf::type() const {
+    return type_;
+}
+
+bool t_arch_switch_inf::buffered() const {
+    return switch_type_is_buffered(type());
+}
+
+bool t_arch_switch_inf::configurable() const {
+    return switch_type_is_configurable(type());
+}
+
+e_directionality t_arch_switch_inf::directionality() const {
+    return switch_type_directionaity(type());
+}
+
+float t_arch_switch_inf::Tdel(int fanin) const { //delay of this switch for the given fanin
+    if (fixed_Tdel()) {
+        auto itr = Tdel_map_.find(UNDEFINED_FANIN); //a fixed delay is the single entry stored under UNDEFINED_FANIN
+        VTR_ASSERT(itr != Tdel_map_.end());
+        return itr->second; //fanin is ignored in this case
+    } else {
+        VTR_ASSERT(fanin >= 0); //fanin-dependent delays need a non-negative fanin
+        return vtr::linear_interpolate_or_extrapolate(&Tdel_map_, fanin); //value derived from the stored (fanin, delay) entries
+    }
+}
+
+bool t_arch_switch_inf::fixed_Tdel() const {
+    return Tdel_map_.size() == 1 && Tdel_map_.count(UNDEFINED_FANIN);
+}
+
+void t_arch_switch_inf::set_Tdel(int fanin, float delay) {
+    Tdel_map_[fanin] = delay;
+}
+
+void t_arch_switch_inf::set_type(SwitchType type_val) {
+    type_ = type_val;
+}
+
+/*
+ * t_rr_switch_inf
+ */
+
+SwitchType t_rr_switch_inf::type() const {
+    return type_;
+}
+
+bool t_rr_switch_inf::buffered() const {
+    return switch_type_is_buffered(type());
+}
+
+bool t_rr_switch_inf::configurable() const {
+    return switch_type_is_configurable(type());
+}
+
+void t_rr_switch_inf::set_type(SwitchType type_val) {
+    type_ = type_val;
+}
+
+static bool switch_type_is_buffered(SwitchType type) {
+    //Muxes and Tristates isolate their input and output into
+    //separate DC connected sub-circuits (BUFFER is also treated as buffered here)
+    return type == SwitchType::MUX
+           || type == SwitchType::TRISTATE
+           || type == SwitchType::BUFFER;
+}
+
+static bool switch_type_is_configurable(SwitchType type) {
+    //Every switch type counts as configurable, with two
+    //exceptions: shorts and buffers
+    return type != SwitchType::SHORT && type != SwitchType::BUFFER;
+}
+
+static e_directionality switch_type_directionaity(SwitchType type) {
+    if (type == SwitchType::SHORT
+        || type == SwitchType::PASS_GATE) {
+        //Shorts and pass gates can conduct in either direction
+        return e_directionality::BI_DIRECTIONAL;
+    } else {
+        VTR_ASSERT_SAFE(type == SwitchType::MUX
+                        || type == SwitchType::TRISTATE
+                        || type == SwitchType::BUFFER);
+        //Buffered switches can only drive in one direction
+        return e_directionality::UNI_DIRECTIONAL;
+    }
+}
+
+/*
+ * t_physical_tile_type
+ */
+std::vector<int> t_physical_tile_type::get_clock_pins_indices() const {
+    for (auto pin_index : this->clock_pin_indices) {
+        VTR_ASSERT(pin_index < this->num_pins);
+    }
+
+    return this->clock_pin_indices;
+}
+
+int t_physical_tile_type::get_sub_tile_loc_from_pin(int pin_num) const {
+    VTR_ASSERT(pin_num < this->num_pins);
+    //Find the sub tile whose pin mapping contains pin_num; OPEN is returned when none does
+    for (const auto& sub_tile : this->sub_tiles) { //by reference: iterating by value copied every sub tile
+        auto max_inst_pins = sub_tile.num_phy_pins / sub_tile.capacity.total(); //physical pins divided by the total capacity
+
+        for (int pin = 0; pin < sub_tile.num_phy_pins; pin++) {
+            if (sub_tile.sub_tile_to_tile_pin_indices[pin] == pin_num) {
+                //If the physical tile pin matches pin_num, return the
+                //corresponding absolute capacity location of the sub_tile
+                return pin / max_inst_pins + sub_tile.capacity.low;
+            }
+        }
+    }
+
+    return OPEN;
+}
+
+bool t_physical_tile_type::is_empty() const {
+    return std::string(name) == std::string(EMPTY_BLOCK_NAME);
+}
+
+/*
+ * t_logical_block_type
+ */
+
+bool t_logical_block_type::is_empty() const {
+    return std::string(name) == std::string(EMPTY_BLOCK_NAME);
+}
+
+/**
+ * t_pb_graph_node
+ */
+
+int t_pb_graph_node::num_pins() const {
+    //Total pin count of this node: the per-port pin counts of the
+    //input, output and clock ports, all added together
+    int total = 0;
+    int port;
+
+    for (port = 0; port < num_input_ports; ++port) {
+        total += num_input_pins[port];
+    }
+    for (port = 0; port < num_output_ports; ++port) {
+        total += num_output_pins[port];
+    }
+    for (port = 0; port < num_clock_ports; ++port) {
+        total += num_clock_pins[port];
+    }
+    return total;
+}
+
+std::string t_pb_graph_node::hierarchical_type_name() const {
+    std::vector<std::string> names; //one entry per visited node, starting with this node
+    std::string child_mode_name; //parent mode of the previously visited node; empty while visiting this node
+
+    for (auto curr_node = this; curr_node != nullptr; curr_node = curr_node->parent_pb_graph_node) {
+        std::string type_name;
+
+        //get name and placement index of the physical block: "name[index]"
+        type_name = curr_node->pb_type->name;
+        type_name += "[" + std::to_string(curr_node->placement_index) + "]";
+
+        if (!curr_node->is_primitive()) {
+            // primitives have no modes; other nodes get a "[mode]" suffix using the mode recorded from the node visited just before
+            type_name += "[" + child_mode_name + "]";
+        }
+
+        if (!curr_node->is_root()) {
+            // get the mode of this child; it becomes the suffix of its parent on the next iteration
+            child_mode_name = curr_node->pb_type->parent_mode->name;
+        }
+
+        names.push_back(type_name);
+    }
+
+    //We walked up from the leaf to root, so we join in reverse order
+    return vtr::join(names.rbegin(), names.rend(), "/");
+}
+
+/**
+ * t_pb_graph_pin
+ */
+
+std::string t_pb_graph_pin::to_string(const bool full_description) const {
+    // Short form of the pin name:
+    //   <pb_type name>[<placement index>].<port name>[<pin number>]
+    auto owner = this->parent_node;
+
+    std::string pin_string = std::string(owner->pb_type->name);
+    pin_string += "[" + std::to_string(owner->placement_index) + "]";
+    pin_string += "." + std::string(this->port->name);
+    pin_string += "[" + std::to_string(this->pin_number) + "]";
+
+    if (full_description) {
+        // Long form: put one "<name>[<index>]/" component in front for each
+        // ancestor, starting at the owner's parent, until no parent remains
+        for (auto ancestor = owner->parent_pb_graph_node; ancestor != nullptr; ancestor = ancestor->parent_pb_graph_node) {
+            std::string prefix = ancestor->pb_type->name;
+            prefix += "[" + std::to_string(ancestor->placement_index) + "]/";
+            pin_string = prefix + pin_string;
+        }
+    }
+    return pin_string;
+}
+
+/**
+ * t_pb_graph_edge
+ */
+
+bool t_pb_graph_edge::annotated_with_pattern(int pattern_index) const {
+    // linear scan of this edge's pack_pattern_indices, stopping at the first match
+    bool found = false;
+    for (int ipattern = 0; !found && ipattern < this->num_pack_patterns; ipattern++) {
+        found = (this->pack_pattern_indices[ipattern] == pattern_index);
+    }
+
+    return found;
+}
+
+bool t_pb_graph_edge::belongs_to_pattern(int pattern_index) const {
+    // an explicit annotation settles the question immediately
+    if (this->annotated_with_pattern(pattern_index)) {
+        return true;
+    }
+    if (!this->infer_pattern) { // not annotated, and inference is not requested for this edge
+        return false;
+    }
+    // inferred: this edge belongs if any edge leaving one of its output pins belongs (checked recursively)
+    for (int ipin = 0; ipin < this->num_output_pins; ipin++) {
+        for (int iedge = 0; iedge < this->output_pins[ipin]->num_output_edges; iedge++) {
+            if (this->output_pins[ipin]->output_edges[iedge]->belongs_to_pattern(pattern_index)) {
+                return true;
+            }
+        }
+    }
+
+    return false;
+}
diff --git a/third_party/vtr/libs/archfpga/src/physical_types.h b/third_party/vtr/libs/archfpga/src/physical_types.h
new file mode 100644
index 000000000..4b326dd1f
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/physical_types.h
@@ -0,0 +1,1896 @@
+/*
+ * Data types describing the physical components on the FPGA architecture.
+ *
+ * We assume an island style FPGA where complex logic blocks are arranged in a grid and each side of the logic block has access to the inter-block interconnect.  To keep the logic blocks general,
+ * we allow arbitrary hierarchy, modes, primitives, and interconnect within each complex logic block.  The data structures here describe the properties of the island-style FPGA as well as the details on
+ * hierarchy, modes, primitives, and interconnects within each logic block.
+ *
+ * Data structures that flesh out
+ *
+ * The data structures that store the
+ *
+ * Key data types:
+ * t_physical_tile_type: represents the type of a tile in the device grid and describes its physical characteristics (pin locations, area, width, height, etc.)
+ * t_logical_block_type: represents and describes the type of a clustered block
+ * pb_type: describes the types of physical blocks within the t_logical_block_type in a hierarchy where the top block is the complex block and the leaf blocks implement one logical block
+ * pb_graph_node: is a flattened version of pb_type so a pb_type with 10 instances will have 10 pb_graph_nodes representing each instance
+ *
+ * Additional notes:
+ *
+ * The interconnect specified in the architecture file gets flattened out in the pb_graph_node netlist.  Each pb_graph_node contains pb_graph_pins which allow it to connect to other pb_graph_nodes.
+ * These pins are connected to other pins through pb_graph_edges. The pin connections are based on what is specified in the <interconnect> tags of the architecture file.
+ *
+ * Date: February 19, 2009
+ * Authors: Jason Luu and Kenneth Kent
+ */
+
+#ifndef PHYSICAL_TYPES_H
+#define PHYSICAL_TYPES_H
+
+#include <functional>
+#include <vector>
+#include <unordered_map>
+#include <string>
+#include <map>
+#include <array>
+#include <limits>
+#include <numeric>
+
+#include "vtr_ndmatrix.h"
+#include "vtr_hash.h"
+#include "vtr_bimap.h"
+#include "vtr_string_interning.h"
+
+#include "logic_types.h"
+#include "clock_types.h"
+
+//Forward declarations
+struct t_clock_arch;
+struct t_clock_network;
+struct t_power_arch;
+struct t_interconnect_pins;
+struct t_power_usage;
+struct t_pb_type_power;
+struct t_mode_power;
+struct t_interconnect_power;
+struct t_port_power;
+struct t_physical_tile_port;
+struct t_equivalent_site;
+struct t_physical_tile_type;
+typedef const t_physical_tile_type* t_physical_tile_type_ptr;
+struct t_sub_tile;
+struct t_logical_block_type;
+typedef const t_logical_block_type* t_logical_block_type_ptr;
+struct t_logical_pin;
+struct t_physical_pin;
+struct t_pb_type;
+struct t_pb_graph_pin_power;
+struct t_mode;
+struct t_pb_graph_node_power;
+struct t_port;
+class t_pb_graph_node;
+struct t_pin_to_pin_annotation;
+struct t_interconnect;
+class t_pb_graph_pin;
+class t_pb_graph_edge;
+struct t_cluster_placement_primitive;
+struct t_arch;
+enum class e_sb_type;
+
+/****************************************************************************/
+/* FPGA metadata types                                                      */
+/****************************************************************************/
+/* t_metadata_value and t_metadata_dict provide types to store
+ * metadata about the FPGA architecture and routing graph alongside
+ * the pb_type, grid, node and edge descriptions.
+ *
+ * The metadata is stored as a simple key/value map.  Keys are a string and an
+ * optional coordinate. t_metadata_value provides the value storage, which is a
+ * string.
+ */
+
+// Metadata value storage: holds one vtr::interned_string.
+class t_metadata_value {
+  public:
+    explicit t_metadata_value(vtr::interned_string v)
+        : value_(v) {}
+    explicit t_metadata_value(const t_metadata_value& o) noexcept // copies o's stored string
+        : value_(o.value_) {}
+
+    // Return the stored string value (by value).
+    vtr::interned_string as_string() const { return value_; }
+
+  private:
+    vtr::interned_string value_; // the wrapped value
+};
+
+// Dictionary of metadata: maps an interned-string key to the list of values
+// recorded under that key.
+struct t_metadata_dict : vtr::flat_map<
+                             vtr::interned_string,
+                             std::vector<t_metadata_value>,
+                             vtr::interned_string_less> {
+    // True when an entry exists for key.
+    inline bool has(vtr::interned_string key) const {
+        return this->find(key) != this->end();
+    }
+
+    // Look up every value stored under key.
+    //
+    // Yields a pointer to the stored vector, or nullptr when key is absent.
+    inline const std::vector<t_metadata_value>* get(vtr::interned_string key) const {
+        const auto entry = this->find(key);
+        if (entry == this->end()) {
+            return nullptr;
+        }
+        return &entry->second;
+    }
+
+    // Look up the single value stored under key.
+    //
+    // Yields nullptr when key is absent or when the number of values present
+    // under it is anything other than exactly one.
+    inline const t_metadata_value* one(vtr::interned_string key) const {
+        const auto* candidates = get(key);
+        const bool exactly_one = (candidates != nullptr) && (candidates->size() == 1);
+        if (!exactly_one) {
+            return nullptr;
+        }
+        return &candidates->front();
+    }
+
+    // Append value to the list stored under key.
+    void add(vtr::interned_string key, vtr::interned_string value) {
+        // operator[] yields the list for key, which may already hold elements
+        // from earlier add() calls with the same key.
+        auto& bucket = (*this)[key];
+        bucket.emplace_back(t_metadata_value(value));
+    }
+};
+
+/*************************************************************************************************/
+/* FPGA basic definitions                                                                        */
+/*************************************************************************************************/
+
+/* Pins describe I/O into clustered logic block.
+ * A pin may be unconnected, driving a net or in the fanout, respectively. */
+enum e_pin_type {
+    OPEN = -1,   // unconnected
+    DRIVER = 0,  // driving a net
+    RECEIVER = 1 // in the fanout of a net
+};
+
+/* Type of interconnect within complex block: Complete for everything connected (full crossbar), direct for one-to-one connections, and mux for many-to-one connections */
+enum e_interconnect {
+    COMPLETE_INTERC = 1,
+    DIRECT_INTERC = 2,
+    MUX_INTERC = 3
+};
+
+/* Orientations. */
+enum e_side : unsigned char {
+    TOP = 0,
+    RIGHT = 1,
+    BOTTOM = 2,
+    LEFT = 3,
+    NUM_SIDES
+};
+constexpr std::array<e_side, NUM_SIDES> SIDES = {{TOP, RIGHT, BOTTOM, LEFT}};                    //Set of all side orientations
+constexpr std::array<const char*, NUM_SIDES> SIDE_STRING = {{"TOP", "RIGHT", "BOTTOM", "LEFT"}}; //String versions of side orientations
+
+/* pin location distributions */
+enum e_pin_location_distr {
+    E_SPREAD_PIN_DISTR,
+    E_PERIMETER_PIN_DISTR,
+    E_SPREAD_INPUTS_PERIMETER_OUTPUTS_PIN_DISTR,
+    E_CUSTOM_PIN_DISTR
+};
+
+/* pb_type class */
+enum e_pb_type_class {
+    UNKNOWN_CLASS = 0,
+    LUT_CLASS = 1,
+    LATCH_CLASS = 2,
+    MEMORY_CLASS = 3,
+    NUM_CLASSES
+};
+
+// Set of all pb_type classes
+constexpr std::array<e_pb_type_class, NUM_CLASSES> PB_TYPE_CLASSES = {
+    {UNKNOWN_CLASS, LUT_CLASS, LATCH_CLASS, MEMORY_CLASS}};
+
+// String versions of pb_type class values
+constexpr std::array<const char*, NUM_CLASSES> PB_TYPE_CLASS_STRING = {
+    {"unknown", "lut", "flipflop", "memory"}};
+
+/* Annotations for pin-to-pin connections */
+enum e_pin_to_pin_annotation_type {
+    E_ANNOT_PIN_TO_PIN_DELAY = 0,
+    E_ANNOT_PIN_TO_PIN_CAPACITANCE,
+    E_ANNOT_PIN_TO_PIN_PACK_PATTERN
+};
+enum e_pin_to_pin_annotation_format {
+    E_ANNOT_PIN_TO_PIN_MATRIX = 0,
+    E_ANNOT_PIN_TO_PIN_CONSTANT
+};
+enum e_pin_to_pin_delay_annotations {
+    E_ANNOT_PIN_TO_PIN_DELAY_MIN = 0,        //pb interconnect or primitive combinational min delay
+    E_ANNOT_PIN_TO_PIN_DELAY_MAX,            //pb interconnect or primitive combinational max delay
+    E_ANNOT_PIN_TO_PIN_DELAY_TSETUP,         //primitive setup time
+    E_ANNOT_PIN_TO_PIN_DELAY_THOLD,          //primitive hold time
+    E_ANNOT_PIN_TO_PIN_DELAY_CLOCK_TO_Q_MIN, //primitive min clock-to-q delay
+    E_ANNOT_PIN_TO_PIN_DELAY_CLOCK_TO_Q_MAX, //primitive max clock-to-q delay
+};
+enum e_pin_to_pin_capacitance_annotations {
+    E_ANNOT_PIN_TO_PIN_CAPACITANCE_C = 0
+};
+enum e_pin_to_pin_pack_pattern_annotations {
+    E_ANNOT_PIN_TO_PIN_PACK_PATTERN_NAME = 0
+};
+
+/* Power Estimation type for a PB */
+enum e_power_estimation_method_ {
+    POWER_METHOD_UNDEFINED = 0,
+    POWER_METHOD_IGNORE,          /* Ignore power of this PB, and all children PB */
+    POWER_METHOD_SUM_OF_CHILDREN, /* Ignore power of this PB, but consider children */
+    POWER_METHOD_AUTO_SIZES,      /* Transistor-level, auto-sized buffers/wires */
+    POWER_METHOD_SPECIFY_SIZES,   /* Transistor-level, user-specified buffers/wires */
+    POWER_METHOD_TOGGLE_PINS,     /* Dynamic: Energy per pin toggle, Static: Absolute */
+    POWER_METHOD_C_INTERNAL,      /* Dynamic: Equiv. Internal capacitance, Static: Absolute */
+    POWER_METHOD_ABSOLUTE         /* Dynamic: Absolute, Static: Absolute */
+};
+typedef enum e_power_estimation_method_ e_power_estimation_method;
+typedef enum e_power_estimation_method_ t_power_estimation_method;
+
+/* Specifies what part of the FPGA a custom switchblock should be built in (i.e. perimeter, core, everywhere) */
+enum e_sb_location {
+    E_PERIMETER = 0,
+    E_CORNER,
+    E_FRINGE, /* perimeter minus corners */
+    E_CORE,
+    E_EVERYWHERE
+};
+
+/*************************************************************************************************/
+/* FPGA grid layout data types                                                                   */
+/*************************************************************************************************/
+/* Grid location specification
+ *  Each member is a formula evaluated in terms of 'W' (device width),
+ *  and 'H' (device height). Formulas can be evaluated using parse_formula()
+ *  from expr_eval.h.
+ */
+struct t_grid_loc_spec {
+    t_grid_loc_spec(std::string start, std::string end, std::string repeat, std::string incr)
+        : start_expr(start)
+        , end_expr(end)
+        , repeat_expr(repeat)
+        , incr_expr(incr) {}
+
+    std::string start_expr; //Starting position (inclusive)
+    std::string end_expr;   //Ending position (inclusive)
+
+    std::string repeat_expr; //Distance between repeated
+                             // region instances
+
+    std::string incr_expr; //Distance between block instantiations
+                           // with the region
+};
+
+/* Definition of how to place physical logic block in the grid.
+ *  This defines a region of the grid to be set to a specific type
+ *  (provided its priority is high enough to override other blocks).
+ *
+ *  The diagram below illustrates the layout specification.
+ *
+ *                      +----+                +----+           +----+
+ *                      |    |                |    |           |    |
+ *                      |    |                |    |    ...    |    |
+ *                      |    |                |    |           |    |
+ *                      +----+                +----+           +----+
+ *
+ *                        .                     .                 .
+ *                        .                     .                 .
+ *                        .                     .                 .
+ *
+ *                      +----+                +----+           +----+
+ *                      |    |                |    |           |    |
+ *                      |    |                |    |    ...    |    |
+ *                      |    |                |    |           |    |
+ *                      +----+                +----+           +----+
+ *                   ^
+ *                   |
+ *           repeaty |
+ *                   |
+ *                   v        (endx,endy)
+ *                      +----+                +----+           +----+
+ *                      |    |                |    |           |    |
+ *                      |    |                |    |    ...    |    |
+ *                      |    |                |    |           |    |
+ *                      +----+                +----+           +----+
+ *       (startx,starty)
+ *                            <-------------->
+ *                                 repeatx
+ *
+ *  startx/starty and endx/endy define a rectangular region instance's dimensions.
+ *  The region instance is then repeated every repeatx/repeaty (if specified).
+ *
+ *  Within a particular region instance a block of block_type is laid down every
+ *  incrx/incry units (if not specified defaults to block width/height):
+ *
+ *
+ *    * = an instance of block_type within the region
+ *
+ *                    +------------------------------+
+ *                    |*         *         *        *|
+ *                    |                              |
+ *                    |                              |
+ *                    |                              |
+ *                    |                              |
+ *                    |                              |
+ *                    |*         *         *        *|
+ *                ^   |                              |
+ *                |   |                              |
+ *          incry |   |                              |
+ *                |   |                              |
+ *                v   |                              |
+ *                    |*         *         *        *|
+ *                    +------------------------------+
+ *
+ *                      <------->
+ *                        incrx
+ *
+ *  In the above diagram incrx = 10, and incry = 6
+ */
+struct t_grid_loc_def {
+    t_grid_loc_def(std::string block_type_val, int priority_val)
+        : block_type(block_type_val)
+        , priority(priority_val)
+        , x("0", "W-1", "max(w+1,W)", "w") //Fill in x direction, no repeat, incr by block width
+        , y("0", "H-1", "max(h+1,H)", "h") //Fill in y direction, no repeat, incr by block height
+    {}
+
+    std::string block_type; //The block type name
+
+    int priority = 0; //Priority of the specification.
+                      // In case of conflicting specifications
+                      // the largest priority wins.
+
+    t_grid_loc_spec x; //Horizontal location specification
+    t_grid_loc_spec y; //Vertical location specification
+
+    // When 1 metadata tag is split among multiple t_grid_loc_def, one
+    // t_grid_loc_def is arbitrarily chosen to own the metadata, and the other
+    // t_grid_loc_def point to the owned version.
+    std::unique_ptr<t_metadata_dict> owned_meta;
+    t_metadata_dict* meta = nullptr; // Metadata for this location definition. This
+                                     // metadata may be shared with multiple grid_locs
+                                     // that come from a common definition.
+};
+
+enum GridDefType {
+    AUTO,
+    FIXED
+};
+
+struct t_grid_def {
+    GridDefType grid_type = GridDefType::AUTO; //The type of this grid specification
+
+    std::string name = ""; //The name of this device
+
+    int width = -1;  //Fixed device width (only valid for grid_type == FIXED)
+    int height = -1; //Fixed device height (only valid for grid_type == FIXED)
+
+    float aspect_ratio = 1.; //Aspect ratio for auto-sized devices (only valid for
+                             //grid_type == AUTO)
+
+    std::vector<t_grid_loc_def> loc_defs; //The list of grid location definitions for this grid specification
+};
+
+/************************* POWER ***********************************/
+
+/* Global clock architecture */
+struct t_clock_arch {
+    int num_global_clocks;
+    t_clock_network* clock_inf; /* Details about each clock */
+};
+
+/* Architecture information for a single clock */
+struct t_clock_network {
+    bool autosize_buffer; /* autosize clock buffers */
+    float buffer_size;    /* if not autosized, the clock buffer size */
+    float C_wire;         /* Wire capacitance (per meter) */
+
+    float prob;   /* Static probability of net assigned to this clock */
+    float dens;   /* Switching density of net assigned to this clock */
+    float period; /* Period of clock */
+};
+
+/* Power-related architecture information */
+struct t_power_arch {
+    float C_wire_local; /* Capacitance of local interconnect (per meter) */
+    //int seg_buffer_split; /* Split segment for distributed buffer (no split=1) */
+    float logical_effort_factor;
+    float local_interc_factor;
+    float transistors_per_SRAM_bit;
+    float mux_transistor_size;
+    float FF_size;
+    float LUT_transistor_size;
+};
+
+/* Power usage for an entity */
+struct t_power_usage {
+    float dynamic;
+    float leakage;
+};
+
+/*************************************************************************************************/
+/* FPGA Physical Logic Blocks data types                                                         */
+/*************************************************************************************************/
+
+enum class PortEquivalence {
+    NONE,    //The pins within the port are not equivalent and can not be swapped
+    FULL,    //The pins within the port are fully equivalent and can be freely swapped (e.g. logically equivalent or modelling a full-crossbar)
+    INSTANCE //The port is equivalent with instance swapping (more restrictive than FULL)
+};
+
+/* A class of CLB pins that share common properties
+ * port_name: name of this class of pins
+ * type:  DRIVER or RECEIVER (what is this pinclass?)              *
+ * num_pins:  The number of logically equivalent pins forming this *
+ *           class.                                                *
+ * pinlist[]:  List of clb pin numbers which belong to this class. */
+struct t_class {
+    enum e_pin_type type;
+    PortEquivalence equivalence;
+    int num_pins;
+    std::vector<int> pinlist; /* [0..num_pins - 1] */
+};
+
+/* Struct to hold the class ranges for specific sub tiles */
+struct t_class_range {
+    int low = 0;
+    int high = 0;
+};
+
+enum e_power_wire_type {
+    POWER_WIRE_TYPE_UNDEFINED = 0,
+    POWER_WIRE_TYPE_IGNORED,
+    POWER_WIRE_TYPE_C,
+    POWER_WIRE_TYPE_ABSOLUTE_LENGTH,
+    POWER_WIRE_TYPE_RELATIVE_LENGTH,
+    POWER_WIRE_TYPE_AUTO
+};
+
+enum e_power_buffer_type {
+    POWER_BUFFER_TYPE_UNDEFINED = 0,
+    POWER_BUFFER_TYPE_NONE,
+    POWER_BUFFER_TYPE_AUTO,
+    POWER_BUFFER_TYPE_ABSOLUTE_SIZE
+};
+
+struct t_port_power {
+    /* Transistor-Level Power Properties */
+
+    // Wire
+    e_power_wire_type wire_type;
+    union {
+        float C;
+        float absolute_length;
+        float relative_length;
+    } wire;
+
+    // Buffer
+    e_power_buffer_type buffer_type;
+    float buffer_size;
+
+    /* Pin-Toggle Power Properties */
+    bool pin_toggle_initialized;
+    float energy_per_toggle;
+    t_port* scaled_by_port;
+    int scaled_by_port_pin_idx;
+    bool reverse_scaled; /* Scale by (1-prob) */
+};
+
+//The type of Fc specification
+enum class e_fc_type {
+    IN, //The fc specification for an input pin
+    OUT //The fc specification for an output pin
+};
+
+//The value type of the Fc specification
+enum class e_fc_value_type {
+    FRACTIONAL, //Fractional Fc specification (i.e. fraction of routing channel tracks)
+    ABSOLUTE    //Absolute Fc specification (i.e. absolute number of tracks)
+};
+
+//Describes the Fc specification for a set of pins and a segment
+struct t_fc_specification {
+    e_fc_type fc_type;             //What type of Fc
+    e_fc_value_type fc_value_type; //How to interpret the Fc value
+    float fc_value;                //The Fc value
+    int seg_index;                 //The target segment index
+    std::vector<int> pins;         //The block pins collectively affected by this Fc
+};
+
+//Defines the default Fc specification for an architecture
+struct t_default_fc_spec {
+    bool specified = false;         //Whether or not a default specification exists
+    e_fc_value_type in_value_type;  //Type of the input value (frac or abs)
+    float in_value;                 //Input Fc value
+    e_fc_value_type out_value_type; //Type of the output value (frac or abs)
+    float out_value;                //Output Fc value
+};
+
+enum class e_sb_type {
+    NONE,       //No SB at this location
+    HORIZONTAL, //Horizontal straight-through connections
+    VERTICAL,   //Vertical straight-through connections
+    TURNS,      //Turning connections only
+    STRAIGHT,   //Straight-through connections (i.e. vertical + horizontal)
+    FULL        //Full SB at this location (i.e. turns + straight)
+
+};
+
+constexpr int NO_SWITCH = -1;
+constexpr int DEFAULT_SWITCH = -2;
+
+/* Describes the type for a physical tile
+ * name: unique identifier for type
+ * num_pins: Number of pins for the block
+ * capacity: Number of blocks of this type that can occupy one grid tile (typically used by IOs).
+ * width: Width of large block in grid tiles
+ * height: Height of large block in grid tiles
+ *
+ * pinloc: Is set to true if a given pin exists on a certain position of a
+ *         block. Derived from pin_location_distribution/pin_loc_assignments
+ *
+ * pin_location_distribution: The pin distribution type
+ * num_pin_loc_assignments: The number of strings within each pin_loc_assignments
+ * pin_loc_assignments: The strings for a custom pin location distribution.
+ *                      Valid only for pin_location_distribution == E_CUSTOM_PIN_DISTR
+ *
+ * num_class: Number of logically-equivalent pin classes
+ * class_inf: Information of each logically-equivalent class
+ *
+ * pin_avg_width_offset: Average width offset to specified pin (exact if only a single physical pin instance)
+ * pin_avg_height_offset: Average height offset to specified pin (exact if only a single physical pin instance)
+ * pin_class: The class a pin belongs to
+ * is_ignored_pin: Whether or not a pin is ignored during rr_graph generation and routing.
+ *                 This is usually the case for clock pins and other global pins unless the
+ *                 clock_modeling option is set to route the clock through regular inter-block
+ *                 wiring or through a dedicated clock network.
+ * is_pin_global: Whether or not this pin is marked as global. Clock pins and other specified
+ *                global pins in the architecture file are marked as global.
+ *
+ * fc_specs: The Fc specifications for all pins
+ *
+ * switchblock_locations: Switch block configuration for this block.
+ *                        Each element describes the type of SB which should be
+ *                        constructed at the specified location.
+ *                        Note that the SB is located to the top-right of the
+ *                        grid tile location. [0..width-1][0..height-1]
+ *
+ * area: Describes how much area this logic block takes, if undefined, use default
+ * type_timing_inf: timing information unique to this type
+ * num_drivers: Total number of output drivers supplied
+ * num_receivers: Total number of input receivers supplied
+ * index: Keep track of type in array for easy access
+ * logical_tile_index: index of the corresponding logical block type
+ *
+ * In general, the physical tile is a placeable physical resource on the FPGA device,
+ * and it is allowed to contain a heterogeneous set of logical blocks (pb_types).
+ *
+ * Each physical tile must specify at least one sub tile, that is a physical location
+ * on the sub tiles stacks. This means that a physical tile occupies an (x, y) location on the grid,
+ * and it has at least one sub tile slot that allows for a placement within the (x, y) location.
+ *
+ * Therefore, to identify the location of a logical block within the device grid, we need to
+ * specify three different coordinates:
+ *      - x         : horizontal coordinate
+ *      - y         : vertical coordinate
+ *      - sub tile  : location within the sub tile stack at an (x, y) physical location
+ *
+ * A physical tile is heterogeneous as it allows the placement of different kinds of logical blocks within,
+ * that can share the same (x, y) placement location.
+ *
+ */
+struct t_physical_tile_type {
+    char* name = nullptr;
+    int num_pins = 0;
+    int num_inst_pins = 0;
+    int num_input_pins = 0;
+    int num_output_pins = 0;
+    int num_clock_pins = 0;
+
+    std::vector<int> clock_pin_indices;
+
+    int capacity = 0;
+
+    int width = 0;
+    int height = 0;
+
+    vtr::NdMatrix<std::vector<bool>, 3> pinloc; /* [0..width-1][0..height-1][0..3][0..num_pins-1] */
+
+    std::vector<t_class> class_inf; /* [0..num_class-1] */
+
+    std::vector<int> pin_width_offset;  // [0..num_pins-1]
+    std::vector<int> pin_height_offset; // [0..num_pins-1]
+    std::vector<int> pin_class;         // [0..num_pins-1]
+    std::vector<bool> is_ignored_pin;   // [0..num_pins-1]
+    std::vector<bool> is_pin_global;    // [0..num_pins-1]
+
+    std::vector<t_fc_specification> fc_specs;
+
+    vtr::Matrix<e_sb_type> switchblock_locations;
+    vtr::Matrix<int> switchblock_switch_overrides;
+
+    float area = 0;
+
+    /* This info can be determined from class_inf and pin_class but stored for faster access */
+    int num_drivers = 0;
+    int num_receivers = 0;
+
+    int index = -1; /* index of type descriptor in array (allows for index referencing) */
+
+    // vector of the different types of sub tiles allowed for the physical tile.
+    std::vector<t_sub_tile> sub_tiles;
+
+    /* Unordered map indexed by the logical block index.
+     * tile_block_pin_directs_map[logical block index][logical block pin] -> physical tile pin */
+    std::unordered_map<int, std::unordered_map<int, vtr::bimap<t_logical_pin, t_physical_pin>>> tile_block_pin_directs_map;
+
+    /* Returns the indices of pins that contain a clock for this physical logic block */
+    std::vector<int> get_clock_pins_indices() const;
+
+    // Returns the sub tile location of the physical tile given an input pin
+    int get_sub_tile_loc_from_pin(int pin_num) const;
+
+    // TODO: Remove is_input_type / is_output_type as part of
+    // https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1193
+
+    // Does this t_physical_tile_type contain an inpad?
+    bool is_input_type = false;
+
+    // Does this t_physical_tile_type contain an outpad?
+    bool is_output_type = false;
+
+    // Is this t_physical_tile_type an empty type?
+    bool is_empty() const;
+};
+
+/* Holds the capacity range of a certain sub_tile block within the parent physical tile type.
+ * E.g. TILE_X has the following sub tiles:
+ *          - SUB_TILE_A: capacity_range --> 0 to 4
+ *          - SUB_TILE_B: capacity_range --> 5 to 11
+ *          - SUB_TILE_C: capacity_range --> 12 to 16
+ *
+ * Total TILE_X capacity is 17
+ */
+struct t_capacity_range {
+    int low = 0;  // Lowest capacity index in the range (inclusive)
+    int high = 0; // Highest capacity index in the range (inclusive)
+    // Replace both bounds of the range.
+    void set(int low_cap, int high_cap) {
+        this->high = high_cap;
+        this->low = low_cap;
+    }
+    // True when cap lies within [low, high], bounds included.
+    bool is_in_range(int cap) const {
+        return !(cap < low || high < cap);
+    }
+    // Number of indices covered by the range, counting both bounds.
+    int total() const {
+        return 1 + (high - low);
+    }
+};
+
+/**
+ * @brief Describes the possible placeable blocks within a physical tile type.
+ *
+ * Heterogeneous blocks:
+ *
+ * The sub tile allows to have heterogeneous blocks placed at the same grid location.
+ * Heterogeneous blocks are blocks which do not share either the same functionality or the
+ * IO interface, but do share the same (x, y) grid location.
+ * For each heterogeneous block type, then, there should be a corresponding sub tile to enable
+ * its placement within the physical tile.
+ *
+ * For further information there is a tutorial on the VTR documentation page.
+ *
+ *
+ * Equivalent sites:
+ *
+ * Moreover, the same sub tile allows the placement of different implementations
+ * of a logical block.
+ * This means that two blocks that have different internal functionalities, but the IO interface of one block
+ * is a subset of the other, they can be placed at the same sub tile location within the physical tile.
+ * These two blocks can be identified as equivalent, hence they can belong to the same sub tile.
+ */
+struct t_sub_tile {
+    char* name = nullptr;
+
+    // Mapping between the sub tile's pins and the physical pins corresponding
+    // to the physical tile type.
+    std::vector<int> sub_tile_to_tile_pin_indices;
+
+    std::vector<t_physical_tile_port> ports;
+
+    std::vector<t_logical_block_type_ptr> equivalent_sites; ///<List of netlist blocks (t_logical_block) that one could
+                                                            ///<place within this sub tile.
+
+    t_capacity_range capacity; ///<Indicates the total number of sub tile instances of this type placeable at a
+                               ///<physical location.
+                               ///<E.g.: capacity can range from 4 to 7, meaning that there are four placeable sub tiles
+                               ///<      at a physical location, and compatible netlist blocks can be placed at sub_tile
+                               ///<      indices ranging from 4 to 7.
+    t_class_range class_range;
+
+    int num_phy_pins = 0;
+
+    int index = -1;
+};
+
+/** A logical pin defines the pin index of a logical block type (i.e. a top level PB type)
+ *  This structure wraps the int value of the logical pin to allow its storage in the
+ *  vtr::bimap container.
+ */
+struct t_logical_pin {
+    int pin = -1; // Wrapped logical pin index
+    // Converting constructor (not explicit): a plain int converts implicitly.
+    t_logical_pin(int value)
+        : pin(value) {
+    }
+    // Two pins compare equal when they wrap the same index.
+    bool operator==(const t_logical_pin o) const {
+        return o.pin == this->pin;
+    }
+    // Orders pins by their wrapped index.
+    bool operator<(const t_logical_pin o) const {
+        return o.pin > this->pin;
+    }
+};
+
+/** A physical pin defines the pin index of a physical tile type (i.e. a grid tile type)
+ *  This structure wraps the int value of the physical pin to allow its storage in the
+ *  vtr::bimap container.
+ */
+struct t_physical_pin {
+    int pin = -1; // Wrapped physical pin index
+    // Converting constructor (not explicit): a plain int converts implicitly.
+    t_physical_pin(int value)
+        : pin(value) {
+    }
+    // Two pins compare equal when they wrap the same index.
+    bool operator==(const t_physical_pin o) const {
+        return o.pin == this->pin;
+    }
+    // Orders pins by their wrapped index.
+    bool operator<(const t_physical_pin o) const {
+        return o.pin > this->pin;
+    }
+};
+
+/** Describes I/O and clock ports of a physical tile type
+ *
+ *  It corresponds to <port/> tags in the FPGA architecture description
+ *
+ *  Data members:
+ *      name: name of the port
+ *      is_clock: whether or not this port is a clock
+ *      is_non_clock_global: Applies to top level pb_type, this pin is not a clock but
+ *                           is a global signal (useful for stuff like global reset signals,
+ *                           perhaps useful for VCC and GND)
+ *      num_pins: the number of pins this port has
+ *      tile_type: pointer to the associated tile type
+ *      port_class: port belongs to recognized set of ports in class library
+ *      index: port index by index in array of parent pb_type
+ *      absolute_first_pin_index: absolute index of the first pin in the physical tile.
+ *                                All the other pin indices can be calculated with num_pins
+ *      port_index_by_type: index of port by type (index by input, output, or clock)
+ *      equivalent: Applies to logic block I/Os and to primitive inputs only
+ */
+struct t_physical_tile_port {
+    char* name = nullptr; // nullptr until a name is assigned (the constructor below does not set it)
+    enum PORTS type;      // NOTE(review): not initialized anywhere in this struct — callers must set it before reading
+    bool is_clock;
+    bool is_non_clock_global;
+    int num_pins;
+    PortEquivalence equivalent;
+
+    int index = -1;                    // -1 = not assigned yet (same convention as t_sub_tile::index)
+    int absolute_first_pin_index = -1; // -1 = not assigned yet
+    int port_index_by_type = -1;       // -1 = not assigned yet
+
+    t_physical_tile_port() {
+        is_clock = false;
+        is_non_clock_global = false;
+        // Defaults: one pin, no pin equivalence.
+        num_pins = 1;
+        equivalent = PortEquivalence::NONE;
+    }
+};
+
+/* Describes the type for a logical block
+ * name: unique identifier for type
+ * pb_type: Internal subblocks and routing information for this physical block
+ * pb_graph_head: Head of DAG of pb_types_nodes and their edges
+ *
+ * index: Keep track of type in array for easy access
+ * equivalent_tiles: physical tiles at which this type of netlist block could be placed
+ *
+ * A logical block is the implementation of a component's functionality of the FPGA device
+ * and it identifies its logical behaviour and internal connections.
+ *
+ * The logical block type is mainly used during the packing stage of VPR and is used to generate
+ * the packed netlist and all the corresponding blocks and their internal structure.
+ *
+ * The logical blocks then get assigned to a possible physical tile for the placement step.
+ *
+ * A logical block must correspond to at least one physical tile.
+ */
+struct t_logical_block_type {
+    char* name = nullptr; /* unique identifier for type */
+
+    /* Clustering info */
+    t_pb_type* pb_type = nullptr;             /* internal subblocks and routing information for this block */
+    t_pb_graph_node* pb_graph_head = nullptr; /* head of DAG of pb_types_nodes and their edges */
+
+    int index = -1; /* index of type descriptor in array (allows for index referencing) */
+
+    std::vector<t_physical_tile_type_ptr> equivalent_tiles; ///<List of physical tiles at which one could
+                                                            ///<place this type of netlist block.
+
+    // Is this t_logical_block_type empty? (only declared here; the definition is not part of this struct body)
+    bool is_empty() const;
+};
+
+/*************************************************************************************************
+ * PB Type Hierarchy                                                                             *
+ *************************************************************************************************
+ *
+ * VPR represents the 'type' of block types corresponding to FPGA grid locations using a hierarchy
+ * of t_pb_type objects.
+ *
+ * The root t_pb_type corresponds to a single top level block type and maps to a particular type
+ * of location in the FPGA device grid (e.g. Logic, DSP, RAM etc.).
+ *
+ * A non-root t_pb_type represents an intermediate level of hierarchy within the root block type.
+ *
+ * The PB Type hierarchy corresponds to the tags specified in the FPGA architecture description:
+ *
+ *      struct              XML Tag
+ *      ------              ------------
+ *      t_pb_type           <pb_type/>
+ *      t_mode              <mode/>
+ *      t_interconnect      <interconnect/>
+ *      t_port              <port/>
+ *
+ * The various structures hold pointers to each other which encode the hierarchy.
+ */
+
+/** Describes the type of clustered block if a root (parent_mode == nullptr), an
+ *  intermediate level of hierarchy (parent_mode != nullptr), or a leaf/primitive
+ *  (num_modes == 0, model != nullptr).
+ *
+ *  This (along with t_mode) corresponds to the hierarchical specification of
+ *  block modes that users provide in the architecture (i.e. <pb_type/> tags).
+ *
+ *  It is also useful to note that a single t_pb_type may represent multiple instances of that
+ *  type in the architecture (see the num_pb field).
+ *
+ *  In VPR there is a single instance of a t_pb_type for each type, which is referenced as a
+ *  flyweight by other objects (e.g. t_pb_graph_node).
+ *
+ *  Data members:
+ *      name: name of the physical block type
+ *      num_pb: maximum number of instances of this physical block type sharing one parent
+ *      blif_model: the string in the blif circuit that corresponds with this pb type
+ *      class_type: Special library name
+ *      modes: Different modes accepted
+ *      ports: I/O and clock ports
+ *      num_clock_pins: A count of the total number of clock pins
+ *      num_input_pins: A count of the total number of input pins
+ *      num_output_pins: A count of the total number of output pins
+ *      num_pins: A count of the total number of pins
+ *      timing: Timing matrix of block [0..num_inputs-1][0..num_outputs-1]
+ *      parent_mode: mode of the parent block
+ *      pb_type_power: power-related data of this pb_type (see t_pb_type_power)
+ *      meta: Table storing extra arbitrary metadata attributes.
+ */
+struct t_pb_type {
+    char* name = nullptr;       /* name of the physical block type */
+    int num_pb = 0;             /* maximum number of instances of this type sharing one parent */
+    char* blif_model = nullptr; /* the string in the blif circuit that corresponds with this pb type */
+    t_model* model = nullptr;   /* non-null for a leaf/primitive pb_type */
+    enum e_pb_type_class class_type = UNKNOWN_CLASS; /* special library name */
+
+    t_mode* modes = nullptr; /* [0..num_modes-1] */
+    int num_modes = 0;       /* 0 for a leaf/primitive pb_type */
+    t_port* ports = nullptr; /* [0..num_ports] */
+    int num_ports = 0;
+
+    int num_clock_pins = 0;
+    int num_input_pins = 0; /* inputs not including clock pins */
+    int num_output_pins = 0;
+
+    int num_pins = 0; /* a count of the total number of pins */
+
+    t_mode* parent_mode = nullptr; /* mode of the parent block; nullptr for a root pb_type */
+    int depth = 0; /* depth of pb_type */
+
+    t_pin_to_pin_annotation* annotations = nullptr; /* [0..num_annotations-1] */
+    int num_annotations = 0;
+
+    /* Power related members */
+    t_pb_type_power* pb_type_power = nullptr;
+
+    t_metadata_dict meta; /* table storing extra arbitrary metadata attributes */
+};
+
+/** Describes an operational mode of a clustered logic block
+ *
+ *  This forms part of the t_pb_type hierarchical description of a clustered logic block.
+ *  It corresponds to <mode/> tags in the FPGA architecture description
+ *
+ *  Data members:
+ *      name: name of the mode
+ *      pb_type_children: pb_types it contains
+ *      interconnect: interconnect of parent pb_type to children pb_types or children to children pb_types
+ *      num_interconnect: Total number of interconnect tags specified by user
+ *      parent_pb_type: Which parent contains this mode
+ *      index: Index of mode in array with other modes
+ *      disable_packing: Specify if the mode is disabled/enabled for VPR packer.
+ *                       By default, every mode is enabled for VPR packer.
+ *                       Users can disable it for VPR packer through arch XML
+ *                       When flag is set true, the mode is invisible to VPR packer.
+ *                       No logic will be mapped to the pb_type under the mode
+ *      mode_power: power usage data of this mode (see t_mode_power)
+ *      meta: Table storing extra arbitrary metadata attributes.
+ */
+struct t_mode {
+    char* name = nullptr;                  /* name of the mode */
+    t_pb_type* pb_type_children = nullptr; /* [0..num_child_pb_types] */
+    int num_pb_type_children = 0;
+    t_interconnect* interconnect = nullptr; /* interconnect of parent pb_type to children or children to children */
+    int num_interconnect = 0;               /* total number of interconnect tags specified by user */
+    t_pb_type* parent_pb_type = nullptr;    /* which parent contains this mode */
+    int index = 0;                          /* index of mode in array with other modes */
+
+    /* Packer-related switches */
+    bool disable_packing = false; /* when true, the mode is invisible to VPR packer */
+
+    /* Power related members */
+    t_mode_power* mode_power = nullptr;
+
+    t_metadata_dict meta; /* table storing extra arbitrary metadata attributes */
+};
+
+/** Describes an interconnect edge inside a cluster
+ *
+ *  This forms part of the t_pb_type hierarchical description of a clustered logic block.
+ *  It corresponds to <interconnect/> tags in the FPGA architecture description
+ *
+ *  Data members:
+ *      type: type of the interconnect
+ *      name: identifier for interconnect
+ *      input_string: input string verbatim to parse later
+ *      output_string: output string verbatim to parse later
+ *      annotations: Annotations for delay, power, etc
+ *      num_annotations: Total number of annotations
+ *      infer_annotations: This interconnect is autogenerated, if true, infer pack_patterns
+ *                         such as carry-chains and forced packs based on interconnect linked to it
+ *      parent_mode_index: Mode of parent as int
+ */
+struct t_interconnect {
+    enum e_interconnect type; /* type of the interconnect; NOTE(review): the only member without a default initializer */
+    char* name = nullptr;     /* identifier for interconnect */
+
+    char* input_string = nullptr;  /* input string verbatim to parse later */
+    char* output_string = nullptr; /* output string to parse later */
+
+    t_pin_to_pin_annotation* annotations = nullptr; /* [0..num_annotations-1] */
+    int num_annotations = 0;
+    bool infer_annotations = false; /* true if this interconnect is autogenerated */
+
+    int line_num = 0; /* Interconnect is processed later, need to know what line number it messed up on to give proper error message */
+
+    int parent_mode_index = 0; /* mode of parent as int */
+
+    /* Power related members */
+    t_mode* parent_mode = nullptr;
+
+    t_interconnect_power* interconnect_power = nullptr;
+    t_metadata_dict meta;
+};
+
+/** Describes I/O and clock ports
+ *
+ *  This forms part of the t_pb_type hierarchical description of a clustered logic block.
+ *  It corresponds to <port/> tags in the FPGA architecture description
+ *
+ *  Data members:
+ *      name: name of the port
+ *      model_port: associated model port
+ *      is_clock: whether or not this port is a clock
+ *      is_non_clock_global: Applies to top level pb_type, this pin is not a clock but
+ *                           is a global signal (useful for stuff like global reset signals,
+ *                           perhaps useful for VCC and GND)
+ *      num_pins: the number of pins this port has
+ *      parent_pb_type: pointer to the parent pb_type
+ *      port_class: port belongs to recognized set of ports in class library
+ *      index: port index by index in array of parent pb_type
+ *      port_index_by_type: index of port by type (index by input, output, or clock)
+ *      equivalence: Applies to logic block I/Os and to primitive inputs only
+ */
+struct t_port { /* NOTE(review): no member has a default initializer */
+    char* name;                /* name of the port */
+    t_model_ports* model_port; /* associated model port */
+    enum PORTS type;
+    bool is_clock;            /* whether or not this port is a clock */
+    bool is_non_clock_global; /* not a clock but a global signal (applies to top level pb_type) */
+    int num_pins;             /* the number of pins this port has */
+    PortEquivalence equivalent; /* applies to logic block I/Os and to primitive inputs only */
+    t_pb_type* parent_pb_type;  /* pointer to the parent pb_type */
+    char* port_class;           /* port belongs to recognized set of ports in class library */
+
+    int index;              /* port index in array of parent pb_type */
+    int port_index_by_type; /* index of port by type (index by input, output, or clock) */
+    int absolute_first_pin_index;
+
+    t_port_power* port_power;
+};
+
+struct t_pb_type_power { /* power data of a pb_type; referenced through t_pb_type::pb_type_power */
+    /* Type of power estimation for this pb */
+    e_power_estimation_method estimation_method;
+
+    t_power_usage absolute_power_per_instance; /* User-provided absolute power per block */
+
+    float C_internal;         /* Internal capacitance of the pb */
+    int leakage_default_mode; /* Default mode for leakage analysis, if block has no set mode */
+
+    t_power_usage power_usage;            /* Total power usage of this pb type */
+    t_power_usage power_usage_bufs_wires; /* Power dissipated in local buffers and wire switching (Subset of total power) */
+};
+
+struct t_interconnect_power { /* power data of an interconnect; referenced through t_interconnect::interconnect_power */
+    t_power_usage power_usage;
+
+    /* These are not necessarily power-related; however, at the moment
+     * only power estimation uses them
+     */
+    bool port_info_initialized; /* presumably set once the port counts below are filled in -- TODO confirm */
+    int num_input_ports;
+    int num_output_ports;
+    int num_pins_per_port;
+    float transistor_cnt;
+};
+
+struct t_interconnect_pins { /* pin tables of one interconnect; see t_pb_graph_node::interconnect_pins */
+    t_interconnect* interconnect; /* interconnect these pin tables refer to */
+
+    t_pb_graph_pin*** input_pins;  // [0..num_input_ports-1][0..num_pins_per_port-1]
+    t_pb_graph_pin*** output_pins; // [0..num_output_ports-1][0..num_pins_per_port-1]
+};
+
+struct t_mode_power { /* power data of a mode; referenced through t_mode::mode_power */
+    t_power_usage power_usage; /* Power usage of this mode */
+};
+
+/** Info placed between pins in the architecture file (e.g. delay annotations),
+ *
+ * This is used later for additional information.
+ *
+ * Data Members:
+ *      value: value/property pair
+ *      prop: value/property pair
+ *      type: type of annotation
+ *      format: formatting of data
+ *      input_pins: input pins as string affected by annotation
+ *      output_pins: output pins as string affected by annotation
+ *      clock_pin: clock as string affected by annotation
+ */
+struct t_pin_to_pin_annotation {
+    char** value; /* [0..num_value_prop_pairs - 1] */
+    int* prop;    /* [0..num_value_prop_pairs - 1] */
+    int num_value_prop_pairs; /* number of value/property pairs */
+
+    enum e_pin_to_pin_annotation_type type;     /* type of annotation */
+    enum e_pin_to_pin_annotation_format format; /* formatting of data */
+
+    char* input_pins;  /* input pins as string affected by annotation */
+    char* output_pins; /* output pins as string affected by annotation */
+    char* clock;       /* clock as string affected by annotation */
+
+    int line_num; /* used to report what line number this annotation is found in architecture file */
+};
+
+/*************************************************************************************************
+ * PB Graph                                                                                      *
+ *************************************************************************************************
+ *
+ * The PB graph represents the flattened and elaborated connectivity within a t_pb_type (i.e.
+ * the routing resource graph), derived from the t_pb_type hierarchy.
+ *
+ * The PB graph is built of t_pb_graph_node and t_pb_graph_pin objects.
+ *
+ * There is a single PB graph associated with each root t_pb_type, and it is referenced in other objects (e.g.
+ * t_pb) as a flyweight.
+ *
+ */
+
+/** Describes the internal connectivity corresponding to a t_pb_type and t_mode of a cluster.
+ *
+ *  There is a t_pb_graph_node for each instance of the pb_type (i.e. t_pb_type may describe
+ *  num_pb instances of the type, with each instance represented as a t_pb_graph_node).
+ *  The distinction between the pb_type and the pb_graph_node is necessary since the 'position'
+ *  of a particular instance in the cluster is important when routing the cluster (since the routing
+ *  accessible from each position may be different).
+ *
+ *  Data members:
+ *      pb_type               : Pointer to the type of pb graph node this belongs to
+ *      placement_index       : there are a certain number of pbs available, this gives the index of the node
+ *      child_pb_graph_nodes  : array of children pb graph nodes organized into modes
+ *      parent_pb_graph_node  : parent pb graph node
+ *      total_primitive_count : Total number of this primitive type in the cluster. If there are 10 ALMs per cluster
+ *                              and 2 FFs per ALM (given the mode of the parent of this primitive) then the total is 20.
+ *      illegal_modes         : vector containing illegal modes that result in conflicts during routing
+ */
+class t_pb_graph_node {
+  public:
+    t_pb_type* pb_type; /* pointer to the type of pb graph node this belongs to */
+
+    int placement_index; /* index of this node among the available pbs */
+
+    /* Contains a collection of mode indices that cannot be used as they produce conflicts during VPR packing stage
+     *
+     * Illegal modes do arise when children of a graph_node do have inconsistent `edge_modes` with respect to
+     * the parent_pb.
+     * Example: Edges that connect LUTs A, B and C to the parent pb_graph_node refer to the correct parent's mode which is set to "LUTs",
+     *          but edges of LUT D have the mode of edge corresponding to a wrong parent's pb_graph_node mode, namely "LUTRAM".
+     *          This situation is unfeasible as the edge modes are inconsistent between siblings of the same parent pb_graph_node.
+     *          In this case, the "LUTs" mode of the parent pb_graph_node cannot be used as the LUT D is not able to have a feasible
+     *          edge mode that does relate with the other sibling's edge modes.
+     *
+     *          The "LUTs" index mode is added to the illegal_modes vector. The conflicting mode marked as illegal is the most restrictive one.
+     *          This means that LUT D is unable to be routed if using the parent's "LUTs" mode (otherwise "LUTs" mode would be selected for LUT D
+     *          as well), but LUTs A, B and C could still be routed using the parent pb_graph_node's mode "LUTRAM".
+     *          Therefore, "LUTs" is marked as illegal and all the LUTs (A, B, C and D) will have a consistent parent pb_graph_node mode, namely "LUTRAM".
+     *
+     * Usage: cluster_router uses this information to exclude the expansion of a node which has an inconsistent mode.
+     *        Every time the mode consistency check fails, the index of the mode that causes the conflict is added to this vector.
+     * */
+    std::vector<int> illegal_modes;
+
+    t_pb_graph_pin** input_pins;  /* [0..num_input_ports-1] [0..num_port_pins-1]*/
+    t_pb_graph_pin** output_pins; /* [0..num_output_ports-1] [0..num_port_pins-1]*/
+    t_pb_graph_pin** clock_pins;  /* [0..num_clock_ports-1] [0..num_port_pins-1]*/
+
+    int num_input_ports;
+    int num_output_ports;
+    int num_clock_ports;
+
+    int* num_input_pins;  /* [0..num_input_ports - 1] */
+    int* num_output_pins; /* [0..num_output_ports - 1] */
+    int* num_clock_pins;  /* [0..num_clock_ports - 1] */
+
+    t_pb_graph_node*** child_pb_graph_nodes; /* [0..num_modes-1][0..num_pb_type_in_mode-1][0..num_pb-1] */
+    t_pb_graph_node* parent_pb_graph_node;   /* parent pb graph node; nullptr marks a root node (see is_root) */
+
+    int total_pb_pins; /* only valid for top-level */
+
+    void* temp_scratch_pad;                                     /* temporary data, useful for keeping track of things when traversing data structure */
+    t_cluster_placement_primitive* cluster_placement_primitive; /* pointer to indexing structure useful during packing stage */
+
+    int* input_pin_class_size;  /* Stores the number of pins that belong to a particular input pin class */
+    int num_input_pin_class;    /* number of input pin classes that this pb_graph_node has */
+    int* output_pin_class_size; /* Stores the number of pins that belong to a particular output pin class */
+    int num_output_pin_class;   /* number of output pin classes that this pb_graph_node has */
+
+    int total_primitive_count; /* total number of this primitive type in the cluster */
+
+    /* Interconnect instances for this pb
+     * Only used for power
+     */
+    t_pb_graph_node_power* pb_node_power;
+    t_interconnect_pins** interconnect_pins; /* [0..num_modes-1][0..num_interconnect_in_mode] */
+
+    // Returns true if this pb_graph_node represents a primitive type (primitives have 0 modes)
+    // Dereferences pb_type, so pb_type must be set before calling.
+    bool is_primitive() const { return this->pb_type->num_modes == 0; }
+    // Returns true if this pb_graph_node represents a root graph node (ex. clb),
+    // i.e. it has no parent_pb_graph_node
+    bool is_root() const { return this->parent_pb_graph_node == nullptr; }
+
+    //Returns the number of pins on this graph node
+    //  Note this is the total for all ports on this node excluding any children (i.e. sum of all num_input_pins, num_output_pins, num_clock_pins)
+    int num_pins() const;
+    // Returns a string containing the hierarchical type name of the pb_graph_node
+    // Ex: clb[0][default]/lab[0][default]/fle[3][n1_lut6]/ble6[0][default]/lut6[0]
+    std::string hierarchical_type_name() const;
+};
+
+/* Identify pb pin type for timing purposes */
+enum e_pb_graph_pin_type {
+    PB_PIN_NORMAL = 0, /* also the default value of t_pb_graph_pin::type */
+    PB_PIN_SEQUENTIAL,
+    PB_PIN_INPAD,
+    PB_PIN_OUTPAD,
+    PB_PIN_TERMINAL,
+    PB_PIN_CLOCK
+};
+
+/** Describes a pb graph pin
+ *
+ *  Data Members:
+ *      port: pointer to the port that this pin is associated with
+ *      pin_number: pin number of the port that this pin is associated with
+ *      input edges: [0..num_input_edges - 1]edges incoming
+ *      num_input_edges: number edges incoming
+ *      output edges: [0..num_output_edges - 1]edges out_going
+ *      num_output_edges: number edges out_going
+ *      parent_node: parent pb_graph_node
+ *      pin_count_in_cluster: Unique number for pin inside cluster
+ */
+class t_pb_graph_pin {
+  public:
+    t_port* port = nullptr; /* the port that this pin is associated with */
+    int pin_number = 0;     /* pin number of the port that this pin is associated with */
+    std::vector<t_pb_graph_edge*> input_edges; /* [0..num_input_edges] */
+    int num_input_edges = 0;
+    std::vector<t_pb_graph_edge*> output_edges; /* [0..num_output_edges] */
+    int num_output_edges = 0;
+
+    t_pb_graph_node* parent_node = nullptr; /* parent pb_graph_node */
+    int pin_count_in_cluster = 0;           /* unique number for pin inside cluster */
+
+    int scratch_pad = 0; /* temporary data structure useful to store traversal info */
+
+    enum e_pb_graph_pin_type type = PB_PIN_NORMAL; /* The type of this pin (sequential, i/o etc.) */
+
+    /* sequential timing information (each value defaults to quiet NaN) */
+    float tsu = std::numeric_limits<float>::quiet_NaN();     /* For sequential logic elements the setup time */
+    float thld = std::numeric_limits<float>::quiet_NaN();    /* For sequential logic elements the hold time */
+    float tco_min = std::numeric_limits<float>::quiet_NaN(); /* For sequential logic elements the minimum clock to output time */
+    float tco_max = std::numeric_limits<float>::quiet_NaN(); /* For sequential logic elements the maximum clock to output time */
+    t_pb_graph_pin* associated_clock_pin = nullptr;          /* For sequential elements, the associated clock */
+
+    /* combinational timing information */
+    int num_pin_timing = 0;                   /* Number of ipin to opin timing edges*/
+    std::vector<t_pb_graph_pin*> pin_timing;  /* timing edge sink pins  [0..num_pin_timing-1]*/
+    std::vector<float> pin_timing_del_max;    /* primitive ipin to opin max-delay [0..num_pin_timing-1]*/
+    std::vector<float> pin_timing_del_min;    /* primitive ipin to opin min-delay [0..num_pin_timing-1]*/
+    int num_pin_timing_del_max_annotated = 0; //The list of valid pin_timing_del_max entries runs from [0..num_pin_timing_del_max_annotated-1]
+    int num_pin_timing_del_min_annotated = 0; //The list of valid pin_timing_del_min entries runs from [0..num_pin_timing_del_min_annotated-1]
+
+    /* Applies to clusters only */
+    int pin_class = 0;
+
+    /* Applies to pins of primitive only */
+    int* parent_pin_class = nullptr; /* [0..depth-1] the grouping of pins that this particular pin belongs to */
+    /* Applies to output pins of primitives only */
+    t_pb_graph_pin*** list_of_connectable_input_pin_ptrs = nullptr; /* [0..depth-1][0..num_connectable_primitive_input_pins-1] what input pins this output can connect to without exiting cluster at given depth */
+    int* num_connectable_primitive_input_pins = nullptr;            /* [0..depth-1] number of input pins that this output pin can reach without exiting cluster at given depth */
+
+    bool is_forced_connection = false; /* This output pin connects to one and only one input pin */
+
+    t_pb_graph_pin_power* pin_power = nullptr;
+
+    // class member functions
+  public:
+    // Returns true if this pin belongs to a primitive block like
+    // a LUT or FF, instead of a cluster-level block like a CLB.
+    bool is_primitive_pin() const { // dereferences parent_node, which must be set
+        return this->parent_node->is_primitive();
+    }
+    // Returns true if this pin belongs to a root pb_block which is a pb_block
+    // that has no parent block. For example, pins of a CLB, IO, DSP, etc.
+    bool is_root_block_pin() const { // dereferences parent_node, which must be set
+        return this->parent_node->is_root();
+    }
+    // This function returns a string that contains the name of the pin
+    // and the entire sequence of pb_types in the hierarchy from the block
+    // of this pin back to the cluster-level (top-level) pb_type in the
+    // following format: clb[0]/lab[0]/fle[3]/ble6[0]/lut6[0].in[0]
+    // if full_description is set to false it will only return lut6[0].in[0]
+    std::string to_string(const bool full_description = true) const;
+};
+
+/** Describes a pb graph edge
+ *
+ *  Note that this is a "fat" edge which supports bus-based connections
+ *
+ *  Data members:
+ *      input_pins: array of pb_type graph input pins ptrs entering this edge
+ *      num_input_pins: Number of input pins entering this edge
+ *      output_pins: array of pb_type graph output pins ptrs exiting this edge
+ *      num_output_pins: Number of output pins exiting this edge
+ *
+ *      num_pack_patterns: number of pack patterns this edge belongs to
+ *      pack_pattern_names: [0..num_pack_patterns-1] name of each pack pattern
+ *      pack_pattern_indices: [0..num_pack_patterns-1] id of each pack pattern
+ *      infer_pattern: if true, pattern of this edge could be inferred by checking
+ *                     input/output edges. This is true when the edge is a single
+ *                     fanout edge and is driven or driving another edge which is
+ *                     annotated with a pack pattern.
+ */
+class t_pb_graph_edge {
+  public:
+    /* edge connectivity */
+    t_pb_graph_pin** input_pins;  /* array of pb_type graph input pins ptrs entering this edge */
+    int num_input_pins;           /* number of input pins entering this edge */
+    t_pb_graph_pin** output_pins; /* array of pb_type graph output pins ptrs exiting this edge */
+    int num_output_pins;          /* number of output pins exiting this edge */
+
+    /* timing information */
+    float delay_max;
+    float delay_min;
+    float capacitance;
+
+    /* who drives this edge */
+    t_interconnect* interconnect;
+    int driver_set;
+    int driver_pin;
+
+    /* pack pattern info */
+    int num_pack_patterns;                       /* number of pack patterns this edge belongs to */
+    std::vector<const char*> pack_pattern_names; /* [0..num_pack_patterns-1] name of each pack pattern */
+    int* pack_pattern_indices;                   /* [0..num_pack_patterns-1] id of each pack pattern */
+    bool infer_pattern;                          /* if true, pattern of this edge could be inferred from input/output edges */
+
+    // class member functions
+  public:
+    // Returns true if this edge is annotated with the given pattern_index
+    //  pattern_index : index of the packing pattern
+    bool annotated_with_pattern(int pattern_index) const;
+
+    // Returns true if this edge is annotated with pattern_index or its pattern
+    // is inferred and a connected output edge is annotated with pattern_index
+    //   pattern_index : index of the packing pattern
+    bool belongs_to_pattern(int pattern_index) const;
+};
+
+struct t_pb_graph_node_power { /* referenced through t_pb_graph_node::pb_node_power */
+    float transistor_cnt_pb_children; /* Total transistor size of this pb */
+    float transistor_cnt_interc;      /* Total transistor size of the interconnect in this pb */
+    float transistor_cnt_buffers;     /* presumably total transistor size of the buffers in this pb -- TODO confirm */
+};
+
+struct t_pb_graph_pin_power { /* referenced through t_pb_graph_pin::pin_power */
+    /* Transistor-level Power Properties */
+    float C_wire;
+    float buffer_size;
+
+    /* Pin-Toggle Power Properties */
+    t_pb_graph_pin* scaled_by_pin;
+};
+
+/*************************************************************************************************/
+/* FPGA Routing architecture                                                                     */
+/*************************************************************************************************/
+
+/* Description of routing channel distribution across the FPGA, only available for global routing
+ * Width is standard dev. for Gaussian.  xpeak is where peak     *
+ * occurs. dc is the dc offset for Gaussian and pulse waveforms. */
+enum e_stat { /* kind of channel width distribution; stored in t_chan::type */
+    UNIFORM,
+    GAUSSIAN,
+    PULSE,
+    DELTA
+};
+struct t_chan {
+    enum e_stat type; /* distribution kind (see e_stat) */
+    float peak;
+    float width; /* standard dev. for Gaussian */
+    float xpeak; /* where peak occurs */
+    float dc;    /* dc offset for Gaussian and pulse waveforms */
+};
+
+/* chan_x_dist: Describes the x-directed channel width distribution.         *
+ * chan_y_dist: Describes the y-directed channel width distribution.         */
+struct t_chan_width_dist {
+    t_chan chan_x_dist; /* x-directed channel width distribution */
+    t_chan chan_y_dist; /* y-directed channel width distribution */
+};
+
+enum e_directionality { /* stored in t_segment_inf::directionality */
+    UNI_DIRECTIONAL,
+    BI_DIRECTIONAL
+};
+/* X_AXIS: Data that describes an x-directed wire segment (CHANX)                     *
+ * Y_AXIS: Data that describes a y-directed wire segment (CHANY)                      *
+ * BOTH_AXIS: Data that can be applied to both x-directed and y-directed wire segment */
+enum e_parallel_axis {
+    X_AXIS,   /* x-directed wire segment (CHANX) */
+    Y_AXIS,   /* y-directed wire segment (CHANY) */
+    BOTH_AXIS /* applies to both x-directed and y-directed wire segments */
+};
+enum e_switch_block_type { /* unscoped: the enumerators are visible in the enclosing scope */
+    SUBSET,
+    WILTON,
+    UNIVERSAL,
+    FULL,
+    CUSTOM
+};
+typedef enum e_switch_block_type t_switch_block_type; /* alias so the enum can be named without the "enum" keyword */
+enum e_Fc_type { /* unscoped: ABSOLUTE and FRACTIONAL are visible in the enclosing scope */
+    ABSOLUTE,  /* presumably Fc given as an absolute number -- TODO confirm */
+    FRACTIONAL /* presumably Fc given as a fraction -- TODO confirm */
+};
+
+/* Lists all the important information about a certain segment type.  Only   *
+ * used if the route_type is DETAILED.  [0 .. det_routing_arch.num_segment]  *
+ * name: the name of this segment                                            *
+ * frequency:  ratio of tracks which are of this segment type.               *
+ * length:     Length (in clbs) of the segment.                              *
+ * arch_wire_switch: Index of the switch type that connects other wires      *
+ *                   *to* this segment. Note that this index is in relation  *
+ *                   to the switches from the architecture file, not the     *
+ *                   expanded list of switches that is built at the end of   *
+ *                   build_rr_graph.                                         *
+ * arch_opin_switch: Index of the switch type that connects output pins      *
+ *                   (OPINs) *to* this segment. Note that this index is in   *
+ *                   relation to the switches from the architecture file,    *
+ *                   not the expanded list of switches that is built         *
+ *                   at the end of build_rr_graph                            *
+ * frac_cb:  The fraction of logic blocks along its length to which this     *
+ *           segment can connect.  (i.e. internal population).               *
+ * frac_sb:  The fraction of the length + 1 switch blocks along the segment  *
+ *           to which the segment can connect.  Segments that aren't long    *
+ *           lines must connect to at least two switch boxes.                *
+ * parallel_axis:   Defines what axis the segment is parallel to. See        *
+ *                  e_parallel_axis comments for more details on the values. *
+ * Cmetal: Capacitance of a routing track, per unit logic block length.      *
+ * Rmetal: Resistance of a routing track, per unit logic block length.       *
+ * (UDSD by AY) drivers: How do signals driving a routing track connect to   *
+ *                       the track?                                          *
+ * seg_index: The index of the segment as stored in the appropriate Segs list*
+ *            Upon loading the architecture, we use this field to keep track *
+ *            the segment's index in the unified segment_inf vector. This is *
+ *            useful when building the rr_graph for different Y & X channels *
+ *            in terms of track distribution and segment type.               *
+ * meta: Table storing extra arbitrary metadata attributes.                  */
+struct t_segment_inf {
+    std::string name;       /* the name of this segment */
+    int frequency;          /* ratio of tracks which are of this segment type */
+    int length;             /* length (in clbs) of the segment */
+    short arch_wire_switch; /* index (into the architecture file switches) of the switch connecting other wires *to* this segment */
+    short arch_opin_switch; /* index (into the architecture file switches) of the switch connecting OPINs *to* this segment */
+    float frac_cb;          /* fraction of logic blocks along its length to which this segment can connect */
+    float frac_sb;          /* fraction of the length + 1 switch blocks along the segment to which it can connect */
+    bool longline;
+    float Rmetal; /* resistance of a routing track, per unit logic block length */
+    float Cmetal; /* capacitance of a routing track, per unit logic block length */
+    enum e_directionality directionality;
+    enum e_parallel_axis parallel_axis; /* axis the segment is parallel to */
+    std::vector<bool> cb;
+    std::vector<bool> sb;
+    int seg_index; /* index of the segment in the unified segment_inf vector; not compared by operator== */
+    //float Cmetal_per_m; /* Wire capacitance (per meter) */
+};
+
+inline bool operator==(const t_segment_inf& lhs, const t_segment_inf& rhs) { // member-wise equality; seg_index is not part of the comparison
+    return lhs.name == rhs.name && lhs.frequency == rhs.frequency && lhs.length == rhs.length && lhs.arch_wire_switch == rhs.arch_wire_switch && lhs.arch_opin_switch == rhs.arch_opin_switch && lhs.frac_cb == rhs.frac_cb && lhs.frac_sb == rhs.frac_sb && lhs.longline == rhs.longline && lhs.Rmetal == rhs.Rmetal && lhs.Cmetal == rhs.Cmetal && lhs.directionality == rhs.directionality && lhs.parallel_axis == rhs.parallel_axis && lhs.cb == rhs.cb && lhs.sb == rhs.sb;
+}
+
+/*provide hashing for t_segment_inf to enable the use of many std containers.
+ * Only the most important/varying fields are used (not worth the extra overhead to include all fields)*/
+
+struct t_hash_segment_inf {
+    size_t operator()(const t_segment_inf& seg_inf) const noexcept {
+        // Hash four fields separately, shift three of them by distinct amounts, then XOR everything together
+        const size_t name_bits = std::hash<std::string>()(seg_inf.name);
+        const size_t freq_bits = std::hash<int>()(seg_inf.frequency) << 10;
+        const size_t len_bits = std::hash<int>()(seg_inf.length) << 20;
+        const size_t opin_bits = std::hash<int>()((int)seg_inf.arch_opin_switch) << 30;
+        return name_bits ^ freq_bits ^ len_bits ^ opin_bits;
+    }
+};
+enum class SwitchType {
+    MUX = 0,   //A configurable (buffered) mux (single-driver)
+    TRISTATE,  //A configurable tristate-able buffer (multi-driver)
+    PASS_GATE, //A configurable pass transistor switch (multi-driver)
+    SHORT,     //A non-configurable electrically shorted connection (multi-driver)
+    BUFFER,    //A non-configurable non-tristate-able buffer (uni-driver)
+    INVALID,   //Unspecified, usually an error
+    NUM_SWITCH_TYPES //Count of the enumerators above; used as the size of SWITCH_TYPE_STRINGS
+};
+constexpr std::array<const char*, size_t(SwitchType::NUM_SWITCH_TYPES)> SWITCH_TYPE_STRINGS = {{"MUX", "TRISTATE", "PASS_GATE", "SHORT", "BUFFER", "INVALID"}}; //Names of the SwitchType enumerators, in declaration order
+
+/* Constant/Reserved names for switches in architecture XML
+ * Delayless switch:
+ *   The zero-delay switch created by VPR internally
+ *   This is a special switch just to ease CAD algorithms
+ *   It is mainly used in
+ *     - the edges between SOURCE and SINK nodes in routing resource graphs
+ *     - the edges in CLB-to-CLB connections (defined by <directlist> in arch XML)
+ *
+ */
+constexpr const char* VPR_DELAYLESS_SWITCH_NAME = "__vpr_delayless_switch__";
+
+enum class BufferSize { // scoped, so BufferSize::ABSOLUTE does not clash with e_Fc_type's ABSOLUTE
+    AUTO,
+    ABSOLUTE
+};
+
+/* Lists all the important information about a switch type read from the     *
+ * architecture file.                                                        *
+ * [0 .. Arch.num_switch]                                                    *
+ * buffered:  Does this switch include a buffer?                             *
+ * R:  Equivalent resistance of the buffer/switch.                           *
+ * Cin:  Input capacitance.                                                  *
+ * Cout:  Output capacitance.                                                *
+ * Cinternal: Since multiplexers and tristate buffers are modeled as a       *
+ *            parallel stream of pass transistors feeding into a buffer,     *
+ *            we would expect an additional "internal capacitance"           *
+ *            to arise when the pass transistor is enabled and the signal    *
+ *            must propagate to the buffer. See diagram of one stream below: *
+ *                                                                           *
+ *                  Pass Transistor                                          *
+ *                       |                                                   *
+ *                     -----                                                 *
+ *                     -----      Buffer                                     *
+ *                    |     |       |\                                       *
+ *              ------       -------| \--------                              *
+ *                |             |   | /    |                                 *
+ *              =====         ===== |/   =====                               *
+ *              =====         =====      =====                               *
+ *                |             |          |                                 *
+ *             Input C    Internal C    Output C                             *
+ *                                                                           *
+ * Tdel_map: A map where the key is the number of inputs and the entry       *
+ *           is the corresponding delay. If there is only one entry at key   *
+ *           UNDEFINED, then delay is a constant (doesn't vary with fan-in). *
+ *           A map saves us the trouble of sorting, and has lower access     *
+ *           time for interpolation/extrapolation purposes                   *
+ * mux_trans_size:  The area of each transistor in the segment's driving mux *
+ *                  measured in minimum width transistor units               *
+ * buf_size:  The area of the buffer. If set to zero, area should be         *
+ *            calculated from R                                              */
+struct t_arch_switch_inf {
+  public:
+    static constexpr int UNDEFINED_FANIN = -1; //Default fanin argument of Tdel(), i.e. "no fanin given"
+
+    char* name = nullptr;
+    float R = 0.;
+    float Cin = 0.;
+    float Cout = 0.;
+    float Cinternal = 0.;
+    float mux_trans_size = 1.;
+    BufferSize buf_size_type = BufferSize::AUTO;
+    float buf_size = 0.;
+    e_power_buffer_type power_buffer_type = POWER_BUFFER_TYPE_AUTO;
+    float power_buffer_size = 0.;
+
+  public:
+    //Returns the type of switch
+    SwitchType type() const;
+
+    //Returns true if this switch type isolates its input and output into
+    //separate DC-connected subcircuits
+    bool buffered() const;
+
+    //Returns true if this switch type is configurable
+    bool configurable() const;
+
+    //Returns the switch's directionality (e.g. BI_DIR, UNI_DIR)
+    e_directionality directionality() const;
+
+    //Returns the intrinsic delay of this switch for the given fanin
+    float Tdel(int fanin = UNDEFINED_FANIN) const;
+
+    //Returns true if the Tdel value is independent of fanin
+    bool fixed_Tdel() const;
+
+  public:
+    void set_Tdel(int fanin, float delay);
+    void set_type(SwitchType type_val);
+
+  private:
+    SwitchType type_ = SwitchType::INVALID;
+    std::map<int, double> Tdel_map_;
+
+    friend void PrintArchInfo(FILE*, const t_arch*);
+};
+
+/* Lists all the important information about an rr switch type.              *
+ * The s_rr_switch_inf describes a switch derived from a switch described    *
+ * by s_arch_switch_inf. This indirection allows us to vary properties of a  *
+ * given switch, such as varying delay with switch fan-in.                   *
+ * buffered:  Does this switch isolate its input/output into separate        *
+ *            DC-connected sub-circuits?                                     *
+ * configurable: Is this switch configurable (i.e. can the switch be         *
+ *               turned on or off)? This allows modelling of non-optional    *
+ *               switches (e.g. fixed buffers, or shorted connections) which *
+ *               must be used (e.g. expanded by the router) if a connected   *
+ *               segment is used.                                            *
+ * R:  Equivalent resistance of the buffer/switch.                           *
+ * Cin:  Input capacitance.                                                  *
+ * Cout:  Output capacitance.                                                *
+ * Cinternal: Internal capacitance, see the definition above.                *
+ * Tdel:  Intrinsic delay.  The delay through an unloaded switch is          *
+ *        Tdel + R * Cout.                                                   *
+ * mux_trans_size:  The area of each transistor in the segment's driving mux *
+ *                  measured in minimum width transistor units               *
+ * buf_size:  The area of the buffer. If set to zero, area should be         *
+ *            calculated from R                                              */
+struct t_rr_switch_inf {
+    float R = 0.;
+    float Cin = 0.;
+    float Cout = 0.;
+    float Cinternal = 0.;
+    float Tdel = 0.;
+    float mux_trans_size = 0.;
+    float buf_size = 0.;
+    const char* name = nullptr;
+    e_power_buffer_type power_buffer_type = POWER_BUFFER_TYPE_UNDEFINED;
+    float power_buffer_size = 0.;
+
+  public:
+    //Returns the type of switch
+    SwitchType type() const;
+
+    //Returns true if this switch type isolates its input and output into
+    //separate DC-connected subcircuits
+    bool buffered() const;
+
+    //Returns true if this switch type is configurable
+    bool configurable() const;
+
+  public:
+    void set_type(SwitchType type_val);
+
+  private:
+    SwitchType type_ = SwitchType::INVALID; //INVALID until set_type() is called
+};
+
+/* Lists all the important information about a direct chain connection.     *
+ * [0 .. det_routing_arch.num_direct]                                       *
+ * name:  Name of this direct chain connection                              *
+ * from_pin:  The type of the pin that drives this chain connection         *
+ * In the format of <block_name>.<pin_name>                      *
+ * to_pin:  The type of pin that is driven by this chain connection         *
+ * In the format of <block_name>.<pin_name>                        *
+ * x_offset:  The x offset from the source to the sink of this connection   *
+ * y_offset:  The y offset from the source to the sink of this connection   *
+ * sub_tile_offset: The sub-tile offset from the source to the sink         *
+ * switch_type: The index into the switch list for the switch used by this  *
+ *              direct                                                      *
+ * line: The line number in the .arch file that specifies this              *
+ *       particular placement macro.                                        *
+ */
+struct t_direct_inf {
+    char* name; //NOTE: no member of this struct has a default initializer; every field must be set by its creator
+    char* from_pin;
+    char* to_pin;
+    int x_offset;
+    int y_offset;
+    int sub_tile_offset; //NOTE(review): presumably the sub tile (capacity) offset from source to sink - confirm
+    int switch_type;
+    e_side from_side; //NOTE(review): presumably the block side of the driving pin - confirm
+    e_side to_side;   //NOTE(review): presumably the block side of the driven pin - confirm
+    int line;
+};
+
+enum class SwitchPointOrder {
+    FIXED,   //Switchpoints keep the order in which the architecture specifies them
+    SHUFFLED //Switchpoints are shuffled (more diversity)
+};
+
+//A collection of switchpoints associated with one segment type
+struct t_wire_switchpoints {
+    std::string segment_name;      //The name of the segment type
+    std::vector<int> switchpoints; //The indices of wire points along the segment
+};
+
+/* Used to list information about a set of track segments that should connect through a switchblock */
+struct t_wireconn_inf {
+    std::vector<t_wire_switchpoints> from_switchpoint_set;             //The set of segment/wirepoints representing the 'from' set (union of all t_wire_switchpoints in vector)
+    std::vector<t_wire_switchpoints> to_switchpoint_set;               //The set of segment/wirepoints representing the 'to' set (union of all t_wire_switchpoints in vector)
+    SwitchPointOrder from_switchpoint_order = SwitchPointOrder::FIXED; //The desired from_switchpoint_set ordering
+    SwitchPointOrder to_switchpoint_order = SwitchPointOrder::FIXED;   //The desired to_switchpoint_set ordering
+
+    std::string num_conns_formula; /* Specifies how many connections should be made for this wireconn.
+                                    *
+                                    * '<int>': A specific number of connections
+                                    * 'from':  The number of generated connections between the 'from' and 'to' sets equals the
+                                    *          size of the 'from' set. This ensures every element in the from set is connected
+                                    *          to an element of the 'to' set.
+                                    *          Note: this may result in 'to' elements being driven by multiple 'from'
+                                    *          elements (if 'from' is larger than 'to'), or in some elements of 'to' having
+                                    *          no driving connections (if 'to' is larger than 'from').
+                                    * 'to':    The number of generated connections is set equal to the size of the 'to' set.
+                                    *          This ensures that each element of the 'to' set has precisely one incoming connection.
+                                    *          Note: this may result in 'from' elements driving multiple 'to' elements (if 'to' is
+                                    *          larger than 'from'), or some 'from' elements driving no 'to' elements (if 'from' is
+                                    *          larger than 'to')
+                                    */
+};
+
+/* represents a connection between two sides of a switchblock */
+class SB_Side_Connection {
+  public:
+    /* the pair of switchblock sides joined by this connection */
+    enum e_side from_side = TOP;
+    enum e_side to_side = TOP;
+
+    /* both sides default to TOP (see the member initializers above) */
+    SB_Side_Connection() = default;
+
+    SB_Side_Connection(enum e_side from, enum e_side to)
+        : from_side(from)
+        , to_side(to) {
+    }
+
+    /* overwrite both sides of the connection in one call */
+    void set_sides(enum e_side from, enum e_side to) {
+        from_side = from;
+        to_side = to;
+    }
+
+    /* Ordering used when this class is a std::map key:
+     * connections are ordered by from_side first, ties are broken by to_side. */
+    bool operator<(const SB_Side_Connection& obj) const {
+        // Different 'from' sides decide the ordering on their own
+        if (from_side < obj.from_side) {
+            return true;
+        }
+        if (from_side != obj.from_side) {
+            return false;
+        }
+
+        // Same 'from' side: fall back to comparing the 'to' sides
+        return to_side < obj.to_side;
+    }
+};
+
+/* Maps a (from side, to side) connection to the string permutation functions used to connect those two sides */
+using t_permutation_map = std::map<SB_Side_Connection, std::vector<std::string>>;
+
+/* Lists all information about a particular switch block specified in the architecture file */
+struct t_switchblock_inf {
+    std::string name;                /* the name of this switchblock */
+    e_sb_location location;          /* where on the FPGA this switchblock should be built (e.g. perimeter, core, everywhere) */
+    e_directionality directionality; /* the directionality of this switchblock (unidir/bidir) */
+
+    t_permutation_map permutation_map; /* permutation functions of this switchblock, keyed by the (from side, to side) pair they connect */
+
+    std::vector<t_wireconn_inf> wireconns; /* list of wire types/groups this SB will connect */
+};
+
+/* Clock related data types used for building a dedicated clock network */
+struct t_clock_arch_spec {
+    std::vector<t_clock_network_arch> clock_networks_arch;
+    std::unordered_map<std::string, t_metal_layer> clock_metal_layers; //Keyed by a string (NOTE(review): presumably the metal layer name - confirm)
+    std::vector<t_clock_connection_arch> clock_connections_arch;
+};
+
+struct t_lut_cell {
+    std::string name;                //NOTE(review): presumably the name of the LUT cell - confirm
+    std::string init_param;          //NOTE(review): presumably the name of the cell parameter holding the LUT init value - confirm
+    std::vector<std::string> inputs; //NOTE(review): presumably the cell's input pin names - confirm
+};
+
+struct t_lut_bel {
+    std::string name;
+
+    std::vector<std::string> input_pins;
+    std::string output_pin;
+    //Two bels are equal only when the name, every input pin and the output pin all match
+    bool operator==(const t_lut_bel& other) const {
+        return !(name != other.name || input_pins != other.input_pins || output_pin != other.output_pin);
+    }
+};
+
+struct t_lut_element {
+    std::string site_type;
+    int width;
+    std::vector<t_lut_bel> lut_bels;
+    //Member-wise equality: site type, width and the full list of bels must match
+    bool operator==(const t_lut_element& other) const {
+        return !(site_type != other.site_type || width != other.width || lut_bels != other.lut_bels);
+    }
+};
+
+/**
+ * Represents a Network-on-chip(NoC) Router data type. It is used
+ * to store individual router information when parsing the arch file.
+ * */
+struct t_router {
+    /** A unique id provided by the user to identify a router. Must be a positive value; the default of -1 lies outside that range*/
+    int id = -1;
+
+    /** A value representing the approximate horizontal position on the FPGA device where the router
+     * tile is located (defaults to -1)*/
+    double device_x_position = -1;
+    /** A value representing the approximate vertical position on the FPGA device where the router
+     * tile is located (defaults to -1)*/
+    double device_y_position = -1;
+
+    /** A list of router ids that are connected to the current router*/
+    std::vector<int> connection_list;
+};
+
+/**
+ * Network-on-chip(NoC) data type used to store the network properties
+ * when parsing the arch file. This is used when building the dedicated on-chip
+ * network during the device creation.
+ * */
+struct t_noc_inf {
+    double link_bandwidth; /*!< The maximum bandwidth supported in the NoC. This value is the same for all links. Units in bps*/
+    double link_latency;   /*!< The worst case latency seen when traversing a link. This value is the same for all links. Units in seconds*/
+    double router_latency; /*!< The worst case latency seen when traversing a router. This value is the same for all routers. Units in seconds*/
+    /* NOTE: the three doubles above have no default initializer */
+    /** A list of all routers in the NoC*/
+    std::vector<t_router> router_list;
+
+    /** Represents the name of a router tile on the FPGA device. This should match the name used in the arch file when
+     * describing a NoC router tile within the FPGA device*/
+    std::string noc_router_tile_name;
+};
+
+/*   Detailed routing architecture */
+struct t_arch {
+    mutable vtr::string_internment strings;
+    std::vector<vtr::interned_string> interned_strings;
+
+    char* architecture_id = nullptr; //Secure hash digest of the architecture file to uniquely identify this architecture (null until set)
+
+    t_chan_width_dist Chans;
+    enum e_switch_block_type SBType;
+    std::vector<t_switchblock_inf> switchblocks;
+    float R_minW_nmos;
+    float R_minW_pmos;
+    int Fs;
+    float grid_logic_tile_area;
+    std::vector<t_segment_inf> Segments;
+    t_arch_switch_inf* Switches = nullptr;
+    int num_switches = 0; //Zero-initialized like num_directs, so it agrees with the null Switches pointer
+    t_direct_inf* Directs = nullptr;
+    int num_directs = 0;
+
+    t_model* models = nullptr;
+    t_model* model_library = nullptr;
+
+    t_power_arch* power = nullptr;
+    t_clock_arch* clocks = nullptr;
+
+    // Constants
+    // VCC and GND cells are special virtual cells that are
+    // used to handle the constant network of the device.
+    //
+    // Similarly, the constant nets are defined to identify
+    // the generic name for the constant network.
+    //
+    // Given that usually, the constants have a dedicated network in
+    // real FPGAs, this information becomes relevant to identify which
+    // nets from the circuit netlist are belonging to the constant network,
+    // and assigned to it accordingly.
+    //
+    // NOTE: At the moment, the constant cells and nets are primarily used
+    // for the interchange netlist format, to determine which are the constants
+    // net names and which virtual cell is responsible to generate them.
+    // The information is present in the device database.
+    std::pair<std::string, std::string> gnd_cell;
+    std::pair<std::string, std::string> vcc_cell;
+
+    std::string gnd_net = "$__gnd_net";
+    std::string vcc_net = "$__vcc_net";
+
+    // Luts
+    std::vector<t_lut_cell> lut_cells;
+    std::unordered_map<std::string, std::vector<t_lut_element>> lut_elements;
+
+    //The name of the switch used for the input connection block (i.e. to
+    //connect routing tracks to block pins).
+    //This should correspond to a switch in Switches
+    std::string ipin_cblock_switch_name;
+
+    std::vector<t_grid_def> grid_layouts; //Set of potential device layouts
+
+    t_clock_arch_spec clock_arch; // Clock related data types
+
+    // if we have an embedded NoC in the architecture, then we store it here
+    t_noc_inf* noc = nullptr;
+};
+
+#endif
diff --git a/third_party/vtr/libs/archfpga/src/physical_types_util.cc b/third_party/vtr/libs/archfpga/src/physical_types_util.cc
new file mode 100644
index 000000000..6971d883a
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/physical_types_util.cc
@@ -0,0 +1,492 @@
+#include "vtr_assert.h"
+#include "vtr_memory.h"
+#include "vtr_util.h"
+
+#include "arch_types.h"
+#include "arch_util.h"
+#include "arch_error.h"
+
+#include "physical_types_util.h"
+
+/**
+ * @brief Data structure that holds information about a physical pin
+ *
+ * This structure holds the following information on a pin:
+ *   - sub_tile_index: index of the sub tile within the physical tile type containing this pin
+ *   - capacity_instance: sub tile instance containing this physical pin.
+ *                        Each sub tile has a capacity field, which determines how many of its
+ *                        instances are present in the belonging physical tile.
+ *                        E.g.:
+ *                          - The sub tile BUFG has a capacity of 4 within its belonging physical tile CLOCK_TILE.
+ *                          - The capacity instance of a pin in the CLOCK_TILE identifies which of the 4 instances
+ *                            the pin belongs to.
+ *   - port_index: Each sub tile has a set of ports with a variable number of pins. The port_index field identifies
+ *                 which port the physical pin belongs to.
+ *   - pin_index_in_port: Given that ports can have multiple pins, we need also a field to identify which one of the
+ *                        multiple pins of the port corresponds to the physical pin.
+ *
+ */
+struct t_pin_inst_port {
+    int sub_tile_index;    // Index of the sub tile that contains the pin
+    int capacity_instance; // Which capacity instance of that sub tile the pin belongs to
+    int port_index;        // Index of the port that contains the pin
+    int pin_index_in_port; // Pin's index within the port
+};
+
+/******************** Subroutine declarations and definition ****************************/
+
+static std::tuple<int, int, int> get_pin_index_for_inst(t_physical_tile_type_ptr type, int pin_index);
+static t_pin_inst_port block_type_pin_index_to_pin_inst(t_physical_tile_type_ptr type, int pin_index);
+
+static std::tuple<int, int, int> get_pin_index_for_inst(t_physical_tile_type_ptr type, int pin_index) {
+    VTR_ASSERT(pin_index < type->num_pins);
+
+    // Walk the sub tiles in order; first_pin is the tile-level index of the
+    // first physical pin owned by the sub tile currently being examined.
+    int first_pin = 0;
+    for (auto& sub_tile : type->sub_tiles) {
+        int end_pin = first_pin + sub_tile.num_phy_pins;
+
+        if (pin_index < end_pin) {
+            // The sub tile's pins are split evenly between its capacity instances
+            int pins_per_inst = sub_tile.num_phy_pins / sub_tile.capacity.total();
+            int local_pin = pin_index - first_pin;
+            // (pin index within its instance, capacity instance, sub tile index)
+            return std::make_tuple(local_pin % pins_per_inst, local_pin / pins_per_inst, sub_tile.index);
+        }
+        first_pin = end_pin;
+    }
+
+    archfpga_throw(__FILE__, __LINE__,
+                   "Could not infer the correct pin instance index for %s (pin index: %d)", type->name, pin_index);
+}
+
+static t_pin_inst_port block_type_pin_index_to_pin_inst(t_physical_tile_type_ptr type, int pin_index) {
+    // After this call pin_index is relative to the capacity instance owning the pin
+    int sub_tile_index, inst_num;
+    std::tie(pin_index, inst_num, sub_tile_index) = get_pin_index_for_inst(type, pin_index);
+    t_pin_inst_port result;
+    result.sub_tile_index = sub_tile_index;
+    result.capacity_instance = inst_num;
+    result.port_index = OPEN; // both port fields stay OPEN if no port covers the pin
+    result.pin_index_in_port = OPEN;
+    for (auto const& port : type->sub_tiles[sub_tile_index].ports) {
+        int offset_in_port = pin_index - port.absolute_first_pin_index;
+        if (offset_in_port >= 0 && offset_in_port < port.num_pins) {
+            result.port_index = port.index;
+            result.pin_index_in_port = offset_in_port;
+            break;
+        }
+    }
+    return result;
+}
+
+/******************** End Subroutine declarations and definition ************************/
+
+int get_sub_tile_physical_pin(int sub_tile_index,
+                              t_physical_tile_type_ptr physical_tile,
+                              t_logical_block_type_ptr logical_block,
+                              int pin) {
+    // Looks up logical block pin `pin` in the pin map of sub tile `sub_tile_index`; reports an error if it is unmapped
+    t_logical_pin logical_pin(pin);
+    const auto& direct_map = physical_tile->tile_block_pin_directs_map.at(logical_block->index).at(sub_tile_index);
+    auto result = direct_map.find(logical_pin);
+
+    if (result == direct_map.end()) {
+        // The two literals are concatenated, so the first one must end with a space
+        archfpga_throw(__FILE__, __LINE__,
+                       "Couldn't find the corresponding physical tile pin of the logical block pin %d. "
+                       "Physical Tile Type: %s, Logical Block Type: %s.\n",
+                       pin, physical_tile->name, logical_block->name);
+    }
+    return result->second.pin;
+}
+
+int get_logical_block_physical_sub_tile_index(t_physical_tile_type_ptr physical_tile,
+                                              t_logical_block_type_ptr logical_block) {
+    // Returns the index of the last sub tile that lists logical_block as an equivalent site
+    int sub_tile_index = OPEN;
+    for (const auto& sub_tile : physical_tile->sub_tiles) {
+        const auto& eq_sites = sub_tile.equivalent_sites; // reference, not a per-iteration copy of the vector
+        if (std::find(eq_sites.begin(), eq_sites.end(), logical_block) != eq_sites.end()) {
+            sub_tile_index = sub_tile.index; // no break: a later match overrides an earlier one
+        }
+    }
+
+    if (sub_tile_index == OPEN) {
+        archfpga_throw(__FILE__, __LINE__,
+                       "Found no instances of logical block type '%s' within physical tile type '%s'. ",
+                       logical_block->name, physical_tile->name);
+    }
+
+    return sub_tile_index;
+}
+
+int get_physical_pin(t_physical_tile_type_ptr physical_tile,
+                     t_logical_block_type_ptr logical_block,
+                     int pin) {
+    // Locate the sub tile hosting logical_block, then translate in two steps:
+    // logical pin -> sub tile pin -> tile-level pin.
+    const int isub_tile = get_logical_block_physical_sub_tile_index(physical_tile, logical_block);
+    if (isub_tile == OPEN) {
+        archfpga_throw(__FILE__, __LINE__,
+                       "Couldn't find the corresponding physical tile type pin of the logical block type pin %d.",
+                       pin);
+    }
+
+    return physical_tile->sub_tiles[isub_tile].sub_tile_to_tile_pin_indices[get_sub_tile_physical_pin(isub_tile, physical_tile, logical_block, pin)];
+}
+
+int get_logical_block_physical_sub_tile_index(t_physical_tile_type_ptr physical_tile,
+                                              t_logical_block_type_ptr logical_block,
+                                              int sub_tile_capacity) {
+    // Returns the first sub tile hosting logical_block whose capacity range contains sub_tile_capacity
+    int sub_tile_index = OPEN;
+    for (const auto& sub_tile : physical_tile->sub_tiles) {
+        const auto& eq_sites = sub_tile.equivalent_sites; // reference, not a per-iteration copy of the vector
+        auto it = std::find(eq_sites.begin(), eq_sites.end(), logical_block);
+        if (it != eq_sites.end() && sub_tile.capacity.is_in_range(sub_tile_capacity)) {
+            sub_tile_index = sub_tile.index;
+            break;
+        }
+    }
+
+    if (sub_tile_index == OPEN) {
+        archfpga_throw(__FILE__, __LINE__,
+                       "Found no instances of logical block type '%s' within physical tile type '%s'. ",
+                       logical_block->name, physical_tile->name);
+    }
+
+    return sub_tile_index;
+}
+
+/**
+ * Returns the first equivalent physical tile of a logical block (described as its most common tile type); the list must be non-empty
+ */
+t_physical_tile_type_ptr pick_physical_type(t_logical_block_type_ptr logical_block) {
+    return logical_block->equivalent_tiles[0];
+}
+
+t_logical_block_type_ptr pick_logical_type(t_physical_tile_type_ptr physical_tile) {
+    return physical_tile->sub_tiles[0].equivalent_sites[0]; // first equivalent site of the first sub tile; both lists must be non-empty
+}
+
+bool is_tile_compatible(t_physical_tile_type_ptr physical_tile, t_logical_block_type_ptr logical_block) {
+    // Compatible iff physical_tile appears among the logical block's equivalent tiles
+    return 0 != std::count(logical_block->equivalent_tiles.begin(), logical_block->equivalent_tiles.end(), physical_tile);
+}
+
+bool is_sub_tile_compatible(t_physical_tile_type_ptr physical_tile, t_logical_block_type_ptr logical_block, int sub_tile_loc) {
+    // Needs a sub tile that covers sub_tile_loc and accepts logical_block, plus tile-level compatibility
+    for (auto& sub_tile : physical_tile->sub_tiles) {
+        if (!sub_tile.capacity.is_in_range(sub_tile_loc)) {
+            continue;
+        }
+        const auto& sites = sub_tile.equivalent_sites;
+        if (std::find(sites.begin(), sites.end(), logical_block) != sites.end()) {
+            return is_tile_compatible(physical_tile, logical_block);
+        }
+    }
+    return false;
+}
+
+int get_physical_pin_at_sub_tile_location(t_physical_tile_type_ptr physical_tile,
+                                          t_logical_block_type_ptr logical_block,
+                                          int sub_tile_capacity,
+                                          int pin) {
+    const int isub_tile = get_logical_block_physical_sub_tile_index(physical_tile, logical_block, sub_tile_capacity);
+
+    if (isub_tile == OPEN) {
+        archfpga_throw(__FILE__, __LINE__,
+                       "Couldn't find the corresponding physical tile type pin of the logical block type pin %d.",
+                       pin);
+    }
+
+    const int sub_tile_pin = get_sub_tile_physical_pin(isub_tile, physical_tile, logical_block, pin);
+    auto& sub_tile = physical_tile->sub_tiles[isub_tile];
+
+    /* Position of the requested location inside this sub tile's capacity range */
+    const int inst = sub_tile_capacity - sub_tile.capacity.low;
+
+    /* Pins per capacity instance, derived from the sub tile itself rather than from
+     * logical_block: equivalent sites may have different pin counts, so the block's
+     * own count would give a wrong stride when computing the pin index.
+     */
+    const int pins_per_inst = sub_tile.num_phy_pins / sub_tile.capacity.total();
+
+    /* Offset the mapped tile pin by one stride per capacity instance */
+    return inst * pins_per_inst + sub_tile.sub_tile_to_tile_pin_indices[sub_tile_pin];
+}
+
+int get_max_num_pins(t_logical_block_type_ptr logical_block) {
+    // Largest num_pins over all equivalent tiles; 0 when there are none
+    int largest = 0;
+    for (auto tile : logical_block->equivalent_tiles) {
+        largest = (tile->num_pins > largest) ? tile->num_pins : largest;
+    }
+
+    return largest;
+}
+
+//Returns the pin class associated with the specified pin_index_in_port within the port port_name on type
+int find_pin_class(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port, e_pin_type pin_type) {
+    // Resolve the tile-level pin first; an unknown port yields OPEN
+    const int ipin = find_pin(type, port_name, pin_index_in_port);
+    if (ipin == OPEN) {
+        return OPEN;
+    }
+
+    // A pin that has a class must have one of the requested type
+    const int iclass = type->pin_class[ipin];
+    if (iclass != OPEN) {
+        VTR_ASSERT(type->class_inf[iclass].type == pin_type);
+    }
+    return iclass;
+}
+
+int find_pin(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port) {
+    /* Returns the tile-level index of pin pin_index_in_port of the first port named
+     * port_name, or OPEN when no sub tile has such a port.
+     *
+     * The index is the sum of the physical pins of all preceding sub tiles, the pins of
+     * the preceding ports in the matching sub tile, and pin_index_in_port.
+     */
+    // Tile-level index of the first pin of the sub tile being scanned
+    int sub_tile_first_pin = 0;
+
+    for (const auto& sub_tile : type->sub_tiles) {
+        // Offset of the current port's first pin within this sub tile
+        int port_first_pin = 0;
+
+        for (const auto& port : sub_tile.ports) {
+            if (strcmp(port.name, port_name.c_str()) != 0) {
+                port_first_pin += port.num_pins;
+                continue;
+            }
+
+            // First port with a matching name decides the result
+            if (port.num_pins == OPEN) {
+                return OPEN; // a pin count equal to the OPEN sentinel is reported as "not found"
+            }
+            VTR_ASSERT(pin_index_in_port < port.num_pins);
+            return port_first_pin + pin_index_in_port + sub_tile_first_pin;
+        }
+
+        sub_tile_first_pin += sub_tile.num_phy_pins;
+    }
+
+    // No sub tile has a port called port_name
+    return OPEN;
+}
+
+std::pair<int, int> get_capacity_location_from_physical_pin(t_physical_tile_type_ptr physical_tile, int pin) {
+    // Returns (absolute capacity location, pin index within that capacity instance) for a tile-level pin
+    int pins_to_remove = 0;
+    for (const auto& sub_tile : physical_tile->sub_tiles) { // by reference: no per-iteration copy of the sub tile
+        auto capacity = sub_tile.capacity;
+        int sub_tile_num_pins = sub_tile.num_phy_pins;
+        int sub_tile_pin = pin - pins_to_remove;
+        if (sub_tile_pin < sub_tile_num_pins) {
+            int pins_per_inst = sub_tile_num_pins / capacity.total(); // computed once, used for both results
+            int rel_capacity = sub_tile_pin / pins_per_inst;
+            int rel_pin = sub_tile_pin % pins_per_inst;
+            return std::pair<int, int>(rel_capacity + capacity.low, rel_pin);
+        }
+
+        pins_to_remove += sub_tile_num_pins;
+    }
+
+    archfpga_throw(__FILE__, __LINE__,
+                   "Couldn't find sub tile that contains the pin %d in physical tile %s.\n",
+                   pin, physical_tile->name);
+}
+
+int get_physical_pin_from_capacity_location(t_physical_tile_type_ptr physical_tile, int relative_pin, int capacity_location) {
+    // Rebuilds the tile-level pin index from a capacity location and a pin index relative to that instance
+    int pins_to_add = 0;
+    for (const auto& sub_tile : physical_tile->sub_tiles) { // by reference: no per-iteration copy of the sub tile
+        auto capacity = sub_tile.capacity;
+        int rel_capacity = capacity_location - capacity.low;
+        int num_inst_pins = sub_tile.num_phy_pins / capacity.total();
+
+        if (capacity.is_in_range(capacity_location)) {
+            return pins_to_add + num_inst_pins * rel_capacity + relative_pin;
+        }
+        pins_to_add += sub_tile.num_phy_pins;
+    }
+
+    archfpga_throw(__FILE__, __LINE__,
+                   "Couldn't find sub tile that contains the relative pin %d at the capacity location %d in physical tile %s.\n",
+                   relative_pin, capacity_location, physical_tile->name);
+}
+bool is_opin(int ipin, t_physical_tile_type_ptr type) {
+    /* Returns true if this clb pin is an output (its pin class is a DRIVER), false otherwise. */
+
+    // Top-level pin indices are [0, num_pins), matching the pin_index < num_pins
+    // assertions used elsewhere in this file; the upper bound must be exclusive.
+    if (ipin < 0 || ipin >= type->num_pins) {
+        //Not a top level pin
+        return false;
+    }
+
+    int iclass = type->pin_class[ipin];
+
+    // The pin is an output exactly when its class is a DRIVER
+    return type->class_inf[iclass].type == DRIVER;
+}
+
+// TODO: Remove is_input_type / is_output_type / is_io_type as part of
+// https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1193
+bool is_input_type(t_physical_tile_type_ptr type) {
+    return type->is_input_type; // forwards the flag stored on the tile type
+}
+
+bool is_output_type(t_physical_tile_type_ptr type) {
+    return type->is_output_type; // forwards the flag stored on the tile type
+}
+
+bool is_io_type(t_physical_tile_type_ptr type) {
+    if (is_input_type(type)) return true; // input-capable tiles are IO tiles
+    return is_output_type(type);          // otherwise only output-capable tiles qualify
+}
+
+std::string block_type_pin_index_to_name(t_physical_tile_type_ptr type, int pin_index) {
+    VTR_ASSERT(pin_index < type->num_pins);
+
+    // Rebind pin_index to the pin's position inside its capacity instance
+    int sub_tile_index, inst_num;
+    std::tie(pin_index, inst_num, sub_tile_index) = get_pin_index_for_inst(type, pin_index);
+    auto& sub_tile = type->sub_tiles[sub_tile_index];
+
+    // Name layout: <tile>[<instance>].<port>[<pin in port>]; the instance
+    // suffix is only emitted for sub tiles with more than one instance.
+    std::string prefix = type->name;
+    if (sub_tile.capacity.total() > 1) {
+        prefix += "[" + std::to_string(inst_num) + "]";
+    }
+    prefix += ".";
+
+    for (auto const& port : sub_tile.ports) {
+        const int index_in_port = pin_index - port.absolute_first_pin_index;
+        if (index_in_port < 0 || index_in_port >= port.num_pins) {
+            continue; //This port does not contain the desired pin index
+        }
+        return prefix + port.name + "[" + std::to_string(index_in_port) + "]";
+    }
+
+    return "<UNKOWN>";
+}
+
+std::vector<std::string> block_type_class_index_to_pin_names(t_physical_tile_type_ptr type, int class_index) {
+    VTR_ASSERT(class_index < (int)type->class_inf.size());
+    //Names are formatted as <tile>[<capacity>].<port>[<pin>] or <tile>[<capacity>].<port>[<first>:<last>]
+    const auto& class_inf = type->class_inf[class_index]; //bound by reference: no copy of the class information
+    //Resolve each pin of the class to its sub tile, capacity instance, port and index in the port
+    std::vector<t_pin_inst_port> pin_info;
+    for (int ipin = 0; ipin < class_inf.num_pins; ++ipin) {
+        int pin_index = class_inf.pinlist[ipin];
+        pin_info.push_back(block_type_pin_index_to_pin_inst(type, pin_index));
+    }
+
+    auto cmp = [](const t_pin_inst_port& lhs, const t_pin_inst_port& rhs) {
+        return std::tie(lhs.capacity_instance, lhs.port_index, lhs.pin_index_in_port)
+               < std::tie(rhs.capacity_instance, rhs.port_index, rhs.pin_index_in_port);
+    };
+
+    //Ensure all the pins are in order
+    std::sort(pin_info.begin(), pin_info.end(), cmp);
+
+    //Determine ranges for each capacity instance and port pair
+    std::map<std::tuple<int, int, int>, std::pair<int, int>> pin_ranges;
+    for (const auto& pin_inf : pin_info) {
+        auto key = std::make_tuple(pin_inf.sub_tile_index, pin_inf.capacity_instance, pin_inf.port_index);
+        if (!pin_ranges.count(key)) {
+            pin_ranges[key].first = pin_inf.pin_index_in_port;
+            pin_ranges[key].second = pin_inf.pin_index_in_port;
+        } else {
+            VTR_ASSERT(pin_ranges[key].second == pin_inf.pin_index_in_port - 1); //pins sharing a key must be contiguous
+            pin_ranges[key].second = pin_inf.pin_index_in_port;
+        }
+    }
+
+    //Format pin ranges
+    std::vector<std::string> pin_names;
+    for (const auto& kv : pin_ranges) {
+        const auto& type_port = kv.first;
+        const auto& pins = kv.second;
+
+        int isub_tile, icapacity, iport;
+        std::tie<int, int, int>(isub_tile, icapacity, iport) = type_port;
+
+        int ipin_start = pins.first;
+        int ipin_end = pins.second;
+
+        auto& sub_tile = type->sub_tiles[isub_tile];
+
+        std::string pin_name;
+        if (ipin_start == ipin_end) {
+            pin_name = vtr::string_fmt("%s[%d].%s[%d]",
+                                       type->name,
+                                       icapacity,
+                                       sub_tile.ports[iport].name,
+                                       ipin_start);
+        } else {
+            pin_name = vtr::string_fmt("%s[%d].%s[%d:%d]",
+                                       type->name,
+                                       icapacity,
+                                       sub_tile.ports[iport].name,
+                                       ipin_start,
+                                       ipin_end);
+        }
+
+        pin_names.push_back(pin_name);
+    }
+
+    return pin_names;
+}
+
+const t_physical_tile_port* get_port_by_name(t_sub_tile* sub_tile, const char* port_name) {
+    // Linear scan over the sub tile ports, names compared with strcmp
+    for (const auto& candidate : sub_tile->ports) {
+        if (strcmp(candidate.name, port_name) != 0) continue;
+        // The result is looked up through the 'index' field of the matching port
+        return &sub_tile->ports[candidate.index];
+    }
+    return nullptr; // no port with the requested name
+}
+
+const t_port* get_port_by_name(t_logical_block_type_ptr type, const char* port_name) {
+    // The ports are read from the pb_type attached to the logical block type
+    auto pb_type = type->pb_type;
+
+    for (int iport = 0; iport < pb_type->num_ports; ++iport) {
+        const auto& candidate = pb_type->ports[iport];
+        if (strcmp(candidate.name, port_name) == 0) {
+            return &pb_type->ports[candidate.index];
+        }
+    }
+    return nullptr; // no port with the requested name
+}
+
+const t_physical_tile_port* get_port_by_pin(const t_sub_tile* sub_tile, int pin) {
+    // A port covers the pins [absolute_first_pin_index, absolute_first_pin_index + num_pins)
+    for (const auto& candidate : sub_tile->ports) {
+        if (pin < candidate.absolute_first_pin_index) continue; // before this port
+        if (pin < candidate.absolute_first_pin_index + candidate.num_pins) // inside this port
+            return &sub_tile->ports[candidate.index];
+    }
+    return nullptr; // the pin lies outside every port of the sub tile
+}
+
+const t_port* get_port_by_pin(t_logical_block_type_ptr type, int pin) {
+    // The ports are read from the pb_type attached to the logical block type
+    auto pb_type = type->pb_type;
+
+    for (int iport = 0; iport < pb_type->num_ports; ++iport) {
+        const auto& candidate = pb_type->ports[iport];
+        if (pin < candidate.absolute_first_pin_index) continue; // before this port
+        if (pin < candidate.absolute_first_pin_index + candidate.num_pins) // inside this port
+            return &pb_type->ports[candidate.index];
+    }
+    return nullptr; // the pin lies outside every port
+}
diff --git a/third_party/vtr/libs/archfpga/src/physical_types_util.h b/third_party/vtr/libs/archfpga/src/physical_types_util.h
new file mode 100644
index 000000000..71c74a614
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/physical_types_util.h
@@ -0,0 +1,304 @@
+#ifndef PHYSICAL_TYPES_UTIL_H
+#define PHYSICAL_TYPES_UTIL_H
+
+#include "physical_types.h"
+
+/********************************************************************
+ *                                                                  *
+ *  Physical types utility functions                                *
+ *                                                                  *
+ *  This source file contains several utilities that enable the     *
+ *  interaction with the architecture's physical types.             *
+ *  Mainly, the two classes of objects accessed by the utility      *
+ *  functions in this file are the following:                       *
+ *    - physical_tile_type: identifies a placeable tile within      *
+ *                          the device grid.                        *
+ *    - logical_block_type: identifies a clustered block type       *
+ *                          within the clb_netlist                  *
+ *                                                                  *
+ *  All the following utilities are intended to ease the            *
+ *  development to access the above mentioned classes and perform   *
+ *  some required operations with their data.                       *
+ *                                                                  *
+ *  Please classify such functions in this file                     *
+ *                                                                  *
+ * ******************************************************************/
+
+/*
+ * Terms definition.
+ *
+ * This comment helps in clarifying what some of the data types correspond to
+ * and what their purpose is, for a better understanding of the utility routines.
+ *
+ *   - logical_pin_index: corresponds to the absolute pin index of a logical block type.
+ *   - physical_pin_index: corresponds to the absolute pin index of a physical tile type.
+ *   - sub tile: component of a physical tile type.
+ *               For further information on sub tiles refer to the documentation at:
+ *               https://docs.verilogtorouting.org/en/latest/arch/reference/#tag-%3Csub_tilename
+ *   - capacity: corresponds to the total number of instances of a sub tile within the belonging
+ *               physical tile.
+ *
+ * Given that, each physical tile type can be used to place a different number/type of logical block types
+ * it is necessary to have at disposal utilities to correctly synchronize and connect a logical block
+ * to a compatible physical tile.
+ *
+ * For instance, if there are multiple physical tile types compatible with a logical type, it must be assumed
+ * that there is no 1:1 mapping between the logical block pins and the physical tile ones.
+ *
+ * To clarify, imagine a situation as follows:
+ *
+ *   - BUFG logical block type:
+ *
+ *        *----------------*
+ *        |                |
+ *    --->| CLK        OUT |--->
+ *        |                |
+ *        *----------------*
+ *
+ *     The logical pin indices are:
+ *       - CLK: 0
+ *       - OUT: 1
+ *
+ *   - CLOCK TILE physical tile type, containing a BUFGCTRL sub tile of capacity of 2:
+ *
+ *        *--------------------------*
+ *        |                          |
+ *        |    BUFGCTRL sub tile     |
+ *        |    Instance 1            |
+ *        |    *----------------*    |
+ *        |    |                |    |
+ *  CLK_1 |--->| CLK        OUT |--->| OUT_1
+ *  EN_1  |--->| EN             |    |
+ *        |    |                |    |
+ *        |    *----------------*    |
+ *        |                          |
+ *        |    BUFGCTRL sub tile     |
+ *        |    Instance 2            |
+ *        |    *----------------*    |
+ *        |    |                |    |
+ *  CLK_2 |--->| CLK        OUT |--->| OUT_2
+ *  EN_2  |--->| EN             |    |
+ *        |    |                |    |
+ *        |    *----------------*    |
+ *        |                          |
+ *        *--------------------------*
+ *
+ *     The physical pin indices are:
+ *       - CLK_1: 0
+ *       - EN_1 : 1
+ *       - OUT_1: 2
+ *       - CLK_2: 3
+ *       - EN_2 : 4
+ *       - OUT_2: 5
+ *
+ * The BUFG logical block can be placed in a BUFGCTRL sub tile of the CLOCK TILE physical tile.
+ * As visible in the diagram, the CLOCK TILE contains a total of 6 physical pins, 3 for each instance,
+ * and the logical block contains only 2 pins, but still, it is compatible to be placed within
+ * the BUFGCTRL sub tile.
+ *
+ * One of the purposes of the following utility functions is to correctly identify the relation
+ * of a logical pin (e.g. CLK of the BUFG logical block type) with the corresponding physical tile index
+ * (e.g. CLK_2 of the CLOCK TILE physical tile, in case the logical block is placed on the second instance
+ * of the BUFGCTRL sub tile).
+ *
+ * With the assumption that there is no 1:1 mapping between logical block and sub tile pins
+ * (e.g. EN pin in the BUFGCTRL sub tile), there is some extra computation and data structures
+ * needed to correctly identify the relation between the pins.
+ *
+ * For instance, the following pieces of information are required:
+ *   - mapping between logical and sub tile pins.
+ *   - mapping between sub tile pins and absolute physical pins
+ *   - capacity instance of the sub tile
+ *
+ * With all the above information we can calculate correctly the connection between the CLK (logical pin)
+ * and CLK_2 (physical pin) from the BUFG (logical block) and CLOCK TILE (physical tile).
+ */
+
+///@brief Returns true if the absolute physical pin index is an output of the given physical tile type
+bool is_opin(int ipin, t_physical_tile_type_ptr type);
+
+///@brief Returns true if the given physical tile type can implement a .input block type
+bool is_input_type(t_physical_tile_type_ptr type);
+///@brief Returns true if the given physical tile type can implement a .output block type
+bool is_output_type(t_physical_tile_type_ptr type);
+///@brief Returns true if the given physical tile type can implement either a .input or .output block type
+bool is_io_type(t_physical_tile_type_ptr type);
+
+/**
+ * @brief Returns the corresponding physical pin based on the input parameters:
+ *
+ * - physical_tile
+ * - relative_pin: this is the pin relative to a specific sub tile
+ * - capacity location: absolute sub tile location
+ *
+ * Take the above CLOCK TILE example:
+ *   - we want to get the absolute physical pin corresponding to the first pin
+ *     of the second instance of the BUFGCTRL sub tile
+ *
+ *   int pin = get_physical_pin_from_capacity_location(clock_tile, 0, 1);
+ *
+ *   This function call returns the absolute pin index of the CLK_2 pin (assumed that it is the first pin of the sub tile).
+ *   The value returned in this case is 3.
+ *   Note: capacity and pin indices start from zero.
+ */
+int get_physical_pin_from_capacity_location(t_physical_tile_type_ptr physical_tile, int relative_pin, int capacity_location);
+
+/**
+ * @brief Returns a pair consisting of the absolute capacity location and the sub tile relative pin of the given physical pin
+ *
+ *
+ * Take the above CLOCK TILE example:
+ *   - given the CLOCK TILE and the index corresponding to the CLK_2 pin, we want the relative pin
+ *     of one of its sub tiles at a particular capacity location (i.e. sub tile instance).
+ *
+ * std::tie(absolute_capacity, relative_pin) = get_capacity_location_from_physical_pin(clock_tile, 3)
+ *
+ * The value returned is (1, 0), where:
+ *   - 1 corresponds to the capacity location (sub tile instance) where the absolute physical pin index (CLK_2) is connected
+ *   - 0 corresponds to the relative pin index within the BUFGCTRL sub tile
+ */
+std::pair<int, int> get_capacity_location_from_physical_pin(t_physical_tile_type_ptr physical_tile, int pin);
+
+///@brief Returns the name of the pin_index'th pin on the specified block type
+std::string block_type_pin_index_to_name(t_physical_tile_type_ptr type, int pin_index);
+
+///@brief Returns the name of the class_index'th pin class on the specified block type
+std::vector<std::string> block_type_class_index_to_pin_names(t_physical_tile_type_ptr type, int class_index);
+
+///@brief Returns the physical tile type matching a given physical tile type name, or nullptr (if not found)
+t_physical_tile_type_ptr find_tile_type_by_name(std::string name, const std::vector<t_physical_tile_type>& types);
+
+int find_pin_class(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port, e_pin_type pin_type);
+
+///@brief Returns the relative pin index within a sub tile that corresponds to the pin within the given port and its index in the port
+int find_pin(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port);
+
+///@brief Returns the maximum number of pins within a logical block
+int get_max_num_pins(t_logical_block_type_ptr logical_block);
+
+///@brief Verifies whether a given logical block is compatible with a given physical tile
+bool is_tile_compatible(t_physical_tile_type_ptr physical_tile, t_logical_block_type_ptr logical_block);
+
+///@brief Verifies whether a logical block and a relative placement location is compatible with a given physical tile
+bool is_sub_tile_compatible(t_physical_tile_type_ptr physical_tile, t_logical_block_type_ptr logical_block, int sub_tile_loc);
+
+/**
+ * @brief Returns the first physical tile type that matches the logical block
+ *
+ * The order of the physical tiles suitable for the input logical block follows the order
+ * with which the logical blocks appear in the architecture XML definition
+ */
+t_physical_tile_type_ptr pick_physical_type(t_logical_block_type_ptr logical_block);
+
+/**
+ * @brief Returns the first logical block type that matches the physical tile
+ *
+ * The order of the logical blocks suitable for the input physical tile follows the order
+ * with which the physical tiles appear in the architecture XML definition
+ */
+t_logical_block_type_ptr pick_logical_type(t_physical_tile_type_ptr physical_tile);
+
+/**
+ * @brief Returns the sub tile index (within 'physical_tile') corresponding to the
+ * 'logical block'.
+ *
+ * This function will return the index for the first sub_tile that can accommodate
+ * the logical block.
+ *
+ * It is typically called before/during placement,
+ * when picking a sub-tile to fit a logical block
+ */
+int get_logical_block_physical_sub_tile_index(t_physical_tile_type_ptr physical_tile,
+                                              t_logical_block_type_ptr logical_block);
+/**
+ * @brief Returns the physical pin index (within 'physical_tile') corresponding to the
+ * logical index ('pin') of the first instance of 'logical_block' within the physical tile.
+ *
+ * This function is called before/during placement, when a sub tile index was not yet assigned.
+ *
+ * Throws an exception if the corresponding physical pin can't be found.
+ */
+int get_physical_pin(t_physical_tile_type_ptr physical_tile,
+                     t_logical_block_type_ptr logical_block,
+                     int pin);
+/**
+ * @brief Returns the physical pin index (within 'physical_tile') corresponding to the
+ * logical index ('pin') of the first instance of 'logical_block' within the physical tile.
+ * This function considers if a given offset is in the range of sub tile capacity
+ *
+ *   (First pin index at current sub-tile)                                     (The wanted pin index)
+ *
+ *   |                                                               |<----- pin ------->|
+ *   v                                                                                   v
+ *
+ *   |<----- capacity.low ----->|<----- capacity.low + 1 ----->| ... |<----- sub_tile_capacity ---->|
+ *
+ * Throws an exception if the corresponding physical pin can't be found.
+ *
+ * Take the above CLOCK TILE example:
+ *   - we want to get the absolute physical pin corresponding to the CLK pin
+ *     of the BUFG logical block placed at the second instance of the BUFGCTRL sub tile
+ *
+ *   int pin = get_physical_pin_at_sub_tile_location(clock_tile, bufg_block, 1, 0);
+ *
+ *   where the input params are:
+ *     - clock_tile: CLOCK TILE
+ *     - bufg_block: BUFG
+ *     - 1: second absolute capacity instance in the CLOCK TILE
+ *     - 0: logical pin index corresponding to CLK
+ *
+ *   This function call returns the absolute pin index of the CLK_2 pin (assumed that it is the first pin of the sub tile).
+ *   The value returned in this case is 3.
+ *   Note: capacity and pin indices start from zero.
+ */
+int get_physical_pin_at_sub_tile_location(t_physical_tile_type_ptr physical_tile,
+                                          t_logical_block_type_ptr logical_block,
+                                          int sub_tile_capacity,
+                                          int pin);
+
+/**
+ * @brief Returns the sub tile index (within 'physical_tile') corresponding to the
+ * 'logical block' by considering if a given offset is in the range of sub tile capacity
+ */
+int get_logical_block_physical_sub_tile_index(t_physical_tile_type_ptr physical_tile,
+                                              t_logical_block_type_ptr logical_block,
+                                              int sub_tile_capacity);
+/**
+ * @brief Returns the physical pin index (within 'physical_tile') corresponding to the
+ * logical index ('pin') of the 'logical_block' at sub-tile location 'sub_tile_index'.
+ *
+ * Throws an exception if the corresponding physical pin can't be found.
+ */
+int get_sub_tile_physical_pin(int sub_tile_index,
+                              t_physical_tile_type_ptr physical_tile,
+                              t_logical_block_type_ptr logical_block,
+                              int pin);
+
+/**
+ * @brief Returns one of the physical ports of a tile corresponding to the port_name.
+ * Given that each sub_tile's port that has exactly the same name has to be equivalent
+ * one to the other, it is indifferent which port is returned.
+ */
+t_physical_tile_port find_tile_port_by_name(t_physical_tile_type_ptr type, const char* port_name);
+
+/**
+ * @brief Returns the physical tile port given the port name and the corresponding sub tile
+ */
+const t_physical_tile_port* get_port_by_name(t_sub_tile* sub_tile, const char* port_name);
+
+/**
+ * @brief Returns the logical block port given the port name and the corresponding logical block type
+ */
+const t_port* get_port_by_name(t_logical_block_type_ptr type, const char* port_name);
+
+/**
+ * @brief Returns the physical tile port given the pin index and the corresponding sub tile
+ */
+const t_physical_tile_port* get_port_by_pin(const t_sub_tile* sub_tile, int pin);
+
+/**
+ * @brief Returns the logical block port given the pin index and the corresponding logical block type
+ */
+const t_port* get_port_by_pin(t_logical_block_type_ptr type, int pin);
+
+#endif
diff --git a/third_party/vtr/libs/archfpga/src/read_fpga_interchange_arch.cc b/third_party/vtr/libs/archfpga/src/read_fpga_interchange_arch.cc
new file mode 100644
index 000000000..4d56a3f53
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/read_fpga_interchange_arch.cc
@@ -0,0 +1,2542 @@
+#include <algorithm>
+#include <kj/std/iostream.h>
+#include <limits>
+#include <map>
+#include <regex>
+#include <set>
+#include <stdlib.h>
+#include <string>
+#include <string.h>
+#include <zlib.h>
+
+#include "vtr_assert.h"
+#include "vtr_digest.h"
+#include "vtr_log.h"
+#include "vtr_memory.h"
+#include "vtr_util.h"
+
+#include "arch_check.h"
+#include "arch_error.h"
+#include "arch_util.h"
+#include "arch_types.h"
+
+#include "read_fpga_interchange_arch.h"
+
+/*
+ * FPGA Interchange Device frontend
+ *
+ * This file contains functions to read and parse a Cap'n'proto FPGA interchange device description
+ * and populate the various VTR architecture's internal data structures.
+ *
+ * The Device data is, by default, GZipped, hence the requirement of the ZLIB library to allow
+ * for in-memory decompression of the input file.
+ */
+
+using namespace DeviceResources;
+using namespace LogicalNetlist;
+using namespace capnp;
+
+// Necessary to reduce code verbosity when getting the pin directions
+static const auto INPUT = LogicalNetlist::Netlist::Direction::INPUT;
+static const auto OUTPUT = LogicalNetlist::Netlist::Direction::OUTPUT;
+static const auto INOUT = LogicalNetlist::Netlist::Direction::INOUT;
+
+static const auto LOGIC = Device::BELCategory::LOGIC;
+static const auto ROUTING = Device::BELCategory::ROUTING;
+static const auto SITE_PORT = Device::BELCategory::SITE_PORT;
+
+// Enum for pack pattern expansion direction
+enum e_pp_dir {
+    FORWARD = 0,
+    BACKWARD = 1
+};
+
+struct t_package_pin {
+    std::string name;
+    // NOTE(review): presumably the site and BEL the package pin belongs to - confirm against process_package_pins()
+    std::string site_name;
+    std::string bel_name;
+};
+
+struct t_bel_cell_mapping {
+    size_t cell;
+    size_t site;
+    std::vector<std::pair<size_t, size_t>> pins;
+    // Strict ordering by cell first, then by site; 'pins' does not take part in the comparison
+    bool operator<(const t_bel_cell_mapping& other) const {
+        return cell < other.cell || (cell == other.cell && site < other.site);
+    }
+};
+
+// Intermediate data type to store information on interconnects to be created
+struct t_ic_data {
+    std::string input;
+    std::set<std::string> outputs;
+    // NOTE(review): presumably set when the interconnect has to carry a pack pattern - confirm at the use sites
+    bool requires_pack_pattern;
+};
+
+/****************** Utility functions ******************/
+
+/**
+ * @brief The FPGA interchange timing model includes three different corners (min, typ and max) for each of the two
+ * speed_models (slow and fast).
+ *
+ * Timing data can be found on PIPs, nodes, site pins and bel pins.
+ * This function retrieves the timing value based on the wanted speed model and the wanted corner: if that corner is
+ * not set, the first one set among min, typ and max is used. Returns 0 if the wanted speed model is not present.
+ * More information on the FPGA Interchange timing model can be found here:
+ *   - https://github.com/chipsalliance/fpga-interchange-schema/blob/main/interchange/DeviceResources.capnp
+ */
+static float get_corner_value(Device::CornerModel::Reader model, const char* speed_model, const char* value) {
+    bool slow_model = std::string(speed_model) == std::string("slow");
+    bool fast_model = std::string(speed_model) == std::string("fast");
+
+    bool min_corner = std::string(value) == std::string("min");
+    bool typ_corner = std::string(value) == std::string("typ");
+    bool max_corner = std::string(value) == std::string("max");
+
+    if (!slow_model && !fast_model) {
+        archfpga_throw(__FILE__, __LINE__,
+                       "Wrong speed model `%s`. Expected `slow` or `fast`\n", speed_model);
+    }
+
+    if (!min_corner && !typ_corner && !max_corner) {
+        archfpga_throw(__FILE__, __LINE__,
+                       "Wrong corner model `%s`. Expected `min`, `typ` or `max`\n", value);
+    }
+
+    bool has_fast = model.getFast().hasFast();
+    bool has_slow = model.getSlow().hasSlow();
+
+    if (slow_model && has_slow) {
+        auto half = model.getSlow().getSlow();
+        if (min_corner && half.getMin().isMin()) {
+            return half.getMin().getMin();
+        } else if (typ_corner && half.getTyp().isTyp()) {
+            return half.getTyp().getTyp();
+        } else if (max_corner && half.getMax().isMax()) {
+            return half.getMax().getMax();
+        } else { // wanted corner not set: fall back to the first one that is
+            if (half.getMin().isMin()) {
+                return half.getMin().getMin();
+            } else if (half.getTyp().isTyp()) {
+                return half.getTyp().getTyp();
+            } else if (half.getMax().isMax()) {
+                return half.getMax().getMax();
+            } else {
+                archfpga_throw(__FILE__, __LINE__,
+                               "Invalid speed model %s. No value found!\n", speed_model);
+            }
+        }
+    } else if (fast_model && has_fast) {
+        auto half = model.getFast().getFast();
+        if (min_corner && half.getMin().isMin()) {
+            return half.getMin().getMin();
+        } else if (typ_corner && half.getTyp().isTyp()) {
+            return half.getTyp().getTyp();
+        } else if (max_corner && half.getMax().isMax()) {
+            return half.getMax().getMax();
+        } else { // wanted corner not set: fall back to the first one that is
+            if (half.getMin().isMin()) {
+                return half.getMin().getMin();
+            } else if (half.getTyp().isTyp()) {
+                return half.getTyp().getTyp();
+            } else if (half.getMax().isMax()) {
+                return half.getMax().getMax();
+            } else {
+                archfpga_throw(__FILE__, __LINE__,
+                               "Invalid speed model %s. No value found!\n", speed_model);
+            }
+        }
+    }
+    // The wanted speed model is not present in this corner model
+    return 0.;
+}
+
+/** @brief Looks up a port by model name and port name in arch->models and arch->model_library */
+static t_model_ports* get_model_port(t_arch* arch, std::string model, std::string port, bool fail = true) {
+    for (t_model* head : {arch->models, arch->model_library}) {
+        for (t_model* cur = head; cur != nullptr; cur = cur->next) {
+            if (model != cur->name)
+                continue;
+            // Inputs are scanned before outputs, the first match wins
+            for (t_model_ports* port_head : {cur->inputs, cur->outputs})
+                for (t_model_ports* p = port_head; p != nullptr; p = p->next)
+                    if (port == p->name)
+                        return p;
+        }
+    }
+
+    if (fail)
+        archfpga_throw(__FILE__, __LINE__,
+                       "Could not find model port: %s (%s)\n", port.c_str(), model.c_str());
+    // Reached only when 'fail' is false and nothing matched
+    return nullptr;
+}
+
+/** @brief Looks up an architecture model by name in arch->models and arch->model_library; throws if it is missing */
+static t_model* get_model(t_arch* arch, std::string model) {
+    for (t_model* head : {arch->models, arch->model_library}) {
+        for (t_model* cur = head; cur != nullptr; cur = cur->next) {
+            if (model == cur->name) return cur;
+        }
+    }
+    archfpga_throw(__FILE__, __LINE__,
+                   "Could not find model: %s\n", model.c_str());
+}
+
+/** @brief Finds, by name, an entry in a vector of physical or logical types; throws if no entry matches */
+template<typename T>
+static T* get_type_by_name(const char* type_name, std::vector<T>& types) {
+    for (size_t itype = 0; itype < types.size(); ++itype) {
+        T* candidate = &types[itype];
+        if (strcmp(candidate->name, type_name) == 0)
+            return candidate;
+    }
+
+    archfpga_throw(__FILE__, __LINE__,
+                   "Could not find type: %s\n", type_name);
+}
+
+/** @brief Returns a generic port instantiation for a complex block; if a model name is given, the port is also bound to the matching model port */
+static t_port get_generic_port(t_arch* arch,
+                               t_pb_type* pb_type,
+                               PORTS dir,
+                               std::string name,
+                               std::string model = "",
+                               int num_pins = 1) {
+    t_port port;
+    port.parent_pb_type = pb_type;
+    port.name = vtr::strdup(name.c_str());
+    port.num_pins = num_pins;
+    port.index = 0;
+    port.absolute_first_pin_index = 0;
+    port.port_index_by_type = 0;
+    port.equivalent = PortEquivalence::NONE;
+    port.type = dir;
+    port.is_clock = false;
+    port.is_non_clock_global = false;
+    port.model_port = nullptr;
+    port.port_class = vtr::strdup(nullptr);
+    port.port_power = (t_port_power*)vtr::calloc(1, sizeof(t_port_power));
+    // With its default 'fail' flag, get_model_port throws when the model has no port with this name
+    if (!model.empty())
+        port.model_port = get_model_port(arch, model, name);
+
+    return port;
+}
+
+/** @brief Tells whether the complex block pb_type owns a port called port_name */
+static bool block_port_exists(t_pb_type* pb_type, std::string port_name) {
+    int iport = 0;
+
+    // Walk the port array until a name matches or the ports run out
+    while (iport < pb_type->num_ports && port_name != pb_type->ports[iport].name)
+        ++iport;
+
+    // iport stops short of num_ports only when a match was found
+    return iport < pb_type->num_ports;
+}
+
+/** @brief Returns a pack pattern annotation given its name, input and output strings */
+static t_pin_to_pin_annotation get_pack_pattern(std::string pp_name, std::string input, std::string output) {
+    t_pin_to_pin_annotation pp;
+    // Room for exactly one (property, value) pair: the pack pattern name
+    pp.prop = (int*)vtr::calloc(1, sizeof(int));
+    pp.value = (char**)vtr::calloc(1, sizeof(char*));
+
+    pp.type = E_ANNOT_PIN_TO_PIN_PACK_PATTERN;
+    pp.format = E_ANNOT_PIN_TO_PIN_CONSTANT;
+    pp.prop[0] = (int)E_ANNOT_PIN_TO_PIN_PACK_PATTERN_NAME;
+    pp.value[0] = vtr::strdup(pp_name.c_str());
+    pp.input_pins = vtr::strdup(input.c_str());
+    pp.output_pins = vtr::strdup(output.c_str());
+    pp.num_value_prop_pairs = 1;
+    pp.clock = nullptr;
+
+    return pp;
+}
+
+/****************** End Utility functions ******************/
+
+struct ArchReader {
+  public:
+    ArchReader(t_arch* arch,
+               Device::Reader& arch_reader,
+               const char* arch_file,
+               std::vector<t_physical_tile_type>& phys_types,
+               std::vector<t_logical_block_type>& logical_types)
+        : arch_(arch)
+        , arch_file_(arch_file)
+        , ar_(arch_reader)
+        , ptypes_(phys_types)
+        , ltypes_(logical_types) {
+        set_arch_file_name(arch_file);
+        // Intern the device string list in order, so that a string index also indexes arch_->interned_strings
+        for (std::string str : ar_.getStrList()) {
+            auto interned_string = arch_->strings.intern_string(vtr::string_view(str.c_str()));
+            arch_->interned_strings.push_back(interned_string);
+        }
+    }
+
+    void read_arch() {
+        // Preprocess arch information
+        process_luts();
+        process_package_pins();
+        process_cell_bel_mappings();
+        process_constants();
+        process_bels_and_sites();
+
+        process_models();
+        process_constant_model();
+
+        process_device();
+
+        process_layout();
+        process_switches();
+        process_segments();
+
+        process_sites();
+        process_constant_block();
+
+        process_tiles();
+        process_constant_tile();
+        // Cross-link the physical and logical types, then sync and check the models
+        link_physical_logical_types(ptypes_, ltypes_);
+
+        SyncModelsPbTypes(arch_, ltypes_);
+        check_models(arch_);
+    }
+
+  private:
+    t_arch* arch_;
+    const char* arch_file_;
+    Device::Reader& ar_;
+    std::vector<t_physical_tile_type>& ptypes_;
+    std::vector<t_logical_block_type>& ltypes_;
+
+    t_default_fc_spec default_fc_;
+
+    std::string bel_dedup_suffix_ = "_bel";
+    std::string const_block_ = "constant_block";
+
+    std::unordered_set<int> take_bels_;
+    std::unordered_set<int> take_sites_;
+
+    // Package pins
+
+    // TODO: add possibility to have multiple packages
+    std::vector<t_package_pin> package_pins_;
+    std::unordered_set<std::string> pad_bels_;
+    std::string out_suffix_ = "_out";
+    std::string in_suffix_ = "_in";
+
+    // Bel Cell mappings
+    std::unordered_map<uint32_t, std::set<t_bel_cell_mapping>> bel_cell_mappings_;
+    std::unordered_map<std::string, int> segment_name_to_segment_idx;
+
+    // Utils
+
+    /** @brief Returns the string stored at position idx of arch_->interned_strings (filled by the constructor from the device string list) */
+    std::string str(size_t idx) {
+        return arch_->interned_strings[idx].get(&arch_->strings);
+    }
+
+    /** @brief Get the BEL count of a site depending on its category (e.g. logic or routing BELs) */
+    int get_bel_type_count(Device::SiteType::Reader& site, Device::BELCategory category, bool skip_lut = false) {
+        int count = 0;
+        for (auto bel : site.getBels()) {
+            auto bel_name = str(bel.getName());
+            bool is_logic = category == LOGIC;
+            // With skip_lut set, BELs recognized as LUTs are left out of the count
+            if (skip_lut && is_lut(bel_name, str(site.getName())))
+                continue;
+            // When counting logic BELs, those not listed in take_bels_ are ignored
+            bool skip_bel = is_logic && take_bels_.count(bel.getName()) == 0;
+
+            if (bel.getCategory() == category && !skip_bel)
+                count++;
+        }
+
+        return count;
+    }
+
+    /** @brief Get the BEL reader given its name and site; a missing BEL is reported through archfpga_throw */
+    Device::BEL::Reader get_bel_reader(Device::SiteType::Reader& site, std::string bel_name) {
+        for (auto bel : site.getBels())
+            if (str(bel.getName()) == bel_name)
+                return bel;
+        archfpga_throw(arch_file_, __LINE__, "Could not find the BEL reader for '%s'!\n", bel_name.c_str());
+    }
+
+    /** @brief Get the reader of pin `pin_name` of `bel` in `site`; a missing pin is reported through archfpga_throw */
+    Device::BELPin::Reader get_bel_pin_reader(Device::SiteType::Reader& site, Device::BEL::Reader& bel, std::string pin_name) {
+        auto bel_pins = site.getBelPins();
+
+        for (auto bel_pin : bel.getPins()) { // the BEL stores indices into the site's BEL pin list
+            auto pin_reader = bel_pins[bel_pin];
+            if (str(pin_reader.getName()) == pin_name)
+                return pin_reader;
+        }
+        archfpga_throw(arch_file_, __LINE__, "Could not find the BEL pin reader for '%s'!\n", pin_name.c_str());
+    }
+
+    /** @brief Site name for SITE_PORT BELs; otherwise the BEL name, suffixed when it collides with the site name */
+    std::string get_bel_name(Device::SiteType::Reader& site, Device::BEL::Reader& bel) {
+        const std::string site_name = str(site.getName());
+        if (bel.getCategory() == SITE_PORT)
+            return site_name;
+        std::string bel_name = str(bel.getName());
+        if (bel_name == site_name)
+            bel_name += bel_dedup_suffix_;
+        return bel_name;
+    }
+
+    /** @brief Returns `bel` with a trailing de-duplication suffix removed; names without the suffix come back unchanged */
+    std::string remove_bel_suffix(std::string bel) {
+        // Plain suffix comparison: no regex is compiled per call and the suffix is never read as a pattern
+        const std::string& suffix = bel_dedup_suffix_;
+        const bool has_suffix = bel.size() >= suffix.size()
+                                && bel.compare(bel.size() - suffix.size(), suffix.size(), suffix) == 0;
+        if (has_suffix)
+            bel.resize(bel.size() - suffix.size());
+        return bel;
+    }
+
+    /** @brief Returns true if `name` is a LUT cell, or a LUT BEL of site type `site` (of any site type when `site` is empty) */
+    bool is_lut(std::string name, const std::string site = std::string()) {
+        // LUT cells are checked regardless of `site`.
+        // Iterate by const reference: copying each t_lut_cell only to compare its name is wasted work.
+        for (const auto& cell : arch_->lut_cells)
+            if (cell.name == name)
+                return true;
+
+        for (const auto& it : arch_->lut_elements) {
+            // it.first is the site type name; an empty `site` disables the filter
+            if (!site.empty() && site != it.first) {
+                continue;
+            }
+
+            for (const auto& lut_element : it.second)
+                for (const auto& lut_bel : lut_element.lut_bels)
+                    if (lut_bel.name == name)
+                        return true;
+        }
+
+        return false;
+    }
+
+    t_lut_element* get_lut_element_for_bel(const std::string& site_type, const std::string& bel_name) {
+        // Looks up the LUT element of `site_type` that contains a LUT BEL called `bel_name`;
+        // yields nullptr when the site type has no LUT elements or none of them holds the BEL.
+        auto site_entry = arch_->lut_elements.find(site_type);
+        if (site_entry == arch_->lut_elements.end())
+            return nullptr;
+
+        const auto has_name = [&bel_name](const t_lut_bel& candidate) { return candidate.name == bel_name; };
+        for (auto& element : site_entry->second) {
+            if (std::any_of(element.lut_bels.begin(), element.lut_bels.end(), has_name))
+                return &element;
+        }
+
+        return nullptr;
+    }
+
+    /** @brief Tells whether `name` is one of the PAD BELs recorded in pad_bels_ */
+    bool is_pad(std::string name) {
+        return pad_bels_.find(name) != pad_bels_.end();
+    }
+
+    /** @brief Utility function to fill in all the necessary information for the sub_tile
+     *
+     *  Given a physical tile type, one of its sub tiles and the pin counts to add (total, inputs, outputs),
+     *  this function populates the data structures of the sub tile and also modifies the parent physical
+     *  tile type, updating its pin counters as well as the direct pin mapping for the equivalent site
+     *
+     *  Affected data structures:
+     *      - pinloc, pin_width_offset, pin_height_offset
+     *      - fc_specs
+     *      - equivalent_sites
+     *      - tile_block_pin_directs_map
+     **/
+    void fill_sub_tile(t_physical_tile_type& type, t_sub_tile& sub_tile, int num_pins, int input_count, int output_count) {
+        sub_tile.num_phy_pins += num_pins;
+        type.num_pins += num_pins;
+        type.num_inst_pins += num_pins;
+
+        type.num_input_pins += input_count;
+        type.num_output_pins += output_count;
+        type.num_receivers += input_count;
+        type.num_drivers += output_count;
+
+        type.pin_width_offset.resize(type.num_pins, 0);
+        type.pin_height_offset.resize(type.num_pins, 0);
+
+        type.pinloc.resize({1, 1, 4}, std::vector<bool>(type.num_pins, false)); // indexed below as [0][0][side][pin]
+        for (e_side side : {TOP, RIGHT, BOTTOM, LEFT}) {
+            for (int pin = 0; pin < type.num_pins; pin++) {
+                type.pinloc[0][0][side][pin] = true; // every pin is marked on each of the four sides
+                type.pin_width_offset[pin] = 0;
+                type.pin_height_offset[pin] = 0;
+            }
+        }
+
+        vtr::bimap<t_logical_pin, t_physical_pin> directs_map;
+
+        for (int npin = 0; npin < type.num_pins; npin++) { // identity mapping: logical pin i <-> physical pin i
+            t_physical_pin physical_pin(npin);
+            t_logical_pin logical_pin(npin);
+
+            directs_map.insert(logical_pin, physical_pin);
+        }
+
+        auto ltype = get_type_by_name<t_logical_block_type>(sub_tile.name, ltypes_); // looked up by the sub tile's name
+        sub_tile.equivalent_sites.push_back(ltype);
+
+        type.tile_block_pin_directs_map[ltype->index][sub_tile.index] = directs_map; // NOTE(review): ltype is dereferenced unchecked
+
+        // Assign FC specs: one specification per port of the sub tile
+        int iblk_pin = 0; // running pin index across all the ports of the sub tile
+        for (const auto& port : sub_tile.ports) {
+            t_fc_specification fc_spec;
+
+            // FIXME: Use always one segment for the time being.
+            //        Can use the right segment for this IOPIN as soon
+            //        as the RR graph reading from the interchange is complete.
+            fc_spec.seg_index = 0;
+
+            //Apply type and defaults (taken from default_fc_)
+            if (port.type == IN_PORT) {
+                fc_spec.fc_type = e_fc_type::IN;
+                fc_spec.fc_value_type = default_fc_.in_value_type;
+                fc_spec.fc_value = default_fc_.in_value;
+            } else {
+                VTR_ASSERT(port.type == OUT_PORT);
+                fc_spec.fc_type = e_fc_type::OUT;
+                fc_spec.fc_value_type = default_fc_.out_value_type;
+                fc_spec.fc_value = default_fc_.out_value;
+            }
+
+            //Add all the pins from this port, translated to tile-level pin indices
+            for (int iport_pin = 0; iport_pin < port.num_pins; ++iport_pin) {
+                int true_physical_blk_pin = sub_tile.sub_tile_to_tile_pin_indices[iblk_pin++];
+                fc_spec.pins.push_back(true_physical_blk_pin);
+            }
+
+            type.fc_specs.push_back(fc_spec);
+        }
+    }
+
+    /** @brief Returns an intermediate map (interconnect name -> input, outputs, pack-pattern flag) of all the interconnects to be added in a site */
+    std::unordered_map<std::string, t_ic_data> get_interconnects(Device::SiteType::Reader& site) {
+        // dictionary:
+        //   - key: interconnect name
+        //   - value: interconnect data
+        std::unordered_map<std::string, t_ic_data> ics;
+
+        const std::string site_type = str(site.getName());
+
+        for (auto wire : site.getSiteWires()) {
+            std::string wire_name = str(wire.getName());
+
+            // First pass over the wire's pins: find the driver (an OUTPUT pin) and note any PAD BEL on the wire
+            int pin_id = OPEN;
+            bool pad_exists = false;
+            bool all_inout_pins = true;
+            std::string pad_bel_name;
+            std::string pad_bel_pin_name;
+            for (auto pin : wire.getPins()) {
+                auto bel_pin = site.getBelPins()[pin];
+                auto dir = bel_pin.getDir();
+                std::string bel_pin_name = str(bel_pin.getName());
+
+                auto bel = get_bel_reader(site, str(bel_pin.getBel()));
+                auto bel_name = get_bel_name(site, bel);
+
+                auto bel_is_pad = is_pad(bel_name);
+
+                pad_exists |= bel_is_pad;
+                all_inout_pins &= dir == INOUT;
+
+                if (bel_is_pad) {
+                    pad_bel_name = bel_name;
+                    pad_bel_pin_name = bel_pin_name;
+                }
+
+                if (dir == OUTPUT)
+                    pin_id = pin; // if several pins are OUTPUT, the last one wins
+            }
+
+            if (pin_id == OPEN) {
+                // If no driver pin has been found, the assumption is that
+                // there must be a PAD with inout pin connected to other inout pins
+                for (auto pin : wire.getPins()) {
+                    auto bel_pin = site.getBelPins()[pin];
+                    std::string bel_pin_name = str(bel_pin.getName());
+
+                    auto bel = get_bel_reader(site, str(bel_pin.getBel()));
+                    auto bel_name = get_bel_name(site, bel);
+
+                    if (!is_pad(bel_name))
+                        continue;
+
+                    pin_id = pin;
+                }
+            }
+
+            VTR_ASSERT(pin_id != OPEN);
+
+            auto out_pin = site.getBelPins()[pin_id];
+            auto out_pin_bel = get_bel_reader(site, str(out_pin.getBel()));
+            auto out_pin_name = str(out_pin.getName());
+
+            for (auto pin : wire.getPins()) { // connect the driver to every other pin of the wire
+                if ((int)pin == pin_id)
+                    continue;
+
+                auto bel_pin = site.getBelPins()[pin];
+                std::string out_bel_pin_name = str(bel_pin.getName()); // `out_*`: output side of the interconnect (the driven pin)
+
+                auto out_bel = get_bel_reader(site, str(bel_pin.getBel()));
+                auto out_bel_name = get_bel_name(site, out_bel);
+
+                auto in_bel = out_pin_bel; // `in_*`: input side of the interconnect (the driver pin found above)
+                auto in_bel_name = get_bel_name(site, in_bel);
+                auto in_bel_pin_name = out_pin_name;
+
+                bool skip_in_bel = in_bel.getCategory() == LOGIC && take_bels_.count(in_bel.getName()) == 0;
+                bool skip_out_bel = out_bel.getCategory() == LOGIC && take_bels_.count(out_bel.getName()) == 0;
+                if (skip_in_bel || skip_out_bel)
+                    continue;
+
+                // LUT bels are nested under pb_types which represent LUT
+                // elements. Check if a BEL belongs to a LUT element and
+                // adjust pb_type name in the interconnect accordingly.
+                auto get_lut_element_index = [&](const std::string& bel_name) {
+                    auto lut_element = get_lut_element_for_bel(site_type, bel_name);
+                    if (lut_element == nullptr)
+                        return -1;
+
+                    const auto& lut_elements = arch_->lut_elements.at(site_type);
+                    auto it = std::find(lut_elements.begin(), lut_elements.end(), *lut_element);
+                    VTR_ASSERT(it != lut_elements.end());
+
+                    return (int)std::distance(lut_elements.begin(), it);
+                };
+
+                // TODO: This avoids having LUTs that can be used in other ways than LUTs, e.g. as DRAMs.
+                //       Once support is added for macro expansion, all the connections currently marked as
+                //       invalid will be re-enabled.
+                auto is_lut_connection_valid = [&](const std::string& bel_name, const std::string& pin_name) {
+                    auto lut_element = get_lut_element_for_bel(site_type, bel_name);
+                    if (lut_element == nullptr)
+                        return false;
+
+                    bool pin_found = false; // true if any LUT BEL of the element has `pin_name` as input or output pin
+                    for (auto lut_bel : lut_element->lut_bels) {
+                        for (auto lut_bel_pin : lut_bel.input_pins)
+                            pin_found |= lut_bel_pin == pin_name;
+
+                        pin_found |= lut_bel.output_pin == pin_name;
+                    }
+
+                    if (!pin_found)
+                        return false;
+
+                    return true;
+                };
+
+                int index = get_lut_element_index(out_bel_name);
+                bool valid_lut = is_lut_connection_valid(out_bel_name, out_bel_pin_name);
+                if (index >= 0) {
+                    out_bel_name = "LUT" + std::to_string(index);
+
+                    if (!valid_lut)
+                        continue;
+                }
+
+                index = get_lut_element_index(in_bel_name);
+                valid_lut = is_lut_connection_valid(in_bel_name, in_bel_pin_name);
+                if (index >= 0) {
+                    in_bel_name = "LUT" + std::to_string(index);
+
+                    if (!valid_lut)
+                        continue;
+                }
+
+                std::string ostr = out_bel_name + "." + out_bel_pin_name;
+                std::string istr = in_bel_name + "." + in_bel_pin_name;
+
+                // TODO: If the bel pin is INOUT (e.g. PULLDOWN/PULLUP in Series7)
+                //       for now treat as input only and assign the in suffix
+                if (bel_pin.getDir() == INOUT && !all_inout_pins && !is_pad(out_bel_name))
+                    ostr += in_suffix_;
+
+                auto ic_name = wire_name + "_" + out_bel_pin_name;
+
+                bool requires_pack_pattern = pad_exists;
+
+                std::vector<std::pair<std::string, t_ic_data>> ics_data;
+                if (all_inout_pins) { // every pin on the wire is INOUT: also add a connection in the opposite direction
+                    std::string extra_istr = out_bel_name + "." + out_bel_pin_name + out_suffix_;
+                    std::string extra_ostr = in_bel_name + "." + in_bel_pin_name + in_suffix_;
+                    std::string extra_ic_name = ic_name + "_extra";
+
+                    std::set<std::string> extra_ostrs{extra_ostr};
+                    t_ic_data extra_ic_data = {
+                        extra_istr,           // ic input
+                        extra_ostrs,          // ic outputs
+                        requires_pack_pattern // pack pattern required
+                    };
+
+                    ics_data.push_back(std::make_pair(extra_ic_name, extra_ic_data));
+
+                    istr += out_suffix_;
+                    ostr += in_suffix_;
+                } else if (pad_exists) {
+                    if (out_bel_name == pad_bel_name)
+                        ostr += in_suffix_;
+                    else { // Create new wire to connect PAD output to the BELs input
+                        ic_name = wire_name + "_" + pad_bel_pin_name + out_suffix_;
+                        istr = pad_bel_name + "." + pad_bel_pin_name + out_suffix_;
+                    }
+                }
+
+                std::set<std::string> ostrs{ostr};
+                t_ic_data ic_data = {
+                    istr,
+                    ostrs,
+                    requires_pack_pattern};
+
+                ics_data.push_back(std::make_pair(ic_name, ic_data));
+
+                for (auto entry : ics_data) {
+                    auto name = entry.first;
+                    auto data = entry.second;
+
+                    auto res = ics.emplace(name, data);
+
+                    if (!res.second) { // name already present: merge the new output into the existing interconnect
+                        auto old_data = res.first->second;
+
+                        VTR_ASSERT(old_data.input == data.input); // a merged interconnect must keep the same input
+                        VTR_ASSERT(data.outputs.size() == 1);
+
+                        for (auto out : data.outputs)
+                            res.first->second.outputs.insert(out);
+                        res.first->second.requires_pack_pattern |= data.requires_pack_pattern;
+                    }
+                }
+            }
+        }
+
+        return ics;
+    }
+
+    /**
+     * Preprocessors:
+     *   - process_bels_and_sites: information on whether sites and bels need to be expanded in pb types
+     *   - process_luts: processes information on which cells and bels are LUTs
+     *   - process_package_pins: processes information on the device's pinout and which sites and bels
+     *                           contain IO pads
+     *   - process_cell_bel_mappings: processes mappings between a cell and the possible BEL locations for that cell
+     *   - process_constants: processes constants cell and net names
+     */
+    void process_bels_and_sites() {
+        // Walks the primary site type of every site instance in the device and records:
+        //   - in take_bels_: BELs that have a cell mapping or that are PADs
+        //   - in take_sites_: site types owning at least one BEL with a cell mapping
+        auto all_tile_types = ar_.getTileTypeList();
+        auto all_site_types = ar_.getSiteTypeList();
+
+        for (auto tile : ar_.getTileList()) {
+            auto sites_in_tile_type = all_tile_types[tile.getType()].getSiteTypes();
+
+            for (auto site : tile.getSites()) {
+                auto primary = all_site_types[sites_in_tile_type[site.getType()].getPrimaryType()];
+
+                bool has_mapped_bel = false;
+                for (auto bel : primary.getBels()) {
+                    auto bel_id = bel.getName();
+                    const bool mapped = bel_cell_mappings_.count(bel_id) != 0;
+                    has_mapped_bel = has_mapped_bel || mapped;
+
+                    if (mapped || is_pad(str(bel_id)))
+                        take_bels_.insert(bel_id);
+                }
+
+                if (has_mapped_bel)
+                    take_sites_.insert(primary.getName());
+
+                // TODO: Enable also alternative site types handling
+            }
+        }
+    }
+
+    void process_luts() {
+        // Records the device's LUT definitions in the arch: first the LUT cells (name, input pins
+        // and optional init parameter), then the LUT elements of every site type with their BELs.
+        auto definitions = ar_.getLutDefinitions();
+
+        for (auto cell_def : definitions.getLutCells()) {
+            t_lut_cell lut_cell;
+            lut_cell.name = cell_def.getCell().cStr();
+            for (auto input_pin : cell_def.getInputPins())
+                lut_cell.inputs.push_back(input_pin.cStr());
+
+            // The init parameter is stored only when the equation is given as one
+            auto equation = cell_def.getEquation();
+            if (equation.isInitParam())
+                lut_cell.init_param = equation.getInitParam().cStr();
+
+            arch_->lut_cells.push_back(lut_cell);
+        }
+
+        for (auto site_luts : definitions.getLutElements()) {
+            for (auto lut : site_luts.getLuts()) {
+                t_lut_element lut_element;
+                lut_element.site_type = site_luts.getSite().cStr();
+                lut_element.width = lut.getWidth();
+
+                for (auto bel_def : lut.getBels()) {
+                    std::vector<std::string> input_names;
+                    for (auto pin : bel_def.getInputPins())
+                        input_names.push_back(pin.cStr());
+
+                    t_lut_bel lut_bel;
+                    lut_bel.name = bel_def.getName().cStr();
+                    lut_bel.input_pins = input_names;
+                    lut_bel.output_pin = bel_def.getOutputPin().cStr();
+
+                    lut_element.lut_bels.push_back(lut_bel);
+                }
+
+                arch_->lut_elements[lut_element.site_type].push_back(lut_element);
+            }
+        }
+    }
+
+    void process_package_pins() {
+        // Collects the package pins of every package; a BEL bound to a pin is remembered as a PAD BEL
+        for (auto package : ar_.getPackages()) {
+            for (auto pin : package.getPackagePins()) {
+                t_package_pin entry;
+                entry.name = str(pin.getPackagePin());
+                auto pin_bel = pin.getBel();
+                if (pin_bel.isBel()) {
+                    entry.bel_name = str(pin_bel.getBel());
+                    pad_bels_.insert(entry.bel_name);
+                }
+                auto pin_site = pin.getSite();
+                if (pin_site.isSite())
+                    entry.site_name = str(pin_site.getSite());
+                package_pins_.push_back(entry);
+            }
+        }
+    }
+
+    void process_cell_bel_mappings() {
+        // Builds bel_cell_mappings_: for every BEL (by name index) the set of cells that may be placed
+        // on it, together with the site type and the cell-pin/BEL-pin pairs of the mapping.
+        auto primLib = ar_.getPrimLibs();
+        auto portList = primLib.getPortList();
+
+        for (auto cell_mapping : ar_.getCellBelMap()) {
+            size_t cell_name = cell_mapping.getCell();
+
+            // A cell is mapped only if the "primitives" library declares it without any INOUT port
+            bool found_valid_prim = false;
+            for (auto primitive : primLib.getCellDecls()) {
+                // Cheap checks first: ports are inspected only for declarations of this very cell
+                if (cell_name != primitive.getName())
+                    continue;
+                if (str(primitive.getLib()) != std::string("primitives"))
+                    continue;
+
+                bool has_inout = false;
+                for (auto port_idx : primitive.getPorts()) {
+                    if (portList[port_idx].getDir() == INOUT) {
+                        has_inout = true;
+                        break;
+                    }
+                }
+
+                if (!has_inout) {
+                    found_valid_prim = true;
+                    break;
+                }
+            }
+
+            if (!found_valid_prim)
+                continue;
+
+            for (auto common_pins : cell_mapping.getCommonPins()) {
+                std::vector<std::pair<size_t, size_t>> pins;
+
+                for (auto pin_map : common_pins.getPins())
+                    pins.emplace_back(pin_map.getCellPin(), pin_map.getBelPin());
+
+                for (auto site_type_entry : common_pins.getSiteTypes()) {
+                    size_t site_type = site_type_entry.getSiteType();
+
+                    for (auto bel : site_type_entry.getBels()) {
+                        t_bel_cell_mapping mapping;
+                        mapping.cell = cell_name;
+                        mapping.site = site_type;
+                        mapping.pins = pins;
+
+                        // operator[] creates the (empty) set on first use, so no temporary set is needed
+                        bel_cell_mappings_[bel].insert(mapping);
+                    }
+                }
+            }
+        }
+    }
+
+    void process_constants() {
+        // Stores the GND/VCC constant cells (type, pin) and net names; a net without a name gets a "$__*_net" default
+        auto constants = ar_.getConstants();
+
+        arch_->gnd_cell = std::make_pair(str(constants.getGndCellType()), str(constants.getGndCellPin()));
+        arch_->gnd_net = constants.getGndNetName().isName() ? str(constants.getGndNetName().getName()) : "$__gnd_net";
+        arch_->vcc_cell = std::make_pair(str(constants.getVccCellType()), str(constants.getVccCellPin()));
+        arch_->vcc_net = constants.getVccNetName().isName() ? str(constants.getVccNetName().getName()) : "$__vcc_net";
+    }
+
+    /* end preprocessors */
+
+    // Model processing: creates one t_model for each non-LUT cell of the "primitives" library that maps to a taken, non-LUT BEL
+    void process_models() {
+        // Populate the common library, namely .inputs, .outputs, .names, .latches
+        CreateModelLibrary(arch_);
+
+        t_model* temp = nullptr;
+        std::map<std::string, int> model_name_map; // only used to detect duplicate names (the mapped value is always 0)
+        std::pair<std::map<std::string, int>::iterator, bool> ret_map_name;
+
+        int model_index = NUM_MODELS_IN_LIBRARY;
+        arch_->models = nullptr;
+
+        auto primLib = ar_.getPrimLibs();
+        for (auto primitive : primLib.getCellDecls()) {
+            if (str(primitive.getLib()) == std::string("primitives")) {
+                std::string prim_name = str(primitive.getName());
+
+                if (is_lut(prim_name))
+                    continue;
+
+                // Check whether the model can be placed in at least one
+                // BEL that was marked as valid (e.g. added to the take_bels_ data structure)
+                bool has_bel = false;
+                for (auto bel_cell_map : bel_cell_mappings_) {
+                    auto bel_name = bel_cell_map.first;
+
+                    bool take_bel = take_bels_.count(bel_name) != 0;
+
+                    if (!take_bel || is_lut(str(bel_name)))
+                        continue;
+
+                    for (auto map : bel_cell_map.second)
+                        has_bel |= primitive.getName() == map.cell;
+                }
+
+                if (!has_bel)
+                    continue;
+
+                try {
+                    temp = new t_model;
+                    temp->index = model_index++; // NOTE(review): the index stays consumed if the model is dropped below — confirm gaps are fine
+
+                    temp->never_prune = true;
+                    temp->name = vtr::strdup(prim_name.c_str());
+
+                    ret_map_name = model_name_map.insert(std::pair<std::string, int>(temp->name, 0));
+                    if (!ret_map_name.second) {
+                        archfpga_throw(arch_file_, __LINE__,
+                                       "Duplicate model name: '%s'.\n", temp->name);
+                    }
+
+                    if (!process_model_ports(temp, primitive)) { // false: the primitive has an INOUT port, so the model is dropped
+                        free_arch_model(temp);
+                        continue;
+                    }
+
+                    check_model_clocks(temp, arch_file_, __LINE__);
+                    check_model_combinational_sinks(temp, arch_file_, __LINE__);
+                    warn_model_missing_timing(temp, arch_file_, __LINE__);
+
+                } catch (ArchFpgaError& e) {
+                    free_arch_model(temp); // release the partially built model before propagating the error
+                    throw;
+                }
+                temp->next = arch_->models; // prepend the finished model to the arch's model list
+                arch_->models = temp;
+            }
+        }
+    }
+
+    bool process_model_ports(t_model* model, Netlist::CellDeclaration::Reader primitive) { // adds the primitive's ports to `model`; false on an INOUT port
+        auto primLib = ar_.getPrimLibs();
+        auto portList = primLib.getPortList();
+
+        std::set<std::pair<std::string, enum PORTS>> port_names; // (name, direction) pairs seen so far, for duplicate detection
+
+        for (auto port_idx : primitive.getPorts()) {
+            auto port = portList[port_idx];
+            enum PORTS dir = ERR_PORT; // stays ERR_PORT for any direction other than INPUT/OUTPUT
+            switch (port.getDir()) {
+                case INPUT:
+                    dir = IN_PORT;
+                    break;
+                case OUTPUT:
+                    dir = OUT_PORT;
+                    break;
+                case INOUT:
+                    return false; // ports already linked into `model` stay attached to it
+                    break;
+                default:
+                    break;
+            }
+            t_model_ports* model_port = new t_model_ports;
+            model_port->dir = dir;
+            model_port->name = vtr::strdup(str(port.getName()).c_str());
+
+            // TODO: add parsing of clock port types when the interchange schema allows for it:
+            //       https://github.com/chipsalliance/fpga-interchange-schema/issues/66
+
+            //Sanity checks (NOTE(review): is_clock, is_non_clock_global and combinational_sink_ports are not set here — confirm defaults)
+            if (model_port->is_clock == true && model_port->is_non_clock_global == true) {
+                archfpga_throw(arch_file_, __LINE__,
+                               "Model port '%s' cannot be both a clock and a non-clock signal simultaneously", model_port->name);
+            }
+            if (model_port->name == nullptr) {
+                archfpga_throw(arch_file_, __LINE__,
+                               "Model port is missing a name");
+            }
+            if (port_names.count(std::pair<std::string, enum PORTS>(model_port->name, dir)) && dir != INOUT_PORT) { // dir is never INOUT_PORT here
+                archfpga_throw(arch_file_, __LINE__,
+                               "Duplicate model port named '%s'", model_port->name);
+            }
+            if (dir == OUT_PORT && !model_port->combinational_sink_ports.empty()) {
+                archfpga_throw(arch_file_, __LINE__,
+                               "Model output ports can not have combinational sink ports");
+            }
+
+            model_port->min_size = 1;
+            model_port->size = 1;
+            if (port.isBus()) { // bus width is |end - start| + 1, whichever direction the bus is numbered
+                int s = port.getBus().getBusStart();
+                int e = port.getBus().getBusEnd();
+                model_port->size = std::abs(e - s) + 1;
+            }
+
+            port_names.insert(std::pair<std::string, enum PORTS>(model_port->name, dir));
+            //Add the port at the head of the matching list
+            if (dir == IN_PORT) {
+                model_port->next = model->inputs;
+                model->inputs = model_port;
+
+            } else if (dir == OUT_PORT) {
+                model_port->next = model->outputs;
+                model->outputs = model_port;
+            } // NOTE(review): a port left as ERR_PORT is linked to neither list and looks like it is never freed — confirm
+        }
+
+        return true;
+    }
+
+    // Complex Blocks: builds one logical block type, with a single "default" mode, for every taken site type
+    void process_sites() {
+        auto siteTypeList = ar_.getSiteTypeList();
+
+        int index = 0;
+        // TODO: Make this dynamic depending on data from the interchange
+        auto EMPTY = get_empty_logical_type();
+        EMPTY.index = index; // the empty type always takes index 0
+        ltypes_.push_back(EMPTY);
+
+        for (auto site : siteTypeList) {
+            auto bels = site.getBels();
+
+            if (bels.size() == 0)
+                continue;
+
+            t_logical_block_type ltype;
+
+            std::string name = str(site.getName());
+
+            if (take_sites_.count(site.getName()) == 0) // site types not marked by process_bels_and_sites are skipped
+                continue;
+
+            // Check for duplicates
+            auto is_duplicate = [name](t_logical_block_type l) { return std::string(l.name) == name; };
+            VTR_ASSERT(std::find_if(ltypes_.begin(), ltypes_.end(), is_duplicate) == ltypes_.end());
+
+            ltype.name = vtr::strdup(name.c_str());
+            ltype.index = ++index;
+
+            auto pb_type = new t_pb_type;
+            ltype.pb_type = pb_type;
+
+            pb_type->name = vtr::strdup(name.c_str());
+            pb_type->num_pb = 1;
+            process_block_ports(pb_type, site);
+
+            // Process modes (for simplicity, only the default mode is allowed for the time being)
+            pb_type->num_modes = 1;
+            pb_type->modes = new t_mode[pb_type->num_modes];
+
+            auto mode = &pb_type->modes[0];
+            mode->parent_pb_type = pb_type;
+            mode->index = 0;
+            mode->name = vtr::strdup("default");
+            mode->disable_packing = false;
+
+            // Get LUT elements for this site (a copy; left empty when the site type has none)
+            std::vector<t_lut_element> lut_elements;
+            if (arch_->lut_elements.count(name))
+                lut_elements = arch_->lut_elements.at(name);
+
+            // Count non-LUT BELs plus LUT elements
+            int block_count = get_bel_type_count(site, LOGIC, true) + get_bel_type_count(site, ROUTING, true) + lut_elements.size();
+
+            mode->num_pb_type_children = block_count;
+            mode->pb_type_children = new t_pb_type[mode->num_pb_type_children];
+
+            // Add regular BELs
+            int count = 0; // next free slot in mode->pb_type_children
+            for (auto bel : bels) {
+                auto category = bel.getCategory();
+                if (bel.getCategory() == SITE_PORT)
+                    continue;
+
+                bool is_logic = category == LOGIC;
+
+                if (take_bels_.count(bel.getName()) == 0 && is_logic)
+                    continue;
+
+                if (is_lut(str(bel.getName()), name))
+                    continue;
+
+                auto bel_name = str(bel.getName());
+                std::pair<std::string, std::string> key(name, bel_name); // NOTE(review): `key` is not used in this function
+
+                auto mid_pb_type = &mode->pb_type_children[count++];
+                std::string mid_pb_type_name = bel_name == name ? bel_name + bel_dedup_suffix_ : bel_name;
+
+                mid_pb_type->name = vtr::strdup(mid_pb_type_name.c_str());
+                mid_pb_type->num_pb = 1;
+                mid_pb_type->parent_mode = mode;
+                mid_pb_type->blif_model = nullptr;
+
+                if (!is_pad(bel_name))
+                    process_block_ports(mid_pb_type, site, bel.getName());
+
+                if (is_pad(bel_name))
+                    process_pad_block(mid_pb_type, bel, site);
+                else if (is_logic)
+                    process_generic_block(mid_pb_type, bel, site);
+                else {
+                    VTR_ASSERT(category == ROUTING);
+                    process_routing_block(mid_pb_type);
+                }
+            }
+
+            // Add LUT elements (named "LUT<i>", the same naming get_interconnects uses)
+            for (size_t i = 0; i < lut_elements.size(); ++i) {
+                const auto& lut_element = lut_elements[i];
+
+                auto mid_pb_type = &mode->pb_type_children[count++];
+                std::string lut_name = "LUT" + std::to_string(i);
+                mid_pb_type->name = vtr::strdup(lut_name.c_str());
+                mid_pb_type->num_pb = 1;
+                mid_pb_type->parent_mode = mode;
+                mid_pb_type->blif_model = nullptr;
+
+                process_lut_element(mid_pb_type, lut_element);
+            }
+
+            process_interconnects(mode, site);
+            ltypes_.push_back(ltype);
+        }
+    }
+
+    /** @brief Processes a LUT element starting from the intermediate pb type */
+    void process_lut_element(t_pb_type* parent, const t_lut_element& lut_element) {
+        // The parent pb_type stands for the whole LUT element: every input pin
+        // and the output pin of each member LUT becomes a 1-bit port of the parent.
+        // Being a set, a pin listed by several member LUTs is added only once.
+        std::set<std::tuple<std::string, PORTS, int>> parent_ports;
+        for (const auto& member : lut_element.lut_bels) {
+            for (const auto& input_pin : member.input_pins) {
+                parent_ports.emplace(input_pin, IN_PORT, 1);
+            }
+
+            parent_ports.emplace(member.output_pin, OUT_PORT, 1);
+        }
+
+        // Materialize the collected ports on the parent
+        create_ports(parent, parent_ports);
+
+        // One mode per member LUT of the LUT element
+        const size_t num_luts = lut_element.lut_bels.size();
+        parent->num_modes = (int)num_luts;
+        parent->modes = new t_mode[parent->num_modes];
+
+        const std::string parent_name(parent->name);
+
+        // Fills `ic` as a direct connection of mode `owner` going from `from` to `to`;
+        // the interconnect is named "<from>_to_<to>".
+        auto set_direct = [](t_interconnect* ic, t_mode* owner, const std::string& from, const std::string& to) {
+            const std::string ic_name = from + "_to_" + to;
+
+            ic->type = DIRECT_INTERC;
+            ic->parent_mode = owner;
+            ic->parent_mode_index = owner->index;
+            ic->input_string = vtr::strdup(from.c_str());
+            ic->output_string = vtr::strdup(to.c_str());
+            ic->name = vtr::strdup(ic_name.c_str());
+        };
+
+        for (size_t imode = 0; imode < num_luts; ++imode) {
+            const t_lut_bel& lut_bel = lut_element.lut_bels[imode];
+            t_mode* mode = &parent->modes[imode];
+
+            // The mode takes the name of the LUT bel it wraps
+            mode->name = vtr::strdup(lut_bel.name.c_str());
+            mode->parent_pb_type = parent;
+            mode->index = imode;
+
+            // The only child of the mode is the pb_type of the LUT itself
+            mode->num_pb_type_children = 1;
+            mode->pb_type_children = new t_pb_type[mode->num_pb_type_children];
+
+            t_pb_type* lut_pb = &mode->pb_type_children[0];
+            lut_pb->name = vtr::strdup(lut_bel.name.c_str());
+            lut_pb->num_pb = 1;
+            lut_pb->parent_mode = mode;
+            lut_pb->blif_model = nullptr;
+
+            process_lut_block(lut_pb, lut_bel);
+
+            // One interconnect per LUT input, plus a last one for the output
+            const size_t num_inputs = lut_bel.input_pins.size();
+            mode->num_interconnect = num_inputs + 1;
+            mode->interconnect = new t_interconnect[mode->num_interconnect];
+
+            const std::string lut_name(lut_pb->name);
+
+            // Inputs: <parent>.<pin> drives bit `ipin` of the LUT input bus
+            for (size_t ipin = 0; ipin < num_inputs; ++ipin) {
+                set_direct(&mode->interconnect[ipin], mode,
+                           parent_name + "." + lut_bel.input_pins[ipin],
+                           lut_name + ".in[" + std::to_string(ipin) + "]");
+            }
+
+            // Output: the LUT output drives <parent>.<output pin>
+            set_direct(&mode->interconnect[mode->num_interconnect - 1], mode,
+                       lut_name + ".out",
+                       parent_name + "." + lut_bel.output_pin);
+        }
+    }
+
+    /** @brief Processes a LUT primitive starting from the intermediate pb type */
+    void process_lut_block(t_pb_type* pb_type, const t_lut_bel& lut_bel) {
+        // The block has a single input bus, one bit per input pin of the LUT bel,
+        // and a 1-bit output
+        const size_t width = lut_bel.input_pins.size();
+
+        std::set<std::tuple<std::string, PORTS, int>> ports;
+        ports.emplace("in", IN_PORT, width);
+        ports.emplace("out", OUT_PORT, 1);
+
+        create_ports(pb_type, ports);
+
+        // Common initialization of an interconnect `ic` that belongs to mode `owner`
+        auto fill_interconnect = [](t_interconnect* ic,
+                                    t_mode* owner,
+                                    e_interconnect type,
+                                    const std::string& from,
+                                    const std::string& to,
+                                    const std::string& ic_name) {
+            ic->input_string = vtr::strdup(from.c_str());
+            ic->output_string = vtr::strdup(to.c_str());
+            ic->name = vtr::strdup(ic_name.c_str());
+
+            ic->type = type;
+            ic->parent_mode = owner;
+            ic->parent_mode_index = owner->index;
+        };
+
+        // Endpoints of the block itself, shared by both modes
+        const std::string block_name(pb_type->name);
+        const std::string block_in = block_name + ".in";
+        const std::string block_out = block_name + ".out";
+
+        // Two modes: modes[0] is the LUT-thru ("wire"), modes[1] the actual LUT bel ("lut")
+        pb_type->num_modes = 2;
+        pb_type->modes = new t_mode[pb_type->num_modes];
+
+        // ................................................
+        // LUT-thru
+        t_mode* wire_mode = &pb_type->modes[0];
+
+        // Mode without children
+        wire_mode->name = vtr::strdup("wire");
+        wire_mode->parent_pb_type = pb_type;
+        wire_mode->index = 0;
+        wire_mode->num_pb_type_children = 0;
+
+        // A single complete interconnect from the input bus to the output
+        wire_mode->num_interconnect = 1;
+        wire_mode->interconnect = new t_interconnect[wire_mode->num_interconnect];
+
+        fill_interconnect(&wire_mode->interconnect[0],
+                          wire_mode,
+                          COMPLETE_INTERC,
+                          block_in,
+                          block_out,
+                          "passthrough");
+
+        // ................................................
+        // LUT BEL
+        t_mode* lut_mode = &pb_type->modes[1];
+
+        // Mode
+        lut_mode->name = vtr::strdup("lut");
+        lut_mode->parent_pb_type = pb_type;
+        lut_mode->index = 1;
+
+        // The leaf pb_type is the only child of the mode
+        lut_mode->num_pb_type_children = 1;
+        lut_mode->pb_type_children = new t_pb_type[lut_mode->num_pb_type_children];
+
+        t_pb_type* lut = &lut_mode->pb_type_children[0];
+        lut->name = vtr::strdup("lut");
+        lut->num_pb = 1;
+        lut->parent_mode = lut_mode;
+
+        // The leaf is bound to the MODEL_NAMES model
+        lut->blif_model = vtr::strdup(MODEL_NAMES);
+        lut->model = get_model(arch_, std::string(MODEL_NAMES));
+
+        // Leaf ports: the input bus at index 0, the output at index 1
+        lut->num_ports = 2;
+        lut->ports = (t_port*)vtr::calloc(lut->num_ports, sizeof(t_port));
+        lut->ports[0] = get_generic_port(arch_, lut, IN_PORT, "in", MODEL_NAMES, width);
+        lut->ports[1] = get_generic_port(arch_, lut, OUT_PORT, "out", MODEL_NAMES);
+
+        lut->ports[0].equivalent = PortEquivalence::FULL;
+
+        // Both the block and its leaf are tagged with the LUT class
+        pb_type->class_type = LUT_CLASS;
+        lut->class_type = LUT_CLASS;
+        lut->ports[0].port_class = vtr::strdup("lut_in");
+        lut->ports[1].port_class = vtr::strdup("lut_out");
+
+        // Direct interconnects between the block ports and the leaf ports
+        lut_mode->num_interconnect = 2;
+        lut_mode->interconnect = new t_interconnect[lut_mode->num_interconnect];
+
+        const std::string leaf_name(lut->name);
+        const std::string leaf_in = leaf_name + ".in";
+        const std::string leaf_out = leaf_name + ".out";
+
+        // Input
+        fill_interconnect(&lut_mode->interconnect[0], lut_mode, DIRECT_INTERC,
+                          block_in, leaf_in, block_in + "_to_" + leaf_in);
+
+        // Output
+        fill_interconnect(&lut_mode->interconnect[1], lut_mode, DIRECT_INTERC,
+                          leaf_out, block_out, leaf_out + "_to_" + block_out);
+    }
+
+    /** @brief Generates the leaf pb types for the PAD type */
+    void process_pad_block(t_pb_type* pad, Device::BEL::Reader& bel, Device::SiteType::Reader& site) {
+        // Layout produced for a PAD bel:
+        //   - two ports on `pad`: <pin><in_suffix_> (input) and <pin><out_suffix_> (output)
+        //   - mode 0 "opad": leaf bound to MODEL_OUTPUT, fed by the input port of the pad
+        //   - mode 1 "ipad": leaf bound to MODEL_INPUT, driving the output port of the pad
+        // For now, hard-code two modes for pads, so that PADs can either be IPADs or OPADs
+        pad->num_modes = 2;
+        pad->modes = new t_mode[2];
+
+        // Add PAD pb_type ports. The bel is required to have exactly one pin.
+        VTR_ASSERT(bel.getPins().size() == 1);
+        std::string pin = str(site.getBelPins()[bel.getPins()[0]].getName());
+        std::string ipin = pin + in_suffix_;
+        std::string opin = pin + out_suffix_;
+
+        const int num_ports = 2;
+        auto ports = new t_port[num_ports];
+        pad->ports = ports;
+        pad->num_ports = pad->num_pins = num_ports;
+        pad->num_input_pins = 1;
+        pad->num_output_pins = 1;
+
+        int pin_abs = 0;
+        int pin_count = 0;
+        for (auto dir : {IN_PORT, OUT_PORT}) {
+            // Exactly one port per direction, hence the per-direction index is always 0
+            int pins_dir_count = 0;
+            t_port* port = &ports[pin_count];
+
+            port->parent_pb_type = pad;
+            port->index = pin_count++;
+            port->port_index_by_type = pins_dir_count++;
+            port->absolute_first_pin_index = pin_abs++;
+
+            port->equivalent = PortEquivalence::NONE;
+            port->num_pins = 1;
+            port->type = dir;
+            port->is_clock = false;
+
+            bool is_input = dir == IN_PORT;
+            port->name = is_input ? vtr::strdup(ipin.c_str()) : vtr::strdup(opin.c_str());
+            port->model_port = nullptr;
+            port->port_class = vtr::strdup(nullptr);
+            port->port_power = (t_port_power*)vtr::calloc(1, sizeof(t_port_power));
+        }
+
+        // OPAD mode
+        auto omode = &pad->modes[0];
+        omode->name = vtr::strdup("opad");
+        omode->parent_pb_type = pad;
+        omode->index = 0;
+        omode->num_pb_type_children = 1;
+        omode->pb_type_children = new t_pb_type[1];
+
+        // The leaf is filled in place inside the children array of the mode, so that
+        // get_generic_port() is given the real child and no temporary t_pb_type is left behind
+        auto opad = &omode->pb_type_children[0];
+        opad->name = vtr::strdup("opad");
+        opad->num_pb = 1;
+        opad->parent_mode = omode;
+
+        opad->num_ports = 1;
+        opad->ports = (t_port*)vtr::calloc(opad->num_ports, sizeof(t_port));
+        opad->blif_model = vtr::strdup(MODEL_OUTPUT);
+        opad->model = get_model(arch_, std::string(MODEL_OUTPUT));
+
+        opad->ports[0] = get_generic_port(arch_, opad, IN_PORT, "outpad", MODEL_OUTPUT);
+
+        // IPAD mode
+        auto imode = &pad->modes[1];
+        imode->name = vtr::strdup("ipad");
+        imode->parent_pb_type = pad;
+        imode->index = 1;
+        imode->num_pb_type_children = 1;
+        imode->pb_type_children = new t_pb_type[1];
+
+        // As for the OPAD leaf, the IPAD leaf is filled in place
+        auto ipad = &imode->pb_type_children[0];
+        ipad->name = vtr::strdup("ipad");
+        ipad->num_pb = 1;
+        ipad->parent_mode = imode;
+
+        ipad->num_ports = 1;
+        ipad->ports = (t_port*)vtr::calloc(ipad->num_ports, sizeof(t_port));
+        ipad->blif_model = vtr::strdup(MODEL_INPUT);
+        ipad->model = get_model(arch_, std::string(MODEL_INPUT));
+
+        ipad->ports[0] = get_generic_port(arch_, ipad, OUT_PORT, "inpad", MODEL_INPUT);
+
+        // Handle interconnects: each mode has one direct connection between pad and leaf
+        omode->num_interconnect = 1;
+        omode->interconnect = new t_interconnect[1];
+
+        imode->num_interconnect = 1;
+        imode->interconnect = new t_interconnect[1];
+
+        std::string opad_istr = std::string(pad->name) + std::string(".") + ipin;
+        std::string opad_ostr = std::string(opad->name) + std::string(".outpad");
+        std::string o_ic_name = std::string(pad->name) + std::string("_") + std::string(opad->name);
+
+        std::string ipad_istr = std::string(ipad->name) + std::string(".inpad");
+        std::string ipad_ostr = std::string(pad->name) + std::string(".") + opin;
+        std::string i_ic_name = std::string(ipad->name) + std::string("_") + std::string(pad->name);
+
+        // The interconnects are written directly in the arrays owned by the modes
+        auto o_ic = &omode->interconnect[0];
+        o_ic->name = vtr::strdup(o_ic_name.c_str());
+        o_ic->type = DIRECT_INTERC;
+        o_ic->parent_mode_index = omode->index;
+        o_ic->parent_mode = omode;
+        o_ic->input_string = vtr::strdup(opad_istr.c_str());
+        o_ic->output_string = vtr::strdup(opad_ostr.c_str());
+
+        // The IPAD interconnect belongs to mode 1, so it carries the index of that mode
+        auto i_ic = &imode->interconnect[0];
+        i_ic->name = vtr::strdup(i_ic_name.c_str());
+        i_ic->type = DIRECT_INTERC;
+        i_ic->parent_mode_index = imode->index;
+        i_ic->parent_mode = imode;
+        i_ic->input_string = vtr::strdup(ipad_istr.c_str());
+        i_ic->output_string = vtr::strdup(ipad_ostr.c_str());
+    }
+
+    /** @brief Generates the leaf pb types for a generic intermediate block, with as many modes
+     *         as the number of models that can be used in this complex block.
+     */
+    void process_generic_block(t_pb_type* pb_type, Device::BEL::Reader& bel, Device::SiteType::Reader& site) {
+        std::string pb_name = std::string(pb_type->name);
+
+        // Matches a bus pin such as "D[3]" (group 1: port name, group 2: bit index); compiled only once
+        const std::regex port_regex("([0-9A-Za-z-]+)\\[([0-9]+)\\]");
+
+        // Copy of the cell mappings of this bel, from which the unusable ones are dropped
+        std::set<t_bel_cell_mapping> maps(bel_cell_mappings_[bel.getName()]);
+
+        for (auto map_it = maps.begin(); map_it != maps.end();) {
+            const auto& map = *map_it;
+            auto name = str(map.cell);
+            bool is_compatible = map.site == site.getName();
+
+            for (const auto& pin_map : map.pins) {
+                if (is_compatible == false)
+                    break;
+
+                auto cell_pin = str(pin_map.first);
+                auto bel_pin = str(pin_map.second);
+
+                if (cell_pin == arch_->vcc_cell.first || cell_pin == arch_->gnd_cell.first)
+                    continue;
+
+                // Assign suffix to bel pin as it is a inout pin which was split in out and in ports
+                auto pin_reader = get_bel_pin_reader(site, bel, bel_pin);
+                bool is_inout = pin_reader.getDir() == INOUT;
+
+                auto model_port = get_model_port(arch_, name, cell_pin, false);
+
+                if (is_inout && model_port != nullptr)
+                    bel_pin = model_port->dir == IN_PORT ? bel_pin + in_suffix_ : bel_pin + out_suffix_;
+
+                is_compatible &= block_port_exists(pb_type, bel_pin);
+            }
+
+            // Erased with a plain statement, so it does not depend on how VTR_ASSERT is compiled
+            if (is_compatible)
+                ++map_it;
+            else
+                map_it = maps.erase(map_it);
+        }
+
+        int num_modes = maps.size();
+        VTR_ASSERT(num_modes > 0);
+
+        pb_type->num_modes = num_modes;
+        pb_type->modes = new t_mode[num_modes];
+
+        int count = 0;
+        for (const auto& map : maps) {
+            if (map.site != site.getName())
+                continue;
+
+            int idx = count++;
+            t_mode* mode = &pb_type->modes[idx];
+            auto name = str(map.cell);
+            mode->name = vtr::strdup(name.c_str());
+            mode->parent_pb_type = pb_type;
+            mode->index = idx;
+            mode->num_pb_type_children = 1;
+            mode->pb_type_children = new t_pb_type[1];
+
+            auto leaf = &mode->pb_type_children[0];
+            std::string leaf_name = name == std::string(pb_type->name) ? name + std::string("_leaf") : name;
+            leaf->name = vtr::strdup(leaf_name.c_str());
+            leaf->num_pb = 1;
+            leaf->parent_mode = mode;
+
+            // Pre-count pins: one interconnect for each pin that is not skipped
+            int ic_count = 0;
+            for (const auto& pin_map : map.pins) {
+                auto cell_pin = str(pin_map.first);
+
+                if (cell_pin == arch_->vcc_cell.first || cell_pin == arch_->gnd_cell.first)
+                    continue;
+
+                ic_count++;
+            }
+
+            // No port allocation here: create_ports() below sets both leaf->ports and leaf->num_ports
+            leaf->blif_model = vtr::strdup((std::string(".subckt ") + name).c_str());
+            leaf->model = get_model(arch_, name);
+
+            mode->num_interconnect = ic_count;
+            mode->interconnect = new t_interconnect[ic_count];
+            std::set<std::tuple<std::string, PORTS, int>> pins;
+            ic_count = 0;
+            for (const auto& pin_map : map.pins) {
+                auto cell_pin = str(pin_map.first);
+                auto bel_pin = str(pin_map.second);
+
+                if (cell_pin == arch_->vcc_cell.first || cell_pin == arch_->gnd_cell.first)
+                    continue;
+
+                // Split "name[idx]": the suffix is read first, as assigning cell_pin invalidates the match
+                std::smatch regex_matches;
+                std::string pin_suffix;
+                if (std::regex_match(cell_pin, regex_matches, port_regex)) {
+                    pin_suffix = std::string("[") + regex_matches[2].str() + std::string("]");
+                    cell_pin = regex_matches[1].str();
+                }
+
+                auto model_port = get_model_port(arch_, name, cell_pin);
+                auto size = model_port->size;
+                auto dir = model_port->dir;
+
+                // Assign suffix to bel pin as it is a inout pin which was split in out and in ports
+                auto pin_reader = get_bel_pin_reader(site, bel, bel_pin);
+                bool is_inout = pin_reader.getDir() == INOUT;
+
+                pins.emplace(cell_pin, dir, size);
+
+                std::string istr, ostr, ic_name;
+                switch (dir) {
+                    case IN_PORT:
+                        bel_pin = is_inout ? bel_pin + in_suffix_ : bel_pin;
+                        istr = pb_name + std::string(".") + bel_pin;
+                        ostr = leaf_name + std::string(".") + cell_pin + pin_suffix;
+                        break;
+                    case OUT_PORT:
+                        bel_pin = is_inout ? bel_pin + out_suffix_ : bel_pin;
+                        istr = leaf_name + std::string(".") + cell_pin + pin_suffix;
+                        ostr = pb_name + std::string(".") + bel_pin;
+                        break;
+                    default:
+                        VTR_ASSERT(0);
+                }
+
+                ic_name = istr + std::string("_") + ostr;
+
+                auto ic = &mode->interconnect[ic_count++];
+                ic->name = vtr::strdup(ic_name.c_str());
+                ic->type = DIRECT_INTERC;
+                ic->parent_mode_index = idx;
+                ic->parent_mode = mode;
+                ic->input_string = vtr::strdup(istr.c_str());
+                ic->output_string = vtr::strdup(ostr.c_str());
+            }
+
+            create_ports(leaf, pins, name);
+        }
+    }
+
+    /** @brief Generates a routing block to allow for cascading routing blocks to be
+     *         placed in the same complex block type.
+     */
+    void process_routing_block(t_pb_type* pb_type) {
+        const int mode_idx = 0;
+        const std::string block_name(pb_type->name);
+
+        // Single mode, named after the block itself and without any child
+        pb_type->num_modes = 1;
+        pb_type->modes = new t_mode[1];
+
+        t_mode* mode = &pb_type->modes[mode_idx];
+        mode->name = vtr::strdup(block_name.c_str());
+        mode->parent_pb_type = pb_type;
+        mode->index = mode_idx;
+        mode->num_pb_type_children = 0;
+
+        // The MUX interconnections can only have a single output
+        VTR_ASSERT(pb_type->num_output_pins == 1);
+
+        // Inputs are gathered in a space separated list, the output is taken
+        // from the OUT_PORT port of the block
+        std::string inputs;
+        std::string output;
+        for (int iport = 0; iport < pb_type->num_ports; iport++) {
+            const t_port& port = pb_type->ports[iport];
+            const std::string endpoint = block_name + "." + std::string(port.name);
+
+            if (port.type == IN_PORT) {
+                if (!inputs.empty())
+                    inputs += " ";
+                inputs += endpoint;
+            } else if (port.type == OUT_PORT) {
+                output = endpoint;
+            } else {
+                VTR_ASSERT(0);
+            }
+        }
+
+        mode->num_interconnect = 1;
+        mode->interconnect = new t_interconnect[1];
+
+        // With one input only there is nothing to select: use a direct connection
+        t_interconnect* ic = &mode->interconnect[mode_idx];
+        ic->name = vtr::strdup(block_name.c_str());
+        ic->type = pb_type->num_input_pins == 1 ? DIRECT_INTERC : MUX_INTERC;
+        ic->parent_mode_index = mode_idx;
+        ic->parent_mode = mode;
+        ic->input_string = vtr::strdup(inputs.c_str());
+        ic->output_string = vtr::strdup(output.c_str());
+    }
+
+    /** @brief Processes all the ports of a given complex block.
+     *         If a bel name index is specified, the bel pins are processed, otherwise the site ports
+     *         are processed instead.
+     */
+    void process_block_ports(t_pb_type* pb_type, Device::SiteType::Reader& site, size_t bel_name = OPEN) {
+        std::set<std::tuple<std::string, PORTS, int>> pins;
+
+        if (bel_name == (size_t)OPEN) {
+            // No bel given: use the pins of the site. Anything that is not an INPUT becomes an output port.
+            for (auto site_pin : site.getPins()) {
+                PORTS dir = OUT_PORT;
+                if (site_pin.getDir() == INPUT)
+                    dir = IN_PORT;
+                pins.emplace(str(site_pin.getName()), dir, 1);
+            }
+        } else {
+            // Use the pins of the given bel; an INOUT pin is split in an input and an output port
+            auto bel = get_bel_reader(site, str(bel_name));
+            auto site_bel_pins = site.getBelPins();
+            for (auto pin_idx : bel.getPins()) {
+                auto bel_pin = site_bel_pins[pin_idx];
+                const std::string pin_name = str(bel_pin.getName());
+                const auto dir = bel_pin.getDir();
+                if (dir == INPUT) {
+                    pins.emplace(pin_name, IN_PORT, 1);
+                } else if (dir == OUTPUT) {
+                    pins.emplace(pin_name, OUT_PORT, 1);
+                } else if (dir == INOUT) {
+                    pins.emplace(pin_name + in_suffix_, IN_PORT, 1);
+                    pins.emplace(pin_name + out_suffix_, OUT_PORT, 1);
+                } else {
+                    VTR_ASSERT(0);
+                }
+            }
+        }
+
+        create_ports(pb_type, pins);
+    }
+
+    /** @brief Generates all the port for a complex block, given its pointer and a map of ports (key) and their direction and width */
+    void create_ports(t_pb_type* pb_type, std::set<std::tuple<std::string, PORTS, int>>& pins, std::string model = "") {
+        // `pins` holds (name, direction, width) entries; a non-empty `model` binds each port to its model port
+        auto num_ports = pins.size();
+        auto ports = new t_port[num_ports];
+        pb_type->ports = ports;
+        pb_type->num_ports = pb_type->num_pins = num_ports;
+        pb_type->num_input_pins = 0;
+        pb_type->num_output_pins = 0;
+
+        // Ports are laid out with all the inputs first, followed by the outputs
+        int pin_abs = 0;
+        int pin_count = 0;
+        for (auto dir : {IN_PORT, OUT_PORT}) {
+            int pins_dir_count = 0;
+            for (const auto& pin_tuple : pins) {
+                if (std::get<1>(pin_tuple) != dir)
+                    continue;
+
+                const std::string& pin_name = std::get<0>(pin_tuple);
+                const int num_pins = std::get<2>(pin_tuple);
+                // NOTE(review): one is counted per port regardless of its width — confirm this is intended
+                bool is_input = dir == IN_PORT;
+                pb_type->num_input_pins += is_input ? 1 : 0;
+                pb_type->num_output_pins += is_input ? 0 : 1;
+
+                // Work on the array element itself, so that the fields set below end up in pb_type->ports
+                t_port& port = ports[pin_count];
+                port = get_generic_port(arch_, pb_type, dir, pin_name, /*string_model=*/"", num_pins);
+                port.index = pin_count++;
+                port.port_index_by_type = pins_dir_count++;
+                port.absolute_first_pin_index = pin_abs;
+                pin_abs += num_pins; // the next port starts after all the pins of this one
+
+                if (!model.empty())
+                    port.model_port = get_model_port(arch_, model, pin_name);
+            }
+        }
+    }
+
+    /** @brief Processes and creates the interconnects corresponding to a given mode */
+    void process_interconnects(t_mode* mode, Device::SiteType::Reader& site) {
+        auto ics = get_interconnects(site);
+        auto num_ic = ics.size();
+
+        mode->num_interconnect = num_ic;
+        mode->interconnect = new t_interconnect[num_ic];
+
+        int curr_ic = 0;
+        std::unordered_set<std::string> names;
+
+        auto merge_string = [](std::string ss, std::string s) {
+            return ss.empty() ? s : ss + " " + s;
+        };
+
+        // Handle site wires, namely direct interconnects
+        for (const auto& ic_pair : ics) {
+            const auto& ic_name = ic_pair.first;
+            const auto& ic_data = ic_pair.second;
+
+            std::string outputs_str = std::accumulate(ic_data.outputs.begin(), ic_data.outputs.end(), std::string(), merge_string);
+
+            t_interconnect* ic = &mode->interconnect[curr_ic++];
+
+            // No line num for interconnects, as line num is XML specific
+            // TODO: probably line_num should be deprecated as it is dependent
+            //       on the input architecture format.
+            ic->line_num = 0;
+            ic->type = DIRECT_INTERC;
+            ic->parent_mode_index = mode->index;
+            ic->parent_mode = mode;
+
+            // Names must be unique; the insertion is kept out of the assertion expression
+            bool is_new_name = names.insert(ic_name).second;
+            VTR_ASSERT(is_new_name);
+            ic->name = vtr::strdup(ic_name.c_str());
+            ic->input_string = vtr::strdup(ic_data.input.c_str());
+            ic->output_string = vtr::strdup(outputs_str.c_str());
+        }
+
+        // Checks and, in case, adds all the necessary pack patterns to the marked interconnects
+        for (size_t iic = 0; iic < num_ic; iic++) {
+            t_interconnect* ic = &mode->interconnect[iic];
+
+            const auto& ic_data = ics.at(std::string(ic->name));
+
+            if (!ic_data.requires_pack_pattern)
+                continue;
+
+            auto backward_pps_map = propagate_pack_patterns(ic, site, BACKWARD);
+            auto forward_pps_map = propagate_pack_patterns(ic, site, FORWARD);
+
+            std::unordered_map<t_interconnect*, std::set<std::string>> pps_map;
+
+            for (const auto& pp : backward_pps_map)
+                pps_map.emplace(pp.first, std::set<std::string>{});
+
+            for (const auto& pp : forward_pps_map)
+                pps_map.emplace(pp.first, std::set<std::string>{});
+
+            // Cross-product of all pack-patterns added both when exploring backwards and forward.
+            // Each final name is "<forward>_<backward>". E.g.:
+            //   Generated pack patterns
+            //      - backward: OBUFDS, OBUF
+            //      - forward: OPAD
+            //  Final pack patterns:
+            //      - OPAD_OBUFDS, OPAD_OBUF
+            for (const auto& for_pp_pair : forward_pps_map)
+                for (const auto& back_pp_pair : backward_pps_map)
+                    for (const auto& for_pp : for_pp_pair.second)
+                        for (const auto& back_pp : back_pp_pair.second) {
+                            std::string pp_name = for_pp + "_" + back_pp;
+                            pps_map.at(for_pp_pair.first).insert(pp_name);
+                            pps_map.at(back_pp_pair.first).insert(pp_name);
+                        }
+
+            for (const auto& pair : pps_map) {
+                t_interconnect* pp_ic = pair.first;
+
+                auto num_pp = pair.second.size();
+                pp_ic->num_annotations = num_pp;
+                pp_ic->annotations = new t_pin_to_pin_annotation[num_pp];
+
+                int idx = 0;
+                for (const auto& pp_name : pair.second)
+                    pp_ic->annotations[idx++] = get_pack_pattern(pp_name, pp_ic->input_string, pp_ic->output_string);
+            }
+        }
+    }
+
+    /** @brief Propagates and generates all pack_patterns required for the given ic.
+     *         This is necessary to find all root blocks that generate the pack pattern.
+     */
+    std::unordered_map<t_interconnect*, std::set<std::string>> propagate_pack_patterns(t_interconnect* ic, Device::SiteType::Reader& site, e_pp_dir direction) {
+        auto site_pins = site.getBelPins();
+
+        bool is_backward = direction == BACKWARD;
+        // Going backward the search starts from the input of the interconnect, going forward from its outputs
+        std::string endpoint = is_backward ? ic->input_string : ic->output_string;
+        auto ic_endpoints = vtr::split(endpoint, " ");
+
+        std::unordered_map<t_interconnect*, std::set<std::string>> pps_map;
+
+        for (const auto& ep : ic_endpoints) {
+            // An endpoint is "<bel>.<pin>": only the bel part is needed here
+            auto parts = vtr::split(ep, ".");
+            VTR_ASSERT(!parts.empty());
+            auto bel = parts[0];
+
+            // NOTE(review): a site pin stops the whole search, endpoints not visited yet included — confirm
+            if (bel == str(site.getName()))
+                return pps_map;
+
+            // Assign mode and pb_type
+            t_mode* parent_mode = ic->parent_mode;
+            t_pb_type* pb_type = nullptr;
+
+            for (int ipb = 0; ipb < parent_mode->num_pb_type_children; ipb++)
+                if (std::string(parent_mode->pb_type_children[ipb].name) == bel)
+                    pb_type = &parent_mode->pb_type_children[ipb];
+
+            VTR_ASSERT(pb_type != nullptr);
+
+            auto bel_reader = get_bel_reader(site, remove_bel_suffix(bel));
+
+            // Passing through routing mux. Check at the muxes input pins interconnects
+            if (bel_reader.getCategory() == ROUTING) {
+                for (auto bel_pin : bel_reader.getPins()) {
+                    auto pin_reader = site_pins[bel_pin];
+                    if (pin_reader.getDir() != (is_backward ? INPUT : OUTPUT))
+                        continue;
+
+                    // Same for every interconnect examined below
+                    const std::string ic_to_find = bel + "." + str(pin_reader.getName());
+
+                    for (int iic = 0; iic < parent_mode->num_interconnect; iic++) {
+                        t_interconnect* other_ic = &parent_mode->interconnect[iic];
+
+                        if (std::string(ic->name) == std::string(other_ic->name))
+                            continue;
+
+                        bool found = false;
+                        for (const auto& out : vtr::split(is_backward ? other_ic->output_string : other_ic->input_string, " "))
+                            found |= out == ic_to_find;
+
+                        if (found) {
+                            // An output interconnect to propagate was found, continue searching
+                            auto res = propagate_pack_patterns(other_ic, site, direction);
+
+                            for (const auto& pp_map : res)
+                                pps_map.emplace(pp_map.first, pp_map.second);
+                        }
+                    }
+                }
+            } else {
+                VTR_ASSERT(bel_reader.getCategory() == LOGIC);
+
+                for (int imode = 0; imode < pb_type->num_modes; imode++) {
+                    t_mode* mode = &pb_type->modes[imode];
+
+                    for (int iic = 0; iic < mode->num_interconnect; iic++) {
+                        t_interconnect* other_ic = &mode->interconnect[iic];
+
+                        bool found = false;
+                        for (const auto& other_ep : vtr::split(is_backward ? other_ic->output_string : other_ic->input_string, " "))
+                            found |= other_ep == ep;
+
+                        if (found) {
+                            std::string pp_name = std::string(pb_type->name) + "." + std::string(mode->name);
+
+                            std::set<std::string> pp{pp_name};
+                            auto res = pps_map.emplace(other_ic, pp);
+
+                            if (!res.second)
+                                res.first->second.insert(pp_name);
+                        }
+                    }
+                }
+            }
+        }
+
+        return pps_map;
+    }
+
+    // Physical Tiles
+    void process_tiles() {
+        // The EMPTY tile is always the first physical type, with index 0
+        int index = 0;
+        auto EMPTY = get_empty_physical_type();
+        EMPTY.index = index;
+        ptypes_.push_back(EMPTY);
+
+        auto tileTypeList = ar_.getTileTypeList();
+        auto siteTypeList = ar_.getSiteTypeList();
+
+        for (auto tile : tileTypeList) {
+            auto name = str(tile.getName());
+            if (name == EMPTY.name)
+                continue;
+
+            // Tiles without any site among the ones to take are left out
+            bool has_valid_sites = false;
+            for (auto site_in_tile : tile.getSiteTypes()) {
+                auto primary_site = siteTypeList[site_in_tile.getPrimaryType()];
+                if (take_sites_.count(primary_site.getName()) != 0)
+                    has_valid_sites = true;
+            }
+            if (!has_valid_sites)
+                continue;
+
+            t_physical_tile_type ptype;
+            ptype.name = vtr::strdup(name.c_str());
+            ptype.index = ++index;
+            ptype.width = ptype.height = ptype.area = 1;
+            ptype.capacity = 0;
+            process_sub_tiles(ptype, tile);
+            setup_pin_classes(&ptype);
+
+            // The tile is both an input and an output type if any bel of its sites is a pad
+            bool is_io = false;
+            for (auto site_in_tile : tile.getSiteTypes()) {
+                auto site_type = siteTypeList[site_in_tile.getPrimaryType()];
+                for (auto bel : site_type.getBels())
+                    if (is_pad(str(bel.getName())))
+                        is_io = true;
+            }
+            ptype.is_input_type = ptype.is_output_type = is_io;
+
+            // TODO: remove the following once the RR graph generation is fully enabled from the device database
+            ptype.switchblock_locations = vtr::Matrix<e_sb_type>({{1, 1}}, e_sb_type::FULL);
+            ptype.switchblock_switch_overrides = vtr::Matrix<int>({{1, 1}}, DEFAULT_SWITCH);
+
+            ptypes_.push_back(ptype);
+        }
+    }
+
+    /** @brief Adds one sub tile to `type` for every site type of `tile` listed in take_sites_.
+     *
+     * Every INPUT or OUTPUT site pin becomes a single physical port of the sub tile.
+     * `type.capacity` is incremented once per sub tile that is added. */
+    void process_sub_tiles(t_physical_tile_type& type, Device::TileType::Reader& tile) {
+        // TODO: only one subtile at the moment
+        auto siteTypeList = ar_.getSiteTypeList();
+        for (auto site_in_tile : tile.getSiteTypes()) {
+            auto site = siteTypeList[site_in_tile.getPrimaryType()];
+
+            if (take_sites_.count(site.getName()) == 0)
+                continue;
+
+            t_sub_tile sub_tile;
+            sub_tile.index = type.capacity;
+            sub_tile.name = vtr::strdup(str(site.getName()).c_str());
+            sub_tile.capacity.set(type.capacity, type.capacity);
+            type.capacity++;
+
+            int port_idx = 0;
+            int abs_first_pin_idx = 0;
+            int icount = 0;
+            int ocount = 0;
+
+            // Inputs are visited first, then outputs, so port indices are grouped by direction.
+            // Pins with any other direction get no port, although pins_size below still counts
+            // every pin of the site.
+            for (auto dir : {INPUT, OUTPUT}) {
+                int port_idx_by_type = 0;
+                for (auto pin : site.getPins()) {
+                    if (pin.getDir() != dir)
+                        continue;
+
+                    t_physical_tile_port port;
+
+                    port.name = vtr::strdup(str(pin.getName()).c_str());
+
+                    sub_tile.sub_tile_to_tile_pin_indices.push_back(type.num_pins + port_idx);
+                    port.index = port_idx++;
+
+                    port.absolute_first_pin_index = abs_first_pin_idx++;
+                    port.port_index_by_type = port_idx_by_type++;
+
+                    if (dir == INPUT) {
+                        port.type = IN_PORT;
+                        icount++;
+                    } else {
+                        port.type = OUT_PORT;
+                        ocount++;
+                    }
+
+                    sub_tile.ports.push_back(port);
+                }
+            }
+
+            auto pins_size = site.getPins().size();
+            fill_sub_tile(type, sub_tile, pins_size, icount, ocount);
+
+            type.sub_tiles.push_back(sub_tile);
+        }
+    }
+
+    /** @brief The constant block is a synthetic tile which is used to assign a virtual
+     *         location in the grid to the constant signals which are then driven to
+     *         all the real constant wires.
+     *
+     * The block's diagram can be seen below. The port names are specified in
+     * the interchange device database, therefore GND and VCC are mainly
+     * examples in this case.
+     *
+     * +---------------+
+     * |               |
+     * |  +-------+    |
+     * |  |       |    |
+     * |  |  GND  +----+--> RR Graph node
+     * |  |       |    |
+     * |  +-------+    |
+     * |               |
+     * |               |
+     * |  +-------+    |
+     * |  |       |    |
+     * |  |  VCC  +----+--> RR Graph node
+     * |  |       |    |
+     * |  +-------+    |
+     * |               |
+     * +---------------+
+     */
+    void process_constant_block() {
+        // Each pair of constant cell data is read below as (cell name, output port name);
+        // the cell name also names the leaf pb_type, its blif model and its interconnect.
+        std::vector<std::pair<std::string, std::string>> cells{arch_->gnd_cell, arch_->vcc_cell};
+
+        // Root pb_type of the constant complex block: it only has output pins
+        auto root = new t_pb_type;
+        root->name = vtr::strdup(const_block_.c_str());
+        root->num_pb = 1;
+
+        root->num_input_pins = 0;
+        root->num_clock_pins = 0;
+        root->num_output_pins = 2;
+        root->num_pins = 2;
+
+        root->num_ports = 2;
+        root->ports = (t_port*)vtr::calloc(root->num_ports, sizeof(t_port));
+
+        root->num_modes = 1;
+        root->modes = new t_mode[root->num_modes];
+
+        // The only mode holds one leaf pb_type and one direct interconnect per constant cell
+        t_mode* mode = &root->modes[0];
+        mode->name = vtr::strdup("default");
+        mode->index = 0;
+        mode->parent_pb_type = root;
+        mode->disable_packing = false;
+
+        mode->num_pb_type_children = 2;
+        mode->pb_type_children = new t_pb_type[mode->num_pb_type_children];
+
+        mode->num_interconnect = 2;
+        mode->interconnect = new t_interconnect[mode->num_interconnect];
+
+        // Fill in the leaf pb_type, the block-level port and the interconnect of every constant cell
+        for (size_t i = 0; i < cells.size(); ++i) {
+            std::string cell_name = cells[i].first;
+            std::string port_name = cells[i].second;
+            std::string block_port_name = cell_name + "_" + port_name;
+
+            // Leaf primitive with a single one-pin output port; its model is looked up by the cell name
+            t_pb_type* leaf = &mode->pb_type_children[i];
+            leaf->name = vtr::strdup(cell_name.c_str());
+            leaf->num_pb = 1;
+            leaf->parent_mode = mode;
+
+            leaf->num_input_pins = 0;
+            leaf->num_clock_pins = 0;
+            leaf->num_output_pins = 1;
+            leaf->num_pins = 1;
+
+            leaf->num_ports = 1;
+            leaf->ports = (t_port*)vtr::calloc(leaf->num_ports, sizeof(t_port));
+            leaf->blif_model = vtr::strdup(cell_name.c_str());
+            leaf->model = get_model(arch_, cell_name);
+
+            leaf->ports[0] = get_generic_port(arch_, leaf, OUT_PORT, port_name, cell_name);
+            // NOTE(review): the block-level port is also created with the leaf as its pb_type
+            //               argument; confirm this is intended.
+            root->ports[i] = get_generic_port(arch_, leaf, OUT_PORT, block_port_name);
+
+            // Direct connection <cell>.<port> -> <block>.<cell>_<port>
+            t_interconnect* ic = &mode->interconnect[i];
+            ic->name = vtr::strdup(cell_name.c_str());
+            ic->type = DIRECT_INTERC;
+            ic->parent_mode_index = 0;
+            ic->parent_mode = mode;
+
+            ic->input_string = vtr::strdup((cell_name + "." + port_name).c_str());
+            ic->output_string = vtr::strdup((const_block_ + "." + block_port_name).c_str());
+        }
+
+        // Logical block wrapping the pb_type hierarchy built above
+        t_logical_block_type block;
+        block.name = vtr::strdup(const_block_.c_str());
+        block.index = ltypes_.size();
+        block.pb_type = root;
+
+        ltypes_.push_back(block);
+    }
+
+    /** @brief Prepends one model per constant cell (gnd_cell, vcc_cell) to arch_->models.
+     *  Each model gets a single one-bit output port and the index of the current list head plus one. */
+    void process_constant_model() {
+        std::vector<std::pair<std::string, std::string>> cells{arch_->gnd_cell, arch_->vcc_cell};
+        for (const auto& cell : cells) {
+            // cell.second names the only port of the model
+            auto out_port = new t_model_ports;
+            out_port->name = vtr::strdup(cell.second.c_str());
+            out_port->dir = OUT_PORT;
+            out_port->size = 1;
+            out_port->min_size = 1;
+
+            // cell.first names the model itself
+            auto model = new t_model;
+            model->name = vtr::strdup(cell.first.c_str());
+            model->never_prune = true;
+            model->index = arch_->models->index + 1; // reads the list head: the list must not be empty here
+            out_port->next = model->outputs;
+            model->outputs = out_port;
+
+            // Push the model at the front of the list
+            model->next = arch_->models;
+            arch_->models = model;
+        }
+    }
+
+    /** @brief Creates a synthetic constant tile that will be located in the external layer of the device.
+     *
+     *  The constant tile has two output ports, one for GND and the other for VCC. The constant tile hosts
+     *  the constant pb type that is generated as well. See process_constant_model and process_constant_block.
+     */
+    void process_constant_tile() {
+        std::vector<std::pair<std::string, std::string>> const_cells{arch_->gnd_cell, arch_->vcc_cell};
+        // Create constant tile
+        t_physical_tile_type constant;
+        constant.name = vtr::strdup(const_block_.c_str());
+        constant.index = ptypes_.size();
+        constant.width = constant.height = constant.area = 1;
+        constant.capacity = 1;
+        constant.is_input_type = constant.is_output_type = false;
+
+        constant.switchblock_locations = vtr::Matrix<e_sb_type>({{1, 1}}, e_sb_type::FULL);
+        constant.switchblock_switch_overrides = vtr::Matrix<int>({{1, 1}}, DEFAULT_SWITCH);
+
+        t_sub_tile sub_tile;
+        sub_tile.index = 0;
+        sub_tile.name = vtr::strdup(const_block_.c_str());
+        int count = 0;
+        for (const auto& const_cell : const_cells) {
+            sub_tile.sub_tile_to_tile_pin_indices.push_back(count);
+
+            t_physical_tile_port port;
+            port.type = OUT_PORT;
+            port.num_pins = 1;
+            port.name = vtr::strdup((const_cell.first + "_" + const_cell.second).c_str());
+
+            // Every port is one pin wide and all ports are outputs, so the running count is at
+            // once the port index, its first pin and its index among the output ports.
+            port.index = port.absolute_first_pin_index = port.port_index_by_type = count;
+            sub_tile.ports.push_back(port);
+
+            count++;
+        }
+
+        fill_sub_tile(constant, sub_tile, 2, 0, 2);
+        constant.sub_tiles.push_back(sub_tile);
+
+        setup_pin_classes(&constant);
+
+        ptypes_.push_back(constant);
+    }
+
+    // Layout Processing
+    /** @brief Emits one fixed-size grid layout per device package.
+     * Device tile (col, row) is placed at grid location (col + 1, row + 1) and only tiles
+     * hosting a site whose primary type is in take_sites_ are placed. The grid spans the
+     * bounding box of all device tiles plus two extra columns and two extra rows.
+     */
+    void process_layout() {
+        auto tiles = ar_.getTileList();
+        auto tile_types = ar_.getTileTypeList();
+        auto site_types = ar_.getSiteTypeList();
+
+        // The extent does not depend on the package, so it is computed only once
+        int grid_width = 0;
+        int grid_height = 0;
+        for (auto tile : tiles) {
+            grid_width = std::max(grid_width, tile.getCol() + 1);
+            grid_height = std::max(grid_height, tile.getRow() + 1);
+        }
+        grid_width += 2;
+        grid_height += 2;
+
+        for (auto package : ar_.getPackages()) {
+            std::string name = str(package.getName());
+            if (name == "auto") {
+                // At the moment, the interchange specifies fixed-layout only architectures,
+                // and allowing for auto-sizing could potentially be implemented later on
+                // to allow for experimentation on new architectures.
+                // For the time being the layout is restricted to be only fixed.
+                archfpga_throw(arch_file_, __LINE__,
+                               "The name auto is reserved for auto-size layouts; please choose another name");
+            }
+
+            t_grid_def grid_def;
+            grid_def.name = name;
+            grid_def.grid_type = GridDefType::FIXED;
+            grid_def.width = grid_width;
+            grid_def.height = grid_height;
+
+            for (auto tile : tiles) {
+                auto tile_type = tile_types[tile.getType()];
+
+                bool found = false;
+                for (auto site : tile.getSites()) {
+                    auto site_type_in_tile = tile_type.getSiteTypes()[site.getType()];
+                    auto site_type = site_types[site_type_in_tile.getPrimaryType()];
+
+                    found |= take_sites_.count(site_type.getName()) != 0;
+                }
+
+                if (!found)
+                    continue;
+
+                t_grid_loc_def single(str(tile_type.getName()), 1);
+                single.x.start_expr = std::to_string(tile.getCol() + 1);
+                single.y.start_expr = std::to_string(tile.getRow() + 1);
+
+                single.x.end_expr = single.x.start_expr + " + w - 1";
+                single.y.end_expr = single.y.start_expr + " + h - 1";
+
+                // No metadata is attached yet: the location owns an empty dictionary
+                single.owned_meta = std::make_unique<t_metadata_dict>();
+                single.meta = single.owned_meta.get();
+                grid_def.loc_defs.emplace_back(std::move(single));
+            }
+
+            // The constant source tile is placed at grid location (1, 1).
+            // NOTE(review): a device tile at column 0, row 0 maps to the same location; confirm the intended priority.
+            t_grid_loc_def constant(const_block_, 1);
+            constant.x.start_expr = std::to_string(1);
+            constant.y.start_expr = std::to_string(1);
+            constant.x.end_expr = constant.x.start_expr + " + w - 1";
+            constant.y.end_expr = constant.y.start_expr + " + h - 1";
+
+            grid_def.loc_defs.emplace_back(std::move(constant));
+
+            arch_->grid_layouts.emplace_back(std::move(grid_def));
+        }
+    }
+
+    /** @brief Sets the device-wide architecture parameters and the default Fc to hard-coded values. */
+    void process_device() {
+        /*
+         * The generic architecture data is not currently available in the interchange format
+         * therefore, for a very initial implementation, the values are taken from the ones
+         * used primarily in the Xilinx series7 devices, generated using SymbiFlow.
+         *
+         * As the interchange format develops further, with possibly more details, this function can
+         * become dynamic, allowing for different parameters for the different architectures.
+         *
+         * FIXME: This will require to be dynamically assigned, and a suitable representation added
+         *        to the FPGA interchange device schema.
+         */
+        auto set_uniform = [](auto& dist) {
+            dist.type = UNIFORM;
+            dist.peak = 1;
+            dist.width = 0;
+            dist.xpeak = 0;
+            dist.dc = 0;
+        };
+        set_uniform(arch_->Chans.chan_x_dist);
+        set_uniform(arch_->Chans.chan_y_dist);
+        arch_->R_minW_nmos = 6065.520020;
+        arch_->R_minW_pmos = 18138.500000;
+        arch_->grid_logic_tile_area = 14813.392;
+        arch_->ipin_cblock_switch_name = std::string("generic");
+        arch_->SBType = WILTON;
+        arch_->Fs = 3;
+        default_fc_.specified = true;
+        default_fc_.in_value_type = e_fc_value_type::FRACTIONAL;
+        default_fc_.in_value = 1.0;
+        default_fc_.out_value_type = e_fc_value_type::FRACTIONAL;
+        default_fc_.out_value = 1.0;
+    }
+
+    /** @brief Creates the architecture switches.
+     *
+     * Switch 0 is the "short" switch and switch 1 the "generic" mux; every further switch
+     * models one distinct (buffered, timing model) pair found on the device PIPs.
+     */
+    void process_switches() {
+        // Distinct (buffered, timing model index) pairs over the PIPs of all tile types
+        std::set<std::pair<bool, uint32_t>> unique_models;
+        for (auto tile_type : ar_.getTileTypeList()) {
+            for (auto pip : tile_type.getPips()) {
+                unique_models.emplace(pip.getBuffered21(), pip.getTiming());
+                if (!pip.getDirectional())
+                    unique_models.emplace(pip.getBuffered20(), pip.getTiming());
+            }
+        }
+
+        // Same pairs, in the set's order, addressable by position
+        std::vector<std::pair<bool, uint32_t>> models(unique_models.begin(), unique_models.end());
+
+        auto timing_data = ar_.getPipTimings();
+
+        // Two fixed switches come before the ones derived from the PIPs, so the count is never zero
+        const size_t num_switches = models.size() + 2;
+        arch_->num_switches = num_switches;
+        arch_->Switches = new t_arch_switch_inf[num_switches];
+
+        // Switches 0 and 1 are fixed; switch i >= 2 is derived from models[i - 2]
+        for (size_t i = 0; i < num_switches; ++i) {
+            float R = 0.0, Cin = 0.0, Cint = 0.0, Cout = 0.0, Tdel = 0.0;
+            std::string switch_name;
+            SwitchType type;
+
+            switch (i) {
+                case 0:
+                    switch_name = "short";
+                    type = SwitchType::SHORT;
+                    break;
+                case 1:
+                    switch_name = "generic";
+                    type = SwitchType::MUX;
+                    break;
+                default: {
+                    const bool buffered = models[i - 2].first;
+                    auto model = timing_data[models[i - 2].second];
+
+                    // FIXME: allow to dynamically choose different speed models and corners
+                    R = get_corner_value(model.getOutputResistance(), "slow", "min");
+                    Cin = get_corner_value(model.getInputCapacitance(), "slow", "min");
+                    Cout = get_corner_value(model.getOutputCapacitance(), "slow", "min");
+                    if (buffered)
+                        Cint = get_corner_value(model.getInternalCapacitance(), "slow", "min");
+                    Tdel = get_corner_value(model.getInternalDelay(), "slow", "min");
+
+                    // The name spells out the electrical values; the trailing index keeps it unique
+                    std::stringstream name;
+                    name << (buffered ? "mux_" : "passGate_") << std::scientific;
+                    name << "R" << R << "Cin" << Cin << "Cout" << Cout;
+                    if (buffered)
+                        name << "Cinternal" << Cint;
+                    name << "Tdel" << Tdel;
+
+                    switch_name = name.str() + std::to_string(i);
+                    type = buffered ? SwitchType::MUX : SwitchType::PASS_GATE;
+                    break;
+                }
+            }
+
+            /* Should never happen */
+            if (switch_name == std::string(VPR_DELAYLESS_SWITCH_NAME)) {
+                archfpga_throw(arch_file_, __LINE__,
+                               "Switch name '%s' is a reserved name for VPR internal usage!", switch_name.c_str());
+            }
+
+            // Store the name, the type and the electrical values in the i-th architecture switch
+            t_arch_switch_inf* as = &arch_->Switches[i];
+            as->name = vtr::strdup(switch_name.c_str());
+            as->set_type(type);
+            as->mux_trans_size = as->type() == SwitchType::MUX ? 1 : 0;
+
+            as->R = R;
+            as->Cin = Cin;
+            as->Cout = Cout;
+            as->Cinternal = Cint;
+            as->set_Tdel(t_arch_switch_inf::UNDEFINED_FANIN, Tdel);
+
+            // Only muxes get an automatically sized buffer
+            const bool unbuffered = as->type() == SwitchType::SHORT || as->type() == SwitchType::PASS_GATE;
+            if (!unbuffered) {
+                as->buf_size_type = BufferSize::AUTO;
+                as->buf_size = 0.;
+                as->power_buffer_type = POWER_BUFFER_TYPE_AUTO;
+            } else {
+                // Shorts and pass gates have an absolute buffer size of zero
+                as->buf_size_type = BufferSize::ABSOLUTE;
+                as->buf_size = 0;
+                as->power_buffer_type = POWER_BUFFER_TYPE_ABSOLUTE_SIZE;
+                as->power_buffer_size = 0.;
+            }
+        }
+    }
+
+    /** @brief Declares the routing segment types of the architecture (currently a single one). */
+    void process_segments() {
+        // Candidate segment names: the names of all the wires connected to PIPs,
+        // which are a good representation for nodes
+        std::set<uint32_t> pip_wire_names;
+        for (auto tile_type : ar_.getTileTypeList()) {
+            auto wires = tile_type.getWires();
+            for (auto pip : tile_type.getPips()) {
+                pip_wire_names.insert(wires[pip.getWire0()]);
+                pip_wire_names.insert(wires[pip.getWire1()]);
+            }
+        }
+
+        // FIXME: have only one segment type for the time being, so that
+        //        the RR graph generation is correct.
+        //        This can be removed once the RR graph reader from the interchange
+        //        device is ready and functional.
+        const size_t num_seg = 1; // instead of one segment per collected wire name
+        arch_->Segments.resize(num_seg);
+
+        size_t index = 0;
+        for (auto wire_name : pip_wire_names) {
+            if (index >= num_seg) break;
+
+            // Use default values as the rr_graph will be populated with the correct ones:
+            // these segments are just declarations for future use
+            std::string seg_name = str(wire_name);
+            auto& seg = arch_->Segments[index];
+            seg.name = seg_name;
+            seg.length = 1;
+            seg.frequency = 1;
+            seg.Rmetal = 1e-12;
+            seg.Cmetal = 1e-12;
+            seg.parallel_axis = BOTH_AXIS;
+
+            // TODO: Only bi-directional segments are created, but the interchange format
+            //       has directionality information on PIPs, which may be used to infer the
+            //       segments' directionality.
+            seg.directionality = BI_DIRECTIONAL;
+            seg.arch_wire_switch = 1;
+            seg.arch_opin_switch = 1;
+            seg.cb.assign(1, true);
+            seg.sb.assign(2, true);
+            segment_name_to_segment_idx[seg_name] = index;
+            ++index;
+        }
+    }
+};
+
+/** @brief Reads a gzip-compressed FPGA interchange device file into `arch` and the two type lists. */
+void FPGAInterchangeReadArch(const char* FPGAInterchangeDeviceFile,
+                             const bool /*timing_enabled*/,
+                             t_arch* arch,
+                             std::vector<t_physical_tile_type>& PhysicalTileTypes,
+                             std::vector<t_logical_block_type>& LogicalBlockTypes) {
+    // Decompress GZipped capnproto device file
+    gzFile file = gzopen(FPGAInterchangeDeviceFile, "rb");
+    if (file == Z_NULL) // reported as an error, since an assertion may be disabled at build time
+        archfpga_throw(FPGAInterchangeDeviceFile, __LINE__, "Unable to open the FPGA interchange device file");
+
+    std::vector<uint8_t> output_data(4096);
+    std::stringstream sstream(std::ios_base::in | std::ios_base::out | std::ios_base::binary);
+    while (true) {
+        int ret = gzread(file, output_data.data(), output_data.size());
+        VTR_ASSERT(ret >= 0);
+        if (ret > 0) {
+            sstream.write((const char*)output_data.data(), ret);
+            VTR_ASSERT(sstream);
+        } else {
+            int error;
+            gzerror(file, &error);
+            VTR_ASSERT(error == Z_OK);
+            break;
+        }
+    }
+
+    // gzclose() is kept out of the assertion expression so that the file is closed even if assertions are disabled
+    int close_status = gzclose(file);
+    VTR_ASSERT(close_status == Z_OK);
+
+    sstream.seekg(0);
+    kj::std::StdInputStream istream(sstream);
+
+    // Reader options
+    capnp::ReaderOptions reader_options;
+    reader_options.nestingLimit = std::numeric_limits<int>::max();
+    reader_options.traversalLimitInWords = std::numeric_limits<uint64_t>::max();
+
+    capnp::InputStreamMessageReader message_reader(istream, reader_options);
+    auto device_reader = message_reader.getRoot<DeviceResources::Device>();
+
+    arch->architecture_id = vtr::strdup(vtr::secure_digest_file(FPGAInterchangeDeviceFile).c_str());
+    ArchReader reader(arch, device_reader, FPGAInterchangeDeviceFile, PhysicalTileTypes, LogicalBlockTypes);
+    reader.read_arch();
+}
diff --git a/third_party/vtr/libs/archfpga/src/read_fpga_interchange_arch.h b/third_party/vtr/libs/archfpga/src/read_fpga_interchange_arch.h
new file mode 100644
index 000000000..1e84c0cbe
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/read_fpga_interchange_arch.h
@@ -0,0 +1,34 @@
+#ifndef READ_FPGAINTERCHANGE_ARCH_FILE_H
+#define READ_FPGAINTERCHANGE_ARCH_FILE_H
+
+#include "arch_types.h"
+
+#include "DeviceResources.capnp.h"
+#include "LogicalNetlist.capnp.h"
+#include "capnp/serialize.h"
+#include "capnp/serialize-packed.h"
+#include <fcntl.h>
+#include <unistd.h>
+
+#ifdef __cplusplus
+extern "C" { // NOTE(review): the declaration below uses C++-only types (std::vector, references); confirm that C linkage is intended
+#endif
+
+/* special type indexes, necessary for initialization, everything afterwards
+ * should use the pointers to these type indices */
+
+#define NUM_MODELS_IN_LIBRARY 4
+#define EMPTY_TYPE_INDEX 0
+
+/* Reads the gzip-compressed FPGA interchange device file and fills arch and the two type lists from it */
+void FPGAInterchangeReadArch(const char* FPGAInterchangeDeviceFile, // path of the device file
+                             const bool timing_enabled,             // not used by the current definition
+                             t_arch* arch,
+                             std::vector<t_physical_tile_type>& PhysicalTileTypes,
+                             std::vector<t_logical_block_type>& LogicalBlockTypes);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* READ_FPGAINTERCHANGE_ARCH_FILE_H */
diff --git a/third_party/vtr/libs/archfpga/src/read_xml_arch_file.cc b/third_party/vtr/libs/archfpga/src/read_xml_arch_file.cc
new file mode 100644
index 000000000..95481f264
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/read_xml_arch_file.cc
@@ -0,0 +1,5037 @@
+/* The XML parser processes an XML file into a tree data structure composed of
+ * pugi::xml_nodes.  Each node represents an XML element.  For example
+ * <a> <b/> </a> will generate two pugi::xml_nodes.  One called "a" and its
+ * child "b".  Each pugi::xml_node can contain various XML data such as attribute
+ * information and text content.  The XML parser provides several functions to
+ * help the developer build and traverse the tree (this is also sometimes referred to
+ * as the Document Object Model or DOM).
+ *
+ * For convenience it often makes sense to use some wrapper functions (provided in
+ * the pugiutil namespace of libvtrutil) which simplify loading an XML file and
+ * error handling.
+ *
+ * The function pugiutil::load_xml() reads in an xml file.
+ *
+ * The function pugiutil::get_single_child() returns a child xml_node for a given parent
+ * xml_node if there is a child which matches the name provided by the developer.
+ *
+ * The function pugiutil::get_attribute() is used to extract attributes from an
+ * xml_node, returning a pugi::xml_attribute. xml_attribute objects support accessors
+ * such as as_float(), as_int() to retrieve semantic values. See pugixml documentation
+ * for more details.
+ *
+ * Architecture file checks already implemented (Daniel Chen):
+ *		- Duplicate pb_types, pb_type ports, models, model ports,
+ *			interconnects, interconnect annotations.
+ *		- Port and pin range checking (port with 4 pins can only be
+ *			accessed within [0:3]).
+ *		- LUT delay matrix size matches # of LUT inputs
+ *		- Ensures XML tags are ordered.
+ *		- Clocked primitives that have timing annotations must have a clock
+ *			name matching the primitive.
+ *		- Enforced VPR definition of LUT and FF must have one input port (n pins)
+ *			and one output port(1 pin).
+ *		- Checks file extension for blif and architecture xml file, avoid crashes if
+ *			the two files are swapped on command line.
+ *
+ */
+
+#include <string.h>
+#include <map>
+#include <set>
+#include <string>
+#include <sstream>
+#include <algorithm>
+
+#include "pugixml.hpp"
+#include "pugixml_util.hpp"
+
+#include "vtr_assert.h"
+#include "vtr_log.h"
+#include "vtr_util.h"
+#include "vtr_memory.h"
+#include "vtr_digest.h"
+#include "vtr_token.h"
+#include "vtr_bimap.h"
+
+#include "arch_check.h"
+#include "arch_error.h"
+#include "arch_util.h"
+#include "arch_types.h"
+
+#include "read_xml_arch_file.h"
+#include "read_xml_util.h"
+#include "parse_switchblocks.h"
+
+#include "physical_types_util.h"
+
+using namespace std::string_literals;
+using pugiutil::ReqOpt;
+
+struct t_fc_override { // value returned by Process_Fc_override()
+    std::string port_name;         // presumably the port the override applies to; TODO confirm
+    std::string seg_name;          // presumably the segment the override applies to; TODO confirm
+    e_fc_value_type fc_value_type; // presumably tells how fc_value is to be interpreted; TODO confirm
+    float fc_value;
+};
+
+struct t_pin_counts { // pin counters, one per kind of pin; all of them start at zero
+    int input = 0;
+    int output = 0;
+    int clock = 0;
+
+    int total() const { // const: the sum can also be taken from a const object or reference
+        return input + output + clock;
+    }
+};
+
+struct t_pin_locs {
+  private:
+    // Distribution must be set once for each physical tile type
+    // and must be equal for each sub tile within a physical tile.
+    bool distribution_set = false;
+
+  public:
+    enum e_pin_location_distr distribution = E_SPREAD_PIN_DISTR;
+
+    /* [0..num_sub_tiles-1][0..width-1][0..height-1][0..3][0..num_tokens-1] */
+    vtr::NdMatrix<std::vector<std::string>, 4> assignments;
+
+    bool is_distribution_set() const { // read-only query, callable on const objects as well
+        return distribution_set;
+    }
+
+    void set_distribution() { // records that the distribution was set; asserts that it was not set before
+        VTR_ASSERT(distribution_set == false);
+        distribution_set = true;
+    }
+};
+
+/* Function prototypes */
+/*   Populate data */
+
+static void LoadPinLoc(pugi::xml_node Locations,
+                       t_physical_tile_type* type,
+                       t_pin_locs* pin_locs,
+                       const pugiutil::loc_data& loc_data);
+template<typename T>
+static std::pair<int, int> ProcessPinString(pugi::xml_node Locations,
+                                            T type,
+                                            const char* pin_loc_string,
+                                            const pugiutil::loc_data& loc_data);
+
+/* Process XML hierarchy */
+static void ProcessTiles(pugi::xml_node Node,
+                         std::vector<t_physical_tile_type>& PhysicalTileTypes,
+                         std::vector<t_logical_block_type>& LogicalBlockTypes,
+                         const t_default_fc_spec& arch_def_fc,
+                         t_arch& arch,
+                         const pugiutil::loc_data& loc_data);
+// TODO: Remove block_type_contains_blif_model / pb_type_contains_blif_model
+// as part of
+// https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1193
+static void MarkIoTypes(std::vector<t_physical_tile_type>& PhysicalTileTypes);
+static void ProcessTileProps(pugi::xml_node Node,
+                             t_physical_tile_type* PhysicalTileType,
+                             const pugiutil::loc_data& loc_data);
+static t_pin_counts ProcessSubTilePorts(pugi::xml_node Parent,
+                                        t_sub_tile* SubTile,
+                                        std::unordered_map<std::string, t_physical_tile_port>& tile_port_names,
+                                        const pugiutil::loc_data& loc_data);
+static void ProcessTilePort(pugi::xml_node Node,
+                            t_physical_tile_port* port,
+                            const pugiutil::loc_data& loc_data);
+static void ProcessTileEquivalentSites(pugi::xml_node Parent,
+                                       t_sub_tile* SubTile,
+                                       t_physical_tile_type* PhysicalTileType,
+                                       std::vector<t_logical_block_type>& LogicalBlockTypes,
+                                       const pugiutil::loc_data& loc_data);
+/* Equivalent-site helpers. Both take the same sub-tile / physical-tile /
+ * logical-block context; the "custom" flavour additionally receives a site name. */
+static void ProcessEquivalentSiteDirectConnection(pugi::xml_node Parent,
+                                                  t_sub_tile* SubTile,
+                                                  t_physical_tile_type* PhysicalTileType,
+                                                  t_logical_block_type* LogicalBlockType,
+                                                  const pugiutil::loc_data& loc_data);
+static void ProcessEquivalentSiteCustomConnection(pugi::xml_node Parent,
+                                                  t_sub_tile* SubTile,
+                                                  t_physical_tile_type* PhysicalTileType,
+                                                  t_logical_block_type* LogicalBlockType,
+                                                  std::string site_name,
+                                                  const pugiutil::loc_data& loc_data);
+/* Pin-location and sub-tile parsing for a physical tile.
+ * NOTE(review): presumably ProcessPinLocations fills *pin_locs, which LoadPinLoc
+ * later consumes -- confirm against the definitions. */
+static void ProcessPinLocations(pugi::xml_node Locations,
+                                t_physical_tile_type* PhysicalTileType,
+                                t_sub_tile* SubTile,
+                                t_pin_locs* pin_locs,
+                                const pugiutil::loc_data& loc_data);
+static void ProcessSubTiles(pugi::xml_node Node,
+                            t_physical_tile_type* PhysicalTileType,
+                            std::vector<t_logical_block_type>& LogicalBlockTypes,
+                            std::vector<t_segment_inf>& segments,
+                            const t_default_fc_spec& arch_def_fc,
+                            const pugiutil::loc_data& loc_data);
+/* pb_type parsing: pb_types, their ports, pin-to-pin annotations, interconnect,
+ * modes and metadata.
+ * NOTE(review): XmlReadArch hands &arch->strings to ProcessComplexBlocks;
+ * presumably that same string_internment is what gets threaded through the
+ * `strings` parameters below -- confirm. */
+static void ProcessPb_Type(vtr::string_internment* strings,
+                           pugi::xml_node Parent,
+                           t_pb_type* pb_type,
+                           t_mode* mode,
+                           const bool timing_enabled,
+                           const t_arch& arch,
+                           const pugiutil::loc_data& loc_data);
+static void ProcessPb_TypePort(pugi::xml_node Parent,
+                               t_port* port,
+                               e_power_estimation_method power_method,
+                               const bool is_root_pb_type,
+                               const pugiutil::loc_data& loc_data);
+static void ProcessPinToPinAnnotations(pugi::xml_node parent,
+                                       t_pin_to_pin_annotation* annotation,
+                                       t_pb_type* parent_pb_type,
+                                       const pugiutil::loc_data& loc_data);
+static void ProcessInterconnect(vtr::string_internment* strings, pugi::xml_node Parent, t_mode* mode, const pugiutil::loc_data& loc_data);
+static void ProcessMode(vtr::string_internment* strings, pugi::xml_node Parent, t_mode* mode, const bool timing_enabled, const t_arch& arch, const pugiutil::loc_data& loc_data);
+static t_metadata_dict ProcessMetadata(vtr::string_internment* strings, pugi::xml_node Parent, const pugiutil::loc_data& loc_data);
+/* Fc specification parsing. Process_Fc_Values writes into a t_default_fc_spec,
+ * whereas Process_Fc only reads the architecture default (const reference). */
+static void Process_Fc_Values(pugi::xml_node Node, t_default_fc_spec& spec, const pugiutil::loc_data& loc_data);
+static void Process_Fc(pugi::xml_node Node,
+                       t_physical_tile_type* PhysicalTileType,
+                       t_sub_tile* SubTile,
+                       t_pin_counts pin_counts,
+                       std::vector<t_segment_inf>& segments,
+                       const t_default_fc_spec& arch_def_fc,
+                       const pugiutil::loc_data& loc_data);
+static t_fc_override Process_Fc_override(pugi::xml_node node, const pugiutil::loc_data& loc_data);
+static void ProcessSwitchblockLocations(pugi::xml_node switchblock_locations,
+                                        t_physical_tile_type* type,
+                                        const t_arch& arch,
+                                        const pugiutil::loc_data& loc_data);
+static e_fc_value_type string_to_fc_value_type(const std::string& str, pugi::xml_node node, const pugiutil::loc_data& loc_data);
+static void ProcessChanWidthDistr(pugi::xml_node Node,
+                                  t_arch* arch,
+                                  const pugiutil::loc_data& loc_data);
+static void ProcessChanWidthDistrDir(pugi::xml_node Node, t_chan* chan, const pugiutil::loc_data& loc_data);
+/* Top-level sections. XmlReadArch calls these on the correspondingly named
+ * children of <architecture>: ProcessModels on <models>, ProcessLayout on
+ * <layout>, ProcessDevice on <device>, ProcessComplexBlocks on
+ * <complexblocklist>, ProcessSwitches on <switchlist> and ProcessDirects on the
+ * optional <directlist>. */
+static void ProcessModels(pugi::xml_node Node, t_arch* arch, const pugiutil::loc_data& loc_data);
+static void ProcessModelPorts(pugi::xml_node port_group, t_model* model, std::set<std::string>& port_names, const pugiutil::loc_data& loc_data);
+static void ProcessLayout(pugi::xml_node Node, t_arch* arch, const pugiutil::loc_data& loc_data);
+static t_grid_def ProcessGridLayout(vtr::string_internment* strings, pugi::xml_node layout_type_tag, const pugiutil::loc_data& loc_data);
+static void ProcessDevice(pugi::xml_node Node, t_arch* arch, t_default_fc_spec& arch_def_fc, const pugiutil::loc_data& loc_data);
+static void ProcessComplexBlocks(vtr::string_internment* strings, pugi::xml_node Node, std::vector<t_logical_block_type>& LogicalBlockTypes, t_arch& arch, const bool timing_enabled, const pugiutil::loc_data& loc_data);
+static void ProcessSwitches(pugi::xml_node Node,
+                            t_arch_switch_inf** Switches,
+                            int* NumSwitches,
+                            const bool timing_enabled,
+                            const pugiutil::loc_data& loc_data);
+static void ProcessSwitchTdel(pugi::xml_node Node, const bool timing_enabled, const int switch_index, t_arch_switch_inf* Switches, const pugiutil::loc_data& loc_data);
+static void ProcessDirects(pugi::xml_node Parent, t_direct_inf** Directs, int* NumDirects, const t_arch_switch_inf* Switches, const int NumSwitches, const pugiutil::loc_data& loc_data);
+/* Clock-network parsing. XmlReadArch calls ProcessClockMetalLayers,
+ * ProcessClockNetworks and ProcessClockRouting on the optional <clocknetworks>
+ * child. Note that, unlike the other parsers declared here, all four take
+ * loc_data by non-const reference. */
+static void ProcessClockMetalLayers(pugi::xml_node parent,
+                                    std::unordered_map<std::string, t_metal_layer>& metal_layers,
+                                    pugiutil::loc_data& loc_data);
+static void ProcessClockNetworks(pugi::xml_node parent,
+                                 std::vector<t_clock_network_arch>& clock_networks,
+                                 const t_arch_switch_inf* switches,
+                                 const int num_switches,
+                                 pugiutil::loc_data& loc_data);
+static void ProcessClockSwitchPoints(pugi::xml_node parent,
+                                     t_clock_network_arch& clock_network,
+                                     const t_arch_switch_inf* switches,
+                                     const int num_switches,
+                                     pugiutil::loc_data& loc_data);
+static void ProcessClockRouting(pugi::xml_node parent,
+                                std::vector<t_clock_connection_arch>& clock_connections,
+                                const t_arch_switch_inf* switches,
+                                const int num_switches,
+                                pugiutil::loc_data& loc_data);
+/* XmlReadArch calls ProcessSegments on <segmentlist> and ProcessSwitchblocks on
+ * <switchblocklist> (the latter child is only required when arch->SBType is CUSTOM). */
+static void ProcessSegments(pugi::xml_node Parent,
+                            std::vector<t_segment_inf>& Segs,
+                            const t_arch_switch_inf* Switches,
+                            const int NumSwitches,
+                            const bool timing_enabled,
+                            const bool switchblocklist_required,
+                            const pugiutil::loc_data& loc_data);
+static void ProcessSwitchblocks(pugi::xml_node Parent, t_arch* arch, const pugiutil::loc_data& loc_data);
+static void ProcessCB_SB(pugi::xml_node Node, std::vector<bool>& list, const pugiutil::loc_data& loc_data);
+/* Called by XmlReadArch on the <power> child, which is required only when
+ * arch->power is set. */
+static void ProcessPower(pugi::xml_node parent,
+                         t_power_arch* power_arch,
+                         const pugiutil::loc_data& loc_data);
+
+/* Called by XmlReadArch on the <clocks> child of <architecture>. */
+static void ProcessClocks(pugi::xml_node Parent, t_clock_arch* clocks, const pugiutil::loc_data& loc_data);
+
+/* Called by XmlReadArch on the optional <noc> child of <architecture>. */
+static void ProcessNoc(pugi::xml_node noc_tag, t_arch* arch, const pugiutil::loc_data& loc_data);
+
+/* NoC helpers; each takes a t_noc_inf* named noc_ref.
+ * NOTE(review): presumably reached from ProcessNoc -- the call chain is not
+ * visible from these declarations, confirm against the definitions. */
+static void processTopology(pugi::xml_node topology_tag, const pugiutil::loc_data& loc_data, t_noc_inf* noc_ref);
+
+static void processMeshTopology(pugi::xml_node mesh_topology_tag, const pugiutil::loc_data& loc_data, t_noc_inf* noc_ref);
+
+static void processRouter(pugi::xml_node router_tag, const pugiutil::loc_data& loc_data, t_noc_inf* noc_ref, std::map<int, std::pair<int, int>>& routers_info_in_arch);
+
+/* Power-related helpers for pb_types and their ports. */
+static void ProcessPb_TypePowerEstMethod(pugi::xml_node Parent, t_pb_type* pb_type, const pugiutil::loc_data& loc_data);
+static void ProcessPb_TypePort_Power(pugi::xml_node Parent, t_port* port, e_power_estimation_method power_method, const pugiutil::loc_data& loc_data);
+
+/* NOTE: inst_port_to_port_name, find_switch_by_name and string_to_side below are
+ * declared without `static`, unlike the surrounding file-scope prototypes. */
+std::string inst_port_to_port_name(std::string inst_port);
+
+/* Converts an XML attribute of the given node to a bool. */
+static bool attribute_to_bool(const pugi::xml_node node,
+                              const pugi::xml_attribute attr,
+                              const pugiutil::loc_data& loc_data);
+int find_switch_by_name(const t_arch& arch, std::string switch_name);
+
+e_side string_to_side(std::string side_str);
+
+/* Looks up an element of `types` by name and returns a pointer to it.
+ * NOTE(review): behaviour when no element matches is not visible here -- confirm. */
+template<typename T>
+static T* get_type_by_name(const char* type_name, std::vector<T>& types);
+
+/* NoC mesh generation and router bookkeeping helpers. The
+ * std::map<int, std::pair<int, int>> arguments are keyed by an int;
+ * NOTE(review): presumably the router id, with the pair holding per-router
+ * counters -- confirm against the definitions. */
+static void generate_noc_mesh(pugi::xml_node mesh_topology_tag, const pugiutil::loc_data& loc_data, t_noc_inf* noc_ref, double mesh_region_start_x, double mesh_region_end_x, double mesh_region_start_y, double mesh_region_end_y, int mesh_size);
+
+static bool parse_noc_router_connection_list(pugi::xml_node router_tag, const pugiutil::loc_data& loc_data, int router_id, std::vector<int>& connection_list, std::string connection_list_attribute_value, std::map<int, std::pair<int, int>>& routers_in_arch_info);
+
+static void update_router_info_in_arch(int router_id, bool router_updated_as_a_connection, std::map<int, std::pair<int, int>>& routers_in_arch_info);
+
+static void verify_noc_topology(std::map<int, std::pair<int, int>>& routers_in_arch_info);
+
+/*
+ *
+ *
+ * External Function Implementations
+ *
+ *
+ */
+
+/* Loads the given architecture file.
+ *
+ * Parses the XML file ArchFile and processes the children of its <architecture>
+ * root in a fixed order, because later sections are handed data produced by
+ * earlier ones (e.g. segments, directs and clock networks receive the switch
+ * list):
+ *   models, layout, device, switchlist, segmentlist, switchblocklist,
+ *   complexblocklist, tiles, directlist, clocknetworks, power, clocks, noc.
+ *
+ *   ArchFile          : path of the architecture file; a warning is logged when
+ *                       it does not carry an .xml extension
+ *   timing_enabled    : forwarded to the switch, segment and complex-block parsers
+ *   arch              : architecture description that gets filled in
+ *   PhysicalTileTypes : handed to ProcessTiles, link_physical_logical_types and
+ *                       MarkIoTypes
+ *   LogicalBlockTypes : handed to ProcessComplexBlocks, ProcessTiles,
+ *                       link_physical_logical_types and SyncModelsPbTypes
+ *
+ * Any pugiutil::XmlError raised while parsing is re-reported through
+ * archfpga_throw() together with the file name and the offending line.
+ */
+void XmlReadArch(const char* ArchFile,
+                 const bool timing_enabled,
+                 t_arch* arch,
+                 std::vector<t_physical_tile_type>& PhysicalTileTypes,
+                 std::vector<t_logical_block_type>& LogicalBlockTypes) {
+    pugi::xml_node Next;
+    ReqOpt POWER_REQD, SWITCHBLOCKLIST_REQD;
+
+    if (!vtr::check_file_name_extension(ArchFile, ".xml")) {
+        VTR_LOG_WARN(
+            "Architecture file '%s' may be in incorrect format. "
+            "Expecting .xml format for architecture files.\n",
+            ArchFile);
+    }
+
+    //Create a unique identifier for this architecture file based on its contents
+    arch->architecture_id = vtr::strdup(vtr::secure_digest_file(ArchFile).c_str());
+
+    /* Parse the file */
+    pugi::xml_document doc;
+    pugiutil::loc_data loc_data;
+    t_default_fc_spec arch_def_fc;
+    try {
+        loc_data = pugiutil::load_xml(doc, ArchFile);
+
+        set_arch_file_name(ArchFile);
+
+        /* Root node should be architecture */
+        auto architecture = get_single_child(doc, "architecture", loc_data);
+
+        /* TODO: do version processing properly with string delimiting on the . */
+#if 0
+        char* Prop = get_attribute(architecture, "version", loc_data, ReqOpt::OPTIONAL).as_string(NULL);
+        if (Prop != NULL) {
+            if (atof(Prop) > atof(VPR_VERSION)) {
+                VTR_LOG_WARN( "This architecture version is for VPR %f while your current VPR version is " VPR_VERSION ", compatability issues may arise\n",
+                        atof(Prop));
+            }
+        }
+#endif
+
+        /* Process models */
+        Next = get_single_child(architecture, "models", loc_data);
+        ProcessModels(Next, arch, loc_data);
+        CreateModelLibrary(arch);
+
+        /* Process layout */
+        Next = get_single_child(architecture, "layout", loc_data);
+        ProcessLayout(Next, arch, loc_data);
+
+        /* Process device */
+        Next = get_single_child(architecture, "device", loc_data);
+        ProcessDevice(Next, arch, arch_def_fc, loc_data);
+
+        /* Process switches */
+        Next = get_single_child(architecture, "switchlist", loc_data);
+        ProcessSwitches(Next, &(arch->Switches), &(arch->num_switches),
+                        timing_enabled, loc_data);
+
+        /* The switchblocklist section is required only if custom switchblocks are
+         * used. The flag is computed here because the segment parser needs it; the
+         * section itself is processed after the segments. */
+        bool switchblocklist_required = (arch->SBType == CUSTOM);
+        SWITCHBLOCKLIST_REQD = BoolToReqOpt(switchblocklist_required);
+
+        /* Process segments. This depends on switches */
+        Next = get_single_child(architecture, "segmentlist", loc_data);
+        ProcessSegments(Next, arch->Segments,
+                        arch->Switches, arch->num_switches, timing_enabled, switchblocklist_required, loc_data);
+
+        /* Process switchblocks (if present) */
+        Next = get_single_child(architecture, "switchblocklist", loc_data, SWITCHBLOCKLIST_REQD);
+        if (Next) {
+            ProcessSwitchblocks(Next, arch, loc_data);
+        }
+
+        /* Process logical block types */
+        Next = get_single_child(architecture, "complexblocklist", loc_data);
+        ProcessComplexBlocks(&arch->strings, Next, LogicalBlockTypes, *arch, timing_enabled, loc_data);
+
+        /* Process physical tile types. This is given the logical block types */
+        Next = get_single_child(architecture, "tiles", loc_data);
+        ProcessTiles(Next, PhysicalTileTypes, LogicalBlockTypes, arch_def_fc, *arch, loc_data);
+
+        /* Link Physical Tiles with Logical Blocks */
+        link_physical_logical_types(PhysicalTileTypes, LogicalBlockTypes);
+
+        /* Process directs */
+        Next = get_single_child(architecture, "directlist", loc_data, ReqOpt::OPTIONAL);
+        if (Next) {
+            ProcessDirects(Next, &(arch->Directs), &(arch->num_directs),
+                           arch->Switches, arch->num_switches,
+                           loc_data);
+        }
+
+        /* Process Clock Networks */
+        Next = get_single_child(architecture, "clocknetworks", loc_data, ReqOpt::OPTIONAL);
+        if (Next) {
+            std::vector<std::string> expected_children = {"metal_layers", "clock_network", "clock_routing"};
+            expect_only_children(Next, expected_children, loc_data);
+
+            ProcessClockMetalLayers(Next, arch->clock_arch.clock_metal_layers, loc_data);
+            ProcessClockNetworks(Next,
+                                 arch->clock_arch.clock_networks_arch,
+                                 arch->Switches,
+                                 arch->num_switches,
+                                 loc_data);
+            ProcessClockRouting(Next,
+                                arch->clock_arch.clock_connections_arch,
+                                arch->Switches,
+                                arch->num_switches,
+                                loc_data);
+        }
+
+        /* Process architecture power information */
+
+        /* If arch->power has been initialized, meaning the user has requested power estimation,
+         * then the power architecture information is required.
+         */
+        POWER_REQD = arch->power ? ReqOpt::REQUIRED : ReqOpt::OPTIONAL;
+
+        Next = get_single_child(architecture, "power", loc_data, POWER_REQD);
+        if (Next) {
+            if (arch->power) {
+                ProcessPower(Next, arch->power, loc_data);
+            } else {
+                /* This information still needs to be read, even if it is just
+                 * thrown away.
+                 */
+                t_power_arch* power_arch_fake = (t_power_arch*)vtr::calloc(1,
+                                                                           sizeof(t_power_arch));
+                ProcessPower(Next, power_arch_fake, loc_data);
+                free(power_arch_fake);
+            }
+        }
+
+        // Process Clocks (required under the same condition as the power section)
+        Next = get_single_child(architecture, "clocks", loc_data, POWER_REQD);
+        if (Next) {
+            if (arch->clocks) {
+                ProcessClocks(Next, arch->clocks, loc_data);
+            } else {
+                /* This information still needs to be read, even if it is just
+                 * thrown away.
+                 */
+                t_clock_arch* clocks_fake = (t_clock_arch*)vtr::calloc(1,
+                                                                       sizeof(t_clock_arch));
+                ProcessClocks(Next, clocks_fake, loc_data);
+                free(clocks_fake->clock_inf);
+                free(clocks_fake);
+            }
+        }
+
+        // process NoC (optional)
+        Next = get_single_child(architecture, "noc", loc_data, ReqOpt::OPTIONAL);
+        if (Next) {
+            ProcessNoc(Next, arch, loc_data);
+        }
+
+        SyncModelsPbTypes(arch, LogicalBlockTypes);
+        check_models(arch);
+
+        MarkIoTypes(PhysicalTileTypes);
+    } catch (pugiutil::XmlError& e) {
+        /* Re-report XML errors with the architecture file name and line attached */
+        archfpga_throw(ArchFile, e.line(),
+                       "%s", e.what());
+    }
+}
+
+/*
+ *
+ *
+ * File-scope function implementations
+ *
+ *
+ */
+
+/* Fills in type->pinloc and the per-pin width/height offsets of a physical tile
+ * according to pin_locs->distribution.
+ *
+ * Each time a pin is given a location, that location's width/height offsets are
+ * accumulated into pin_width_offset/pin_height_offset and a per-pin location
+ * count is incremented. Once every pin is placed, the accumulated offsets are
+ * divided by that count. Every pin must end up with at least one location.
+ *
+ * Locations and loc_data are only used by the custom distribution, where they
+ * are handed to ProcessPinString to resolve the pin strings stored in
+ * pin_locs->assignments.
+ */
+static void LoadPinLoc(pugi::xml_node Locations,
+                       t_physical_tile_type* type,
+                       t_pin_locs* pin_locs,
+                       const pugiutil::loc_data& loc_data) {
+    type->pin_width_offset.resize(type->num_pins, 0);
+    type->pin_height_offset.resize(type->num_pins, 0);
+
+    //Number of locations assigned to each pin so far
+    std::vector<int> locations_per_pin(type->num_pins, 0);
+
+    //Marks 'pin' as present on 'side' of grid location (x_off, y_off) of the tile
+    //and accumulates the offsets for the averaging step at the end
+    auto place_pin = [&](int x_off, int y_off, e_side side, int pin) {
+        type->pinloc[x_off][y_off][side][pin] = true;
+        type->pin_width_offset[pin] += x_off;
+        type->pin_height_offset[pin] += y_off;
+        locations_per_pin[pin] += 1;
+    };
+
+    //True if 'side' of grid location (x_off, y_off) lies on the outer edge of the tile
+    auto on_perimeter = [&](int x_off, int y_off, e_side side) {
+        return (x_off == 0 && side == LEFT)
+               || (y_off == type->height - 1 && side == TOP)
+               || (x_off == type->width - 1 && side == RIGHT)
+               || (y_off == 0 && side == BOTTOM);
+    };
+
+    //Hands out 'pins' one at-a-time in round-robin order over every
+    //(width, height, side) location -- restricted to perimeter sides when
+    //perimeter_only is set -- until all are placed. Returns the number placed.
+    auto place_round_robin = [&](const std::vector<int>& pins, bool perimeter_only) -> size_t {
+        size_t next = 0;
+        while (next < pins.size()) {
+            for (int x_off = 0; x_off < type->width; ++x_off) {
+                for (int y_off = 0; y_off < type->height; ++y_off) {
+                    for (e_side side : {TOP, RIGHT, BOTTOM, LEFT}) {
+                        if (next >= pins.size()) {
+                            continue; //Nothing left to allocate
+                        }
+                        if (perimeter_only && !on_perimeter(x_off, y_off, side)) {
+                            continue; //Interior side
+                        }
+                        place_pin(x_off, y_off, side, pins[next]);
+                        ++next;
+                    }
+                }
+            }
+        }
+        return next;
+    };
+
+    if (pin_locs->distribution == E_SPREAD_PIN_DISTR) {
+        /* Spread the pins evenly: the i-th (side, width, height) location gets
+         * pins i, i + num_sides, i + 2 * num_sides, ... */
+        const int num_sides = 4 * (type->width * type->height);
+        int side_index = 0;
+        int num_placed = 0;
+        for (e_side side : {TOP, RIGHT, BOTTOM, LEFT}) {
+            for (int x_off = 0; x_off < type->width; ++x_off) {
+                for (int y_off = 0; y_off < type->height; ++y_off, ++side_index) {
+                    //The bound is deliberately left inside the loop: it divides by
+                    //num_sides, which is only known to be non-zero once we get here
+                    for (int pin_offset = 0; pin_offset < (type->num_pins / num_sides) + 1; ++pin_offset) {
+                        const int pin_num = side_index + pin_offset * num_sides;
+                        if (pin_num >= type->num_pins) {
+                            continue;
+                        }
+                        place_pin(x_off, y_off, side, pin_num);
+                        ++num_placed;
+                    }
+                }
+            }
+        }
+        VTR_ASSERT(side_index == num_sides);
+        VTR_ASSERT(num_placed == type->num_pins);
+
+    } else if (pin_locs->distribution == E_PERIMETER_PIN_DISTR) {
+        //Every pin, in index order, goes to the perimeter sides one at-a-time
+        std::vector<int> all_pins;
+        for (int pin_num = 0; pin_num < type->num_pins; ++pin_num) {
+            all_pins.push_back(pin_num);
+        }
+
+        const size_t num_placed = place_round_robin(all_pins, true);
+        VTR_ASSERT(num_placed == all_pins.size());
+
+    } else if (pin_locs->distribution == E_SPREAD_INPUTS_PERIMETER_OUTPUTS_PIN_DISTR) {
+        //Split the pins into block inputs (RECEIVER) and outputs (DRIVER)
+        std::vector<int> input_pins;
+        std::vector<int> output_pins;
+        for (int pin_num = 0; pin_num < type->num_pins; ++pin_num) {
+            const int class_idx = type->pin_class[pin_num];
+
+            if (type->class_inf[class_idx].type == RECEIVER) {
+                input_pins.push_back(pin_num);
+            } else {
+                VTR_ASSERT(type->class_inf[class_idx].type == DRIVER);
+                output_pins.push_back(pin_num);
+            }
+        }
+
+        //Inputs may land on any side of any location ...
+        const size_t inputs_placed = place_round_robin(input_pins, false);
+        VTR_ASSERT(inputs_placed == input_pins.size());
+
+        //... whereas outputs are confined to the perimeter sides
+        const size_t outputs_placed = place_round_robin(output_pins, true);
+        VTR_ASSERT(outputs_placed == output_pins.size());
+
+    } else {
+        VTR_ASSERT(pin_locs->distribution == E_CUSTOM_PIN_DISTR);
+        for (auto& sub_tile : type->sub_tiles) {
+            const int sub_tile_index = sub_tile.index;
+            const int capacity_total = sub_tile.capacity.total();
+
+            for (int x_off = 0; x_off < type->width; ++x_off) {
+                for (int y_off = 0; y_off < type->height; ++y_off) {
+                    for (e_side side : {TOP, RIGHT, BOTTOM, LEFT}) {
+                        for (const auto& pin_string : pin_locs->assignments[sub_tile_index][x_off][y_off][side]) {
+                            //Half-open range of sub-tile pin indices named by the string
+                            const auto pin_range = ProcessPinString<t_sub_tile*>(Locations,
+                                                                                 &sub_tile,
+                                                                                 pin_string.c_str(),
+                                                                                 loc_data);
+
+                            for (int pin_num = pin_range.first; pin_num < pin_range.second; ++pin_num) {
+                                VTR_ASSERT(pin_num < (int)sub_tile.sub_tile_to_tile_pin_indices.size() / capacity_total);
+
+                                //Apply the assignment to the matching pin of every capacity instance
+                                for (int instance = 0; instance < capacity_total; ++instance) {
+                                    const int sub_tile_pin_index = pin_num + instance * sub_tile.num_phy_pins / capacity_total;
+                                    const int physical_pin_index = sub_tile.sub_tile_to_tile_pin_indices[sub_tile_pin_index];
+                                    place_pin(x_off, y_off, side, physical_pin_index);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    //Turn the accumulated offsets into per-pin averages
+    for (int ipin = 0; ipin < type->num_pins; ++ipin) {
+        const int num_locations = locations_per_pin[ipin];
+        VTR_ASSERT(num_locations >= 1);
+
+        type->pin_width_offset[ipin] /= num_locations;
+        type->pin_height_offset[ipin] /= num_locations;
+
+        VTR_ASSERT(type->pin_width_offset[ipin] >= 0 && type->pin_width_offset[ipin] < type->width);
+        VTR_ASSERT(type->pin_height_offset[ipin] >= 0 && type->pin_height_offset[ipin] < type->height);
+    }
+}
+
+/* Parses a pin location string and returns the range of pin indices it names.
+ *
+ * Accepted forms, where <type> must equal type->name:
+ *   <type>.<port>           all pins of the port
+ *   <type>.<port>[i]        a single pin
+ *   <type>.<port>[i:j]      an inclusive pin range (i and j may be given in either order)
+ *
+ *   Locations      : XML node used only for the line number in error messages
+ *   type           : object providing ->name and accepted by get_port_by_name()
+ *   pin_loc_string : the string to parse
+ *
+ * Returns a half-open range [first, last) of pin indices, offset by the port's
+ * absolute_first_pin_index.
+ *
+ * Malformed strings are reported through archfpga_throw(). The token array is
+ * released on every exit path, including when an error is thrown.
+ */
+template<typename T>
+static std::pair<int, int> ProcessPinString(pugi::xml_node Locations,
+                                            T type,
+                                            const char* pin_loc_string,
+                                            const pugiutil::loc_data& loc_data) {
+    int num_tokens;
+    auto tokens = GetTokensFromString(pin_loc_string, &num_tokens);
+
+    //Defensive: never index a missing token array.
+    //NOTE(review): presumably GetTokensFromString yields no array for an empty string -- confirm
+    if (tokens == nullptr) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
+                       "Pin location string is empty\n");
+    }
+
+    try {
+        int token_index = 0;
+        auto token = tokens[token_index];
+
+        if (token.type != TOKEN_STRING || 0 != strcmp(token.data, type->name)) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
+                           "Wrong physical type name of the port: %s\n", pin_loc_string);
+        }
+
+        token_index++;
+        token = tokens[token_index];
+
+        if (token.type != TOKEN_DOT) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
+                           "No dot is present to separate type name and port name: %s\n", pin_loc_string);
+        }
+
+        token_index++;
+        token = tokens[token_index];
+
+        if (token.type != TOKEN_STRING) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
+                           "No port name is present: %s\n", pin_loc_string);
+        }
+
+        auto port = get_port_by_name(type, token.data);
+        if (port == nullptr) {
+            //Port name first, then the type it was looked up in (matches the message text)
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
+                           "Port %s for %s could not be found: %s\n",
+                           token.data, type->name,
+                           pin_loc_string);
+        }
+        int abs_first_pin_idx = port->absolute_first_pin_index;
+
+        token_index++;
+
+        // All the pins of the port are taken or the port has a single pin
+        if (token_index == num_tokens) {
+            freeTokens(tokens, num_tokens);
+            return std::make_pair(abs_first_pin_idx, abs_first_pin_idx + port->num_pins);
+        }
+
+        token = tokens[token_index];
+
+        if (token.type != TOKEN_OPEN_SQUARE_BRACKET) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
+                           "No open square bracket present: %s\n", pin_loc_string);
+        }
+
+        token_index++;
+        token = tokens[token_index];
+
+        if (token.type != TOKEN_INT) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
+                           "No integer to indicate least significant pin index: %s\n", pin_loc_string);
+        }
+
+        int first_pin = vtr::atoi(token.data);
+
+        token_index++;
+        token = tokens[token_index];
+
+        // Single pin is specified
+        if (token.type != TOKEN_COLON) {
+            if (token.type != TOKEN_CLOSE_SQUARE_BRACKET) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
+                               "No closing bracket: %s\n", pin_loc_string);
+            }
+
+            token_index++;
+
+            if (token_index != num_tokens) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
+                               "pin location should be completed, but more tokens are present: %s\n", pin_loc_string);
+            }
+
+            freeTokens(tokens, num_tokens);
+            return std::make_pair(abs_first_pin_idx + first_pin, abs_first_pin_idx + first_pin + 1);
+        }
+
+        token_index++;
+        token = tokens[token_index];
+
+        if (token.type != TOKEN_INT) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
+                           "No integer to indicate most significant pin index: %s\n", pin_loc_string);
+        }
+
+        int last_pin = vtr::atoi(token.data);
+
+        token_index++;
+        token = tokens[token_index];
+
+        if (token.type != TOKEN_CLOSE_SQUARE_BRACKET) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
+                           "No closed square bracket: %s\n", pin_loc_string);
+        }
+
+        token_index++;
+
+        if (token_index != num_tokens) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
+                           "pin location should be completed, but more tokens are present: %s\n", pin_loc_string);
+        }
+
+        // Accept the range in either order
+        if (first_pin > last_pin) {
+            std::swap(first_pin, last_pin);
+        }
+
+        freeTokens(tokens, num_tokens);
+        return std::make_pair(abs_first_pin_idx + first_pin, abs_first_pin_idx + last_pin + 1);
+    } catch (...) {
+        // An error was reported above: release the tokens before propagating it.
+        // (The success paths free the tokens themselves and never reach this handler.)
+        freeTokens(tokens, num_tokens);
+        throw;
+    }
+}
+
+static void ProcessPinToPinAnnotations(pugi::xml_node Parent,
+                                       t_pin_to_pin_annotation* annotation,
+                                       t_pb_type* parent_pb_type,
+                                       const pugiutil::loc_data& loc_data) {
+    int i = 0;
+    const char* Prop;
+
+    if (get_attribute(Parent, "max", loc_data, ReqOpt::OPTIONAL).as_string(nullptr)) {
+        i++;
+    }
+    if (get_attribute(Parent, "min", loc_data, ReqOpt::OPTIONAL).as_string(nullptr)) {
+        i++;
+    }
+    if (get_attribute(Parent, "type", loc_data, ReqOpt::OPTIONAL).as_string(nullptr)) {
+        i++;
+    }
+    if (get_attribute(Parent, "value", loc_data, ReqOpt::OPTIONAL).as_string(nullptr)) {
+        i++;
+    }
+    if (0 == strcmp(Parent.name(), "C_constant")
+        || 0 == strcmp(Parent.name(), "C_matrix")
+        || 0 == strcmp(Parent.name(), "pack_pattern")) {
+        i = 1;
+    }
+
+    annotation->num_value_prop_pairs = i;
+    annotation->prop = (int*)vtr::calloc(i, sizeof(int));
+    annotation->value = (char**)vtr::calloc(i, sizeof(char*));
+    annotation->line_num = loc_data.line(Parent);
+    /* Todo: This is slow, I should use a case lookup */
+    i = 0;
+    if (0 == strcmp(Parent.name(), "delay_constant")) {
+        annotation->type = E_ANNOT_PIN_TO_PIN_DELAY;
+        annotation->format = E_ANNOT_PIN_TO_PIN_CONSTANT;
+        Prop = get_attribute(Parent, "max", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+        if (Prop) {
+            annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_DELAY_MAX;
+            annotation->value[i] = vtr::strdup(Prop);
+            i++;
+        }
+        Prop = get_attribute(Parent, "min", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+        if (Prop) {
+            annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_DELAY_MIN;
+            annotation->value[i] = vtr::strdup(Prop);
+            i++;
+        }
+        Prop = get_attribute(Parent, "in_port", loc_data).value();
+        annotation->input_pins = vtr::strdup(Prop);
+
+        Prop = get_attribute(Parent, "out_port", loc_data).value();
+        annotation->output_pins = vtr::strdup(Prop);
+
+    } else if (0 == strcmp(Parent.name(), "delay_matrix")) {
+        annotation->type = E_ANNOT_PIN_TO_PIN_DELAY;
+        annotation->format = E_ANNOT_PIN_TO_PIN_MATRIX;
+        Prop = get_attribute(Parent, "type", loc_data).value();
+        annotation->value[i] = vtr::strdup(Parent.child_value());
+
+        if (0 == strcmp(Prop, "max")) {
+            annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_DELAY_MAX;
+        } else {
+            VTR_ASSERT(0 == strcmp(Prop, "min"));
+            annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_DELAY_MIN;
+        }
+
+        i++;
+        Prop = get_attribute(Parent, "in_port", loc_data).value();
+        annotation->input_pins = vtr::strdup(Prop);
+
+        Prop = get_attribute(Parent, "out_port", loc_data).value();
+        annotation->output_pins = vtr::strdup(Prop);
+
+    } else if (0 == strcmp(Parent.name(), "C_constant")) {
+        annotation->type = E_ANNOT_PIN_TO_PIN_CAPACITANCE;
+        annotation->format = E_ANNOT_PIN_TO_PIN_CONSTANT;
+        Prop = get_attribute(Parent, "C", loc_data).value();
+        annotation->value[i] = vtr::strdup(Prop);
+        annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_CAPACITANCE_C;
+        i++;
+
+        Prop = get_attribute(Parent, "in_port", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+        annotation->input_pins = vtr::strdup(Prop);
+
+        Prop = get_attribute(Parent, "out_port", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+        annotation->output_pins = vtr::strdup(Prop);
+        VTR_ASSERT(annotation->output_pins != nullptr || annotation->input_pins != nullptr);
+
+    } else if (0 == strcmp(Parent.name(), "C_matrix")) {
+        annotation->type = E_ANNOT_PIN_TO_PIN_CAPACITANCE;
+        annotation->format = E_ANNOT_PIN_TO_PIN_MATRIX;
+        annotation->value[i] = vtr::strdup(Parent.child_value());
+        annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_CAPACITANCE_C;
+        i++;
+
+        Prop = get_attribute(Parent, "in_port", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+        annotation->input_pins = vtr::strdup(Prop);
+
+        Prop = get_attribute(Parent, "out_port", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+        annotation->output_pins = vtr::strdup(Prop);
+        VTR_ASSERT(annotation->output_pins != nullptr || annotation->input_pins != nullptr);
+
+    } else if (0 == strcmp(Parent.name(), "T_setup")) {
+        annotation->type = E_ANNOT_PIN_TO_PIN_DELAY;
+        annotation->format = E_ANNOT_PIN_TO_PIN_CONSTANT;
+        Prop = get_attribute(Parent, "value", loc_data).value();
+        annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_DELAY_TSETUP;
+        annotation->value[i] = vtr::strdup(Prop);
+
+        i++;
+        Prop = get_attribute(Parent, "port", loc_data).value();
+        annotation->input_pins = vtr::strdup(Prop);
+
+        Prop = get_attribute(Parent, "clock", loc_data).value();
+        annotation->clock = vtr::strdup(Prop);
+
+        primitives_annotation_clock_match(annotation, parent_pb_type);
+
+    } else if (0 == strcmp(Parent.name(), "T_clock_to_Q")) {
+        annotation->type = E_ANNOT_PIN_TO_PIN_DELAY;
+        annotation->format = E_ANNOT_PIN_TO_PIN_CONSTANT;
+        Prop = get_attribute(Parent, "max", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+
+        bool found_min_max_attrib = false;
+        if (Prop) {
+            annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_DELAY_CLOCK_TO_Q_MAX;
+            annotation->value[i] = vtr::strdup(Prop);
+            i++;
+            found_min_max_attrib = true;
+        }
+        Prop = get_attribute(Parent, "min", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+        if (Prop) {
+            annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_DELAY_CLOCK_TO_Q_MIN;
+            annotation->value[i] = vtr::strdup(Prop);
+            i++;
+            found_min_max_attrib = true;
+        }
+
+        if (!found_min_max_attrib) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                           "Failed to find either 'max' or 'min' attribute required for <%s> in <%s>",
+                           Parent.name(), Parent.parent().name());
+        }
+
+        Prop = get_attribute(Parent, "port", loc_data).value();
+        annotation->input_pins = vtr::strdup(Prop);
+
+        Prop = get_attribute(Parent, "clock", loc_data).value();
+        annotation->clock = vtr::strdup(Prop);
+
+        primitives_annotation_clock_match(annotation, parent_pb_type);
+
+    } else if (0 == strcmp(Parent.name(), "T_hold")) {
+        annotation->type = E_ANNOT_PIN_TO_PIN_DELAY;
+        annotation->format = E_ANNOT_PIN_TO_PIN_CONSTANT;
+        Prop = get_attribute(Parent, "value", loc_data).value();
+        annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_DELAY_THOLD;
+        annotation->value[i] = vtr::strdup(Prop);
+        i++;
+
+        Prop = get_attribute(Parent, "port", loc_data).value();
+        annotation->input_pins = vtr::strdup(Prop);
+
+        Prop = get_attribute(Parent, "clock", loc_data).value();
+        annotation->clock = vtr::strdup(Prop);
+
+        primitives_annotation_clock_match(annotation, parent_pb_type);
+
+    } else if (0 == strcmp(Parent.name(), "pack_pattern")) {
+        annotation->type = E_ANNOT_PIN_TO_PIN_PACK_PATTERN;
+        annotation->format = E_ANNOT_PIN_TO_PIN_CONSTANT;
+        Prop = get_attribute(Parent, "name", loc_data).value();
+        annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_PACK_PATTERN_NAME;
+        annotation->value[i] = vtr::strdup(Prop);
+        i++;
+
+        Prop = get_attribute(Parent, "in_port", loc_data).value();
+        annotation->input_pins = vtr::strdup(Prop);
+
+        Prop = get_attribute(Parent, "out_port", loc_data).value();
+        annotation->output_pins = vtr::strdup(Prop);
+
+    } else {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                       "Unknown port type %s in %s in %s", Parent.name(),
+                       Parent.parent().name(), Parent.parent().parent().name());
+    }
+    VTR_ASSERT(i == annotation->num_value_prop_pairs);
+}
+
+/* Loads pin-toggle energy data from the <port> children of a power tag.
+ *
+ *  parent   - XML node whose <port> children are processed (there may be none)
+ *  pb_type  - pb_type owning the ports that the children refer to by name
+ *  loc_data - file/line information used in error reports
+ *
+ * Each <port> child must name an existing port of pb_type without pin indices,
+ * and a port may be given pin-toggle data only once. The optional
+ * 'scaled_by_static_prob' / 'scaled_by_static_prob_n' attribute must name a
+ * single pin of an existing port. Violations are reported with archfpga_throw(). */
+static void ProcessPb_TypePowerPinToggle(pugi::xml_node parent, t_pb_type* pb_type, const pugiutil::loc_data& loc_data) {
+    pugi::xml_node cur;
+    const char* prop;
+    t_port* port;
+    int high, low;
+
+    cur = get_first_child(parent, "port", loc_data, ReqOpt::OPTIONAL);
+    while (cur) {
+        prop = get_attribute(cur, "name", loc_data).value();
+
+        port = findPortByName(prop, pb_type, &high, &low);
+        if (!port) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(cur),
+                           "Could not find port '%s' needed for energy per toggle.",
+                           prop);
+        }
+        /* The name must cover the whole port: indices have to span pin 0 to the last pin */
+        if (high != port->num_pins - 1 || low != 0) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(cur),
+                           "Pin-toggle does not support pin indices (%s)", prop);
+        }
+
+        if (port->port_power->pin_toggle_initialized) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(cur),
+                           "Duplicate pin-toggle energy for port '%s'", port->name);
+        }
+        port->port_power->pin_toggle_initialized = true;
+
+        /* Get energy per toggle */
+        port->port_power->energy_per_toggle = get_attribute(cur,
+                                                            "energy_per_toggle", loc_data)
+                                                  .as_float(0.);
+
+        /* Get scaled by factor: the '_n' attribute is only consulted when the
+         * plain one is absent, and selects reverse scaling */
+        bool reverse_scaled = false;
+        prop = get_attribute(cur, "scaled_by_static_prob", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+        if (!prop) {
+            prop = get_attribute(cur, "scaled_by_static_prob_n", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+            if (prop) {
+                reverse_scaled = true;
+            }
+        }
+
+        if (prop) {
+            /* As with the lookup above, a null result means no port has this name.
+             * Report it here instead of storing a null scaled_by_port and
+             * range-checking indices that belong to no port. */
+            t_port* scaled_by_port = findPortByName(prop, pb_type, &high, &low);
+            if (!scaled_by_port) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(cur),
+                               "Could not find port '%s' needed for pin-toggle static probability scaling.",
+                               prop);
+            }
+            if (high != low) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(cur),
+                               "Pin-toggle 'scaled_by_static_prob' must be a single pin (%s)",
+                               prop);
+            }
+            port->port_power->scaled_by_port = scaled_by_port;
+            port->port_power->scaled_by_port_pin_idx = high;
+            port->port_power->reverse_scaled = reverse_scaled;
+        }
+
+        cur = cur.next_sibling(cur.name());
+    }
+}
+
+/* Reads the power figures in the optional <power> child of Parent into
+ * pb_type->pb_type_power.
+ *
+ * What is read depends on pb_type->pb_type_power->estimation_method:
+ *   POWER_METHOD_TOGGLE_PINS - per-port pin-toggle data and <static_power>
+ *   POWER_METHOD_C_INTERNAL  - <static_power> and the C_internal of <dynamic_power>
+ *   POWER_METHOD_ABSOLUTE    - <static_power> and the power_per_instance of <dynamic_power>
+ * Any other method, or a missing <power> tag, leaves pb_type untouched. */
+static void ProcessPb_TypePower(pugi::xml_node Parent, t_pb_type* pb_type, const pugiutil::loc_data& loc_data) {
+    pugi::xml_node power_tag = get_first_child(Parent, "power", loc_data, ReqOpt::OPTIONAL);
+    if (!power_tag) {
+        return;
+    }
+
+    const auto method = pb_type->pb_type_power->estimation_method;
+    const bool uses_pin_toggle = (method == POWER_METHOD_TOGGLE_PINS);
+    const bool uses_C_internal = (method == POWER_METHOD_C_INTERNAL);
+    const bool uses_absolute = (method == POWER_METHOD_ABSOLUTE);
+
+    if (!uses_pin_toggle && !uses_C_internal && !uses_absolute) {
+        /* Nothing is read from the tag for the remaining methods */
+        return;
+    }
+
+    if (uses_pin_toggle) {
+        ProcessPb_TypePowerPinToggle(power_tag, pb_type, loc_data);
+    }
+
+    /* Every method that reaches this point takes an absolute leakage figure */
+    pugi::xml_node static_tag = get_single_child(power_tag, "static_power", loc_data);
+    pb_type->pb_type_power->absolute_power_per_instance.leakage = get_attribute(static_tag, "power_per_instance", loc_data).as_float(0.);
+
+    if (uses_absolute) {
+        pugi::xml_node dynamic_tag = get_single_child(power_tag, "dynamic_power", loc_data);
+        pb_type->pb_type_power->absolute_power_per_instance.dynamic = get_attribute(dynamic_tag, "power_per_instance", loc_data).as_float(0.);
+    } else if (uses_C_internal) {
+        pugi::xml_node dynamic_tag = get_single_child(power_tag, "dynamic_power", loc_data);
+        pb_type->pb_type_power->C_internal = get_attribute(dynamic_tag, "C_internal", loc_data).as_float(0.);
+    }
+}
+
+/* Selects the power estimation method of pb_type and stores it in
+ * pb_type->pb_type_power->estimation_method.
+ *
+ * The method is taken from the "method" attribute of the optional <power> child
+ * of Parent. When the tag or the attribute is absent, the method is derived from
+ * the parent pb_type's method through power_method_inherited(); a pb_type with no
+ * parent pb_type uses POWER_METHOD_AUTO_SIZES as the parent method. An attribute
+ * value that matches none of the known names is reported with archfpga_throw(). */
+static void ProcessPb_TypePowerEstMethod(pugi::xml_node Parent, t_pb_type* pb_type, const pugiutil::loc_data& loc_data) {
+    struct t_method_name {
+        const char* text;
+        e_power_estimation_method method;
+    };
+    /* Attribute spellings and the methods they select */
+    static const t_method_name known_methods[] = {
+        {"auto-size", POWER_METHOD_AUTO_SIZES},
+        {"specify-size", POWER_METHOD_SPECIFY_SIZES},
+        {"pin-toggle", POWER_METHOD_TOGGLE_PINS},
+        {"c-internal", POWER_METHOD_C_INTERNAL},
+        {"absolute", POWER_METHOD_ABSOLUTE},
+        {"ignore", POWER_METHOD_IGNORE},
+        {"sum-of-children", POWER_METHOD_SUM_OF_CHILDREN},
+    };
+
+    const char* method_text = nullptr;
+    pugi::xml_node power_tag = get_first_child(Parent, "power", loc_data, ReqOpt::OPTIONAL);
+    if (power_tag) {
+        method_text = get_attribute(power_tag, "method", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+    }
+
+    e_power_estimation_method parent_method = POWER_METHOD_AUTO_SIZES;
+    if (pb_type->parent_mode && pb_type->parent_mode->parent_pb_type) {
+        parent_method = pb_type->parent_mode->parent_pb_type->pb_type_power->estimation_method;
+    }
+
+    if (!method_text) {
+        /* No explicit method: inherit from the parent's method */
+        pb_type->pb_type_power->estimation_method = power_method_inherited(parent_method);
+        return;
+    }
+
+    for (const t_method_name& entry : known_methods) {
+        if (strcmp(method_text, entry.text) == 0) {
+            pb_type->pb_type_power->estimation_method = entry.method;
+            return;
+        }
+    }
+
+    archfpga_throw(loc_data.filename_c_str(), loc_data.line(power_tag),
+                   "Invalid power estimation method for pb_type '%s'",
+                   pb_type->name);
+}
+
+/* Takes in a pb_type, allocates and loads data for it and recurses downwards.
+ *
+ *  strings        - string internment table, forwarded to ProcessMode() and ProcessMetadata()
+ *  Parent         - XML node describing this pb_type
+ *  pb_type        - structure to fill in
+ *  mode           - mode containing this pb_type; nullptr, or a mode without a
+ *                   parent pb_type, marks a root pb_type
+ *  timing_enabled - when true, leaf pb_types are passed to
+ *                   check_leaf_pb_model_timing_consistency()
+ *  arch           - architecture, forwarded to ProcessMode() and the timing check
+ *  loc_data       - file/line information used in error reports
+ *
+ * Malformed input is reported with archfpga_throw(); internal invariants are
+ * checked with VTR_ASSERT. */
+static void ProcessPb_Type(vtr::string_internment* strings, pugi::xml_node Parent, t_pb_type* pb_type, t_mode* mode, const bool timing_enabled, const t_arch& arch, const pugiutil::loc_data& loc_data) {
+    int num_ports, i, j, k, num_annotations;
+    const char* Prop;
+    pugi::xml_node Cur;
+
+    bool is_root_pb_type = !(mode != nullptr && mode->parent_pb_type != nullptr);
+    /* A pb_type is a leaf exactly when it carries a blif_model attribute */
+    bool is_leaf_pb_type = bool(get_attribute(Parent, "blif_model", loc_data, ReqOpt::OPTIONAL));
+
+    std::vector<std::string> children_to_expect = {"input", "output", "clock", "mode", "power", "metadata"};
+    if (!is_leaf_pb_type) {
+        //Non-leafs may have a model/pb_type children
+        children_to_expect.push_back("model");
+        children_to_expect.push_back("pb_type");
+        children_to_expect.push_back("interconnect");
+
+        if (is_root_pb_type) {
+            VTR_ASSERT(!is_leaf_pb_type);
+            //Top level pb_type's may also have the following tag types
+            children_to_expect.push_back("fc");
+            children_to_expect.push_back("pinlocations");
+            children_to_expect.push_back("switchblock_locations");
+        }
+    } else {
+        VTR_ASSERT(is_leaf_pb_type);
+        VTR_ASSERT(!is_root_pb_type);
+
+        //Leaf pb_type's may also have the following tag types
+        children_to_expect.push_back("T_setup");
+        children_to_expect.push_back("T_hold");
+        children_to_expect.push_back("T_clock_to_Q");
+        children_to_expect.push_back("delay_constant");
+        children_to_expect.push_back("delay_matrix");
+    }
+
+    //Sanity check contained tags
+    expect_only_children(Parent, children_to_expect, loc_data);
+
+    /* STL maps for checking various duplicate names */
+    std::map<std::string, int> pb_port_names;
+    std::map<std::string, int> mode_names;
+    std::pair<std::map<std::string, int>::iterator, bool> ret_pb_ports;
+    std::pair<std::map<std::string, int>::iterator, bool> ret_mode_names;
+    int num_in_ports, num_out_ports, num_clock_ports;
+    int num_delay_constant, num_delay_matrix, num_C_constant, num_C_matrix,
+        num_T_setup, num_T_cq, num_T_hold;
+
+    pb_type->parent_mode = mode;
+    if (mode != nullptr && mode->parent_pb_type != nullptr) {
+        pb_type->depth = mode->parent_pb_type->depth + 1;
+        Prop = get_attribute(Parent, "name", loc_data).value();
+        pb_type->name = vtr::strdup(Prop);
+    } else {
+        pb_type->depth = 0;
+        /* same name as type */
+    }
+
+    Prop = get_attribute(Parent, "blif_model", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+    pb_type->blif_model = vtr::strdup(Prop);
+
+    pb_type->class_type = UNKNOWN_CLASS;
+    /* Compare the attribute text in place: duplicating it first would leak the
+     * copy whenever the unknown-class error below is thrown */
+    Prop = get_attribute(Parent, "class", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+
+    if (Prop) {
+        if (0 == strcmp(Prop, PB_TYPE_CLASS_STRING[LUT_CLASS])) {
+            pb_type->class_type = LUT_CLASS;
+        } else if (0 == strcmp(Prop, PB_TYPE_CLASS_STRING[LATCH_CLASS])) {
+            pb_type->class_type = LATCH_CLASS;
+        } else if (0 == strcmp(Prop, PB_TYPE_CLASS_STRING[MEMORY_CLASS])) {
+            pb_type->class_type = MEMORY_CLASS;
+        } else {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                           "Unknown class '%s' in pb_type '%s'\n", Prop,
+                           pb_type->name);
+        }
+    }
+
+    if (mode == nullptr) {
+        pb_type->num_pb = 1;
+    } else {
+        pb_type->num_pb = get_attribute(Parent, "num_pb", loc_data).as_int(0);
+    }
+
+    VTR_ASSERT(pb_type->num_pb > 0);
+    num_ports = num_in_ports = num_out_ports = num_clock_ports = 0;
+    num_in_ports = count_children(Parent, "input", loc_data, ReqOpt::OPTIONAL);
+    num_out_ports = count_children(Parent, "output", loc_data, ReqOpt::OPTIONAL);
+    num_clock_ports = count_children(Parent, "clock", loc_data, ReqOpt::OPTIONAL);
+    num_ports = num_in_ports + num_out_ports + num_clock_ports;
+    pb_type->ports = (t_port*)vtr::calloc(num_ports, sizeof(t_port));
+    pb_type->num_ports = num_ports;
+
+    /* Enforce VPR's definition of LUT/FF by checking number of ports */
+    if (pb_type->class_type == LUT_CLASS
+        || pb_type->class_type == LATCH_CLASS) {
+        if (num_in_ports != 1 || num_out_ports != 1) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                           "%s primitives must contain exactly one input port and one output port. "
+                           "Found '%d' input port(s) and '%d' output port(s) for '%s'",
+                           (pb_type->class_type == LUT_CLASS) ? "LUT" : "Latch",
+                           num_in_ports, num_out_ports, pb_type->name);
+        }
+    }
+
+    /* Initialize Power Structure */
+    pb_type->pb_type_power = (t_pb_type_power*)vtr::calloc(1,
+                                                           sizeof(t_pb_type_power));
+    /* Must run before the ports are processed: the chosen method is handed to ProcessPb_TypePort() */
+    ProcessPb_TypePowerEstMethod(Parent, pb_type, loc_data);
+
+    /* process ports: all inputs first, then all outputs, then all clocks.
+     * j indexes pb_type->ports, k restarts at 0 for each of the three kinds. */
+    j = 0;
+    int absolute_port_first_pin_index = 0;
+
+    for (i = 0; i < 3; i++) {
+        if (i == 0) {
+            k = 0;
+            Cur = get_first_child(Parent, "input", loc_data, ReqOpt::OPTIONAL);
+        } else if (i == 1) {
+            k = 0;
+            Cur = get_first_child(Parent, "output", loc_data, ReqOpt::OPTIONAL);
+        } else {
+            k = 0;
+            Cur = get_first_child(Parent, "clock", loc_data, ReqOpt::OPTIONAL);
+        }
+        while (Cur) {
+            pb_type->ports[j].parent_pb_type = pb_type;
+            pb_type->ports[j].index = j;
+            pb_type->ports[j].port_index_by_type = k;
+            ProcessPb_TypePort(Cur, &pb_type->ports[j],
+                               pb_type->pb_type_power->estimation_method, is_root_pb_type, loc_data);
+
+            /* Running total of the pins of all ports seen so far */
+            pb_type->ports[j].absolute_first_pin_index = absolute_port_first_pin_index;
+            absolute_port_first_pin_index += pb_type->ports[j].num_pins;
+
+            //Check port name duplicates
+            ret_pb_ports = pb_port_names.insert(std::pair<std::string, int>(pb_type->ports[j].name, 0));
+            if (!ret_pb_ports.second) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Cur),
+                               "Duplicate port names in pb_type '%s': port '%s'\n",
+                               pb_type->name, pb_type->ports[j].name);
+            }
+
+            /* get next iteration */
+            j++;
+            k++;
+            Cur = Cur.next_sibling(Cur.name());
+        }
+    }
+
+    VTR_ASSERT(j == num_ports);
+
+    /* Count stats on the number of each type of pin */
+    pb_type->num_clock_pins = pb_type->num_input_pins = pb_type->num_output_pins = 0;
+    for (i = 0; i < pb_type->num_ports; i++) {
+        if (pb_type->ports[i].type == IN_PORT
+            && pb_type->ports[i].is_clock == false) {
+            pb_type->num_input_pins += pb_type->ports[i].num_pins;
+        } else if (pb_type->ports[i].type == OUT_PORT) {
+            pb_type->num_output_pins += pb_type->ports[i].num_pins;
+        } else {
+            VTR_ASSERT(pb_type->ports[i].is_clock
+                       && pb_type->ports[i].type == IN_PORT);
+            pb_type->num_clock_pins += pb_type->ports[i].num_pins;
+        }
+    }
+
+    pb_type->num_pins = pb_type->num_input_pins + pb_type->num_output_pins + pb_type->num_clock_pins;
+
+    //Warn that max_internal_delay is no longer supported
+    //TODO: eventually remove
+    try {
+        expect_child_node_count(Parent, "max_internal_delay", 0, loc_data);
+    } catch (pugiutil::XmlError& e) {
+        std::string msg = e.what();
+        msg += ". <max_internal_delay> has been replaced with <delay_constant>/<delay_matrix> between sequential primitive ports.";
+        msg += " Please upgrade your architecture file.";
+        archfpga_throw(e.filename().c_str(), e.line(), msg.c_str());
+    }
+
+    pb_type->annotations = nullptr;
+    pb_type->num_annotations = 0;
+    /* i doubles as the mode index in the container branch below, so it must start at 0 */
+    i = 0;
+    /* Determine if this is a leaf or container pb_type */
+    if (pb_type->blif_model != nullptr) {
+        /* Process delay and capacitance annotations */
+        num_annotations = 0;
+        num_delay_constant = count_children(Parent, "delay_constant", loc_data, ReqOpt::OPTIONAL);
+        num_delay_matrix = count_children(Parent, "delay_matrix", loc_data, ReqOpt::OPTIONAL);
+        num_C_constant = count_children(Parent, "C_constant", loc_data, ReqOpt::OPTIONAL);
+        num_C_matrix = count_children(Parent, "C_matrix", loc_data, ReqOpt::OPTIONAL);
+        num_T_setup = count_children(Parent, "T_setup", loc_data, ReqOpt::OPTIONAL);
+        num_T_cq = count_children(Parent, "T_clock_to_Q", loc_data, ReqOpt::OPTIONAL);
+        num_T_hold = count_children(Parent, "T_hold", loc_data, ReqOpt::OPTIONAL);
+        num_annotations = num_delay_constant + num_delay_matrix + num_C_constant
+                          + num_C_matrix + num_T_setup + num_T_cq + num_T_hold;
+
+        pb_type->annotations = (t_pin_to_pin_annotation*)vtr::calloc(num_annotations, sizeof(t_pin_to_pin_annotation));
+        pb_type->num_annotations = num_annotations;
+
+        /* Visit the seven annotation tag kinds in turn; j indexes pb_type->annotations */
+        j = 0;
+        for (i = 0; i < 7; i++) {
+            if (i == 0) {
+                Cur = get_first_child(Parent, "delay_constant", loc_data, ReqOpt::OPTIONAL);
+            } else if (i == 1) {
+                Cur = get_first_child(Parent, "delay_matrix", loc_data, ReqOpt::OPTIONAL);
+            } else if (i == 2) {
+                Cur = get_first_child(Parent, "C_constant", loc_data, ReqOpt::OPTIONAL);
+            } else if (i == 3) {
+                Cur = get_first_child(Parent, "C_matrix", loc_data, ReqOpt::OPTIONAL);
+            } else if (i == 4) {
+                Cur = get_first_child(Parent, "T_setup", loc_data, ReqOpt::OPTIONAL);
+            } else if (i == 5) {
+                Cur = get_first_child(Parent, "T_clock_to_Q", loc_data, ReqOpt::OPTIONAL);
+            } else if (i == 6) {
+                Cur = get_first_child(Parent, "T_hold", loc_data, ReqOpt::OPTIONAL);
+            }
+            while (Cur) {
+                ProcessPinToPinAnnotations(Cur, &pb_type->annotations[j],
+                                           pb_type, loc_data);
+
+                /* get next iteration */
+                j++;
+                Cur = Cur.next_sibling(Cur.name());
+            }
+        }
+        VTR_ASSERT(j == num_annotations);
+
+        if (timing_enabled) {
+            check_leaf_pb_model_timing_consistency(pb_type, arch);
+        }
+
+        /* leaf pb_type, if special known class, then read class lib otherwise treat as primitive */
+        if (pb_type->class_type == LUT_CLASS) {
+            ProcessLutClass(pb_type);
+        } else if (pb_type->class_type == MEMORY_CLASS) {
+            ProcessMemoryClass(pb_type);
+        } else {
+            /* other leaf pb_type do not have modes */
+            pb_type->num_modes = 0;
+            VTR_ASSERT(count_children(Parent, "mode", loc_data, ReqOpt::OPTIONAL) == 0);
+        }
+    } else {
+        /* container pb_type, process modes */
+        VTR_ASSERT(pb_type->class_type == UNKNOWN_CLASS);
+        pb_type->num_modes = count_children(Parent, "mode", loc_data, ReqOpt::OPTIONAL);
+        pb_type->pb_type_power->leakage_default_mode = 0;
+
+        if (pb_type->num_modes == 0) {
+            /* The pb_type operates in an implied one mode: the pb_type's own
+             * node is handed to ProcessMode() in place of a <mode> tag */
+            pb_type->num_modes = 1;
+            pb_type->modes = new t_mode[pb_type->num_modes];
+            pb_type->modes[i].parent_pb_type = pb_type;
+            pb_type->modes[i].index = i;
+            ProcessMode(strings, Parent, &pb_type->modes[i], timing_enabled, arch, loc_data);
+            i++;
+        } else {
+            pb_type->modes = new t_mode[pb_type->num_modes];
+
+            Cur = get_first_child(Parent, "mode", loc_data);
+            while (Cur != nullptr) {
+                if (0 == strcmp(Cur.name(), "mode")) {
+                    pb_type->modes[i].parent_pb_type = pb_type;
+                    pb_type->modes[i].index = i;
+                    ProcessMode(strings, Cur, &pb_type->modes[i], timing_enabled, arch, loc_data);
+
+                    ret_mode_names = mode_names.insert(std::pair<std::string, int>(pb_type->modes[i].name, 0));
+                    if (!ret_mode_names.second) {
+                        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Cur),
+                                       "Duplicate mode name: '%s' in pb_type '%s'.\n",
+                                       pb_type->modes[i].name, pb_type->name);
+                    }
+
+                    /* get next iteration */
+                    i++;
+                    Cur = Cur.next_sibling(Cur.name());
+                }
+            }
+        }
+        VTR_ASSERT(i == pb_type->num_modes);
+    }
+
+    pb_port_names.clear();
+    mode_names.clear();
+
+    pb_type->meta = ProcessMetadata(strings, Parent, loc_data);
+    ProcessPb_TypePower(Parent, pb_type, loc_data);
+}
+
+/* Allocates port->port_power and loads the wire and buffer power settings of a port.
+ *
+ *  Parent       - XML node of the port; its optional <power> child is read
+ *  port         - port whose port_power structure is created and filled in
+ *  power_method - power estimation method of the pb_type owning the port
+ *  loc_data     - file/line information used in error reports
+ *
+ * The wire_capacitance, wire_length, wire_relative_length and buffer_size
+ * attributes are accepted only for POWER_METHOD_AUTO_SIZES and
+ * POWER_METHOD_SPECIFY_SIZES, and at most one of the three wire attributes may
+ * be given. Violations are reported with archfpga_throw(). */
+static void ProcessPb_TypePort_Power(pugi::xml_node Parent, t_port* port, e_power_estimation_method power_method, const pugiutil::loc_data& loc_data) {
+    t_port_power* port_power = (t_port_power*)vtr::calloc(1, sizeof(t_port_power));
+    port->port_power = port_power;
+
+    /* Method-dependent defaults; other methods leave the fresh structure untouched */
+    switch (power_method) {
+        case POWER_METHOD_AUTO_SIZES:
+            port_power->wire_type = POWER_WIRE_TYPE_AUTO;
+            port_power->buffer_type = POWER_BUFFER_TYPE_AUTO;
+            break;
+        case POWER_METHOD_SPECIFY_SIZES:
+            port_power->wire_type = POWER_WIRE_TYPE_IGNORED;
+            port_power->buffer_type = POWER_BUFFER_TYPE_NONE;
+            break;
+        default:
+            break;
+    }
+
+    pugi::xml_node power_tag = get_single_child(Parent, "power", loc_data, ReqOpt::OPTIONAL);
+    if (!power_tag) {
+        return;
+    }
+
+    /* Explicit wire/buffer properties are only meaningful for these two methods */
+    const bool sizing_method = (power_method == POWER_METHOD_AUTO_SIZES)
+                               || (power_method == POWER_METHOD_SPECIFY_SIZES);
+    /* Set once any of the three wire attributes has been accepted */
+    bool wire_seen = false;
+    const char* text;
+
+    /* Wire capacitance: absolute C provided */
+    text = get_attribute(power_tag, "wire_capacitance", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+    if (text) {
+        if (!sizing_method) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(power_tag),
+                           "Wire capacitance defined for port '%s'.  This is an invalid option for the parent pb_type '%s' power estimation method.",
+                           port->name, port->parent_pb_type->name);
+        } else {
+            wire_seen = true;
+            port_power->wire_type = POWER_WIRE_TYPE_C;
+            port_power->wire.C = static_cast<float>(atof(text));
+        }
+    }
+
+    /* Wire absolute length provided ("auto" selects automatic sizing) */
+    text = get_attribute(power_tag, "wire_length", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+    if (text) {
+        if (!sizing_method) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(power_tag),
+                           "Wire length defined for port '%s'.  This is an invalid option for the parent pb_type '%s' power estimation method.",
+                           port->name, port->parent_pb_type->name);
+        } else if (wire_seen) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(power_tag),
+                           "Multiple wire properties defined for port '%s', pb_type '%s'.",
+                           port->name, port->parent_pb_type->name);
+        } else if (0 == strcmp(text, "auto")) {
+            wire_seen = true;
+            port_power->wire_type = POWER_WIRE_TYPE_AUTO;
+        } else {
+            wire_seen = true;
+            port_power->wire_type = POWER_WIRE_TYPE_ABSOLUTE_LENGTH;
+            port_power->wire.absolute_length = static_cast<float>(atof(text));
+        }
+    }
+
+    /* Wire relative length provided */
+    text = get_attribute(power_tag, "wire_relative_length", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+    if (text) {
+        if (!sizing_method) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(power_tag),
+                           "Wire relative length defined for port '%s'.  This is an invalid option for the parent pb_type '%s' power estimation method.",
+                           port->name, port->parent_pb_type->name);
+        } else if (wire_seen) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(power_tag),
+                           "Multiple wire properties defined for port '%s', pb_type '%s'.",
+                           port->name, port->parent_pb_type->name);
+        } else {
+            wire_seen = true;
+            port_power->wire_type = POWER_WIRE_TYPE_RELATIVE_LENGTH;
+            port_power->wire.relative_length = static_cast<float>(atof(text));
+        }
+    }
+
+    /* Buffer size ("auto" selects automatic sizing) */
+    text = get_attribute(power_tag, "buffer_size", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+    if (text) {
+        if (!sizing_method) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(power_tag),
+                           "Buffer size defined for port '%s'.  This is an invalid option for the parent pb_type '%s' power estimation method.",
+                           port->name, port->parent_pb_type->name);
+        } else if (0 == strcmp(text, "auto")) {
+            port_power->buffer_type = POWER_BUFFER_TYPE_AUTO;
+        } else {
+            port_power->buffer_type = POWER_BUFFER_TYPE_ABSOLUTE_SIZE;
+            port_power->buffer_size = static_cast<float>(atof(text));
+        }
+    }
+}
+
+static void ProcessPb_TypePort(pugi::xml_node Parent, t_port* port, e_power_estimation_method power_method, const bool is_root_pb_type, const pugiutil::loc_data& loc_data) {
+    std::vector<std::string> expected_attributes = {"name", "num_pins", "port_class"};
+    if (is_root_pb_type) {
+        expected_attributes.push_back("equivalent");
+
+        if (Parent.name() == "input"s || Parent.name() == "clock"s) {
+            expected_attributes.push_back("is_non_clock_global");
+        }
+    }
+
+    expect_only_attributes(Parent, expected_attributes, loc_data);
+
+    const char* Prop;
+    Prop = get_attribute(Parent, "name", loc_data).value();
+    port->name = vtr::strdup(Prop);
+
+    Prop = get_attribute(Parent, "port_class", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+    port->port_class = vtr::strdup(Prop);
+
+    Prop = get_attribute(Parent, "equivalent", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+    if (Prop) {
+        if (Prop == "none"s) {
+            port->equivalent = PortEquivalence::NONE;
+        } else if (Prop == "full"s) {
+            port->equivalent = PortEquivalence::FULL;
+        } else if (Prop == "instance"s) {
+            if (Parent.name() == "output"s) {
+                port->equivalent = PortEquivalence::INSTANCE;
+            } else {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                               "Invalid pin equivalence '%s' for %s port.", Prop, Parent.name());
+            }
+        } else {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                           "Invalid pin equivalence '%s'.", Prop);
+        }
+    }
+    port->num_pins = get_attribute(Parent, "num_pins", loc_data).as_int(0);
+    port->is_non_clock_global = get_attribute(Parent,
+                                              "is_non_clock_global", loc_data, ReqOpt::OPTIONAL)
+                                    .as_bool(false);
+
+    if (port->num_pins <= 0) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                       "Invalid number of pins %d for %s port.", port->num_pins, Parent.name());
+    }
+
+    if (0 == strcmp(Parent.name(), "input")) {
+        port->type = IN_PORT;
+        port->is_clock = false;
+
+        /* Check if LUT/FF port class is lut_in/D */
+        if (port->parent_pb_type->class_type == LUT_CLASS) {
+            if ((!port->port_class) || strcmp("lut_in", port->port_class)) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                               "Inputs to LUT primitives must have a port class named "
+                               "as \"lut_in\".");
+            }
+        } else if (port->parent_pb_type->class_type == LATCH_CLASS) {
+            if ((!port->port_class) || strcmp("D", port->port_class)) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                               "Input to flipflop primitives must have a port class named "
+                               "as \"D\".");
+            }
+            /* Only allow one input pin for FF's */
+            if (port->num_pins != 1) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                               "Input port of flipflop primitives must have exactly one pin. "
+                               "Found %d.",
+                               port->num_pins);
+            }
+        }
+
+    } else if (0 == strcmp(Parent.name(), "output")) {
+        port->type = OUT_PORT;
+        port->is_clock = false;
+
+        /* Check if LUT/FF port class is lut_out/Q */
+        if (port->parent_pb_type->class_type == LUT_CLASS) {
+            if ((!port->port_class) || strcmp("lut_out", port->port_class)) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                               "Output to LUT primitives must have a port class named "
+                               "as \"lut_in\".");
+            }
+            /* Only allow one output pin for LUT's */
+            if (port->num_pins != 1) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                               "Output port of LUT primitives must have exactly one pin. "
+                               "Found %d.",
+                               port->num_pins);
+            }
+        } else if (port->parent_pb_type->class_type == LATCH_CLASS) {
+            if ((!port->port_class) || strcmp("Q", port->port_class)) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                               "Output to flipflop primitives must have a port class named "
+                               "as \"D\".");
+            }
+            /* Only allow one output pin for FF's */
+            if (port->num_pins != 1) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                               "Output port of flipflop primitives must have exactly one pin. "
+                               "Found %d.",
+                               port->num_pins);
+            }
+        }
+    } else if (0 == strcmp(Parent.name(), "clock")) {
+        port->type = IN_PORT;
+        port->is_clock = true;
+        if (port->is_non_clock_global == true) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                           "Port %s cannot be both a clock and a non-clock simultaneously\n",
+                           Parent.name());
+        }
+
+        if (port->parent_pb_type->class_type == LATCH_CLASS) {
+            if ((!port->port_class) || strcmp("clock", port->port_class)) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                               "Clock to flipflop primitives must have a port class named "
+                               "as \"clock\".");
+            }
+            /* Only allow one output pin for FF's */
+            if (port->num_pins != 1) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                               "Clock port of flipflop primitives must have exactly one pin. "
+                               "Found %d.",
+                               port->num_pins);
+            }
+        }
+    } else {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                       "Unknown port type %s", Parent.name());
+    }
+
+    ProcessPb_TypePort_Power(Parent, port, power_method, loc_data);
+}
+
+/* Reads every <complete>, <direct> and <mux> child of Parent into
+ * mode->interconnect (allocated here), together with each connection's
+ * metadata, its delay/capacitance/pack-pattern annotations and a freshly
+ * allocated power record. A repeated interconnect name within the mode is
+ * reported through archfpga_throw. */
+static void ProcessInterconnect(vtr::string_internment* strings, pugi::xml_node Parent, t_mode* mode, const pugiutil::loc_data& loc_data) {
+    /* Tags are visited in array order; that order fixes the layout of
+     * mode->interconnect and of every per-interconnect annotations array. */
+    static const char* const interc_tags[] = {"complete", "direct", "mux"};
+    static const char* const annotation_tags[] = {"delay_constant", "delay_matrix", "C_constant", "C_matrix", "pack_pattern"};
+
+    int num_interconnect = 0;
+    for (const char* tag : interc_tags) {
+        num_interconnect += count_children(Parent, tag, loc_data, ReqOpt::OPTIONAL);
+    }
+
+    mode->num_interconnect = num_interconnect;
+    mode->interconnect = new t_interconnect[num_interconnect];
+
+    /* Names already used in this mode, kept only for duplicate detection */
+    std::map<std::string, int> seen_names;
+
+    int next_slot = 0;
+    for (const char* tag : interc_tags) {
+        pugi::xml_node node = get_first_child(Parent, tag, loc_data, ReqOpt::OPTIONAL);
+        for (; node != nullptr; node = node.next_sibling(node.name()), ++next_slot) {
+            t_interconnect& interc = mode->interconnect[next_slot];
+
+            /* The tag name selects the interconnect kind */
+            if (0 == strcmp(node.name(), "complete")) {
+                interc.type = COMPLETE_INTERC;
+            } else if (0 == strcmp(node.name(), "direct")) {
+                interc.type = DIRECT_INTERC;
+            } else {
+                VTR_ASSERT(0 == strcmp(node.name(), "mux"));
+                interc.type = MUX_INTERC;
+            }
+
+            interc.line_num = loc_data.line(node);
+            interc.parent_mode_index = mode->index;
+            interc.parent_mode = mode;
+
+            /* Attributes are fetched in the order input, output, name */
+            interc.input_string = vtr::strdup(get_attribute(node, "input", loc_data).value());
+            interc.output_string = vtr::strdup(get_attribute(node, "output", loc_data).value());
+            interc.name = vtr::strdup(get_attribute(node, "name", loc_data).value());
+            interc.meta = ProcessMetadata(strings, node, loc_data);
+
+            const bool name_is_new = seen_names.insert(std::make_pair(std::string(interc.name), 0)).second;
+            if (!name_is_new) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(node),
+                               "Duplicate interconnect name: '%s' in mode: '%s'.\n",
+                               interc.name, mode->name);
+            }
+
+            /* Size the annotation array from the number of annotation children */
+            int num_annotations = 0;
+            for (const char* annotation_tag : annotation_tags) {
+                num_annotations += count_children(node, annotation_tag, loc_data, ReqOpt::OPTIONAL);
+            }
+
+            interc.annotations = (t_pin_to_pin_annotation*)vtr::calloc(num_annotations,
+                                                                       sizeof(t_pin_to_pin_annotation));
+            interc.num_annotations = num_annotations;
+
+            /* Fill the array, one tag kind after another */
+            int filled = 0;
+            for (const char* annotation_tag : annotation_tags) {
+                pugi::xml_node annotation = get_first_child(node, annotation_tag, loc_data, ReqOpt::OPTIONAL);
+                while (annotation != nullptr) {
+                    ProcessPinToPinAnnotations(annotation, &interc.annotations[filled], nullptr, loc_data);
+                    ++filled;
+                    annotation = annotation.next_sibling(annotation.name());
+                }
+            }
+            VTR_ASSERT(filled == num_annotations);
+
+            /* Power */
+            interc.interconnect_power = (t_interconnect_power*)vtr::calloc(1, sizeof(t_interconnect_power));
+            interc.interconnect_power->port_info_initialized = false;
+        }
+    }
+
+    VTR_ASSERT(next_slot == num_interconnect);
+}
+
+/* Fills in 'mode' from Parent. Parent is either an explicit mode tag or, when
+ * its tag name is "pb_type", the enclosing pb_type acting as an implied mode
+ * (which is then named "default").
+ *
+ * The function sets the mode name and the disable_packing flag, processes every
+ * <pb_type> child into mode->pb_type_children (rejecting duplicate names),
+ * allocates the mode power record, loads metadata for explicit modes, and
+ * finally hands the <interconnect> child to ProcessInterconnect. */
+static void ProcessMode(vtr::string_internment* strings, pugi::xml_node Parent, t_mode* mode, const bool timing_enabled, const t_arch& arch, const pugiutil::loc_data& loc_data) {
+    const bool implied_mode = (0 == strcmp(Parent.name(), "pb_type"));
+    if (implied_mode) {
+        mode->name = vtr::strdup("default");
+    } else {
+        const char* Prop = get_attribute(Parent, "name", loc_data).value();
+        mode->name = vtr::strdup(Prop);
+    }
+
+    /* Parse XML about if this mode is disabled for packing or not.
+     * By default, every mode is visible to the packer.
+     */
+    mode->disable_packing = false;
+
+    /* If the parent mode is disabled for packing,
+     * all the child modes should be disabled for packing as well
+     */
+    if (nullptr != mode->parent_pb_type->parent_mode) {
+        mode->disable_packing = mode->parent_pb_type->parent_mode->disable_packing;
+    }
+
+    /* Override if the user specifies the attribute explicitly */
+    mode->disable_packing = get_attribute(Parent, "disable_packing", loc_data, ReqOpt::OPTIONAL).as_bool(mode->disable_packing);
+    if (mode->disable_packing) {
+        VTR_LOG("mode '%s[%s]' is defined by user to be disabled in packing\n",
+                mode->parent_pb_type->name,
+                mode->name);
+    }
+
+    mode->num_pb_type_children = count_children(Parent, "pb_type", loc_data, ReqOpt::OPTIONAL);
+    if (mode->num_pb_type_children > 0) {
+        mode->pb_type_children = new t_pb_type[mode->num_pb_type_children];
+
+        /* Child names seen so far, used only to reject duplicates */
+        std::map<std::string, int> pb_type_names;
+
+        /* The sibling walk advances unconditionally on every iteration, so the
+         * loop always terminates; both get_first_child and next_sibling are
+         * asked for "pb_type" nodes only. */
+        int i = 0;
+        for (pugi::xml_node Cur = get_first_child(Parent, "pb_type", loc_data);
+             Cur != nullptr;
+             Cur = Cur.next_sibling(Cur.name())) {
+            ProcessPb_Type(strings, Cur, &mode->pb_type_children[i], mode, timing_enabled, arch, loc_data);
+
+            const bool name_is_new = pb_type_names.insert(
+                                                      std::pair<std::string, int>(mode->pb_type_children[i].name, 0))
+                                         .second;
+            if (!name_is_new) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Cur),
+                               "Duplicate pb_type name: '%s' in mode: '%s'.\n",
+                               mode->pb_type_children[i].name, mode->name);
+            }
+
+            i++;
+        }
+    } else {
+        mode->pb_type_children = nullptr;
+    }
+
+    /* Allocate power structure */
+    mode->mode_power = (t_mode_power*)vtr::calloc(1, sizeof(t_mode_power));
+
+    if (!implied_mode) {
+        // Implied mode metadata is attached to the pb_type, rather than
+        // the t_mode object.
+        mode->meta = ProcessMetadata(strings, Parent, loc_data);
+    }
+
+    pugi::xml_node interconnect = get_single_child(Parent, "interconnect", loc_data);
+    ProcessInterconnect(strings, interconnect, mode, loc_data);
+}
+
+/* Collects the optional <metadata> child of Parent into a dictionary.
+ * Each <meta> tag contributes one entry: the key is its "name" attribute and
+ * the value is its text content, both interned through 'strings'.
+ *
+ *   <metadata>
+ *     <meta name="key">value</meta>
+ *   </metadata>
+ *
+ * Returns an empty dictionary when Parent has no <metadata> child. */
+static t_metadata_dict ProcessMetadata(vtr::string_internment* strings, pugi::xml_node Parent, const pugiutil::loc_data& loc_data) {
+    t_metadata_dict data;
+
+    auto metadata = get_single_child(Parent, "metadata", loc_data, ReqOpt::OPTIONAL);
+    if (!metadata) {
+        return data;
+    }
+
+    for (auto meta_tag = get_first_child(metadata, "meta", loc_data);
+         meta_tag;
+         meta_tag = meta_tag.next_sibling(meta_tag.name())) {
+        auto key = get_attribute(meta_tag, "name", loc_data).as_string();
+        auto value = meta_tag.child_value();
+
+        data.add(strings->intern_string(vtr::string_view(key)),
+                 strings->intern_string(vtr::string_view(value)));
+    }
+
+    return data;
+}
+
+/* Loads the default Fc settings from Node into 'spec' and marks it as
+ * specified. The attributes are read in the order in_type, in_val, out_type,
+ * out_val. */
+static void Process_Fc_Values(pugi::xml_node Node, t_default_fc_spec& spec, const pugiutil::loc_data& loc_data) {
+    spec.specified = true;
+
+    /* Input side: value type, then value */
+    const char* in_type_str = get_attribute(Node, "in_type", loc_data).value();
+    spec.in_value_type = string_to_fc_value_type(in_type_str, Node, loc_data);
+    spec.in_value = vtr::atof(get_attribute(Node, "in_val", loc_data).value());
+
+    /* Output side: value type, then value */
+    const char* out_type_str = get_attribute(Node, "out_type", loc_data).value();
+    spec.out_value_type = string_to_fc_value_type(out_type_str, Node, loc_data);
+    spec.out_value = vtr::atof(get_attribute(Node, "out_val", loc_data).value());
+}
+
+/* Builds the Fc specifications for one sub tile and appends them to
+ * PhysicalTileType->fc_specs.
+ *
+ * Node is the 'fc' element; when it is null the architecture-wide default
+ * (arch_def_fc) is used, and it is an error if that default was not specified.
+ * One specification is produced per (segment, capacity instance, port)
+ * combination, starting from the default values and then applying every
+ * matching <fc_override/> in file order. */
+static void Process_Fc(pugi::xml_node Node,
+                       t_physical_tile_type* PhysicalTileType,
+                       t_sub_tile* SubTile,
+                       t_pin_counts pin_counts,
+                       std::vector<t_segment_inf>& segments,
+                       const t_default_fc_spec& arch_def_fc,
+                       const pugiutil::loc_data& loc_data) {
+    std::vector<t_fc_override> fc_overrides;
+    t_default_fc_spec def_fc_spec;
+
+    if (!Node) {
+        /* No fc element: fall back to the architecture default, if available */
+        if (!arch_def_fc.specified) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                           "<sub_tile> is missing child <fc>, and no <default_fc> specified in architecture\n");
+        }
+        def_fc_spec = arch_def_fc;
+    } else {
+        /* Load the default Fc values from the node, then any <fc_override/> tags */
+        Process_Fc_Values(Node, def_fc_spec, loc_data);
+        for (auto child_node : Node.children()) {
+            fc_overrides.push_back(Process_Fc_override(child_node, loc_data));
+        }
+    }
+
+    /* Go through all the port/segment combinations and create the (potentially
+     * overridden) pin/seg Fc specifications */
+    for (size_t iseg = 0; iseg < segments.size(); ++iseg) {
+        for (int icapacity = 0; icapacity < SubTile->capacity.total(); ++icapacity) {
+            //For capacity instances after the first, offset the block pin index by the
+            //number of pins per instance; this ensures that all pins have an Fc specification
+            int iblk_pin = icapacity * pin_counts.total();
+
+            for (const auto& port : SubTile->ports) {
+                t_fc_specification fc_spec;
+                fc_spec.seg_index = iseg;
+
+                //Start from the defaults for the port direction
+                if (port.type != IN_PORT) {
+                    VTR_ASSERT(port.type == OUT_PORT);
+                    fc_spec.fc_type = e_fc_type::OUT;
+                    fc_spec.fc_value_type = def_fc_spec.out_value_type;
+                    fc_spec.fc_value = def_fc_spec.out_value;
+                } else {
+                    fc_spec.fc_type = e_fc_type::IN;
+                    fc_spec.fc_value_type = def_fc_spec.in_value_type;
+                    fc_spec.fc_value = def_fc_spec.in_value;
+                }
+
+                //Apply any matching overrides. The search never stops early, so the
+                //last matching override (in file order) is the one that sticks.
+                bool already_overridden = false;
+                for (const auto& fc_override : fc_overrides) {
+                    const bool has_port_name = !fc_override.port_name.empty();
+                    const bool has_seg_name = !fc_override.seg_name.empty();
+
+                    bool matches = false;
+                    if (has_port_name && has_seg_name) {
+                        //Both names given: require an exact match on both
+                        matches = (fc_override.port_name == port.name && fc_override.seg_name == segments[iseg].name);
+                    } else if (has_port_name) {
+                        //Only the port name given: require it to match
+                        VTR_ASSERT(fc_override.seg_name.empty());
+                        matches = (fc_override.port_name == port.name);
+                    } else {
+                        //Only the segment name given: require it to match
+                        VTR_ASSERT(!fc_override.seg_name.empty());
+                        VTR_ASSERT(fc_override.port_name.empty());
+                        matches = (fc_override.seg_name == segments[iseg].name);
+                    }
+
+                    if (!matches) {
+                        continue;
+                    }
+
+                    if (already_overridden) {
+                        //Warn if multiple overrides match
+                        VTR_LOGF_WARN(loc_data.filename_c_str(), loc_data.line(Node), "Multiple matching Fc overrides found; the last will be applied\n");
+                    }
+
+                    fc_spec.fc_value_type = fc_override.fc_value_type;
+                    fc_spec.fc_value = fc_override.fc_value;
+                    already_overridden = true;
+                }
+
+                //Add all the pins from this port
+                //XXX: this assumes that iterating through the tile ports
+                //     in order yields the block pin order
+                for (int iport_pin = 0; iport_pin < port.num_pins; ++iport_pin, ++iblk_pin) {
+                    int true_physical_blk_pin = SubTile->sub_tile_to_tile_pin_indices[iblk_pin];
+                    fc_spec.pins.push_back(true_physical_blk_pin);
+                }
+
+                PhysicalTileType->fc_specs.push_back(fc_spec);
+            }
+        }
+    }
+}
+
+/* Parses one <fc_override/> tag into a t_fc_override.
+ *
+ * The tag must be named "fc_override" and is checked with
+ * expect_child_node_count(node, 0, ...). Recognised attributes are
+ * 'port_name', 'segment_name', 'fc_type' and 'fc_val'; any other attribute is
+ * an error. 'fc_type' and 'fc_val' are mandatory, and at least one of
+ * 'port_name' / 'segment_name' must be present. All errors are reported
+ * through archfpga_throw at the line of 'node'. */
+static t_fc_override Process_Fc_override(pugi::xml_node node, const pugiutil::loc_data& loc_data) {
+    if (node.name() != std::string("fc_override")) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(node),
+                       "Unexpected node of type '%s' (expected optional 'fc_override')",
+                       node.name());
+    }
+
+    t_fc_override fc_override;
+
+    expect_child_node_count(node, 0, loc_data);
+
+    bool seen_fc_type = false;
+    bool seen_fc_value = false;
+    bool seen_port_or_seg = false;
+    for (auto attrib : node.attributes()) {
+        if (attrib.name() == std::string("port_name")) {
+            fc_override.port_name = attrib.value();
+            seen_port_or_seg = true;
+        } else if (attrib.name() == std::string("segment_name")) {
+            fc_override.seg_name = attrib.value();
+            seen_port_or_seg = true;
+        } else if (attrib.name() == std::string("fc_type")) {
+            fc_override.fc_value_type = string_to_fc_value_type(attrib.value(), node, loc_data);
+            seen_fc_type = true;
+        } else if (attrib.name() == std::string("fc_val")) {
+            fc_override.fc_value = vtr::atof(attrib.value());
+            seen_fc_value = true;
+        } else {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(node),
+                           "Unexpected attribute '%s'", attrib.name());
+        }
+    }
+
+    if (!seen_fc_type) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(node),
+                       "Missing expected attribute 'fc_type'");
+    }
+
+    /* The message names the attribute exactly as it is spelled in the XML */
+    if (!seen_fc_value) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(node),
+                       "Missing expected attribute 'fc_val'");
+    }
+
+    if (!seen_port_or_seg) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(node),
+                       "Missing expected attribute(s) 'port_name' and/or 'segment_name'");
+    }
+
+    return fc_override;
+}
+
+/* Maps the textual Fc type to its enum: "abs" gives ABSOLUTE and "frac" gives
+ * FRACTIONAL. Any other string is reported through archfpga_throw at the line
+ * of 'node'. */
+static e_fc_value_type string_to_fc_value_type(const std::string& str, pugi::xml_node node, const pugiutil::loc_data& loc_data) {
+    if (str == "abs") {
+        return e_fc_value_type::ABSOLUTE;
+    }
+
+    if (str != "frac") {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(node),
+                       "Invalid fc_type '%s'. Must be 'abs' or 'frac'.\n",
+                       str.c_str());
+    }
+
+    return e_fc_value_type::FRACTIONAL;
+}
+
+/* Processes the switchblock location specification of a physical tile type.
+ *
+ * switchblock_locations is the tag being processed; its optional 'pattern'
+ * attribute defaults to "external_full_internal_straight". The function
+ * (re)initialises type->switchblock_locations and
+ * type->switchblock_switch_overrides as width x height matrices and fills them:
+ *
+ *  - pattern "custom": only the 'pattern' attribute and <sb_loc> children are
+ *    allowed. Every location starts as NONE and each <sb_loc> sets the type and
+ *    switch override at its (xoffset, yoffset); offsets outside the tile and
+ *    repeated locations are errors.
+ *  - any other pattern ("all", "external", "internal",
+ *    "external_full_internal_straight", "none"): every location gets the
+ *    internal type/switch, then the row yoffset == height-1 and the column
+ *    xoffset == width-1 get the external type/switch.
+ *
+ * Errors are reported through archfpga_throw. */
+static void ProcessSwitchblockLocations(pugi::xml_node switchblock_locations,
+                                        t_physical_tile_type* type,
+                                        const t_arch& arch,
+                                        const pugiutil::loc_data& loc_data) {
+    VTR_ASSERT(type);
+
+    expect_only_attributes(switchblock_locations, {"pattern", "internal_switch"}, loc_data);
+
+    std::string pattern = get_attribute(switchblock_locations, "pattern", loc_data, ReqOpt::OPTIONAL).as_string("external_full_internal_straight");
+
+    //Initialize the location specs
+    size_t width = type->width;
+    size_t height = type->height;
+    type->switchblock_locations = vtr::Matrix<e_sb_type>({{width, height}}, e_sb_type::NONE);
+    type->switchblock_switch_overrides = vtr::Matrix<int>({{width, height}}, DEFAULT_SWITCH);
+
+    if (pattern == "custom") {
+        expect_only_attributes(switchblock_locations, {"pattern"}, loc_data);
+
+        //Load a custom pattern specified with <sb_loc> tags
+        expect_only_children(switchblock_locations, {"sb_loc"}, loc_data); //Only sb_loc child tags
+
+        //Default to no SBs unless specified
+        type->switchblock_locations.fill(e_sb_type::NONE);
+
+        //Track which locations have been assigned to detect overlaps
+        auto assigned_locs = vtr::Matrix<bool>({{width, height}}, false);
+
+        for (pugi::xml_node sb_loc : switchblock_locations.children("sb_loc")) {
+            expect_only_attributes(sb_loc, {"type", "xoffset", "yoffset", "switch_override"}, loc_data);
+
+            //Determine the type
+            std::string sb_type_str = get_attribute(sb_loc, "type", loc_data, ReqOpt::OPTIONAL).as_string("full");
+            e_sb_type sb_type = e_sb_type::FULL;
+            if (sb_type_str == "none") {
+                sb_type = e_sb_type::NONE;
+            } else if (sb_type_str == "horizontal") {
+                sb_type = e_sb_type::HORIZONTAL;
+            } else if (sb_type_str == "vertical") {
+                sb_type = e_sb_type::VERTICAL;
+            } else if (sb_type_str == "turns") {
+                sb_type = e_sb_type::TURNS;
+            } else if (sb_type_str == "straight") {
+                sb_type = e_sb_type::STRAIGHT;
+            } else if (sb_type_str == "full") {
+                sb_type = e_sb_type::FULL;
+            } else {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(sb_loc),
+                               "Invalid <sb_loc> 'type' attribute '%s'\n",
+                               sb_type_str.c_str());
+            }
+
+            //Determine the switch type
+            int sb_switch_override = DEFAULT_SWITCH;
+
+            auto sb_switch_override_attr = get_attribute(sb_loc, "switch_override", loc_data, ReqOpt::OPTIONAL);
+            if (sb_switch_override_attr) {
+                std::string sb_switch_override_str = sb_switch_override_attr.as_string();
+                //Use the specified switch
+                sb_switch_override = find_switch_by_name(arch, sb_switch_override_str);
+
+                if (sb_switch_override == OPEN) {
+                    //Report at the offending <sb_loc>, like the other <sb_loc> errors in this loop
+                    archfpga_throw(loc_data.filename_c_str(), loc_data.line(sb_loc),
+                                   "Invalid <sb_loc> 'switch_override' attribute '%s' (no matching switch named '%s' found)\n",
+                                   sb_switch_override_str.c_str(), sb_switch_override_str.c_str());
+                }
+            }
+
+            //Get the horizontal offset.
+            //The check is written as '>= width' so that it cannot wrap around when width is 0,
+            //and the size_t upper bound is printed with %zu (passing size_t to %d is undefined).
+            size_t xoffset = get_attribute(sb_loc, "xoffset", loc_data, ReqOpt::OPTIONAL).as_uint(0);
+            if (xoffset >= width) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(sb_loc),
+                               "Invalid <sb_loc> 'xoffset' attribute '%zu' (must be in range [0,%zu])\n",
+                               xoffset, width - 1);
+            }
+
+            //Get the vertical offset (same checks as for the horizontal offset)
+            size_t yoffset = get_attribute(sb_loc, "yoffset", loc_data, ReqOpt::OPTIONAL).as_uint(0);
+            if (yoffset >= height) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(sb_loc),
+                               "Invalid <sb_loc> 'yoffset' attribute '%zu' (must be in range [0,%zu])\n",
+                               yoffset, height - 1);
+            }
+
+            //Check if this location has already been set
+            if (assigned_locs[xoffset][yoffset]) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(sb_loc),
+                               "Duplicate <sb_loc> specifications at xoffset=%zu yoffset=%zu\n",
+                               xoffset, yoffset);
+            }
+
+            //Set the custom sb location and type
+            type->switchblock_locations[xoffset][yoffset] = sb_type;
+            type->switchblock_switch_overrides[xoffset][yoffset] = sb_switch_override;
+            assigned_locs[xoffset][yoffset] = true; //Mark the location as set for error detection
+        }
+    } else { //Non-custom patterns
+        //Initialize defaults
+        int internal_switch = DEFAULT_SWITCH;
+        int external_switch = DEFAULT_SWITCH;
+        e_sb_type internal_type = e_sb_type::FULL;
+        e_sb_type external_type = e_sb_type::FULL;
+
+        //Determine any internal switch override
+        auto internal_switch_attr = get_attribute(switchblock_locations, "internal_switch", loc_data, ReqOpt::OPTIONAL);
+        if (internal_switch_attr) {
+            std::string internal_switch_name = internal_switch_attr.as_string();
+            //Use the specified switch
+            internal_switch = find_switch_by_name(arch, internal_switch_name);
+
+            if (internal_switch == OPEN) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(switchblock_locations),
+                               "Invalid <switchblock_locations> 'internal_switch' attribute '%s' (no matching switch named '%s' found)\n",
+                               internal_switch_name.c_str(), internal_switch_name.c_str());
+            }
+        }
+
+        //Identify switch block types
+        if (pattern == "all") {
+            internal_type = e_sb_type::FULL;
+            external_type = e_sb_type::FULL;
+
+        } else if (pattern == "external") {
+            internal_type = e_sb_type::NONE;
+            external_type = e_sb_type::FULL;
+
+        } else if (pattern == "internal") {
+            internal_type = e_sb_type::FULL;
+            external_type = e_sb_type::NONE;
+
+        } else if (pattern == "external_full_internal_straight") {
+            internal_type = e_sb_type::STRAIGHT;
+            external_type = e_sb_type::FULL;
+
+        } else if (pattern == "none") {
+            internal_type = e_sb_type::NONE;
+            external_type = e_sb_type::NONE;
+
+        } else {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(switchblock_locations),
+                           "Invalid <switchblock_locations> 'pattern' attribute '%s'\n",
+                           pattern.c_str());
+        }
+
+        //Fill in all locations (sets internal)
+        type->switchblock_locations.fill(internal_type);
+        type->switchblock_switch_overrides.fill(internal_switch);
+
+        //Fill in top edge external
+        size_t yoffset = height - 1;
+        for (size_t xoffset = 0; xoffset < width; ++xoffset) {
+            type->switchblock_locations[xoffset][yoffset] = external_type;
+            type->switchblock_switch_overrides[xoffset][yoffset] = external_switch;
+        }
+
+        //Fill in right edge external
+        size_t xoffset = width - 1;
+        for (yoffset = 0; yoffset < height; ++yoffset) {
+            type->switchblock_locations[xoffset][yoffset] = external_type;
+            type->switchblock_switch_overrides[xoffset][yoffset] = external_switch;
+        }
+    }
+}
+
+/* Takes in node pointing to <models> and loads all the
+ * child type objects.
+ *
+ * Every child must be a <model> tag carrying a 'name' attribute and,
+ * optionally, 'never_prune' ("true" or "false"). Its children must be
+ * <input_ports> / <output_ports> groups, which are handed to
+ * ProcessModelPorts. Model indices start at NUM_MODELS_IN_LIBRARY. Each
+ * finished model is pushed onto the front of arch->models, so the list ends up
+ * in reverse file order.
+ *
+ * If an ArchFpgaError is raised while a model is being built, the partially
+ * built model is released with free_arch_model and the error is rethrown. */
+static void ProcessModels(pugi::xml_node Node, t_arch* arch, const pugiutil::loc_data& loc_data) {
+    t_model* temp = nullptr;
+    /* Model names seen so far, used to reject duplicates */
+    std::map<std::string, int> model_name_map;
+
+    int L_index = NUM_MODELS_IN_LIBRARY;
+
+    arch->models = nullptr;
+    for (pugi::xml_node model : Node.children()) {
+        //Process each model
+        if (model.name() != std::string("model")) {
+            bad_tag(model, loc_data, Node, {"model"});
+        }
+
+        try {
+            temp = new t_model;
+            temp->index = L_index;
+            L_index++;
+
+            //Process the <model> tag attributes
+            for (pugi::xml_attribute attr : model.attributes()) {
+                if (attr.name() == std::string("never_prune")) {
+                    //Compare the attribute text in place; no copy is needed
+                    const char* never_prune_str = attr.value();
+
+                    if (std::strcmp(never_prune_str, "true") == 0) {
+                        temp->never_prune = true;
+                    } else if (std::strcmp(never_prune_str, "false") == 0) {
+                        temp->never_prune = false;
+                    } else {
+                        archfpga_throw(loc_data.filename_c_str(), loc_data.line(model),
+                                       "Unsupported never prune attribute value.");
+                    }
+                } else if (attr.name() == std::string("name")) {
+                    if (!temp->name) {
+                        //First name attr. seen
+                        temp->name = vtr::strdup(attr.value());
+                    } else {
+                        //Duplicate name
+                        archfpga_throw(loc_data.filename_c_str(), loc_data.line(model),
+                                       "Duplicate 'name' attribute on <model> tag.");
+                    }
+                } else {
+                    bad_attribute(attr, model, loc_data);
+                }
+            }
+
+            //A model without a name cannot be registered: building a std::string
+            //from the null name pointer below would be undefined behaviour
+            if (!temp->name) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(model),
+                               "Missing 'name' attribute on <model> tag.");
+            }
+
+            /* Try insert new model, check if already exist at the same time */
+            if (!model_name_map.insert(std::pair<std::string, int>(temp->name, 0)).second) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(model),
+                               "Duplicate model name: '%s'.\n", temp->name);
+            }
+
+            //Process the ports; port_names is shared by both groups of this model
+            std::set<std::string> port_names;
+            for (pugi::xml_node port_group : model.children()) {
+                if (port_group.name() == std::string("input_ports")) {
+                    ProcessModelPorts(port_group, temp, port_names, loc_data);
+                } else if (port_group.name() == std::string("output_ports")) {
+                    ProcessModelPorts(port_group, temp, port_names, loc_data);
+                } else {
+                    bad_tag(port_group, loc_data, model, {"input_ports", "output_ports"});
+                }
+            }
+
+            //Sanity check the model
+            check_model_clocks(temp, loc_data.filename_c_str(), loc_data.line(model));
+            check_model_combinational_sinks(temp, loc_data.filename_c_str(), loc_data.line(model));
+            warn_model_missing_timing(temp, loc_data.filename_c_str(), loc_data.line(model));
+        } catch (ArchFpgaError& e) {
+            free_arch_model(temp);
+            throw;
+        }
+
+        //Add the model at the head of the list
+        temp->next = arch->models;
+        arch->models = temp;
+    }
+}
+
+/* Parses an <input_ports> or <output_ports> group of a <model> tag.
+ *
+ * Each <port> child becomes a newly allocated t_model_ports that is pushed
+ * onto the front of model->inputs or model->outputs, depending on the name of
+ * 'port_group'.
+ *
+ * 'port_names' holds the port names already seen for this model; every
+ * accepted port is added to it, and a repeated name is reported as an error.
+ * Invalid tags, attributes or port settings are reported through
+ * bad_tag()/bad_attribute()/archfpga_throw(). */
+static void ProcessModelPorts(pugi::xml_node port_group, t_model* model, std::set<std::string>& port_names, const pugiutil::loc_data& loc_data) {
+    //The group tag itself takes no attributes
+    for (pugi::xml_attribute attr : port_group.attributes()) {
+        bad_attribute(attr, port_group, loc_data);
+    }
+
+    enum PORTS dir = ERR_PORT;
+    if (port_group.name() == std::string("input_ports")) {
+        dir = IN_PORT;
+    } else {
+        VTR_ASSERT(port_group.name() == std::string("output_ports"));
+        dir = OUT_PORT;
+    }
+
+    //Process each port
+    for (pugi::xml_node port : port_group.children()) {
+        //Should only be ports
+        if (port.name() != std::string("port")) {
+            bad_tag(port, loc_data, port_group, {"port"});
+        }
+
+        //Ports should have no children
+        for (pugi::xml_node port_child : port.children()) {
+            bad_tag(port_child, loc_data, port);
+        }
+
+        t_model_ports* model_port = new t_model_ports;
+
+        model_port->dir = dir;
+
+        //Process the attributes of each port
+        for (pugi::xml_attribute attr : port.attributes()) {
+            if (attr.name() == std::string("name")) {
+                model_port->name = vtr::strdup(attr.value());
+
+            } else if (attr.name() == std::string("is_clock")) {
+                model_port->is_clock = attribute_to_bool(port, attr, loc_data);
+
+            } else if (attr.name() == std::string("is_non_clock_global")) {
+                model_port->is_non_clock_global = attribute_to_bool(port, attr, loc_data);
+
+            } else if (attr.name() == std::string("clock")) {
+                model_port->clock = std::string(attr.value());
+
+            } else if (attr.name() == std::string("combinational_sink_ports")) {
+                model_port->combinational_sink_ports = vtr::split(attr.value());
+
+            } else {
+                bad_attribute(attr, port, loc_data);
+            }
+        }
+
+        //Sanity checks
+        //The name is validated first since the later messages print it
+        if (model_port->name == nullptr) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(port),
+                           "Model port is missing a name");
+        }
+
+        if (model_port->is_clock == true && model_port->is_non_clock_global == true) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(port),
+                           "Model port '%s' cannot be both a clock and a non-clock signal simultaneously", model_port->name);
+        }
+
+        //Record the name; a failed insertion means it was already used
+        if (!port_names.insert(model_port->name).second) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(port),
+                           "Duplicate model port named '%s'", model_port->name);
+        }
+
+        if (dir == OUT_PORT && !model_port->combinational_sink_ports.empty()) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(port),
+                           "Model output ports can not have combinational sink ports");
+        }
+
+        //Add the port (prepended to the matching list)
+        if (dir == IN_PORT) {
+            model_port->next = model->inputs;
+            model->inputs = model_port;
+
+        } else {
+            VTR_ASSERT(dir == OUT_PORT);
+
+            model_port->next = model->outputs;
+            model->outputs = model_port;
+        }
+    }
+}
+
+/* Parses the <layout> tag.
+ *
+ * Checks that the children are only <auto_layout>/<fixed_layout> tags, that
+ * at least one of them is present and that <auto_layout> appears at most
+ * once, then appends one grid definition per child to arch->grid_layouts. */
+static void ProcessLayout(pugi::xml_node layout_tag, t_arch* arch, const pugiutil::loc_data& loc_data) {
+    VTR_ASSERT(layout_tag.name() == std::string("layout"));
+
+    //<layout> itself takes no attributes
+    expect_only_attributes(layout_tag, {}, loc_data);
+
+    //Tally the two kinds of layout specification
+    size_t num_auto = 0;
+    size_t num_fixed = 0;
+    for (auto child : layout_tag.children()) {
+        const std::string child_name = child.name();
+
+        if (child_name == "auto_layout") {
+            ++num_auto;
+        } else if (child_name == "fixed_layout") {
+            ++num_fixed;
+        } else {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(child),
+                           "Unexpected tag type '<%s>', expected '<auto_layout>' or '<fixed_layout>'", child.name());
+        }
+    }
+
+    if (!(num_auto > 0 || num_fixed > 0)) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(layout_tag),
+                       "Expected either an <auto_layout> or <fixed_layout> tag");
+    }
+    if (num_auto > 1) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(layout_tag),
+                       "Expected at most one <auto_layout> tag");
+    }
+    VTR_ASSERT_MSG(num_auto <= 1, "<auto_layout> may appear at most once");
+
+    //Every child has been validated above; convert each into a grid definition
+    for (auto child : layout_tag.children()) {
+        arch->grid_layouts.emplace_back(ProcessGridLayout(&arch->strings, child, loc_data));
+    }
+}
+
+/* Builds a t_grid_def from a single <auto_layout> or <fixed_layout> tag.
+ *
+ * The tag's own attributes select the grid type: an <auto_layout> takes an
+ * optional aspect_ratio (default 1.0) and is named "auto"; a <fixed_layout>
+ * requires width, height and a name other than "auto".
+ *
+ * Each child tag is a block location specification (<perimeter>, <corners>,
+ * <fill>, <single>, <col>, <row> or <region>) with required 'type' and
+ * 'priority' attributes. It is translated into one or more t_grid_loc_def
+ * entries appended to grid_def.loc_defs, each carrying the metadata parsed
+ * from the child by ProcessMetadata(). Unknown tags and invalid names are
+ * reported through archfpga_throw(). */
+static t_grid_def ProcessGridLayout(vtr::string_internment* strings, pugi::xml_node layout_type_tag, const pugiutil::loc_data& loc_data) {
+    t_grid_def grid_def;
+
+    //Determine the grid specification type
+    if (layout_type_tag.name() == std::string("auto_layout")) {
+        expect_only_attributes(layout_type_tag, {"aspect_ratio"}, loc_data);
+
+        grid_def.grid_type = GridDefType::AUTO;
+
+        grid_def.aspect_ratio = get_attribute(layout_type_tag, "aspect_ratio", loc_data, ReqOpt::OPTIONAL).as_float(1.);
+        grid_def.name = "auto";
+
+    } else if (layout_type_tag.name() == std::string("fixed_layout")) {
+        expect_only_attributes(layout_type_tag, {"width", "height", "name"}, loc_data);
+
+        grid_def.grid_type = GridDefType::FIXED;
+        grid_def.width = get_attribute(layout_type_tag, "width", loc_data).as_int();
+        grid_def.height = get_attribute(layout_type_tag, "height", loc_data).as_int();
+        std::string name = get_attribute(layout_type_tag, "name", loc_data).value();
+
+        if (name == "auto") {
+            //We name <auto_layout> as 'auto', so don't allow a user to specify it
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(layout_type_tag),
+                           "The name '%s' is reserved for auto-sized layouts; please choose another name",
+                           name.c_str());
+        }
+        grid_def.name = name;
+
+    } else {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(layout_type_tag),
+                       "Unexpected tag '<%s>'. Expected '<auto_layout>' or '<fixed_layout>'.",
+                       layout_type_tag.name());
+    }
+
+    //Process all the block location specifications
+    for (auto loc_spec_tag : layout_type_tag.children()) {
+        auto loc_type = loc_spec_tag.name();
+        auto type_name = get_attribute(loc_spec_tag, "type", loc_data).value();
+        int priority = get_attribute(loc_spec_tag, "priority", loc_data).as_int();
+        t_metadata_dict meta = ProcessMetadata(strings, loc_spec_tag, loc_data);
+
+        if (loc_type == std::string("perimeter")) {
+            expect_only_attributes(loc_spec_tag, {"type", "priority"}, loc_data);
+
+            //The edges
+            t_grid_loc_def left_edge(type_name, priority); //Including corners
+            left_edge.x.start_expr = "0";
+            left_edge.x.end_expr = "0";
+            left_edge.y.start_expr = "0";
+            left_edge.y.end_expr = "H - 1";
+
+            t_grid_loc_def right_edge(type_name, priority); //Including corners
+            right_edge.x.start_expr = "W - 1";
+            right_edge.x.end_expr = "W - 1";
+            right_edge.y.start_expr = "0";
+            right_edge.y.end_expr = "H - 1";
+
+            t_grid_loc_def bottom_edge(type_name, priority); //Excluding corners
+            bottom_edge.x.start_expr = "1";
+            bottom_edge.x.end_expr = "W - 2";
+            bottom_edge.y.start_expr = "0";
+            bottom_edge.y.end_expr = "0";
+
+            t_grid_loc_def top_edge(type_name, priority); //Excluding corners
+            top_edge.x.start_expr = "1";
+            top_edge.x.end_expr = "W - 2";
+            top_edge.y.start_expr = "H - 1";
+            top_edge.y.end_expr = "H - 1";
+
+            //All four edges share one metadata dictionary, owned by the left edge
+            left_edge.owned_meta = std::make_unique<t_metadata_dict>(meta);
+            left_edge.meta = left_edge.owned_meta.get();
+            right_edge.meta = left_edge.owned_meta.get();
+            top_edge.meta = left_edge.owned_meta.get();
+            bottom_edge.meta = left_edge.owned_meta.get();
+
+            grid_def.loc_defs.emplace_back(std::move(left_edge));
+            grid_def.loc_defs.emplace_back(std::move(right_edge));
+            grid_def.loc_defs.emplace_back(std::move(top_edge));
+            grid_def.loc_defs.emplace_back(std::move(bottom_edge));
+
+        } else if (loc_type == std::string("corners")) {
+            expect_only_attributes(loc_spec_tag, {"type", "priority"}, loc_data);
+
+            //The corners
+            t_grid_loc_def bottom_left(type_name, priority);
+            bottom_left.x.start_expr = "0";
+            bottom_left.x.end_expr = "0";
+            bottom_left.y.start_expr = "0";
+            bottom_left.y.end_expr = "0";
+
+            t_grid_loc_def top_left(type_name, priority);
+            top_left.x.start_expr = "0";
+            top_left.x.end_expr = "0";
+            top_left.y.start_expr = "H-1";
+            top_left.y.end_expr = "H-1";
+
+            t_grid_loc_def bottom_right(type_name, priority);
+            bottom_right.x.start_expr = "W-1";
+            bottom_right.x.end_expr = "W-1";
+            bottom_right.y.start_expr = "0";
+            bottom_right.y.end_expr = "0";
+
+            t_grid_loc_def top_right(type_name, priority);
+            top_right.x.start_expr = "W-1";
+            top_right.x.end_expr = "W-1";
+            top_right.y.start_expr = "H-1";
+            top_right.y.end_expr = "H-1";
+
+            //All four corners share one metadata dictionary, owned by the bottom-left one
+            bottom_left.owned_meta = std::make_unique<t_metadata_dict>(meta);
+            bottom_left.meta = bottom_left.owned_meta.get();
+            top_left.meta = bottom_left.owned_meta.get();
+            bottom_right.meta = bottom_left.owned_meta.get();
+            top_right.meta = bottom_left.owned_meta.get();
+
+            grid_def.loc_defs.emplace_back(std::move(bottom_left));
+            grid_def.loc_defs.emplace_back(std::move(top_left));
+            grid_def.loc_defs.emplace_back(std::move(bottom_right));
+            grid_def.loc_defs.emplace_back(std::move(top_right));
+
+        } else if (loc_type == std::string("fill")) {
+            expect_only_attributes(loc_spec_tag, {"type", "priority"}, loc_data);
+
+            t_grid_loc_def fill(type_name, priority);
+            fill.x.start_expr = "0";
+            fill.x.end_expr = "W - 1";
+            fill.y.start_expr = "0";
+            fill.y.end_expr = "H - 1";
+
+            fill.owned_meta = std::make_unique<t_metadata_dict>(meta);
+            fill.meta = fill.owned_meta.get();
+
+            grid_def.loc_defs.emplace_back(std::move(fill));
+
+        } else if (loc_type == std::string("single")) {
+            expect_only_attributes(loc_spec_tag, {"type", "priority", "x", "y"}, loc_data);
+
+            t_grid_loc_def single(type_name, priority);
+            single.x.start_expr = get_attribute(loc_spec_tag, "x", loc_data).value();
+            single.y.start_expr = get_attribute(loc_spec_tag, "y", loc_data).value();
+            single.x.end_expr = single.x.start_expr + " + w - 1";
+            single.y.end_expr = single.y.start_expr + " + h - 1";
+
+            single.owned_meta = std::make_unique<t_metadata_dict>(meta);
+            single.meta = single.owned_meta.get();
+
+            grid_def.loc_defs.emplace_back(std::move(single));
+
+        } else if (loc_type == std::string("col")) {
+            expect_only_attributes(loc_spec_tag, {"type", "priority", "startx", "repeatx", "starty", "incry"}, loc_data);
+
+            t_grid_loc_def col(type_name, priority);
+
+            auto startx_attr = get_attribute(loc_spec_tag, "startx", loc_data);
+
+            col.x.start_expr = startx_attr.value();
+            col.x.end_expr = startx_attr.value() + std::string(" + w - 1"); //end is inclusive so need to include block width
+
+            //The remaining attributes are optional; unset ones keep the t_grid_loc_def defaults
+            auto repeat_attr = get_attribute(loc_spec_tag, "repeatx", loc_data, ReqOpt::OPTIONAL);
+            if (repeat_attr) {
+                col.x.repeat_expr = repeat_attr.value();
+            }
+
+            auto starty_attr = get_attribute(loc_spec_tag, "starty", loc_data, ReqOpt::OPTIONAL);
+            if (starty_attr) {
+                col.y.start_expr = starty_attr.value();
+            }
+
+            auto incry_attr = get_attribute(loc_spec_tag, "incry", loc_data, ReqOpt::OPTIONAL);
+            if (incry_attr) {
+                col.y.incr_expr = incry_attr.value();
+            }
+
+            col.owned_meta = std::make_unique<t_metadata_dict>(meta);
+            col.meta = col.owned_meta.get();
+
+            grid_def.loc_defs.emplace_back(std::move(col));
+
+        } else if (loc_type == std::string("row")) {
+            expect_only_attributes(loc_spec_tag, {"type", "priority", "starty", "repeaty", "startx", "incrx"}, loc_data);
+
+            t_grid_loc_def row(type_name, priority);
+
+            auto starty_attr = get_attribute(loc_spec_tag, "starty", loc_data);
+
+            row.y.start_expr = starty_attr.value();
+            row.y.end_expr = starty_attr.value() + std::string(" + h - 1"); //end is inclusive so need to include block height
+
+            //The remaining attributes are optional; unset ones keep the t_grid_loc_def defaults
+            auto repeat_attr = get_attribute(loc_spec_tag, "repeaty", loc_data, ReqOpt::OPTIONAL);
+            if (repeat_attr) {
+                row.y.repeat_expr = repeat_attr.value();
+            }
+
+            auto startx_attr = get_attribute(loc_spec_tag, "startx", loc_data, ReqOpt::OPTIONAL);
+            if (startx_attr) {
+                row.x.start_expr = startx_attr.value();
+            }
+
+            auto incrx_attr = get_attribute(loc_spec_tag, "incrx", loc_data, ReqOpt::OPTIONAL);
+            if (incrx_attr) {
+                row.x.incr_expr = incrx_attr.value();
+            }
+
+            row.owned_meta = std::make_unique<t_metadata_dict>(meta);
+            row.meta = row.owned_meta.get();
+
+            grid_def.loc_defs.emplace_back(std::move(row));
+        } else if (loc_type == std::string("region")) {
+            expect_only_attributes(loc_spec_tag,
+                                   {"type", "priority",
+                                    "startx", "endx", "repeatx", "incrx",
+                                    "starty", "endy", "repeaty", "incry"},
+                                   loc_data);
+            t_grid_loc_def region(type_name, priority);
+
+            //Every positional attribute of a region is optional
+            auto startx_attr = get_attribute(loc_spec_tag, "startx", loc_data, ReqOpt::OPTIONAL);
+            if (startx_attr) {
+                region.x.start_expr = startx_attr.value();
+            }
+
+            auto endx_attr = get_attribute(loc_spec_tag, "endx", loc_data, ReqOpt::OPTIONAL);
+            if (endx_attr) {
+                region.x.end_expr = endx_attr.value();
+            }
+
+            auto starty_attr = get_attribute(loc_spec_tag, "starty", loc_data, ReqOpt::OPTIONAL);
+            if (starty_attr) {
+                region.y.start_expr = starty_attr.value();
+            }
+
+            auto endy_attr = get_attribute(loc_spec_tag, "endy", loc_data, ReqOpt::OPTIONAL);
+            if (endy_attr) {
+                region.y.end_expr = endy_attr.value();
+            }
+
+            auto repeatx_attr = get_attribute(loc_spec_tag, "repeatx", loc_data, ReqOpt::OPTIONAL);
+            if (repeatx_attr) {
+                region.x.repeat_expr = repeatx_attr.value();
+            }
+
+            auto repeaty_attr = get_attribute(loc_spec_tag, "repeaty", loc_data, ReqOpt::OPTIONAL);
+            if (repeaty_attr) {
+                region.y.repeat_expr = repeaty_attr.value();
+            }
+
+            auto incrx_attr = get_attribute(loc_spec_tag, "incrx", loc_data, ReqOpt::OPTIONAL);
+            if (incrx_attr) {
+                region.x.incr_expr = incrx_attr.value();
+            }
+
+            auto incry_attr = get_attribute(loc_spec_tag, "incry", loc_data, ReqOpt::OPTIONAL);
+            if (incry_attr) {
+                region.y.incr_expr = incry_attr.value();
+            }
+
+            region.owned_meta = std::make_unique<t_metadata_dict>(meta);
+            region.meta = region.owned_meta.get();
+
+            grid_def.loc_defs.emplace_back(std::move(region));
+        } else {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(loc_spec_tag),
+                           "Unrecognized grid location specification type '%s'\n", loc_type);
+        }
+    }
+
+    //TODO: warn if any type has no grid location specified
+
+    return grid_def;
+}
+
+/* Parses the <device> tag.
+ *
+ * Fills the device-wide fields of 'arch' from the <sizing>, <area>,
+ * <chan_width_distr> (optional), <connection_block> and <switch_block>
+ * children, and loads the optional <default_fc> child into 'arch_def_fc'
+ * (arch_def_fc.specified records whether it was present). */
+static void ProcessDevice(pugi::xml_node Node, t_arch* arch, t_default_fc_spec& arch_def_fc, const pugiutil::loc_data& loc_data) {
+    //<timing> is no longer supported: report it together with an upgrade hint
+    //TODO: eventually remove
+    try {
+        expect_child_node_count(Node, "timing", 0, loc_data);
+    } catch (pugiutil::XmlError& e) {
+        const std::string upgrade_msg = std::string(e.what())
+                                        + ". <timing> has been replaced with the <switch_block> tag."
+                                        + " Please upgrade your architecture file.";
+        archfpga_throw(e.filename().c_str(), e.line(), upgrade_msg.c_str());
+    }
+
+    expect_only_children(Node, {"sizing", "area", "chan_width_distr", "switch_block", "connection_block", "default_fc"}, loc_data);
+
+    //<sizing> tag
+    pugi::xml_node sizing_tag = get_single_child(Node, "sizing", loc_data);
+    expect_only_attributes(sizing_tag, {"R_minW_nmos", "R_minW_pmos"}, loc_data);
+    arch->R_minW_nmos = get_attribute(sizing_tag, "R_minW_nmos", loc_data).as_float();
+    arch->R_minW_pmos = get_attribute(sizing_tag, "R_minW_pmos", loc_data).as_float();
+
+    //<area> tag (the area attribute itself is optional and defaults to 0)
+    pugi::xml_node area_tag = get_single_child(Node, "area", loc_data);
+    expect_only_attributes(area_tag, {"grid_logic_tile_area"}, loc_data);
+    auto tile_area_attr = get_attribute(area_tag, "grid_logic_tile_area", loc_data, ReqOpt::OPTIONAL);
+    arch->grid_logic_tile_area = tile_area_attr.as_float(0);
+
+    //<chan_width_distr> tag (optional)
+    pugi::xml_node chan_distr_tag = get_single_child(Node, "chan_width_distr", loc_data, ReqOpt::OPTIONAL);
+    expect_only_attributes(chan_distr_tag, {}, loc_data);
+    if (chan_distr_tag) {
+        ProcessChanWidthDistr(chan_distr_tag, arch, loc_data);
+    }
+
+    //<connection_block> tag
+    pugi::xml_node cblock_tag = get_single_child(Node, "connection_block", loc_data);
+    expect_only_attributes(cblock_tag, {"input_switch_name"}, loc_data);
+    arch->ipin_cblock_switch_name = get_attribute(cblock_tag, "input_switch_name", loc_data).as_string();
+
+    //<switch_block> tag
+    pugi::xml_node sblock_tag = get_single_child(Node, "switch_block", loc_data);
+    expect_only_attributes(sblock_tag, {"type", "fs"}, loc_data);
+
+    const std::string sb_type = get_attribute(sblock_tag, "type", loc_data).value();
+    bool is_custom_sb = false;
+    if (sb_type == "wilton") {
+        arch->SBType = WILTON;
+    } else if (sb_type == "universal") {
+        arch->SBType = UNIVERSAL;
+    } else if (sb_type == "subset") {
+        arch->SBType = SUBSET;
+    } else if (sb_type == "custom") {
+        arch->SBType = CUSTOM;
+        is_custom_sb = true;
+    } else {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(sblock_tag),
+                       "Unknown property %s for switch block type x\n", sb_type.c_str());
+    }
+
+    //'fs' is only mandatory for the non-custom switch block types; it defaults to 3
+    const ReqOpt fs_requirement = BoolToReqOpt(!is_custom_sb);
+    arch->Fs = get_attribute(sblock_tag, "fs", loc_data, fs_requirement).as_int(3);
+
+    //<default_fc> tag (optional)
+    pugi::xml_node default_fc_tag = get_single_child(Node, "default_fc", loc_data, ReqOpt::OPTIONAL);
+    if (!default_fc_tag) {
+        arch_def_fc.specified = false;
+        return;
+    }
+
+    arch_def_fc.specified = true;
+    expect_only_attributes(default_fc_tag, {"in_type", "in_val", "out_type", "out_val"}, loc_data);
+    Process_Fc_Values(default_fc_tag, arch_def_fc, loc_data);
+}
+
+/* Parses the <chan_width_distr> tag: its <x> and <y> children (the only
+ * children allowed) are loaded into arch->Chans.chan_x_dist and
+ * arch->Chans.chan_y_dist respectively. */
+static void ProcessChanWidthDistr(pugi::xml_node Node,
+                                  t_arch* arch,
+                                  const pugiutil::loc_data& loc_data) {
+    expect_only_children(Node, {"x", "y"}, loc_data);
+
+    //Distribution of the x-directed channels
+    pugi::xml_node x_tag = get_single_child(Node, "x", loc_data);
+    ProcessChanWidthDistrDir(x_tag, &arch->Chans.chan_x_dist, loc_data);
+
+    //Distribution of the y-directed channels
+    pugi::xml_node y_tag = get_single_child(Node, "y", loc_data);
+    ProcessChanWidthDistrDir(y_tag, &arch->Chans.chan_y_dist, loc_data);
+}
+
+/* Loads one direction (<x> or <y>) of a <chan_width_distr> tag into 'chan'.
+ *
+ * The 'distr' attribute selects the distribution type and, with it, which of
+ * the 'width', 'xpeak' and 'dc' attributes are required; 'peak' is always
+ * required. Optional attributes that are absent are read as 0. */
+static void ProcessChanWidthDistrDir(pugi::xml_node Node, t_chan* chan, const pugiutil::loc_data& loc_data) {
+    //Requirement level of each shape attribute; tightened below per distribution
+    ReqOpt width_req = ReqOpt::OPTIONAL;
+    ReqOpt xpeak_req = ReqOpt::OPTIONAL;
+    ReqOpt dc_req = ReqOpt::OPTIONAL;
+
+    const std::string distr = get_attribute(Node, "distr", loc_data).value();
+    if (distr == "uniform") {
+        chan->type = UNIFORM;
+    } else if (distr == "gaussian" || distr == "pulse") {
+        chan->type = (distr == "gaussian") ? GAUSSIAN : PULSE;
+        width_req = ReqOpt::REQUIRED;
+        xpeak_req = ReqOpt::REQUIRED;
+        dc_req = ReqOpt::REQUIRED;
+    } else if (distr == "delta") {
+        chan->type = DELTA;
+        xpeak_req = ReqOpt::REQUIRED;
+        dc_req = ReqOpt::REQUIRED;
+    } else {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                       "Unknown property %s for chan_width_distr x\n", distr.c_str());
+    }
+
+    auto peak_attr = get_attribute(Node, "peak", loc_data);
+    chan->peak = peak_attr.as_float(UNDEFINED);
+
+    auto width_attr = get_attribute(Node, "width", loc_data, width_req);
+    chan->width = width_attr.as_float(0);
+
+    auto xpeak_attr = get_attribute(Node, "xpeak", loc_data, xpeak_req);
+    chan->xpeak = xpeak_attr.as_float(0);
+
+    auto dc_attr = get_attribute(Node, "dc", loc_data, dc_req);
+    chan->dc = dc_attr.as_float(0);
+}
+
+/* Parses the <tile> children of 'Node' into 'PhysicalTileTypes'.
+ *
+ * The empty physical type returned by get_empty_physical_type() is always
+ * stored first with index 0; each parsed tile follows with indices starting
+ * at 1. For every tile the properties, the optional <switchblock_locations>
+ * and the sub tiles are processed. Duplicate tile names and the obsolete
+ * <gridlocations> tag are reported through archfpga_throw(). */
+static void ProcessTiles(pugi::xml_node Node,
+                         std::vector<t_physical_tile_type>& PhysicalTileTypes,
+                         std::vector<t_logical_block_type>& LogicalBlockTypes,
+                         const t_default_fc_spec& arch_def_fc,
+                         t_arch& arch,
+                         const pugiutil::loc_data& loc_data) {
+    pugi::xml_node CurTileType;
+    pugi::xml_node Cur;
+    /* Tile names seen so far, used to reject duplicates */
+    std::map<std::string, int> tile_type_descriptors;
+
+    /* The type list needs one additional entry besides the parsed tiles:
+     * the empty pseudo-type, stored at index 0
+     */
+    t_physical_tile_type EMPTY_PHYSICAL_TILE_TYPE = get_empty_physical_type();
+    EMPTY_PHYSICAL_TILE_TYPE.index = 0;
+    PhysicalTileTypes.push_back(EMPTY_PHYSICAL_TILE_TYPE);
+
+    /* Process the types */
+    int index = 1; /* Skip over 'empty' type */
+
+    CurTileType = Node.first_child();
+    while (CurTileType) {
+        check_node(CurTileType, "tile", loc_data);
+
+        t_physical_tile_type PhysicalTileType;
+
+        PhysicalTileType.index = index;
+
+        /* Parses the properties fields of the type */
+        ProcessTileProps(CurTileType, &PhysicalTileType, loc_data);
+
+        auto result = tile_type_descriptors.insert(std::pair<std::string, int>(PhysicalTileType.name, 0));
+        if (!result.second) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurTileType),
+                           "Duplicate tile descriptor name: '%s'.\n", PhysicalTileType.name);
+        }
+
+        //Warn that gridlocations is no longer supported
+        //TODO: eventually remove
+        try {
+            expect_child_node_count(CurTileType, "gridlocations", 0, loc_data);
+        } catch (pugiutil::XmlError& e) {
+            std::string msg = e.what();
+            //Name the tags that the <layout> parser actually accepts
+            msg += ". <gridlocations> has been replaced by the <auto_layout> and <fixed_layout> tags in the <layout> section.";
+            msg += " Please upgrade your architecture file.";
+            archfpga_throw(e.filename().c_str(), e.line(), msg.c_str());
+        }
+
+        //Load switchblock type and location overrides
+        Cur = get_single_child(CurTileType, "switchblock_locations", loc_data, ReqOpt::OPTIONAL);
+        ProcessSwitchblockLocations(Cur, &PhysicalTileType, arch, loc_data);
+
+        ProcessSubTiles(CurTileType, &PhysicalTileType, LogicalBlockTypes, arch.Segments, arch_def_fc, loc_data);
+
+        /* Type fully read */
+        ++index;
+
+        /* Push newly created Types to corresponding vectors */
+        PhysicalTileTypes.push_back(PhysicalTileType);
+
+        /* Advance to the next sibling carrying the same tag name */
+        CurTileType = CurTileType.next_sibling(CurTileType.name());
+    }
+}
+
+/* Sets is_input_type / is_output_type on every physical tile type: a flag is
+ * true when at least one of the tile's equivalent sites contains the
+ * MODEL_INPUT / MODEL_OUTPUT BLIF model respectively, and false otherwise. */
+static void MarkIoTypes(std::vector<t_physical_tile_type>& PhysicalTileTypes) {
+    for (auto& tile_type : PhysicalTileTypes) {
+        tile_type.is_input_type = false;
+        tile_type.is_output_type = false;
+
+        auto sites = get_equivalent_sites_set(&tile_type);
+
+        //True as soon as one equivalent site contains the given BLIF model
+        auto any_site_contains = [&sites](const auto& blif_model) {
+            for (const auto& site : sites) {
+                if (block_type_contains_blif_model(site, blif_model)) {
+                    return true;
+                }
+            }
+            return false;
+        };
+
+        if (any_site_contains(MODEL_INPUT)) {
+            tile_type.is_input_type = true;
+        }
+
+        if (any_site_contains(MODEL_OUTPUT)) {
+            tile_type.is_output_type = true;
+        }
+    }
+}
+
+/* Loads the attributes of a <tile> tag into 'PhysicalTileType'.
+ *
+ * 'name' is required and copied with vtr::strdup(). 'width' and 'height' are
+ * optional and default to 1; 'area' is optional and defaults to UNDEFINED.
+ * An explicitly specified negative area is reported through archfpga_throw(). */
+static void ProcessTileProps(pugi::xml_node Node,
+                             t_physical_tile_type* PhysicalTileType,
+                             const pugiutil::loc_data& loc_data) {
+    expect_only_attributes(Node, {"name", "width", "height", "area"}, loc_data);
+
+    /* Load type name */
+    auto Prop = get_attribute(Node, "name", loc_data).value();
+    PhysicalTileType->name = vtr::strdup(Prop);
+
+    /* Load properties */
+    PhysicalTileType->width = get_attribute(Node, "width", loc_data, ReqOpt::OPTIONAL).as_uint(1);
+    PhysicalTileType->height = get_attribute(Node, "height", loc_data, ReqOpt::OPTIONAL).as_uint(1);
+
+    auto area_attr = get_attribute(Node, "area", loc_data, ReqOpt::OPTIONAL);
+    PhysicalTileType->area = area_attr.as_float(UNDEFINED);
+
+    /* Validate the area itself (not the tile name), and only when it was
+     * given explicitly so that the UNDEFINED default is never rejected */
+    if (area_attr && PhysicalTileType->area < 0) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                       "Area for type %s must be non-negative\n", PhysicalTileType->name);
+    }
+}
+
+/* Parses the <input>, <output> and <clock> port tags of a sub tile.
+ *
+ * Ports are visited grouped by kind (inputs, then outputs, then clocks) and
+ * appended to SubTile->ports. Each port records its overall index, its index
+ * among the ports of the same kind and the index of its first pin.
+ *
+ * A port name may appear only once within the sub tile. A name already
+ * registered in 'tile_port_names' is accepted only when the registered port
+ * has the same num_pins and equivalence.
+ *
+ * Returns the number of input, output and clock pins over SubTile->ports. */
+static t_pin_counts ProcessSubTilePorts(pugi::xml_node Parent,
+                                        t_sub_tile* SubTile,
+                                        std::unordered_map<std::string, t_physical_tile_port>& tile_port_names,
+                                        const pugiutil::loc_data& loc_data) {
+    //Expected number of ports, cross-checked once parsing is done
+    const int expected_inputs = count_children(Parent, "input", loc_data, ReqOpt::OPTIONAL);
+    const int expected_outputs = count_children(Parent, "output", loc_data, ReqOpt::OPTIONAL);
+    const int expected_clocks = count_children(Parent, "clock", loc_data, ReqOpt::OPTIONAL);
+    const int expected_ports = expected_inputs + expected_outputs + expected_clocks;
+
+    //Port names seen so far in this sub tile
+    std::map<std::string, int> seen_in_sub_tile;
+
+    int next_port_index = 0;
+    int next_first_pin = 0;
+
+    const char* const port_kinds[] = {"input", "output", "clock"};
+    for (const char* kind : port_kinds) {
+        int index_within_kind = 0;
+
+        for (pugi::xml_node port_tag = get_first_child(Parent, kind, loc_data, ReqOpt::OPTIONAL);
+             port_tag;
+             port_tag = port_tag.next_sibling(port_tag.name())) {
+            t_physical_tile_port port;
+
+            port.index = next_port_index;
+            port.absolute_first_pin_index = next_first_pin;
+            port.port_index_by_type = index_within_kind;
+            ProcessTilePort(port_tag, &port, loc_data);
+
+            //The name must be unique within this sub tile
+            const bool new_in_sub_tile = seen_in_sub_tile.emplace(port.name, 0).second;
+            if (!new_in_sub_tile) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(port_tag),
+                               "Duplicate port names in tile '%s': port '%s'\n",
+                               SubTile->name, port.name);
+            }
+
+            //A name shared with another sub tile must describe a matching port
+            const auto registered = tile_port_names.emplace(port.name, port);
+            if (!registered.second) {
+                const t_physical_tile_port& earlier = registered.first->second;
+                const bool same_settings = (earlier.num_pins == port.num_pins
+                                            && earlier.equivalent == port.equivalent);
+                if (!same_settings) {
+                    archfpga_throw(loc_data.filename_c_str(), loc_data.line(port_tag),
+                                   "Another port found with the same name in other sub tiles "
+                                   "that did not match the current port settings. '%s': port '%s'\n",
+                                   SubTile->name, port.name);
+                }
+            }
+
+            SubTile->ports.push_back(port);
+
+            //Advance the running indices for the next port
+            ++next_port_index;
+            ++index_within_kind;
+            next_first_pin += port.num_pins;
+        }
+    }
+
+    VTR_ASSERT(next_port_index == expected_ports);
+
+    /* Count stats on the number of each type of pin */
+    t_pin_counts pin_counts;
+    for (const auto& port : SubTile->ports) {
+        if (port.type == OUT_PORT) {
+            pin_counts.output += port.num_pins;
+        } else if (port.type == IN_PORT && !port.is_clock) {
+            pin_counts.input += port.num_pins;
+        } else {
+            //Anything left over has to be a clock input
+            VTR_ASSERT(port.is_clock && port.type == IN_PORT);
+            pin_counts.clock += port.num_pins;
+        }
+    }
+
+    return pin_counts;
+}
+
+/* Loads one port tag of a sub tile (<input>, <output> or <clock>) into `port`.
+ *
+ * Node     : the port XML tag; its tag name selects the port type
+ * port     : destination record. name, num_pins, is_non_clock_global, type and
+ *            is_clock are always written; equivalent is written only when the
+ *            `equivalent` attribute is present
+ * loc_data : source-location information used for error reporting
+ *
+ * Throws (via archfpga_throw) on an invalid `equivalent` value, a non-positive
+ * `num_pins`, a clock port marked is_non_clock_global, or an unknown tag name. */
+static void ProcessTilePort(pugi::xml_node Node,
+                            t_physical_tile_port* port,
+                            const pugiutil::loc_data& loc_data) {
+    std::vector<std::string> expected_attributes = {"name", "num_pins", "equivalent"};
+
+    //Only inputs and clocks may carry the is_non_clock_global attribute
+    if (Node.name() == "input"s || Node.name() == "clock"s) {
+        expected_attributes.push_back("is_non_clock_global");
+    }
+
+    expect_only_attributes(Node, expected_attributes, loc_data);
+
+    const char* Prop;
+    Prop = get_attribute(Node, "name", loc_data).value();
+    port->name = vtr::strdup(Prop);
+
+    //Optional pin equivalence; as_string(nullptr) yields nullptr when the attribute is absent
+    Prop = get_attribute(Node, "equivalent", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+    if (Prop) {
+        if (Prop == "none"s) {
+            port->equivalent = PortEquivalence::NONE;
+        } else if (Prop == "full"s) {
+            port->equivalent = PortEquivalence::FULL;
+        } else if (Prop == "instance"s) {
+            //Instance equivalence is only accepted on output ports
+            if (Node.name() == "output"s) {
+                port->equivalent = PortEquivalence::INSTANCE;
+            } else {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                               "Invalid pin equivalence '%s' for %s port.", Prop, Node.name());
+            }
+        } else {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                           "Invalid pin equivalence '%s'.", Prop);
+        }
+    }
+    port->num_pins = get_attribute(Node, "num_pins", loc_data).as_int(0);
+    port->is_non_clock_global = get_attribute(Node,
+                                              "is_non_clock_global", loc_data, ReqOpt::OPTIONAL)
+                                    .as_bool(false);
+
+    if (port->num_pins <= 0) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                       "Invalid number of pins %d for %s port.", port->num_pins, Node.name());
+    }
+
+    if (0 == strcmp(Node.name(), "input")) {
+        port->type = IN_PORT;
+        port->is_clock = false;
+
+    } else if (0 == strcmp(Node.name(), "output")) {
+        port->type = OUT_PORT;
+        port->is_clock = false;
+
+    } else if (0 == strcmp(Node.name(), "clock")) {
+        //Clocks are input ports flagged with is_clock
+        port->type = IN_PORT;
+        port->is_clock = true;
+
+        if (port->is_non_clock_global == true) {
+            //Report the port's own name: the tag name is always "clock" in this branch
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                           "Port %s cannot be both a clock and a non-clock simultaneously\n",
+                           port->name);
+        }
+
+    } else {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                       "Unknown port type %s", Node.name());
+    }
+}
+
+/* Loads every <site> child of Parent for one sub tile.
+ *
+ * For each site the referenced logical block type is looked up by its `pb_type`
+ * attribute, the tile-to-block pin mapping is built ("direct" by default, or
+ * "custom" when requested through `pin_mapping`), and the logical block is
+ * appended to SubTile->equivalent_sites.
+ *
+ * Throws (via archfpga_throw) when Parent has no <site> child or when
+ * `pin_mapping` is neither "direct" nor "custom". */
+static void ProcessTileEquivalentSites(pugi::xml_node Parent,
+                                       t_sub_tile* SubTile,
+                                       t_physical_tile_type* PhysicalTileType,
+                                       std::vector<t_logical_block_type>& LogicalBlockTypes,
+                                       const pugiutil::loc_data& loc_data) {
+    pugi::xml_node CurSite;
+
+    expect_only_children(Parent, {"site"}, loc_data);
+
+    if (count_children(Parent, "site", loc_data) < 1) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                       "There are no sites corresponding to this tile: %s.\n", SubTile->name);
+    }
+
+    CurSite = Parent.first_child();
+    while (CurSite) {
+        check_node(CurSite, "site", loc_data);
+
+        expect_only_attributes(CurSite, {"pb_type", "pin_mapping"}, loc_data);
+        /* Load equivalent site name */
+        auto Prop = std::string(get_attribute(CurSite, "pb_type", loc_data).value());
+
+        auto LogicalBlockType = get_type_by_name<t_logical_block_type>(Prop.c_str(), LogicalBlockTypes);
+
+        //A missing pin_mapping attribute is treated as "direct"
+        auto pin_mapping = get_attribute(CurSite, "pin_mapping", loc_data, ReqOpt::OPTIONAL).as_string("direct");
+
+        if (0 == strcmp(pin_mapping, "custom")) {
+            // Pin mapping between Tile and Pb Type is user-defined
+            ProcessEquivalentSiteCustomConnection(CurSite, SubTile, PhysicalTileType, LogicalBlockType, Prop, loc_data);
+        } else if (0 == strcmp(pin_mapping, "direct")) {
+            // Pin mapping is the identity between tile and Pb Type pins
+            ProcessEquivalentSiteDirectConnection(CurSite, SubTile, PhysicalTileType, LogicalBlockType, loc_data);
+        } else {
+            // Any other value would leave this site without a pin mapping, so reject it explicitly
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurSite),
+                           "Invalid pin_mapping '%s' for site %s in tile %s (expected 'direct' or 'custom').\n",
+                           pin_mapping, Prop.c_str(), SubTile->name);
+        }
+
+        if (0 == strcmp(LogicalBlockType->pb_type->name, Prop.c_str())) {
+            SubTile->equivalent_sites.push_back(LogicalBlockType);
+
+            check_port_direct_mappings(PhysicalTileType, SubTile, LogicalBlockType);
+        }
+
+        CurSite = CurSite.next_sibling(CurSite.name());
+    }
+}
+
+/* Builds the "direct" pin mapping between a sub tile and one equivalent site:
+ * logical pin i is paired with physical pin i.
+ *
+ * The per-instance pin count is the number of entries in
+ * SubTile->sub_tile_to_tile_pin_indices divided by the sub tile capacity. It
+ * must equal the pin count of the logical block's pb_type, otherwise an error
+ * is thrown (via archfpga_throw) asking for a user-defined mapping.
+ * The resulting map is stored in PhysicalTileType->tile_block_pin_directs_map,
+ * keyed by logical block index and sub tile index. */
+static void ProcessEquivalentSiteDirectConnection(pugi::xml_node Parent,
+                                                  t_sub_tile* SubTile,
+                                                  t_physical_tile_type* PhysicalTileType,
+                                                  t_logical_block_type* LogicalBlockType,
+                                                  const pugiutil::loc_data& loc_data) {
+    const int pins_per_instance = (int)SubTile->sub_tile_to_tile_pin_indices.size() / SubTile->capacity.total();
+
+    //A direct mapping only makes sense when both sides expose the same number of pins
+    const bool pin_counts_match = (pins_per_instance == LogicalBlockType->pb_type->num_pins);
+    if (!pin_counts_match) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                       "Pin definition differ between site %s and tile %s. User-defined pin mapping is required.\n", LogicalBlockType->pb_type->name, SubTile->name);
+    }
+
+    vtr::bimap<t_logical_pin, t_physical_pin> identity_map;
+
+    int ipin = 0;
+    while (ipin < pins_per_instance) {
+        t_logical_pin block_pin(ipin);
+        t_physical_pin tile_pin(ipin);
+
+        identity_map.insert(block_pin, tile_pin);
+        ++ipin;
+    }
+
+    auto& maps_for_block = PhysicalTileType->tile_block_pin_directs_map[LogicalBlockType->index];
+    maps_for_block[SubTile->index] = identity_map;
+}
+
+/* Builds a user-defined pin mapping between a sub tile and one equivalent site
+ * from the <direct> children of Parent.
+ *
+ * Each <direct> tag carries a `from` pin string (resolved against the sub tile)
+ * and a `to` pin string (resolved against the logical block). Both must cover
+ * the same number of pins; the pins are then paired in order. The resulting map
+ * is stored in PhysicalTileType->tile_block_pin_directs_map, keyed by logical
+ * block index and sub tile index.
+ *
+ * Throws (via archfpga_throw) when there is no <direct> child, when the two pin
+ * ranges differ in size, or when inserting a pin pair into the map fails
+ * (reported as a duplicate mapping). */
+static void ProcessEquivalentSiteCustomConnection(pugi::xml_node Parent,
+                                                  t_sub_tile* SubTile,
+                                                  t_physical_tile_type* PhysicalTileType,
+                                                  t_logical_block_type* LogicalBlockType,
+                                                  std::string site_name,
+                                                  const pugiutil::loc_data& loc_data) {
+    expect_only_children(Parent, {"direct"}, loc_data);
+
+    if (count_children(Parent, "direct", loc_data) < 1) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                       "There are no direct pin mappings between site %s and tile %s.\n", site_name.c_str(), SubTile->name);
+    }
+
+    vtr::bimap<t_logical_pin, t_physical_pin> custom_map;
+
+    for (pugi::xml_node direct_node = Parent.first_child(); direct_node; direct_node = direct_node.next_sibling(direct_node.name())) {
+        check_node(direct_node, "direct", loc_data);
+
+        expect_only_attributes(direct_node, {"from", "to"}, loc_data);
+
+        // `from` names pins of the physical tile, `to` names pins of the logical block
+        const std::string tile_pins_spec(get_attribute(direct_node, "from", loc_data).value());
+        const std::string block_pins_spec(get_attribute(direct_node, "to", loc_data).value());
+
+        auto tile_range = ProcessPinString<t_sub_tile*>(direct_node, SubTile, tile_pins_spec.c_str(), loc_data);
+        auto block_range = ProcessPinString<t_logical_block_type_ptr>(direct_node, LogicalBlockType, block_pins_spec.c_str(), loc_data);
+
+        // Both sides of the mapping must span the same number of pins
+        const auto tile_span = tile_range.second - tile_range.first;
+        const auto block_span = block_range.second - block_range.first;
+        if (tile_span != block_span) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                           "The number of pins specified in the direct pin mapping is "
+                           "not equivalent for Physical Tile %s and Logical Block %s.\n",
+                           SubTile->name, LogicalBlockType->name);
+        }
+
+        const int span = tile_span;
+        for (int offset = 0; offset < span; ++offset) {
+            t_physical_pin tile_pin(tile_range.first + offset);
+            t_logical_pin block_pin(block_range.first + offset);
+
+            const bool inserted = custom_map.insert(block_pin, tile_pin).second;
+            if (!inserted) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                               "Duplicate logical pin (%d) to physical pin (%d) mappings found for "
+                               "Physical Tile %s and Logical Block %s.\n",
+                               block_pin.pin, tile_pin.pin, SubTile->name, LogicalBlockType->name);
+            }
+        }
+    }
+
+    auto& maps_for_block = PhysicalTileType->tile_block_pin_directs_map[LogicalBlockType->index];
+    maps_for_block[SubTile->index] = custom_map;
+}
+
+/* Loads the pin location description of one sub tile.
+ *
+ * Locations is the sub tile's pin-location tag and may be a null node, in which
+ * case the "spread" pattern is assumed. The pattern is recorded in pin_locs the
+ * first time this is called for a tile; later calls must request the same
+ * pattern or an error is thrown.
+ *
+ * For the "custom" pattern every <loc> child is read (side, xoffset, yoffset and
+ * a whitespace-separated list of pin strings) into
+ * pin_locs->assignments[sub tile][x][y][side], and the function then verifies
+ * that every pin of every port of the sub tile has been given a location.
+ * For any other pattern the tag, when present, must have no children.
+ *
+ * All errors are raised through archfpga_throw. */
+static void ProcessPinLocations(pugi::xml_node Locations,
+                                t_physical_tile_type* PhysicalTileType,
+                                t_sub_tile* SubTile,
+                                t_pin_locs* pin_locs,
+                                const pugiutil::loc_data& loc_data) {
+    const char* pattern_name;
+    enum e_pin_location_distr distribution;
+
+    if (!Locations) {
+        //No tag given: fall back to the spread pattern
+        distribution = E_SPREAD_PIN_DISTR;
+        pattern_name = "spread";
+    } else {
+        expect_only_attributes(Locations, {"pattern"}, loc_data);
+
+        pattern_name = get_attribute(Locations, "pattern", loc_data).value();
+        if (0 == strcmp(pattern_name, "spread")) {
+            distribution = E_SPREAD_PIN_DISTR;
+        } else if (0 == strcmp(pattern_name, "perimeter")) {
+            distribution = E_PERIMETER_PIN_DISTR;
+        } else if (0 == strcmp(pattern_name, "spread_inputs_perimeter_outputs")) {
+            distribution = E_SPREAD_INPUTS_PERIMETER_OUTPUTS_PIN_DISTR;
+        } else if (0 == strcmp(pattern_name, "custom")) {
+            distribution = E_CUSTOM_PIN_DISTR;
+        } else {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
+                           "%s is an invalid pin location pattern.\n", pattern_name);
+        }
+    }
+
+    //The first sub tile fixes the pattern; every sibling has to agree with it
+    if (!pin_locs->is_distribution_set()) {
+        pin_locs->distribution = distribution;
+        pin_locs->set_distribution();
+    } else if (pin_locs->distribution != distribution) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
+                       "Sub Tile %s has a different pin location pattern (%s) with respect "
+                       "to the sibling sub tiles",
+                       SubTile->name, pattern_name);
+    }
+
+    if (distribution != E_CUSTOM_PIN_DISTR) {
+        //Non-custom pin locations. There should be no child tags
+        if (Locations) {
+            expect_child_node_count(Locations, 0, loc_data);
+        }
+        return;
+    }
+
+    const int sub_tile_index = SubTile->index;
+
+    /* Load the custom pin locations */
+    expect_only_children(Locations, {"loc"}, loc_data);
+
+    std::set<std::tuple<e_side, int, int>> seen_sides;
+    for (pugi::xml_node loc_node = Locations.first_child(); loc_node; loc_node = loc_node.next_sibling(loc_node.name())) {
+        check_node(loc_node, "loc", loc_data);
+
+        expect_only_attributes(loc_node, {"side", "xoffset", "yoffset"}, loc_data);
+
+        /* Offsets within the tile; both default to 0 */
+        const int x_offset = get_attribute(loc_node, "xoffset", loc_data, ReqOpt::OPTIONAL).as_int(0);
+        const int y_offset = get_attribute(loc_node, "yoffset", loc_data, ReqOpt::OPTIONAL).as_int(0);
+
+        /* Side of the tile the listed pins sit on */
+        const char* side_name = get_attribute(loc_node, "side", loc_data).value();
+        e_side side = TOP;
+        if (0 == strcmp(side_name, "top")) {
+            side = TOP;
+        } else if (0 == strcmp(side_name, "right")) {
+            side = RIGHT;
+        } else if (0 == strcmp(side_name, "bottom")) {
+            side = BOTTOM;
+        } else if (0 == strcmp(side_name, "left")) {
+            side = LEFT;
+        } else {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(loc_node),
+                           "'%s' is not a valid side.\n", side_name);
+        }
+
+        const bool x_in_range = (x_offset >= 0) && (x_offset < PhysicalTileType->width);
+        if (!x_in_range) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(loc_node),
+                           "'%d' is an invalid horizontal offset for type '%s' (must be within [0, %d]).\n",
+                           x_offset, PhysicalTileType->name, PhysicalTileType->width - 1);
+        }
+        const bool y_in_range = (y_offset >= 0) && (y_offset < PhysicalTileType->height);
+        if (!y_in_range) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(loc_node),
+                           "'%d' is an invalid vertical offset for type '%s' (must be within [0, %d]).\n",
+                           y_offset, PhysicalTileType->name, PhysicalTileType->height - 1);
+        }
+
+        //Each side/xoffset/yoffset combination may be described by a single <loc> only
+        const bool first_occurrence = seen_sides.insert(std::make_tuple(side, x_offset, y_offset)).second;
+        if (!first_occurrence) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(loc_node),
+                           "Duplicate pin location side/offset specification."
+                           " Only a single <loc> per side/xoffset/yoffset is permitted.\n");
+        }
+
+        /* Store every pin string listed in the tag's text at this location */
+        const std::vector<std::string> pin_tokens = vtr::split(loc_node.child_value());
+        for (const std::string& pin_token : pin_tokens) {
+            pin_locs->assignments[sub_tile_index][x_offset][y_offset][side].push_back(pin_token);
+        }
+    }
+
+    //Verify that all top-level pins have had their locations specified
+
+    //First collect, per port name, the pin indices that were given a location
+    std::map<std::string, std::set<int>> located_pins_by_port;
+    for (int x = 0; x < PhysicalTileType->width; ++x) {
+        for (int y = 0; y < PhysicalTileType->height; ++y) {
+            for (e_side side : {TOP, RIGHT, BOTTOM, LEFT}) {
+                for (const auto& pin_token : pin_locs->assignments[sub_tile_index][x][y][side]) {
+                    InstPort inst_port(pin_token.c_str());
+
+                    //The block part of a pin string must not carry an instance range
+                    const bool has_instance_range = inst_port.instance_low_index() != InstPort::UNSPECIFIED
+                                                    || inst_port.instance_high_index() != InstPort::UNSPECIFIED;
+                    if (has_instance_range) {
+                        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
+                                       "Pin location specification '%s' should not contain an instance range (should only be the block name)",
+                                       pin_token.c_str());
+                    }
+
+                    //The block part must name this sub tile
+                    if (inst_port.instance_name() != SubTile->name) {
+                        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
+                                       "Mismatched sub tile name in pin location specification (expected '%s' was '%s')",
+                                       SubTile->name, inst_port.instance_name().c_str());
+                    }
+
+                    int first_pin = inst_port.port_low_index();
+                    int last_pin = inst_port.port_high_index();
+
+                    if (first_pin == InstPort::UNSPECIFIED && last_pin == InstPort::UNSPECIFIED) {
+                        //No pin range given, so the whole port is meant: look the port up for its width
+                        const t_physical_tile_port* matched_port = nullptr;
+                        for (const auto& candidate : SubTile->ports) {
+                            if (candidate.name == inst_port.port_name()) {
+                                matched_port = &candidate;
+                                break;
+                            }
+                        }
+
+                        if (!matched_port) {
+                            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
+                                           "Failed to find port named '%s' on block '%s'",
+                                           inst_port.port_name().c_str(), SubTile->name);
+                        }
+
+                        first_pin = 0;
+                        last_pin = matched_port->num_pins - 1;
+                    }
+                    VTR_ASSERT(first_pin >= 0);
+                    VTR_ASSERT(last_pin >= 0);
+
+                    //Record every pin of the range as located
+                    auto& located_pins = located_pins_by_port[inst_port.port_name()];
+                    for (int ipin = first_pin; ipin <= last_pin; ++ipin) {
+                        located_pins.insert(ipin);
+                    }
+                }
+            }
+        }
+    }
+
+    //Then make sure no pin of any port was left without a location
+    for (const auto& port : SubTile->ports) {
+        const std::set<int>& located_pins = located_pins_by_port[port.name];
+        for (int ipin = 0; ipin < port.num_pins; ++ipin) {
+            if (located_pins.count(ipin) == 0) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
+                               "Pin '%s.%s[%d]' has no pin location specificed (a location is required for pattern=\"custom\")",
+                               SubTile->name, port.name, ipin);
+            }
+        }
+    }
+}
+
+/* Loads every <sub_tile> child of Node into PhysicalTileType.
+ *
+ * For each sub tile this reads its name and capacity, its ports, pin locations,
+ * Fc specification and equivalent sites, and accumulates the pin, driver and
+ * receiver counts on the physical tile (scaled by the sub tile capacity).
+ * Once all sub tiles are read, the tile's pinloc matrix is sized and the pin
+ * classes and pin locations are set up.
+ *
+ * Throws (via archfpga_throw) when no sub tile is present, when a sub tile name
+ * is repeated within the tile, or when a sub tile capacity is smaller than 1. */
+static void ProcessSubTiles(pugi::xml_node Node,
+                            t_physical_tile_type* PhysicalTileType,
+                            std::vector<t_logical_block_type>& LogicalBlockTypes,
+                            std::vector<t_segment_inf>& segments,
+                            const t_default_fc_spec& arch_def_fc,
+                            const pugiutil::loc_data& loc_data) {
+    pugi::xml_node CurSubTile;
+    pugi::xml_node Cur;
+    int index = 0;
+
+    unsigned long int num_sub_tiles = count_children(Node, "sub_tile", loc_data);
+    unsigned long int width = PhysicalTileType->width;
+    unsigned long int height = PhysicalTileType->height;
+    unsigned long int num_sides = 4;
+
+    std::map<std::string, int> sub_tile_names;
+
+    /* Pin location assignments, indexed by [sub tile][x offset][y offset][side] */
+    t_pin_locs pin_locs;
+    pin_locs.assignments.resize({num_sub_tiles, width, height, num_sides});
+
+    if (num_sub_tiles == 0) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                       "No sub tile found for the Physical Tile %s.\n"
+                       "At least one sub tile is needed to correctly describe the Physical Tile.\n",
+                       PhysicalTileType->name);
+    }
+
+    CurSubTile = get_first_child(Node, "sub_tile", loc_data);
+
+    while (CurSubTile) {
+        t_sub_tile SubTile;
+
+        SubTile.index = index;
+
+        expect_only_attributes(CurSubTile, {"name", "capacity"}, loc_data);
+
+        /* Load type name */
+        auto name = vtr::strdup(get_attribute(CurSubTile, "name", loc_data).value());
+
+        //Check Sub Tile name duplicates
+        auto result = sub_tile_names.insert(std::pair<std::string, int>(std::string(name), 0));
+        if (!result.second) {
+            //Report against the offending <sub_tile> tag (Cur is unset or stale at this point)
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurSubTile),
+                           "Duplicate Sub Tile names in tile '%s': Sub Tile'%s'\n",
+                           PhysicalTileType->name, name);
+        }
+
+        SubTile.name = name;
+
+        /* Load properties */
+        int capacity = get_attribute(CurSubTile, "capacity", loc_data, ReqOpt::OPTIONAL).as_int(1);
+        if (capacity < 1) {
+            //A non-positive capacity would yield an empty or negative capacity range and
+            //pin counts, and a zero total is later used as a divisor for direct pin mappings
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurSubTile),
+                           "Invalid capacity %d for Sub Tile '%s' in tile '%s' (must be at least 1).\n",
+                           capacity, name, PhysicalTileType->name);
+        }
+        //This sub tile occupies the next `capacity` instance slots of the physical tile
+        SubTile.capacity.set(PhysicalTileType->capacity, PhysicalTileType->capacity + capacity - 1);
+        PhysicalTileType->capacity += capacity;
+
+        /* Process sub tile port definitions */
+        // NOTE(review): this map is re-created for every sub tile, so it never holds
+        // ports of sibling sub tiles - confirm whether that is intended.
+        std::unordered_map<std::string, t_physical_tile_port> tile_port_names;
+        auto pin_counts = ProcessSubTilePorts(CurSubTile, &SubTile, tile_port_names, loc_data);
+
+        /* Map Sub Tile physical pins with the Physical Tile Type physical pins.
+         * This takes into account the capacity of each sub tiles to add the correct offset.
+         */
+        for (int ipin = 0; ipin < capacity * pin_counts.total(); ipin++) {
+            SubTile.sub_tile_to_tile_pin_indices.push_back(PhysicalTileType->num_pins + ipin);
+        }
+
+        SubTile.num_phy_pins = pin_counts.total() * capacity;
+
+        /* Assign pin counts to the Physical Tile Type */
+        PhysicalTileType->num_input_pins += capacity * pin_counts.input;
+        PhysicalTileType->num_output_pins += capacity * pin_counts.output;
+        PhysicalTileType->num_clock_pins += capacity * pin_counts.clock;
+        PhysicalTileType->num_pins += capacity * pin_counts.total();
+        PhysicalTileType->num_inst_pins += pin_counts.total();
+
+        /* Assign drivers and receivers count to Physical Tile Type */
+        PhysicalTileType->num_receivers += capacity * pin_counts.input;
+        PhysicalTileType->num_drivers += capacity * pin_counts.output;
+
+        /* Load pin locations (optional tag) */
+        Cur = get_single_child(CurSubTile, "pinlocations", loc_data, ReqOpt::OPTIONAL);
+        ProcessPinLocations(Cur, PhysicalTileType, &SubTile, &pin_locs, loc_data);
+
+        /* Load Fc */
+        Cur = get_single_child(CurSubTile, "fc", loc_data, ReqOpt::OPTIONAL);
+        Process_Fc(Cur, PhysicalTileType, &SubTile, pin_counts, segments, arch_def_fc, loc_data);
+
+        //Load equivalent sites information
+        Cur = get_single_child(CurSubTile, "equivalent_sites", loc_data, ReqOpt::REQUIRED);
+        ProcessTileEquivalentSites(Cur, &SubTile, PhysicalTileType, LogicalBlockTypes, loc_data);
+
+        PhysicalTileType->sub_tiles.push_back(SubTile);
+
+        index++;
+
+        CurSubTile = CurSubTile.next_sibling(CurSubTile.name());
+    }
+
+    // Initialize pinloc data structure.
+    int num_pins = PhysicalTileType->num_pins;
+    PhysicalTileType->pinloc.resize({width, height, num_sides}, std::vector<bool>(num_pins, false));
+
+    setup_pin_classes(PhysicalTileType);
+    //Cur still refers to the last sub tile's <equivalent_sites> tag here
+    LoadPinLoc(Cur, PhysicalTileType, &pin_locs, loc_data);
+}
+
+/* Builds the list of logical block types from the children of Node, each of
+ * which must be a <pb_type> tag carrying only a `name` attribute.
+ *
+ * The empty pseudo-type is appended first with index 0; every <pb_type> then
+ * gets the next index (starting at 1), a freshly allocated t_pb_type filled in
+ * by ProcessPb_Type, and is appended to LogicalBlockTypes.
+ * Throws (via archfpga_throw) when two <pb_type> tags share a name. */
+static void ProcessComplexBlocks(vtr::string_internment* strings, pugi::xml_node Node, std::vector<t_logical_block_type>& LogicalBlockTypes, t_arch& arch, const bool timing_enabled, const pugiutil::loc_data& loc_data) {
+    /* Names of the pb_types read so far, used to detect duplicates */
+    std::map<std::string, int> seen_pb_type_names;
+
+    /* The list always starts with the empty pseudo-type */
+    t_logical_block_type empty_type = get_empty_logical_type();
+    empty_type.index = 0;
+    LogicalBlockTypes.push_back(empty_type);
+
+    /* Real types are numbered from 1, right after the 'empty' type */
+    int next_index = 1;
+
+    for (pugi::xml_node block_node = Node.first_child(); block_node; block_node = block_node.next_sibling(block_node.name())) {
+        check_node(block_node, "pb_type", loc_data);
+
+        t_logical_block_type block_type;
+
+        expect_only_attributes(block_node, {"name"}, loc_data);
+
+        /* Type name */
+        const char* type_name = get_attribute(block_node, "name", loc_data).value();
+        block_type.name = vtr::strdup(type_name);
+
+        const bool is_new_name = seen_pb_type_names.insert(std::pair<std::string, int>(block_type.name, 0)).second;
+        if (!is_new_name) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(block_node),
+                           "Duplicate pb_type descriptor name: '%s'.\n", block_type.name);
+        }
+
+        /* Top-level pb_type of this logical block (it has no parent mode, hence the nullptr) */
+        block_type.pb_type = new t_pb_type;
+        block_type.pb_type->name = vtr::strdup(block_type.name);
+        ProcessPb_Type(strings, block_node, block_type.pb_type, nullptr, timing_enabled, arch, loc_data);
+
+        /* Type fully read: number it and store it */
+        block_type.index = next_index;
+        ++next_index;
+
+        LogicalBlockTypes.push_back(block_type);
+    }
+}
+
+/* Loads every <segment> child of Parent into Segs.
+ *
+ * Parent                   : node whose <segment> children are read
+ * Segs                     : resized to the number of segments and filled in
+ * Switches, NumSwitches    : architecture switches that the segments' <mux>,
+ *                            <wire_switch> and <opin_switch> tags refer to by name
+ * timing_enabled           : makes the Rmetal/Cmetal attributes required
+ * switchblocklist_required : makes the segment `name` attribute required
+ *
+ * Throws (via archfpga_throw) on a missing required name, a non-positive
+ * length, an unknown axis or type, an unknown switch name, or when the
+ * segments do not cover both the x and the y axis. */
+static void ProcessSegments(pugi::xml_node Parent,
+                            std::vector<t_segment_inf>& Segs,
+                            const t_arch_switch_inf* Switches,
+                            const int NumSwitches,
+                            const bool timing_enabled,
+                            const bool switchblocklist_required,
+                            const pugiutil::loc_data& loc_data) {
+    int i, length;
+    const char* tmp;
+
+    pugi::xml_node SubElem;
+    pugi::xml_node Node;
+
+    /* Resolves the `name` attribute of a switch-referencing tag to the index of
+     * the matching entry of Switches; tag_kind is only used in the error message. */
+    auto find_switch_index = [&](pugi::xml_node switch_node, const char* tag_kind) -> int {
+        const char* switch_name = get_attribute(switch_node, "name", loc_data).value();
+        for (int iswitch = 0; iswitch < NumSwitches; ++iswitch) {
+            if (0 == strcmp(switch_name, Switches[iswitch].name)) {
+                return iswitch;
+            }
+        }
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(switch_node),
+                       "'%s' is not a valid %s name.\n", switch_name, tag_kind);
+    };
+
+    /* Count the number of segs and check they are in fact
+     * of segment elements. */
+    int NumSegs = count_children(Parent, "segment", loc_data);
+
+    /* Alloc segment list */
+    if (NumSegs > 0) {
+        Segs.resize(NumSegs);
+    }
+
+    /* Load the segments. */
+    Node = get_first_child(Parent, "segment", loc_data);
+
+    bool x_axis_seg_found = false; /*Flags to see if we have any x-directed segment type specified*/
+    bool y_axis_seg_found = false; /*Flags to see if we have any y-directed segment type specified*/
+
+    /* Timing attributes are required only when timing is enabled */
+    const ReqOpt TIMING_ENABLE_REQD = BoolToReqOpt(timing_enabled);
+
+    for (i = 0; i < NumSegs; ++i) {
+        /* Get segment name */
+        tmp = get_attribute(Node, "name", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+        if (tmp) {
+            Segs[i].name = std::string(tmp);
+        } else {
+            /* if switch block is "custom", then you have to provide a name for segment */
+            if (switchblocklist_required) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                               "No name specified for the segment #%d.\n", i);
+            }
+            /* set name to default: "unnamed_segment_<segment_index>" */
+            Segs[i].name = "unnamed_segment_" + std::to_string(i);
+        }
+
+        /* Get segment length */
+        length = 1; /* DEFAULT */
+        tmp = get_attribute(Node, "length", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+        if (tmp) {
+            if (strcmp(tmp, "longline") == 0) {
+                Segs[i].longline = true;
+            } else {
+                length = vtr::atoi(tmp);
+                /* The length sizes the cb/sb lists below, so it has to be positive */
+                if (length < 1) {
+                    archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                                   "Invalid segment length '%s' (must be a positive integer or 'longline').\n", tmp);
+                }
+            }
+        }
+        Segs[i].length = length;
+
+        /* Get the frequency */
+        Segs[i].frequency = 1; /* DEFAULT */
+        tmp = get_attribute(Node, "freq", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+        if (tmp) {
+            Segs[i].frequency = (int)(atof(tmp) * MAX_CHANNEL_WIDTH);
+        }
+
+        /* Get timing info */
+        Segs[i].Rmetal = get_attribute(Node, "Rmetal", loc_data, TIMING_ENABLE_REQD).as_float(0);
+        Segs[i].Cmetal = get_attribute(Node, "Cmetal", loc_data, TIMING_ENABLE_REQD).as_float(0);
+
+        /*Get parallel axis*/
+
+        Segs[i].parallel_axis = BOTH_AXIS; /*DEFAULT value if no axis is specified*/
+        tmp = get_attribute(Node, "axis", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+
+        if (tmp) {
+            if (strcmp(tmp, "x") == 0) {
+                Segs[i].parallel_axis = X_AXIS;
+                x_axis_seg_found = true;
+            } else if (strcmp(tmp, "y") == 0) {
+                Segs[i].parallel_axis = Y_AXIS;
+                y_axis_seg_found = true;
+            } else {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node), "Unsopported parralel axis type: %s\n", tmp);
+            }
+        } else {
+            /* A segment without an axis attribute serves both axes */
+            x_axis_seg_found = true;
+            y_axis_seg_found = true;
+        }
+
+        //Set of expected subtags (exact subtags are dependent on parameters)
+        std::vector<std::string> expected_subtags;
+
+        if (!Segs[i].longline) {
+            //Long line doesn't accept <sb> or <cb> since it assumes full population
+            expected_subtags.push_back("sb");
+            expected_subtags.push_back("cb");
+        }
+
+        /* Get the type */
+        tmp = get_attribute(Node, "type", loc_data).value();
+        if (0 == strcmp(tmp, "bidir")) {
+            Segs[i].directionality = BI_DIRECTIONAL;
+
+            //Bidir requires the following tags
+            expected_subtags.push_back("wire_switch");
+            expected_subtags.push_back("opin_switch");
+        }
+
+        else if (0 == strcmp(tmp, "unidir")) {
+            Segs[i].directionality = UNI_DIRECTIONAL;
+
+            //Unidir requires the following tags
+            expected_subtags.push_back("mux");
+        }
+
+        else {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                           "Invalid switch type '%s'.\n", tmp);
+        }
+
+        //Verify only expected sub-tags are found
+        expect_only_children(Node, expected_subtags, loc_data);
+
+        /* Get the wire and opin switches, or mux switch if unidir */
+        if (UNI_DIRECTIONAL == Segs[i].directionality) {
+            SubElem = get_single_child(Node, "mux", loc_data);
+
+            /* Unidir muxes must have the same switch
+             * for wire and opin fanin since there is
+             * really only the mux in unidir. */
+            const int mux_switch = find_switch_index(SubElem, "mux");
+            Segs[i].arch_wire_switch = mux_switch;
+            Segs[i].arch_opin_switch = mux_switch;
+        }
+
+        else {
+            VTR_ASSERT(BI_DIRECTIONAL == Segs[i].directionality);
+            SubElem = get_single_child(Node, "wire_switch", loc_data);
+            Segs[i].arch_wire_switch = find_switch_index(SubElem, "wire_switch");
+
+            SubElem = get_single_child(Node, "opin_switch", loc_data);
+            Segs[i].arch_opin_switch = find_switch_index(SubElem, "opin_switch");
+        }
+
+        /* Setup the CB list if they give one, otherwise use full */
+        Segs[i].cb.assign(length, true);
+        SubElem = get_single_child(Node, "cb", loc_data, ReqOpt::OPTIONAL);
+        if (SubElem) {
+            ProcessCB_SB(SubElem, Segs[i].cb, loc_data);
+        }
+
+        /* Setup the SB list if they give one, otherwise use full */
+        Segs[i].sb.assign(length + 1, true);
+        SubElem = get_single_child(Node, "sb", loc_data, ReqOpt::OPTIONAL);
+        if (SubElem) {
+            ProcessCB_SB(SubElem, Segs[i].sb, loc_data);
+        }
+
+        /*Store the index of this segment in Segs vector*/
+        Segs[i].seg_index = i;
+        /* Get next Node */
+        Node = Node.next_sibling(Node.name());
+    }
+    /*We need at least one type of segment that applies to each of x- and y-directed wiring.*/
+
+    if (!x_axis_seg_found || !y_axis_seg_found) {
+        /* Node has been advanced past the last segment by now, so report against the enclosing tag */
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
+                       "Atleast one segment per-axis needs to get specified if no segments with non-specified (default) axis attribute exist.");
+    }
+}
+/* Processes the switchblocklist section from the xml architecture file.
+ * Each <switchblock> child of Parent is read into a t_switchblock_inf, checked
+ * with check_switchblock() and appended to arch->switchblocks.
+ * See vpr/SRC/route/build_switchblocks.c for a detailed description of this
+ * switch block format */
+static void ProcessSwitchblocks(pugi::xml_node Parent, t_arch* arch, const pugiutil::loc_data& loc_data) {
+    pugi::xml_node Node;
+    pugi::xml_node SubElem;
+    const char* tmp;
+
+    /* get the number of switchblocks */
+    int num_switchblocks = count_children(Parent, "switchblock", loc_data);
+    arch->switchblocks.reserve(num_switchblocks);
+
+    /* read-in all switchblock data */
+    Node = get_first_child(Parent, "switchblock", loc_data);
+    for (int i_sb = 0; i_sb < num_switchblocks; i_sb++) {
+        /* use a temp variable which will be assigned to switchblocks later */
+        t_switchblock_inf sb;
+
+        /* get name */
+        tmp = get_attribute(Node, "name", loc_data).as_string(nullptr);
+        if (tmp) {
+            sb.name = tmp;
+        }
+
+        /* get type */
+        tmp = get_attribute(Node, "type", loc_data).as_string(nullptr);
+        if (tmp) {
+            if (0 == strcmp(tmp, "bidir")) {
+                sb.directionality = BI_DIRECTIONAL;
+            } else if (0 == strcmp(tmp, "unidir")) {
+                sb.directionality = UNI_DIRECTIONAL;
+            } else {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node), "Unsupported switchblock type: %s\n", tmp);
+            }
+        }
+
+        /* get the switchblock location */
+        SubElem = get_single_child(Node, "switchblock_location", loc_data);
+        tmp = get_attribute(SubElem, "type", loc_data).as_string(nullptr);
+        if (tmp) {
+            if (strcmp(tmp, "EVERYWHERE") == 0) {
+                sb.location = E_EVERYWHERE;
+            } else if (strcmp(tmp, "PERIMETER") == 0) {
+                sb.location = E_PERIMETER;
+            } else if (strcmp(tmp, "CORE") == 0) {
+                sb.location = E_CORE;
+            } else if (strcmp(tmp, "CORNER") == 0) {
+                sb.location = E_CORNER;
+            } else if (strcmp(tmp, "FRINGE") == 0) {
+                sb.location = E_FRINGE;
+            } else {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(SubElem), "unrecognized switchblock location: %s\n", tmp);
+            }
+        }
+
+        /* get switchblock permutation functions */
+        SubElem = get_first_child(Node, "switchfuncs", loc_data);
+        read_sb_switchfuncs(SubElem, &sb, loc_data);
+
+        read_sb_wireconns(arch->Switches, arch->num_switches, Node, &sb, loc_data);
+
+        /* run error checks on switch blocks */
+        check_switchblock(&sb, arch);
+
+        /* assign the sb to the switchblocks vector */
+        arch->switchblocks.push_back(sb);
+
+        Node = Node.next_sibling(Node.name());
+    }
+}
+
+/* Reads a CB or SB depopulation pattern from Node into list.
+ * Only type="pattern" is supported. The element text is scanned character by
+ * character: 'T'/'1' set the next entry of list to true, 'F'/'0' set it to
+ * false, and spaces, tabs and newlines are skipped. The pattern must supply
+ * exactly list.size() symbols; a longer or shorter pattern, any other
+ * character, or an unknown type is reported through archfpga_throw(). */
+static void ProcessCB_SB(pugi::xml_node Node, std::vector<bool>& list, const pugiutil::loc_data& loc_data) {
+    const char* tmp = nullptr;
+    int i;
+    int len = list.size();
+    /* Check the type. We only support 'pattern' for now.
+     * Should add frac back eventually. */
+    tmp = get_attribute(Node, "type", loc_data).value();
+    if (0 == strcmp(tmp, "pattern")) {
+        i = 0;
+
+        /* Get the content string */
+        tmp = Node.child_value();
+        while (*tmp) {
+            switch (*tmp) {
+                case ' ':
+                case '\t':
+                case '\n':
+                    break;
+                case 'T':
+                case '1':
+                case 'F':
+                case '0':
+                    /* The caller sized list for the kind being read (CB or SB), so
+                     * len itself is the expected symbol count in either case. */
+                    if (i >= len) {
+                        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                                       "CB or SB depopulation is too long. It should be %d symbols.\n",
+                                       len);
+                    }
+                    list[i] = ('T' == *tmp || '1' == *tmp);
+                    ++i;
+                    break;
+                default:
+                    archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                                   "Invalid character %c in CB or SB depopulation list.\n",
+                                   *tmp);
+            }
+            ++tmp;
+        }
+        if (i < len) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                           "CB or SB depopulation is too short (%d). It should be %d symbols.\n",
+                           i, len);
+        }
+    }
+
+    else {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                       "'%s' is not a valid type for specifying cb and sb depopulation.\n",
+                       tmp);
+    }
+}
+
+/* Loads every <switch> child of Parent into a newly allocated switch array.
+ * On return *NumSwitches is the number of <switch> children and *Switches is an
+ * array of that many t_arch_switch_inf (nullptr when there are none).
+ * Whether R, Cin and Cout are mandatory follows timing_enabled (via BoolToReqOpt);
+ * Cin/Cout are optional for shorts. Invalid input goes through archfpga_throw(). */
+static void ProcessSwitches(pugi::xml_node Parent,
+                            t_arch_switch_inf** Switches,
+                            int* NumSwitches,
+                            const bool timing_enabled,
+                            const pugiutil::loc_data& loc_data) {
+    int i, j;
+    const char* type_name;
+    const char* switch_name;
+    ReqOpt TIMING_ENABLE_REQD = BoolToReqOpt(timing_enabled);
+
+    pugi::xml_node Node;
+
+    /* Count the children and check they are switches */
+    *NumSwitches = count_children(Parent, "switch", loc_data);
+
+    /* Alloc switch list */
+    *Switches = nullptr;
+    if (*NumSwitches > 0) {
+        (*Switches) = new t_arch_switch_inf[(*NumSwitches)];
+    }
+
+    /* Load the switches. */
+    Node = get_first_child(Parent, "switch", loc_data);
+    for (i = 0; i < *NumSwitches; ++i) {
+        t_arch_switch_inf& arch_switch = (*Switches)[i];
+
+        switch_name = get_attribute(Node, "name", loc_data).value();
+
+        /* Check if the switch has conflicts with any reserved names */
+        if (0 == strcmp(switch_name, VPR_DELAYLESS_SWITCH_NAME)) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                           "Switch name '%s' is a reserved name for VPR internal usage! Please use another  name.\n",
+                           switch_name);
+        }
+
+        type_name = get_attribute(Node, "type", loc_data).value();
+
+        /* Check for switch name collisions */
+        for (j = 0; j < i; ++j) {
+            if (0 == strcmp((*Switches)[j].name, switch_name)) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                               "Two switches with the same name '%s' were found.\n",
+                               switch_name);
+            }
+        }
+        arch_switch.name = vtr::strdup(switch_name);
+
+        /* Figure out the type of switch */
+        /* As noted above, due to their configuration of pass transistors feeding into a buffer,
+         * only multiplexers and tristate buffers have an internal capacitance element.         */
+        /* Error-message suffix shared by all types so the quoting stays consistent */
+        const std::string type_context = " with type '"s + type_name + "'"s;
+        SwitchType type = SwitchType::MUX;
+        if (0 == strcmp(type_name, "mux")) {
+            type = SwitchType::MUX;
+            expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Cinternal", "Tdel", "buf_size", "power_buf_size", "mux_trans_size"}, type_context, loc_data);
+        } else if (0 == strcmp(type_name, "tristate")) {
+            type = SwitchType::TRISTATE;
+            expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Cinternal", "Tdel", "buf_size", "power_buf_size"}, type_context, loc_data);
+        } else if (0 == strcmp(type_name, "buffer")) {
+            type = SwitchType::BUFFER;
+            expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel", "buf_size", "power_buf_size"}, type_context, loc_data);
+        } else if (0 == strcmp(type_name, "pass_gate")) {
+            type = SwitchType::PASS_GATE;
+            expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel"}, type_context, loc_data);
+        } else if (0 == strcmp(type_name, "short")) {
+            type = SwitchType::SHORT;
+            expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel"}, type_context, loc_data);
+        } else {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                           "Invalid switch type '%s'.\n", type_name);
+        }
+        arch_switch.set_type(type);
+
+        arch_switch.R = get_attribute(Node, "R", loc_data, TIMING_ENABLE_REQD).as_float(0);
+
+        ReqOpt COUT_REQD = TIMING_ENABLE_REQD;
+        ReqOpt CIN_REQD = TIMING_ENABLE_REQD;
+        // We have defined the Cinternal parameter as optional, so that the user may specify an
+        // architecture without Cinternal without breaking the program flow.
+        ReqOpt CINTERNAL_REQD = ReqOpt::OPTIONAL;
+
+        if (arch_switch.type() == SwitchType::SHORT) {
+            //Cin/Cout are optional on shorts, since they really only have one capacitance
+            CIN_REQD = ReqOpt::OPTIONAL;
+            COUT_REQD = ReqOpt::OPTIONAL;
+        }
+        arch_switch.Cin = get_attribute(Node, "Cin", loc_data, CIN_REQD).as_float(0);
+        arch_switch.Cout = get_attribute(Node, "Cout", loc_data, COUT_REQD).as_float(0);
+        arch_switch.Cinternal = get_attribute(Node, "Cinternal", loc_data, CINTERNAL_REQD).as_float(0);
+
+        if (arch_switch.type() == SwitchType::MUX) {
+            //Only muxes have mux transistors
+            arch_switch.mux_trans_size = get_attribute(Node, "mux_trans_size", loc_data, ReqOpt::OPTIONAL).as_float(1);
+        } else {
+            arch_switch.mux_trans_size = 0.;
+        }
+
+        if (arch_switch.type() == SwitchType::SHORT
+            || arch_switch.type() == SwitchType::PASS_GATE) {
+            //No buffers
+            arch_switch.buf_size_type = BufferSize::ABSOLUTE;
+            arch_switch.buf_size = 0.;
+            arch_switch.power_buffer_type = POWER_BUFFER_TYPE_ABSOLUTE_SIZE;
+            arch_switch.power_buffer_size = 0.;
+        } else {
+            auto buf_size_attrib = get_attribute(Node, "buf_size", loc_data, ReqOpt::OPTIONAL);
+            if (!buf_size_attrib || buf_size_attrib.as_string() == std::string("auto")) {
+                arch_switch.buf_size_type = BufferSize::AUTO;
+                arch_switch.buf_size = 0.;
+            } else {
+                arch_switch.buf_size_type = BufferSize::ABSOLUTE;
+                arch_switch.buf_size = buf_size_attrib.as_float();
+            }
+
+            auto power_buf_size = get_attribute(Node, "power_buf_size", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+            if (power_buf_size == nullptr || strcmp(power_buf_size, "auto") == 0) {
+                arch_switch.power_buffer_type = POWER_BUFFER_TYPE_AUTO;
+            } else {
+                arch_switch.power_buffer_type = POWER_BUFFER_TYPE_ABSOLUTE_SIZE;
+                arch_switch.power_buffer_size = (float)vtr::atof(power_buf_size);
+            }
+        }
+
+        //Load the Tdel (which may be specfied with sub-tags)
+        ProcessSwitchTdel(Node, timing_enabled, i, (*Switches), loc_data);
+
+        /* Get next switch element */
+        Node = Node.next_sibling(Node.name());
+    }
+}
+
+/* Processes the switch delay. Switch delay can be specified in two ways.
+ * First way: switch delay is specified as a constant via the property Tdel in the switch node.
+ * Second way: switch delay is specified as a function of the switch fan-in. In this
+ * case, multiple nodes in the form
+ *
+ * <Tdel num_inputs="1" delay="3e-11"/>
+ *
+ * are specified as children of the switch node. In this case, Tdel
+ * is not included as a property of the switch node (first way).
+ * If neither is given the delay defaults to 0, or it is an error when timing_enabled is set. */
+static void ProcessSwitchTdel(pugi::xml_node Node, const bool timing_enabled, const int switch_index, t_arch_switch_inf* Switches, const pugiutil::loc_data& loc_data) {
+    int num_Tdel_children;
+
+    /* check if switch node has the Tdel property. Presence is tested on the
+     * attribute itself rather than by comparing its value to UNDEFINED, so an
+     * explicit Tdel that happens to equal the sentinel is not silently ignored. */
+    auto Tdel_attrib = get_attribute(Node, "Tdel", loc_data, ReqOpt::OPTIONAL);
+    const bool has_Tdel_prop = !Tdel_attrib.empty();
+    const float Tdel_prop_value = Tdel_attrib.as_float(UNDEFINED);
+
+    /* check if switch node has Tdel children */
+    bool has_Tdel_children = false;
+    num_Tdel_children = count_children(Node, "Tdel", loc_data, ReqOpt::OPTIONAL);
+    if (num_Tdel_children != 0) {
+        has_Tdel_children = true;
+    }
+
+    /* delay should not be specified as a Tdel property AND a Tdel child */
+    if (has_Tdel_prop && has_Tdel_children) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                       "Switch delay should be specified as EITHER a Tdel property OR as a child of the switch node, not both");
+    }
+
+    /* get pointer to the switch's Tdel map, then read-in delay data into this map */
+    if (has_Tdel_prop) {
+        /* delay specified as a constant */
+        Switches[switch_index].set_Tdel(t_arch_switch_inf::UNDEFINED_FANIN, Tdel_prop_value);
+    } else if (has_Tdel_children) {
+        /* Delay specified as a function of switch fan-in.
+         * Go through each Tdel child, read-in num_inputs and the delay value.
+         * Insert this info into the switch delay map */
+        pugi::xml_node Tdel_child = get_first_child(Node, "Tdel", loc_data);
+        std::set<int> seen_fanins;
+        for (int ichild = 0; ichild < num_Tdel_children; ichild++) {
+            int num_inputs = get_attribute(Tdel_child, "num_inputs", loc_data).as_int(0);
+            float Tdel_value = get_attribute(Tdel_child, "delay", loc_data).as_float(0.);
+
+            if (seen_fanins.count(num_inputs)) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Tdel_child),
+                               "Tdel node specified num_inputs (%d) that has already been specified by another Tdel node", num_inputs);
+            } else {
+                Switches[switch_index].set_Tdel(num_inputs, Tdel_value);
+                seen_fanins.insert(num_inputs);
+            }
+            Tdel_child = Tdel_child.next_sibling(Tdel_child.name());
+        }
+    } else {
+        /* No delay info specified for switch */
+        if (timing_enabled) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                           "Switch should contain intrinsic delay information if timing is enabled");
+        } else {
+            /* set a default value */
+            Switches[switch_index].set_Tdel(t_arch_switch_inf::UNDEFINED_FANIN, 0.);
+        }
+    }
+}
+
+/* Reads the <direct> children of Parent into a newly allocated, zero-filled
+ * array handed back through *Directs (nullptr when there are none), with the
+ * element count in *NumDirects. Per direct this stores the name, from/to pin
+ * names, x/y/z offsets, optional from/to sides, the index within Switches of
+ * the optional switch_name (-1 when not given) and the XML line number.
+ * Repeated names, identical from/to pins, unknown switch names and all-zero
+ * offsets are reported through archfpga_throw(). */
+static void ProcessDirects(pugi::xml_node Parent, t_direct_inf** Directs, int* NumDirects, const t_arch_switch_inf* Switches, const int NumSwitches, const pugiutil::loc_data& loc_data) {
+    /* Only <direct> elements are allowed below Parent */
+    expect_only_children(Parent, {"direct"}, loc_data);
+    *NumDirects = count_children(Parent, "direct", loc_data);
+
+    /* Allocate the zeroed output array; it stays null when nothing is listed */
+    *Directs = nullptr;
+    if (*NumDirects > 0) {
+        const size_t num_bytes = *NumDirects * sizeof(t_direct_inf);
+        *Directs = (t_direct_inf*)vtr::malloc(num_bytes);
+        memset(*Directs, 0, num_bytes);
+    }
+
+    /* Visit the <direct> siblings in order, one array slot per element */
+    pugi::xml_node Node = get_first_child(Parent, "direct", loc_data);
+    for (int idirect = 0; idirect < *NumDirects; ++idirect) {
+        t_direct_inf& direct = (*Directs)[idirect];
+        expect_only_attributes(Node, {"name", "from_pin", "to_pin", "x_offset", "y_offset", "z_offset", "switch_name", "from_side", "to_side"}, loc_data);
+
+        /* The name must not repeat that of any direct read so far */
+        const char* direct_name = get_attribute(Node, "name", loc_data).value();
+        for (int iprev = 0; iprev < idirect; ++iprev) {
+            if (0 == strcmp((*Directs)[iprev].name, direct_name)) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                               "Two directs with the same name '%s' were found.\n",
+                               direct_name);
+            }
+        }
+        direct.name = vtr::strdup(direct_name);
+
+        /* Source and sink pin names, which have to differ */
+        const char* from_pin_name = get_attribute(Node, "from_pin", loc_data).value();
+        const char* to_pin_name = get_attribute(Node, "to_pin", loc_data).value();
+        if (0 == strcmp(to_pin_name, from_pin_name)) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                           "The source pin and sink pin are the same: %s.\n",
+                           to_pin_name);
+        }
+        direct.from_pin = vtr::strdup(from_pin_name);
+        direct.to_pin = vtr::strdup(to_pin_name);
+
+        /* Read the x, y and sub-tile (z_offset) offsets */
+        direct.x_offset = get_attribute(Node, "x_offset", loc_data).as_int(0);
+        direct.y_offset = get_attribute(Node, "y_offset", loc_data).as_int(0);
+        direct.sub_tile_offset = get_attribute(Node, "z_offset", loc_data).as_int(0);
+
+        /* The from/to sides are optional attributes */
+        std::string from_side_name = get_attribute(Node, "from_side", loc_data, ReqOpt::OPTIONAL).value();
+        direct.from_side = string_to_side(from_side_name);
+        std::string to_side_name = get_attribute(Node, "to_side", loc_data, ReqOpt::OPTIONAL).value();
+        direct.to_side = string_to_side(to_side_name);
+
+        /* Resolve the optional switch; the array position of the match is stored */
+        const char* switch_name = get_attribute(Node, "switch_name", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
+        if (nullptr == switch_name) {
+            //If not defined, use the delayless switch by default
+            //TODO: find a better way of indicating this.  Ideally, we would
+            //specify the delayless switch index here, but it does not appear
+            //to be defined at this point.
+            direct.switch_type = -1;
+        } else {
+            //Scan the user defined switches for a matching name
+            int iswitch = 0;
+            while (iswitch < NumSwitches && 0 != strcmp(switch_name, Switches[iswitch].name)) {
+                ++iswitch;
+            }
+            if (iswitch >= NumSwitches) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                               "Could not find switch named '%s' in switch list.\n", switch_name);
+            }
+            direct.switch_type = iswitch; //Save the matching switch index
+        }
+
+        /* Reject a direct whose x, y and z offsets are all zero */
+        const bool has_zero_length = (0 == direct.x_offset) && (0 == direct.y_offset) && (0 == direct.sub_tile_offset);
+        if (has_zero_length) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                           "The x_offset, y_offset, z_offset are all zero, this is a length 0 direct chain connection.\n");
+        }
+
+        direct.line = loc_data.line(Node);
+        /* Should I check that the direct chain offset is not greater than the chip? How? */
+        /* Advance to the next <direct> element */
+        Node = Node.next_sibling(Node.name());
+    }
+}
+
+/* Reads the <metal_layers> child of parent and adds every <metal_layer> to
+ * metal_layers, keyed by name, with its Rmetal and Cmetal values. Unexpected
+ * children/attributes and repeated names are reported via archfpga_throw(). */
+static void ProcessClockMetalLayers(pugi::xml_node parent,
+                                    std::unordered_map<std::string, t_metal_layer>& metal_layers,
+                                    pugiutil::loc_data& loc_data) {
+    std::vector<std::string> expected_attributes = {"name", "Rmetal", "Cmetal"};
+    std::vector<std::string> expected_children = {"metal_layer"};
+
+    pugi::xml_node metal_layers_parent = get_single_child(parent, "metal_layers", loc_data);
+    int num_metal_layers = count_children(metal_layers_parent, "metal_layer", loc_data);
+
+    pugi::xml_node curr_layer = get_first_child(metal_layers_parent, "metal_layer", loc_data);
+    // The child list is the same for every layer, so validate it once up front
+    expect_only_children(metal_layers_parent, expected_children, loc_data);
+    for (int i = 0; i < num_metal_layers; i++) {
+        expect_only_attributes(curr_layer, expected_attributes, loc_data);
+        // Get metal layer values: name, r_metal, and c_metal
+        std::string name(get_attribute(curr_layer, "name", loc_data).value());
+        t_metal_layer metal_layer;
+        metal_layer.r_metal = get_attribute(curr_layer, "Rmetal", loc_data).as_float(0.);
+        metal_layer.c_metal = get_attribute(curr_layer, "Cmetal", loc_data).as_float(0.);
+
+        // Insert metal layer into map; insert() refuses a name that is already present
+        if (!metal_layers.insert({name, metal_layer}).second) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(curr_layer),
+                           "Two metal layers with the same name '%s' were found.\n",
+                           name.c_str());
+        }
+        curr_layer = curr_layer.next_sibling(curr_layer.name());
+    }
+}
+
+/* Reads every <clock_network> child of parent and appends it to clock_networks.
+ * A network is described by a <spine> and/or a <rib> child: for each one found
+ * the metal layer, start/end/position and repeat values are copied from its
+ * attributes (repeatx/repeaty default to "W"/"H") and its switch points are
+ * read with ProcessClockSwitchPoints(). If both are present the rib, handled
+ * last, overwrites what the spine set. A network with neither child is
+ * reported through archfpga_throw(). */
+static void ProcessClockNetworks(pugi::xml_node parent,
+                                 std::vector<t_clock_network_arch>& clock_networks,
+                                 const t_arch_switch_inf* switches,
+                                 const int num_switches,
+                                 pugiutil::loc_data& loc_data) {
+    std::vector<std::string> expected_spine_attributes = {"name", "num_inst", "metal_layer", "starty", "endy", "x", "repeatx", "repeaty"};
+    std::vector<std::string> expected_rib_attributes = {"name", "num_inst", "metal_layer", "startx", "endx", "y", "repeatx", "repeaty"};
+    std::vector<std::string> expected_children = {"rib", "spine"};
+
+    int num_clock_networks = count_children(parent, "clock_network", loc_data);
+    pugi::xml_node curr_network = get_first_child(parent, "clock_network", loc_data);
+    for (int i = 0; i < num_clock_networks; i++) {
+        expect_only_children(curr_network, expected_children, loc_data);
+
+        t_clock_network_arch clock_network;
+
+        // Network-wide attributes
+        std::string name(get_attribute(curr_network, "name", loc_data).value());
+        clock_network.name = name;
+        clock_network.num_inst = get_attribute(curr_network, "num_inst", loc_data).as_int(0);
+        bool is_supported_clock_type = false;
+        pugi::xml_node curr_type;
+
+        // Parse spine
+        curr_type = get_single_child(curr_network, "spine", loc_data, ReqOpt::OPTIONAL);
+        if (curr_type) {
+            // Check the network element as before, and also the <spine> element itself,
+            // whose attributes were previously never validated
+            expect_only_attributes(curr_network, expected_spine_attributes, loc_data);
+            expect_only_attributes(curr_type, expected_spine_attributes, loc_data);
+
+            is_supported_clock_type = true;
+            clock_network.type = e_clock_type::SPINE;
+
+            // The wire description is taken over as the raw attribute text
+            std::string metal_layer(get_attribute(curr_type, "metal_layer", loc_data).value());
+            std::string starty(get_attribute(curr_type, "starty", loc_data).value());
+            std::string endy(get_attribute(curr_type, "endy", loc_data).value());
+            std::string x(get_attribute(curr_type, "x", loc_data).value());
+
+            // repeatx/repeaty are optional and default to "W" and "H"
+            auto repeatx_attr = get_attribute(curr_type, "repeatx", loc_data, ReqOpt::OPTIONAL);
+            std::string repeatx = repeatx_attr ? repeatx_attr.value() : "W";
+            auto repeaty_attr = get_attribute(curr_type, "repeaty", loc_data, ReqOpt::OPTIONAL);
+            std::string repeaty = repeaty_attr ? repeaty_attr.value() : "H";
+
+            clock_network.metal_layer = metal_layer;
+            clock_network.wire.start = starty;
+            clock_network.wire.end = endy;
+            clock_network.wire.position = x;
+            clock_network.repeat.x = repeatx;
+            clock_network.repeat.y = repeaty;
+
+            ProcessClockSwitchPoints(curr_type, clock_network, switches, num_switches, loc_data);
+        }
+
+        // Parse rib
+        curr_type = get_single_child(curr_network, "rib", loc_data, ReqOpt::OPTIONAL);
+        if (curr_type) {
+            // Use the rib attribute list (the spine list was used here by mistake) for
+            // the network element and for the <rib> element itself
+            expect_only_attributes(curr_network, expected_rib_attributes, loc_data);
+            expect_only_attributes(curr_type, expected_rib_attributes, loc_data);
+
+            is_supported_clock_type = true;
+            clock_network.type = e_clock_type::RIB;
+
+            // The wire description is taken over as the raw attribute text
+            std::string metal_layer(get_attribute(curr_type, "metal_layer", loc_data).value());
+            std::string startx(get_attribute(curr_type, "startx", loc_data).value());
+            std::string endx(get_attribute(curr_type, "endx", loc_data).value());
+            std::string y(get_attribute(curr_type, "y", loc_data).value());
+
+            // repeatx/repeaty are optional and default to "W" and "H"
+            auto repeatx_attr = get_attribute(curr_type, "repeatx", loc_data, ReqOpt::OPTIONAL);
+            std::string repeatx = repeatx_attr ? repeatx_attr.value() : "W";
+            auto repeaty_attr = get_attribute(curr_type, "repeaty", loc_data, ReqOpt::OPTIONAL);
+            std::string repeaty = repeaty_attr ? repeaty_attr.value() : "H";
+
+            clock_network.metal_layer = metal_layer;
+            clock_network.wire.start = startx;
+            clock_network.wire.end = endx;
+            clock_network.wire.position = y;
+            clock_network.repeat.x = repeatx;
+            clock_network.repeat.y = repeaty;
+
+            ProcessClockSwitchPoints(curr_type, clock_network, switches, num_switches, loc_data);
+        }
+
+        // Currently there is only support for ribs and spines. curr_type is a null node
+        // at this point, so the error is located at the network element instead.
+        if (!is_supported_clock_type) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(curr_network),
+                           "Found no supported clock network type for '%s' clock network.\n"
+                           "Currently there is only support for rib and spine networks.\n",
+                           name.c_str());
+        }
+
+        // Store the parsed network and move on to the next <clock_network>
+        clock_networks.push_back(clock_network);
+        curr_network = curr_network.next_sibling(curr_network.name());
+    }
+}
+
+/* Reads the <switch_point> children of parent (a <spine> or <rib> element) into
+ * clock_network.drive and clock_network.tap. clock_network.type must already be
+ * set: SPINE selects the yoffset/yincr attributes, RIB the xoffset/xincr ones.
+ * A drive's switch_name is resolved to its index in switches[0..num_switches).
+ * Exactly one drive and one tap are supported; any other combination, an unknown
+ * switch point type or an unknown switch name is reported via archfpga_throw(). */
+static void ProcessClockSwitchPoints(pugi::xml_node parent,
+                                     t_clock_network_arch& clock_network,
+                                     const t_arch_switch_inf* switches,
+                                     const int num_switches,
+                                     pugiutil::loc_data& loc_data) {
+    std::vector<std::string> expected_spine_drive_attributes = {"name", "type", "yoffset", "switch_name"};
+    std::vector<std::string> expected_rib_drive_attributes = {"name", "type", "xoffset", "switch_name"};
+    std::vector<std::string> expected_spine_tap_attributes = {"name", "type", "yoffset", "yincr"};
+    std::vector<std::string> expected_rib_tap_attributes = {"name", "type", "xoffset", "xincr"};
+    std::vector<std::string> expected_children = {"switch_point"};
+
+    // The switch points are children of parent, so that is where stray elements show up
+    expect_only_children(parent, expected_children, loc_data);
+    int num_clock_switches = count_children(parent, "switch_point", loc_data);
+    pugi::xml_node curr_switch = get_first_child(parent, "switch_point", loc_data);
+    int num_drives = 0;
+    int num_taps = 0;
+
+    //TODO: ensure switch name is unique for every switch of this clock network
+    for (int i = 0; i < num_clock_switches; i++) {
+        // Kept from the original: rejects unexpected elements nested in a switch point
+        expect_only_children(curr_switch, expected_children, loc_data);
+        std::string switch_type(get_attribute(curr_switch, "type", loc_data).value());
+        if (switch_type == "drive") {
+            t_clock_drive drive;
+            drive.name = std::string(get_attribute(curr_switch, "name", loc_data).value());
+            if (clock_network.type == e_clock_type::SPINE) {
+                expect_only_attributes(curr_switch, expected_spine_drive_attributes, loc_data);
+                drive.offset = get_attribute(curr_switch, "yoffset", loc_data).value();
+            } else {
+                VTR_ASSERT(clock_network.type == e_clock_type::RIB);
+                expect_only_attributes(curr_switch, expected_rib_drive_attributes, loc_data);
+                drive.offset = get_attribute(curr_switch, "xoffset", loc_data).value();
+            }
+            // get switch index
+            const char* switch_name = get_attribute(curr_switch, "switch_name", loc_data).value();
+            int switch_idx;
+            for (switch_idx = 0; switch_idx < num_switches; switch_idx++) {
+                if (0 == strcmp(switch_name, switches[switch_idx].name)) {
+                    break; // switch_idx has been found
+                }
+            }
+            if (switch_idx >= num_switches) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(curr_switch),
+                               "'%s' is not a valid switch name.\n", switch_name);
+            }
+            drive.arch_switch_idx = switch_idx;
+            clock_network.drive = drive;
+            ++num_drives;
+        } else if (switch_type == "tap") {
+            t_clock_taps tap;
+            tap.name = std::string(get_attribute(curr_switch, "name", loc_data).value());
+            if (clock_network.type == e_clock_type::SPINE) {
+                expect_only_attributes(curr_switch, expected_spine_tap_attributes, loc_data);
+                tap.offset = get_attribute(curr_switch, "yoffset", loc_data).value();
+                tap.increment = get_attribute(curr_switch, "yincr", loc_data).value();
+            } else {
+                VTR_ASSERT(clock_network.type == e_clock_type::RIB);
+                expect_only_attributes(curr_switch, expected_rib_tap_attributes, loc_data);
+                tap.offset = get_attribute(curr_switch, "xoffset", loc_data).value();
+                tap.increment = get_attribute(curr_switch, "xincr", loc_data).value();
+            }
+            clock_network.tap = tap;
+            ++num_taps;
+        } else {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(curr_switch),
+                           "Found unsupported switch type for '%s' clock network.\n"
+                           "Currently there is only support for drive and tap switch types.\n",
+                           clock_network.name.c_str());
+        }
+        curr_switch = curr_switch.next_sibling(curr_switch.name());
+    }
+
+    //TODO: currently only supporting one drive and one tap. Should change to support
+    //      multiple taps
+    if (num_drives != 1 || num_taps != 1) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(parent),
+                       "Clock network '%s' needs exactly one drive and one tap switch point, but has %d drive(s) and %d tap(s).\n",
+                       clock_network.name.c_str(), num_drives, num_taps);
+    }
+}
+
+/*
+ * Parse the <tap> children of the <clock_routing> tag found under `parent` and append
+ * one t_clock_connection_arch per tap to `clock_connections`.
+ *
+ * The "switch" attribute of a tap is resolved to an index into `switches` (an array of
+ * `num_switches` entries); a name that matches no entry is reported with archfpga_throw.
+ * "locationx" and "locationy" are looked up as optional, the other attributes are not.
+ */
+static void ProcessClockRouting(pugi::xml_node parent,
+                                std::vector<t_clock_connection_arch>& clock_connections,
+                                const t_arch_switch_inf* switches,
+                                const int num_switches,
+                                pugiutil::loc_data& loc_data) {
+    const std::vector<std::string> allowed_attrs = {"from", "to", "switch", "fc_val", "locationx", "locationy"};
+
+    pugi::xml_node routing_node = get_single_child(parent, "clock_routing", loc_data);
+    const int tap_count = count_children(routing_node, "tap", loc_data);
+
+    pugi::xml_node tap_node = get_first_child(routing_node, "tap", loc_data);
+    for (int tap = 0; tap < tap_count; ++tap, tap_node = tap_node.next_sibling(tap_node.name())) {
+        expect_only_attributes(tap_node, allowed_attrs, loc_data);
+
+        // read the attributes straight into the new connection
+        t_clock_connection_arch connection;
+        connection.from = get_attribute(tap_node, "from", loc_data).value();
+        connection.to = get_attribute(tap_node, "to", loc_data).value();
+        const char* switch_name = get_attribute(tap_node, "switch", loc_data).value();
+        connection.locationx = get_attribute(tap_node, "locationx", loc_data, ReqOpt::OPTIONAL).value();
+        connection.locationy = get_attribute(tap_node, "locationy", loc_data, ReqOpt::OPTIONAL).value();
+        connection.fc = get_attribute(tap_node, "fc_val", loc_data).as_float(0.);
+
+        // linear search by name; idx == num_switches afterwards means "no such switch"
+        int idx = 0;
+        while (idx < num_switches && 0 != strcmp(switch_name, switches[idx].name)) {
+            ++idx;
+        }
+        if (idx >= num_switches) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(tap_node),
+                           "'%s' is not a valid switch name.\n", switch_name);
+        }
+        connection.arch_switch_idx = idx;
+
+        // hand the finished connection to the caller's list
+        clock_connections.push_back(connection);
+    }
+}
+
+/*
+ * Fill `power_arch` from the optional child tags of `parent`.
+ *
+ * Each field first receives its built-in default (the literal assigned right before
+ * the lookup). When the corresponding child tag exists, the field is overwritten with
+ * the value read from that tag's attribute.
+ *
+ * The two attributes of <local_interconnect> are looked up with ReqOpt::OPTIONAL; the
+ * attributes of the other tags are looked up without it. C_wire_local is only
+ * written when <local_interconnect> exists.
+ */
+static void ProcessPower(pugi::xml_node parent,
+                         t_power_arch* power_arch,
+                         const pugiutil::loc_data& loc_data) {
+    /* Local interconnect: wire capacitance and factor */
+    power_arch->local_interc_factor = 0.5;
+    pugi::xml_node interconnect = get_single_child(parent, "local_interconnect", loc_data, ReqOpt::OPTIONAL);
+    if (interconnect) {
+        power_arch->C_wire_local = get_attribute(interconnect, "C_wire", loc_data, ReqOpt::OPTIONAL).as_float(0.);
+        power_arch->local_interc_factor = get_attribute(interconnect, "factor", loc_data, ReqOpt::OPTIONAL).as_float(0.5);
+    }
+
+    /* Buffers: logical effort factor */
+    power_arch->logical_effort_factor = 4.0;
+    pugi::xml_node buffers = get_single_child(parent, "buffers", loc_data, ReqOpt::OPTIONAL);
+    if (buffers) {
+        power_arch->logical_effort_factor = get_attribute(buffers, "logical_effort_factor", loc_data).as_float(0);
+    }
+
+    /* SRAM: transistors per bit */
+    power_arch->transistors_per_SRAM_bit = 6.0;
+    pugi::xml_node sram = get_single_child(parent, "sram", loc_data, ReqOpt::OPTIONAL);
+    if (sram) {
+        power_arch->transistors_per_SRAM_bit = get_attribute(sram, "transistors_per_bit", loc_data).as_float(0);
+    }
+
+    /* Mux transistor size */
+    power_arch->mux_transistor_size = 1.0;
+    pugi::xml_node mux = get_single_child(parent, "mux_transistor_size", loc_data, ReqOpt::OPTIONAL);
+    if (mux) {
+        power_arch->mux_transistor_size = get_attribute(mux, "mux_transistor_size", loc_data).as_float(0);
+    }
+
+    /* Flip-flop size */
+    power_arch->FF_size = 1.0;
+    pugi::xml_node flip_flop = get_single_child(parent, "FF_size", loc_data, ReqOpt::OPTIONAL);
+    if (flip_flop) {
+        power_arch->FF_size = get_attribute(flip_flop, "FF_size", loc_data).as_float(0);
+    }
+
+    /* LUT transistor size */
+    power_arch->LUT_transistor_size = 1.0;
+    pugi::xml_node lut = get_single_child(parent, "LUT_transistor_size", loc_data, ReqOpt::OPTIONAL);
+    if (lut) {
+        power_arch->LUT_transistor_size = get_attribute(lut, "LUT_transistor_size", loc_data).as_float(0);
+    }
+}
+
+/* Get the clock architecture: load every <clock> child of `Parent` into `clocks`.
+ * A parent without any <clock> child leaves num_global_clocks at 0 and clock_inf
+ * null instead of raising a "missing child" error. */
+static void ProcessClocks(pugi::xml_node Parent, t_clock_arch* clocks, const pugiutil::loc_data& loc_data) {
+    clocks->num_global_clocks = count_children(Parent, "clock", loc_data, ReqOpt::OPTIONAL);
+
+    /* Alloc zero-initialised storage for the clock details */
+    clocks->clock_inf = nullptr;
+    if (clocks->num_global_clocks > 0) {
+        const size_t num_bytes = clocks->num_global_clocks * sizeof(t_clock_network);
+        clocks->clock_inf = (t_clock_network*)vtr::malloc(num_bytes);
+        memset(clocks->clock_inf, 0, num_bytes);
+    }
+
+    /* The child count above is optional, so the first-child lookup has to be optional
+     * too; a required lookup would throw for a parent that has no <clock> children. */
+    pugi::xml_node Node = get_first_child(Parent, "clock", loc_data, ReqOpt::OPTIONAL);
+    for (int i = 0; i < clocks->num_global_clocks; ++i) {
+        t_clock_network& clk = clocks->clock_inf[i];
+
+        /* buffer_size is either the keyword "auto" or a number parsed with atof() */
+        const char* buffer_size = get_attribute(Node, "buffer_size", loc_data).value();
+        clk.autosize_buffer = (strcmp(buffer_size, "auto") == 0);
+        if (!clk.autosize_buffer) {
+            clk.buffer_size = (float)atof(buffer_size);
+        }
+
+        clk.C_wire = get_attribute(Node, "C_wire", loc_data).as_float(0);
+
+        /* get the next clock item */
+        Node = Node.next_sibling(Node.name());
+    }
+}
+/*
+ * Get the NoC design.
+ *
+ * Allocates a t_noc_inf, stores it in arch->noc and fills it from the <noc> tag:
+ *  - the attributes link_bandwidth, link_latency, router_latency and
+ *    noc_router_tile_name, which are all required;
+ *  - the router list, generated from a <mesh> child when one exists and read from a
+ *    required <topology> child otherwise.
+ *
+ * Negative bandwidth/latency values and an empty tile name raise an architecture error.
+ */
+static void ProcessNoc(pugi::xml_node noc_tag, t_arch* arch, const pugiutil::loc_data& loc_data) {
+    // default handed to as_double(); pugixml returns it only for an empty attribute handle
+    const int conversion_failure = -1;
+
+    // attributes and child tags that may appear on / under the noc tag
+    const std::vector<std::string> allowed_attributes = {"link_bandwidth", "link_latency", "router_latency", "noc_router_tile_name"};
+    std::vector<std::string> allowed_children = {"mesh", "topology"};
+
+    // if we are here, then the user has a NoC in their architecture, so create its record
+    arch->noc = new t_noc_inf;
+    t_noc_inf* noc = arch->noc;
+
+    /* process the noc attributes first */
+
+    // quick error check to make sure that we do not have unexpected attributes
+    pugiutil::expect_only_attributes(noc_tag, allowed_attributes, loc_data);
+
+    // now go through and parse the required attributes of the noc tag
+    noc->link_bandwidth = pugiutil::get_attribute(noc_tag, "link_bandwidth", loc_data, pugiutil::REQUIRED).as_double(conversion_failure);
+
+    noc->link_latency = pugiutil::get_attribute(noc_tag, "link_latency", loc_data, pugiutil::REQUIRED).as_double(conversion_failure);
+
+    noc->router_latency = pugiutil::get_attribute(noc_tag, "router_latency", loc_data, pugiutil::REQUIRED).as_double(conversion_failure);
+
+    noc->noc_router_tile_name = pugiutil::get_attribute(noc_tag, "noc_router_tile_name", loc_data, pugiutil::REQUIRED).as_string();
+
+    // negative bandwidth or latency values are rejected
+    // NOTE(review): the message asks for non-zero values, yet 0 passes this check - confirm intent
+    const bool has_negative_value = (noc->link_bandwidth < 0) || (noc->link_latency < 0) || (noc->router_latency < 0);
+    if (has_negative_value) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(noc_tag),
+                       "The link bandwidth, link latency and router latency for the NoC must be a positive non-zero value.");
+    }
+
+    // the router tile name must not be empty
+    if (noc->noc_router_tile_name.empty()) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(noc_tag),
+                       "The noc router tile name must be a string.");
+    }
+
+    /* We processed the NoC node, so now process the topology */
+
+    // make sure that only the accepted child tags are found under the noc tag
+    pugiutil::expect_only_children(noc_tag, allowed_children, loc_data);
+
+    // checks on the number of routers or on out-of-bounds routers cannot be done here:
+    // the chip still needs to be sized, so they are done later
+    pugi::xml_node mesh_tag = pugiutil::get_single_child(noc_tag, "mesh", loc_data, pugiutil::OPTIONAL);
+    if (!mesh_tag) {
+        // no mesh was supplied, so an explicit topology tag is required
+        pugi::xml_node topology_tag = pugiutil::get_single_child(noc_tag, "topology", loc_data, pugiutil::REQUIRED);
+        processTopology(topology_tag, loc_data, noc);
+        return;
+    }
+
+    // a mesh tag was supplied: generate the routers from it
+    processMeshTopology(mesh_tag, loc_data, noc);
+
+    // print every generated router followed by its comma-terminated connection ids
+    for (const auto& router : noc->router_list) {
+        std::cout << "router " << router.id << ": ";
+        for (const auto& connection : router.connection_list) {
+            std::cout << connection << ",";
+        }
+        std::cout << "\n";
+    }
+}
+
+/*
+ * A NoC mesh is created based on the user supplied size and region location.
+ *
+ * mesh_topology_tag  the <mesh> node to read
+ * loc_data           source locations used in error messages
+ * noc_ref            NoC description handed on to generate_noc_mesh()
+ *
+ * Reads startx/endx/starty/endy as doubles and size as an int, raises an architecture
+ * error when any of them is negative, and passes the values to generate_noc_mesh().
+ */
+static void processMeshTopology(pugi::xml_node mesh_topology_tag, const pugiutil::loc_data& loc_data, t_noc_inf* noc_ref) {
+    // default handed to the pugixml numeric conversions below
+    const int conversion_failure = -1;
+
+    // a list of attributes that should be found for the mesh tag
+    const std::vector<std::string> mesh_attributes = {"startx", "endx", "starty", "endy", "size"};
+
+    // verify that only the acceptable attributes were supplied
+    pugiutil::expect_only_attributes(mesh_topology_tag, mesh_attributes, loc_data);
+
+    // read the region bounds and the mesh size, in the order the attributes are listed above
+    const double start_x = pugiutil::get_attribute(mesh_topology_tag, "startx", loc_data, pugiutil::REQUIRED).as_double(conversion_failure);
+
+    const double end_x = pugiutil::get_attribute(mesh_topology_tag, "endx", loc_data, pugiutil::REQUIRED).as_double(conversion_failure);
+
+    const double start_y = pugiutil::get_attribute(mesh_topology_tag, "starty", loc_data, pugiutil::REQUIRED).as_double(conversion_failure);
+
+    const double end_y = pugiutil::get_attribute(mesh_topology_tag, "endy", loc_data, pugiutil::REQUIRED).as_double(conversion_failure);
+
+    const int mesh_size = pugiutil::get_attribute(mesh_topology_tag, "size", loc_data, pugiutil::REQUIRED).as_int(conversion_failure);
+
+    // verify that the attributes provided were legal: none of them may be negative
+    const bool any_negative = (start_x < 0) || (end_x < 0)
+                              || (start_y < 0) || (end_y < 0)
+                              || (mesh_size < 0);
+    if (any_negative) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(mesh_topology_tag),
+                       "The parameters for the mesh topology have to be positive values.");
+    }
+
+    // now create the mesh topology: routers, their connections and their positions
+    generate_noc_mesh(mesh_topology_tag, loc_data, noc_ref, start_x, end_x, start_y, end_y, mesh_size);
+}
+
+/*
+ * Go through each router in the NoC and store the list of routers that connect to it.
+ *
+ * Every child of the <topology> tag must be a <router>; each one is parsed by
+ * processRouter(). An empty router list afterwards is an error, and the gathered
+ * declaration/connection counters are finally checked by verify_noc_topology().
+ */
+static void processTopology(pugi::xml_node topology_tag, const pugiutil::loc_data& loc_data, t_noc_inf* noc_ref) {
+    // The topology tag should have no attributes, check that
+    pugiutil::expect_only_attributes(topology_tag, {}, loc_data);
+
+    /**
+     * Error-checking bookkeeping keyed by router id. For each id the pair holds
+     *   first  - how many <router> tags declared the id
+     *   second - how many times the id appeared in a 'connections' list
+     */
+    std::map<int, std::pair<int, int>> router_counts;
+
+    // visit all the children of the topology tag, i.e. each router found within the NoC
+    for (pugi::xml_node child : topology_tag.children()) {
+        if (std::string(child.name()) != "router") {
+            // we can only have router tags within the topology
+            bad_tag(child, loc_data, topology_tag, {"router"});
+            continue;
+        }
+
+        processRouter(child, loc_data, noc_ref, router_counts);
+    }
+
+    // check whether any routers were supplied
+    if (noc_ref->router_list.empty()) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(topology_tag),
+                       "No routers were supplied for the NoC.");
+    }
+
+    // check that the topology of the noc was correctly described in the arch file
+    verify_noc_topology(router_counts);
+}
+
+/*
+ * Store the properties of a single router and then store the list of routers that connect to it.
+ *
+ * router_tag            the <router> node to parse
+ * loc_data              source locations used in error and warning messages
+ * noc_ref               NoC description; the parsed router is appended to its router_list
+ * routers_in_arch_info  per-router declaration/connection counters, updated here
+ *
+ * Raises an architecture error for a negative id or position and for a 'connections'
+ * list that the list parser rejects. An empty 'connections' value only produces a
+ * warning; in that case the router is still stored.
+ * The router's own id is handed to the list parser, which uses it to reject a router
+ * that lists itself.
+ */
+static void processRouter(pugi::xml_node router_tag, const pugiutil::loc_data& loc_data, t_noc_inf* noc_ref, std::map<int, std::pair<int, int>>& routers_in_arch_info) {
+    // default handed to the pugixml numeric conversions below
+    const int conversion_failure = -1;
+
+    // an accepted list of attributes for the router tag
+    const std::vector<std::string> router_attributes = {"id", "positionx", "positiony", "connections"};
+
+    // check that only the accepted router attributes are found in the tag
+    pugiutil::expect_only_attributes(router_tag, router_attributes, loc_data);
+
+    // the router described by this tag
+    t_router router;
+
+    // id and position come straight from the attributes
+    router.id = pugiutil::get_attribute(router_tag, "id", loc_data, pugiutil::REQUIRED).as_int(conversion_failure);
+
+    router.device_x_position = pugiutil::get_attribute(router_tag, "positionx", loc_data, pugiutil::REQUIRED).as_double(conversion_failure);
+
+    router.device_y_position = pugiutil::get_attribute(router_tag, "positiony", loc_data, pugiutil::REQUIRED).as_double(conversion_failure);
+
+    // verify whether the attribute information was legal
+    if ((router.id < 0) || (router.device_x_position < 0) || (router.device_y_position < 0)) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(router_tag),
+                       "The router id, and position (x & y) for the router must be a positive number.");
+    }
+
+    // the 'connections' attribute is looked up as required, but its value may be empty
+    const std::string connections = pugiutil::get_attribute(router_tag, "connections", loc_data, pugiutil::REQUIRED).as_string();
+
+    if (connections.empty()) {
+        // nothing to parse, so only warn
+        VTR_LOGF_WARN(loc_data.filename_c_str(), loc_data.line(router_tag),
+                      "The router with id:%d either has an empty 'connections' attrtibute or does not have any associated connections to other routers in the NoC.\n", router.id);
+    } else {
+        // a false result means the value was not a legal list of router ids
+        const bool list_is_legal = parse_noc_router_connection_list(router_tag, loc_data, router.id, router.connection_list, connections, routers_in_arch_info);
+
+        if (!list_is_legal) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(router_tag),
+                           "The 'connections' attribute for the router must be a list of integers seperated by spaces, where each integer represents a router id that the current router is connected to.");
+        }
+    }
+
+    // at this point the current router information was legal, so store the router in the noc
+    noc_ref->router_list.push_back(router);
+
+    // update the number of declarations for this router (one <router> tag was just processed)
+    update_router_info_in_arch(router.id, false, routers_in_arch_info);
+}
+
+/* Return the port part of an "instance.port" string: everything after the first '.'.
+ * A string without any '.' is returned unchanged. */
+std::string inst_port_to_port_name(std::string inst_port) {
+    const std::string::size_type dot = inst_port.find('.');
+    // npos means there is no instance prefix to strip
+    return (dot == std::string::npos) ? inst_port : inst_port.substr(dot + 1);
+}
+
+/* Interpret the value of `attr` (an attribute of `node`) as a boolean: "1" is true and
+ * "0" is false; any other value is reported through bad_attribute_value(). */
+static bool attribute_to_bool(const pugi::xml_node node,
+                              const pugi::xml_attribute attr,
+                              const pugiutil::loc_data& loc_data) {
+    const std::string text = attr.value();
+    const bool is_true = (text == "1");
+    if (!is_true && text != "0") {
+        bad_attribute_value(attr, node, loc_data, {"0", "1"});
+    }
+
+    return is_true;
+}
+
+/* Return the index of the first entry of arch.Switches whose name equals `switch_name`,
+ * or OPEN when none of the arch.num_switches entries matches. */
+int find_switch_by_name(const t_arch& arch, std::string switch_name) {
+    int iswitch = 0;
+    while (iswitch < arch.num_switches && !(arch.Switches[iswitch].name == switch_name)) {
+        ++iswitch;
+    }
+
+    return (iswitch < arch.num_switches) ? iswitch : OPEN;
+}
+
+/* Map a side name ("left", "right", "top", "bottom") to its e_side enumerator. An empty
+ * string maps to NUM_SIDES; any other text is reported through archfpga_throw(). */
+e_side string_to_side(std::string side_str) {
+    if (side_str.empty()) {
+        return NUM_SIDES;
+    }
+    const std::pair<const char*, e_side> known_sides[] = {{"left", LEFT}, {"right", RIGHT}, {"top", TOP}, {"bottom", BOTTOM}};
+    for (const auto& known : known_sides) {
+        if (side_str == known.first) {
+            return known.second;
+        }
+    }
+
+    archfpga_throw(__FILE__, __LINE__,
+                   "Invalid side specification");
+
+    return NUM_SIDES;
+}
+
+/* Return a pointer to the first element of `types` whose `name` equals `type_name`; an unknown name is reported through archfpga_throw(). */
+template<typename T>
+static T* get_type_by_name(const char* type_name, std::vector<T>& types) {
+    auto match = std::find_if(types.begin(), types.end(), [type_name](const T& type) { return 0 == strcmp(type.name, type_name); });
+    if (match != types.end()) {
+        return &(*match);
+    }
+
+    archfpga_throw(__FILE__, __LINE__,
+                   "Could not find type: %s\n", type_name);
+}
+
+/*
+ * Create routers and set their properties so that a mesh grid of routers is created.
+ * Then connect the routers together so that a mesh topology is created.
+ *
+ * mesh_topology_tag  the <mesh> node, used to locate errors
+ * loc_data           source locations used in error messages
+ * noc_ref            NoC description; the generated routers are appended to its router_list
+ * mesh_region_*      start and end of the region the routers are spread over, per axis
+ * mesh_size          number of routers per row and per column
+ *
+ * Router ids run row by row, from 0 at the region start to mesh_size * mesh_size - 1 at
+ * the region end. Each router is connected to its left, top, right and bottom neighbour
+ * wherever that neighbour exists.
+ *
+ * Raises an architecture error when mesh_size is 0 or 1, or when the region end is not
+ * strictly greater than the region start in both dimensions.
+ */
+static void generate_noc_mesh(pugi::xml_node mesh_topology_tag, const pugiutil::loc_data& loc_data, t_noc_inf* noc_ref, double mesh_region_start_x, double mesh_region_end_x, double mesh_region_start_y, double mesh_region_end_y, int mesh_size) {
+    // check that the mesh size of the router is not 0
+    if (mesh_size == 0) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(mesh_topology_tag),
+                       "The NoC mesh size cannot be 0.");
+    }
+
+    // A 1x1 mesh is rejected as well: a lone router has no neighbour to connect to, and
+    // the spacing below would divide by (mesh_size - 1) == 0, which yields inf/NaN
+    // router positions that the region check does not catch.
+    if (mesh_size == 1) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(mesh_topology_tag),
+                       "The NoC mesh size must be at least 2.");
+    }
+
+    // Spacing between neighbouring routers. Dividing by (mesh_size - 1) rather than by
+    // mesh_size places the first and last router of every row and column exactly on the
+    // region boundary, e.g. for a mesh size of 3:
+    //
+    //   start        end
+    //   ***   ***   ***
+    //
+    const int last_index = mesh_size - 1;
+    const double vertical_router_separation = (mesh_region_end_y - mesh_region_start_y) / last_index;
+    const double horizontal_router_separation = (mesh_region_end_x - mesh_region_start_x) / last_index;
+
+    // improper region check
+    if ((vertical_router_separation <= 0) || (horizontal_router_separation <= 0)) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(mesh_topology_tag),
+                       "The NoC region is invalid.");
+    }
+
+    // create routers and their connections, starting with router id 0 at the region start
+    t_router temp_router;
+    for (int j = 0; j < mesh_size; j++) {
+        for (int i = 0; i < mesh_size; i++) {
+            // assign router id
+            temp_router.id = (mesh_size * j) + i;
+
+            // the first and last router of each row/column sit on the region boundary
+            temp_router.device_x_position = (i * horizontal_router_separation) + mesh_region_start_x;
+            temp_router.device_y_position = (j * vertical_router_separation) + mesh_region_start_y;
+
+            // left neighbour
+            if (i > 0) {
+                temp_router.connection_list.push_back(temp_router.id - 1);
+            }
+            // top neighbour
+            if (j < last_index) {
+                temp_router.connection_list.push_back(temp_router.id + mesh_size);
+            }
+            // right neighbour
+            if (i < last_index) {
+                temp_router.connection_list.push_back(temp_router.id + 1);
+            }
+            // bottom neighbour
+            if (j > 0) {
+                temp_router.connection_list.push_back(temp_router.id - mesh_size);
+            }
+
+            // add the router to the list
+            noc_ref->router_list.push_back(temp_router);
+
+            // clear the connections before the next router is built
+            temp_router.connection_list.clear();
+        }
+    }
+}
+
+/*
+ * The user provides the list of routers any given router is connected to by the router
+ * ids separated by whitespace. For example:
+ *
+ * connections="1 2 3 4 5"
+ *
+ * Go through the connections here and store them. Also make sure the list is legal.
+ *
+ * router_tag, loc_data             used to locate errors in the architecture file
+ * router_id                        id of the router that owns the list
+ * connection_list                  receives the parsed ids, in list order
+ * connection_list_attribute_value  raw text of the 'connections' attribute
+ * routers_in_arch_info             connection counters, incremented for every stored id
+ *
+ * Returns false as soon as an entry is not a plain integer (ids stored before that entry
+ * are kept) or when the text holds no entry at all, true otherwise. A duplicated id or
+ * the router's own id raises an architecture error.
+ */
+static bool parse_noc_router_connection_list(pugi::xml_node router_tag, const pugiutil::loc_data& loc_data, int router_id, std::vector<int>& connection_list, std::string connection_list_attribute_value, std::map<int, std::pair<int, int>>& routers_in_arch_info) {
+    // operator>> into a std::string splits on any whitespace, so repeated, leading or
+    // trailing separators (and tabs) never produce an empty entry
+    std::istringstream entries(connection_list_attribute_value);
+    std::string entry;
+    bool found_entry = false;
+
+    while (entries >> entry) {
+        found_entry = true;
+
+        // An entry must be an integer and nothing else. Reading the int alone is not enough:
+        // for "1abc" or "1.5" the read succeeds and stops at the first foreign character,
+        // so any character left in the entry afterwards makes it illegal.
+        std::istringstream entry_stream(entry);
+        int connected_router_id = 0;
+        char leftover;
+        if (!(entry_stream >> connected_router_id) || (entry_stream >> leftover)) {
+            return false;
+        }
+
+        // check the case where a duplicate connection was provided
+        if (std::find(connection_list.begin(), connection_list.end(), connected_router_id) != connection_list.end()) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(router_tag),
+                           "The router with id:'%d' was included multiple times in the connection list for another router.", connected_router_id);
+        }
+
+        // make sure that the current router isn't connected to itself
+        if (router_id == connected_router_id) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(router_tag),
+                           "The router with id:%d was added to its own connection list. A router cannot connect to itself.", router_id);
+        }
+
+        // if we are here then a legal router id was supplied, so store it
+        connection_list.push_back(connected_router_id);
+        // update the connection information for the router named in the connection list
+        update_router_info_in_arch(connected_router_id, true, routers_in_arch_info);
+    }
+
+    // text made of whitespace only holds no router id, so it is not a legal list
+    // (the same verdict the caller got before for such input)
+    return found_entry;
+}
+
+/*
+ * Each router needs a separate <router> tag in the architecture description
+ * to declare it. The number of declarations for each router in the
+ * architecture file is updated here.
+ *
+ * Additionally, for any given topology, a router can connect to other routers.
+ * The number of connections for each router is also updated here.
+ *
+ * Exactly one of the two counters is incremented per call; a router id that is not in
+ * the map yet is inserted first.
+ *
+ * router_id                       id of the router whose counters are updated
+ * router_updated_as_a_connection  true when the id was found in a 'connections' list,
+ *                                 false when it was found on a <router> tag
+ * routers_in_arch_info            router id -> (number of declarations, number of connections)
+ */
+static void update_router_info_in_arch(int router_id, bool router_updated_as_a_connection, std::map<int, std::pair<int, int>>& routers_in_arch_info) {
+    // operator[] creates a missing entry value-initialised, i.e. as the pair (0, 0):
+    // a router seen for the first time starts without declarations or connections
+    std::pair<int, int>& counters = routers_in_arch_info[router_id];
+    int& declarations = counters.first;
+    int& connections = counters.second;
+
+    if (router_updated_as_a_connection) {
+        // the router was listed in the connections of another router
+        ++connections;
+    } else {
+        // the router was declared by a <router> tag
+        ++declarations;
+    }
+}
+
+/*
+ * Verify the counters gathered for every router of a <topology>. The first offending
+ * router, in ascending id order, raises an architecture error when it was
+ *   - declared once but never listed as a connection of another router,
+ *   - listed as a connection but never declared with a <router> tag, or
+ *   - declared more than once.
+ */
+static void verify_noc_topology(std::map<int, std::pair<int, int>>& routers_in_arch_info) {
+    for (const auto& router_entry : routers_in_arch_info) {
+        const int router_id = router_entry.first;
+        const int declarations = router_entry.second.first;
+        const int connections = router_entry.second.second;
+        if ((declarations == 1) && (connections == 0)) {
+            // declared in the architecture, but no other router lists it as a connection
+            archfpga_throw("", -1,
+                           "The router with id:'%d' is not connected to any other router in the NoC.", router_id);
+        } else if ((declarations == 0) && (connections > 0)) {
+            // referenced as a connection, but its <router> tag is missing
+            archfpga_throw("", -1,
+                           "The router with id:'%d' was found to be connected to another router but missing in the architecture file. Add the router using the <router> tag.", router_id);
+        } else if (declarations > 1) {
+            // several <router> tags share this id
+            archfpga_throw("", -1,
+                           "The router with id:'%d' was included more than once in the architecture file. Routers should only be declared once.", router_id);
+        }
+    }
+}
diff --git a/third_party/vtr/libs/archfpga/src/read_xml_arch_file.h b/third_party/vtr/libs/archfpga/src/read_xml_arch_file.h
new file mode 100644
index 000000000..5021d0317
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/read_xml_arch_file.h
@@ -0,0 +1,27 @@
+#ifndef READ_XML_ARCH_FILE_H
+#define READ_XML_ARCH_FILE_H
+
+#include "arch_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* special type indexes, necessary for initialization, everything afterwards
+ * should use the pointers to these type indices*/
+
+#define NUM_MODELS_IN_LIBRARY 4
+#define EMPTY_TYPE_INDEX 0
+
+/* function declarations */
+void XmlReadArch(const char* ArchFile,
+                 const bool timing_enabled,
+                 t_arch* arch,
+                 std::vector<t_physical_tile_type>& PhysicalTileTypes,
+                 std::vector<t_logical_block_type>& LogicalBlockTypes);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/third_party/vtr/libs/archfpga/src/read_xml_util.cc b/third_party/vtr/libs/archfpga/src/read_xml_util.cc
new file mode 100644
index 000000000..784d08a9b
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/read_xml_util.cc
@@ -0,0 +1,142 @@
+#include "read_xml_util.h"
+
+#include "vtr_util.h"
+#include "arch_error.h"
+
+using pugiutil::ReqOpt;
+
+/* Translate a "required?" boolean into the matching pugiutil::ReqOpt value:
+ *   true  -> ReqOpt::REQUIRED
+ *   false -> ReqOpt::OPTIONAL
+ */
+extern ReqOpt BoolToReqOpt(bool b) {
+    return b ? ReqOpt::REQUIRED : ReqOpt::OPTIONAL;
+}
+
+// Build an InstPort from the specification string `str`. If the InstPort constructor throws an
+// ArchFpgaError, it is re-reported through archfpga_throw with the file name and line of `node`.
+InstPort make_inst_port(std::string str, pugi::xml_node node, const pugiutil::loc_data& loc_data) {
+    InstPort inst_port;
+    try {
+        inst_port = InstPort(str);
+    } catch (const ArchFpgaError& e) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(node),
+                       "Failed to parse instance port specification '%s' on <%s> tag, %s",
+                       str.c_str(), node.name(), e.what());
+    }
+    return inst_port;
+}
+
+// Overload taking the port specification from the value of XML attribute `attr`.
+InstPort make_inst_port(pugi::xml_attribute attr, pugi::xml_node node, const pugiutil::loc_data& loc_data) {
+    InstPort parsed_port;
+    try {
+        parsed_port = InstPort(attr.value());
+    } catch (const ArchFpgaError& parse_error) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(node),
+                       "Failed to parse instance port specification '%s' for attribute '%s' on <%s> tag, %s",
+                       attr.value(), attr.name(), node.name(), parse_error.what());
+    }
+    return parsed_port;
+}
+
+// Throw an ArchFpgaError reporting that `node` is an unexpected tag. The message names
+// `parent_node` when it is non-null and lists `expected_tags` when that list is non-empty.
+void bad_tag(const pugi::xml_node node,
+             const pugiutil::loc_data& loc_data,
+             const pugi::xml_node parent_node,
+             const std::vector<std::string> expected_tags) {
+    std::string msg = "Unexpected tag ";
+    msg += "<";
+    msg += node.name();
+    msg += ">";
+    if (parent_node) {
+        msg += " in section <";
+        msg += parent_node.name();
+        msg += ">";
+    }
+    if (!expected_tags.empty()) {
+        msg += ", expected ";
+        // Index arithmetic on purpose: computing end() - 2 on a one-element vector yields an
+        // iterator before begin(), which is undefined behaviour.
+        const std::size_t num_tags = expected_tags.size();
+        for (std::size_t i = 0; i < num_tags; ++i) {
+            msg += "<" + expected_tags[i] + ">";
+            if (i + 2 < num_tags) {
+                msg += ", "; // at least two more tags follow
+            } else if (i + 2 == num_tags) {
+                msg += " or "; // exactly one more tag follows
+            }
+        }
+    }
+
+    throw ArchFpgaError(msg, loc_data.filename(), loc_data.line(node));
+}
+
+// Throw an ArchFpgaError reporting that `attr` is an unexpected attribute. The message names
+// `node` when it is non-null and lists `expected_attributes` when that list is non-empty.
+void bad_attribute(const pugi::xml_attribute attr,
+                   const pugi::xml_node node,
+                   const pugiutil::loc_data& loc_data,
+                   const std::vector<std::string> expected_attributes) {
+    std::string msg = "Unexpected attribute ";
+    msg += "'";
+    msg += attr.name();
+    msg += "'";
+    if (node) {
+        msg += " on <";
+        msg += node.name();
+        msg += "> tag";
+    }
+    if (!expected_attributes.empty()) {
+        msg += ", expected ";
+        // Index arithmetic on purpose: computing end() - 2 on a one-element vector yields an
+        // iterator before begin(), which is undefined behaviour.
+        const std::size_t num_attributes = expected_attributes.size();
+        for (std::size_t i = 0; i < num_attributes; ++i) {
+            msg += "'" + expected_attributes[i] + "'";
+            if (i + 2 < num_attributes) {
+                msg += ", "; // at least two more names follow
+            } else if (i + 2 == num_attributes) {
+                msg += " or "; // exactly one more name follows
+            }
+        }
+    }
+
+    throw ArchFpgaError(msg, loc_data.filename(), loc_data.line(node));
+}
+
+// Throw an ArchFpgaError reporting that the value of `attr` is invalid. The message names
+// `node` when it is non-null and lists `expected_values` when that list is non-empty.
+void bad_attribute_value(const pugi::xml_attribute attr,
+                         const pugi::xml_node node,
+                         const pugiutil::loc_data& loc_data,
+                         const std::vector<std::string> expected_values) {
+    std::string msg = "Invalid value '";
+    msg += attr.value();
+    msg += "'";
+    msg += " for attribute '";
+    msg += attr.name();
+    msg += "'";
+    if (node) {
+        msg += " on <";
+        msg += node.name();
+        msg += "> tag";
+    }
+    if (!expected_values.empty()) {
+        msg += ", expected value ";
+        // Index arithmetic on purpose: computing end() - 2 on a one-element vector yields an
+        // iterator before begin(), which is undefined behaviour.
+        const std::size_t num_values = expected_values.size();
+        for (std::size_t i = 0; i < num_values; ++i) {
+            msg += "'" + expected_values[i] + "'";
+            if (i + 2 < num_values) {
+                msg += ", "; // at least two more values follow
+            } else if (i + 2 == num_values) {
+                msg += " or "; // exactly one more value follows
+            }
+        }
+    }
+
+    throw ArchFpgaError(msg, loc_data.filename(), loc_data.line(node));
+}
diff --git a/third_party/vtr/libs/archfpga/src/read_xml_util.h b/third_party/vtr/libs/archfpga/src/read_xml_util.h
new file mode 100644
index 000000000..05a77ab03
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/src/read_xml_util.h
@@ -0,0 +1,28 @@
+#ifndef READ_XML_UTIL_H
+#define READ_XML_UTIL_H
+
+#include "pugixml.hpp"
+#include "pugixml_loc.hpp"
+#include "pugixml_util.hpp"
+#include "arch_util.h"
+
+pugiutil::ReqOpt BoolToReqOpt(bool b);
+
+void bad_tag(const pugi::xml_node node,
+             const pugiutil::loc_data& loc_data,
+             const pugi::xml_node parent_node = pugi::xml_node(),
+             const std::vector<std::string> expected_tags = std::vector<std::string>());
+
+void bad_attribute(const pugi::xml_attribute attr,
+                   const pugi::xml_node node,
+                   const pugiutil::loc_data& loc_data,
+                   const std::vector<std::string> expected_attributes = std::vector<std::string>());
+void bad_attribute_value(const pugi::xml_attribute attr,
+                         const pugi::xml_node node,
+                         const pugiutil::loc_data& loc_data,
+                         const std::vector<std::string> expected_values = std::vector<std::string>()); //values listed in the error message
+
+InstPort make_inst_port(std::string str, pugi::xml_node node, const pugiutil::loc_data& loc_data);
+InstPort make_inst_port(pugi::xml_attribute attr, pugi::xml_node node, const pugiutil::loc_data& loc_data);
+
+#endif
diff --git a/third_party/vtr/libs/archfpga/test/main.cpp b/third_party/vtr/libs/archfpga/test/main.cpp
new file mode 100644
index 000000000..2a2e12d62
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/test/main.cpp
@@ -0,0 +1,2 @@
+#define CATCH_CONFIG_MAIN
+#include "catch2/catch_test_macros.hpp"
\ No newline at end of file
diff --git a/third_party/vtr/libs/archfpga/test/test_read_xml_arch_file.cpp b/third_party/vtr/libs/archfpga/test/test_read_xml_arch_file.cpp
new file mode 100644
index 000000000..adc9eab42
--- /dev/null
+++ b/third_party/vtr/libs/archfpga/test/test_read_xml_arch_file.cpp
@@ -0,0 +1,269 @@
+// test framework
+#include "catch2/catch_test_macros.hpp"
+#include "catch2/matchers/catch_matchers_all.hpp"
+
+// testing static functions, so include the whole source file they are defined in
+#include "read_xml_arch_file.cpp"
+
+// for comparing floats
+#include "vtr_math.h"
+
+TEST_CASE("Updating router info in arch", "[NoC Arch Tests]") {
+    std::map<int, std::pair<int, int>> test_router_list;
+    // iterator used to look up the router's entry after each update
+    std::map<int, std::pair<int, int>>::iterator it;
+
+    // initial conditions
+    int router_id = 1;
+    bool router_is_from_connection_list = false;
+
+    // we initially need the map to be empty
+    REQUIRE(test_router_list.size() == 0);
+
+    SECTION("Update the number of declarations for a router for the first time ") {
+        update_router_info_in_arch(router_id, router_is_from_connection_list, test_router_list);
+
+        it = test_router_list.find(router_id);
+
+        // check first that the router was newly added to the router database
+        REQUIRE(it != test_router_list.end());
+
+        // now verify the components of the router parameter
+        REQUIRE(it->second.first == 1);
+        REQUIRE(it->second.second == 0);
+    }
+    SECTION("Update the number of connections for a router for the first time") {
+        router_is_from_connection_list = true;
+
+        update_router_info_in_arch(router_id, router_is_from_connection_list, test_router_list);
+
+        it = test_router_list.find(router_id);
+
+        // check first that the router was newly added to the router database
+        REQUIRE(it != test_router_list.end());
+
+        // now verify the components of the router parameter
+        REQUIRE(it->second.first == 0);
+        REQUIRE(it->second.second == 1);
+    }
+    SECTION("Update the number of declarations for a router when it already exists") {
+        update_router_info_in_arch(router_id, router_is_from_connection_list, test_router_list);
+
+        // verify that a router was added
+        REQUIRE(test_router_list.size() != 0);
+
+        update_router_info_in_arch(router_id, router_is_from_connection_list, test_router_list);
+
+        it = test_router_list.find(router_id);
+
+        // check first that the router is present in the router database
+        REQUIRE(it != test_router_list.end());
+
+        // now verify the components of the router parameter (two updates were applied)
+        REQUIRE(it->second.first == 2);
+        REQUIRE(it->second.second == 0);
+    }
+    SECTION("Update the number of connections for a router when it already exists") {
+        router_is_from_connection_list = true;
+
+        update_router_info_in_arch(router_id, router_is_from_connection_list, test_router_list);
+
+        // verify that a router was added
+        REQUIRE(test_router_list.size() != 0);
+
+        update_router_info_in_arch(router_id, router_is_from_connection_list, test_router_list);
+
+        it = test_router_list.find(router_id);
+
+        // check first that the router is present in the router database
+        REQUIRE(it != test_router_list.end());
+
+        // now verify the components of the router parameter (two updates were applied)
+        REQUIRE(it->second.first == 0);
+        REQUIRE(it->second.second == 2);
+    }
+}
+
+TEST_CASE("Verifying a parsed NoC topology", "[NoC Arch Tests]") {
+    std::map<int, std::pair<int, int>> test_router_list;
+    // each section fills the map by hand with one offending entry and checks the exact error text
+    REQUIRE(test_router_list.size() == 0);
+
+    SECTION("Check the error where a router in the NoC is not connected to other routers.") {
+        // error router
+        test_router_list.insert(std::pair<int, std::pair<int, int>>(1, std::pair<int, int>(1, 0)));
+
+        // some normal routers
+        test_router_list.insert(std::pair<int, std::pair<int, int>>(2, std::pair<int, int>(1, 5)));
+
+        test_router_list.insert(std::pair<int, std::pair<int, int>>(3, std::pair<int, int>(1, 6)));
+
+        REQUIRE(test_router_list.size() == 3);
+
+        REQUIRE_THROWS_WITH(verify_noc_topology(test_router_list), "The router with id:'1' is not connected to any other router in the NoC.");
+    }
+    SECTION("Check the error where a router in the NoC is connected to other routers but missing a declaration in the arch file.") {
+        // normal routers
+        test_router_list.insert(std::pair<int, std::pair<int, int>>(1, std::pair<int, int>(1, 5)));
+
+        test_router_list.insert(std::pair<int, std::pair<int, int>>(2, std::pair<int, int>(1, 3)));
+
+        // error router
+        test_router_list.insert(std::pair<int, std::pair<int, int>>(3, std::pair<int, int>(0, 5)));
+
+        test_router_list.insert(std::pair<int, std::pair<int, int>>(4, std::pair<int, int>(1, 10)));
+
+        REQUIRE(test_router_list.size() == 4);
+
+        REQUIRE_THROWS_WITH(verify_noc_topology(test_router_list), "The router with id:'3' was found to be connected to another router but missing in the architecture file. Add the router using the <router> tag.");
+    }
+    SECTION("Check the error where the router is included more than once in the architecture file.") {
+        // normal routers
+        test_router_list.insert(std::pair<int, std::pair<int, int>>(1, std::pair<int, int>(1, 5)));
+
+        test_router_list.insert(std::pair<int, std::pair<int, int>>(2, std::pair<int, int>(1, 3)));
+
+        test_router_list.insert(std::pair<int, std::pair<int, int>>(3, std::pair<int, int>(1, 10)));
+
+        // error router
+        test_router_list.insert(std::pair<int, std::pair<int, int>>(4, std::pair<int, int>(2, 10)));
+
+        // normal routers
+        test_router_list.insert(std::pair<int, std::pair<int, int>>(5, std::pair<int, int>(1, 3)));
+
+        test_router_list.insert(std::pair<int, std::pair<int, int>>(6, std::pair<int, int>(1, 10)));
+
+        REQUIRE(test_router_list.size() == 6);
+
+        REQUIRE_THROWS_WITH(verify_noc_topology(test_router_list), "The router with id:'4' was included more than once in the architecture file. Routers should only be declared once.");
+    }
+}
+
+TEST_CASE("Verifying mesh topology creation", "[NoC Arch Tests]") {
+    // data for the xml parsing
+    pugi::xml_node test;
+    pugiutil::loc_data test_location;
+
+    // the noc storage
+    t_noc_inf test_noc;
+
+    // mesh parameters (the defaults have end_x < start_x and a mesh size of 0; both are used by the error sections)
+    double mesh_start_x = 10;
+    double mesh_start_y = 10;
+    double mesh_end_x = 5;
+    double mesh_end_y = 56;
+    double mesh_size = 0;
+
+    SECTION("Check the error where a mesh size was illegal.") {
+        REQUIRE_THROWS_WITH(generate_noc_mesh(test, test_location, &test_noc, mesh_start_x, mesh_end_x, mesh_start_y, mesh_end_y, mesh_size), "The NoC mesh size cannot be 0.");
+    }
+    SECTION("Check the error where a mesh region size was invalid.") {
+        mesh_size = 3;
+
+        REQUIRE_THROWS_WITH(generate_noc_mesh(test, test_location, &test_noc, mesh_start_x, mesh_end_x, mesh_start_y, mesh_end_y, mesh_size), "The NoC region is invalid.");
+    }
+    SECTION("Check the mesh creation for integer precision coordinates.") {
+        // define test parameters
+        mesh_size = 3;
+
+        mesh_start_x = 0;
+        mesh_start_y = 0;
+
+        mesh_end_x = 4;
+        mesh_end_y = 4;
+
+        // create the golden results (expected x/y position per router id)
+        double golden_results_x[9];
+        double golden_results_y[9];
+
+        // first row of the mesh
+        golden_results_x[0] = 0;
+        golden_results_y[0] = 0;
+        golden_results_x[1] = 2;
+        golden_results_y[1] = 0;
+        golden_results_x[2] = 4;
+        golden_results_y[2] = 0;
+
+        // second row of the mesh
+        golden_results_x[3] = 0;
+        golden_results_y[3] = 2;
+        golden_results_x[4] = 2;
+        golden_results_y[4] = 2;
+        golden_results_x[5] = 4;
+        golden_results_y[5] = 2;
+
+        // third row of the mesh
+        golden_results_x[6] = 0;
+        golden_results_y[6] = 4;
+        golden_results_x[7] = 2;
+        golden_results_y[7] = 4;
+        golden_results_x[8] = 4;
+        golden_results_y[8] = 4;
+
+        generate_noc_mesh(test, test_location, &test_noc, mesh_start_x, mesh_end_x, mesh_start_y, mesh_end_y, mesh_size);
+
+        // go through all the expected routers
+        for (int expected_router_id = 0; expected_router_id < (mesh_size * mesh_size); expected_router_id++) {
+            // make sure the router ids match
+            REQUIRE(test_noc.router_list[expected_router_id].id == expected_router_id);
+
+            // make sure the position of the routers are correct (compared with exact equality here)
+            // x position
+            REQUIRE(golden_results_x[expected_router_id] == test_noc.router_list[expected_router_id].device_x_position);
+            // y position
+            REQUIRE(golden_results_y[expected_router_id] == test_noc.router_list[expected_router_id].device_y_position);
+        }
+    }
+    SECTION("Check the mesh creation for double precision coordinates.") {
+        // define test parameters
+        mesh_size = 3;
+
+        mesh_start_x = 3.5;
+        mesh_start_y = 5.7;
+
+        mesh_end_x = 10.8;
+        mesh_end_y = 6.4;
+
+        // create the golden results (expected x/y position per router id)
+        double golden_results_x[9];
+        double golden_results_y[9];
+
+        // first row of the mesh
+        golden_results_x[0] = 3.5;
+        golden_results_y[0] = 5.7;
+        golden_results_x[1] = 7.15;
+        golden_results_y[1] = 5.7;
+        golden_results_x[2] = 10.8;
+        golden_results_y[2] = 5.7;
+
+        // second row of the mesh
+        golden_results_x[3] = 3.5;
+        golden_results_y[3] = 6.05;
+        golden_results_x[4] = 7.15;
+        golden_results_y[4] = 6.05;
+        golden_results_x[5] = 10.8;
+        golden_results_y[5] = 6.05;
+
+        // third row of the mesh
+        golden_results_x[6] = 3.5;
+        golden_results_y[6] = 6.4;
+        golden_results_x[7] = 7.15;
+        golden_results_y[7] = 6.4;
+        golden_results_x[8] = 10.8;
+        golden_results_y[8] = 6.4;
+
+        generate_noc_mesh(test, test_location, &test_noc, mesh_start_x, mesh_end_x, mesh_start_y, mesh_end_y, mesh_size);
+
+        // go through all the expected routers
+        for (int expected_router_id = 0; expected_router_id < (mesh_size * mesh_size); expected_router_id++) {
+            // make sure the router ids match
+            REQUIRE(test_noc.router_list[expected_router_id].id == expected_router_id);
+
+            // make sure the position of the routers are correct (compared with vtr::isclose, not ==)
+            // x position
+            REQUIRE(vtr::isclose(golden_results_x[expected_router_id], test_noc.router_list[expected_router_id].device_x_position));
+            // y position
+            REQUIRE(vtr::isclose(golden_results_y[expected_router_id], test_noc.router_list[expected_router_id].device_y_position));
+        }
+    }
+}
\ No newline at end of file
diff --git a/third_party/vtr/libs/log/.gitignore b/third_party/vtr/libs/log/.gitignore
new file mode 100644
index 000000000..da50203d3
--- /dev/null
+++ b/third_party/vtr/libs/log/.gitignore
@@ -0,0 +1 @@
+test_log
diff --git a/third_party/vtr/libs/log/CMakeLists.txt b/third_party/vtr/libs/log/CMakeLists.txt
new file mode 100644
index 000000000..7445ef341
--- /dev/null
+++ b/third_party/vtr/libs/log/CMakeLists.txt
@@ -0,0 +1,24 @@
+cmake_minimum_required(VERSION 3.9)
+
+project("liblog")
+#The sources under src/ are named *.cc (log.cc, main.cc); *.cpp is still matched as well
+file(GLOB_RECURSE EXEC_SOURCES src/main.cpp src/main.cc)
+file(GLOB_RECURSE LIB_SOURCES src/*.cpp src/*.cc)
+file(GLOB_RECURSE LIB_HEADERS src/*.h)
+files_to_dirs(LIB_HEADERS LIB_INCLUDE_DIRS)
+
+#Remove test executable from library
+list(REMOVE_ITEM LIB_SOURCES ${EXEC_SOURCES})
+
+#Create the library
+add_library(liblog STATIC
+             ${LIB_HEADERS}
+             ${LIB_SOURCES})
+target_include_directories(liblog PUBLIC ${LIB_INCLUDE_DIRS})
+set_target_properties(liblog PROPERTIES PREFIX "") #Avoid extra 'lib' prefix
+
+#Create the test executable
+add_executable(test_log ${EXEC_SOURCES})
+target_link_libraries(test_log liblog)
+
+install(TARGETS liblog DESTINATION bin)
diff --git a/third_party/vtr/libs/log/LICENSE.txt b/third_party/vtr/libs/log/LICENSE.txt
new file mode 100644
index 000000000..41116e903
--- /dev/null
+++ b/third_party/vtr/libs/log/LICENSE.txt
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2014 Jason Luu
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/third_party/vtr/libs/log/Readme.txt b/third_party/vtr/libs/log/Readme.txt
new file mode 100644
index 000000000..8f89aafdb
--- /dev/null
+++ b/third_party/vtr/libs/log/Readme.txt
@@ -0,0 +1,13 @@
+Simple Logger Library
+
+Author: Jason Luu
+Date: Sept 5, 2014
+
+This library provides simple logging operations.
+
+- Output messages to both file and terminal
+- Tag messages based on feedback type (eg. info, warning, or error)
+- Track number of warnings and errors
+
+Specialized programming knowledge:
+- Requires usage of variable arguments in <stdarg.h> standard C library to properly wrap printf and fprintf
diff --git a/third_party/vtr/libs/log/src/log.cc b/third_party/vtr/libs/log/src/log.cc
new file mode 100644
index 000000000..cbc2e79fa
--- /dev/null
+++ b/third_party/vtr/libs/log/src/log.cc
@@ -0,0 +1,117 @@
+/**
+ * Lightweight logging tool.  Automatically prepend messages with prefixes and store in log file.
+ *
+ * Author: Jason Luu
+ * Date: Sept 5, 2014
+ */
+
+#include <stdio.h>
+#include <stdarg.h> /* Allows for variable arguments, necessary for wrapping printf */
+#include "log.h"
+
+#define LOG_DEFAULT_FILE_NAME "output.log"
+
+static int log_warning = 0;
+static int log_error = 0;
+FILE* log_stream = nullptr;
+
+static void check_init();
+
+/* Select the file that receives a copy of later log output. Any currently open stream is closed
+ * first; a null filename turns file logging off, otherwise the file is opened with mode "w".
+ */
+void log_set_output_file(const char* filename) {
+    if (log_stream) {
+        fclose(log_stream);
+    }
+    log_stream = nullptr;
+    if (!filename) {
+        return;
+    }
+
+    log_stream = fopen(filename, "w");
+    if (!log_stream) {
+        printf("Error writing to file %s\n\n", filename);
+    }
+}
+
+void log_print_direct(const char* message, ...) {
+    va_list fmt_args; /* the printf-style arguments that follow message */
+    va_start(fmt_args, message);
+    vprintf(message, fmt_args); /* stdout only; log_stream is not written here */
+    va_end(fmt_args);
+}
+
+/* Print a printf-style message to stdout and, when a log file is open, to that file as well */
+void log_print_info(const char* message, ...) {
+    check_init(); /* currently a no-op */
+    va_list fmt_args;
+    va_start(fmt_args, message);
+    vprintf(message, fmt_args);
+    va_end(fmt_args);
+
+    if (!log_stream) {
+        return; /* no log file configured */
+    }
+    va_start(fmt_args, message); /* restart the argument list for the second pass */
+    vfprintf(log_stream, message, fmt_args);
+    va_end(fmt_args);
+    fflush(log_stream);
+}
+
+/* Count a warning and print "Warning <n>: <message>" to stdout and, if open, to the log file */
+void log_print_warning(const char* /*filename*/, unsigned int /*line_num*/, const char* message, ...) {
+    check_init(); /* currently a no-op */
+    ++log_warning;
+
+    va_list fmt_args;
+    va_start(fmt_args, message);
+    printf("Warning %d: ", log_warning);
+    vprintf(message, fmt_args);
+    va_end(fmt_args);
+
+    if (!log_stream) {
+        return; /* no log file configured */
+    }
+    fprintf(log_stream, "Warning %d: ", log_warning);
+    va_start(fmt_args, message); /* restart the argument list for the second pass */
+    vfprintf(log_stream, message, fmt_args);
+    va_end(fmt_args);
+    fflush(log_stream);
+}
+
+/* Count an error and print "Error <n>: <message>" to stderr and, if open, to the log file */
+void log_print_error(const char* /*filename*/, unsigned int /*line_num*/, const char* message, ...) {
+    check_init(); /* currently a no-op */
+    ++log_error;
+
+    va_list fmt_args;
+    va_start(fmt_args, message);
+    fprintf(stderr, "Error %d: ", log_error);
+    vfprintf(stderr, message, fmt_args);
+    va_end(fmt_args);
+
+    if (!log_stream) {
+        return; /* no log file configured */
+    }
+
+    fprintf(log_stream, "Error %d: ", log_error);
+    va_start(fmt_args, message); /* restart the argument list for the second pass */
+    vfprintf(log_stream, message, fmt_args);
+    va_end(fmt_args);
+
+    fflush(log_stream);
+}
+
+/**
+ * No-op: performs no initialization. The log_print_info/warning/error functions in this file call it.
+ */
+static void check_init() {
+    //We now allow a nullptr log_stream (i.e. no log file) so nothing to do here
+}
+
+/* Close the log file, if any. Delegates to log_set_output_file(nullptr), which also resets
+ * log_stream so later log_print_* / log_close calls never touch the closed FILE. */
+void log_close() {
+    log_set_output_file(nullptr);
+}
diff --git a/third_party/vtr/libs/log/src/log.h b/third_party/vtr/libs/log/src/log.h
new file mode 100644
index 000000000..a350a64c5
--- /dev/null
+++ b/third_party/vtr/libs/log/src/log.h
@@ -0,0 +1,22 @@
+/**
+ * Lightweight logging tool.  Automatically prepend messages with prefixes and store in log file.
+ *
+ * Init/Change name of log file using log_set_output_file, when done, call log_close
+ *
+ * Author: Jason Luu
+ * Date: Sept 5, 2014
+ */
+
+#ifndef LOG_H
+#define LOG_H
+
+void log_set_output_file(const char* filename);
+
+void log_print_direct(const char* message, ...);
+void log_print_info(const char* message, ...);
+void log_print_warning(const char* filename, unsigned int line_num, const char* message, ...);
+void log_print_error(const char* filename, unsigned int line_num, const char* message, ...);
+
+void log_close();
+
+#endif
diff --git a/third_party/vtr/libs/log/src/main.cc b/third_party/vtr/libs/log/src/main.cc
new file mode 100644
index 000000000..653882560
--- /dev/null
+++ b/third_party/vtr/libs/log/src/main.cc
@@ -0,0 +1,18 @@
+/** Jason Luu
+ * Test program for logger
+ */
+
+#include "log.h"
+
+int main() {
+    const int int_lhs = 10, int_rhs = 20;
+    const float flt_lhs = 1.5f, flt_rhs = -2.01f;
+    log_print_info("Testing logger\n\n");
+    log_print_info("Output separate strings: %s %s\n", "pass", "[PASS]");
+    log_print_info("Output two integers: x = %d y = %d\n", int_lhs, int_rhs);
+    log_print_warning(__FILE__, __LINE__, "Test warning on floating point arguments %g %g\n", flt_lhs, flt_rhs);
+    log_print_error(__FILE__, __LINE__, "Test error on two variables %g %g \n\n", flt_lhs - int_lhs, flt_rhs + int_rhs);
+    /* log_set_output_file is never called here, so the messages only go to stdout/stderr */
+    log_print_info("Test complete\n");
+    return 0;
+}
\ No newline at end of file
diff --git a/third_party/vtr/libs/pugiutil/CMakeLists.txt b/third_party/vtr/libs/pugiutil/CMakeLists.txt
new file mode 100644
index 000000000..edbd4c988
--- /dev/null
+++ b/third_party/vtr/libs/pugiutil/CMakeLists.txt
@@ -0,0 +1,22 @@
+cmake_minimum_required(VERSION 3.9)
+
+project("libpugiutil")
+
+#
+# Source files and library
+# (the sources under src/ are named *.cc; *.cpp is still matched as well)
+file(GLOB_RECURSE LIB_SOURCES src/*.cpp src/*.cc)
+file(GLOB_RECURSE LIB_HEADERS src/*.hpp src/*.h)
+files_to_dirs(LIB_HEADERS LIB_INCLUDE_DIRS)
+
+#Create the library
+add_library(libpugiutil STATIC
+             ${LIB_HEADERS}
+             ${LIB_SOURCES})
+target_include_directories(libpugiutil PUBLIC ${LIB_INCLUDE_DIRS})
+set_target_properties(libpugiutil PROPERTIES PREFIX "") #Avoid extra 'lib' prefix
+
+target_link_libraries(libpugiutil
+                        libpugixml)
+
+install(TARGETS libpugiutil DESTINATION bin)
diff --git a/third_party/vtr/libs/pugiutil/src/pugixml_loc.cc b/third_party/vtr/libs/pugiutil/src/pugixml_loc.cc
new file mode 100644
index 000000000..b773b410b
--- /dev/null
+++ b/third_party/vtr/libs/pugiutil/src/pugixml_loc.cc
@@ -0,0 +1,49 @@
+#include <cstdio>
+#include <algorithm>
+#include "pugixml_util.hpp"
+#include "pugixml_loc.hpp"
+
+namespace pugiutil {
+
+//Return the 1-based line number for the given byte offset
+std::size_t loc_data::line(std::ptrdiff_t offset) const {
+    //offsets_ holds the offset of every '\n'; count the ones located before `offset`
+    auto first_not_before = std::lower_bound(offsets_.begin(), offsets_.end(), offset);
+    const std::size_t newlines_before = first_not_before - offsets_.begin();
+    return 1 + newlines_before;
+}
+
+//Return the 1-based column number for the given byte offset, counted from the previous newline
+std::size_t loc_data::col(std::ptrdiff_t offset) const {
+    auto line_end = std::lower_bound(offsets_.begin(), offsets_.end(), offset);
+    //on the first line no newline precedes the offset, so position -1 acts as the base
+    const std::ptrdiff_t base = (line_end == offsets_.begin()) ? -1 : *(line_end - 1);
+    return offset - base;
+}
+
+//Scan the file named filename_ once and append the byte offset of every '\n' to offsets_
+void loc_data::build_loc_data() {
+    FILE* xml_file = fopen(filename_.c_str(), "rb");
+    if (!xml_file) {
+        throw XmlError("Failed to open file", filename_);
+    }
+
+    char chunk[1024];
+    std::ptrdiff_t chunk_start = 0; //file offset of chunk[0]
+    for (;;) {
+        const std::size_t bytes_read = fread(chunk, 1, sizeof(chunk), xml_file);
+        if (bytes_read == 0) {
+            break; //nothing more could be read
+        }
+        for (std::size_t pos = 0; pos < bytes_read; ++pos) {
+            if (chunk[pos] == '\n') {
+                offsets_.push_back(chunk_start + pos);
+            }
+        }
+        chunk_start += bytes_read;
+    }
+
+    fclose(xml_file);
+}
+
+} // namespace pugiutil
diff --git a/third_party/vtr/libs/pugiutil/src/pugixml_loc.hpp b/third_party/vtr/libs/pugiutil/src/pugixml_loc.hpp
new file mode 100644
index 000000000..0f597a593
--- /dev/null
+++ b/third_party/vtr/libs/pugiutil/src/pugixml_loc.hpp
@@ -0,0 +1,51 @@
+#ifndef PUGIXML_LOC_H
+#define PUGIXML_LOC_H
+/*
+ * This file contains utilities for the  PUGI XML parser,
+ * handling the retrieval of line numbers (useful for error messages)
+ */
+
+#include <vector>
+#include "pugixml.hpp"
+
+namespace pugiutil {
+
+//pugi offset to line/col data based on: https://stackoverflow.com/questions/21003471/convert-pugixmls-result-offset-to-column-line
+class loc_data {
+  public:
+    loc_data() = default; //empty lookup: no filename and no recorded newline offsets
+    //Store the filename and scan that file for newline offsets (build_loc_data throws XmlError if it cannot be opened)
+    loc_data(std::string filename_val)
+        : filename_(filename_val) {
+        build_loc_data();
+    }
+
+    //The filename this location data is for
+    const std::string& filename() const { return filename_; }
+    const char* filename_c_str() const { return filename_.c_str(); }
+
+    //Convenience wrapper: line number of an xml_node, looked up through node.offset_debug()
+    std::size_t line(pugi::xml_node node) const {
+        return line(node.offset_debug());
+    }
+
+    //Convenience wrapper: column number of an xml_node, looked up through node.offset_debug()
+    std::size_t col(pugi::xml_node node) const {
+        return col(node.offset_debug());
+    }
+
+    //Return the line number from the given offset
+    std::size_t line(std::ptrdiff_t offset) const;
+
+    //Return the column number from the given offset
+    std::size_t col(std::ptrdiff_t offset) const;
+
+  private:
+    void build_loc_data(); //fills offsets_ by reading the file named filename_
+
+    std::string filename_;
+    std::vector<std::ptrdiff_t> offsets_; //byte offset of each '\n' in the file, in file order
+};
+} // namespace pugiutil
+
+#endif
diff --git a/third_party/vtr/libs/pugiutil/src/pugixml_util.cc b/third_party/vtr/libs/pugiutil/src/pugixml_util.cc
new file mode 100644
index 000000000..d4d2a3982
--- /dev/null
+++ b/third_party/vtr/libs/pugiutil/src/pugixml_util.cc
@@ -0,0 +1,298 @@
+#include "pugixml_util.hpp"
+#include <algorithm>
+
+namespace pugiutil {
+
+//Loads the XML file specified by filename into the passed pugi::xml_document.
+//Throws XmlError (message includes pugixml's description plus line/col) if loading fails.
+//Returns loc_data look-up for xml node line numbers
+loc_data load_xml(pugi::xml_document& doc,      //Document object to be loaded with file contents
+                  const std::string filename) { //Filename to load from
+    auto location_data = loc_data(filename); //Built before parsing so a failure offset can be mapped to line/col
+
+    auto load_result = doc.load_file(filename.c_str());
+    if (!load_result) {
+        std::string msg = load_result.description();
+        auto line = location_data.line(load_result.offset);
+        auto col = location_data.col(load_result.offset);
+        throw XmlError("Unable to load XML file '" + filename + "', " + msg
+                           + " (line: " + std::to_string(line) + " col: " + std::to_string(col) + ")",
+                       filename.c_str(), line);
+    }
+
+    return location_data;
+}
+
+//Gets the first child element of the given name and returns it (the result of node.child(); it tests false when no such child exists).
+//
+//  node - The parent xml node
+//  child_name - The child tag name
+//  loc_data - XML file location data
+//  req_opt - Whether the child tag is required (throws XmlError if required and not found) or optional. Defaults to REQUIRED
+pugi::xml_node get_first_child(const pugi::xml_node node,
+                               const std::string& child_name,
+                               const loc_data& loc_data,
+                               const ReqOpt req_opt) {
+    pugi::xml_node child = node.child(child_name.c_str());
+    if (!child && req_opt == REQUIRED) {
+        throw XmlError("Missing required child node '" + child_name + "' in parent node '" + node.name() + "'",
+                       loc_data.filename(), loc_data.line(node));
+    }
+    return child;
+}
+
+//Gets the child element of the given name and returns it.
+//Throws XmlError if more than one matching child is found.
+//
+//  node - The parent xml node
+//  child_name - The child tag name
+//  loc_data - XML file location data
+//  req_opt - Whether the child tag is required (will error if required and not found) or optional. Defaults to REQUIRED
+pugi::xml_node get_single_child(const pugi::xml_node node,
+                                const std::string& child_name,
+                                const loc_data& loc_data,
+                                const ReqOpt req_opt) {
+    pugi::xml_node child = get_first_child(node, child_name, loc_data, req_opt);
+    //A following sibling with the same tag name means the child is not unique
+    if (child && child.next_sibling(child_name.c_str())) {
+        throw XmlError("Multiple child '" + child_name + "' nodes found in parent node '" + node.name() + "' (only one expected)",
+                       loc_data.filename(), loc_data.line(node));
+    }
+
+    return child;
+}
+
+//Counts the number of child nodes of type 'child_name'
+//
+//  node - The parent xml node
+//  child_name - The child tag name
+//  loc_data - XML file location data
+//  req_opt - Whether the child tag is required (will error if required and not found) or optional. Defaults to REQUIRED
+size_t count_children(const pugi::xml_node node,
+                      const std::string& child_name,
+                      const loc_data& loc_data,
+                      const ReqOpt req_opt) {
+    size_t count = 0;
+
+    pugi::xml_node child = get_first_child(node, child_name, loc_data, req_opt);
+
+    while (child) {
+        ++count;
+        child = child.next_sibling(child_name.c_str());
+    }
+
+    //Note that we don't do any error checking here since get_first_child does the existence check
+
+    return count;
+}
+
+//Counts the number of child nodes (any type)
+//
+//  node - The parent xml node
+//  loc_data - XML file location data
+//  req_opt - Whether at least one child is required (will error if required and none found) or optional. No default is declared for this overload
+size_t count_children(const pugi::xml_node node,
+                      const loc_data& loc_data,
+                      const ReqOpt req_opt) {
+    size_t count = std::distance(node.begin(), node.end());
+
+    if (count == 0 && req_opt == REQUIRED) {
+        throw XmlError("Expected child node(s) in node '" + std::string(node.name()) + "'",
+                       loc_data.filename(), loc_data.line(node));
+    }
+
+    return count;
+}
+
+//Verifies that 'node' has exactly 'expected_count' children named 'child_name'; throws XmlError otherwise.
+//
+//  node - The parent xml node
+//  child_name - The child tag name to count
+//  loc_data - XML file location data
+//  expected_count - The expected number of child nodes
+void expect_child_node_count(const pugi::xml_node node,
+                             std::string child_name,
+                             size_t expected_count,
+                             const loc_data& loc_data) {
+    const size_t found = count_children(node, child_name, loc_data, OPTIONAL);
+    if (found == expected_count) {
+        return;
+    }
+    std::string msg = "Found " + std::to_string(found) + " '" + child_name + "' child node(s) of ";
+    msg += "'" + std::string(node.name()) + "'";
+    msg += " (expected " + std::to_string(expected_count) + ")";
+    throw XmlError(msg, loc_data.filename(), loc_data.line(node));
+}
+
+//Verifies that 'node' has exactly 'expected_count' children (of any name); throws XmlError otherwise.
+//
+//  node - The parent xml node
+//  loc_data - XML file location data
+//  expected_count - The expected number of child nodes
+void expect_child_node_count(const pugi::xml_node node,
+                             size_t expected_count,
+                             const loc_data& loc_data) {
+    const size_t found = count_children(node, loc_data, OPTIONAL);
+    if (found == expected_count) {
+        return;
+    }
+
+    std::string msg = "Found " + std::to_string(found) + " child node(s) of ";
+    msg += "'" + std::string(node.name()) + "'";
+    msg += " (expected " + std::to_string(expected_count) + ")";
+    throw XmlError(msg, loc_data.filename(), loc_data.line(node));
+}
+
+//Throws a well formatted error if any of node's children are not part of child_names.
+//Note this does not check whether the nodes in 'child_names' actually exist.
+//
+//  node - The parent xml node
+//  child_names - expected child names
+//  loc_data - XML file location data
+void expect_only_children(const pugi::xml_node node,
+                          std::vector<std::string> child_names,
+                          const loc_data& loc_data) {
+    for (auto child : node.children()) {
+        std::string child_name = child.name();
+        auto iter = std::find(child_names.begin(),
+                              child_names.end(),
+                              child_name);
+        if (iter == child_names.end()) {
+            std::string msg = "Unexpected child '" + child_name + "'"
+                              + " of node '" + node.name() + "'.";
+            //Append the permitted names, comma-separated, with "or" before the last one
+            if (child_names.size() > 0) {
+                msg += " Expected (possibly) one of: ";
+                for (size_t i = 0; i < child_names.size(); i++) {
+                    if (i != 0) {
+                        msg += ", ";
+                    }
+                    if (i > 0 && i == child_names.size() - 1) {
+                        msg += "or ";
+                    }
+                    msg += "'" + child_names[i] + "'";
+                }
+                msg += ".";
+            }
+
+            throw XmlError(msg, loc_data.filename(), loc_data.line(child)); //Reported at the offending child's line
+        }
+    }
+}
+
+//Throws a well formatted error if any attribute other than those named in 'attribute_names' are found on 'node' with an additional explanation.
+//Note this does not check whether the attributes in 'attribute_names' actually exist.
+//
+//  node - The parent xml node
+//  attribute_names - expected attribute names; explanation - text appended verbatim to the message (no separator is inserted)
+//  loc_data - XML file location data
+void expect_only_attributes(const pugi::xml_node node,
+                            std::vector<std::string> attribute_names,
+                            std::string explanation,
+                            const loc_data& loc_data) {
+    for (auto attrib : node.attributes()) {
+        std::string attrib_name = attrib.name();
+        auto iter = std::find(attribute_names.begin(),
+                              attribute_names.end(),
+                              attrib_name);
+        if (iter == attribute_names.end()) {
+            std::string msg = "Unexpected attribute '" + attrib_name + "'"
+                              + " found on node '" + node.name() + "'";
+
+            if (!explanation.empty()) {
+                msg += explanation;
+            }
+
+            msg += ".";
+            //Append the permitted names, comma-separated, with "or" before the last one
+            if (attribute_names.size() > 0) {
+                msg += " Expected (possibly) one of: ";
+                for (size_t i = 0; i < attribute_names.size(); i++) {
+                    if (i != 0) {
+                        msg += ", ";
+                    }
+                    if (i > 0 && i == attribute_names.size() - 1) {
+                        msg += "or ";
+                    }
+                    msg += "'" + attribute_names[i] + "'";
+                }
+                msg += ".";
+            }
+
+            throw XmlError(msg, loc_data.filename(), loc_data.line(node));
+        }
+    }
+}
+
+//Throws a well formatted error if any attribute other than those named in 'attribute_names' are found on 'node'.
+//Note this does not check whether the attributes in 'attribute_names' actually exist; for that use get_attribute().
+//
+//  node - The parent xml node
+//  attribute_names - expected attribute names
+//  loc_data - XML file location data
+void expect_only_attributes(const pugi::xml_node node,
+                            std::vector<std::string> attribute_names,
+                            const loc_data& loc_data) {
+    expect_only_attributes(node, attribute_names, "", loc_data); //Delegates with an empty explanation
+}
+
+//Counts the number of attributes on the specified node
+//
+//  node - The xml node
+//  loc_data - XML file location data
+//  req_opt - Whether any attributes are required (throws XmlError if required and none are found) or optional. Defaults to REQUIRED
+size_t count_attributes(const pugi::xml_node node,
+                        const loc_data& loc_data,
+                        const ReqOpt req_opt) {
+    size_t count = std::distance(node.attributes_begin(), node.attributes_end());
+
+    if (count == 0 && req_opt == REQUIRED) {
+        throw XmlError("Expected attributes on node '" + std::string(node.name()) + "'", //Space added before the quoted node name
+                       loc_data.filename(), loc_data.line(node));
+    }
+
+    return count;
+}
+
+//Gets a named attribute on a node and returns it.
+//
+//  node - The xml node
+//  attr_name - The attribute name
+//  loc_data - XML file location data
+//  req_opt - Whether the attribute is required (will error if required and not found) or optional. Defaults to REQUIRED
+pugi::xml_attribute get_attribute(const pugi::xml_node node,
+                                  const std::string& attr_name,
+                                  const loc_data& loc_data,
+                                  const ReqOpt req_opt) {
+    pugi::xml_attribute attr = node.attribute(attr_name.c_str());
+
+    if (!attr && req_opt == REQUIRED) {
+        throw XmlError("Expected '" + attr_name + "' attribute on node '" + node.name() + "'",
+                       loc_data.filename(), loc_data.line(node));
+    }
+
+    return attr;
+}
+
+//Reports whether 'node' carries the tag name 'tag_name'.
+//
+//  node - The xml node
+//  tag_name - The expected tag name
+//  loc_data - XML file location data
+//  req_opt - Whether a match is required (throws XmlError on mismatch) or optional (returns false on mismatch). Defaults to REQUIRED
+bool check_node(const pugi::xml_node node,
+                const std::string& tag_name,
+                const loc_data& loc_data,
+                const ReqOpt req_opt) {
+    const bool is_match = (node.name() == tag_name);
+
+    //Only a mismatch on a REQUIRED tag is an error
+    if (!is_match && req_opt == REQUIRED) {
+        throw XmlError(std::string("Unexpected node type '") + node.name() + "' expected '" + tag_name + "'",
+                       loc_data.filename(), loc_data.line(node));
+    }
+
+    return is_match;
+}
+
+} // namespace pugiutil
diff --git a/third_party/vtr/libs/pugiutil/src/pugixml_util.hpp b/third_party/vtr/libs/pugiutil/src/pugixml_util.hpp
new file mode 100644
index 000000000..8e55f232b
--- /dev/null
+++ b/third_party/vtr/libs/pugiutil/src/pugixml_util.hpp
@@ -0,0 +1,198 @@
+#ifndef PUGIXML_UTIL_H
+#define PUGIXML_UTIL_H
+/*
+ * This file contains utilities for the  PUGI XML parser.
+ *
+ * They primarily relate to:
+ *   - Checking for node/attribute existence and reporting errors if not
+ *   - Misc. utilities like counting tags
+ *
+ * Using these utilities simplifies error handling while manipulating XML
+ * since the user doesn't need to explicitly check for node/attribute existence
+ * (by default most of these functions will raise exceptions with useful error
+ * messages if the requested item does not exist).
+ */
+
+#include <vector>
+#include <stdexcept>
+#include <cstdio>
+
+#include "pugixml.hpp"
+
+#include "pugixml_loc.hpp"
+
+namespace pugiutil {
+
+//An error produced while getting an XML node/attribute
+class XmlError : public std::runtime_error {
+  public:
+    XmlError(std::string msg = "", std::string new_filename = "", size_t new_linenumber = -1)
+        : std::runtime_error(msg)
+        , filename_(new_filename)
+        , linenumber_(new_linenumber) {}
+
+    //Returns the filename associated with this error
+    //returns an empty string if none is specified
+    std::string filename() const { return filename_; }
+    const char* filename_c_str() const { return filename_.c_str(); }
+
+    //Returns the line number associated with this error
+    //returns the constructor default, (size_t)-1, if none is specified
+    size_t line() const { return linenumber_; }
+
+  private:
+    std::string filename_;
+    size_t linenumber_;
+};
+
+//Loads the XML file specified by filename into the passed pugi::xml_document
+//
+//Returns loc_data look-up for xml node line numbers
+loc_data load_xml(pugi::xml_document& doc,     //Document object to be loaded with file contents
+                  const std::string filename); //Filename to load from
+
+//Defines whether something (e.g. a node/attribute) is optional or required.
+//  We use this to improve clarity at the function call site (compared to just
+//  using boolean values).
+//
+//  For example:
+//
+//      auto node = get_first_child(node, "port", loc_data, true);
+//
+//  is ambiguous without looking up what the 4th argument represents, where as:
+//
+//      auto node = get_first_child(node, "port", loc_data, REQUIRED);
+//
+//  is much more explicit.
+enum ReqOpt {
+    REQUIRED, //Absence is an error: the helpers taking a ReqOpt throw XmlError
+    OPTIONAL  //Absence is tolerated: no exception is thrown for a missing item
+};
+
+//Gets the first child element of the given name and returns it.
+//
+//  node - The parent xml node
+//  child_name - The child tag name
+//  loc_data - XML file location data
+//  req_opt - Whether the child tag is required (will error if required and not found) or optional. Defaults to REQUIRED
+pugi::xml_node get_first_child(const pugi::xml_node node,
+                               const std::string& child_name,
+                               const loc_data& loc_data,
+                               const ReqOpt req_opt = REQUIRED);
+
+//Gets the child element of the given name and returns it.
+//Errors if more than one matching child is found.
+//
+//  node - The parent xml node
+//  child_name - The child tag name
+//  loc_data - XML file location data
+//  req_opt - Whether the child tag is required (will error if required and not found) or optional. Defaults to REQUIRED
+pugi::xml_node get_single_child(const pugi::xml_node node,
+                                const std::string& child_name,
+                                const loc_data& loc_data,
+                                const ReqOpt req_opt = REQUIRED);
+
+//Counts the number of child nodes of type 'child_name'
+//
+//  node - The parent xml node
+//  child_name - The child tag name
+//  loc_data - XML file location data
+//  req_opt - Whether the child tag is required (will error if required and not found) or optional. Defaults to REQUIRED
+size_t count_children(const pugi::xml_node node,
+                      const std::string& child_name,
+                      const loc_data& loc_data,
+                      const ReqOpt req_opt = REQUIRED);
+
+//Counts the number of child nodes (any type)
+//
+//  node - The parent xml node
+//  loc_data - XML file location data
+//  req_opt - Whether at least one child is required (will error if required and none found) or optional. No default: must be passed explicitly
+size_t count_children(const pugi::xml_node node,
+                      const loc_data& loc_data,
+                      const ReqOpt req_opt);
+
+//Throws a well formatted error if the actual count of child nodes named 'child_name' does not equal the 'expected_count'
+//
+//  node - The parent xml node; child_name - The child tag name to count
+//  loc_data - XML file location data
+//  expected_count - The expected number of child nodes
+void expect_child_node_count(const pugi::xml_node node,
+                             std::string child_name,
+                             size_t expected_count,
+                             const loc_data& loc_data);
+
+//Throws a well formatted error if the actual child count does not equal the 'expected_count'
+//
+//  node - The parent xml node
+//  loc_data - XML file location data
+//  expected_count - The expected number of child nodes
+void expect_child_node_count(const pugi::xml_node node,
+                             size_t expected_count,
+                             const loc_data& loc_data);
+
+//Throws a well formatted error if any of node's children are not part of child_names.
+//Note this does not check whether the nodes in 'child_names' actually exist.
+//
+//  node - The parent xml node
+//  child_names - expected child names
+//  loc_data - XML file location data
+void expect_only_children(const pugi::xml_node node,
+                          std::vector<std::string> child_names,
+                          const loc_data& loc_data);
+
+//Throws a well formatted error if any attribute other than those named in 'attribute_names' are found on 'node'.
+//Note this does not check whether the attributes in 'attribute_names' actually exist.
+//
+//  node - The parent xml node
+//  attribute_names - expected attribute names
+//  loc_data - XML file location data
+void expect_only_attributes(const pugi::xml_node node,
+                            std::vector<std::string> attribute_names,
+                            const loc_data& loc_data);
+
+//Throws a well formatted error if any attribute other than those named in 'attribute_names' are found on 'node' with an additional explanation.
+//Note this does not check whether the attributes in 'attribute_names' actually exist.
+//
+//  node - The parent xml node
+//  attribute_names - expected attribute names; explanation - extra text appended to the error message
+//  loc_data - XML file location data
+void expect_only_attributes(const pugi::xml_node node,
+                            std::vector<std::string> attribute_names,
+                            std::string explanation,
+                            const loc_data& loc_data);
+
+//Counts the number of attributes on the specified node
+//
+//  node - The xml node
+//  loc_data - XML file location data
+//  req_opt - Whether any attributes are required (will error if required and none are found) or optional. Defaults to REQUIRED
+size_t count_attributes(const pugi::xml_node node,
+                        const loc_data& loc_data,
+                        const ReqOpt req_opt = REQUIRED);
+
+//Gets a named property on an node and returns it.
+//
+//  node - The xml node
+//  attr_name - The attribute name
+//  loc_data - XML file location data
+//  req_opt - Whether the attribute is required (will error if required and not found) or optional. Defaults to REQUIRED
+pugi::xml_attribute get_attribute(const pugi::xml_node node,
+                                  const std::string& attr_name,
+                                  const loc_data& loc_data,
+                                  const ReqOpt req_opt = REQUIRED);
+
+//Checks that the given node matches the given tag name.
+//
+//  node - The xml node
+//  tag_name - The expected tag name
+//  loc_data - XML file location data
+//  req_opt - Whether the tag name is required (will error if required and not found) or optional. Defaults to REQUIRED
+bool check_node(const pugi::xml_node node,
+                const std::string& tag_name,
+                const loc_data& loc_data,
+                const ReqOpt req_opt = REQUIRED);
+
+} // namespace pugiutil
+
+#endif
diff --git a/third_party/vtr/libs/rtlnumber/.gitignore b/third_party/vtr/libs/rtlnumber/.gitignore
new file mode 100644
index 000000000..620109a7a
--- /dev/null
+++ b/third_party/vtr/libs/rtlnumber/.gitignore
@@ -0,0 +1 @@
+rtl_number
diff --git a/third_party/vtr/libs/rtlnumber/CMakeLists.txt b/third_party/vtr/libs/rtlnumber/CMakeLists.txt
new file mode 100644
index 000000000..33c84b3b9
--- /dev/null
+++ b/third_party/vtr/libs/rtlnumber/CMakeLists.txt
@@ -0,0 +1,33 @@
+cmake_minimum_required(VERSION 3.9)
+
+project("librtlnumber")
+
+option(RTL_ALLOW_UNKNOWN_COMPARE "Skips initial check for unknowns in comparison and compares MSB-to-LSB" OFF)
+
+if(RTL_ALLOW_UNKNOWN_COMPARE)
+    add_definitions(-DRTL_ALLOW_UNKNOWN_COMPARE)
+endif()
+#Collect the executable source, library sources and library headers
+file(GLOB_RECURSE EXEC_SOURCES main.cpp)
+file(GLOB_RECURSE LIB_SOURCES src/*.cpp)
+file(GLOB_RECURSE LIB_HEADERS src/include/*.hpp)
+files_to_dirs(LIB_HEADERS LIB_INCLUDE_DIRS) #NOTE(review): files_to_dirs is not defined in this file; presumably provided by the including project
+
+#Create the library
+add_library(librtlnumber STATIC
+             ${LIB_HEADERS}
+             ${LIB_SOURCES})
+target_include_directories(librtlnumber PUBLIC ${LIB_INCLUDE_DIRS})
+set_target_properties(librtlnumber PROPERTIES PREFIX "") #Avoid extra 'lib' prefix
+
+#NOTE(review): no libraries are listed in this call, so it appears to be a no-op
+target_link_libraries(librtlnumber)
+
+#Create the executable
+add_executable(rtl_number ${EXEC_SOURCES})
+
+
+target_link_libraries(rtl_number
+                        librtlnumber)
+
+install(TARGETS rtl_number librtlnumber DESTINATION bin)
diff --git a/third_party/vtr/libs/rtlnumber/Makefile b/third_party/vtr/libs/rtlnumber/Makefile
new file mode 100644
index 000000000..0e1fd16d5
--- /dev/null
+++ b/third_party/vtr/libs/rtlnumber/Makefile
@@ -0,0 +1,81 @@
+#Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
+#         Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com) and
+#          Dr. Kenneth B. Kent (ken@unb.ca)
+#          for the Reconfigurable Computing Research Lab at the
+#           University of New Brunswick in Fredericton, New Brunswick, Canada
+
+# If the first goal is "build"...
+ifeq (build,$(firstword $(MAKECMDGOALS)))
+  # ...capture the remaining goals in RUN_ARGS
+  RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
+  # ...and turn them into do-nothing targets
+  $(eval $(RUN_ARGS):;@:)
+endif
+ifeq (run,$(firstword $(MAKECMDGOALS)))	
+  # same handling when the first goal is "run": capture the remaining goals
+  RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
+  # ...and turn them into do-nothing targets
+  $(eval $(RUN_ARGS):;@:)
+endif
+ifeq (gdb,$(firstword $(MAKECMDGOALS)))	
+  # same handling when the first goal is "gdb": capture the remaining goals
+  RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
+  # ...and turn them into do-nothing targets
+  $(eval $(RUN_ARGS):;@:)
+endif
+ifeq (valgrind,$(firstword $(MAKECMDGOALS)))	
+  # same handling when the first goal is "valgrind": capture the remaining goals
+  RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
+  # ...and turn them into do-nothing targets
+  $(eval $(RUN_ARGS):;@:)
+endif
+ifeq (debug,$(firstword $(MAKECMDGOALS)))	
+  # same handling when the first goal is "debug": capture the remaining goals
+  RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
+  # ...and turn them into do-nothing targets
+  $(eval $(RUN_ARGS):;@:)
+endif
+# Include path and sources; make does not expand the src/*.cpp glob here, the shell does when a recipe runs
+INCLUDE =-Isrc/include
+SRC =src/*.cpp
+# Name of the executable written by the build/debug recipes
+BIN = rtl_number
+# Compiler command with its always-on flags
+C = clang++ -std=c++14 -lpthread #-DENABLE_DEBUG_MESSAGES
+# Extra diagnostic/debug flags, passed only by the 'debug' recipe
+cleanup_flags=\
+-ferror-limit=1000 \
+-Werror \
+-Wpedantic \
+-Weverything \
+-Wall \
+-ggdb -O0 -g \
+-Wno-c++98-compat \
+-Wno-padded
+#  \
+# -fsanitize=address -fno-omit-frame-pointer -fno-optimize-sibling-calls
+
+.PHONY: error debug build run valgrind gdb clean
+# Default goal (first target in the file): print the supported invocations
+error:
+	echo "can only use 'clean', 'debug <testname>.cpp', 'build <testname>.cpp' or 'run <arguments>'"
+# debug: rebuild from scratch with the strict warning/debug flags in cleanup_flags
+debug: clean
+	mkdir -p bin
+	$(C) $(cleanup_flags) $(INCLUDE) $(SRC) main.cpp -o $(BIN)
+# build: rebuild from scratch with the default flags
+build: clean
+	$(C) $(INCLUDE) $(SRC) main.cpp -o $(BIN)
+# run: execute the already-built binary with the extra goals captured in RUN_ARGS
+run:
+	./$(BIN) $(RUN_ARGS)
+# valgrind: rebuild, then run under helgrind (./ so the binary is found without relying on PATH)
+valgrind: build
+	valgrind --tool=helgrind ./$(BIN) $(RUN_ARGS)
+# gdb: debug the already-built binary with RUN_ARGS as its arguments
+gdb:
+	gdb --args ./$(BIN) $(RUN_ARGS)
+# clean: remove the bin directory and the produced executable
+clean:
+	$(RM) -Rf bin $(BIN)
+
diff --git a/third_party/vtr/libs/rtlnumber/README.md b/third_party/vtr/libs/rtlnumber/README.md
new file mode 100644
index 000000000..28054545c
--- /dev/null
+++ b/third_party/vtr/libs/rtlnumber/README.md
@@ -0,0 +1,9 @@
+librtlnumber - Register Transfer Level (RTL) Verilog Number Library
+
+Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
+         Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com)
+		  and Dr. Kenneth B. Kent (ken@unb.ca)
+           for the Reconfigurable Computing Research Lab at the
+            University of New Brunswick in Fredericton, New Brunswick, Canada
+
+Arbitrary Length Verilog Number Library that can Handle `X` and `Z` inputs.
diff --git a/third_party/vtr/libs/rtlnumber/main.cpp b/third_party/vtr/libs/rtlnumber/main.cpp
new file mode 100644
index 000000000..9a59be4d8
--- /dev/null
+++ b/third_party/vtr/libs/rtlnumber/main.cpp
@@ -0,0 +1,200 @@
+/* Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
+ *           Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com) and
+ *            Dr. Kenneth B. Kent (ken@unb.ca)
+ *            for the Reconfigurable Computing Research Lab at the
+ *             University of New Brunswick in Fredericton, New Brunswick, Canada
+ */
+
+#include <iostream>
+#include <vector>
+#include <string>
+#include <algorithm>
+
+#include "rtl_int.hpp"
+#include "rtl_utils.hpp"
+
+#define bad_ops(test) _bad_ops(test, __func__, __LINE__) //Supplies the calling function name and line number
+[[noreturn]] inline static std::string _bad_ops(std::string test, const char* FUNCT, int LINE) { //Reports the rejected operator on stderr, then aborts (never returns)
+    std::cerr << "INVALID INPUT OPS: (" << test << ")@" << FUNCT << "::" << std::to_string(LINE) << std::endl;
+    std::abort();
+}
+
+/***
+ *     __   __       ___  __   __           ___       __       
+ *    /  ` /  \ |\ |  |  |__) /  \ |       |__  |    /  \ |  | 
+ *    \__, \__/ | \|  |  |  \ \__/ |___    |    |___ \__/ |/\| 
+ *                                                             
+ * 	This is used for testing purposes only, unused in ODIN as the input is already preprocessed
+ */
+static std::string arithmetic(std::string op, std::string a_in) { // Applies the unary operation named by 'op' to 'a_in'
+    VNumber a(a_in); // operand built from its string form
+    VNumber result;
+
+    if (op == "is_true") { // is_* predicates: the predicate's result is wrapped in a VNumber
+        result = VNumber(V_TRUE(a));
+    } else if (op == "is_false") {
+        result = VNumber(V_FALSE(a));
+    } else if (op == "is_unk") {
+        result = VNumber(V_UNK(a));
+    } else if (op == "is_x") {
+        result = VNumber(V_IS_X(a));
+    } else if (op == "is_z") {
+        result = VNumber(V_IS_Z(a));
+    } else if (op == "is_unsigned") {
+        result = VNumber(V_IS_UNSIGNED(a));
+    } else if (op == "is_signed") {
+        result = VNumber(V_IS_SIGNED(a));
+    } else if (op == "to_unsigned") {
+        result = V_UNSIGNED(a);
+    } else if (op == "to_signed") {
+        result = V_SIGNED(a);
+    } else if (op == "~") {
+        result = V_BITWISE_NOT(a);
+    } else if (op == "-") {
+        result = V_MINUS(a);
+    } else if (op == "+") {
+        result = V_ADD(a);
+    } else if (op == "&") { // single-operand forms of the bitwise operators
+        result = V_BITWISE_AND(a);
+    } else if (op == "|") {
+        result = V_BITWISE_OR(a);
+    } else if (op == "^") {
+        result = V_BITWISE_XOR(a);
+    } else if (op == "~&") {
+        result = V_BITWISE_NAND(a);
+    } else if (op == "~|") {
+        result = V_BITWISE_NOR(a);
+    } else if (op == "~^" || op == "^~") {
+        result = V_BITWISE_XNOR(a);
+    } else if (op == "!") {
+        result = V_LOGICAL_NOT(a);
+    } else {
+        bad_ops(op); // unrecognized operator: prints a diagnostic and aborts
+    }
+
+    return result.to_verilog_bitstring(); // string produced by VNumber::to_verilog_bitstring()
+}
+
+static std::string arithmetic(std::string a_in, std::string op, std::string b_in) { // Applies the binary operation 'op' to 'a_in' and 'b_in'
+    VNumber a(a_in); // left operand built from its string form
+    VNumber b(b_in); // right operand built from its string form
+    VNumber result;
+
+    if (op == "&") {
+        result = V_BITWISE_AND(a, b);
+    } else if (op == "|") {
+        result = V_BITWISE_OR(a, b);
+    } else if (op == "^") {
+        result = V_BITWISE_XOR(a, b);
+    } else if (op == "~&") {
+        result = V_BITWISE_NAND(a, b);
+    } else if (op == "~|") {
+        result = V_BITWISE_NOR(a, b);
+    } else if (op == "~^" || op == "^~") {
+        result = V_BITWISE_XNOR(a, b);
+    } else if (op == "===") {
+        result = V_CASE_EQUAL(a, b);
+    } else if (op == "!==") {
+        result = V_CASE_NOT_EQUAL(a, b);
+    } else if (op == "<<") {
+        result = V_SHIFT_LEFT(a, b);
+    } else if (op == "<<<") {
+        result = V_SIGNED_SHIFT_LEFT(a, b);
+    } else if (op == ">>") {
+        result = V_SHIFT_RIGHT(a, b);
+    } else if (op == ">>>") {
+        result = V_SIGNED_SHIFT_RIGHT(a, b);
+    } else if (op == "&&") {
+        result = V_LOGICAL_AND(a, b);
+    } else if (op == "||") {
+        result = V_LOGICAL_OR(a, b);
+    } else if (op == "<") {
+        result = V_LT(a, b);
+    } else if (op == ">") {
+        result = V_GT(a, b);
+    } else if (op == "<=") {
+        result = V_LE(a, b);
+    } else if (op == ">=") {
+        result = V_GE(a, b);
+    } else if (op == "==") {
+        result = V_EQUAL(a, b);
+    } else if (op == "!=") {
+        result = V_NOT_EQUAL(a, b);
+    } else if (op == "+") {
+        result = V_ADD(a, b);
+    } else if (op == "-") {
+        result = V_MINUS(a, b);
+    } else if (op == "*") {
+        result = V_MULTIPLY(a, b);
+    } else if (op == "**") {
+        result = V_POWER(a, b);
+    } else if (op == "/") {
+        result = V_DIV(a, b);
+    } else if (op == "%") {
+        result = V_MOD(a, b);
+    } else {
+        bad_ops(op); // unrecognized operator: prints a diagnostic and aborts
+    }
+
+    return result.to_verilog_bitstring(); // string produced by VNumber::to_verilog_bitstring()
+}
+
+int main(int argc, char** argv) { // Command-line driver: evaluates one expression and prints the result
+    std::vector<std::string> input;
+    for (int i = 0; i < argc; i++)
+        input.push_back(argv[i]);
+    // Dispatch on argument count and shape; every successful branch fills 'result'
+    std::string result = "";
+
+    if (argc < 3) {
+        ERR_MSG("Not Enough Arguments: " << std::to_string(argc - 1));
+
+        return -1;
+    } else if (argc == 3) { // <op> <a>
+        result = arithmetic(input[1], input[2]);
+    } else if (argc == 4 && input[1] == "display") { // display <c> <a>: first character of <c> is passed to V_STRING
+        VNumber a(input[3]);
+        result = V_STRING(a, input[2][0]);
+    } else if (argc == 4 && input[1] == "bufif0") { // bufif0 <bus> <trigger>
+        VNumber bus(input[2]);
+        VNumber trigger(input[3]);
+        result = V_BITWISE_BUFIF0(bus, trigger).to_verilog_bitstring();
+    } else if (argc == 4 && input[1] == "bufif1") { // bufif1 <bus> <trigger>
+        VNumber bus(input[2]);
+        VNumber trigger(input[3]);
+        result = V_BITWISE_BUFIF1(bus, trigger).to_verilog_bitstring();
+    } else if (argc == 4 && input[1] == "notif0") { // notif0 <bus> <trigger>
+        VNumber bus(input[2]);
+        VNumber trigger(input[3]);
+        result = V_BITWISE_NOTIF0(bus, trigger).to_verilog_bitstring();
+    } else if (argc == 4 && input[1] == "notif1") { // notif1 <bus> <trigger>
+        VNumber bus(input[2]);
+        VNumber trigger(input[3]);
+        result = V_BITWISE_NOTIF1(bus, trigger).to_verilog_bitstring();
+    } else if (argc == 4) { // <a> <op> <b>
+        result = arithmetic(input[1], input[2], input[3]);
+    } else if (argc == 6 && (input[2] == "?" && input[4] == ":")) { // <a> ? <b> : <c>
+        VNumber a(input[1]);
+        VNumber b(input[3]);
+        VNumber c(input[5]);
+
+        result = V_TERNARY(a, b, c).to_verilog_bitstring();
+    } else if (argc == 6 && (input[1] == "{" && input[3] == "," && input[5] == "}")) { // { <a> , <b> }
+        VNumber a(input[2]);
+        VNumber b(input[4]);
+
+        result = V_CONCAT({a, b}).to_verilog_bitstring();
+    } else if (argc == 7 && (input[1] == "{" && input[3] == "{" && input[5] == "}" && input[6] == "}")) { // { <n> { <x> } }
+        VNumber n_times(input[2]);
+        VNumber replicant(input[4]);
+
+        result = V_REPLICATE(replicant, n_times).to_verilog_bitstring();
+    } else {
+        ERR_MSG("invalid Arguments: " << std::to_string(argc - 1));
+        return -1;
+    }
+
+    std::cout << result << std::endl;
+
+    return 0;
+}
diff --git a/third_party/vtr/libs/rtlnumber/regression_tests/basic_regression_tests.csv b/third_party/vtr/libs/rtlnumber/regression_tests/basic_regression_tests.csv
new file mode 100644
index 000000000..444bf1199
--- /dev/null
+++ b/third_party/vtr/libs/rtlnumber/regression_tests/basic_regression_tests.csv
@@ -0,0 +1,310 @@
+#####################
+# Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
+#  Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com),
+#   Alexandrea Demmings (alexandrea.demmings@unb.ca, lxdemmings@gmail.com) and
+#    Dr. Kenneth B. Kent (ken@unb.ca)
+#    for the Reconfigurable Computing Research Lab at the
+#     University of New Brunswick in Fredericton, New Brunswick, Canada
+#####################
+
+# truth test
+simple_true,        is_true, 1'b1, 1'b1
+simple_fail,        is_true, 1'b0, 1'b0
+decimal_true,       is_true, 1, 1'b1
+decimal_fail,       is_true, 0, 1'b0
+complex_true,       is_true, 3'b1xz, 1'b1
+complex_fail,       is_true, 3'b0xz, 1'b0
+unknown_x,          is_true, 1'bx, 1'b0
+unknown_z,          is_true, 1'bz, 1'b0
+large_number_pass,  is_true, 128'h8000_0000_0000_0000, 1'b1
+
+# type test
+simple_x_pass,      is_x,   1'bx,    1'b1
+simple_x_fail,      is_x,   1'b1,    1'b0
+complex_x_pass,     is_x,   4'bxxxx, 1'b1
+complex_x_fail,     is_x,   4'bzxxx, 1'b0
+simple_z_pass,      is_z,   1'bz,    1'b1
+simple_z_fail,      is_z,   1'b1,    1'b0
+complex_z_pass,     is_z,   4'bzzzz, 1'b1
+complex_z_fail,     is_z,   4'bxzzz, 1'b0
+simple_unk_x_pass,  is_unk, 2'bx1,   1'b1
+simple_unk_z_pass,  is_unk, 2'bz1,   1'b1
+simple_unk_fail,    is_unk, 2'b10,   1'b0
+
+# sign test
+simple_is_unsigned_pass,     is_unsigned,    2'b11,  1'b1
+simple_is_unsigned_fail,     is_unsigned,    2'sb11, 1'b0
+simple_is_signed_pass,       is_signed,      2'b11,  1'b0
+simple_is_signed_fail,       is_signed,      2'sb11, 1'b1
+
+# type conversion test
+simple_is_unsigned_1,        to_unsigned,    2'b11,  2'b11
+simple_is_unsigned_2,        to_unsigned,    2'sb11, 2'b11
+simple_is_signed_1,          to_signed,      2'b11,  2'sb11
+simple_is_signed_2,          to_signed,      2'sb11, 2'sb11
+
+# string display
+simple_string_b,          display, b, 64'd18446744073709551615, 1111111111111111111111111111111111111111111111111111111111111111
+simple_string_B,          display, B, 65'd18446744073709551616, 10000000000000000000000000000000000000000000000000000000000000000
+simple_string_u,          display, u, 1'bz, 1'b0        # todo, figure out the real result for this
+simple_string_U,          display, U, 1'bx, 1'b0        # todo, figure out the real result for this
+simple_string_z,          display, z, 1'bz, z
+simple_string_Z,          display, Z, 1'bx, X
+simple_string_s,          display, s, "hello world", hello world
+simple_string_S,          display, S, "hello world", hello world
+simple_string_o,          display, o, 64'd18446744073709551615, 1777777777777777777777
+simple_string_O,          display, O, 65'd18446744073709551616, 2000000000000000000000
+simple_string_h,          display, h, 64'd18446744073709551615, ffffffffffffffff
+simple_string_H,          display, H, 64'd18446744073709551614, FFFFFFFFFFFFFFFE
+simple_string_d,          display, d, 265663, 265663
+simple_string_D,          display, D, 265663, 265663
+simple_string_c,          display, c, "hello world", h
+simple_string_C,          display, C, "world", w
+
+# string test
+simple_string,          display,        s,   "hello world", hello world
+string_compare_pass,    "hello world",  ==,  "hello world", 1'b1
+string_compare_fail,    "hello world",  ==,  "hello", 1'b0
+string_ne_pass,         "hello world",  !=,  "hello", 1'b1
+string_ne_fail,         "hello world",  !=,  "hello world", 1'b0
+string_add,             "a",            +,    1,      "b"
+string_add_empty,       "b",             +,    "",    "b"
+
+# replicate test
+simple_replicate,       {, 3, {, 2'b10, }, }, 6'b101010
+
+# concat test
+simple_concat,          {, 2'b01, \, , 2'b10, }, 4'b0110
+
+# base conversions
+Decimal-To-Binary,      10,         ==, 4'b1010,    1'b1
+Binary-To-Decimal,      4'b1100,    ==, 12,         1'b1
+Decimal-To-Hex,         10,         ==, 'hA,        1'b1
+Hex-To-Decimal,         'hBF,       ==, 191,        1'b1
+Decimal-To-Octal,       10,         ==, 'o12,       1'b1
+Octal-To-Decimal,       'o37,       ==, 31,         1'b1
+
+# Sign modifier
+Sign_minus,		        -,  10'b1010101010,	'b0101010110
+Sign_plus,			    +,  4'b1010,	    'b1010
+
+# Simple Base 10 conversion
+simple_decimal_conversion,			+,  6,	6
+simple_decimal_minus,			    6, -, 1, 5
+simple_decimal_plus,			    6, +, 1, 7
+simple_decimal_shift_right,			6,  >>>, 1, 3
+simple_decimal_shift_left,			6,  <<<, 1, 12
+
+#################
+# tristate
+
+# single bit trigger
+# ======
+
+# bufif0
+bufif0_on,                 bufif0, 4'b10xz, 1'b0, 4'b10xx
+bufif0_off,                bufif0, 4'b10xz, 1'b1, 4'bzzzz
+bufif0_dc,                 bufif0, 4'b10xz, 1'bx, 4'bxxxx
+bufif0_hihz,               bufif0, 4'b10xz, 1'bz, 4'bxxxx
+
+# bufif1
+bufif1_on,                 bufif1, 4'b10xz, 1'b1, 4'b10xx
+bufif1_off,                bufif1, 4'b10xz, 1'b0, 4'bzzzz
+bufif1_dc,                 bufif1, 4'b10xz, 1'bx, 4'bxxxx
+bufif1_hihz,               bufif1, 4'b10xz, 1'bz, 4'bxxxx
+
+# notif0
+notif0_on,                 notif0, 4'b10xz, 1'b0, 4'b01xx
+notif0_off,                notif0, 4'b10xz, 1'b1, 4'bzzzz
+notif0_dc,                 notif0, 4'b10xz, 1'bx, 4'bxxxx
+notif0_hihz,               notif0, 4'b10xz, 1'bz, 4'bxxxx
+
+# notif1
+notif1_on,                 notif1, 4'b10xz, 1'b1, 4'b01xx
+notif1_off,                notif1, 4'b10xz, 1'b0, 4'bzzzz
+notif1_dc,                 notif1, 4'b10xz, 1'bx, 4'bxxxx
+notif1_hihz,               notif1, 4'b10xz, 1'bz, 4'bxxxx
+
+# wide trigger
+# ======
+
+# bufif0
+bufif0_upper,              bufif0, 4'b10xz, 4'b1100, 4'bzzxx
+bufif0_lower,              bufif0, 4'b10xz, 4'b0011, 4'b10zz
+
+# bufif1
+bufif1_upper,              bufif1, 4'b10xz, 4'b1100, 4'b10zz
+bufif1_lower,              bufif1, 4'b10xz, 4'b0011, 4'bzzxx
+
+# notif0
+notif0_upper,              notif0, 4'b10xz, 4'b1100, 4'bzzxx
+notif0_lower,              notif0, 4'b10xz, 4'b0011, 4'b01zz
+
+# notif1
+notif1_upper,              notif1, 4'b10xz, 4'b1100, 4'b01zz
+notif1_lower,              notif1, 4'b10xz, 4'b0011, 4'bzzxx
+
+
+# Reduction
+Reduction-and,			&,  4'b1010,	1'b0
+Reduction-or,			|,  4'b1010,	1'b1
+Reduction-xor,			^,  4'b1010,	1'b0
+Reduction-nand,			~&,	4'b1010,	1'b1
+Reduction-nor,			~|,	4'b1010,	1'b0
+Reduction-xnor,			~^,	4'b1010,	1'b1
+
+# Reduction unknowns
+Reduction-and-XZ,		&,  4'b10xz,	1'b0
+Reduction-or-XZ,		|,  4'b10xz,	1'b1
+Reduction-xor-XZ,		^,  4'b10xz,	1'bx
+Reduction-nand-XZ,		~&,	4'b10xz,	1'b1
+Reduction-nor-XZ,		~|,	4'b10xz,	1'b0
+Reduction-xnor-XZ,		~^,	4'b10xz,	1'bx
+
+# bitwise
+Bitwise-Not,	            ~,  4'b1010,	'b0101
+Bitwise-And,	4'b1010,	&,  4'b1000,	'b1000
+Bitwise-Or,		5'b1010,	|,  5'b1000,	'b1010
+Bitwise-Nor,	5'b1010,	~|, 5'b1000,	5'b10101
+Bitwise-Nand,   5'b1010,	~&, 5'b1000,	5'b10111
+Bitwise-Xnor,   5'b1010,	~^, 5'b1000,	5'b11101
+Bitwise-Xor,	5'b1010,	^,  5'b1000,	2'b10
+
+# bitwise unknowns
+Bitwise-Not-XZ,	                    ~,  4'b10xz,        4'b01xx
+Bitwise-And-XZ,	    8'bxxxxzzzz,    &,  8'b10xz10xz,    8'bx0xxx0xx
+Bitwise-Or-XZ,	    8'bxxxxzzzz,    |,  8'b10xz10xz,    8'b1xxx1xxx
+Bitwise-Nor-XZ,	    8'bxxxxzzzz,    ~|, 8'b10xz10xz,    8'b0xxx0xxx
+Bitwise-Nand-XZ,    8'bxxxxzzzz,    ~&, 8'b10xz10xz,    8'bx1xxx1xx
+Bitwise-Xnor-XZ,    8'bxxxxzzzz,    ~^, 8'b10xz10xz,    8'bxxxxxxxx
+Bitwise-Xor-XZ,	    8'bxxxxzzzz,    ^,  8'b10xz10xz,    8'bxxxxxxxx
+
+# case equivalence
+Case-eq,		5'b1xz10,	===,  5'b1xz10,	1'b1
+Case-ne,		5'b1xz11,	!==,  5'b1xz10,	1'b1
+Case-ne-XZ,		5'b1xz10,	!==,  5'b1zx10,	1'b1
+Case-ne,		5'b1xz10,	!==,  5'b1xz10,	1'b0
+Case-eq,		5'b1xz11,	===,  5'b1xz10,	1'b0
+Case-eq-XZ,		5'b1xz10,	===,  5'b1zx10,	1'b0
+
+# logical operation
+Logical-Not,	                    !,  4'b1010,	1'b0
+Logical-Not-XZ,	                    !,  4'b1x1z,	1'bx
+Logical-And,			4'b1010,	&&, 4'b1000,	1'b1
+Logical-And-XZ,			4'b1x1z,	&&, 4'b1000,	1'bx
+Logical-Or,				4'b1010,	||, 4'b1000,	1'b1
+Logical-Or,				4'b0000,	||, 4'b1000,	1'b1
+Logical-Or,				4'b0000,	||, 4'b0000,	1'b0
+Logical-Or,				4'b0zx0,	||, 4'b0000,	1'bx
+Logical-less,           4'b0000,    <,  4'b0000,    1'b0	
+Logical-less-1,			4'b0000,	<,	4'b0001,	1'b1
+Logical-less-2,         4'b0001,    <,  4'b0000,    1'b0
+Logical-less-3,         4'bxxxx,    <,  4'b0001,    1'bx
+Logical-less-4,         4'b0001,    <,  4'bxxxx,    1'bx
+Logical-less-5,         4'b0xxx,    <,  4'b1001,    1'bx
+Logical-less-6,         4'b1001,    <,  4'b0xxx,    1'bx
+Logical-less-7,         4'sb1001,   <,  4'sb0xxx,   1'bx
+Logical-less-8,         4'sb0xxx,   <,  4'sb1001,   1'bx
+Logical-less-9,         4'b0zzz,    <,  4'b1001,    1'bx
+Logical-less-10,         4'sb0zzz,   <,  4'sb1001,   1'bx
+Logical-greater,        4'b0000,    >,  4'b0000,    1'b0
+Logical-greater-1,		4'b0000,	>,	4'b0001,	1'b0
+Logical-greater-2,      4'b0001,    >,  4'b0000,    1'b1
+Logical-greater-3,      4'bxxxx,    >,  4'b0001,    1'bx
+Logical-greater-4,      4'b0001,    >,  4'bxxxx,    1'bx
+Logical-greater-5,      4'b0xxx,    >,  4'b1001,    1'bx
+Logical-greater-6,      4'b1001,    >,  4'b0xxx,    1'bx
+Logical-greater-7,      4'sb1001,   >,  4'sb0xxx,   1'bx
+Logical-greater-8,      4'sb0xxx,   >,  4'sb1001,   1'bx
+Logical-gr-equal,	    4'b0000,	>=, 4'b0000,	1'b1
+Logical-gr-equal-1,		4'b0000,	>=,	4'b0001,	1'b0
+Logical-gr-equal-2,     4'b0001,    >=, 4'b0000,    1'b1
+Logical-gr-equal-3,     4'bxxxx,    >=, 4'b0001,    1'bx
+Logical-gr-equal-4,     4'b0001,    >=, 4'bxxxx,    1'bx
+Logical-gr-equal-5,     4'b0001,    >=, 4'bzzzz,    1'bx
+Logical-less-equal,		4'b0000,	<=, 4'b0000,	1'b1
+Logical-less-equal-1,	4'b0000,	<=,	4'b0001,	1'b1
+Logical-less-equal-2,   4'b0001,    <=, 4'b0000,    1'b0
+Logical-less-equal-3,   4'bxxxx,    <=, 4'b0001,    1'bx
+Logical-less-equal-4,   4'b0001,    <=, 4'bxxxx,    1'bx	
+Logical-less-equal-5,   4'b0001,    <=, 4'bzzzz,    1'bx	
+Logical-equal,			4'b0000,	==, 4'b0000,	1'b1
+Logical-equal-1,		4'b0000,	==,	4'b0001,	1'b0
+Logical-equal-2,        4'b0001,    ==, 4'b0000,    1'b0
+Logical-equal-3,        4'bxxxx,    ==, 4'b0001,    1'bx
+Logical-equal-4,        4'b0001,    ==, 4'bxxxx,    1'bx
+Logical-equal-5,        4'b0001,    ==, 4'bzzzz,    1'bx
+Logical-bits,			'b1110,	    ==, 4'b1110,	1'b1	
+Logical-not-equal,		4'b0000,	!=,	4'b0000,	1'b0
+Logical-not-equal-1,	4'b0000,	!=,	4'b0001,	1'b1
+Logical-not-equal-2,    4'b0001,    !=, 4'b0000,    1'b1
+Logical-not-equal-3,    4'bxxxx,    !=, 4'b0001,    1'bx
+Logical-not-equal-4,    4'b0001,    !=, 4'bxxxx,    1'bx
+Logical-not-equal-5,    4'b0001,    !=, 4'bzzzz,    1'bx
+
+# Tests for correct sign extension
+Sign-Extend,                    4'b0001,    ===,    1'b1,   1'b1
+Sign-Extend-Unknown,            4'b00xz,    ===,    2'bxz,  1'b1
+Sign-Extend-Sign,               4'sb1111,   ===,    1'sb1,  1'b1
+Sign-Extend-Sign-Unknown,       4'sbxxxz,   ===,    2'sbxz,  1'b1
+
+# shift operation
+Shift-left,				        6'b0001zx,	<<,	    2'b10,	6'b01zx00
+Shift-right, 			        6'b0zx100,	>>,		2'b10,	3'bzx1
+Signed-shift-left,		        5'b001zx,	<<<,	2'b10,	5'b1zx00
+Unsigned-Signed-shift-right,	6'b1z1x00,	>>>,	2'b10,	6'b001z1x
+Unsigned-arithmetic-shift-right,6'b1z1x00,	>>>,	2'b10,	6'b001z1x
+Signed-arithmetic-shift-right,  6'sb1z1x00,	>>>,	2'b10,	6'sb111z1x
+Shift-left-X,                   6'b1010,	<<,	1'bx,	'bx
+Shift-left-Z,                   6'b1010,	<<,	1'bz,	'bx
+Shift-right-X,                  6'b1010,	>>,	1'bx,	'bx
+Shift-right-Z,                  6'b1010,	>>,	1'bz,	'bx
+
+# arithmetic
+Addition,			        4'b0110,    +,  4'b0011,    'b1001
+Addition-Base-10,			2,          +,  2,          4
+Subtraction,		        4'b0100,    -,  4'b0010,    'b10
+Subtraction-Base-10,        4,          -,  2,          2
+Subtraction-Smaller-Larger, 4'b0010,    -,  4'b0100,    4'b1110
+Subtraction-Small-Base-10,  2,          -,  4,          3'sb110
+Division,			        4'b1010,    /,  4'b0010,    'b101
+Division-Base-10,           10,         /,  5,          2
+Division-Neg,               4'sb1010,   /,  3'sb010,    3'sb101
+Multiplication,	            4'b0010,    *,  4'b0010,    'b100
+Multiplication-Base-10,     2,          *,  3,          6
+Multiplication-Neg,         4,          *,  4'sb1100,   7'sb1110000
+Modulo,			            4'b1011,    %,  4'b0010,    'b1
+Modulo-Base-10,             13,         %,  5,          3
+Modulo-Neg,                 4'sb1011,   %,  4'sb0010,   2'sb11
+Power,                      2'b10,      **, 2'b10,      4'b0100
+Power-Base-10,              2,          **, 3,          8
+Power-Zero,                 4'b0010,    **, 4'b0000,    'b1
+Power-Zero-Base-10,         3,          **, 0,          1
+Power-Unknown,              4'b0000,    **, 4'sb1110,   'bx
+Power-Neg-Even,             4'sb1111,   **, 2'b10,      'b1
+Power-Neg-Odd,              4'sb1111,   **, 3'b011,     2'sb11
+
+# Ternary operations
+Ternary,			1'b1,   ?,  2'b01,  :,  2'b10,  2'b01
+
+# Test Cases As-Per Verilog 2005 Specification: 
+# Page 46 "Table 5-8 Examples of modulus and power operators"
+V2005-1,                    10,         %,  3,       1          # 10/3 yields a remainder of 1.
+V2005-2,                    11,         %,  3,       2          # 11/3 yields a remainder of 2.
+V2005-3,                    12,         %,  3,       0          # 12/3 yields no remainder.
+V2005-4,                    5'sb10110,  %,  3'sb011, 2'sb11     # The result takes the sign of the first operand.
+V2005-5,                    11,         %,  3'sb101, 2          # The result takes the sign of the first operand
+#V2005-6,                    5'sb10100, %,  3,       1          # -4'd12 is seen as a large positive number that leaves a remainder of 1 when divided by 3.
+V2005-7,                    3,          **, 2,       9          # 3 * 3
+V2005-8,                    2,          **, 3,       8          # 2 * 2 * 2
+V2005-9,                    2,          **, 0,       1          # Anything to the zero exponent is 1.
+V2005-10,                   0,          **, 0,       1          # Zero to the zero exponent is also 1.
+V2005-11,                   2,          **, 3'sb111, 0          # 2 ** -1 = 1/2. Integer division truncates to zero.
+V2005-12,                   0,          **, 2'sb11,  'bx     # 0 ** -1 = 1/0. Integer division by zero is 'bx.
+
+#2.0 ** -3'sb1 0.5 2.0 is real, giving real reciprocal.
+#9 ** 0.5 3.0 Real square root.
+#9.0 ** (1/2) 1.0 Integer division truncates exponent to zero.
+#-3.0 ** 2.0 9.0 Defined because real 2.0 is still integral value
+
+
diff --git a/third_party/vtr/libs/rtlnumber/src/include/internal_bits.hpp b/third_party/vtr/libs/rtlnumber/src/include/internal_bits.hpp
new file mode 100644
index 000000000..c3a8092d1
--- /dev/null
+++ b/third_party/vtr/libs/rtlnumber/src/include/internal_bits.hpp
@@ -0,0 +1,1140 @@
+/* Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
+ *           Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com),
+ *            Alexandrea Demmings (alexandrea.demmings@unb.ca, lxdemmings@gmail.com) and
+ *             Dr. Kenneth B. Kent (ken@unb.ca)
+ *             for the Reconfigurable Computing Research Lab at the
+ *              University of New Brunswick in Fredericton, New Brunswick, Canada
+ */
+
+#ifndef INTERNAL_BITS_HPP
+#define INTERNAL_BITS_HPP
+
+#include <cstdint>
+#include <string>
+#include <algorithm>
+#include <vector>
+#include <bitset>
+#include "rtl_utils.hpp"
+
+typedef uint16_t veri_internal_bits_t; // storage word wrapped by BitFields: 2 real bits per verilog bit
+
+using integer_t = int64_t;
+constexpr short integer_t_size = (sizeof(integer_t) * 8); // sizeof(integer_t) * 8: its width in bits for 8-bit bytes
+// defines _<x> in an unnamed namespace, initialised from x; NOTE(review): presumably silences unused warnings - confirm
+#define _static_unused(x)    \
+    namespace {              \
+    constexpr auto _##x = x; \
+    }
+// brace initializer built from lut indexed by _0, _1, _x, _z (1d), or from each of its four rows (2d)
+#define unroll_1d(lut) \
+    { lut[_0], lut[_1], lut[_x], lut[_z] }
+#define unroll_2d(lut) \
+    { unroll_1d(lut[_0]), unroll_1d(lut[_1]), unroll_1d(lut[_x]), unroll_1d(lut[_z]) }
+// same as unroll_1d / unroll_2d, but every entry is passed through l_not
+#define unroll_1d_invert(lut) \
+    { l_not[lut[_0]], l_not[lut[_1]], l_not[lut[_x]], l_not[lut[_z]] }
+#define unroll_2d_invert(lut) \
+    { unroll_1d_invert(lut[_0]), unroll_1d_invert(lut[_1]), unroll_1d_invert(lut[_x]), unroll_1d_invert(lut[_z]) }
+
+namespace BitSpace {
+typedef uint8_t bit_value_t;
+
+constexpr veri_internal_bits_t _All_0 = static_cast<veri_internal_bits_t>(0x0000000000000000UL);
+constexpr veri_internal_bits_t _All_1 = static_cast<veri_internal_bits_t>(0x5555555555555555UL);
+constexpr veri_internal_bits_t _All_x = static_cast<veri_internal_bits_t>(0xAAAAAAAAAAAAAAAAUL);
+constexpr veri_internal_bits_t _All_z = static_cast<veri_internal_bits_t>(0xFFFFFFFFFFFFFFFFUL);
+
+constexpr bit_value_t _0 = 0x0;
+constexpr bit_value_t _1 = 0x1;
+constexpr bit_value_t _x = 0x2;
+constexpr bit_value_t _z = 0x3;
+
+/***
+ * These lookup tables are taken from the raw Verilog truth tables so that the evaluation is correct.
+ * Only use them to evaluate expressions on the binary digits of a number_t.
+ * reference: http://staff.ustc.edu.cn/~songch/download/IEEE.1364-2005.pdf
+ *
+ *******************************************************/
+
+constexpr bit_value_t l_buf[4] = {
+    /* a: 0   1   x   z   -> buffered value of a; a z input reads back as x */
+    _0, _1, _x, _x};
+_static_unused(l_buf)
+
+    constexpr bit_value_t l_not[4]
+    = {
+        /* a: 0   1   x   z   -> inverse of a; x and z both invert to x */
+        _1, _0, _x, _x};
+_static_unused(l_not)
+
+    constexpr bit_value_t is_unk[4]
+    = {
+        /* a: 0   1   x   z   -> _1 when a is x or z */
+        _0, _0, _1, _1};
+_static_unused(is_unk)
+
+    constexpr bit_value_t is_x_bit[4]
+    = {
+        /* a: 0   1   x   z   -> _1 only when a is x */
+        _0, _0, _1, _0};
+_static_unused(is_x_bit)
+
+    constexpr bit_value_t is_z_bit[4]
+    = {
+        /* a: 0   1   x   z   -> _1 only when a is z */
+        _0, _0, _0, _1};
+_static_unused(is_z_bit)
+
+    constexpr bit_value_t is_one_bit[4]
+    = {
+        /* a: 0   1   x   z   -> _1 only when a is 1 */
+        _0, _1, _0, _0};
+_static_unused(is_one_bit)
+
+    constexpr bit_value_t is_zero_bit[4]
+    = {
+        /* a: 0   1   x   z   -> _1 only when a is 0 */
+        _1, _0, _0, _0};
+_static_unused(is_zero_bit)
+
+    constexpr bit_value_t l_and[4][4] // l_and[a][b]: a 0 on either side forces 0, otherwise any x/z gives x
+    = {
+        /* a  /  0   1   x   z  <-b */
+        /* 0 */ {_0, _0, _0, _0},
+        /* 1 */ {_0, _1, _x, _x},
+        /* x */ {_0, _x, _x, _x},
+        /* z */ {_0, _x, _x, _x}};
+_static_unused(l_and)
+
+    constexpr bit_value_t l_nand[4][4] // every l_and entry passed through l_not
+    = unroll_2d_invert(l_and);
+_static_unused(l_nand)
+
+    constexpr bit_value_t l_or[4][4] // l_or[a][b]: a 1 on either side forces 1, otherwise any x/z gives x
+    = {
+        /* a  /  0   1   x   z  <-b */
+        /* 0 */ {_0, _1, _x, _x},
+        /* 1 */ {_1, _1, _1, _1},
+        /* x */ {_x, _1, _x, _x},
+        /* z */ {_x, _1, _x, _x}};
+_static_unused(l_or)
+
+    constexpr bit_value_t l_nor[4][4] // every l_or entry passed through l_not
+    = unroll_2d_invert(l_or);
+_static_unused(l_nor)
+
+    constexpr bit_value_t l_xor[4][4] // l_xor[a][b]: any x/z operand makes the result x
+    = {
+        /* a  /  0   1   x   z  <-b */
+        /* 0 */ {_0, _1, _x, _x},
+        /* 1 */ {_1, _0, _x, _x},
+        /* x */ {_x, _x, _x, _x},
+        /* z */ {_x, _x, _x, _x}};
+_static_unused(l_xor)
+
+    constexpr bit_value_t l_xnor[4][4] // every l_xor entry passed through l_not
+    = unroll_2d_invert(l_xor);
+_static_unused(l_xnor)
+
+    constexpr bit_value_t l_notif1[4][4] // control 1 drives the inverted input, control 0 gives z, x/z control gives x
+    = {
+        /* in /  0   1   x   z  <-control */
+        /* 0 */ {_z, _1, _x, _x},
+        /* 1 */ {_z, _0, _x, _x},
+        /* x */ {_z, _x, _x, _x},
+        /* z */ {_z, _x, _x, _x}};
+_static_unused(l_notif1)
+
+    constexpr bit_value_t l_notif0[4][4] // control 0 drives the inverted input, control 1 gives z, x/z control gives x
+    = {
+        /* in /  0   1   x   z  <-control */
+        /* 0 */ {_1, _z, _x, _x},
+        /* 1 */ {_0, _z, _x, _x},
+        /* x */ {_x, _z, _x, _x},
+        /* z */ {_x, _z, _x, _x}};
+_static_unused(l_notif0)
+
+    constexpr bit_value_t l_bufif1[4][4] // control 1 drives the input (x/z input as x), control 0 gives z
+    = {
+        /* in /  0   1   x   z  <-control */
+        /* 0 */ {_z, _0, _x, _x},
+        /* 1 */ {_z, _1, _x, _x},
+        /* x */ {_z, _x, _x, _x},
+        /* z */ {_z, _x, _x, _x}};
+_static_unused(l_bufif1)
+
+    constexpr bit_value_t l_bufif0[4][4] // control 0 drives the input (x/z input as x), control 1 gives z
+    = {
+        /* in /  0   1   x   z  <-control */
+        /* 0 */ {_0, _z, _x, _x},
+        /* 1 */ {_1, _z, _x, _x},
+        /* x */ {_x, _z, _x, _x},
+        /* z */ {_x, _z, _x, _x}};
+_static_unused(l_bufif0)
+
+    /*****************************************************
+     *  Tran (MOS switch tables): NO SUPPORT FOR THESE YET
+     */
+    /* cmos gates: unlike the tristate tables, a z input stays z here whatever the control is */
+    constexpr bit_value_t l_rpmos[4][4] // control 0 passes the input, control 1 gives z, x/z control gives x
+    = {
+        /* in /  0   1   x   z  <-control */
+        /* 0 */ {_0, _z, _x, _x},
+        /* 1 */ {_1, _z, _x, _x},
+        /* x */ {_x, _z, _x, _x},
+        /* z */ {_z, _z, _z, _z}};
+_static_unused(l_rpmos)
+
+    constexpr bit_value_t l_rnmos[4][4] // control 1 passes the input, control 0 gives z, x/z control gives x
+    = {
+        /* in /  0   1   x   z  <-control */
+        /* 0 */ {_z, _0, _x, _x},
+        /* 1 */ {_z, _1, _x, _x},
+        /* x */ {_z, _x, _x, _x},
+        /* z */ {_z, _z, _z, _z}};
+_static_unused(l_rnmos)
+
+    constexpr bit_value_t l_nmos[4][4] // same entries as l_rnmos
+    = {
+        /* in /  0   1   x   z  <-control */
+        /* 0 */ {_z, _0, _x, _x},
+        /* 1 */ {_z, _1, _x, _x},
+        /* x */ {_z, _x, _x, _x},
+        /* z */ {_z, _z, _z, _z}};
+_static_unused(l_nmos)
+
+    // see table 5-21 p:54 IEEE 1364-2005
+    constexpr bit_value_t l_ternary[4][4] // merge of two values: kept where both agree on 0 or 1, x otherwise
+    = {
+        /* a  /  0   1   x   z  <-b */
+        /* 0 */ {_0, _x, _x, _x},
+        /* 1 */ {_x, _1, _x, _x},
+        /* x */ {_x, _x, _x, _x},
+        /* z */ {_x, _x, _x, _x}};
+_static_unused(l_ternary)
+
+    /*****
+     * these extend the library and simplify the process
+     */
+    /* helper: x for every pair of inputs */
+    constexpr bit_value_t l_unk[4][4]
+    = {
+        /* a  /  0   1   x   z  <-b */
+        /* 0 */ {_x, _x, _x, _x},
+        /* 1 */ {_x, _x, _x, _x},
+        /* x */ {_x, _x, _x, _x},
+        /* z */ {_x, _x, _x, _x}};
+_static_unused(l_unk)
+
+    constexpr bit_value_t l_case_eq[4][4] // _1 only when a and b hold the same value, x and z included
+    = {
+        /* a  /  0   1   x   z  <-b */
+        /* 0 */ {_1, _0, _0, _0},
+        /* 1 */ {_0, _1, _0, _0},
+        /* x */ {_0, _0, _1, _0},
+        /* z */ {_0, _0, _0, _1}};
+_static_unused(l_case_eq)
+
+    constexpr bit_value_t l_lt[4][4] // single-bit a < b; any x/z operand gives x
+    = {
+        /* a  /  0   1   x   z  <-b */
+        /* 0 */ {_0, _1, _x, _x},
+        /* 1 */ {_0, _0, _x, _x},
+        /* x */ {_x, _x, _x, _x},
+        /* z */ {_x, _x, _x, _x}};
+_static_unused(l_lt)
+
+    constexpr bit_value_t l_gt[4][4] // single-bit a > b; any x/z operand gives x
+    = {
+        /* a  /  0   1   x   z  <-b */
+        /* 0 */ {_0, _0, _x, _x},
+        /* 1 */ {_1, _0, _x, _x},
+        /* x */ {_x, _x, _x, _x},
+        /* z */ {_x, _x, _x, _x}};
+_static_unused(l_gt)
+
+    constexpr bit_value_t l_eq[4][4] // single-bit equality: entry-for-entry copy of l_xnor
+    = unroll_2d(l_xnor);
+_static_unused(l_eq)
+
+    constexpr bit_value_t l_sum[4][4][4] // sum bit, first index is the carry in: an x/z carry in makes every sum x
+    = {
+        /* c_in */
+        /* 0 */ unroll_2d(l_xor),
+        /* 1 */ unroll_2d(l_xnor),
+        /* x */ unroll_2d(l_unk),
+        /* z */ unroll_2d(l_unk)};
+_static_unused(l_sum)
+
+    constexpr bit_value_t l_carry[4][4][4] // carry out: l_and without carry in, l_or with it, l_ternary when it is x/z
+    = {
+        /* c_in */
+        /* 0 */ unroll_2d(l_and),
+        /* 1 */ unroll_2d(l_or),
+        /* x */ unroll_2d(l_ternary),
+        /* z */ unroll_2d(l_ternary)};
+_static_unused(l_carry)
+
+    constexpr bit_value_t l_half_carry[4][4] // copy of l_carry[_0], i.e. the l_and entries
+    = unroll_2d(l_carry[_0]);
+_static_unused(l_half_carry)
+
+    constexpr bit_value_t l_half_sum[4][4] // copy of l_sum[_0], i.e. the l_xor entries
+    = unroll_2d(l_sum[_0]);
+_static_unused(l_half_sum)
+
+    static char bit_to_c(bit_value_t bit, bool uppercase) {
+    /* 0 and 1 print as digits, z prints as z/Z, every other value prints as x/X */
+    if (bit == _0)
+        return '0';
+    if (bit == _1)
+        return '1';
+
+    if (bit == _z)
+        return uppercase ? 'Z' : 'z';
+
+    return uppercase ? 'X' : 'x';
+}
+
+static char bit_to_u(bit_value_t bit) {
+    /*
+     * two-state rendering of a bit: only _1 prints as '1',
+     * every other value (0, x, z) prints as '0'
+     */
+    const bool is_one = (bit == _1);
+    return is_one ? '1' : '0';
+}
+
+/* Map a value in 0..15 to its hex digit character; the letters a-f follow the requested case. */
+static char bits_to_hex_c(short digit, bool uppercase) {
+    switch (digit) {
+        case 0:
+            return '0';
+        case 1:
+            return '1';
+        case 2:
+            return '2';
+        case 3:
+            return '3';
+        case 4:
+            return '4';
+        case 5:
+            return '5';
+        case 6:
+            return '6';
+        case 7:
+            return '7';
+        case 8:
+            return '8';
+        case 9:
+            return '9';
+        case 10:
+            return (uppercase) ? 'A' : 'a';
+        case 11:
+            return (uppercase) ? 'B' : 'b';
+        case 12:
+            return (uppercase) ? 'C' : 'c';
+        case 13:
+            return (uppercase) ? 'D' : 'd';
+        case 14:
+            return (uppercase) ? 'E' : 'e';
+        case 15:
+            return (uppercase) ? 'F' : 'f';
+        default:
+            // trailing space keeps the offending value from being glued to the text, as in c_to_bit
+            assert_Werr(0, "Invalid bits input " + std::to_string(digit));
+    }
+
+    // only reached for a digit outside 0..15: never fall off the end without a return value
+    std::abort();
+}
+
+/* Parse one bit character (0, 1, x or z, in either case) into its bit_value_t. */
+static bit_value_t c_to_bit(char c) {
+    switch (tolower(static_cast<unsigned char>(c))) { // tolower() is undefined for negative char values
+        case '0':
+            return _0;
+        case '1':
+            return _1;
+        case 'z':
+            return _z;
+        case 'x':
+            return _x;
+        default:
+            break;
+    }
+    assert_Werr(0, "Invalid bits input " + std::string(1, c));
+    return 0;
+}
+
+template<typename T>
+class BitFields {
+  private:
+    T bits = static_cast<T>(_All_x); // packed storage, 2 real bits per verilog bit, all x by default
+
+    // shift, in real bits, of the 2-bit slot that holds a verilog bit;
+    // the address is wrapped modulo size() so any address lands inside this field
+    template<typename Addr_t>
+    size_t get_bit_location(Addr_t address) {
+        const size_t slot = static_cast<size_t>(address) % this->size();
+        return (slot << 1);
+    }
+
+  public:
+    // fill every slot with init_v; any value other than _0, _1 or _z fills with x
+    BitFields(bit_value_t init_v)
+        : bits(static_cast<T>((_z == init_v) ? _All_z : (_1 == init_v) ? _All_1 : (_0 == init_v) ? _All_0 : _All_x)) {
+    }
+
+    // read the 2-bit slot selected by address
+    template<typename Addr_t>
+    bit_value_t get_bit(Addr_t address) {
+        const auto shifted = this->bits >> this->get_bit_location(address);
+
+        return static_cast<bit_value_t>(shifted & 0x3);
+    }
+
+    // overwrite the 2-bit slot selected by address with value
+    template<typename Addr_t>
+    void set_bit(Addr_t address, bit_value_t value) {
+        const size_t shift = this->get_bit_location(address);
+
+        const T slot_value = static_cast<T>(static_cast<T>(value) << shift);
+        const T keep_mask = static_cast<T>(~(static_cast<T>(static_cast<T>(0x3) << shift)));
+
+        const T cleared = static_cast<T>(this->bits & keep_mask);
+        this->bits = static_cast<T>(cleared | slot_value);
+    }
+
+    /**
+     * pack 8 verilog bits (16 real bits) into one char: bit i of the char
+     * is set when verilog bit (address * 8) + i holds any non-zero value
+     */
+    template<typename Addr_t>
+    char get_as_char(Addr_t address) {
+        char packed = 0;
+        for (size_t i = 0; i < 8; i++) {
+            const int is_set = (this->get_bit((address * 8) + i)) ? 1 : 0;
+            packed += (is_set << i);
+        }
+
+        return packed;
+    }
+
+    // verilog bits held by one field: 8 real bits per byte, 2 per verilog bit -> sizeof(T) * 4
+    static size_t size() {
+        return (sizeof(T) << 2);
+    }
+};
+
+// #define DEBUG_V_BITS
+
+/*****
+ * we use large array since we process the bits in chunks
+ */
+class VerilogBits {
+  private:
+    std::vector<BitFields<veri_internal_bits_t>> bits;
+    size_t bit_size = 0;
+
+    size_t to_index(size_t address) {
+        return (address / BitFields<veri_internal_bits_t>::size());
+    }
+
+    size_t list_size() {
+        return this->bits.size();
+    }
+
+  public:
+    VerilogBits() {
+        this->bit_size = 0;
+        this->bits = std::vector<BitSpace::BitFields<veri_internal_bits_t>>();
+    }
+
+    VerilogBits(size_t data_size, bit_value_t value_in) {
+        this->bit_size = data_size;
+        this->bits = std::vector<BitSpace::BitFields<veri_internal_bits_t>>();
+
+        size_t bitfield_count = (this->bit_size / BitFields<veri_internal_bits_t>::size()) + 1;
+
+        for (size_t i = 0; i < bitfield_count; i++) {
+            this->bits.push_back(BitSpace::BitFields<veri_internal_bits_t>(value_in));
+        }
+    }
+
+    VerilogBits(VerilogBits* other) {
+        this->bit_size = other->size();
+        this->bits = other->get_internal_bitvector();
+    }
+
+    size_t size() {
+        return this->bit_size;
+    }
+
+    std::vector<BitFields<veri_internal_bits_t>> get_internal_bitvector() {
+        return this->bits;
+    }
+
+    BitFields<veri_internal_bits_t>* get_bitfield(size_t index) {
+#ifdef DEBUG_V_BITS
+        if (index >= this->bits.size()) {
+            std::cerr << "Bit array indexing out of bounds " << index << " but size is " << this->bit_size << std::endl;
+            std::abort();
+        }
+#endif
+
+        return (&this->bits[index]);
+    }
+
+    bit_value_t get_bit(size_t address) {
+#ifdef DEBUG_V_BITS
+        if (address >= this->bit_size) {
+            std::cerr << "Bit index array out of bounds " << address << " but size is " << this->bit_size << std::endl;
+            std::abort();
+        }
+#endif
+
+        return (this->get_bitfield(to_index(address))->get_bit(address));
+    }
+
+    void set_bit(size_t address, bit_value_t value) {
+#ifdef DEBUG_V_BITS
+        if (address >= this->bit_size) {
+            std::cerr << "Bit index array out of bounds " << address << " but size is " << this->bit_size << std::endl;
+            std::abort();
+        }
+#endif
+        (this->get_bitfield(to_index(address))->set_bit(address, value));
+    }
+
+    std::string to_printable() {
+        std::string to_return = "";
+
+        for (size_t i = 0; i < this->size(); i += 8) {
+            to_return.insert(0, 1, this->get_bitfield(to_index(i))->get_as_char(i));
+        }
+
+        return to_return;
+    }
+
+    char getc() {
+        size_t last_index = this->size() - 1;
+        return this->get_bitfield(to_index(last_index))->get_as_char(last_index);
+    }
+
+    bool has_unknown() {
+        for (size_t address = 0x0; address < this->size(); address++) {
+            if (is_unk[this->get_bit(address)])
+                return true;
+        }
+
+        return false;
+    }
+
+    bool is_only_z() {
+        for (size_t address = 0x0; address < this->size(); address++) {
+            if (!is_z_bit[this->get_bit(address)])
+                return false;
+        }
+
+        return true;
+    }
+
+    bool is_only_x() {
+        for (size_t address = 0x0; address < this->size(); address++) {
+            if (!is_x_bit[this->get_bit(address)])
+                return false;
+        }
+
+        return true;
+    }
+
+    bool is_true() {
+        for (size_t address = 0x0; address < this->size(); address++) {
+            if (is_one_bit[this->get_bit(address)])
+                return true;
+        }
+
+        return false;
+    }
+
+    bool is_false() {
+        for (size_t address = 0x0; address < this->size(); address++) {
+            if (!is_zero_bit[this->get_bit(address)])
+                return false;
+        }
+
+        return true;
+    }
+
+    /**
+     * Unary Reduction operations
+     * This is Msb to Lsb on purpose, as per specs
+     */
+    VerilogBits bitwise_reduce(const bit_value_t lut[4][4]) {
+        bit_value_t result = this->get_bit(this->size() - 1);
+        for (size_t i = this->size() - 2; i < this->size(); i--) {
+            result = lut[result][this->get_bit(i)];
+        }
+
+        return VerilogBits(1, result);
+    }
+
+    /**
+     * Unary Bitwise operations
+     */
+    VerilogBits bitwise(const bit_value_t lut[4]) {
+        VerilogBits other(this->bit_size, _0);
+
+        for (size_t i = 0; i < this->size(); i++)
+            other.set_bit(i, lut[this->get_bit(i)]);
+
+        return other;
+    }
+
+    /** Inverts every bit and ripples `previous_carry` in, starting at the LSB. */
+    VerilogBits twos_complement(BitSpace::bit_value_t previous_carry) {
+        VerilogBits negated(this->bit_size, _0);
+        BitSpace::bit_value_t carry = previous_carry;
+        for (size_t pos = 0; pos < this->size(); ++pos) {
+            const BitSpace::bit_value_t inverted = BitSpace::l_not[this->get_bit(pos)];
+            const BitSpace::bit_value_t sum_bit = BitSpace::l_half_sum[carry][inverted];
+            carry = BitSpace::l_half_carry[carry][inverted];
+            negated.set_bit(pos, sum_bit);
+        }
+        return negated;
+    }
+
+    VerilogBits twos_complement() {
+        return this->twos_complement(BitSpace::_1);
+    }
+
+    /**
+     * Returns a copy resized to new_size bits; new_size == 0 compacts to the fewest bits.
+     */
+    VerilogBits resize(BitSpace::bit_value_t pad, size_t new_size) {
+        /**
+         * new_size == 0: drop the top bit while it and the bit below it both equal pad
+         */
+        if (new_size == 0) {
+            size_t last_bit_id = this->size() - 1;
+            size_t next_bit_id = last_bit_id - 1;
+            // next_bit_id wraps past zero, which fails the bound below and ends the loop
+            while (next_bit_id < this->size() - 1) {
+                BitSpace::bit_value_t current = this->get_bit(last_bit_id);
+                BitSpace::bit_value_t next = this->get_bit(next_bit_id);
+
+                if (current == next && current == pad) {
+                    last_bit_id--;
+                    next_bit_id--;
+                } else {
+                    break; /* the top bit is significant: stop trimming */
+                }
+            }
+
+            new_size = last_bit_id + 1;
+        }
+
+        VerilogBits other(new_size, BitSpace::_0);
+
+        size_t i = 0;
+        // copy the bits that exist in both the old and the new width
+        while (i < this->size() && i < new_size) {
+            other.set_bit(i, this->get_bit(i));
+            i++;
+        }
+
+        while (i < new_size) {
+            other.set_bit(i, pad); /* widening: the new upper bits take the pad value */
+            i++;
+        }
+
+        return other;
+    }
+
+    /**
+     * Builds a bitset of n_times back-to-back copies of this one, filled from the LSB up.
+     */
+    VerilogBits replicate(size_t n_times) {
+        const size_t width = this->size();
+        const size_t total = width * n_times;
+        VerilogBits repeated(total, BitSpace::_0);
+
+        size_t src = 0;
+        for (size_t dest = 0; dest < total; ++dest) {
+            repeated.set_bit(dest, this->get_bit(src));
+            src = (src + 1 == width) ? 0 : src + 1; // restart at the LSB for the next copy
+        }
+        return repeated;
+    }
+};
+} // namespace BitSpace
+
+//template<size_t bit_size>
+class VNumber {
+  private:
+    bool sign = false;
+    bool defined_size = false;
+    BitSpace::VerilogBits bitstring = BitSpace::VerilogBits(1, BitSpace::_x);
+
+    VNumber(BitSpace::VerilogBits other_bitstring, bool other_defined_size, bool other_sign) {
+        bitstring = BitSpace::VerilogBits(other_bitstring);
+        sign = other_sign;
+        defined_size = other_defined_size;
+    }
+
+    /** Splices bits 0..insertion_size-1 of other into this number at index_to_insert_at (counted from the LSB). */
+    VNumber insert(VNumber& other, size_t index_to_insert_at, size_t insertion_size) {
+        assert_Werr(other.is_defined_size() && this->is_defined_size(), "Size must be defined on both operand for insertion");
+        const bool both_signed = this->is_signed() && other.is_signed();
+        VNumber spliced(this->size() + insertion_size, BitSpace::_0, both_signed, true);
+        size_t dest = 0;
+
+        // low part of this number, below the insertion point
+        for (size_t src = 0; src < this->size() && src < index_to_insert_at; ++src)
+            spliced.set_bit_from_lsb(dest++, this->get_bit_from_lsb(src));
+        // the inserted bits
+        for (size_t src = 0; src < insertion_size; ++src)
+            spliced.set_bit_from_lsb(dest++, other.get_bit_from_lsb(src));
+        // remaining high part of this number
+        for (size_t src = index_to_insert_at; src < this->size(); ++src)
+            spliced.set_bit_from_lsb(dest++, this->get_bit_from_lsb(src));
+        return spliced;
+    }
+
+  public:
+    VNumber() {
+        this->sign = false;
+        this->bitstring = BitSpace::VerilogBits(1, BitSpace::_x);
+        this->defined_size = false;
+    }
+
+    VNumber(VNumber&&) = default;
+    VNumber& operator=(VNumber&&) = default;
+    VNumber& operator=(const VNumber& other) = default;
+
+    VNumber(const VNumber& other) {
+        this->sign = other.sign;
+        this->bitstring = other.bitstring;
+        this->defined_size = other.defined_size;
+    }
+
+    VNumber(VNumber* other) {
+        this->sign = other->sign;
+        this->bitstring = other->bitstring;
+        this->defined_size = other->defined_size;
+    }
+
+    VNumber(VNumber other, size_t length) {
+        this->sign = other.sign;
+        this->bitstring = other.bitstring.resize(other.get_padding_bit(), length);
+        this->defined_size = other.defined_size;
+    }
+
+    VNumber(const std::string& verilog_string) {
+        set_value(verilog_string);
+    }
+
+    VNumber(int64_t numeric_value) {
+        set_value(numeric_value);
+    }
+
+    VNumber(size_t len, BitSpace::bit_value_t initial_bits, bool input_sign, bool this_defined_size) {
+        this->bitstring = BitSpace::VerilogBits(len, initial_bits);
+        this->sign = input_sign;
+        this->defined_size = this_defined_size;
+    }
+
+    /***
+     * Returns the value as a 64 bit integer; aborts through assert_Werr if any bit is unknown.
+     */
+    int64_t get_value() {
+        size_t bit_size = 8 * sizeof(integer_t);
+
+        // only build the diagnostic string when the check actually fails
+        if (this->bitstring.has_unknown())
+            assert_Werr(false, "Invalid Number contains dont care values. number: " + this->to_verilog_bitstring());
+
+        size_t end = this->size();
+        if (end > integer_t_size) {
+            printf(" === Warning: Returning a 64 bit integer from a larger bitstring (%zu). The bitstring will be truncated\n", this->size());
+            end = bit_size;
+        }
+
+        // accumulate unsigned so that setting the top bit never shifts into a sign bit
+        unsigned long long result = 0;
+        BitSpace::bit_value_t pad = this->get_padding_bit();
+        for (size_t bit_index = 0; bit_index < end; bit_index++) {
+            unsigned long long current_bit = static_cast<unsigned long long>(pad);
+            if (bit_index < this->size())
+                current_bit = this->bitstring.get_bit(bit_index);
+            result |= (current_bit << bit_index);
+        }
+
+        return static_cast<integer_t>(result);
+    }
+
+    std::string to_string(bool big_endian, bool uppercase) {
+        // big_endian appends the bits in LSB-to-MSB order; otherwise each bit is prepended so the MSB ends up first
+        std::string rendered;
+        const size_t width = this->size();
+        for (size_t pos = 0; pos < width; ++pos) {
+            const char glyph = BitSpace::bit_to_c(this->get_bit_from_lsb(pos), uppercase);
+            if (!big_endian)
+                rendered.insert(rendered.begin(), glyph);
+            else
+                rendered.push_back(glyph);
+        }
+
+        return rendered;
+    }
+
+    std::string to_Ustring(bool big_endian) {
+        // walks the bits from the LSB up, rendering each one through bit_to_u
+        std::string rendered;
+
+        for (size_t pos = 0; pos < this->size(); ++pos) {
+            const char glyph = BitSpace::bit_to_u(this->get_bit_from_lsb(pos));
+            // appending keeps the LSB-first order; prepending puts the MSB first
+            if (big_endian)
+                rendered += glyph;
+            else
+                rendered.insert(0, 1, glyph);
+        }
+        return rendered;
+    }
+
+    std::string to_log2radix(short bit_count, bool big_endian, bool uppercase) {
+        // Packs bit_count bits (LSB first) into each digit; the last digit may hold fewer bits.
+        std::string digits;
+        int group = 0;
+        int filled = 0;
+        const size_t width = this->size();
+
+        for (size_t pos = 0; pos < width; ++pos) {
+            group |= (this->get_bit_from_lsb(pos) << filled);
+            ++filled;
+
+            const bool group_done = (filled >= bit_count) || (pos == width - 1);
+            if (!group_done)
+                continue;
+
+            // octal and hex share the same digits, so the hex table serves both
+            const char glyph = BitSpace::bits_to_hex_c(group, uppercase);
+            digits.insert(big_endian ? digits.end() : digits.begin(), glyph);
+            group = 0;
+            filled = 0;
+        }
+
+        return digits;
+    }
+
+    std::string to_base10(bool big_endian, bool uppercase) {
+        // Repeatedly divides a working copy by 10; each remainder is the next digit, least significant first.
+        VNumber temp(this);
+        std::string to_return = "";
+        while (!temp.is_false()) {
+            int carry = 0;
+            for (size_t address = 0x0; address < temp.size(); address++) {
+                // binary long division from msb to lsb: carry is the running remainder
+                int temp_value = temp.get_bit_from_msb(address) + (carry << 1);
+                carry = temp_value % 10;
+                temp.set_bit_from_msb(address, temp_value / 10);
+            }
+            char value = BitSpace::bits_to_hex_c(carry, uppercase);
+
+            if (big_endian) {
+                to_return.push_back(value);
+            } else {
+                to_return.insert(0, 1, value);
+            }
+        }
+        // a zero value produces no digit in the loop above: print it as "0", not as an empty string
+        return to_return.empty() ? std::string("0") : to_return;
+    }
+
+    // convert lsb_msb bitstring to verilog
+    std::string to_verilog_bitstring() {
+        std::string out = this->to_vstring('b');
+        size_t len = this->bitstring.size();
+
+        return std::to_string(len) + ((this->is_signed()) ? "\'sb" : "\'b") + out;
+    }
+
+    std::string to_vstring(char input_base) {
+        // an uppercase base letter selects uppercase digits in the output
+        char base = tolower(input_base);
+        const bool uppercase = (base != input_base);
+
+        // binary is all that can be printed once unknown bits are present
+        const bool numeric_radix = (base == 'o' || base == 'h' || base == 'd');
+        if (numeric_radix && this->has_unknown())
+            base = 'b';
+
+        switch (base) {
+            case 'b': // 'b' and 'z' share the plain one-character-per-bit rendering
+            case 'z':
+                return this->to_string(false, uppercase);
+            case 'u':
+                return this->to_Ustring(false);
+            case 'o':
+                return this->to_log2radix(3, false, uppercase);
+            case 'h':
+                return this->to_log2radix(4, false, uppercase);
+            case 'd':
+                return this->to_base10(false, uppercase);
+            case 's':
+                return this->bitstring.to_printable();
+            case 'c':
+                // forcefully truncate to a char
+                return std::string(1, this->bitstring.getc());
+            default:
+                break;
+        }
+
+        assert_Werr(0, "Invalid base for conversion");
+        std::abort();
+    }
+
+    /***
+     * setters: parse a Verilog literal ("text", [size]'[s]<b|o|d|h>digits, or a bare decimal)
+     */
+    void set_value(const std::string& input) {
+        if (!input.size()) {
+            return; // an empty input leaves the current value untouched
+        }
+
+        std::string verilog_string(input);
+
+        /**
+         * set defaults
+         */
+        size_t bitsize = 32;        // 32 bit is the fall back
+        this->defined_size = false; // the size is undefined unless otherwise specified
+        size_t radix = 0;           // the radix is unknown to start with
+        this->sign = false;         // we treat everything as unsigned unless specified
+
+        // if this is a string
+        if (verilog_string[0] == '\"') {
+            assert_Werr(verilog_string.size() >= 2,
+                        "Malformed input String for VNumber, only open quote" + verilog_string);
+
+            assert_Werr(verilog_string.back() == '\"',
+                        "Malformed input String for VNumber, expected closing quotes" + verilog_string);
+
+            verilog_string.erase(0, 1);
+            verilog_string.pop_back();
+            // each character takes 8 bits; an empty string is still given one byte
+            size_t string_size = verilog_string.size();
+            if (string_size == 0)
+                string_size = 1;
+
+            bitsize = string_size * 8;
+            this->defined_size = true;
+            radix = 256;
+        } else {
+            size_t loc = verilog_string.find("\'");
+            if (loc == std::string::npos) { // no base marker: read the text as a signed decimal
+                verilog_string.insert(0, "\'sd");
+                loc = 0;
+            }
+
+            if (loc != 0) { // the digits in front of the quote are the explicit bit width
+                std::string bit_length_char = verilog_string.substr(0, loc);
+                bitsize = strtoul(bit_length_char.c_str(), nullptr, 10);
+                this->defined_size = true;
+            }
+
+            if (std::tolower(verilog_string[loc + 1]) == 's') {
+                this->sign = true;
+            }
+
+            char base = static_cast<char>(std::tolower(verilog_string[loc + 1 + sign])); // sign (0 or 1) skips the optional 's'
+            switch (base) {
+                case 'b':
+                    radix = 2;
+                    break; // binary
+                case 'o':
+                    radix = 8;
+                    break; // octal
+                case 'd':
+                    radix = 10;
+                    break; // decimal
+                case 'h':
+                    radix = 16;
+                    break; // hexadecimal
+                default:
+                    assert_Werr(false,
+                                "Invalid radix base for number: " + std::string(1, base));
+                    break;
+            }
+
+            // drop the size/sign/base prefix, then strip the '_' digit separators
+            verilog_string = verilog_string.substr(loc + 2 + sign);
+            verilog_string.erase(std::remove(verilog_string.begin(), verilog_string.end(), '_'), verilog_string.end());
+
+            // verilog_string now holds only the digits of the literal
+        }
+
+        std::string temp_bitstring = string_of_radix_to_bitstring(verilog_string, radix);
+        // pad with the leading (msb) character, except that unsigned values never extend with '1'
+        char pad = temp_bitstring[0];
+        if (!this->sign && pad == '1') {
+            pad = '0';
+        }
+
+        // convert the bits to the internal data struct (bit at index 0 in string is msb since string go from msb to lsb)
+        BitSpace::VerilogBits new_bitstring(temp_bitstring.size(), BitSpace::_0);
+        size_t counter = temp_bitstring.size() - 1;
+        for (char in : temp_bitstring) {
+            new_bitstring.set_bit(counter--, BitSpace::c_to_bit(in));
+        }
+        // NOTE(review): a parsed size of 0 reaches resize() as its "compact" mode — confirm this is intended
+        this->bitstring = new_bitstring.resize(BitSpace::c_to_bit(pad), bitsize);
+    }
+
+    void set_value(int64_t in) {
+        this->set_value(std::to_string(in));
+    }
+
+    size_t msb_index() {
+        return this->bitstring.size() - 1;
+    }
+
+    /****
+     * bit twiddling functions
+     */
+    BitSpace::bit_value_t get_bit_from_msb(size_t index) {
+        assert_Werr(index <= msb_index(), "Index out of range");
+        return this->bitstring.get_bit(msb_index() - index);
+    }
+
+    BitSpace::bit_value_t get_bit_from_lsb(size_t index) {
+        if (index < this->size())
+            return this->bitstring.get_bit(index);
+        else
+            return this->get_padding_bit();
+    }
+
+    void set_bit_from_msb(size_t index, BitSpace::bit_value_t val) {
+        this->bitstring.set_bit(msb_index() - index, val);
+    }
+
+    void set_bit_from_lsb(size_t index, BitSpace::bit_value_t val) {
+        this->bitstring.set_bit(index, val);
+    }
+
+    /***
+     *  other
+     */
+    size_t size() {
+        return this->bitstring.size();
+    }
+
+    BitSpace::bit_value_t get_padding_bit() {
+        return (this->is_signed()) ? get_bit_from_msb(0) : BitSpace::_0;
+    }
+
+    bool is_signed() const {
+        return this->sign;
+    }
+
+    bool is_defined_size() {
+        return this->defined_size;
+    }
+
+    bool is_negative() {
+        return (this->get_bit_from_msb(0) == BitSpace::_1 && this->sign);
+    }
+
+    bool has_unknown() {
+        return this->bitstring.has_unknown();
+    }
+
+    bool is_z() {
+        return this->bitstring.is_only_z();
+    }
+
+    bool is_x() {
+        return this->bitstring.is_only_x();
+    }
+
+    bool is_true() {
+        return this->bitstring.is_true();
+    }
+
+    bool is_false() {
+        return this->bitstring.is_false();
+    }
+
+    VNumber twos_complement(BitSpace::bit_value_t carry) {
+        return VNumber(this->bitstring.twos_complement(carry), this->defined_size, this->sign);
+    }
+
+    VNumber twos_complement() {
+        return VNumber(this->bitstring.twos_complement(), this->defined_size, this->sign);
+    }
+
+    VNumber to_signed() {
+        return VNumber(this->bitstring, this->defined_size, true);
+    }
+
+    VNumber to_unsigned() {
+        return VNumber(this->bitstring, this->defined_size, false);
+    }
+
+    VNumber bitwise_reduce(const BitSpace::bit_value_t lut[4][4]) {
+        return VNumber(this->bitstring.bitwise_reduce(lut), this->defined_size, false);
+    }
+
+    /**
+     * Unary operations
+     */
+    VNumber bitwise(const BitSpace::bit_value_t lut[4]) {
+        return VNumber(this->bitstring.bitwise(lut), this->defined_size, false);
+    }
+
+    /**
+     * Binary operation: combines this number with b, bit by bit, through `lut`.
+     */
+    VNumber bitwise(VNumber& b, const BitSpace::bit_value_t lut[4][4]) {
+        // a narrower operand is extended with its own padding bit up to the wider width
+        const BitSpace::bit_value_t pad_a = this->get_padding_bit();
+        const BitSpace::bit_value_t pad_b = b.get_padding_bit();
+        const size_t width = std::max(this->size(), b.size());
+
+        // the result is unsigned, initialised with _x, and sized only if both operands are
+        const bool sized = this->is_defined_size() && b.is_defined_size();
+        VNumber combined(width, BitSpace::_x, false, sized);
+
+        size_t pos = 0;
+        while (pos < combined.size()) {
+            const BitSpace::bit_value_t bit_a = (pos < this->size()) ? this->get_bit_from_lsb(pos) : pad_a;
+            const BitSpace::bit_value_t bit_b = (pos < b.size()) ? b.get_bit_from_lsb(pos) : pad_b;
+
+            combined.set_bit_from_lsb(pos, lut[bit_a][bit_b]);
+            ++pos;
+        }
+
+        return combined;
+    }
+
+    VNumber replicate(int64_t n_times_replicate) {
+        // a count of zero or less aborts through assert_Werr
+        const bool count_ok = (n_times_replicate > 0);
+        assert_Werr(count_ok, "Cannot replicate bitstring less than 1 times");
+        // the result always has a defined size and keeps this number's signedness
+        const size_t copies = static_cast<size_t>(n_times_replicate);
+        return VNumber(this->bitstring.replicate(copies), true, this->sign);
+    }
+
+    VNumber insert_at_lsb(VNumber& other) {
+        return this->insert(other, 0, other.size());
+    }
+
+    VNumber insert_at_msb(VNumber& other) {
+        return this->insert(other, this->size(), other.size());
+    }
+};
+
+#endif
diff --git a/third_party/vtr/libs/rtlnumber/src/include/rtl_int.hpp b/third_party/vtr/libs/rtlnumber/src/include/rtl_int.hpp
new file mode 100644
index 000000000..41bead2a7
--- /dev/null
+++ b/third_party/vtr/libs/rtlnumber/src/include/rtl_int.hpp
@@ -0,0 +1,95 @@
+/* Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
+ *           Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com),
+ *            Alexandrea Demmings (alexandrea.demmings@unb.ca, lxdemmings@gmail.com) and
+ *             Dr. Kenneth B. Kent (ken@unb.ca)
+ *             for the Reconfigurable Computing Research Lab at the
+ *              University of New Brunswick in Fredericton, New Brunswick, Canada
+ */
+
+#ifndef RTL_INT_H
+#define RTL_INT_H
+
+#include <string>
+#include "internal_bits.hpp"
+
+/**
+ * Unary Operator
+ */
+
+bool V_TRUE(VNumber& a);
+bool V_FALSE(VNumber& a);
+bool V_UNK(VNumber& a);
+bool V_IS_X(VNumber& a);
+bool V_IS_Z(VNumber& a);
+bool V_IS_SIGNED(VNumber& a);
+bool V_IS_UNSIGNED(VNumber& a);
+
+std::string V_STRING(VNumber& a, const char base);
+
+VNumber V_UNSIGNED(VNumber& a);
+VNumber V_SIGNED(VNumber& a);
+VNumber V_ADD(VNumber& a);
+VNumber V_MINUS(VNumber& a);
+VNumber V_MINUS(VNumber& a, BitSpace::bit_value_t carry);
+
+VNumber V_BITWISE_BUF(VNumber& a);
+VNumber V_BITWISE_NOT(VNumber& a);
+
+VNumber V_BITWISE_AND(VNumber& a);
+VNumber V_BITWISE_OR(VNumber& a);
+VNumber V_BITWISE_XOR(VNumber& a);
+VNumber V_BITWISE_NAND(VNumber& a);
+VNumber V_BITWISE_NOR(VNumber& a);
+VNumber V_BITWISE_XNOR(VNumber& a);
+VNumber V_LOGICAL_NOT(VNumber& a);
+
+/**
+ * Binary Operator
+ */
+VNumber V_REPLICATE(VNumber& a, VNumber& n_times);
+VNumber V_CONCAT(std::vector<VNumber> concat_list);
+
+VNumber V_BITWISE_BUFIF0(VNumber& input, VNumber& trigger);
+VNumber V_BITWISE_BUFIF1(VNumber& input, VNumber& trigger);
+VNumber V_BITWISE_NOTIF0(VNumber& input, VNumber& trigger);
+VNumber V_BITWISE_NOTIF1(VNumber& input, VNumber& trigger);
+
+VNumber V_BITWISE_AND(VNumber& a, VNumber& b);
+VNumber V_BITWISE_OR(VNumber& a, VNumber& b);
+VNumber V_BITWISE_XOR(VNumber& a, VNumber& b);
+VNumber V_BITWISE_NAND(VNumber& a, VNumber& b);
+VNumber V_BITWISE_NOR(VNumber& a, VNumber& b);
+VNumber V_BITWISE_XNOR(VNumber& a, VNumber& b);
+
+VNumber V_SIGNED_SHIFT_LEFT(VNumber& a, VNumber& b);
+VNumber V_SIGNED_SHIFT_RIGHT(VNumber& a, VNumber& b);
+VNumber V_SHIFT_LEFT(VNumber& a, VNumber& b);
+VNumber V_SHIFT_RIGHT(VNumber& a, VNumber& b);
+
+VNumber V_LOGICAL_AND(VNumber& a, VNumber& b);
+VNumber V_LOGICAL_OR(VNumber& a, VNumber& b);
+
+VNumber V_LT(VNumber& a, VNumber& b);
+VNumber V_GT(VNumber& a, VNumber& b);
+VNumber V_LE(VNumber& a, VNumber& b);
+VNumber V_GE(VNumber& a, VNumber& b);
+VNumber V_EQUAL(VNumber& a, VNumber& b);
+VNumber V_NOT_EQUAL(VNumber& a, VNumber& b);
+VNumber V_CASE_EQUAL(VNumber& a, VNumber& b);
+VNumber V_CASE_NOT_EQUAL(VNumber& a, VNumber& b);
+
+VNumber V_ADD(VNumber& a, VNumber& b);
+VNumber V_MINUS(VNumber& a, VNumber& b);
+VNumber V_ADD(VNumber& a, VNumber& b, BitSpace::bit_value_t carry_in);
+VNumber V_MINUS(VNumber& a, VNumber& b, BitSpace::bit_value_t carry_in);
+VNumber V_MULTIPLY(VNumber& a, VNumber& b);
+VNumber V_POWER(VNumber& a, VNumber& b);
+VNumber V_DIV(VNumber& a, VNumber& b);
+VNumber V_MOD(VNumber& a, VNumber& b);
+
+/**
+ * Ternary Operator
+ */
+VNumber V_TERNARY(VNumber& a, VNumber& b, VNumber& c);
+
+#endif //RTL_INT_H
diff --git a/third_party/vtr/libs/rtlnumber/src/include/rtl_utils.hpp b/third_party/vtr/libs/rtlnumber/src/include/rtl_utils.hpp
new file mode 100644
index 000000000..d4e8139e7
--- /dev/null
+++ b/third_party/vtr/libs/rtlnumber/src/include/rtl_utils.hpp
@@ -0,0 +1,57 @@
+/* Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
+ *           Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com),
+ *            Alexandrea Demmings (alexandrea.demmings@unb.ca, lxdemmings@gmail.com) and
+ *             Dr. Kenneth B. Kent (ken@unb.ca)
+ *             for the Reconfigurable Computing Research Lab at the
+ *              University of New Brunswick in Fredericton, New Brunswick, Canada
+ */
+
+#ifndef RTL_UTILS_H
+#define RTL_UTILS_H
+
+#include <string>
+#include <iostream>
+
+#include <string.h>
+
+#ifndef FILE_NAME
+#    define FILE_NAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
+#endif
+
+/* Enable Debug Messages for libRTLNumber: Un-Comment to Enable Debug Messages:
+ *                                          Comment-out to Disable Debug Messages: */
+// #define ENABLE_DEBUG_MESSAGES
+
+#ifdef ENABLE_DEBUG_MESSAGES
+#    define DEBUG_MSG(debugMsg) std::cerr << "DEBUG: " << FILE_NAME << ":" << __LINE__ << " " << __func__ << "()" \
+                                          << ": " << debugMsg << std::endl
+#else
+#    define DEBUG_MSG(debugMsg) /* No-Op */
+#endif
+
+#ifndef WARN_MSG
+#    define WARN_MSG(warnMSG) std::cerr << "WARNING: " << FILE_NAME << ":" << __LINE__ << " " << __func__ << "()" \
+                                        << ": " << warnMSG << "!" << std::endl
+#endif
+
+#ifndef ERR_MSG
+#    define ERR_MSG(errMsg) std::cerr << std::endl                                                            \
+                                      << "ERROR: " << FILE_NAME << ":" << __LINE__ << " " << __func__ << "()" \
+                                      << ": " << errMsg << "!" << std::endl                                   \
+                                      << std::endl
+#endif
+
+std::string string_of_radix_to_bitstring(std::string orig_string, size_t radix);
+std::string convert_between_bases(std::string str, uint8_t base_from, uint8_t base_to, bool uppercase, bool big_endian);
+
+// Reports a failed assert_Werr check on stderr and aborts; does nothing when cond holds.
+inline void _assert_Werr(bool cond, const char* FUNCT, int LINE, std::string error_string) {
+    if (cond)
+        return;
+    std::cerr << std::endl
+              << "ERROR: " << FUNCT << "::" << std::to_string(LINE) << " Assert 'assert_Werr' Failed:\t" << error_string << "!" << std::endl << std::endl;
+    std::abort();
+}
+#define assert_Werr(cond, error_string) _assert_Werr((cond), __func__, __LINE__, std::string(error_string))
+
+#endif
diff --git a/third_party/vtr/libs/rtlnumber/src/rtl_int.cc b/third_party/vtr/libs/rtlnumber/src/rtl_int.cc
new file mode 100644
index 000000000..28c4c9d10
--- /dev/null
+++ b/third_party/vtr/libs/rtlnumber/src/rtl_int.cc
@@ -0,0 +1,746 @@
+/* Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
+ *           Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com),
+ *            Alexandrea Demmings (alexandrea.demmings@unb.ca, lxdemmings@gmail.com) and
+ *             Dr. Kenneth B. Kent (ken@unb.ca)
+ *             for the Reconfigurable Computing Research Lab at the
+ *              University of New Brunswick in Fredericton, New Brunswick, Canada
+ */
+
+#include <string>
+
+#include "internal_bits.hpp"
+#include "rtl_int.hpp"
+#include "rtl_utils.hpp"
+
+#define AMBIGUOUS_VALUE VNumber("'bx")
+
+using namespace BitSpace;
+
+// Outcome of a comparison as a bit mask: 0x1 greater, 0x2 equal, 0x4 less, 0x0 unknown.
+class compare_bit {
+  private:
+    uint8_t result = 0x0;
+
+  public:
+    compare_bit(uint8_t set_to) : result(set_to) {}
+
+    bool is_unk() { return result == 0x0; }
+    bool is_gt() { return (result & 0x1) != 0; }
+    bool is_eq() { return (result & 0x2) != 0; }
+    bool is_lt() { return (result & 0x4) != 0; }
+    // an unknown outcome has no flag set, so it also reports "not equal"
+    bool is_ne() { return (result & 0x2) == 0; }
+    bool is_ge() { return (result & (0x1 | 0x2)) != 0; }
+    bool is_le() { return (result & (0x2 | 0x4)) != 0; }
+};
+
+#define UNK_EVAL compare_bit(0x0)
+#define GT_EVAL compare_bit(0x1)
+#define EQ_EVAL compare_bit(0x2)
+#define LT_EVAL compare_bit(0x4)
+
+// Orders a_in against b_in; UNK_EVAL means the comparison could not be decided.
+static compare_bit eval_op(VNumber& a_in, VNumber& b_in) {
+    assert_Werr(a_in.size(),
+                "empty 1st bit string");
+    assert_Werr(b_in.size(),
+                "empty 2nd bit string");
+
+#ifndef RTL_ALLOW_UNKNOWN_COMPARE
+    if (a_in.has_unknown() || b_in.has_unknown())
+        return UNK_EVAL;
+#endif
+
+    bool neg_a = (a_in.is_negative());
+    bool neg_b = (b_in.is_negative());
+
+    // operands of opposite sign are ordered by the sign alone
+    if (neg_a && !neg_b) {
+        return LT_EVAL;
+    } else if (!neg_a && neg_b) {
+        return GT_EVAL;
+    }
+
+    /*
+     * Same-sign operands: with each one extended to the common width by its
+     * own padding bit, a plain msb-to-lsb bit comparison already gives the
+     * right ordering, for two negative values as well.
+     *
+     * Negating both negative operands and inverting the verdict, as was done
+     * before, breaks on the most negative value of a width: its negation has
+     * the same bit pattern (4'sb1000 stays 4'sb1000) and is still extended
+     * with 1s, so -8 (4 bit) compared as smaller than -100 (8 bit).
+     */
+    size_t std_length = std::max(a_in.size(), b_in.size());
+    bit_value_t pad_a = a_in.get_padding_bit();
+    bit_value_t pad_b = b_in.get_padding_bit();
+
+    // i wraps past zero, which ends the descending walk
+    for (size_t i = std_length - 1; i < std_length; i--) {
+        bit_value_t bit_a = pad_a;
+        if (i < a_in.size()) {
+            bit_a = a_in.get_bit_from_lsb(i);
+        }
+
+        bit_value_t bit_b = pad_b;
+        if (i < b_in.size()) {
+            bit_b = b_in.get_bit_from_lsb(i);
+        }
+
+        if (BitSpace::l_lt[bit_a][bit_b] == BitSpace::_1) {
+            return LT_EVAL;
+        } else if (BitSpace::l_gt[bit_a][bit_b] == BitSpace::_1) {
+            return GT_EVAL;
+        } else if (BitSpace::l_eq[bit_a][bit_b] == BitSpace::_1) {
+            continue;
+        } else {
+            return UNK_EVAL;
+        }
+    }
+
+    return EQ_EVAL;
+}
+
+static compare_bit eval_op(VNumber a, int64_t b) { // compares a against a plain integer
+    VNumber bits_value = VNumber(std::to_string(std::abs(b))); // magnitude first, parsed as a decimal literal
+    if (b < 0)
+        bits_value = bits_value.twos_complement(); // then negated to restore the sign of b
+    // NOTE(review): std::abs(b) overflows for the most negative int64_t — confirm no caller passes it
+    return eval_op(a, bits_value);
+}
+
+/**
+ * An operation is signed only when both of its operands are signed.
+ */
+static bool is_signed_operation(VNumber& a, VNumber& b) {
+    // any unsigned operand makes the whole operation unsigned
+    if (!a.is_signed())
+        return false;
+    if (!b.is_signed())
+        return false;
+
+    return true;
+}
+
+/**
+ * Ripple-carry addition of a and b, seeded with initial_carry.
+ */
+static VNumber sum_op(VNumber& a, VNumber& b, const bit_value_t& initial_carry, bool is_twos_complement_subtraction) {
+    assert_Werr(a.size(), "empty 1st bit string");
+    assert_Werr(b.size(), "empty 2nd bit string");
+
+    // a two's complement subtraction keeps the operand width; an addition gets
+    // one extra bit to hold the carry out
+    const size_t operand_width = std::max(a.size(), b.size());
+    const size_t result_width = is_twos_complement_subtraction ? operand_width : operand_width + 1;
+
+    // a narrower operand is extended with its own padding bit
+    const bit_value_t pad_a = a.get_padding_bit();
+    const bit_value_t pad_b = b.get_padding_bit();
+
+    // the result is signed only if both operands are, and sized only if both are sized
+    const bool result_signed = is_signed_operation(a, b);
+    const bool result_sized = a.is_defined_size() && b.is_defined_size();
+    VNumber result(result_width, _0, result_signed, result_sized);
+
+    bit_value_t carry = initial_carry;
+    size_t pos = 0;
+    while (pos < result_width) {
+        bit_value_t bit_a = pad_a;
+        if (pos < a.size())
+            bit_a = a.get_bit_from_lsb(pos);
+        bit_value_t bit_b = pad_b;
+        if (pos < b.size())
+            bit_b = b.get_bit_from_lsb(pos);
+
+        result.set_bit_from_lsb(pos, l_sum[carry][bit_a][bit_b]);
+        carry = l_carry[carry][bit_a][bit_b];
+        ++pos;
+    }
+
+    return result;
+}
+
+static VNumber shift_op(VNumber& a, int64_t b, bool sign_shift) {
+    // b > 0 shifts left (the result grows by b bits); b < 0 shifts right by -b within a's width
+    VNumber to_return;
+    if (b == 0) {
+        to_return = a;
+    } else if (b < 0) {
+        size_t u_b = static_cast<size_t>(-b);
+        bit_value_t pad = (sign_shift) ? a.get_padding_bit() : BitSpace::_0;
+        to_return = VNumber(a.size(), pad, sign_shift, a.is_defined_size());
+        // Bound on i + u_b, not a.size() - u_b: that subtraction wraps when the shift is wider
+        // than a, which drove the loop index far past the end of to_return.
+        for (size_t i = 0; i + u_b < a.size(); i++) {
+            to_return.set_bit_from_lsb(i, a.get_bit_from_lsb(i + u_b));
+        }
+    } else {
+        size_t u_b = static_cast<size_t>(b);
+        bit_value_t pad = BitSpace::_0;
+        to_return = VNumber((a.size() + u_b), pad, sign_shift, a.is_defined_size());
+        for (size_t i = 0; i < a.size(); i++) {
+            to_return.set_bit_from_lsb(i + u_b, a.get_bit_from_lsb(i));
+        }
+    }
+    return to_return;
+}
+
+bool V_TRUE(VNumber& a) {
+    return a.is_true();
+}
+
+bool V_FALSE(VNumber& a) {
+    return a.is_false();
+}
+
+bool V_UNK(VNumber& a) {
+    return a.has_unknown();
+}
+
+bool V_IS_X(VNumber& a) {
+    return a.is_x();
+}
+
+bool V_IS_Z(VNumber& a) {
+    return a.is_z();
+}
+
+bool V_IS_SIGNED(VNumber& a) {
+    return a.is_signed();
+}
+
+bool V_IS_UNSIGNED(VNumber& a) {
+    return !a.is_signed();
+}
+
+std::string V_STRING(VNumber& a, const char base) {
+    return a.to_vstring(base);
+}
+
+/***
+ *                    __          __   __   ___  __       ___    __       
+ *    |  | |\ |  /\  |__) \ /    /  \ |__) |__  |__)  /\   |  | /  \ |\ | 
+ *    \__/ | \| /~~\ |  \  |     \__/ |    |___ |  \ /~~\  |  | \__/ | \| 
+ *                                                                        
+ */
+
+VNumber V_BITWISE_NOT(VNumber& a) {
+    return a.bitwise(l_not);
+}
+
+VNumber V_LOGICAL_NOT(VNumber& a) {
+    // any unknown bit makes the answer ambiguous
+    if (a.has_unknown())
+        return AMBIGUOUS_VALUE;
+    // OR-reduce to a single bit, then invert that bit
+    VNumber any_bit_set = a.bitwise_reduce(l_or);
+    return any_bit_set.bitwise(l_not);
+}
+
+VNumber V_ADD(VNumber& a) {
+    VNumber result(a);
+    return result;
+}
+
+VNumber V_MINUS(VNumber& a) {
+    return a.twos_complement();
+}
+
+VNumber V_MINUS(VNumber& a, BitSpace::bit_value_t carry) {
+    return a.twos_complement(carry);
+}
+
+VNumber V_UNSIGNED(VNumber& a) {
+    return a.to_unsigned();
+}
+
+VNumber V_SIGNED(VNumber& a) {
+    return a.to_signed();
+}
+
+VNumber V_BITWISE_AND(VNumber& a) {
+    VNumber to_return = a.bitwise_reduce(l_and);
+    return to_return;
+}
+
+VNumber V_BITWISE_OR(VNumber& a) {
+    VNumber to_return = a.bitwise_reduce(l_or);
+    return to_return;
+}
+
+VNumber V_BITWISE_XOR(VNumber& a) {
+    VNumber to_return = a.bitwise_reduce(l_xor);
+    return to_return;
+}
+
+VNumber V_BITWISE_NAND(VNumber& a) {
+    VNumber to_return = a.bitwise_reduce(l_and).bitwise(l_not);
+    return to_return;
+}
+
+VNumber V_BITWISE_NOR(VNumber& a) {
+    VNumber to_return = a.bitwise_reduce(l_or).bitwise(l_not);
+    return to_return;
+}
+
+VNumber V_BITWISE_XNOR(VNumber& a) {
+    VNumber to_return = a.bitwise_reduce(l_xor).bitwise(l_not);
+    return to_return;
+}
+
+/***
+ *     __               __          __   __   ___  __       ___    __       
+ *    |__) | |\ |  /\  |__) \ /    /  \ |__) |__  |__)  /\   |  | /  \ |\ | 
+ *    |__) | | \| /~~\ |  \  |     \__/ |    |___ |  \ /~~\  |  | \__/ | \| 
+ *                                                                          
+ */
+
+VNumber V_REPLICATE(VNumber& a, VNumber& n_times) {
+    assert_Werr(!n_times.has_unknown(),
+                "Cannot use undefined number for the replication count");
+
+    return a.replicate(n_times.get_value());
+}
+
+VNumber V_CONCAT(std::vector<VNumber> concat_list) {
+    assert_Werr(!concat_list.empty(),
+                "Concat List cannot be empty");
+
+    VNumber init = concat_list[0];
+    for (size_t i = 1; i < concat_list.size(); i++) {
+        init = init.insert_at_lsb(concat_list[i]);
+    }
+    return init;
+}
+
+VNumber V_BITWISE_BUF(VNumber& a) {
+    return a.bitwise(l_buf);
+}
+
+VNumber V_BITWISE_BUFIF0(VNumber& input, VNumber& trigger) {
+    // Widen a 1-bit trigger in a local copy; the caller's operand is left unmodified.
+    bool widen = (trigger.size() == 1 && input.size() > 1);
+    VNumber enable = widen ? trigger.replicate(input.size()) : VNumber(trigger);
+    assert_Werr(input.size() == enable.size(),
+                "tristate must either have a single trigger or contains as many as the input width");
+    return input.bitwise(enable, l_bufif0);
+}
+
+VNumber V_BITWISE_BUFIF1(VNumber& input, VNumber& trigger) {
+    // Widen a 1-bit trigger in a local copy; the caller's operand is left unmodified.
+    bool widen = (trigger.size() == 1 && input.size() > 1);
+    VNumber enable = widen ? trigger.replicate(input.size()) : VNumber(trigger);
+    assert_Werr(input.size() == enable.size(),
+                "tristate must either have a single trigger or contains as many as the input width");
+    return input.bitwise(enable, l_bufif1);
+}
+
+VNumber V_BITWISE_NOTIF0(VNumber& input, VNumber& trigger) {
+    // Widen a 1-bit trigger in a local copy; the caller's operand is left unmodified.
+    bool widen = (trigger.size() == 1 && input.size() > 1);
+    VNumber enable = widen ? trigger.replicate(input.size()) : VNumber(trigger);
+    assert_Werr(input.size() == enable.size(),
+                "tristate must either have a single trigger or contains as many as the input width");
+    return input.bitwise(enable, l_notif0);
+}
+
+VNumber V_BITWISE_NOTIF1(VNumber& input, VNumber& trigger) {
+    // Widen a 1-bit trigger in a local copy; the caller's operand is left unmodified.
+    bool widen = (trigger.size() == 1 && input.size() > 1);
+    VNumber enable = widen ? trigger.replicate(input.size()) : VNumber(trigger);
+    assert_Werr(input.size() == enable.size(),
+                "tristate must either have a single trigger or contains as many as the input width");
+    return input.bitwise(enable, l_notif1);
+}
+
+VNumber V_BITWISE_AND(VNumber& a, VNumber& b) {
+    return a.bitwise(b, l_and);
+}
+
+VNumber V_BITWISE_OR(VNumber& a, VNumber& b) {
+    return a.bitwise(b, l_or);
+}
+
+VNumber V_BITWISE_XOR(VNumber& a, VNumber& b) {
+    return a.bitwise(b, l_xor);
+}
+
+VNumber V_BITWISE_NAND(VNumber& a, VNumber& b) {
+    return a.bitwise(b, l_nand);
+}
+
+VNumber V_BITWISE_NOR(VNumber& a, VNumber& b) {
+    return a.bitwise(b, l_nor);
+}
+
+VNumber V_BITWISE_XNOR(VNumber& a, VNumber& b) {
+    return a.bitwise(b, l_xnor);
+}
+
+/**
+ * Logical Operations
+ */
+
+VNumber V_CASE_EQUAL(VNumber& a, VNumber& b) {
+    VNumber longEval = a.bitwise(b, l_case_eq);
+    VNumber eq = V_BITWISE_AND(longEval);
+    return eq;
+}
+
+VNumber V_CASE_NOT_EQUAL(VNumber& a, VNumber& b) {
+    VNumber eq = V_CASE_EQUAL(a, b);
+    VNumber neq = V_LOGICAL_NOT(eq);
+    return neq;
+}
+
+VNumber V_LOGICAL_AND(VNumber& a, VNumber& b) {
+    if (a.has_unknown() || b.has_unknown())
+        return AMBIGUOUS_VALUE;
+    VNumber reduxA = a.bitwise_reduce(l_or);
+    VNumber reduxB = b.bitwise_reduce(l_or);
+
+    VNumber to_return = reduxA.bitwise(reduxB, l_and);
+
+    return to_return;
+}
+
+VNumber V_LOGICAL_OR(VNumber& a, VNumber& b) {
+    if (a.has_unknown() || b.has_unknown())
+        return AMBIGUOUS_VALUE;
+    VNumber reduxA = a.bitwise_reduce(l_or);
+    VNumber reduxB = b.bitwise_reduce(l_or);
+
+    VNumber to_return = reduxA.bitwise(reduxB, l_or);
+
+    return to_return;
+}
+
+VNumber V_LT(VNumber& a, VNumber& b) {
+    compare_bit cmp = eval_op(a, b);
+    BitSpace::bit_value_t result = cmp.is_unk() ? BitSpace::_x : cmp.is_lt() ? BitSpace::_1 : BitSpace::_0;
+    VNumber to_return(1, result, false, true);
+    return to_return;
+}
+
+VNumber V_GT(VNumber& a, VNumber& b) {
+    compare_bit cmp = eval_op(a, b);
+    BitSpace::bit_value_t result = cmp.is_unk() ? BitSpace::_x : cmp.is_gt() ? BitSpace::_1 : BitSpace::_0;
+    VNumber to_return(1, result, false, true);
+    return to_return;
+}
+
+VNumber V_EQUAL(VNumber& a, VNumber& b) {
+    compare_bit cmp = eval_op(a, b);
+    BitSpace::bit_value_t result = cmp.is_unk() ? BitSpace::_x : cmp.is_eq() ? BitSpace::_1 : BitSpace::_0;
+    VNumber to_return(1, result, false, true);
+    return to_return;
+}
+
+VNumber V_GE(VNumber& a, VNumber& b) {
+    compare_bit cmp = eval_op(a, b);
+    BitSpace::bit_value_t result = cmp.is_unk() ? BitSpace::_x : cmp.is_ge() ? BitSpace::_1 : BitSpace::_0;
+    VNumber to_return(1, result, false, true);
+    return to_return;
+}
+
+VNumber V_LE(VNumber& a, VNumber& b) {
+    compare_bit cmp = eval_op(a, b);
+    BitSpace::bit_value_t result = cmp.is_unk() ? BitSpace::_x : cmp.is_le() ? BitSpace::_1 : BitSpace::_0;
+    VNumber to_return(1, result, false, true);
+    return to_return;
+}
+
+VNumber V_NOT_EQUAL(VNumber& a, VNumber& b) {
+    compare_bit cmp = eval_op(a, b);
+    BitSpace::bit_value_t result = cmp.is_unk() ? BitSpace::_x : cmp.is_ne() ? BitSpace::_1 : BitSpace::_0;
+    VNumber to_return(1, result, false, true);
+    return to_return;
+}
+
+VNumber V_SIGNED_SHIFT_LEFT(VNumber& a, VNumber& b) {
+    if (b.has_unknown())
+        return AMBIGUOUS_VALUE;
+
+    return shift_op(a, b.get_value(), a.is_signed());
+}
+
+VNumber V_SHIFT_LEFT(VNumber& a, VNumber& b) {
+    if (b.has_unknown())
+        return AMBIGUOUS_VALUE;
+
+    return shift_op(a, b.get_value(), false);
+}
+
+VNumber V_SIGNED_SHIFT_RIGHT(VNumber& a, VNumber& b) {
+    if (b.has_unknown())
+        return AMBIGUOUS_VALUE;
+
+    return shift_op(a, -1 * b.get_value(), a.is_signed());
+}
+
+VNumber V_SHIFT_RIGHT(VNumber& a, VNumber& b) {
+    if (b.has_unknown())
+        return AMBIGUOUS_VALUE;
+
+    return shift_op(a, -1 * b.get_value(), false);
+}
+
+VNumber V_ADD(VNumber& a, VNumber& b, BitSpace::bit_value_t carry_in) {
+    return sum_op(a, b, carry_in, /* is_twos_complement_subtraction */ false);
+}
+
+VNumber V_MINUS(VNumber& a, VNumber& b, BitSpace::bit_value_t carry_in) {
+    size_t std_length = std::max(a.size(), b.size());
+    VNumber padded_a(a, std_length);
+    VNumber padded_b(b, std_length);
+
+    VNumber complement = V_MINUS(padded_b);
+    if (padded_b.is_negative() && complement.is_negative()) {
+        /* special case: 2's comp is identical to original, must pad */
+        complement = VNumber(padded_b, padded_b.size() + 1);
+        complement = V_MINUS(complement);
+    }
+
+    return sum_op(padded_a, complement, carry_in, /* is_twos_complement_subtraction */ true);
+}
+
+VNumber V_ADD(VNumber& a, VNumber& b) {
+    return V_ADD(a, b, _0);
+}
+
+VNumber V_MINUS(VNumber& a, VNumber& b) {
+    return V_MINUS(a, b, _0);
+}
+
+VNumber V_MULTIPLY(VNumber& a_in, VNumber& b_in) {
+    if (a_in.has_unknown() || b_in.has_unknown()) {
+        return AMBIGUOUS_VALUE;
+    }
+
+    VNumber a;
+    VNumber b;
+
+    bool is_multiply_signed_operation = is_signed_operation(a_in, b_in);
+    bool neg_a = a_in.is_negative();
+    bool neg_b = b_in.is_negative();
+
+    if (neg_a) {
+        a = V_MINUS(a_in);
+
+        if (a.is_negative()) {
+            /* special case: 2's comp is identical to original, must pad */
+            a = VNumber(a_in, a_in.size() + 1);
+            a = V_MINUS(a);
+        }
+    } else {
+        a = a_in;
+    }
+
+    if (neg_b) {
+        b = V_MINUS(b_in);
+
+        if (b.is_negative()) {
+            /* special case: 2's comp is identical to original, must pad */
+            b = VNumber(b_in, b_in.size() + 1);
+            b = V_MINUS(b);
+        }
+    } else {
+        b = b_in;
+    }
+
+    bool invert_result = ((!neg_a && neg_b) || (neg_a && !neg_b));
+
+    VNumber result("0");
+    VNumber b_copy = b;
+
+    for (size_t i = 0; i < a.size(); i++) {
+        bit_value_t bit_a = a.get_bit_from_lsb(i);
+
+        if (bit_a == _1) {
+            result = V_ADD(result, b_copy);
+        }
+
+        b_copy = shift_op(b_copy, 1, is_multiply_signed_operation);
+    }
+
+    if (invert_result) {
+        result = V_MINUS(result);
+    }
+
+    return result;
+}
+
+/*
+ * From Table 5-6 "Power operator rules" of IEEE Standard 1364-2005:
+ *  "Verilog Hardware Description Language"; on Page 46 (PDF Page 76):
+ *
+ * Table 5-6 — Power operator rules:
+ *
+ * |-----------------------------------------------------------------------------|
+ * | |  \ op1 is -> |               |                  |      |   |              |
+ * | \/   \         | negative < –1 | –1               | zero | 1 | positive > 1 |
+ * | op2 is \       |               |                  |      |   |              |
+ * |-----------------------------------------------------------------------------|
+ * |                |               |                  |      |   |              |
+ * | Positive       | op1 ** op2    | op2 is odd -> –1 | 0    | 1 | op1 ** op2   |
+ * |                |               | op2 is even -> 1 |      |   |              |
+ * |                |               |                  |      |   |              |
+ * |-----------------------------------------------------------------------------|
+ * |                |               |                  |      |   |              |
+ * | Zero           | 1             | 1                | 1    | 1 | 1            |
+ * |                |               |                  |      |   |              |
+ * |-----------------------------------------------------------------------------|
+ * |                |               |                  |      |   |              |
+ * | Negative       | 0             | op2 is odd -> –1 | 'bx  | 1 | 0            |
+ * |                |               | op2 is even -> 1 |      |   |              |
+ * |                |               |                  |      |   |              |
+ * |-----------------------------------------------------------------------------|
+ */
+VNumber V_POWER(VNumber& a, VNumber& b) {
+    if (a.has_unknown() || b.has_unknown()) {
+        return AMBIGUOUS_VALUE;
+    }
+
+    compare_bit res_a = eval_op(a, 0); // val_a classifies a: -2 (a < -1), -1, 0 (a == 0), 1, 2 (a > 1)
+    short val_a = (res_a.is_eq()) ? 0 : (res_a.is_lt()) ? (eval_op(a, -1).is_lt()) ? -2 : -1 :
+                                                        /* GREATER_THAN */ (eval_op(a, 1).is_gt()) ? 2 : 1;
+
+    compare_bit res_b = eval_op(b, 0); // val_b classifies b by its comparison with 0: -1, 0 or 1
+    short val_b = (res_b.is_eq()) ? 0 : (res_b.is_lt()) ? -1 :
+                                                        /* GREATER_THAN */ 1;
+
+    // Compute: Case Where 'val_a <= -2' or 'val_a >= 2'; As-Per the Spec:
+    if (val_b > 0 && (val_a < -1 || val_a > 1)) {
+        VNumber result("2'sb01");
+        VNumber one = VNumber("2'sb01");
+        VNumber tmp_b = b; // countdown copy of the exponent
+
+        while (eval_op(tmp_b, 0).is_gt()) { // one multiplication by a per remaining unit of tmp_b
+            VNumber tmp_b_comp = V_MINUS(tmp_b, one); // tmp_b - 1
+            if (tmp_b_comp.is_negative() && tmp_b.is_negative()) {
+                /* special case: 2's comp is identical to original, must pad */
+                tmp_b_comp = VNumber(tmp_b, tmp_b.size() + 1);
+                tmp_b_comp = V_MINUS(tmp_b_comp);
+            }
+            tmp_b = tmp_b_comp;
+
+            result = V_MULTIPLY(result, a);
+        }
+
+        return result;
+    } else if (val_b == 0 || val_a == 1) { // b == 0 or a == 1: result is 1
+        return VNumber("2'sb01");
+    } else if (val_b == -1 && val_a == 0) { // a == 0 with negative b
+        return AMBIGUOUS_VALUE;
+    } else if (val_a == -1) { // result depends only on the lowest bit of b
+        // Even:
+        if (BitSpace::_0 == b.get_bit_from_lsb(0)) {
+            return VNumber("2'sb01");
+        }
+        // Odd:
+        else {
+            return VNumber("2'sb11");
+        }
+    } else { // every remaining combination of val_a and val_b
+        return VNumber("2'sb00");
+    }
+}
+
+/////////////////////////////
+VNumber V_DIV(VNumber& a_in, VNumber& b_in) {
+    if (a_in.has_unknown() || b_in.has_unknown() || eval_op(b_in, 0).is_eq())
+        return AMBIGUOUS_VALUE;
+
+    VNumber result("0");
+
+    bool is_division_signed_operation = is_signed_operation(a_in, b_in);
+
+    bool neg_a = a_in.is_negative();
+    bool neg_b = b_in.is_negative();
+
+    VNumber a = neg_a ? V_MINUS(a_in) : a_in;
+    VNumber b = neg_b ? V_MINUS(b_in) : b_in;
+
+    if (neg_a && a.is_negative()) {
+        /* special case: 2's comp is identical to original, must pad */
+        a = VNumber(a_in, a_in.size() + 1);
+        a = V_MINUS(a);
+    }
+
+    if (neg_b && b.is_negative()) {
+        /* special case: 2's comp is identical to original, must pad */
+        b = VNumber(b_in, b_in.size() + 1);
+        b = V_MINUS(b);
+    }
+
+    while (eval_op(a, b).is_ge()) {
+        VNumber count("1");
+        VNumber tmp = b;
+
+        // initialize our variables
+        VNumber sub_with = tmp;
+        VNumber count_sub_with = count;
+        while (eval_op(tmp, a).is_le()) {
+            sub_with = tmp;
+            count_sub_with = count;
+            count = shift_op(count, 1, is_division_signed_operation);
+            tmp = shift_op(tmp, 1, is_division_signed_operation);
+        }
+        a = V_MINUS(a, sub_with);
+        result = V_ADD(result, count_sub_with);
+    }
+
+    return (neg_a != neg_b) ? V_MINUS(result) : result;
+}
+
+VNumber V_MOD(VNumber& a_in, VNumber& b_in) {
+    if (a_in.has_unknown() || b_in.has_unknown() || eval_op(b_in, 0).is_eq())
+        return AMBIGUOUS_VALUE;
+
+    bool neg_a = a_in.is_negative();
+    bool neg_b = b_in.is_negative();
+
+    VNumber a = neg_a ? V_MINUS(a_in) : a_in;
+    VNumber b = neg_b ? V_MINUS(b_in) : b_in;
+
+    if (neg_a && a.is_negative()) {
+        /* special case: 2's comp is identical to original, must pad */
+        a = VNumber(a_in, a_in.size() + 1);
+        a = V_MINUS(a);
+    }
+
+    if (neg_b && b.is_negative()) {
+        /* special case: 2's comp is identical to original, must pad */
+        b = VNumber(b_in, b_in.size() + 1);
+        b = V_MINUS(b);
+    }
+
+    bool is_modulo_signed_operation = is_signed_operation(a, b);
+
+    while (eval_op(a, b).is_ge()) {
+        VNumber tmp = b;
+        VNumber sub_with = tmp;
+
+        while (eval_op(tmp, a).is_le()) {
+            sub_with = tmp;
+            tmp = shift_op(tmp, 1, is_modulo_signed_operation);
+        }
+        a = V_MINUS(a, sub_with);
+    }
+
+    return (neg_a) ? V_MINUS(a) : a;
+}
+
+/***
+ *    ___  ___  __             __          __   __   ___  __       ___    __       
+ *     |  |__  |__) |\ |  /\  |__) \ /    /  \ |__) |__  |__)  /\   |  | /  \ |\ | 
+ *     |  |___ |  \ | \| /~~\ |  \  |     \__/ |    |___ |  \ /~~\  |  | \__/ | \| 
+ *                                                                                 
+ */
+VNumber V_TERNARY(VNumber& a_in, VNumber& b_in, VNumber& c_in) {
+    compare_bit negated_vs_zero = eval_op(V_LOGICAL_NOT(a_in), 0); /* logical NOT of a_in compared against 0 */
+    if (negated_vs_zero.is_unk())
+        return b_in.bitwise(c_in, l_ternary); /* comparison is unknown: combine both branches bitwise */
+    return (negated_vs_zero.is_eq()) ? VNumber(b_in) : VNumber(c_in);
+}
diff --git a/third_party/vtr/libs/rtlnumber/src/rtl_utils.cc b/third_party/vtr/libs/rtlnumber/src/rtl_utils.cc
new file mode 100644
index 000000000..8989456aa
--- /dev/null
+++ b/third_party/vtr/libs/rtlnumber/src/rtl_utils.cc
@@ -0,0 +1,304 @@
+/* Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
+ *           Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com),
+ *            Alexandrea Demmings (alexandrea.demmings@unb.ca, lxdemmings@gmail.com) and
+ *             Dr. Kenneth B. Kent (ken@unb.ca)
+ *             for the Reconfigurable Computing Research Lab at the
+ *              University of New Brunswick in Fredericton, New Brunswick, Canada
+ */
+
+#include "rtl_utils.hpp"
+#include <algorithm>
+#include <iostream>
+
+static const char* base_10_digits = "0123456789";
+
+static int to_nb(char val, short base) {
+    if (base == 256) {
+        return (int)val;
+    } else if (val >= '0' && val <= '9') {
+        return val - '0';
+    } else {
+        return tolower(val) - 'a' + 10;
+    }
+}
+
+static char to_chr(int val, short base, bool uppercase) {
+    if (base == 256) {
+        return (char)val;
+    } else if (val >= 0 and val <= 9) {
+        return val + '0';
+    } else if (!uppercase) {
+        return (val - 10) + 'a';
+    } else {
+        return (val - 10) + 'A';
+    }
+}
+
+std::string convert_between_bases(std::string str, uint8_t base_from, uint8_t base_to, bool uppercase, bool big_endian) {
+    std::string digits = ""; // digits of the result in base_to; stays empty when str is "0" or empty
+    while (!str.empty() && str != "0") { // empty guard: str.size() - 1 below would wrap around and index out of bounds
+        int carry = 0;
+
+        size_t start = (big_endian) ? str.size() - 1 : 0;
+        size_t end = (big_endian) ? 0 : str.size() - 1;
+        size_t increment = (big_endian) ? -1 : 1; // -1 wraps to SIZE_MAX, so i += increment steps backwards
+
+        for (size_t i = start; (big_endian) ? (i >= end && i <= start) : (i >= start && i <= end); i += increment) {
+            int temp = to_nb(str[i], base_from); // one long-division step: divide the running value by base_to
+            temp += base_from * carry;
+            carry = temp % base_to;
+            temp = temp / base_to;
+            str[i] = to_chr(temp, base_from, uppercase);
+        }
+
+        if (big_endian) {
+            digits.push_back(to_chr(carry, base_to, uppercase)); // the remainder is the next output digit
+            while (str.size() > 1 && str.back() == '0') {
+                str.pop_back();
+            }
+        } else {
+            digits.insert(0, 1, to_chr(carry, base_to, uppercase)); // the remainder is the next output digit
+            while (str.size() > 1 && str[0] == '0') {
+                str.erase(0, 1);
+            }
+        }
+    }
+    return digits;
+}
+
+static uint8_t _to_decimal(char digit, const char* FUNCT, int LINE) {
+    switch (std::tolower(digit)) {
+        case '0':
+            return 0;
+        case '1':
+            return 1;
+        case '2':
+            return 2;
+        case '3':
+            return 3;
+        case '4':
+            return 4;
+        case '5':
+            return 5;
+        case '6':
+            return 6;
+        case '7':
+            return 7;
+        case '8':
+            return 8;
+        case '9':
+            return 9;
+        default:
+            _assert_Werr(false, FUNCT, LINE,
+                         "INVALID BIT INPUT: " + std::string(1, digit));
+            break;
+    }
+    return 10;
+}
+
+#define to_decimal(num) _to_decimal(num, __func__, __LINE__)
+
+static std::string _radix_digit_to_bits_str(const char digit, size_t radix, const char* FUNCT, int LINE) {
+    switch (radix) { // each radix expands one digit into a fixed number of bit characters
+        case 2: {
+            switch (std::tolower(digit)) {
+                case '0':
+                    return "0";
+                case '1':
+                    return "1";
+                case 'x':
+                    return "x";
+                case 'z':
+                    return "z";
+                default:
+                    _assert_Werr(false, FUNCT, LINE,
+                                 "INVALID BIT INPUT: " + std::string(1, digit));
+                    break;
+            }
+            break;
+        }
+        case 8: {
+            switch (std::tolower(digit)) {
+                case '0':
+                    return "000";
+                case '1':
+                    return "001";
+                case '2':
+                    return "010";
+                case '3':
+                    return "011";
+                case '4':
+                    return "100";
+                case '5':
+                    return "101";
+                case '6':
+                    return "110";
+                case '7':
+                    return "111";
+                case 'x':
+                    return "xxx";
+                case 'z':
+                    return "zzz";
+                default:
+                    _assert_Werr(false, FUNCT, LINE,
+                                 "INVALID BIT INPUT: " + std::string(1, digit));
+                    break;
+            }
+            break;
+        }
+        case 16: {
+            switch (std::tolower(digit)) {
+                case '0':
+                    return "0000";
+                case '1':
+                    return "0001";
+                case '2':
+                    return "0010";
+                case '3':
+                    return "0011";
+                case '4':
+                    return "0100";
+                case '5':
+                    return "0101";
+                case '6':
+                    return "0110";
+                case '7':
+                    return "0111";
+                case '8':
+                    return "1000";
+                case '9':
+                    return "1001";
+                case 'a':
+                    return "1010";
+                case 'b':
+                    return "1011";
+                case 'c':
+                    return "1100";
+                case 'd':
+                    return "1101";
+                case 'e':
+                    return "1110";
+                case 'f':
+                    return "1111";
+                case 'x':
+                    return "xxxx";
+                case 'z':
+                    return "zzzz";
+                default:
+                    _assert_Werr(false, FUNCT, LINE,
+                                 "INVALID BIT INPUT: " + std::string(1, digit));
+                    break;
+            }
+            break;
+        }
+        case 256: {
+            std::string bitstring = "";
+            // go through unsigned char: halving a negative plain char truncates toward zero and drops the high bits
+            unsigned char temp = static_cast<unsigned char>(digit);
+            for (int i = 0; i < 8; i++) { // 8 bit per char, least significant bit extracted first
+                char value = temp % 2;
+                temp = temp / 2;
+
+                bitstring.insert(bitstring.begin(), (value) ? '1' : '0');
+            }
+            return bitstring;
+        }
+        default: {
+            _assert_Werr(false, FUNCT, LINE,
+                         "Invalid base " + std::to_string(radix));
+            break;
+        }
+    }
+    std::abort(); // reached only when a failed _assert_Werr above returned instead of terminating
+}
+
+#define radix_digit_to_bits(num, radix) _radix_digit_to_bits(num, radix, __func__, __LINE__)
+static std::string _radix_digit_to_bits(const char digit, size_t radix, const char* FUNCT, int LINE) {
+    std::string result = _radix_digit_to_bits_str(digit, radix, FUNCT, LINE);
+    return result;
+}
+
+/**********************
+ * convert from different radix to bitstring
+ */
+std::string string_of_radix_to_bitstring(std::string orig_string, size_t radix) {
+    std::string result = "";
+
+    switch (radix) { // validate the digits first; conversion happens in the loop below
+        case 2:
+            assert_Werr(!orig_string.empty(), "INVALID BIT INPUT: empty string");
+
+            assert_Werr(std::string::npos == orig_string.find_first_not_of("xXzZ01"),
+                        "INVALID BIT INPUT: " + orig_string + " for radix 2");
+            break;
+
+        case 8:
+            assert_Werr(!orig_string.empty(), "INVALID BIT INPUT: empty string");
+
+            assert_Werr(std::string::npos == orig_string.find_first_not_of("xXzZ01234567"),
+                        "INVALID BIT INPUT: " + orig_string + " for radix 8");
+            break;
+
+        case 10:
+            assert_Werr(!orig_string.empty(), "INVALID BIT INPUT: empty string");
+
+            assert_Werr(std::string::npos == orig_string.find_first_not_of("0123456789"),
+                        "INVALID BIT INPUT: " + orig_string + " for radix 10");
+            break;
+
+        case 16:
+            assert_Werr(!orig_string.empty(), "INVALID BIT INPUT: empty string");
+
+            assert_Werr(std::string::npos == orig_string.find_first_not_of("xXzZ0123456789aAbBcCdDeEfF"),
+                        "INVALID BIT INPUT: " + orig_string + " for radix 16");
+            break;
+
+        case 256:
+            // allow all chars
+            break;
+
+        default:
+            assert_Werr(false,
+                        "invalid radix: " + std::to_string(radix));
+            break;
+    }
+
+    while (!orig_string.empty()) {
+        switch (radix) {
+            case 10: { // divide the decimal string by 2; each pass yields one result bit
+                std::string new_number = "";
+
+                uint8_t rem_digit = 0;
+                for (char current_digit : orig_string) {
+                    uint8_t new_pair = (rem_digit * 10) + to_decimal(current_digit);
+
+                    new_number.push_back(base_10_digits[(new_pair / 2)]);
+                    rem_digit = new_pair % 2;
+                }
+
+                result.insert(result.begin(), base_10_digits[rem_digit]); // remainder is the next more significant bit
+                while (new_number.size() > 1
+                       && new_number[0] == '0') {
+                    new_number.erase(0, 1);
+                }
+
+                if (new_number == "0") { // quotient exhausted: clearing the string ends the outer loop
+                    orig_string = "";
+                } else {
+                    orig_string = new_number;
+                }
+
+                break;
+            }
+            default: { // expand the last digit and prepend its bits, consuming the string back to front
+                result = radix_digit_to_bits(orig_string.back(), radix) + result;
+                orig_string.pop_back();
+                break;
+            }
+        }
+    }
+
+    result.insert(result.begin(), '0'); // the returned bitstring always starts with one extra '0'
+
+    return result;
+}
diff --git a/third_party/vtr/libs/rtlnumber/unit_test/Makefile b/third_party/vtr/libs/rtlnumber/unit_test/Makefile
new file mode 100644
index 000000000..a872a5922
--- /dev/null
+++ b/third_party/vtr/libs/rtlnumber/unit_test/Makefile
@@ -0,0 +1,79 @@
+#Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
+#         Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com) and
+#          Dr. Kenneth B. Kent (ken@unb.ca)
+#          for the Reconfigurable Computing Research Lab at the
+#           University of New Brunswick in Fredericton, New Brunswick, Canada
+
+# If the first argument is "build", "run", "gdb", "valgrind" or "debug"...
+ifeq (build,$(firstword $(MAKECMDGOALS)))
+  # use the rest as arguments for "make"
+  RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
+  # ...and turn them into do-nothing targets
+  $(eval $(RUN_ARGS):;@:)
+endif
+ifeq (run,$(firstword $(MAKECMDGOALS)))	
+  # use the rest as arguments for "make"
+  RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
+  # ...and turn them into do-nothing targets
+  $(eval $(RUN_ARGS):;@:)
+endif
+ifeq (gdb,$(firstword $(MAKECMDGOALS)))	
+  # use the rest as arguments for "make"
+  RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
+  # ...and turn them into do-nothing targets
+  $(eval $(RUN_ARGS):;@:)
+endif
+ifeq (valgrind,$(firstword $(MAKECMDGOALS)))	
+  # use the rest as arguments for "make"
+  RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
+  # ...and turn them into do-nothing targets
+  $(eval $(RUN_ARGS):;@:)
+endif
+ifeq (debug,$(firstword $(MAKECMDGOALS)))	
+  # use the rest as arguments for "make"
+  RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
+  # ...and turn them into do-nothing targets
+  $(eval $(RUN_ARGS):;@:)
+endif
+
+INCLUDE =-I../src/include
+
+BIN = bin/exec.out
+
+C = clang++ -std=c++14 -lpthread
+
+cleanup_flags=\
+-ferror-limit=1000 \
+-Werror \
+-Wpedantic \
+-Weverything \
+-Wall \
+-Wno-c++98-compat \
+-Wno-unused-parameter \
+-g -O0 -fsanitize=address -fno-omit-frame-pointer -fno-optimize-sibling-calls
+
+.PHONY: error debug build run valgrind gdb clean
+# 'error' is the first rule, so a bare 'make' prints the usage text below.
+error: 
+	echo "can only use 'clean', 'debug <testname>.cpp', 'build <testname>.cpp' or 'run <arguments>'"
+
+debug:
+	mkdir -p bin
+	$(C) -ggdb $(cleanup_flags) $(INCLUDE) $(RUN_ARGS) -o $(BIN)
+
+build:
+	mkdir -p bin
+	$(C) $(INCLUDE) $(RUN_ARGS) -o $(BIN)
+
+run:
+	$(BIN) $(RUN_ARGS) 
+
+valgrind: build
+	valgrind --tool=helgrind $(BIN) $(RUN_ARGS) 
+
+gdb:
+	gdb --args $(BIN) $(RUN_ARGS)
+
+clean:
+	$(RM) -Rf bin
+
diff --git a/third_party/vtr/libs/rtlnumber/unit_test/verilog_bits.cpp b/third_party/vtr/libs/rtlnumber/unit_test/verilog_bits.cpp
new file mode 100644
index 000000000..d9a239997
--- /dev/null
+++ b/third_party/vtr/libs/rtlnumber/unit_test/verilog_bits.cpp
@@ -0,0 +1,27 @@
+/* Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
+ *           Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com) and
+ *            Dr. Kenneth B. Kent (ken@unb.ca)
+ *            for the Reconfigurable Computing Research Lab at the
+ *             University of New Brunswick in Fredericton, New Brunswick, Canada
+ */
+
+#include "internal_bits.hpp"
+
+using namespace BitSpace;
+int main(int argc, char** argv) {
+    if (argc < 2)
+        return 1; // no size argument: argv[1] is null and must not reach strtoul
+    size_t size = strtoul(argv[1], nullptr, 10);
+    VerilogBits my_bits(size, 'x');
+    printf("array_size(%zu) \n\n================\n", my_bits.size());
+    std::cout << my_bits.to_string(false) << std::endl;
+
+    for (size_t value = 0; value < 8; value++) {
+        for (size_t i = 0; i < size; i++) {
+            BitSpace::bit_value_t val = static_cast<BitSpace::bit_value_t>(value);
+            printf("(%hhu)[%zu] : ", val, i);
+            my_bits.set_bit(i, val); // overwrite bit i, then dump the whole array
+            std::cout << my_bits.to_string(false) << std::endl;
+        }
+    }
+}
diff --git a/third_party/vtr/libs/rtlnumber/verify_librtlnumber.sh b/third_party/vtr/libs/rtlnumber/verify_librtlnumber.sh
new file mode 100755
index 000000000..08b7ef09d
--- /dev/null
+++ b/third_party/vtr/libs/rtlnumber/verify_librtlnumber.sh
@@ -0,0 +1,124 @@
+#!/usr/bin/env bash
+#Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
+#         Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com) and
+#          Dr. Kenneth B. Kent (ken@unb.ca)
+#          for the Reconfigurable Computing Research Lab at the
+#           University of New Brunswick in Fredericton, New Brunswick, Canada
+
+# trap ctrl-c and call ctrl_c()
+trap ctrl_c INT
+
+TOTAL_TEST_RAN=0
+FAILURE_COUNT=0
+DEBUG=0
+
+function ctrl_c() {
+    FAILURE_COUNT=$((FAILURE_COUNT+1))
+	exit_code ${FAILURE_COUNT} "\n\n** EXITED FORCEFULLY **\n\n"
+}
+
+function exit_code() {
+	# $1: failure count (used as the exit status), $2: message printed first
+	echo -e "$2"
+	my_failed_count=$1
+	echo -e "$TOTAL_TEST_RAN Tests Ran; $my_failed_count Test Failures.\n"
+	[ "$my_failed_count" -gt "127" ] && echo "WARNING: Return Code may be unreliable: More than 127 Failures!"
+	echo "End."
+	exit $(( my_failed_count > 255 ? 255 : my_failed_count )) # statuses wrap modulo 256: 256 failures must not exit 0
+}
+
+# # Check if Library 'file' "${0%/*}/librtlnumber.a" exists
+if [ ! -f ./librtlnumber.a ] && [ ! -f ./rtl_number ]; 
+then
+		exit_code 99 "${0%/*}rtl number is nowhere to be found :o !\n" 
+fi
+
+# Dynamically load in inputs and results from
+#  file(s) on disk.
+for INPUT in ${0%/*}/regression_tests/*.csv; do
+	[ ! -f "$INPUT" ] && exit_code 99 "$INPUT regression test file not found!\n"
+
+	echo -e "\nRunning Test File: $INPUT:"
+
+	LINE=0
+
+	while IFS= read -r input_line; do
+
+		LINE=$((LINE + 1))
+
+		#glob whitespace from line and remove everything after comment (quoted so a '*' operator is never expanded)
+		input_line=$(echo "${input_line}" | tr -d '[:space:]' | cut -d '#' -f1)
+
+		#flip escaped commas to 'ESCAPED_COMMA' to safeguard against having them as csv separator
+		input_line=$(echo "${input_line}" | sed 's/\\\,/ESCAPED_COMMA/g')
+
+		#skip empty lines
+		[  "_" ==  "_${input_line}" ] && continue
+
+		#split csv
+		IFS="," read -ra arr <<< "${input_line}"
+		len=${#arr[@]}
+
+		if 	[ ${len} != "4" ] &&		# unary
+			[ ${len} != "5" ] &&		# binary
+			[ ${len} != "7" ] &&		# ternary
+			[ ${len} != "8" ]; then		# replicate
+				[ ! -z ${DEBUG} ] && echo -e "\nWARNING: Malformed Line in CSV File ($INPUT:$LINE) Input Line: ${input_line}! Skipping...\n"
+				continue
+		fi
+
+		
+
+		TOTAL_TEST_RAN=$((TOTAL_TEST_RAN + 1))
+
+		#deal with multiplication: disable globbing while the command words are expanded unquoted
+		set -f
+
+		# everything between is the operation to pipe in so we slice the array and concatenate with space
+		TEST_LABEL=${arr[0]}
+		EXPECTED_RESULT=${arr[$(( len -1 ))]}
+		
+		# build the command and get back our escaped commas 
+		RTL_CMD_IN=$(printf "%s " "${arr[@]:1:$(( len -2 ))}")
+		RTL_CMD_IN=$( echo ${RTL_CMD_IN} | sed 's/ESCAPED_COMMA/,/g' )
+
+		# Check for Anything on standard out and any non-'0' exit codes:
+		OUTPUT_AND_RESULT=$(${0%/*}/rtl_number ${RTL_CMD_IN})
+		EXIT_CODE=$?
+
+		if [[ 0 -ne $EXIT_CODE ]]
+		then
+			FAILURE_COUNT=$((FAILURE_COUNT+1))
+
+			echo -e "\nERROR: Non-Zero Exit Code from ${0%/*}/rtl_number (on $INPUT:$LINE)\n"
+
+			echo -e "-X- FAILED == $TEST_LABEL\t  ./rtl_number ${RTL_CMD_IN}\t Output:<$OUTPUT_AND_RESULT> != Expected:<$EXPECTED_RESULT>"
+
+		elif [ "${OUTPUT_AND_RESULT}" == "${EXPECTED_RESULT}" ]
+		then
+			echo "--- PASSED == $TEST_LABEL ( ${OUTPUT_AND_RESULT} ) "
+
+		elif [ "1'b1" == "$(${0%/*}/rtl_number ${OUTPUT_AND_RESULT} === ${EXPECTED_RESULT})" ]
+		then
+			echo "--- PASSED == $TEST_LABEL ( ${OUTPUT_AND_RESULT} )"
+
+		else
+			FAILURE_COUNT=$((FAILURE_COUNT+1))
+
+			# echo -e "${0##*/}@${HOSTNAME}: DEBUG: FAILURE_COUNT: $FAILURE_COUNT\n"
+
+			echo -e "\nERROR: Expected Result Didn't match what we got back from ${0%/*}/rtl_number (on $INPUT:$LINE)\n"
+
+			echo -e "-X- FAILED == $TEST_LABEL\t  ./rtl_number ${RTL_CMD_IN}\t Output:<$OUTPUT_AND_RESULT> != Expected:<$EXPECTED_RESULT>"
+
+		fi
+
+		#re-enable globbing ('unset -f' without names is a no-op; 'set +f' is the inverse of 'set -f')
+		set +f
+
+	done < "$INPUT"
+	#  Re-Enable Bash Wildcard Expansion '*' 
+	set +f
+done
+
+exit_code ${FAILURE_COUNT} "Completed Tests\n"
diff --git a/third_party/vtr/libs/vpr/src/draw/breakpoint_state_globals.h b/third_party/vtr/libs/vpr/src/draw/breakpoint_state_globals.h
new file mode 100644
index 000000000..d1bb22383
--- /dev/null
+++ b/third_party/vtr/libs/vpr/src/draw/breakpoint_state_globals.h
@@ -0,0 +1,33 @@
+#ifndef BREAKPOINT_STATE_GLOBALS
+#define BREAKPOINT_STATE_GLOBALS
+
+#include <string>
+#include <vector>
+
+// BreakpointState holds every value that could possibly trigger a breakpoint.
+// move_num, from_block, temp_count, block_affected and blocks_affected_by_move relate to the placer; route_net_id and router_iter relate to the router.
+// bp_description is the breakpoint description that is displayed in the UI and printed to the terminal.
+// These values are updated in place.cpp and route.cpp; expr_eval.cpp and breakpoint.cpp use them to look for breakpoints.
+struct BreakpointState {
+    int move_num = 0;                         //current number of completed placer moves
+    int from_block = -1;                      //first block moved in the current placement swap
+    int temp_count = 0;                       //number of temperature changes thus far
+    int block_affected = -1;                  //the block_id at which a stop was requested, if it is in blocks_affected_by_move
+    std::vector<int> blocks_affected_by_move; //clb netlist block ids of all blocks moving in the current perturbation
+    int route_net_id = -1;                    //clb netlist id of the net that was just routed
+    int router_iter = 0;                      //current rip-up and re-route iteration count of the router
+    std::string bp_description;               //the breakpoint description to appear in the breakpoint list in the GUI
+};
+
+class BreakpointStateGlobals {
+    //the single BreakpointState instance that the placer and router access and modify
+    BreakpointState glob_breakpoint_state;
+
+  public:
+    //accessor for glob_breakpoint_state: returns a mutable pointer to the member (valid only while this object exists)
+    BreakpointState* get_glob_breakpoint_state() {
+        return &glob_breakpoint_state;
+    }
+};
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/CMakeLists.txt b/third_party/vtr/libs/vtrutil/CMakeLists.txt
new file mode 100644
index 000000000..6e69ae530
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/CMakeLists.txt
@@ -0,0 +1,119 @@
+cmake_minimum_required(VERSION 3.9)
+
+project("libvtrutil")
+
+#Version info
+set(VTR_VERSION_FILE_IN ${CMAKE_CURRENT_SOURCE_DIR}/src/vtr_version.cpp.in)
+set(VTR_VERSION_FILE_OUT ${CMAKE_CURRENT_BINARY_DIR}/vtr_version.cpp)
+
+#Compiler info
+set(VTR_COMPILER_INFO "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION} on ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR}")
+
+#Set default version numbers in case not specified
+if(NOT DEFINED VTR_VERSION_MAJOR)
+    set(VTR_VERSION_MAJOR 0)
+endif()
+
+if(NOT DEFINED VTR_VERSION_MINOR)
+    set(VTR_VERSION_MINOR 0)
+endif()
+
+if(NOT DEFINED VTR_VERSION_PATCH)
+    set(VTR_VERSION_PATCH 0)
+endif()
+
+set(VTR_BUILD_INFO "${CMAKE_BUILD_TYPE}")
+if (CMAKE_INTERPROCEDURAL_OPTIMIZATION)
+    set(VTR_BUILD_INFO "${VTR_BUILD_INFO} IPO")
+endif()
+
+if (VPR_PGO_CONFIG STREQUAL "prof_use")
+    set(VTR_BUILD_INFO "${VTR_BUILD_INFO} PGO")
+elseif (VPR_PGO_CONFIG STREQUAL "prof_gen")
+    set(VTR_BUILD_INFO "${VTR_BUILD_INFO} PGOgen")
+endif()
+
+set(VTR_BUILD_INFO "${VTR_BUILD_INFO} VTR_ASSERT_LEVEL=${VTR_ASSERT_LEVEL}")
+
+if (VTR_ENABLE_SANITIZE)
+    set(VTR_BUILD_INFO "${VTR_BUILD_INFO} sanitizers")
+endif()
+if (VTR_ENABLE_PROFILING)
+    set(VTR_BUILD_INFO "${VTR_BUILD_INFO} gprof")
+endif()
+
+if (VTR_ENABLE_COVERAGE)
+    set(VTR_BUILD_INFO "${VTR_BUILD_INFO} gcov")
+endif()
+
+if (VTR_ENABLE_DEBUG_LOGGING)
+    set(VTR_BUILD_INFO "${VTR_BUILD_INFO} debug_logging")
+endif()
+
+# We always update the vtr_version.cpp file every time the project is built, 
+# to ensure the git revision and dirty status are up to date.
+#
+# We need to do this in two stages:
+#
+# 1) We add a custom target 'version' (which is always out of date) so it will always be run.
+#    It touches the unprocessed version input file so it too will always be out of date.
+#
+# 2) The custom command depends on the touched version input file and generates the processed 
+#    version file, with updated values. The custom command uses the configure_version.cmake 
+#    script to generate the up-to-date vtr_version.cpp
+add_custom_target(version ALL
+    COMMAND ${CMAKE_COMMAND} -E touch ${VTR_VERSION_FILE_IN})
+
+add_custom_command(OUTPUT ${VTR_VERSION_FILE_OUT}
+    COMMAND ${CMAKE_COMMAND} 
+                    -D IN_FILE=${VTR_VERSION_FILE_IN}
+                    -D OUT_FILE=${VTR_VERSION_FILE_OUT}
+                    -D VTR_VERSION_MAJOR=${VTR_VERSION_MAJOR}
+                    -D VTR_VERSION_MINOR=${VTR_VERSION_MINOR}
+                    -D VTR_VERSION_PATCH=${VTR_VERSION_PATCH}
+                    -D VTR_VERSION_PRERELEASE=${VTR_VERSION_PRERELEASE}
+                    -D VTR_COMPILER_INFO=${VTR_COMPILER_INFO}
+                    -D VTR_BUILD_INFO=${VTR_BUILD_INFO}
+                    -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/configure_version.cmake
+    MAIN_DEPENDENCY ${VTR_VERSION_FILE_IN}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+	VERBATIM)
+
+
+#
+# Source files and library
+#
+file(GLOB_RECURSE LIB_SOURCES src/*.cpp)
+file(GLOB_RECURSE LIB_HEADERS src/*.hpp src/*.h)
+files_to_dirs(LIB_HEADERS LIB_INCLUDE_DIRS)
+
+#Add the version file to the sources
+list(APPEND LIB_SOURCES ${VTR_VERSION_FILE_OUT})
+
+#Create the library
+add_library(libvtrutil STATIC
+             ${LIB_HEADERS}
+             ${LIB_SOURCES})
+target_include_directories(libvtrutil PUBLIC ${LIB_INCLUDE_DIRS})
+set_target_properties(libvtrutil PROPERTIES PREFIX "") #Avoid extra 'lib' prefix
+
+#Ensure version is always up to date by requiring version to be run first
+add_dependencies(libvtrutil version)
+
+#Specify link-time dependencies
+target_link_libraries(libvtrutil
+                        liblog)
+
+install(TARGETS libvtrutil DESTINATION bin)
+
+#
+# Unit Tests
+#
+file(GLOB_RECURSE TEST_SOURCES test/*.cpp)
+add_executable(test_vtrutil ${TEST_SOURCES})
+target_link_libraries(test_vtrutil 
+                        libvtrutil
+                        Catch2::Catch2WithMain)
+
+add_test(NAME test_vtrutil COMMAND test_vtrutil --use-colour=yes)
+            
diff --git a/third_party/vtr/libs/vtrutil/cmake/modules/configure_version.cmake b/third_party/vtr/libs/vtrutil/cmake/modules/configure_version.cmake
new file mode 100644
index 000000000..8c7fbf17e
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/cmake/modules/configure_version.cmake
@@ -0,0 +1,55 @@
+#
+# Versioning information (run in script mode; expects IN_FILE, OUT_FILE and VTR_VERSION_* via -D)
+#
+#Figure out the git revision; execute_process() is used because exec_program() is deprecated
+find_package(Git QUIET)
+if(GIT_FOUND)
+    execute_process(COMMAND "${GIT_EXECUTABLE}" describe --always --long --dirty
+                    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
+                    OUTPUT_VARIABLE VTR_VCS_REVISION OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET
+                    RESULT_VARIABLE GIT_DESCRIBE_RETURN_VALUE)
+
+    if(NOT GIT_DESCRIBE_RETURN_VALUE EQUAL 0)
+        #Git describe failed, usually this means we
+        #aren't in a git repo -- so fall back to a
+        #placeholder revision
+        set(VTR_VCS_REVISION "unknown")
+    endif()
+
+    #Call again with exclude to get the revision excluding any tags
+    #(i.e. just the commit ID and dirty flag); no shell is involved, so "*" reaches git literally
+    execute_process(COMMAND "${GIT_EXECUTABLE}" describe --always --long --dirty --exclude "*"
+                    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
+                    OUTPUT_VARIABLE VTR_VCS_REVISION_SHORT OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET
+                    RESULT_VARIABLE GIT_DESCRIBE_RETURN_VALUE)
+    if(NOT GIT_DESCRIBE_RETURN_VALUE EQUAL 0)
+        #Git describe failed, usually this means we
+        #aren't in a git repo -- so fall back to a
+        #placeholder revision
+        set(VTR_VCS_REVISION_SHORT "unknown")
+    endif()
+else()
+    #Couldn't find git, so can't look-up VCS revision
+    set(VTR_VCS_REVISION "unknown")
+    set(VTR_VCS_REVISION_SHORT "unknown")
+endif()
+
+
+#Set the version according to semver.org
+set(VTR_VERSION "${VTR_VERSION_MAJOR}.${VTR_VERSION_MINOR}.${VTR_VERSION_PATCH}")
+if(VTR_VERSION_PRERELEASE)
+    set(VTR_VERSION "${VTR_VERSION}-${VTR_VERSION_PRERELEASE}")
+endif()
+set(VTR_VERSION_SHORT ${VTR_VERSION})
+if(VTR_VCS_REVISION)
+    set(VTR_VERSION "${VTR_VERSION}+${VTR_VCS_REVISION_SHORT}")
+endif()
+
+#Other build meta-data
+string(TIMESTAMP VTR_BUILD_TIMESTAMP)
+set(VTR_BUILD_TIMESTAMP "${VTR_BUILD_TIMESTAMP}")
+set(VTR_BUILD_INFO "${VTR_BUILD_INFO}")
+
+message(STATUS "VTR Version: ${VTR_VERSION}")
+
+configure_file(${IN_FILE} ${OUT_FILE})
diff --git a/third_party/vtr/libs/vtrutil/src/picosha2.h b/third_party/vtr/libs/vtrutil/src/picosha2.h
new file mode 100644
index 000000000..67794f920
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/picosha2.h
@@ -0,0 +1,357 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (C) 2014 okdshin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef PICOSHA2_H
+#define PICOSHA2_H
+//picosha2:20140213
+#include <iostream>
+#include <vector>
+#include <iterator>
+#include <cassert>
+#include <sstream>
+#include <algorithm>
+
+namespace picosha2 {
+typedef unsigned long word_t;
+typedef unsigned char byte_t;
+
+namespace detail {
+inline byte_t mask_8bit(byte_t x) {
+    return x & 0xff;
+}
+
+inline word_t mask_32bit(word_t x) {
+    return x & 0xffffffff;
+}
+
+const word_t add_constant[64] = {
+    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2};
+
+const word_t initial_message_digest[8] = {
+    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
+
+inline word_t ch(word_t x, word_t y, word_t z) {
+    return (x & y) ^ ((~x) & z);
+}
+
+inline word_t maj(word_t x, word_t y, word_t z) {
+    return (x & y) ^ (x & z) ^ (y & z);
+}
+
+inline word_t rotr(word_t x, std::size_t n) {
+    assert(n < 32);
+    return mask_32bit((x >> n) | (x << (32 - n)));
+}
+
+inline word_t bsig0(word_t x) {
+    return rotr(x, 2) ^ rotr(x, 13) ^ rotr(x, 22);
+}
+
+inline word_t bsig1(word_t x) {
+    return rotr(x, 6) ^ rotr(x, 11) ^ rotr(x, 25);
+}
+
+inline word_t shr(word_t x, std::size_t n) {
+    assert(n < 32);
+    return x >> n;
+}
+
+inline word_t ssig0(word_t x) {
+    return rotr(x, 7) ^ rotr(x, 18) ^ shr(x, 3);
+}
+
+inline word_t ssig1(word_t x) {
+    return rotr(x, 17) ^ rotr(x, 19) ^ shr(x, 10);
+}
+
+template<typename RaIter1, typename RaIter2>
+void hash256_block(RaIter1 message_digest, RaIter2 first, RaIter2 /*last*/) { // 'last' is unused: exactly 64 bytes are read starting at 'first'
+    word_t w[64]; // 64-word schedule built from the input block
+    std::fill(w, w + 64, 0);
+    for (std::size_t i = 0; i < 16; ++i) { // pack the 64 input bytes into 16 words, most significant byte first
+        w[i] = (static_cast<word_t>(mask_8bit(*(first + i * 4))) << 24)
+               | (static_cast<word_t>(mask_8bit(*(first + i * 4 + 1))) << 16)
+               | (static_cast<word_t>(mask_8bit(*(first + i * 4 + 2))) << 8)
+               | (static_cast<word_t>(mask_8bit(*(first + i * 4 + 3))));
+    }
+    for (std::size_t i = 16; i < 64; ++i) { // derive the remaining 48 words from earlier entries
+        w[i] = mask_32bit(ssig1(w[i - 2]) + w[i - 7] + ssig0(w[i - 15]) + w[i - 16]);
+    }
+
+    word_t a = *message_digest; // working variables a..h start from the 8 digest words
+    word_t b = *(message_digest + 1);
+    word_t c = *(message_digest + 2);
+    word_t d = *(message_digest + 3);
+    word_t e = *(message_digest + 4);
+    word_t f = *(message_digest + 5);
+    word_t g = *(message_digest + 6);
+    word_t h = *(message_digest + 7);
+
+    for (std::size_t i = 0; i < 64; ++i) { // 64 rounds, one per schedule word and add_constant entry
+        word_t temp1 = h + bsig1(e) + ch(e, f, g) + add_constant[i] + w[i];
+        word_t temp2 = bsig0(a) + maj(a, b, c);
+        h = g;
+        g = f;
+        f = e;
+        e = mask_32bit(d + temp1);
+        d = c;
+        c = b;
+        b = a;
+        a = mask_32bit(temp1 + temp2);
+    }
+    *message_digest += a; // add the working variables back into the digest, in place
+    *(message_digest + 1) += b;
+    *(message_digest + 2) += c;
+    *(message_digest + 3) += d;
+    *(message_digest + 4) += e;
+    *(message_digest + 5) += f;
+    *(message_digest + 6) += g;
+    *(message_digest + 7) += h;
+    for (std::size_t i = 0; i < 8; ++i) { // truncate each digest word to 32 bits (word_t is unsigned long and may be wider)
+        *(message_digest + i) = mask_32bit(*(message_digest + i));
+    }
+}
+
+} //namespace detail
+
+template<typename InIter>
+void output_hex(InIter first, InIter last, std::ostream& os) {
+    // Remember the caller's formatting state so it can be put back afterwards
+    const std::ios::fmtflags saved_flags = os.flags();
+    const std::streamsize saved_width = os.width();
+    const char saved_fill = os.fill();
+    // Write each element as a zero-padded hexadecimal number at least two digits wide
+    os.setf(std::ios::hex, std::ios::basefield);
+    for (; first != last; ++first) {
+        os.fill('0');
+        os.width(2);
+        os << static_cast<unsigned int>(*first);
+    }
+    os.fill(saved_fill);
+    os.flags(saved_flags);
+    os.width(saved_width);
+}
+
+template<typename InIter>
+void bytes_to_hex_string(InIter first, InIter last, std::string& hex_str) {
+    std::ostringstream oss;
+    output_hex(first, last, oss);
+    hex_str.assign(oss.str());
+}
+
+template<typename InContainer>
+void bytes_to_hex_string(const InContainer& bytes, std::string& hex_str) {
+    bytes_to_hex_string(bytes.begin(), bytes.end(), hex_str);
+}
+
+template<typename InIter>
+std::string bytes_to_hex_string(InIter first, InIter last) {
+    std::string hex_str;
+    bytes_to_hex_string(first, last, hex_str);
+    return hex_str;
+}
+
+template<typename InContainer>
+std::string bytes_to_hex_string(const InContainer& bytes) {
+    std::string hex_str;
+    bytes_to_hex_string(bytes, hex_str);
+    return hex_str;
+}
+
+class hash256_one_by_one {
+  public:
+    hash256_one_by_one() {
+        init();
+    }
+
+    void init() {
+        buffer_.clear();
+        std::fill(data_length_digits_, data_length_digits_ + 4, 0);
+        std::copy(detail::initial_message_digest, detail::initial_message_digest + 8, h_);
+    }
+
+    template<typename RaIter>
+    void process(RaIter first, RaIter last) {
+        add_to_data_length(std::distance(first, last));
+        std::copy(first, last, std::back_inserter(buffer_));
+        std::size_t i = 0;
+        for (; i + 64 <= buffer_.size(); i += 64) {
+            detail::hash256_block(h_, buffer_.begin() + i, buffer_.begin() + i + 64);
+        }
+        buffer_.erase(buffer_.begin(), buffer_.begin() + i);
+    }
+
+    void finish() {
+        byte_t temp[64];
+        std::fill(temp, temp + 64, 0);
+        std::size_t remains = buffer_.size();
+        std::copy(buffer_.begin(), buffer_.end(), temp);
+        temp[remains] = 0x80;
+
+        if (remains > 55) {
+            std::fill(temp + remains + 1, temp + 64, 0);
+            detail::hash256_block(h_, temp, temp + 64);
+            std::fill(temp, temp + 64 - 4, 0);
+        } else {
+            std::fill(temp + remains + 1, temp + 64 - 4, 0);
+        }
+
+        write_data_bit_length(&(temp[56]));
+        detail::hash256_block(h_, temp, temp + 64);
+    }
+
+    template<typename OutIter>
+    void get_hash_bytes(OutIter first, OutIter last) const {
+        for (const word_t* iter = h_; iter != h_ + 8; ++iter) {
+            for (std::size_t i = 0; i < 4 && first != last; ++i) {
+                *(first++) = detail::mask_8bit(static_cast<byte_t>((*iter >> (24 - 8 * i))));
+            }
+        }
+    }
+
+  private:
+    void add_to_data_length(word_t n) {
+        word_t carry = 0;
+        data_length_digits_[0] += n;
+        for (std::size_t i = 0; i < 4; ++i) {
+            data_length_digits_[i] += carry;
+            if (data_length_digits_[i] >= 65536u) {
+                carry = data_length_digits_[i] >> 16;
+                data_length_digits_[i] &= 65535u;
+            } else {
+                break;
+            }
+        }
+    }
+    void write_data_bit_length(byte_t* begin) {
+        word_t data_bit_length_digits[4];
+        std::copy(
+            data_length_digits_, data_length_digits_ + 4,
+            data_bit_length_digits);
+
+        // convert byte length to bit length (multiply 8 or shift 3 times left)
+        word_t carry = 0;
+        for (std::size_t i = 0; i < 4; ++i) {
+            word_t before_val = data_bit_length_digits[i];
+            data_bit_length_digits[i] <<= 3;
+            data_bit_length_digits[i] |= carry;
+            data_bit_length_digits[i] &= 65535u;
+            carry = (before_val >> (16 - 3)) & 65535u;
+        }
+
+        // write data_bit_length
+        for (int i = 3; i >= 0; --i) {
+            (*begin++) = static_cast<byte_t>(data_bit_length_digits[i] >> 8);
+            (*begin++) = static_cast<byte_t>(data_bit_length_digits[i]);
+        }
+    }
+    std::vector<byte_t> buffer_;
+    word_t data_length_digits_[4]; //as 64bit integer (16bit x 4 integer)
+    word_t h_[8];
+};
+
+inline void get_hash_hex_string(const hash256_one_by_one& hasher, std::string& hex_str) {
+    byte_t hash[32];
+    hasher.get_hash_bytes(hash, hash + 32);
+    return bytes_to_hex_string(hash, hash + 32, hex_str);
+}
+
+inline std::string get_hash_hex_string(const hash256_one_by_one& hasher) {
+    std::string hex_str;
+    get_hash_hex_string(hasher, hex_str);
+    return hex_str;
+}
+
+template<typename RaIter, typename OutIter>
+void hash256(RaIter first, RaIter last, OutIter first2, OutIter last2) {
+    hash256_one_by_one hasher;
+    //hasher.init();
+    hasher.process(first, last);
+    hasher.finish();
+    hasher.get_hash_bytes(first2, last2);
+}
+
+template<typename RaIter, typename OutContainer>
+void hash256(RaIter first, RaIter last, OutContainer& dst) {
+    hash256(first, last, dst.begin(), dst.end());
+}
+
+template<typename RaContainer, typename OutIter>
+void hash256(const RaContainer& src, OutIter first, OutIter last) {
+    hash256(src.begin(), src.end(), first, last);
+}
+
+template<typename RaContainer, typename OutContainer>
+void hash256(const RaContainer& src, OutContainer& dst) {
+    hash256(src.begin(), src.end(), dst.begin(), dst.end());
+}
+
+template<typename RaIter>
+void hash256_hex_string(RaIter first, RaIter last, std::string& hex_str) {
+    byte_t hashed[32];
+    hash256(first, last, hashed, hashed + 32);
+    std::ostringstream oss;
+    output_hex(hashed, hashed + 32, oss);
+    hex_str.assign(oss.str());
+}
+
+template<typename RaIter>
+std::string hash256_hex_string(RaIter first, RaIter last) {
+    std::string hex_str;
+    hash256_hex_string(first, last, hex_str);
+    return hex_str;
+}
+
+inline void hash256_hex_string(const std::string& src, std::string& hex_str) {
+    hash256_hex_string(src.begin(), src.end(), hex_str);
+}
+
+template<typename RaContainer>
+void hash256_hex_string(const RaContainer& src, std::string& hex_str) {
+    hash256_hex_string(src.begin(), src.end(), hex_str);
+}
+
+template<typename RaContainer>
+std::string hash256_hex_string(const RaContainer& src) {
+    return hash256_hex_string(src.begin(), src.end());
+}
+
+} //namespace picosha2
+
+#endif //PICOSHA2_H
diff --git a/third_party/vtr/libs/vtrutil/src/vpr_error.cc b/third_party/vtr/libs/vtrutil/src/vpr_error.cc
new file mode 100644
index 000000000..14eb464b5
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vpr_error.cc
@@ -0,0 +1,89 @@
+#include <cstdarg>
+#include <string>
+
+#include "vtr_util.h"
+#include "vtr_log.h"
+#include "vpr_error.h"
+
+// Set of function names for which the VPR_THROW errors are treated
+// as VTR_LOG_WARN
+static std::unordered_set<std::string> functions_to_demote;
+
+/* Date:June 15th, 2013
+ * Author: Daniel Chen
+ * Purpose: The vpr_throw*() routines in this file are used to throw any
+ *			internal VPR error or architecture file error together with the
+ *			file name, line number and error message. They return nothing and
+ *			throw an exception instead (original note: caught in main.c).
+ *			map_error_activation_status() records a function name to demote.
+ */
+void map_error_activation_status(std::string function_name) {
+    functions_to_demote.insert(function_name);
+}
+
+void vpr_throw(enum e_vpr_error type,
+               const char* psz_file_name,
+               unsigned int line_num,
+               const char* psz_message,
+               ...) {
+    // Throws via vpr_throw_msg() using a message expanded from printf-style arguments.
+
+    // Gather the variadic arguments that follow psz_message
+    va_list fmt_args;
+    va_start(fmt_args, psz_message);
+
+    // Expand the format string into the final error text
+    std::string formatted = vtr::vstring_fmt(psz_message, fmt_args);
+
+    // The argument list is finished with once the text exists
+    va_end(fmt_args);
+
+    vpr_throw_msg(type, psz_file_name, line_num, formatted);
+}
+
+void vvpr_throw(enum e_vpr_error type,
+                const char* psz_file_name,
+                unsigned int line_num,
+                const char* psz_message,
+                va_list va_args) {
+    // va_list flavour of vpr_throw(): expand the printf-style message from an
+    // argument list supplied by the caller, then hand the text to
+    // vpr_throw_msg(), which throws.
+    vpr_throw_msg(type, psz_file_name, line_num, vtr::vstring_fmt(psz_message, va_args));
+}
+
+void vpr_throw_msg(enum e_vpr_error type,
+                   const char* psz_file_name,
+                   unsigned int line_num,
+                   std::string msg) {
+    throw VprError(type, msg, psz_file_name, line_num); // always throws; VprError takes the message before the file name and line
+}
+
+void vpr_throw_opt(enum e_vpr_error type,
+                   const char* psz_func_pretty_name,
+                   const char* psz_func_name,
+                   const char* psz_file_name,
+                   unsigned int line_num,
+                   const char* psz_message,
+                   ...) {
+    // Reports an error raised in function psz_func_name: a warning is logged if that
+    // name was registered via map_error_activation_status(), otherwise VprError is thrown.
+    std::string func_name(psz_func_name);
+
+    // Expand the printf-style message from the variadic arguments
+    va_list va_args;
+    va_start(va_args, psz_message);
+    std::string msg = vtr::vstring_fmt(psz_message, va_args);
+    va_end(va_args);
+
+    if (functions_to_demote.find(func_name) != functions_to_demote.end()) {
+        // Demoted: log only. The text is already formatted, so it is passed as a
+        // "%s" argument rather than as the format string; otherwise any '%' left
+        // in the message would be re-interpreted as a conversion specifier.
+        // NOTE(review): assumes VTR_LOGFF_WARN takes a printf-style format after the function name -- confirm in vtr_log.h
+        VTR_LOGFF_WARN(psz_file_name, line_num, psz_func_pretty_name, "%s", msg.c_str());
+    } else {
+        // Not demoted: throws VprError
+        vpr_throw_msg(type, psz_file_name, line_num, msg);
+    }
+}
diff --git a/third_party/vtr/libs/vtrutil/src/vpr_error.h b/third_party/vtr/libs/vtrutil/src/vpr_error.h
new file mode 100644
index 000000000..0c3d5a1cb
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vpr_error.h
@@ -0,0 +1,127 @@
+#ifndef VPR_ERROR_H
+#define VPR_ERROR_H
+
+#include <cstdarg>
+#include <string>
+#include <unordered_set>
+
+#include "vtr_error.h"
+
+enum e_vpr_error {
+    VPR_ERROR_UNKNOWN = 0,
+
+    // Flow errors
+    VPR_ERROR_ARCH,
+    VPR_ERROR_PACK,
+    VPR_ERROR_PLACE,
+    VPR_ERROR_ROUTE,
+    VPR_ERROR_TIMING,
+    VPR_ERROR_POWER,
+    VPR_ERROR_SDC,
+
+    // File parsing errors
+    VPR_ERROR_NET_F,        // Error while parsing the packed netlist file
+    VPR_ERROR_PLACE_F,      // Error while parsing the placement file
+    VPR_ERROR_BLIF_F,       // Error while parsing the blif file
+    VPR_ERROR_IC_NETLIST_F, // Error while parsing the interchange netlist file
+
+    VPR_ERROR_IMPL_NETLIST_WRITER,
+    VPR_ERROR_NETLIST,
+    VPR_ERROR_ATOM_NETLIST,
+    VPR_ERROR_CLB_NETLIST,
+    VPR_ERROR_ANALYSIS,
+    VPR_ERROR_INTERRUPTED,
+    VPR_ERROR_DRAW,
+    VPR_ERROR_OTHER
+};
+typedef enum e_vpr_error t_vpr_error_type;
+
+/* Exception thrown back to the highest level of the VPR flow when an internal VPR
+ * or user input error occurs; it adds an e_vpr_error category to vtr::VtrError.
+ * The default linenum of -1 converts to the largest size_t value. */
+class VprError : public vtr::VtrError {
+  public:
+    VprError(t_vpr_error_type err_type,
+             std::string msg = "",
+             std::string file = "",
+             size_t linenum = -1)
+        : VtrError(msg, file, linenum)
+        , type_(err_type) {}
+
+    t_vpr_error_type type() const { return type_; }
+
+  private:
+    t_vpr_error_type type_;
+};
+
+// This function is used to save into the functions_to_demote set
+// all the function names which contain VPR_THROW errors that are
+// going to be demoted to be VTR_LOG_WARN
+void map_error_activation_status(std::string function_name);
+
+//VPR error reporting routines
+//
+//Note that we mark these functions with the C++11 attribute 'noreturn'
+//as they will throw exceptions and not return normally. This can help
+//reduce false-positive compiler warnings
+[[noreturn]] void vpr_throw(enum e_vpr_error type, const char* psz_file_name, unsigned int line_num, const char* psz_message, ...);
+[[noreturn]] void vvpr_throw(enum e_vpr_error type, const char* psz_file_name, unsigned int line_num, const char* psz_message, va_list args);
+[[noreturn]] void vpr_throw_msg(enum e_vpr_error type, const char* psz_file_name, unsigned int line_num, std::string msg);
+
+void vpr_throw_opt(enum e_vpr_error type, const char* psz_func_pretty_name, const char* psz_func_name, const char* psz_file_name, unsigned int line_num, const char* psz_message, ...);
+
+//Figure out what macro to use to get the name of the current function.
+// We default to __func__ (C99/C++11); g++ newer than 2.6 also defines
+// __PRETTY_FUNCTION__, which includes class/namespace/overload information,
+// so we prefer it when available. The version test compares (major, minor)
+// as a pair so that e.g. GCC 10.2 (minor <= 6) is still recognised.
+#define VPR_THROW_FUNCTION __func__
+#ifdef __GNUC__
+#    ifdef __GNUC_MINOR__
+#        if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 6)
+#            undef VPR_THROW_FUNCTION
+#            define VPR_THROW_FUNCTION __PRETTY_FUNCTION__
+#        endif
+#    endif
+#endif
+
+/*
+ * Unconditionally throws a VprError condition with automatically specified
+ * file and line number of the call site.
+ *
+ * It is preferred to use either VPR_FATAL_ERROR(), or VPR_ERROR() to capture
+ * the intention behind the throw.
+ *
+ * This macro is a wrapper around vpr_throw().
+ */
+#define VPR_THROW(type, ...)                              \
+    do {                                                  \
+        vpr_throw(type, __FILE__, __LINE__, __VA_ARGS__); \
+    } while (false)
+
+/*
+ * VPR_FATAL_ERROR() is used to signal an *unconditional* fatal error which should
+ * stop the program.
+ *
+ * This macro is a wrapper around VPR_THROW()
+ */
+#define VPR_FATAL_ERROR(...)    \
+    do {                        \
+        VPR_THROW(__VA_ARGS__); \
+    } while (false)
+
+/*
+ * VPR_ERROR() is used to signal an error (potentially non-fatal) which by
+ * default stops the program, but may be suppressed (i.e. converted to a
+ * warning).
+ *
+ * This macro is a wrapper around vpr_throw_opt() which automatically
+ * specifies file and line number of call site.
+ *
+ */
+#define VPR_ERROR(type, ...)                                                                \
+    do {                                                                                    \
+        vpr_throw_opt(type, VPR_THROW_FUNCTION, __func__, __FILE__, __LINE__, __VA_ARGS__); \
+    } while (false)
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_array_view.h b/third_party/vtr/libs/vtrutil/src/vtr_array_view.h
new file mode 100644
index 000000000..3383263e0
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_array_view.h
@@ -0,0 +1,273 @@
+#ifndef _VTR_ARRAY_VIEW_H
+#define _VTR_ARRAY_VIEW_H
+
+#include <cstddef>
+#include <stdexcept>
+#include <iterator>
+#include "vtr_range.h"
+
+namespace vtr {
+
+/**
+ * @brief A view over a contiguous array (pointer + element count) that avoids copying data
+ */
+template<typename T>
+class array_view {
+  public:
+    ///@brief default constructor: an empty view (null data pointer, size 0)
+    explicit constexpr array_view()
+        : data_(nullptr)
+        , size_(0) {}
+
+    ///@brief construct a view over the size elements starting at str (the elements are not copied)
+    explicit constexpr array_view(T* str, size_t size)
+        : data_(str)
+        , size_(size) {}
+
+    constexpr array_view(const array_view& other) noexcept = default;
+    constexpr array_view& operator=(const array_view& view) noexcept {
+        data_ = view.data_;
+        size_ = view.size_;
+        return *this;
+    }
+
+    ///@brief [] operator (no bounds checking)
+    constexpr T& operator[](size_t pos) {
+        return data_[pos];
+    }
+
+    ///@brief constant [] operator (no bounds checking)
+    constexpr const T& operator[](size_t pos) const {
+        return data_[pos];
+    }
+
+    ///@brief at() operator: bounds-checked access, throws std::out_of_range if pos >= size()
+    T& at(size_t pos) {
+        if (pos >= size()) {
+            throw std::out_of_range("Pos is out of range.");
+        }
+
+        return data_[pos];
+    }
+
+    ///@brief const at() operator: bounds-checked access, throws std::out_of_range if pos >= size()
+    const T& at(size_t pos) const {
+        if (pos >= size()) {
+            throw std::out_of_range("Pos is out of range.");
+        }
+
+        return data_[pos];
+    }
+
+    ///@brief get the first element of the array (the view must not be empty)
+    constexpr T& front() {
+        return data_[0];
+    }
+
+    ///@brief get the first element of the array (can't update it; the view must not be empty)
+    constexpr const T& front() const {
+        return data_[0];
+    }
+
+    ///@brief get the last element of the array (the view must not be empty)
+    constexpr T& back() {
+        return data_[size() - 1];
+    }
+
+    ///@brief get the last element of the array (can't update it; the view must not be empty)
+    constexpr const T& back() const {
+        return data_[size() - 1];
+    }
+
+    ///@brief return the underlying pointer
+    constexpr T* data() {
+        return data_;
+    }
+
+    ///@brief return the underlying pointer (constant pointer)
+    constexpr const T* data() const {
+        return data_;
+    }
+
+    ///@brief return the array size
+    constexpr size_t size() const noexcept {
+        return size_;
+    }
+
+    ///@brief return the array size (same value as size())
+    constexpr size_t length() const noexcept {
+        return size_;
+    }
+
+    ///@brief check if the array is empty (true exactly when size() == 0)
+    constexpr bool empty() const noexcept {
+        return size_ == 0;
+    }
+
+    ///@brief return a pointer to the first element of the array
+    constexpr T* begin() noexcept {
+        return data_;
+    }
+
+    ///@brief return a constant pointer to the first element of the array
+    constexpr const T* begin() const noexcept {
+        return data_;
+    }
+
+    ///@brief return a constant pointer to the first element of the array
+    constexpr const T* cbegin() const noexcept {
+        return data_;
+    }
+
+    ///@brief return a pointer to one past the last element of the array
+    constexpr T* end() noexcept {
+        return data_ + size_;
+    }
+
+    ///@brief return a constant pointer to one past the last element of the array
+    constexpr const T* end() const noexcept {
+        return data_ + size_;
+    }
+
+    ///@brief return a constant pointer to one past the last element of the array
+    constexpr const T* cend() const noexcept {
+        return data_ + size_;
+    }
+
+  private:
+    T* data_;     // first element of the viewed storage (nullptr for a default-constructed view)
+    size_t size_; // number of elements in the view
+};
+
+/**
+ * @brief Implements a fixed length view to an array which is indexed by vtr::StrongId
+ *
+ * The main use of this container is to behave like a std::span which is
+ * indexed by a vtr::StrongId instead of size_t. It assumes that K is explicitly
+ * convertible to size_t
+ * (i.e. via operator size_t()), and can be explicitly constructed from a size_t.
+ */
+template<typename K, typename V>
+class array_view_id : private array_view<V> {
+    using storage = array_view<V>;
+
+  public:
+    explicit constexpr array_view_id(V* str, size_t a_size)
+        : array_view<V>(str, a_size) {}
+
+    typedef K key_type;
+
+    class key_iterator;
+    typedef vtr::Range<key_iterator> key_range;
+
+    // Don't expose operator[] and at() from the array_view base, since we redefine them to take key_type instead of size_t
+    ///@brief [] operator: element for id (id is converted to size_t and forwarded to the base operator[])
+    V& operator[](const key_type id) {
+        auto i = size_t(id);
+        return storage::operator[](i);
+    }
+    ///@brief constant [] operator: same lookup as above, returning a const reference
+    const V& operator[](const key_type id) const {
+        auto i = size_t(id);
+        return storage::operator[](i);
+    }
+    ///@brief at() operator: element for id (id is converted to size_t and forwarded to the base at())
+    V& at(const key_type id) {
+        auto i = size_t(id);
+        return storage::at(i);
+    }
+    ///@brief constant at() operator: same lookup as above, returning a const reference
+    const V& at(const key_type id) const {
+        auto i = size_t(id);
+        return storage::at(i);
+    }
+
+    ///@brief Returns a range containing the keys, from key_type(0) up to (not including) key_type(size())
+    key_range keys() const {
+        return vtr::make_range(key_begin(), key_end());
+    }
+
+    using storage::begin;
+    using storage::cbegin;
+    using storage::cend;
+    using storage::end;
+
+    using storage::empty;
+    using storage::size;
+
+    using storage::back;
+    using storage::data;
+    using storage::front;
+
+  public:
+    /**
+     * @brief Iterator class which is convertible to the key_type
+     *
+     * This allows end-users to call the parent class's keys() member
+     * to iterate through the keys with a range-based for loop
+     *
+     */
+    class key_iterator : public std::iterator<std::bidirectional_iterator_tag, key_type> {
+      public:
+        /**
+         * @brief Intermediate type my_iter
+         *
+         * We use the intermediate type my_iter to avoid a potential ambiguity for which
+         * clang generates errors and warnings
+         */
+        using my_iter = typename std::iterator<std::bidirectional_iterator_tag, K>;
+        using typename my_iter::iterator;
+        using typename my_iter::pointer;
+        using typename my_iter::reference;
+        using typename my_iter::value_type;
+
+        key_iterator(key_iterator::value_type init)
+            : value_(init) {}
+
+        /**
+         * @brief Note
+         *
+         * array_view_id assumes that the key type is convertible to size_t and
+         * that all the underlying IDs are zero-based and contiguous. That means
+         * we can just increment the underlying Id to build the next key.
+         */
+
+        ///@brief pre-increment: advance to the next key (note: returns a copy of the advanced iterator)
+        key_iterator operator++() {
+            value_ = value_type(size_t(value_) + 1);
+            return *this;
+        }
+
+        ///@brief pre-decrement: step back to the previous key (note: returns a copy of the updated iterator)
+        key_iterator operator--() {
+            value_ = value_type(size_t(value_) - 1);
+            return *this;
+        }
+
+        ///@brief dereference operator (*): reference to the key currently held by the iterator
+        reference operator*() { return value_; }
+
+        ///@brief -> operator: pointer to the key currently held by the iterator
+        pointer operator->() { return &value_; }
+
+        friend bool operator==(const key_iterator lhs, const key_iterator rhs) { return lhs.value_ == rhs.value_; }
+        friend bool operator!=(const key_iterator lhs, const key_iterator rhs) { return !(lhs == rhs); }
+
+      private:
+        value_type value_;
+    };
+
+  private:
+    key_iterator key_begin() const { return key_iterator(key_type(0)); }
+    key_iterator key_end() const { return key_iterator(key_type(size())); }
+};
+
+template<typename Container>
+array_view_id<typename Container::key_type, const typename Container::value_type> make_const_array_view_id(Container& container) {
+    // Read-only, key-indexed view built from the container's data() pointer and size()
+    using const_view = array_view_id<typename Container::key_type, const typename Container::value_type>;
+    return const_view(container.data(), container.size());
+}
+
+} // namespace vtr
+
+#endif /* _VTR_ARRAY_VIEW_H */
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_assert.cc b/third_party/vtr/libs/vtrutil/src/vtr_assert.cc
new file mode 100644
index 000000000..b77bec6e4
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_assert.cc
@@ -0,0 +1,23 @@
+#include "vtr_assert.h"
+
+#include <cstdio>  //fprintf, stderr
+#include <cstdlib> //abort
+
+namespace vtr {
+namespace assert {
+
+void handle_assert(const char* expr, const char* file, unsigned int line, const char* function, const char* msg) {
+    fprintf(stderr, "%s:%u", file, line); // %u matches the unsigned int 'line' argument
+    if (function) {                       // the function name is optional
+        fprintf(stderr, " %s:", function);
+    }
+    fprintf(stderr, " Assertion '%s' failed", expr);
+    if (msg) { // the extra message is optional (plain VTR_ASSERT passes nullptr)
+        fprintf(stderr, " (%s)", msg);
+    }
+    fprintf(stderr, ".\n");
+    std::abort(); // terminate the program once the failure has been reported
+}
+
+} // namespace assert
+} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_assert.h b/third_party/vtr/libs/vtrutil/src/vtr_assert.h
new file mode 100644
index 000000000..ba63a4bb5
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_assert.h
@@ -0,0 +1,151 @@
+#ifndef VTR_ASSERT_H
+#define VTR_ASSERT_H
+
+/**
+ * @file
+ * @brief The header vtr_assert.h defines useful assertion macros for VTR projects.
+ *
+ * Four types of assertions are defined:
+ *
+ *      VTR_ASSERT_OPT   - low overhead assertions that should always be enabled
+ *      VTR_ASSERT       - medium overhead assertions that are usually enabled
+ *      VTR_ASSERT_SAFE  - high overhead assertions typically enabled only for debugging
+ *      VTR_ASSERT_DEBUG - very high overhead assertions typically enabled only for extreme debugging
+ * Each of the above assertions also has a *_MSG variant (e.g. VTR_ASSERT_MSG(expr, msg))
+ * which takes an additional argument specifying additional message text to be shown.
+ * By convention the message should state the condition *being checked* (and not the failure condition),
+ * since that the condition failed is obvious from the assertion failure itself.
+ *
+ * The macro VTR_ASSERT_LEVEL specifies the level of assertion checking desired and is updated in CMAKE compilation:
+ *
+ *      VTR_ASSERT_LEVEL == 4: VTR_ASSERT_OPT, VTR_ASSERT, VTR_ASSERT_SAFE, VTR_ASSERT_DEBUG enabled
+ *      VTR_ASSERT_LEVEL == 3: VTR_ASSERT_OPT, VTR_ASSERT, VTR_ASSERT_SAFE enabled
+ *      VTR_ASSERT_LEVEL == 2: VTR_ASSERT_OPT, VTR_ASSERT enabled
+ *      VTR_ASSERT_LEVEL == 1: VTR_ASSERT_OPT enabled
+ *      VTR_ASSERT_LEVEL == 0: No assertion checking enabled
+ *
+ * @note Assertion levels beyond 4 are currently treated the same as level 4; the default assertion level is 2
+ */
+
+// Default to level 2 (VTR_ASSERT_OPT and VTR_ASSERT enabled) when the build does not define VTR_ASSERT_LEVEL
+#ifndef VTR_ASSERT_LEVEL
+#    define VTR_ASSERT_LEVEL 2
+#endif
+
+// Enable the assertions based on the specified level; each level also enables every cheaper class (>= tests)
+#if VTR_ASSERT_LEVEL >= 4
+#    define VTR_ASSERT_DEBUG_ENABLED
+#endif
+
+#if VTR_ASSERT_LEVEL >= 3
+#    define VTR_ASSERT_SAFE_ENABLED
+#endif
+
+#if VTR_ASSERT_LEVEL >= 2
+#    define VTR_ASSERT_ENABLED
+#endif
+
+#if VTR_ASSERT_LEVEL >= 1
+#    define VTR_ASSERT_OPT_ENABLED
+#endif
+
+// Define the user assertion macros: an enabled class expands to VTR_ASSERT_IMPL, a disabled one to VTR_ASSERT_IMPL_NOP
+#ifdef VTR_ASSERT_DEBUG_ENABLED
+#    define VTR_ASSERT_DEBUG(expr) VTR_ASSERT_IMPL(expr, nullptr)
+#    define VTR_ASSERT_DEBUG_MSG(expr, msg) VTR_ASSERT_IMPL(expr, msg)
+#else
+#    define VTR_ASSERT_DEBUG(expr) VTR_ASSERT_IMPL_NOP(expr, nullptr)
+#    define VTR_ASSERT_DEBUG_MSG(expr, msg) VTR_ASSERT_IMPL_NOP(expr, msg)
+#endif
+
+#ifdef VTR_ASSERT_SAFE_ENABLED
+#    define VTR_ASSERT_SAFE(expr) VTR_ASSERT_IMPL(expr, nullptr)
+#    define VTR_ASSERT_SAFE_MSG(expr, msg) VTR_ASSERT_IMPL(expr, msg)
+#else
+#    define VTR_ASSERT_SAFE(expr) VTR_ASSERT_IMPL_NOP(expr, nullptr)
+#    define VTR_ASSERT_SAFE_MSG(expr, msg) VTR_ASSERT_IMPL_NOP(expr, msg)
+#endif
+
+#ifdef VTR_ASSERT_ENABLED
+#    define VTR_ASSERT(expr) VTR_ASSERT_IMPL(expr, nullptr)
+#    define VTR_ASSERT_MSG(expr, msg) VTR_ASSERT_IMPL(expr, msg)
+#else
+#    define VTR_ASSERT(expr) VTR_ASSERT_IMPL_NOP(expr, nullptr)
+#    define VTR_ASSERT_MSG(expr, msg) VTR_ASSERT_IMPL_NOP(expr, msg)
+#endif
+
+#ifdef VTR_ASSERT_OPT_ENABLED
+#    define VTR_ASSERT_OPT(expr) VTR_ASSERT_IMPL(expr, nullptr)
+#    define VTR_ASSERT_OPT_MSG(expr, msg) VTR_ASSERT_IMPL(expr, msg)
+#else
+#    define VTR_ASSERT_OPT(expr) VTR_ASSERT_IMPL_NOP(expr, nullptr)
+#    define VTR_ASSERT_OPT_MSG(expr, msg) VTR_ASSERT_IMPL_NOP(expr, msg)
+#endif
+
+/**
+ * @brief Define the assertion implementation macro
+ *
+ * We wrap the check in a do {} while() to ensure the function-like
+ * macro can always be followed by a ';'
+ */
+#define VTR_ASSERT_IMPL(expr, msg)                                                           \
+    do {                                                                                     \
+        if (!(expr)) {                                                                       \
+            vtr::assert::handle_assert(#expr, __FILE__, __LINE__, VTR_ASSERT_FUNCTION, msg); \
+        }                                                                                    \
+    } while (false)
+
+/**
+ * @brief Define the no-op assertion implementation macro
+ *
+ * We wrap the check in a do {} while() to ensure the function-like
+ * macro can always be followed by a ';'
+ *
+ * Note that to avoid 'unused' variable warnings when assertions are
+ * disabled, we pass the expr and msg to sizeof(). We use sizeof specifically
+ * since it accepts expressions, and the C++ standard guarantees sizeof's arguments
+ * are never evaluated (ensuring any expensive expressions are not evaluated when
+ * assertions are disabled). To avoid warnings about the unused result of sizeof()
+ * we cast it to void.
+ */
+#define VTR_ASSERT_IMPL_NOP(expr, msg)   \
+    do {                                 \
+        static_cast<void>(sizeof(expr)); \
+        static_cast<void>(sizeof(msg));  \
+    } while (false)
+
+/**
+ * @brief Figure out what macro to use to get the name of the current function
+ *
+ * We default to __func__ which is defined in C99 and C++11.
+ * g++ > 2.6 defines __PRETTY_FUNCTION__ which includes class/namespace/overload
+ * information, so we prefer to use it if possible. The version test compares
+ * (major, minor) as a pair, so e.g. 4.2 or 13.1 also counts as newer than 2.6.
+ */
+#define VTR_ASSERT_FUNCTION __func__
+#ifdef __GNUC__
+#    ifdef __GNUC_MINOR__
+#        if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 6)
+#            undef VTR_ASSERT_FUNCTION
+#            define VTR_ASSERT_FUNCTION __PRETTY_FUNCTION__
+#        endif
+#    endif
+#endif
+
+namespace vtr {
+namespace assert {
+/**
+ * @brief Assertion handling routine
+ *
+ * Note that we mark the routine with the standard C++11
+ * attribute 'noreturn' which tells the compiler this
+ * function will never return. This should ensure the
+ * compiler won't warn about detected conditions such as
+ * dead-code or potential null pointer dereferences
+ * which are guarded against by assertions.
+ */
+[[noreturn]] void handle_assert(const char* expr, const char* file, unsigned int line, const char* function, const char* msg);
+} // namespace assert
+} // namespace vtr
+
+#endif //VTR_ASSERT_H
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_bimap.h b/third_party/vtr/libs/vtrutil/src/vtr_bimap.h
new file mode 100644
index 000000000..59bc19b42
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_bimap.h
@@ -0,0 +1,167 @@
+#ifndef VTR_BIMAP
+#define VTR_BIMAP
+
+/**
+ * @file
+ * @brief The vtr_bimap.h header provides a bi-directional mapping between key and value, which means that it can be addressed by either the key or the value
+ *
+ * It provides this bi-directional feature for all the map-like containers defined in vtr:
+ *      - unordered map
+ *      - flat map
+ *      - linear map
+ * 
+ * One example where this container is useful is the mapping between the atom and clustered net Ids. See atom_lookup.h
+ */
+
+#include <map>
+#include <unordered_map>
+#include "vtr_flat_map.h"
+#include "vtr_linear_map.h"
+
+#include "vtr_error.h"
+
+namespace vtr {
+
+/**
+ * @brief A map-like class which provides a bi-directional mapping between key and value
+ *
+ * Keys and values can be looked up directly by passing either the key or value.
+ * the indexing operator will throw if the key/value does not exist.
+ */
+template<class K, class V, template<typename...> class Map = std::map, template<typename...> class InvMap = std::map>
+class bimap {
+  public: //Public types
+    typedef typename Map<K, V>::const_iterator iterator;
+    typedef typename InvMap<V, K>::const_iterator inverse_iterator;
+
+  public: //Accessors
+    //Iterators
+
+    ///@brief Return an iterator to the begin of the map
+    iterator begin() const { return map_.begin(); }
+
+    ///@brief Return an iterator to the end of the map
+    iterator end() const { return map_.end(); }
+
+    ///@brief Return an iterator to the begin of the inverse map
+    inverse_iterator inverse_begin() const { return inverse_map_.begin(); }
+
+    ///@brief Return an iterator to the end of the inverse map
+    inverse_iterator inverse_end() const { return inverse_map_.end(); }
+
+    ///@brief Return an iterator to the key-value pair matching key, or end() if not found
+    iterator find(const K key) const {
+        return map_.find(key);
+    }
+
+    ///@brief Return an iterator to the value-key pair matching value, or inverse_end() if not found
+    inverse_iterator find(const V value) const {
+        return inverse_map_.find(value);
+    }
+
+    ///@brief Return an immutable reference to the value matching key (throw an exception if key is not found)
+    const V& operator[](const K key) const {
+        auto iter = find(key);
+        if (iter == end()) {
+            throw VtrError("Invalid bimap key during look-up", __FILE__, __LINE__);
+        }
+        return iter->second;
+    }
+
+    ///@brief Return an immutable reference to the key matching value (throw an exception if value is not found)
+    const K& operator[](const V value) const {
+        auto iter = find(value);
+        if (iter == inverse_end()) {
+            throw VtrError("Invalid bimap value during inverse look-up", __FILE__, __LINE__);
+        }
+        return iter->second;
+    }
+
+    ///@brief Return the number of key-value pairs stored (asserts that both directions hold the same count)
+    std::size_t size() const {
+        VTR_ASSERT(map_.size() == inverse_map_.size());
+        return map_.size();
+    }
+
+    ///@brief Return true if there are no key-value pairs stored
+    bool empty() const { return (size() == 0); }
+
+    ///@brief Return true if the specified key exists
+    bool contains(const K key) const { return find(key) != end(); }
+
+    ///@brief Return true if the specified value exists
+    bool contains(const V value) const { return find(value) != inverse_end(); }
+
+  public: //Mutators
+    ///@brief Drop all stored key-values
+    void clear() {
+        map_.clear();
+        inverse_map_.clear();
+    }
+
+    ///@brief Insert a key-value pair, if not already in map
+    std::pair<iterator, bool> insert(const K key, const V value) {
+        auto ret1 = map_.insert({key, value});
+        auto ret2 = inverse_map_.insert({value, key});
+        VTR_ASSERT(ret1.second == ret2.second);
+
+        // Return true if inserted
+        return ret1;
+    }
+
+    ///@brief Update a key-value pair, will insert if not already in map (pairs sharing the key or the value are replaced)
+    void update(const K key, const V value) {
+        erase(key);   // remove the pair currently holding this key, so its old value also leaves inverse_map_
+        erase(value); // remove the pair currently holding this value, so its old key also leaves map_
+        insert(key, value);
+    }
+
+    ///@brief Remove the specified key (and its associated value)
+    void erase(const K key) {
+        auto iter = map_.find(key);
+        if (iter != map_.end()) {
+            V val = iter->second;
+            map_.erase(iter);
+
+            auto inv_iter = inverse_map_.find(val);
+            VTR_ASSERT(inv_iter != inverse_map_.end());
+            inverse_map_.erase(inv_iter);
+        }
+    }
+
+    ///@brief Remove the specified value (and its associated key)
+    void erase(const V val) {
+        auto inv_iter = inverse_map_.find(val);
+        if (inv_iter != inverse_map_.end()) {
+            K key = inv_iter->second;
+            inverse_map_.erase(inv_iter);
+
+            auto iter = map_.find(key);
+            VTR_ASSERT(iter != map_.end());
+            map_.erase(iter);
+        }
+    }
+
+    ///@brief Swap (this enables std::swap via ADL)
+    friend void swap(bimap<K, V, Map, InvMap>& x, bimap<K, V, Map, InvMap>& y) {
+        std::swap(x.map_, y.map_);
+        std::swap(x.inverse_map_, y.inverse_map_);
+    }
+
+  private:
+    Map<K, V> map_;            // forward direction: key -> value
+    InvMap<V, K> inverse_map_; // inverse direction: value -> key
+};
+
+template<class K, class V>
+using unordered_bimap = bimap<K, V, std::unordered_map, std::unordered_map>; ///< bimap using std::unordered_map for both directions
+
+template<class K, class V>
+using flat_bimap = bimap<K, V, vtr::flat_map, vtr::flat_map>; ///< bimap using vtr::flat_map for both directions
+
+template<class K, class V>
+using linear_bimap = bimap<K, V, vtr::linear_map, vtr::linear_map>; ///< bimap using vtr::linear_map for both directions
+
+} // namespace vtr
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_cache.h b/third_party/vtr/libs/vtrutil/src/vtr_cache.h
new file mode 100644
index 000000000..30871cd44
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_cache.h
@@ -0,0 +1,46 @@
+#ifndef VTR_CACHE_H_
+#define VTR_CACHE_H_
+
+#include <memory>
+
+namespace vtr {
+
+///@brief A simple cache holding a single entry: one key and the value stored for it
+template<typename CacheKey, typename CacheValue>
+class Cache {
+  public:
+    ///@brief Forget the stored entry (the key goes back to a default-constructed CacheKey)
+    void clear() {
+        key_ = CacheKey();
+        value_ = nullptr;
+    }
+    /**
+     * @brief Look up the cached value for a key.
+     *
+     * Returns a pointer to the stored value when key equals the stored key
+     * and a value is held; returns nullptr otherwise.
+     */
+    const CacheValue* get(const CacheKey& key) const {
+        // A miss is either a different key or no stored value
+        if (!(key == key_ && value_)) {
+            return nullptr;
+        }
+        return value_.get();
+    }
+
+    ///@brief Store value under key, replacing the previous entry; returns a pointer to the stored value
+    const CacheValue* set(const CacheKey& key, std::unique_ptr<CacheValue> value) {
+        key_ = key;
+        value_.reset(value.release());
+
+        return value_.get();
+    }
+
+  private:
+    CacheKey key_;
+    std::unique_ptr<CacheValue> value_;
+};
+
+} // namespace vtr
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_color_map.cc b/third_party/vtr/libs/vtrutil/src/vtr_color_map.cc
new file mode 100644
index 000000000..7a100e25e
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_color_map.cc
@@ -0,0 +1,831 @@
+#include <cmath>
+#include <cstddef>
+#include "vtr_color_map.h"
+#include "vtr_assert.h"
+
+namespace vtr {
+
+//Inferno data from MatPlotLib
+static std::vector<Color<float>> inferno_data = {
+    {0.001462f, 0.000466f, 0.013866f},
+    {0.002267f, 0.001270f, 0.018570f},
+    {0.003299f, 0.002249f, 0.024239f},
+    {0.004547f, 0.003392f, 0.030909f},
+    {0.006006f, 0.004692f, 0.038558f},
+    {0.007676f, 0.006136f, 0.046836f},
+    {0.009561f, 0.007713f, 0.055143f},
+    {0.011663f, 0.009417f, 0.063460f},
+    {0.013995f, 0.011225f, 0.071862f},
+    {0.016561f, 0.013136f, 0.080282f},
+    {0.019373f, 0.015133f, 0.088767f},
+    {0.022447f, 0.017199f, 0.097327f},
+    {0.025793f, 0.019331f, 0.105930f},
+    {0.029432f, 0.021503f, 0.114621f},
+    {0.033385f, 0.023702f, 0.123397f},
+    {0.037668f, 0.025921f, 0.132232f},
+    {0.042253f, 0.028139f, 0.141141f},
+    {0.046915f, 0.030324f, 0.150164f},
+    {0.051644f, 0.032474f, 0.159254f},
+    {0.056449f, 0.034569f, 0.168414f},
+    {0.061340f, 0.036590f, 0.177642f},
+    {0.066331f, 0.038504f, 0.186962f},
+    {0.071429f, 0.040294f, 0.196354f},
+    {0.076637f, 0.041905f, 0.205799f},
+    {0.081962f, 0.043328f, 0.215289f},
+    {0.087411f, 0.044556f, 0.224813f},
+    {0.092990f, 0.045583f, 0.234358f},
+    {0.098702f, 0.046402f, 0.243904f},
+    {0.104551f, 0.047008f, 0.253430f},
+    {0.110536f, 0.047399f, 0.262912f},
+    {0.116656f, 0.047574f, 0.272321f},
+    {0.122908f, 0.047536f, 0.281624f},
+    {0.129285f, 0.047293f, 0.290788f},
+    {0.135778f, 0.046856f, 0.299776f},
+    {0.142378f, 0.046242f, 0.308553f},
+    {0.149073f, 0.045468f, 0.317085f},
+    {0.155850f, 0.044559f, 0.325338f},
+    {0.162689f, 0.043554f, 0.333277f},
+    {0.169575f, 0.042489f, 0.340874f},
+    {0.176493f, 0.041402f, 0.348111f},
+    {0.183429f, 0.040329f, 0.354971f},
+    {0.190367f, 0.039309f, 0.361447f},
+    {0.197297f, 0.038400f, 0.367535f},
+    {0.204209f, 0.037632f, 0.373238f},
+    {0.211095f, 0.037030f, 0.378563f},
+    {0.217949f, 0.036615f, 0.383522f},
+    {0.224763f, 0.036405f, 0.388129f},
+    {0.231538f, 0.036405f, 0.392400f},
+    {0.238273f, 0.036621f, 0.396353f},
+    {0.244967f, 0.037055f, 0.400007f},
+    {0.251620f, 0.037705f, 0.403378f},
+    {0.258234f, 0.038571f, 0.406485f},
+    {0.264810f, 0.039647f, 0.409345f},
+    {0.271347f, 0.040922f, 0.411976f},
+    {0.277850f, 0.042353f, 0.414392f},
+    {0.284321f, 0.043933f, 0.416608f},
+    {0.290763f, 0.045644f, 0.418637f},
+    {0.297178f, 0.047470f, 0.420491f},
+    {0.303568f, 0.049396f, 0.422182f},
+    {0.309935f, 0.051407f, 0.423721f},
+    {0.316282f, 0.053490f, 0.425116f},
+    {0.322610f, 0.055634f, 0.426377f},
+    {0.328921f, 0.057827f, 0.427511f},
+    {0.335217f, 0.060060f, 0.428524f},
+    {0.341500f, 0.062325f, 0.429425f},
+    {0.347771f, 0.064616f, 0.430217f},
+    {0.354032f, 0.066925f, 0.430906f},
+    {0.360284f, 0.069247f, 0.431497f},
+    {0.366529f, 0.071579f, 0.431994f},
+    {0.372768f, 0.073915f, 0.432400f},
+    {0.379001f, 0.076253f, 0.432719f},
+    {0.385228f, 0.078591f, 0.432955f},
+    {0.391453f, 0.080927f, 0.433109f},
+    {0.397674f, 0.083257f, 0.433183f},
+    {0.403894f, 0.085580f, 0.433179f},
+    {0.410113f, 0.087896f, 0.433098f},
+    {0.416331f, 0.090203f, 0.432943f},
+    {0.422549f, 0.092501f, 0.432714f},
+    {0.428768f, 0.094790f, 0.432412f},
+    {0.434987f, 0.097069f, 0.432039f},
+    {0.441207f, 0.099338f, 0.431594f},
+    {0.447428f, 0.101597f, 0.431080f},
+    {0.453651f, 0.103848f, 0.430498f},
+    {0.459875f, 0.106089f, 0.429846f},
+    {0.466100f, 0.108322f, 0.429125f},
+    {0.472328f, 0.110547f, 0.428334f},
+    {0.478558f, 0.112764f, 0.427475f},
+    {0.484789f, 0.114974f, 0.426548f},
+    {0.491022f, 0.117179f, 0.425552f},
+    {0.497257f, 0.119379f, 0.424488f},
+    {0.503493f, 0.121575f, 0.423356f},
+    {0.509730f, 0.123769f, 0.422156f},
+    {0.515967f, 0.125960f, 0.420887f},
+    {0.522206f, 0.128150f, 0.419549f},
+    {0.528444f, 0.130341f, 0.418142f},
+    {0.534683f, 0.132534f, 0.416667f},
+    {0.540920f, 0.134729f, 0.415123f},
+    {0.547157f, 0.136929f, 0.413511f},
+    {0.553392f, 0.139134f, 0.411829f},
+    {0.559624f, 0.141346f, 0.410078f},
+    {0.565854f, 0.143567f, 0.408258f},
+    {0.572081f, 0.145797f, 0.406369f},
+    {0.578304f, 0.148039f, 0.404411f},
+    {0.584521f, 0.150294f, 0.402385f},
+    {0.590734f, 0.152563f, 0.400290f},
+    {0.596940f, 0.154848f, 0.398125f},
+    {0.603139f, 0.157151f, 0.395891f},
+    {0.609330f, 0.159474f, 0.393589f},
+    {0.615513f, 0.161817f, 0.391219f},
+    {0.621685f, 0.164184f, 0.388781f},
+    {0.627847f, 0.166575f, 0.386276f},
+    {0.633998f, 0.168992f, 0.383704f},
+    {0.640135f, 0.171438f, 0.381065f},
+    {0.646260f, 0.173914f, 0.378359f},
+    {0.652369f, 0.176421f, 0.375586f},
+    {0.658463f, 0.178962f, 0.372748f},
+    {0.664540f, 0.181539f, 0.369846f},
+    {0.670599f, 0.184153f, 0.366879f},
+    {0.676638f, 0.186807f, 0.363849f},
+    {0.682656f, 0.189501f, 0.360757f},
+    {0.688653f, 0.192239f, 0.357603f},
+    {0.694627f, 0.195021f, 0.354388f},
+    {0.700576f, 0.197851f, 0.351113f},
+    {0.706500f, 0.200728f, 0.347777f},
+    {0.712396f, 0.203656f, 0.344383f},
+    {0.718264f, 0.206636f, 0.340931f},
+    {0.724103f, 0.209670f, 0.337424f},
+    {0.729909f, 0.212759f, 0.333861f},
+    {0.735683f, 0.215906f, 0.330245f},
+    {0.741423f, 0.219112f, 0.326576f},
+    {0.747127f, 0.222378f, 0.322856f},
+    {0.752794f, 0.225706f, 0.319085f},
+    {0.758422f, 0.229097f, 0.315266f},
+    {0.764010f, 0.232554f, 0.311399f},
+    {0.769556f, 0.236077f, 0.307485f},
+    {0.775059f, 0.239667f, 0.303526f},
+    {0.780517f, 0.243327f, 0.299523f},
+    {0.785929f, 0.247056f, 0.295477f},
+    {0.791293f, 0.250856f, 0.291390f},
+    {0.796607f, 0.254728f, 0.287264f},
+    {0.801871f, 0.258674f, 0.283099f},
+    {0.807082f, 0.262692f, 0.278898f},
+    {0.812239f, 0.266786f, 0.274661f},
+    {0.817341f, 0.270954f, 0.270390f},
+    {0.822386f, 0.275197f, 0.266085f},
+    {0.827372f, 0.279517f, 0.261750f},
+    {0.832299f, 0.283913f, 0.257383f},
+    {0.837165f, 0.288385f, 0.252988f},
+    {0.841969f, 0.292933f, 0.248564f},
+    {0.846709f, 0.297559f, 0.244113f},
+    {0.851384f, 0.302260f, 0.239636f},
+    {0.855992f, 0.307038f, 0.235133f},
+    {0.860533f, 0.311892f, 0.230606f},
+    {0.865006f, 0.316822f, 0.226055f},
+    {0.869409f, 0.321827f, 0.221482f},
+    {0.873741f, 0.326906f, 0.216886f},
+    {0.878001f, 0.332060f, 0.212268f},
+    {0.882188f, 0.337287f, 0.207628f},
+    {0.886302f, 0.342586f, 0.202968f},
+    {0.890341f, 0.347957f, 0.198286f},
+    {0.894305f, 0.353399f, 0.193584f},
+    {0.898192f, 0.358911f, 0.188860f},
+    {0.902003f, 0.364492f, 0.184116f},
+    {0.905735f, 0.370140f, 0.179350f},
+    {0.909390f, 0.375856f, 0.174563f},
+    {0.912966f, 0.381636f, 0.169755f},
+    {0.916462f, 0.387481f, 0.164924f},
+    {0.919879f, 0.393389f, 0.160070f},
+    {0.923215f, 0.399359f, 0.155193f},
+    {0.926470f, 0.405389f, 0.150292f},
+    {0.929644f, 0.411479f, 0.145367f},
+    {0.932737f, 0.417627f, 0.140417f},
+    {0.935747f, 0.423831f, 0.135440f},
+    {0.938675f, 0.430091f, 0.130438f},
+    {0.941521f, 0.436405f, 0.125409f},
+    {0.944285f, 0.442772f, 0.120354f},
+    {0.946965f, 0.449191f, 0.115272f},
+    {0.949562f, 0.455660f, 0.110164f},
+    {0.952075f, 0.462178f, 0.105031f},
+    {0.954506f, 0.468744f, 0.099874f},
+    {0.956852f, 0.475356f, 0.094695f},
+    {0.959114f, 0.482014f, 0.089499f},
+    {0.961293f, 0.488716f, 0.084289f},
+    {0.963387f, 0.495462f, 0.079073f},
+    {0.965397f, 0.502249f, 0.073859f},
+    {0.967322f, 0.509078f, 0.068659f},
+    {0.969163f, 0.515946f, 0.063488f},
+    {0.970919f, 0.522853f, 0.058367f},
+    {0.972590f, 0.529798f, 0.053324f},
+    {0.974176f, 0.536780f, 0.048392f},
+    {0.975677f, 0.543798f, 0.043618f},
+    {0.977092f, 0.550850f, 0.039050f},
+    {0.978422f, 0.557937f, 0.034931f},
+    {0.979666f, 0.565057f, 0.031409f},
+    {0.980824f, 0.572209f, 0.028508f},
+    {0.981895f, 0.579392f, 0.026250f},
+    {0.982881f, 0.586606f, 0.024661f},
+    {0.983779f, 0.593849f, 0.023770f},
+    {0.984591f, 0.601122f, 0.023606f},
+    {0.985315f, 0.608422f, 0.024202f},
+    {0.985952f, 0.615750f, 0.025592f},
+    {0.986502f, 0.623105f, 0.027814f},
+    {0.986964f, 0.630485f, 0.030908f},
+    {0.987337f, 0.637890f, 0.034916f},
+    {0.987622f, 0.645320f, 0.039886f},
+    {0.987819f, 0.652773f, 0.045581f},
+    {0.987926f, 0.660250f, 0.051750f},
+    {0.987945f, 0.667748f, 0.058329f},
+    {0.987874f, 0.675267f, 0.065257f},
+    {0.987714f, 0.682807f, 0.072489f},
+    {0.987464f, 0.690366f, 0.079990f},
+    {0.987124f, 0.697944f, 0.087731f},
+    {0.986694f, 0.705540f, 0.095694f},
+    {0.986175f, 0.713153f, 0.103863f},
+    {0.985566f, 0.720782f, 0.112229f},
+    {0.984865f, 0.728427f, 0.120785f},
+    {0.984075f, 0.736087f, 0.129527f},
+    {0.983196f, 0.743758f, 0.138453f},
+    {0.982228f, 0.751442f, 0.147565f},
+    {0.981173f, 0.759135f, 0.156863f},
+    {0.980032f, 0.766837f, 0.166353f},
+    {0.978806f, 0.774545f, 0.176037f},
+    {0.977497f, 0.782258f, 0.185923f},
+    {0.976108f, 0.789974f, 0.196018f},
+    {0.974638f, 0.797692f, 0.206332f},
+    {0.973088f, 0.805409f, 0.216877f},
+    {0.971468f, 0.813122f, 0.227658f},
+    {0.969783f, 0.820825f, 0.238686f},
+    {0.968041f, 0.828515f, 0.249972f},
+    {0.966243f, 0.836191f, 0.261534f},
+    {0.964394f, 0.843848f, 0.273391f},
+    {0.962517f, 0.851476f, 0.285546f},
+    {0.960626f, 0.859069f, 0.298010f},
+    {0.958720f, 0.866624f, 0.310820f},
+    {0.956834f, 0.874129f, 0.323974f},
+    {0.954997f, 0.881569f, 0.337475f},
+    {0.953215f, 0.888942f, 0.351369f},
+    {0.951546f, 0.896226f, 0.365627f},
+    {0.950018f, 0.903409f, 0.380271f},
+    {0.948683f, 0.910473f, 0.395289f},
+    {0.947594f, 0.917399f, 0.410665f},
+    {0.946809f, 0.924168f, 0.426373f},
+    {0.946392f, 0.930761f, 0.442367f},
+    {0.946403f, 0.937159f, 0.458592f},
+    {0.946903f, 0.943348f, 0.474970f},
+    {0.947937f, 0.949318f, 0.491426f},
+    {0.949545f, 0.955063f, 0.507860f},
+    {0.951740f, 0.960587f, 0.524203f},
+    {0.954529f, 0.965896f, 0.540361f},
+    {0.957896f, 0.971003f, 0.556275f},
+    {0.961812f, 0.975924f, 0.571925f},
+    {0.966249f, 0.980678f, 0.587206f},
+    {0.971162f, 0.985282f, 0.602154f},
+    {0.976511f, 0.989753f, 0.616760f},
+    {0.982257f, 0.994109f, 0.631017f},
+    {0.988362f, 0.998364f, 0.644924f}};
+
+//Plasma data from MatPlotLib
+static std::vector<Color<float>> plasma_data = {
+    {5.03832136e-02f, 2.98028976e-02f, 5.27974883e-01f},
+    {6.35363639e-02f, 2.84259729e-02f, 5.33123681e-01f},
+    {7.53531234e-02f, 2.72063728e-02f, 5.38007001e-01f},
+    {8.62217979e-02f, 2.61253206e-02f, 5.42657691e-01f},
+    {9.63786097e-02f, 2.51650976e-02f, 5.47103487e-01f},
+    {1.05979704e-01f, 2.43092436e-02f, 5.51367851e-01f},
+    {1.15123641e-01f, 2.35562500e-02f, 5.55467728e-01f},
+    {1.23902903e-01f, 2.28781011e-02f, 5.59423480e-01f},
+    {1.32380720e-01f, 2.22583774e-02f, 5.63250116e-01f},
+    {1.40603076e-01f, 2.16866674e-02f, 5.66959485e-01f},
+    {1.48606527e-01f, 2.11535876e-02f, 5.70561711e-01f},
+    {1.56420649e-01f, 2.06507174e-02f, 5.74065446e-01f},
+    {1.64069722e-01f, 2.01705326e-02f, 5.77478074e-01f},
+    {1.71573925e-01f, 1.97063415e-02f, 5.80805890e-01f},
+    {1.78950212e-01f, 1.92522243e-02f, 5.84054243e-01f},
+    {1.86212958e-01f, 1.88029767e-02f, 5.87227661e-01f},
+    {1.93374449e-01f, 1.83540593e-02f, 5.90329954e-01f},
+    {2.00445260e-01f, 1.79015512e-02f, 5.93364304e-01f},
+    {2.07434551e-01f, 1.74421086e-02f, 5.96333341e-01f},
+    {2.14350298e-01f, 1.69729276e-02f, 5.99239207e-01f},
+    {2.21196750e-01f, 1.64970484e-02f, 6.02083323e-01f},
+    {2.27982971e-01f, 1.60071509e-02f, 6.04867403e-01f},
+    {2.34714537e-01f, 1.55015065e-02f, 6.07592438e-01f},
+    {2.41396253e-01f, 1.49791041e-02f, 6.10259089e-01f},
+    {2.48032377e-01f, 1.44393586e-02f, 6.12867743e-01f},
+    {2.54626690e-01f, 1.38820918e-02f, 6.15418537e-01f},
+    {2.61182562e-01f, 1.33075156e-02f, 6.17911385e-01f},
+    {2.67702993e-01f, 1.27162163e-02f, 6.20345997e-01f},
+    {2.74190665e-01f, 1.21091423e-02f, 6.22721903e-01f},
+    {2.80647969e-01f, 1.14875915e-02f, 6.25038468e-01f},
+    {2.87076059e-01f, 1.08554862e-02f, 6.27294975e-01f},
+    {2.93477695e-01f, 1.02128849e-02f, 6.29490490e-01f},
+    {2.99855122e-01f, 9.56079551e-03f, 6.31623923e-01f},
+    {3.06209825e-01f, 8.90185346e-03f, 6.33694102e-01f},
+    {3.12543124e-01f, 8.23900704e-03f, 6.35699759e-01f},
+    {3.18856183e-01f, 7.57551051e-03f, 6.37639537e-01f},
+    {3.25150025e-01f, 6.91491734e-03f, 6.39512001e-01f},
+    {3.31425547e-01f, 6.26107379e-03f, 6.41315649e-01f},
+    {3.37683446e-01f, 5.61830889e-03f, 6.43048936e-01f},
+    {3.43924591e-01f, 4.99053080e-03f, 6.44710195e-01f},
+    {3.50149699e-01f, 4.38202557e-03f, 6.46297711e-01f},
+    {3.56359209e-01f, 3.79781761e-03f, 6.47809772e-01f},
+    {3.62553473e-01f, 3.24319591e-03f, 6.49244641e-01f},
+    {3.68732762e-01f, 2.72370721e-03f, 6.50600561e-01f},
+    {3.74897270e-01f, 2.24514897e-03f, 6.51875762e-01f},
+    {3.81047116e-01f, 1.81356205e-03f, 6.53068467e-01f},
+    {3.87182639e-01f, 1.43446923e-03f, 6.54176761e-01f},
+    {3.93304010e-01f, 1.11388259e-03f, 6.55198755e-01f},
+    {3.99410821e-01f, 8.59420809e-04f, 6.56132835e-01f},
+    {4.05502914e-01f, 6.78091517e-04f, 6.56977276e-01f},
+    {4.11580082e-01f, 5.77101735e-04f, 6.57730380e-01f},
+    {4.17642063e-01f, 5.63847476e-04f, 6.58390492e-01f},
+    {4.23688549e-01f, 6.45902780e-04f, 6.58956004e-01f},
+    {4.29719186e-01f, 8.31008207e-04f, 6.59425363e-01f},
+    {4.35733575e-01f, 1.12705875e-03f, 6.59797077e-01f},
+    {4.41732123e-01f, 1.53984779e-03f, 6.60069009e-01f},
+    {4.47713600e-01f, 2.07954744e-03f, 6.60240367e-01f},
+    {4.53677394e-01f, 2.75470302e-03f, 6.60309966e-01f},
+    {4.59622938e-01f, 3.57374415e-03f, 6.60276655e-01f},
+    {4.65549631e-01f, 4.54518084e-03f, 6.60139383e-01f},
+    {4.71456847e-01f, 5.67758762e-03f, 6.59897210e-01f},
+    {4.77343929e-01f, 6.97958743e-03f, 6.59549311e-01f},
+    {4.83210198e-01f, 8.45983494e-03f, 6.59094989e-01f},
+    {4.89054951e-01f, 1.01269996e-02f, 6.58533677e-01f},
+    {4.94877466e-01f, 1.19897486e-02f, 6.57864946e-01f},
+    {5.00677687e-01f, 1.40550640e-02f, 6.57087561e-01f},
+    {5.06454143e-01f, 1.63333443e-02f, 6.56202294e-01f},
+    {5.12206035e-01f, 1.88332232e-02f, 6.55209222e-01f},
+    {5.17932580e-01f, 2.15631918e-02f, 6.54108545e-01f},
+    {5.23632990e-01f, 2.45316468e-02f, 6.52900629e-01f},
+    {5.29306474e-01f, 2.77468735e-02f, 6.51586010e-01f},
+    {5.34952244e-01f, 3.12170300e-02f, 6.50165396e-01f},
+    {5.40569510e-01f, 3.49501310e-02f, 6.48639668e-01f},
+    {5.46157494e-01f, 3.89540334e-02f, 6.47009884e-01f},
+    {5.51715423e-01f, 4.31364795e-02f, 6.45277275e-01f},
+    {5.57242538e-01f, 4.73307585e-02f, 6.43443250e-01f},
+    {5.62738096e-01f, 5.15448092e-02f, 6.41509389e-01f},
+    {5.68201372e-01f, 5.57776706e-02f, 6.39477440e-01f},
+    {5.73631859e-01f, 6.00281369e-02f, 6.37348841e-01f},
+    {5.79028682e-01f, 6.42955547e-02f, 6.35126108e-01f},
+    {5.84391137e-01f, 6.85790261e-02f, 6.32811608e-01f},
+    {5.89718606e-01f, 7.28775875e-02f, 6.30407727e-01f},
+    {5.95010505e-01f, 7.71902878e-02f, 6.27916992e-01f},
+    {6.00266283e-01f, 8.15161895e-02f, 6.25342058e-01f},
+    {6.05485428e-01f, 8.58543713e-02f, 6.22685703e-01f},
+    {6.10667469e-01f, 9.02039303e-02f, 6.19950811e-01f},
+    {6.15811974e-01f, 9.45639838e-02f, 6.17140367e-01f},
+    {6.20918555e-01f, 9.89336721e-02f, 6.14257440e-01f},
+    {6.25986869e-01f, 1.03312160e-01f, 6.11305174e-01f},
+    {6.31016615e-01f, 1.07698641e-01f, 6.08286774e-01f},
+    {6.36007543e-01f, 1.12092335e-01f, 6.05205491e-01f},
+    {6.40959444e-01f, 1.16492495e-01f, 6.02064611e-01f},
+    {6.45872158e-01f, 1.20898405e-01f, 5.98867442e-01f},
+    {6.50745571e-01f, 1.25309384e-01f, 5.95617300e-01f},
+    {6.55579615e-01f, 1.29724785e-01f, 5.92317494e-01f},
+    {6.60374266e-01f, 1.34143997e-01f, 5.88971318e-01f},
+    {6.65129493e-01f, 1.38566428e-01f, 5.85582301e-01f},
+    {6.69845385e-01f, 1.42991540e-01f, 5.82153572e-01f},
+    {6.74522060e-01f, 1.47418835e-01f, 5.78688247e-01f},
+    {6.79159664e-01f, 1.51847851e-01f, 5.75189431e-01f},
+    {6.83758384e-01f, 1.56278163e-01f, 5.71660158e-01f},
+    {6.88318440e-01f, 1.60709387e-01f, 5.68103380e-01f},
+    {6.92840088e-01f, 1.65141174e-01f, 5.64521958e-01f},
+    {6.97323615e-01f, 1.69573215e-01f, 5.60918659e-01f},
+    {7.01769334e-01f, 1.74005236e-01f, 5.57296144e-01f},
+    {7.06177590e-01f, 1.78437000e-01f, 5.53656970e-01f},
+    {7.10548747e-01f, 1.82868306e-01f, 5.50003579e-01f},
+    {7.14883195e-01f, 1.87298986e-01f, 5.46338299e-01f},
+    {7.19181339e-01f, 1.91728906e-01f, 5.42663338e-01f},
+    {7.23443604e-01f, 1.96157962e-01f, 5.38980786e-01f},
+    {7.27670428e-01f, 2.00586086e-01f, 5.35292612e-01f},
+    {7.31862231e-01f, 2.05013174e-01f, 5.31600995e-01f},
+    {7.36019424e-01f, 2.09439071e-01f, 5.27908434e-01f},
+    {7.40142557e-01f, 2.13863965e-01f, 5.24215533e-01f},
+    {7.44232102e-01f, 2.18287899e-01f, 5.20523766e-01f},
+    {7.48288533e-01f, 2.22710942e-01f, 5.16834495e-01f},
+    {7.52312321e-01f, 2.27133187e-01f, 5.13148963e-01f},
+    {7.56303937e-01f, 2.31554749e-01f, 5.09468305e-01f},
+    {7.60263849e-01f, 2.35975765e-01f, 5.05793543e-01f},
+    {7.64192516e-01f, 2.40396394e-01f, 5.02125599e-01f},
+    {7.68090391e-01f, 2.44816813e-01f, 4.98465290e-01f},
+    {7.71957916e-01f, 2.49237220e-01f, 4.94813338e-01f},
+    {7.75795522e-01f, 2.53657797e-01f, 4.91170517e-01f},
+    {7.79603614e-01f, 2.58078397e-01f, 4.87539124e-01f},
+    {7.83382636e-01f, 2.62499662e-01f, 4.83917732e-01f},
+    {7.87132978e-01f, 2.66921859e-01f, 4.80306702e-01f},
+    {7.90855015e-01f, 2.71345267e-01f, 4.76706319e-01f},
+    {7.94549101e-01f, 2.75770179e-01f, 4.73116798e-01f},
+    {7.98215577e-01f, 2.80196901e-01f, 4.69538286e-01f},
+    {8.01854758e-01f, 2.84625750e-01f, 4.65970871e-01f},
+    {8.05466945e-01f, 2.89057057e-01f, 4.62414580e-01f},
+    {8.09052419e-01f, 2.93491117e-01f, 4.58869577e-01f},
+    {8.12611506e-01f, 2.97927865e-01f, 4.55337565e-01f},
+    {8.16144382e-01f, 3.02368130e-01f, 4.51816385e-01f},
+    {8.19651255e-01f, 3.06812282e-01f, 4.48305861e-01f},
+    {8.23132309e-01f, 3.11260703e-01f, 4.44805781e-01f},
+    {8.26587706e-01f, 3.15713782e-01f, 4.41315901e-01f},
+    {8.30017584e-01f, 3.20171913e-01f, 4.37835947e-01f},
+    {8.33422053e-01f, 3.24635499e-01f, 4.34365616e-01f},
+    {8.36801237e-01f, 3.29104836e-01f, 4.30905052e-01f},
+    {8.40155276e-01f, 3.33580106e-01f, 4.27454836e-01f},
+    {8.43484103e-01f, 3.38062109e-01f, 4.24013059e-01f},
+    {8.46787726e-01f, 3.42551272e-01f, 4.20579333e-01f},
+    {8.50066132e-01f, 3.47048028e-01f, 4.17153264e-01f},
+    {8.53319279e-01f, 3.51552815e-01f, 4.13734445e-01f},
+    {8.56547103e-01f, 3.56066072e-01f, 4.10322469e-01f},
+    {8.59749520e-01f, 3.60588229e-01f, 4.06916975e-01f},
+    {8.62926559e-01f, 3.65119408e-01f, 4.03518809e-01f},
+    {8.66077920e-01f, 3.69660446e-01f, 4.00126027e-01f},
+    {8.69203436e-01f, 3.74211795e-01f, 3.96738211e-01f},
+    {8.72302917e-01f, 3.78773910e-01f, 3.93354947e-01f},
+    {8.75376149e-01f, 3.83347243e-01f, 3.89975832e-01f},
+    {8.78422895e-01f, 3.87932249e-01f, 3.86600468e-01f},
+    {8.81442916e-01f, 3.92529339e-01f, 3.83228622e-01f},
+    {8.84435982e-01f, 3.97138877e-01f, 3.79860246e-01f},
+    {8.87401682e-01f, 4.01761511e-01f, 3.76494232e-01f},
+    {8.90339687e-01f, 4.06397694e-01f, 3.73130228e-01f},
+    {8.93249647e-01f, 4.11047871e-01f, 3.69767893e-01f},
+    {8.96131191e-01f, 4.15712489e-01f, 3.66406907e-01f},
+    {8.98983931e-01f, 4.20391986e-01f, 3.63046965e-01f},
+    {9.01807455e-01f, 4.25086807e-01f, 3.59687758e-01f},
+    {9.04601295e-01f, 4.29797442e-01f, 3.56328796e-01f},
+    {9.07364995e-01f, 4.34524335e-01f, 3.52969777e-01f},
+    {9.10098088e-01f, 4.39267908e-01f, 3.49610469e-01f},
+    {9.12800095e-01f, 4.44028574e-01f, 3.46250656e-01f},
+    {9.15470518e-01f, 4.48806744e-01f, 3.42890148e-01f},
+    {9.18108848e-01f, 4.53602818e-01f, 3.39528771e-01f},
+    {9.20714383e-01f, 4.58417420e-01f, 3.36165582e-01f},
+    {9.23286660e-01f, 4.63250828e-01f, 3.32800827e-01f},
+    {9.25825146e-01f, 4.68103387e-01f, 3.29434512e-01f},
+    {9.28329275e-01f, 4.72975465e-01f, 3.26066550e-01f},
+    {9.30798469e-01f, 4.77867420e-01f, 3.22696876e-01f},
+    {9.33232140e-01f, 4.82779603e-01f, 3.19325444e-01f},
+    {9.35629684e-01f, 4.87712357e-01f, 3.15952211e-01f},
+    {9.37990034e-01f, 4.92666544e-01f, 3.12575440e-01f},
+    {9.40312939e-01f, 4.97642038e-01f, 3.09196628e-01f},
+    {9.42597771e-01f, 5.02639147e-01f, 3.05815824e-01f},
+    {9.44843893e-01f, 5.07658169e-01f, 3.02433101e-01f},
+    {9.47050662e-01f, 5.12699390e-01f, 2.99048555e-01f},
+    {9.49217427e-01f, 5.17763087e-01f, 2.95662308e-01f},
+    {9.51343530e-01f, 5.22849522e-01f, 2.92274506e-01f},
+    {9.53427725e-01f, 5.27959550e-01f, 2.88883445e-01f},
+    {9.55469640e-01f, 5.33093083e-01f, 2.85490391e-01f},
+    {9.57468770e-01f, 5.38250172e-01f, 2.82096149e-01f},
+    {9.59424430e-01f, 5.43431038e-01f, 2.78700990e-01f},
+    {9.61335930e-01f, 5.48635890e-01f, 2.75305214e-01f},
+    {9.63202573e-01f, 5.53864931e-01f, 2.71909159e-01f},
+    {9.65023656e-01f, 5.59118349e-01f, 2.68513200e-01f},
+    {9.66798470e-01f, 5.64396327e-01f, 2.65117752e-01f},
+    {9.68525639e-01f, 5.69699633e-01f, 2.61721488e-01f},
+    {9.70204593e-01f, 5.75028270e-01f, 2.58325424e-01f},
+    {9.71835007e-01f, 5.80382015e-01f, 2.54931256e-01f},
+    {9.73416145e-01f, 5.85761012e-01f, 2.51539615e-01f},
+    {9.74947262e-01f, 5.91165394e-01f, 2.48151200e-01f},
+    {9.76427606e-01f, 5.96595287e-01f, 2.44766775e-01f},
+    {9.77856416e-01f, 6.02050811e-01f, 2.41387186e-01f},
+    {9.79232922e-01f, 6.07532077e-01f, 2.38013359e-01f},
+    {9.80556344e-01f, 6.13039190e-01f, 2.34646316e-01f},
+    {9.81825890e-01f, 6.18572250e-01f, 2.31287178e-01f},
+    {9.83040742e-01f, 6.24131362e-01f, 2.27937141e-01f},
+    {9.84198924e-01f, 6.29717516e-01f, 2.24595006e-01f},
+    {9.85300760e-01f, 6.35329876e-01f, 2.21264889e-01f},
+    {9.86345421e-01f, 6.40968508e-01f, 2.17948456e-01f},
+    {9.87332067e-01f, 6.46633475e-01f, 2.14647532e-01f},
+    {9.88259846e-01f, 6.52324832e-01f, 2.11364122e-01f},
+    {9.89127893e-01f, 6.58042630e-01f, 2.08100426e-01f},
+    {9.89935328e-01f, 6.63786914e-01f, 2.04858855e-01f},
+    {9.90681261e-01f, 6.69557720e-01f, 2.01642049e-01f},
+    {9.91364787e-01f, 6.75355082e-01f, 1.98452900e-01f},
+    {9.91984990e-01f, 6.81179025e-01f, 1.95294567e-01f},
+    {9.92540939e-01f, 6.87029567e-01f, 1.92170500e-01f},
+    {9.93031693e-01f, 6.92906719e-01f, 1.89084459e-01f},
+    {9.93456302e-01f, 6.98810484e-01f, 1.86040537e-01f},
+    {9.93813802e-01f, 7.04740854e-01f, 1.83043180e-01f},
+    {9.94103226e-01f, 7.10697814e-01f, 1.80097207e-01f},
+    {9.94323596e-01f, 7.16681336e-01f, 1.77207826e-01f},
+    {9.94473934e-01f, 7.22691379e-01f, 1.74380656e-01f},
+    {9.94553260e-01f, 7.28727890e-01f, 1.71621733e-01f},
+    {9.94560594e-01f, 7.34790799e-01f, 1.68937522e-01f},
+    {9.94494964e-01f, 7.40880020e-01f, 1.66334918e-01f},
+    {9.94355411e-01f, 7.46995448e-01f, 1.63821243e-01f},
+    {9.94140989e-01f, 7.53136955e-01f, 1.61404226e-01f},
+    {9.93850778e-01f, 7.59304390e-01f, 1.59091984e-01f},
+    {9.93482190e-01f, 7.65498551e-01f, 1.56890625e-01f},
+    {9.93033251e-01f, 7.71719833e-01f, 1.54807583e-01f},
+    {9.92505214e-01f, 7.77966775e-01f, 1.52854862e-01f},
+    {9.91897270e-01f, 7.84239120e-01f, 1.51041581e-01f},
+    {9.91208680e-01f, 7.90536569e-01f, 1.49376885e-01f},
+    {9.90438793e-01f, 7.96858775e-01f, 1.47869810e-01f},
+    {9.89587065e-01f, 8.03205337e-01f, 1.46529128e-01f},
+    {9.88647741e-01f, 8.09578605e-01f, 1.45357284e-01f},
+    {9.87620557e-01f, 8.15977942e-01f, 1.44362644e-01f},
+    {9.86509366e-01f, 8.22400620e-01f, 1.43556679e-01f},
+    {9.85314198e-01f, 8.28845980e-01f, 1.42945116e-01f},
+    {9.84031139e-01f, 8.35315360e-01f, 1.42528388e-01f},
+    {9.82652820e-01f, 8.41811730e-01f, 1.42302653e-01f},
+    {9.81190389e-01f, 8.48328902e-01f, 1.42278607e-01f},
+    {9.79643637e-01f, 8.54866468e-01f, 1.42453425e-01f},
+    {9.77994918e-01f, 8.61432314e-01f, 1.42808191e-01f},
+    {9.76264977e-01f, 8.68015998e-01f, 1.43350944e-01f},
+    {9.74443038e-01f, 8.74622194e-01f, 1.44061156e-01f},
+    {9.72530009e-01f, 8.81250063e-01f, 1.44922913e-01f},
+    {9.70532932e-01f, 8.87896125e-01f, 1.45918663e-01f},
+    {9.68443477e-01f, 8.94563989e-01f, 1.47014438e-01f},
+    {9.66271225e-01f, 9.01249365e-01f, 1.48179639e-01f},
+    {9.64021057e-01f, 9.07950379e-01f, 1.49370428e-01f},
+    {9.61681481e-01f, 9.14672479e-01f, 1.50520343e-01f},
+    {9.59275646e-01f, 9.21406537e-01f, 1.51566019e-01f},
+    {9.56808068e-01f, 9.28152065e-01f, 1.52409489e-01f},
+    {9.54286813e-01f, 9.34907730e-01f, 1.52921158e-01f},
+    {9.51726083e-01f, 9.41670605e-01f, 1.52925363e-01f},
+    {9.49150533e-01f, 9.48434900e-01f, 1.52177604e-01f},
+    {9.46602270e-01f, 9.55189860e-01f, 1.50327944e-01f},
+    {9.44151742e-01f, 9.61916487e-01f, 1.46860789e-01f},
+    {9.41896120e-01f, 9.68589814e-01f, 1.40955606e-01f},
+    {9.40015097e-01f, 9.75158357e-01f, 1.31325517e-01f}};
+
+//Viridis color map data from Matplotlib (256 RGB entries, components in [0, 1])
+static std::vector<Color<float>> viridis_data = {
+    {0.26700401f, 0.00487433f, 0.32941519f},
+    {0.26851048f, 0.00960483f, 0.33542652f},
+    {0.26994384f, 0.01462494f, 0.34137895f},
+    {0.27130489f, 0.01994186f, 0.34726862f},
+    {0.27259384f, 0.02556309f, 0.35309303f},
+    {0.27380934f, 0.03149748f, 0.35885256f},
+    {0.27495242f, 0.03775181f, 0.36454323f},
+    {0.27602238f, 0.04416723f, 0.37016418f},
+    {0.2770184f, 0.05034437f, 0.37571452f},
+    {0.27794143f, 0.05632444f, 0.38119074f},
+    {0.27879067f, 0.06214536f, 0.38659204f},
+    {0.2795655f, 0.06783587f, 0.39191723f},
+    {0.28026658f, 0.07341724f, 0.39716349f},
+    {0.28089358f, 0.07890703f, 0.40232944f},
+    {0.28144581f, 0.0843197f, 0.40741404f},
+    {0.28192358f, 0.08966622f, 0.41241521f},
+    {0.28232739f, 0.09495545f, 0.41733086f},
+    {0.28265633f, 0.10019576f, 0.42216032f},
+    {0.28291049f, 0.10539345f, 0.42690202f},
+    {0.28309095f, 0.11055307f, 0.43155375f},
+    {0.28319704f, 0.11567966f, 0.43611482f},
+    {0.28322882f, 0.12077701f, 0.44058404f},
+    {0.28318684f, 0.12584799f, 0.44496f},
+    {0.283072f, 0.13089477f, 0.44924127f},
+    {0.28288389f, 0.13592005f, 0.45342734f},
+    {0.28262297f, 0.14092556f, 0.45751726f},
+    {0.28229037f, 0.14591233f, 0.46150995f},
+    {0.28188676f, 0.15088147f, 0.46540474f},
+    {0.28141228f, 0.15583425f, 0.46920128f},
+    {0.28086773f, 0.16077132f, 0.47289909f},
+    {0.28025468f, 0.16569272f, 0.47649762f},
+    {0.27957399f, 0.17059884f, 0.47999675f},
+    {0.27882618f, 0.1754902f, 0.48339654f},
+    {0.27801236f, 0.18036684f, 0.48669702f},
+    {0.27713437f, 0.18522836f, 0.48989831f},
+    {0.27619376f, 0.19007447f, 0.49300074f},
+    {0.27519116f, 0.1949054f, 0.49600488f},
+    {0.27412802f, 0.19972086f, 0.49891131f},
+    {0.27300596f, 0.20452049f, 0.50172076f},
+    {0.27182812f, 0.20930306f, 0.50443413f},
+    {0.27059473f, 0.21406899f, 0.50705243f},
+    {0.26930756f, 0.21881782f, 0.50957678f},
+    {0.26796846f, 0.22354911f, 0.5120084f},
+    {0.26657984f, 0.2282621f, 0.5143487f},
+    {0.2651445f, 0.23295593f, 0.5165993f},
+    {0.2636632f, 0.23763078f, 0.51876163f},
+    {0.26213801f, 0.24228619f, 0.52083736f},
+    {0.26057103f, 0.2469217f, 0.52282822f},
+    {0.25896451f, 0.25153685f, 0.52473609f},
+    {0.25732244f, 0.2561304f, 0.52656332f},
+    {0.25564519f, 0.26070284f, 0.52831152f},
+    {0.25393498f, 0.26525384f, 0.52998273f},
+    {0.25219404f, 0.26978306f, 0.53157905f},
+    {0.25042462f, 0.27429024f, 0.53310261f},
+    {0.24862899f, 0.27877509f, 0.53455561f},
+    {0.2468114f, 0.28323662f, 0.53594093f},
+    {0.24497208f, 0.28767547f, 0.53726018f},
+    {0.24311324f, 0.29209154f, 0.53851561f},
+    {0.24123708f, 0.29648471f, 0.53970946f},
+    {0.23934575f, 0.30085494f, 0.54084398f},
+    {0.23744138f, 0.30520222f, 0.5419214f},
+    {0.23552606f, 0.30952657f, 0.54294396f},
+    {0.23360277f, 0.31382773f, 0.54391424f},
+    {0.2316735f, 0.3181058f, 0.54483444f},
+    {0.22973926f, 0.32236127f, 0.54570633f},
+    {0.22780192f, 0.32659432f, 0.546532f},
+    {0.2258633f, 0.33080515f, 0.54731353f},
+    {0.22392515f, 0.334994f, 0.54805291f},
+    {0.22198915f, 0.33916114f, 0.54875211f},
+    {0.22005691f, 0.34330688f, 0.54941304f},
+    {0.21812995f, 0.34743154f, 0.55003755f},
+    {0.21620971f, 0.35153548f, 0.55062743f},
+    {0.21429757f, 0.35561907f, 0.5511844f},
+    {0.21239477f, 0.35968273f, 0.55171011f},
+    {0.2105031f, 0.36372671f, 0.55220646f},
+    {0.20862342f, 0.36775151f, 0.55267486f},
+    {0.20675628f, 0.37175775f, 0.55311653f},
+    {0.20490257f, 0.37574589f, 0.55353282f},
+    {0.20306309f, 0.37971644f, 0.55392505f},
+    {0.20123854f, 0.38366989f, 0.55429441f},
+    {0.1994295f, 0.38760678f, 0.55464205f},
+    {0.1976365f, 0.39152762f, 0.55496905f},
+    {0.19585993f, 0.39543297f, 0.55527637f},
+    {0.19410009f, 0.39932336f, 0.55556494f},
+    {0.19235719f, 0.40319934f, 0.55583559f},
+    {0.19063135f, 0.40706148f, 0.55608907f},
+    {0.18892259f, 0.41091033f, 0.55632606f},
+    {0.18723083f, 0.41474645f, 0.55654717f},
+    {0.18555593f, 0.4185704f, 0.55675292f},
+    {0.18389763f, 0.42238275f, 0.55694377f},
+    {0.18225561f, 0.42618405f, 0.5571201f},
+    {0.18062949f, 0.42997486f, 0.55728221f},
+    {0.17901879f, 0.43375572f, 0.55743035f},
+    {0.17742298f, 0.4375272f, 0.55756466f},
+    {0.17584148f, 0.44128981f, 0.55768526f},
+    {0.17427363f, 0.4450441f, 0.55779216f},
+    {0.17271876f, 0.4487906f, 0.55788532f},
+    {0.17117615f, 0.4525298f, 0.55796464f},
+    {0.16964573f, 0.45626209f, 0.55803034f},
+    {0.16812641f, 0.45998802f, 0.55808199f},
+    {0.1666171f, 0.46370813f, 0.55811913f},
+    {0.16511703f, 0.4674229f, 0.55814141f},
+    {0.16362543f, 0.47113278f, 0.55814842f},
+    {0.16214155f, 0.47483821f, 0.55813967f},
+    {0.16066467f, 0.47853961f, 0.55811466f},
+    {0.15919413f, 0.4822374f, 0.5580728f},
+    {0.15772933f, 0.48593197f, 0.55801347f},
+    {0.15626973f, 0.4896237f, 0.557936f},
+    {0.15481488f, 0.49331293f, 0.55783967f},
+    {0.15336445f, 0.49700003f, 0.55772371f},
+    {0.1519182f, 0.50068529f, 0.55758733f},
+    {0.15047605f, 0.50436904f, 0.55742968f},
+    {0.14903918f, 0.50805136f, 0.5572505f},
+    {0.14760731f, 0.51173263f, 0.55704861f},
+    {0.14618026f, 0.51541316f, 0.55682271f},
+    {0.14475863f, 0.51909319f, 0.55657181f},
+    {0.14334327f, 0.52277292f, 0.55629491f},
+    {0.14193527f, 0.52645254f, 0.55599097f},
+    {0.14053599f, 0.53013219f, 0.55565893f},
+    {0.13914708f, 0.53381201f, 0.55529773f},
+    {0.13777048f, 0.53749213f, 0.55490625f},
+    {0.1364085f, 0.54117264f, 0.55448339f},
+    {0.13506561f, 0.54485335f, 0.55402906f},
+    {0.13374299f, 0.54853458f, 0.55354108f},
+    {0.13244401f, 0.55221637f, 0.55301828f},
+    {0.13117249f, 0.55589872f, 0.55245948f},
+    {0.1299327f, 0.55958162f, 0.55186354f},
+    {0.12872938f, 0.56326503f, 0.55122927f},
+    {0.12756771f, 0.56694891f, 0.55055551f},
+    {0.12645338f, 0.57063316f, 0.5498411f},
+    {0.12539383f, 0.57431754f, 0.54908564f},
+    {0.12439474f, 0.57800205f, 0.5482874f},
+    {0.12346281f, 0.58168661f, 0.54744498f},
+    {0.12260562f, 0.58537105f, 0.54655722f},
+    {0.12183122f, 0.58905521f, 0.54562298f},
+    {0.12114807f, 0.59273889f, 0.54464114f},
+    {0.12056501f, 0.59642187f, 0.54361058f},
+    {0.12009154f, 0.60010387f, 0.54253043f},
+    {0.11973756f, 0.60378459f, 0.54139999f},
+    {0.11951163f, 0.60746388f, 0.54021751f},
+    {0.11942341f, 0.61114146f, 0.53898192f},
+    {0.11948255f, 0.61481702f, 0.53769219f},
+    {0.11969858f, 0.61849025f, 0.53634733f},
+    {0.12008079f, 0.62216081f, 0.53494633f},
+    {0.12063824f, 0.62582833f, 0.53348834f},
+    {0.12137972f, 0.62949242f, 0.53197275f},
+    {0.12231244f, 0.63315277f, 0.53039808f},
+    {0.12344358f, 0.63680899f, 0.52876343f},
+    {0.12477953f, 0.64046069f, 0.52706792f},
+    {0.12632581f, 0.64410744f, 0.52531069f},
+    {0.12808703f, 0.64774881f, 0.52349092f},
+    {0.13006688f, 0.65138436f, 0.52160791f},
+    {0.13226797f, 0.65501363f, 0.51966086f},
+    {0.13469183f, 0.65863619f, 0.5176488f},
+    {0.13733921f, 0.66225157f, 0.51557101f},
+    {0.14020991f, 0.66585927f, 0.5134268f},
+    {0.14330291f, 0.66945881f, 0.51121549f},
+    {0.1466164f, 0.67304968f, 0.50893644f},
+    {0.15014782f, 0.67663139f, 0.5065889f},
+    {0.15389405f, 0.68020343f, 0.50417217f},
+    {0.15785146f, 0.68376525f, 0.50168574f},
+    {0.16201598f, 0.68731632f, 0.49912906f},
+    {0.1663832f, 0.69085611f, 0.49650163f},
+    {0.1709484f, 0.69438405f, 0.49380294f},
+    {0.17570671f, 0.6978996f, 0.49103252f},
+    {0.18065314f, 0.70140222f, 0.48818938f},
+    {0.18578266f, 0.70489133f, 0.48527326f},
+    {0.19109018f, 0.70836635f, 0.48228395f},
+    {0.19657063f, 0.71182668f, 0.47922108f},
+    {0.20221902f, 0.71527175f, 0.47608431f},
+    {0.20803045f, 0.71870095f, 0.4728733f},
+    {0.21400015f, 0.72211371f, 0.46958774f},
+    {0.22012381f, 0.72550945f, 0.46622638f},
+    {0.2263969f, 0.72888753f, 0.46278934f},
+    {0.23281498f, 0.73224735f, 0.45927675f},
+    {0.2393739f, 0.73558828f, 0.45568838f},
+    {0.24606968f, 0.73890972f, 0.45202405f},
+    {0.25289851f, 0.74221104f, 0.44828355f},
+    {0.25985676f, 0.74549162f, 0.44446673f},
+    {0.26694127f, 0.74875084f, 0.44057284f},
+    {0.27414922f, 0.75198807f, 0.4366009f},
+    {0.28147681f, 0.75520266f, 0.43255207f},
+    {0.28892102f, 0.75839399f, 0.42842626f},
+    {0.29647899f, 0.76156142f, 0.42422341f},
+    {0.30414796f, 0.76470433f, 0.41994346f},
+    {0.31192534f, 0.76782207f, 0.41558638f},
+    {0.3198086f, 0.77091403f, 0.41115215f},
+    {0.3277958f, 0.77397953f, 0.40664011f},
+    {0.33588539f, 0.7770179f, 0.40204917f},
+    {0.34407411f, 0.78002855f, 0.39738103f},
+    {0.35235985f, 0.78301086f, 0.39263579f},
+    {0.36074053f, 0.78596419f, 0.38781353f},
+    {0.3692142f, 0.78888793f, 0.38291438f},
+    {0.37777892f, 0.79178146f, 0.3779385f},
+    {0.38643282f, 0.79464415f, 0.37288606f},
+    {0.39517408f, 0.79747541f, 0.36775726f},
+    {0.40400101f, 0.80027461f, 0.36255223f},
+    {0.4129135f, 0.80304099f, 0.35726893f},
+    {0.42190813f, 0.80577412f, 0.35191009f},
+    {0.43098317f, 0.80847343f, 0.34647607f},
+    {0.44013691f, 0.81113836f, 0.3409673f},
+    {0.44936763f, 0.81376835f, 0.33538426f},
+    {0.45867362f, 0.81636288f, 0.32972749f},
+    {0.46805314f, 0.81892143f, 0.32399761f},
+    {0.47750446f, 0.82144351f, 0.31819529f},
+    {0.4870258f, 0.82392862f, 0.31232133f},
+    {0.49661536f, 0.82637633f, 0.30637661f},
+    {0.5062713f, 0.82878621f, 0.30036211f},
+    {0.51599182f, 0.83115784f, 0.29427888f},
+    {0.52577622f, 0.83349064f, 0.2881265f},
+    {0.5356211f, 0.83578452f, 0.28190832f},
+    {0.5455244f, 0.83803918f, 0.27562602f},
+    {0.55548397f, 0.84025437f, 0.26928147f},
+    {0.5654976f, 0.8424299f, 0.26287683f},
+    {0.57556297f, 0.84456561f, 0.25641457f},
+    {0.58567772f, 0.84666139f, 0.24989748f},
+    {0.59583934f, 0.84871722f, 0.24332878f},
+    {0.60604528f, 0.8507331f, 0.23671214f},
+    {0.61629283f, 0.85270912f, 0.23005179f},
+    {0.62657923f, 0.85464543f, 0.22335258f},
+    {0.63690157f, 0.85654226f, 0.21662012f},
+    {0.64725685f, 0.85839991f, 0.20986086f},
+    {0.65764197f, 0.86021878f, 0.20308229f},
+    {0.66805369f, 0.86199932f, 0.19629307f},
+    {0.67848868f, 0.86374211f, 0.18950326f},
+    {0.68894351f, 0.86544779f, 0.18272455f},
+    {0.69941463f, 0.86711711f, 0.17597055f},
+    {0.70989842f, 0.86875092f, 0.16925712f},
+    {0.72039115f, 0.87035015f, 0.16260273f},
+    {0.73088902f, 0.87191584f, 0.15602894f},
+    {0.74138803f, 0.87344918f, 0.14956101f},
+    {0.75188414f, 0.87495143f, 0.14322828f},
+    {0.76237342f, 0.87642392f, 0.13706449f},
+    {0.77285183f, 0.87786808f, 0.13110864f},
+    {0.78331535f, 0.87928545f, 0.12540538f},
+    {0.79375994f, 0.88067763f, 0.12000532f},
+    {0.80418159f, 0.88204632f, 0.11496505f},
+    {0.81457634f, 0.88339329f, 0.11034678f},
+    {0.82494028f, 0.88472036f, 0.10621724f},
+    {0.83526959f, 0.88602943f, 0.1026459f},
+    {0.84556056f, 0.88732243f, 0.09970219f},
+    {0.8558096f, 0.88860134f, 0.09745186f},
+    {0.86601325f, 0.88986815f, 0.09595277f},
+    {0.87616824f, 0.89112487f, 0.09525046f},
+    {0.88627146f, 0.89237353f, 0.09537439f},
+    {0.89632002f, 0.89361614f, 0.09633538f},
+    {0.90631121f, 0.89485467f, 0.09812496f},
+    {0.91624212f, 0.89609127f, 0.1007168f},
+    {0.92610579f, 0.89732977f, 0.10407067f},
+    {0.93590444f, 0.8985704f, 0.10813094f},
+    {0.94563626f, 0.899815f, 0.11283773f},
+    {0.95529972f, 0.90106534f, 0.11812832f},
+    {0.96489353f, 0.90232311f, 0.12394051f},
+    {0.97441665f, 0.90358991f, 0.13021494f},
+    {0.98386829f, 0.90486726f, 0.13689671f},
+    {0.99324789f, 0.90615657f, 0.1439362f}};
+
+//Builds a color map over [min_val, max_val], keeping its own copy of color_data
+ColorMap::ColorMap(float min_val, float max_val, const std::vector<Color<float>>& color_data)
+    : min_(min_val), max_(max_val), color_data_(color_data) {
+    //The value range must not be inverted
+    VTR_ASSERT(max_ >= min_);
+}
+
+//Maps value (which must lie within [min_, max_]) to the nearest color table entry
+Color<float> ColorMap::color(float value) const {
+    VTR_ASSERT(value >= min_);
+    VTR_ASSERT(value <= max_);
+
+    //Normalized position of value within the range; a zero-width range maps
+    //every value to the first table entry
+    const float span = range();
+    const float norm_value = (span == 0) ? 0 : (value - min_) / span;
+
+    //Scale onto the table's index space and round to the nearest entry
+    size_t color_idx = std::round(norm_value * (color_data_.size() - 1));
+
+    VTR_ASSERT(color_idx < color_data_.size());
+
+    return color_data_[color_idx];
+}
+
+//Accessor for the lower bound of the value range
+//(the min_val passed at construction)
+float ColorMap::min() const { return min_; }
+
+//Accessor for the upper bound of the value range
+//(the max_val passed at construction)
+float ColorMap::max() const { return max_; }
+
+//Width of the value range: upper bound minus lower bound
+//(zero when both bounds coincide)
+float ColorMap::range() const { return max_ - min_; }
+
+//Inferno color map over [min_val, max_val], backed by the inferno_data table
+InfernoColorMap::InfernoColorMap(float min_val, float max_val) : ColorMap(min_val, max_val, inferno_data) {}
+
+//Plasma color map over [min_val, max_val], backed by the plasma_data table
+PlasmaColorMap::PlasmaColorMap(float min_val, float max_val) : ColorMap(min_val, max_val, plasma_data) {}
+
+//Viridis color map over [min_val, max_val], backed by the viridis_data table
+ViridisColorMap::ViridisColorMap(float min_val, float max_val) : ColorMap(min_val, max_val, viridis_data) {}
+
+} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_color_map.h b/third_party/vtr/libs/vtrutil/src/vtr_color_map.h
new file mode 100644
index 000000000..f313999ca
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_color_map.h
@@ -0,0 +1,58 @@
+#ifndef VTR_CMAP_H
+#define VTR_CMAP_H
+#include <vector>
+
+namespace vtr {
+
+///@brief Plain aggregate holding the red, green and blue components of a color
+template<typename T>
+struct Color {
+    T r; ///< red component
+    T g; ///< green component
+    T b; ///< blue component
+};
+
+///@brief Maps a scalar value range [min, max] onto a table of colors
+class ColorMap {
+  public:
+    ///@brief Construct a color map over [min, max] (requires max >= min) using color_data as the color table
+    ColorMap(float min, float max, const std::vector<Color<float>>& color_data);
+
+    ///@brief Virtual destructor (ColorMap serves as a base class for the concrete color maps)
+    virtual ~ColorMap() = default;
+
+    ///@brief Returns the table color nearest to value's position within [min, max]; value must lie in that range
+    Color<float> color(float value) const;
+
+    ///@brief Return the minimum value of this color map's range
+    float min() const;
+
+    ///@brief Return the maximum value of this color map's range
+    float max() const;
+
+    ///@brief Return the width of the value range (max - min)
+    float range() const;
+
+  private:
+    float min_; ///< Lower bound of the value range
+    float max_; ///< Upper bound of the value range
+    std::vector<Color<float>> color_data_; ///< Color table (copied from the constructor argument)
+};
+
+class InfernoColorMap : public ColorMap {
+  public:
+    InfernoColorMap(float min, float max); //Maps the value range [min, max] onto the inferno color table
+};
+
+class PlasmaColorMap : public ColorMap {
+  public:
+    PlasmaColorMap(float min, float max); //Maps the value range [min, max] onto the plasma color table
+};
+
+class ViridisColorMap : public ColorMap {
+  public:
+    ViridisColorMap(float min, float max); //Maps the value range [min, max] onto the viridis color table
+};
+
+} // namespace vtr
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_digest.cc b/third_party/vtr/libs/vtrutil/src/vtr_digest.cc
new file mode 100644
index 000000000..aedcd613e
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_digest.cc
@@ -0,0 +1,39 @@
+#include "vtr_digest.h"
+#include "vtr_error.h"
+
+#include <iostream>
+#include <fstream>
+#include <array>
+
+#include "picosha2.h"
+
+namespace vtr {
+
+//Hashes the raw bytes of the file at filepath; throws VtrError if it cannot be opened
+std::string secure_digest_file(const std::string& filepath) {
+    //Binary mode: text mode may translate line endings and so alter the digest
+    std::ifstream is(filepath, std::ios::binary);
+    if (!is) throw VtrError("Failed to open file", filepath);
+    return secure_digest_stream(is);
+}
+
+std::string secure_digest_stream(std::istream& is) {
+    //Read the stream in chunks and calculate the SHA256 digest
+    picosha2::hash256_one_by_one hasher;
+
+    std::array<char, 1024> buf;
+    //Loop on the read itself, not eof(): a stream that fails without hitting EOF
+    //(badbit) never sets eofbit. The last short read still reports gcount() bytes
+    while (is.read(buf.data(), buf.size()) || is.gcount() > 0) {
+        hasher.process(buf.begin(), buf.begin() + is.gcount());
+    }
+    hasher.finish();
+
+    //Return the digest as a hex string, prefixed with the hash type
+    //
+    //Prefixing with the hash type should allow us to differentiate if the
+    //hash type is ever changed in the future
+    return "SHA256:" + picosha2::get_hash_hex_string(hasher);
+}
+
+} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_digest.h b/third_party/vtr/libs/vtrutil/src/vtr_digest.h
new file mode 100644
index 000000000..4d67f8f6a
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_digest.h
@@ -0,0 +1,16 @@
+#ifndef VTR_DIGEST_H
+#define VTR_DIGEST_H
+#include <iosfwd>
+#include <string>
+
+namespace vtr {
+
+///@brief Generate a secure hash of the file at filepath
+std::string secure_digest_file(const std::string& filepath);
+
+///@brief Generate a secure hash of a stream
+std::string secure_digest_stream(std::istream& is);
+
+} // namespace vtr
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_dynamic_bitset.h b/third_party/vtr/libs/vtrutil/src/vtr_dynamic_bitset.h
new file mode 100644
index 000000000..291ddd239
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_dynamic_bitset.h
@@ -0,0 +1,72 @@
+#ifndef VTR_DYNAMIC_BITSET
+#define VTR_DYNAMIC_BITSET
+
+#include <limits>
+#include <vector>
+
+namespace vtr {
+/**
+ * @brief A container representing a set of flags, each of which is either set or reset
+ *
+ * The number of bits is chosen at runtime via resize(); bits are packed into words of type Storage
+ */
+template<typename Index = size_t, typename Storage = unsigned int>
+class dynamic_bitset {
+  public:
+    ///@brief Bits in underlying storage.
+    static constexpr size_t kWidth = std::numeric_limits<Storage>::digits;
+    static_assert(!std::numeric_limits<Storage>::is_signed,
+                  "dynamic_bitset storage must be unsigned!");
+    static_assert(std::numeric_limits<Storage>::is_integer,
+                  "dynamic_bitset storage must be integer!");
+
+    ///@brief Resize to hold at least size bits (rounded up to a whole number of storage words)
+    void resize(size_t size) {
+        array_.resize((size + kWidth - 1) / kWidth);
+    }
+
+    ///@brief Remove all bits and ask the underlying vector to release its storage
+    void clear() {
+        array_.clear();
+        array_.shrink_to_fit();
+    }
+
+    ///@brief Return the size of the bitset (total number of bits, a multiple of kWidth)
+    size_t size() const {
+        return array_.size() * kWidth;
+    }
+
+    ///@brief Fill the whole bitset with a specific value (0 or 1)
+    void fill(bool set) {
+        if (set) {
+            std::fill(array_.begin(), array_.end(), std::numeric_limits<Storage>::max());
+        } else {
+            std::fill(array_.begin(), array_.end(), Storage(0));
+        }
+    }
+
+    ///@brief Set a specific bit to val; masks are built from Storage(1) so the shift is valid for any Storage width
+    void set(Index index, bool val) {
+        size_t index_value(index);
+        VTR_ASSERT_SAFE(index_value < size());
+        if (val) {
+            array_[index_value / kWidth] |= (Storage(1) << (index_value % kWidth));
+        } else {
+            array_[index_value / kWidth] &= ~(Storage(1) << (index_value % kWidth));
+        }
+    }
+
+    ///@brief Return the value of a specific bit in the bitset
+    bool get(Index index) const {
+        size_t index_value(index);
+        VTR_ASSERT_SAFE(index_value < size());
+        return (array_[index_value / kWidth] & (Storage(1) << (index_value % kWidth))) != 0;
+    }
+
+  private:
+    std::vector<Storage> array_; ///< Packed bits: bit i lives in word i / kWidth at position i % kWidth
+};
+
+} // namespace vtr
+
+#endif /* VTR_DYNAMIC_BITSET */
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_error.h b/third_party/vtr/libs/vtrutil/src/vtr_error.h
new file mode 100644
index 000000000..d710c6630
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_error.h
@@ -0,0 +1,68 @@
+#ifndef VTR_ERROR_H
+#define VTR_ERROR_H
+
+#include <stdexcept>
+#include <string>
+
+/**
+ * @file
+ * @brief A utility container that can be used to identify VTR execution errors.
+ * 
+ * The recommended usage is to store information about the error in this container during an error event and then throw an exception with the container. If the exception is not handled (i.e. not caught), this will result in the termination of the program.
+ * 
+ * Error information can be displayed using the information stored within this container.
+ * 
+ */
+
+namespace vtr {
+
+/**
+ * @brief Container that holds information related to an error
+ *
+ * It holds different info related to a VTR error:
+ *      - error message
+ *      - file name associated with the error
+ *      - line number associated with the error
+ * 
+ * Example Usage:
+ * 
+ *      // creating and throwing an exception with a VtrError container that has an error occurring in file "error_file.txt" at line number 1
+ *       
+ *      throw vtr::VtrError("This is a program terminating error!", "error_file.txt", 1);
+ * 
+ */
+class VtrError : public std::runtime_error {
+  public:
+    ///@brief VtrError constructor
+    VtrError(std::string msg = "", std::string new_filename = "", size_t new_linenumber = -1)
+        : std::runtime_error(msg)
+        , filename_(new_filename)
+        , linenumber_(new_linenumber) {}
+
+    /**
+     * @brief gets the filename
+     *
+     * Returns the filename associated with this error.
+     * Returns an empty string if none is specified.
+     */
+    std::string filename() const { return filename_; }
+
+    ///@brief same as filename() but returns in c style string
+    const char* filename_c_str() const { return filename_.c_str(); }
+
+    /**
+     * @brief get the line number
+     *
+     * Returns the line number associated with this error.
+     * Returns size_t(-1) (the constructor's default) if none is specified.
+     */
+    size_t line() const { return linenumber_; }
+
+  private:
+    std::string filename_;
+    size_t linenumber_;
+};
+
+} // namespace vtr
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_expr_eval.cc b/third_party/vtr/libs/vtrutil/src/vtr_expr_eval.cc
new file mode 100644
index 000000000..165b9caa5
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_expr_eval.cc
@@ -0,0 +1,904 @@
+#include "vtr_expr_eval.h"
+#include "vtr_error.h"
+#include "vtr_util.h"
+#include "vtr_math.h"
+
+#include <string>
+#include <sstream>
+#include <iostream>
+
+/** global variables **/
+
+/** bp_state_globals is a variable that holds a member of type BreakpointState. This member is altered by the breakpoint class, the placer, and router and holds the most updated values for variables that can trigger breakpoints (e.g move_num, temp_num etc.) **/
+BreakpointStateGlobals bp_state_globals;
+
+namespace vtr {
+
+using std::stack;
+using std::string;
+using std::stringstream;
+using std::vector;
+
+/** This variable is used by the += operator and holds the initial value of the variable that is being added to. After every addition, the related function compares against this initial value to ensure correct incrementation. **/
+static int before_addition = 0;
+
+/*---- Functions for Parsing the Symbolic Formulas ----*/
+
+/* converts specified formula to a vector in reverse-polish notation */
+static void formula_to_rpn(const char* formula, const t_formula_data& mydata, vector<Formula_Object>& rpn_output, stack<Formula_Object>& op_stack, bool is_breakpoint);
+
+static void get_formula_object(const char* ch, int& ichar, const t_formula_data& mydata, Formula_Object* fobj, bool is_breakpoint);
+
+/* returns integer specifying precedence of passed-in operator. higher integer
+ * means higher precedence */
+static int get_fobj_precedence(const Formula_Object& fobj);
+
+/* Returns associativity of the specified operator */
+static bool op_associativity_is_left(const t_operator& op);
+
+/* used by the shunting-yard formula parser to deal with operators such as add and subtract */
+static void handle_operator(const Formula_Object& fobj, vector<Formula_Object>& rpn_output, stack<Formula_Object>& op_stack);
+
+/* used by the shunting-yard formula parser to deal with brackets, ie '(' and ')' */
+static void handle_bracket(const Formula_Object& fobj, vector<Formula_Object>& rpn_output, stack<Formula_Object>& op_stack);
+
+/* used by the shunting-yard formula parser to deal with commas, ie ','. These occur in function calls*/
+static void handle_comma(const Formula_Object& fobj, vector<Formula_Object>& rpn_output, stack<Formula_Object>& op_stack);
+
+/* parses reverse-polish notation vector to return formula result */
+static int parse_rpn_vector(vector<Formula_Object>& rpn_vec);
+
+/* applies operation specified by 'op' to the given arguments. arg1 comes before arg2 */
+static int apply_rpn_op(const Formula_Object& arg1, const Formula_Object& arg2, const Formula_Object& op);
+
+/* checks if specified character represents an ASCII number */
+static bool is_char_number(const char ch);
+
+// returns true if ch is an operator (e.g +,-, *, etc.)
+static bool is_operator(const char ch);
+
+// returns true if the specified name is a known function operator
+static bool is_function(std::string name);
+
+// returns the code of the compound operator recognized at ch, or E_COM_OP_UNDEFINED if none is recognized
+t_compound_operator is_compound_op(const char* ch);
+
+// returns true if the specified name is a known variable
+static bool is_variable(std::string var);
+
+// returns the length of any identifier (e.g. name, function) starting at the beginning of str
+static int identifier_length(const char* str);
+
+/* increments str_ind until it reaches specified char in formula. returns true if character was found, false otherwise */
+static bool goto_next_char(int* str_ind, const string& pw_formula, char ch);
+
+//compares two strings while ignoring white space and case
+bool same_string(std::string str1, std::string str2);
+
+//checks if the block indicated by the user was one of the moved blocks in the last perturbation
+int in_blocks_affected(std::string expression_left);
+
+//the function of += operator
+bool additional_assignment_op(int arg1, int arg2);
+
+/**** Function Implementations ****/
+/* returns integer result according to specified non-piece-wise formula and data */
+int FormulaParser::parse_formula(std::string formula, const t_formula_data& mydata, bool is_breakpoint) {
+    /* Evaluation happens in two passes over the member scratch buffers:
+     *   1. shunting-yard: infix text -> reverse-polish notation
+     *   2. RPN evaluation: reverse-polish notation -> integer */
+
+    /* start from an empty RPN buffer (it is a member and may still hold older entries) */
+    rpn_output_.clear();
+
+    /* pass 1: fills rpn_output_, using op_stack_ as the operator stack */
+    const char* infix = formula.c_str();
+    formula_to_rpn(infix, mydata, rpn_output_, op_stack_, is_breakpoint);
+
+    /* pass 2: the value of the formula is whatever the RPN evaluates to */
+    return parse_rpn_vector(rpn_output_);
+}
+
+/* EXPERIMENTAL:
+ *
+ * returns integer result according to specified piece-wise formula and data. the piecewise
+ * notation specifies different formulas that should be evaluated based on the index of
+ * the incoming wire in 'mydata'. for example the formula
+ *
+ * {0:(W/2)} t-1; {(W/2):W} t+1;
+ *
+ * indicates that the function "t-1" should be evaluated if the incoming wire index falls
+ * within the range [0,W/2] and that "t+1" should be evaluated if it falls within the
+ * [W/2,W] range (both bounds are inclusive; the first matching range is used). The piece-wise format is:
+ *
+ * {start_0:end_0} formula_0; ... {start_i:end_i} formula_i; ...
+ */
+int FormulaParser::parse_piecewise_formula(const char* formula, const t_formula_data& mydata) {
+    /* Scans the "{start:end} formula;" pieces left to right, evaluates the first piece whose
+     * inclusive range contains the variable "t" and returns that piece's value.
+     * Throws vtr::VtrError on malformed input or when no range contains "t". */
+    int result = -1;
+    int str_ind = 0;
+    int str_size = 0;
+
+    int t = mydata.get_var_value("t");
+    int tmp_ind_start = -1;
+    int tmp_ind_count = -1;
+    string substr;
+
+    /* convert formula to string format */
+    string pw_formula(formula);
+    str_size = pw_formula.size();
+
+    if (pw_formula[str_ind] != '{') {
+        throw vtr::VtrError(vtr::string_fmt("parse_piecewise_formula: the first character in piece-wise formula should always be '{'\n"), __FILE__, __LINE__);
+    }
+
+    /* find the range to which t corresponds */
+    /* the first character must be '{' as verified above */
+    while (str_ind != str_size - 1) {
+        /* set to true when range to which wire number corresponds has been found */
+        bool found_range = false;
+        bool char_found = false;
+        int range_start = -1;
+        int range_end = -1;
+        tmp_ind_start = -1;
+        tmp_ind_count = -1;
+
+        /* get the start of the range */
+        tmp_ind_start = str_ind + 1;
+        char_found = goto_next_char(&str_ind, pw_formula, ':');
+        if (!char_found) {
+            throw vtr::VtrError(vtr::string_fmt("parse_piecewise_formula: could not find char %c\n", ':'), __FILE__, __LINE__);
+        }
+        tmp_ind_count = str_ind - tmp_ind_start; /* range start is between { and : */
+        substr = pw_formula.substr(tmp_ind_start, tmp_ind_count);
+        range_start = parse_formula(substr.c_str(), mydata);
+
+        /* get the end of the range */
+        tmp_ind_start = str_ind + 1;
+        char_found = goto_next_char(&str_ind, pw_formula, '}');
+        if (!char_found) {
+            throw vtr::VtrError(vtr::string_fmt("parse_piecewise_formula: could not find char %c\n", '}'), __FILE__, __LINE__);
+        }
+        tmp_ind_count = str_ind - tmp_ind_start; /* range end is between : and } */
+        substr = pw_formula.substr(tmp_ind_start, tmp_ind_count);
+        range_end = parse_formula(substr.c_str(), mydata);
+
+        if (range_start > range_end) {
+            throw vtr::VtrError(vtr::string_fmt("parse_piecewise_formula: range_start, %d, is bigger than range end, %d\n", range_start, range_end), __FILE__, __LINE__);
+        }
+
+        /* is the incoming wire within this range? (inclusive) */
+        found_range = (range_start <= t && range_end >= t);
+
+        /* we're done if found correct range */
+        if (found_range) {
+            break;
+        }
+        char_found = goto_next_char(&str_ind, pw_formula, '{');
+        if (!char_found) {
+            throw vtr::VtrError(vtr::string_fmt("parse_piecewise_formula: could not find char %c\n", '{'), __FILE__, __LINE__);
+        }
+    }
+    /* the string index should never actually get to the end of the string because we should have found the range to which the
+     * current wire number corresponds */
+    if (str_ind == str_size - 1) {
+        throw vtr::VtrError(vtr::string_fmt("parse_piecewise_formula: could not find a closing '}'?\n"), __FILE__, __LINE__);
+    }
+
+    /* at this point str_ind should point to '}' right before the formula we're interested in starts */
+    /* get the value corresponding to this formula */
+    tmp_ind_start = str_ind + 1;
+    /* a missing final ';' is tolerated: the formula then runs to the end of the string instead of losing its last character */
+    const bool found_semicolon = goto_next_char(&str_ind, pw_formula, ';');
+    tmp_ind_count = (found_semicolon ? str_ind : str_size) - tmp_ind_start; /* formula is between } and ; */
+    substr = pw_formula.substr(tmp_ind_start, tmp_ind_count);
+
+    /* now parse the formula corresponding to the appropriate piece-wise range */
+    result = parse_formula(substr.c_str(), mydata);
+
+    return result;
+}
+
+/* increments str_ind until it reaches specified char in formula. returns true if character was found, false otherwise */
+static bool goto_next_char(int* str_ind, const string& pw_formula, char ch) {
+    /* str_ind is an in/out cursor: on return it indexes the match, or the last character if there was none */
+    int str_size = pw_formula.size();
+    /* '>=' (rather than '==') also rejects an empty string or a cursor beyond the last character instead of reading past the end */
+    if ((*str_ind) >= str_size - 1) {
+        throw vtr::VtrError(vtr::string_fmt("goto_next_char: passed-in str_ind is already at the end of string\n"), __FILE__, __LINE__);
+    }
+
+    do {
+        (*str_ind)++;
+        if (pw_formula[*str_ind] == ch) {
+            /* found the next requested character */
+            break;
+        }
+
+    } while ((*str_ind) < str_size - 1);
+
+    /* the loop stops either on a match or on the last character, so one comparison tells the two apart */
+    return pw_formula[*str_ind] == ch;
+}
+
+/* Parses the specified formula using a shunting yard algorithm (see wikipedia). The function's result
+ * is stored in the rpn_output vector in reverse-polish notation */
+static void formula_to_rpn(const char* formula, const t_formula_data& mydata, vector<Formula_Object>& rpn_output, stack<Formula_Object>& op_stack, bool is_breakpoint) {
+    /* Shunting-yard conversion of the infix text in 'formula' to reverse-polish notation.
+     *   formula       - NUL-terminated infix expression
+     *   mydata        - forwarded to get_formula_object(), which looks up named values in it
+     *   rpn_output    - receives the objects in RPN order (appended to; not cleared here)
+     *   op_stack      - operator stack used while converting; emptied on entry
+     *   is_breakpoint - forwarded to get_formula_object() to select how identifiers are resolved */
+
+    /* the operator stack is scratch space handed in by the caller: drop anything still on it */
+    while (!op_stack.empty()) {
+        op_stack.pop();
+    }
+
+    Formula_Object token; /* the formula object currently being parsed */
+
+    /* step through the formula until the terminating \0 character is hit. note that
+     * get_formula_object() advances ichar on its own when an object spans several characters */
+    for (int ichar = 0; '\0' != formula[ichar]; ichar++) {
+        const char* ch = &formula[ichar];
+
+        if (' ' == (*ch)) {
+            /* skip space */
+            continue;
+        }
+
+        /* parse the object that starts at this character and route it */
+        get_formula_object(ch, ichar, mydata, &token, is_breakpoint);
+        switch (token.type) {
+            case E_FML_NUMBER: //fallthrough
+            case E_FML_VARIABLE:
+                /* operands go straight to the output vector */
+                rpn_output.push_back(token);
+                break;
+            case E_FML_OPERATOR:
+                /* operators may be pushed to op_stack or rpn_output */
+                handle_operator(token, rpn_output, op_stack);
+                break;
+            case E_FML_BRACKET:
+                /* brackets are only ever pushed to op_stack, not rpn_output */
+                handle_bracket(token, rpn_output, op_stack);
+                break;
+            case E_FML_COMMA:
+                /* commas occur in function calls */
+                handle_comma(token, rpn_output, op_stack);
+                break;
+            default:
+                throw vtr::VtrError(vtr::string_fmt("in formula_to_rpn: unknown formula object type: %d\n", token.type), __FILE__, __LINE__);
+                break;
+        }
+    }
+
+    /* end of formula: move every operator that is still stacked to the output.
+     * a bracket still on the stack at this point was never closed */
+    while (!op_stack.empty()) {
+        const Formula_Object& pending = op_stack.top();
+
+        if (E_FML_BRACKET == pending.type) {
+            throw vtr::VtrError(vtr::string_fmt("in formula_to_rpn: Mismatched brackets in user-provided formula\n"), __FILE__, __LINE__);
+        }
+
+        rpn_output.push_back(pending);
+        op_stack.pop();
+    }
+    return;
+}
+
+/* Fills the formula object fobj according to specified character and mydata,
+ * which help determine which numeric value, if any, gets assigned to fobj
+ * ichar is incremented by the corresponding count if the need to step through the
+ * character array arises */
+static void get_formula_object(const char* ch, int& ichar, const t_formula_data& mydata, Formula_Object* fobj, bool is_breakpoint) {
+    /* the character can either be part of a number, or it can be an object like W, t, (, +, etc
+     * here we have to account for both possibilities. the branches are tried in this order:
+     * identifier (function / named value / breakpoint variable), literal number, compound operator, single character */
+    int id_len = identifier_length(ch);
+    //Candidate variable or function name (empty when id_len is 0)
+    std::string var_name(ch, id_len);
+    if (id_len != 0) {
+        if (is_function(var_name)) {
+            fobj->type = E_FML_OPERATOR;
+            if (var_name == "min")
+                fobj->data.op = E_OP_MIN;
+            else if (var_name == "max")
+                fobj->data.op = E_OP_MAX;
+            else if (var_name == "gcd")
+                fobj->data.op = E_OP_GCD;
+            else if (var_name == "lcm")
+                fobj->data.op = E_OP_LCM;
+            else { // is_function() accepted a name that has no operator mapped in the chain above
+                throw vtr::VtrError(vtr::string_fmt("in get_formula_object: recognized function: %s\n", var_name.c_str()), __FILE__, __LINE__);
+            }
+
+        } else if (!is_breakpoint) {
+            //A named value: it is looked up in mydata and stored as a plain number
+            fobj->type = E_FML_NUMBER;
+            fobj->data.num = mydata.get_var_value(
+                vtr::string_view(
+                    var_name.data(),
+                    var_name.size()));
+        } else if (is_variable(var_name)) { // breakpoint mode: values come from the global breakpoint state
+            fobj->type = E_FML_VARIABLE;
+            if (same_string(var_name, "temp_count"))
+                fobj->data.num = bp_state_globals.get_glob_breakpoint_state()->temp_count;
+            else if (same_string(var_name, "from_block"))
+                fobj->data.num = bp_state_globals.get_glob_breakpoint_state()->from_block;
+            else if (same_string(var_name, "move_num"))
+                fobj->data.num = bp_state_globals.get_glob_breakpoint_state()->move_num;
+            else if (same_string(var_name, "route_net_id"))
+                fobj->data.num = bp_state_globals.get_glob_breakpoint_state()->route_net_id;
+            else if (same_string(var_name, "in_blocks_affected"))
+                fobj->data.num = in_blocks_affected(std::string(ch));
+            else if (same_string(var_name, "router_iter"))
+                fobj->data.num = bp_state_globals.get_glob_breakpoint_state()->router_iter;
+        } // NOTE(review): a breakpoint identifier that is neither a function nor a variable leaves *fobj untouched -- confirm this is intended
+
+        ichar += (id_len - 1); //-1 since ichar is incremented at end of loop in formula_to_rpn()
+
+    } else if (is_char_number(*ch)) {
+        /* we have a number -- collect its digits, then use vtr::atoi to convert */
+        stringstream ss;
+        while (is_char_number(*ch)) {
+            ss << (*ch);
+            ichar++;
+            ch++;
+        }
+        ichar--; // step back onto the last digit: the caller advances ichar once more after this call
+        fobj->type = E_FML_NUMBER;
+        fobj->data.num = vtr::atoi(ss.str().c_str());
+    } else if (is_compound_op(ch) != E_COM_OP_UNDEFINED) {
+        fobj->type = E_FML_OPERATOR;
+        t_compound_operator comp_op_code = is_compound_op(ch);
+        if (comp_op_code == E_COM_OP_EQ)
+            fobj->data.op = E_OP_EQ;
+        else if (comp_op_code == E_COM_OP_GTE)
+            fobj->data.op = E_OP_GTE;
+        else if (comp_op_code == E_COM_OP_LTE)
+            fobj->data.op = E_OP_LTE;
+        else if (comp_op_code == E_COM_OP_AND)
+            fobj->data.op = E_OP_AND;
+        else if (comp_op_code == E_COM_OP_OR)
+            fobj->data.op = E_OP_OR;
+        else if (comp_op_code == E_COM_OP_AA)
+            fobj->data.op = E_OP_AA;
+        ichar++; // presumably skips the operator's second character (assumes compound operators are two characters wide) -- TODO confirm
+    } else {
+        switch ((*ch)) {
+            case '+':
+                fobj->type = E_FML_OPERATOR;
+                fobj->data.op = E_OP_ADD;
+                break;
+            case '-':
+                fobj->type = E_FML_OPERATOR;
+                fobj->data.op = E_OP_SUB;
+                break;
+            case '*':
+                fobj->type = E_FML_OPERATOR;
+                fobj->data.op = E_OP_MULT;
+                break;
+            case '/':
+                fobj->type = E_FML_OPERATOR;
+                fobj->data.op = E_OP_DIV;
+                break;
+            case '(':
+                fobj->type = E_FML_BRACKET;
+                fobj->data.left_bracket = true;
+                break;
+            case ')':
+                fobj->type = E_FML_BRACKET;
+                fobj->data.left_bracket = false;
+                break;
+            case ',':
+                fobj->type = E_FML_COMMA;
+                break;
+            case '>':
+                fobj->type = E_FML_OPERATOR;
+                fobj->data.op = E_OP_GT;
+                break;
+            case '<':
+                fobj->type = E_FML_OPERATOR;
+                fobj->data.op = E_OP_LT;
+                break;
+            case '%':
+                fobj->type = E_FML_OPERATOR;
+                fobj->data.op = E_OP_MOD;
+                break;
+            default:
+                throw vtr::VtrError(vtr::string_fmt("in get_formula_object: unsupported character: %c\n", *ch), __FILE__, __LINE__);
+                break;
+        }
+    }
+
+    return;
+}
+
+/* returns integer specifying precedence of passed-in operator. higher integer
+ * means higher precedence */
+static int get_fobj_precedence(const Formula_Object& fobj) {
+    /* brackets and commas are only markers on the operator stack: they bind
+     * weaker than any real operator */
+    if (E_FML_BRACKET == fobj.type || E_FML_COMMA == fobj.type) {
+        return 0;
+    }
+
+    /* every other non-operator object has no precedence at all */
+    if (E_FML_OPERATOR != fobj.type) {
+        throw vtr::VtrError(vtr::string_fmt("in get_fobj_precedence: no precedence possible for formula object type %d\n", fobj.type), __FILE__, __LINE__);
+    }
+
+    const t_operator op = fobj.data.op;
+    switch (op) {
+        case E_OP_AND: //fallthrough
+        case E_OP_OR:
+            return 1; /* logical connectives */
+
+        case E_OP_ADD: //fallthrough
+        case E_OP_SUB: //fallthrough
+        case E_OP_GT:  //fallthrough
+        case E_OP_LT:  //fallthrough
+        case E_OP_EQ:  //fallthrough
+        case E_OP_GTE: //fallthrough
+        case E_OP_LTE: //fallthrough
+        case E_OP_AA:
+            return 2; /* additive, comparison and += operators */
+
+        case E_OP_MULT: //fallthrough
+        case E_OP_DIV:  //fallthrough
+        case E_OP_MOD:
+            return 3; /* multiplicative operators */
+
+        case E_OP_MIN: //fallthrough
+        case E_OP_MAX: //fallthrough
+        case E_OP_LCM: //fallthrough
+        case E_OP_GCD:
+            return 4; /* function-style operators bind tightest */
+
+        default:
+            throw vtr::VtrError(vtr::string_fmt("in get_fobj_precedence: unrecognized operator: %d\n", op), __FILE__, __LINE__);
+    }
+}
+
+/* Returns associativity of the specified operator */
+static bool op_associativity_is_left(const t_operator& /*op*/) {
+    /* every operator implemented so far is left-associative, so the argument is unused */
+    const bool left_associative = true;
+
+    //TODO: once a 'power' operator is implemented it must report right
+    //associativity (i.e. return false) here
+
+    return left_associative;
+}
+
+/* used by the shunting-yard formula parser to deal with operators such as add and subtract */
+static void handle_operator(const Formula_Object& fobj, vector<Formula_Object>& rpn_output, stack<Formula_Object>& op_stack) {
+    /* Shunting-yard step for an operator: first move to rpn_output every stacked
+     * operator that has to be applied before 'fobj', then stack 'fobj' itself.
+     *   fobj       - the operator that was just read from the formula
+     *   rpn_output - output vector in reverse-polish order
+     *   op_stack   - stack of pending operators and brackets
+     * Throws vtr::VtrError if 'fobj' is not an operator. */
+
+    if (E_FML_OPERATOR != fobj.type) {
+        throw vtr::VtrError(vtr::string_fmt("in handle_operator: passed in formula object not of type operator\n"), __FILE__, __LINE__);
+    }
+
+    const int incoming_pr = get_fobj_precedence(fobj);
+    const bool incoming_is_left_assoc = op_associativity_is_left(fobj.data.op);
+
+    /* walk down the stack until an object that has to stay is found (or the stack runs out).
+     * brackets report precedence 0, which is below every operator, so they are never popped here */
+    while (!op_stack.empty()) {
+        const Formula_Object& top = op_stack.top();
+        const int top_pr = get_fobj_precedence(top);
+
+        /* the stacked operator goes first when it binds tighter, or equally tight
+         * while the incoming operator is left-associative */
+        const bool top_goes_first = (incoming_pr < top_pr)
+                                    || (incoming_is_left_assoc && incoming_pr == top_pr);
+        if (!top_goes_first) {
+            break;
+        }
+
+        /* pop top operator off stack onto the back of rpn_output */
+        rpn_output.push_back(top);
+        op_stack.pop();
+    }
+
+    /* place new operator object on top of stack */
+    op_stack.push(fobj);
+
+    return;
+}
+
+/* used by the shunting-yard formula parser to deal with brackets, ie '(' and ')' */
+static void handle_bracket(const Formula_Object& fobj, vector<Formula_Object>& rpn_output, stack<Formula_Object>& op_stack) {
+    /* Shunting-yard step for '(' and ')'.
+     *   '(' is simply pushed onto the operator stack.
+     *   ')' moves stacked operators to rpn_output until the matching '(' is
+     *       reached; that '(' is then discarded.
+     * Throws vtr::VtrError on a non-bracket object or on mismatched brackets. */
+    if (E_FML_BRACKET != fobj.type) {
+        throw vtr::VtrError(vtr::string_fmt("in handle_bracket: passed-in formula object not of type bracket\n"), __FILE__, __LINE__);
+    }
+
+    if (fobj.data.left_bracket) {
+        /* left bracket, so simply push it onto operator stack */
+        op_stack.push(fobj);
+        return;
+    }
+
+    /* right bracket: unwind the operator stack */
+    while (true) {
+        if (op_stack.empty()) {
+            /* didn't find an opening bracket - mismatched brackets */
+            throw vtr::VtrError(vtr::string_fmt("Ran out of stack while parsing brackets -- bracket mismatch in user-specified formula\n"), __FILE__, __LINE__);
+        }
+
+        const Formula_Object top = op_stack.top();
+
+        if (E_FML_OPERATOR == top.type) {
+            /* pop operator off stack onto the back of rpn_output */
+            rpn_output.push_back(top);
+            op_stack.pop();
+            continue;
+        }
+
+        if (E_FML_BRACKET != top.type) {
+            throw vtr::VtrError(vtr::string_fmt("Found unexpected formula object on operator stack: %d\n", top.type), __FILE__, __LINE__);
+        }
+
+        if (!top.data.left_bracket) {
+            /* should not find two right brackets without a left bracket in-between */
+            throw vtr::VtrError(vtr::string_fmt("Mismatched brackets encountered in user-specified formula\n"), __FILE__, __LINE__);
+        }
+
+        /* matching bracket found -- pop off stack and finish */
+        op_stack.pop();
+        return;
+    }
+}
+
+/* used by the shunting-yard formula parser to deal with commas, ie ','. These occur in function calls*/
+static void handle_comma(const Formula_Object& fobj, vector<Formula_Object>& rpn_output, stack<Formula_Object>& op_stack) {
+    /* Shunting-yard step for ',' (argument separator of a function call).
+     *   fobj       - the comma object that was just read
+     *   rpn_output - output vector in reverse-polish order
+     *   op_stack   - stack of pending operators and brackets
+     * Throws vtr::VtrError on a non-comma object or when no enclosing '(' is on the stack. */
+    if (E_FML_COMMA != fobj.type) {
+        throw vtr::VtrError(vtr::string_fmt("in handle_comma: passed-in formula object not of type comma\n"), __FILE__, __LINE__);
+    }
+
+    //Commas are treated as right (closing) bracket since it completes a
+    //sub-expression, except that we do not cause the left (opening) bracket to
+    //be popped
+
+    bool keep_going = true;
+    do {
+        /* here we keep popping operators off op_stack onto back of rpn_output until a
+         * left bracket is encountered */
+
+        if (op_stack.empty()) {
+            /* didn't find an opening bracket - mismatched brackets */
+            throw vtr::VtrError(vtr::string_fmt("Ran out of stack while parsing comma -- bracket mismatch in user-specified formula\n"), __FILE__, __LINE__);
+        }
+
+        Formula_Object next_fobj = op_stack.top();
+        if (E_FML_BRACKET == next_fobj.type) {
+            if (next_fobj.data.left_bracket) {
+                /* matching bracket found -- it stays on the stack for the closing ')' */
+                keep_going = false;
+            } else {
+                /* should not find two right brackets without a left bracket in-between */
+                throw vtr::VtrError(vtr::string_fmt("Mismatched brackets encountered in user-specified formula\n"), __FILE__, __LINE__);
+            }
+        } else if (E_FML_OPERATOR == next_fobj.type) {
+            /* pop operator off stack onto the back of rpn_output */
+            rpn_output.push_back(next_fobj);
+            op_stack.pop();
+            keep_going = true;
+        } else {
+            throw vtr::VtrError(vtr::string_fmt("Found unexpected formula object on operator stack: %d\n", next_fobj.type), __FILE__, __LINE__);
+        }
+
+    } while (keep_going);
+}
+
+/* parses a reverse-polish notation vector corresponding to a switchblock formula
+ * and returns the integer result */
+static int parse_rpn_vector(vector<Formula_Object>& rpn_vec) {
+    int result = -1;
+    if (rpn_vec.empty()) { /* nothing to evaluate (e.g. an empty or blank formula): rpn_vec[0] below would be out of bounds */
+        throw vtr::VtrError(vtr::string_fmt("parse_rpn_vector: formula is empty\n"), __FILE__, __LINE__);
+    }
+
+    /* first entry should always be a number or variable name*/
+    if (E_FML_NUMBER != rpn_vec[0].type && E_FML_VARIABLE != rpn_vec[0].type) {
+        throw vtr::VtrError(vtr::string_fmt("parse_rpn_vector: first entry is not a number or variable(was %s)\n", rpn_vec[0].to_string().c_str()), __FILE__, __LINE__);
+    }
+
+    if (rpn_vec.size() == 1 && rpn_vec[0].type == E_FML_NUMBER) {
+        result = rpn_vec[0].data.num; /* a lone number is its own result */
+    } else {
+        int ivec = 0;
+        /* rpn_vec is consumed: each pass folds one "arg1 arg2 op" triple into a number until nothing is left */
+        while (!rpn_vec.empty()) {
+            /* keep going until we have hit an operator (the first item is never an operator, see above) */
+            do {
+                ivec++;
+                if (ivec == (int)rpn_vec.size()) {
+                    throw vtr::VtrError(vtr::string_fmt("parse_rpn_vector(): found multiple numbers in formula, but no operator\n"), __FILE__, __LINE__);
+                }
+            } while (E_FML_OPERATOR != rpn_vec[ivec].type);
+            if (ivec < 2) { /* every operator here is binary; with one operand (e.g. formula "-1") rpn_vec[ivec - 2] would be out of bounds */
+                throw vtr::VtrError(vtr::string_fmt("parse_rpn_vector(): operator '%s' is missing an operand\n", rpn_vec[ivec].to_string().c_str()), __FILE__, __LINE__);
+            }
+
+            /* apply the operation to the two previous entries; the result replaces the operator object */
+            rpn_vec[ivec].data.num = apply_rpn_op(rpn_vec[ivec - 2], rpn_vec[ivec - 1], rpn_vec[ivec]);
+            rpn_vec[ivec].type = E_FML_NUMBER;
+            /* remove the two consumed operands; ivec then indexes the freshly computed result */
+            rpn_vec.erase(rpn_vec.begin() + ivec - 2, rpn_vec.begin() + ivec - 0);
+            ivec -= 2;
+            /* if we're down to one element, we are done */
+            if (1 == rpn_vec.size()) {
+                result = rpn_vec[ivec].data.num;
+                rpn_vec.erase(rpn_vec.begin() + ivec);
+            }
+        }
+    }
+    return result;
+}
+
+/* applies operation specified by 'op' to the given arguments. arg1 comes before arg2 */
+static int apply_rpn_op(const Formula_Object& arg1, const Formula_Object& arg2, const Formula_Object& op) {
+    int result = -1;
+    /* each argument must be a number or a variable; throws vtr::VtrError otherwise */
+    if ((E_FML_NUMBER != arg1.type && E_FML_VARIABLE != arg1.type)
+        || (E_FML_NUMBER != arg2.type && E_FML_VARIABLE != arg2.type)) {
+        throw vtr::VtrError(vtr::string_fmt("in apply_rpn_op: one of the arguments is not a number or variable(was '%s %s %s')\n", arg1.to_string().c_str(), op.to_string().c_str(), arg2.to_string().c_str()), __FILE__, __LINE__);
+    }
+
+    /* check that op is actually an operation */
+    if (E_FML_OPERATOR != op.type) {
+        throw vtr::VtrError(vtr::string_fmt("in apply_rpn_op: the object specified as the operation is not of operation type\n"), __FILE__, __LINE__);
+    }
+
+    if ((E_OP_DIV == op.data.op || E_OP_MOD == op.data.op) && 0 == arg2.data.num) { /* int / and % by zero are undefined behaviour: report a formula error instead */
+        throw vtr::VtrError(vtr::string_fmt("in apply_rpn_op: division or modulo by zero (was '%s %s %s')\n", arg1.to_string().c_str(), op.to_string().c_str(), arg2.to_string().c_str()), __FILE__, __LINE__);
+    }
+
+    /* apply operation to arguments (comparisons and logical operators yield 0 or 1) */
+    switch (op.data.op) {
+        case E_OP_ADD:
+            result = arg1.data.num + arg2.data.num;
+            break;
+        case E_OP_SUB:
+            result = arg1.data.num - arg2.data.num;
+            break;
+        case E_OP_MULT:
+            result = arg1.data.num * arg2.data.num;
+            break;
+        case E_OP_DIV:
+            result = arg1.data.num / arg2.data.num;
+            break;
+        case E_OP_MAX:
+            result = std::max(arg1.data.num, arg2.data.num);
+            break;
+        case E_OP_MIN:
+            result = std::min(arg1.data.num, arg2.data.num);
+            break;
+        case E_OP_GCD:
+            result = vtr::gcd(arg1.data.num, arg2.data.num);
+            break;
+        case E_OP_LCM:
+            result = vtr::lcm(arg1.data.num, arg2.data.num);
+            break;
+        case E_OP_AND:
+            result = arg1.data.num && arg2.data.num;
+            break;
+        case E_OP_OR:
+            result = (arg1.data.num || arg2.data.num);
+            break;
+        case E_OP_GT:
+            result = arg1.data.num > arg2.data.num;
+            break;
+        case E_OP_LT:
+            result = arg1.data.num < arg2.data.num;
+            break;
+        case E_OP_GTE:
+            result = (arg1.data.num >= arg2.data.num);
+            break;
+        case E_OP_LTE:
+            result = (arg1.data.num <= arg2.data.num);
+            break;
+        case E_OP_EQ:
+            result = arg1.data.num == arg2.data.num;
+            break;
+        case E_OP_MOD:
+            result = arg1.data.num % arg2.data.num;
+            break;
+        case E_OP_AA:
+            result = additional_assignment_op(arg1.data.num, arg2.data.num);
+            break;
+        default:
+            throw vtr::VtrError(vtr::string_fmt("in apply_rpn_op: invalid operation: %d\n", op.data.op), __FILE__, __LINE__);
+    }
+    return result;
+}
+
+/*
+ * Tells whether 'ch' is one of the ASCII decimal digits '0' through '9'.
+ *
+ * The comparison is done directly on the character codes, so the answer
+ * does not depend on the current locale.
+ */
+static bool is_char_number(const char ch) {
+    const bool at_least_zero = ('0' <= ch);
+    const bool at_most_nine = (ch <= '9');
+
+    return at_least_zero && at_most_nine;
+}
+
+//Checks if the entered char is one of the known single-character operator or
+//punctuation symbols (e.g +,-,<,>,...). Characters that only start a compound
+//operator (such as '&' or '=') are in the list as well.
+//Every other character, including the terminating '\0', is not an operator.
+static bool is_operator(const char ch) {
+    static const char known_symbols[] = {
+        '+', '-', '*', '/', '%', //arithmetic
+        '(', ')', ',',           //brackets and comma
+        '&', '|', '>', '<', '=', //logical / relational
+    };
+
+    for (const char symbol : known_symbols) {
+        if (symbol == ch) {
+            return true;
+        }
+    }
+
+    //no symbol matched
+    return false;
+}
+
+
+//tells whether 'name' is exactly (case-sensitively) one of the function names: min, max, gcd, lcm
+static bool is_function(std::string name) {
+    static const char* const known_functions[] = {"min", "max", "gcd", "lcm"};
+    for (const char* candidate : known_functions) {
+        if (name == candidate) {
+            return true;
+        }
+    }
+    return false;
+}
+
+//returns enumerated code depending on the compound operator
+//compound operators are operators with more than one character e.g &&, >=
+//Only the first two characters of 'ch' are inspected. A null pointer, an empty
+//string or a single character yields E_COM_OP_UNDEFINED (ch[1] is never read
+//unless ch[0] is a real character, so we never look past the terminator).
+t_compound_operator is_compound_op(const char* ch) {
+    if (!ch || ch[0] == '\0' || ch[1] == '\0') {
+        return E_COM_OP_UNDEFINED;
+    }
+
+    if (ch[0] == '=' && ch[1] == '=') return E_COM_OP_EQ;
+    if (ch[0] == '>' && ch[1] == '=') return E_COM_OP_GTE;
+    if (ch[0] == '<' && ch[1] == '=') return E_COM_OP_LTE;
+    if (ch[0] == '&' && ch[1] == '&') return E_COM_OP_AND;
+    if (ch[0] == '|' && ch[1] == '|') return E_COM_OP_OR;
+    if (ch[0] == '+' && ch[1] == '=') return E_COM_OP_AA;
+
+    return E_COM_OP_UNDEFINED;
+}
+
+//checks if the entered string is a known variable name; names are compared
+//with same_string, so case and spaces are ignored
+static bool is_variable(std::string var_name) {
+    return same_string(var_name, "from_block") || same_string(var_name, "temp_count")
+           || same_string(var_name, "move_num") || same_string(var_name, "route_net_id")
+           || same_string(var_name, "in_blocks_affected") || same_string(var_name, "router_iter");
+}
+
+//Measures the leading part of 'str' that can form an identifier (variable name)
+//and returns its length in characters. The scan stops at the end of the string,
+//at a space, or at any operator character; if the very first character is a
+//digit the length is 0. A null pointer also gives 0.
+static int identifier_length(const char* str) {
+    if (nullptr == str) {
+        return 0;
+    }
+
+    //an identifier must not start with a number
+    if (is_char_number(str[0])) {
+        return 0;
+    }
+    int length = 0;
+    for (const char* cursor = str; *cursor != '\0'; ++cursor) {
+        //a space or an operator terminates the identifier
+        if (*cursor == ' ' || is_operator(*cursor)) break;
+        ++length;
+    }
+
+    return length;
+}
+
+/*
+ * Checks if the specified formula is piece-wise defined.
+ *
+ * A piece-wise formula is recognised purely by its first character, which is
+ * expected to be '{'. No leading whitespace is skipped and the rest of the
+ * string is not examined.
+ */
+bool FormulaParser::is_piecewise_formula(const char* formula) {
+    const char first_char = formula[0];
+    return first_char == '{';
+}
+
+//Compares two strings while ignoring case and spaces; returns true if they match.
+//Only the space character ' ' is stripped (tabs/newlines are kept). Characters are
+//passed to tolower() as unsigned char, since a negative char value is undefined there.
+bool same_string(std::string str1, std::string str2) {
+    auto normalize = [](std::string& text) {
+        text.erase(std::remove(text.begin(), text.end(), ' '), text.end());
+        std::transform(text.begin(), text.end(), text.begin(), [](unsigned char c) { return static_cast<char>(::tolower(c)); });
+    };
+    normalize(str1);
+    normalize(str2);
+    return str1 == str2;
+}
+
+//the += operator: remembers arg1 in before_addition on first use (i.e. while before_addition is 0) and
+//reports true once arg1 equals the remembered value plus arg2, resetting before_addition to 0 at that point
+bool additional_assignment_op(int arg1, int arg2) {
+    if (0 == before_addition) {
+        before_addition = arg1;
+    }
+    const bool target_reached = (arg1 == (before_addition + arg2));
+    if (target_reached) before_addition = 0;
+    return target_reached;
+}
+
+//Recognizes the block_id to look for (entered by the user): the first whitespace-separated token of
+//expression_left that starts with an integer. Then looks for that block_id in all the blocks moved in
+//the last perturbation; if found it is stored as block_affected and returned, else just -1 is returned.
+//If no block_id can be recognized at all, -1 is returned without searching.
+int in_blocks_affected(std::string expression_left) {
+    int found_block = -1;
+    bool have_block_id = false;
+    std::stringstream ss(expression_left);
+    std::string token;
+
+    //finds block_id to look for
+    while (!have_block_id && ss >> token) {
+        have_block_id = static_cast<bool>(std::stringstream(token) >> found_block);
+    }
+    if (!have_block_id) {
+        //a failed extraction zeroes found_block (since C++11); do not mistake that for block 0
+        return -1;
+    }
+
+    //goes through blocks_affected
+    for (size_t i = 0; i < bp_state_globals.get_glob_breakpoint_state()->blocks_affected_by_move.size(); i++) {
+        if (bp_state_globals.get_glob_breakpoint_state()->blocks_affected_by_move[i] == found_block) {
+            bp_state_globals.get_glob_breakpoint_state()->block_affected = found_block;
+            return found_block;
+        }
+    }
+    return -1;
+}
+
+} //namespace vtr
+
+//returns a pointer to the global variable (bp_state_globals) that holds all values that can trigger a breakpoint and are updated by the router and placer
+BreakpointStateGlobals* get_bp_state_globals() {
+    return &bp_state_globals;
+}
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_expr_eval.h b/third_party/vtr/libs/vtrutil/src/vtr_expr_eval.h
new file mode 100644
index 000000000..43aac411d
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_expr_eval.h
@@ -0,0 +1,234 @@
+#ifndef EXPR_EVAL_H
+#define EXPR_EVAL_H
+#include <map>
+#include <string>
+#include <vector>
+#include <stack>
+#include <cstring>
+#include <iostream>
+
+#include "vtr_util.h"
+#include "vtr_error.h"
+#include "vtr_string_view.h"
+#include "vtr_flat_map.h"
+#include "breakpoint_state_globals.h"
+
+/**
+ * @file
+ * @brief   This file implements an expression evaluator
+ *
+ * The expression evaluator is capable of performing many operations on given variables, 
+ * after parsing the expression. The parser goes character by character and identifies 
+ * the type of char or chars. (e.g bracket, comma, number, operator, variable). 
+ * The supported operations include addition, subtraction, multiplication, division, 
+ * finding max, min, gcd, lcm, as well as boolean operators such as &&, ||, ==, >=, <= etc. 
+ * The result is returned as an int value and operation precedence is taken into account.
+ * (e.g given 3-2*4, the result will be -5). This class is also used to parse expressions
+ * indicating breakpoints. The breakpoint expressions consist of variable names such as
+ * move_num, temp_count, from_block etc, and boolean operators (e.g move_num == 3).
+ * Multiple breakpoints can be expressed in one expression
+ */
+
+//function declarations
+///@brief returns the global variable that holds all values that can trigger a breakpoint and are updated by the router and placer
+BreakpointStateGlobals* get_bp_state_globals();
+
+namespace vtr {
+
+/**** Structs ****/
+
+///@brief a class to hold the formula data, i.e. the values of the named variables of a formula. NOTE(review): keys are kept as vtr::string_view, which presumably does not own its characters -- confirm callers keep the names alive
+class t_formula_data {
+  public:
+    ///@brief clears all the formula data
+    void clear() {
+        vars_.clear();
+    }
+
+    ///@brief set the value of the variable named 'var' (adds the variable, or overwrites its previous value)
+    void set_var_value(vtr::string_view var, int value) { vars_[var] = value; }
+
+    ///@brief set the value of a variable (the var can be c-style string)
+    void set_var_value(const char* var, int value) { vars_[vtr::string_view(var)] = value; }
+
+    ///@brief get the value of a variable (the var is given as a std::string); forwards to the string_view overload
+    int get_var_value(const std::string& var) const {
+        return get_var_value(vtr::string_view(var.data(), var.size()));
+    }
+
+    ///@brief get the value of a variable; throws vtr::VtrError if no value was set for it
+    int get_var_value(vtr::string_view var) const {
+        auto iter = vars_.find(var);
+        if (iter == vars_.end()) {
+            std::string copy(var.data(), var.size());
+            throw vtr::VtrError(vtr::string_fmt("No value found for variable '%s' from expression\n", copy.c_str()), __FILE__, __LINE__);
+        }
+
+        return iter->second;
+    }
+
+  private:
+    vtr::flat_map<vtr::string_view, int> vars_;
+};
+
+/**** Enums ****/
+///@brief Used to identify the type of symbolic formula object
+typedef enum e_formula_obj {
+    E_FML_UNDEFINED = 0,   ///< type not set; what the Formula_Object constructor assigns
+    E_FML_NUMBER,          ///< a number; Formula_Object::to_string() prints data.num
+    E_FML_BRACKET,         ///< a bracket, "(" or ")"
+    E_FML_COMMA,           ///< a comma
+    E_FML_OPERATOR,        ///< an operator; see t_operator
+    E_FML_VARIABLE,        ///< a variable; printed through data.num like a number
+    E_FML_NUM_FORMULA_OBJS ///< not an object type: counts the enumerators above
+} t_formula_obj;
+
+///@brief Used to identify an operator in a formula (the symbols below are the ones Formula_Object::to_string() prints)
+typedef enum e_operator {
+    E_OP_UNDEFINED = 0, ///< zero value, which is what a zero-filled Formula_Object::data holds; printed as "???"
+    E_OP_ADD,           ///< "+"
+    E_OP_SUB,           ///< "-"
+    E_OP_MULT,          ///< "*"
+    E_OP_DIV,           ///< "/"
+    E_OP_MIN,           ///< "min"
+    E_OP_MAX,           ///< "max"
+    E_OP_GCD,           ///< "gcd"
+    E_OP_LCM,           ///< "lcm"
+    E_OP_AND,           ///< "&&"
+    E_OP_OR,            ///< "||"
+    E_OP_GT,            ///< ">"
+    E_OP_LT,            ///< "<"
+    E_OP_GTE,           ///< ">="
+    E_OP_LTE,           ///< "<="
+    E_OP_EQ,            ///< "=="
+    E_OP_MOD,           ///< "%"
+    E_OP_AA,            ///< "+="
+    E_OP_NUM_OPS        ///< not an operator: counts the enumerators above
+} t_operator;
+
+///@brief Used to identify operators with more than one character (the value is_compound_op() returns)
+typedef enum e_compound_operator {
+    E_COM_OP_UNDEFINED = 0, ///< not a compound operator
+    E_COM_OP_AND,           ///< "&&"
+    E_COM_OP_OR,            ///< "||"
+    E_COM_OP_EQ,            ///< "=="
+    E_COM_OP_AA,            ///< "+="
+    E_COM_OP_GTE,           ///< ">="
+    E_COM_OP_LTE            ///< "<="
+
+} t_compound_operator;
+
+/**** Class Definitions ****/
+/**
+ * @brief One object (token) of a formula
+ *
+ * The member 'type' tells which of the following the object is:
+ *      - a number (value in data.num)
+ *      - a bracket (data.left_bracket tells a left bracket from a right one)
+ *      - a comma
+ *      - an operator (data.op)
+ *      - a variable (value in data.num)
+ */
+class Formula_Object {
+  public:
+    ///@brief indicates the type of formula object this is
+    t_formula_obj type;
+
+    ///@brief object data, accessed based on what kind of object this is
+    union u_Data {
+        int num;           ///< for number objects
+        t_operator op;     ///< for operator objects
+        bool left_bracket; ///< for bracket objects -- specifies if this is a left bracket
+        //std::string variable;
+
+        u_Data() { memset(this, 0, sizeof(u_Data)); }
+    } data;
+
+    ///@brief constructor: the object starts out with an undefined type (and zeroed data)
+    Formula_Object()
+        : type(E_FML_UNDEFINED) {}
+
+    ///@brief convert the object to its textual form ("???" if the type or the operator is not recognised)
+    std::string to_string() const {
+        switch (type) {
+            case E_FML_NUMBER: //fallthrough
+            case E_FML_VARIABLE:
+                return std::to_string(data.num);
+            case E_FML_BRACKET:
+                return data.left_bracket ? "(" : ")";
+            case E_FML_COMMA:
+                return ",";
+            case E_FML_OPERATOR:
+                break; //translated by the second switch below
+            default:
+                return "???"; //Unknown
+        }
+
+        //only operators get here: map the operator to its symbol or function name
+        switch (data.op) {
+            case E_OP_ADD:
+                return "+";
+            case E_OP_SUB:
+                return "-";
+            case E_OP_MULT:
+                return "*";
+            case E_OP_DIV:
+                return "/";
+            case E_OP_MOD:
+                return "%";
+            case E_OP_AND:
+                return "&&";
+            case E_OP_OR:
+                return "||";
+            case E_OP_GT:
+                return ">";
+            case E_OP_LT:
+                return "<";
+            case E_OP_GTE:
+                return ">=";
+            case E_OP_LTE:
+                return "<=";
+            case E_OP_EQ:
+                return "==";
+            case E_OP_MIN:
+                return "min";
+            case E_OP_MAX:
+                return "max";
+            case E_OP_GCD:
+                return "gcd";
+            case E_OP_LCM:
+                return "lcm";
+            case E_OP_AA:
+                return "+=";
+            default:
+                return "???"; //Unknown
+        }
+    }
+};
+
+///@brief A class to parse formula (objects cannot be copied: copy construction and copy assignment are deleted)
+class FormulaParser {
+  public:
+    FormulaParser() = default;
+    FormulaParser(const FormulaParser&) = delete;
+    FormulaParser& operator=(const FormulaParser&) = delete;
+
+    ///@brief returns integer result according to specified formula and data. NOTE(review): is_breakpoint presumably selects breakpoint-expression parsing -- confirm in vtr_expr_eval.cpp
+    int parse_formula(std::string formula, const t_formula_data& mydata, bool is_breakpoint = false);
+
+    ///@brief returns integer result according to specified piece-wise formula and data
+    int parse_piecewise_formula(const char* formula, const t_formula_data& mydata);
+
+    ///@brief checks if the specified formula is piece-wise defined (static: needs no parser object)
+    static bool is_piecewise_formula(const char* formula);
+
+  private:
+    // output sequence of formula objects (NOTE(review): the name suggests reverse polish notation -- confirm)
+    std::vector<Formula_Object> rpn_output_;
+
+    // stack for handling operators and brackets in formula
+    std::stack<Formula_Object> op_stack_;
+};
+
+} // namespace vtr
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_flat_map.h b/third_party/vtr/libs/vtrutil/src/vtr_flat_map.h
new file mode 100644
index 000000000..2c0b34f85
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_flat_map.h
@@ -0,0 +1,483 @@
+#ifndef VTR_FLAT_MAP
+#define VTR_FLAT_MAP
+#include <functional>
+#include <iterator>
+#include <vector>
+#include <algorithm>
+#include <stdexcept>
+
+#include "vtr_assert.h"
+
+namespace vtr {
+
+//Forward declaration
+template<class K, class V, class Compare = std::less<K>, class Storage = std::vector<std::pair<K, V>>>
+class flat_map;
+
+template<class K, class V, class Compare = std::less<K>, class Storage = std::vector<std::pair<K, V>>>
+class flat_map2;
+
+/**
+ * @brief A function to create a flat map
+ *
+ * Helper function to create a flat map from a vector of pairs (the vector is moved into the map)
+ * without having to explicitly specify the key and value types
+ */
+template<class K, class V>
+flat_map<K, V> make_flat_map(std::vector<std::pair<K, V>>&& vec) {
+    return flat_map<K, V>(std::move(vec));
+}
+
+///@brief Same as make_flat_map but for flat_map2: moves a vector of pairs into a new flat_map2, deducing the key and value types
+template<class K, class V>
+flat_map2<K, V> make_flat_map2(std::vector<std::pair<K, V>>&& vec) {
+    return flat_map2<K, V>(std::move(vec));
+}
+
+/**
+ * @brief flat_map is a (nearly) std::map compatible container 
+ * 
+ * It uses a vector as its underlying storage. Internally the stored elements
+ * are kept sorted allowing efficient look-up in O(logN) time via binary search.
+ *
+ *
+ * This container is typically useful in the following scenarios:
+ *    - Reduced memory usage if key/value are small (std::map needs to store pointers to
+ *      other BST nodes which can add substantial overhead for small keys/values)
+ *    - Faster search/iteration by exploiting data locality (all elements are in contiguous
+ *      memory enabling better spatial locality)
+ *
+ * The container deviates from the behaviour of std::map in the following important ways:
+ *    - Insertion/erase takes O(N) instead of O(logN) time
+ *    - Iterators may be invalidated on insertion/erase (i.e. if the vector is reallocated)
+ *
+ * The slow insertion/erase performance makes this container poorly suited to maps that
+ * frequently add/remove new keys. If this is required you likely want std::map or
+ * std::unordered_map. However if the map is constructed once and then repeatedly queried,
+ * consider using the range or vector-based constructors which initializes the flat_map in
+ * O(NlogN) time.
+ */
+template<class K, class T, class Compare, class Storage>
+class flat_map {
+  public:
+    typedef K key_type;
+    typedef T mapped_type;
+    typedef std::pair<K, T> value_type;
+    typedef Compare key_compare;
+    typedef value_type& reference;
+    typedef const value_type& const_reference;
+    typedef typename Storage::iterator iterator;
+    typedef typename Storage::const_iterator const_iterator;
+    typedef typename Storage::reverse_iterator reverse_iterator;
+    typedef typename Storage::const_reverse_iterator const_reverse_iterator;
+    typedef typename Storage::difference_type difference_type;
+    typedef typename Storage::size_type size_type;
+
+    class value_compare;
+
+  public:
+    ///@brief Standard constructors
+    flat_map() = default;
+    flat_map(const flat_map&) = default;
+    flat_map(flat_map&&) = default;
+    flat_map& operator=(const flat_map&) = default;
+    flat_map& operator=(flat_map&&) = default;
+
+    ///@brief range constructor
+    template<class InputIterator>
+    flat_map(InputIterator first, InputIterator last) {
+        // Copy the values
+        std::copy(first, last, std::back_inserter(vec_));
+
+        sort();
+        uniquify();
+    }
+
+    ///@brief direct vector constructor
+    explicit flat_map(Storage&& values) {
+        assign(std::move(values));
+    }
+
+    /**
+     * @brief Move the values
+     * 
+     * Should be more efficient than the range constructor which 
+     * must copy each element
+     */
+    void assign(Storage&& values) {
+        vec_ = std::move(values);
+
+        sort();
+        uniquify();
+    }
+
+    ///@brief Move in values that are already sorted by key with no equivalent keys (only checked by VTR_ASSERT_SAFE); unlike assign() no sort()/uniquify() pass is run
+    void assign_sorted(Storage&& values) {
+        vec_ = std::move(values);
+        //Each key must order strictly before its successor under the map's own comparator
+        //(not operator<, which may be missing or disagree with a custom Compare)
+        for (size_t i = 1; i < vec_.size(); ++i) {
+            VTR_ASSERT_SAFE(key_comp()(vec_[i - 1].first, vec_[i].first));
+        }
+    }
+
+    ///@brief Return an iterator pointing to the first element in the sequence:
+    iterator begin() { return vec_.begin(); }
+
+    ///@brief Return a constant iterator pointing to the first element in the sequence:
+    const_iterator begin() const { return vec_.begin(); }
+
+    ///@brief Returns an iterator referring to the past-the-end element in the vector container.
+    iterator end() { return vec_.end(); }
+
+    ///@brief Returns a constant iterator referring to the past-the-end element in the vector container.
+    const_iterator end() const { return vec_.end(); }
+
+    ///@brief Returns a reverse iterator which points to the last element of the map.
+    reverse_iterator rbegin() { return vec_.rbegin(); }
+
+    ///@brief Returns a constant reverse iterator which points to the last element of the map.
+    const_reverse_iterator rbegin() const { return vec_.rbegin(); }
+
+    ///@brief Returns a reverse iterator pointing to the theoretical element preceding the first element in the vector (which is considered its reverse end).
+    reverse_iterator rend() { return vec_.rend(); }
+
+    ///@brief Returns a constant reverse iterator pointing to the theoretical element preceding the first element in the vector (which is considered its reverse end).
+    const_reverse_iterator rend() const { return vec_.rend(); }
+
+    ///@brief Returns a constant_iterator to the first element in the underlying vector
+    const_iterator cbegin() const { return vec_.begin(); }
+
+    ///@brief Returns a const_iterator pointing to the past-the-end element in the container.
+    const_iterator cend() const { return vec_.end(); }
+
+    ///@brief Returns a const_reverse_iterator pointing to the last element in the container (i.e., its reverse beginning).
+    const_reverse_iterator crbegin() const { return vec_.rbegin(); }
+
+    ///@brief Returns a const_reverse_iterator pointing to the theoretical element preceding the first element in the container (which is considered its reverse end).
+    const_reverse_iterator crend() const { return vec_.rend(); }
+
+    ///@brief Return true if the underlying vector is empty
+    bool empty() const { return vec_.empty(); }
+
+    ///@brief Return the container size
+    size_type size() const { return vec_.size(); }
+
+    ///@brief Return the underlying vector's max size
+    size_type max_size() const { return vec_.max_size(); }
+
+    ///@brief The constant version of operator []
+    const mapped_type& operator[](const key_type& key) const {
+        auto iter = find(key);
+        if (iter == end()) {
+            //Not found
+            throw std::out_of_range("Invalid key");
+        }
+
+        return iter->second;
+    }
+
+    /**
+     * @brief operator []
+     *
+     * Returns a reference to the value mapped to key. If the key is not present it
+     * is inserted (at its sorted position) with a value-initialized mapped_type.
+     * Key equality is decided with the map's comparator (keys_equivalent), like
+     * find()/insert(), rather than with operator== on the key type.
+     */
+    mapped_type& operator[](const key_type& key) {
+        auto iter = lower_bound(key);
+        if (iter != end() && keys_equivalent(iter->first, key)) {
+            // The element already exists, return it.
+            return iter->second;
+        }
+
+        // Not found: insert such that vector remains sorted (iter may be end()).
+        iter = vec_.emplace(iter, std::make_pair(key, mapped_type()));
+        return iter->second;
+    }
+
+    ///@brief operator at()
+    mapped_type& at(const key_type& key) {
+        return const_cast<mapped_type&>(const_cast<const flat_map*>(this)->at(key));
+    }
+
+    ///@brief The constant version of at() operator
+    const mapped_type& at(const key_type& key) const {
+        auto iter = find(key);
+        if (iter == end()) {
+            throw std::out_of_range("Invalid key");
+        }
+        return iter->second;
+    }
+
+    ///@brief Insert value
+    std::pair<iterator, bool> insert(const value_type& value) {
+        auto iter = lower_bound(value.first);
+        if (iter != end() && keys_equivalent(iter->first, value.first)) {
+            //Found existing
+            return std::make_pair(iter, false);
+        } else {
+            //Insert
+            iter = insert(iter, value);
+
+            return std::make_pair(iter, true);
+        }
+    }
+
+    ///@brief Emplace function
+    std::pair<iterator, bool> emplace(const value_type&& value) {
+        auto iter = lower_bound(value.first);
+        if (iter != end() && keys_equivalent(iter->first, value.first)) {
+            //Found existing
+            return std::make_pair(iter, false);
+        } else {
+            //Emplace
+            iter = emplace(iter, value);
+
+            return std::make_pair(iter, true);
+        }
+    }
+
+    ///@brief Insert value with position hint (the hint must keep the elements sorted; this is asserted)
+    iterator insert(const_iterator position, const value_type& value) {
+        //In a legal position: the preceding element (if any) must order before value
+        VTR_ASSERT(position == begin() || value_comp()(*(position - 1), value));
+        //Test for end() first so that position + 1 is only formed for a dereferenceable position
+        VTR_ASSERT(position == end() || (position + 1) == end() || !value_comp()(*(position + 1), value));
+
+        iterator iter = vec_.insert(position, value);
+        return iter;
+    }
+
+    ///@brief Emplace value with position hint (the hint must keep the elements sorted; this is asserted)
+    iterator emplace(const_iterator position, const value_type& value) {
+        //In a legal position: the preceding element (if any) must order before value
+        VTR_ASSERT(position == begin() || value_comp()(*(position - 1), value));
+        //Test for end() first so that position + 1 is only formed for a dereferenceable position
+        VTR_ASSERT(position == end() || (position + 1) == end() || !value_comp()(*(position + 1), value));
+
+        iterator iter = vec_.emplace(position, value);
+        return iter;
+    }
+
+    ///@brief Insert range
+    template<class InputIterator>
+    void insert(InputIterator first, InputIterator last) {
+        vec_.insert(vec_.end(), first, last);
+
+        //TODO: could be more efficient
+        sort();
+        uniquify();
+    }
+
+    ///@brief Erase by key
+    void erase(const key_type& key) {
+        auto iter = find(key);
+        if (iter != end()) {
+            vec_.erase(iter);
+        }
+    }
+
+    ///@brief Erase at iterator
+    void erase(const_iterator position) {
+        vec_.erase(position);
+    }
+
+    ///@brief Erase range
+    void erase(const_iterator first, const_iterator last) {
+        vec_.erase(first, last);
+    }
+
+    ///@brief swap two flat maps
+    void swap(flat_map& other) { std::swap(*this, other); }
+
+    ///@brief clear the flat map
+    void clear() { vec_.clear(); }
+
+    ///@brief templated emplace function: if no element with an equivalent key exists, a new element
+    ///constructed from (key, args...) is added. Returns the element's iterator and whether it was added
+    template<class... Args>
+    std::pair<iterator, bool> emplace(const key_type& key, Args&&... args) {
+        auto iter = lower_bound(key);
+        if (iter != end() && keys_equivalent(iter->first, key)) {
+            //Found
+            return std::make_pair(iter, false);
+        }
+        //Emplace
+        iter = emplace_hint(iter, key, std::forward<Args>(args)...);
+        return std::make_pair(iter, true);
+    }
+
+    ///@brief templated emplace_hint function
+    template<class... Args>
+    iterator emplace_hint(const_iterator position, Args&&... args) {
+        return vec_.emplace(position, std::forward<Args>(args)...);
+    }
+
+    ///@brief Reserve a minimum capacity for the underlying vector
+    void reserve(size_type n) { vec_.reserve(n); }
+
+    ///@brief Reduce the capacity of the underlying vector to fit its size
+    void shrink_to_fit() { vec_.shrink_to_fit(); }
+
+    ///@brief
+    key_compare key_comp() const { return key_compare(); }
+
+    ///@brief
+    value_compare value_comp() const { return value_compare(key_comp()); }
+
+    ///@brief Find a key and return an iterator to the found key
+    iterator find(const key_type& key) {
+        const_iterator const_iter = const_cast<const flat_map*>(this)->find(key);
+        return convert_to_iterator(const_iter);
+    }
+
+    ///@brief Find a key and return a constant iterator to the found key
+    const_iterator find(const key_type& key) const {
+        auto iter = lower_bound(key);
+        if (iter != end() && keys_equivalent(iter->first, key)) {
+            //Found
+            return iter;
+        }
+        return end();
+    }
+
+    ///@brief Return the count of occurances of a key
+    size_type count(const key_type& key) const {
+        return (find(key) == end()) ? 0 : 1;
+    }
+
+    ///@brief lower bound function
+    iterator lower_bound(const key_type& key) {
+        const_iterator const_iter = const_cast<const flat_map*>(this)->lower_bound(key);
+        return convert_to_iterator(const_iter);
+    }
+
+    ///@brief Return a constant iterator to the lower bound
+    const_iterator lower_bound(const key_type& key) const {
+        return std::lower_bound(begin(), end(), key, value_comp());
+    }
+
+    ///@brief upper bound function
+    iterator upper_bound(const key_type& key) {
+        const_iterator const_iter = const_cast<const flat_map*>(this)->upper_bound(key);
+        return convert_to_iterator(const_iter);
+    }
+
+    ///@brief Return a constant iterator to the upper bound
+    const_iterator upper_bound(const key_type& key) const {
+        return std::upper_bound(begin(), end(), key, value_comp());
+    }
+
+    ///@brief Returns a range containing all elements equivalent to "key" (the const result is mapped back with convert_to_iterator)
+    std::pair<iterator, iterator> equal_range(const key_type& key) {
+        auto const_iter_pair = const_cast<const flat_map*>(this)->equal_range(key);
+        return std::make_pair(convert_to_iterator(const_iter_pair.first), convert_to_iterator(const_iter_pair.second));
+    }
+
+    ///@brief Returns a constant range containing all elements equivalent to "key" (value_comp() compares stored pairs against the bare key)
+    std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const {
+        return std::equal_range(begin(), end(), key, value_comp());
+    }
+
+  public:
+    ///@brief Swaps 2 flat maps
+    friend void swap(flat_map& lhs, flat_map& rhs) { std::swap(lhs.vec_, rhs.vec_); }
+
+  private:
+    bool keys_equivalent(const key_type& lhs, const key_type& rhs) const {
+        return !key_comp()(lhs, rhs) && !key_comp()(rhs, lhs);
+    }
+
+    void sort() {
+        std::sort(vec_.begin(), vec_.end(), value_comp());
+    }
+
+    void uniquify() {
+        //Uniquify
+        auto key_equal_pred = [this](const value_type& lhs, const value_type& rhs) {
+            return !value_comp()(lhs, rhs) && !value_comp()(rhs, lhs);
+        };
+        vec_.erase(std::unique(vec_.begin(), vec_.end(), key_equal_pred), vec_.end());
+    }
+
+    iterator convert_to_iterator(const_iterator const_iter) {
+        /*
+         * A work around as there is no conversion between a const_iterator and iterator.
+         *
+         * We initialize i to the start of the container and then advance it by
+         * the distance to const_iter. The resulting i points to the same element
+         * as const_iter
+         *
+         * Note that to be able to call std::distance with an iterator and
+         * const_iterator we need to specify the type as const_iterator (relying
+         * on the implicit conversion from iterator to const_iterator for i)
+         *
+         * Since the iterators are really vector (i.e. random-access) iterators
+         * this takes constant time
+         */
+        iterator i = begin();
+        std::advance(i, std::distance<const_iterator>(i, const_iter));
+        return i;
+    }
+
+  private:
+    Storage vec_;
+};
+
+/**
+ * @brief Another flat_map container
+ *
+ * Like flat_map, but operator[] never inserts: it returns the mapped value, or throws std::logic_error if the key is absent
+ */
+template<class K, class T, class Compare, class Storage>
+class flat_map2 : public flat_map<K, T, Compare, Storage> {
+  public:
+    ///@brief Constructors (the vector-style one forwards to the base class using the same Storage type)
+    flat_map2() {}
+    explicit flat_map2(Storage&& values)
+        : flat_map<K, T, Compare, Storage>(std::move(values)) {}
+
+    ///@brief const [] operator
+    const T& operator[](const K& key) const {
+        auto itr = this->find(key);
+        if (itr == this->end()) {
+            throw std::logic_error("Key not found");
+        }
+        return itr->second;
+    }
+
+    ///@brief [] operator
+    T& operator[](const K& key) {
+        return const_cast<T&>(const_cast<const flat_map2*>(this)->operator[](key));
+    }
+};
+
+///@brief A class to perform the comparison operation for the flat map
+template<class K, class T, class Compare, class Storage>
+class flat_map<K, T, Compare, Storage>::value_compare {
+    friend class flat_map;
+
+  public:
+    bool operator()(const value_type& x, const value_type& y) const {
+        return comp(x.first, y.first);
+    }
+
+    //For std::lower_bound, std::upper_bound
+    bool operator()(const value_type& x, const key_type& y) const {
+        return comp(x.first, y);
+    }
+    bool operator()(const key_type& x, const value_type& y) const {
+        return comp(x, y.first);
+    }
+
+  private:
+    value_compare(Compare c)
+        : comp(c) {}
+
+    Compare comp;
+};
+
+} // namespace vtr
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_geometry.h b/third_party/vtr/libs/vtrutil/src/vtr_geometry.h
new file mode 100644
index 000000000..3685c3086
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_geometry.h
@@ -0,0 +1,312 @@
+#ifndef VTR_GEOMETRY_H
+#define VTR_GEOMETRY_H
+#include "vtr_range.h"
+#include "vtr_assert.h"
+
+#include <cstdio> // vtr_geometry.tpp uses printf()
+
+#include <vector>
+#include <tuple>
+#include <limits>
+#include <type_traits>
+
+/**
+ * @file
+ * @brief   This file includes several different geometry classes
+ */
+
+namespace vtr {
+
+/*
+ * Forward declarations
+ */
+template<class T>
+class Point;
+
+template<class T>
+class Rect;
+
+template<class T>
+class Line;
+
+template<class T>
+class RectUnion;
+
+template<class T>
+bool operator==(Point<T> lhs, Point<T> rhs);
+template<class T>
+bool operator!=(Point<T> lhs, Point<T> rhs);
+template<class T>
+bool operator<(Point<T> lhs, Point<T> rhs);
+
+template<class T>
+bool operator==(const Rect<T>& lhs, const Rect<T>& rhs);
+template<class T>
+bool operator!=(const Rect<T>& lhs, const Rect<T>& rhs);
+
+template<class T>
+bool operator==(const RectUnion<T>& lhs, const RectUnion<T>& rhs);
+template<class T>
+bool operator!=(const RectUnion<T>& lhs, const RectUnion<T>& rhs);
+/*
+ * Class Definitions
+ */
+
+/**
+ * @brief A point in 2D space
+ *
+ * This class represents a point in 2D space. Hence, it holds both
+ * x and y components of the point. 
+ */
+template<class T>
+class Point {
+  public: //Constructors
+    Point(T x_val, T y_val) noexcept;
+
+  public: //Accessors
+    ///@brief x coordinate
+    T x() const;
+
+    ///@brief y coordinate
+    T y() const;
+
+    ///@brief == operator
+    friend bool operator== <>(Point<T> lhs, Point<T> rhs);
+
+    ///@brief != operator
+    friend bool operator!= <>(Point<T> lhs, Point<T> rhs);
+
+    ///@brief < operator
+    friend bool operator< <>(Point<T> lhs, Point<T> rhs);
+
+  public: //Mutators
+    ///@brief Set x and y values
+    void set(T x_val, T y_val);
+
+    ///@brief set x value
+    void set_x(T x_val);
+
+    ///@brief set y value
+    void set_y(T y_val);
+
+    ///@brief Swap x and y values
+    void swap();
+
+  private:
+    T x_;
+    T y_;
+};
+
+/**
+ * @brief A 2D rectangle
+ *
+ * This class represents a 2D rectangle. It can be created from its four edge
+ * coordinates (left, bottom, right, top) or from its bottom-left and top-right points
+ */
+template<class T>
+class Rect {
+  public: //Constructors
+    ///@brief default constructor (both corners at (0, 0))
+    Rect();
+
+    ///@brief construct from the left, bottom, right and top coordinates
+    Rect(T left_val, T bottom_val, T right_val, T top_val);
+
+    ///@brief construct using the bottom left and the top right points
+    Rect(Point<T> bottom_left_val, Point<T> top_right_val);
+
+    /**
+     * @brief Constructs a rectangle that only contains the given point
+     *
+     * Rect(p1).contains(p2) => p1 == p2
+     * It is only enabled for integral types, because making this work for floating point types would be difficult and brittle.
+     * The following line only enables the constructor if std::is_integral<U>::value == true (U defaults to T)
+     */
+    template<typename U = T, typename std::enable_if<std::is_integral<U>::value>::type...>
+    Rect(Point<U> point);
+
+  public: //Accessors
+    ///@brief xmin coordinate
+    T xmin() const;
+
+    ///@brief xmax coordinate
+    T xmax() const;
+
+    ///@brief ymin coordinate
+    T ymin() const;
+
+    ///@brief ymax coordinate
+    T ymax() const;
+
+    ///@brief Return the bottom left point
+    Point<T> bottom_left() const;
+
+    ///@brief Return the top right point
+    Point<T> top_right() const;
+
+    ///@brief Return the rectangle width (xmax - xmin)
+    T width() const;
+
+    ///@brief Return the rectangle height (ymax - ymin)
+    T height() const;
+
+    ///@brief Returns true if the point is fully contained within the rectangle (excluding the top-right edges)
+    bool contains(Point<T> point) const;
+
+    ///@brief Returns true if the point is strictly contained within the region (excluding all edges)
+    bool strictly_contains(Point<T> point) const;
+
+    ///@brief Returns true if the point is coincident with the rectangle (including the top-right edges)
+    bool coincident(Point<T> point) const;
+
+    ///@brief Returns true if other is contained within the rectangle (including all edges)
+    bool contains(const Rect<T>& other) const;
+
+    /**
+     * @brief Checks whether the rectangle is empty
+     *
+     * Returns true if no points are contained in the rectangle
+     * rect.empty() => not exists p. rect.contains(p)
+     * This also implies either the width or height is less than or equal to 0.
+     */
+    bool empty() const;
+
+    ///@brief == operator
+    friend bool operator== <>(const Rect<T>& lhs, const Rect<T>& rhs);
+
+    ///@brief != operator
+    friend bool operator!= <>(const Rect<T>& lhs, const Rect<T>& rhs);
+
+  public: //Mutators
+    ///@brief set the xmin coordinate
+    void set_xmin(T xmin_val);
+
+    ///@brief set the ymin coordinate
+    void set_ymin(T ymin_val);
+
+    ///@brief set the xmax coordinate
+    void set_xmax(T xmax_val);
+
+    ///@brief set the ymax coordinate
+    void set_ymax(T ymax_val);
+
+    ///@brief Equivalent to `*this = bounding_box(*this, other)`
+    Rect<T>& expand_bounding_box(const Rect<T>& other);
+
+  private:
+    Point<T> bottom_left_;
+    Point<T> top_right_;
+};
+
+/**
+ * @brief Return the smallest rectangle containing both given rectangles
+ *
+ * Note that this isn't a union and the resulting rectangle may include points not in either given rectangle
+ */
+template<class T>
+Rect<T> bounding_box(const Rect<T>& lhs, const Rect<T>& rhs);
+
+///@brief Return the intersection of two given rectangles
+template<class T>
+Rect<T> intersection(const Rect<T>& lhs, const Rect<T>& rhs);
+
+//Prints a rectangle
+template<class T>
+static void print_rect(FILE* fp, const Rect<T> rect);
+
+//Sample on a uniformly spaced grid within a rectangle
+//  sample(vtr::Rect(l, h), 0, 0, M) == l
+//  sample(vtr::Rect(l, h), M, M, M) == h
+//To avoid the edges, use `sample(r, x+1, y+1, N+1)` for x, y in 0..N-1
+//Only defined for integral types
+
+/**
+ * @brief Sample on a uniformly spaced grid within a rectangle
+ *
+ * sample(vtr::Rect(l, h), 0, 0, M) == l
+ * sample(vtr::Rect(l, h), M, M, M) == h
+ * To avoid the edges, use `sample(r, x+1, y+1, N+1)` for x, y in 0..N-1
+ * Only defined for integral types
+ */
+
+template<typename T, typename std::enable_if<std::is_integral<T>::value>::type...>
+Point<T> sample(const vtr::Rect<T>& r, T x, T y, T d);
+
+///@brief clamps v to be between low (lo) and high (hi), inclusive.
+template<class T>
+static constexpr const T& clamp(const T& v, const T& lo, const T& hi) {
+    return std::min(std::max(v, lo), hi);
+}
+
+/**
+ * @brief A 2D line
+ *
+ * It is constructed using a vector of the line points
+ */
+template<class T>
+class Line {
+  public: //Types
+    typedef typename std::vector<Point<T>>::const_iterator point_iter;
+    typedef vtr::Range<point_iter> point_range;
+
+  public: //Constructors
+    ///@brief constructor (stores a copy of the given points)
+    Line(std::vector<Point<T>> line_points);
+
+  public: //Accessors
+    ///@brief Returns the bounding box (min/max of the x and y coordinates over all points)
+    Rect<T> bounding_box() const;
+
+    ///@brief Returns a range of constituent points
+    point_range points() const;
+
+  private:
+    std::vector<Point<T>> points_;
+};
+
+///@brief A union of 2d rectangles
+template<class T>
+class RectUnion {
+  public: //Types
+    typedef typename std::vector<Rect<T>>::const_iterator rect_iter;
+    typedef vtr::Range<rect_iter> rect_range;
+
+  public: //Constructors
+    ///@brief Construct from a set of rectangles
+    RectUnion(std::vector<Rect<T>> rects);
+
+  public: //Accessors
+    ///@brief Returns the bounding box of all rectangles in the union
+    Rect<T> bounding_box() const;
+
+    ///@brief Returns true if the point is fully contained within the region (excluding top-right edges)
+    bool contains(Point<T> point) const;
+
+    ///@brief Returns true if the point is strictly contained (excluding all edges) within at least one constituent rectangle
+    bool strictly_contains(Point<T> point) const;
+
+    ///@brief Returns true if the point is coincident with the region (including the top-right edges)
+    bool coincident(Point<T> point) const;
+
+    ///@brief Returns a range of all constituent rectangles
+    rect_range rects() const;
+
+    /**
+     * @brief Checks whether two RectUnions have identical representations
+     *
+     * Note: does not check whether different representations are equivalent
+     */
+    friend bool operator== <>(const RectUnion<T>& lhs, const RectUnion<T>& rhs);
+
+    ///@brief != operator
+    friend bool operator!= <>(const RectUnion<T>& lhs, const RectUnion<T>& rhs);
+
+  private:
+    // Note that a union of rectangles may have holes and may not be contiguous
+    std::vector<Rect<T>> rects_;
+};
+
+} // namespace vtr
+
+#include "vtr_geometry.tpp"
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_geometry.tpp b/third_party/vtr/libs/vtrutil/src/vtr_geometry.tpp
new file mode 100644
index 000000000..2010700fc
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_geometry.tpp
@@ -0,0 +1,347 @@
+namespace vtr {
+/*
+ * Point
+ */
+
+template<class T>
+Point<T>::Point(T x_val, T y_val) noexcept
+    : x_(x_val)
+    , y_(y_val) {
+    //pass
+}
+
+template<class T>
+T Point<T>::x() const {
+    return x_;
+}
+
+template<class T>
+T Point<T>::y() const {
+    return y_;
+}
+
+template<class T>
+bool operator==(Point<T> lhs, Point<T> rhs) {
+    return lhs.x() == rhs.x()
+           && lhs.y() == rhs.y();
+}
+
+template<class T>
+bool operator!=(Point<T> lhs, Point<T> rhs) {
+    return !(lhs == rhs);
+}
+
+template<class T>
+bool operator<(Point<T> lhs, Point<T> rhs) {
+    return std::make_tuple(lhs.x(), lhs.y()) < std::make_tuple(rhs.x(), rhs.y());
+}
+
+//Mutators
+template<class T>
+void Point<T>::set(T x_val, T y_val) {
+    x_ = x_val;
+    y_ = y_val;
+}
+
+template<class T>
+void Point<T>::set_x(T x_val) {
+    x_ = x_val;
+}
+
+template<class T>
+void Point<T>::set_y(T y_val) {
+    y_ = y_val;
+}
+
+template<class T>
+void Point<T>::swap() {
+    std::swap(x_, y_);
+}
+
+/*
+ * Rect
+ */
+template<class T>
+Rect<T>::Rect()
+    : Rect<T>(Point<T>(0, 0), Point<T>(0, 0)) {
+    //pass
+}
+
+template<class T>
+Rect<T>::Rect(T left_val, T bottom_val, T right_val, T top_val)
+    : Rect<T>(Point<T>(left_val, bottom_val), Point<T>(right_val, top_val)) {
+    //pass
+}
+
+template<class T>
+Rect<T>::Rect(Point<T> bottom_left_val, Point<T> top_right_val)
+    : bottom_left_(bottom_left_val)
+    , top_right_(top_right_val) {
+    //pass
+}
+
+//Only defined for integral types
+template<class T>
+template<typename U, typename std::enable_if<std::is_integral<U>::value>::type...>
+Rect<T>::Rect(Point<U> point)
+    : bottom_left_(point)
+    , top_right_(point.x() + 1,
+                 point.y() + 1) {
+    //pass
+}
+
+template<class T>
+T Rect<T>::xmin() const {
+    return bottom_left_.x();
+}
+
+template<class T>
+T Rect<T>::xmax() const {
+    return top_right_.x();
+}
+
+template<class T>
+T Rect<T>::ymin() const {
+    return bottom_left_.y();
+}
+
+template<class T>
+T Rect<T>::ymax() const {
+    return top_right_.y();
+}
+
+template<class T>
+Point<T> Rect<T>::bottom_left() const {
+    return bottom_left_;
+}
+
+template<class T>
+Point<T> Rect<T>::top_right() const {
+    return top_right_;
+}
+
+template<class T>
+T Rect<T>::width() const {
+    return xmax() - xmin();
+}
+
+template<class T>
+T Rect<T>::height() const {
+    return ymax() - ymin();
+}
+
+template<class T>
+bool Rect<T>::contains(Point<T> point) const {
+    //Up-to but not including right or top edges
+    return point.x() >= xmin() && point.x() < xmax()
+           && point.y() >= ymin() && point.y() < ymax();
+}
+
+template<class T>
+bool Rect<T>::strictly_contains(Point<T> point) const {
+    //Excluding edges
+    return point.x() > xmin() && point.x() < xmax()
+           && point.y() > ymin() && point.y() < ymax();
+}
+
+template<class T>
+bool Rect<T>::coincident(Point<T> point) const {
+    //Including right or top edges
+    return point.x() >= xmin() && point.x() <= xmax()
+           && point.y() >= ymin() && point.y() <= ymax();
+}
+
+template<class T>
+bool Rect<T>::contains(const Rect<T>& other) const {
+    //Including all edges
+    return other.xmin() >= xmin() && other.xmax() <= xmax()
+           && other.ymin() >= ymin() && other.ymax() <= ymax();
+}
+
+template<class T>
+bool Rect<T>::empty() const {
+    return xmax() <= xmin() || ymax() <= ymin();
+}
+
+template<class T>
+bool operator==(const Rect<T>& lhs, const Rect<T>& rhs) {
+    return lhs.bottom_left() == rhs.bottom_left()
+           && lhs.top_right() == rhs.top_right();
+}
+
+template<class T>
+bool operator!=(const Rect<T>& lhs, const Rect<T>& rhs) {
+    return !(lhs == rhs);
+}
+
+template<class T>
+Rect<T> bounding_box(const Rect<T>& lhs, const Rect<T>& rhs) {
+    return Rect<T>(std::min(lhs.xmin(), rhs.xmin()),
+                   std::min(lhs.ymin(), rhs.ymin()),
+                   std::max(lhs.xmax(), rhs.xmax()),
+                   std::max(lhs.ymax(), rhs.ymax()));
+}
+
+template<class T>
+Rect<T> intersection(const Rect<T>& lhs, const Rect<T>& rhs) {
+    return Rect<T>(std::max(lhs.xmin(), rhs.xmin()),
+                   std::max(lhs.ymin(), rhs.ymin()),
+                   std::min(lhs.xmax(), rhs.xmax()),
+                   std::min(lhs.ymax(), rhs.ymax()));
+}
+template<class T>
+static void print_rect(FILE* fp, const Rect<T> rect) { //Writes the four edge coordinates of rect to fp, one per line
+    fprintf(fp, "\txmin: %lld\n", static_cast<long long>(rect.xmin())); //Cast so the argument matches the format for any T
+    fprintf(fp, "\tymin: %lld\n", static_cast<long long>(rect.ymin())); //(passing a non-int T to %d is undefined behaviour)
+    fprintf(fp, "\txmax: %lld\n", static_cast<long long>(rect.xmax())); //Floating point coordinates are truncated
+    fprintf(fp, "\tymax: %lld\n", static_cast<long long>(rect.ymax()));
+}
+//Only defined for integral types
+template<typename T, typename std::enable_if<std::is_integral<T>::value>::type...>
+Point<T> sample(const vtr::Rect<T>& r, T x, T y, T d) {
+    VTR_ASSERT(d > 0 && x <= d && y <= d && !r.empty());
+    return Point<T>((r.xmin() * (d - x) + r.xmax() * x + d / 2) / d,
+                    (r.ymin() * (d - y) + r.ymax() * y + d / 2) / d);
+}
+
+template<class T>
+void Rect<T>::set_xmin(T xmin_val) {
+    bottom_left_.set_x(xmin_val);
+}
+
+template<class T>
+void Rect<T>::set_ymin(T ymin_val) {
+    bottom_left_.set_y(ymin_val);
+}
+
+template<class T>
+void Rect<T>::set_xmax(T xmax_val) {
+    top_right_.set_x(xmax_val);
+}
+
+template<class T>
+void Rect<T>::set_ymax(T ymax_val) {
+    top_right_.set_y(ymax_val);
+}
+
+template<class T>
+Rect<T>& Rect<T>::expand_bounding_box(const Rect<T>& other) {
+    *this = bounding_box(*this, other);
+    return *this;
+}
+
+/*
+ * Line
+ */
+template<class T>
+Line<T>::Line(std::vector<Point<T>> line_points)
+    : points_(line_points) {
+    //pass
+}
+
+template<class T>
+Rect<T> Line<T>::bounding_box() const { //Smallest Rect spanning every point of the line
+    T xmin = std::numeric_limits<T>::max();
+    T ymin = std::numeric_limits<T>::max();
+    T xmax = std::numeric_limits<T>::lowest(); //lowest(), not min(): for floating point T min() is the smallest positive value
+    T ymax = std::numeric_limits<T>::lowest();
+    //With no points the accumulators keep their initial values (an inverted rectangle)
+    for (const auto& point : points()) {
+        xmin = std::min(xmin, point.x());
+        ymin = std::min(ymin, point.y());
+        xmax = std::max(xmax, point.x());
+        ymax = std::max(ymax, point.y());
+    }
+
+    return Rect<T>(xmin, ymin, xmax, ymax);
+}
+
+template<class T>
+typename Line<T>::point_range Line<T>::points() const {
+    return vtr::make_range(points_.begin(), points_.end());
+}
+
+/*
+ * RectUnion
+ */
+template<class T>
+RectUnion<T>::RectUnion(std::vector<Rect<T>> rectangles)
+    : rects_(rectangles) {
+    //pass
+}
+
+template<class T>
+Rect<T> RectUnion<T>::bounding_box() const { //Smallest Rect spanning every constituent rectangle
+    T xmin = std::numeric_limits<T>::max();
+    T ymin = std::numeric_limits<T>::max();
+    T xmax = std::numeric_limits<T>::lowest(); //lowest(), not min(): for floating point T min() is the smallest positive value
+    T ymax = std::numeric_limits<T>::lowest();
+    //With no rectangles the accumulators keep their initial values (an inverted rectangle)
+    for (const auto& rect : rects_) {
+        xmin = std::min(xmin, rect.xmin());
+        ymin = std::min(ymin, rect.ymin());
+        xmax = std::max(xmax, rect.xmax());
+        ymax = std::max(ymax, rect.ymax());
+    }
+
+    return Rect<T>(xmin, ymin, xmax, ymax);
+}
+
+template<class T>
+bool RectUnion<T>::contains(Point<T> point) const {
+    for (const auto& rect : rects()) {
+        if (rect.contains(point)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+template<class T>
+bool RectUnion<T>::strictly_contains(Point<T> point) const {
+    for (const auto& rect : rects()) {
+        if (rect.strictly_contains(point)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+template<class T>
+bool RectUnion<T>::coincident(Point<T> point) const {
+    for (const auto& rect : rects()) {
+        if (rect.coincident(point)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+template<class T>
+typename RectUnion<T>::rect_range RectUnion<T>::rects() const {
+    return vtr::make_range(rects_.begin(), rects_.end());
+}
+
+template<class T>
+bool operator==(const RectUnion<T>& lhs, const RectUnion<T>& rhs) {
+    //Currently checks for an identical *representation* (not whether the
+    //representations are equivalent)
+
+    if (lhs.rects_.size() != rhs.rects_.size()) {
+        return false;
+    }
+
+    for (size_t i = 0; i < lhs.rects_.size(); ++i) {
+        if (lhs.rects_[i] != rhs.rects_[i]) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+template<class T>
+bool operator!=(const RectUnion<T>& lhs, const RectUnion<T>& rhs) {
+    return !(lhs == rhs);
+}
+} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_hash.h b/third_party/vtr/libs/vtrutil/src/vtr_hash.h
new file mode 100644
index 000000000..7e8e6fa42
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_hash.h
@@ -0,0 +1,30 @@
+#ifndef VTR_HASH_H
+#define VTR_HASH_H
+#include <functional>
+
+namespace vtr {
+
+/**
+ * @brief Hashes v and combines it with seed (as in boost)
+ *
+ * This is typically used to implement std::hash for composite types.
+ */
+template<class T>
+inline void hash_combine(std::size_t& seed, const T& v) {
+    std::hash<T> hasher;
+    seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+}
+
+struct hash_pair { //Hash functor for std::pair keys
+    template<class T1, class T2>
+    std::size_t operator()(const std::pair<T1, T2>& pair) const noexcept {
+        //Order-sensitive combine: a plain XOR gives (a, b) and (b, a) the same hash and maps every (a, a) to 0
+        std::size_t seed = 0;
+        hash_combine(seed, pair.first);
+        hash_combine(seed, pair.second);
+        return seed;
+    }
+};
+
+} // namespace vtr
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_linear_map.h b/third_party/vtr/libs/vtrutil/src/vtr_linear_map.h
new file mode 100644
index 000000000..c0ef38cfc
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_linear_map.h
@@ -0,0 +1,312 @@
+#ifndef VTR_LINEAR_MAP_H
+#define VTR_LINEAR_MAP_H
+#include <vector>
+#include <stdexcept>
+
+#include "vtr_sentinels.h"
+
+namespace vtr {
+/**
+ * @brief A std::map-like container which is indexed by K
+ *
+ * The main use of this container is to behave like a std::map which is optimized to hold
+ * mappings between a dense linear range of keys (e.g. vtr::StrongId).
+ *
+ * Requires that K be convertible to size_t with the size_t operator (i.e. size_t()), and
+ * that the conversion results in a linearly increasing index into the underlying vector.
+ * Also requires that K() return the sentinel value used to mark invalid entries.
+ *
+ * If you only need to access the value associated with the key consider using vtr::vector_map
+ * instead, which provides a similar but more std::vector-like interface.
+ *
+ * Note that it is possible to use linear_map with sparse/non-contiguous keys, but this is typically
+ * memory inefficient as the underlying vector will allocate space for [0..size_t(max_key)],
+ * where max_key is the largest key that has been inserted.
+ *
+ * As with a std::vector, it is the caller's responsibility to ensure there is sufficient space
+ * for a given index/key before it is accessed. The exceptions to this are the find() and insert()
+ * methods which handle non-existing keys gracefully.
+ */
+template<class K, class T, class Sentinel = DefaultSentinel<K>>
+class linear_map {
+  public:
+    typedef K key_type;
+    typedef T mapped_type;
+    typedef std::pair<K, T> value_type;
+    typedef value_type& reference;
+    typedef const value_type& const_reference;
+    typedef typename std::vector<value_type>::iterator iterator;
+    typedef typename std::vector<value_type>::const_iterator const_iterator;
+    typedef typename std::vector<value_type>::reverse_iterator reverse_iterator;
+    typedef typename std::vector<value_type>::const_reverse_iterator const_reverse_iterator;
+    typedef typename std::vector<value_type>::difference_type difference_type;
+    typedef typename std::vector<value_type>::size_type size_type;
+
+  public:
+    ///@brief Standard big 5 constructors
+    linear_map() = default;
+    linear_map(const linear_map&) = default;
+    linear_map(linear_map&&) = default;
+    linear_map& operator=(const linear_map&) = default;
+    linear_map& operator=(linear_map&&) = default;
+
+    linear_map(size_t num_keys)
+        : vec_(num_keys, std::make_pair(sentinel(), T())) //Initialize all with sentinel values
+    {}
+
+    ///@brief Return an iterator to the first element
+    iterator begin() { return vec_.begin(); }
+
+    ///@brief Return a constant iterator to the first element
+    const_iterator begin() const { return vec_.begin(); }
+
+    ///@brief Return an iterator to the past-the-end element
+    iterator end() { return vec_.end(); }
+
+    ///@brief Return a constant iterator to the past-the-end element
+    const_iterator end() const { return vec_.end(); }
+
+    ///@brief Return a reverse iterator to the last element
+    reverse_iterator rbegin() { return vec_.rbegin(); }
+
+    ///@brief Return a constant reverse iterator to the last element
+    const_reverse_iterator rbegin() const { return vec_.rbegin(); }
+
+    ///@brief Return a reverse iterator pointing to the theoretical element preceding the first element
+    reverse_iterator rend() { return vec_.rend(); }
+
+    ///@brief Return a constant reverse iterator pointing to the theoretical element preceding the first element
+    const_reverse_iterator rend() const { return vec_.rend(); }
+
+    ///@brief Return a const iterator to the first element
+    const_iterator cbegin() const { return vec_.begin(); }
+
+    ///@brief Return a const_iterator pointing to the past-the-end element in the container
+    const_iterator cend() const { return vec_.end(); }
+
+    ///@brief Return a const_reverse_iterator pointing to the last element in the container (i.e., its reverse beginning).
+    const_reverse_iterator crbegin() const { return vec_.rbegin(); }
+
+    ///@brief Return a const_reverse_iterator pointing to the theoretical element preceding the first element in the container (which is considered its reverse end).
+    const_reverse_iterator crend() const { return vec_.rend(); }
+
+    ///@brief Return true if the container is empty
+    bool empty() const { return vec_.empty(); }
+
+    ///@brief Return the size of the container
+    size_type size() const { return vec_.size(); }
+
+    ///@brief Return the maximum size of the container
+    size_type max_size() const { return vec_.max_size(); }
+
+    ///@brief [] operator
+    mapped_type& operator[](const key_type& key) {
+        auto iter = find(key);
+        if (iter == end()) {
+            //Not found, create it
+            iter = insert(std::make_pair(key, mapped_type())).first;
+        }
+
+        return iter->second;
+    }
+
+    ///@brief at() operator
+    mapped_type& at(const key_type& key) {
+        return const_cast<mapped_type&>(const_cast<const linear_map*>(this)->at(key));
+    }
+
+    ///@brief constant at() operator
+    const mapped_type& at(const key_type& key) const {
+        auto iter = find(key);
+        if (iter == end()) {
+            throw std::out_of_range("Invalid key");
+        }
+        return iter->second;
+    }
+
+    ///@brief Insert value
+    std::pair<iterator, bool> insert(const value_type& value) {
+        auto iter = find(value.first);
+        if (iter != end()) {
+            //Found existing
+            return std::make_pair(iter, false);
+        } else {
+            //Insert
+            size_t index = size_t(value.first);
+
+            if (index >= vec_.size()) {
+                //Make space, initialize empty slots with sentinel values
+                vec_.resize(index + 1, std::make_pair(sentinel(), T()));
+            }
+
+            vec_[index] = value;
+
+            return std::make_pair(vec_.begin() + index, true);
+        }
+    }
+
+    ///@brief Insert range
+    template<class InputIterator>
+    void insert(InputIterator first, InputIterator last) {
+        for (InputIterator iter = first; iter != last; ++iter) {
+            insert(*iter);
+        }
+    }
+
+    ///@brief Erase by key
+    void erase(const key_type& key) {
+        auto iter = find(key);
+        if (iter != end()) {
+            erase(iter);
+        }
+    }
+
+    ///@brief Erase at iterator
+    void erase(const_iterator position) {
+        iterator pos = convert_to_iterator(position);
+        pos->first = sentinel(); //Mark invalid
+    }
+
+    ///@brief Erase range
+    void erase(const_iterator first, const_iterator last) {
+        for (auto iter = first; iter != last; ++iter) {
+            erase(iter);
+        }
+    }
+
+    ///@brief Swap two linear maps
+    void swap(linear_map& other) { std::swap(vec_, other.vec_); }
+
+    ///@brief Clear the container
+    void clear() { vec_.clear(); }
+
+    ///@brief Emplace: if key is absent, store a mapped value constructed from args; returns (iterator, inserted?)
+    template<class... Args>
+    std::pair<iterator, bool> emplace(const key_type& key, Args&&... args) {
+        auto iter = find(key);
+        if (iter != end()) {
+            //Found: leave the existing entry untouched
+            return std::make_pair(iter, false);
+        } else {
+            //Emplace
+            size_t index = size_t(key);
+
+            if (index >= vec_.size()) {
+                //Make space, initialize empty slots with sentinel values
+                vec_.resize(index + 1, value_type(sentinel(), T()));
+            }
+            //Build the mapped value from the whole pack; value_type(key, args...) only compiles for exactly one argument
+            vec_[index] = value_type(key, mapped_type(std::forward<Args>(args)...));
+
+            return std::make_pair(vec_.begin() + index, true);
+        }
+    }
+
+    ///@brief Requests that the underlying vector capacity be at least enough to contain n elements.
+    void reserve(size_type n) { vec_.reserve(n); }
+
+    ///@brief Reduces the capacity of the container to fit its size and destroys all elements beyond the capacity.
+    void shrink_to_fit() { vec_.shrink_to_fit(); }
+
+    ///@brief Returns an iterator to the first element in the range [first,last) that compares equal to val. If no such element is found, the function returns last.
+    iterator find(const key_type& key) {
+        const_iterator const_iter = const_cast<const linear_map*>(this)->find(key);
+        return convert_to_iterator(const_iter);
+    }
+
+    ///@brief Returns a constant iterator to the first element in the range [first,last) that compares equal to val. If no such element is found, the function returns last.
+    const_iterator find(const key_type& key) const {
+        size_t index = size_t(key);
+
+        if (index < vec_.size() && vec_[index].first != sentinel()) {
+            return vec_.begin() + index;
+        }
+        return end();
+    }
+
+    ///@brief Returns the number of elements in the range [first,last) that compare equal to val.
+    size_type count(const key_type& key) const {
+        return (find(key) == end()) ? 0 : 1;
+    }
+
+    ///@brief Returns an iterator pointing to the first element in the range [first,last) which does not compare less than val.
+    iterator lower_bound(const key_type& key) {
+        const_iterator const_iter = const_cast<const linear_map*>(this)->lower_bound(key);
+        return convert_to_iterator(const_iter);
+    }
+
+    ///@brief Returns a constant iterator pointing to the first element in the range [first,last) which does not compare less than val.
+    const_iterator lower_bound(const key_type& key) const {
+        return find(key);
+    }
+
+    ///@brief Returns an iterator pointing to the first element in the range [first,last) which compares greater than val.
+    iterator upper_bound(const key_type& key) {
+        const_iterator const_iter = const_cast<const linear_map*>(this)->upper_bound(key);
+        return convert_to_iterator(const_iter);
+    }
+
+    ///@brief Returns a constant iterator pointing to the first element in the range [first,last) which compares greater than val.
+    const_iterator upper_bound(const key_type& key) const {
+        auto iter = find(key);
+        return (iter != end()) ? iter + 1 : end();
+    }
+
+    ///@brief Returns the [lower_bound, upper_bound) iterator pair for key; both are equal when key is not present
+    std::pair<iterator, iterator> equal_range(const key_type& key) {
+        auto const_iter_pair = const_cast<const linear_map*>(this)->equal_range(key);
+        return std::pair<iterator, iterator>(convert_to_iterator(const_iter_pair.first), convert_to_iterator(const_iter_pair.second)); //const_iterator is not convertible to iterator
+    }
+
+    ///@brief Returns constant bounds of the subrange that includes all the elements of the range [first,last) with values equivalent to val.
+    std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const {
+        auto lb_iter = lower_bound(key);
+        auto ub_iter = upper_bound(key);
+        return (lb_iter != end()) ? std::make_pair(lb_iter, ub_iter) : std::make_pair(ub_iter, ub_iter);
+    }
+
+    ///@brief Return the size of valid elements
+    size_type valid_size() const {
+        size_t valid_cnt = 0;
+        for (const auto& kv : vec_) {
+            if (kv.first != sentinel()) {
+                ++valid_cnt;
+            }
+        }
+        return valid_cnt;
+    }
+
+  public:
+    friend void swap(linear_map& lhs, linear_map& rhs) {
+        std::swap(lhs.vec_, rhs.vec_);
+    }
+
+  private:
+    iterator convert_to_iterator(const_iterator const_iter) {
+        /*
+         * This is a work around for the fact that there is no conversion between a const_iterator and iterator.
+         * 
+         * We intiailize i to the start of the container and then advance it by
+         * the distance to const_iter. The resulting i points to the same element
+         * as const_iter
+         *
+         * Note that to be able to call std::distance with an iterator and
+         * const_iterator we need to specify the type as const_iterator (relying
+         * on the implicit conversion from iterator to const_iterator for i)
+         *
+         * Since the iterators are really vector (i.e. random-access) iterators
+         * both distance and advance take constant time
+         */
+        iterator i = begin();
+        std::advance(i, std::distance<const_iterator>(i, const_iter));
+        return i;
+    }
+
+    constexpr K sentinel() const { return Sentinel::INVALID(); }
+
+  private:
+    std::vector<value_type> vec_;
+};
+
+} // namespace vtr
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_list.cc b/third_party/vtr/libs/vtrutil/src/vtr_list.cc
new file mode 100644
index 000000000..ce354dfe3
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_list.cc
@@ -0,0 +1,25 @@
+#include <cstdlib>
+
+#include "vtr_list.h"
+#include "vtr_memory.h"
+
+namespace vtr {
+
+t_linked_vptr* insert_in_vptr_list(t_linked_vptr* head, void* vptr_to_add) {
+    /* Inserts a new element at the head of a linked list of void pointers. *
+     * Returns the new head of the list.                                    */
+
+    return new t_linked_vptr{vptr_to_add, head}; /* New head of the list */
+}
+
+/* Deletes the element at the head of a linked list of void pointers. *
+ * Returns the new head of the list.                                    */
+t_linked_vptr* delete_in_vptr_list(t_linked_vptr* head) {
+    if (head == nullptr)
+        return nullptr;
+    t_linked_vptr* const linked_vptr = head->next;
+    delete head;
+    return linked_vptr; /* New head of the list */
+}
+
+} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_list.h b/third_party/vtr/libs/vtrutil/src/vtr_list.h
new file mode 100644
index 000000000..8403742c7
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_list.h
@@ -0,0 +1,24 @@
+#ifndef VTR_LIST_H
+#define VTR_LIST_H
+
+/**
+ * @file
+ * @brief Linked list of void pointers.
+ */
+
+namespace vtr {
+
+///@brief Linked list node struct
+struct t_linked_vptr {
+    void* data_vptr;
+    struct t_linked_vptr* next;
+};
+
+///@brief Inserts a node to a list
+t_linked_vptr* insert_in_vptr_list(t_linked_vptr* head,
+                                   void* vptr_to_add);
+
+///@brief Deletes the head node of a list and returns the new head
+t_linked_vptr* delete_in_vptr_list(t_linked_vptr* head);
+} // namespace vtr
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_log.cc b/third_party/vtr/libs/vtrutil/src/vtr_log.cc
new file mode 100644
index 000000000..0615e71ab
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_log.cc
@@ -0,0 +1,50 @@
+#include <string>
+#include <fstream>
+#include <cstdarg>
+
+#include "vtr_util.h"
+#include "vtr_log.h"
+#include "log.h"
+
+namespace vtr {
+PrintHandlerInfo printf = log_print_info;
+PrintHandlerInfo printf_info = log_print_info;
+PrintHandlerWarning printf_warning = log_print_warning;
+PrintHandlerError printf_error = log_print_error;
+PrintHandlerDirect printf_direct = log_print_direct;
+
+void set_log_file(const char* filename) {
+    log_set_output_file(filename);
+}
+
+} // namespace vtr
+
+void add_warnings_to_suppress(std::string function_name) {
+    warnings_to_suppress.insert(function_name);
+}
+
+void set_noisy_warn_log_file(std::string log_file_name) {
+    // Opening with trunc creates the file, or empties it if it already exists
+    std::ofstream truncated_log(log_file_name, std::ios_base::out | std::ios_base::trunc);
+    truncated_log.close();
+    noisy_warn_log_file = log_file_name;
+}
+
+void print_or_suppress_warning(const char* pszFileName, unsigned int lineNum, const char* pszFuncName, const char* pszMessage, ...) {
+    std::string function_name(pszFuncName);
+    // Format the message once; msg is final text from here on and must not be re-interpreted as a format string
+    va_list va_args;
+    va_start(va_args, pszMessage);
+    std::string msg = vtr::vstring_fmt(pszMessage, va_args);
+    va_end(va_args);
+    // Warnings from functions in warnings_to_suppress are diverted to noisy_warn_log_file (or dropped if none is set)
+    auto result = warnings_to_suppress.find(function_name);
+    if (result == warnings_to_suppress.end()) {
+        vtr::printf_warning(pszFileName, lineNum, "%s", msg.c_str());
+    } else if (!noisy_warn_log_file.empty()) {
+        std::ofstream log;
+        log.open(noisy_warn_log_file.data(), std::ios_base::app);
+        log << "Warning:\n\tfile: " << pszFileName << "\n\tline: " << lineNum << "\n\tmessage: " << msg << std::endl;
+        log.close();
+    }
+}
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_log.h b/third_party/vtr/libs/vtrutil/src/vtr_log.h
new file mode 100644
index 000000000..3c52e249d
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_log.h
@@ -0,0 +1,167 @@
+#ifndef VTR_LOG_H
+#define VTR_LOG_H
+#include <tuple>
+#include <unordered_set>
+#include <string>
+
+/**
+ * @file
+ * @brief This header defines useful logging macros for VTR projects.
+ *
+ * Message Type
+ * ============
+ *
+ * Three types of log message types are defined:
+ *     - VTR_LOG         : The standard 'info' type log message
+ *     - VTR_LOG_WARN    : A warning log message. This represents an unusual condition that may indicate an issue but execution continues
+ *     - VTR_LOG_ERROR   : An error log message. This represents a clear issue that should result in stopping the program execution. Please note that using this log message will not actually terminate the program. So a VtrError should be thrown after all the necessary VTR_LOG_ERROR messages are printed.
+ * 
+ * For example:
+ *
+ *      VTR_LOG("This produces a regular '%s' message\n", "info");
+ *      VTR_LOG_WARN("This produces a '%s' message\n", "warning");
+ *      VTR_LOG_ERROR("This produces an '%s' message\n", "error");
+ *
+ * Conditional Logging
+ * ===================
+ *
+ * Each of the three message types also has a VTR_LOGV_* variant,
+ * which will cause the message to be logged if a user-defined condition
+ * is satisfied.
+ *
+ * For example:
+ *
+ *      VTR_LOGV(verbosity > 5, "This message will be logged only if verbosity is greater than %d\n", 5);
+ *      VTR_LOGV_WARN(verbose, "This warning message will be logged if verbose is true\n");
+ *      VTR_LOGV_ERROR(false, "This error message will never be logged\n");
+ *
+ * Custom Location Logging
+ * =======================
+ *
+ * Each of the three message types also has a VTR_LOGF_* variant,
+ * which will cause the message to be logged for a custom file and line.
+ *
+ * For example:
+ *
+ *      VTR_LOGF("my_file.txt", 42, "This message will be logged from file 'my_file.txt' line %d\n", 42);
+ *  
+ * Debug Logging
+ * =============
+ *
+ * For debug purposes it may be useful to have additional logging.
+ * This is supported by VTR_LOG_DEBUG() and VTR_LOGV_DEBUG().
+ *
+ * To avoid run-time overhead, these are only enabled if VTR_ENABLE_DEBUG_LOGGING 
+ * is defined (disabled by default).
+ */
+
+// Unconditional logging macros
+#define VTR_LOG(...) VTR_LOGV(true, __VA_ARGS__)
+#define VTR_LOG_WARN(...) VTR_LOGV_WARN(true, __VA_ARGS__)
+#define VTR_LOG_ERROR(...) VTR_LOGV_ERROR(true, __VA_ARGS__)
+#define VTR_LOG_NOP(...) VTR_LOGV_NOP(true, __VA_ARGS__)
+
+// Conditional logging macros
+#define VTR_LOGV(expr, ...) VTR_LOGVF(expr, __FILE__, __LINE__, __VA_ARGS__)
+#define VTR_LOGV_WARN(expr, ...) VTR_LOGVF_WARN(expr, __FILE__, __LINE__, __VA_ARGS__)
+#define VTR_LOGV_ERROR(expr, ...) VTR_LOGVF_ERROR(expr, __FILE__, __LINE__, __VA_ARGS__)
+#define VTR_LOGV_NOP(expr, ...) VTR_LOGVF_NOP(expr, __FILE__, __LINE__, __VA_ARGS__)
+
+// Custom file-line location logging macros
+#define VTR_LOGF(file, line, ...) VTR_LOGVF(true, file, line, __VA_ARGS__)
+#define VTR_LOGF_WARN(file, line, ...) VTR_LOGVF_WARN(true, file, line, __VA_ARGS__)
+#define VTR_LOGF_ERROR(file, line, ...) VTR_LOGVF_ERROR(true, file, line, __VA_ARGS__)
+#define VTR_LOGF_NOP(file, line, ...) VTR_LOGVF_NOP(true, file, line, __VA_ARGS__)
+
+// Custom file-line-func location logging macros
+#define VTR_LOGFF_WARN(file, line, func, ...) VTR_LOGVFF_WARN(true, file, line, func, __VA_ARGS__)
+
+// Conditional logging and custom file-line location macros
+#define VTR_LOGVF(expr, file, line, ...)    \
+    do {                                    \
+        if (expr) vtr::printf(__VA_ARGS__); \
+    } while (false)
+
+#define VTR_LOGVF_WARN(expr, file, line, ...)                                   \
+    do {                                                                        \
+        if (expr) print_or_suppress_warning(file, line, __func__, __VA_ARGS__); \
+    } while (false)
+
+#define VTR_LOGVF_ERROR(expr, file, line, ...)                \
+    do {                                                      \
+        if (expr) vtr::printf_error(file, line, __VA_ARGS__); \
+    } while (false)
+
+// Conditional logging and custom file-line-func location macros
+#define VTR_LOGVFF_WARN(expr, file, line, func, ...)                        \
+    do {                                                                    \
+        if (expr) print_or_suppress_warning(file, line, func, __VA_ARGS__); \
+    } while (false)
+
+/*
+ * No-op version of logging macro which avoids unused parameter warnings.
+ *
+ * Note that to avoid unused parameter warnings we call sizeof() and cast
+ * the result to void. sizeof is evaluated at compile time so there is no
+ * run-time overhead.
+ *
+ * Also note the use of std::make_tuple to ensure all arguments in VA_ARGS
+ * are used.
+ */
+#define VTR_LOGVF_NOP(expr, file, line, ...)                     \
+    do {                                                         \
+        static_cast<void>(sizeof(expr));                         \
+        static_cast<void>(sizeof(file));                         \
+        static_cast<void>(sizeof(line));                         \
+        static_cast<void>(sizeof(std::make_tuple(__VA_ARGS__))); \
+    } while (false)
+
+// Debug logging macros
+#ifdef VTR_ENABLE_DEBUG_LOGGING //Enable
+#    define VTR_LOG_DEBUG(...) VTR_LOG(__VA_ARGS__)
+#    define VTR_LOGV_DEBUG(expr, ...) VTR_LOGV(expr, __VA_ARGS__)
+#else //Disable
+#    define VTR_LOG_DEBUG(...) VTR_LOG_NOP(__VA_ARGS__)
+#    define VTR_LOGV_DEBUG(expr, ...) VTR_LOGV_NOP(expr, __VA_ARGS__)
+#endif
+
+namespace vtr {
+
+typedef void (*PrintHandlerInfo)(const char* pszMessage, ...);
+typedef void (*PrintHandlerWarning)(const char* pszFileName, unsigned int lineNum, const char* pszMessage, ...);
+typedef void (*PrintHandlerError)(const char* pszFileName, unsigned int lineNum, const char* pszMessage, ...);
+typedef void (*PrintHandlerDirect)(const char* pszMessage, ...);
+
+extern PrintHandlerInfo printf; //Same as printf_info
+extern PrintHandlerInfo printf_info;
+extern PrintHandlerWarning printf_warning;
+extern PrintHandlerError printf_error;
+extern PrintHandlerDirect printf_direct;
+
+void set_log_file(const char* filename);
+
+} // namespace vtr
+
+static std::unordered_set<std::string> warnings_to_suppress;
+static std::string noisy_warn_log_file;
+
+/**
+ * @brief The following data structure and functions allow to suppress noisy warnings and direct them into an external file, if specified.
+ */
+void add_warnings_to_suppress(std::string function_name);
+
+/**
+ * @brief This function creates a new log file to hold the suppressed warnings. If the file already exists, it is cleared out first.
+ */
+void set_noisy_warn_log_file(std::string log_file_name);
+
+/** 
+ * @brief This function checks whether to print or to suppress warning
+ *
+ * This function checks whether the function from which the warning has been called
+ *  is in the set of warnings_to_suppress. If so, the warning is printed on the
+ * noisy_warn_log_file, otherwise it is printed on stdout (or the regular log file)
+ */
+void print_or_suppress_warning(const char* pszFileName, unsigned int lineNum, const char* pszFuncName, const char* pszMessage, ...);
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_logic.h b/third_party/vtr/libs/vtrutil/src/vtr_logic.h
new file mode 100644
index 000000000..30d44c4a6
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_logic.h
@@ -0,0 +1,33 @@
+// Put this above guard so that TRUE/FALSE are undef'ed
+// even if this file was already included earlier.
+#ifndef VTR_LOGIC_H
+#define VTR_LOGIC_H
+
+#ifdef FALSE
+#    undef FALSE
+#endif
+#define FALSE FALSE
+
+#ifdef TRUE
+#    undef TRUE
+#endif
+#define TRUE TRUE
+
+constexpr int FALSE = 0;
+constexpr int TRUE = 1;
+
+namespace vtr {
+
+/**
+ * @brief This class represents the different supported logic values
+ */
+enum class LogicValue {
+    FALSE = 0,
+    TRUE = 1,
+    DONT_CARE = 2,
+    UNKOWN = 3
+};
+
+} // namespace vtr
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_map_util.h b/third_party/vtr/libs/vtrutil/src/vtr_map_util.h
new file mode 100644
index 000000000..fd1cdd4f8
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_map_util.h
@@ -0,0 +1,45 @@
+#ifndef VTR_MAP_UTIL_H
+#define VTR_MAP_UTIL_H
+
+#include "vtr_pair_util.h"
+#include "vtr_range.h"
+
+namespace vtr {
+
+///@brief An iterator which wraps a std::map iterator to return its key
+template<typename Iter>
+using map_key_iter = pair_first_iter<Iter>;
+
+///@brief An iterator which wraps a std::map iterator to return its value
+template<typename Iter>
+using map_value_iter = pair_second_iter<Iter>;
+
+///@brief Returns a range iterating over a std::map's keys
+template<typename T>
+auto make_key_range(T b, T e) {
+    using key_iter = map_key_iter<T>;
+    return vtr::make_range(key_iter(b), key_iter(e));
+}
+
+///@brief Returns a range iterating over a std::map's keys
+template<typename Container>
+auto make_key_range(const Container& c) {
+    return make_key_range(std::begin(c), std::end(c));
+}
+
+///@brief Returns a range iterating over a std::map's values
+template<typename T>
+auto make_value_range(T b, T e) {
+    using value_iter = map_value_iter<T>;
+    return vtr::make_range(value_iter(b), value_iter(e));
+}
+
+///@brief Returns a range iterating over a std::map's values
+template<typename Container>
+auto make_value_range(const Container& c) {
+    return make_value_range(std::begin(c), std::end(c));
+}
+
+} // namespace vtr
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_math.cc b/third_party/vtr/libs/vtrutil/src/vtr_math.cc
new file mode 100644
index 000000000..32594753c
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_math.cc
@@ -0,0 +1,106 @@
+#include <map>
+#include <algorithm>
+
+#include "vtr_assert.h"
+#include "vtr_error.h"
+#include "vtr_math.h"
+
+namespace vtr {
+
+///@brief Calculates the value pow(base, exp) for a non-negative integer exponent
+int ipow(int base, int exp) {
+    int result = 1;
+    // Negative exponents are not supported
+    VTR_ASSERT(exp >= 0);
+    // Exponentiation by squaring: consume exp one bit at a time, lowest bit first
+    while (exp) {
+        if (exp & 1)
+            result *= base;
+        exp >>= 1;
+        if (exp) base *= base; // skip the final squaring: its value is unused and it may overflow (undefined behaviour)
+    }
+    return result;
+}
+
+float median(std::vector<float> vector) {
+    VTR_ASSERT(vector.size() > 0);
+    // The argument is taken by value, so sorting does not disturb the caller's data
+    std::sort(vector.begin(), vector.end());
+
+    const auto mid = vector.size() / 2;
+    if (vector.size() % 2 != 0) {
+        return (float)vector[mid]; // odd count: the single middle element
+    }
+    // even count: average the two middle elements
+    return (float)(vector[mid - 1] + vector[mid]) / 2;
+}
+
+/**
+ * @brief Linear interpolation/Extrapolation 
+ *
+ * Performs linear interpolation or extrapolation on the set of (x,y) values specified by the xy_map.
+ * A requested x value is passed in, and we return the interpolated/extrapolated y value at this requested value of x.
+ * Meant for maps where both key and element are numbers.
+ * This is specifically enforced by the explicit instantiations below this function. i.e. only templates
+ * using those types listed in the explicit instantiations below are allowed 
+ */
+template<typename X, typename Y>
+Y linear_interpolate_or_extrapolate(const std::map<X, Y>* xy_map, X requested_x) {
+    Y result;
+
+    /* the intention of this function is to interpolate/extrapolate. we can't do so with less than 2 values in the xy_map */
+    if (xy_map->size() < 2) {
+        throw VtrError("linear_interpolate_or_extrapolate: cannot interpolate/extrapolate based on less than 2 (x,y) pairs", __FILE__, __LINE__);
+    }
+
+    auto itr = xy_map->find(requested_x);
+    if (itr != xy_map->end()) {
+        /* requested x already exists in the x,y map */
+        result = itr->second;
+    } else {
+        /* requested x does not exist in the x,y map. need to interpolate/extrapolate */
+
+        typename std::map<X, Y>::const_iterator it;
+        double x_low, x_high, y_low, y_high;
+        double slope, reference_y, delta_x;
+
+        /* get first x greater than the one requested */
+        it = xy_map->upper_bound(requested_x);
+
+        if (it == xy_map->end()) {
+            /* need to extrapolate to higher x. based on the y values at the two largest x values */
+            it--;
+            x_high = (double)it->first;
+            y_high = (double)it->second;
+            it--;
+            x_low = (double)it->first;
+            y_low = (double)it->second;
+        } else if (it == xy_map->begin()) {
+            /* need to extrapolate to lower x. based on the y values at the two smallest x */
+            x_low = (double)it->first;
+            y_low = (double)it->second;
+            it++;
+            x_high = (double)it->first;
+            y_high = (double)it->second;
+        } else {
+            /* need to interpolate. based on y values at x just above/below
+             * the one we want */
+            x_high = (double)it->first;
+            y_high = (double)it->second;
+            it--;
+            x_low = (double)it->first;
+            y_low = (double)it->second;
+        }
+
+        slope = (y_high - y_low) / (x_high - x_low);
+        reference_y = y_low;
+        delta_x = (double)requested_x - x_low;
+        result = (Y)(reference_y + (slope * delta_x));
+    }
+
+    return result;
+}
+template double linear_interpolate_or_extrapolate(const std::map<int, double>* xy_map, int requested_x);       /* (int,double) */
+template double linear_interpolate_or_extrapolate(const std::map<double, double>* xy_map, double requested_x); /* (double,double) */
+
+} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_math.h b/third_party/vtr/libs/vtrutil/src/vtr_math.h
new file mode 100644
index 000000000..74b4ccebf
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_math.h
@@ -0,0 +1,168 @@
+#ifndef VTR_MATH_H
+#define VTR_MATH_H
+
+#include <map>
+#include <vector>
+#include <cmath>
+
+#include "vtr_assert.h"
+
+/**
+ * @file
+ *
+ * @brief This file defines some math operations
+ */
+
+namespace vtr {
+/*********************** Math operations *************************************/
+
+///@brief Calculates the value pow(base, exp)
+int ipow(int base, int exp);
+
+///@brief Returns the median of an input vector.
+float median(std::vector<float> vector);
+
+///@brief Linear interpolation/Extrapolation
+template<typename X, typename Y>
+Y linear_interpolate_or_extrapolate(const std::map<X, Y>* xy_map, X requested_x);
+
+///@brief Integer rounding conversion for floats (adds 0.5 and truncates, so it only rounds to nearest for val >= -0.5)
+constexpr int nint(float val) { return static_cast<int>(val + 0.5); }
+
+///@brief Returns a 'safe' ratio which evaluates to zero if the denominator is zero
+template<typename T>
+T safe_ratio(T numerator, T denominator) {
+    if (denominator == T(0)) {
+        return 0;
+    }
+    return numerator / denominator;
+}
+
+///@brief Returns the median of the elements in range [first, last); the range must be sorted and non-empty (nothing is sorted or checked here)
+template<typename InputIterator>
+double median(InputIterator first, InputIterator last) {
+    auto len = std::distance(first, last);
+    auto iter = first + len / 2; // upper-middle element (the median itself when len is odd)
+
+    if (len % 2 == 0) {
+        return (*(iter - 1) + *iter) / 2.; // average the two middle elements, in floating point
+    } else {
+        return *iter;
+    }
+}
+
+///@brief Returns the median of a whole container
+template<typename Container>
+double median(Container c) {
+    return median(std::begin(c), std::end(c));
+}
+
+/**
+ * @brief Returns the geometric mean of the elements in range [first, last)
+ *
+ * To avoid potential round-off issues we transform the standard formula:
+ *
+ *      geomean = ( v_1 * v_2 * ... * v_n) ^ (1/n)
+ *
+ * by taking the log:
+ *
+ *      geomean = exp( (1 / n) * (log(v_1) + log(v_2) + ... + log(v_n)))
+ */
+template<typename InputIterator>
+double geomean(InputIterator first, InputIterator last, double init = 1.) {
+    double log_sum = std::log(init);
+    size_t n = 0;
+    for (auto iter = first; iter != last; ++iter) {
+        log_sum += std::log(*iter);
+        n += 1;
+    }
+
+    if (n == 0) {
+        return init;
+    } else {
+        return std::exp((1. / n) * log_sum);
+    }
+}
+
+///@brief Returns the geometric mean of a whole container
+template<typename Container>
+double geomean(Container c) {
+    return geomean(std::begin(c), std::end(c));
+}
+
+///@brief Returns the arithmetic mean of the elements in range [first, last)
+template<typename InputIterator>
+double arithmean(InputIterator first, InputIterator last, double init = 0.) {
+    double sum = init;
+    size_t n = 0;
+    for (auto iter = first; iter != last; ++iter) {
+        sum += *iter;
+        n += 1;
+    }
+
+    if (n == 0) {
+        return init;
+    } else {
+        return sum / n;
+    }
+}
+
+///@brief Returns the arithmetic mean of a whole container
+template<typename Container>
+double arithmean(Container c) {
+    return arithmean(std::begin(c), std::end(c));
+}
+
+/**
+ * @brief Returns the greatest common divisor of x and y
+ *
+ * Note that T should be an integral type
+ */
+template<typename T>
+static T gcd(T x, T y) {
+    static_assert(std::is_integral<T>::value, "T must be integral");
+    // Euclidean algorithm
+    if (y == 0) {
+        return x;
+    }
+    return gcd(y, x % y);
+}
+
+/**
+ * @brief Return the least common multiple of x and y
+ *
+ * Note that T should be an integral type
+ */
+template<typename T>
+T lcm(T x, T y) {
+    static_assert(std::is_integral<T>::value, "T must be integral");
+
+    if (x == 0 && y == 0) {
+        return 0;
+    } else {
+        return (x / gcd(x, y)) * y;
+    }
+}
+
+constexpr double DEFAULT_REL_TOL = 1e-9;
+constexpr double DEFAULT_ABS_TOL = 0;
+
+///@brief Return true if a and b values are close to each other (within rel_tol relative or abs_tol absolute tolerance)
+template<class T>
+bool isclose(T a, T b, T rel_tol, T abs_tol) {
+    if (std::isinf(a) || std::isinf(b)) return a == b; // an infinity is only close to itself, never to a finite value
+    if (std::isnan(a) && std::isnan(b)) return false;
+    // With a single NaN the final comparison below is false, so the result is false as well
+    T abs_largest = std::max(std::abs(a), std::abs(b));
+    return std::abs(a - b) <= std::max(rel_tol * abs_largest, abs_tol);
+}
+
+///@brief Return true if a and b values are close to each other (using the default tolerances)
+template<class T>
+bool isclose(T a, T b) {
+    return isclose<T>(a, b, DEFAULT_REL_TOL, DEFAULT_ABS_TOL);
+}
+
+} // namespace vtr
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_memory.cc b/third_party/vtr/libs/vtrutil/src/vtr_memory.cc
new file mode 100644
index 000000000..39d6b244b
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_memory.cc
@@ -0,0 +1,178 @@
+#include <cstddef>
+#include <cstdlib>
+#include <math.h>
+
+#include "vtr_assert.h"
+#include "vtr_list.h"
+#include "vtr_memory.h"
+#include "vtr_error.h"
+#include "vtr_util.h"
+#include "vtr_log.h"
+
+#ifndef __GLIBC__
+#    include <stdlib.h>
+#else
+#    include <malloc.h>
+#endif
+
+namespace vtr {
+
+#ifndef __GLIBC__
+int malloc_trim(size_t /*pad*/) {
+    return 0;
+}
+#else
+int malloc_trim(size_t pad) {
+    return ::malloc_trim(pad);
+}
+#endif
+
+void* free(void* some) {
+    if (some) {
+        std::free(some);
+        some = nullptr;
+    }
+    return nullptr;
+}
+
+void* calloc(size_t nelem, size_t size) {
+    void* ret;
+    if (nelem == 0) {
+        return nullptr;
+    }
+
+    if ((ret = std::calloc(nelem, size)) == nullptr) {
+        throw VtrError("Unable to calloc memory.", __FILE__, __LINE__);
+    }
+    return ret;
+}
+
+void* malloc(size_t size) {
+    void* ret;
+    if (size == 0) {
+        return nullptr;
+    }
+
+    if ((ret = std::malloc(size)) == nullptr && size != 0) {
+        throw VtrError("Unable to malloc memory.", __FILE__, __LINE__);
+    }
+    return ret;
+}
+
+void* realloc(void* ptr, size_t size) {
+    // Wrapper over std::realloc that throws VtrError instead of returning nullptr on failure
+    void* ret = std::realloc(ptr, size);
+    // A nullptr result is tolerated when size == 0
+    if (nullptr == ret && size != 0) {
+        throw VtrError(string_fmt("Unable to realloc memory (ptr=%p, size=%zu).", ptr, size),
+                       __FILE__, __LINE__);
+    }
+    return ret;
+}
+
+void* chunk_malloc(size_t size, t_chunk* chunk_info) {
+    /* This routine should be used for allocating fairly small data             *
+     * structures where memory-efficiency is crucial.  This routine allocates   *
+     * large "chunks" of data, and parcels them out as requested.  Whenever     *
+     * it allocates a new chunk it adds it to the linked list pointed to by     *
+     * chunk_info->chunk_ptr_head.  This list can be used to free the           *
+     * chunked memory (see free_chunk_memory).                                  *
+     * Information about the currently open "chunk" must be stored by the       *
+     * user program.  chunk_info->mem_avail is an int storing how many          *
+     * bytes are left in the current chunk, while                               *
+     * chunk_info->next_mem_loc_ptr is a pointer to the next free byte in       *
+     * the chunk.  To start a new chunk, simply set                             *
+     * chunk_info->mem_avail = 0.  Each independent set of data                 *
+     * structures should use a new chunk.                                       */
+
+    /* To make sure the memory passed back is properly aligned, I must *
+     * only send back chunks in multiples of the worst-case alignment  *
+     * restriction of the machine.  On most machines this should be    *
+     * a long, but on 64-bit machines it might be a long long or a     *
+     * double.  Change the typedef below if this is the case.          */
+
+    typedef size_t Align;
+
+    constexpr int CHUNK_SIZE = 32768;
+    constexpr int FRAGMENT_THRESHOLD = 100;
+
+    char* tmp_ptr;
+    int aligned_size;
+
+    VTR_ASSERT(chunk_info->mem_avail >= 0);
+
+    if ((size_t)(chunk_info->mem_avail) < size) { /* Need to malloc more memory. */
+        if (size > CHUNK_SIZE) {                  /* Too big, use standard routine. */
+                                                  /* Want to allocate a block of memory the size of size.
+                                                   * i.e. malloc(size) */
+            tmp_ptr = new char[size];
+
+            /* When debugging, uncomment the code below to see if memory allocation size */
+            /* makes sense */
+            //#ifdef DEBUG
+            // vtr_printf("NB: my_chunk_malloc got a request for %d bytes.\n", size);
+            // vtr_printf("You should consider using vtr::malloc for such big requests.\n");
+            // #endif
+
+            VTR_ASSERT(chunk_info != nullptr);
+            chunk_info->chunk_ptr_head = insert_in_vptr_list(chunk_info->chunk_ptr_head, tmp_ptr);
+            return tmp_ptr;
+        }
+
+        if (chunk_info->mem_avail < FRAGMENT_THRESHOLD) { /* Only a small scrap left. */
+            chunk_info->next_mem_loc_ptr = new char[CHUNK_SIZE];
+            chunk_info->mem_avail = CHUNK_SIZE;
+            VTR_ASSERT(chunk_info != nullptr);
+            chunk_info->chunk_ptr_head = insert_in_vptr_list(chunk_info->chunk_ptr_head, chunk_info->next_mem_loc_ptr);
+        }
+
+        /* Execute else clause only when the chunk we want is pretty big,  *
+         * and would leave too big an unused fragment.  Then we use malloc *
+         * to allocate normally.                                           */
+
+        else {
+            tmp_ptr = new char[size];
+            VTR_ASSERT(chunk_info != nullptr);
+            chunk_info->chunk_ptr_head = insert_in_vptr_list(chunk_info->chunk_ptr_head, tmp_ptr);
+
+            return tmp_ptr;
+        }
+    }
+
+    /* Find the smallest distance to advance the memory pointer and keep *
+     * everything aligned.                                               */
+
+    if (size % sizeof(Align) == 0) {
+        aligned_size = size;
+    } else {
+        aligned_size = size + sizeof(Align) - size % sizeof(Align);
+    }
+
+    tmp_ptr = chunk_info->next_mem_loc_ptr;
+    chunk_info->next_mem_loc_ptr += aligned_size;
+    chunk_info->mem_avail -= aligned_size;
+    return tmp_ptr;
+}
+
+void free_chunk_memory(t_chunk* chunk_info) {
+    /* Releases every block recorded in chunk_info->chunk_ptr_head, together with
+     * the list nodes tracking them, then resets chunk_info to its empty state. */
+
+    t_linked_vptr* node = chunk_info->chunk_ptr_head;
+
+    while (node != nullptr) {
+        /* data_vptr is a void*, which delete[] cannot be applied to, so it is
+         * cast to char* first. */
+        delete[]((char*)node->data_vptr); /* Free memory "chunk". */
+        t_linked_vptr* const finished = node;
+        node = node->next;
+        delete finished; /* Free memory used to track "chunk". */
+    }
+
+    /* Leave the bookkeeping empty so the next chunk_malloc starts a new chunk. */
+    chunk_info->chunk_ptr_head = nullptr;
+    chunk_info->mem_avail = 0;
+    chunk_info->next_mem_loc_ptr = nullptr;
+}
+
+} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_memory.h b/third_party/vtr/libs/vtrutil/src/vtr_memory.h
new file mode 100644
index 000000000..46a486569
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_memory.h
@@ -0,0 +1,151 @@
+#ifndef VTR_MEMORY_H
+#define VTR_MEMORY_H
+#include <cstddef>
+#include <cstdlib>
+#include <new>
+
+#ifdef _WIN32
+#    include <cerrno>
+#    include <malloc.h>
+#endif
+
+namespace vtr {
+
+/**
+ * @brief This function will force the container to be cleared
+ *
+ * It releases its held memory.
+ * For efficiency, STL containers usually don't
+ * release their actual heap-allocated memory until
+ * destruction (even if Container::clear() is called).
+ */
+template<typename Container>
+void release_memory(Container& container) {
+    ///@brief Force a re-allocation to happen by swapping in a new (empty) container.
+    Container().swap(container);
+}
+
+struct t_linked_vptr; //Forward declaration
+
+/**
+ * This structure keeps track of chunks of memory
+ *
+ * This structure is to keep track of chunks of memory that is being	
+ * allocated to save overhead when allocating very small memory pieces. 
+ * For a complete description, please see the comment in chunk_malloc
+ */
+struct t_chunk {
+    t_linked_vptr* chunk_ptr_head = nullptr;
+
+    //chunk_ptr_head->data_vptr: head of the entire linked
+    //list of allocated "chunk" memory;
+    //chunk_ptr_head->next: pointer to the next chunk on the linked list
+    int mem_avail = 0;                ///< number of bytes left in the current chunk
+    char* next_mem_loc_ptr = nullptr; ///< pointer to the first available (free) byte in the current chunk
+};
+
+void* free(void* some);
+void* calloc(size_t nelem, size_t size);
+void* malloc(size_t size);
+void* realloc(void* ptr, size_t size);
+
+void* chunk_malloc(size_t size, t_chunk* chunk_info);
+void free_chunk_memory(t_chunk* chunk_info);
+
+///@brief Like chunk_malloc, but with proper C++ object initialization
+template<typename T>
+T* chunk_new(t_chunk* chunk_info) {
+    void* block = chunk_malloc(sizeof(T), chunk_info);
+
+    return new (block) T(); //Placement new
+}
+
+///@brief Call the destructor of an obj which must have been allocated in the specified chunk
+template<typename T>
+void chunk_delete(T* obj, t_chunk* /*chunk_info*/) {
+    if (obj) {
+        obj->~T(); // Manually call destructor
+        // Currently we don't mark the unused memory as free
+    }
+}
+
+/**
+ * @brief Cross platform wrapper around GNU's malloc_trim()
+ *
+ * TODO: This is only used in one place within VPR, consider removing it
+ */
+int malloc_trim(size_t pad);
+
+inline int memalign(void** ptr_out, size_t align, size_t size) {
+#ifdef _WIN32
+    void* temp_ptr = _aligned_malloc(size, align);
+    if (temp_ptr != NULL) {
+        *ptr_out = temp_ptr;
+        return 0;
+    } else {
+        return errno;
+    }
+#else
+    return posix_memalign(ptr_out, align, size);
+#endif
+}
+
+/**
+ * @brief A macro generates a prefetch instruction on all architectures that include it.
+ * 
+ * This is all modern x86 and ARM64 platforms.
+ *
+ * This is a macro because it has to be.  rw and locality must be constants,
+ * not just constexpr.
+ */
+#define VTR_PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality)
+
+/**
+ * @brief aligned_allocator is a STL allocator that allocates memory in an aligned fashion
+ *
+ * works if supported by the platform
+ * 
+ * It is worth noting the C++20 std::allocator does aligned allocations, but
+ * C++20 has poor support.
+ */
+template<class T>
+struct aligned_allocator {
+    using value_type = T;
+    using pointer = T*;
+    using const_pointer = const T*;
+    using reference = T&;
+    using const_reference = const T&;
+    using size_type = std::size_t;
+    using difference_type = std::ptrdiff_t;
+    ///@brief Allocates storage for n objects of type T (throws std::bad_alloc on failure)
+    pointer allocate(size_type n, const void* /*hint*/ = 0) {
+        constexpr size_type align = alignof(T) < sizeof(void*) ? sizeof(void*) : alignof(T); // posix_memalign() rejects smaller alignments
+        void* data;
+        if (vtr::memalign(&data, align, sizeof(T) * n) != 0) {
+            throw std::bad_alloc();
+        }
+        return static_cast<pointer>(data);
+    }
+    ///@brief Releases storage previously obtained from allocate()
+    void deallocate(T* p, size_type /*n*/) {
+#ifdef _WIN32
+        _aligned_free(p);
+#else
+        vtr::free(p);
+#endif
+    }
+};
+
+/**
+ * @brief compare two aligned_allocators.
+ *
+ * Since the allocator doesn't have any internal state, all allocators for a given type are the same.
+ */
+template<typename T>
+bool operator==(const aligned_allocator<T>&, const aligned_allocator<T>&) {
+    return true;
+}
+
+} // namespace vtr
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_ndmatrix.h b/third_party/vtr/libs/vtrutil/src/vtr_ndmatrix.h
new file mode 100644
index 000000000..c3a4692ed
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_ndmatrix.h
@@ -0,0 +1,409 @@
+#ifndef VTR_ND_MATRIX_H
+#define VTR_ND_MATRIX_H
+#include <algorithm>
+#include <array>
+#include <memory>
+
+#include "vtr_assert.h"
+
+namespace vtr {
+
+/**
+ * @brief Proxy class for a sub-matrix of a NdMatrix class.
+ *
+ * This is used to allow chaining of array indexing [] operators in a natural way.
+ *
+ * Each instance of this class peels off one dimension and returns a NdMatrixProxy representing
+ * the resulting sub-matrix. This is repeated recursively until we hit the 1-dimensional base case.
+ *
+ * Since this expansion happens at compile time the compiler can optimize the proxy classes away,
+ * yielding both high performance and generality.
+ *
+ * Recursive case: N-dimensional array
+ */
+template<typename T, size_t N>
+class NdMatrixProxy {
+  public:
+    static_assert(N > 0, "Must have at least one dimension");
+
+    /**
+     * @brief Construct a matrix proxy object
+     *
+     *    @param dim_sizes: Array of dimension sizes; element 0 is the size of this proxy's dimension
+     *    @param dim_strides: Array of dimension strides; element 0 is the stride of this proxy's dimension
+     *                        (i.e. how many elements in memory between consecutive indices of that dimension)
+     *    @param start: Pointer to the start of the sub-matrix this proxy represents
+     */
+    NdMatrixProxy<T, N>(const size_t* dim_sizes, const size_t* dim_strides, T* start)
+        : dim_sizes_(dim_sizes)
+        , dim_strides_(dim_strides)
+        , start_(start) {}
+
+    NdMatrixProxy<T, N>& operator=(const NdMatrixProxy<T, N>& other) = delete;
+
+    ///@brief const [] operator: returns the proxy for the sub-matrix at 'index' of this dimension
+    const NdMatrixProxy<T, N - 1> operator[](size_t index) const {
+        VTR_ASSERT_SAFE_MSG(index < dim_sizes_[0], "Index out of range (above dimension maximum)");
+        VTR_ASSERT_SAFE_MSG(dim_sizes_[1] > 0, "Can not index into zero-sized dimension");
+
+        // Strip off one dimension
+        return NdMatrixProxy<T, N - 1>(
+            dim_sizes_ + 1,                    // Sizes, starting at the next dimension
+            dim_strides_ + 1,                  // Strides, starting at the next dimension
+            start_ + dim_strides_[0] * index); // Advance to index in this dimension
+    }
+
+    ///@brief [] operator: same as the const overload, but yields a non-const proxy
+    NdMatrixProxy<T, N - 1> operator[](size_t index) {
+        // Reuse the const version; the proxy is returned by value, so its const qualifier is dropped by the copy
+        return const_cast<const NdMatrixProxy<T, N>*>(this)->operator[](index);
+    }
+
+  private:
+    const size_t* dim_sizes_;
+    const size_t* dim_strides_;
+    T* start_;
+};
+
+///@brief Base case: 1-dimensional array (indexing yields an element reference instead of another proxy)
+template<typename T>
+class NdMatrixProxy<T, 1> {
+  public:
+    /**
+     * @brief Construct a 1-d matrix proxy object
+     *
+     *    @param dim_sizes: Array of dimension sizes; element 0 is the size of this (final) dimension
+     *    @param dim_stride: Array of dimension strides; element 0 is the stride of this dimension (asserted to be 1 when indexing)
+     *    @param start: Pointer to the start of the sub-matrix this proxy represents
+     */
+    NdMatrixProxy<T, 1>(const size_t* dim_sizes, const size_t* dim_stride, T* start)
+        : dim_sizes_(dim_sizes)
+        , dim_strides_(dim_stride)
+        , start_(start) {}
+
+    NdMatrixProxy<T, 1>& operator=(const NdMatrixProxy<T, 1>& other) = delete;
+
+    ///@brief const [] operator: returns a const reference to the element at 'index'
+    const T& operator[](size_t index) const {
+        VTR_ASSERT_SAFE_MSG(dim_strides_[0] == 1, "Final dimension must have stride 1");
+        VTR_ASSERT_SAFE_MSG(index < dim_sizes_[0], "Index out of range (above dimension maximum)");
+
+        //Base case: elements of the final dimension are adjacent in memory
+        return start_[index];
+    }
+
+    ///@brief [] operator: returns a mutable reference to the element at 'index'
+    T& operator[](size_t index) {
+        // Call the const version and cast away constness
+        return const_cast<T&>(const_cast<const NdMatrixProxy<T, 1>*>(this)->operator[](index));
+    }
+
+    /**
+     * @brief Backward compatibility
+     *
+     * For legacy compatibility (i.e. code expecting a pointer) we allow this base dimension
+     * case to retrieve a raw pointer to the last dimension elements.
+     *
+     * Note that it is the caller's responsibility to use this correctly; care must be taken
+     * not to clobber elements in other dimensions
+     */
+    const T* data() const {
+        return start_;
+    }
+
+    ///@brief Same as above, but the returned pointer allows the elements to be modified
+    T* data() {
+        // Call the const version and cast away constness
+        return const_cast<T*>(const_cast<const NdMatrixProxy<T, 1>*>(this)->data());
+    }
+
+  private:
+    const size_t* dim_sizes_;
+    const size_t* dim_strides_;
+    T* start_;
+};
+
+/**
+ * @brief Base class for an N-dimensional matrix
+ *
+ * Base class for an N-dimensional matrix whose indices start at zero in every dimension.
+ * This class implements all of the matrix handling (lifetime etc.) except for indexing
+ * (which is implemented in the NdMatrix class). Indexing is split out to allow specialization
+ * of indexing for N = 1.
+ *
+ * Implementation:
+ *
+ * This class uses a single linear array to store the matrix in c-style (row major)
+ * order. That is, the right-most index is laid out contiguously in memory.
+ *
+ * This should improve memory usage (no extra pointers to store for each dimension),
+ * and cache locality (less indirection via pointers, predictable strides).
+ *
+ * The indices are calculated based on the dimensions to access the appropriate elements.
+ * Since the indexing calculations are visible to the compiler at compile time they can be
+ * optimized to be efficient.
+ */
+template<typename T, size_t N>
+class NdMatrixBase {
+  public:
+    static_assert(N >= 1, "Minimum dimension 1");
+
+    ///@brief An empty matrix (all dimensions size zero)
+    NdMatrixBase() {
+        clear();
+    }
+
+    /**
+     * @brief Specified dimension sizes:
+     *
+     *      [0..dim_sizes[0])
+     *      [0..dim_sizes[1])
+     *      ...
+     *      with optional fill value
+     */
+    NdMatrixBase(std::array<size_t, N> dim_sizes, T value = T()) {
+        resize(dim_sizes, value);
+    }
+
+  public: //Accessors
+    ///@brief Returns the size of the matrix (number of elements)
+    size_t size() const {
+        VTR_ASSERT_DEBUG_MSG(calc_size() == size_, "Calculated and current matrix size must be consistent");
+        return size_;
+    }
+
+    ///@brief Returns true if there are no elements in the matrix
+    bool empty() const {
+        return size() == 0;
+    }
+
+    ///@brief Returns the number of dimensions (i.e. N)
+    size_t ndims() const {
+        return dim_sizes_.size();
+    }
+
+    ///@brief Returns the size of the ith dimension
+    size_t dim_size(size_t i) const {
+        VTR_ASSERT_SAFE(i < ndims());
+
+        return dim_sizes_[i];
+    }
+
+    ///@brief Returns the starting index of the ith dimension (always zero for this class)
+    size_t begin_index(size_t i) const {
+        VTR_ASSERT_SAFE(i < ndims());
+
+        return 0;
+    }
+
+    ///@brief Returns the one-past-the-end index of the ith dimension
+    size_t end_index(size_t i) const {
+        VTR_ASSERT_SAFE(i < ndims());
+
+        return dim_sizes_[i];
+    }
+
+    ///@brief const flat accessor: element i of the underlying linear storage
+    const T& get(size_t i) const {
+        VTR_ASSERT_SAFE(i < size_);
+        return data_[i];
+    }
+
+    ///@brief Flat accessor: element i of the underlying linear storage
+    T& get(size_t i) {
+        VTR_ASSERT_SAFE(i < size_);
+        return data_[i];
+    }
+
+  public: //Mutators
+    ///@brief Set all elements to 'value'
+    void fill(T value) {
+        std::fill(data_.get(), data_.get() + size(), value);
+    }
+
+    /**
+     * @brief Resize the matrix to the specified dimension sizes
+     *
+     * Storage is re-allocated, so previous contents are not kept. Every element is set
+     * to 'value', which defaults to a value-initialized T.
+     */
+    void resize(std::array<size_t, N> dim_sizes, T value = T()) {
+        dim_sizes_ = dim_sizes;
+        size_ = calc_size();
+        alloc();
+        fill(value);
+        if (size_ > 0) {
+            dim_strides_[0] = size_ / dim_sizes_[0];
+            for (size_t dim = 1; dim < N; ++dim) {
+                dim_strides_[dim] = dim_strides_[dim - 1] / dim_sizes_[dim];
+            }
+        } else {
+            dim_strides_.fill(0);
+        }
+    }
+
+    ///@brief Reset the matrix to size zero
+    void clear() {
+        data_.reset(nullptr);
+        dim_sizes_.fill(0);
+        dim_strides_.fill(0);
+        size_ = 0;
+    }
+
+  public: //Lifetime management
+    ///@brief Copy constructor (allocates with other's dimensions, then copies every element)
+    NdMatrixBase(const NdMatrixBase& other)
+        : NdMatrixBase(other.dim_sizes_) {
+        std::copy(other.data_.get(), other.data_.get() + other.size(), data_.get());
+    }
+
+    ///@brief Move constructor (starts empty, then swaps with other)
+    NdMatrixBase(NdMatrixBase&& other)
+        : NdMatrixBase() {
+        swap(*this, other);
+    }
+
+    /**
+     * @brief Copy/move assignment
+     *
+     * Note that rhs is taken by value (copy-swap idiom)
+     */
+    NdMatrixBase& operator=(NdMatrixBase rhs) {
+        swap(*this, rhs);
+        return *this;
+    }
+
+    ///@brief Swap two NdMatrixBase objects
+    friend void swap(NdMatrixBase<T, N>& m1, NdMatrixBase<T, N>& m2) {
+        using std::swap;
+        swap(m1.size_, m2.size_);
+        swap(m1.dim_sizes_, m2.dim_sizes_);
+        swap(m1.dim_strides_, m2.dim_strides_);
+        swap(m1.data_, m2.data_);
+    }
+
+  private:
+    ///@brief Allocate space for all the elements
+    void alloc() {
+        data_ = std::make_unique<T[]>(size());
+    }
+
+    ///@brief Returns the size of the matrix (number of elements) calculated from the current dimensions
+    size_t calc_size() const {
+        //Size is the product of all dimension sizes
+        size_t cnt = dim_size(0);
+        for (size_t idim = 1; idim < ndims(); ++idim) {
+            cnt *= dim_size(idim);
+        }
+        return cnt;
+    }
+
+  protected:
+    size_t size_ = 0;
+    std::array<size_t, N> dim_sizes_;
+    std::array<size_t, N> dim_strides_;
+    std::unique_ptr<T[]> data_ = nullptr;
+};
+
+/**
+ * @brief An N-dimensional matrix (N >= 2) whose indices start at zero in every dimension.
+ *
+ * Examples:
+ *
+ *       //A 2-dimensional matrix with indices [0..4][0..9]
+ *       NdMatrix<int,2> m1({5,10});
+ *
+ *       //Accessing an element
+ *       int i = m1[3][5];
+ *
+ *       //Setting an element
+ *       m1[2][8] = 0;
+ *
+ *       //A 3-dimensional matrix with indices [0..4][0..9][0..19]
+ *       NdMatrix<int,3> m2({5,10,20});
+ *
+ *       //A 2-dimensional matrix with indices [0..4][0..9], with all entries
+ *       //initialized to 42
+ *       NdMatrix<int,2> m3({5,10}, 42);
+ *
+ *       //Filling all entries with value 101
+ *       m3.fill(101);
+ *
+ *       //Resizing an existing matrix (all values reset to default constructed value)
+ *       m3.resize({5,5});
+ *
+ *       //Resizing an existing matrix (all elements set to value 88)
+ *       m3.resize({15,55}, 88);
+ */
+template<typename T, size_t N>
+class NdMatrix : public NdMatrixBase<T, N> {
+    //General case
+    static_assert(N >= 2, "Minimum dimension 2");
+
+  public:
+    ///@brief Use the base constructors
+    using NdMatrixBase<T, N>::NdMatrixBase;
+
+  public:
+    /**
+     * @brief Access an element
+     *
+     * Returns a proxy-object to allow chained array-style indexing  (N >= 2 case)
+     */
+    const NdMatrixProxy<T, N - 1> operator[](size_t index) const {
+        VTR_ASSERT_SAFE_MSG(this->dim_size(0) > 0, "Can not index into size zero dimension");
+        VTR_ASSERT_SAFE_MSG(this->dim_size(1) > 0, "Can not index into size zero dimension");
+        VTR_ASSERT_SAFE_MSG(index < this->dim_sizes_[0], "Index out of range (above dimension maximum)");
+
+        // Peel off the first dimension
+        return NdMatrixProxy<T, N - 1>(
+            this->dim_sizes_.data() + 1,                        //Sizes, starting at the second dimension
+            this->dim_strides_.data() + 1,                      //Strides, starting at the second dimension
+            this->data_.get() + this->dim_strides_[0] * index); //Advance to index in this dimension
+    }
+
+    /**
+     * @brief Access an element
+     *
+     * Returns a proxy-object to allow chained array-style indexing
+     */
+    NdMatrixProxy<T, N - 1> operator[](size_t index) {
+        //Call the const version; the proxy is returned by value so we don't need to worry about const
+        return const_cast<const NdMatrix<T, N>*>(this)->operator[](index);
+    }
+};
+
+/**
+ * @brief A 1-dimensional matrix whose indices start at zero.
+ *
+ * This is considered a specialization for N=1
+ */
+template<typename T>
+class NdMatrix<T, 1> : public NdMatrixBase<T, 1> {
+  public:
+    ///@brief Use the base constructors
+    using NdMatrixBase<T, 1>::NdMatrixBase;
+
+  public:
+    ///@brief Access an element (immutable)
+    const T& operator[](size_t index) const {
+        VTR_ASSERT_SAFE_MSG(this->dim_size(0) > 0, "Can not index into size zero dimension");
+        //No lower-bound check: index is unsigned, so it can never be below zero
+        VTR_ASSERT_SAFE_MSG(index < this->dim_sizes_[0], "Index out of range (above dimension maximum)");
+
+        return this->data_[index];
+    }
+
+    ///@brief Access an element (mutable)
+    T& operator[](size_t index) {
+        //Call the const version, and cast away const-ness
+        return const_cast<T&>(const_cast<const NdMatrix<T, 1>*>(this)->operator[](index));
+    }
+};
+
+///@brief Convenient short form for the common 2-dimensional NdMatrix
+template<typename T>
+using Matrix = NdMatrix<T, 2>;
+
+} // namespace vtr
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_ndoffsetmatrix.h b/third_party/vtr/libs/vtrutil/src/vtr_ndoffsetmatrix.h
new file mode 100644
index 000000000..c42553038
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_ndoffsetmatrix.h
@@ -0,0 +1,459 @@
+#ifndef VTR_ND_OFFSET_MATRIX_H
+#define VTR_ND_OFFSET_MATRIX_H
+#include <array>
+#include <memory>
+
+#include "vtr_assert.h"
+
+namespace vtr {
+
+/**
+ * @brief A half-open range specification for a matrix dimension [begin_index, end_index)
+ *
+ * The valid indices are [begin_index() ... end_index()-1], provided size() > 0.
+ */
+class DimRange {
+  public:
+    ///@brief Default constructor: the empty range [0, 0)
+    DimRange() = default;
+
+    ///@brief Construct the range [begin, end)
+    DimRange(size_t begin, size_t end)
+        : begin_index_(begin)
+        , end_index_(end) {}
+
+    ///@brief Return the begin index
+    size_t begin_index() const { return begin_index_; }
+
+    ///@brief Return the end index
+    size_t end_index() const { return end_index_; }
+
+    ///@brief Return the size (end index minus begin index)
+    size_t size() const { return end_index_ - begin_index_; }
+
+  private:
+    size_t begin_index_ = 0;
+    size_t end_index_ = 0;
+};
+
+/**
+ * @brief Proxy class for a sub-matrix of a NdOffsetMatrix class.
+ *
+ * This is used to allow chaining of array indexing [] operators in a natural way.
+ *
+ * Each instance of this class peels off one dimension and returns a NdOffsetMatrixProxy representing
+ * the resulting sub-matrix. This is repeated recursively until we hit the 1-dimensional base case.
+ *
+ * Since this expansion happens at compile time the compiler can optimize the proxy classes away,
+ * yielding both high performance and generality.
+ *
+ * Recursive case: N-dimensional array
+ */
+template<typename T, size_t N>
+class NdOffsetMatrixProxy {
+  public:
+    static_assert(N > 0, "Must have at least one dimension");
+
+    /**
+     * @brief Construct a matrix proxy object
+     *
+     *  dim_ranges: Array of DimRange objects (indexed by idim)
+     *  idim: The dimension associated with this proxy
+     *  dim_stride: The stride of this dimension (i.e. how many elements in memory between indices of this dimension)
+     *  start: Pointer to the start of the sub-matrix this proxy represents
+     */
+    NdOffsetMatrixProxy<T, N>(const DimRange* dim_ranges, size_t idim, size_t dim_stride, T* start)
+        : dim_ranges_(dim_ranges)
+        , idim_(idim)
+        , dim_stride_(dim_stride)
+        , start_(start) {}
+
+    ///@brief const [] operator: returns the proxy for the sub-matrix at 'index' of this dimension
+    const NdOffsetMatrixProxy<T, N - 1> operator[](size_t index) const {
+        VTR_ASSERT_SAFE_MSG(index >= dim_ranges_[idim_].begin_index(), "Index out of range (below dimension minimum)");
+        VTR_ASSERT_SAFE_MSG(index < dim_ranges_[idim_].end_index(), "Index out of range (above dimension maximum)");
+
+        /*
+         * Calculate the effective index
+         *
+         * The elements are stored in zero-indexed form, so we need to adjust
+         * for any non-zero minimum index
+         */
+        size_t effective_index = index - dim_ranges_[idim_].begin_index();
+
+        //Determine the stride of the next dimension
+        size_t next_dim_stride = dim_stride_ / dim_ranges_[idim_ + 1].size();
+
+        //Strip off one dimension
+        return NdOffsetMatrixProxy<T, N - 1>(dim_ranges_,                             //Pass the dimension information
+                                             idim_ + 1,                               //Pass the next dimension
+                                             next_dim_stride,                         //Pass the stride for the next dimension
+                                             start_ + dim_stride_ * effective_index); //Advance to index in this dimension
+    }
+
+    ///@brief [] operator: same as the const overload, but yields a non-const proxy
+    NdOffsetMatrixProxy<T, N - 1> operator[](size_t index) {
+        //Reuse the const version; the proxy is returned by value, so its const qualifier is dropped by the copy
+        return const_cast<const NdOffsetMatrixProxy<T, N>*>(this)->operator[](index);
+    }
+
+  private:
+    const DimRange* dim_ranges_;
+    const size_t idim_;
+    const size_t dim_stride_;
+    T* start_;
+};
+
+///@brief Base case: 1-dimensional array (indexing yields an element reference instead of another proxy)
+template<typename T>
+class NdOffsetMatrixProxy<T, 1> {
+  public:
+    /**
+     * @brief Construct a matrix proxy object
+     *
+     *     - dim_ranges: Array of DimRange objects (indexed by idim, the dimension associated with this proxy)
+     *     - dim_stride: The stride of this dimension (i.e. how many elements in memory between indices of this dimension)
+     *     - start: Pointer to the start of the sub-matrix this proxy represents
+     */
+    NdOffsetMatrixProxy<T, 1>(const DimRange* dim_ranges, size_t idim, size_t dim_stride, T* start)
+        : dim_ranges_(dim_ranges)
+        , idim_(idim)
+        , dim_stride_(dim_stride)
+        , start_(start) {}
+
+    ///@brief const [] operator: returns a const reference to the element at 'index'
+    const T& operator[](size_t index) const {
+        VTR_ASSERT_SAFE_MSG(dim_stride_ == 1, "Final dimension must have stride 1");
+        VTR_ASSERT_SAFE_MSG(index >= dim_ranges_[idim_].begin_index(), "Index out of range (below dimension minimum)");
+        VTR_ASSERT_SAFE_MSG(index < dim_ranges_[idim_].end_index(), "Index out of range (above dimension maximum)");
+
+        //The elements are stored in zero-indexed form, so we need to adjust
+        //for any non-zero minimum index
+        size_t effective_index = index - dim_ranges_[idim_].begin_index();
+
+        //Base case
+        return start_[effective_index];
+    }
+
+    ///@brief [] operator: returns a mutable reference to the element at 'index'
+    T& operator[](size_t index) {
+        //Call the const version and cast away constness
+        return const_cast<T&>(const_cast<const NdOffsetMatrixProxy<T, 1>*>(this)->operator[](index));
+    }
+
+  private:
+    const DimRange* dim_ranges_;
+    const size_t idim_;
+    const size_t dim_stride_;
+    T* start_;
+};
+
+/**
+ * @brief Base class for an N-dimensional matrix supporting arbitrary index ranges per dimension.
+ *
+ * This class implements all of the matrix handling (lifetime etc.) except for indexing
+ * (which is implemented in the NdOffsetMatrix class). Indexing is split out to allow specialization
+ * of indexing for N = 1.
+ *
+ * Implementation:
+ *
+ * This class uses a single linear array to store the matrix in c-style (row major)
+ * order. That is, the right-most index is laid out contiguously in memory.
+ *
+ * This should improve memory usage (no extra pointers to store for each dimension),
+ * and cache locality (less indirection via pointers, predictable strides).
+ *
+ * The indices are calculated based on the dimensions to access the appropriate elements.
+ * Since the indexing calculations are visible to the compiler at compile time they can be
+ * optimized to be efficient.
+ */
+template<typename T, size_t N>
+class NdOffsetMatrixBase {
+  public:
+    static_assert(N >= 1, "Minimum dimension 1");
+
+    ///@brief An empty matrix (all dimensions size zero)
+    NdOffsetMatrixBase() {
+        clear();
+    }
+
+    /**
+     * @brief Specified dimension sizes:
+     *
+     *      [0..dim_sizes[0])
+     *      [0..dim_sizes[1])
+     *      ...
+     * with optional fill value
+     */
+    NdOffsetMatrixBase(std::array<size_t, N> dim_sizes, T value = T()) {
+        resize(dim_sizes, value);
+    }
+
+    /**
+     * @brief Specified dimension index ranges:
+     *
+     *      [dim_ranges[0].begin_index() ... dim_ranges[0].end_index())
+     *      [dim_ranges[1].begin_index() ... dim_ranges[1].end_index())
+     *      ...
+     * with optional fill value
+     */
+    NdOffsetMatrixBase(std::array<DimRange, N> dim_ranges, T value = T()) {
+        resize(dim_ranges, value);
+    }
+
+  public: //Accessors
+    ///@brief Returns the size of the matrix (number of elements)
+    size_t size() const {
+        //Size is the product of all dimension sizes
+        size_t cnt = dim_size(0);
+        for (size_t idim = 1; idim < ndims(); ++idim) {
+            cnt *= dim_size(idim);
+        }
+        return cnt;
+    }
+
+    ///@brief Returns true if there are no elements in the matrix
+    bool empty() const {
+        return size() == 0;
+    }
+
+    ///@brief Returns the number of dimensions (i.e. N)
+    size_t ndims() const {
+        return dim_ranges_.size();
+    }
+
+    ///@brief Returns the size of the ith dimension
+    size_t dim_size(size_t i) const {
+        VTR_ASSERT_SAFE(i < ndims());
+
+        return dim_ranges_[i].size();
+    }
+
+    ///@brief Returns the starting index of the ith dimension
+    size_t begin_index(size_t i) const {
+        VTR_ASSERT_SAFE(i < ndims());
+
+        return dim_ranges_[i].begin_index();
+    }
+
+    ///@brief Returns the one-past-the-end index of the ith dimension
+    size_t end_index(size_t i) const {
+        VTR_ASSERT_SAFE(i < ndims());
+
+        return dim_ranges_[i].end_index();
+    }
+
+  public: //Mutators
+    ///@brief Set all elements to 'value'
+    void fill(T value) {
+        std::fill(data_.get(), data_.get() + size(), value);
+    }
+
+    /**
+     * @brief Resize the matrix to the specified dimensions
+     *
+     * Storage is re-allocated, so previous contents are not kept. Every element is set
+     * to 'value', which defaults to a value-initialized T.
+     */
+    void resize(std::array<size_t, N> dim_sizes, T value = T()) {
+        //Convert dimension to range [0..dim)
+        for (size_t i = 0; i < dim_sizes.size(); ++i) {
+            dim_ranges_[i] = {0, dim_sizes[i]};
+        }
+        alloc();
+        fill(value);
+    }
+
+    /**
+     * @brief Resize the matrix to the specified dimension ranges
+     *
+     * Storage is re-allocated, so previous contents are not kept. Every element is set
+     * to 'value', which defaults to a value-initialized T.
+     */
+    void resize(std::array<DimRange, N> dim_ranges, T value = T()) {
+        dim_ranges_ = dim_ranges;
+        alloc();
+        fill(value);
+    }
+
+    ///@brief Reset the matrix to size zero
+    void clear() {
+        data_.reset(nullptr);
+        for (size_t i = 0; i < dim_ranges_.size(); ++i) {
+            dim_ranges_[i] = {0, 0};
+        }
+    }
+
+  public: //Lifetime management
+    ///@brief Copy constructor (allocates with other's ranges, then copies every element)
+    NdOffsetMatrixBase(const NdOffsetMatrixBase& other)
+        : NdOffsetMatrixBase(other.dim_ranges_) {
+        std::copy(other.data_.get(), other.data_.get() + other.size(), data_.get());
+    }
+
+    ///@brief Move constructor (starts empty, then swaps with other)
+    NdOffsetMatrixBase(NdOffsetMatrixBase&& other)
+        : NdOffsetMatrixBase() {
+        swap(*this, other);
+    }
+
+    /**
+     * @brief Copy/move assignment
+     *
+     * Note that rhs is taken by value (copy-swap idiom)
+     */
+    NdOffsetMatrixBase& operator=(NdOffsetMatrixBase rhs) {
+        swap(*this, rhs);
+        return *this;
+    }
+
+    ///@brief Swap two NdOffsetMatrixBase objects
+    friend void swap(NdOffsetMatrixBase<T, N>& m1, NdOffsetMatrixBase<T, N>& m2) {
+        using std::swap;
+        swap(m1.dim_ranges_, m2.dim_ranges_);
+        swap(m1.data_, m2.data_);
+    }
+
+  private:
+    // Allocate space for all the elements (size() of them, computed from the current ranges)
+    void alloc() {
+        data_ = std::make_unique<T[]>(size());
+    }
+
+  protected:
+    std::array<DimRange, N> dim_ranges_;
+    std::unique_ptr<T[]> data_ = nullptr;
+};
+
+/**
+ * @brief An N-dimensional matrix supporting arbitrary (continuous) index ranges per dimension.
+ *
+ * The number of dimensions N must always be given explicitly; for the common
+ * 2-dimensional case see the OffsetMatrix alias at the end of this header.
+ *
+ * Examples:
+ *
+ *       //A 2-dimensional matrix with indices [0..4][0..9]
+ *       NdOffsetMatrix<int,2> m1({5,10});
+ *
+ *       //Accessing an element
+ *       int i = m1[3][5];
+ *
+ *       //Setting an element
+ *       m1[4][9] = 0;
+ *
+ *       //A 2-dimensional matrix with indices [2..6][5..9]
+ *       // Note that C++ requires one more set of curly braces than you would expect
+ *       NdOffsetMatrix<int,2> m2({{{2,7},{5,10}}});
+ *
+ *       //A 3-dimensional matrix with indices [0..4][0..9][0..19]
+ *       NdOffsetMatrix<int,3> m3({5,10,20});
+ *
+ *       //A 3-dimensional matrix with indices [2..6][1..19][50..89]
+ *       NdOffsetMatrix<int,3> m4({{{2,7}, {1,20}, {50,90}}});
+ *
+ *       //A 2-dimensional matrix with indices [2..6][1..20], with all entries
+ *       //initialized to 42
+ *       NdOffsetMatrix<int,2> m5({{{2,7}, {1,21}}}, 42);
+ *
+ *       //A 2-dimensional matrix with indices [0..4][0..9], with all entries
+ *       //initialized to 42
+ *       NdOffsetMatrix<int,2> m6({5,10}, 42);
+ *
+ *       //Filling all entries with value 101
+ *       m6.fill(101);
+ *
+ *       //Resizing an existing matrix (all values reset to default constructed value)
+ *       m6.resize({5,5});
+ *
+ *       //Resizing an existing matrix (all elements set to value 88)
+ *       m6.resize({15,55}, 88);
+ */
+template<typename T, size_t N>
+class NdOffsetMatrix : public NdOffsetMatrixBase<T, N> {
+    //General case
+    static_assert(N >= 2, "Minimum dimension 2");
+
+  public:
+    ///@brief Use the base constructors
+    using NdOffsetMatrixBase<T, N>::NdOffsetMatrixBase;
+
+  public:
+    /**
+     * @brief Access an element
+     *
+     * Returns a proxy-object to allow chained array-style indexing  (N >= 2 case)
+     * 'index' is expressed in the first dimension's own range, i.e. [begin_index(0), end_index(0))
+     */
+    const NdOffsetMatrixProxy<T, N - 1> operator[](size_t index) const {
+        VTR_ASSERT_SAFE_MSG(this->dim_size(0) > 0, "Can not index into size zero dimension");
+        VTR_ASSERT_SAFE_MSG(this->dim_size(1) > 0, "Can not index into size zero dimension");
+        VTR_ASSERT_SAFE_MSG(index >= this->dim_ranges_[0].begin_index(), "Index out of range (below dimension minimum)");
+        VTR_ASSERT_SAFE_MSG(index < this->dim_ranges_[0].end_index(), "Index out of range (above dimension maximum)");
+
+        /*
+         * Calculate the effective index
+         *
+         * The elements are stored in zero-indexed form, so adjust for any
+         * non-zero minimum index in this dimension
+         */
+        size_t effective_index = index - this->dim_ranges_[0].begin_index();
+
+        //Calculate the stride for the current dimension
+        size_t dim_stride = this->size() / this->dim_size(0);
+
+        //Calculate the stride for the next dimension
+        size_t next_dim_stride = dim_stride / this->dim_size(1);
+
+        //Peel off the first dimension
+        return NdOffsetMatrixProxy<T, N - 1>(this->dim_ranges_.data(),                          //Pass the dimension information
+                                             1,                                                 //Pass the next dimension
+                                             next_dim_stride,                                   //Pass the stride for the next dimension
+                                             this->data_.get() + dim_stride * effective_index); //Advance to index in this dimension
+    }
+
+    /**
+     * @brief Access an element
+     *
+     * Returns a proxy-object to allow chained array-style indexing
+     */
+    NdOffsetMatrixProxy<T, N - 1> operator[](size_t index) {
+        //Call the const version; the proxy is returned by value so we don't need to worry about const
+        return const_cast<const NdOffsetMatrix<T, N>*>(this)->operator[](index);
+    }
+};
+
+/**
+ * @brief A 1-dimensional matrix supporting an arbitrary (continuous) index range.
+ *
+ * This is considered a specialization for N=1
+ */
+template<typename T>
+class NdOffsetMatrix<T, 1> : public NdOffsetMatrixBase<T, 1> {
+  public:
+    ///@brief Use the base constructors
+    using NdOffsetMatrixBase<T, 1>::NdOffsetMatrixBase;
+
+  public:
+    ///@brief Access an element (immutable); 'index' lies in [begin_index(0), end_index(0))
+    const T& operator[](size_t index) const {
+        VTR_ASSERT_SAFE_MSG(this->dim_size(0) > 0, "Can not index into size zero dimension");
+        VTR_ASSERT_SAFE_MSG(index >= this->dim_ranges_[0].begin_index(), "Index out of range (below dimension minimum)");
+        VTR_ASSERT_SAFE_MSG(index < this->dim_ranges_[0].end_index(), "Index out of range (above dimension maximum)");
+        //The elements are stored in zero-indexed form, so adjust for any non-zero minimum index
+        return this->data_[index - this->dim_ranges_[0].begin_index()];
+    }
+
+    ///@brief Access an element (mutable)
+    T& operator[](size_t index) {
+        //Call the const version, and cast away const-ness
+        return const_cast<T&>(const_cast<const NdOffsetMatrix<T, 1>*>(this)->operator[](index));
+    }
+};
+
+///@brief Convenient short form for the common 2-dimensional NdOffsetMatrix
+template<typename T>
+using OffsetMatrix = NdOffsetMatrix<T, 2>;
+
+} // namespace vtr
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_ostream_guard.h b/third_party/vtr/libs/vtrutil/src/vtr_ostream_guard.h
new file mode 100644
index 000000000..199c5cb4c
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_ostream_guard.h
@@ -0,0 +1,40 @@
+#ifndef VTR_OSTREAM_GUARD_H
+#define VTR_OSTREAM_GUARD_H
+
+namespace vtr {
+
+///@brief A RAII guard that saves an output stream's flags, width, precision and fill, and restores them on destruction
+class OsFormatGuard {
+  public:
+    ///@brief Constructor: records the current formatting state of 'os'
+    explicit OsFormatGuard(std::ostream& os)
+        : os_(os)
+        , flags_(os_.flags()) //Save formatting flag state
+        , width_(os_.width())
+        , precision_(os.precision())
+        , fill_(os.fill()) {}
+
+    ///@brief Destructor: writes the recorded formatting state back to the stream
+    ~OsFormatGuard() {
+        os_.flags(flags_); //Restore everything captured by the constructor
+        os_.width(width_);
+        os_.precision(precision_);
+        os_.fill(fill_);
+    }
+
+    OsFormatGuard(const OsFormatGuard&) = delete;
+    OsFormatGuard& operator=(const OsFormatGuard&) = delete;
+    OsFormatGuard(const OsFormatGuard&&) = delete;
+    OsFormatGuard& operator=(const OsFormatGuard&&) = delete;
+
+  private:
+    std::ostream& os_;
+    std::ios::fmtflags flags_;
+    std::streamsize width_;
+    std::streamsize precision_;
+    char fill_;
+};
+
+} // namespace vtr
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_pair_util.h b/third_party/vtr/libs/vtrutil/src/vtr_pair_util.h
new file mode 100644
index 000000000..feabbd1ac
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_pair_util.h
@@ -0,0 +1,96 @@
+#ifndef VTR_PAIR_UTIL_H
+#define VTR_PAIR_UTIL_H
+
+#include "vtr_range.h"
+
+namespace vtr {
+/**
+ * @brief Iterator which dereferences the 'first' element of a std::pair iterator
+ */
+template<typename PairIter>
+class pair_first_iter {
+  public:
+    using iterator_category = std::bidirectional_iterator_tag;
+    using value_type = typename PairIter::value_type::first_type;
+    using difference_type = typename std::iterator_traits<PairIter>::difference_type; //'void' broke std::distance()
+    using pointer = value_type*;
+    using reference = value_type&;
+
+    ///@brief constructor
+    pair_first_iter(PairIter init)
+        : iter_(init) {}
+
+    ///@brief pre-increment operator (++), returns this iterator by reference (not a copy of it)
+    pair_first_iter& operator++() {
+        ++iter_;
+        return *this;
+    }
+
+    ///@brief pre-decrement operator (\-\-), returns this iterator by reference (not a copy of it)
+    pair_first_iter& operator--() {
+        --iter_;
+        return *this;
+    }
+
+    ///@brief dereference * operator, refers to the pair's 'first' in place (no copy of the element)
+    auto& operator*() const { return iter_->first; }
+
+    ///@brief -> operator
+    auto operator->() const { return &iter_->first; }
+
+    ///@brief == operator
+    friend bool operator==(const pair_first_iter& lhs, const pair_first_iter& rhs) { return lhs.iter_ == rhs.iter_; }
+
+    ///@brief != operator
+    friend bool operator!=(const pair_first_iter& lhs, const pair_first_iter& rhs) { return !(lhs == rhs); }
+
+  private:
+    PairIter iter_;
+};
+
+/**
+ * @brief Iterator which dereferences the 'second' element of a std::pair iterator
+ */
+template<typename PairIter>
+class pair_second_iter {
+  public:
+    using iterator_category = std::bidirectional_iterator_tag;
+    using value_type = typename PairIter::value_type::second_type;
+    using difference_type = typename std::iterator_traits<PairIter>::difference_type; //'void' broke std::distance()
+    using pointer = value_type*;
+    using reference = value_type&;
+
+    ///@brief constructor
+    pair_second_iter(PairIter init)
+        : iter_(init) {}
+
+    ///@brief pre-increment operator (++), returns this iterator by reference (not a copy of it)
+    pair_second_iter& operator++() {
+        ++iter_;
+        return *this;
+    }
+
+    ///@brief pre-decrement operator (--), returns this iterator by reference (not a copy of it)
+    pair_second_iter& operator--() {
+        --iter_;
+        return *this;
+    }
+
+    ///@brief dereference * operator, refers to the pair's 'second' in place (no copy of the element)
+    auto& operator*() const { return iter_->second; }
+
+    ///@brief -> operator
+    auto operator->() const { return &iter_->second; }
+
+    ///@brief == operator
+    friend bool operator==(const pair_second_iter& lhs, const pair_second_iter& rhs) { return lhs.iter_ == rhs.iter_; }
+
+    ///@brief != operator
+    friend bool operator!=(const pair_second_iter& lhs, const pair_second_iter& rhs) { return !(lhs == rhs); }
+
+  private:
+    PairIter iter_;
+};
+
+} // namespace vtr
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_path.cc b/third_party/vtr/libs/vtrutil/src/vtr_path.cc
new file mode 100644
index 000000000..e6bf293d7
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_path.cc
@@ -0,0 +1,87 @@
+#include "vtr_path.h"
+
+#include "vtr_util.h"
+
+//TODO: currently this file assumes unix-like
+//      in the future support windows
+#include <unistd.h>
+
+#include <sstream>
+
+namespace vtr {
+
+const std::string PATH_DELIM = "/";
+
+//Splits off the name and extension (including ".") of the specified filename.
+//Only a '.' in the last path component starts an extension ("dir.d/file" has none).
+std::array<std::string, 2> split_ext(const std::string& filename) {
+    auto pos = filename.find_last_of('.');
+    auto last_delim = filename.find_last_of(PATH_DELIM);
+
+    bool dot_in_directory = (last_delim != std::string::npos && pos != std::string::npos && pos < last_delim);
+    if (pos == std::string::npos || dot_in_directory) {
+        //No extension: the whole string is the name
+        pos = filename.size();
+    }
+
+    //Element 0 is the name, element 1 the extension (empty, or starting with '.')
+    return {{filename.substr(0, pos), filename.substr(pos)}};
+}
+
+//Returns the final PATH_DELIM-separated component of 'path' (empty if there are no components)
+std::string basename(const std::string& path) {
+    const auto components = split(path, PATH_DELIM);
+
+    if (components.empty()) {
+        //Nothing to pick from
+        return std::string();
+    }
+
+    return components.back();
+}
+
+std::string dirname(const std::string& path) {
+    auto elements = split(path, PATH_DELIM);
+
+    std::string str;
+    //We need to start the dirname with a PATH_DELIM if path started with one
+    if (elements.size() > 0 && starts_with(path, PATH_DELIM)) {
+        str += PATH_DELIM;
+    }
+
+    //Only a path with a directory part gets the joined prefix: a bare "file" now yields ""
+    //rather than "/", which would silently re-root a relative name
+    if (elements.size() > 1) {
+        //Join all except the last path element
+        str += join(elements.begin(), elements.end() - 1, PATH_DELIM);
+        //We append a final PATH_DELIM to allow clients to just append directly to the returned value
+        str += PATH_DELIM;
+    }
+
+    return str;
+}
+
+std::string getcwd() {
+    //Start with a modest buffer and double it while ::getcwd() reports ERANGE (buffer too
+    //small), so long working directories succeed instead of raising a bogus error
+    std::string buf(512, '\0');
+
+    while (!::getcwd(&buf[0], buf.size())) {
+        //Check the global errno
+        int error = errno;
+
+        if (error == ERANGE) {
+            buf.resize(buf.size() * 2);
+        } else if (error == EACCES) {
+            throw std::runtime_error("Access denied");
+        } else {
+            std::stringstream str;
+            str << "Unrecognised error " << error;
+            throw std::runtime_error(str.str());
+        }
+    }
+
+    return std::string(buf.c_str()); //Keep only the characters before the terminating NUL
+}
+
+} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_path.h b/third_party/vtr/libs/vtrutil/src/vtr_path.h
new file mode 100644
index 000000000..a48d2bdb5
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_path.h
@@ -0,0 +1,33 @@
+#ifndef VTR_PATH_H
+#define VTR_PATH_H
+#include <string>
+#include <array>
+
+/**
+ * @file 
+ * @brief This file defines some useful utilities to handle paths
+ */
+namespace vtr {
+
+///@brief Splits off the name and extension (including ".") of the specified filename
+std::array<std::string, 2> split_ext(const std::string& filename);
+
+/**
+ * @brief Returns the basename of path (i.e. the last filename component)
+ *
+ *  For example, the path "/home/user/my_files/test.blif" -> "test.blif"
+ */
+std::string basename(const std::string& path);
+
+/**
+ * @brief Returns the dirname of path (i.e. everything except the last filename component)
+ *
+ *  For example, the path "/home/user/my_files/test.blif" -> "/home/user/my_files/"
+ */
+std::string dirname(const std::string& path);
+
+///@brief Returns the current working directory
+std::string getcwd();
+
+} // namespace vtr
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_ragged_matrix.h b/third_party/vtr/libs/vtrutil/src/vtr_ragged_matrix.h
new file mode 100644
index 000000000..bbe7fea78
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_ragged_matrix.h
@@ -0,0 +1,255 @@
+#ifndef VTR_RAGGED_MATRIX_H
+#define VTR_RAGGED_MATRIX_H
+#include <vector>
+#include <iterator>
+
+#include "vtr_assert.h"
+#include "vtr_array_view.h"
+
+namespace vtr {
+
+/**
+ * @brief A 2 dimensional 'ragged' matrix with rows indexed by Index0, and each row of variable length (indexed by Index1)
+ *
+ * Example:
+ *
+ *       std::vector<int> row_sizes = {1, 5, 3, 10};
+ *       FlatRaggedMatrix<float> matrix(row_sizes);
+ *
+ *       //Fill in all entries with ascending values
+ *       float value = 1.;
+ *       for (size_t irow = 0; irow < row_sizes.size(); ++irow) {
+ *           for (size_t icol = 0; icol < row_sizes[irow]; ++icol) {
+ *               matrix[irow][icol] = value;
+ *               value += 1.;
+ *           }
+ *       }
+ *
+ * For efficiency, this class uses a flat memory layout, where all elements are laid out
+ * contiguously (one row after another), followed by one trailing sentinel element.
+ *
+ * Expects Index0 and Index1 to be convertible to size_t.
+ */
+template<typename T, typename Index0 = size_t, typename Index1 = size_t>
+class FlatRaggedMatrix {
+  public:
+    ///@brief default constructor
+    FlatRaggedMatrix() = default;
+
+    /**
+     * @brief Constructs matrix with 'nrows' rows.
+     *
+     * The row length is determined by calling
+     * 'row_length_callback' with the associated row index.
+     */
+    template<class Callback>
+    FlatRaggedMatrix(size_t nrows, Callback& row_length_callback, T default_value = T())
+        : FlatRaggedMatrix(RowLengthIterator<Callback>(0, row_length_callback),
+                           RowLengthIterator<Callback>(nrows, row_length_callback),
+                           default_value) {}
+
+    ///@brief Constructs matrix from a container of row lengths
+    template<class Container>
+    FlatRaggedMatrix(Container container, T default_value = T())
+        : FlatRaggedMatrix(std::begin(container), std::end(container), default_value) {}
+
+    /**
+     * @brief Constructs matrix from an iterator range.
+     *
+     * The length of the range is the number of rows, and iterator values are the row lengths.
+     */
+    template<class Iter>
+    FlatRaggedMatrix(Iter row_size_first, Iter row_size_last, T default_value = T()) {
+        size_t nrows = std::distance(row_size_first, row_size_last);
+        first_elem_.resize(nrows + 1, -1); //+1 for sentinel
+
+        size_t nelem = 0;
+        size_t irow = 0;
+        for (Iter iter = row_size_first; iter != row_size_last; ++iter) {
+            first_elem_[irow] = nelem;
+
+            nelem += *iter;
+            ++irow;
+        }
+
+        //Sentinel
+        first_elem_[irow] = nelem;
+
+        data_.resize(nelem + 1, default_value); //+1 for sentinel
+    }
+
+  public: //Accessors
+    ///@brief Iterator to the first element of the matrix
+    auto begin() {
+        return data_.begin();
+    }
+
+    ///@brief Iterator one past the last element of the matrix (the trailing sentinel is never exposed)
+    auto end() {
+        //begin() + size() also stops short of the sentinel when the matrix has rows but no elements
+        return data_.begin() + size();
+    }
+
+    ///@brief Iterator to the first element of the matrix (immutable)
+    auto begin() const {
+        return data_.begin();
+    }
+
+    ///@brief Iterator one past the last element of the matrix (immutable)
+    auto end() const {
+        return data_.begin() + size();
+    }
+
+    ///@brief Return the size of the matrix
+    size_t size() const {
+        if (data_.empty()) {
+            return 0;
+        }
+        return data_.size() - 1; //-1 for sentinel
+    }
+
+    ///@brief Return true if empty
+    bool empty() const {
+        return size() == 0;
+    }
+
+    ///@brief Indexing operators for the first dimension
+    vtr::array_view<T> operator[](Index0 i) {
+        size_t idx = size_t(i);
+        VTR_ASSERT_SAFE(idx + 1 < first_elem_.size());
+        T* first = &data_[first_elem_[idx]];
+        T* last = &data_[first_elem_[idx + 1]];
+        return vtr::array_view<T>(first,
+                                  last - first);
+    }
+
+    ///@brief Indexing operators for the first dimension (immutable)
+    vtr::array_view<const T> operator[](Index0 i) const {
+        size_t idx = size_t(i);
+        VTR_ASSERT_SAFE(idx + 1 < first_elem_.size());
+        const T* first = &data_[first_elem_[idx]];
+        const T* last = &data_[first_elem_[idx + 1]];
+        return vtr::array_view<const T>(first,
+                                        last - first);
+    }
+
+    ///@brief Clears the matrix
+    void clear() {
+        data_.clear();
+        first_elem_.clear();
+    }
+
+    ///@brief Swaps two matrices
+    void swap(FlatRaggedMatrix<T, Index0, Index1>& other) {
+        std::swap(data_, other.data_);
+        std::swap(first_elem_, other.first_elem_);
+    }
+
+    ///@brief Swaps two matrices
+    friend void swap(FlatRaggedMatrix<T, Index0, Index1>& lhs, FlatRaggedMatrix<T, Index0, Index1>& rhs) {
+        lhs.swap(rhs);
+    }
+
+  public: //Types
+    ///@brief Proxy class used to represent a 'row' in the matrix
+    template<typename U>
+    class ProxyRow {
+      public:
+        ///@brief constructor
+        ProxyRow(U* first, U* last)
+            : first_(first)
+            , last_(last) {}
+
+        ///@brief Return iterator to the first element
+        U* begin() { return first_; }
+        ///@brief Return iterator one past the last element
+        U* end() { return last_; }
+
+        ///@brief Return iterator to the first element (immutable)
+        const U* begin() const { return first_; }
+        ///@brief Return iterator one past the last element (immutable)
+        const U* end() const { return last_; }
+
+        ///@brief Return the size of the row
+        size_t size() const { return last_ - first_; }
+
+        ///@brief indexing [] operator
+        U& operator[](Index1 j) {
+            VTR_ASSERT_SAFE(size_t(j) < size());
+            return first_[size_t(j)];
+        }
+
+        ///@brief indexing [] operator (immutable)
+        const U& operator[](Index1 j) const {
+            VTR_ASSERT_SAFE(size_t(j) < size());
+            return first_[size_t(j)];
+        }
+
+        ///@brief Return pointer to the first element
+        U* data() {
+            return first_;
+        }
+
+        ///@brief Return pointer to the first element (const row; the pointee stays as mutable as U)
+        U* data() const {
+            return first_;
+        }
+
+      private:
+        U* first_;
+        U* last_;
+    };
+
+  private:
+    /*
+     * Iterator for constructing FlatRaggedMatrix.
+     *
+     * uses a callback to determine row lengths.
+     */
+    template<class Callback>
+    class RowLengthIterator {
+      public:
+        using iterator_category = std::random_access_iterator_tag; //Traits spelled out: std::iterator is deprecated since C++17
+        using value_type = size_t;
+        using difference_type = std::ptrdiff_t;
+        using pointer = size_t*;
+        using reference = size_t&;
+
+        RowLengthIterator(size_t irow, Callback& callback)
+            : irow_(irow)
+            , callback_(callback) {}
+
+        RowLengthIterator& operator++() {
+            ++irow_;
+            return *this;
+        }
+
+        bool operator==(const RowLengthIterator& other) const {
+            return irow_ == other.irow_;
+        }
+
+        bool operator!=(const RowLengthIterator& other) const {
+            return !(*this == other);
+        }
+
+        int operator-(const RowLengthIterator& other) const {
+            return irow_ - other.irow_;
+        }
+
+        size_t operator*() const {
+            return callback_(Index0(irow_)); //Call the callback to get the row length
+        }
+
+      private:
+        size_t irow_;
+        Callback& callback_;
+    };
+
+  private:
+    std::vector<T> data_;
+    std::vector<int> first_elem_;
+};
+
+} // namespace vtr
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_random.cc b/third_party/vtr/libs/vtrutil/src/vtr_random.cc
new file mode 100644
index 000000000..3427e5fc2
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_random.cc
@@ -0,0 +1,77 @@
+#include <cstddef>
+
+#include "vtr_random.h"
+#include "vtr_util.h"
+#include "vtr_error.h"
+
+#define CHECK_RAND
+
+namespace vtr {
+/* Portable random number generator defined below.  Taken from ANSI C by  *
+ * K & R.  Not a great generator, but fast, and good enough for my needs. */
+
+constexpr size_t IA = 1103515245u; //Multiplier applied to the state on every draw
+constexpr size_t IC = 12345u;      //Increment added to the state on every draw
+constexpr size_t IM = 2147483648u; //Modulus (2^31); IM - 1 is used below as a bit mask
+
+static RandState random_state = 0; //Generator state shared by srandom(), irand(int) and frand()
+
+/**
+ * @brief Seeds the file-level pseudo-random number generator with the given value.
+ */
+void srandom(int seed) {
+    random_state = static_cast<unsigned int>(seed);
+}
+
+/* Returns the current state of the file-level generator (the value srandom() sets and irand(int)/frand() advance) */
+RandState get_random_state() {
+    return random_state;
+}
+
+int irand(int imax, RandState& state) {
+    /* Draws a pseudo-random integer from the closed interval [0..imax] */
+    /* Generator step is: state = (state * IA + IC) % IM; */
+    state = state * IA + IC;       /* Overflow performs the wrap */
+    int masked = state & (IM - 1); /* Modulus */
+    int ival = (int)((float)masked * (float)(imax + 0.999) / (float)IM); /* Scale onto [0..imax] */
+
+#ifdef CHECK_RAND
+    const bool out_of_range = (ival > imax) || (ival < 0);
+    if (out_of_range) {
+        if (ival == imax + 1) {
+            /* Floating point rounding in the scaling above occasionally lands one past imax */
+            ival = imax;
+        } else {
+            /* Any other value means the generator arithmetic went wrong */
+            throw VtrError(string_fmt("Bad value in my_irand, imax = %d  ival = %d", imax, ival), __FILE__, __LINE__);
+        }
+    }
+#endif
+
+    return ival;
+}
+
+int irand(int imax) { //Same as irand(imax, state), drawing from the file-level random_state
+    return irand(imax, random_state);
+}
+
+float frand() {
+    /* Creates a random float between 0 and 1. Exactly 1 is possible: the check below    */
+    /* accepts it, and draws just under IM round up to IM when converted to float.      */
+
+    random_state = random_state * IA + IC;  /* Overflow performs the wrap */
+    int masked = random_state & (IM - 1);   /* Modulus */
+    float fval = (float)masked / (float)IM; /* Scale into the unit interval */
+
+#ifdef CHECK_RAND
+    const bool out_of_range = (fval > 1.) || (fval < 0);
+    if (out_of_range) {
+        /* Only reachable if the generator arithmetic went wrong */
+        throw VtrError(string_fmt("Bad value in my_frand, fval = %g", fval), __FILE__, __LINE__);
+    }
+#endif
+
+    return fval;
+}
+
+} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_random.h b/third_party/vtr/libs/vtrutil/src/vtr_random.h
new file mode 100644
index 000000000..c5a3ce663
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_random.h
@@ -0,0 +1,45 @@
+#ifndef VTR_RANDOM_H
+#define VTR_RANDOM_H
+#include <algorithm> //For std::swap
+
+namespace vtr {
+/*********************** Portable random number generators *******************/
+typedef unsigned RandState;
+
+/**
+ * @brief The pseudo-random number generator is initialized using the argument passed as seed.
+ */
+void srandom(int seed);
+
+///@brief Return The random number generator state
+RandState get_random_state();
+
+///@brief Return a randomly generated integer less than or equal imax
+int irand(int imax);
+
+///@brief Return a randomly generated integer less than or equal imax using the generator (rand_state)
+int irand(int imax, RandState& rand_state);
+
+///@brief Return a randomly generated float number between [0,1]
+float frand();
+
+/**
+ * @brief Portable/invariant version of std::shuffle
+ *
+ * std::shuffle relies on std::uniform_int_distribution, which can produce different
+ * sequences across different compilers/compiler versions. This version should be
+ * deterministic/invariant, but since it draws from vtr::irand() the result may not be
+ * as well distributed as std::shuffle.
+ */
+template<typename Iter>
+void shuffle(Iter first, Iter last, RandState& rand_state) {
+    using std::swap;
+    auto idx = (last - first) - 1;
+    while (idx > 0) {
+        swap(first[idx], first[irand(idx, rand_state)]);
+        --idx;
+    }
+}
+
+} // namespace vtr
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_range.h b/third_party/vtr/libs/vtrutil/src/vtr_range.h
new file mode 100644
index 000000000..493a379fb
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_range.h
@@ -0,0 +1,85 @@
+#ifndef VTR_RANGE_H
+#define VTR_RANGE_H
+#include <iterator>
+
+namespace vtr {
+/**
+ * @brief The vtr::Range template models a range defined by two iterators of type T.
+ *
+ * It allows conveniently returning a range from a single function call
+ * without having to explicitly expose the underlying container, or make two
+ * explicit calls to retrieve the associated begin and end iterators.
+ * It also enables the easy use of range-based-for loops.
+ *
+ * For example:
+ *
+ *      class MyData {
+ *          public:
+ *              typedef std::vector<int>::const_iterator my_iter;
+ *              vtr::Range<my_iter> data();
+ *          ...
+ *          private:
+ *              std::vector<int> data_;
+ *      };
+ *
+ *      ...
+ *
+ *      MyData my_data;
+ *
+ *      //fill my_data
+ *
+ *      for(int val : my_data.data()) {
+ *          //work with values stored in my_data
+ *      }
+ *
+ * The empty() and size() methods are convenience wrappers around the relevant
+ * iterator comparisons; both are callable on const ranges.
+ *
+ * Note that size() is only constant time if T is a random-access iterator!
+ */
+template<typename T>
+class Range {
+  public:
+    ///@brief constructor
+    Range(T b, T e)
+        : begin_(b)
+        , end_(e) {}
+    ///@brief Return an iterator to the start of the range
+    T begin() { return begin_; }
+    ///@brief Return an iterator to the end of the range
+    T end() { return end_; }
+    ///@brief Return an iterator to the start of the range (immutable)
+    const T begin() const { return begin_; }
+    ///@brief Return an iterator to the end of the range (immutable)
+    const T end() const { return end_; }
+    ///@brief Return true if empty
+    bool empty() const { return begin_ == end_; }
+    ///@brief Return the range size
+    size_t size() const { return std::distance(begin_, end_); }
+
+  private:
+    T begin_;
+    T end_;
+};
+
+/**
+ * @brief Builds a vtr::Range spanning the iterator pair [b, e).
+ *
+ *  The vtr::Range() constructor requires the template type T to be spelled out;
+ *  vtr::make_range() infers T from the arguments instead.
+ *
+ * Example usage:
+ *  auto my_range = vtr::make_range(my_vec.begin(), my_vec.end());
+ */
+template<typename T>
+auto make_range(T b, T e) { Range<T> range(b, e); return range; }
+
+/**
+ * @brief Builds a vtr::Range covering all of container 'c' (taken by const reference)
+ */
+template<typename Container>
+auto make_range(const Container& c) { return Range<decltype(std::begin(c))>(std::begin(c), std::end(c)); }
+
+} // namespace vtr
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_rusage.cc b/third_party/vtr/libs/vtrutil/src/vtr_rusage.cc
new file mode 100644
index 000000000..a3b74c04c
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_rusage.cc
@@ -0,0 +1,30 @@
+#include "vtr_rusage.h"
+
+#ifdef __unix__
+#    include <sys/time.h>
+#    include <sys/resource.h>
+#endif
+
+namespace vtr {
+
+///@brief Reports the maximum resident set size in bytes (zero when it cannot be determined).
+size_t get_max_rss() {
+#ifdef __unix__
+    rusage usage;
+
+    //getrusage() signals failure with a non-zero return value
+    if (getrusage(RUSAGE_SELF, &usage) != 0) {
+        return 0;
+    }
+
+    //ru_maxrss is in kilobytes, convert to bytes
+    const size_t max_rss = usage.ru_maxrss * 1024;
+    return max_rss;
+#else
+    //No implementation for this platform yet; further platform specific code could be
+    //added here behind the appropriate defines
+    return 0;
+#endif
+}
+
+} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_rusage.h b/third_party/vtr/libs/vtrutil/src/vtr_rusage.h
new file mode 100644
index 000000000..b69dc438a
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_rusage.h
@@ -0,0 +1,11 @@
+#ifndef VTR_RUSAGE_H
+#define VTR_RUSAGE_H
+#include <cstddef>
+
+namespace vtr {
+
+///@brief Returns the maximum resident set size in bytes, or zero if unable to determine.
+size_t get_max_rss();
+} // namespace vtr
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_sentinels.h b/third_party/vtr/libs/vtrutil/src/vtr_sentinels.h
new file mode 100644
index 000000000..036fd593b
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_sentinels.h
@@ -0,0 +1,49 @@
+#ifndef VTR_SENTINELS_H
+#define VTR_SENTINELS_H
+
+/**
+ * @file
+ * @brief This header defines different sentinel value classes
+ */
+namespace vtr {
+
+/**
+ * @brief The default sentinel value class
+ *
+ * Some specialized containers like vtr::linear_map and
+ * vtr::vector_map require sentinel values to mark invalid/uninitialized
+ * values. By convention, such containers query the sentinel object's static
+ * INVALID() member function to retrieve the sentinel value.
+ *
+ * The sentinel classes allow users to specify a custom sentinel value.
+ *
+ * Usually the containers default to DefaultSentinel.
+ *
+ * Here the sentinel value is the default constructed value of the type, T().
+ */
+template<class T>
+class DefaultSentinel {
+  public:
+    static constexpr T INVALID() { return T(); }
+};
+
+///@brief Specialization for pointer types: the sentinel value is the null pointer
+template<class T>
+class DefaultSentinel<T*> {
+  public:
+    static constexpr T* INVALID() { return nullptr; }
+};
+
+///@brief The sentinel value is the value 'val' supplied as a template argument
+template<class T, T val>
+class CustomSentinel {
+  public:
+    static constexpr T INVALID() { return static_cast<T>(val); }
+};
+
+///@brief The common case where -1 is used as the sentinel value
+template<class T>
+using MinusOneSentinel = CustomSentinel<T, -1>;
+
+} // namespace vtr
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_small_vector.h b/third_party/vtr/libs/vtrutil/src/vtr_small_vector.h
new file mode 100644
index 000000000..5fe755201
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_small_vector.h
@@ -0,0 +1,854 @@
+#ifndef VTR_SMALL_VECTOR
+#define VTR_SMALL_VECTOR
+#include <memory>
+#include <algorithm>
+#include <limits>
+#include <cstdint>
+#include <array>
+#include "vtr_assert.h"
+
+namespace vtr {
+
+namespace small_vector_impl {
+
+/**
+ * @brief The long format view of the vector.
+ *
+ * It consists of a pointer to the element array, a capacity and a size (size last).
+ */
+template<class T, class S>
+struct long_format {
+    T* data_ = nullptr; ///< Element array (null until one is assigned)
+    S capacity_ = 0;    ///< Capacity recorded for data_
+    S size_ = 0;        ///< Number of elements
+};
+
+/**
+ * @brief The short format view of the vector.
+ *
+ * It consists of an in-place array of CAPACITY objects (CAPACITY may be zero),
+ * PAD bytes of padding, and a size.
+ */
+template<class T, class S, size_t CAPACITY, size_t PAD>
+struct short_format {
+    std::array<T, CAPACITY> data_; ///< In-place element storage
+    std::array<uint8_t, PAD> pad_; ///< Padding to keep size_ aligned in both long_format and short_format
+    S size_ = 0;                   ///< Number of elements
+};
+
+/**
+ * @brief A specialized version of short_format for padding of size zero.
+ *
+ * Needed since a std::array with zero array size may still have non-zero sizeof(), so pad_ is left out entirely.
+ */
+template<class T, class S, size_t CAPACITY>
+struct short_format<T, S, CAPACITY, 0> {
+    std::array<T, CAPACITY> data_; ///< In-place element storage
+    S size_ = 0;                   ///< Number of elements
+};
+
+} // namespace small_vector_impl
+
+/**
+ * @brief vtr::small_vector is a std::vector like container which:
+ *
+ *   - consumes less memory: sizeof(vtr::small_vector) < sizeof(std::vector)
+ *   - possibly stores elements in-place (i.e. within the object)
+ * 
+ * On a typical LP64 system a vtr::small_vector consumes 16 bytes by default and supports
+ * vectors up to ~2^32 elements long, while a std::vector consumes 24 bytes and supports up
+ * to ~2^64 elements. The type used to store the size and capacity is configurable,
+ * and set by the second template parameter argument. Setting it to size_t will replicate
+ * std::vector's characteristics.
+ * 
+ * For short vectors vtr::small_vector will try to store elements in-place (i.e. within the
+ * vtr::small_vector object) instead of dynamically allocating an array (by re-using the
+ * internal storage for the pointer, size and capacity). Whether this is possible depends on
+ * the size and alignment requirements of the value type, as compared to
+ * vtr::small_vector. If in-place storage is not possible (e.g. due to a large value
+ * type, or a large number of elements) a dynamic buffer is allocated (similar to
+ * std::vector).
+ * 
+ * This is a highly specialized container. Unless you have specifically measured its
+ * usefulness you should use std::vector.
+ */
+template<class T, class S = uint32_t>
+class small_vector {
+  public: //Types
+    typedef T value_type;
+    //typedef allocator_type //Allocator, unimplemented
+    typedef value_type& reference;
+    typedef const value_type& const_reference;
+    typedef value_type* pointer;
+    typedef const value_type* const_pointer;
+
+    typedef T* iterator;
+    typedef const T* const_iterator;
+    typedef std::reverse_iterator<iterator> reverse_iterator;
+    typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+
+    typedef ptrdiff_t difference_type;
+    typedef S size_type;
+
+  public: //Constructors
+    ///@brief default constructor: sets the size to zero (and, with no in-place slots, nulls the long view)
+    small_vector() {
+        if (SHORT_CAPACITY == 0) { //No in-place capacity: give the long view a defined null pointer and zero capacity
+            long_.data_ = nullptr;
+            long_.capacity_ = 0;
+        }
+        set_size(0);
+    }
+    ///@brief constructor: creates a vector holding 'nelem' default-constructed elements
+    small_vector(size_type nelem)
+        : small_vector() {
+        reserve(nelem);
+        for (size_type i = 0; i < nelem; i++) {
+            emplace_back();
+        }
+        //The size must stay at nelem here: resetting it to 0 would discard the elements just created
+    }
+
+  public: //Accessors
+    ///@brief Return a const_iterator to the first element (same iterator as cbegin())
+    const_iterator begin() const {
+        return cbegin();
+    }
+
+    ///@brief Return a const_iterator pointing to the past-the-end element in the container (same iterator as cend()).
+    const_iterator end() const {
+        return cend();
+    }
+
+    ///@brief Return a const_reverse_iterator pointing to the last element in the container (i.e., its reverse beginning); same as crbegin().
+    const_reverse_iterator rbegin() const {
+        return crbegin();
+    }
+
+    ///@brief Return a const_reverse_iterator pointing to the theoretical element preceding the first element in the container (its reverse end); same as crend().
+    const_reverse_iterator rend() const {
+        return crend();
+    }
+
+    ///@brief Return a const_iterator pointing to the first element, in whichever format currently holds the elements.
+    const_iterator cbegin() const {
+        if (!is_short()) {
+            return long_.data_;
+        }
+        return short_.data_.data();
+    }
+
+    ///@brief Return a const_iterator pointing to the past-the-end element in the container.
+    const_iterator cend() const {
+        if (!is_short()) {
+            return long_.data_ + size();
+        }
+        return short_.data_.data() + size();
+    }
+
+    ///@brief Return a const_reverse_iterator pointing to the last element in the container (i.e., its reverse beginning); it is built from cend().
+    const_reverse_iterator crbegin() const {
+        return const_reverse_iterator(cend());
+    }
+
+    ///@brief Return a const_reverse_iterator pointing to the theoretical element preceding the first element in the container (its reverse end); it is built from cbegin().
+    const_reverse_iterator crend() const {
+        return const_reverse_iterator(cbegin());
+    }
+
+    ///@brief return the vector size, always read through the long view (Padding ensures long/short format sizes are always aligned)
+    size_type size() const {
+        return long_.size_;
+    }
+
+    ///@brief Return the maximum size, i.e. the largest value representable by the size type S
+    size_t max_size() const {
+        return std::numeric_limits<S>::max();
+    }
+
+    ///@brief Return the vector capacity
+    size_type capacity() const {
+        if (!is_short()) {
+            return long_.capacity_; //Capacity recorded in the long view
+        }
+        return SHORT_CAPACITY; //Fixed capacity
+    }
+
+    ///@brief Return true if empty (i.e. size() is zero)
+    bool empty() const { return size() == 0; }
+
+    ///@brief Immutable indexing operator [] (performs no bounds check of its own)
+    const_reference operator[](size_t i) const {
+        if (!is_short()) {
+            return long_.data_[i];
+        }
+        return short_.data_[i];
+    }
+
+    ///@brief Immutable at() operator: bounds-checked access, throws std::out_of_range for i >= size()
+    const_reference at(size_t i) const {
+        if (i >= size()) { //Valid indices are [0, size()): size() itself is already out of range
+            throw std::out_of_range("Index out of bounds");
+        }
+        return operator[](i);
+    }
+
+    ///@brief Return a constant reference to the first element (the vector must not be empty)
+    const_reference front() const {
+        return *cbegin();
+    }
+
+    ///@brief Return a constant reference to the last element (the vector must not be empty)
+    const_reference back() const {
+        return *(cend() - 1);
+    }
+
+    ///@brief Return a constant pointer to the vector data
+    const_pointer data() const {
+        if (is_short()) {
+            return short_.data_.data(); //In-place storage (the return was missing, so long_.data_ was handed out)
+        }
+        return long_.data_;
+    }
+
+  public: //Mutators
+    ///@brief Return an iterator pointing to the first element in the sequence
+    iterator begin() {
+        const small_vector<T, S>* const_self = this; //Reuse the const overload, then drop const
+        return const_cast<iterator>(const_self->begin());
+    }
+
+    ///@brief Return an iterator referring to the past-the-end element in the vector container.
+    iterator end() {
+        return const_cast<iterator>(cend()); //cend() is what the const end() forwards to
+    }
+
+    ///@brief Return a reverse iterator pointing to the last element in the vector (i.e., its reverse beginning).
+    reverse_iterator rbegin() {
+        //Reverse iteration starts from the mutable past-the-end iterator
+        return reverse_iterator(end());
+    }
+
+    ///@brief Return a reverse iterator pointing to the theoretical element preceding the first element in the vector (which is considered its reverse end).
+    reverse_iterator rend() {
+        return reverse_iterator(begin());
+    }
+
+    ///@brief Resizes the container so that it contains n elements, passing value_type() as the fill value
+    void resize(size_type n) {
+        resize(n, value_type());
+    }
+
+    ///@brief Resizes the container so that it contains n elements and fills it with val
+    void resize(size_type n, value_type val) {
+        const size_type old_size = size();
+        if (n > old_size) {
+            //Insert new elements at end
+            insert(end(), n - old_size, val);
+        } else if (n < old_size) {
+            erase(begin() + n, end());
+        }
+    }
+
+    /**
+     * @brief Reserve memory for a specific number of elements
+     *
+     * Capacity is only changed when the vector already uses the heap (long) format and
+     * num_elems exceeds the current size. The storage format is derived from size(), so a heap
+     * buffer allocated while size() <= SHORT_CAPACITY would be orphaned and the in-place elements lost.
+     */
+    void reserve(size_type num_elems) {
+        if (!is_short() && num_elems > size()) {
+            change_capacity(num_elems);
+        }
+    }
+
+    ///@brief Requests the container to reduce its capacity to fit its size (no-op while the short/in-place format is in use).
+    void shrink_to_fit() {
+        if (!is_short()) {
+            change_capacity(size());
+        }
+    }
+
+    ///@brief Indexing operator [] (mutable access; forwards to the const overload)
+    reference operator[](size_t i) {
+        return const_cast<reference>(static_cast<const small_vector<T, S>&>(*this)[i]);
+    }
+
+    ///@brief at() accessor (mutable access; forwards to the const overload)
+    reference at(size_t i) {
+        return const_cast<reference>(static_cast<const small_vector<T, S>&>(*this).at(i));
+    }
+
+    ///@brief Returns a reference to the first element in the vector (forwards to the const overload).
+    reference front() {
+        return const_cast<reference>(static_cast<const small_vector<T, S>&>(*this).front());
+    }
+
+    ///@brief Returns a reference to the last element in the vector (forwards to the const overload).
+    reference back() {
+        return const_cast<reference>(static_cast<const small_vector<T, S>&>(*this).back());
+    }
+
+    pointer data() { //Mutable pointer to the vector data; forwards to the const overload
+        return const_cast<pointer>(static_cast<const small_vector<T, S>&>(*this).data());
+    }
+
+    /**
+     * @brief Assigns new contents to the vector, replacing its current contents, and modifying its size accordingly.
+     * The new contents are copies of the half-open range [first,last). Non-iterator arguments (e.g. two ints) do not match this overload.
+     */
+    template<class InputIterator>
+    auto assign(InputIterator first, InputIterator last) -> decltype(void(*first)) {
+        clear(); //Replace, not append: drop the current contents first
+        for (; first != last; ++first) {
+            push_back(*first);
+        }
+    }
+
+    ///@brief Assigns new contents to the vector, replacing its current contents, and modifying its size accordingly.
+    ///
+    ///The new contents are n copies of val.
+    void assign(size_type n, const value_type& val) {
+        const value_type fill(val); //Copy first: val may refer to an element that is about to be destroyed
+        clear();
+        insert(begin(), n, fill);
+    }
+
+    /**
+     * @brief Assigns new contents to the vector, replacing its current contents, and modifying its size accordingly.
+     *
+     * Takes an initializer list (il), e.g. a braced list of values, and forwards its [begin, end) range to the iterator-range overload
+     */
+    void assign(std::initializer_list<value_type> il) {
+        assign(il.begin(), il.end());
+    }
+
+    ///@brief Appends value as the new last element, growing the storage if required
+    void push_back(value_type value) {
+        //next_back() hands back raw (unconstructed) storage for the new element
+        T* slot = next_back();
+
+        //The argument was taken by value, so it is already our own copy:
+        //move-construct it straight into the slot rather than default-constructing
+        //and then move-assigning (which needlessly required T to be default constructible)
+        new (slot) T(std::move(value));
+    }
+
+    ///@brief Removes the last element in the vector, effectively reducing the container size by one (no-op on an empty vector).
+    void pop_back() {
+        if (size() > 0) {
+            erase(end() - 1);
+        }
+    }
+
+    ///@brief Inserts a single copy of val before position (forwards to the count overload with n = 1) and returns that overload's result.
+    iterator insert(const_iterator position, const value_type& val) {
+        return insert(position, 1, val);
+    }
+
+    /**
+     * @brief Insert n copies of val before position
+     *
+     * Returns an iterator to the first inserted element. The position is tracked as an index,
+     * which stays valid if the underlying storage is reallocated.
+     */
+    iterator insert(const_iterator position, size_type n, const value_type& val) {
+        const size_type i = std::distance(cbegin(), position);
+        const size_type old_size = size();
+        const size_type new_size = old_size + n;
+
+        /*
+         * The storage format is derived from size() (see is_short()), which is only updated at
+         * the very end. begin()/end() therefore cannot be used once the elements may have moved
+         * to the heap, so the storage that will be active for new_size is selected explicitly.
+         */
+        const bool fits_in_place = SHORT_CAPACITY > 0u && new_size <= SHORT_CAPACITY;
+        if (!fits_in_place && (is_short() || capacity() < new_size)) {
+            change_capacity(new_size); //Moves the elements to a heap buffer of sufficient size
+        }
+        T* base = fits_in_place ? short_.data_.data() : long_.data_;
+
+        //Shift the tail [i, old_size) up by n slots, then fill the gap with copies of val
+        iterator first = base + i;
+        iterator old_end = base + old_size;
+        reverse_swap_elements(first, old_end, old_end + n - 1);
+        std::uninitialized_fill(first, first + n, val);
+
+        set_size(new_size);
+        return first;
+    }
+
+    ///@brief Insert n elements at position position and fill them with value val
+    iterator insert(const_iterator position, size_type n, value_type&& val) {
+        return insert(position, n, static_cast<const value_type&>(val)); //Pass as an lvalue: a temporary would select this overload again and recurse forever
+    }
+
+    //Range insert
+    //template<class InputIterator>
+    //iterator insert(const_iterator position, InputIterator first, InputIterator last) {
+    ////Location of position as an index, which will be
+    ////unchanged if the underlying storage is reallocated
+    //size_type i = std::distance(cbegin(), position);
+    //size_type n = std::distance(first, last);
+
+    ////If needed, grow capacity
+    ////
+    ////Note that change_capacity will automatically convert from short to long
+    ////format if required.
+    //size_type new_size = size() + n;
+    //if (capacity() < new_size) {
+    //change_capacity(new_size);
+    //}
+
+    //reverse_swap_elements(begin() + i, end(), end() + n - 1);
+
+    ////Insert new values at end
+    //std::uninitialized_copy(first, last, begin() + i);
+
+    //set_size(new_size);
+
+    //return begin() + i;
+    //}
+
+    ///@brief Removes from the vector a single element (position); forwards to the range overload with [position, position + 1).
+    iterator erase(const_iterator position) {
+        return erase(position, position + 1);
+    }
+
+    ///@brief Removes the range of elements [first,last) from the vector and returns an iterator to the element that followed it.
+    iterator erase(const_iterator first, const_iterator last) {
+        //Number of elements to erase
+        size_type n = std::distance(first, last);
+
+        //Location of position as an index, which will be
+        //unchanged if the underlying storage is changed
+        size_type i_first = std::distance(cbegin(), first);
+
+        size_type new_size = size() - n;
+
+        //Only vectors that have an in-place buffer (SHORT_CAPACITY > 0) can ever leave the long format
+
+        if (SHORT_CAPACITY > 0u && !is_short() && new_size <= SHORT_CAPACITY) {
+            //Convert from long format to short/in-place format
+
+            //Keep handle on buffer and original size
+            auto buff_ptr = long_.data_;
+            size_type orig_size = size();
+
+            //Copy into in-place the valid (not-to-be-erased) values in
+            //[begin, first) and [last, end)
+            //
+            //Note that the in-place storage overlaps the long format's
+            //bookkeeping, which is why the buffer handle was saved above
+            auto buff_begin = buff_ptr;
+            auto buff_end = buff_begin + orig_size;
+            auto erase_begin = buff_ptr + i_first;
+            auto erase_end = erase_begin + n;
+
+            //Copy from beginning until start of erase
+            auto inplace_ptr = short_.data_.data();
+            for (auto buff_itr = buff_begin; buff_itr != erase_begin; ++buff_itr) {
+                new (inplace_ptr++) T(*buff_itr);
+            }
+            //Copy from end of erase until end of buf
+            for (auto buff_itr = erase_end; buff_itr != buff_end; ++buff_itr) {
+                new (inplace_ptr++) T(*buff_itr);
+            }
+
+            VTR_ASSERT_SAFE(std::distance(short_.data_.data(), inplace_ptr) == new_size);
+
+            //Clean-up elements in buffer and free it
+            destruct_elements(buff_begin, buff_end);
+            dealloc(buff_ptr);
+
+            //The result iterator is derived from i_first only after the size (and
+            //with it the active storage format) has been updated below
+        } else {
+            //Remove elements in either long or short formats
+
+            iterator first2 = begin() + i_first;
+            iterator last2 = first2 + n;
+
+            //Swap all elements in [first, last) to the end.
+            //That is with those within [last, end())
+            if (last2 < end()) {
+                swap_elements(last2, end(), first2);
+            }
+
+            //Finally destruct the elements in [last, end()); that is the
+            //elements which were originally to be erased
+            destruct_elements(end() - n, end());
+
+            //Note that the storage is unchanged in this case, so index
+            //i_first still designates the element following the erased range
+        }
+
+        //Shrink size
+        set_size(new_size);
+
+        return begin() + i_first; //begin() now refers to the storage selected by the new size
+    }
+
+    ///@brief Exchanges the content of the container by the content of other, which is another vector object of the same type. Sizes may differ.
+    void swap(small_vector<T, S>& other) {
+        using std::swap; swap(*this, other); //The block-scope using-declaration hides this member, so ADL runs and finds the friend swap(lhs, rhs)
+    }
+
+    ///@brief swaps two vectors (handles any combination of short/in-place and long/heap formats)
+    friend void swap(small_vector<T, S>& lhs, small_vector<T, S>& rhs) {
+        using std::swap;
+
+        if (lhs.is_short() && rhs.is_short()) {
+            //Both short
+            std::swap(lhs.short_, rhs.short_);
+        } else if (!lhs.is_short() && !rhs.is_short()) {
+            //Both long
+            std::swap(lhs.long_, rhs.long_);
+        } else {
+            //Mixed long/short
+            VTR_ASSERT_SAFE(lhs.is_short() != rhs.is_short());
+
+            auto& long_vec = ((lhs.is_short()) ? rhs : lhs);
+            auto& short_vec = ((lhs.is_short()) ? lhs : rhs);
+
+            /**
+             * @brief Swapping two vectors of different formats
+             *
+             * If the two vectors are in different formats we can't just swap them,
+             * since the short format has real values (potentially with destructors),
+             * while the long format has only basic data types.
+             *
+             * Instead we copy the short_vec values into long_vec's in-place storage, destruct
+             * the original short_vec values and then set short_vec to point to long_vec's
+             * original buffer (avoids extra copy of long elements).
+             *
+             * Save long data (the in-place copy below overwrites these fields of long_vec)
+             */
+            pointer long_buf = long_vec.long_.data_;
+            size_type long_size = long_vec.size();
+            size_type long_capacity = long_vec.long_.capacity_;
+
+            /**
+             * @brief Copy short data into long_vec's in-place storage
+             *
+             * long_vec's heap buffer is untouched by this; only its bookkeeping (already saved)
+             * is overwritten, so the raw in-place storage can be filled with an uninitialized copy
+             */
+            std::uninitialized_copy(short_vec.begin(), short_vec.end(), long_vec.short_.data_.data());
+            long_vec.set_size(short_vec.size());
+
+            //Destroy original elements in short (its size is still intact at this point)
+            short_vec.destruct_elements();
+
+            //Hand long_vec's original buffer over to short_vec
+            short_vec.long_.data_ = long_buf;
+            short_vec.long_.capacity_ = long_capacity;
+            short_vec.set_size(long_size);
+        }
+    }
+
+    ///@brief Removes all elements from the vector (which are destroyed), leaving the container with a size of 0.
+    void clear() {
+        destruct_elements();
+        if (SHORT_CAPACITY > 0u && !is_short()) { dealloc(long_.data_); } //Size 0 means in-place format: release the heap buffer now or it is orphaned
+        set_size(0);
+    }
+
+    ///@brief Inserts a new element at the end of the vector, right after its current last element. This new element is constructed in place using args as the arguments for its constructor.
+    template<typename... Args>
+    void emplace_back(Args&&... args) {
+        //next_back() supplies the raw slot (and has already counted it in the size); construct the element there with perfectly forwarded arguments
+        new (next_back()) T(std::forward<Args>(args)...);
+    }
+
+    //Unsupported: Emplace at position
+    //template<typename... Args>
+    //void emplace(const_iterator position, Args&&... args) {
+    //throw std::logic_error("unimplemented");
+    //}
+
+  public: //Comparisons
+    ///@brief == operator: true when both vectors have the same size and element-wise equal contents
+    friend bool operator==(const small_vector<T, S>& lhs, const small_vector<T, S>& rhs) {
+        if (lhs.size() != rhs.size()) {
+            return false;
+        }
+        return std::equal(lhs.begin(), lhs.end(),
+                          rhs.begin());
+    }
+
+    ///@brief < operator: lexicographical comparison of the two element sequences
+    friend bool operator<(const small_vector<T, S>& lhs, const small_vector<T, S>& rhs) {
+        return std::lexicographical_compare(lhs.begin(), lhs.end(),
+                                            rhs.begin(), rhs.end());
+    }
+
+    ///@brief != operator: negation of ==
+    friend bool operator!=(const small_vector<T, S>& lhs, const small_vector<T, S>& rhs) {
+        return !(lhs == rhs);
+    }
+
+    ///@brief > operator: expressed through < with the operands exchanged
+    friend bool operator>(const small_vector<T, S>& lhs, const small_vector<T, S>& rhs) {
+        return rhs < lhs;
+    }
+
+    ///@brief <= operator: lhs is not greater than rhs, i.e. !(rhs < lhs)
+    friend bool operator<=(const small_vector<T, S>& lhs, const small_vector<T, S>& rhs) {
+        return !(rhs < lhs);
+    }
+
+    ///@brief >= operator: lhs is not less than rhs, i.e. !(lhs < rhs)
+    friend bool operator>=(const small_vector<T, S>& lhs, const small_vector<T, S>& rhs) {
+        return !(lhs < rhs);
+    }
+
+  public: //Lifetime management
+    ///@brief destructor: destroys all elements and, in long format, releases the heap buffer
+    ~small_vector() {
+        destruct_elements();
+        if (!is_short()) {
+            dealloc(long_.data_);
+        }
+    }
+
+    ///@brief copy constructor
+    small_vector(const small_vector& other) {
+        /*
+         * This object is only just being constructed: it holds no elements and owns no
+         * buffer yet, so nothing may be destroyed or released here and none of its
+         * members (including the size) may be read before they have been written.
+         */
+        const size_type n = other.size();
+
+        T* dst = nullptr;
+        if (other.is_short()) {
+            //The copy also fits in-place
+            dst = short_.data_.data();
+        } else {
+            //Create a new heap buffer of exact size
+            long_.data_ = alloc(n);
+            long_.capacity_ = n;
+            dst = long_.data_;
+        }
+
+        //Copy-construct the elements into the raw storage
+        std::uninitialized_copy(other.begin(), other.end(), dst);
+
+        //Publish the size last; it also selects the storage format (see is_short())
+        set_size(n);
+    }
+
+    ///@brief move constructor (default-constructs this vector, then swaps contents with other)
+    small_vector(small_vector&& other) : small_vector() {
+        using std::swap; //Block-scope declaration hides the member swap(), so ADL runs and finds the friend swap(lhs, rhs)
+        swap(*this, other);
+    }
+
+    small_vector& operator=(small_vector other) { //Copy-swap: other is already a copy (or moved-from source)
+        using std::swap; swap(*this, other); //The using-declaration hides the member swap(), so ADL finds the friend swap(lhs, rhs)
+        return *this;
+    }
+
+  private: //Internal types
+    static constexpr size_t LONG_FMT_SIZE = sizeof(small_vector_impl::long_format<value_type, size_type>); //Bytes occupied by the long (heap) representation
+    static constexpr size_t LONG_FMT_ALIGN = alignof(small_vector_impl::long_format<value_type, size_type>); //Its alignment requirement
+
+    ///@brief The number of value types which can be stored in-place in the object (may be zero): whatever fits in the long format's footprint next to one size_type
+    static constexpr size_t SHORT_CAPACITY = (LONG_FMT_SIZE - sizeof(size_type)) / sizeof(value_type);
+
+    /**
+     * @brief required padding
+     *
+     * The number of bytes left over in the long format's footprint once SHORT_CAPACITY elements
+     * and one size_type are accounted for; it is passed to short_format as its padding parameter.
+     */
+    static constexpr size_t SHORT_PAD = LONG_FMT_SIZE - (sizeof(value_type) * SHORT_CAPACITY + sizeof(size_type));
+
+    static constexpr size_t SHORT_FMT_SIZE = sizeof(small_vector_impl::short_format<value_type, size_type, SHORT_CAPACITY, SHORT_PAD>);
+    static constexpr size_t SHORT_FMT_ALIGN = alignof(small_vector_impl::short_format<value_type, size_type, SHORT_CAPACITY, SHORT_PAD>);
+
+    static_assert(LONG_FMT_SIZE == SHORT_FMT_SIZE, "Short and long data formats must have same size");
+    static_assert(LONG_FMT_ALIGN % SHORT_FMT_ALIGN == 0, "Short and long data formats must have compatible alignment");
+
+  public:
+    static constexpr size_t INPLACE_CAPACITY = SHORT_CAPACITY;
+
+  private: //Internal methods
+    /**
+     * @brief Claims the (uninitialized) slot for the next element and returns a pointer to it.
+     *
+     * The size is incremented here; the storage is grown automatically if needed.
+     */
+    T* next_back() {
+        //The in-place buffer is used only while it still has a free slot
+        const bool use_heap = !(size() < SHORT_CAPACITY);
+
+        if (use_heap && size() == capacity()) {
+            //Out of space
+            grow();
+        }
+
+        //grow() leaves size() untouched, so the slot index is the same either way
+        T* slot = use_heap ? (long_.data_ + size()) : (short_.data_.data() + size());
+        ++long_.size_;
+        VTR_ASSERT_SAFE(size() <= capacity());
+        return slot;
+    }
+
+    /**
+     * @brief Multiplies the capacity by GROWTH_FACTOR (to at least one element) via change_capacity()
+     *
+     * Intended to be called only when size() has reached SHORT_CAPACITY or more (asserted below);
+     * the elements are moved into a newly allocated buffer by change_capacity().
+     */
+    void grow() {
+        //How much to scale the size of the storage when out of space
+        constexpr size_type GROWTH_FACTOR = 2;
+
+        VTR_ASSERT_SAFE_MSG(size() >= SHORT_CAPACITY, "Should only grow capacity when at or beyond SHORT_CAPACITY");
+        VTR_ASSERT_SAFE_MSG(capacity() <= (max_size() / GROWTH_FACTOR), "No capacity overflow");
+        size_type new_capacity = std::max<size_type>(1, capacity() * GROWTH_FACTOR);
+        //TODO: Consider ensuring new_capacity is always a power of 2, may be easier on the memory allocator...
+
+        VTR_ASSERT_SAFE_MSG(new_capacity > capacity(), "Grown capacity should be greater than previous capacity");
+
+        change_capacity(new_capacity);
+    }
+
+    /**
+     * @brief Moves the elements into a newly allocated buffer of new_capacity elements
+     *
+     * It is assumed that new_capacity is > SHORT_CAPACITY.
+     *
+     * NOTE(review): size() is not modified here, and is_short() is derived from size(); callers leaving the short format must raise the size past SHORT_CAPACITY afterwards
+     */
+    void change_capacity(size_type new_capacity) {
+        VTR_ASSERT_SAFE_MSG(new_capacity >= size(), "New capacity should be at least size");
+
+        if (new_capacity == capacity()) {
+            return; //Already at correct capacity
+        }
+
+        //Get new raw memory
+        T* tmp_data = alloc(new_capacity);
+
+        //Copy-construct the current elements into the new buffer
+        std::uninitialized_copy(begin(), end(), tmp_data);
+
+        //Clean-up the old values
+        //We do this before updating the array pointer, since if we are updating
+        //from short to long the assignment would corrupt the old values
+        destruct_elements();
+
+        //Update: after the swap tmp_data holds the previous contents of long_.data_
+        std::swap(long_.data_, tmp_data);
+        long_.capacity_ = new_capacity;
+
+        //Free the previous buffer, unless the elements came from the inplace buffer (tmp_data is then not a heap pointer)
+        if (!is_short()) {
+            dealloc(tmp_data);
+        }
+    }
+
+    ///@brief Returns true if using the short/in-place format
+    bool is_short() const {
+        //Dynamic buffer: there is no inplace buffer at all, or it cannot hold size() elements
+        return !(SHORT_CAPACITY == 0u || size() > SHORT_CAPACITY);
+    }
+
+    /**
+     * @brief set the size
+     *
+     * The two data formats (short/long) are padded to
+     * ensure that their size_ members are always
+     * aligned, allowing us to set the size directly
+     * for both formats
+     */
+    void set_size(size_type new_size) {
+        short_.size_ = new_size;
+    }
+
+    ///@brief Allocates raw (uninitialized) memory for nelem objects of type T; no constructors are run
+    static T* alloc(size_type nelem) {
+        return static_cast<T*>(::operator new(sizeof(T) * nelem));
+    }
+
+    /**
+     * @brief Deallocates a block of memory obtained from alloc()
+     *
+     * Caller must ensure any objects associated with this block have already had
+     * their destructors called
+     */
+    static void dealloc(T* data) {
+        ::operator delete(data);
+    }
+
+    /**
+     * @brief Swaps the elements in [src_first, src_last) with the elements at positions starting at dst_first
+     *
+     * Returns src_first
+     */
+    iterator swap_elements(iterator src_first, iterator src_last, iterator dst_first) {
+        VTR_ASSERT_SAFE_MSG(src_first < src_last, "First swap range first must start before last");
+
+        //Walk the source and destination ranges forwards in lock-step
+        for (iterator src = src_first, dst = dst_first; src != src_last; ++src, ++dst) {
+            std::swap(*src, *dst);
+        }
+
+        return src_first;
+    }
+
+    /**
+     * @brief Swaps the elements in [src_first, src_last) in reverse order starting at dst_first and working backwards
+     * Returns src_first. An empty source range (e.g. an insertion at end()) is valid and swaps nothing.
+     */
+    iterator reverse_swap_elements(iterator src_first, iterator src_last, iterator dst_first) {
+        VTR_ASSERT_SAFE_MSG(src_first <= src_last, "First swap range first must start before last");
+        //Both iterators are decremented before use, so no pointer before src_first is ever formed
+        auto dst_itr = dst_first + 1;
+        for (auto src_itr = src_last; src_itr != src_first;) {
+            --src_itr;
+            --dst_itr;
+            std::swap(*src_itr, *dst_itr);
+        }
+        return src_first;
+    }
+
+    ///@brief Calls the destructors of all elements currently held, i.e. [begin(), end()); size and storage are left unchanged
+    void destruct_elements() {
+        destruct_elements(begin(), end());
+    }
+
+    ///@brief Calls the destructors of elements in the half-open range [first, last) (last itself is not destroyed)
+    void destruct_elements(iterator first, iterator last) {
+        for (auto itr = first; itr != last; ++itr) {
+            itr->~T();
+        }
+    }
+
+    ///@brief Calls the destructor of the single element at position
+    void destruct_element(iterator position) {
+        destruct_elements(position, position + 1);
+    }
+
+  private: //Data
+    /*
+     * The object data storage is re-used between the long and short formats.
+     *
+     * If the size is small (less than or equal to SHORT_CAPACITY, see is_short())
+     * the short format (which stores elements in-place) is used. Otherwise the
+     * long format is used and the elements are stored in a dynamically
+     * allocated buffer
+     */
+    union {
+        small_vector_impl::long_format<value_type, size_type> long_;
+        small_vector_impl::short_format<value_type, size_type, SHORT_CAPACITY, SHORT_PAD> short_;
+    };
+};
+
+} // namespace vtr
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_string_interning.h b/third_party/vtr/libs/vtrutil/src/vtr_string_interning.h
new file mode 100644
index 000000000..3af949701
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_string_interning.h
@@ -0,0 +1,558 @@
+#ifndef VTR_STRING_INTERNING_H_
+#define VTR_STRING_INTERNING_H_
+
+/**
+ * @file
+ * @brief  Provides basic string interning, along with pattern splitting suitable for use with FASM.
+ * 
+ *  For reference, string interning refers to keeping a unique copy of a string
+ *  in storage, and then handing out an id to that storage location, rather than
+ *  keeping the string around.  This deduplicates memory overhead for strings.
+ * 
+ *  This string internment has an additional feature that is splitting the
+ *  input string into "parts" based on '.', which happens to be the feature
+ *  separator for FASM.  This means the string "TILE.CLB.A" and "TILE.CLB.B"
+ *  would be made up of the intern ids for {"TILE", "CLB", "A"} and
+ *  {"TILE", "CLB", "B"} respectively, allowing some internal deduplication.
+ * 
+ *  Strings can contain up to kMaxParts, before they will be interned as their
+ *  whole string.
+ * 
+ *  Interned strings (interned_string) that come from the same internment
+ *  object (string_internment) can safely be checked for equality and hashed
+ *  without touching the underlying string.  Lexicographical comparisons (e.g. <)
+ *  require reconstructing the string.
+ * 
+ *  Basic usage:
+ *  -# Create a string_internment
+ *  -# Invoke string_internment::intern_string, which returns the
+ *     interned_string object that is the interned string's unique identifier.
+ *     This identifier can be checked for equality or hashed. If
+ *     string_internment::intern_string is called with the same string, a value
+ *     equivalent interned_string object will be returned.
+ *  -# If the original string is required, interned_string::get can be invoked
+ *     to copy the string into a std::string.
+ *     interned_string also provides iteration via begin/end, however the begin
+ *     method requires a pointer to original string_internment object.  This is
+ *     not suitable for range iteration, so the method interned_string::bind
+ *     can be used to create a bound_interned_string that can be used in a
+ *     range iteration context.
+ * 
+ *     For reference, the reason that interned_string's does not have a
+ *     reference back to the string_internment object is to keep their memory
+ *     footprint lower.
+ */
+#include <cstring>
+#include <string>
+#include <vector>
+#include <unordered_map>
+#include <stdexcept>
+#include <climits>
+#include <algorithm>
+#include <array>
+
+#include "vtr_strong_id.h"
+#include "vtr_string_view.h"
+#include "vtr_vector.h"
+
+namespace vtr {
+
+// Forward declare classes for pointers.
+class string_internment;
+class interned_string;
+class interned_string_less;
+
+// StrongId for identifying unique string pieces.
+struct interned_string_tag;
+typedef StrongId<interned_string_tag> StringId;
+
+/**
+ * @brief Values that control the size of the used storage
+ *
+ * To keep interned_string memory footprint lower and flexible, these values
+ * control the size of the storage used.
+ *
+ * Number of bytes to represent the StringId.  This implies a maximum number of unique strings available equal to (1 << (kBytesPerId*CHAR_BIT)).
+ */
+constexpr size_t kBytesPerId = 3;
+///@brief Maximum number of splits to accommodate before just interning the entire string.
+constexpr size_t kMaxParts = 3;
+///@brief Number of bytes to represent the number of splits present in an interned string.
+constexpr size_t kSizeSize = 1;
+///@brief Which character to split the input string by.
+constexpr char kSplitChar = '.';
+
+static_assert((1 << (CHAR_BIT * kSizeSize)) > kMaxParts, "Size of size data is too small");
+
+/**
+ * @brief Iterator over interned string.
+ *
+ * This object is much heavier memory wise than interned_string, so do not
+ * store these.
+ *
+ * This iterator only accommodates the forward_iterator concept.
+ *
+ * Do not construct this iterator directly.  Use either
+ * bound_interned_string::begin/end or interned_string::begin/end.
+ */
+class interned_string_iterator {
+  public:
+    interned_string_iterator(const string_internment* internment, std::array<StringId, kMaxParts> intern_ids, size_t n);
+
+    interned_string_iterator() {
+        clear();
+    }
+
+    using value_type = char;
+    using difference_type = void;
+    using pointer = const char*;
+    using reference = const char&;
+    using iterator_category = std::forward_iterator_tag;
+
+    char operator*() const {
+        if (num_parts_ == size_t(-1)) {
+            throw std::out_of_range("Invalid iterator");
+        }
+
+        if (str_idx_ >= view_.size()) {
+            return kSplitChar;
+        } else {
+            return view_.at(str_idx_);
+        }
+    }
+
+    interned_string_iterator& operator++();
+    interned_string_iterator operator++(int);
+
+    friend bool operator==(const interned_string_iterator& lhs, const interned_string_iterator& rhs);
+
+  private:
+    void clear() {
+        internment_ = nullptr;
+        num_parts_ = size_t(-1);
+        std::fill(parts_.begin(), parts_.end(), StringId());
+        part_idx_ = size_t(-1);
+        str_idx_ = size_t(-1);
+        view_ = vtr::string_view();
+    }
+
+    const string_internment* internment_;
+    size_t num_parts_;
+    std::array<StringId, kMaxParts> parts_;
+    size_t part_idx_;
+    size_t str_idx_;
+    vtr::string_view view_;
+};
+
+///@brief == operator: two iterators are equal when every member (internment, part ids, part/string indices and current view) compares equal
+inline bool operator==(const interned_string_iterator& lhs, const interned_string_iterator& rhs) {
+    return lhs.internment_ == rhs.internment_ && lhs.num_parts_ == rhs.num_parts_ && lhs.parts_ == rhs.parts_ && lhs.part_idx_ == rhs.part_idx_ && lhs.str_idx_ == rhs.str_idx_ && lhs.view_ == rhs.view_;
+}
+
+///@brief != operator: negation of ==
+inline bool operator!=(const interned_string_iterator& lhs, const interned_string_iterator& rhs) {
+    return !(lhs == rhs);
+}
+
+/**
+ * @brief An interned_string bound to its string_internment object.
+ *
+ * This object is heavier than just an interned_string.
+ * This object holds a pointer to interned_string, so its lifetime must be
+ * shorter than the parent interned_string.
+ */
+class bound_interned_string {
+  public:
+    ///@brief constructor: stores both pointers without taking ownership of either object
+    bound_interned_string(const string_internment* internment, const interned_string* str)
+        : internment_(internment)
+        , str_(str) {}
+
+    ///@brief return an iterator to the first part of the interned_string
+    interned_string_iterator begin() const;
+    ///@brief return the end iterator of the interned_string (presumably a past-the-end sentinel like interned_string::end; defined elsewhere - TODO confirm)
+    interned_string_iterator end() const;
+
+  private:
+    const string_internment* internment_;
+    const interned_string* str_;
+};
+
+/**
+ * @brief Interned string value returned from a string_internment object.
+ *
+ * This is a value object without allocation.  It can be checked for equality
+ * and hashed safely against other interned_string's generated from the same
+ * string_internment.
+ */
+class interned_string {
+  public:
+    ///@brief constructor
+    interned_string(std::array<StringId, kMaxParts> intern_ids, size_t n) {
+        std::fill(storage_.begin(), storage_.end(), 0);
+        set_num_parts(n);
+        for (size_t i = 0; i < n; ++i) {
+            set_id(i, intern_ids[i]);
+        }
+    }
+
+    /**
+     * @brief Copy the underlying string into output.
+     *
+     * internment must the object that generated this interned_string.
+     */
+    void get(const string_internment* internment, std::string* output) const;
+
+    /**
+     * @brief Returns the underlying string as a std::string.
+     *
+     * This method will allocated memory.
+     */
+    std::string get(const string_internment* internment) const {
+        std::string result;
+        get(internment, &result);
+        return result;
+    }
+
+    /**
+     * @brief Bind the parent string_internment and return a bound_interned_string object.
+     * 
+     * That bound_interned_string lifetime must be shorter than this
+     * interned_string object lifetime, as bound_interned_string contains
+     * a reference this object, along with a reference to the internment
+     * object.
+     */
+    bound_interned_string bind(const string_internment* internment) const {
+        return bound_interned_string(internment, this);
+    }
+
+    ///@brief begin() function
+    interned_string_iterator begin(const string_internment* internment) const {
+        size_t n = num_parts();
+        std::array<StringId, kMaxParts> intern_ids;
+
+        for (size_t i = 0; i < n; ++i) {
+            intern_ids[i] = id(i);
+        }
+
+        return interned_string_iterator(internment, intern_ids, n);
+    }
+
+    ///@brief end() function
+    interned_string_iterator end() const {
+        return interned_string_iterator();
+    }
+
+    ///@brief == operator
+    friend bool operator==(interned_string lhs,
+                           interned_string rhs) noexcept;
+    ///@brief != operator
+    friend bool operator!=(interned_string lhs,
+                           interned_string rhs) noexcept;
+    ///@brief hash function
+    friend std::hash<interned_string>;
+    friend interned_string_less;
+
+  private:
+    void set_num_parts(size_t n) {
+        for (size_t i = 0; i < kSizeSize; ++i) {
+            storage_[i] = (n >> (i * CHAR_BIT)) & UCHAR_MAX;
+        }
+
+        if (num_parts() != n) {
+            throw std::runtime_error("Storage size exceeded.");
+        }
+    }
+
+    size_t num_parts() const {
+        size_t n = 0;
+        for (size_t i = 0; i < kSizeSize; ++i) {
+            n |= storage_[i] << (i * CHAR_BIT);
+        }
+
+        return n;
+    }
+
+    ///@brief Pack id little-endian into slot idx; throws if idx is out of range or id does not round-trip
+    void set_id(size_t idx, StringId id) {
+        if (kMaxParts <= idx) {
+            throw std::runtime_error("Storage size exceeded.");
+        }
+        const size_t base = kSizeSize + idx * kBytesPerId;
+        size_t bits = (size_t)id;
+        for (size_t b = 0; b < kBytesPerId; ++b, bits >>= CHAR_BIT) {
+            storage_[base + b] = bits & UCHAR_MAX;
+        }
+        if (this->id(idx) != id) {
+            throw std::runtime_error("Storage size exceeded.");
+        }
+    }
+
+    StringId id(size_t idx) const {
+        size_t val = 0; // id of part idx, stored little-endian in kBytesPerId bytes
+        for (size_t i = 0; i < kBytesPerId; ++i) {
+            // Widen before shifting: uint8_t promotes to int, which cannot hold a byte shifted by 24+ bits.
+            val |= size_t(storage_[kSizeSize + i + idx * kBytesPerId]) << (i * CHAR_BIT);
+        }
+        return StringId(val);
+    }
+
+    std::array<uint8_t, kSizeSize + kMaxParts * kBytesPerId> storage_;
+};
+
+///@brief == operator: true when both strings hold identical storage_ contents
+inline bool operator==(interned_string lhs,
+                       interned_string rhs) noexcept {
+    return lhs.storage_ == rhs.storage_;
+}
+
+///@brief != operator: true when the storage_ contents of the two strings differ
+inline bool operator!=(interned_string lhs,
+                       interned_string rhs) noexcept {
+    return lhs.storage_ != rhs.storage_;
+}
+
+///@brief < operator: lexicographic comparison over [begin(), end()) of both bound strings
+inline bool operator<(bound_interned_string lhs,
+                      bound_interned_string rhs) noexcept {
+    return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
+}
+
+///@brief >= operator: the negation of lhs < rhs
+inline bool operator>=(bound_interned_string lhs,
+                       bound_interned_string rhs) noexcept {
+    return !(lhs < rhs);
+}
+
+///@brief > operator: implemented as rhs < lhs
+inline bool operator>(bound_interned_string lhs,
+                      bound_interned_string rhs) noexcept {
+    return rhs < lhs;
+}
+
+///@brief <= operator: implemented as rhs >= lhs
+inline bool operator<=(bound_interned_string lhs,
+                       bound_interned_string rhs) noexcept {
+    return rhs >= lhs;
+}
+
+/**
+ * @brief Owns the unique string parts and creates interned_string objects that refer to them.
+ */
+class string_internment {
+  public:
+    /**
+     * @brief Intern a string, and return a unique identifier to that string.
+     *
+     * Calling intern_string twice with strings of the same value yields
+     * interned_string objects that compare equal.
+     */
+    interned_string intern_string(vtr::string_view view) {
+        // One part per kSplitChar-separated piece of the input.
+        size_t num_parts = 1;
+        for (size_t pos = 0; pos < view.size(); ++pos) {
+            num_parts += (view[pos] == kSplitChar) ? 1 : 0;
+        }
+
+        std::array<StringId, kMaxParts> parts;
+        if (num_parts == 1 || num_parts > kMaxParts) {
+            // No separator, or too many pieces to store: intern the whole string as one part.
+            parts[0] = intern_one_string(view);
+            return interned_string(parts, 1);
+        }
+
+        // Implements parts = [intern_one_string(s) for s in view.split(kSplitChar)]
+        size_t filled = 0;
+        size_t piece_begin = 0;
+        for (size_t pos = 0; pos < view.size() && filled + 1 < num_parts; ++pos) {
+            if (view[pos] != kSplitChar) {
+                continue;
+            }
+            parts[filled++] = intern_one_string(view.substr(piece_begin, pos - piece_begin));
+            piece_begin = pos + 1;
+        }
+
+        // Whatever follows the last separator is the final part.
+        parts[filled] = intern_one_string(view.substr(piece_begin));
+        return interned_string(parts, num_parts);
+    }
+
+    /**
+     * @brief Retrieve a string part based on id.
+     *
+     * This method should not generally be called directly.
+     */
+    vtr::string_view get_string(StringId id) const {
+        const auto& part = strings_[id];
+        return vtr::string_view(part.data(), part.size());
+    }
+
+    ///@brief Number of unique string parts stored.
+    size_t unique_strings() const {
+        return strings_.size();
+    }
+
+  private:
+    ///@brief Return the id of view, storing it as a new part if it has not been seen before
+    StringId intern_one_string(vtr::string_view view) {
+        temporary_.assign(view.begin(), view.end());
+        // Tentatively claim the next free id; insert() leaves the map untouched
+        // when the string is already present.
+        const StringId candidate(strings_.size());
+        auto inserted = string_to_id_.insert(std::make_pair(temporary_, candidate));
+        const bool is_new = inserted.second;
+        if (is_new) {
+            strings_.push_back(std::move(temporary_));
+        }
+        return inserted.first->second;
+    }
+
+    // FIXME: This storage scheme does store 2x memory for the strings storage,
+    // however it does avoid having to be concerned with what happens when
+    // strings_ resizes, so for a simpler initial implementation, this is the
+    // approach taken.
+    vtr::vector<StringId, std::string> strings_;
+    std::string temporary_;
+    std::unordered_map<std::string, StringId> string_to_id_;
+};
+
+/**
+ * @brief Copy the underlying string into output.
+ *
+ * internment must be the object that generated this interned_string.
+ */
+inline void interned_string::get(const string_internment* internment, std::string* output) const {
+    // Implements
+    // kSplitChar.join(internment->get_string(id(idx)) for idx in range(num_parts())));
+    size_t parts = num_parts();
+    // One separator between each pair of parts; guard parts == 0 so parts - 1 cannot wrap around.
+    size_t storage_needed = (parts > 0) ? parts - 1 : 0;
+    std::array<StringId, kMaxParts> intern_ids;
+    for (size_t i = 0; i < parts; ++i) {
+        intern_ids[i] = id(i);
+        storage_needed += internment->get_string(intern_ids[i]).size();
+    }
+    output->clear();
+    output->reserve(storage_needed);
+
+    for (size_t i = 0; i < parts; ++i) {
+        auto view = internment->get_string(intern_ids[i]);
+        std::copy(view.begin(), view.end(), std::back_inserter(*output));
+        if (i + 1 < parts) {
+            output->push_back(kSplitChar);
+        }
+    }
+}
+
+/**
+ * @brief Constructor for interned_string_iterator. Do not construct this iterator directly;
+ *        use either bound_interned_string::begin/end or interned_string::begin/end.
+ */
+inline interned_string_iterator::interned_string_iterator(const string_internment* internment, std::array<StringId, kMaxParts> intern_ids, size_t n)
+    : internment_(internment)
+    , num_parts_(n)
+    , parts_(intern_ids)
+    , part_idx_(0)
+    , str_idx_(0) {
+    if (num_parts_ == 0) {
+        clear();
+        return;
+    }
+    view_ = internment_->get_string(parts_[0]);
+    // A lone empty part has no character and no kSplitChar to emit; operator++ clears in the same case.
+    if (num_parts_ == 1 && view_.size() == 0) clear();
+}
+
+///@brief Pre-increment: advances one position; throws std::out_of_range when num_parts_ == size_t(-1)
+inline interned_string_iterator& interned_string_iterator::operator++() {
+    if (num_parts_ == size_t(-1)) {
+        throw std::out_of_range("Invalid iterator");
+    }
+
+    if (str_idx_ < view_.size()) {
+        // Current string has characters left, advance.
+        str_idx_ += 1;
+        // Normally, once str_idx_ reaches view_.size() the iterator next emits a
+        // kSplitChar, but this is omitted on the last part of the string.
+        if (str_idx_ == view_.size() && part_idx_ + 1 == num_parts_) {
+            clear();
+        }
+    } else {
+        // Current part of the string is out of characters, and the
+        // kSplitChar has been emitted, advance to the next part.
+        str_idx_ = 0;
+        part_idx_ += 1;
+        if (part_idx_ == num_parts_) {
+            // No more parts.
+            clear();
+        } else {
+            view_ = internment_->get_string(parts_[part_idx_]);
+            if (view_.size() == 0 && part_idx_ + 1 == num_parts_) {
+                // The last string part is empty, and because this is the last
+                // part we don't want to emit another kSplitChar.
+                clear();
+            }
+        }
+    }
+
+    return *this;
+}
+
+///@brief Post-increment: advances this iterator and returns a copy of its previous state
+inline interned_string_iterator interned_string_iterator::operator++(int) {
+    // Snapshot first so the caller receives the position before the advance.
+    interned_string_iterator snapshot(*this);
+    operator++();
+    return snapshot;
+}
+
+///@brief Returns the begin iterator by calling interned_string::begin with the bound internment
+inline interned_string_iterator bound_interned_string::begin() const {
+    return str_->begin(internment_);
+}
+
+///@brief Returns the end iterator: a default-constructed interned_string_iterator
+inline interned_string_iterator bound_interned_string::end() const {
+    return interned_string_iterator();
+}
+
+inline std::ostream& operator<<(std::ostream& os, bound_interned_string const& value) {
+    for (auto it = value.begin(); it != value.end(); ++it) { // stream one element at a time
+        const auto& c = *it;
+        os << c;
+    }
+    return os;
+}
+
+/**
+ * @brief A friend class of interned_string that provides an ordering of interned_strings
+ */
+class interned_string_less {
+  public:
+    ///@brief Return true if lhs.storage_ compares less than rhs.storage_ (orders the packed storage, not the text)
+    bool operator()(const vtr::interned_string& lhs, const vtr::interned_string& rhs) const {
+        return lhs.storage_ < rhs.storage_;
+    }
+};
+
+} // namespace vtr
+
+namespace std {
+/**
+ * @brief Hash function for the interned_string
+ *
+ * It is declared as a friend of the interned_string class and combines the
+ * hash of every byte of its storage_ array, in order.
+ */
+template<>
+struct hash<vtr::interned_string> {
+    std::size_t operator()(vtr::interned_string const& s) const noexcept {
+        std::size_t seed = 0;
+        for (auto it = s.storage_.begin(); it != s.storage_.end(); ++it) {
+            vtr::hash_combine(seed, std::hash<char>()(*it));
+        }
+        return seed;
+    }
+};
+} // namespace std
+
+#endif /* VTR_STRING_INTERNING_H_ */
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_string_view.h b/third_party/vtr/libs/vtrutil/src/vtr_string_view.h
new file mode 100644
index 000000000..12a7a7a44
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_string_view.h
@@ -0,0 +1,192 @@
+#ifndef VTR_STRING_VIEW_H_
+#define VTR_STRING_VIEW_H_
+
+#include <cstring>
+#include <ostream>
+#include <string>
+#include <stdexcept>
+
+#include "vtr_hash.h"
+
+namespace vtr {
+
+/**
+ * @brief Implements a view to a fixed length string (similar to std::basic_string_view).
+ *
+ * The underlying string does not need to be NULL terminated.
+ */
+class string_view {
+  public:
+    static constexpr size_t npos = size_t(-1);
+
+    ///@brief default constructor: an empty view with a null data pointer
+    explicit constexpr string_view()
+        : data_(nullptr)
+        , size_(0) {}
+
+    ///@brief constructor from a NUL-terminated string (length taken with strlen)
+    explicit string_view(const char* str)
+        : data_(str)
+        , size_(strlen(str)) {}
+    ///@brief constructor from a pointer and an explicit length
+    explicit constexpr string_view(const char* str, size_t size)
+        : data_(str)
+        , size_(size) {}
+    ///@brief copy constructor
+    constexpr string_view(const string_view& other) noexcept = default;
+    ///@brief copy assignment
+    constexpr string_view& operator=(const string_view& view) noexcept {
+        data_ = view.data_;
+        size_ = view.size_;
+        return *this;
+    }
+
+    ///@brief indexing [] operator (immutable, no bounds check)
+    constexpr char operator[](size_t pos) const {
+        return data_[pos];
+    }
+
+    ///@brief at(): bounds-checked element access (immutable); throws std::out_of_range if pos >= size()
+    const char& at(size_t pos) const {
+        if (pos >= size()) {
+            throw std::out_of_range("Pos is out of range.");
+        }
+
+        return data_[pos];
+    }
+
+    ///@brief Returns the first character of the string
+    constexpr const char& front() const {
+        return data_[0];
+    }
+
+    ///@brief Returns the last character of the string
+    constexpr const char& back() const {
+        return data_[size() - 1];
+    }
+
+    ///@brief Returns a pointer to the string data
+    constexpr const char* data() const {
+        return data_;
+    }
+
+    ///@brief Returns the string size
+    constexpr size_t size() const noexcept {
+        return size_;
+    }
+
+    ///@brief Returns the string size
+    constexpr size_t length() const noexcept {
+        return size_;
+    }
+
+    ///@brief Returns true if empty
+    constexpr bool empty() const noexcept {
+        return size_ == 0;
+    }
+
+    ///@brief Returns a pointer to the begin of the string
+    constexpr const char* begin() const noexcept {
+        return data_;
+    }
+
+    ///@brief Same as begin()
+    constexpr const char* cbegin() const noexcept {
+        return data_;
+    }
+
+    ///@brief Returns a pointer to the end of the string
+    constexpr const char* end() const noexcept {
+        return data_ + size_;
+    }
+
+    ///@brief Same as end()
+    constexpr const char* cend() const noexcept {
+        return data_ + size_;
+    }
+
+    ///@brief Swaps two string views
+    void swap(string_view& v) noexcept {
+        std::swap(data_, v.data_);
+        std::swap(size_, v.size_);
+    }
+
+    ///@brief Returns a view of at most count characters starting at pos; throws std::out_of_range if pos > size()
+    string_view substr(size_t pos = 0, size_t count = npos) const {
+        if (pos > size()) {
+            throw std::out_of_range("Pos is out of range.");
+        }
+        // Clamp count against the remaining length; comparing pos + count with size_ could wrap around.
+        size_t rcount = size_ - pos;
+        if (count < rcount) {
+            rcount = count;
+        }
+
+        return string_view(data_ + pos, rcount);
+    }
+
+  private:
+    const char* data_;
+    size_t size_;
+};
+
+///@brief == operator: equal sizes and identical bytes (memcmp, so bytes after an embedded NUL are compared too)
+inline bool operator==(string_view lhs,
+                       string_view rhs) noexcept {
+    return lhs.size() == rhs.size() && (lhs.empty() || std::memcmp(lhs.data(), rhs.data(), lhs.size()) == 0);
+}
+
+///@brief != operator: sizes differ or some byte differs (empty views are never passed to memcmp)
+inline bool operator!=(string_view lhs,
+                       string_view rhs) noexcept {
+    return lhs.size() != rhs.size() || (!lhs.empty() && std::memcmp(lhs.data(), rhs.data(), lhs.size()) != 0);
+}
+
+///@brief < operator: lexicographic comparison of the viewed characters
+inline bool operator<(string_view lhs,
+                      string_view rhs) noexcept {
+    return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
+}
+
+///@brief >= operator: the negation of lhs < rhs
+inline bool operator>=(string_view lhs,
+                       string_view rhs) noexcept {
+    return !(lhs < rhs);
+}
+
+///@brief > operator: implemented as rhs < lhs
+inline bool operator>(string_view lhs,
+                      string_view rhs) noexcept {
+    return rhs < lhs;
+}
+
+///@brief <= operator: implemented as rhs >= lhs
+inline bool operator<=(string_view lhs,
+                       string_view rhs) noexcept {
+    return rhs >= lhs;
+}
+
+///@brief << operator for ostream: inserts the viewed characters one by one
+inline std::ostream& operator<<(std::ostream& os, string_view const& value) {
+    for (const char* p = value.begin(); p != value.end(); ++p) {
+        os << *p;
+    }
+    return os;
+}
+
+} // namespace vtr
+
+namespace std {
+template<>
+struct hash<vtr::string_view> {
+    std::size_t operator()(vtr::string_view const& s) const noexcept {
+        std::size_t seed = 0; // running hash, combined with each character in order
+        for (const char* p = s.begin(); p != s.end(); ++p) {
+            vtr::hash_combine(seed, std::hash<char>()(*p));
+        }
+        return seed;
+    }
+};
+} // namespace std
+
+#endif /* VTR_STRING_VIEW_H_ */
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_strong_id.h b/third_party/vtr/libs/vtrutil/src/vtr_strong_id.h
new file mode 100644
index 000000000..1ce922ab5
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_strong_id.h
@@ -0,0 +1,245 @@
+#ifndef VTR_STRONG_ID_H
+#define VTR_STRONG_ID_H
+/**
+ * @file
+ * @brief This header provides the StrongId class.
+ *
+ * It is a template that can be used to create strong Ids,
+ * which avoid accidental type conversions (generating compiler errors when they occur).
+ *
+ * Motivation
+ * ==========
+ * It is common to use an Id (typically an integer) to identify and represent a component.
+ * A basic example (poor style):
+ *
+ *      size_t count_net_terminals(int net_id);
+ *
+ * Where a plain int is used to represent the net identifier.
+ * Using a plain basic type is poor style since it makes it unclear that the parameter is
+ * an Id.
+ *
+ * A better example is to use a typedef:
+ *
+ *      typedef int NetId;
+ *
+ *      size_t count_net_terminals(NetId net_id);
+ *
+ * It is now clear that the parameter is expecting an Id.
+ *
+ * However this approach has some limitations. In particular, typedef's only create type
+ * aliases, and still allow conversions. This is problematic if there are multiple types
+ * of Ids. For example:
+ *
+ *      typedef int NetId;
+ *      typedef int BlkId;
+ *
+ *      size_t count_net_terminals(NetId net_id);
+ *
+ *      BlkId blk_id = 10;
+ *      NetId net_id = 42;
+ *
+ *      count_net_terminals(net_id); //OK
+ *      count_net_terminals(blk_id); //Bug: passed a BlkId as a NetId
+ *
+ * Since typedefs are aliases the compiler issues no errors or warnings, and silently passes
+ * the BlkId where a NetId is expected. This results in hard to diagnose bugs.
+ *
+ * We can avoid this issue by using a StrongId:
+ *
+ *      struct net_id_tag; //Phantom tag for NetId
+ *      struct blk_id_tag; //Phantom tag for BlkId
+ *
+ *      typedef StrongId<net_id_tag> NetId;
+ *      typedef StrongId<blk_id_tag> BlkId;
+ *
+ *      size_t count_net_terminals(NetId net_id);
+ *
+ *      BlkId blk_id(10);
+ *      NetId net_id(42);
+ *
+ *      count_net_terminals(net_id); //OK
+ *      count_net_terminals(blk_id); //Compiler Error: NetId expected!
+ *
+ * StrongId is a template which implements the basic features of an Id, but disallows silent conversions
+ * between different types of Ids. It uses another 'tag' type (passed as the first template parameter)
+ * to uniquely identify the type of the Id (preventing conversions between different types of Ids).
+ *
+ * Usage
+ * =====
+ *
+ * The StrongId template class takes one required and two optional template parameters:
+ *
+ *    1. Tag        - the unique type used to identify this type of Ids [Required]
+ *    2. T          - the underlying integral id type (default: int) [Optional]
+ *    3. T sentinel - a value representing an invalid Id (default: -1) [Optional]
+ *
+ * If no value is supplied during construction the StrongId is initialized to the invalid/sentinel value.
+ *
+ * Example 1: default definition
+ *
+ *      struct net_id_tag;
+ *      typedef StrongId<net_id_tag> NetId; //Internally stores an integer Id, -1 represents invalid
+ *
+ * Example 2: definition with custom underlying type
+ *
+ *      struct blk_id_tag;
+ *      typedef StrongId<blk_id_tag,size_t> BlkId; //Internally stores a size_t Id, -1 represents invalid
+ *
+ * Example 3: definition with custom underlying type and custom sentinel value
+ *
+ *      struct pin_id_tag;
+ *      typedef StrongId<pin_id_tag,size_t,0> PinId; //Internally stores a size_t Id, 0 represents invalid
+ *
+ * Example 4: Creating Ids
+ *
+ *      struct net_id_tag;
+ *      typedef StrongId<net_id_tag> MyId; //Internally stores an integer Id, -1 represents invalid
+ *
+ *      MyId my_id;           //Defaults to the sentinel value (-1 by default)
+ *      MyId my_other_id(5);  //Explicit construction
+ *      MyId my_third_id(25); //Explicit construction
+ *
+ * Example 5: Comparing Ids
+ *
+ *      struct net_id_tag;
+ *      typedef StrongId<net_id_tag> MyId; //Internally stores an integer Id, -1 represents invalid
+ *
+ *      MyId my_id;           //Defaults to the sentinel value (-1 by default)
+ *      MyId my_id_one(1);
+ *      MyId my_id_two(2);
+ *      MyId my_id_also_one(1);
+ *
+ *      my_id_one == my_id_also_one; //True
+ *      my_id_one == my_id; //False
+ *      my_id_one == my_id_two; //False
+ *      my_id_one != my_id_two; //True
+ *
+ * Example 6: Checking for invalid Ids
+ *
+ *      struct net_id_tag;
+ *      typedef StrongId<net_id_tag> MyId; //Internally stores an integer Id, -1 represents invalid
+ *
+ *      MyId my_id;           //Defaults to the sentinel value
+ *      MyId my_id_one(1);
+ *
+ *      //Comparison against a constructed invalid id
+ *      my_id == MyId::INVALID(); //True
+ *      my_id_one == MyId::INVALID(); //False
+ *      my_id_one != MyId::INVALID(); //True
+ *
+ *      //The Id can also be evaluated in a boolean context against the sentinel value
+ *      if(my_id) //False, my_id is invalid
+ *      if(!my_id) //True, my_id is invalid
+ *      if(my_id_one) //True, my_id_one is valid
+ *
+ * Example 7: Indexing data structures
+ *
+ *      struct my_id_tag;
+ *      typedef StrongId<my_id_tag> MyId; //Internally stores an integer Id, -1 represents invalid
+ *
+ *      std::vector<int> my_vec = {0, 1, 2, 3, 4, 5};
+ *
+ *      MyId my_id(2);
+ *
+ *      my_vec[size_t(my_id)]; //Access the third element via explicit conversion
+ */
+#include <type_traits> //for std::is_integral
+#include <cstddef>     //for std::size_t
+#include <functional>  //for std::hash
+
+namespace vtr {
+
+// Forward declare the class (needed for operator declarations)
+template<typename tag, typename T, T sentinel>
+class StrongId;
+
+/*
+ * Forward declare the equality/inequality operators
+ *
+ * We need to do this before the class definition so the class can
+ * friend them
+ */
+template<typename tag, typename T, T sentinel>
+bool operator==(const StrongId<tag, T, sentinel>& lhs, const StrongId<tag, T, sentinel>& rhs);
+
+template<typename tag, typename T, T sentinel>
+bool operator!=(const StrongId<tag, T, sentinel>& lhs, const StrongId<tag, T, sentinel>& rhs);
+
+template<typename tag, typename T, T sentinel>
+bool operator<(const StrongId<tag, T, sentinel>& lhs, const StrongId<tag, T, sentinel>& rhs);
+
+///@brief Strongly-typed Id wrapper; T defaults to int and the sentinel (invalid value) to T(-1)
+template<typename tag, typename T = int, T sentinel = T(-1)>
+class StrongId {
+    static_assert(std::is_integral<T>::value, "T must be integral");
+
+  public:
+    ///@brief Returns an Id holding the sentinel (invalid) value
+    static constexpr StrongId INVALID() { return StrongId(sentinel); }
+
+    ///@brief Default construction yields the sentinel value
+    constexpr StrongId()
+        : id_(sentinel) {}
+
+    ///@brief Construction from a raw Id must be spelled out (no automatic conversions)
+    explicit constexpr StrongId(T id)
+        : id_(id) {}
+
+    // Explicit conversions to commonly needed types:
+
+    ///@brief Explicit conversion to bool (e.g. if(id)): true unless the Id equals the sentinel
+    explicit operator bool() const { return !(id_ == sentinel); }
+
+    ///@brief Explicit conversion to size_t (e.g. my_vector[size_t(strong_id)])
+    explicit operator std::size_t() const { return static_cast<std::size_t>(id_); }
+
+    ///@brief std::hash is a friend so that Ids can be hashed
+    friend std::hash<StrongId<tag, T, sentinel>>;
+
+    /**
+     * @brief The comparison operators are friends so that Ids can be compared.
+     *
+     * They are function templates, so an empty template argument list (<>)
+     * follows the operator name in each friend declaration.
+     */
+    friend bool operator== <>(const StrongId<tag, T, sentinel>& lhs, const StrongId<tag, T, sentinel>& rhs);
+    ///@brief != operator
+    friend bool operator!= <>(const StrongId<tag, T, sentinel>& lhs, const StrongId<tag, T, sentinel>& rhs);
+    ///@brief < operator
+    friend bool operator< <>(const StrongId<tag, T, sentinel>& lhs, const StrongId<tag, T, sentinel>& rhs);
+
+  private:
+    T id_;
+};
+
+///@brief == operator: true when both Ids hold the same underlying id_ value
+template<typename tag, typename T, T sentinel>
+bool operator==(const StrongId<tag, T, sentinel>& lhs, const StrongId<tag, T, sentinel>& rhs) {
+    return lhs.id_ == rhs.id_;
+}
+
+///@brief != operator: the negation of operator==
+template<typename tag, typename T, T sentinel>
+bool operator!=(const StrongId<tag, T, sentinel>& lhs, const StrongId<tag, T, sentinel>& rhs) {
+    return !(lhs == rhs);
+}
+
+///@brief < operator: orders Ids by their underlying id_ value (needed for std::map-like containers)
+template<typename tag, typename T, T sentinel>
+bool operator<(const StrongId<tag, T, sentinel>& lhs, const StrongId<tag, T, sentinel>& rhs) {
+    return lhs.id_ < rhs.id_;
+}
+
+} //namespace vtr
+
+///@brief Specialize std::hash for StrongId's (needed for std::unordered_map-like containers)
+namespace std {
+template<typename tag, typename T, T sentinel>
+struct hash<vtr::StrongId<tag, T, sentinel>> {
+    std::size_t operator()(const vtr::StrongId<tag, T, sentinel> k) const noexcept {
+        return std::hash<T>{}(k.id_); //Delegate to the hash of the underlying type
+    }
+};
+} //namespace std
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_strong_id_range.h b/third_party/vtr/libs/vtrutil/src/vtr_strong_id_range.h
new file mode 100644
index 000000000..e9fd938f3
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_strong_id_range.h
@@ -0,0 +1,185 @@
+#ifndef _VTR_STRONG_ID_RANGE_H
+#define _VTR_STRONG_ID_RANGE_H
+
+#include <algorithm>
+#include "vtr_assert.h"
+
+namespace vtr {
+
+/**
+ * @file
+ * @brief This header defines a utility class for StrongId's.
+ *
+ * StrongId's are described in vtr_strong_id.h.  In some cases, StrongId's can be
+ * treated like random access iterators, but not all StrongId's have this property.
+ * In addition, there is utility in referring to a range of ids, and being able
+ * to iterate over that range.
+ */
+
+/**
+ * @brief StrongIdIterator class
+ *
+ * StrongIdIterator allows a StrongId to be treated like a random access
+ * iterator.  Whether this is a correct use of the abstraction is up to the
+ * caller.
+ *
+ */
+template<typename StrongId>
+class StrongIdIterator {
+  public:
+    ///@brief default constructor
+    StrongIdIterator() = default;
+    ///@brief copy assignment
+    StrongIdIterator& operator=(const StrongIdIterator& other) = default;
+    ///@brief copy constructor
+    StrongIdIterator(const StrongIdIterator& other) = default;
+    ///@brief Constructs an iterator positioned at id; asserts that id is valid
+    explicit StrongIdIterator(StrongId id)
+        : id_(id) {
+        VTR_ASSERT(bool(id));
+    }
+
+    using iterator_category = std::random_access_iterator_tag;
+    using value_type = StrongId;
+    using reference = StrongId&;
+    using pointer = StrongId*;
+    using difference_type = ssize_t;
+
+    ///@brief Dereference operator (*)
+    StrongId& operator*() {
+        VTR_ASSERT_SAFE(bool(id_));
+        return this->id_;
+    }
+
+    ///@brief += operator
+    StrongIdIterator& operator+=(ssize_t n) {
+        VTR_ASSERT_SAFE(bool(id_));
+        id_ = StrongId(size_t(id_) + n);
+        VTR_ASSERT_SAFE(bool(id_));
+        return *this;
+    }
+
+    ///@brief -= operator
+    StrongIdIterator& operator-=(ssize_t n) {
+        VTR_ASSERT_SAFE(bool(id_));
+        id_ = StrongId(size_t(id_) - n);
+        VTR_ASSERT_SAFE(bool(id_));
+        return *this;
+    }
+
+    ///@brief ++ operator
+    StrongIdIterator& operator++() {
+        VTR_ASSERT_SAFE(bool(id_));
+        *this += 1;
+        VTR_ASSERT_SAFE(bool(id_));
+        return *this;
+    }
+
+    ///@brief Decrement operator
+    StrongIdIterator& operator--() {
+        VTR_ASSERT_SAFE(bool(id_));
+        *this -= 1;
+        VTR_ASSERT_SAFE(bool(id_));
+        return *this;
+    }
+
+    ///@brief Indexing operator []
+    StrongId operator[](ssize_t offset) const {
+        return StrongId(size_t(id_) + offset);
+    }
+
+    ///@brief + operator (returns a copy of lhs advanced by n)
+    // Non-template friend: a friend template defined here would be re-defined by every instantiation.
+    friend StrongIdIterator operator+(
+        const StrongIdIterator& lhs,
+        ssize_t n) {
+        StrongIdIterator ret = lhs;
+        ret += n;
+        return ret;
+    }
+
+    ///@brief - operator (returns a copy of lhs moved back by n)
+    // Non-template friend, for the same reason as operator+.
+    friend StrongIdIterator operator-(
+        const StrongIdIterator& lhs,
+        ssize_t n) {
+        StrongIdIterator ret = lhs;
+        ret -= n;
+        return ret;
+    }
+
+    ///@brief - operator (signed distance between two iterators)
+    // Non-template friend, for the same reason as operator+.
+    friend ssize_t operator-(
+        const StrongIdIterator& lhs,
+        const StrongIdIterator& rhs) {
+        VTR_ASSERT_SAFE(bool(lhs.id_));
+        VTR_ASSERT_SAFE(bool(rhs.id_));
+
+        ssize_t ret = size_t(lhs.id_);
+        ret -= size_t(rhs.id_);
+        return ret;
+    }
+
+    ///@brief == operator
+    // Non-template friend, for the same reason as operator+.
+    friend bool operator==(const StrongIdIterator& lhs, const StrongIdIterator& rhs) {
+        return lhs.id_ == rhs.id_;
+    }
+
+    ///@brief != operator
+    // Non-template friend, for the same reason as operator+.
+    friend bool operator!=(const StrongIdIterator& lhs, const StrongIdIterator& rhs) {
+        return lhs.id_ != rhs.id_;
+    }
+
+    ///@brief < operator
+    // Non-template friend, for the same reason as operator+.
+    friend bool operator<(const StrongIdIterator& lhs, const StrongIdIterator& rhs) {
+        return lhs.id_ < rhs.id_;
+    }
+
+  private:
+    StrongId id_;
+};
+
+/**
+ * @brief StrongIdRange class
+ *
+ * StrongIdRange allows a pair of StrongId's to define a contiguous range of
+ * ids.  The "end" StrongId is excluded from this range.
+ */
+template<typename StrongId>
+class StrongIdRange {
+  public:
+    ///@brief Constructs the range [b, e); asserts that b is not greater than e
+    StrongIdRange(StrongId b, StrongId e)
+        : begin_(b)
+        , end_(e) {
+        VTR_ASSERT(begin_ < end_ || begin_ == end_);
+    }
+
+    ///@brief Returns a StrongIdIterator to the first StrongId in the range
+    StrongIdIterator<StrongId> begin() const {
+        return StrongIdIterator<StrongId>(begin_);
+    }
+    ///@brief Returns a StrongIdIterator at the excluded "end" StrongId (past-the-end of the range)
+    StrongIdIterator<StrongId> end() const {
+        return StrongIdIterator<StrongId>(end_);
+    }
+
+    ///@brief Returns true if the range is empty (const, so it can be called on a const range)
+    bool empty() const { return begin_ == end_; }
+    ///@brief Returns the number of ids in the range (const, so it can be called on a const range)
+    size_t size() const {
+        return std::distance(begin(), end());
+    }
+
+  private:
+    StrongId begin_;
+    StrongId end_;
+};
+
+} //namespace vtr
+
+#endif /* _VTR_STRONG_ID_RANGE_H */
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_time.cc b/third_party/vtr/libs/vtrutil/src/vtr_time.cc
new file mode 100644
index 000000000..a557f1867
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_time.cc
@@ -0,0 +1,99 @@
+#include "vtr_time.h"
+
+#include "vtr_log.h"
+#include "vtr_rusage.h"
+
+namespace vtr {
+
+int f_timer_depth = 0; // incremented by the ScopedActionTimer constructor, decremented by its destructor
+
+///@brief Constructor: records the start time and the value of get_max_rss() at construction
+Timer::Timer()
+    : start_(clock::now())
+    , initial_max_rss_(get_max_rss()) {
+}
+
+///@brief Returns the seconds elapsed since construction, as a float
+float Timer::elapsed_sec() const {
+    return std::chrono::duration<float>(clock::now() - start_).count();
+}
+
+///@brief Returns the maximum resident set size (rss) in MiB: get_max_rss() divided by BYTE_TO_MIB
+float Timer::max_rss_mib() const {
+    return get_max_rss() / BYTE_TO_MIB;
+}
+
+///@brief Returns the change in max rss since construction, in MiB; subtracts as double so a lower reading cannot wrap
+float Timer::delta_max_rss_mib() const {
+    return static_cast<float>((static_cast<double>(get_max_rss()) - static_cast<double>(initial_max_rss_)) / BYTE_TO_MIB);
+}
+
+///@brief Constructor: stores the action string and the current f_timer_depth, then increments f_timer_depth
+ScopedActionTimer::ScopedActionTimer(std::string action_str)
+    : action_(action_str)
+    , depth_(f_timer_depth++) {
+}
+
+///@brief Destructor: decrements f_timer_depth
+ScopedActionTimer::~ScopedActionTimer() {
+    --f_timer_depth;
+}
+
+///@brief Sets the quiet value (when true, the derived timers' destructors skip their timing message)
+void ScopedActionTimer::quiet(bool value) {
+    quiet_ = value;
+}
+
+///@brief Returns the current quiet value
+bool ScopedActionTimer::quiet() const {
+    return quiet_;
+}
+
+///@brief Returns a copy of the action string given at construction
+std::string ScopedActionTimer::action() const {
+    return action_;
+}
+
+///@brief Returns depth() '#' characters followed by a space, or an empty string at depth 0
+std::string ScopedActionTimer::pad() const {
+    const int level = depth();
+    if (level != 0) {
+        return std::string(level, '#') + " ";
+    }
+    return "";
+}
+
+///@brief Returns the depth captured from f_timer_depth at construction
+int ScopedActionTimer::depth() const {
+    return depth_;
+}
+
+///@brief Constructor: forwards the action string to ScopedActionTimer
+ScopedFinishTimer::ScopedFinishTimer(std::string action_str)
+    : ScopedActionTimer(action_str) {
+}
+
+///@brief Destructor: unless quiet, prints the elapsed time and max rss of the action
+ScopedFinishTimer::~ScopedFinishTimer() {
+    if (quiet()) {
+        return; // reporting suppressed
+    }
+    vtr::printf_info("%s%s took %.2f seconds (max_rss %.1f MiB)\n",
+                     pad().c_str(), action().c_str(), elapsed_sec(), max_rss_mib());
+}
+
+///@brief Constructor: forwards the action string to ScopedActionTimer and prints the padded action
+ScopedStartFinishTimer::ScopedStartFinishTimer(std::string action_str)
+    : ScopedActionTimer(action_str) {
+    vtr::printf_info("%s%s\n", pad().c_str(), action().c_str());
+}
+
+///@brief Destructor: unless quiet, prints the elapsed time, max rss and change in max rss of the action
+ScopedStartFinishTimer::~ScopedStartFinishTimer() {
+    if (quiet()) {
+        return; // reporting suppressed
+    }
+    vtr::printf_info("%s%s took %.2f seconds (max_rss %.1f MiB, delta_rss %+.1f MiB)\n",
+                     pad().c_str(), action().c_str(), elapsed_sec(), max_rss_mib(), delta_max_rss_mib());
+}
+
+} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_time.h b/third_party/vtr/libs/vtrutil/src/vtr_time.h
new file mode 100644
index 000000000..2a4d4ec8a
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_time.h
@@ -0,0 +1,99 @@
+#ifndef VTR_TIME_H
+#define VTR_TIME_H
+#include <chrono>
+#include <string>
+
+namespace vtr {
+
+///@brief Class for tracking time elapsed since construction
+class Timer {
+  public:
+    Timer();
+    virtual ~Timer() = default;
+
+    ///@brief No copy (deleted on the canonical const-reference signatures)
+    Timer(const Timer&) = delete;
+    Timer& operator=(const Timer&) = delete;
+
+    ///@brief No move
+    Timer(Timer&&) = delete;
+    Timer& operator=(Timer&&) = delete;
+
+    ///@brief Return elapsed time in seconds
+    float elapsed_sec() const;
+
+    ///@brief Return peak memory resident set size (in MiB)
+    float max_rss_mib() const;
+
+    ///@brief Return change in peak memory resident set size (in MiB)
+    float delta_max_rss_mib() const;
+
+  private:
+    using clock = std::chrono::steady_clock;
+    std::chrono::time_point<clock> start_;
+
+    size_t initial_max_rss_; //Maximum resident set size In bytes
+    constexpr static float BYTE_TO_MIB = 1024 * 1024;
+};
+
+///@brief Scoped timer base class which stores the action string, the quiet flag and the nesting depth for the specified action
+class ScopedActionTimer : public Timer {
+  public:
+    ScopedActionTimer(const std::string action);
+    ~ScopedActionTimer();
+
+    void quiet(bool value);
+    bool quiet() const;
+    std::string action() const;
+
+  protected:
+    int depth() const;
+    std::string pad() const;
+
+  private:
+    const std::string action_;
+    bool quiet_ = false;
+    int depth_;
+};
+
+/**
+ * @brief Scoped elapsed time class which prints the time elapsed for the specified action when it is destructed.
+ *
+ * For example:
+ *
+ *       {
+ *           vtr::ScopedFinishTimer timer("my_action");
+ *
+ *           //Do other work
+ *
+ *           //Will print: 'my_action took X.XX seconds (max_rss Y.Y MiB)' when out-of-scope
+ *       }
+ */
+class ScopedFinishTimer : public ScopedActionTimer {
+  public:
+    ScopedFinishTimer(const std::string action);
+    ~ScopedFinishTimer();
+};
+
+/**
+ * @brief Scoped elapsed time class which prints out the action when initialized and again both the action and elapsed time when destructed.
+ *
+ * The message printed on destruction also reports max_rss and delta_rss.
+ * For example:
+ *
+ *       {
+ *           vtr::ScopedStartFinishTimer timer("my_action"); //Will print: 'my_action'
+ *
+ *           //Do other work
+ *
+ *           //Will print 'my_action took X.XX seconds (...)' when out of scope
+ *       }
+ */
+class ScopedStartFinishTimer : public ScopedActionTimer {
+  public:
+    ScopedStartFinishTimer(const std::string action);
+    ~ScopedStartFinishTimer();
+};
+} // namespace vtr
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_token.cc b/third_party/vtr/libs/vtrutil/src/vtr_token.cc
new file mode 100644
index 000000000..1715e9f23
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_token.cc
@@ -0,0 +1,218 @@
+/**
+ * Jason Luu
+ * July 22, 2009
+ * Tokenizer
+ */
+
+#include <cstring>
+
+#include "vtr_assert.h"
+#include "vtr_log.h"
+#include "vtr_util.h"
+#include "vtr_memory.h"
+#include "vtr_token.h"
+
+enum e_token_type GetTokenTypeFromChar(const enum e_token_type cur_token_type,
+                                       const char cur);
+
+bool IsWhitespace(char c);
+
+///@brief Returns true if character is whitespace between tokens
+bool IsWhitespace(char c) {
+    //The only separators are space, tab, carriage return and newline
+    if (c == ' ' || c == '\t') {
+        return true;
+    }
+    if (c == '\r' || c == '\n') {
+        return true;
+    }
+    //Anything else is part of a token
+    return false;
+}
+
+///@brief Returns a token list of the text for a given string (nullptr if inString is null or holds no tokens); the token count is written to *num_tokens.
+t_token* GetTokensFromString(const char* inString, int* num_tokens) {
+    const char* cur;
+    t_token* tokens;
+    int i, in_string_index, prev_in_string_index;
+    bool has_null;
+    enum e_token_type cur_token_type, new_token_type;
+
+    *num_tokens = i = 0;
+    cur_token_type = TOKEN_NULL;
+
+    if (inString == nullptr) {
+        return nullptr;
+    };
+
+    cur = inString;
+
+    /* Count number of tokens: one starts wherever the character type changes to something other than TOKEN_NULL */
+    while (*cur) {
+        new_token_type = GetTokenTypeFromChar(cur_token_type, *cur);
+        if (new_token_type != cur_token_type) {
+            cur_token_type = new_token_type;
+            if (new_token_type != TOKEN_NULL) {
+                i++;
+            }
+        }
+        ++cur;
+    }
+    *num_tokens = i;
+    /* Allocate one entry more than counted; it becomes the TOKEN_NULL terminator written at the end */
+    if (*num_tokens > 0) {
+        tokens = (t_token*)vtr::calloc(*num_tokens + 1, sizeof(t_token));
+    } else {
+        return nullptr;
+    }
+
+    /* populate tokens (second pass over the string, same type-change rule as the counting pass) */
+    i = 0;
+    in_string_index = 0;
+    has_null = true; /* true while the most recent token's data needs no further truncation */
+    prev_in_string_index = 0;
+    cur_token_type = TOKEN_NULL;
+
+    cur = inString;
+
+    while (*cur) {
+        new_token_type = GetTokenTypeFromChar(cur_token_type, *cur);
+        if (new_token_type != cur_token_type) {
+            if (!has_null) {
+                tokens[i - 1].data[in_string_index - prev_in_string_index] = '\0'; /* NULL the end of the data string */
+                has_null = true;
+            }
+            if (new_token_type != TOKEN_NULL) {
+                tokens[i].type = new_token_type;
+                tokens[i].data = vtr::strdup(inString + in_string_index); /* copies the whole remainder; cut to length above once the type changes */
+                prev_in_string_index = in_string_index;
+                has_null = false;
+                i++;
+            }
+            cur_token_type = new_token_type;
+        }
+        ++cur;
+        in_string_index++;
+    }
+
+    VTR_ASSERT(i == *num_tokens);
+
+    tokens[*num_tokens].type = TOKEN_NULL;
+    tokens[*num_tokens].data = nullptr;
+
+    /* Return the list */
+    return tokens;
+}
+
+///@brief Frees the data string of each of the first num_tokens tokens, then the token array itself
+void freeTokens(t_token* tokens, const int num_tokens) {
+    for (int itok = 0; itok < num_tokens; ++itok) {
+        //Each data string is a separate allocation
+        free(tokens[itok].data);
+    }
+    free(tokens);
+}
+
+///@brief Returns a token type of the given char (a digit is TOKEN_INT unless the current token is a string)
+enum e_token_type GetTokenTypeFromChar(const enum e_token_type cur_token_type,
+                                       const char cur) {
+    if (IsWhitespace(cur)) {
+        return TOKEN_NULL;
+    }
+    switch (cur) {
+        case '[':
+            return TOKEN_OPEN_SQUARE_BRACKET;
+        case ']':
+            return TOKEN_CLOSE_SQUARE_BRACKET;
+        case '{':
+            return TOKEN_OPEN_SQUIG_BRACKET;
+        case '}':
+            return TOKEN_CLOSE_SQUIG_BRACKET;
+        case ':':
+            return TOKEN_COLON;
+        case '.':
+            return TOKEN_DOT;
+        default:
+            break;
+    }
+    const bool starts_or_extends_int = (cur >= '0' && cur <= '9' && cur_token_type != TOKEN_STRING);
+    return starts_or_extends_int ? TOKEN_INT : TOKEN_STRING;
+}
+
+///@brief Returns true if the token's type equals to token_type
+bool checkTokenType(const t_token token, enum e_token_type token_type) {
+    //Direct comparison of the stored type against the requested one
+    const bool same_type = (token.type == token_type);
+
+    return same_type;
+}
+
+///@brief Fills 'matrix' (max_i rows of max_j entries) with the vtr::atof result of all the input string entries separated by whitespace
+void my_atof_2D(float** matrix, const int max_i, const int max_j, const char* instring) {
+    int i, j;
+    char *cur, *cur2, *copy, *final;
+
+    copy = vtr::strdup(instring); //Work on a private copy: entries are temporarily '\0'-terminated in place
+    final = copy;
+    while (*final != '\0') { //Move 'final' to the terminator of the copy
+        final++;
+    }
+
+    cur = copy;
+    i = j = 0;
+    while (cur != final) {
+        while (IsWhitespace(*cur) && cur != final) {
+            if (j == max_j) { //A full row has been read: continue with the next row
+                i++;
+                j = 0;
+            }
+            cur++;
+        }
+        if (cur == final) {
+            break;
+        }
+        cur2 = cur;
+        while (!IsWhitespace(*cur2) && cur2 != final) { //Find the end of the current entry
+            cur2++;
+        }
+        *cur2 = '\0'; //Terminate the entry so that it can be converted on its own
+        VTR_ASSERT(i < max_i && j < max_j);
+        matrix[i][j] = vtr::atof(cur);
+        j++;
+        cur = cur2;
+        *cur = ' '; //Turn the terminator back into whitespace for the skipping loop above
+    }
+
+    VTR_ASSERT((i == max_i && j == 0) || (i == max_i - 1 && j == max_j)); //Exactly max_i complete rows must have been read
+
+    free(copy);
+}
+
+/* Date:July 2nd, 2013
+ * Author: Daniel Chen */
+/**
+ * @brief Checks if the number of entries (separated by whitespace) in 'instring' matches the expected number (max_i * max_j)
+ *
+ * The number of entries found is also written to 'num_entries'. Can be used before calling my_atof_2D.
+ */
+bool check_my_atof_2D(const int max_i, const int max_j, const char* instring, int* num_entries) {
+    int n_found = 0;
+    bool prev_was_space = true; //Start as if preceded by whitespace so that a leading entry is counted
+
+    /* An entry begins at every non-whitespace character that follows whitespace (or starts the string) */
+    for (const char* p = instring; *p != '\0'; ++p) {
+        const bool is_space = IsWhitespace(*p);
+        if (!is_space && prev_was_space) {
+            ++n_found;
+        }
+        prev_was_space = is_space;
+    }
+
+    /* Report the count to the caller regardless of the outcome */
+    *num_entries = n_found;
+
+    /* Compare against the expected matrix size */
+    const int n_expected = max_i * max_j;
+
+    return n_expected == n_found;
+}
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_token.h b/third_party/vtr/libs/vtrutil/src/vtr_token.h
new file mode 100644
index 000000000..9556d6614
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_token.h
@@ -0,0 +1,40 @@
+/**
+ * @file
+ * @author Jason Luu
+ * @Date July 22, 2009
+ * @brief Tokenizer
+ */
+
+#ifndef TOKEN_H
+#define TOKEN_H
+
+///@brief Token types
+enum e_token_type {
+    TOKEN_NULL,
+    TOKEN_STRING,
+    TOKEN_INT,
+    TOKEN_OPEN_SQUARE_BRACKET,
+    TOKEN_CLOSE_SQUARE_BRACKET,
+    TOKEN_OPEN_SQUIG_BRACKET,
+    TOKEN_CLOSE_SQUIG_BRACKET,
+    TOKEN_COLON,
+    TOKEN_DOT
+};
+
+///@brief Token structure
+struct t_token {
+    enum e_token_type type;
+    char* data;
+};
+
+t_token* GetTokensFromString(const char* inString, int* num_tokens);
+
+void freeTokens(t_token* tokens, const int num_tokens);
+
+bool checkTokenType(const t_token token, enum e_token_type token_type);
+
+void my_atof_2D(float** matrix, const int max_i, const int max_j, const char* instring);
+
+bool check_my_atof_2D(const int max_i, const int max_j, const char* instring, int* num_entries);
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_util.cc b/third_party/vtr/libs/vtrutil/src/vtr_util.cc
new file mode 100644
index 000000000..45ee30358
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_util.cc
@@ -0,0 +1,504 @@
+#include <cstdarg>
+#include <cstdlib>
+#include <cerrno> //For errno
+#include <cstring>
+#include <memory>
+#include <sstream>
+
+#include "vtr_util.h"
+#include "vtr_assert.h"
+#include "vtr_memory.h"
+#include "vtr_error.h"
+
+#if defined(__unix__)
+#    include <unistd.h> //For getpid()
+#endif
+
+namespace vtr {
+
+std::string out_file_prefix;     /* used by fopen */
+static int file_line_number = 0; /* file in line number being parsed (used by fgets) */
+static int cont;                 /* line continued? (used by strtok)*/
+
+/**
+ * @brief Splits the c-style string 'text' along the specified delimiter characters in 'delims'
+ *
+ * The split strings (excluding the delimiters) are returned; a null 'text' gives an empty vector
+ */
+std::vector<std::string> split(const char* text, const std::string delims) {
+    if (text == nullptr) {
+        return std::vector<std::string>();
+    }
+    //Defer to the std::string overload
+    return split(std::string(text), delims);
+}
+
+/**
+ * @brief Splits the string 'text' along the specified delimiter characters in 'delims'
+ *
+ * The split strings (excluding the delimiters) are returned. Runs of consecutive
+ * delimiters act as a single separator, so no empty strings are returned.
+ */
+std::vector<std::string> split(const std::string& text, const std::string delims) {
+    //Tokens found so far, in order of appearance
+    std::vector<std::string> pieces;
+
+    //Position of the first character of the token currently being extracted
+    //(npos once only delimiters, or nothing at all, remain)
+    size_t tok_begin = text.find_first_not_of(delims);
+
+    while (tok_begin != std::string::npos) {
+        //The token extends up to the next delimiter, or to the end of 'text'
+        //when find_first_of() reports npos
+        const size_t tok_end = text.find_first_of(delims, tok_begin);
+
+        size_t tok_len;
+        if (tok_end == std::string::npos) {
+            tok_len = text.size() - tok_begin;
+        } else {
+            tok_len = tok_end - tok_begin;
+        }
+
+        //Save it
+        pieces.push_back(text.substr(tok_begin, tok_len));
+
+        //Skip over the delimiter run to the start of the next token; searching
+        //from npos simply yields npos again, which ends the loop
+        tok_begin = text.find_first_not_of(delims, tok_end);
+    }
+
+    return pieces;
+}
+
+///@brief Returns 'input' with the first instance of 'search' replaced with 'replace' ('input' is returned unchanged if 'search' does not occur)
+std::string replace_first(const std::string& input, const std::string& search, const std::string& replace) {
+    auto pos = input.find(search);
+    if (pos == std::string::npos) return input; //No match: nothing to replace (and pos + search.size() below would wrap around)
+
+    std::string output(input, 0, pos); //Everything before the match
+    output += replace;
+    output += std::string(input, pos + search.size()); //Everything after the match
+    return output;
+}
+
+///@brief Returns 'input' with all instances of 'search' replaced with 'replace' ('input' is returned unchanged if 'search' is empty)
+std::string replace_all(const std::string& input, const std::string& search, const std::string& replace) {
+    //An empty 'search' matches at every position without ever advancing, so the loop below would never finish
+    if (search.empty()) return input;
+
+    std::string output;
+    size_t last = 0;
+    size_t pos = input.find(search, last); //Find the first instance of 'search' starting at or after 'last'
+    while (pos != std::string::npos) {
+        output += input.substr(last, pos - last); //Append anything in the input string between last and current match
+        output += replace;                        //Add the replacement
+        last = pos + search.size();               //Advance past the current match
+        pos = input.find(search, last);           //Look for the next match
+    }
+    output += input.substr(last); //Append anything in 'input' after the last match
+
+    return output;
+}
+
+///@brief Returns true if str starts with prefix (only the leading prefix.size() characters of str are examined)
+bool starts_with(std::string str, std::string prefix) {
+    return str.compare(0, prefix.size(), prefix) == 0;
+}
+
+///@brief Returns a std::string formatted using a printf-style format string
+std::string string_fmt(const char* fmt, ...) {
+    // Make a variable argument list
+    va_list va_args;
+
+    // Initialize variable argument list
+    va_start(va_args, fmt);
+
+    //Format string
+    std::string str = vstring_fmt(fmt, va_args);
+
+    // Reset variable argument list
+    va_end(va_args);
+
+    return str;
+}
+
+///@brief Returns a std::string formatted using a printf-style format string taking an explicit va_list
+std::string vstring_fmt(const char* fmt, va_list args) {
+    // We need to copy the args so we don't change them before the true formating
+    va_list va_args_copy;
+    va_copy(va_args_copy, args);
+
+    //Determine the formatted length using a copy of the args
+    int len = std::vsnprintf(nullptr, 0, fmt, va_args_copy);
+
+    va_end(va_args_copy); //Clean-up
+
+    //Negative if there is a problem with the format string
+    VTR_ASSERT_MSG(len >= 0, "Problem decoding format string");
+
+    size_t buf_size = len + 1; //For terminator
+
+    //Allocate a buffer
+    //  unique_ptr will free buffer automatically
+    std::unique_ptr<char[]> buf(new char[buf_size]);
+
+    //Format into the buffer using the original args
+    len = std::vsnprintf(buf.get(), buf_size, fmt, args);
+
+    VTR_ASSERT_MSG(len >= 0, "Problem decoding format string");
+    VTR_ASSERT(static_cast<size_t>(len) == buf_size - 1);
+
+    //Build the string from the buffer
+    return std::string(buf.get(), len);
+}
+
+///@brief An alternate for strncpy which always null-terminates: copies at most (size - 1) characters of 'src' into 'dest' and returns 'dest'
+char* strncpy(char* dest, const char* src, size_t size) {
+    /* A zero-sized destination cannot even hold the terminator: leave it untouched (size - 1 below would wrap around) */
+    if (size == 0) return dest;
+
+    /* Find string's length */
+    size_t len = std::strlen(src);
+
+    /* Cap length at (size - 1) to leave room for \0 */
+    if (size <= len)
+        len = (size - 1);
+
+    /* Copy as much of string as we can fit, then terminate explicitly */
+    std::memcpy(dest, src, len);
+    dest[len] = '\0';
+    return dest;
+}
+
+/**
+ * @brief Null-tolerant replacement for the c-style strdup.
+ *
+ * Returns nullptr when 'str' is nullptr; otherwise returns a copy of 'str'
+ * held in a buffer obtained from vtr::calloc.
+ */
+char* strdup(const char* str) {
+    if (str == nullptr) {
+        return nullptr;
+    }
+
+    //calloc is used so that the char following the copied ones is already '\0'
+    const size_t n_chars = std::strlen(str);
+    char* duplicate = (char*)vtr::calloc(n_chars + 1, sizeof(char));
+    return (char*)std::memcpy(duplicate, str, n_chars);
+}
+
+/**
+ * @brief Converts 'value' to a T using stream extraction, with error checking.
+ *
+ * Throws a VtrError naming 'type_name' if the extraction fails or does not
+ * consume all of 'value'.
+ */
+template<class T>
+T atoT(const std::string& value, const std::string& type_name) {
+    //The c version of atof doesn't catch errors.
+    //
+    //This version uses stringstream to detect conversion errors
+    std::istringstream ss(value);
+
+    T val;
+    ss >> val;
+
+    if (ss.fail() || !ss.eof()) {
+        //Failed to convert, or did not consume all input
+        std::stringstream msg;
+        msg << "Failed to convert string '" << value << "' to " << type_name;
+        throw VtrError(msg.str(), __FILE__, __LINE__);
+    }
+
+    return val;
+}
+
+/**
+ * @brief Converts 'value' to an int via atoT<int>.
+ *
+ * Unlike the c-style atoi, a string which does not convert, or which is not
+ * consumed completely by the conversion, makes atoT throw a VtrError.
+ */
+int atoi(const std::string& value) {
+    return atoT<int>(value, "int");
+}
+
+/**
+ * @brief Converts 'value' to a double via atoT<double>.
+ *
+ * A string which does not convert, or which is not consumed completely by
+ * the conversion, makes atoT throw a VtrError.
+ */
+double atod(const std::string& value) {
+    return atoT<double>(value, "double");
+}
+
+/**
+ * @brief Converts 'value' to a float via atoT<float>.
+ *
+ * Unlike the c-style atof, a string which does not convert, or which is not
+ * consumed completely by the conversion, makes atoT throw a VtrError.
+ */
+float atof(const std::string& value) {
+    return atoT<float>(value, "float");
+}
+
+/**
+ * @brief Converts 'value' to an unsigned via atoT<unsigned>.
+ *
+ * A string which does not convert, or which is not consumed completely by
+ * the conversion, makes atoT throw a VtrError.
+ */
+unsigned atou(const std::string& value) {
+    return atoT<unsigned>(value, "unsigned int");
+}
+
+/**
+ * @brief Get next token, and wrap to next line if \ at end of line.
+ *
+ * When the current line is exhausted and the last vtr::fgets call flagged a
+ * continuation, the next line is read into 'buf' (vtr::bufsize characters are
+ * requested from vtr::fgets) and tokenizing resumes there.
+ *
+ * There is a bit of a "gotcha" in strtok: it does not copy the character
+ * array passed on the first call, so that array must stay alive for as long
+ * as the line is being parsed. Don't use short-lived local buffers for it.
+ */
+char* strtok(char* ptr, const char* tokens, FILE* fp, char* buf) {
+    char* val;
+
+    val = std::strtok(ptr, tokens);
+    for (;;) {
+        if (val != nullptr || cont == 0) /* got a token, or the line is not continued */
+            return (val);
+
+        /* no token left but the line continues: fetch the next line, giving up at end of file */
+        if (vtr::fgets(buf, bufsize, fp) == nullptr)
+            return (nullptr);
+
+        val = std::strtok(buf, tokens);
+    }
+}
+
+/**
+ * @brief The legacy fopen function with extra error checking
+ *
+ * Resets the line counter reported by get_file_line_number_of_last_opened_file().
+ * When out_file_prefix is non-empty and 'flag' contains 'w', the prefix is
+ * prepended to 'fname' before opening.
+ *
+ * @param fname  Name of the file to open
+ * @param flag   Access mode string handed to std::fopen
+ * @return The opened stream (never nullptr)
+ * @throws VtrError if std::fopen fails; the message includes strerror(errno)
+ */
+FILE* fopen(const char* fname, const char* flag) {
+    file_line_number = 0;
+
+    /* Appends a prefix string for output files. The combined name lives in a
+     * std::string so that it is released on every path, including the throw below */
+    std::string prefixed_fname;
+    if (!out_file_prefix.empty() && std::strchr(flag, 'w')) {
+        prefixed_fname = out_file_prefix.c_str(); /* taken as a C string, i.e. up to the first '\0' */
+        prefixed_fname += fname;
+        fname = prefixed_fname.c_str();
+    }
+    FILE* fp = std::fopen(fname, flag);
+    if (fp == nullptr) {
+        throw VtrError(string_fmt("Error opening file %s for %s access: %s.\n", fname, flag, strerror(errno)), __FILE__, __LINE__);
+    }
+    return fp;
+}
+
+///@brief The legacy fclose function
+int fclose(FILE* f) {
+    return std::fclose(f);
+}
+
+/**
+ * @brief Get an input line, update the line number and cut off any comment part.
+ *
+ * A \ at the end of a line with no comment part (#) means continue.
+ * Both \r and \n end the line and are stored as \n (intended to make Windows
+ * and Linux newlines behave alike). Returns NULL after EOF.
+ * Throws VtrError, reporting the line number and the longest line allowed
+ * (max_size - 2 characters), if the line does not fit into 'buf'.
+ */
+char* fgets(char* buf, int max_size, FILE* fp) {
+    int ch;
+    int i;
+
+    cont = 0;           /* line continued? */
+    file_line_number++; /* global variable */
+
+    for (i = 0; i < max_size - 1; i++) { /* Keep going until the line finishes or the buffer is full */
+
+        ch = std::fgetc(fp);
+
+        if (std::feof(fp)) { /* end of file */
+            if (i == 0) {
+                return nullptr; /* required so we can write while (vtr::fgets(...) != NULL) */
+            } else {            /* no newline before end of file - last line must be returned */
+                buf[i] = '\0';
+                return buf;
+            }
+        }
+
+        if (ch == '#') { /* comment */
+            buf[i] = '\0';
+            while ((ch = std::fgetc(fp)) != '\n' && !std::feof(fp))
+                ; /* skip the rest of the line */
+            return buf;
+        }
+
+        if (ch == '\r' || ch == '\n') {         /* newline (cross-platform) */
+            if (i != 0 && buf[i - 1] == '\\') { /* if \ at end of line, line continued */
+                cont = 1;
+                buf[i - 1] = '\n'; /* May need this for tokens */
+                buf[i] = '\0';
+            } else {
+                buf[i] = '\n';
+                buf[i + 1] = '\0';
+            }
+            return buf;
+        }
+
+        buf[i] = ch; /* copy character into the buffer */
+    }
+
+    /* Buffer is full but line has not terminated, so error (one int argument per %d: line number, then length limit) */
+    throw VtrError(string_fmt("Error on line %d -- line is too long for input buffer.\n"
+                              "All lines must be at most %d characters long.\n",
+                              file_line_number, max_size - 2),
+                   __FILE__, __LINE__);
+    return nullptr;
+}
+
+/**
+ * @brief to get an arbitrary long input line and cut off any
+ * comment part
+ *
+ * the getline function is exactly like the __get_delim function
+ * in GNU with '\n' delimiter. As a result, to make the function
+ * behaviour identical for Windows (\r\n) and Linux (\n) compiler
+ * macros for checking operating systems have been used.
+ *
+ * @note user need to take care of the given pointer,
+ * which will be dynamically allocated by getdelim
+ */
+char* getline(char*& _lineptr, FILE* _stream) {
+    int i;
+    int ch;
+    size_t _n = 0;
+    ssize_t nread = -1; /* stays -1 (handled as nothing read) if neither platform branch below is compiled */
+    errno = 0;          /* getdelim also returns -1 at end of file; a stale errno must not be taken for its error */
+#if defined(__unix__)
+    nread = getdelim(&_lineptr, &_n, '\n', _stream);
+#elif defined(_WIN32)
+#    define __WIN_NLTK "\r\n"
+    nread = getdelim(&_lineptr, &_n, __WIN_NLTK, _stream);
+#endif
+
+    if (nread == -1) {
+        int errsv = errno;
+        std::string error_msg;
+
+        if (errsv == EINVAL)
+            error_msg = string_fmt("[%s] Bad arguments (_lineptr is NULL, or _stream is not valid).", strerror(errsv));
+        else if (errsv == ENOMEM)
+            error_msg = string_fmt("[%s] Allocation or reallocation of the line buffer failed.", strerror(errsv));
+        else
+            /* end of file so it will return null */
+            return nullptr;
+
+        /* getline was unsuccessful, so error */
+        throw VtrError(string_fmt("Error -- %s\n",
+                                  error_msg.c_str()),
+                       __FILE__, __LINE__);
+        return nullptr;
+    }
+
+    cont = 0;           /* line continued? */
+    file_line_number++; /* global variable */
+
+    for (i = 0; i < nread; i++) { /* Keep going until the line finishes */
+
+        ch = _lineptr[i];
+
+        if (ch == '#') { /* comment */
+            _lineptr[i] = '\0';
+            /* skip the rest of the line */
+            break;
+        }
+    }
+
+    return (_lineptr);
+}
+
+///@brief Returns line number of last opened and read file
+int get_file_line_number_of_last_opened_file() {
+    return file_line_number;
+}
+
+///@brief Returns true if 'filename' is non-null and can be opened for reading
+bool file_exists(const char* filename) {
+    if (filename == nullptr) {
+        return false;
+    }
+
+    //Probe by opening for reading; the handle is only needed for the check
+    FILE* probe = std::fopen(filename, "r");
+    const bool opened = (probe != nullptr);
+    if (opened) {
+        std::fclose(probe);
+    }
+    return opened;
+}
+
+/* Date:July 17th, 2013
+ * Author: Daniel Chen */
+/**
+ * @brief Checks the file extension of a file to ensure correct file format.
+ *
+ * Returns true if 'file_name' ends with 'file_extension', and false otherwise.
+ * @note This is probably a fragile check, but at least should
+ * prevent common problems such as swapping architecture file
+ * and blif file on the VPR command line.
+ */
+bool check_file_name_extension(const char* file_name,
+                               const char* file_extension) {
+    const size_t len_name = std::strlen(file_name);
+    const size_t len_extension = std::strlen(file_extension);
+
+    //Name too short to hold the extension; an empty extension is only accepted for an empty name
+    if (len_extension == 0 || len_name < len_extension) {
+        return len_name == len_extension;
+    }
+
+    //Compare the tail of the name, so that an earlier occurrence of the extension text (e.g. "a.xml.b.xml") cannot hide the real suffix
+    return std::strcmp(file_name + (len_name - len_extension), file_extension) == 0;
+}
+
+/**
+ * @brief Legacy ReadLine Tokening: reads one line with vtr::fgets, increments *LineNum and returns the line's tokens
+ */
+std::vector<std::string> ReadLineTokens(FILE* InFile, int* LineNum) {
+    //Line buffer of vtr::bufsize characters, released automatically on return
+    const std::unique_ptr<char[]> line_buf(new char[vtr::bufsize]);
+    char* const raw = line_buf.get();
+    const char* text = vtr::fgets(raw, vtr::bufsize, InFile);
+    *LineNum += 1;
+    //A null 'text' (nothing was read) is passed on to split() as it is
+    return vtr::split(text);
+}
+
+///@brief Returns pid if os is unix, -1 otherwise.
+int get_pid() {
+#if defined(__unix__)
+    return getpid();
+#else
+    return -1;
+#endif
+}
+
+} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_util.h b/third_party/vtr/libs/vtrutil/src/vtr_util.h
new file mode 100644
index 000000000..08562d3d0
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_util.h
@@ -0,0 +1,123 @@
+#ifndef VTR_UTIL_H
+#define VTR_UTIL_H
+
+#include <algorithm>
+#include <vector>
+#include <string>
+#include <cstdarg>
+#include <array>
+
+namespace vtr {
+
+/**
+ * @brief Splits the string 'text' along the specified delimiter characters in 'delims'
+ *
+ * The split strings (excluding the delimiters) are returned
+ */
+std::vector<std::string> split(const char* text, const std::string delims = " \t\n");
+std::vector<std::string> split(const std::string& text, const std::string delims = " \t\n");
+
+///@brief Returns 'input' with the first instance of 'search' replaced with 'replace'
+std::string replace_first(const std::string& input, const std::string& search, const std::string& replace);
+
+///@brief Returns 'input' with all instances of 'search' replaced with 'replace'
+std::string replace_all(const std::string& input, const std::string& search, const std::string& replace);
+
+///@brief Returns true if str starts with prefix
+bool starts_with(std::string str, std::string prefix);
+
+///@brief Returns a std::string formatted using a printf-style format string
+std::string string_fmt(const char* fmt, ...);
+
+///@brief Returns a std::string formatted using a printf-style format string taking an explicit va_list
+std::string vstring_fmt(const char* fmt, va_list args);
+
+/**
+ * @brief Joins a sequence by a specified delimeter
+ *
+ *  For example the sequence {"home", "user", "my_files", "test.blif"} with delim="/"
+ *  would return "home/user/my_files/test.blif"
+ */
+template<typename Iter>
+std::string join(Iter begin, Iter end, std::string delim);
+
+template<typename Container>
+std::string join(Container container, std::string delim);
+
+template<typename T>
+std::string join(std::initializer_list<T> list, std::string delim);
+
+template<typename Container>
+void uniquify(Container& container);
+
+constexpr size_t bufsize = 32768; /* Maximum line length for various parsing proc. */
+char* strncpy(char* dest, const char* src, size_t size);
+char* strdup(const char* str);
+char* strtok(char* ptr, const char* tokens, FILE* fp, char* buf);
+FILE* fopen(const char* fname, const char* flag);
+int fclose(FILE* f);
+char* fgets(char* buf, int max_size, FILE* fp);
+char* getline(char*& _lineptr, FILE* _stream);
+
+int atoi(const std::string& value);
+unsigned atou(const std::string& value);
+float atof(const std::string& value);
+double atod(const std::string& value);
+
+/**
+ * @brief File utilities
+ */
+int get_file_line_number_of_last_opened_file();
+bool file_exists(const char* filename);
+bool check_file_name_extension(const char* file_name,
+                               const char* file_extension);
+
+extern std::string out_file_prefix;
+
+/**
+ * @brief Legacy ReadLine Tokening
+ */
+std::vector<std::string> ReadLineTokens(FILE* InFile, int* LineNum);
+
+/**
+ * @brief Template join function implementation (needs only ++, != and * from Iter, so any container's iterators work)
+ */
+template<typename Iter>
+std::string join(Iter begin, Iter end, std::string delim) {
+    std::string joined_str;
+    for (auto iter = begin; iter != end; ++iter) {
+        if (iter != begin) {
+            joined_str += delim; //The delimiter goes in front of every element but the first
+        }
+        joined_str += *iter;
+    }
+    return joined_str;
+}
+
+template<typename Container>
+std::string join(Container container, std::string delim) {
+    return join(std::begin(container), std::end(container), delim);
+}
+
+template<typename T>
+std::string join(std::initializer_list<T> list, std::string delim) {
+    return join(list.begin(), list.end(), delim);
+}
+
+/**
+ * @brief Template uniquify function implementation
+ *
+ * Sorts the container and removes repeated elements, in place (hence the reference parameter)
+ */
+template<typename Container>
+void uniquify(Container& container) {
+    std::sort(container.begin(), container.end());
+    container.erase(std::unique(container.begin(), container.end()),
+                    container.end());
+}
+
+int get_pid();
+
+} // namespace vtr
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_vec_id_set.h b/third_party/vtr/libs/vtrutil/src/vtr_vec_id_set.h
new file mode 100644
index 000000000..720722593
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_vec_id_set.h
@@ -0,0 +1,106 @@
+#ifndef VTR_SET_H
+#define VTR_SET_H
+
+#include <vector>
+
+namespace vtr {
+
+/**
+ * @brief Implements a set-like interface which supports multiple operations
+ *
+ * The supported operations are:
+ *  - insertion
+ *  - iteration
+ *  - membership test
+ * all in constant time.
+ *
+ * It assumes the element type (T) is convertible to size_t.
+ * Usually, elements are vtr::StrongIds.
+ *
+ * Iteration through the elements is not strictly ordered, usually
+ * insertion order, unless sort() has been previously called.
+ *
+ * The underlying implementation uses a vector for element
+ * storage (for iteration), and a bit-set for membership tests.
+ */
+template<typename T>
+class vec_id_set {
+  public:
+    typedef typename std::vector<T>::const_iterator const_iterator;
+    typedef const_iterator iterator;
+
+    ///@brief Returns an iterator to the first element in the sequence
+    auto begin() const { return vec_.begin(); }
+    ///@brief Returns an iterator referring to the past-the-end element in the vector container
+    auto end() const { return vec_.end(); }
+
+    ///@brief Returns a constant iterator to the first element in the sequence
+    auto cbegin() const { return vec_.cbegin(); }
+    ///@brief Returns a constant iterator referring to the past-the-end element in the vector container
+    auto cend() const { return vec_.cend(); }
+
+    ///@brief Insert val in the set; returns false (and changes nothing) if val is already contained, true otherwise
+    bool insert(T val) {
+        if (count(val)) { //Already inserted
+            return false;
+        }
+
+        vec_.push_back(val);
+
+        //Mark this value as being contained
+        if (size_t(val) >= contained_.size()) {
+            //We dynamically grow contained_ based on the maximum
+            //value contained, so its size never has to be given up-front.
+            contained_.resize(size_t(val) + 1, false);
+        }
+        contained_[size_t(val)] = true;
+
+        return true;
+    }
+
+    ///@brief Iterators specifying a range of elements. Copies of the elements in the range [first,last) are inserted in the container.
+    template<typename Iter>
+    void insert(Iter first, Iter last) {
+        size_t nelem = std::distance(first, last);
+        vec_.reserve(size() + nelem);
+        contained_.reserve(size() + nelem); //NOTE(review): contained_ is indexed by value, not by element count, so this is only a rough hint
+
+        for (Iter itr = first; itr != last; ++itr) {
+            insert(*itr);
+        }
+    }
+
+    ///@brief Count elements with a specific value (1 if val is contained, 0 otherwise)
+    size_t count(T val) const {
+        if (size_t(val) < contained_.size()) {
+            //Value is with-in range of previously inserted
+            //elements, so look-up its membership
+            return contained_[size_t(val)];
+        }
+        return 0;
+    }
+
+    ///@brief Returns the size of the container
+    size_t size() const {
+        return vec_.size();
+    }
+
+    ///@brief Sort elements in the container
+    void sort() {
+        std::sort(vec_.begin(), vec_.end());
+    }
+
+    ///@brief Clears the container
+    void clear() {
+        vec_.clear();
+        contained_.clear();
+    }
+
+  private:
+    std::vector<T> vec_;          //Elements contained
+    std::vector<bool> contained_; //Bit-set for constant-time membership test
+};
+
+} // namespace vtr
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_vector.h b/third_party/vtr/libs/vtrutil/src/vtr_vector.h
new file mode 100644
index 000000000..dc8b689af
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_vector.h
@@ -0,0 +1,211 @@
+#ifndef VTR_VECTOR
+#define VTR_VECTOR
+#include <vector>
+#include <cstddef>
+#include <iterator>
+#include "vtr_range.h"
+
+namespace vtr {
+
+/**
+ * @brief A std::vector container which is indexed by K (instead of size_t).
+ *
+ * The main use of this container is to behave like a std::vector which is
+ * indexed by a vtr::StrongId. It assumes that K is explicitly convertable to size_t
+ * (i.e. via operator size_t()), and can be explicitly constructed from a size_t.
+ *
+ * It includes all the following std::vector functions:
+ *      - begin
+ *      - cbegin
+ *      - cend
+ *      - crbegin
+ *      - crend
+ *      - end
+ *      - rbegin
+ *      - rend
+ *      - capacity
+ *      - empty
+ *      - max_size
+ *      - reserve
+ *      - resize
+ *      - shrink_to_fit
+ *      - size
+ *      - back
+ *      - front
+ *      - assign
+ *      - clear
+ *      - emplace
+ *      - emplace_back
+ *      - erase
+ *      - get_allocator
+ *      - insert
+ *      - pop_back
+ *      - push_back
+ *
+ * If you need more std::map-like (instead of std::vector-like) behaviour see
+ * vtr::vector_map.
+ */
+template<typename K, typename V, typename Allocator = std::allocator<V>>
+class vector : private std::vector<V, Allocator> {
+    using storage = std::vector<V, Allocator>;
+
+  public:
+    typedef K key_type;
+
+    class key_iterator;
+    typedef vtr::Range<key_iterator> key_range;
+
+  public:
+    //Pass through std::vector's types
+    using typename storage::allocator_type;
+    using typename storage::const_iterator;
+    using typename storage::const_pointer;
+    using typename storage::const_reference;
+    using typename storage::const_reverse_iterator;
+    using typename storage::difference_type;
+    using typename storage::iterator;
+    using typename storage::pointer;
+    using typename storage::reference;
+    using typename storage::reverse_iterator;
+    using typename storage::size_type;
+    using typename storage::value_type;
+
+    //Pass through storage methods (the first line inherits std::vector's constructors)
+    using std::vector<V, Allocator>::vector;
+
+    using storage::begin;
+    using storage::cbegin;
+    using storage::cend;
+    using storage::crbegin;
+    using storage::crend;
+    using storage::end;
+    using storage::rbegin;
+    using storage::rend;
+
+    using storage::capacity;
+    using storage::empty;
+    using storage::max_size;
+    using storage::reserve;
+    using storage::resize;
+    using storage::shrink_to_fit;
+    using storage::size;
+
+    using storage::back;
+    using storage::front;
+
+    using storage::assign;
+    using storage::clear;
+    using storage::emplace;
+    using storage::emplace_back;
+    using storage::erase;
+    using storage::get_allocator;
+    using storage::insert;
+    using storage::pop_back;
+    using storage::push_back;
+
+    /*
+     * We can't using-forward storage::data, as it might not exist
+     * in the particular specialization (typically: vector<bool>)
+     * causing compiler complaints.
+     * Instead, implement it as an inline forwarding method whose
+     * compilation is deferred to when it is actually requested.
+     */
+    ///@brief Returns a pointer to the vector's data
+    inline V* data() { return storage::data(); }
+    ///@brief Returns a pointer to the vector's data (immutable)
+    inline const V* data() const { return storage::data(); }
+
+    /*
+     * Don't include operator[] and at() from std::vector,
+     *
+     * since we redefine them to take key_type instead of size_t
+     */
+    ///@brief [] operator: unchecked access to the element at index size_t(id)
+    reference operator[](const key_type id) {
+        auto i = size_t(id);
+        return storage::operator[](i);
+    }
+    ///@brief [] operator immutable: unchecked access to the element at index size_t(id)
+    const_reference operator[](const key_type id) const {
+        auto i = size_t(id);
+        return storage::operator[](i);
+    }
+    ///@brief at() operator: forwards to std::vector::at() with index size_t(id)
+    reference at(const key_type id) {
+        auto i = size_t(id);
+        return storage::at(i);
+    }
+    ///@brief at() operator immutable: forwards to std::vector::at() with index size_t(id)
+    const_reference at(const key_type id) const {
+        auto i = size_t(id);
+        return storage::at(i);
+    }
+
+    // We must re-define swap to avoid inaccessible base class errors
+    ///@brief swap function: exchanges the contents of this vector with other
+    void swap(vector<K, V, Allocator>& other) {
+        std::swap(*this, other);
+    }
+
+    ///@brief Returns a range containing the keys, from key_type(0) up to (not including) key_type(size())
+    key_range keys() const {
+        return vtr::make_range(key_begin(), key_end());
+    }
+
+  public:
+    /**
+     * @brief Iterator class which is convertible to the key_type
+     *
+     * This allows end-users to call the parent class's keys() member
+     * to iterate through the keys with a range-based for loop
+     */
+    class key_iterator {
+      public:
+        ///@brief Iterator traits are spelled out directly, since the std::iterator helper base is deprecated as of C++17
+        using iterator_category = std::bidirectional_iterator_tag;
+        using value_type = key_type;
+        using difference_type = std::ptrdiff_t;
+        using pointer = key_type*;
+        using reference = key_type&;
+
+        ///@brief constructor: starts the iterator at the key init
+        key_iterator(key_iterator::value_type init)
+            : value_(init) {}
+
+        /*
+         * vtr::vector assumes that the key type is convertible to size_t.
+         *
+         * It also assumes all the underlying IDs are zero-based and contiguous. That means
+         * we can just increment the underlying Id to build the next key.
+         */
+        ///@brief Pre-increment: steps to the next key and returns this iterator
+        key_iterator& operator++() {
+            value_ = value_type(size_t(value_) + 1);
+            return *this;
+        }
+        ///@brief Pre-decrement: steps to the previous key and returns this iterator
+        key_iterator& operator--() {
+            value_ = value_type(size_t(value_) - 1);
+            return *this;
+        }
+        ///@brief dereference operator: yields the current key
+        reference operator*() { return value_; }
+        ///@brief -> operator: yields a pointer to the current key
+        pointer operator->() { return &value_; }
+
+        ///@brief == operator: two iterators are equal when they hold the same key
+        friend bool operator==(const key_iterator lhs, const key_iterator rhs) { return lhs.value_ == rhs.value_; }
+        ///@brief != operator
+        friend bool operator!=(const key_iterator lhs, const key_iterator rhs) { return !(lhs == rhs); }
+
+      private:
+        value_type value_;
+    };
+
+  private:
+    key_iterator key_begin() const { return key_iterator(key_type(0)); }
+    key_iterator key_end() const { return key_iterator(key_type(size())); }
+};
+
+} // namespace vtr
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_vector_map.h b/third_party/vtr/libs/vtrutil/src/vtr_vector_map.h
new file mode 100644
index 000000000..50309e86e
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_vector_map.h
@@ -0,0 +1,172 @@
+#ifndef VTR_VECTOR_MAP
+#define VTR_VECTOR_MAP
+#include <vector>
+
+#include "vtr_assert.h"
+#include "vtr_sentinels.h"
+
+namespace vtr {
+
+/**
+ * @brief A vector-like container which is indexed by K (instead of size_t as in std::vector).
+ * 
+ * The main use of this container is to behave like a std::vector which is indexed by
+ * vtr::StrongId.
+ * 
+ * Requires that K be convertable to size_t with the size_t operator (i.e. size_t()), and
+ * that the conversion results in a linearly increasing index into the underlying vector.
+ * 
+ * This results in a container that is somewhat similar to a std::map (i.e. converts from one
+ * type to another), but requires contiguously ascending (i.e. linear) keys. Unlike std::map
+ * only the values are stored (at the specified index/key), reducing memory usage and improving
+ * cache locality. Furthermore, operator[] and find() return the value or iterator directly
+ * associated with the value (like std::vector) rather than a std::pair (like std::map).
+ * insert() takes both the key and value as separate arguments and has no return value.
+ * 
+ * Additionally, vector_map will silently create values for 'gaps' in the index range (i.e.
+ * those elements are initialized with Sentinel::INVALID()).
+ * 
+ * If you need a fully featured std::map like container without the above differences see
+ * vtr::linear_map.
+ * 
+ * If you do not need std::map-like features see vtr::vector. Note that vtr::vector_map is very similar 
+ * to vtr::vector. Unless there is a specific reason that vtr::vector_map is needed, it is better to use vtr::vector.
+ * 
+ * Note that it is possible to use vector_map with sparse/non-contiguous keys, but this is typically
+ * memory inefficient as the underlying vector will allocate space for [0..size_t(max_key)],
+ * where max_key is the largest key that has been inserted.
+ * 
+ * As with a std::vector, it is the caller's responsibility to ensure there is sufficient space
+ * for a given index/key before it is accessed. The exceptions to this are the find(), insert() and
+ * update() methods, which handle non-existing keys gracefully.
+ */
+
+template<typename K, typename V, typename Sentinel = DefaultSentinel<V>>
+class vector_map {
+  public: //Public types
+    typedef typename std::vector<V>::const_reference const_reference;
+    typedef typename std::vector<V>::reference reference;
+
+    typedef typename std::vector<V>::iterator iterator;
+    typedef typename std::vector<V>::const_iterator const_iterator;
+    typedef typename std::vector<V>::const_reverse_iterator const_reverse_iterator;
+
+  public:
+    ///@brief Constructor: forwards all of its arguments to the underlying std::vector<V>
+    template<typename... Args>
+    vector_map(Args&&... args)
+        : vec_(std::forward<Args>(args)...) {}
+
+  public: //Accessors
+    ///@brief Returns an iterator referring to the first element in the map container.
+    const_iterator begin() const { return vec_.begin(); }
+    ///@brief Returns an iterator referring to the past-the-end element in the map container.
+    const_iterator end() const { return vec_.end(); }
+    ///@brief Returns a reverse iterator pointing to the last element in the container (i.e., its reverse beginning).
+    const_reverse_iterator rbegin() const { return vec_.rbegin(); }
+    ///@brief Returns a reverse iterator pointing to the theoretical element right before the first element in the map container (which is considered its reverse end).
+    const_reverse_iterator rend() const { return vec_.rend(); }
+
+    //Indexing
+    ///@brief [] operator immutable (the range check is only a VTR_ASSERT_SAFE_MSG assertion)
+    const_reference operator[](const K n) const {
+        size_t index = size_t(n);
+
+        /**
+         * Shouldn't check for index >= 0, since size_t is unsigned thus won't be negative
+         *
+         * A negative input to n would result in an absurdly large number close to the maximum size of size_t, and be caught by index < vec_.size()
+         * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2013/n3690.pdf chapter 4.7 para 2
+         */
+
+        VTR_ASSERT_SAFE_MSG(index < vec_.size(), "Out-of-range index");
+        return vec_[index];
+    }
+
+    ///@brief Returns an iterator to the slot indexed by key, or vector_map::end() if key lies beyond the current size.
+    const_iterator find(const K key) const {
+        if (size_t(key) < vec_.size()) {
+            return vec_.begin() + size_t(key);
+        } else {
+            return vec_.end();
+        }
+    }
+
+    ///@brief Returns the number of elements in the container.
+    std::size_t size() const { return vec_.size(); }
+
+    ///@brief Returns true if the container is empty
+    bool empty() const { return vec_.empty(); }
+
+    ///@brief Returns true if key indexes an allocated slot (size_t(key) < size()); the slot's value is not inspected
+    bool contains(const K key) const { return size_t(key) < vec_.size(); }
+    ///@brief Returns 1 if the container contains key, 0 otherwise
+    size_t count(const K key) const { return contains(key) ? 1 : 0; }
+
+  public: //Mutators
+    // Delegate potentially overloaded functions to the underlying vector with perfect forwarding
+    ///@brief push_back function
+    template<typename... Args>
+    void push_back(Args&&... args) { vec_.push_back(std::forward<Args>(args)...); }
+
+    ///@brief emplace_back function
+    template<typename... Args>
+    void emplace_back(Args&&... args) { vec_.emplace_back(std::forward<Args>(args)...); }
+
+    ///@brief resize function
+    template<typename... Args>
+    void resize(Args&&... args) { vec_.resize(std::forward<Args>(args)...); }
+
+    ///@brief clears the container
+    void clear() { vec_.clear(); }
+
+    ///@brief Returns the capacity of the container
+    size_t capacity() const { return vec_.capacity(); }
+    ///@brief Requests the container to reduce its capacity to fit its size.
+    void shrink_to_fit() { vec_.shrink_to_fit(); }
+
+    ///@brief Returns an iterator referring to the first element in the map container.
+    iterator begin() { return vec_.begin(); }
+    ///@brief Returns an iterator referring to the past-the-end element in the map container.
+    iterator end() { return vec_.end(); }
+
+    ///@brief Indexing (mutable); the range check is only a VTR_ASSERT_SAFE_MSG assertion
+    reference operator[](const K n) {
+        VTR_ASSERT_SAFE_MSG(size_t(n) < vec_.size(), "Out-of-range index");
+        return vec_[size_t(n)];
+    }
+
+    ///@brief Returns an iterator to the slot indexed by key, or end() if key lies beyond the current size.
+    iterator find(const K key) {
+        if (size_t(key) < vec_.size()) {
+            return vec_.begin() + size_t(key);
+        } else {
+            return vec_.end();
+        }
+    }
+
+    ///@brief Stores value at key, first growing the container (new slots get Sentinel::INVALID()) when key is out of range.
+    void insert(const K key, const V value) {
+        if (size_t(key) >= vec_.size()) {
+            //Resize so key is in range
+            vec_.resize(size_t(key) + 1, Sentinel::INVALID());
+        }
+
+        //Insert the value
+        operator[](key) = value;
+    }
+
+    ///@brief Sets the value stored at key; simply forwards to insert()
+    void update(const K key, const V value) { insert(key, value); }
+
+    ///@brief Swap for this exact specialization, whatever its Sentinel (found by unqualified swap() calls via ADL)
+    friend void swap(vector_map& x, vector_map& y) {
+        std::swap(x.vec_, y.vec_);
+    }
+
+  private:
+    std::vector<V> vec_;
+};
+
+} // namespace vtr
+#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_version.cpp.in b/third_party/vtr/libs/vtrutil/src/vtr_version.cpp.in
new file mode 100644
index 000000000..4755874ba
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_version.cpp.in
@@ -0,0 +1,20 @@
+#include "vtr_version.h"
+
+//This file is automatically processed by CMake, which replaces
+//each name enclosed in at-signs with the relevant CMake variable
+//before the file is compiled.
+namespace vtr { //Version and build constants; each value below is a placeholder for CMake to substitute
+    const char* VERSION = "@VTR_VERSION@";
+    const char* VERSION_SHORT = "@VTR_VERSION_SHORT@";
+
+    const size_t VERSION_MAJOR = @VTR_VERSION_MAJOR@; //Unquoted: the substituted text must be a valid size_t initializer
+    const size_t VERSION_MINOR = @VTR_VERSION_MINOR@; //Unquoted, as above
+    const size_t VERSION_PATCH = @VTR_VERSION_PATCH@; //Unquoted, as above
+    const char* VERSION_PRERELEASE = "@VTR_VERSION_PRERELEASE@";
+
+    const char* VCS_REVISION = "@VTR_VCS_REVISION@";
+    const char* VCS_REVISION_SHORT = "@VTR_VCS_REVISION_SHORT@";
+    const char* COMPILER = "@VTR_COMPILER_INFO@";
+    const char* BUILD_TIMESTAMP = "@VTR_BUILD_TIMESTAMP@";
+    const char* BUILD_INFO = "@VTR_BUILD_INFO@";
+}
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_version.h b/third_party/vtr/libs/vtrutil/src/vtr_version.h
new file mode 100644
index 000000000..f9bfaac14
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/src/vtr_version.h
@@ -0,0 +1,20 @@
+#ifndef VTR_VERSION_H
+#define VTR_VERSION_H
+#include <cstddef>
+
+namespace vtr { // Version/build constants; the values live in the CMake-processed vtr_version.cpp.in
+extern const char* VERSION;
+extern const char* VERSION_SHORT;
+extern const size_t VERSION_MAJOR;
+extern const size_t VERSION_MINOR;
+extern const size_t VERSION_PATCH;
+extern const char* VERSION_PRERELEASE;
+
+extern const char* VCS_REVISION;
+extern const char* VCS_REVISION_SHORT; // defined next to VCS_REVISION in vtr_version.cpp.in
+extern const char* COMPILER;
+extern const char* BUILD_TIMESTAMP;
+extern const char* BUILD_INFO;
+} // namespace vtr
+
+#endif
diff --git a/third_party/vtr/libs/vtrutil/test/main.cpp b/third_party/vtr/libs/vtrutil/test/main.cpp
new file mode 100644
index 000000000..f5c7e84bc
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/test/main.cpp
@@ -0,0 +1,2 @@
+#define CATCH_CONFIG_MAIN
+#include "catch2/catch_test_macros.hpp"
diff --git a/third_party/vtr/libs/vtrutil/test/test_array_view.cpp b/third_party/vtr/libs/vtrutil/test/test_array_view.cpp
new file mode 100644
index 000000000..69f92c40d
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/test/test_array_view.cpp
@@ -0,0 +1,110 @@
+#include "catch2/catch_test_macros.hpp"
+
+#include "vtr_array_view.h"
+#include "vtr_strong_id.h"
+#include <array>
+
+struct test_tag;
+using TestStrongId = vtr::StrongId<test_tag>;
+
+TEST_CASE("Array view", "[array_view/array_view]") { //array_view over a std::array: size/data, reads, aliasing and write-through
+    std::array<uint16_t, 10> arr;
+    vtr::array_view<uint16_t> arr_view(arr.data(), arr.size());
+
+    const vtr::array_view<uint16_t>& carr_view = arr_view; //const reference to the same view object
+    const vtr::array_view<uint16_t> carr_view2 = arr_view; //copy-initialized const view
+    const vtr::array_view<uint16_t> carr_view3(arr_view); //direct-initialized const view
+
+    REQUIRE(arr.size() == arr_view.size());
+    REQUIRE(arr.data() == arr_view.data());
+    REQUIRE(arr.data() == carr_view.data());
+    REQUIRE(arr.data() == carr_view2.data());
+    REQUIRE(arr.data() == carr_view3.data());
+
+    for (size_t i = 0; i < arr.size(); ++i) { //fill the backing array directly
+        arr[i] = i;
+    }
+
+    for (size_t i = 0; i < arr_view.size(); ++i) { //every view must read back those values
+        REQUIRE(arr_view[i] == i);
+        REQUIRE(carr_view[i] == i);
+        REQUIRE(carr_view2[i] == i);
+        REQUIRE(carr_view3[i] == i);
+    }
+
+    for (size_t i = 0; i < arr.size(); ++i) { //[] and at() on each view must yield the array's own element addresses
+        REQUIRE(&arr[i] == &arr_view[i]);
+        REQUIRE(&arr.at(i) == &arr_view.at(i));
+        REQUIRE(&arr[i] == &carr_view[i]);
+        REQUIRE(&arr.at(i) == &carr_view.at(i));
+        REQUIRE(&arr[i] == &carr_view2[i]);
+        REQUIRE(&arr.at(i) == &carr_view2.at(i));
+        REQUIRE(&arr[i] == &carr_view3[i]);
+        REQUIRE(&arr.at(i) == &carr_view3.at(i));
+    }
+
+    for (size_t i = 0; i < arr_view.size(); ++i) { //now write through the mutable view
+        arr_view[i] = arr_view.size() - i;
+    }
+
+    for (size_t i = 0; i < arr.size(); ++i) { //the writes must be visible in the array and in every other view
+        REQUIRE(arr[i] == (arr_view.size() - i));
+        REQUIRE(carr_view[i] == (arr_view.size() - i));
+        REQUIRE(carr_view2[i] == (arr_view.size() - i));
+        REQUIRE(carr_view3[i] == (arr_view.size() - i));
+    }
+}
+
+TEST_CASE("Array view id", "[array_view/array_view_id]") { //same checks as "Array view", but indexing the view with a StrongId
+    std::array<uint16_t, 10> arr;
+    vtr::array_view_id<TestStrongId, uint16_t> arr_view(arr.data(), arr.size());
+
+    const vtr::array_view_id<TestStrongId, uint16_t>& carr_view = arr_view; //const reference to the same view object
+    const vtr::array_view_id<TestStrongId, uint16_t> carr_view2 = arr_view; //copy-initialized const view
+    const vtr::array_view_id<TestStrongId, uint16_t> carr_view3(arr_view); //direct-initialized const view
+
+    REQUIRE(arr.size() == arr_view.size());
+    REQUIRE(arr.data() == arr_view.data());
+    REQUIRE(arr.data() == carr_view.data());
+    REQUIRE(arr.data() == carr_view2.data());
+    REQUIRE(arr.data() == carr_view3.data());
+
+    for (size_t i = 0; i < arr.size(); ++i) { //fill the backing array directly
+        arr[i] = i;
+    }
+
+    for (size_t i = 0; i < arr_view.size(); ++i) { //every view must read back those values
+        TestStrongId id(i); //the views are indexed by id, the raw array by i
+
+        REQUIRE(arr_view[id] == i);
+        REQUIRE(carr_view[id] == i);
+        REQUIRE(carr_view2[id] == i);
+        REQUIRE(carr_view3[id] == i);
+    }
+
+    for (size_t i = 0; i < arr.size(); ++i) { //[] and at() on each view must yield the array's own element addresses
+        TestStrongId id(i);
+
+        REQUIRE(&arr[i] == &arr_view[id]);
+        REQUIRE(&arr.at(i) == &arr_view.at(id));
+        REQUIRE(&arr[i] == &carr_view[id]);
+        REQUIRE(&arr.at(i) == &carr_view.at(id));
+        REQUIRE(&arr[i] == &carr_view2[id]);
+        REQUIRE(&arr.at(i) == &carr_view2.at(id));
+        REQUIRE(&arr[i] == &carr_view3[id]);
+        REQUIRE(&arr.at(i) == &carr_view3.at(id));
+    }
+
+    for (size_t i = 0; i < arr_view.size(); ++i) { //now write through the mutable view
+        TestStrongId id(i);
+        arr_view[id] = arr_view.size() - i;
+    }
+
+    for (size_t i = 0; i < arr.size(); ++i) { //the writes must be visible in the array and in every other view
+        TestStrongId id(i);
+        REQUIRE(arr[i] == (arr_view.size() - i));
+        REQUIRE(carr_view[id] == (arr_view.size() - i));
+        REQUIRE(carr_view2[id] == (arr_view.size() - i));
+        REQUIRE(carr_view3[id] == (arr_view.size() - i));
+    }
+}
diff --git a/third_party/vtr/libs/vtrutil/test/test_expr_eval.cpp b/third_party/vtr/libs/vtrutil/test/test_expr_eval.cpp
new file mode 100644
index 000000000..5070ee717
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/test/test_expr_eval.cpp
@@ -0,0 +1,90 @@
+#include <limits>
+
+#include "catch2/catch_test_macros.hpp"
+
+#include "vtr_expr_eval.h"
+
+TEST_CASE("Simple Expressions", "[vtr_expr_eval]") { //Integer literals with the arithmetic, modulo and comparison operators
+    vtr::FormulaParser parser;
+    vtr::t_formula_data vars; //left empty: none of these formulas reference a variable
+
+    REQUIRE(parser.parse_formula("0", vars) == 0);
+    REQUIRE(parser.parse_formula("42", vars) == 42);
+
+    REQUIRE(parser.parse_formula("5 + 2", vars) == 7);
+    REQUIRE(parser.parse_formula("5 + 10", vars) == 15);
+    REQUIRE(parser.parse_formula("5 - 2", vars) == 3);
+    REQUIRE(parser.parse_formula("5 - 10", vars) == -5); //a negative result is fine, unlike a negative literal
+
+    REQUIRE(parser.parse_formula("5 * 5", vars) == 25);
+    REQUIRE(parser.parse_formula("5 / 5", vars) == 1);
+
+    //Floor arithmetic: the fractional part of a quotient is dropped
+    REQUIRE(parser.parse_formula("5 / 10", vars) == 0);
+    REQUIRE(parser.parse_formula("10 / 9", vars) == 1);
+
+    REQUIRE(parser.parse_formula("5 % 10", vars) == 5);
+    REQUIRE(parser.parse_formula("10 % 9", vars) == 1);
+
+    REQUIRE(parser.parse_formula("5 < 10", vars) == 1); //comparisons are expected to yield 1 (true) or 0 (false)
+    REQUIRE(parser.parse_formula("20 < 10", vars) == 0);
+
+    REQUIRE(parser.parse_formula("5 > 10", vars) == 0);
+    REQUIRE(parser.parse_formula("20 > 10", vars) == 1);
+}
+
+TEST_CASE("Negative Literals", "[vtr_expr_eval]") { //Placeholder: contains no active assertions
+    //TODO: Currently unsupported, should support in the future (parser and vars must also be declared here first)
+    //REQUIRE(parser.parse_formula("-5 + 10", vars) == 5);
+    //REQUIRE(parser.parse_formula("-10 + 5", vars) == -5);
+    //REQUIRE(parser.parse_formula("-1", vars) == -1);
+}
+
+TEST_CASE("Bracket Expressions", "[vtr_expr_eval]") { //Parentheses and operator precedence
+    vtr::FormulaParser parser;
+    vtr::t_formula_data vars;
+
+    REQUIRE(parser.parse_formula("20 / (4 + 1)", vars) == 4); //brackets force the addition first
+    REQUIRE(parser.parse_formula("(20 / 5) + 1", vars) == 5);
+    REQUIRE(parser.parse_formula("20 / 5 + 1", vars) == 5); //without brackets '/' is applied before '+'
+}
+
+TEST_CASE("Variable Expressions", "[vtr_expr_eval]") { //Formulas whose operands are named variables
+    vtr::FormulaParser parser;
+    vtr::t_formula_data vars;
+    vars.set_var_value("x", 5); //x = 5 and y = 10 for every formula below
+    vars.set_var_value("y", 10);
+
+    REQUIRE(parser.parse_formula("x", vars) == 5);
+    REQUIRE(parser.parse_formula("y", vars) == 10);
+
+    REQUIRE(parser.parse_formula("x + y", vars) == 15);
+    REQUIRE(parser.parse_formula("y + x", vars) == 15);
+
+    REQUIRE(parser.parse_formula("x - y", vars) == -5);
+    REQUIRE(parser.parse_formula("y - x", vars) == 5);
+
+    REQUIRE(parser.parse_formula("x * y", vars) == 50);
+    REQUIRE(parser.parse_formula("y * x", vars) == 50);
+
+    REQUIRE(parser.parse_formula("x / y", vars) == 0); //5 / 10: the fractional part is dropped
+    REQUIRE(parser.parse_formula("y / x", vars) == 2);
+}
+
+TEST_CASE("Function Expressions", "[vtr_expr_eval]") { //The two-argument functions min, max, gcd and lcm
+    vtr::FormulaParser parser;
+    vtr::t_formula_data vars;
+
+    REQUIRE(parser.parse_formula("min(5, 2)", vars) == 2); //both argument orders are checked
+    REQUIRE(parser.parse_formula("min(2, 5)", vars) == 2);
+    //REQUIRE(parser.parse_formula("min(-5, 2)", vars) == -5); //Negative literals currently unsupported
+    //REQUIRE(parser.parse_formula("min(-2, 5)", vars) == -2);
+
+    REQUIRE(parser.parse_formula("max(5, 2)", vars) == 5);
+    REQUIRE(parser.parse_formula("max(2, 5)", vars) == 5);
+    //REQUIRE(parser.parse_formula("max(-5, 2)", vars) == 2); //Negative literals currently unsupported
+    //REQUIRE(parser.parse_formula("max(-2, 5)", vars) == 5);
+
+    REQUIRE(parser.parse_formula("gcd(20, 25)", vars) == 5); //greatest common divisor
+    REQUIRE(parser.parse_formula("lcm(20, 25)", vars) == 100); //least common multiple
+}
diff --git a/third_party/vtr/libs/vtrutil/test/test_geometry.cpp b/third_party/vtr/libs/vtrutil/test/test_geometry.cpp
new file mode 100644
index 000000000..3d44d5977
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/test/test_geometry.cpp
@@ -0,0 +1,245 @@
+#include "catch2/catch_test_macros.hpp"
+#include "catch2/catch_approx.hpp"
+
+#include "vtr_geometry.h"
+
+using namespace Catch;
+
+TEST_CASE("Point", "[vtr_geometry/Point]") { // coordinate accessors and self-equality for int and float points
+    vtr::Point<int> p1(5, 3);
+    vtr::Point<float> p2(5.3, 3.9);
+    SECTION("location") {
+        REQUIRE(p1.x() == 5);
+        REQUIRE(p1.y() == 3);
+
+        REQUIRE(p2.x() == Approx(5.3)); // float coordinates are compared approximately
+        REQUIRE(p2.y() == Approx(3.9));
+    }
+
+    SECTION("equality") {
+        REQUIRE(p1 == p1);
+        REQUIRE(p2 == p2);
+    }
+}
+
+TEST_CASE("Rect", "[vtr_geometry/Rect]") { // construction, accessors, containment, bounding_box and sampling for int and float rects
+    // int tests
+    {
+        vtr::Point<int> pi_1(5, 3);
+        vtr::Point<int> pi_2(10, 11);
+        vtr::Point<int> pi_3(7, 9);
+
+        vtr::Rect<int> r1(pi_1.x(), pi_1.y(), pi_2.x(), pi_2.y()); // built from four coordinates
+        vtr::Rect<int> r2(pi_1, pi_2); // built from the same two corner points
+        vtr::Rect<int> r3(pi_1, pi_3);
+        vtr::Rect<int> r4(pi_3, pi_2);
+
+        SECTION("equality") {
+            REQUIRE(r1 == r2);
+        }
+
+        SECTION("location") {
+            REQUIRE(r1.xmin() == pi_1.x());
+            REQUIRE(r1.xmax() == pi_2.x());
+            REQUIRE(r1.ymin() == pi_1.y());
+            REQUIRE(r1.ymax() == pi_2.y());
+        }
+
+        SECTION("point_accessors") {
+            REQUIRE(r1.bottom_left() == pi_1);
+            REQUIRE(r1.top_right() == pi_2);
+            REQUIRE(r2.bottom_left() == pi_1);
+            REQUIRE(r2.top_right() == pi_2);
+        }
+
+        SECTION("dimensions") {
+            REQUIRE(r1.width() == 5); // 10 - 5
+            REQUIRE(r1.height() == 8); // 11 - 3
+            REQUIRE(r2.width() == 5);
+            REQUIRE(r2.height() == 8);
+        }
+
+        SECTION("contains_int") { // the bottom-left corner counts as contained, the top-right corner does not
+            REQUIRE(r2.contains(pi_1));
+            REQUIRE(r2.contains({6, 4}));
+            REQUIRE_FALSE(r2.contains({100, 4}));
+            REQUIRE_FALSE(r2.contains(pi_2));
+            REQUIRE(vtr::Rect<int>(pi_1).contains(pi_1)); // a rect built from a single point contains that point
+        }
+
+        SECTION("strictly_contains_int") { // neither corner is strictly contained
+            REQUIRE_FALSE(r2.strictly_contains(pi_1));
+            REQUIRE(r2.strictly_contains({6, 4}));
+            REQUIRE_FALSE(r2.strictly_contains({100, 4}));
+            REQUIRE_FALSE(r2.strictly_contains(pi_2));
+        }
+
+        SECTION("coincident_int") { // both corners are coincident with the rect
+            REQUIRE(r2.coincident(pi_1));
+            REQUIRE(r2.coincident({6, 4}));
+            REQUIRE_FALSE(r2.coincident({100, 4}));
+            REQUIRE(r2.coincident(pi_2));
+        }
+
+        SECTION("bounds_int") {
+            REQUIRE(r1 == bounding_box(r3, r4)); // r3 and r4 meet at pi_3, so together they span r1
+        }
+
+        SECTION("empty_int") {
+            REQUIRE(vtr::Rect<int>().empty()); // a default-constructed rect is empty
+        }
+
+        SECTION("sample_int") {
+            auto r = vtr::Rect<int>(pi_1, pi_2);
+            REQUIRE(sample(r, 0, 0, 17) == pi_1); // (0, 0) of 17 maps to the bottom-left corner
+            REQUIRE(sample(r, 17, 17, 17) == pi_2); // (17, 17) of 17 maps to the top-right corner
+            auto inside = sample(r, 3, 11, 17);
+            REQUIRE(r.contains(inside));
+        }
+    }
+
+    // float tests
+    {
+        vtr::Point<float> pf_1(5.3, 3.9);
+        vtr::Point<float> pf_2(10.5, 11.1);
+        vtr::Point<float> pf_3(7.2, 9.4);
+
+        vtr::Rect<float> r3(pf_1.x(), pf_1.y(), pf_2.x(), pf_2.y()); // built from four coordinates
+        vtr::Rect<float> r4(pf_1, pf_2); // built from the same two corner points
+        vtr::Rect<float> r5(pf_1, pf_3);
+        vtr::Rect<float> r6(pf_3, pf_2);
+        // vtr::Rect<float> r7(pf_1); // <-- will fail to compile
+
+        SECTION("equality_float") {
+            REQUIRE(r3 == r4);
+        }
+
+        SECTION("location_float") {
+            REQUIRE(r3.xmin() == pf_1.x());
+            REQUIRE(r3.xmax() == pf_2.x());
+            REQUIRE(r3.ymin() == pf_1.y());
+            REQUIRE(r3.ymax() == pf_2.y());
+        }
+
+        SECTION("point_accessors_float") {
+            REQUIRE(r3.bottom_left() == pf_1);
+            REQUIRE(r3.top_right() == pf_2);
+            REQUIRE(r4.bottom_left() == pf_1);
+            REQUIRE(r4.top_right() == pf_2);
+        }
+
+        SECTION("dimensions") { // float variant; shares its name with the int section above
+            REQUIRE(r3.width() == Approx(5.2));
+            REQUIRE(r3.height() == Approx(7.2));
+            REQUIRE(r4.width() == Approx(5.2));
+            REQUIRE(r4.height() == Approx(7.2));
+        }
+
+        SECTION("contains_float") { // the bottom-left corner counts as contained, the top-right corner does not
+            REQUIRE(r4.contains(pf_1));
+            REQUIRE(r4.contains({6, 4}));
+            REQUIRE_FALSE(r4.contains({100, 4}));
+            REQUIRE_FALSE(r4.contains(pf_2));
+        }
+
+        SECTION("strictly_contains_float") { // neither corner is strictly contained
+            REQUIRE_FALSE(r4.strictly_contains(pf_1));
+            REQUIRE(r4.strictly_contains({6, 4}));
+            REQUIRE_FALSE(r4.strictly_contains({100, 4}));
+            REQUIRE_FALSE(r4.strictly_contains(pf_2));
+        }
+
+        SECTION("coincident_float") { // both corners are coincident with the rect
+            REQUIRE(r4.coincident(pf_1));
+            REQUIRE(r4.coincident({6, 4}));
+            REQUIRE_FALSE(r4.coincident({100, 4}));
+            REQUIRE(r4.coincident(pf_2));
+        }
+
+        SECTION("bounds_float") {
+            REQUIRE(r3 == bounding_box(r5, r6)); // r5 and r6 meet at pf_3, so together they span r3
+        }
+
+        SECTION("empty_float") {
+            REQUIRE(vtr::Rect<float>().empty()); // a default-constructed rect is empty
+        }
+    }
+}
+
+TEST_CASE("Line", "[vtr_geometry/Line]") { // a Line built from four points: point round-trip and bounding box
+    std::vector<vtr::Point<int>> points = {{0, 0},
+                                           {0, 2},
+                                           {1, 0},
+                                           {1, -2}};
+
+    vtr::Line<int> line(points);
+
+    SECTION("points") {
+        auto line_points = line.points();
+
+        REQUIRE(line_points.size() == points.size());
+
+        int i = 0; // index into the original points vector
+        for (auto point : line_points) { // points must come back in the order they were given
+            REQUIRE(points[i] == point);
+            ++i;
+        }
+    }
+
+    SECTION("bounding_box") {
+        auto bb = line.bounding_box();
+
+        REQUIRE(bb.xmin() == 0); // the box spans the extreme x and y values of the four points
+        REQUIRE(bb.xmax() == 1);
+        REQUIRE(bb.ymin() == -2);
+        REQUIRE(bb.ymax() == 2);
+    }
+}
+
+TEST_CASE("RectUnion", "[vtr_geometry/RectUnion]") { // union of two overlapping rects: round-trip, bounding box and containment
+    std::vector<vtr::Rect<int>> rects = {{0, 0, 2, 2},
+                                         {1, 1, 3, 3}};
+
+    vtr::RectUnion<int> rect_union(rects);
+
+    SECTION("rects") {
+        auto union_rects = rect_union.rects();
+
+        REQUIRE(union_rects.size() == rects.size());
+
+        int i = 0; // index into the original rects vector
+        for (auto rect : union_rects) { // rects must come back in the order they were given
+            REQUIRE(rects[i] == rect);
+            ++i;
+        }
+    }
+    SECTION("bounding_box") {
+        auto bb = rect_union.bounding_box();
+
+        REQUIRE(bb.xmin() == 0); // the box covers both rects: (0, 0) to (3, 3)
+        REQUIRE(bb.xmax() == 3);
+        REQUIRE(bb.ymin() == 0);
+        REQUIRE(bb.ymax() == 3);
+    }
+
+    SECTION("contains") {
+        REQUIRE(rect_union.contains({0, 0}));
+        REQUIRE(rect_union.contains({1, 1}));
+        REQUIRE(rect_union.contains({2, 2})); // top-right corner of the first rect, but inside the second
+        REQUIRE_FALSE(rect_union.contains({3, 3})); // top-right corner of the second rect
+    }
+
+    SECTION("strictly_contains") {
+        REQUIRE_FALSE(rect_union.strictly_contains({0, 0})); // a corner of the first rect
+        REQUIRE(rect_union.strictly_contains({1, 1}));
+        REQUIRE(rect_union.strictly_contains({2, 2}));
+        REQUIRE_FALSE(rect_union.strictly_contains({3, 3}));
+    }
+
+    SECTION("coincident") { // corners count here, including the outermost one at (3, 3)
+        REQUIRE(rect_union.coincident({0, 0}));
+        REQUIRE(rect_union.coincident({1, 1}));
+        REQUIRE(rect_union.coincident({2, 2}));
+        REQUIRE(rect_union.coincident({3, 3}));
+    }
+}
diff --git a/third_party/vtr/libs/vtrutil/test/test_map_util.cpp b/third_party/vtr/libs/vtrutil/test/test_map_util.cpp
new file mode 100644
index 000000000..0021151d0
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/test/test_map_util.cpp
@@ -0,0 +1,35 @@
+#include "catch2/catch_test_macros.hpp"
+
+#include "vtr_map_util.h"
+#include "vtr_range.h"
+
+#include <map>
+
+TEST_CASE("Iterate Map Keys Values", "[vtr_map_util]") { //Checks vtr::make_key_range() and vtr::make_value_range() over a std::map
+    std::vector<int> keys = {0, 1, 2, 3};
+    std::vector<char> values = {'a', 'b', 'c', 'd'};
+
+    //Build the map by pairing keys[i] with values[i]
+    std::map<int, char> map;
+    for (size_t i = 0; i < keys.size(); ++i) {
+        map[keys[i]] = values[i];
+    }
+
+    //Key iteration must yield exactly the keys vector, in the same order
+    auto key_range = vtr::make_key_range(map);
+
+    std::vector<int> seen_keys;
+    for (int key : key_range) {
+        seen_keys.push_back(key);
+    }
+    REQUIRE(seen_keys == keys);
+
+    //Value iteration must yield exactly the values vector, in the same order
+    auto value_range = vtr::make_value_range(map);
+
+    std::vector<char> seen_values;
+    for (char value : value_range) {
+        seen_values.push_back(value);
+    }
+    REQUIRE(seen_values == values);
+}
diff --git a/third_party/vtr/libs/vtrutil/test/test_math.cpp b/third_party/vtr/libs/vtrutil/test/test_math.cpp
new file mode 100644
index 000000000..c8e011a8d
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/test/test_math.cpp
@@ -0,0 +1,58 @@
+#include <limits>
+
+#include "catch2/catch_test_macros.hpp"
+#include "catch2/catch_approx.hpp"
+
+#include "vtr_math.h"
+
+using namespace Catch;
+
+TEST_CASE("Nearest Integer", "[vtr_math]") { //Checks vtr::nint() on non-negative inputs around 0, 1 and 42
+    REQUIRE(vtr::nint(0.) == 0);
+    REQUIRE(vtr::nint(0.1) == 0);
+    REQUIRE(vtr::nint(0.5) == 1); //halfway case is expected to round up
+    REQUIRE(vtr::nint(0.9) == 1);
+
+    REQUIRE(vtr::nint(1.) == 1);
+    REQUIRE(vtr::nint(1.1) == 1);
+    REQUIRE(vtr::nint(1.5) == 2); //halfway case is expected to round up
+    REQUIRE(vtr::nint(1.9) == 2);
+
+    REQUIRE(vtr::nint(42.) == 42);
+    REQUIRE(vtr::nint(42.1) == 42);
+    REQUIRE(vtr::nint(42.5) == 43); //halfway case is expected to round up
+    REQUIRE(vtr::nint(42.9) == 43);
+}
+
+TEST_CASE("Safe Ratio", "[vtr_math]") { //Checks vtr::safe_ratio(), compared through Catch::Approx
+    REQUIRE(vtr::safe_ratio(1., 1.) == Approx(1.));
+    REQUIRE(vtr::safe_ratio(1., 2.) == Approx(0.5));
+    REQUIRE(vtr::safe_ratio(50., 0.) == Approx(0.)); //a zero denominator is expected to give 0
+}
+
+TEST_CASE("Is Close", "[vtr_math]") { //Checks vtr::isclose() for relative tolerance, infinities, NaN and absolute tolerance
+    //double NAN = std::numeric_limits<double>::quiet_NaN(); (disabled; the NAN used below presumably comes from <cmath> -- TODO confirm)
+    double INF = std::numeric_limits<double>::infinity();
+
+    double num = 32.4;
+
+    double num_close = num - vtr::DEFAULT_REL_TOL * num / 2; //differs from num by half the default relative tolerance
+    double num_not_quite_close = num - 2 * vtr::DEFAULT_REL_TOL * num; //differs from num by twice the default relative tolerance
+    double num_far = 2 * num;
+
+    REQUIRE(vtr::isclose(-1., -1.));
+    REQUIRE(vtr::isclose(1., 1.));
+    REQUIRE(vtr::isclose(0., 0.));
+    REQUIRE(vtr::isclose(num, num));
+    REQUIRE(vtr::isclose(num, num_close));
+    REQUIRE(!vtr::isclose(num, num_not_quite_close));
+    REQUIRE(!vtr::isclose(num, num_far));
+
+    REQUIRE(vtr::isclose(INF, INF)); //same-signed infinities are expected to be close
+    REQUIRE(!vtr::isclose(-INF, INF));
+    REQUIRE(!vtr::isclose(NAN, NAN)); //NaN is expected to be close to nothing, not even itself
+
+    //Absolute tolerance tests: the values differ by 0.2, so 0.2 must pass and 0.1 must fail
+    REQUIRE(vtr::isclose(32.2, 32.4, 1e-9, 0.2));
+    REQUIRE(!vtr::isclose(32.2, 32.4, 1e-9, 0.1));
+}
diff --git a/third_party/vtr/libs/vtrutil/test/test_ragged_vector.cpp b/third_party/vtr/libs/vtrutil/test/test_ragged_vector.cpp
new file mode 100644
index 000000000..bc5911917
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/test/test_ragged_vector.cpp
@@ -0,0 +1,104 @@
+#include "catch2/catch_test_macros.hpp"
+
+#include "vtr_ragged_matrix.h"
+
+#include <numeric>
+
+TEST_CASE("Construction", "[vtr_ragged_matrix]") { //Checks the vtr::FlatRaggedMatrix constructors, size(), empty() and clear()
+    vtr::FlatRaggedMatrix<float> empty;
+    REQUIRE(empty.size() == 0);
+    REQUIRE(empty.empty());
+
+    std::vector<size_t> row_sizes = {1, 5, 3, 10};
+    size_t nelem = std::accumulate(row_sizes.begin(), row_sizes.end(), 0u); //expected total element count: the sum of all row sizes
+
+    //Construct from container of row sizes
+    vtr::FlatRaggedMatrix<float> ones_container(row_sizes, 1.0);
+    REQUIRE(ones_container.size() == nelem);
+    REQUIRE(!ones_container.empty());
+
+    //Construct from row size callback (maps a row index to that row's size)
+    auto row_size_callback = [&](size_t irow) {
+        return row_sizes[irow];
+    };
+    vtr::FlatRaggedMatrix<float> ones_callback(row_sizes.size(), row_size_callback, 1.0);
+    REQUIRE(ones_callback.size() == nelem);
+    REQUIRE(!ones_callback.empty());
+
+    //Construct from row size iterators
+    vtr::FlatRaggedMatrix<float> ones_iterator(row_sizes.begin(), row_sizes.end(), 1.0);
+    REQUIRE(ones_iterator.size() == nelem);
+    REQUIRE(!ones_iterator.empty());
+
+    //Clear: every matrix must report size 0 afterwards
+    ones_container.clear();
+    REQUIRE(ones_container.size() == 0);
+
+    ones_callback.clear();
+    REQUIRE(ones_callback.size() == 0);
+
+    ones_iterator.clear();
+    REQUIRE(ones_iterator.size() == 0);
+}
+
+TEST_CASE("Iteration", "[vtr_ragged_matrix]") { //Sums the matrix three different ways and checks every sum against the same total
+    std::vector<size_t> row_sizes = {1, 5, 3, 10};
+    vtr::FlatRaggedMatrix<float> ones(row_sizes, 1.0);
+
+    float expected_sum = std::accumulate(row_sizes.begin(), row_sizes.end(), 0.f); //sum of row sizes; every way of summing the matrix must match it
+
+    //Iteration by indices
+    float index_iteration_sum = 0.;
+    for (size_t irow = 0; irow < row_sizes.size(); ++irow) {
+        for (size_t icol = 0; icol < row_sizes[irow]; ++icol) {
+            index_iteration_sum += ones[irow][icol];
+        }
+    }
+    REQUIRE(index_iteration_sum == expected_sum);
+
+    //Iteration by first index + proxy (ones[irow] is used as a range with its own size())
+    float row_for_iteration_sum = 0.;
+    for (size_t irow = 0; irow < row_sizes.size(); ++irow) {
+        REQUIRE(ones[irow].size() == row_sizes[irow]);
+
+        for (float val : ones[irow]) {
+            row_for_iteration_sum += val;
+        }
+    }
+    REQUIRE(row_for_iteration_sum == expected_sum);
+
+    //Iteration by range (over all elements of the matrix)
+    float for_iteration_sum = 0.;
+    for (float val : ones) {
+        for_iteration_sum += val;
+    }
+    REQUIRE(for_iteration_sum == expected_sum);
+}
+
+TEST_CASE("Modification", "[vtr_ragged_matrix]") { //Writes through each access path and tracks the expected total in base_sum
+    std::vector<size_t> row_sizes = {1, 5, 3, 10};
+    vtr::FlatRaggedMatrix<float> ones(row_sizes, 1.0);
+
+    float base_sum = std::accumulate(row_sizes.begin(), row_sizes.end(), 0.f); //sum of row sizes; updated below to track the expected matrix total
+
+    //Index based modification
+    size_t irow = 3;
+    for (size_t icol = 0; icol < row_sizes[irow]; ++icol) {
+        ones[irow][icol] = 2.;
+    }
+    base_sum += row_sizes[irow]; //the expected total goes up by one per element written in that row
+    REQUIRE(std::accumulate(ones.begin(), ones.end(), 0.f) == base_sum);
+
+    //Range for row modification
+    irow = 2;
+    for (float& val : ones[irow]) {
+        val = 2.;
+    }
+    base_sum += row_sizes[irow]; //same bookkeeping as above, for row 2
+    REQUIRE(std::accumulate(ones.begin(), ones.end(), 0.f) == base_sum);
+
+    //Single element modification
+    ones[0][0] = 3.;
+    base_sum += 2.; //writing 3 to [0][0] is expected to raise the total by 2
+    REQUIRE(std::accumulate(ones.begin(), ones.end(), 0.f) == base_sum);
+}
diff --git a/third_party/vtr/libs/vtrutil/test/test_random.cpp b/third_party/vtr/libs/vtrutil/test/test_random.cpp
new file mode 100644
index 000000000..c2287749f
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/test/test_random.cpp
@@ -0,0 +1,16 @@
+#include "catch2/catch_test_macros.hpp"
+
+#include "vtr_random.h"
+
+#include <vector>
+#include <iostream>
+
+TEST_CASE("shuffle", "[vtr_random/shuffle]") { //Pins the exact permutation vtr::shuffle() produces for a fixed starting state
+    std::vector<int> numbers = {1, 2, 3, 4, 5};
+
+    vtr::RandState rand_state = 1; //fixed state so the result is reproducible
+    vtr::shuffle(numbers.begin(), numbers.end(), rand_state);
+
+    std::vector<int> numbers_shuffled_1 = {5, 2, 4, 1, 3}; //expected order for rand_state == 1
+    REQUIRE(numbers == numbers_shuffled_1);
+}
diff --git a/third_party/vtr/libs/vtrutil/test/test_range.cpp b/third_party/vtr/libs/vtrutil/test/test_range.cpp
new file mode 100644
index 000000000..da56318ce
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/test/test_range.cpp
@@ -0,0 +1,41 @@
+#include "catch2/catch_test_macros.hpp"
+
+#include "vtr_range.h"
+#include <vector>
+
+TEST_CASE("Range Ops", "[vtr_range]") { //Checks vtr::make_range() from an iterator pair, from a container, and for an empty range
+    std::vector<int> vec = {1, 2, 3};
+
+    {
+        //From iterator pair
+        auto range = vtr::make_range(vec.begin(), vec.end());
+        REQUIRE(range.size() == vec.size());
+
+        size_t i = 0;
+        for (auto elem : range) {
+            REQUIRE(elem == vec[i]);
+            i++;
+        }
+        REQUIRE(i == vec.size()); //the loop must have visited every element
+    }
+
+    {
+        //From container
+        auto range = vtr::make_range(vec);
+        REQUIRE(range.size() == vec.size());
+
+        size_t i = 0;
+        for (auto elem : range) {
+            REQUIRE(elem == vec[i]);
+            i++;
+        }
+        REQUIRE(i == vec.size()); //the loop must have visited every element
+    }
+
+    {
+        //Empty (begin and end are the same iterator)
+        auto range = vtr::make_range(vec.begin(), vec.begin());
+        REQUIRE(range.size() == 0);
+        REQUIRE(range.empty());
+    }
+}
diff --git a/third_party/vtr/libs/vtrutil/test/test_small_vector.cpp b/third_party/vtr/libs/vtrutil/test/test_small_vector.cpp
new file mode 100644
index 000000000..c0c47ab24
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/test/test_small_vector.cpp
@@ -0,0 +1,148 @@
+#include "catch2/catch_test_macros.hpp"
+
+#include "vtr_small_vector.h"
+#include <vector>
+
+namespace vtr {
+
+//Must be declared in namespace for argument dependent lookup to work with clang
+template<class T>
+bool operator==(const std::vector<T>& lhs, const vtr::small_vector<T>& rhs) { //test-only helper: element-wise comparison of a std::vector against a small_vector
+    if (lhs.size() != rhs.size()) return false;
+
+    for (size_t i = 0; i < lhs.size(); ++i) {
+        if (lhs[i] != rhs[i]) return false; //first mismatching element decides
+    }
+    return true;
+}
+
+} // namespace vtr
+
+TEST_CASE("Basic", "[vtr_small_vector]") { //Applies the same operations to a std::vector (reference) and a vtr::small_vector and compares them
+    std::vector<int> ref;
+    vtr::small_vector<int> vec;
+
+    //Create the vectors the same way
+    int i; //declared outside the loop: it keeps supplying fresh values for the rest of the test
+    for (i = 0; i < 100; i++) {
+        ref.push_back(i);
+        vec.push_back(i);
+
+        REQUIRE(ref == vec);
+    }
+
+    //Check forward iteration
+    auto vec_itr = vec.begin();
+    for (auto ref_itr = ref.begin(); ref_itr != ref.end(); ++ref_itr, ++vec_itr) {
+        REQUIRE(*ref_itr == *vec_itr);
+
+        int dist = std::distance(ref.begin(), ref_itr); //current position, reused to cross-check operator[]
+        REQUIRE(ref[dist] == vec[dist]);
+    }
+
+    //Check backward iteration
+    auto vec_rev_itr = vec.rbegin();
+    for (auto ref_itr = ref.rbegin(); ref_itr != ref.rend(); ++ref_itr, ++vec_rev_itr) {
+        REQUIRE(*ref_itr == *vec_rev_itr);
+    }
+
+    //Check front/back
+    REQUIRE(ref.front() == vec.front());
+    REQUIRE(ref.back() == vec.back());
+
+    //Push/Emplace/Pop back
+    ref.push_back(i);
+    vec.push_back(i);
+    REQUIRE(ref == vec);
+    ++i;
+
+    ref.emplace_back(i);
+    vec.emplace_back(i);
+    REQUIRE(ref == vec);
+    ++i;
+
+    ref.pop_back();
+    vec.pop_back();
+    REQUIRE(ref == vec);
+
+    //Test the short (internal storage) transition
+    size_t inplace_cap = vtr::small_vector<int>::INPLACE_CAPACITY;
+    REQUIRE(inplace_cap > 1); //the steps below go down to inplace_cap - 1 elements
+
+    //From long to short
+    ref.resize(inplace_cap + 1);
+    vec.resize(inplace_cap + 1);
+    REQUIRE(ref == vec);
+    REQUIRE(vec.size() > inplace_cap);
+
+    ref.pop_back();
+    vec.pop_back();
+    REQUIRE(ref == vec);
+    REQUIRE(vec.size() == inplace_cap);
+
+    ref.pop_back();
+    vec.pop_back();
+    REQUIRE(ref == vec);
+    REQUIRE(vec.size() < inplace_cap);
+
+    //From short to long
+    ref.push_back(i);
+    vec.push_back(i);
+    REQUIRE(ref == vec);
+    REQUIRE(vec.size() == inplace_cap);
+    ++i;
+
+    ref.push_back(i);
+    vec.push_back(i);
+    REQUIRE(ref == vec);
+    REQUIRE(vec.size() > inplace_cap);
+    ++i;
+
+#if 0
+    //Emplace at position
+    auto ref_itr = ref.begin() + ref.size() / 2;
+    ref.emplace(ref_itr, i);
+    vec_itr = vec.begin() + vec.size() / 2;
+    vec.emplace(vec_itr, i);
+    i++;
+    REQUIRE(ref == vec);
+#endif
+
+    //Insert single at position (the middle of each vector)
+    auto ref_itr = ref.begin() + ref.size() / 2;
+    ref.insert(ref_itr, i);
+    vec_itr = vec.begin() + vec.size() / 2;
+    vec.insert(vec_itr, i);
+    i++;
+    REQUIRE(ref == vec);
+
+    //Insert K at position (k copies of i in the middle of each vector)
+    int k = 5;
+    ref_itr = ref.begin() + ref.size() / 2;
+    ref.insert(ref_itr, k, i);
+    vec_itr = vec.begin() + vec.size() / 2;
+    vec.insert(vec_itr, k, i);
+    i++;
+    REQUIRE(ref == vec);
+
+    //Range insert (only applied to ref: the vec half is compiled out below, so the two differ until clear())
+    std::vector<int> range_values = {5, 4, 3, 2, 1};
+    ref_itr = ref.begin() + ref.size() / 2;
+    ref.insert(ref_itr, range_values.begin(), range_values.end());
+#if 0
+    vec_itr = vec.begin() + vec.size() / 2;
+    vec.insert(vec_itr, range_values.begin(), range_values.end());
+    REQUIRE(ref == vec);
+#endif
+
+    //Clear
+    ref.clear();
+    vec.clear();
+    REQUIRE(ref == vec);
+
+    //Add after clear
+    ref.push_back(i);
+    vec.push_back(i);
+    REQUIRE(ref == vec);
+    ++i;
+}
diff --git a/third_party/vtr/libs/vtrutil/test/test_strings.cpp b/third_party/vtr/libs/vtrutil/test/test_strings.cpp
new file mode 100644
index 000000000..b7fa4ea82
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/test/test_strings.cpp
@@ -0,0 +1,211 @@
+#include "catch2/catch_test_macros.hpp"
+
+#include "vtr_string_view.h"
+#include "vtr_string_interning.h"
+
+TEST_CASE("String view", "[vtr_string_view/string_view]") { //Checks vtr::string_view size, indexing, iteration, comparison, substr, hashing, copy and swap
+    vtr::string_view a("test");
+    vtr::string_view b("test"); //same text as a
+    vtr::string_view c("tes"); //a without its last character
+    vtr::string_view d("est"); //a without its first character
+    vtr::string_view e("es"); //the middle two characters of a
+
+    REQUIRE(a.size() == 4);
+    REQUIRE(b.size() == 4);
+    REQUIRE(c.size() == 3);
+    REQUIRE(d.size() == 3);
+    REQUIRE(e.size() == 2);
+
+    REQUIRE(a[0] == 't');
+    REQUIRE(a[1] == 'e');
+    REQUIRE(a[2] == 's');
+    REQUIRE(a[3] == 't');
+
+    auto itr = a.begin();
+    REQUIRE(*itr++ == 't');
+    REQUIRE(*itr++ == 'e');
+    REQUIRE(*itr++ == 's');
+    REQUIRE(*itr++ == 't');
+    REQUIRE(itr == a.end()); //four increments must land exactly on end()
+
+    REQUIRE(a.front() == 't');
+    REQUIRE(c.front() == 't');
+    REQUIRE(c.back() == 's');
+
+    REQUIRE(a == b);
+    REQUIRE(a <= b);
+    REQUIRE(a >= b);
+    REQUIRE(a != c);
+
+    REQUIRE(c < a); //"tes" is expected to order before "test"
+    REQUIRE(a >= c);
+
+    REQUIRE(a > c);
+    REQUIRE(c <= a);
+
+    REQUIRE(c != d);
+    REQUIRE(a.substr(0, 3) == c);
+    REQUIRE(a.substr(1, 3) == d);
+    REQUIRE(a.substr(1) == d); //with the second argument omitted the result is still "est"
+    REQUIRE(a.substr(1, 2) == e);
+    REQUIRE(std::hash<vtr::string_view>()(a) == std::hash<vtr::string_view>()(b)); //equal text must hash equally
+    REQUIRE(std::hash<vtr::string_view>()(a) != std::hash<vtr::string_view>()(c));
+
+    vtr::string_view f = a;
+    REQUIRE(b == f);
+
+    f = e;
+    REQUIRE(b != f);
+    REQUIRE(e == f);
+
+    std::swap(a, f); //before: a is "test", f is "es"; afterwards they must have traded places
+    REQUIRE(a == e);
+    REQUIRE(f == b);
+}
+
+TEST_CASE("Basic string internment", "[vtr_string_interning/string_internment]") { //Checks interned_string iteration, comparison, hashing, get(), copy and swap
+    vtr::string_internment internment;
+
+    vtr::interned_string a = internment.intern_string(vtr::string_view("test"));
+    vtr::interned_string b = internment.intern_string(vtr::string_view("test")); //same text as a
+    vtr::interned_string c = internment.intern_string(vtr::string_view("tes"));
+    vtr::interned_string d = internment.intern_string(vtr::string_view("est"));
+    vtr::interned_string e = internment.intern_string(vtr::string_view("es"));
+
+    auto itr = a.begin(&internment); //begin() takes the owning internment; end() is called without one
+    REQUIRE(*itr++ == 't');
+    REQUIRE(*itr++ == 'e');
+    REQUIRE(*itr++ == 's');
+    REQUIRE(*itr++ == 't');
+    REQUIRE(itr == a.end());
+
+    itr = a.begin(&internment); //same walk again, this time with pre-increment
+    REQUIRE(*itr == 't');
+    ++itr;
+    REQUIRE(*itr == 'e');
+    ++itr;
+    REQUIRE(*itr == 's');
+    ++itr;
+    REQUIRE(*itr == 't');
+    ++itr;
+    REQUIRE(itr == a.end());
+
+    REQUIRE(a == b); //equality is checked directly; ordering below goes through bind(&internment)
+    REQUIRE(a.bind(&internment) <= b.bind(&internment));
+    REQUIRE(a.bind(&internment) >= b.bind(&internment));
+    REQUIRE(a != c);
+
+    REQUIRE(c.bind(&internment) < a.bind(&internment));
+    REQUIRE(a.bind(&internment) >= c.bind(&internment));
+
+    REQUIRE(a.bind(&internment) > c.bind(&internment));
+    REQUIRE(c.bind(&internment) <= a.bind(&internment));
+
+    REQUIRE(c != d);
+    REQUIRE(std::hash<vtr::interned_string>()(a) == std::hash<vtr::interned_string>()(b)); //equal strings must hash equally
+    REQUIRE(std::hash<vtr::interned_string>()(a) != std::hash<vtr::interned_string>()(c));
+
+    std::string g; //reused as the output buffer for each get() call
+    a.get(&internment, &g);
+    REQUIRE(g == "test");
+    c.get(&internment, &g);
+    REQUIRE(g == "tes");
+    d.get(&internment, &g);
+    REQUIRE(g == "est");
+
+    vtr::interned_string f = a;
+    REQUIRE(b == f);
+
+    f = e;
+    REQUIRE(b != f);
+    REQUIRE(e == f);
+
+    std::swap(a, f); //before: a is "test", f is "es"; afterwards they must have traded places
+    REQUIRE(a == e);
+    REQUIRE(f == b);
+}
+
+static void test_internment_retreval(const vtr::string_internment* internment, vtr::interned_string str, const char* expect) { //Requires that str reads back as expect via both get() and iteration
+    std::string copy;
+    str.get(internment, &copy); //path 1: retrieval into a std::string through get()
+    REQUIRE(copy == expect);
+    copy.clear();
+    std::copy(str.begin(internment), str.end(), std::back_inserter(copy)); //path 2: character by character through the iterator pair
+    REQUIRE(copy == expect);
+}
+
+TEST_CASE("Split string internment", "[vtr_string_interning/string_internment]") { //Tracks unique_strings() while interning dot-separated names, then checks each one reads back
+    vtr::string_internment internment;
+
+    size_t unique_strings = 0; //running expectation for internment.unique_strings()
+
+    REQUIRE(internment.unique_strings() == unique_strings);
+    vtr::interned_string a = internment.intern_string(vtr::string_view("test"));
+    unique_strings += 1;
+    REQUIRE(internment.unique_strings() == unique_strings);
+    vtr::interned_string b = internment.intern_string(vtr::string_view("test.test")); //no new unique string expected
+    REQUIRE(internment.unique_strings() == unique_strings);
+    vtr::interned_string c = internment.intern_string(vtr::string_view("test.test.test")); //no new unique string expected
+    REQUIRE(internment.unique_strings() == unique_strings);
+    vtr::interned_string d = internment.intern_string(vtr::string_view("test.test.test.test")); //NOTE(review): the counts suggest up to three dot-separated parts are interned individually and longer names as one string -- confirm against vtr_string_interning.h
+    unique_strings += 1;
+    REQUIRE(internment.unique_strings() == unique_strings);
+
+    test_internment_retreval(&internment, a, "test");
+    test_internment_retreval(&internment, b, "test.test");
+    test_internment_retreval(&internment, c, "test.test.test");
+    test_internment_retreval(&internment, d, "test.test.test.test");
+
+    vtr::interned_string f = internment.intern_string(vtr::string_view("a"));
+    unique_strings += 1;
+    REQUIRE(internment.unique_strings() == unique_strings);
+    vtr::interned_string g = internment.intern_string(vtr::string_view("b.c"));
+    unique_strings += 2;
+    REQUIRE(internment.unique_strings() == unique_strings);
+    vtr::interned_string h = internment.intern_string(vtr::string_view("d.e.f"));
+    unique_strings += 3;
+    REQUIRE(internment.unique_strings() == unique_strings);
+    vtr::interned_string i = internment.intern_string(vtr::string_view("g.h.i.j")); //four parts, yet only one new unique string expected
+    unique_strings += 1;
+    REQUIRE(internment.unique_strings() == unique_strings);
+
+    test_internment_retreval(&internment, f, "a");
+    test_internment_retreval(&internment, g, "b.c");
+    test_internment_retreval(&internment, h, "d.e.f");
+    test_internment_retreval(&internment, i, "g.h.i.j");
+
+    vtr::interned_string j = internment.intern_string(vtr::string_view(".")); //names made only of separators
+    unique_strings += 1;
+    REQUIRE(internment.unique_strings() == unique_strings);
+    vtr::interned_string k = internment.intern_string(vtr::string_view(".."));
+    REQUIRE(internment.unique_strings() == unique_strings);
+    vtr::interned_string l = internment.intern_string(vtr::string_view("..."));
+    unique_strings += 1;
+    REQUIRE(internment.unique_strings() == unique_strings);
+    vtr::interned_string m = internment.intern_string(vtr::string_view("...."));
+    unique_strings += 1;
+    REQUIRE(internment.unique_strings() == unique_strings);
+
+    test_internment_retreval(&internment, j, ".");
+    test_internment_retreval(&internment, k, "..");
+    test_internment_retreval(&internment, l, "...");
+    test_internment_retreval(&internment, m, "....");
+
+    vtr::interned_string n = internment.intern_string(vtr::string_view(".q")); //leading separator: one new unique string expected
+    unique_strings += 1;
+    REQUIRE(internment.unique_strings() == unique_strings);
+    vtr::interned_string o = internment.intern_string(vtr::string_view(".a.")); //the remaining names are expected to add no new unique strings
+    REQUIRE(internment.unique_strings() == unique_strings);
+    vtr::interned_string p = internment.intern_string(vtr::string_view("b.c.d"));
+    REQUIRE(internment.unique_strings() == unique_strings);
+    vtr::interned_string q = internment.intern_string(vtr::string_view("e..f"));
+    REQUIRE(internment.unique_strings() == unique_strings);
+    vtr::interned_string r = internment.intern_string(vtr::string_view("e."));
+    REQUIRE(internment.unique_strings() == unique_strings);
+
+    test_internment_retreval(&internment, n, ".q");
+    test_internment_retreval(&internment, o, ".a.");
+    test_internment_retreval(&internment, p, "b.c.d");
+    test_internment_retreval(&internment, q, "e..f");
+    test_internment_retreval(&internment, r, "e.");
+}
diff --git a/third_party/vtr/libs/vtrutil/test/test_strong_id.cpp b/third_party/vtr/libs/vtrutil/test/test_strong_id.cpp
new file mode 100644
index 000000000..d9b766a17
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/test/test_strong_id.cpp
@@ -0,0 +1,130 @@
+#include "catch2/catch_test_macros.hpp"
+
+#include "vtr_strong_id.h"
+#include "vtr_strong_id_range.h"
+
+struct t_test_tag;
+using TestStrongId = vtr::StrongId<t_test_tag>;
+
+TEST_CASE("StrongId", "[StrongId/StrongId]") { //Checks vtr::StrongId validity (bool conversion), equality and ordering
+    TestStrongId a; //a and b are default-constructed and expected to be invalid
+    TestStrongId b;
+    TestStrongId c(0);
+    TestStrongId d(0);
+    TestStrongId e(1);
+    TestStrongId f(2);
+
+    REQUIRE(!bool(a));
+    REQUIRE(!bool(b));
+    REQUIRE(bool(c)); //an id constructed from 0 is still valid
+    REQUIRE(bool(d));
+    REQUIRE(bool(e));
+    REQUIRE(bool(f));
+
+    REQUIRE(a == b);
+    REQUIRE(a == TestStrongId::INVALID()); //a default-constructed id equals INVALID()
+
+    REQUIRE(c == d);
+    REQUIRE(c != a);
+    REQUIRE(c != TestStrongId::INVALID());
+    REQUIRE(d != TestStrongId::INVALID());
+
+    REQUIRE(c != e);
+    REQUIRE(c != f);
+    REQUIRE(e != f);
+
+    REQUIRE(c < e); //ordering is expected to follow the constructor values 0 < 1 < 2
+    REQUIRE(c < f);
+    REQUIRE(e < f);
+    REQUIRE(!(e < c));
+    REQUIRE(!(f < c));
+    REQUIRE(!(f < e));
+}
+
+TEST_CASE("StrongIdIterator", "[StrongId/StrongIdIterator]") { //Checks vtr::StrongIdIterator dereference, comparison, distance, indexing and arithmetic
+    TestStrongId a(0);
+    TestStrongId b(1);
+    TestStrongId c(5);
+    TestStrongId d(5); //same value as c, so c_iter and d_iter must behave identically
+
+    vtr::StrongIdIterator<TestStrongId> a_iter(a);
+    vtr::StrongIdIterator<TestStrongId> b_iter(b);
+    vtr::StrongIdIterator<TestStrongId> c_iter(c);
+    vtr::StrongIdIterator<TestStrongId> d_iter(d);
+
+    REQUIRE(*a_iter == a); //dereferencing yields the id the iterator was constructed from
+    REQUIRE(*b_iter == b);
+    REQUIRE(*c_iter == c);
+    REQUIRE(*c_iter == d);
+    REQUIRE(*d_iter == c);
+    REQUIRE(*d_iter == d);
+
+    REQUIRE(a_iter != b_iter);
+    REQUIRE(a_iter != c_iter);
+    REQUIRE(a_iter != d_iter);
+
+    REQUIRE(c_iter == d_iter);
+    REQUIRE(c_iter != a_iter);
+    REQUIRE(c_iter != b_iter);
+
+    REQUIRE(std::distance(a_iter, b_iter) == 1);
+    REQUIRE(std::distance(c_iter, d_iter) == 0);
+    REQUIRE(std::distance(d_iter, a_iter) == -5); //a negative distance is expected when going backwards
+
+    REQUIRE(a_iter < b_iter);
+    REQUIRE(b_iter < c_iter);
+    REQUIRE(!(c_iter < b_iter));
+
+    REQUIRE(a_iter[0] == a); //indexing is relative to the iterator's current id
+    REQUIRE(a_iter[1] == b);
+    REQUIRE(a_iter[5] == c);
+    REQUIRE(c_iter[0] == c);
+    REQUIRE(c_iter[-4] == b);
+    REQUIRE(c_iter[-5] == a);
+
+    REQUIRE((a_iter + 5) == c_iter);
+    REQUIRE(a_iter == (c_iter - 5));
+    a_iter += 5; //in-place arithmetic: a_iter must now match c_iter (id 5)
+    REQUIRE(a_iter == c_iter);
+    a_iter -= 4; //and after stepping back by 4 it must match b_iter (id 1)
+    REQUIRE(a_iter == b_iter);
+}
+
+TEST_CASE("StrongIdRange", "[StrongId/StrongIdRange]") { //Checks vtr::StrongIdRange size(), empty() and range-for iteration
+    TestStrongId a(0);
+    TestStrongId b(0);
+    TestStrongId c(5);
+    TestStrongId d(1);
+
+    vtr::StrongIdRange<TestStrongId> r1(a, b); //both bounds are id 0: expected to be empty
+    REQUIRE(r1.size() == 0);
+    REQUIRE(r1.empty());
+
+    vtr::StrongIdRange<TestStrongId> r2(a, c); //from id 0 up to, but not including, id 5 (see the loop below)
+    REQUIRE(r2.size() == 5);
+    REQUIRE(!r2.empty());
+
+    vtr::StrongIdRange<TestStrongId> r3(d, c); //from id 1 up to, but not including, id 5
+    REQUIRE(r3.size() == 4);
+    REQUIRE(!r3.empty());
+
+    int count = 0;
+    for (TestStrongId id : r1) {
+        (void)id; //silences the unused-variable warning; the body must never run
+        count += 1;
+    }
+    REQUIRE(count == 0);
+
+    for (TestStrongId id : r2) { //count is still 0 here, so it doubles as the expected id value
+        REQUIRE(TestStrongId(count) == id);
+        count += 1;
+    }
+    REQUIRE(count == 5);
+
+    count = 0;
+    for (TestStrongId id : r3) {
+        REQUIRE(TestStrongId(count + 1) == id); //r3 starts at id 1, hence the +1 offset
+        count += 1;
+    }
+    REQUIRE(count == 4);
+}
diff --git a/third_party/vtr/libs/vtrutil/test/test_vector.cpp b/third_party/vtr/libs/vtrutil/test/test_vector.cpp
new file mode 100644
index 000000000..247a3df57
--- /dev/null
+++ b/third_party/vtr/libs/vtrutil/test/test_vector.cpp
@@ -0,0 +1,57 @@
+#include "catch2/catch_test_macros.hpp"
+
+#include "vtr_vector.h"
+#include "vtr_strong_id.h"
+
+#include <ostream>
+
+struct test_tag;
+typedef vtr::StrongId<test_tag> TestId;
+
+std::ostream& operator<<(std::ostream& os, const TestId id); //forward declaration of the printer defined right below
+
+std::ostream& operator<<(std::ostream& os, const TestId id) { //Writes id to os as "TestId(" + its size_t value + ")" and returns os
+    os << "TestId(" << size_t(id) << ")";
+    return os;
+}
+
+TEST_CASE("Basic Ops", "[vtr_vector]") { //Checks vtr::vector push_back/emplace_back, size, TestId-keyed indexing, front and back
+    vtr::vector<TestId, int> vec;
+
+    vec.push_back(1);
+    vec.push_back(2);
+    vec.push_back(3);
+
+    REQUIRE(vec.size() == 3);
+    REQUIRE(vec[TestId(0)] == 1); //elements are addressed by TestId; ids follow insertion order starting at 0
+    REQUIRE(vec[TestId(1)] == 2);
+    REQUIRE(vec[TestId(2)] == 3);
+
+    vec.emplace_back(4);
+
+    REQUIRE(vec.size() == 4);
+    REQUIRE(vec[TestId(3)] == 4);
+
+    REQUIRE(vec.front() == 1);
+    REQUIRE(vec.back() == 4);
+}
+
+TEST_CASE("Key Access", "[vtr_vector]") { //Checks that vtr::vector::keys() yields one TestId per element, in index order
+    vtr::vector<TestId, int> vec;
+
+    vec.push_back(1);
+    vec.push_back(2);
+    vec.push_back(3);
+    vec.push_back(4);
+
+    std::vector<TestId> expected_keys = {TestId(0), TestId(1), TestId(2), TestId(3)}; //one key per pushed element
+
+    auto keys = vec.keys();
+    REQUIRE(keys.size() == vec.size());
+
+    size_t i = 0;
+    for (TestId key : keys) {
+        REQUIRE(key == expected_keys[i]);
+        ++i;
+    }
+}
diff --git a/third_party/vtr/verilog/eltwise_layer.v b/third_party/vtr/verilog/eltwise_layer.v
new file mode 100644
index 000000000..11199fb90
--- /dev/null
+++ b/third_party/vtr/verilog/eltwise_layer.v
@@ -0,0 +1,3057 @@
+//////////////////////////////////////////////////////////////////////////////
+// Author: Aman Arora
+//////////////////////////////////////////////////////////////////////////////
+
+`timescale 1ns/1ns
+///////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////
+// Eltwise layer
+///////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////
+// Precision BF16
+//Each PE has 1 multiplier, an adder and a subtractor.
+//There are 4 PEs in each compute unit. 
+//There are 6 such compute units in the whole layer.
+//So, total compute throughput is 24 ops per cycle.
+//The "per cycle" is because the adder/sub/mul are
+//pipelined. Although each operation may take more than 1 cycle,
+//in the steady state one result will come out every cycle.
+//
+//There are 6 BRAMs for each input operand. Each location in a BRAM
+//stores 4 inputs. So, the read bandwidth is 24 elements
+//per cycle. This matches the compute throughput. So, we
+//utilize each PE every cycle. There are 6 BRAMs for output.
+//We can write 4 elements per cycle.
+//
+//There are two modes of operation: 
+// 1. Vector/Matrix mode
+//    In this mode, both operands are matrices/vectors.
+//    They are read from BRAMs (A and B). The operation 
+//    selected (using the op input) is performed. This mode
+//    can be used for operations such as residual add, or 
+//    dropout.
+// 2. Scalar mode
+//    In this mode, one operand is a matrix/vector and the
+//    other operand is a scalar. It could be the mean or 
+//    variance of a normalization layer for example. The 
+//    scalar input is provided from the top-level of the design
+//    so it can be easily modified at runtime.
+//
+//Important inputs:
+//   mode: 
+//      0 -> Both operands (A and B) are matrices/vectors. Result is a matrix/vector.
+//      1 -> Operand A is matrix/vector. Operand B is scalar. Result is a matrix/vector.
+//   op:
+//      00 -> Addition
+//      01 -> Subtraction
+//      10 -> Multiplication
+//
+//The whole design can operate on 24xN matrices.  
+//Typically, to use this design, we'd break a large input
+//matrix into 24 column sections and process the matrix 
+//section by section. The number of rows will be programmed
+//in the "iterations" register in the design.
+
+
+`define BFLOAT16 
+
+// IEEE Half Precision => EXPONENT = 5, MANTISSA = 10
+// BFLOAT16 => EXPONENT = 8, MANTISSA = 7 
+
+`ifdef BFLOAT16
+`define EXPONENT 8
+`define MANTISSA 7
+`else // for ieee half precision fp16
+`define EXPONENT 5
+`define MANTISSA 10
+`endif
+
+`define SIGN 1
+`define DWIDTH (`SIGN+`EXPONENT+`MANTISSA)
+
+`define AWIDTH 10
+`define MEM_SIZE 1024
+`define DESIGN_SIZE 12
+`define CU_SIZE 4
+`define MASK_WIDTH 4
+`define MEM_ACCESS_LATENCY 1
+
+`define REG_DATAWIDTH 32
+`define REG_ADDRWIDTH 8
+`define ITERATIONS_WIDTH 32
+
+`define REG_STDN_ADDR 32'h4
+`define REG_MATRIX_A_ADDR 32'he
+`define REG_MATRIX_B_ADDR 32'h12
+`define REG_MATRIX_C_ADDR 32'h16
+`define REG_VALID_MASK_A_ADDR 32'h20
+`define REG_VALID_MASK_B_ADDR 32'h5c
+
+`define REG_ITERATIONS_ADDR 32'h40
+
+//This is the pipeline depth of the PEs (adder/mult)
+`define PE_PIPELINE_DEPTH 5
+
+module eltwise_layer(
+  input clk,
+  input clk_mem,
+  input resetn,
+  input pe_resetn,
+  input        [`REG_ADDRWIDTH-1:0] PADDR,
+  input                             PWRITE,
+  input                             PSEL,
+  input                             PENABLE,
+  input        [`REG_DATAWIDTH-1:0] PWDATA,
+  output reg   [`REG_DATAWIDTH-1:0] PRDATA,
+  output reg                        PREADY,
+  input [`DWIDTH-1:0] scalar_inp,
+  input mode, // mode==0 -> vector/matrix, mode==1 -> scalar
+  input  [1:0] op, //op==11 -> Mul, op==01 -> Sub, op==00 -> Add
+  input  [7:0] bram_select,
+  input  [`AWIDTH-1:0] bram_addr_ext,
+  output reg [`CU_SIZE*`DWIDTH-1:0] bram_rdata_ext,
+  input  [`CU_SIZE*`DWIDTH-1:0] bram_wdata_ext,
+  input  [`CU_SIZE-1:0] bram_we_ext
+);
+
+
+  wire PCLK;
+  assign PCLK = clk;
+  wire PRESETn;
+  assign PRESETn = resetn;
+  reg start_reg;
+  reg clear_done_reg;
+
+  //Dummy register to sync all other invalid/unimplemented addresses
+  reg [`REG_DATAWIDTH-1:0] reg_dummy;
+  
+  reg [`AWIDTH-1:0] bram_addr_a_0_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_0_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_0_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_a_0_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_a_2_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_2_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_2_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_a_2_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_a_4_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_4_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_4_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_a_4_ext;
+
+  reg [`AWIDTH-1:0] bram_addr_a_1_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_1_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_1_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_a_1_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_a_3_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_3_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_3_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_a_3_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_a_5_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_5_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_5_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_a_5_ext;
+
+    
+  reg [`AWIDTH-1:0] bram_addr_b_0_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_0_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_0_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_b_0_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_b_1_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_1_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_1_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_b_1_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_b_2_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_2_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_2_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_b_2_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_b_3_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_3_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_3_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_b_3_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_b_4_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_4_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_4_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_b_4_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_b_5_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_5_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_5_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_b_5_ext;
+
+  reg [`AWIDTH-1:0] bram_addr_c_0_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_0_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_0_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_c_0_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_c_1_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_1_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_1_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_c_1_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_c_2_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_2_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_2_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_c_2_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_c_3_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_3_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_3_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_c_3_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_c_4_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_4_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_4_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_c_4_ext;
+    
+  reg [`AWIDTH-1:0] bram_addr_c_5_ext;
+  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_5_ext;
+  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_5_ext;
+  reg [`MASK_WIDTH-1:0] bram_we_c_5_ext;
+    
+	wire [`AWIDTH-1:0] bram_addr_a_0;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_0;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_0;
+	wire [`MASK_WIDTH-1:0] bram_we_a_0;
+	wire bram_en_a_0;
+    
+	wire [`AWIDTH-1:0] bram_addr_a_2;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_2;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_2;
+	wire [`MASK_WIDTH-1:0] bram_we_a_2;
+	wire bram_en_a_2;
+    
+	wire [`AWIDTH-1:0] bram_addr_a_4;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_4;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_4;
+	wire [`MASK_WIDTH-1:0] bram_we_a_4;
+	wire bram_en_a_4;
+
+	wire [`AWIDTH-1:0] bram_addr_a_1;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_1;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_1;
+	wire [`MASK_WIDTH-1:0] bram_we_a_1;
+	wire bram_en_a_1;
+    
+	wire [`AWIDTH-1:0] bram_addr_a_3;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_3;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_3;
+	wire [`MASK_WIDTH-1:0] bram_we_a_3;
+	wire bram_en_a_3;
+    
+	wire [`AWIDTH-1:0] bram_addr_a_5;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_5;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_5;
+	wire [`MASK_WIDTH-1:0] bram_we_a_5;
+	wire bram_en_a_5;
+    
+	wire [`AWIDTH-1:0] bram_addr_b_0;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_0;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_0;
+	wire [`MASK_WIDTH-1:0] bram_we_b_0;
+	wire bram_en_b_0;
+    
+	wire [`AWIDTH-1:0] bram_addr_b_1;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_1;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_1;
+	wire [`MASK_WIDTH-1:0] bram_we_b_1;
+	wire bram_en_b_1;
+    
+	wire [`AWIDTH-1:0] bram_addr_b_2;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_2;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_2;
+	wire [`MASK_WIDTH-1:0] bram_we_b_2;
+	wire bram_en_b_2;
+    
+	wire [`AWIDTH-1:0] bram_addr_b_3;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_3;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_3;
+	wire [`MASK_WIDTH-1:0] bram_we_b_3;
+	wire bram_en_b_3;
+
+  wire [`AWIDTH-1:0] bram_addr_b_4;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_4;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_4;
+	wire [`MASK_WIDTH-1:0] bram_we_b_4;
+	wire bram_en_b_4;
+    
+	wire [`AWIDTH-1:0] bram_addr_b_5;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_5;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_5;
+	wire [`MASK_WIDTH-1:0] bram_we_b_5;
+	wire bram_en_b_5;
+
+	wire [`AWIDTH-1:0] bram_addr_c_0;
+	wire [`AWIDTH-1:0] bram_addr_c_1;
+	wire [`AWIDTH-1:0] bram_addr_c_2;
+	wire [`AWIDTH-1:0] bram_addr_c_3;
+	wire [`AWIDTH-1:0] bram_addr_c_4;
+	wire [`AWIDTH-1:0] bram_addr_c_5;
+
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_0;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_1;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_2;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_3;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_4;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_5;
+
+	wire [`MASK_WIDTH-1:0] bram_we_c_0;
+	wire [`MASK_WIDTH-1:0] bram_we_c_1;
+	wire [`MASK_WIDTH-1:0] bram_we_c_2;
+	wire [`MASK_WIDTH-1:0] bram_we_c_3;
+	wire [`MASK_WIDTH-1:0] bram_we_c_4;
+	wire [`MASK_WIDTH-1:0] bram_we_c_5;
+    
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_0;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_1;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_2;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_3;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_4;
+	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_5;
+
+  always @ (posedge clk) begin // steer the shared *_ext request to the second port of one BRAM
+    case (bram_select)
+      // Select map: 0-5 -> A banks in the order 0,2,4,1,3,5; 6-11 -> B banks 0-5; 12-17 -> C banks 0-5.
+      0: begin
+        bram_addr_a_0_ext <= bram_addr_ext;
+        bram_wdata_a_0_ext <= bram_wdata_ext;
+        bram_we_a_0_ext <= bram_we_ext;
+        bram_rdata_ext <= bram_rdata_a_0_ext;
+      end
+      // Only the selected bank's registers are updated; every other bank keeps its last request.
+      1: begin
+        bram_addr_a_2_ext <= bram_addr_ext;
+        bram_wdata_a_2_ext <= bram_wdata_ext;
+        bram_we_a_2_ext <= bram_we_ext;
+        bram_rdata_ext <= bram_rdata_a_2_ext;
+      end
+
+      2: begin
+        bram_addr_a_4_ext <= bram_addr_ext;
+        bram_wdata_a_4_ext <= bram_wdata_ext;
+        bram_we_a_4_ext <= bram_we_ext;
+        bram_rdata_ext <= bram_rdata_a_4_ext;
+      end
+
+      3: begin
+        bram_addr_a_1_ext <= bram_addr_ext;
+        bram_wdata_a_1_ext <= bram_wdata_ext;
+        bram_we_a_1_ext <= bram_we_ext;
+        bram_rdata_ext <= bram_rdata_a_1_ext;
+      end
+
+      4: begin
+        bram_addr_a_3_ext <= bram_addr_ext;
+        bram_wdata_a_3_ext <= bram_wdata_ext;
+        bram_we_a_3_ext <= bram_we_ext;
+        bram_rdata_ext <= bram_rdata_a_3_ext;
+      end
+
+      5: begin
+        bram_addr_a_5_ext <= bram_addr_ext;
+        bram_wdata_a_5_ext <= bram_wdata_ext;
+        bram_we_a_5_ext <= bram_we_ext;
+        bram_rdata_ext <= bram_rdata_a_5_ext;
+      end
+      // Arm 6 used blocking '=' before; it is non-blocking now, like every other arm of this clocked block.
+      6: begin
+        bram_addr_b_0_ext <= bram_addr_ext;
+        bram_wdata_b_0_ext <= bram_wdata_ext;
+        bram_we_b_0_ext <= bram_we_ext;
+        bram_rdata_ext <= bram_rdata_b_0_ext;
+      end
+
+      7: begin
+        bram_addr_b_1_ext <= bram_addr_ext;
+        bram_wdata_b_1_ext <= bram_wdata_ext;
+        bram_we_b_1_ext <= bram_we_ext;
+        bram_rdata_ext <= bram_rdata_b_1_ext;
+      end
+
+      8: begin
+        bram_addr_b_2_ext <= bram_addr_ext;
+        bram_wdata_b_2_ext <= bram_wdata_ext;
+        bram_we_b_2_ext <= bram_we_ext;
+        bram_rdata_ext <= bram_rdata_b_2_ext;
+      end
+
+      9: begin
+        bram_addr_b_3_ext <= bram_addr_ext;
+        bram_wdata_b_3_ext <= bram_wdata_ext;
+        bram_we_b_3_ext <= bram_we_ext;
+        bram_rdata_ext <= bram_rdata_b_3_ext;
+      end
+
+      10: begin
+        bram_addr_b_4_ext <= bram_addr_ext;
+        bram_wdata_b_4_ext <= bram_wdata_ext;
+        bram_we_b_4_ext <= bram_we_ext;
+        bram_rdata_ext <= bram_rdata_b_4_ext;
+      end
+
+      11: begin
+        bram_addr_b_5_ext <= bram_addr_ext;
+        bram_wdata_b_5_ext <= bram_wdata_ext;
+        bram_we_b_5_ext <= bram_we_ext;
+        bram_rdata_ext <= bram_rdata_b_5_ext;
+      end
+
+      12: begin
+        bram_addr_c_0_ext <= bram_addr_ext;
+        bram_wdata_c_0_ext <= bram_wdata_ext;
+        bram_we_c_0_ext <= bram_we_ext;
+        bram_rdata_ext <= bram_rdata_c_0_ext;
+      end
+
+      13: begin
+        bram_addr_c_1_ext <= bram_addr_ext;
+        bram_wdata_c_1_ext <= bram_wdata_ext;
+        bram_we_c_1_ext <= bram_we_ext;
+        bram_rdata_ext <= bram_rdata_c_1_ext;
+      end
+
+      14: begin
+        bram_addr_c_2_ext <= bram_addr_ext;
+        bram_wdata_c_2_ext <= bram_wdata_ext;
+        bram_we_c_2_ext <= bram_we_ext;
+        bram_rdata_ext <= bram_rdata_c_2_ext;
+      end
+
+      15: begin
+        bram_addr_c_3_ext <= bram_addr_ext;
+        bram_wdata_c_3_ext <= bram_wdata_ext;
+        bram_we_c_3_ext <= bram_we_ext;
+        bram_rdata_ext <= bram_rdata_c_3_ext;
+      end
+
+      16: begin
+        bram_addr_c_4_ext <= bram_addr_ext;
+        bram_wdata_c_4_ext <= bram_wdata_ext;
+        bram_we_c_4_ext <= bram_we_ext;
+        bram_rdata_ext <= bram_rdata_c_4_ext;
+      end
+
+      17: begin
+        bram_addr_c_5_ext <= bram_addr_ext;
+        bram_wdata_c_5_ext <= bram_wdata_ext;
+        bram_we_c_5_ext <= bram_we_ext;
+        bram_rdata_ext <= bram_rdata_c_5_ext;
+      end
+      // Out-of-range select: the request is still latched onto bank C5's port, read data is forced to 0.
+      default: begin
+        bram_addr_c_5_ext <= bram_addr_ext;
+        bram_wdata_c_5_ext <= bram_wdata_ext;
+        bram_we_c_5_ext <= bram_we_ext;
+        bram_rdata_ext <= 0;
+      end
+    endcase
+  end
+  
+/////////////////////////////////////////////////
+// BRAMs to store matrix A
+/////////////////////////////////////////////////
+
+
+  // BRAM matrix A 0
+ram matrix_A_0(
+  .addr0(bram_addr_a_0),
+  .d0(bram_wdata_a_0), 
+  .we0(bram_we_a_0), 
+  .q0(bram_rdata_a_0), 
+  .addr1(bram_addr_a_0_ext),
+  .d1(bram_wdata_a_0_ext), 
+  .we1(bram_we_a_0_ext), 
+  .q1(bram_rdata_a_0_ext), 
+  .clk(clk_mem));
+  	
+  // BRAM matrix A 2
+ram matrix_A_2(
+  .addr0(bram_addr_a_2),
+  .d0(bram_wdata_a_2), 
+  .we0(bram_we_a_2), 
+  .q0(bram_rdata_a_2), 
+  .addr1(bram_addr_a_2_ext),
+  .d1(bram_wdata_a_2_ext), 
+  .we1(bram_we_a_2_ext), 
+  .q1(bram_rdata_a_2_ext), 
+  .clk(clk_mem));
+  	
+  // BRAM matrix A 4
+ram matrix_A_4(
+  .addr0(bram_addr_a_4),
+  .d0(bram_wdata_a_4), 
+  .we0(bram_we_a_4), 
+  .q0(bram_rdata_a_4), 
+  .addr1(bram_addr_a_4_ext),
+  .d1(bram_wdata_a_4_ext), 
+  .we1(bram_we_a_4_ext), 
+  .q1(bram_rdata_a_4_ext), 
+  .clk(clk_mem));
+
+
+    // BRAM matrix A 1
+ram matrix_A_1(
+  .addr0(bram_addr_a_1),
+  .d0(bram_wdata_a_1), 
+  .we0(bram_we_a_1), 
+  .q0(bram_rdata_a_1), 
+  .addr1(bram_addr_a_1_ext),
+  .d1(bram_wdata_a_1_ext), 
+  .we1(bram_we_a_1_ext), 
+  .q1(bram_rdata_a_1_ext), 
+  .clk(clk_mem));
+  	
+  // BRAM matrix A 3
+ram matrix_A_3(
+  .addr0(bram_addr_a_3),
+  .d0(bram_wdata_a_3), 
+  .we0(bram_we_a_3), 
+  .q0(bram_rdata_a_3), 
+  .addr1(bram_addr_a_3_ext),
+  .d1(bram_wdata_a_3_ext), 
+  .we1(bram_we_a_3_ext), 
+  .q1(bram_rdata_a_3_ext), 
+  .clk(clk_mem));
+  	
+  // BRAM matrix A 5
+ram matrix_A_5(
+  .addr0(bram_addr_a_5),
+  .d0(bram_wdata_a_5), 
+  .we0(bram_we_a_5), 
+  .q0(bram_rdata_a_5), 
+  .addr1(bram_addr_a_5_ext),
+  .d1(bram_wdata_a_5_ext), 
+  .we1(bram_we_a_5_ext), 
+  .q1(bram_rdata_a_5_ext), 
+  .clk(clk_mem));
+
+////////////////////////////////////////////////
+// BRAMs to store matrix B
+/////////////////////////////////////////////////
+
+
+  // BRAM matrix B 0
+ram matrix_B_0(
+  .addr0(bram_addr_b_0),
+  .d0(bram_wdata_b_0), 
+  .we0(bram_we_b_0), 
+  .q0(bram_rdata_b_0), 
+  .addr1(bram_addr_b_0_ext),
+  .d1(bram_wdata_b_0_ext), 
+  .we1(bram_we_b_0_ext), 
+  .q1(bram_rdata_b_0_ext), 
+  .clk(clk_mem));
+  	
+  // BRAM matrix B 1
+ram matrix_B_1(
+  .addr0(bram_addr_b_1),
+  .d0(bram_wdata_b_1), 
+  .we0(bram_we_b_1), 
+  .q0(bram_rdata_b_1), 
+  .addr1(bram_addr_b_1_ext),
+  .d1(bram_wdata_b_1_ext), 
+  .we1(bram_we_b_1_ext), 
+  .q1(bram_rdata_b_1_ext), 
+  .clk(clk_mem));
+  	
+  // BRAM matrix B 2
+ram matrix_B_2(
+  .addr0(bram_addr_b_2),
+  .d0(bram_wdata_b_2), 
+  .we0(bram_we_b_2), 
+  .q0(bram_rdata_b_2), 
+  .addr1(bram_addr_b_2_ext),
+  .d1(bram_wdata_b_2_ext), 
+  .we1(bram_we_b_2_ext), 
+  .q1(bram_rdata_b_2_ext), 
+  .clk(clk_mem));
+
+  	
+  // BRAM matrix B 3
+ram matrix_B_3(
+  .addr0(bram_addr_b_3),
+  .d0(bram_wdata_b_3), 
+  .we0(bram_we_b_3), 
+  .q0(bram_rdata_b_3), 
+  .addr1(bram_addr_b_3_ext),
+  .d1(bram_wdata_b_3_ext), 
+  .we1(bram_we_b_3_ext), 
+  .q1(bram_rdata_b_3_ext), 
+  .clk(clk_mem));
+  	
+  // BRAM matrix B 4
+ram matrix_B_4(
+  .addr0(bram_addr_b_4),
+  .d0(bram_wdata_b_4), 
+  .we0(bram_we_b_4), 
+  .q0(bram_rdata_b_4), 
+  .addr1(bram_addr_b_4_ext),
+  .d1(bram_wdata_b_4_ext), 
+  .we1(bram_we_b_4_ext), 
+  .q1(bram_rdata_b_4_ext), 
+  .clk(clk_mem));
+
+
+  // BRAM matrix B 5
+ram matrix_B_5(
+  .addr0(bram_addr_b_5),
+  .d0(bram_wdata_b_5), 
+  .we0(bram_we_b_5), 
+  .q0(bram_rdata_b_5), 
+  .addr1(bram_addr_b_5_ext),
+  .d1(bram_wdata_b_5_ext), 
+  .we1(bram_we_b_5_ext), 
+  .q1(bram_rdata_b_5_ext), 
+  .clk(clk_mem));
+
+////////////////////////////////////////////////
+// BRAMs to store matrix C
+/////////////////////////////////////////////////
+
+
+  // BRAM matrix C 0
+ram matrix_C_0(
+  .addr0(bram_addr_c_0),
+  .d0(bram_wdata_c_0), 
+  .we0(bram_we_c_0), 
+  .q0(bram_rdata_c_0), 
+  .addr1(bram_addr_c_0_ext),
+  .d1(bram_wdata_c_0_ext), 
+  .we1(bram_we_c_0_ext), 
+  .q1(bram_rdata_c_0_ext), 
+  .clk(clk_mem));
+  	
+  // BRAM matrix C 1
+ram matrix_C_1(
+  .addr0(bram_addr_c_1),
+  .d0(bram_wdata_c_1), 
+  .we0(bram_we_c_1), 
+  .q0(bram_rdata_c_1), 
+  .addr1(bram_addr_c_1_ext),
+  .d1(bram_wdata_c_1_ext), 
+  .we1(bram_we_c_1_ext), 
+  .q1(bram_rdata_c_1_ext), 
+  .clk(clk_mem));
+  	
+  // BRAM matrix C 2
+ram matrix_C_2(
+  .addr0(bram_addr_c_2),
+  .d0(bram_wdata_c_2), 
+  .we0(bram_we_c_2), 
+  .q0(bram_rdata_c_2), 
+  .addr1(bram_addr_c_2_ext),
+  .d1(bram_wdata_c_2_ext), 
+  .we1(bram_we_c_2_ext), 
+  .q1(bram_rdata_c_2_ext), 
+  .clk(clk_mem));
+
+  	
+  // BRAM matrix C 3
+ram matrix_C_3(
+  .addr0(bram_addr_c_3),
+  .d0(bram_wdata_c_3), 
+  .we0(bram_we_c_3), 
+  .q0(bram_rdata_c_3), 
+  .addr1(bram_addr_c_3_ext),
+  .d1(bram_wdata_c_3_ext), 
+  .we1(bram_we_c_3_ext), 
+  .q1(bram_rdata_c_3_ext), 
+  .clk(clk_mem));
+  	
+  // BRAM matrix C 4
+ram matrix_C_4(
+  .addr0(bram_addr_c_4),
+  .d0(bram_wdata_c_4), 
+  .we0(bram_we_c_4), 
+  .q0(bram_rdata_c_4), 
+  .addr1(bram_addr_c_4_ext),
+  .d1(bram_wdata_c_4_ext), 
+  .we1(bram_we_c_4_ext), 
+  .q1(bram_rdata_c_4_ext), 
+  .clk(clk_mem));
+
+
+  // BRAM matrix C 5
+ram matrix_C_5(
+  .addr0(bram_addr_c_5),
+  .d0(bram_wdata_c_5), 
+  .we0(bram_we_c_5), 
+  .q0(bram_rdata_c_5), 
+  .addr1(bram_addr_c_5_ext),
+  .d1(bram_wdata_c_5_ext), 
+  .we1(bram_we_c_5_ext), 
+  .q1(bram_rdata_c_5_ext), 
+  .clk(clk_mem));
+  	
+reg start_eltwise_op;
+wire done_eltwise_op;
+
+reg [3:0] state;
+	
+////////////////////////////////////////////////////////////////
+// Control logic
+////////////////////////////////////////////////////////////////
+  // Sequencer for one element-wise operation; synchronous to clk, cleared while resetn is low.
+  //   4'b0000  idle: start_eltwise_op held low until start_reg is set
+  //   4'b0001  raise start_eltwise_op
+  //   4'b1010  running: wait for done_eltwise_op, then drop start_eltwise_op
+  //   4'b1000  finished: wait for clear_done_reg before going back to idle
+  // A state holds its value whenever no transition below fires.
+  always @(posedge clk) begin
+    if (resetn == 1'b0) begin
+      state <= 4'b0000;
+      start_eltwise_op <= 1'b0;
+    end
+    else begin
+      case (state)
+        4'b0000: begin
+          start_eltwise_op <= 1'b0;
+          if (start_reg == 1'b1) begin
+            state <= 4'b0001;
+          end
+        end
+        4'b0001: begin
+          start_eltwise_op <= 1'b1;
+          state <= 4'b1010;
+        end
+        4'b1010: begin
+          if (done_eltwise_op == 1'b1) begin
+            start_eltwise_op <= 1'b0;
+            state <= 4'b1000;
+          end
+        end
+        4'b1000: begin
+          if (clear_done_reg == 1'b1) begin
+            state <= 4'b0000;
+          end
+        end
+        // Twelve of the sixteen encodings are unused; without this arm the
+        // sequencer could never leave one of them except through reset.
+        default: begin
+          state <= 4'b0000;
+          start_eltwise_op <= 1'b0;
+        end
+      endcase
+    end
+  end
+
+reg [1:0] state_apb;
+`define IDLE     2'b00
+`define W_ENABLE  2'b01
+`define R_ENABLE  2'b10
+
+reg [`AWIDTH-1:0] address_mat_a;
+reg [`AWIDTH-1:0] address_mat_b;
+reg [`AWIDTH-1:0] address_mat_c;
+reg [`MASK_WIDTH-1:0] validity_mask_a;
+reg [`MASK_WIDTH-1:0] validity_mask_b;
+reg [`ITERATIONS_WIDTH-1:0] iterations;
+
+////////////////////////////////////////////////////////////////
+// Configuration logic
+////////////////////////////////////////////////////////////////
+// APB register interface, clocked by PCLK with synchronous active-low PRESETn.
+// Every access takes two states: IDLE decodes PSEL/PWRITE, then W_ENABLE or R_ENABLE
+// performs the transfer (only if PENABLE is high) and always returns to IDLE.
+always @(posedge PCLK) begin
+  if (PRESETn == 0) begin
+    state_apb <= `IDLE;
+    PRDATA <= 0;
+    PREADY <= 0;
+    address_mat_a <= 0;
+    address_mat_b <= 0;
+    address_mat_c <= 0;
+    validity_mask_a <= {`MASK_WIDTH{1'b1}};
+    validity_mask_b <= {`MASK_WIDTH{1'b1}};
+    // iterations, start_reg and clear_done_reg were not covered by this reset branch before.
+    iterations <= 0;
+    start_reg <= 0;
+    clear_done_reg <= 0;
+  end
+  else begin
+    case (state_apb)
+      `IDLE : begin
+        PRDATA <= 0;
+        PREADY <= 0;
+        if (PSEL) begin
+          if (PWRITE) state_apb <= `W_ENABLE;
+          else        state_apb <= `R_ENABLE;
+        end
+      end
+
+      `W_ENABLE : begin
+        if (PSEL && PWRITE && PENABLE) begin
+          case (PADDR)
+          `REG_STDN_ADDR        : begin
+                                  start_reg <= PWDATA[0];       // bit 0 of the write data
+                                  clear_done_reg <= PWDATA[31]; // bit 31 of the write data
+                                  end
+          `REG_MATRIX_A_ADDR    : address_mat_a <= PWDATA[`AWIDTH-1:0];
+          `REG_MATRIX_B_ADDR    : address_mat_b <= PWDATA[`AWIDTH-1:0];
+          `REG_MATRIX_C_ADDR    : address_mat_c <= PWDATA[`AWIDTH-1:0];
+          `REG_VALID_MASK_A_ADDR: validity_mask_a <= PWDATA[`MASK_WIDTH-1:0];
+          `REG_VALID_MASK_B_ADDR: validity_mask_b <= PWDATA[`MASK_WIDTH-1:0];
+          `REG_ITERATIONS_ADDR  : iterations <= PWDATA[`ITERATIONS_WIDTH-1:0];
+          default : reg_dummy <= PWDATA; //sink writes to a dummy register
+          endcase
+          PREADY <= 1;
+        end
+        state_apb <= `IDLE;
+      end
+
+      `R_ENABLE : begin
+        if (PSEL && !PWRITE && PENABLE) begin
+          PREADY <= 1;
+          case (PADDR)
+          // Status word: bit 31 = done_eltwise_op, bit 0 = start_eltwise_op, the rest read as 0.
+          `REG_STDN_ADDR        : PRDATA <= {done_eltwise_op, 30'b0, start_eltwise_op};
+          `REG_MATRIX_A_ADDR    : PRDATA <= address_mat_a;
+          `REG_MATRIX_B_ADDR    : PRDATA <= address_mat_b;
+          `REG_MATRIX_C_ADDR    : PRDATA <= address_mat_c;
+          `REG_VALID_MASK_A_ADDR: PRDATA <= validity_mask_a;
+          `REG_VALID_MASK_B_ADDR: PRDATA <= validity_mask_b;
+          `REG_ITERATIONS_ADDR  : PRDATA <= iterations;
+          default : PRDATA <= reg_dummy; //read the dummy register for undefined addresses
+          endcase
+        end
+        state_apb <= `IDLE;
+      end
+
+      default: begin
+        state_apb <= `IDLE;
+      end
+    endcase
+  end
+end
+  
+wire reset;
+assign reset = ~resetn;
+wire pe_reset;
+assign pe_reset = ~pe_resetn;
+
+  wire c_data_0_available;
+  wire c_data_1_available;
+  wire c_data_2_available;
+  wire c_data_3_available;
+  wire c_data_4_available;
+  wire c_data_5_available;
+
+  assign bram_wdata_a_0 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_a_0 = 1'b1;
+  assign bram_we_a_0 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_wdata_a_1 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_a_1 = 1'b1;
+  assign bram_we_a_1 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_wdata_a_2 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_a_2 = 1'b1;
+  assign bram_we_a_2 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_wdata_a_3 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_a_3 = 1'b1;
+  assign bram_we_a_3 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_wdata_a_4 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_a_4 = 1'b1;
+  assign bram_we_a_4 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_wdata_a_5 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_a_5 = 1'b1;
+  assign bram_we_a_5 = {`MASK_WIDTH{1'b0}};
+  	
+  assign bram_wdata_b_0 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_b_0 = 1'b1;
+  assign bram_we_b_0 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_wdata_b_1 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_b_1 = 1'b1;
+  assign bram_we_b_1 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_wdata_b_2 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_b_2 = 1'b1;
+  assign bram_we_b_2 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_wdata_b_3 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_b_3 = 1'b1;
+  assign bram_we_b_3 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_wdata_b_4 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_b_4 = 1'b1;
+  assign bram_we_b_4 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_wdata_b_5 = {`CU_SIZE*`DWIDTH{1'b0}};
+  assign bram_en_b_5 = 1'b1;
+  assign bram_we_b_5 = {`MASK_WIDTH{1'b0}};
+
+  assign bram_we_c_0 = (c_data_0_available) ? {`MASK_WIDTH{1'b1}} : {`MASK_WIDTH{1'b0}};  
+  assign bram_we_c_2 = (c_data_2_available) ? {`MASK_WIDTH{1'b1}} : {`MASK_WIDTH{1'b0}};  
+  assign bram_we_c_4 = (c_data_4_available) ? {`MASK_WIDTH{1'b1}} : {`MASK_WIDTH{1'b0}};  
+  assign bram_we_c_1 = (c_data_1_available) ? {`MASK_WIDTH{1'b1}} : {`MASK_WIDTH{1'b0}};  
+  assign bram_we_c_3 = (c_data_3_available) ? {`MASK_WIDTH{1'b1}} : {`MASK_WIDTH{1'b0}};  
+  assign bram_we_c_5 = (c_data_5_available) ? {`MASK_WIDTH{1'b1}} : {`MASK_WIDTH{1'b0}};  
+
+  /////////////////////////////////////////////////
+  // ORing all done signals
+  /////////////////////////////////////////////////
+  wire done_eltwise_op_0;
+  wire done_eltwise_op_1;
+  wire done_eltwise_op_2;
+  wire done_eltwise_op_3;
+  wire done_eltwise_op_4;
+  wire done_eltwise_op_5;
+
+  assign done_eltwise_op = 
+  done_eltwise_op_0 | 
+  done_eltwise_op_1 | 
+  done_eltwise_op_2 | 
+  done_eltwise_op_3 | 
+  done_eltwise_op_4 | 
+  done_eltwise_op_5 ;
+
+  /////////////////////////////////////////////////
+  // Code to allow for scalar mode
+  /////////////////////////////////////////////////
+  
+	wire [`CU_SIZE*`DWIDTH-1:0] b_data_0;
+	wire [`CU_SIZE*`DWIDTH-1:0] b_data_1;
+	wire [`CU_SIZE*`DWIDTH-1:0] b_data_2;
+	wire [`CU_SIZE*`DWIDTH-1:0] b_data_3;
+	wire [`CU_SIZE*`DWIDTH-1:0] b_data_4;
+	wire [`CU_SIZE*`DWIDTH-1:0] b_data_5;
+
+  assign b_data_0 = mode ? bram_rdata_b_0 : {`CU_SIZE{scalar_inp}};
+  assign b_data_1 = mode ? bram_rdata_b_1 : {`CU_SIZE{scalar_inp}};
+  assign b_data_2 = mode ? bram_rdata_b_2 : {`CU_SIZE{scalar_inp}};
+  assign b_data_3 = mode ? bram_rdata_b_3 : {`CU_SIZE{scalar_inp}};
+  assign b_data_4 = mode ? bram_rdata_b_4 : {`CU_SIZE{scalar_inp}};
+  assign b_data_5 = mode ? bram_rdata_b_5 : {`CU_SIZE{scalar_inp}};
+
+  /////////////////////////////////////////////////
+  // Compute Unit 0
+  /////////////////////////////////////////////////
+
+eltwise_cu u_eltwise_cu_0(
+  .clk(clk),
+  .reset(reset),
+  .pe_reset(pe_reset),
+  .start_eltwise_op(start_eltwise_op),
+  .done_eltwise_op(done_eltwise_op_0),
+  .count(iterations),
+  .op(op),
+  .address_mat_a(address_mat_a),
+  .address_mat_b(address_mat_b),
+  .address_mat_c(address_mat_c),
+  .a_data(bram_rdata_a_0),
+  .b_data(b_data_0),
+  .c_data_out(bram_wdata_c_0),
+  .a_addr(bram_addr_a_0),
+  .b_addr(bram_addr_b_0),
+  .c_addr(bram_addr_c_0),
+  .c_data_available(c_data_0_available),
+  .validity_mask_a(4'b1111),
+  .validity_mask_b(4'b1111)
+);
+
+  /////////////////////////////////////////////////
+  // Compute Unit 1
+  /////////////////////////////////////////////////
+
+eltwise_cu u_eltwise_cu_1(
+  .clk(clk),
+  .reset(reset),
+  .pe_reset(pe_reset),
+  .start_eltwise_op(start_eltwise_op),
+  .done_eltwise_op(done_eltwise_op_1),
+  .count(iterations),
+  .op(op),
+  .address_mat_a(address_mat_a),
+  .address_mat_b(address_mat_b),
+  .address_mat_c(address_mat_c),
+  .a_data(bram_rdata_a_1),
+  .b_data(b_data_1),
+  .c_data_out(bram_wdata_c_1),
+  .a_addr(bram_addr_a_1),
+  .b_addr(bram_addr_b_1),
+  .c_addr(bram_addr_c_1),
+  .c_data_available(c_data_1_available),
+  .validity_mask_a(4'b1111),
+  .validity_mask_b(4'b1111)
+);
+
+  /////////////////////////////////////////////////
+  // Compute Unit 2
+  /////////////////////////////////////////////////
+
+eltwise_cu u_eltwise_cu_2(
+  .clk(clk),
+  .reset(reset),
+  .pe_reset(pe_reset),
+  .start_eltwise_op(start_eltwise_op),
+  .done_eltwise_op(done_eltwise_op_2),
+  .count(iterations),
+  .op(op),
+  .address_mat_a(address_mat_a),
+  .address_mat_b(address_mat_b),
+  .address_mat_c(address_mat_c),
+  .a_data(bram_rdata_a_2),
+  .b_data(b_data_2),
+  .c_data_out(bram_wdata_c_2),
+  .a_addr(bram_addr_a_2),
+  .b_addr(bram_addr_b_2),
+  .c_addr(bram_addr_c_2),
+  .c_data_available(c_data_2_available),
+  .validity_mask_a(4'b1111),
+  .validity_mask_b(4'b1111)
+);
+
+  /////////////////////////////////////////////////
+  // Compute Unit 3
+  /////////////////////////////////////////////////
+
+eltwise_cu u_eltwise_cu_3(
+  .clk(clk),
+  .reset(reset),
+  .pe_reset(pe_reset),
+  .start_eltwise_op(start_eltwise_op),
+  .done_eltwise_op(done_eltwise_op_3),
+  .count(iterations),
+  .op(op),
+  .address_mat_a(address_mat_a),
+  .address_mat_b(address_mat_b),
+  .address_mat_c(address_mat_c),
+  .a_data(bram_rdata_a_3),
+  .b_data(b_data_3),
+  .c_data_out(bram_wdata_c_3),
+  .a_addr(bram_addr_a_3),
+  .b_addr(bram_addr_b_3),
+  .c_addr(bram_addr_c_3),
+  .c_data_available(c_data_3_available),
+  .validity_mask_a(4'b1111),
+  .validity_mask_b(4'b1111)
+);
+
+  /////////////////////////////////////////////////
+  // Compute Unit 4
+  /////////////////////////////////////////////////
+
+eltwise_cu u_eltwise_cu_4(
+  .clk(clk),
+  .reset(reset),
+  .pe_reset(pe_reset),
+  .start_eltwise_op(start_eltwise_op),
+  .done_eltwise_op(done_eltwise_op_4),
+  .count(iterations),
+  .op(op),
+  .address_mat_a(address_mat_a),
+  .address_mat_b(address_mat_b),
+  .address_mat_c(address_mat_c),
+  .a_data(bram_rdata_a_4),
+  .b_data(b_data_4),
+  .c_data_out(bram_wdata_c_4),
+  .a_addr(bram_addr_a_4),
+  .b_addr(bram_addr_b_4),
+  .c_addr(bram_addr_c_4),
+  .c_data_available(c_data_4_available),
+  .validity_mask_a(4'b1111),
+  .validity_mask_b(4'b1111)
+);
+
+  /////////////////////////////////////////////////
+  // Compute Unit 5
+  /////////////////////////////////////////////////
+
+eltwise_cu u_eltwise_cu_5(
+  .clk(clk),
+  .reset(reset),
+  .pe_reset(pe_reset),
+  .start_eltwise_op(start_eltwise_op),
+  .done_eltwise_op(done_eltwise_op_5),
+  .count(iterations),
+  .op(op),
+  .address_mat_a(address_mat_a),
+  .address_mat_b(address_mat_b),
+  .address_mat_c(address_mat_c),
+  .a_data(bram_rdata_a_5),
+  .b_data(b_data_5),
+  .c_data_out(bram_wdata_c_5),
+  .a_addr(bram_addr_a_5),
+  .b_addr(bram_addr_b_5),
+  .c_addr(bram_addr_c_5),
+  .c_data_available(c_data_5_available),
+  .validity_mask_a(4'b0011),
+  .validity_mask_b(4'b0011)
+);
+
+endmodule
+
+
+//////////////////////////////////
+//////////////////////////////////
+//Dual port RAM
+//////////////////////////////////
+//////////////////////////////////
+// Dual-port memory bank: one dpram_original per lane (CU_SIZE lanes of DWIDTH bits each).
+// Both ports share their address across all lanes; we0[i]/we1[i] gate the write of lane i.
+module ram (addr0, d0, we0, q0, addr1, d1, we1, q1, clk);
+
+input  clk;
+input  [`AWIDTH-1:0] addr0;
+input  [`AWIDTH-1:0] addr1;
+input  [`CU_SIZE*`DWIDTH-1:0] d0;
+input  [`CU_SIZE*`DWIDTH-1:0] d1;
+input  [`CU_SIZE-1:0] we0;
+input  [`CU_SIZE-1:0] we1;
+output [`CU_SIZE*`DWIDTH-1:0] q0;
+output [`CU_SIZE*`DWIDTH-1:0] q1;
+
+genvar i;
+
+generate
+`ifdef QUARTUS
+   for (i=0;i<`CU_SIZE;i=i+1) begin: gen_dpram
+`else
+   for (i=0;i<`CU_SIZE;i=i+1) begin
+`endif
+     dpram_original #(.AWIDTH(`AWIDTH),.DWIDTH(`DWIDTH),.NUM_WORDS(1<<`AWIDTH)) dp1 (
+       .clk(clk),
+       .address_a(addr0), .address_b(addr1),
+       .wren_a(we0[i]),   .wren_b(we1[i]),
+       .data_a(d0[i*`DWIDTH +: `DWIDTH]),
+       .data_b(d1[i*`DWIDTH +: `DWIDTH]),
+       .out_a(q0[i*`DWIDTH +: `DWIDTH]),
+       .out_b(q1[i*`DWIDTH +: `DWIDTH]));
+   end
+endgenerate
+
+endmodule
+
+module dpram_original ( // dual-port RAM: two ports, one clock, DWIDTH-bit words
+    clk,
+    address_a,
+    address_b,
+    wren_a,
+    wren_b,
+    data_a,
+    data_b,
+    out_a,
+    out_b
+);
+parameter AWIDTH=10; // width of address_a / address_b
+parameter NUM_WORDS=1024; // depth of the behavioural array; not referenced in the hard_mem build
+parameter DWIDTH=32; // width of data_* / out_*
+input clk;
+input [(AWIDTH-1):0] address_a;
+input [(AWIDTH-1):0] address_b;
+input  wren_a;
+input  wren_b;
+input [(DWIDTH-1):0] data_a;
+input [(DWIDTH-1):0] data_b;
+output reg [(DWIDTH-1):0] out_a;
+output reg [(DWIDTH-1):0] out_b;
+
+`ifndef hard_mem
+// Behavioural model: each port writes when its wren is high and registers a read every clock edge.
+reg [DWIDTH-1:0] ram[NUM_WORDS-1:0];
+always @ (posedge clk) begin 
+  if (wren_a) begin
+      ram[address_a] <= data_a;
+  end
+  out_a <= ram[address_a]; // non-blocking: captures the word stored before this edge's write
+end
+// Port B mirrors port A on the same array; same-address writes from both ports are not arbitrated.
+always @ (posedge clk) begin 
+  if (wren_b) begin
+      ram[address_b] <= data_b;
+  end 
+  out_b <= ram[address_b];
+end
+
+`else
+// hard_mem build: storage is a dual_port_ram instance (not defined in this part of the file; presumably a target primitive - confirm).
+defparam u_dual_port_ram.ADDR_WIDTH = AWIDTH;
+defparam u_dual_port_ram.DATA_WIDTH = DWIDTH;
+
+dual_port_ram u_dual_port_ram(
+.addr1(address_a),
+.we1(wren_a),
+.data1(data_a),
+.out1(out_a),
+.addr2(address_b),
+.we2(wren_b),
+.data2(data_b),
+.out2(out_b),
+.clk(clk)
+);
+
+`endif
+endmodule
+
+  
+//////////////////////////////////
+//////////////////////////////////
+// Elementwise compute unit
+//////////////////////////////////
+//////////////////////////////////
+module eltwise_cu( // one compute unit: input_logic feeds pe_array, output_logic captures the results
+ clk,
+ reset,
+ pe_reset,
+ start_eltwise_op,
+ done_eltwise_op,
+ count,
+ op,
+ address_mat_a,
+ address_mat_b,
+ address_mat_c,
+ a_data,
+ b_data,
+ c_data_out, 
+ a_addr,
+ b_addr,
+ c_addr,
+ c_data_available,
+ validity_mask_a,
+ validity_mask_b
+);
+
+ input clk;
+ input reset;                              // active high; clears the done counter
+ input pe_reset;                           // forwarded unchanged to pe_array
+ input start_eltwise_op;                   // must stay high for the whole operation
+ output done_eltwise_op;                   // one-cycle pulse, see the done logic below
+ input [`ITERATIONS_WIDTH-1:0] count;      // iterations requested for this unit
+ input [1:0] op;                           // forwarded unchanged to pe_array
+ input [`AWIDTH-1:0] address_mat_a;
+ input [`AWIDTH-1:0] address_mat_b;
+ input [`AWIDTH-1:0] address_mat_c;
+ input [`CU_SIZE*`DWIDTH-1:0] a_data;
+ input [`CU_SIZE*`DWIDTH-1:0] b_data;
+ output [`CU_SIZE*`DWIDTH-1:0] c_data_out;
+ output [`AWIDTH-1:0] a_addr;
+ output [`AWIDTH-1:0] b_addr;
+ output [`AWIDTH-1:0] c_addr;
+ output c_data_available;
+ input [`MASK_WIDTH-1:0] validity_mask_a;
+ input [`MASK_WIDTH-1:0] validity_mask_b;
+
+wire [`DWIDTH-1:0] out0;
+wire [`DWIDTH-1:0] out1;
+wire [`DWIDTH-1:0] out2;
+wire [`DWIDTH-1:0] out3;
+
+wire [`DWIDTH-1:0] a0_data;
+wire [`DWIDTH-1:0] a1_data;
+wire [`DWIDTH-1:0] a2_data;
+wire [`DWIDTH-1:0] a3_data;
+wire [`DWIDTH-1:0] b0_data;
+wire [`DWIDTH-1:0] b1_data;
+wire [`DWIDTH-1:0] b2_data;
+wire [`DWIDTH-1:0] b3_data;
+
+// Done logic: clk_cnt counts cycles while start_eltwise_op is high; done_eltwise_op pulses
+// for one cycle when it reaches PE_PIPELINE_DEPTH + count. The threshold is 32 bits wide,
+// like clk_cnt; as an 8-bit wire the sum was truncated and done fired early on long runs.
+wire [31:0] clk_cnt_for_done;
+reg [31:0] clk_cnt;
+reg done_eltwise_op;
+
+assign clk_cnt_for_done = 
+                  `PE_PIPELINE_DEPTH + //This is dependent on the pipeline depth of the PEs
+                  count //The number of iterations asked for this compute unit
+                  ;
+
+always @(posedge clk) begin
+  if (reset || ~start_eltwise_op) begin
+    clk_cnt <= 0;
+    done_eltwise_op <= 0;
+  end
+  else if (clk_cnt == clk_cnt_for_done) begin
+    done_eltwise_op <= 1;
+    clk_cnt <= clk_cnt + 1;
+  end
+  else if (done_eltwise_op == 0) begin
+    clk_cnt <= clk_cnt + 1;
+  end    
+  else begin
+    done_eltwise_op <= 0; // drop done one cycle after it was raised
+    clk_cnt <= clk_cnt + 1;
+  end
+end
+
+//////////////////////////////////////////////////////////////////////////
+// Instantiation of input logic
+//////////////////////////////////////////////////////////////////////////
+input_logic u_input_logic(
+.clk(clk),
+.reset(reset),
+.start_eltwise_op(start_eltwise_op),
+.count(count),
+.a_addr(a_addr),
+.b_addr(b_addr),
+.address_mat_a(address_mat_a),
+.address_mat_b(address_mat_b),
+.a_data(a_data),
+.b_data(b_data),
+.a0_data(a0_data),
+.a1_data(a1_data),
+.a2_data(a2_data),
+.a3_data(a3_data),
+.b0_data(b0_data),
+.b1_data(b1_data),
+.b2_data(b2_data),
+.b3_data(b3_data),
+.validity_mask_a(validity_mask_a),
+.validity_mask_b(validity_mask_b)
+);
+
+//////////////////////////////////////////////////////////////////////////
+// Instantiation of the output logic
+//////////////////////////////////////////////////////////////////////////
+output_logic u_output_logic(
+.clk(clk),
+.reset(reset),
+.start_eltwise_op(start_eltwise_op),
+.done_eltwise_op(done_eltwise_op),
+.address_mat_c(address_mat_c),
+.c_data_out(c_data_out),
+.c_addr(c_addr),
+.c_data_available(c_data_available),
+.out0(out0),
+.out1(out1),
+.out2(out2),
+.out3(out3)
+);
+
+//////////////////////////////////////////////////////////////////////////
+// Instantiations of the actual PEs
+//////////////////////////////////////////////////////////////////////////
+pe_array u_pe_array(
+.reset(reset),
+.clk(clk),
+.pe_reset(pe_reset),
+.op(op),
+.a0(a0_data), 
+.a1(a1_data), 
+.a2(a2_data), 
+.a3(a3_data),
+.b0(b0_data), 
+.b1(b1_data), 
+.b2(b2_data), 
+.b3(b3_data),
+.out0(out0),
+.out1(out1),
+.out2(out2),
+.out3(out3)
+);
+
+endmodule
+
+//////////////////////////////////////////////////////////////////////////
+// Output logic
+//////////////////////////////////////////////////////////////////////////
+// Output stage: packs the four PE lane results into one registered word and
+// steps the write address for matrix C.
+//
+// Ports:
+//   clk, reset        - clock and synchronous, active-high reset
+//   start_eltwise_op  - while low, the block is held in its reset state
+//   done_eltwise_op   - part of the port list but not read by this module
+//   address_mat_c     - value loaded into c_addr while held in reset
+//   c_data_out        - registered {out3, out2, out1, out0}
+//   c_addr            - advances by one for every word captured
+//   c_data_available  - set when the first word is captured
+//   out0..out3        - per-lane results coming from the PE array
+module output_logic(
+clk,
+reset,
+start_eltwise_op,
+done_eltwise_op,
+address_mat_c,
+c_data_out,
+c_addr,
+c_data_available,
+out0,
+out1,
+out2,
+out3
+);
+
+input clk;
+input reset;
+input start_eltwise_op;
+input done_eltwise_op;
+input [`AWIDTH-1:0] address_mat_c;
+input [`DWIDTH-1:0] out0;
+input [`DWIDTH-1:0] out1;
+input [`DWIDTH-1:0] out2;
+input [`DWIDTH-1:0] out3;
+output [`CU_SIZE*`DWIDTH-1:0] c_data_out;
+output [`AWIDTH-1:0] c_addr;
+output c_data_available;
+
+reg [`CU_SIZE*`DWIDTH-1:0] c_data_out;
+reg [`AWIDTH-1:0] c_addr;
+reg c_data_available;
+
+// Cycles elapsed since the operation started. Only 8 bits wide, so it wraps;
+// the outputs simply hold their last value while it is back below the
+// pipeline-depth threshold.
+reg [7:0] elapsed;
+
+always @(posedge clk) begin
+  if (reset | ~start_eltwise_op) begin
+    elapsed <= 0;
+    c_addr <= address_mat_c;
+    c_data_out <= 0;
+    c_data_available <= 1'b0;
+  end
+  else begin
+    // The counter runs on every active cycle.
+    elapsed <= elapsed + 1;
+    // Results are captured only once the PE pipeline has filled.
+    if (elapsed > `PE_PIPELINE_DEPTH) begin
+      c_data_out <= {out3, out2, out1, out0};
+      c_addr <= c_addr + 1;
+      c_data_available <= 1'b1;
+    end
+  end
+end
+
+endmodule
+
+//////////////////////////////////////////////////////////////////////////
+// Data setup
+//////////////////////////////////////////////////////////////////////////
+// Input stage: produces read addresses for operand memories A and B and
+// gates the returned data words, lane by lane, before they reach the PEs.
+//
+// Ports:
+//   clk, reset            - clock and synchronous, active-high reset
+//   start_eltwise_op      - while low, every register here is held in reset
+//   count                 - addresses keep advancing while the internal
+//                           read counter is <= count
+//   a_addr, b_addr        - read addresses, start at address_mat_a/_b
+//   a_data, b_data        - packed read data, four `DWIDTH-bit lanes each
+//   a0..a3_data, b0..b3_data - gated per-lane operands
+//   validity_mask_a/_b    - bit k low forces lane k to zero
+module input_logic(
+clk,
+reset,
+start_eltwise_op,
+count,
+a_addr,
+b_addr,
+address_mat_a,
+address_mat_b,
+a_data,
+b_data,
+a0_data,
+a1_data,
+a2_data,
+a3_data,
+b0_data,
+b1_data,
+b2_data,
+b3_data,
+validity_mask_a,
+validity_mask_b
+);
+
+input clk;
+input reset;
+input start_eltwise_op;
+input [`ITERATIONS_WIDTH-1:0] count;
+input [`AWIDTH-1:0] address_mat_a;
+input [`AWIDTH-1:0] address_mat_b;
+input [`CU_SIZE*`DWIDTH-1:0] a_data;
+input [`CU_SIZE*`DWIDTH-1:0] b_data;
+input [`MASK_WIDTH-1:0] validity_mask_a;
+input [`MASK_WIDTH-1:0] validity_mask_b;
+output [`AWIDTH-1:0] a_addr;
+output [`AWIDTH-1:0] b_addr;
+output [`DWIDTH-1:0] a0_data;
+output [`DWIDTH-1:0] a1_data;
+output [`DWIDTH-1:0] a2_data;
+output [`DWIDTH-1:0] a3_data;
+output [`DWIDTH-1:0] b0_data;
+output [`DWIDTH-1:0] b1_data;
+output [`DWIDTH-1:0] b2_data;
+output [`DWIDTH-1:0] b3_data;
+
+//////////////////////////////////////////////////////////////////////////
+// Address generation for memories A and B
+//////////////////////////////////////////////////////////////////////////
+reg [`AWIDTH-1:0] a_addr;
+reg [`AWIDTH-1:0] b_addr;
+reg a_mem_access;       // set once the first A read has been issued
+reg b_mem_access;       // set once the first B read has been issued
+reg [7:0] reads_issued; // shared by both address generators
+
+// Both address streams advance together while reads_issued <= count.
+// The access flags are cleared only by reset / start_eltwise_op low.
+always @(posedge clk) begin
+  if (reset || ~start_eltwise_op) begin
+    a_addr <= address_mat_a;
+    b_addr <= address_mat_b;
+    a_mem_access <= 0;
+    b_mem_access <= 0;
+    reads_issued <= 0;
+  end
+  else if (reads_issued <= count) begin
+    a_addr <= a_addr + 1;
+    b_addr <= b_addr + 1;
+    a_mem_access <= 1;
+    b_mem_access <= 1;
+    reads_issued <= reads_issued + 1;
+  end
+end
+
+//////////////////////////////////////////////////////////////////////////
+// Read-latency tracking: count cycles since each access flag went high
+//////////////////////////////////////////////////////////////////////////
+reg [7:0] a_mem_access_counter;
+reg [7:0] b_mem_access_counter;
+
+always @(posedge clk) begin
+  if (reset || ~start_eltwise_op) begin
+    a_mem_access_counter <= 0;
+    b_mem_access_counter <= 0;
+  end
+  else begin
+    if (a_mem_access == 1) begin
+      a_mem_access_counter <= a_mem_access_counter + 1;
+    end
+    else begin
+      a_mem_access_counter <= 0;
+    end
+
+    if (b_mem_access == 1) begin
+      b_mem_access_counter <= b_mem_access_counter + 1;
+    end
+    else begin
+      b_mem_access_counter <= 0;
+    end
+  end
+end
+
+//////////////////////////////////////////////////////////////////////////
+// Valid flags for the returned data
+//////////////////////////////////////////////////////////////////////////
+// The flag is forced low on latency-counter value k+1 when mask bit k is
+// low; otherwise it is high once the counter has reached
+// `MEM_ACCESS_LATENCY.
+wire a_valid_suppressed;
+assign a_valid_suppressed =
+       (validity_mask_a[0]==1'b0 && a_mem_access_counter==1) ||
+       (validity_mask_a[1]==1'b0 && a_mem_access_counter==2) ||
+       (validity_mask_a[2]==1'b0 && a_mem_access_counter==3) ||
+       (validity_mask_a[3]==1'b0 && a_mem_access_counter==4);
+
+wire bram_rdata_a_valid;
+assign bram_rdata_a_valid =
+       a_valid_suppressed ? 1'b0 : (a_mem_access_counter >= `MEM_ACCESS_LATENCY);
+
+wire b_valid_suppressed;
+assign b_valid_suppressed =
+       (validity_mask_b[0]==1'b0 && b_mem_access_counter==1) ||
+       (validity_mask_b[1]==1'b0 && b_mem_access_counter==2) ||
+       (validity_mask_b[2]==1'b0 && b_mem_access_counter==3) ||
+       (validity_mask_b[3]==1'b0 && b_mem_access_counter==4);
+
+wire bram_rdata_b_valid;
+assign bram_rdata_b_valid =
+       b_valid_suppressed ? 1'b0 : (b_mem_access_counter >= `MEM_ACCESS_LATENCY);
+
+//////////////////////////////////////////////////////////////////////////
+// Lane slicing: each lane is zeroed unless the word is valid AND the
+// lane's mask bit is set
+//////////////////////////////////////////////////////////////////////////
+assign a0_data = {`DWIDTH{bram_rdata_a_valid}} & {`DWIDTH{validity_mask_a[0]}} & a_data[1*`DWIDTH-1:0*`DWIDTH];
+assign a1_data = {`DWIDTH{bram_rdata_a_valid}} & {`DWIDTH{validity_mask_a[1]}} & a_data[2*`DWIDTH-1:1*`DWIDTH];
+assign a2_data = {`DWIDTH{bram_rdata_a_valid}} & {`DWIDTH{validity_mask_a[2]}} & a_data[3*`DWIDTH-1:2*`DWIDTH];
+assign a3_data = {`DWIDTH{bram_rdata_a_valid}} & {`DWIDTH{validity_mask_a[3]}} & a_data[4*`DWIDTH-1:3*`DWIDTH];
+
+assign b0_data = {`DWIDTH{bram_rdata_b_valid}} & {`DWIDTH{validity_mask_b[0]}} & b_data[1*`DWIDTH-1:0*`DWIDTH];
+assign b1_data = {`DWIDTH{bram_rdata_b_valid}} & {`DWIDTH{validity_mask_b[1]}} & b_data[2*`DWIDTH-1:1*`DWIDTH];
+assign b2_data = {`DWIDTH{bram_rdata_b_valid}} & {`DWIDTH{validity_mask_b[2]}} & b_data[3*`DWIDTH-1:2*`DWIDTH];
+assign b3_data = {`DWIDTH{bram_rdata_b_valid}} & {`DWIDTH{validity_mask_b[3]}} & b_data[4*`DWIDTH-1:3*`DWIDTH];
+
+endmodule
+
+
+
+//////////////////////////////////////////////////////////////////////////
+// Array of processing elements
+//////////////////////////////////////////////////////////////////////////
+// Four independent processing elements, one per data lane. Lane k receives
+// (ak, bk) and drives outk; all lanes share the same clock, opcode and reset.
+// A lane is reset when either `reset` or `pe_reset` is high.
+module pe_array(
+reset,
+clk,
+pe_reset,
+op,
+a0, a1, a2, a3,
+b0, b1, b2, b3,
+out0, out1, out2, out3
+);
+
+input reset, clk, pe_reset;
+input [1:0] op;
+input [`DWIDTH-1:0] a0, a1, a2, a3;
+input [`DWIDTH-1:0] b0, b1, b2, b3;
+output [`DWIDTH-1:0] out0, out1, out2, out3;
+
+// Combined reset handed to every lane.
+wire lane_rst;
+assign lane_rst = pe_reset | reset;
+
+processing_element pe0(
+  .reset(lane_rst),
+  .clk(clk),
+  .in_a(a0),
+  .in_b(b0),
+  .op(op),
+  .out(out0)
+);
+
+processing_element pe1(
+  .reset(lane_rst),
+  .clk(clk),
+  .in_a(a1),
+  .in_b(b1),
+  .op(op),
+  .out(out1)
+);
+
+processing_element pe2(
+  .reset(lane_rst),
+  .clk(clk),
+  .in_a(a2),
+  .in_b(b2),
+  .op(op),
+  .out(out2)
+);
+
+processing_element pe3(
+  .reset(lane_rst),
+  .clk(clk),
+  .in_a(a3),
+  .in_b(b3),
+  .op(op),
+  .out(out3)
+);
+
+endmodule
+
+
+//////////////////////////////////////////////////////////////////////////
+// Processing element (PE)
+//////////////////////////////////////////////////////////////////////////
+// One processing element. The multiply, add and subtract units all run on
+// every cycle; `op` only selects which result is driven onto `out`:
+//   op == 2'b00 -> seq_add result
+//   op == 2'b01 -> seq_sub result
+//   otherwise   -> seq_mul result
+module processing_element(
+ reset,
+ clk,
+ in_a,
+ in_b,
+ op,
+ out
+ );
+
+ input reset;
+ input clk;
+ input  [1:0] op;
+ input  [`DWIDTH-1:0] in_a;
+ input  [`DWIDTH-1:0] in_b;
+ output [`DWIDTH-1:0] out;
+
+ // Results of the three arithmetic units.
+ wire [`DWIDTH-1:0] out_sum;
+ wire [`DWIDTH-1:0] out_sub;
+ wire [`DWIDTH-1:0] out_mul;
+
+ seq_add u_add(.a(in_a), .b(in_b), .out(out_sum), .reset(reset), .clk(clk));
+ seq_sub u_sub(.a(in_a), .b(in_b), .out(out_sub), .reset(reset), .clk(clk));
+ seq_mul u_mul(.a(in_a), .b(in_b), .out(out_mul), .reset(reset), .clk(clk));
+
+ // Opcode decode for the result mux.
+ wire sel_sum;
+ wire sel_sub;
+ assign sel_sum = (op == 2'b00);
+ assign sel_sub = (op == 2'b01);
+
+ assign out = sel_sum ? out_sum :
+              sel_sub ? out_sub :
+              out_mul;
+
+endmodule
+
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+// Multiply block
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+// Registered multiplier wrapper: operands are flopped on the way in and the
+// product is flopped on the way out. `reset` is synchronous and clears all
+// three registers; it is not forwarded to the multiplier core.
+module seq_mul(a, b, out, reset, clk);
+input clk;
+input reset;
+input [`DWIDTH-1:0] a;
+input [`DWIDTH-1:0] b;
+output [`DWIDTH-1:0] out;
+
+reg [`DWIDTH-1:0] a_q;          // registered operand a
+reg [`DWIDTH-1:0] b_q;          // registered operand b
+wire [`DWIDTH-1:0] product;     // multiplier core output
+reg [`DWIDTH-1:0] product_q;    // registered result
+
+always @(posedge clk) begin
+  if (reset) begin
+    a_q <= 0;
+    b_q <= 0;
+    product_q <= 0;
+  end else begin
+    a_q <= a;
+    b_q <= b;
+    product_q <= product;
+  end
+end
+
+// Multiplier core: a hard block when complex_dsp is defined, otherwise the
+// soft-logic FPMult_16 with its own reset tied low.
+`ifdef complex_dsp
+mult_fp_clk_16 mul_u1(.clk(clk), .a(a_q), .b(b_q), .out(product));
+`else
+FPMult_16 u_FPMult (.clk(clk), .rst(1'b0), .a(a_q), .b(b_q), .result(product), .flags());
+`endif
+
+assign out = product_q;
+
+endmodule
+
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+// Addition block
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+// Registered adder wrapper: operands are flopped on the way in and the sum
+// is flopped on the way out. `reset` is synchronous and clears all three
+// registers; it is not forwarded to the adder core.
+module seq_add(a, b, out, reset, clk);
+input clk;
+input reset;
+input [`DWIDTH-1:0] a;
+input [`DWIDTH-1:0] b;
+output [`DWIDTH-1:0] out;
+
+reg [`DWIDTH-1:0] a_q;       // registered operand a
+reg [`DWIDTH-1:0] b_q;       // registered operand b
+wire [`DWIDTH-1:0] total;    // adder core output
+reg [`DWIDTH-1:0] total_q;   // registered result
+
+always @(posedge clk) begin
+  if (reset) begin
+    a_q <= 0;
+    b_q <= 0;
+    total_q <= 0;
+  end else begin
+    a_q <= a;
+    b_q <= b;
+    total_q <= total;
+  end
+end
+
+// Adder core: a hard block when complex_dsp is defined, otherwise the
+// soft-logic FPAddSub in add mode (operation = 0) with its reset tied low.
+`ifdef complex_dsp
+addition_fp_clk_16 add_u1(.clk(clk), .a(a_q), .b(b_q), .out(total));
+`else
+FPAddSub u_FPAddSub (.clk(clk), .rst(1'b0), .a(a_q), .b(b_q), .operation(1'b0), .result(total), .flags());
+`endif
+
+assign out = total_q;
+
+endmodule
+
+
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+// Subtraction block
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+// Registered subtractor wrapper: operands are flopped on the way in and the
+// result is flopped on the way out. `reset` is synchronous and clears the
+// wrapper's registers; it is not forwarded to the arithmetic core.
+//
+//   a, b  - `DWIDTH-bit operands
+//   out   - registered core result
+module seq_sub(a, b, out, reset, clk);
+input [`DWIDTH-1:0] a;
+input [`DWIDTH-1:0] b;
+input reset;
+input clk;
+output [`DWIDTH-1:0] out;
+
+reg [`DWIDTH-1:0] a_flopped;
+reg [`DWIDTH-1:0] b_flopped;
+
+wire [`DWIDTH-1:0] sub_out_temp;
+reg [`DWIDTH-1:0] sub_out_temp_reg;
+
+// Input registers.
+always @(posedge clk) begin
+  if (reset) begin
+    a_flopped <= 0;
+    b_flopped <= 0;
+  end else begin
+    a_flopped <= a;
+    b_flopped <= b;
+  end
+end
+
+//assign sub_out_temp = a - b;
+//The floating point adder has both modes - add and sub.
+
+`ifdef complex_dsp
+// NOTE(review): this instance is wired exactly like the one in seq_add and
+// no mode input is connected, so this configuration presumably still
+// computes a + b - confirm against the hard block's definition.
+addition_fp_clk_16 sub_u1(.clk(clk), .a(a_flopped), .b(b_flopped), .out(sub_out_temp));
+`else
+// FPAddSub documents its `operation` port as "0 add, 1 sub"; drive it high
+// so that this block actually subtracts instead of duplicating seq_add.
+FPAddSub u_FPAddSub2(.clk(clk), .rst(1'b0), .a(a_flopped), .b(b_flopped), .operation(1'b1), .result(sub_out_temp), .flags());
+`endif
+
+// Output register.
+always @(posedge clk) begin
+  if (reset) begin
+    sub_out_temp_reg <= 0;
+  end else begin
+    sub_out_temp_reg <= sub_out_temp;
+  end
+end
+
+assign out = sub_out_temp_reg;
+
+endmodule
+
+
+`ifndef complex_dsp
+
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+// Floating point 16-bit multiplier
+// This is a heavily modified version of:
+// https://github.com/fbrosser/DSP48E1-FP/tree/master/src/FPMult
+// Original author: Fredrik Brosser
+// Abridged by: Samidh Mehta
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+
+// Pipelined floating-point multiplier for `DWIDTH-bit operands
+// (1 sign bit, `EXPONENT exponent bits, `MANTISSA mantissa bits).
+//
+// Data passes through five register stages (pipe_0 .. pipe_4), all updated
+// in the single clocked block at the bottom of the module, with one
+// combinational sub-module between each pair of stages:
+//   pipe_0 -> FPMult_PrepModule      -> pipe_1
+//   pipe_1 -> FPMult_ExecuteModule   -> pipe_2
+//   pipe_2 -> FPMult_NormalizeModule -> pipe_3
+//   pipe_3 -> FPMult_RoundModule     -> pipe_4 -> {result, flags}
+//
+// rst is synchronous and active high; it clears every pipeline register.
+module FPMult_16(
+		clk,
+		rst,
+		a,
+		b,
+		result,
+		flags
+    );
+	
+	// Input Ports
+	input clk ;							// Clock
+	input rst ;							// Reset signal
+	input [`DWIDTH-1:0] a;						// Input A, a `DWIDTH-bit floating point number
+	input [`DWIDTH-1:0] b;						// Input B, a `DWIDTH-bit floating point number
+	
+	// Output ports
+	output [`DWIDTH-1:0] result ;					// Product, result of the operation, `DWIDTH-bit FP number
+	output [4:0] flags ;						// Exception flags, passed through from the input check
+	
+	// Internal signals
+	wire [`DWIDTH-1:0] Z_int ;					// Product, result of the operation, `DWIDTH-bit FP number
+	wire [4:0] Flags_int ;						// Exception flags produced by the round stage
+	
+	wire Sa ;							// A's sign
+	wire Sb ;							// B's sign
+	wire Sp ;							// Product sign
+	wire [`EXPONENT-1:0] Ea ;					// A's exponent
+	wire [`EXPONENT-1:0] Eb ;					// B's exponent
+	wire [2*`MANTISSA+1:0] Mp ;					// Product mantissa
+	wire [4:0] InputExc ;						// Exceptions in inputs
+	wire [`MANTISSA-1:0] NormM ;					// Normalized mantissa
+	wire [`EXPONENT:0] NormE ;					// Normalized exponent
+	wire [`MANTISSA:0] RoundM ;					// Mantissa candidate from the normalize stage
+	wire [`EXPONENT:0] RoundE ;					// Exponent candidate from the normalize stage
+	wire [`MANTISSA:0] RoundMP ;					// Alternate mantissa candidate from the normalize stage
+	wire [`EXPONENT:0] RoundEP ;					// Alternate exponent candidate from the normalize stage
+	wire GRS ;							// Rounding decision bit from the execute stage
+
+	//reg [63:0] pipe_0;						// Pipeline register Input->Prep
+	reg [2*`DWIDTH-1:0] pipe_0;					// Pipeline register Input->Prep
+
+	//reg [92:0] pipe_1;						// Pipeline register Prep->Execute
+	//reg [3*`MANTISSA+2*`EXPONENT+7:0] pipe_1;			// Pipeline register Prep->Execute
+	reg [3*`MANTISSA+2*`EXPONENT+18:0] pipe_1;			// Pipeline register Prep->Execute
+
+	//reg [38:0] pipe_2;						// Pipeline register Execute->Normalize
+	reg [`MANTISSA+`EXPONENT+7:0] pipe_2;				// Pipeline register Execute->Normalize
+	
+	//reg [72:0] pipe_3;						// Pipeline register Normalize->Round
+	reg [2*`MANTISSA+2*`EXPONENT+10:0] pipe_3;			// Pipeline register Normalize->Round
+
+	//reg [36:0] pipe_4;						// Pipeline register Round->Output
+	reg [`DWIDTH+4:0] pipe_4;					// Pipeline register Round->Output
+	
+	assign result = pipe_4[`DWIDTH+4:5] ;
+	assign flags = pipe_4[4:0] ;
+	
+	// Split the operands into sign/exponent, form the mantissa product and check for exceptions
+	FPMult_PrepModule PrepModule(clk, rst, pipe_0[2*`DWIDTH-1:`DWIDTH], pipe_0[`DWIDTH-1:0], Sa, Sb, Ea[`EXPONENT-1:0], Eb[`EXPONENT-1:0], Mp[2*`MANTISSA+1:0], InputExc[4:0]) ;
+
+	// Compute product sign, exponent sum and the normalized mantissa.
+	// The first two connections (ports a and b) are not read by FPMult_ExecuteModule,
+	// so the fact that their slices do not line up with the PIPE 1 fields is harmless.
+	FPMult_ExecuteModule ExecuteModule(pipe_1[3*`MANTISSA+`EXPONENT*2+7:2*`MANTISSA+2*`EXPONENT+8], pipe_1[2*`MANTISSA+2*`EXPONENT+7:2*`MANTISSA+7], pipe_1[2*`MANTISSA+6:5], pipe_1[2*`MANTISSA+2*`EXPONENT+6:2*`MANTISSA+`EXPONENT+7], pipe_1[2*`MANTISSA+`EXPONENT+6:2*`MANTISSA+7], pipe_1[2*`MANTISSA+2*`EXPONENT+8], pipe_1[2*`MANTISSA+2*`EXPONENT+7], Sp, NormE[`EXPONENT:0], NormM[`MANTISSA-1:0], GRS) ;
+
+	// Remove the exponent bias and produce the exponent/mantissa candidates for rounding
+	FPMult_NormalizeModule NormalizeModule(pipe_2[`MANTISSA-1:0], pipe_2[`MANTISSA+`EXPONENT:`MANTISSA], RoundE[`EXPONENT:0], RoundEP[`EXPONENT:0], RoundM[`MANTISSA:0], RoundMP[`MANTISSA:0]) ;		
+
+	// Round result and if necessary, perform a second (post-rounding) normalization step
+	//FPMult_RoundModule RoundModule(pipe_3[47:24], pipe_3[23:0], pipe_3[65:57], pipe_3[56:48], pipe_3[66], pipe_3[67], pipe_3[72:68], Z_int[31:0], Flags_int[4:0]) ;		
+	FPMult_RoundModule RoundModule(pipe_3[2*`MANTISSA+1:`MANTISSA+1], pipe_3[`MANTISSA:0], pipe_3[2*`MANTISSA+2*`EXPONENT+3:2*`MANTISSA+`EXPONENT+3], pipe_3[2*`MANTISSA+`EXPONENT+2:2*`MANTISSA+2], pipe_3[2*`MANTISSA+2*`EXPONENT+4], pipe_3[2*`MANTISSA+2*`EXPONENT+5], pipe_3[2*`MANTISSA+2*`EXPONENT+10:2*`MANTISSA+2*`EXPONENT+6], Z_int[`DWIDTH-1:0], Flags_int[4:0]) ;		
+
+// Pipeline registers: this block is clocked (posedge clk), so every stage
+// below advances by one register per cycle.
+	always @ (posedge clk) begin	
+		if(rst) begin
+			pipe_0 <= 0;
+			pipe_1 <= 0;
+			pipe_2 <= 0; 
+			pipe_3 <= 0;
+			pipe_4 <= 0;
+		end 
+		else begin		
+			/* PIPE 0
+				[2*`DWIDTH-1:`DWIDTH] A
+				[`DWIDTH-1:0] B
+			*/
+                       pipe_0 <= {a, b} ;
+
+
+			/* PIPE 1 (fields as packed by the concatenation below)
+				[2*`EXPONENT+3*`MANTISSA + 18] unused, zero-filled by the assignment
+				[2*`EXPONENT+3*`MANTISSA + 17: 2*`EXPONENT+2*`MANTISSA + 18] //pipe_0[`DWIDTH+`MANTISSA-1:`DWIDTH] , mantissa of A
+				[2*`EXPONENT+2*`MANTISSA + 17 :2*`EXPONENT+2*`MANTISSA + 9] // pipe_0[8:0]
+				[2*`EXPONENT+2*`MANTISSA + 8] Sa
+				[2*`EXPONENT+2*`MANTISSA + 7] Sb
+				[2*`EXPONENT+2*`MANTISSA + 6:`EXPONENT+2*`MANTISSA+7] Ea
+				[`EXPONENT +2*`MANTISSA+6:2*`MANTISSA+7] Eb
+				[2*`MANTISSA+1+5:5] Mp
+				[4:0] InputExc
+			*/
+			//pipe_1 <= {pipe_0[`DWIDTH+`MANTISSA-1:`DWIDTH], pipe_0[`MANTISSA_MUL_SPLIT_LSB-1:0], Sa, Sb, Ea[`EXPONENT-1:0], Eb[`EXPONENT-1:0], Mp[2*`MANTISSA-1:0], InputExc[4:0]} ;
+			pipe_1 <= {pipe_0[`DWIDTH+`MANTISSA-1:`DWIDTH], pipe_0[8:0], Sa, Sb, Ea[`EXPONENT-1:0], Eb[`EXPONENT-1:0], Mp[2*`MANTISSA+1:0], InputExc[4:0]} ;
+			
+			/* PIPE 2
+				[`EXPONENT + `MANTISSA + 7:`EXPONENT + `MANTISSA + 3] InputExc
+				[`EXPONENT + `MANTISSA + 2] GRS
+				[`EXPONENT + `MANTISSA + 1] Sp
+				[`EXPONENT + `MANTISSA:`MANTISSA] NormE
+				[`MANTISSA-1:0] NormM
+			*/
+			pipe_2 <= {pipe_1[4:0], GRS, Sp, NormE[`EXPONENT:0], NormM[`MANTISSA-1:0]} ;
+			/* PIPE 3
+				[2*`EXPONENT+2*`MANTISSA+10:2*`EXPONENT+2*`MANTISSA+6] InputExc
+				[2*`EXPONENT+2*`MANTISSA+5] GRS
+				[2*`EXPONENT+2*`MANTISSA+4] Sp	
+				[2*`EXPONENT+2*`MANTISSA+3:`EXPONENT+2*`MANTISSA+3] RoundE
+				[`EXPONENT+2*`MANTISSA+2:2*`MANTISSA+2] RoundEP
+				[2*`MANTISSA+1:`MANTISSA+1] RoundM
+				[`MANTISSA:0] RoundMP
+			*/
+			pipe_3 <= {pipe_2[`EXPONENT+`MANTISSA+7:`EXPONENT+`MANTISSA+1], RoundE[`EXPONENT:0], RoundEP[`EXPONENT:0], RoundM[`MANTISSA:0], RoundMP[`MANTISSA:0]} ;
+			/* PIPE 4
+				[`DWIDTH+4:5] Z
+				[4:0] Flags
+			*/				
+			pipe_4 <= {Z_int[`DWIDTH-1:0], Flags_int[4:0]} ;
+		end
+	end
+		
+endmodule
+
+
+
+// First (combinational) stage of the floating-point multiplier.
+//
+// Splits both operands into sign and exponent, multiplies the two mantissas
+// (each with the implicit leading 1 prepended) and classifies the operands
+// as NaN / infinity.
+//
+//   clk, rst  - part of the port list but not used by this module
+//   a, b      - operands: {sign, `EXPONENT-bit exponent, `MANTISSA-bit mantissa}
+//   Sa, Sb    - operand signs
+//   Ea, Eb    - operand exponents, still biased
+//   Mp        - {1,Ma} * {1,Mb}, 2*`MANTISSA+2 bits wide
+//   InputExc  - {any exception, ANaN, BNaN, AInf, BInf}
+module FPMult_PrepModule (
+		clk,
+		rst,
+		a,
+		b,
+		Sa,
+		Sb,
+		Ea,
+		Eb,
+		Mp,
+		InputExc
+	);
+	
+	// Input ports
+	input clk ;
+	input rst ;
+	input [`DWIDTH-1:0] a ;								// Input A, a `DWIDTH-bit floating point number
+	input [`DWIDTH-1:0] b ;								// Input B, a `DWIDTH-bit floating point number
+	
+	// Output ports
+	output Sa ;										// A's sign
+	output Sb ;										// B's sign
+	output [`EXPONENT-1:0] Ea ;								// A's exponent
+	output [`EXPONENT-1:0] Eb ;								// B's exponent
+	output [2*`MANTISSA+1:0] Mp ;							// Mantissa product
+	output [4:0] InputExc ;						// Input numbers are exceptions
+	
+	// Internal signals							// If signal is high...
+	wire ANaN ;										// A is NaN
+	wire BNaN ;										// B is NaN
+	wire AInf ;										// A is infinity
+	wire BInf ;										// B is infinity
+	
+	// Operand classification. The exponent field is [`DWIDTH-2:`MANTISSA] and
+	// the mantissa field is [`MANTISSA-1:0]; NaN and infinity share an all-ones
+	// exponent and are told apart by the mantissa, so the second term of each
+	// expression must look at the mantissa bits.
+	assign ANaN = &(a[`DWIDTH-2:`MANTISSA]) &  |(a[`MANTISSA-1:0]) ;		// All one exponent and not all zero mantissa - NaN
+	assign BNaN = &(b[`DWIDTH-2:`MANTISSA]) &  |(b[`MANTISSA-1:0]) ;		// All one exponent and not all zero mantissa - NaN
+	assign AInf = &(a[`DWIDTH-2:`MANTISSA]) & ~|(a[`MANTISSA-1:0]) ;		// All one exponent and all zero mantissa - Infinity
+	assign BInf = &(b[`DWIDTH-2:`MANTISSA]) & ~|(b[`MANTISSA-1:0]) ;		// All one exponent and all zero mantissa - Infinity
+	
+	// Check for any exceptions and put all flags into exception vector
+	assign InputExc = {(ANaN | BNaN | AInf | BInf), ANaN, BNaN, AInf, BInf} ;
+	
+	// Take input numbers apart
+	assign Sa = a[`DWIDTH-1] ;							// A's sign
+	assign Sb = b[`DWIDTH-1] ;							// B's sign
+	assign Ea = a[`DWIDTH-2:`MANTISSA];						// A's exponent (passed through unchanged)
+	assign Eb = b[`DWIDTH-2:`MANTISSA];						// B's exponent (passed through unchanged)
+	
+	// Full mantissa product, with the implicit leading 1 restored on both sides
+	assign Mp = ({1'b1,a[`MANTISSA-1:0]}*{1'b1, b[`MANTISSA-1:0]}) ;
+
+endmodule
+
+
+// Second (combinational) stage of the floating-point multiplier.
+//
+// Takes the mantissa product computed by the previous stage and derives the
+// product sign, the exponent sum and the normalized mantissa.
+//
+//   a, b    - part of the port list but not used by this module
+//   MpC     - mantissa product, 2*`MANTISSA+2 bits
+//   Ea, Eb  - operand exponents
+//   Sa, Sb  - operand signs
+//   Sp      - product sign (Sa xor Sb)
+//   NormE   - Ea + Eb, plus one when the product's top bit is set
+//   NormM   - `MANTISSA bits taken just below the leading one of the product
+//   GRS     - rounding decision bit derived from the discarded low bits
+module FPMult_ExecuteModule(
+		a,
+		b,
+		MpC,
+		Ea,
+		Eb,
+		Sa,
+		Sb,
+		Sp,
+		NormE,
+		NormM,
+		GRS
+    );
+
+	// Input ports
+	input [`MANTISSA-1:0] a ;
+	input [2*`EXPONENT:0] b ;
+	input [2*`MANTISSA+1:0] MpC ;
+	input [`EXPONENT-1:0] Ea ;
+	input [`EXPONENT-1:0] Eb ;
+	input Sa ;
+	input Sb ;
+
+	// Output ports
+	output Sp ;
+	output [`EXPONENT:0] NormE ;
+	output [`MANTISSA-1:0] NormM ;
+	output GRS ;
+
+	// High when the product occupies its topmost bit, i.e. the mantissa
+	// must be taken one position higher and the exponent bumped by one.
+	wire prod_msb ;
+	assign prod_msb = MpC[2*`MANTISSA+1] ;
+
+	// Equal signs give a positive product
+	assign Sp = Sa ^ Sb ;
+
+	assign NormE = Ea + Eb + prod_msb ;
+	assign NormM = prod_msb ? MpC[2*`MANTISSA:`MANTISSA+1] : MpC[2*`MANTISSA-1:`MANTISSA] ;
+
+	assign GRS = (MpC[`MANTISSA+1] & MpC[`MANTISSA]) | (|MpC[`MANTISSA-1:0]) ;
+
+endmodule
+
+// Third (combinational) stage of the floating-point multiplier.
+//
+// Removes the exponent bias that was counted twice when the two biased
+// exponents were added, and hands the round stage two exponent candidates:
+//
+//   NormM    - normalized mantissa from the execute stage
+//   NormE    - sum of the two biased exponents (plus normalization carry)
+//   RoundE   - NormE - bias
+//   RoundEP  - RoundE + 1, for the round stage to use when a post-rounding
+//              shift increments the exponent
+//   RoundM   - NormM, zero-extended by one bit
+//   RoundMP  - NormM, zero-extended by one bit
+module FPMult_NormalizeModule(
+		NormM,
+		NormE,
+		RoundE,
+		RoundEP,
+		RoundM,
+		RoundMP
+    );
+
+	// Input Ports
+	input [`MANTISSA-1:0] NormM ;									// Normalized mantissa
+	input [`EXPONENT:0] NormE ;									// Normalized exponent
+
+	// Output Ports
+	output [`EXPONENT:0] RoundE ;
+	output [`EXPONENT:0] RoundEP ;
+	output [`MANTISSA:0] RoundM ;
+	output [`MANTISSA:0] RoundMP ; 
+	
+// bias = 2^(EXPONENT-1) - 1, e.g. EXPONENT = 5 gives 2^4 - 1 = 15
+
+wire [`EXPONENT-1 : 0] bias;
+
+assign bias =  ((1<< (`EXPONENT -1)) -1);
+
+	assign RoundE = NormE - bias ;
+	// The "P" exponent is the incremented one (RoundE + 1); it was previously
+	// computed as NormE - bias - 1, which decrements instead.
+	assign RoundEP = NormE - bias + 1 ;
+	assign RoundM = NormM ;
+	assign RoundMP = NormM ;
+
+endmodule
+
+// Final (combinational) stage of the floating-point multiplier.
+//
+// Picks one of the two mantissa candidates according to GRS, applies a
+// one-bit right shift (and switches to the alternate exponent) if the chosen
+// mantissa has its top bit set, then assembles {sign, exponent, mantissa}.
+// The exception vector is forwarded unchanged as Flags.
+module FPMult_RoundModule(
+		RoundM,
+		RoundMP,
+		RoundE,
+		RoundEP,
+		Sp,
+		GRS,
+		InputExc,
+		Z,
+		Flags
+    );
+
+	// Input Ports
+	input [`MANTISSA:0] RoundM ;									// Mantissa used when GRS is low
+	input [`MANTISSA:0] RoundMP ;									// Mantissa used when GRS is high
+	input [`EXPONENT:0] RoundE ;									// Exponent used when no post-rounding shift occurs
+	input [`EXPONENT:0] RoundEP ;									// Exponent used when a post-rounding shift occurs
+	input Sp ;												// Product sign
+	input GRS ;												// Selects RoundMP over RoundM
+	input [4:0] InputExc ;											// Exception vector from the input check
+	
+	// Output Ports
+	output [`DWIDTH-1:0] Z ;										// Final product
+	output [4:0] Flags ;											// Copy of InputExc
+	
+	// Internal Signals
+	wire [`EXPONENT:0] FinalE ;									// Rounded exponent
+	wire [`MANTISSA:0] FinalM;									// Rounded mantissa after the optional shift
+	wire [`MANTISSA:0] PreShiftM;									// Mantissa chosen by GRS, before the optional shift
+	
+	assign PreShiftM = GRS ? RoundMP : RoundM ;	// Use the "P" mantissa candidate when GRS is set
+	
+	// Post rounding normalization (potential one bit shift> use shifted mantissa if there is overflow)
+	assign FinalM = (PreShiftM[`MANTISSA] ? {1'b0, PreShiftM[`MANTISSA:1]} : PreShiftM[`MANTISSA:0]) ;
+	
+	assign FinalE = (PreShiftM[`MANTISSA] ? RoundEP : RoundE) ; // Use the "P" exponent candidate if a shift was done
+	
+	assign Z = {Sp, FinalE[`EXPONENT-1:0], FinalM[`MANTISSA-1:0]} ;   // Putting the pieces together
+	assign Flags = InputExc[4:0];
+
+endmodule
+`endif
+
+///////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////
+// Floating point 16-bit adder
+// This is a heavily modified version of:
+// https://github.com/fbrosser/DSP48E1-FP/tree/master/src/FP_AddSub
+// Original author: Fredrik Brosser
+// Abridged by: Samidh Mehta
+///////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////
+`ifndef complex_dsp
+
+module FPAddSub(
+		//bf16,
+		clk,
+		rst,
+		a,
+		b,
+		operation,			// 0 add, 1 sub
+		result,
+		flags
+	);
+	//input bf16; //1 for Bfloat16, 0 for IEEE half precision
+
+	// Clock and reset
+	input clk ;										// Clock signal
+	input rst ;										// Reset (active high, resets pipeline registers)
+	
+	// Input ports
+	input [`DWIDTH-1:0] a ;								// Input A, a 32-bit floating point number
+	input [`DWIDTH-1:0] b ;								// Input B, a 32-bit floating point number
+	input operation ;								// Operation select signal
+	
+	// Output ports
+	output [`DWIDTH-1:0] result ;						// Result of the operation
+	output [4:0] flags ;							// Flags indicating exceptions according to IEEE754
+	
+	// Pipeline Registers
+	//reg [79:0] pipe_1;							// Pipeline register PreAlign->Align1
+	reg [2*`EXPONENT + 2*`DWIDTH + 5:0] pipe_1;							// Pipeline register PreAlign->Align1
+
+	//reg [67:0] pipe_2;							// Pipeline register Align1->Align3
+	//reg [2*`EXPONENT+ 2*`MANTISSA + 8:0] pipe_2;							// Pipeline register Align1->Align3
+	wire [2*`EXPONENT+ 2*`MANTISSA + 8:0] pipe_2;
+
+	//reg [76:0] pipe_3;	68						// Pipeline register Align1->Align3
+	reg [2*`EXPONENT+ 2*`MANTISSA + 9:0] pipe_3;							// Pipeline register Align1->Align3
+
+	//reg [69:0] pipe_4;							// Pipeline register Align3->Execute
+	//reg [2*`EXPONENT+ 2*`MANTISSA + 9:0] pipe_4;							// Pipeline register Align3->Execute
+	wire [2*`EXPONENT+ 2*`MANTISSA + 9:0] pipe_4;
+	
+	//reg [51:0] pipe_5;							// Pipeline register Execute->Normalize
+	reg [`DWIDTH+`EXPONENT+11:0] pipe_5;							// Pipeline register Execute->Normalize
+
+	//reg [56:0] pipe_6;							// Pipeline register Nomalize->NormalizeShift1
+	//reg [`DWIDTH+`EXPONENT+16:0] pipe_6;							// Pipeline register Nomalize->NormalizeShift1
+	wire [`DWIDTH+`EXPONENT+16:0] pipe_6;
+
+	//reg [56:0] pipe_7;							// Pipeline register NormalizeShift2->NormalizeShift3
+	//reg [`DWIDTH+`EXPONENT+16:0] pipe_7;							// Pipeline register NormalizeShift2->NormalizeShift3
+	wire [`DWIDTH+`EXPONENT+16:0] pipe_7;
+	//reg [54:0] pipe_8;							// Pipeline register NormalizeShift3->Round
+	reg [`EXPONENT*2+`MANTISSA+15:0] pipe_8;							// Pipeline register NormalizeShift3->Round
+
+	//reg [40:0] pipe_9;							// Pipeline register NormalizeShift3->Round
+	//reg [`DWIDTH+8:0] pipe_9;							// Pipeline register NormalizeShift3->Round
+	wire [`DWIDTH+8:0] pipe_9;
+
+	// Internal wires between modules
+	wire [`DWIDTH-2:0] Aout_0 ;							// A - sign
+	wire [`DWIDTH-2:0] Bout_0 ;							// B - sign
+	wire Opout_0 ;									// A's sign
+	wire Sa_0 ;										// A's sign
+	wire Sb_0 ;										// B's sign
+	wire MaxAB_1 ;									// Indicates the larger of A and B(0/A, 1/B)
+	wire [`EXPONENT-1:0] CExp_1 ;							// Common Exponent
+	wire [`EXPONENT-1:0] Shift_1 ;							// Number of steps to smaller mantissa shift right (align)
+	wire [`MANTISSA-1:0] Mmax_1 ;							// Larger mantissa
+	wire [4:0] InputExc_0 ;						// Input numbers are exceptions
+	wire [2*`EXPONENT-1:0] ShiftDet_0 ;
+	wire [`MANTISSA-1:0] MminS_1 ;						// Smaller mantissa after 0/16 shift
+	wire [`MANTISSA:0] MminS_2 ;						// Smaller mantissa after 0/4/8/12 shift
+	wire [`MANTISSA:0] Mmin_3 ;							// Smaller mantissa after 0/1/2/3 shift
+	wire [`DWIDTH:0] Sum_4 ;
+	wire PSgn_4 ;
+	wire Opr_4 ;
+	wire [`EXPONENT-1:0] Shift_5 ;							// Number of steps to shift sum left (normalize)
+	wire [`DWIDTH:0] SumS_5 ;							// Sum after 0/16 shift
+	wire [`DWIDTH:0] SumS_6 ;							// Sum after 0/16 shift
+	wire [`DWIDTH:0] SumS_7 ;							// Sum after 0/16 shift
+	wire [`MANTISSA-1:0] NormM_8 ;						// Normalized mantissa
+	wire [`EXPONENT:0] NormE_8;							// Adjusted exponent
+	wire ZeroSum_8 ;								// Zero flag
+	wire NegE_8 ;									// Flag indicating negative exponent
+	wire R_8 ;										// Round bit
+	wire S_8 ;										// Final sticky bit
+	wire FG_8 ;										// Final sticky bit
+	wire [`DWIDTH-1:0] P_int ;
+	wire EOF ;
+	
+	// Prepare the operands for alignment and check for exceptions
+	FPAddSub_PrealignModule PrealignModule
+	(	// Inputs
+		a, b, operation,
+		// Outputs
+		Sa_0, Sb_0, ShiftDet_0[2*`EXPONENT-1:0], InputExc_0[4:0], Aout_0[`DWIDTH-2:0], Bout_0[`DWIDTH-2:0], Opout_0) ;
+		
+	// Prepare the operands for alignment and check for exceptions
+	FPAddSub_AlignModule AlignModule
+	(	// Inputs
+		pipe_1[2*`EXPONENT + 2*`DWIDTH + 4: 2*`EXPONENT +`DWIDTH + 6], pipe_1[2*`EXPONENT +`DWIDTH + 5 :  2*`EXPONENT +7], pipe_1[2*`EXPONENT+4:5],
+		// Outputs
+		CExp_1[`EXPONENT-1:0], MaxAB_1, Shift_1[`EXPONENT-1:0], MminS_1[`MANTISSA-1:0], Mmax_1[`MANTISSA-1:0]) ;	
+
+	// Alignment Shift Stage 1
+	FPAddSub_AlignShift1 AlignShift1
+	(  // Inputs
+		//bf16, 
+		pipe_2[`MANTISSA-1:0], pipe_2[`EXPONENT+ 2*`MANTISSA + 4 : 2*`MANTISSA + 7],
+		// Outputs
+		MminS_2[`MANTISSA:0]) ;
+
+	// Alignment Shift Stage 3 and compution of guard and sticky bits
+	FPAddSub_AlignShift2 AlignShift2  
+	(  // Inputs
+		pipe_3[`MANTISSA:0], pipe_3[2*`MANTISSA+7:2*`MANTISSA+6],
+		// Outputs
+		Mmin_3[`MANTISSA:0]) ;
+						
+	// Perform mantissa addition
+	FPAddSub_ExecutionModule ExecutionModule
+	(  // Inputs
+		pipe_4[`MANTISSA*2+5:`MANTISSA+6], pipe_4[`MANTISSA:0], pipe_4[2*`EXPONENT+ 2*`MANTISSA + 8], pipe_4[2*`EXPONENT+ 2*`MANTISSA + 7], pipe_4[2*`EXPONENT+ 2*`MANTISSA + 6], pipe_4[2*`EXPONENT+ 2*`MANTISSA + 9],
+		// Outputs
+		Sum_4[`DWIDTH:0], PSgn_4, Opr_4) ;
+	
+	// Prepare normalization of result
+	FPAddSub_NormalizeModule NormalizeModule
+	(  // Inputs
+		pipe_5[`DWIDTH:0], 
+		// Outputs
+		SumS_5[`DWIDTH:0], Shift_5[4:0]) ;
+					
+	// Normalization Shift Stage 1
+	FPAddSub_NormalizeShift1 NormalizeShift1
+	(  // Inputs
+		pipe_6[`DWIDTH:0], pipe_6[`DWIDTH+`EXPONENT+14:`DWIDTH+`EXPONENT+11],
+		// Outputs
+		SumS_7[`DWIDTH:0]) ;
+		
+	// Normalization Shift Stage 3 and final guard, sticky and round bits
+	FPAddSub_NormalizeShift2 NormalizeShift2
+	(  // Inputs
+		pipe_7[`DWIDTH:0], pipe_7[`DWIDTH+`EXPONENT+5:`DWIDTH+6], pipe_7[`DWIDTH+`EXPONENT+15:`DWIDTH+`EXPONENT+11],
+		// Outputs
+		NormM_8[`MANTISSA-1:0], NormE_8[`EXPONENT:0], ZeroSum_8, NegE_8, R_8, S_8, FG_8) ;
+
+	// Round and put result together
+	FPAddSub_RoundModule RoundModule
+	(  // Inputs
+		 pipe_8[3], pipe_8[4+`EXPONENT:4], pipe_8[`EXPONENT+`MANTISSA+4:5+`EXPONENT], pipe_8[1], pipe_8[0], pipe_8[`EXPONENT*2+`MANTISSA+15], pipe_8[`EXPONENT*2+`MANTISSA+12], pipe_8[`EXPONENT*2+`MANTISSA+11], pipe_8[`EXPONENT*2+`MANTISSA+14], pipe_8[`EXPONENT*2+`MANTISSA+10], 
+		// Outputs
+		P_int[`DWIDTH-1:0], EOF) ;
+	
+	// Check for exceptions
+	FPAddSub_ExceptionModule Exceptionmodule
+	(  // Inputs
+		pipe_9[8+`DWIDTH:9], pipe_9[8], pipe_9[7], pipe_9[6], pipe_9[5:1], pipe_9[0], 
+		// Outputs
+		result[`DWIDTH-1:0], flags[4:0]) ;			
+	
+
+assign pipe_2 = {pipe_1[2*`EXPONENT + 2*`DWIDTH + 5], pipe_1[2*`EXPONENT +6:2*`EXPONENT +5], MaxAB_1, CExp_1[`EXPONENT-1:0], Shift_1[`EXPONENT-1:0], Mmax_1[`MANTISSA-1:0], pipe_1[4:0], MminS_1[`MANTISSA-1:0]} ;
+assign pipe_4 = {pipe_3[2*`EXPONENT+ 2*`MANTISSA + 9:`MANTISSA+1], Mmin_3[`MANTISSA:0]} ;
+assign pipe_6 = {pipe_5[`DWIDTH+`EXPONENT+11], Shift_5[4:0], pipe_5[`DWIDTH+`EXPONENT+10:`DWIDTH+1], SumS_5[`DWIDTH:0]} ;
+assign pipe_7 = {pipe_6[`DWIDTH+`EXPONENT+16:`DWIDTH+1], SumS_7[`DWIDTH:0]} ;
+assign pipe_9 = {P_int[`DWIDTH-1:0], pipe_8[2], pipe_8[1], pipe_8[0], pipe_8[`EXPONENT+`MANTISSA+9:`EXPONENT+`MANTISSA+5], EOF} ;
+
+	always @ (posedge clk) begin	
+		if(rst) begin
+			pipe_1 <= 0;
+			//pipe_2 <= 0;
+			pipe_3 <= 0;
+			//pipe_4 <= 0;
+			pipe_5 <= 0;
+			//pipe_6 <= 0;
+			//pipe_7 <= 0;
+			pipe_8 <= 0;
+			//pipe_9 <= 0;
+		end 
+		else begin
+/* PIPE_1:
+	[2*`EXPONENT + 2*`DWIDTH + 5]  Opout_0
+	[2*`EXPONENT + 2*`DWIDTH + 4: 2*`EXPONENT +`DWIDTH + 6] A_out0
+	[2*`EXPONENT +`DWIDTH + 5 :  2*`EXPONENT +7] Bout_0
+	[2*`EXPONENT +6] Sa_0
+	[2*`EXPONENT +5] Sb_0
+	[2*`EXPONENT +4 : 5] ShiftDet_0
+	[4:0] Input Exc
+*/
+			pipe_1 <= {Opout_0, Aout_0[`DWIDTH-2:0], Bout_0[`DWIDTH-2:0], Sa_0, Sb_0, ShiftDet_0[2*`EXPONENT -1:0], InputExc_0[4:0]} ;	
+/* PIPE_2
+[2*`EXPONENT+ 2*`MANTISSA + 8] operation
+[2*`EXPONENT+ 2*`MANTISSA + 7] Sa_0
+[2*`EXPONENT+ 2*`MANTISSA + 6] Sb_0
+[2*`EXPONENT+ 2*`MANTISSA + 5] MaxAB_0
+[2*`EXPONENT+ 2*`MANTISSA + 4:`EXPONENT+ 2*`MANTISSA + 5] CExp_0
+[`EXPONENT+ 2*`MANTISSA + 4 : 2*`MANTISSA + 5] Shift_0
+[2*`MANTISSA + 4:`MANTISSA + 5] Mmax_0
+[`MANTISSA + 4 : `MANTISSA] InputExc_0
+[`MANTISSA-1:0] MminS_1
+*/
+			//pipe_2 <= {pipe_1[2*`EXPONENT + 2*`DWIDTH + 5], pipe_1[2*`EXPONENT +6:2*`EXPONENT +5], MaxAB_1, CExp_1[`EXPONENT-1:0], Shift_1[`EXPONENT-1:0], Mmax_1[`MANTISSA-1:0], pipe_1[4:0], MminS_1[`MANTISSA-1:0]} ;	
+/* PIPE_3
+[2*`EXPONENT+ 2*`MANTISSA + 9] operation
+[2*`EXPONENT+ 2*`MANTISSA + 8] Sa_0
+[2*`EXPONENT+ 2*`MANTISSA + 7] Sb_0
+[2*`EXPONENT+ 2*`MANTISSA + 6] MaxAB_0
+[2*`EXPONENT+ 2*`MANTISSA + 5:`EXPONENT+ 2*`MANTISSA + 6] CExp_0
+[`EXPONENT+ 2*`MANTISSA + 5 : 2*`MANTISSA + 6] Shift_0
+[2*`MANTISSA + 5:`MANTISSA + 6] Mmax_0
+[`MANTISSA + 5 : `MANTISSA + 1] InputExc_0
+[`MANTISSA:0] MminS_2
+*/
+			pipe_3 <= {pipe_2[2*`EXPONENT+ 2*`MANTISSA + 8:`MANTISSA], MminS_2[`MANTISSA:0]} ;	
+/* PIPE_4
+[2*`EXPONENT+ 2*`MANTISSA + 9] operation
+[2*`EXPONENT+ 2*`MANTISSA + 8] Sa_0
+[2*`EXPONENT+ 2*`MANTISSA + 7] Sb_0
+[2*`EXPONENT+ 2*`MANTISSA + 6] MaxAB_0
+[2*`EXPONENT+ 2*`MANTISSA + 5:`EXPONENT+ 2*`MANTISSA + 6] CExp_0
+[`EXPONENT+ 2*`MANTISSA + 5 : 2*`MANTISSA + 6] Shift_0
+[2*`MANTISSA + 5:`MANTISSA + 6] Mmax_0
+[`MANTISSA + 5 : `MANTISSA + 1] InputExc_0
+[`MANTISSA:0] MminS_3
+*/				
+			//pipe_4 <= {pipe_3[2*`EXPONENT+ 2*`MANTISSA + 9:`MANTISSA+1], Mmin_3[`MANTISSA:0]} ;	
+/* PIPE_5 :
+[`DWIDTH+ `EXPONENT + 11] operation
+[`DWIDTH+ `EXPONENT + 10] PSgn_4
+[`DWIDTH+ `EXPONENT + 9] Opr_4
+[`DWIDTH+ `EXPONENT + 8] Sa_0
+[`DWIDTH+ `EXPONENT + 7] Sb_0
+[`DWIDTH+ `EXPONENT + 6] MaxAB_0
+[`DWIDTH+ `EXPONENT + 5 :`DWIDTH+6] CExp_0
+[`DWIDTH+5:`DWIDTH+1] InputExc_0
+[`DWIDTH:0] Sum_4
+*/					
+			pipe_5 <= {pipe_4[2*`EXPONENT+ 2*`MANTISSA + 9], PSgn_4, Opr_4, pipe_4[2*`EXPONENT+ 2*`MANTISSA + 8:`EXPONENT+ 2*`MANTISSA + 6], pipe_4[`MANTISSA+5:`MANTISSA+1], Sum_4[`DWIDTH:0]} ;
+/* PIPE_6 :
+[`DWIDTH+ `EXPONENT + 16] operation
+[`DWIDTH+ `EXPONENT + 15:`DWIDTH+ `EXPONENT + 11] Shift_5
+[`DWIDTH+ `EXPONENT + 10] PSgn_4
+[`DWIDTH+ `EXPONENT + 9] Opr_4
+[`DWIDTH+ `EXPONENT + 8] Sa_0
+[`DWIDTH+ `EXPONENT + 7] Sb_0
+[`DWIDTH+ `EXPONENT + 6] MaxAB_0
+[`DWIDTH+ `EXPONENT + 5 :`DWIDTH+6] CExp_0
+[`DWIDTH+5:`DWIDTH+1] InputExc_0
+[`DWIDTH:0] Sum_4
+*/				
+			//pipe_6 <= {pipe_5[`DWIDTH+`EXPONENT+11], Shift_5[4:0], pipe_5[`DWIDTH+`EXPONENT+10:`DWIDTH+1], SumS_5[`DWIDTH:0]} ;	
+/* PIPE_7 :
+[`DWIDTH+ `EXPONENT + 16] operation
+[`DWIDTH+ `EXPONENT + 15:`DWIDTH+ `EXPONENT + 11] Shift_5
+[`DWIDTH+ `EXPONENT + 10] PSgn_4
+[`DWIDTH+ `EXPONENT + 9] Opr_4
+[`DWIDTH+ `EXPONENT + 8] Sa_0
+[`DWIDTH+ `EXPONENT + 7] Sb_0
+[`DWIDTH+ `EXPONENT + 6] MaxAB_0
+[`DWIDTH+ `EXPONENT + 5 :`DWIDTH+6] CExp_0
+[`DWIDTH+5:`DWIDTH+1] InputExc_0
+[`DWIDTH:0] Sum_4
+*/						
+			//pipe_7 <= {pipe_6[`DWIDTH+`EXPONENT+16:`DWIDTH+1], SumS_7[`DWIDTH:0]} ;	
+/* PIPE_8:
+[2*`EXPONENT + `MANTISSA + 15] FG_8 
+[2*`EXPONENT + `MANTISSA + 14] operation
+[2*`EXPONENT + `MANTISSA + 13] PSgn_4
+[2*`EXPONENT + `MANTISSA + 12] Sa_0
+[2*`EXPONENT + `MANTISSA + 11] Sb_0
+[2*`EXPONENT + `MANTISSA + 10] MaxAB_0
+[2*`EXPONENT + `MANTISSA + 9:`EXPONENT + `MANTISSA + 10] CExp_0
+[`EXPONENT + `MANTISSA + 9:`EXPONENT + `MANTISSA + 5] InputExc_8
+[`EXPONENT + `MANTISSA + 4 :`EXPONENT + 5] NormM_8 
+[`EXPONENT + 4 :4] NormE_8
+[3] ZeroSum_8
+[2] NegE_8
+[1] R_8
+[0] S_8
+*/				
+			pipe_8 <= {FG_8, pipe_7[`DWIDTH+`EXPONENT+16], pipe_7[`DWIDTH+`EXPONENT+10], pipe_7[`DWIDTH+`EXPONENT+8:`DWIDTH+1], NormM_8[`MANTISSA-1:0], NormE_8[`EXPONENT:0], ZeroSum_8, NegE_8, R_8, S_8} ;	
+/* pipe_9:
+[`DWIDTH + 8 :9] P_int
+[8] NegE_8
+[7] R_8
+[6] S_8
+[5:1] InputExc_8
+[0] EOF
+*/				
+			//pipe_9 <= {P_int[`DWIDTH-1:0], pipe_8[2], pipe_8[1], pipe_8[0], pipe_8[`EXPONENT+`MANTISSA+9:`EXPONENT+`MANTISSA+5], EOF} ;	
+		end
+	end		
+	
+endmodule
+
+
+//
+// Description:	 	The pre-alignment module is responsible for taking the inputs
+//							apart and checking the parts for exceptions.
+//							The exponent difference is also calculated in this module.
+//
+
+
+module FPAddSub_PrealignModule(
+		A,
+		B,
+		operation,
+		Sa,
+		Sb,
+		ShiftDet,
+		InputExc,
+		Aout,
+		Bout,
+		Opout
+	);
+	
+	// Operands and the requested operation
+	input [`DWIDTH-1:0] A ;						// Operand A; the top bit is its sign
+	input [`DWIDTH-1:0] B ;						// Operand B; the top bit is its sign
+	input operation ;							// Forwarded unchanged on Opout
+	
+	// Unpacked fields for the following stage
+	output Sa ;									// Top bit of A
+	output Sb ;									// Top bit of B
+	output [2*`EXPONENT-1:0] ShiftDet ;			// {ExpB - ExpA, ExpA - ExpB}
+	output [4:0] InputExc ;						// {any special, A NaN, B NaN, A Inf, B Inf}
+	output [`DWIDTH-2:0] Aout ;					// A without its top bit
+	output [`DWIDTH-2:0] Bout ;					// B without its top bit
+	output Opout ;
+	
+	// Classification helpers
+	wire AExpOnes ;								// Every exponent bit of A is 1
+	wire BExpOnes ;								// Every exponent bit of B is 1
+	wire AFracSet ;								// At least one mantissa bit of A is 1
+	wire BFracSet ;								// At least one mantissa bit of B is 1
+	wire [`EXPONENT-1:0] DiffAB ;				// ExpA - ExpB, modulo 2^EXPONENT
+	wire [`EXPONENT-1:0] DiffBA ;				// ExpB - ExpA, modulo 2^EXPONENT
+	
+	assign AExpOnes = &(A[`DWIDTH-2:`DWIDTH-1-`EXPONENT]) ;
+	assign BExpOnes = &(B[`DWIDTH-2:`DWIDTH-1-`EXPONENT]) ;
+	assign AFracSet = |(A[`MANTISSA-1:0]) ;
+	assign BFracSet = |(B[`MANTISSA-1:0]) ;
+	
+	// All-ones exponent: a set mantissa bit makes it a NaN, a clear mantissa an infinity
+	assign InputExc = {(AExpOnes | BExpOnes), (AExpOnes & AFracSet), (BExpOnes & BFracSet), (AExpOnes & ~AFracSet), (BExpOnes & ~BFracSet)} ;
+	
+	// Both exponent differences, each formed as x + ~y + 1 (two's-complement subtraction)
+	assign DiffAB = (A[`DWIDTH-2:`MANTISSA] + ~(B[`DWIDTH-2:`MANTISSA]) + 1) ;
+	assign DiffBA = (B[`DWIDTH-2:`MANTISSA] + ~(A[`DWIDTH-2:`MANTISSA]) + 1) ;
+	assign ShiftDet = {DiffBA[`EXPONENT-1:0], DiffAB[`EXPONENT-1:0]} ;
+	
+	// Plain pass-through of the sign bits, the magnitudes and the operation
+	assign Sa = A[`DWIDTH-1] ;
+	assign Sb = B[`DWIDTH-1] ;
+	assign Aout = A[`DWIDTH-2:0] ;
+	assign Bout = B[`DWIDTH-2:0] ;
+	assign Opout = operation ;
+	
+endmodule
+
+
+//
+// Description:	 	The alignment module determines the larger input operand and
+//							sets the mantissas, shift and common exponent accordingly.
+//
+
+
+module FPAddSub_AlignModule (
+		A,
+		B,
+		ShiftDet,
+		CExp,
+		MaxAB,
+		Shift,
+		Mmin,
+		Mmax
+	);
+	
+	// Compares the two operand magnitudes and routes mantissas, exponent and
+	// shift amount according to which one is larger.
+	// Operand magnitudes and the two candidate shift amounts
+	input [`DWIDTH-2:0] A ;							// Magnitude of operand A
+	input [`DWIDTH-2:0] B ;							// Magnitude of operand B
+	input [2*`EXPONENT-1:0] ShiftDet ;				// Two shift candidates, one per ordering
+	
+	// Outcome of the magnitude comparison
+	output [`EXPONENT-1:0] CExp ;					// Exponent field of the larger operand
+	output MaxAB ;									// 1 when B is strictly larger than A, else 0
+	output [`EXPONENT-1:0] Shift ;					// Right-shift amount for the smaller mantissa
+	output [`MANTISSA-1:0] Mmin ;					// Mantissa of the smaller operand
+	output [`MANTISSA-1:0] Mmax ;					// Mantissa of the larger operand
+	
+	// Mantissa fields, unpacked once for the selections below
+	wire [`MANTISSA-1:0] ManA ;
+	wire [`MANTISSA-1:0] ManB ;
+	
+	assign ManA = A[`MANTISSA-1:0] ;
+	assign ManB = B[`MANTISSA-1:0] ;
+	
+	// B counts as the larger operand only when strictly greater; a tie selects A
+	assign MaxAB = (B[`DWIDTH-2:0] > A[`DWIDTH-2:0]) ;
+	
+	// Upper half of ShiftDet applies when B is larger, lower half otherwise
+	assign Shift = MaxAB ? ShiftDet[2*`EXPONENT-1:`EXPONENT] : ShiftDet[`EXPONENT-1:0] ;
+	
+	// Route the mantissas
+	assign Mmax = MaxAB ? ManB : ManA ;
+	assign Mmin = MaxAB ? ManA : ManB ;
+	
+	// Common exponent is the exponent field of the larger operand
+	assign CExp = (MaxAB ? B[`MANTISSA+`EXPONENT-1:`MANTISSA] : A[`MANTISSA+`EXPONENT-1:`MANTISSA]) ;
+	
+endmodule
+
+
+// Description:	 Alignment shift stage 1, performs 16|12|8|4 shift
+//
+
+
+// ONLY THIS MODULE IS HARDCODED for half precision fp16 and bfloat16
+module FPAddSub_AlignShift1(
+		//bf16,
+		MminP,
+		Shift,
+		Mmin
+	);
+	
+	// Coarse stage of the alignment shifter: attaches the hidden one to the smaller
+	// mantissa and shifts it right by a multiple of 4. Vacated high bits must be
+	// zero-filled; rotating would wrap low mantissa bits into the top of the vector
+	// and inflate the smaller operand, so plain logical right shifts are used.
+	
+	// Input ports
+	//input bf16;
+	input [`MANTISSA-1:0] MminP ;						// Smaller mantissa, hidden bit not yet attached
+	input [`EXPONENT-3:0] Shift ;						// Shift amount without its two low bits, so Shift[0] means "by 4"
+	
+	// Output ports
+	output [`MANTISSA:0] Mmin ;						// {hidden 1, mantissa} after the coarse shift
+	
+
+	wire bf16;
+	assign bf16 = 1'b1; //hardcoding to 1, to avoid ODIN issue. a `ifdef here wasn't working. apparently, nested `ifdefs don't work
+
+	// Internal signals
+	reg	  [`MANTISSA:0]		Lvl1;				// {1, MminP}, or 0 when the shift empties the vector
+	reg	  [`MANTISSA:0]		Lvl2;				// Lvl1 after the 0/4/8-bit shift
+
+	// Both blocks are purely combinational, hence blocking assignments
+	always @(*) begin
+		if (bf16 == 1'b1) begin
+			// bfloat16: per the 7-bit mantissa, {1, MminP} is 8 bits wide, so any shift
+			// of 8 or more (a set bit above Shift[0]) leaves nothing behind.
+			Lvl1 = (|Shift[`EXPONENT-3:1]) ? 'd0 : {1'b1, MminP};
+		end
+		else begin
+			// fp16: Shift[2] asks for a shift of 16, which empties the vector.
+			Lvl1 = Shift[2] ? 'd0 : {1'b1, MminP};
+		end
+	end
+
+	always @(*) begin    					// Shift right by {0 | 4} (bf16) or {0 | 4 | 8 | 12} (fp16)
+		if (bf16 == 1'b1) begin
+			case (Shift[0])
+				// No coarse shift
+				1'b0: Lvl2 = Lvl1;
+				// Shift right by 4
+				1'b1: Lvl2 = Lvl1 >> 4;
+				default: Lvl2 = Lvl1 >> 4;
+			endcase
+		end
+		else begin
+			case (Shift[1:0])
+				// No coarse shift
+				2'b00: Lvl2 = Lvl1;
+				// Shift right by 4
+				2'b01: Lvl2 = Lvl1 >> 4;
+				// Shift right by 8
+				2'b10: Lvl2 = Lvl1 >> 8;
+				// A shift by 12 empties the vector
+				2'b11: Lvl2 = 0;
+				default: Lvl2 = 0;
+			endcase
+		end
+	end
+
+	// Assign output to next shift stage
+	assign Mmin = Lvl2;
+	
+endmodule
+
+
+// Description:	 Alignment shift stage 2, performs 3|2|1 shift
+//
+
+
+module FPAddSub_AlignShift2(
+		MminP,
+		Shift,
+		Mmin
+	);
+	
+	// Fine stage of the alignment shifter: logical right shift by 0..3 bits.
+	// Bits shifted out are dropped and the vacated high bits are zero-filled;
+	// a rotate would feed the dropped bits back in at the top of the mantissa.
+	
+	// Input ports
+	input [`MANTISSA:0] MminP ;						// Smaller mantissa after the 4-bit-granular shift
+	input [1:0] Shift ;						// Low two bits of the shift amount
+	
+	// Output ports
+	output [`MANTISSA:0] Mmin ;						// Fully aligned smaller mantissa
+	
+	// Internal Signal
+	reg	  [`MANTISSA:0]		Lvl3;
+	
+	always @(*) begin    // Shift right by {0 | 1 | 2 | 3} bits (combinational, blocking assignments)
+	  case (Shift[1:0])
+			// Shift by 0
+			2'b00: Lvl3 = MminP;
+			// Shift by 1
+			2'b01: Lvl3 = MminP >> 1;
+			// Shift by 2
+			2'b10: Lvl3 = MminP >> 2;
+			// Shift by 3
+			2'b11: Lvl3 = MminP >> 3;
+	  endcase
+	end
+	
+	// Assign output
+	assign Mmin = Lvl3;						// Aligned smaller mantissa
+
+endmodule
+
+
+//
+// Description:	 Module that executes the addition or subtraction on mantissas.
+//
+
+
+module FPAddSub_ExecutionModule(
+		Mmax,
+		Mmin,
+		Sa,
+		Sb,
+		MaxAB,
+		OpMode,
+		Sum,
+		PSgn,
+		Opr
+    );
+
+	// Aligned mantissas plus the sign and selection bits from the earlier stages
+	input [`MANTISSA-1:0] Mmax ;				// Larger mantissa, hidden bit not attached
+	input [`MANTISSA:0] Mmin ;				// Smaller mantissa, already aligned
+	input Sa ;								// Sign input paired with the MaxAB = 0 case
+	input Sb ;								// Sign input paired with the MaxAB = 1 case
+	input MaxAB ;							// Selects which sign becomes PSgn
+	input OpMode ;							// Requested operation bit
+	
+	// Raw result handed on to normalization
+	output [`DWIDTH:0] Sum ;					// Difference or sum of the padded mantissas
+	output PSgn ;							// Sign picked by MaxAB
+	output Opr ;							// 1 selects subtraction, 0 addition
+
+	wire [`EXPONENT-1:0] ZeroPad ;					// Zero bits appended below both mantissas
+	wire [`MANTISSA+`EXPONENT:0] Larger ;			// {1, Mmax, padding}
+	wire [`MANTISSA+`EXPONENT:0] Smaller ;			// {Mmin, padding}
+
+	assign ZeroPad = 0;
+	assign Larger = {1'b1, Mmax, ZeroPad} ;
+	assign Smaller = {Mmin, ZeroPad} ;
+
+	assign Opr = OpMode ^ Sa ^ Sb ;			// Effective operation after folding in both signs
+	assign Sum = Opr ? (Larger - Smaller) : (Larger + Smaller) ;
+	assign PSgn = MaxAB ? Sb : Sa ;
+
+endmodule
+
+
+//
+// Description:	 Determine the normalization shift amount and perform 16-shift
+//
+
+
+module FPAddSub_NormalizeModule(
+		Sum,
+		Mmin,
+		Shift
+    );
+	// Works out how far Sum must be shifted left to normalize it; the shifting itself happens in later stages.
+	// Input ports
+	input [`DWIDTH:0] Sum ;					// Mantissa sum including hidden 1 and GRS
+	
+	// Output ports
+	output [`DWIDTH:0] Mmin ;					// Sum handed to the next stage (driven from Lvl1 below)
+	output [4:0] Shift ;					// Left-shift amount for the following stages
+	// The literal bit indices below are not derived from `DWIDTH (NOTE(review): presumably `DWIDTH is 16 - confirm).
+	// Priority-encode the leading 1: Shift = 16 - (index of the highest set bit), or 13 when none of Sum[16:4] is set
+	assign Shift =  ( 
+		Sum[16] ? 5'b00000 :	 
+		Sum[15] ? 5'b00001 : 
+		Sum[14] ? 5'b00010 : 
+		Sum[13] ? 5'b00011 : 
+		Sum[12] ? 5'b00100 : 
+		Sum[11] ? 5'b00101 : 
+		Sum[10] ? 5'b00110 : 
+		Sum[9] ? 5'b00111 :
+		Sum[8] ? 5'b01000 :
+		Sum[7] ? 5'b01001 :
+		Sum[6] ? 5'b01010 :
+		Sum[5] ? 5'b01011 :
+		Sum[4] ? 5'b01100 : 5'b01101
+	//	Sum[19] ? 5'b01101 :
+	//	Sum[18] ? 5'b01110 :
+	//	Sum[17] ? 5'b01111 :
+	//	Sum[16] ? 5'b10000 :
+	//	Sum[15] ? 5'b10001 :
+	//	Sum[14] ? 5'b10010 :
+	//	Sum[13] ? 5'b10011 :
+	//	Sum[12] ? 5'b10100 :
+	//	Sum[11] ? 5'b10101 :
+	//	Sum[10] ? 5'b10110 :
+	//	Sum[9] ? 5'b10111 :
+	//	Sum[8] ? 5'b11000 :
+	//	Sum[7] ? 5'b11001 : 5'b11010
+	);
+	
+	reg	  [`DWIDTH:0]		Lvl1;
+	
+	always @(*) begin
+		// Shift[4] is 0 for every value the encoder above can produce, so Lvl1 always equals Sum
+		Lvl1 <= Shift[4] ? {Sum[8:0], 8'b00000000} : Sum; 
+	end
+	
+	// Assign outputs
+	assign Mmin = Lvl1;						// Pass the sum on to the shift stages
+
+endmodule
+
+
+// Description:	 Normalization shift stage 1, performs 12|8|4|3|2|1|0 shift
+//
+//Hardcoding loop start and end values of i. To avoid ODIN limitations. i=`DWIDTH*2+1 wasn't working.
+
+module FPAddSub_NormalizeShift1(
+		MminP,
+		Shift,
+		Mmin
+	);
+	
+	// Normalization shifter: shifts the sum left by Shift[3:0] bits in two steps
+	// (multiples of 4, then 0..3) so that its leading 1 reaches bit `DWIDTH.
+	// Every step works on the full `DWIDTH+1 bit vector. A 16-bit slice assigned
+	// to these 17-bit registers would always leave bit `DWIDTH at 0 after a shift,
+	// and the following stage uses that bit to pick the result exponent.
+	
+	// Input ports
+	input [`DWIDTH:0] MminP ;						// Sum to be normalized
+	input [3:0] Shift ;						// Left-shift amount, 0..15
+	
+	// Output ports
+	output [`DWIDTH:0] Mmin ;						// Sum after the left shift
+	
+	reg	  [`DWIDTH:0]		Lvl2;				// After the 0/4/8/12 step
+	reg	  [`DWIDTH:0]		Lvl3;				// After the 0/1/2/3 step
+	
+	// Both blocks are purely combinational, hence blocking assignments
+	always @(*) begin    					// Shift left by {0 | 4 | 8 | 12} bits
+	  case (Shift[3:2])
+			// Shift by 0
+			2'b00: Lvl2 = MminP;
+			// Shift by 4
+			2'b01: Lvl2 = MminP << 4;
+			// Shift by 8
+			2'b10: Lvl2 = MminP << 8;
+			// Shift by 12
+			2'b11: Lvl2 = MminP << 12;
+			default: Lvl2 = MminP;
+	  endcase
+	end
+	
+	always @(*) begin   				 		// Shift left by {0 | 1 | 2 | 3} bits
+	  case (Shift[1:0])
+			// Shift by 0
+			2'b00: Lvl3 = Lvl2;
+			// Shift by 1
+			2'b01: Lvl3 = Lvl2 << 1;
+			// Shift by 2
+			2'b10: Lvl3 = Lvl2 << 2;
+			// Shift by 3
+			2'b11: Lvl3 = Lvl2 << 3;
+			default: Lvl3 = Lvl2;
+	  endcase
+	end
+	
+	// Assign outputs
+	assign Mmin = Lvl3;						// Normalized sum
+	
+endmodule
+
+
+// Description:	 Normalization shift stage 2, calculates post-normalization
+//						 mantissa and exponent, as well as the bits used in rounding		
+//
+
+
+module FPAddSub_NormalizeShift2(
+		PSSum,
+		CExp,
+		Shift,
+		NormM,
+		NormE,
+		ZeroSum,
+		NegE,
+		R,
+		S,
+		FG
+	);
+	
+	// Shifted sum, common exponent and the shift amount to subtract from it
+	input [`DWIDTH:0] PSSum ;					// Sum as delivered by the shift stage
+	input [`EXPONENT-1:0] CExp ;				// Common exponent
+	input [4:0] Shift ;					// Subtracted from CExp below
+
+	// Normalized fields and the bits the rounding stage needs
+	output [`MANTISSA-1:0] NormM ;				// Mantissa slice of PSSum
+	output [`EXPONENT:0] NormE ;					// Adjusted exponent, one bit wider than CExp
+	output ZeroSum ;						// 1 when every bit of PSSum is 0
+	output NegE ;							// Top (borrow) bit of CExp - Shift
+	output R ;								// Bit `EXPONENT-1 of PSSum
+	output S ;								// OR of all PSSum bits below R
+	output FG ;								// Bit `EXPONENT of PSSum, just above R
+
+	// Exponent candidates, one bit wider than CExp so that a borrow stays visible
+	wire TopBit ;							// Bit `DWIDTH of PSSum
+	wire [`EXPONENT:0] ExpPlain ;				// CExp - Shift
+	wire [`EXPONENT:0] ExpBumped ;				// CExp - Shift + 1
+	
+	// Mantissa and rounding bits are fixed slices of the shifted sum
+	assign NormM = PSSum[`DWIDTH-1:`EXPONENT+1] ;
+	assign FG = PSSum[`EXPONENT] ;
+	assign R = PSSum[`EXPONENT-1] ;
+	assign S = |PSSum[`EXPONENT-2:0] ;
+	assign ZeroSum = ~|PSSum ;
+	
+	// Exponent: the bumped value is used when the top bit of the sum is set
+	assign TopBit = PSSum[`DWIDTH] ;
+	assign ExpPlain = CExp - Shift ;
+	assign ExpBumped = CExp - Shift + 1'b1 ;
+	assign NegE = ExpPlain[`EXPONENT] ;
+	assign NormE = TopBit ? ExpBumped : ExpPlain ;
+	
+endmodule
+
+
+// Description:	 Performs 'Round to nearest, tie to even'-rounding on the
+//						 normalized mantissa according to the G, R, S bits. Calculates
+//						 final result and checks for exponent overflow.
+//
+
+
+module FPAddSub_RoundModule(
+		ZeroSum,
+		NormE,
+		NormM,
+		R,
+		S,
+		G,
+		Sa,
+		Sb,
+		Ctrl,
+		MaxAB,
+		Z,
+		EOF
+    );
+	// Rounds the normalized mantissa, adjusts the exponent for a rounding carry and packs {sign, exponent, mantissa}.
+	// Input ports
+	input ZeroSum ;					// Sum is zero
+	input [`EXPONENT:0] NormE ;				// Normalized exponent
+	input [`MANTISSA-1:0] NormM ;				// Normalized mantissa
+	input R ;							// Round bit
+	input S ;							// Sticky bit
+	input G ;							// Guard bit; gates every round-up (see RoundUp)
+	input Sa ;							// A's sign bit
+	input Sb ;							// B's sign bit
+	input Ctrl ;						// Control bit (operation)
+	input MaxAB ;						// Used only in the sign selection (FSgn)
+	
+	// Output ports
+	output [`DWIDTH-1:0] Z ;					// Final result
+	output EOF ;							// Top bit of the rounded exponent
+	
+	// Internal signals
+	wire [`MANTISSA:0] RoundUpM ;			// Rounded up sum with room for overflow
+	wire [`MANTISSA-1:0] RoundM ;				// The final rounded sum
+	wire [`EXPONENT:0] RoundE ;				// Rounded exponent (note the extra bit for a potential overflow)
+	wire RoundUp ;						// Flag indicating that the sum should be rounded up
+        wire FSgn;
+	wire ExpAdd ;						// May have to add 1 to compensate for overflow 
+	wire RoundOF ;						// Rounding overflow
+	
+	wire [`EXPONENT:0]temp_2;
+	assign temp_2 = 0;
+	// Round up when G is set and either R, S or the mantissa LSB is set
+	assign RoundUp = (G & ((R | S) | NormM[0])) ;
+	
+	// Note that in the other cases (rounding down), the sum is already 'rounded'
+	assign RoundUpM = (NormM + 1) ;								// The sum, rounded up by 1
+	assign RoundM = (RoundUp ? RoundUpM[`MANTISSA-1:0] : NormM) ; 	// Compute final mantissa	
+	assign RoundOF = RoundUp & RoundUpM[`MANTISSA] ; 				// Check for overflow when rounding up
+
+	// Calculate post-rounding exponent
+	assign ExpAdd = (RoundOF ? 1'b1 : 1'b0) ; 				// Add 1 to exponent to compensate for overflow
+	assign RoundE = ZeroSum ? temp_2 : (NormE + ExpAdd) ; 							// Final exponent
+	// Result sign. Zero sum: set when Sa and Sb differ, or when both are set and Ctrl is 0.
+	// Non-zero sum: (~MaxAB & Sa) | ((Ctrl ^ Sb) & (MaxAB | Sa)).
+	assign FSgn = (ZeroSum & (Sa ^ Sb)) | (ZeroSum ? (Sa & Sb & ~Ctrl) : ((~MaxAB & Sa) | ((Ctrl ^ Sb) & (MaxAB | Sa)))) ;
+
+	// Assign final result
+	assign Z = {FSgn, RoundE[`EXPONENT-1:0], RoundM[`MANTISSA-1:0]} ;
+	
+	// Indicate exponent overflow
+	assign EOF = RoundE[`EXPONENT];
+	
+endmodule
+
+
+//
+// Description:	 Check the final result for exception conditions and set
+//						 flags accordingly.
+//
+
+
+module FPAddSub_ExceptionModule(
+		Z,
+		NegE,
+		R,
+		S,
+		InputExc,
+		EOF,
+		P,
+		Flags
+    );
+	 
+	// Input ports
+	input [`DWIDTH-1:0] Z	;					// Rounded result, forwarded unchanged on P
+	input NegE ;						// Negative exponent?
+	input R ;							// Round bit
+	input S ;							// Sticky bit
+	input [4:0] InputExc ;			// Exceptions in inputs A and B
+	input EOF ;						// Exponent overflow reported by the rounding stage
+	
+	// Output ports
+	output [`DWIDTH-1:0] P ;					// Final result
+	output [4:0] Flags ;				// {Overflow, Underflow, DivideByZero, Invalid, Inexact}
+	
+	// Internal signals
+	wire Overflow ;					// Overflow flag
+	wire Underflow ;					// Underflow flag
+	wire DivideByZero ;				// Divide-by-Zero flag (always 0 in Add/Sub)
+	wire Invalid ;						// Invalid inputs or result
+	wire Inexact ;						// Result is inexact because of rounding
+	
+	// Exception flags
+	// Result is too big to be represented, or InputExc[1] / InputExc[0] is set
+	assign Overflow = EOF | InputExc[1] | InputExc[0] ;
+	
+	// Result is too small to be represented
+	assign Underflow = NegE & (R | S);
+	
+	// Add/Sub never divides. Written as a plain constant: a condition that requires
+	// the exponent field of Z to be all ones and all zeros at once can never hold.
+	assign DivideByZero = 1'b0;
+	
+	// Invalid inputs or operation (any of InputExc[4:2])
+	assign Invalid = |(InputExc[4:2]) ;
+	
+	// Inexact answer due to rounding, overflow or underflow
+	assign Inexact = (R | S) | Overflow | Underflow;
+	
+	// Put pieces together to form final result
+	assign P = Z ;
+	
+	// Collect exception flags	
+	assign Flags = {Overflow, Underflow, DivideByZero, Invalid, Inexact} ; 	
+	
+endmodule
+
+`endif
+
+
diff --git a/third_party/vtr/verilog/hard_block_include.v b/third_party/vtr/verilog/hard_block_include.v
new file mode 100644
index 000000000..cc4d502c5
--- /dev/null
+++ b/third_party/vtr/verilog/hard_block_include.v
@@ -0,0 +1,3 @@
+`define complex_dsp
+`define hard_mem
+
diff --git a/third_party/vtr/verilog/raygentop.v b/third_party/vtr/verilog/raygentop.v
new file mode 100644
index 000000000..256b3aead
--- /dev/null
+++ b/third_party/vtr/verilog/raygentop.v
@@ -0,0 +1,2978 @@
+ module paj_raygentop_hierarchy_no_mem (rgwant_addr, rgwant_data, rgread_ready, rgaddr_ready, rgdata_ready, rgwant_read, rgdatain, rgdataout, rgaddrin, rgCont, rgStat, rgCfgData, rgwant_CfgData, rgCfgData_ready, tm3_sram_data_in, tm3_sram_data_out, tm3_sram_addr, tm3_sram_we, tm3_sram_oe, tm3_sram_adsp, clk, fbdata, fbdatavalid, fbnextscanline, raygroup01, raygroupvalid01, busy01, raygroup10, raygroupvalid10, busy10, globalreset, rgData, rgAddr, rgWE, rgAddrValid, rgDone, rgResultData, rgResultReady, rgResultSource);
+    // Structural top level: apart from the page register it only wires the sub-blocks instantiated at the bottom together.
+    output rgwant_addr; 
+    wire rgwant_addr;
+    output rgwant_data; 
+    wire rgwant_data;
+    output rgread_ready; 
+    wire rgread_ready;
+    input rgaddr_ready; 
+    input rgdata_ready; 
+
+    input rgwant_read; 
+    input[63:0] rgdatain; 
+    output[63:0] rgdataout; 
+    wire[63:0] rgdataout;
+    input[17:0] rgaddrin; 
+    input[31:0] rgCont; 
+    output[31:0] rgStat; 
+    wire[31:0] rgStat;
+    input[31:0] rgCfgData; 
+    output rgwant_CfgData; 
+    wire rgwant_CfgData;
+    input rgCfgData_ready; 
+    // SRAM ports; all of them are connected to the sramcont instance below
+    input[63:0] tm3_sram_data_in; 
+    wire[63:0] tm3_sram_data_in;
+    output[63:0] tm3_sram_data_out; 
+    wire[63:0] tm3_sram_data_out;
+    wire[63:0] tm3_sram_data_xhdl0;
+    output[18:0] tm3_sram_addr; 
+    wire[18:0] tm3_sram_addr;
+    output[7:0] tm3_sram_we; 
+    wire[7:0] tm3_sram_we;
+    output[1:0] tm3_sram_oe; 
+    wire[1:0] tm3_sram_oe;
+    output tm3_sram_adsp; 
+    wire tm3_sram_adsp;
+    input clk; 
+    // Frame-buffer data ports and ray-group ports
+    output[63:0] fbdata; 
+    wire[63:0] fbdata;
+    output fbdatavalid; 
+    wire fbdatavalid;
+    input fbnextscanline; 
+    output[1:0] raygroup01; 
+    wire[1:0] raygroup01;
+    output raygroupvalid01; 
+    wire raygroupvalid01;
+    input busy01; 
+    output[1:0] raygroup10; 
+    wire[1:0] raygroup10;
+
+    output raygroupvalid10; 
+    wire raygroupvalid10;
+    input busy10; 
+    input globalreset; 
+    output[31:0] rgData; 
+    wire[31:0] rgData;
+    output[3:0] rgAddr; 
+    wire[3:0] rgAddr;
+    output[2:0] rgWE; 
+    wire[2:0] rgWE;
+    output rgAddrValid; 
+    wire rgAddrValid;
+
+    input rgDone; 
+    input[31:0] rgResultData; 
+    input rgResultReady; 
+    input[1:0] rgResultSource; 
+    // Internal nets connecting the sub-blocks instantiated at the bottom of the module
+    wire[2:0] statepeek2; 
+    wire as01; 
+    wire ack01; 
+
+    wire[3:0] addr01; 
+    wire[47:0] dir01; 
+    wire[47:0] dir; 
+    wire[47:0] sramdatal; 
+    wire wantDir; 
+    wire dirReady; 
+    wire dirReadyl; 
+    wire[14:0] address; 
+    wire[30:0] cyclecounter; 
+
+    wire nas01; 
+    wire nas10; 
+    wire go; 
+    reg page; 
+    wire[2:0] statepeekct; 
+    // result Signals
+    wire valid01; 
+    wire valid10; 
+    wire[15:0] id01a; 
+    wire[15:0] id01b; 
+    wire[15:0] id01c; 
+    wire[15:0] id10a; 
+
+    wire[15:0] id10b; 
+    wire[15:0] id10c; 
+    wire hit01a; 
+    wire hit01b; 
+    wire hit01c; 
+    wire hit10a; 
+    wire hit10b; 
+    wire hit10c; 
+    wire[7:0] u01a; 
+    wire[7:0] u01b; 
+    wire[7:0] u01c; 
+    wire[7:0] v01a; 
+
+    wire[7:0] v01b; 
+    wire[7:0] v01c; 
+    wire[7:0] u10a; 
+    wire[7:0] u10b; 
+    wire[7:0] u10c; 
+    wire[7:0] v10a; 
+    wire[7:0] v10b; 
+    wire[7:0] v10c; 
+    wire wantwriteback; 
+    wire writebackack; 
+    wire[63:0] writebackdata; 
+    wire[17:0] writebackaddr; 
+
+    wire[17:0] nextaddr01; 
+    // Shading Signals
+    wire[63:0] shadedata; 
+    wire[15:0] triID; 
+    wire wantshadedata; 
+    wire shadedataready; 
+    // CfgData Signals
+    wire[27:0] origx; 
+    wire[27:0] origy; 
+    wire[27:0] origz; 
+    wire[15:0] m11; 
+    wire[15:0] m12; 
+
+    wire[15:0] m13; 
+    wire[15:0] m21; 
+    wire[15:0] m22; 
+    wire[15:0] m23; 
+    wire[15:0] m31; 
+    wire[15:0] m32; 
+    wire[15:0] m33; 
+    wire[20:0] bkcolour; 
+    // Texture signals
+    wire[20:0] texinfo; 
+    wire[3:0] texaddr; 
+    wire[63:0] texel; 
+
+    wire[17:0] texeladdr; 
+    wire wanttexel; 
+    wire texelready; 
+    // Frame Buffer Read Signals
+    wire fbpage; 
+    // debug signals
+    wire wantcfg; 
+    wire debugglobalreset; 
+    // wantcfg is the net shared with ConfigMemoryInst below
+    assign rgwant_CfgData = wantcfg ;
+    // onlyonecycle takes rgCont[0] as its trigger and drives go
+    onlyonecycle onlyeonecycleinst (rgCont[0], go, globalreset, clk); 
+    // page register: set to 1 on reset, inverted on every other rising clock edge
+    always @(posedge clk)
+    begin
+       if (globalreset == 1'b1)
+       begin
+          page <= 1'b1 ; // Reset to 1 such that first flip sets to 0
+       end
+       else
+
+       begin
+          page <= ~page ; 
+       end 
+    end 
+    assign fbpage = ~page ;
+    // NOTE(review): matmult presumably applies the m11..m33 matrix to the three sramdatal fields to form dir - confirm in matmult
+    matmult matmultinst(sramdatal[47:32], sramdatal[31:16], sramdatal[15:0], m11, m12, m13, m21, m22, m23, m31, m32, m33, dir[47:32], dir[31:16], dir[15:0], clk); 
+    // dirReadyl is dirReady passed through the three clocked stages of delay1x3
+    delay1x3 dir01delay(dirReady, dirReadyl, clk); 
+    rgconfigmemory ConfigMemoryInst (rgCfgData[31:28], rgCfgData[27:0], rgCfgData_ready, wantcfg, origx, origy, origz, m11, m12, m13, m21, m22, m23, m31, m32, m33, bkcolour, texinfo, globalreset, clk); 
+
+    rgsramcontroller sramcont (rgwant_addr, rgaddr_ready, rgaddrin, rgwant_data, rgdata_ready, rgdatain, rgwant_read, rgread_ready, rgdataout, dirReady, wantDir, sramdatal, address, wantwriteback, writebackack, writebackdata, writebackaddr, fbdata, fbnextscanline, fbdatavalid, fbpage, shadedata, triID, wantshadedata, shadedataready, texeladdr, texel, wanttexel, texelready, tm3_sram_data_in, tm3_sram_data_out, tm3_sram_addr, tm3_sram_we, tm3_sram_oe, tm3_sram_adsp, globalreset, clk);
+    raysend raysendinst (as01, ack01, addr01, dir01, origx, origy, origz, rgData, rgAddr, rgWE, rgAddrValid, rgDone, globalreset, clk, statepeek2); 
+
+    raygencont  raygencontinst(go, rgCont[15:1], rgStat[31], cyclecounter, nextaddr01, nas01, nas10, page, dirReadyl, wantDir, dir, address, as01, addr01, ack01, dir01, raygroup01, raygroupvalid01, busy01, raygroup10, raygroupvalid10, busy10, globalreset, clk, statepeekct); 
+    resultrecieve resultrecieveinst (valid01, valid10, id01a, id01b, id01c, id10a, id10b, id10c, hit01a, hit01b, hit01c, hit10a, hit10b, hit10c, u01a, u01b, u01c, v01a, v01b, v01c, u10a, u10b, u10c, v10a, v10b, v10c, rgResultData, rgResultReady, rgResultSource, globalreset, clk); 
+    assign debugglobalreset = globalreset | go ;
+    resultwriter resultwriteinst (valid01, valid10, id01a, id01b, id01c, id10a, id10b, id10c, hit01a, hit01b, hit01c, hit10a, hit10b, hit10c, u01a, u01b, u01c, v01a, v01b, v01c, u10a, u10b, u10c, v10a, v10b, v10c, nextaddr01, nas01, nas10, bkcolour, shadedata, triID, wantshadedata, shadedataready, texinfo, texaddr, texeladdr, texel, wanttexel, texelready, writebackdata, writebackaddr, wantwriteback, writebackack, debugglobalreset, clk);
+    assign rgStat[30:0] = cyclecounter ;
+ endmodule
+
+
+module delay1x3 (datain, dataout, clk);
+
+    // Three-stage, one-bit delay line: dataout shows the datain value sampled
+    // three rising clock edges earlier. There is no reset input.
+    input datain; 
+    input clk; 
+    output dataout; 
+    wire dataout;
+
+    // stage[0] holds the newest sample, stage[2] the oldest
+    reg[2:0] stage; 
+
+    always @(posedge clk)
+    begin
+       // Shift the new sample in at the bottom; the oldest one drops off the top
+       stage <= {stage[1:0], datain} ; 
+    end 
+
+    assign dataout = stage[2] ;
+
+ endmodule
+
+
+    
+
+    
+    
+ // A debugging circuit that allows a single-cycle pulse to be
+ // generated through the ports package.
+ // onlyonecycle: turns a high level on trigger into a one-clock pulse.
+ //   trigger      - request; it must go low again before a new pulse can start
+ //   output_xhdl0 - high only while the FSM is in state 1
+ //   globalreset  - synchronous, active-high reset of state and count
+ //   clk          - rising-edge clock
+ // States: 0 = idle, waiting for trigger; 1 = pulse; 2 = waiting for
+ // trigger to drop. State 3 is unused and falls back to state 0.
+ module onlyonecycle (trigger, output_xhdl0, globalreset, clk);
+
+    input trigger;
+    output output_xhdl0;
+    reg output_xhdl0;
+    input globalreset;
+    input clk;
+
+    reg[1:0] state;
+    reg[1:0] next_state;
+    reg count;       // cycles left in state 1; loaded with 0 from state 0
+    reg temp_count;  // next value of count
+
+    // State and counter registers.
+    always @(posedge clk)
+    begin
+       if (globalreset == 1'b1)
+       begin
+          state <= 0 ;
+          count <= 0 ;
+       end
+       else
+       begin
+          state <= next_state ;
+          count <= temp_count ;
+       end
+    end
+
+    // Next-state and output logic.
+    always @(state or trigger or count)
+    begin
+       // Defaults first: every signal driven here gets a value on every pass,
+       // so no latch is inferred (state 2 and state 3 used to leave gaps).
+       output_xhdl0 = 1'b0 ;
+       next_state = state ;
+       temp_count = count ;
+       case (state)
+          0 :
+                   begin
+                      if (trigger == 1'b1)
+                      begin
+                         next_state = 1 ;
+                      end
+                      temp_count = 1 - 1 ; // pulse length of 1 cycle, stored as length - 1
+                   end
+          1 :
+                   begin
+                      output_xhdl0 = 1'b1 ;
+                      if (count == 0)
+                      begin
+                         next_state = 2 ;
+                      end
+                      temp_count = count - 1 ;
+                   end
+          2 :
+                   begin
+                      if (trigger == 1'b0)
+                      begin
+                         next_state = 0 ;
+                      end
+                   end
+          default :
+                   begin
+                      next_state = 0 ;
+                   end
+       endcase
+    end
+ endmodule
+
+// matmult: pipelined 3x3 matrix by 3-vector product,
+//   [Cx Cy Cz]' = [m11 m12 m13; m21 m22 m23; m31 m32 m33] * [Ax Ay Az]'.
+// All operands and results are 16 bits wide and multiplied as unsigned
+// values (nothing here is declared signed).
+//
+// Stage 1 registers the nine full 32-bit products. Stage 2 adds, for each
+// row, bits [30:15] of its three products and registers the low 16 bits
+// of that sum. A result therefore appears two rising clk edges after its
+// inputs. There is no reset.
+module matmult (Ax, Ay, Az, m11, m12, m13, m21, m22, m23, m31, m32, m33, Cx, Cy, Cz, clk);
+
+    input clk;
+
+    // Vector operand.
+    input[15:0] Ax, Ay, Az;
+
+    // Matrix operand, one row per line.
+    input[15:0] m11, m12, m13;
+    input[15:0] m21, m22, m23;
+    input[15:0] m31, m32, m33;
+
+    // Registered result vector.
+    output[15:0] Cx, Cy, Cz;
+    reg[15:0] Cx, Cy, Cz;
+
+    // Stage-1 product registers: amRC holds mRC times component C of A.
+    reg[31:0] am11, am12, am13;
+    reg[31:0] am21, am22, am23;
+    reg[31:0] am31, am32, am33;
+
+    // Stage 1: element products.
+    always @(posedge clk)
+    begin
+       am11 <= Ax * m11 ;
+       am21 <= Ax * m21 ;
+       am31 <= Ax * m31 ;
+
+       am12 <= Ay * m12 ;
+       am22 <= Ay * m22 ;
+       am32 <= Ay * m32 ;
+
+       am13 <= Az * m13 ;
+       am23 <= Az * m23 ;
+       am33 <= Az * m33 ;
+    end
+
+    // Stage 2: row sums. The non-blocking reads see the products that
+    // stage 1 registered on the previous edge.
+    always @(posedge clk)
+    begin
+       Cx <= am11[30:15] + am12[30:15] + am13[30:15] ;
+       Cy <= am21[30:15] + am22[30:15] + am23[30:15] ;
+       Cz <= am31[30:15] + am32[30:15] + am33[30:15] ;
+    end
+endmodule
+
+    
+    
+
+// rgconfigmemory: register block loaded through a CfgAddr/CfgData write port.
+//
+// A write is qualified by CfgData_Ready. A two-state handshake accepts one
+// write per CfgData_Ready pulse:
+//   state 0 - want_CfgData high; a write is taken when CfgData_Ready is high
+//   state 1 - want_CfgData low; waits for CfgData_Ready to go low again
+//
+// CfgAddr map:
+//   4'b0001 origx     4'b0010 origy     4'b0011 origz     (CfgData[27:0])
+//   4'b0100 m11       4'b0101 m12       4'b0110 m13       (CfgData[15:0])
+//   4'b0111 m21       4'b1000 m22       4'b1001 m23       (CfgData[15:0])
+//   4'b1010 m31       4'b1011 m32       4'b1100 m33       (CfgData[15:0])
+//   4'b1101 bkcolour                                      (CfgData[20:0])
+//   4'b1110 write enable of the spram21x4 that drives texinfo
+//
+// globalreset is synchronous and active high: origx, origy, origz and
+// bkcolour reset to 0, m11 = m22 = m33 = 1, and the other m** reset to 0.
+module rgconfigmemory (CfgAddr, CfgData, CfgData_Ready, want_CfgData, origx, origy, origz, m11, m12, m13, m21, m22, m23, m31, m32, m33, bkcolour, texinfo, globalreset, clk);
+
+    input[3:0] CfgAddr;
+    input[27:0] CfgData;
+    input CfgData_Ready;
+    input globalreset;
+    input clk;
+
+    output want_CfgData;
+    reg want_CfgData;
+    output[27:0] origx;
+    reg[27:0] origx;
+    output[27:0] origy;
+    reg[27:0] origy;
+    output[27:0] origz;
+    reg[27:0] origz;
+    output[15:0] m11;
+    reg[15:0] m11;
+    output[15:0] m12;
+    reg[15:0] m12;
+    output[15:0] m13;
+    reg[15:0] m13;
+    output[15:0] m21;
+    reg[15:0] m21;
+    output[15:0] m22;
+    reg[15:0] m22;
+    output[15:0] m23;
+    reg[15:0] m23;
+    output[15:0] m31;
+    reg[15:0] m31;
+    output[15:0] m32;
+    reg[15:0] m32;
+    output[15:0] m33;
+    reg[15:0] m33;
+    output[20:0] bkcolour;
+    reg[20:0] bkcolour;
+    output[20:0] texinfo;
+    wire[20:0] texinfo;
+
+    reg state;
+    reg next_state;
+    wire we;
+
+    // Next-value signals for the registers above.
+    reg[27:0] temp_origx, temp_origy, temp_origz;
+    reg[15:0] temp_m11, temp_m12, temp_m13;
+    reg[15:0] temp_m21, temp_m22, temp_m23;
+    reg[15:0] temp_m31, temp_m32, temp_m33;
+    reg[20:0] temp_bkcolour;
+
+    // texinfo is the output of a RAM written with CfgData[20:0] whenever
+    // address 4'b1110 is presented, independent of the handshake state.
+    spram21x4 spraminst(we, texinfo, CfgData[20:0], clk);
+    assign we = ((CfgData_Ready == 1'b1) & (CfgAddr == 4'b1110)) ? 1'b1 : 1'b0 ;
+
+    // Registers.
+    always @(posedge clk)
+    begin
+       if (globalreset == 1'b1)
+       begin
+          // Reset values.
+          state <= 0 ;
+          origx <= 0;
+          origy <= 0;
+          origz <= 0;
+          m11 <= 1;
+          m12 <= 0;
+          m13 <= 0;
+          m21 <= 0;
+          m22 <= 1;
+          m23 <= 0;
+          m31 <= 0;
+          m32 <= 0;
+          m33 <= 1;
+          bkcolour <= 0;
+       end
+       else
+       begin
+          // Normal operation: load the next values.
+          state <= next_state ;
+          origx <= temp_origx;
+          origy <= temp_origy;
+          origz <= temp_origz;
+          m11 <= temp_m11;
+          m12 <= temp_m12;
+          m13 <= temp_m13;
+          m21 <= temp_m21;
+          m22 <= temp_m22;
+          m23 <= temp_m23;
+          m31 <= temp_m31;
+          m32 <= temp_m32;
+          m33 <= temp_m33;
+          // bkcolour must load temp_bkcolour; reloading it from itself
+          // would discard every write to address 4'b1101.
+          bkcolour <= temp_bkcolour;
+       end
+    end
+
+    // Handshake FSM and write decode. Every temp_* signal first defaults to
+    // its register, so a register only changes on a write to its address
+    // and no latch is inferred. The list covers every signal read below.
+    always @(state or CfgData_Ready or CfgAddr or CfgData or
+             origx or origy or origz or bkcolour or
+             m11 or m12 or m13 or m21 or m22 or m23 or m31 or m32 or m33)
+    begin
+       // Default: every register holds its value.
+       temp_origx = origx ;
+       temp_origy = origy ;
+       temp_origz = origz ;
+       temp_m11 = m11 ;
+       temp_m12 = m12 ;
+       temp_m13 = m13 ;
+       temp_m21 = m21 ;
+       temp_m22 = m22 ;
+       temp_m23 = m23 ;
+       temp_m31 = m31 ;
+       temp_m32 = m32 ;
+       temp_m33 = m33 ;
+       temp_bkcolour = bkcolour ;
+
+       case (state)
+          // State 0: ready for a write.
+          0 :
+                   begin
+                      want_CfgData = 1'b1 ;
+                      if (CfgData_Ready == 1'b1)
+                      begin
+                         next_state = 1 ;
+                         // Route CfgData to the addressed register.
+                         case (CfgAddr)
+                            4'b0001 : temp_origx = CfgData ;
+                            4'b0010 : temp_origy = CfgData ;
+                            4'b0011 : temp_origz = CfgData ;
+                            4'b0100 : temp_m11 = CfgData[15:0] ;
+                            4'b0101 : temp_m12 = CfgData[15:0] ;
+                            4'b0110 : temp_m13 = CfgData[15:0] ;
+                            4'b0111 : temp_m21 = CfgData[15:0] ;
+                            4'b1000 : temp_m22 = CfgData[15:0] ;
+                            4'b1001 : temp_m23 = CfgData[15:0] ;
+                            4'b1010 : temp_m31 = CfgData[15:0] ;
+                            4'b1011 : temp_m32 = CfgData[15:0] ;
+                            4'b1100 : temp_m33 = CfgData[15:0] ;
+                            4'b1101 : temp_bkcolour = CfgData[20:0] ;
+                            // Any other address, including the RAM address
+                            // 4'b1110, changes no register.
+                            default : next_state = 1 ;
+                         endcase
+                      end
+                      else
+                      begin
+                         next_state = 0 ;
+                      end
+                   end
+          // State 1: write taken; wait for CfgData_Ready to be released.
+          1 :
+                   begin
+                      want_CfgData = 1'b0 ;
+                      if (CfgData_Ready == 1'b0)
+                      begin
+                         next_state = 0 ;
+                      end
+                      else
+                      begin
+                         next_state = 1 ;
+                      end
+                   end
+       endcase
+    end
+endmodule
+
+    
+    
+ // spram21x4: 21-bit wide wrapper around the single_port_ram primitive.
+ //   we      - write enable; also the serial input of the address register
+ //   datain  - 21-bit write data
+ //   dataout - 21-bit RAM output
+ //   clk     - rising-edge clock
+ // There is no address port. The 8-bit address is a shift register that
+ // records the last eight sampled values of we (addr[0] is the newest).
+ // NOTE(review): presumably a stand-in that keeps the RAM in the netlist
+ // rather than a functional 4-entry memory -- confirm before reuse.
+ module spram21x4 (we, dataout, datain, clk);
+
+    input we;
+    input clk;
+    input[20:0] datain;
+    output[20:0] dataout;
+    wire[20:0] dataout;
+
+    reg [7:0] addr;
+
+    // Shift the current we in at bit 0; the old addr[7] falls off the top.
+    always @ (posedge clk)
+    begin
+       addr <= {addr[6:0], we};
+    end
+    // RAM instance, parameterised per the Odin II RAM specification.
+    defparam new_ram.ADDR_WIDTH = 8;
+    defparam new_ram.DATA_WIDTH = 21;
+    single_port_ram new_ram(
+      .clk (clk),
+      .addr(addr),
+      .we(we),
+      .data(datain),
+      .out(dataout)
+      );
+ endmodule
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+
+module rgsramcontroller (want_addr, addr_ready, addrin, want_data, data_ready, datain, want_read, read_ready, dataout, dirReady, wantDir, sramdatal, addr, wantwriteback, writebackack, writebackdata, writebackaddr, fbdata, fbnextscanline, fbdatavalid, fbpage, shadedata, triID, wantshadedata, shadedataready, texeladdr, texel, wanttexel, texelready, tm3_sram_data_in, tm3_sram_data_out, tm3_sram_addr, tm3_sram_we, tm3_sram_oe, tm3_sram_adsp, globalreset, clk);
+    // Sequencer for the external SRAM port (tm3_sram_*): an 11-state FSM that serves one requester at a time.
+    output want_addr; 
+    reg want_addr;
+    input addr_ready; 
+    input[17:0] addrin; 
+    output want_data; 
+    reg want_data;
+    input data_ready; 
+    input[63:0] datain; 
+    input want_read; 
+    output read_ready; 
+
+    reg read_ready;
+    output[63:0] dataout; 
+    wire[63:0] dataout;
+    output dirReady; 
+    reg dirReady;
+    input wantDir; 
+    output[47:0] sramdatal; 
+    reg[47:0] sramdatal;
+    output[14:0] addr; 
+    wire[14:0] addr;
+    input wantwriteback; 
+    output writebackack; 
+
+    reg writebackack;
+    input[63:0] writebackdata; 
+    input[17:0] writebackaddr; 
+    output[63:0] fbdata; 
+    reg[63:0] fbdata;
+    input fbnextscanline; 
+    output fbdatavalid; 
+    reg fbdatavalid;
+    input fbpage; 
+    output[63:0] shadedata; 
+    wire[63:0] shadedata;
+    input[15:0] triID; 
+
+    input wantshadedata; 
+    output shadedataready; 
+    reg shadedataready;
+    input[17:0] texeladdr; 
+    output[63:0] texel; 
+    wire[63:0] texel;
+    input wanttexel; 
+    output texelready; 
+    reg texelready;
+    input[63:0] tm3_sram_data_in; 
+    wire[63:0] tm3_sram_data_in;
+    output[63:0] tm3_sram_data_out; 
+    wire[63:0] tm3_sram_data_out;
+    reg[63:0] tm3_sram_data_xhdl0;
+
+    output[18:0] tm3_sram_addr; 
+    reg[18:0] tm3_sram_addr;
+    output[7:0] tm3_sram_we; 
+    reg[7:0] tm3_sram_we;
+    output[1:0] tm3_sram_oe; 
+    reg[1:0] tm3_sram_oe;
+    output tm3_sram_adsp; 
+    reg tm3_sram_adsp;
+    input globalreset; 
+    input clk; 
+
+    reg[3:0] state; 
+    reg[3:0] next_state; 
+    reg[17:0] waddress; 
+    reg[14:0] faddress; 
+    reg[6:0] fcount; 
+    reg fbdatavalidl; 
+    // Combinational next values; each is loaded into its register on every non-reset clock edge.
+    reg[17:0] temp_waddress; 
+    reg[14:0] temp_faddress; 
+    reg[6:0] temp_fcount; 
+    reg temp_fbdatavalidl; 
+    reg temp_texelready;
+    reg temp_shadedataready;
+
+    assign tm3_sram_data_out = tm3_sram_data_xhdl0;
+    // The read-side outputs below are all wired straight to the SRAM input bus.
+    assign dataout = tm3_sram_data_in ;
+    assign addr = tm3_sram_data_in[62:48] ;
+    assign shadedata = tm3_sram_data_in ;
+    assign texel = tm3_sram_data_in ;
+    // State register, registered copies of the SRAM input bus and of the temp_* values. Reset is synchronous.
+    always @(posedge clk)
+    begin
+       if (globalreset == 1'b1)
+       begin
+
+          state <= 0 ; 
+          waddress <= 0;
+          faddress <= 0;
+          fcount <= 7'b1101011 ; // 107
+          fbdatavalid <= 1'b0 ; 
+          fbdatavalidl <= 1'b0 ; 
+          shadedataready <= 1'b0 ; 
+          texelready <= 1'b0 ; 
+          sramdatal <= 0;
+          fbdata <= 0;
+       end
+       else
+
+       begin
+          state <= next_state ; 
+          sramdatal <= tm3_sram_data_in[47:0] ; 
+          fbdata <= tm3_sram_data_in ; 
+          fbdatavalid <= fbdatavalidl ; // one clock behind fbdatavalidl
+
+fbdatavalidl <= temp_fbdatavalidl;
+texelready <= temp_texelready;
+shadedataready <= temp_shadedataready;
+fcount <= temp_fcount;
+faddress <= temp_faddress;
+waddress <= temp_waddress;
+
+       end 
+    end 
+    // Next-state and output logic. NOTE(review): addrin is read in state 0 but is not in this sensitivity list -- confirm.
+    always @(state or addr_ready or data_ready or waddress or datain or wantDir or 
+             want_read or wantwriteback or writebackdata or writebackaddr or 
+             fcount or fbpage or faddress or fbnextscanline or triID or wantshadedata or 
+             wanttexel or texeladdr)
+    // NOTE(review): the temp_* signals are not assigned in every state and there is no default arm, so they hold (inferred latches) -- confirm intended.
+    begin
+       case (state)
+
+          0 : // idle: choose the next request by fixed priority (order of the if-chain below)
+                   begin
+				       tm3_sram_we = 8'b11111111 ; // NOTE(review): all ones here, all zeros in write states 3 and 6 -- presumably active low; confirm
+				       tm3_sram_oe = 2'b01 ; 
+				       tm3_sram_adsp = 1'b0 ; 
+				       tm3_sram_data_xhdl0 = 0;
+				       tm3_sram_addr = {1'b0, waddress} ; 
+				       want_addr = 1'b1 ; 
+				       want_data = 1'b1 ; 
+				       read_ready = 1'b1 ; 
+				       dirReady = 1'b0 ; 
+				       writebackack = 1'b0 ; 
+                      if (addr_ready == 1'b1)
+                      begin
+                         next_state = 1 ; 
+                      end
+                      else if (want_read == 1'b1)
+                      begin
+                         next_state = 2 ; 
+                      end
+                      else if (data_ready == 1'b1)
+                      begin
+
+                         next_state = 3 ; 
+                      end
+                      else if (wantDir == 1'b1)
+                      begin
+                         next_state = 5 ; 
+                      end
+                      else if (wantwriteback == 1'b1)
+                      begin
+                         next_state = 6 ; 
+                      end
+                      else if (wantshadedata == 1'b1)
+                      begin
+
+                         next_state = 9 ; 
+                      end
+                      else if (wanttexel == 1'b1)
+                      begin
+                         next_state = 10 ; 
+                      end
+                      else if (fcount != 0)
+                      begin
+                         next_state = 7 ; 
+                      end
+                      else if (fbnextscanline == 1'b1)
+                      begin
+
+                         next_state = 8 ; 
+                      end
+                      else
+                      begin
+                         next_state = 0 ; 
+                      end 
+				          temp_fbdatavalidl = 1'b0 ; 
+				          temp_shadedataready = 1'b0 ; 
+				          temp_texelready = 1'b0 ; 
+                         if (addr_ready == 1'b1)
+                         // Capture the new working address while it is being offered.
+                         begin
+                            temp_waddress = addrin ; 
+                         end 
+
+                   end
+          1 : // address accepted in state 0: want_addr low until addr_ready drops
+                   begin
+				       tm3_sram_we = 8'b11111111 ; 
+				       tm3_sram_oe = 2'b01 ; 
+				       tm3_sram_adsp = 1'b0 ; 
+				       tm3_sram_data_xhdl0 = 0;
+				       tm3_sram_addr = {1'b0, waddress} ; 
+				       want_data = 1'b1 ; 
+				       read_ready = 1'b1 ; 
+				       dirReady = 1'b0 ; 
+				       writebackack = 1'b0 ; 
+                      want_addr = 1'b0 ; 
+                      if (addr_ready == 1'b0)
+                      begin
+                         next_state = 0 ; 
+
+                      end
+                      else
+                      begin
+                         next_state = 1 ; 
+                      end 
+                   end
+          2 : // read handshake: read_ready low until want_read drops, then waddress + 1
+                   begin
+				       tm3_sram_we = 8'b11111111 ; 
+				       tm3_sram_oe = 2'b01 ; 
+				       tm3_sram_adsp = 1'b0 ; 
+				       tm3_sram_data_xhdl0 = 0;
+				       tm3_sram_addr = {1'b0, waddress} ; 
+				       want_addr = 1'b1 ; 
+				       want_data = 1'b1 ; 
+				       dirReady = 1'b0 ; 
+				       writebackack = 1'b0 ; 
+
+                      read_ready = 1'b0 ; 
+                      if (want_read == 1'b0)
+                      begin
+                         next_state = 0 ; 
+                      end
+                      else
+                      begin
+                         next_state = 2 ; 
+                      end 
+
+				          temp_fbdatavalidl = 1'b0 ; 
+				          temp_shadedataready = 1'b0 ; 
+				          temp_texelready = 1'b0 ; 
+                         if (want_read == 1'b0)
+                         begin
+
+                            temp_waddress = waddress + 1 ; 
+                         end 
+
+                   end
+          3 : // write datain at waddress (we all zeros, oe 2'b11), then waddress + 1
+                   begin
+				       tm3_sram_addr = {1'b0, waddress} ; 
+				       want_addr = 1'b1 ; 
+				       read_ready = 1'b1 ; 
+				       dirReady = 1'b0 ; 
+				       writebackack = 1'b0 ; 
+                      tm3_sram_data_xhdl0 = datain ; 
+                      tm3_sram_we = 8'b00000000 ; 
+
+
+                   tm3_sram_oe = 2'b11 ; 
+                      tm3_sram_adsp = 1'b0 ; 
+                      want_data = 1'b0 ; 
+                      next_state = 4 ; 
+
+				          temp_fbdatavalidl = 1'b0 ; 
+				          temp_shadedataready = 1'b0 ; 
+				          temp_texelready = 1'b0 ; 
+                         temp_waddress = waddress + 1 ; 
+
+                   end
+          4 : // after the write: want_data low until data_ready drops
+                   begin
+				       tm3_sram_we = 8'b11111111 ; 
+				       tm3_sram_oe = 2'b01 ; 
+				       tm3_sram_adsp = 1'b0 ; 
+				       tm3_sram_data_xhdl0 = 0;
+				       tm3_sram_addr = {1'b0, waddress} ; 
+				       want_addr = 1'b1 ; 
+				       read_ready = 1'b1 ; 
+				       dirReady = 1'b0 ; 
+				       writebackack = 1'b0 ; 
+                      if (data_ready == 1'b0)
+                      begin
+
+                         next_state = 0 ; 
+                      end
+                      else
+                      begin
+                         next_state = 4 ; 
+                      end 
+                      want_data = 1'b0 ; 
+                   end
+
+          5 : // direction read: dirReady high until wantDir drops, then waddress + 1
+                   begin
+				       tm3_sram_we = 8'b11111111 ; 
+				       tm3_sram_oe = 2'b01 ; 
+				       tm3_sram_adsp = 1'b0 ; 
+				       tm3_sram_data_xhdl0 = 0;
+				       tm3_sram_addr = {1'b0, waddress} ; 
+				       want_addr = 1'b1 ; 
+				       want_data = 1'b1 ; 
+				       read_ready = 1'b1 ; 
+				       writebackack = 1'b0 ; 
+
+                     dirReady = 1'b1 ; 
+                      if (wantDir == 1'b0)
+                      begin
+                         next_state = 0 ; 
+
+                      end
+                      else
+                      begin
+                         next_state = 5 ; 
+                      end 
+
+				          temp_fbdatavalidl = 1'b0 ; 
+				          temp_shadedataready = 1'b0 ; 
+				          temp_texelready = 1'b0 ; 
+                         if (wantDir == 1'b0)
+                         begin
+                            temp_waddress = waddress + 1 ; 
+                         end 
+
+                   end
+          6 : // write-back: write writebackdata at writebackaddr and acknowledge for this one state
+                   begin
+				       want_addr = 1'b1 ; 
+				       want_data = 1'b1 ; 
+				       read_ready = 1'b1 ; 
+				       dirReady = 1'b0 ; 
+
+                      tm3_sram_data_xhdl0 = writebackdata ; 
+                      tm3_sram_we = 8'b00000000 ; 
+                      tm3_sram_oe = 2'b11 ; 
+                      tm3_sram_adsp = 1'b0 ; 
+                      tm3_sram_addr = {1'b0, writebackaddr} ; 
+                      writebackack = 1'b1 ; 
+                      next_state = 0 ; 
+                   end
+
+          7 : // frame-buffer burst: read {3'b011, fbpage, faddress} while counting fcount down
+                   begin
+				       tm3_sram_we = 8'b11111111 ; 
+				       tm3_sram_oe = 2'b01 ; 
+				       tm3_sram_adsp = 1'b0 ; 
+				       tm3_sram_data_xhdl0 = 0;
+				       want_addr = 1'b1 ; 
+				       want_data = 1'b1 ; 
+				       read_ready = 1'b1 ; 
+				       dirReady = 1'b0 ; 
+				       writebackack = 1'b0 ; 
+                      tm3_sram_addr = {3'b011, fbpage, faddress} ; 
+                      if ((fcount == 1) | (addr_ready == 1'b1) | (want_read == 1'b1) | (data_ready == 1'b1) | (wantDir == 1'b1) | (wantwriteback == 1'b1))
+                      begin
+                         next_state = 0 ; 
+                         // Burst finished (fcount == 1) or pre-empted by an addr/read/data/dir/write-back request.
+                      end
+                      else
+                      begin
+                         next_state = 7 ; 
+                      end 
+
+
+				          temp_shadedataready = 1'b0 ; 
+				          temp_texelready = 1'b0 ; 
+                         temp_fbdatavalidl = 1'b1 ; 
+                         if (fcount != 0)
+                         begin
+                            temp_faddress = faddress + 1 ; 
+                            temp_fcount = fcount - 1 ; 
+                         end 
+
+                   end
+          8 : // new scanline: reload fcount, wrap faddress to 0 at 25680, then burst in state 7
+                   begin
+				       tm3_sram_we = 8'b11111111 ; 
+				       tm3_sram_oe = 2'b01 ; 
+				       tm3_sram_adsp = 1'b0 ; 
+				       tm3_sram_data_xhdl0 = 0;
+				       tm3_sram_addr = {1'b0, waddress} ; 
+				       want_addr = 1'b1 ; 
+				       want_data = 1'b1 ; 
+				       read_ready = 1'b1 ; 
+				       dirReady = 1'b0 ; 
+				       writebackack = 1'b0 ; 
+                      next_state = 7 ; 
+
+				   				          temp_fbdatavalidl = 1'b0 ; 
+				          temp_shadedataready = 1'b0 ; 
+				          temp_texelready = 1'b0 ; 
+                         temp_fcount = 7'b1101011 ; 
+                         if (faddress == 25680)
+                         begin
+                            temp_faddress = 0;
+                         end 
+                   end
+          9 : // shade-data read at {3'b010, triID}; raises temp_shadedataready
+                   begin
+				       tm3_sram_we = 8'b11111111 ; 
+				       tm3_sram_oe = 2'b01 ; 
+				       tm3_sram_adsp = 1'b0 ; 
+				       tm3_sram_data_xhdl0 = 0;
+				       want_addr = 1'b1 ; 
+				       want_data = 1'b1 ; 
+				       read_ready = 1'b1 ; 
+				       dirReady = 1'b0 ; 
+				       writebackack = 1'b0 ; 
+                      tm3_sram_addr = {3'b010, triID} ; 
+                      next_state = 0 ; 
+
+				          temp_fbdatavalidl = 1'b0 ; 
+				          temp_texelready = 1'b0 ; 
+                         temp_shadedataready = 1'b1 ; 
+                   end
+
+          10 : // texel read at {1'b0, texeladdr}; raises temp_texelready
+                   begin
+				       tm3_sram_we = 8'b11111111 ; 
+				       tm3_sram_oe = 2'b01 ; 
+				       tm3_sram_adsp = 1'b0 ; 
+				       tm3_sram_data_xhdl0 = 0;
+				       want_addr = 1'b1 ; 
+				       want_data = 1'b1 ; 
+				       read_ready = 1'b1 ; 
+				       dirReady = 1'b0 ; 
+				       writebackack = 1'b0 ; 
+                      tm3_sram_addr = {1'b0, texeladdr} ; 
+                      next_state = 0 ; 
+
+				          temp_fbdatavalidl = 1'b0 ; 
+				          temp_shadedataready = 1'b0 ; 
+                         temp_texelready = 1'b1 ; 
+                   end
+       endcase 
+    end 
+ endmodule
+
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+
+ module raysend (as, ack, addr, dir, origx, origy, origz, rgData, rgAddr, rgWE, rgAddrValid, rgDone, globalreset, clk, statepeek);
+    // Sends origx, origy, origz and dir as five 32-bit words on rgData (rgWE 3'b001..3'b101), waiting for rgDone after each word.
+    input as; 
+    output ack; 
+    reg ack;
+    input[3:0] addr; 
+    input[47:0] dir; 
+    input[27:0] origx; 
+    input[27:0] origy; 
+    input[27:0] origz; 
+    output[31:0] rgData; 
+    reg[31:0] rgData;
+
+    output[3:0] rgAddr; 
+    reg[3:0] rgAddr;
+    output[2:0] rgWE; 
+    reg[2:0] rgWE;
+    output rgAddrValid; 
+    reg rgAddrValid;
+    input rgDone; 
+    input globalreset; 
+    input clk; 
+    output[2:0] statepeek; 
+    reg[2:0] statepeek;
+
+    reg[3:0] state; 
+    reg[3:0] next_state; 
+
+
+    // Combinational next values for the registered outputs; loaded on every non-reset clock edge.
+    reg[31:0] temp_rgData;
+    reg[2:0] temp_rgWE; 
+    reg temp_rgAddrValid;
+    reg temp_ack;
+    reg[3:0] temp_rgAddr; 
+
+    always @(posedge clk)
+    begin
+       if (globalreset == 1'b1)
+       begin
+          state <= 0 ; 
+          ack <= 1'b0 ; 
+          rgWE <= 3'b000 ; 
+          rgData <= 0;
+          rgAddrValid <= 1'b0 ; 
+          rgAddr <= 0;
+       end
+       else
+       begin
+          state <= next_state ; 
+
+rgData <= temp_rgData;
+rgWE <= temp_rgWE;
+rgAddrValid <= temp_rgAddrValid;
+ack <= temp_ack;
+rgAddr <= temp_rgAddr;
+
+       end 
+    end 
+    // NOTE(review): addr, dir and origx/origy/origz are read below but are not in the sensitivity list -- confirm.
+    always @(state or ack or as or rgDone)
+    begin
+       // NOTE(review): the temp_* signals and statepeek are not assigned on every path and there is no default arm, so they hold (inferred latches) -- confirm intended.
+       case (state)
+          0 : // idle: start a transfer on as high with ack low; clear ack on as low with ack high
+                   begin
+                      if ((as == 1'b1) & (ack == 1'b0))
+                      begin
+                         next_state = 1 ; 
+                      end
+                      else
+                      begin
+                         next_state = 0 ; 
+                      end 
+                      statepeek = 3'b001 ; 
+                         // Stage word 1 (origx, zero-extended to 32 bits) and capture the destination addr.
+                         if ((as == 1'b1) & (ack == 1'b0))
+                         begin
+                            temp_rgData = {4'b0000, origx} ; 
+                            temp_rgWE = 3'b001 ; 
+                            temp_rgAddrValid = 1'b1 ; 
+                            temp_rgAddr = addr ; 
+                         end 
+                         if (as == 1'b0 & ack == 1'b1)
+                         begin
+                            temp_ack = 1'b0 ; 
+                         end 
+
+                   end
+          1 : // word 1 pending: on rgDone drop rgAddrValid and go to state 6
+                   begin
+                      if (rgDone == 1'b1)
+                      begin
+                         next_state = 6 ; 
+                      end
+                      else
+                      begin
+                         next_state = 1 ; 
+                      end 
+                      statepeek = 3'b010 ; 
+
+                         if (rgDone == 1'b1)
+                         begin
+                            temp_rgAddrValid = 1'b0 ; 
+                         end 
+
+                   end
+          2 : // word 2 pending: on rgDone drop rgAddrValid and go to state 7
+                   begin
+                      if (rgDone == 1'b1)
+                      begin
+                         next_state = 7 ; 
+                      end
+                      else
+                      begin
+                         next_state = 2 ; 
+                      end 
+                      statepeek = 3'b011 ; 
+
+                         if (rgDone == 1'b1)
+                         begin
+                            temp_rgAddrValid = 1'b0 ; 
+                         end 
+
+                   end
+           3 : // word 3 pending: on rgDone drop rgAddrValid and go to state 8
+                   begin
+                      if (rgDone == 1'b1)
+                      begin
+                         next_state = 8 ; 
+                      end
+                      else
+                      begin
+                         next_state = 3 ; 
+                      end 
+                      statepeek = 3'b100 ; 
+
+                         if (rgDone == 1'b1)
+                         begin
+                            temp_rgAddrValid = 1'b0 ; 
+                         end 
+
+                   end
+         4 : // word 4 pending: on rgDone drop rgAddrValid and go to state 9
+                   begin
+                      if (rgDone == 1'b1)
+                       begin
+                         next_state = 9 ; 
+                      end
+                      else
+                      begin
+                         next_state = 4 ; 
+                      end 
+                      statepeek = 3'b101 ; 
+
+                         if (rgDone == 1'b1)
+                         begin
+                            temp_rgAddrValid = 1'b0 ; 
+                         end 
+                   end
+
+          5 : // word 5 pending: request ack; on rgDone drop rgAddrValid and return to state 0
+                   begin
+                      if (rgDone == 1'b1)
+                      begin
+                         next_state = 0 ; 
+                      end
+                      else
+                      begin
+                         next_state = 5 ; 
+                      end 
+                      statepeek = 3'b110 ; 
+
+                         temp_ack = 1'b1 ; 
+                         if (rgDone == 1'b1)
+                         begin
+                            temp_rgAddrValid = 1'b0 ; 
+                         end 
+
+                   end
+          // States 6 to 9 each stage the next word for one state and then move to the matching wait state.
+          6 : // stage word 2: origy, rgWE 3'b010
+                   begin
+                      next_state = 2 ; 
+
+                         temp_rgData = {4'b0000, origy} ; 
+                         temp_rgWE = 3'b010 ; 
+                         temp_rgAddrValid = 1'b1 ; 
+
+                   end
+          7 : // stage word 3: origz, rgWE 3'b011
+                   begin
+                      next_state = 3 ; 
+
+                         temp_rgData = {4'b0000, origz} ; 
+                         temp_rgWE = 3'b011 ; 
+                         temp_rgAddrValid = 1'b1 ; 
+                   end
+          8 : // stage word 4: {dir[31:16], dir[47:32]}, rgWE 3'b100
+                   begin
+                      next_state = 4 ; 
+
+                         temp_rgData = {dir[31:16], dir[47:32]} ; 
+                         temp_rgWE = 3'b100 ; 
+                         temp_rgAddrValid = 1'b1 ; 
+                   end
+           9 : // stage word 5: dir[15:0] zero-extended, rgWE 3'b101
+                   begin
+                      next_state = 5 ; 
+
+                         temp_rgData = {16'b0000000000000000, dir[15:0]} ; 
+                          temp_rgWE = 3'b101 ; 
+                         temp_rgAddrValid = 1'b1 ; 
+                   end
+       endcase 
+    end 
+ endmodule
+
+    
+    
+    
+    
+    
+
+ module raygencont (go, initcount, busyout, cycles, nextaddr, nas0, nas1, page, dirReady, wantDir, dirIn, addrIn, as, addr, ack, dir, raygroup0, raygroupvalid0, busy0, raygroup1, raygroupvalid1, busy1, globalreset, clk, statepeek);
+    // Five-state controller (state values 0..4): 0 waits for go, 1 asserts wantDir until dirReady, 2 asserts 'as' until ack, 3 and 4 wait on busy[0] / busy[1].
+    input go; // moves the controller out of state 0 when high
+    input[14:0] initcount; // copied into temp_count while in state 0
+    output busyout; 
+    wire busyout;
+    reg temp_busyout; // combinational source of busyout: 0 in state 0, 1 in states 1-4
+    output[30:0] cycles; 
+    reg[30:0] cycles; // cleared when go is seen in state 0, incremented in states 1-4
+    output[17:0] nextaddr; 
+    wire[17:0] nextaddr;
+    output nas0; 
+
+    wire nas0;
+    reg temp_nas0; // combinational source of nas0
+    output nas1; 
+    wire nas1;
+    reg temp_nas1; // combinational source of nas1
+    input page; 
+    input dirReady; 
+    output wantDir; 
+    reg wantDir; // assigned by the combinational block below, not clocked
+    input[47:0] dirIn; 
+    input[14:0] addrIn; 
+    output as; 
+    reg as; // assigned by the combinational block below, not clocked
+    output[3:0] addr; 
+
+    reg[3:0] addr;
+    input ack; 
+    output[47:0] dir; 
+    reg[47:0] dir;
+    output[1:0] raygroup0; 
+    wire[1:0] raygroup0;
+    output raygroupvalid0; 
+    reg raygroupvalid0;
+    input busy0; 
+    output[1:0] raygroup1; 
+    wire[1:0] raygroup1;
+    output raygroupvalid1; 
+
+    reg raygroupvalid1;
+    input busy1; 
+    input globalreset; 
+    input clk; 
+    output[2:0] statepeek; 
+    reg[2:0] statepeek; // combinational: each state arm drives its own state value plus one
+
+    // Clocked state; each temp_* register further down holds the next value computed by the combinational block.
+    reg[2:0] state; 
+    reg[2:0] next_state; 
+    reg[14:0] count; 
+    reg first; // NOTE(review): only written in the reset branch within this module
+    reg[17:0] destaddr; // NOTE(review): only written in the reset branch within this module
+    wire[1:0] busy; 
+    reg[1:0] loaded; 
+    reg[1:0] groupID; 
+    reg active; 
+
+    reg[47:0] temp_dir;
+    reg[30:0] temp_cycles;
+    reg[1:0] temp_addr; // next value for addr[1:0] only; addr[3:2] is derived separately
+    reg[1:0] temp_loaded; 
+    reg[1:0] temp_groupID; 
+    reg[14:0] temp_count; 
+    reg temp_active; 
+    reg temp_raygroupvalid1;
+    reg temp_raygroupvalid0;
+
+    assign busy = {busy1, busy0} ; // busy[1] = busy1, busy[0] = busy0
+
+    always @(posedge clk)
+    begin
+       // globalreset is sampled on the clock edge (no asynchronous term in the event list)
+       if (globalreset == 1'b1)
+
+       begin
+          state <= 0 ; 
+          cycles <= 0;
+          dir <= 0;
+          addr[1:0] <= 2'b00 ; // only the low two bits are reset; addr[3:2] is not assigned in this branch
+          groupID <= 2'b00 ; 
+          count <= 0;
+          first <= 1'b0 ; 
+          destaddr <= 0;
+          raygroupvalid0 <= 1'b0 ; 
+          raygroupvalid1 <= 1'b0 ; 
+          loaded <= 2'b00 ; 
+
+          active <= 1'b0 ; 
+       end
+       else
+       begin
+    	addr[3:2] <= (active == 1'b0) ? {1'b0, groupID[0]} : {1'b1, groupID[1]} ; // uses the current (pre-edge) active and groupID
+	addr[1:0] <= temp_addr[1:0];
+        state <= next_state ; 
+
+	dir <= temp_dir;
+	cycles <= temp_cycles;
+	loaded <= temp_loaded;	
+	groupID <= temp_groupID;
+	count <= temp_count;
+	active <= temp_active;
+	raygroupvalid0 <= temp_raygroupvalid0;
+	raygroupvalid1 <= temp_raygroupvalid1;
+
+       end 
+    end 
+
+    assign raygroup0 = {1'b0, groupID[0]} ;
+    assign raygroup1 = {1'b1, groupID[1]} ;
+    assign nextaddr = {2'b11, page, addrIn} ;
+    assign busyout = temp_busyout;
+    assign nas0 = temp_nas0;
+    assign nas1 = temp_nas1;
+
+    always @(state or go or ack or busy or dirReady or addr or count or loaded) // NOTE(review): body also reads active, cycles, groupID, dirIn and initcount, which are not listed - confirm intended
+    begin
+       case (state) // no default arm: state values 5-7 assign nothing
+          0 :
+                   begin
+       				as = 1'b0 ; 
+       				wantDir = 1'b0 ; 
+                      if (go == 1'b1)
+                      begin
+                         next_state = 1 ; 
+                      end
+                      else
+                      begin
+                         next_state = 0 ; 
+                      end 
+                      statepeek = 3'b001 ; 
+						temp_busyout = 1'b0;
+						temp_nas0 = 1'b0;
+						temp_nas1 = 1'b0;
+
+                         // temp_cycles is only assigned when go is high; otherwise it is left unassigned here
+                         if (go == 1'b1)
+                         begin
+                            temp_cycles = 0;
+                         end 
+                         temp_addr[1:0] = 2'b00 ; 
+                         temp_loaded = 2'b00 ; 
+                         temp_groupID = 2'b00 ; 
+                         temp_count = initcount ; 
+                         temp_active = 1'b0 ; 
+
+                   end
+          1 :
+                   begin
+                      as = dirReady ; 
+                      wantDir = 1'b1 ; 
+                      if (dirReady == 1'b1)
+                      begin
+                         next_state = 2 ; 
+                      end
+                      else
+                      begin
+                         next_state = 1 ; 
+                      end 
+                     statepeek = 3'b010 ; 
+						temp_busyout = 1'b1;
+    				if (addr[1:0] == 2'b00 & dirReady == 1'b1 & active == 1'b0) // no else: temp_nas0/temp_nas1 are not assigned when this is false
+					begin
+						 temp_nas0 = 1'b1;
+						 temp_nas1 = 1'b1;
+					end
+
+                         temp_dir = dirIn ; // state 1 is the only arm that assigns temp_dir
+                         if (dirReady == 1'b1 & addr[1:0] == 2'b10)
+                         begin
+                            if (active == 1'b0)
+                            begin
+                               temp_loaded[0] = 1'b1 ; 
+                            end
+                            else
+                            begin
+                               temp_loaded[1] = 1'b1 ; 
+                            end 
+                         end 
+             temp_cycles = cycles + 1 ; 
+
+
+                   end
+          2 :
+                   begin
+                      wantDir = 1'b0 ; 
+                      as = 1'b1 ; 
+                      if ((ack == 1'b1) & (addr[1:0] != 2'b10)) // ack with addr[1:0] != 2: go back to state 1
+                      begin
+                         next_state = 1 ; 
+                      end
+                      else if (ack == 1'b1) // ack with addr[1:0] == 2: choose between states 3, 4, 1 or stay
+                      begin
+                         if ((loaded[0]) == 1'b1 & (busy[0]) == 1'b0)
+                         begin
+                            next_state = 3 ; 
+                         end
+                         else if ((loaded[1]) == 1'b1 & (busy[1]) == 1'b0)
+                         begin
+                            next_state = 4 ; 
+                         end
+                         else if (loaded != 2'b11)
+                         begin
+
+                            next_state = 1 ; 
+                         end
+                         else
+                         begin
+                            next_state = 2 ; 
+                         end 
+                      end
+                      else
+                      begin
+                         next_state = 2 ; 
+                      end 
+                      statepeek = 3'b011 ; 
+						temp_busyout = 1'b1;
+						temp_nas0 = 1'b0;
+						temp_nas1 = 1'b0;
+
+                         if ((ack == 1'b1) & (addr[1:0] != 2'b10))
+                         begin
+                            temp_addr[1:0] = addr[1:0] + 2'b01 ; 
+                         end 
+                         else if ((ack == 1'b1) & addr[1:0] == 2'b10)
+                         begin
+                            if ((loaded[0]) == 1'b1 & (busy[0]) == 1'b0) // same priority order as the next_state chain above
+                            begin
+                               temp_raygroupvalid0 = 1'b1 ; 
+                            end
+                            else if ((loaded[1]) == 1'b1 & (busy[1]) == 1'b0)
+                            begin
+
+                               temp_raygroupvalid1 = 1'b1 ; 
+                            end
+                            else if ((loaded[0]) == 1'b0)
+                            begin
+                               temp_active = 1'b0 ; 
+                               temp_addr[1:0] = 2'b00 ; 
+                            end
+                            else if ((loaded[1]) == 1'b0)
+                            begin
+                               temp_active = 1'b1 ; 
+                               temp_addr[1:0] = 2'b00 ; 
+                            end 
+                         end 
+
+             temp_cycles = cycles + 1 ; 
+                   end
+          4 :
+                   begin // 'as' and wantDir are not assigned anywhere in this arm
+                      if ((busy[1]) == 1'b0) // stay in state 4 until busy[1] goes high
+                      begin
+                         next_state = 4 ; 
+                      end
+                      else if ((loaded[0]) == 1'b1 & (busy[0]) == 1'b0)
+                      begin
+                         next_state = 3 ; 
+                      end
+                      else if (count > 0)
+                      begin
+
+                         next_state = 1 ; 
+                      end
+                      else
+                      begin
+                         next_state = 0 ; 
+                      end 
+                      statepeek = 3'b101 ; 
+						temp_busyout = 1'b1;
+						temp_nas0 = 1'b0;
+						temp_nas1 = 1'b0;
+
+                     if ((busy[1]) == 1'b1)
+                         begin
+                            temp_groupID[1] = ~groupID[1] ; 
+                            temp_raygroupvalid1 = 1'b0 ; 
+                            temp_count = count - 1 ; 
+                            if ((loaded[0]) == 1'b1 & (busy[0]) == 1'b0)
+                            begin
+                               temp_raygroupvalid0 = 1'b1 ; 
+                            end
+
+                            else if ((loaded[0]) == 1'b0)
+                            begin
+                               temp_active = 1'b0 ; 
+                            end
+                            else
+                            begin
+                               temp_active = 1'b1 ; 
+                            end 
+                         end 
+                         temp_loaded[1] = 1'b0 ; // unconditional in state 4 (outside the busy[1] check)
+                         temp_addr[1:0] = 2'b00 ; 
+
+             temp_cycles = cycles + 1 ; 
+                   end
+          3 :
+                   begin // 'as' and wantDir are not assigned anywhere in this arm
+                      if ((busy[0]) == 1'b0) // stay in state 3 until busy[0] goes high
+                      begin
+                         next_state = 3 ; 
+
+                      end
+                      else if ((loaded[1]) == 1'b1 & (busy[1]) == 1'b0)
+                      begin
+                         next_state = 4 ; 
+                      end
+                      else if (count > 0)
+                      begin
+                         next_state = 1 ; 
+                      end
+                      else
+                      begin
+                         next_state = 0 ; 
+
+                      end 
+                      statepeek = 3'b100 ; 
+						temp_busyout = 1'b1;
+						temp_nas0 = 1'b0;
+						temp_nas1 = 1'b0;
+
+                         if ((busy[0]) == 1'b1)
+                         begin
+                            temp_groupID[0] = ~groupID[0] ; 
+                            temp_raygroupvalid0 = 1'b0 ; 
+                            temp_count = count - 1 ; 
+                            if ((loaded[1]) == 1'b1 & (busy[1]) == 1'b0)
+                            begin
+                               temp_raygroupvalid1 = 1'b1 ; 
+
+                            end
+                            else if ((loaded[1]) == 1'b0)
+                            begin
+                               temp_active = 1'b1 ; 
+                            end
+                            else
+                            begin
+                               temp_active = 1'b0 ; 
+                            end 
+                         end 
+                         temp_loaded[0] = 1'b0 ; // unconditional in state 3 (outside the busy[0] check)
+                         temp_addr[1:0] = 2'b00 ; 
+
+
+             temp_cycles = cycles + 1 ; 
+                   end
+       endcase 
+    end 
+ endmodule
+    
+    
+    
+    
+    
+    
+    
+
+ module resultrecieve (valid01, valid10, id01a, id01b, id01c, id10a, id10b, id10c, hit01a, hit01b, hit01c, hit10a, hit10b, hit10c, u01a, u01b, u01c, v01a, v01b, v01c, u10a, u10b, u10c, v10a, v10b, v10c, rgResultData, rgResultReady, rgResultSource, globalreset, clk);
+    // Unpacks rgResultData into the *01* outputs (states 0,1,2,3 when rgResultSource == 2'b01) or the *10* outputs (states 0,4,5,6 when rgResultSource == 2'b10).
+    output valid01; 
+    reg valid01; // next value is 1 only in state 3
+    output valid10; 
+    reg valid10; // next value is 1 only in state 6
+    output[15:0] id01a; 
+    reg[15:0] id01a;
+    output[15:0] id01b; 
+    reg[15:0] id01b;
+    output[15:0] id01c; 
+    reg[15:0] id01c;
+
+    output[15:0] id10a; 
+    reg[15:0] id10a;
+    output[15:0] id10b; 
+    reg[15:0] id10b;
+    output[15:0] id10c; 
+    reg[15:0] id10c;
+    output hit01a; 
+    reg hit01a;
+    output hit01b; 
+    reg hit01b;
+    output hit01c; 
+    reg hit01c;
+
+    output hit10a; 
+    reg hit10a;
+    output hit10b; 
+    reg hit10b;
+    output hit10c; 
+    reg hit10c;
+    output[7:0] u01a; 
+    reg[7:0] u01a;
+    output[7:0] u01b; 
+    reg[7:0] u01b;
+    output[7:0] u01c; 
+    reg[7:0] u01c;
+
+    output[7:0] v01a; 
+    reg[7:0] v01a;
+    output[7:0] v01b; 
+    reg[7:0] v01b;
+    output[7:0] v01c; 
+    reg[7:0] v01c;
+    output[7:0] u10a; 
+    reg[7:0] u10a;
+    output[7:0] u10b; 
+    reg[7:0] u10b;
+    output[7:0] u10c; 
+    reg[7:0] u10c;
+
+    output[7:0] v10a; 
+    reg[7:0] v10a;
+    output[7:0] v10b; 
+    reg[7:0] v10b;
+    output[7:0] v10c; 
+    reg[7:0] v10c;
+    input[31:0] rgResultData; 
+    input rgResultReady; 
+    input[1:0] rgResultSource; 
+    input globalreset; 
+    input clk; 
+    // temp_* below are written by the combinational block and copied to the outputs on posedge clk
+    reg temp_valid01;
+    reg temp_valid10;
+    reg[15:0] temp_id01a;
+    reg[15:0] temp_id01b;
+    reg[15:0] temp_id01c;
+    reg[15:0] temp_id10a;
+    reg[15:0] temp_id10b;
+    reg[15:0] temp_id10c;
+    reg temp_hit01a;
+    reg temp_hit01b;
+    reg temp_hit01c;
+    reg temp_hit10a;
+    reg temp_hit10b;
+    reg temp_hit10c;
+    reg[7:0] temp_u01a;
+    reg[7:0] temp_u01b;
+    reg[7:0] temp_u01c;
+    reg[7:0] temp_v01a;
+    reg[7:0] temp_v01b;
+    reg[7:0] temp_v01c;
+    reg[7:0] temp_u10a;
+    reg[7:0] temp_u10b;
+    reg[7:0] temp_u10c;
+    reg[7:0] temp_v10a;
+    reg[7:0] temp_v10b;
+    reg[7:0] temp_v10c;
+
+
+    reg[2:0] state; // values 0..6 are decoded below
+    reg[2:0] next_state; 
+
+    always @(posedge clk)
+    begin
+       if (globalreset == 1'b1) // reset is sampled on the clock edge
+       begin
+          state <= 0 ; 
+          valid01 <= 1'b0 ; 
+          valid10 <= 1'b0 ; 
+          hit01a <= 1'b0 ; 
+          hit01b <= 1'b0 ; 
+          hit01c <= 1'b0 ; 
+          hit10a <= 1'b0 ; 
+          hit10b <= 1'b0 ; 
+          hit10c <= 1'b0 ; 
+          id01a <= 0;
+
+          id01b <= 0;
+          id01c <= 0;
+          id10a <= 0;
+          id10b <= 0;
+          id10c <= 0;
+          u01a <= 0;
+          u01b <= 0;
+          u01c <= 0;
+          v01a <= 0;
+          v01b <= 0;
+          v01c <= 0;
+          u10a <= 0;
+
+          u10b <= 0;
+          u10c <= 0;
+          v10a <= 0;
+          v10b <= 0;
+          v10c <= 0;
+       end
+       else
+       begin
+          state <= next_state ; 
+          // NOTE(review): id10a/id10b/id10c are not loaded from temp_id10a/b/c below, so reset is their only writer - looks like an omission, confirm
+valid01 <= temp_valid01;
+valid10 <= temp_valid10;
+id01a <= temp_id01a;
+id01b <= temp_id01b;
+id01c <= temp_id01c;
+hit01a <= temp_hit01a;
+hit01b <= temp_hit01b;
+hit01c <= temp_hit01c;
+u01a <= temp_u01a;
+u01b <= temp_u01b;
+u01c <= temp_u01c;
+u10a <= temp_u10a;
+u10b <= temp_u10b;
+u10c <= temp_u10c;
+v01a <= temp_v01a;
+v01b <= temp_v01b;
+v01c <= temp_v01c;
+v10a <= temp_v10a;
+v10b <= temp_v10b;
+v10c <= temp_v10c;
+hit10a <= temp_hit10a;
+hit10b <= temp_hit10b;
+hit10c <= temp_hit10c;
+       end 
+    end 
+
+
+    always @(state or rgResultReady or rgResultSource) // NOTE(review): body also reads rgResultData, which is not listed - confirm intended
+    begin
+       case (state) // no default arm: state value 7 assigns nothing
+          0 :
+                   begin
+                      if (rgResultReady == 1'b1 & rgResultSource == 2'b01)
+                      begin
+                         next_state = 1 ; 
+                      end
+                      else if (rgResultReady == 1'b1 & rgResultSource == 2'b10)
+                      begin
+
+                         next_state = 4 ; 
+                      end
+                      else
+                      begin
+                         next_state = 0 ; 
+                      end 
+
+
+			temp_valid01 = 1'b0 ; 
+				          temp_valid10 = 1'b0 ; 
+                         if (rgResultReady == 1'b1 & rgResultSource == 2'b01) // first word: two 16-bit ids for the selected source
+                         begin
+                            temp_id01a = rgResultData[31:16] ; 
+                            temp_id01b = rgResultData[15:0] ; 
+                         end
+                         else if (rgResultReady == 1'b1 & rgResultSource == 2'b10)
+                         begin
+                            temp_id10a = rgResultData[31:16] ; 
+                            temp_id10b = rgResultData[15:0] ; 
+                         end 
+
+                   end
+
+          1 :
+                   begin
+                      next_state = 2 ; 
+
+			temp_valid01 = 1'b0 ; 
+				          temp_valid10 = 1'b0 ; 
+                         temp_id01c = rgResultData[15:0] ; // second word: third id in [15:0], hit flags in bits 18..16
+                         temp_hit01a = rgResultData[18] ; 
+                         temp_hit01b = rgResultData[17] ; 
+                         temp_hit01c = rgResultData[16] ; 
+
+                   end
+          2 :
+
+                   begin
+                      next_state = 3 ; 
+
+			temp_valid01 = 1'b0 ; 
+				          temp_valid10 = 1'b0 ; 
+                         temp_u01a = rgResultData[23:16] ; // third word: three 8-bit u values
+                         temp_u01b = rgResultData[15:8] ; 
+                         temp_u01c = rgResultData[7:0] ; 
+
+                   end
+          3 :
+                   begin
+                      next_state = 0 ; 
+
+				          temp_valid10 = 1'b0 ; 
+                         temp_v01a = rgResultData[23:16] ; // fourth word: three 8-bit v values
+                         temp_v01b = rgResultData[15:8] ; 
+                         temp_v01c = rgResultData[7:0] ; 
+                         temp_valid01 = 1'b1 ; // the only state that sets temp_valid01
+
+                   end
+          4 :
+                   begin
+                      next_state = 5 ; 
+
+          				temp_valid01 = 1'b0 ; 
+				          temp_valid10 = 1'b0 ; 
+                         temp_id10c = rgResultData[15:0] ; // same layout as state 1, for the *10* outputs
+
+                         temp_hit10a = rgResultData[18] ; 
+                         temp_hit10b = rgResultData[17] ; 
+                         temp_hit10c = rgResultData[16] ; 
+
+                   end
+          5 :
+
+                   begin
+                      next_state = 6 ; 
+
+          				temp_valid01 = 1'b0 ; 
+				          temp_valid10 = 1'b0 ; 
+                         temp_u10a = rgResultData[23:16] ; // same layout as state 2, for the *10* outputs
+                         temp_u10b = rgResultData[15:8] ; 
+                         temp_u10c = rgResultData[7:0] ; 
+
+                   end
+          6 :
+                   begin
+                      next_state = 0 ; 
+
+      				temp_valid01 = 1'b0 ; 
+                         temp_v10a = rgResultData[23:16] ; // same layout as state 3, for the *10* outputs
+                         temp_v10b = rgResultData[15:8] ; 
+                         temp_v10c = rgResultData[7:0] ; 
+                         temp_valid10 = 1'b1 ; // the only state that sets temp_valid10
+
+                   end
+       endcase 
+    end 
+ endmodule
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+
+ module resultwriter (valid01, valid10, id01a, id01b, id01c, id10a, id10b, id10c, hit01a, hit01b, hit01c, hit10a, hit10b, hit10c, u01a, u01b, u01c, v01a, v01b, v01c, u10a, u10b, u10c, v10a, v10b, v10c, addr, as01, as10, bkcolour, shadedata, triID, wantshadedata, shadedataready, texinfo, texaddr, texeladdr, texel, wanttexel, texelready, dataout, addrout, write, ack, globalreset, clk);
+
+    input valid01; 
+    input valid10; 
+    input[15:0] id01a; 
+    input[15:0] id01b; 
+    input[15:0] id01c; 
+    input[15:0] id10a; 
+    input[15:0] id10b; 
+    input[15:0] id10c; 
+
+    input hit01a; 
+    input hit01b; 
+    input hit01c; 
+    input hit10a; 
+    input hit10b; 
+    input hit10c; 
+    input[7:0] u01a; 
+    input[7:0] u01b; 
+    input[7:0] u01c; 
+    input[7:0] v01a; 
+    input[7:0] v01b; 
+    input[7:0] v01c; 
+
+    input[7:0] u10a; 
+    input[7:0] u10b; 
+    input[7:0] u10c; 
+    input[7:0] v10a; 
+    input[7:0] v10b; 
+    input[7:0] v10c; 
+    input[17:0] addr; 
+    input as01; 
+    input as10; 
+    input[20:0] bkcolour; 
+    input[63:0] shadedata; 
+    output[15:0] triID; 
+
+    reg[15:0] triID;
+    output wantshadedata; 
+    reg wantshadedata;
+    input shadedataready; 
+    input[20:0] texinfo; 
+    output[3:0] texaddr; 
+    wire[3:0] texaddr;
+    output[17:0] texeladdr; 
+    wire[17:0] texeladdr;
+    input[63:0] texel; 
+    output wanttexel; 
+    reg wanttexel;
+
+    input texelready; 
+    output[63:0] dataout; 
+    // PAJ see lower note wire[63:0] dataout;
+    reg[63:0] dataout;
+    output[17:0] addrout; 
+    wire[17:0] addrout;
+    output write; 
+    wire write;
+    reg temp_write;
+    input ack; 
+    input globalreset; 
+    input clk; 
+
+    reg[3:0] state; 
+    reg[3:0] next_state; 
+    reg pending01; 
+    reg pending10; 
+    reg process01; 
+    wire[17:0] addrout01; 
+    wire[17:0] addrout10; 
+    wire shiften01; 
+    wire shiften10; 
+    reg temp_shiften01; 
+    reg temp_shiften10; 
+    reg[20:0] shadedataa; 
+    reg[20:0] shadedatab; 
+    reg[20:0] shadedatac; 
+    wire hita; 
+    wire hitb; 
+    wire hitc; 
+
+    reg[2:0] selectuv; 
+    wire[6:0] blr; 
+    wire[6:0] blg; 
+    wire[6:0] blb; 
+    reg texmap; 
+    reg lmenable; 
+    wire[1:0] texelselect; 
+    wire[6:0] texelr; 
+    wire[6:0] texelg; 
+    wire[6:0] texelb; 
+    reg[20:0] texinfol; 
+
+    reg temp_pending01; 
+    reg temp_pending10; 
+    reg temp_process01; 
+    reg temp_texmap; 
+    reg[20:0] temp_texinfol; 
+    reg[20:0] temp_shadedataa; 
+    reg[20:0] temp_shadedatab; 
+    reg[20:0] temp_shadedatac; 
+
+    col16to21 col16to21inst (texel, texelselect, texelr, texelg, texelb); 
+    linearmap linearmapinst (blb, blg, texinfol[17:0], texeladdr, texelselect, texinfol[20:18], lmenable, clk); 
+    bilinearintrp bilinearimp (u01a, u01b, u01c, v01a, v01b, v01c, u10a, u10b, u10c, v10a, v10b, v10c, selectuv, shadedata[41:35], shadedata[62:56], shadedata[20:14], shadedata[34:28], shadedata[55:49], shadedata[13:7], shadedata[27:21], shadedata[48:42], shadedata[6:0], blr, blg, blb, clk); 
+    fifo3 fifo3insta (addr, as01, addrout01, shiften01, globalreset, clk); 
+    fifo3 fifo3instb (addr, as10, addrout10, shiften10, globalreset, clk); 
+    assign hita = (hit01a & process01) | (hit10a & ~process01) ;
+    assign hitb = (hit01b & process01) | (hit10b & ~process01) ;
+    assign hitc = (hit01c & process01) | (hit10c & ~process01) ;
+    assign texaddr = shadedata[59:56] ;
+    assign shiften01 = temp_shiften01;
+    assign shiften10 = temp_shiften10;
+    assign write = temp_write;
+
+
+    always @(posedge clk)
+    begin
+       if (globalreset == 1'b1)
+       begin
+          state <= 0 ; 
+          pending01 <= 1'b0 ; 
+          pending10 <= 1'b0 ; 
+          shadedataa <= 0;
+          shadedatab <= 0;
+          shadedatac <= 0;
+          process01 <= 1'b0 ; 
+          texmap <= 1'b0 ; 
+
+          texinfol <= 0;
+       end
+       else
+       begin
+          state <= next_state ; 
+
+process01 <= temp_process01;
+pending01 <= temp_pending01;
+pending10 <= temp_pending10;
+texmap <= temp_texmap;
+texinfol <= temp_texinfol;
+shadedataa <= temp_shadedataa;
+shadedatab <= temp_shadedatab;
+shadedatac <= temp_shadedatac;
+
+    dataout <= {1'b0, 
+					shadedataa[20],
+					shadedataa[19],
+					shadedataa[18],
+					shadedataa[17],
+					shadedataa[16],
+					shadedataa[15],
+					shadedataa[14],
+					shadedataa[13],
+					shadedataa[12],
+					shadedataa[11],
+					shadedataa[10],
+					shadedataa[9],
+					shadedataa[8],
+					shadedataa[7],
+					shadedataa[6],
+					shadedataa[5],
+					shadedataa[4],
+					shadedataa[3],
+					shadedataa[2],
+					shadedataa[1],
+					shadedataa[0],
+					shadedatab[20],
+					shadedatab[19],
+					shadedatab[18],
+					shadedatab[17],
+					shadedatab[16],
+					shadedatab[15],
+					shadedatab[14],
+					shadedatab[13],
+					shadedatab[12],
+					shadedatab[11],
+					shadedatab[10],
+					shadedatab[9],
+					shadedatab[8],
+					shadedatab[7],
+					shadedatab[6],
+					shadedatab[5],
+					shadedatab[4],
+					shadedatab[3],
+					shadedatab[2],
+					shadedatab[1],
+					shadedatab[0],
+					shadedatac[20],
+					shadedatac[19],
+					shadedatac[18],
+					shadedatac[17],
+					shadedatac[16],
+					shadedatac[15],
+					shadedatac[14],
+					shadedatac[13],
+					shadedatac[12],
+					shadedatac[11],
+					shadedatac[10],
+					shadedatac[9],
+					shadedatac[8],
+					shadedatac[7],
+					shadedatac[6],
+					shadedatac[5],
+					shadedatac[4],
+					shadedatac[3],
+					shadedatac[2],
+					shadedatac[1],
+					shadedatac[0]} ;
+       end 
+//    end 
+// PAJ used to be assign, but weird error, so added as register   assign dataout = {1'b0, 
+    end 
+    assign addrout = (process01 == 1'b1) ? addrout01 : addrout10 ;
+
+    always @(state or process01 or pending10 or ack or shadedataready or id01a or 
+             id01b or id01c or id10a or id10b or id10c or selectuv or hita or 
+             hitb or hitc or shadedata or pending01 or texmap or texelready)
+    begin
+       case (state)
+          0 :
+                   begin
+				       wantshadedata = 1'b0 ; 
+   				       triID = 0;
+				       selectuv = 0;
+				       lmenable = 1'b0 ; 
+				       wanttexel = 1'b0 ; 
+                      if (pending01 == 1'b1 | pending10 == 1'b1)
+                      begin
+                         next_state = 2 ; 
+                      end
+                      else
+
+                      begin
+                         next_state = 0 ; 
+                      end 
+	          if (valid01 == 1'b1)
+				          begin
+				             temp_pending01 = 1'b1 ; 
+				          end 
+				          if (valid10 == 1'b1)
+				          begin
+				             temp_pending10 = 1'b1 ; 
+				          end 
+                         temp_process01 = pending01 ; 
+
+							temp_shiften01 = 1'b0;
+							temp_shiften10 = 1'b0;
+					    	temp_write = 1'b0;
+                   end
+          2 :
+                   begin
+				       lmenable = 1'b0 ; 
+				       wanttexel = 1'b0 ; 
+                      wantshadedata = 1'b1 ; 
+                      selectuv[2] = ~process01 ; 
+                      selectuv[1:0] = 2'b00 ; 
+                      if (process01 == 1'b1)
+                      begin
+                         triID = id01a ; 
+
+                      end
+                      else
+                      begin
+                         triID = id10a ; 
+                      end 
+                      if (shadedataready == 1'b1)
+                      begin
+                         if (hita == 1'b1 & ((shadedata[63]) == 1'b1 | shadedata[63:62] == 2'b01))
+                         begin
+                            next_state = 3 ; 
+                         end
+                         else
+
+                         begin
+                            next_state = 4 ; 
+                         end 
+                      end
+                      else
+                      begin
+                         next_state = 2 ; 
+                      end 
+
+	          if (valid01 == 1'b1)
+				          begin
+				             temp_pending01 = 1'b1 ; 
+				          end 
+				          if (valid10 == 1'b1)
+				          begin
+				             temp_pending10 = 1'b1 ; 
+				          end 
+
+                         if (hita == 1'b1)
+                         begin
+                            temp_shadedataa = shadedata[20:0] ; 
+                            temp_texmap = (~shadedata[63]) & shadedata[62] ; 
+                         end
+                         else
+                         begin
+                            temp_shadedataa = bkcolour ; 
+                         end 
+
+							temp_shiften01 = 1'b0;
+							temp_shiften10 = 1'b0;
+					    	temp_write = 1'b0;
+                   end
+          3 :
+                   begin
+				       wantshadedata = 1'b0 ; 
+   				       triID = 0;
+				       lmenable = 1'b0 ; 
+				       wanttexel = 1'b0 ; 
+                      selectuv[2] = ~process01 ; 
+
+                      selectuv[1:0] = 2'b00 ; 
+                      next_state = 8 ; 
+
+	          if (valid01 == 1'b1)
+				          begin
+				             temp_pending01 = 1'b1 ; 
+				          end 
+				          if (valid10 == 1'b1)
+				          begin
+				             temp_pending10 = 1'b1 ; 
+				          end 
+                         temp_texinfol = texinfo ; 
+
+							temp_shiften01 = 1'b0;
+							temp_shiften10 = 1'b0;
+					    	temp_write = 1'b0;
+
+                   end
+          8 :
+                   begin
+				       wantshadedata = 1'b0 ; 
+   				       triID = 0;
+				       wanttexel = 1'b0 ; 
+                      selectuv[2] = ~process01 ; 
+                      selectuv[1:0] = 2'b00 ; 
+                      lmenable = 1'b1 ; 
+                      if (texmap == 1'b1)
+                      begin
+
+                         next_state = 11 ; 
+                      end
+                      else
+                      begin
+                         next_state = 4 ; 
+                      end 
+
+	          if (valid01 == 1'b1)
+				          begin
+				             temp_pending01 = 1'b1 ; 
+				          end 
+				          if (valid10 == 1'b1)
+				          begin
+				             temp_pending10 = 1'b1 ; 
+				          end 
+                         temp_shadedataa[6:0] = blb ; 
+                         temp_shadedataa[13:7] = blg ; 
+                         temp_shadedataa[20:14] = blr ; 
+
+							temp_shiften01 = 1'b0;
+							temp_shiften10 = 1'b0;
+					    	temp_write = 1'b0;
+                   end
+          11 :
+                   begin
+				       wantshadedata = 1'b0 ; 
+   				       triID = 0;
+				       selectuv = 0;
+				       lmenable = 1'b0 ; 
+
+                      wanttexel = 1'b1 ; 
+                      if (texelready == 1'b1)
+                      begin
+                         next_state = 4 ; 
+                      end
+                      else
+                      begin
+                         next_state = 11 ; 
+                      end 
+
+	          if (valid01 == 1'b1)
+				          begin
+				             temp_pending01 = 1'b1 ; 
+				          end 
+				          if (valid10 == 1'b1)
+				          begin
+				             temp_pending10 = 1'b1 ; 
+				          end 
+
+                         temp_shadedataa[6:0] = texelb ; 
+                         temp_shadedataa[13:7] = texelg ; 
+                         temp_shadedataa[20:14] = texelr ; 
+
+							temp_shiften01 = 1'b0;
+							temp_shiften10 = 1'b0;
+					    	temp_write = 1'b0;
+                   end
+          12 :
+                   begin
+				       wantshadedata = 1'b0 ; 
+   				       triID = 0;
+				       selectuv = 0;
+				       lmenable = 1'b0 ; 
+
+                      wanttexel = 1'b1 ; 
+                      if (texelready == 1'b1)
+                      begin
+                         next_state = 5 ; 
+                      end
+                      else
+                      begin
+                         next_state = 12 ; 
+                      end 
+
+	          if (valid01 == 1'b1)
+				          begin
+				             temp_pending01 = 1'b1 ; 
+				          end 
+				          if (valid10 == 1'b1)
+				          begin
+				             temp_pending10 = 1'b1 ; 
+				          end 
+                         temp_shadedatab[6:0] = texelb ; 
+                         temp_shadedatab[13:7] = texelg ; 
+                         temp_shadedatab[20:14] = texelr ; 
+
+							temp_shiften01 = 1'b0;
+							temp_shiften10 = 1'b0;
+					    	temp_write = 1'b0;
+                   end
+          13 :
+                   begin
+				       wantshadedata = 1'b0 ; 
+   				       triID = 0;
+				       selectuv = 0;
+				       lmenable = 1'b0 ; 
+
+                      wanttexel = 1'b1 ; 
+                      if (texelready == 1'b1)
+                      begin
+                         next_state = 1 ; 
+                      end
+                      else
+                      begin
+                         next_state = 13 ; 
+                      end 
+
+	          if (valid01 == 1'b1)
+				          begin
+				             temp_pending01 = 1'b1 ; 
+				          end 
+				          if (valid10 == 1'b1)
+				          begin
+				             temp_pending10 = 1'b1 ; 
+				          end 
+
+                         temp_shadedatac[6:0] = texelb ; 
+                         temp_shadedatac[13:7] = texelg ; 
+                         temp_shadedatac[20:14] = texelr ; 
+
+                   end
+          6 :
+                   begin
+				       wantshadedata = 1'b0 ; 
+   				       triID = 0;
+				       lmenable = 1'b0 ; 
+				       wanttexel = 1'b0 ; 
+
+                      selectuv[2] = ~process01 ; 
+                      selectuv[1:0] = 2'b01 ; 
+                      next_state = 9 ; 
+
+	          if (valid01 == 1'b1)
+				          begin
+				             temp_pending01 = 1'b1 ; 
+				          end 
+				          if (valid10 == 1'b1)
+				          begin
+				             temp_pending10 = 1'b1 ; 
+				          end 
+                         temp_texinfol = texinfo ; 
+
+							temp_shiften01 = 1'b0;
+							temp_shiften10 = 1'b0;
+					    	temp_write = 1'b0;
+                   end
+          9 :
+                   begin
+				       wantshadedata = 1'b0 ; 
+   				       triID = 0;
+				       wanttexel = 1'b0 ; 
+                      selectuv[2] = ~process01 ; 
+                      selectuv[1:0] = 2'b01 ; 
+                      lmenable = 1'b1 ; 
+                      if (texmap == 1'b1)
+                      begin
+                         next_state = 12 ; 
+
+                      end
+                      else
+                      begin
+                         next_state = 5 ; 
+                      end 
+
+	          if (valid01 == 1'b1)
+				          begin
+				             temp_pending01 = 1'b1 ; 
+				          end 
+				          if (valid10 == 1'b1)
+				          begin
+				             temp_pending10 = 1'b1 ; 
+				          end 
+
+                         temp_shadedatab[6:0] = blb ; 
+                         temp_shadedatab[13:7] = blg ; 
+                         temp_shadedatab[20:14] = blr ; 
+
+							temp_shiften01 = 1'b0;
+							temp_shiften10 = 1'b0;
+					    	temp_write = 1'b0;
+                   end
+          7 :
+                   begin
+				       wantshadedata = 1'b0 ; 
+   				       triID = 0;
+				       lmenable = 1'b0 ; 
+				       wanttexel = 1'b0 ; 
+                      selectuv[2] = ~process01 ; 
+                      selectuv[1:0] = 2'b10 ; 
+                      next_state = 10 ; 
+
+	          if (valid01 == 1'b1)
+				          begin
+				             temp_pending01 = 1'b1 ; 
+				          end 
+				          if (valid10 == 1'b1)
+				          begin
+				             temp_pending10 = 1'b1 ; 
+				          end 
+                         temp_texinfol = texinfo ; 
+
+							temp_shiften01 = 1'b0;
+							temp_shiften10 = 1'b0;
+					    	temp_write = 1'b0;
+                   end
+
+          10 :
+                   begin
+				       wantshadedata = 1'b0 ; 
+   				       triID = 0;
+				       wanttexel = 1'b0 ; 
+                      selectuv[2] = ~process01 ; 
+                      selectuv[1:0] = 2'b10 ; 
+                      if (texmap == 1'b1)
+                      begin
+                         next_state = 13 ; 
+                      end
+                      else
+                      begin
+                         next_state = 1 ; 
+                      end 
+
+                      lmenable = 1'b1 ; 
+
+	          if (valid01 == 1'b1)
+				          begin
+				             temp_pending01 = 1'b1 ; 
+				          end 
+				          if (valid10 == 1'b1)
+				          begin
+				             temp_pending10 = 1'b1 ; 
+				          end 
+                         temp_shadedatac[6:0] = blb ; 
+                         temp_shadedatac[13:7] = blg ; 
+                         temp_shadedatac[20:14] = blr ; 
+
+							temp_shiften01 = 1'b0;
+							temp_shiften10 = 1'b0;
+					    	temp_write = 1'b0;
+                   end
+          4 :
+                   begin
+				       wantshadedata = 1'b0 ; 
+				       lmenable = 1'b0 ; 
+				       wanttexel = 1'b0 ; 
+                      selectuv[2] = ~process01 ; 
+                      selectuv[1:0] = 2'b01 ; 
+                      if (process01 == 1'b1)
+                      begin
+                         triID = id01b ; 
+                      end
+                      else
+                      begin
+
+                         triID = id10b ; 
+                      end 
+                      if (shadedataready == 1'b1)
+                      begin
+                         if (hitb == 1'b1 & ((shadedata[63]) == 1'b1 | shadedata[63:62] == 2'b01))
+                         begin
+                            next_state = 6 ; 
+                         end
+                         else
+                         begin
+                            next_state = 5 ; 
+                         end 
+
+                      end
+                      else
+                      begin
+                         next_state = 4 ; 
+                      end 
+
+	          if (valid01 == 1'b1)
+				          begin
+				             temp_pending01 = 1'b1 ; 
+				          end 
+				          if (valid10 == 1'b1)
+				          begin
+				             temp_pending10 = 1'b1 ; 
+				          end 
+
+                         if (hitb == 1'b1)
+                         begin
+                            temp_shadedatab = shadedata[20:0] ; 
+                            temp_texmap = (~shadedata[63]) & shadedata[62] ; 
+                         end
+                         else
+                         begin
+                            temp_shadedatab = bkcolour ; 
+                         end 
+
+							temp_shiften01 = 1'b0;
+							temp_shiften10 = 1'b0;
+					    	temp_write = 1'b0;
+                   end
+          5 :
+                   begin
+				       lmenable = 1'b0 ; 
+				       wanttexel = 1'b0 ; 
+                      wantshadedata = 1'b1 ; 
+                      selectuv[2] = ~process01 ; 
+                      selectuv[1:0] = 2'b10 ; 
+                      if (process01 == 1'b1)
+
+                      begin
+                         triID = id01c ; 
+                      end
+                      else
+                      begin
+                         triID = id10c ; 
+                      end 
+                      if (shadedataready == 1'b1)
+                      begin
+                         if (hitc == 1'b1 & ((shadedata[63]) == 1'b1 | shadedata[63:62] == 2'b01))
+                         begin
+                            next_state = 7 ; 
+
+                         end
+                         else
+                         begin
+                            next_state = 1 ; 
+                         end 
+                      end
+                      else
+                      begin
+                         next_state = 5 ; 
+                      end 
+
+	          if (valid01 == 1'b1)
+				          begin
+				             temp_pending01 = 1'b1 ; 
+				          end 
+				          if (valid10 == 1'b1)
+				          begin
+				             temp_pending10 = 1'b1 ; 
+				          end 
+
+                         if (hitc == 1'b1)
+                          begin
+                            temp_shadedatac = shadedata[20:0] ; 
+                            temp_texmap = (~shadedata[63]) & shadedata[62] ; 
+                         end
+                         else
+                         begin
+                            temp_shadedatac = bkcolour ; 
+                         end 
+
+							temp_shiften01 = 1'b0;
+							temp_shiften10 = 1'b0;
+					    	temp_write = 1'b0;
+                   end
+          1 :
+
+                   begin
+				       wantshadedata = 1'b0 ; 
+   				       triID = 0;
+				       selectuv = 0;
+				       lmenable = 1'b0 ; 
+				       wanttexel = 1'b0 ; 
+                      if (ack == 1'b1)
+                      begin
+                         next_state = 0 ; 
+                      end
+                      else
+                      begin
+                         next_state = 1 ; 
+                      end 
+
+                         if (ack == 1'b1 & process01 == 1'b1)
+                         begin
+                            temp_pending01 = 1'b0 ; 
+                         end
+
+                          else if (ack == 1'b1 & process01 == 1'b0)
+                         begin
+                            temp_pending10 = 1'b0 ; 
+                         end 
+
+    				if (process01 == 1'b1 &  ack == 1'b1)
+						begin
+							temp_shiften01 = 1'b1;
+							temp_shiften10 = 1'b1;
+						end
+					    temp_write = 1'b1;
+                   end
+       endcase 
+    end 
+ endmodule
+ //////////////////////////////////////////////////////////////////////////////////////////////
+ //
+ // Verilog file generated by X-HDL - Revision 3.2.38  Jan. 9, 2004 
+ // Sun Feb  8 14:14:35 2004
+ //
+ //      Input file         : G:/jamieson/VERILOG_BENCHMARKS/RAYTRACE/col16to21.vhd
+ //      Design name        : col16to21
+ //      Author             : 
+ //      Company            : 
+ //
+ //      Description        : 
+ //
+ //
+ //////////////////////////////////////////////////////////////////////////////////////////////
+ //
+ module col16to21 (dataline, texelselect, r, g, b);
+    // Selects one 16-bit field of dataline and expands it into three 7-bit colour outputs.
+    input[63:0] dataline; // four packed 16-bit fields
+    input[1:0] texelselect; // index of the 16-bit field to use (0 selects bits 15:0)
+    output[6:0] r; 
+    wire[6:0] r;
+    output[6:0] g; 
+    wire[6:0] g;
+    output[6:0] b; 
+    wire[6:0] b;
+
+    reg[15:0] col16; // the selected field; driven combinationally by the always block below
+    // All four texelselect values have a case arm, so col16 is assigned on every path.
+    always @(dataline or texelselect)
+    begin
+       case (texelselect)
+          2'b00 :
+                   begin
+                      col16 = dataline[15:0] ; 
+                   end
+          2'b01 :
+                   begin
+                      col16 = dataline[31:16] ; 
+                   end
+          2'b10 :
+                   begin
+                      col16 = dataline[47:32] ; 
+                   end
+          2'b11 :
+                   begin
+                      col16 = dataline[63:48] ; 
+                   end
+       endcase 
+    end 
+    assign r = {col16[15:10], 1'b0} ; // 6 source bits padded with one zero LSB
+    assign g = {col16[9:5], 2'b00} ; // 5 source bits padded with two zero LSBs
+    assign b = {col16[4:0], 2'b00} ; // 5 source bits padded with two zero LSBs
+ endmodule
+ module linearmap (u, v, start, addr, texelselect, factor, enable, clk);
+    // Registers (u, v) and produces addr = start + ul[6:2] + (vl << factor), all zero-extended to 18 bits.
+    input[6:0] u; 
+    input[6:0] v; 
+    input[17:0] start; 
+    output[17:0] addr; 
+    reg[17:0] addr;
+    output[1:0] texelselect; 
+    wire[1:0] texelselect;
+
+    input[2:0] factor; // left-shift amount (0..7) applied to vl in the case statement below
+    input enable; // when 1, u and v are captured into ul and vl on the clock edge
+    input clk; 
+
+    reg[6:0] ul; 
+    reg[6:0] vl; 
+    // The two low bits of the registered u are exported; the remaining bits ul[6:2] feed addr.
+    assign texelselect = ul[1:0] ;
+    // Nonblocking assignments: addr is computed from the ul/vl values held before this clock edge.
+    always @(posedge clk)
+    begin
+       if (enable == 1'b1)
+       begin
+          ul <= u ; 
+          vl <= v ; 
+       end 
+       else
+       begin
+          ul <= ul ; 
+          vl <= vl ; 
+       end 
+       case (factor)
+          3'b000 :
+                   begin
+                      addr <= start + ({13'b0000000000000, ul[6:2]}) + ({11'b00000000000, vl}) ; 
+                   end
+          3'b001 :
+                   begin
+                      addr <= start + ({13'b0000000000000, ul[6:2]}) + ({10'b0000000000, vl, 1'b0}) ; 
+
+                   end
+          3'b010 :
+                   begin
+                      addr <= start + ({13'b0000000000000, ul[6:2]}) + ({9'b000000000, vl, 2'b00}) ; 
+                   end
+          3'b011 :
+                   begin
+                      addr <= start + ({13'b0000000000000, ul[6:2]}) + ({8'b00000000, vl, 3'b000}) ; 
+                   end
+          3'b100 :
+                   begin
+                      addr <= start + ({13'b0000000000000, ul[6:2]}) + ({7'b0000000, vl, 4'b0000}) ; 
+
+                   end
+          3'b101 :
+                   begin
+                      addr <= start + ({13'b0000000000000, ul[6:2]}) + ({6'b000000, vl, 5'b00000}) ; 
+                   end
+          3'b110 :
+                   begin
+                      addr <= start + ({13'b0000000000000, ul[6:2]}) + ({5'b00000, vl, 6'b000000}) ; 
+                   end
+          3'b111 :
+                   begin
+                      addr <= start + ({13'b0000000000000, ul[6:2]}) + ({4'b0000, vl, 7'b0000000}) ; 
+
+                   end
+       endcase  
+    end 
+ endmodule
+     module bilinearintrp (u01a, u01b, u01c, v01a, v01b, v01c, u10a, u10b, u10c, v10a, v10b, v10c, selectuv, ru, rv, rw, gu, gv, gw, bu, bv, bw, r, g, b, clk);
+        // Picks a (u, v) pair with selectuv, then forms ul*Xu + vl*Xv + wl*Xw for X in {r, g, b} through two register stages.
+        input[7:0] u01a; 
+        input[7:0] u01b; 
+        input[7:0] u01c; 
+        input[7:0] v01a; 
+        input[7:0] v01b; 
+        input[7:0] v01c; 
+        input[7:0] u10a; 
+        input[7:0] u10b; 
+        input[7:0] u10c; 
+        input[7:0] v10a; 
+        input[7:0] v10b; 
+        input[7:0] v10c; 
+        input[2:0] selectuv; 
+        input[6:0] ru; 
+        input[6:0] rv; 
+        input[6:0] rw; 
+        input[6:0] gu; 
+        input[6:0] gv; 
+        input[6:0] gw; 
+        input[6:0] bu; 
+        input[6:0] bv; 
+        input[6:0] bw; 
+        output[6:0] r; 
+        wire[6:0] r;
+        output[6:0] g; 
+        wire[6:0] g;
+        output[6:0] b; 
+        wire[6:0] b;
+        input clk; 
+
+        reg[7:0] u; 
+        reg[7:0] v; 
+        reg[7:0] ul; 
+        reg[7:0] vl; 
+        reg[7:0] wl; 
+        reg[14:0] i1b; 
+        reg[14:0] i2b; 
+        reg[14:0] i3b; 
+        reg[14:0] i1g; 
+        reg[14:0] i2g; 
+        reg[14:0] i3g; 
+        reg[14:0] i1r; 
+        reg[14:0] i2r; 
+        reg[14:0] i3r; 
+        reg[6:0] rul; 
+        reg[6:0] rvl; 
+        reg[6:0] rwl; 
+        reg[6:0] gul; 
+        reg[6:0] gvl; 
+        reg[6:0] gwl; 
+        reg[6:0] bul; 
+        reg[6:0] bvl; 
+        reg[6:0] bwl; 
+        // Combinational (u, v) select; selectuv values 3'b011 and 3'b111 take the default arm and give u = v = 0.
+        always @(selectuv or u01a or u01b or u01c or v01a or v01b or v01c or u10a or 
+                 u10b or u10c or v10a or v10b or v10c)
+        begin
+           case (selectuv)
+              3'b000 :
+                       begin
+                          u = u01a ; 
+                          v = v01a ; 
+                       end
+              3'b001 :
+                       begin
+                          u = u01b ; 
+						 v = v01b ; 
+                       end
+              3'b010 :
+                       begin
+                          u = u01c ; 
+                          v = v01c ; 
+                       end
+              3'b100 :
+                       begin
+                          u = u10a ; 
+                          v = v10a ; 
+                       end
+              3'b101 :
+                       begin
+                          u = u10b ; 
+                          v = v10b ; 
+                       end
+              3'b110 :
+                       begin
+                          u = u10c ; 
+                          v = v10c ; 
+                       end
+              default :
+                       begin
+                          u = 0;
+                          v = 0;
+                       end
+           endcase 
+        end 
+        // Stage 1 registers the weights and colour inputs; stage 2 multiplies the values registered on the previous edge (nonblocking reads).
+        always @(posedge clk)
+        begin
+           wl <= 8'b11111111 - u - v ; // third weight: 255 - u - v, kept to 8 bits
+           ul <= u ; 
+           vl <= v ; 
+           rul <= ru ; 
+           rvl <= rv ; 
+           rwl <= rw ; 
+           gul <= gu ; 
+           gvl <= gv ; 
+           gwl <= gw ; 
+           bul <= bu ; 
+           bvl <= bv ; 
+           bwl <= bw ; 
+           i1r <= ul * rul ; // 8-bit weight times 7-bit colour, stored in 15 bits
+           i2r <= vl * rvl ; 
+           i3r <= wl * rwl ; 
+           i1g <= ul * gul ; 
+           i2g <= vl * gvl ; 
+           i3g <= wl * gwl ; 
+           i1b <= ul * bul ; 
+           i2b <= vl * bvl ; 
+           i3b <= wl * bwl ;  
+        end 
+        assign r = (i1r + i2r + i3r) ; // NOTE(review): r/g/b are 7 bits wide, so only the low 7 bits of the 15-bit sum are driven out - confirm intended
+        assign g = (i1g + i2g + i3g) ;
+        assign b = (i1b + i2b + i3b) ;
+     endmodule
+
+
+
+module fifo3 (datain, writeen, dataout, shiften, globalreset, clk);
+    // Three 18-bit registers (data0..data2) with data0 driven onto dataout; pos selects the register a write lands in.
+    input[18 - 1:0] datain; 
+    input writeen; 
+    output[18 - 1:0] dataout; 
+    wire[18 - 1:0] dataout;
+    input shiften; 
+    input globalreset; // sampled on posedge clk (synchronous); clears pos and all three registers
+    input clk; 
+
+    reg[18 - 1:0] data0; 
+    reg[18 - 1:0] data1; 
+    reg[18 - 1:0] data2; 
+
+    reg[1:0] pos; // incremented by a write-only cycle, decremented by a shift-only cycle
+
+    assign dataout = data0 ;
+    // Priority below: reset, then write+shift together, then shift only, then write only.
+    always @(posedge clk)
+    begin
+       if (globalreset == 1'b1)
+       begin
+          pos <= 2'b00 ; 
+          data0 <= 0 ; 
+          data1 <= 0 ; 
+          data2 <= 0 ; 
+       end
+       else
+       begin
+          if (writeen == 1'b1 & shiften == 1'b1) // simultaneous write and shift: pos is left unchanged
+          begin
+             case (pos)
+                2'b00 : // datain is not stored in this arm; all registers are cleared
+                         begin
+                            data0 <= 0 ; 
+                            data1 <= 0 ; 
+                            data2 <= 0 ; 
+                         end
+
+                2'b01 :
+                         begin
+                            data0 <= datain ; 
+                            data1 <= 0 ; 
+                            data2 <= 0 ; 
+                         end
+                2'b10 :
+                         begin
+                            data0 <= data1 ; 
+                            data1 <= datain ; 
+                            data2 <= 0 ; 
+                         end
+
+                2'b11 :
+                         begin
+                            data0 <= data1 ; 
+                            data1 <= data2 ; 
+                            data2 <= datain ; 
+                         end
+             endcase 
+          end
+          else if (shiften == 1'b1) // shift only: data2 keeps its value
+          begin
+             data0 <= data1 ; 
+             data1 <= data2 ; 
+             pos <= pos - 1 ; // NOTE(review): no guard here, so pos == 2'b00 wraps to 2'b11
+          end
+          else if (writeen == 1'b1)
+          begin
+             case (pos) // no arm for 2'b11: nothing is stored in that case
+                2'b00 :
+                         begin
+                            data0 <= datain ; 
+                         end
+                2'b01 :
+    					begin
+                            data1 <= datain ; 
+                         end
+                2'b10 :
+                         begin
+                            data2 <= datain ; 
+                         end
+             endcase 
+             pos <= pos + 1 ; // NOTE(review): still increments when pos == 2'b11, wrapping to 2'b00
+          end 
+       end 
+    end 
+ endmodule
+
diff --git a/third_party/vtr/vtr_primitives.v b/third_party/vtr/vtr_primitives.v
new file mode 100644
index 000000000..678af1ccd
--- /dev/null
+++ b/third_party/vtr/vtr_primitives.v
@@ -0,0 +1,329 @@
+`timescale 1ps/1ps
+//Overview
+//========
+//This file contains the verilog primitives produced by VPR's
+//post-synthesis netlist writer.
+//
+//If you wish to do back-annotated timing simulation you will need
+//to link with this file during simulation.
+//
+//To ensure correct results when performing back-annotation with
+//Modelsim see the notes at the end of this comment.
+//
+//Specifying Timing Edges
+//=======================
+//To perform timing back-annotation the simulator must know the delay 
+//dependencies (timing edges) between the ports on each primitive.
+//
+//During back-annotation the simulator will attempt to annotate SDF delay
+//values onto the timing edges.  It should give a warning if it was unable
+//to find a matching edge.
+//
+//
+//In Verilog timing edges are specified using a specify block (delimited by the
+//'specify' and 'endspecify' keywords).
+//
+//Inside the specify block a set of specify statements are used to describe
+//the timing edges.  For example consider:
+//
+//  input [1:0] in;
+//  output [1:0] out;
+//  specify
+//      (in[0] => out[0]) = "";
+//      (in[1] => out[1]) = "";
+//  endspecify
+//
+//This states that there are the following timing edges (dependencies):
+//  * from in[0] to out[0]
+//  * from in[1] to out[1]
+//
+//We could (according to the Verilog standard) equivalently have used:
+//
+//  input [1:0] in;
+//  output [1:0] out;
+//  specify
+//      (in => out) = "";
+//  endspecify
+//
+//However NOT ALL SIMULATORS TREAT MULTIBIT SPECIFY STATEMENTS CORRECTLY,
+//at least by default (in particular ModelSim, see notes below).
+//
+//The previous examples use the 'parallel connection' operator '=>', which
+//creates parallel edges between the two operands (i.e. bit 0 to bit 0, bit
+//1 to bit 1 etc.).  Note that both operands must have the same bit-width. 
+//
+//Verilog also supports the 'full connection' operator '*>' which will create
+//a fully connected set of edges (e.g. from all-to-all). It does not require
+//both operands to have the same bit-width. For example:
+//
+//  input [1:0] in;
+//  output [2:0] out;
+//  specify
+//      (in *> out) = "";
+//  endspecify
+//
+//states that there are the following timing edges (dependencies):
+//  * from in[0] to out[0]
+//  * from in[0] to out[1]
+//  * from in[0] to out[2]
+//  * from in[1] to out[0]
+//  * from in[1] to out[1]
+//  * from in[1] to out[2]
+//
+//For more details on specify blocks see Section 14 "Specify Blocks" of the
+//Verilog standard (IEEE 1364-2005).
+//
+//Back-annotation with Modelsim
+//=============================
+//
+//Ensuring Multi-bit Specifies are Handled Correctly: Bit-blasting
+//----------------------------------------------------------------
+//
+//ModelSim (tested on Modelsim SE 10.4c) ignores multi-bit specify statements
+//by default.
+//
+//This causes SDF annotation errors such as:
+//
+//  vsim-SDF-3261: Failed to find matching specify module path
+//
+//To force Modelsim to correctly interpret multi-bit specify statements you
+//should provide the '+bitblast' option to the vsim executable.
+//This forces it to apply specify statements using multi-bit operands to
+//each bit of the operand (i.e. according to the Verilog standard).
+//
+//Confirming back-annotation is occurring correctly
+//------------------------------------------------
+//
+//Another useful option is '+sdf_verbose' which produces extra output about
+//SDF annotation, which can be used to verify annotation occurred correctly.
+//
+//For example:
+//
+//      Summary of Verilog design objects annotated: 
+//      
+//           Module path delays =          5
+//      
+//       ******************************************************************************
+//      
+//       Summary of constructs read: 
+//      
+//                 IOPATH =          5
+//
+//shows that all 5 IOPATH constructs in the SDF were annotated to the verilog
+//design.
+//
+//Example vsim Command Line
+//--------------------------
+//The following is an example command-line to vsim (where 'tb' is the name of your
+//testbench):
+//
+//  vsim -t 1ps -L rtl_work -L work -voptargs="+acc" +sdf_verbose +bitblast tb
+
+
+
+
+//K-input Look-Up Table: out is the bit of LUT_MASK selected by the value on in
+module LUT_K #(
+    //The Look-up Table size (number of inputs)
+    parameter K = 1, 
+
+    //The lut mask.  
+    //Left-most (MSB) bit corresponds to all inputs logic one. 
+    //Defaults to always false.
+    parameter LUT_MASK={2**K{1'b0}} //2**K zero bits, one per input combination
+) (
+    input [K-1:0] in,
+    output out
+);
+    //Full connection: one timing edge from every bit of in to out; the "" delay is a placeholder (presumably filled by SDF back-annotation)
+    specify
+        (in *> out) = "";
+    endspecify
+
+    assign out = LUT_MASK[in]; //in is used directly as a bit index into the mask
+
+endmodule
+
+//D-FlipFlop module: Q takes the value of D on every rising edge of clk
+module DFF #(
+    parameter INITIAL_VALUE=1'b0    //value given to Q by the initial block below
+) (
+    input clk,
+    input D,
+    output reg Q
+);
+    //clk-to-Q timing edge plus setup/hold checks on D; the "" values are placeholders (presumably filled by SDF back-annotation)
+    specify
+        (clk => Q) = "";
+        $setup(D, posedge clk, "");
+        $hold(posedge clk, D, "");
+    endspecify
+
+    initial begin
+        Q <= INITIAL_VALUE;
+    end
+
+    always@(posedge clk) begin
+        Q <= D;
+    end
+endmodule
+
+//Routing fpga_interconnect module: passes datain straight through to dataout
+module fpga_interconnect(
+    input datain,
+    output dataout
+);
+    //The single timing edge below is the only content besides the pass-through assign
+    specify
+        (datain=>dataout)="";
+    endspecify
+
+    assign dataout = datain;
+
+endmodule
+
+
+//2-to-1 mux module: z follows x when select is 0 and y when select is 1
+module mux(
+    input select,
+    input x,
+    input y,
+    output z
+);
+    //Unlike the other primitives visible in this file, this module declares no specify block (no timing edges)
+    assign z = (x & ~select) | (y & select);
+
+endmodule
+
+//n-bit adder: {cout, sumout} = a + b + cin
+module adder #(
+    parameter WIDTH = 1   //bit width of a, b and sumout
+) (
+    input [WIDTH-1:0] a, 
+    input [WIDTH-1:0] b, 
+    input cin, 
+    output cout, 
+    output [WIDTH-1:0] sumout);
+
+   specify
+      (a*>sumout)="";
+      (b*>sumout)="";
+      (cin*>sumout)="";
+      (a*>cout)="";
+      (b*>cout)="";
+      (cin=>cout)="";
+   endspecify
+   
+   assign {cout, sumout} = a + b + cin; //the WIDTH+1 bit left-hand side captures the carry out in cout
+   
+endmodule
+   
+//nxn multiplier module: out = a * b
+module multiply #(
+    //The width of input signals
+    parameter WIDTH = 1
+) (
+    input [WIDTH-1:0] a,
+    input [WIDTH-1:0] b,
+    output [2*WIDTH-1:0] out
+);
+    //Full-connection timing edges from every bit of a and of b to every bit of out
+    specify
+        (a *> out) = "";
+        (b *> out) = "";
+    endspecify
+
+    assign out = a * b; //out is 2*WIDTH bits wide, twice the width of each operand
+
+endmodule // multiply
+
+//single_port_ram module: clocked RAM with one shared address for read and write
+(* keep_hierarchy *)
+module single_port_ram #(
+    parameter ADDR_WIDTH = 1,
+    parameter DATA_WIDTH = 1
+) (
+    input clk,
+    input [ADDR_WIDTH-1:0] addr,
+    input [DATA_WIDTH-1:0] data,
+    input we,
+    output reg [DATA_WIDTH-1:0] out
+);
+
+    localparam MEM_DEPTH = 2 ** ADDR_WIDTH; //one word per possible addr value
+
+    reg [DATA_WIDTH-1:0] Mem[MEM_DEPTH-1:0];
+    //clk-to-out timing edge plus setup/hold checks on addr, data and we; "" values are placeholders
+    specify
+        (clk*>out)="";
+        $setup(addr, posedge clk, "");
+        $setup(data, posedge clk, "");
+        $setup(we, posedge clk, "");
+        $hold(posedge clk, addr, "");
+        $hold(posedge clk, data, "");
+        $hold(posedge clk, we, "");
+    endspecify
+   
+    always@(posedge clk) begin
+        if(we) begin
+            Mem[addr] = data; //blocking write, so the read below sees the new word
+        end
+    	out = Mem[addr]; //New data read-during write behaviour (blocking assignments)
+    end
+   
+endmodule // single_port_RAM
+
+//dual_port_ram module: two clocked read/write ports sharing one memory array
+(* keep_hierarchy *)
+module dual_port_ram #(
+    parameter ADDR_WIDTH = 1,
+    parameter DATA_WIDTH = 1
+) (
+    input clk,
+
+    input [ADDR_WIDTH-1:0] addr1,
+    input [ADDR_WIDTH-1:0] addr2,
+    input [DATA_WIDTH-1:0] data1,
+    input [DATA_WIDTH-1:0] data2,
+    input we1,
+    input we2,
+    output reg [DATA_WIDTH-1:0] out1,
+    output reg [DATA_WIDTH-1:0] out2
+);
+
+    localparam MEM_DEPTH = 2 ** ADDR_WIDTH; //one word per possible address value
+
+    reg [DATA_WIDTH-1:0] Mem[MEM_DEPTH-1:0];
+    //clk-to-out timing edges plus setup/hold checks on both ports' inputs; "" values are placeholders
+    specify
+        (clk*>out1)="";
+        (clk*>out2)="";
+        $setup(addr1, posedge clk, "");
+        $setup(addr2, posedge clk, "");
+        $setup(data1, posedge clk, "");
+        $setup(data2, posedge clk, "");
+        $setup(we1, posedge clk, "");
+        $setup(we2, posedge clk, "");
+        $hold(posedge clk, addr1, "");
+        $hold(posedge clk, addr2, "");
+        $hold(posedge clk, data1, "");
+        $hold(posedge clk, data2, "");
+        $hold(posedge clk, we1, "");
+        $hold(posedge clk, we2, "");
+    endspecify
+   
+    always@(posedge clk) begin //Port 1
+        if(we1) begin
+            Mem[addr1] = data1;
+        end
+        out1 = Mem[addr1]; //New data read-during write behaviour (blocking assignments)
+    end
+    //NOTE(review): both blocks do blocking writes to the same Mem on the same edge; with addr1 == addr2 the result depends on simulator block ordering
+    always@(posedge clk) begin //Port 2
+        if(we2) begin
+            Mem[addr2] = data2;
+        end
+        out2 = Mem[addr2]; //New data read-during write behaviour (blocking assignments)
+    end
+   
+endmodule // dual_port_ram

From d43f15ca982b2b9dca7a606a19fa0b2dae6e7025 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 12:51:00 -0400
Subject: [PATCH 03/56] ISC license

---
 parmys-plugin/techlibs/adff2dff.v   |  9 ++++++---
 parmys-plugin/techlibs/adffe2dff.v  | 11 +++++++----
 parmys-plugin/techlibs/aldff2dff.v  |  9 ++++++---
 parmys-plugin/techlibs/aldffe2dff.v |  9 ++++++---
 4 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/parmys-plugin/techlibs/adff2dff.v b/parmys-plugin/techlibs/adff2dff.v
index bf34a02be..52b6b9fcd 100644
--- a/parmys-plugin/techlibs/adff2dff.v
+++ b/parmys-plugin/techlibs/adff2dff.v
@@ -1,11 +1,11 @@
 // yosys -- Yosys Open SYnthesis Suite
-
+//
 // Copyright (C) 2012  Claire Xenia Wolf <claire@yosyshq.com>
-
+//
 // Permission to use, copy, modify, and/or distribute this software for any
 // purpose with or without fee is hereby granted, provided that the above
 // copyright notice and this permission notice appear in all copies.
-
+//
 // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
@@ -13,6 +13,9 @@
 // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+// SPDX-License-Identifier: ISC
+
 (* techmap_celltype = "$adff" *)
 module adff2dff (CLK, ARST, D, Q);
 	parameter WIDTH = 1;
diff --git a/parmys-plugin/techlibs/adffe2dff.v b/parmys-plugin/techlibs/adffe2dff.v
index 266b546ab..cb9f35c83 100644
--- a/parmys-plugin/techlibs/adffe2dff.v
+++ b/parmys-plugin/techlibs/adffe2dff.v
@@ -1,11 +1,11 @@
 // yosys -- Yosys Open SYnthesis Suite
-
+//
 // Copyright (C) 2012  Claire Xenia Wolf <claire@yosyshq.com>
-
+//
 // Permission to use, copy, modify, and/or distribute this software for any
 // purpose with or without fee is hereby granted, provided that the above
 // copyright notice and this permission notice appear in all copies.
-
+//
 // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
@@ -13,8 +13,11 @@
 // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
+//
 // Modified version of adff2dff for addfe
+//
+// SPDX-License-Identifier: ISC
+
 (* techmap_celltype = "$adffe" *)
 module adffe2dff (CLK, ARST, EN, D, Q);
 	parameter WIDTH = 1;
diff --git a/parmys-plugin/techlibs/aldff2dff.v b/parmys-plugin/techlibs/aldff2dff.v
index 683c3ef86..f8ab2281c 100644
--- a/parmys-plugin/techlibs/aldff2dff.v
+++ b/parmys-plugin/techlibs/aldff2dff.v
@@ -1,12 +1,12 @@
 // yosys -- Yosys Open SYnthesis Suite
-
+//
 // Copyright (C) 2012  Claire Xenia Wolf <claire@yosyshq.com>
 // Copyright (C) 2022  Daniel Khadivi
-
+//
 // Permission to use, copy, modify, and/or distribute this software for any
 // purpose with or without fee is hereby granted, provided that the above
 // copyright notice and this permission notice appear in all copies.
-
+//
 // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
@@ -14,6 +14,9 @@
 // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+// SPDX-License-Identifier: ISC
+
 (* techmap_celltype = "$aldff" *)
 module aldff2dff (CLK, ALOAD, AD, D, Q);
 	parameter WIDTH = 1;
diff --git a/parmys-plugin/techlibs/aldffe2dff.v b/parmys-plugin/techlibs/aldffe2dff.v
index e8842c630..613d646da 100644
--- a/parmys-plugin/techlibs/aldffe2dff.v
+++ b/parmys-plugin/techlibs/aldffe2dff.v
@@ -1,12 +1,12 @@
 // yosys -- Yosys Open SYnthesis Suite
-
+//
 // Copyright (C) 2012  Claire Xenia Wolf <claire@yosyshq.com>
 // Copyright (C) 2022  Daniel Khadivi
-
+//
 // Permission to use, copy, modify, and/or distribute this software for any
 // purpose with or without fee is hereby granted, provided that the above
 // copyright notice and this permission notice appear in all copies.
-
+//
 // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
@@ -14,6 +14,9 @@
 // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+// SPDX-License-Identifier: ISC
+
 (* techmap_celltype = "$aldffe" *)
 module aldffe2dff (CLK, ALOAD, AD, D, EN, Q);
 	parameter WIDTH = 1;

From d04367108cffe42246c8cfbd13823c6b5eae46bb Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 12:52:41 -0400
Subject: [PATCH 04/56] vtr licensing

---
 third_party/vtr/arch/LICENSE.md    | 69 ++++++++++++++++++++++++++++++
 third_party/vtr/libs/LICENSE.md    | 69 ++++++++++++++++++++++++++++++
 third_party/vtr/verilog/LICENSE.md | 69 ++++++++++++++++++++++++++++++
 3 files changed, 207 insertions(+)
 create mode 100644 third_party/vtr/arch/LICENSE.md
 create mode 100644 third_party/vtr/libs/LICENSE.md
 create mode 100644 third_party/vtr/verilog/LICENSE.md

diff --git a/third_party/vtr/arch/LICENSE.md b/third_party/vtr/arch/LICENSE.md
new file mode 100644
index 000000000..01332da43
--- /dev/null
+++ b/third_party/vtr/arch/LICENSE.md
@@ -0,0 +1,69 @@
+# VTR License
+
+The software package "VTR" includes the software tools ODIN II, ABC, and VPR as
+well as additional benchmarks, documentation, libraries and scripts. The authors
+of the various components of VTR retain their ownership of their tools.
+
+* Unless otherwise noted (in particular ABC, the benchmark circuits and some libraries),
+all software, documents, and scripts in VTR, follows the standard MIT license described
+[here](http://www.opensource.org/licenses/mit-license.php) copied below for
+your convenience:
+
+> The MIT License (MIT)
+>
+> Copyright 2012 VTR Developers
+>
+> Permission is hereby granted, free of charge, to any person obtaining a copy of
+> this software and associated documentation files (the "Software"), to deal in
+> the Software without restriction, including without limitation the rights to
+> use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+> of the Software, and to permit persons to whom the Software is furnished to do
+> so, subject to the following conditions:
+>
+> The above copyright notice and this permission notice shall be included in all
+> copies or substantial portions of the Software.
+>
+> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+> SOFTWARE.
+
+* Terms and conditions for ABC is found
+[here](http://www.eecs.berkeley.edu/~alanmi/abc/copyright.htm) copied below
+for your convenience:
+
+> Copyright (c) The Regents of the University of California. All rights reserved.
+>
+> Permission is hereby granted, without written agreement and without license or
+> royalty fees, to use, copy, modify, and distribute this software and its
+> documentation for any purpose, provided that the above copyright notice and the
+> following two paragraphs appear in all copies of this software.
+>
+> IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
+> DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
+> THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
+> CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+>
+> THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+> BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+> A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS,
+> AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
+> SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+
+The benchmark circuits are all open source but each have their own
+individual terms and conditions which are listed in the source code of each
+benchmark.
+
+Subject to these conditions, the software is provided free of charge to all
+interested parties.
+
+If you do decide to use this tool, please reference our work as references are
+important in academia.
+
+Donations in the form of research grants to promote further research and
+development on the tools will be gladly accepted, either anonymously or with
+attribution on our future publications.
+
diff --git a/third_party/vtr/libs/LICENSE.md b/third_party/vtr/libs/LICENSE.md
new file mode 100644
index 000000000..01332da43
--- /dev/null
+++ b/third_party/vtr/libs/LICENSE.md
@@ -0,0 +1,69 @@
+# VTR License
+
+The software package "VTR" includes the software tools ODIN II, ABC, and VPR as
+well as additional benchmarks, documentation, libraries and scripts. The authors
+of the various components of VTR retain their ownership of their tools.
+
+* Unless otherwise noted (in particular ABC, the benchmark circuits and some libraries),
+all software, documents, and scripts in VTR, follows the standard MIT license described
+[here](http://www.opensource.org/licenses/mit-license.php) copied below for
+your convenience:
+
+> The MIT License (MIT)
+>
+> Copyright 2012 VTR Developers
+>
+> Permission is hereby granted, free of charge, to any person obtaining a copy of
+> this software and associated documentation files (the "Software"), to deal in
+> the Software without restriction, including without limitation the rights to
+> use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+> of the Software, and to permit persons to whom the Software is furnished to do
+> so, subject to the following conditions:
+>
+> The above copyright notice and this permission notice shall be included in all
+> copies or substantial portions of the Software.
+>
+> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+> SOFTWARE.
+
+* Terms and conditions for ABC is found
+[here](http://www.eecs.berkeley.edu/~alanmi/abc/copyright.htm) copied below
+for your convenience:
+
+> Copyright (c) The Regents of the University of California. All rights reserved.
+>
+> Permission is hereby granted, without written agreement and without license or
+> royalty fees, to use, copy, modify, and distribute this software and its
+> documentation for any purpose, provided that the above copyright notice and the
+> following two paragraphs appear in all copies of this software.
+>
+> IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
+> DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
+> THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
+> CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+>
+> THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+> BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+> A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS,
+> AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
+> SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+
+The benchmark circuits are all open source but each have their own
+individual terms and conditions which are listed in the source code of each
+benchmark.
+
+Subject to these conditions, the software is provided free of charge to all
+interested parties.
+
+If you do decide to use this tool, please reference our work as references are
+important in academia.
+
+Donations in the form of research grants to promote further research and
+development on the tools will be gladly accepted, either anonymously or with
+attribution on our future publications.
+
diff --git a/third_party/vtr/verilog/LICENSE.md b/third_party/vtr/verilog/LICENSE.md
new file mode 100644
index 000000000..01332da43
--- /dev/null
+++ b/third_party/vtr/verilog/LICENSE.md
@@ -0,0 +1,69 @@
+# VTR License
+
+The software package "VTR" includes the software tools ODIN II, ABC, and VPR as
+well as additional benchmarks, documentation, libraries and scripts. The authors
+of the various components of VTR retain their ownership of their tools.
+
+* Unless otherwise noted (in particular ABC, the benchmark circuits and some libraries),
+all software, documents, and scripts in VTR, follows the standard MIT license described
+[here](http://www.opensource.org/licenses/mit-license.php) copied below for
+your convenience:
+
+> The MIT License (MIT)
+>
+> Copyright 2012 VTR Developers
+>
+> Permission is hereby granted, free of charge, to any person obtaining a copy of
+> this software and associated documentation files (the "Software"), to deal in
+> the Software without restriction, including without limitation the rights to
+> use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+> of the Software, and to permit persons to whom the Software is furnished to do
+> so, subject to the following conditions:
+>
+> The above copyright notice and this permission notice shall be included in all
+> copies or substantial portions of the Software.
+>
+> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+> SOFTWARE.
+
+* Terms and conditions for ABC is found
+[here](http://www.eecs.berkeley.edu/~alanmi/abc/copyright.htm) copied below
+for your convenience:
+
+> Copyright (c) The Regents of the University of California. All rights reserved.
+>
+> Permission is hereby granted, without written agreement and without license or
+> royalty fees, to use, copy, modify, and distribute this software and its
+> documentation for any purpose, provided that the above copyright notice and the
+> following two paragraphs appear in all copies of this software.
+>
+> IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
+> DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
+> THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
+> CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+>
+> THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+> BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+> A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS,
+> AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
+> SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+
+The benchmark circuits are all open source but each have their own
+individual terms and conditions which are listed in the source code of each
+benchmark.
+
+Subject to these conditions, the software is provided free of charge to all
+interested parties.
+
+If you do decide to use this tool, please reference our work as references are
+important in academia.
+
+Donations in the form of research grants to promote further research and
+development on the tools will be gladly accepted, either anonymously or with
+attribution on our future publications.
+

From e7cc4b8483cb9d1401102a7b3c99343957fd622c Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 12:55:13 -0400
Subject: [PATCH 05/56] ci

---
 .github/workflows/licensing.yml | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/.github/workflows/licensing.yml b/.github/workflows/licensing.yml
index ddf249b39..9f30fd833 100644
--- a/.github/workflows/licensing.yml
+++ b/.github/workflows/licensing.yml
@@ -33,12 +33,6 @@ jobs:
           ./design_introspection-plugin/tests/selection_to_tcl_list/selection_to_tcl_list.v
           ./third_party/minilitex_ddr_arty/minilitex_ddr_arty.v
           ./third_party/VexRiscv_Lite/VexRiscv_Lite.v
-          ./parmys-plugin/tests/eltwise_layer/eltwise_layer.v
-          ./parmys-plugin/tests/raygentop/raygentop.v
-          ./parmys-plugin/tests/eltwise_layer/hard_block_include.v
-          ./parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
-          ./parmys-plugin/tests/raygentop/k6_frac_N10_frac_chain_mem32K_40nm.xml
-          ./parmys-plugin/techlibs/vtr_primitives.v
         third_party: |
           ./third_party/googletest/
           ./third_party/libargparse/

From c2fbe0df64b725ce4c6c4eb7aaf4739aaa5f5044 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 13:02:51 -0400
Subject: [PATCH 06/56] Revert "ci"

This reverts commit e7cc4b8483cb9d1401102a7b3c99343957fd622c.
---
 .github/workflows/licensing.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.github/workflows/licensing.yml b/.github/workflows/licensing.yml
index 9f30fd833..ddf249b39 100644
--- a/.github/workflows/licensing.yml
+++ b/.github/workflows/licensing.yml
@@ -33,6 +33,12 @@ jobs:
           ./design_introspection-plugin/tests/selection_to_tcl_list/selection_to_tcl_list.v
           ./third_party/minilitex_ddr_arty/minilitex_ddr_arty.v
           ./third_party/VexRiscv_Lite/VexRiscv_Lite.v
+          ./parmys-plugin/tests/eltwise_layer/eltwise_layer.v
+          ./parmys-plugin/tests/raygentop/raygentop.v
+          ./parmys-plugin/tests/eltwise_layer/hard_block_include.v
+          ./parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
+          ./parmys-plugin/tests/raygentop/k6_frac_N10_frac_chain_mem32K_40nm.xml
+          ./parmys-plugin/techlibs/vtr_primitives.v
         third_party: |
           ./third_party/googletest/
           ./third_party/libargparse/

From a900beb3cbae9d3787658d456c322efbeb57200a Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 13:05:31 -0400
Subject: [PATCH 07/56] all together

---
 Makefile | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 9e53932f7..aa1d61923 100644
--- a/Makefile
+++ b/Makefile
@@ -14,8 +14,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-#PLUGIN_LIST := fasm xdc params sdc ql-iob design_introspection integrateinv ql-qlf systemverilog uhdm dsp-ff
-PLUGIN_LIST := parmys
+PLUGIN_LIST := fasm xdc params sdc ql-iob design_introspection integrateinv ql-qlf systemverilog uhdm dsp-ff parmys
 PLUGINS := $(foreach plugin,$(PLUGIN_LIST),$(plugin).so)
 PLUGINS_INSTALL := $(foreach plugin,$(PLUGIN_LIST),install_$(plugin))
 PLUGINS_CLEAN := $(foreach plugin,$(PLUGIN_LIST),clean_$(plugin))

From 2ec748da28a1e60f2c1412ce9c150ca6774c202b Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 13:38:05 -0400
Subject: [PATCH 08/56] visualization

---
 parmys-plugin/parmys.cc                     | 20 ++++++++++++++++++--
 parmys-plugin/tests/Makefile                |  2 +-
 parmys-plugin/tests/raygentop/raygentop.tcl |  2 +-
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/parmys-plugin/parmys.cc b/parmys-plugin/parmys.cc
index 8d47b0e96..a4dddd8aa 100644
--- a/parmys-plugin/parmys.cc
+++ b/parmys-plugin/parmys.cc
@@ -45,6 +45,7 @@
 #include "multipliers.h"
 #include "netlist_cleanup.h"
 #include "netlist_statistic.h"
+#include "netlist_visualizer.h"
 #include "read_xml_config_file.h"
 #include "subtractions.h"
 
@@ -755,8 +756,6 @@ struct ParMYSPass : public Pass {
         double optimization_time = wall_time();
 
         if (odin_netlist) {
-            check_netlist(odin_netlist);
-
             /* point for all netlist optimizations. */
             log("Performing Optimization on the Netlist\n");
             if (hard_multipliers) {
@@ -857,6 +856,9 @@ struct ParMYSPass : public Pass {
         log("    -vtr_prim\n");
         log("        loads vtr primitives as modules, if the design uses vtr prmitives then this flag is mandatory for first run\n");
         log("\n");
+        log("    -viz\n");
+        log("        visualizes the netlist at 3 different stages: raw, optimized, and mapped.\n");
+        log("\n");
     }
     void execute(std::vector<std::string> args, RTLIL::Design *design) override
     {
@@ -864,6 +866,7 @@ struct ParMYSPass : public Pass {
         bool flag_config_file = false;
         bool flag_load_vtr_primitives = false;
         bool flag_no_pass = false;
+        bool flag_visualize = false;
         std::string arch_file_path;
         std::string config_file_path;
         std::string top_module_name;
@@ -894,6 +897,10 @@ struct ParMYSPass : public Pass {
                 flag_load_vtr_primitives = true;
                 continue;
             }
+            if (args[argidx] == "-viz") {
+                flag_visualize = true;
+                continue;
+            }
             if (args[argidx] == "-nopass") {
                 flag_no_pass = true;
                 continue;
@@ -1042,6 +1049,9 @@ struct ParMYSPass : public Pass {
         try {
             elaborate(transformed);
             log("Successful Elaboration of the design by Odin-II\n");
+            if (flag_visualize) {
+                graphVizOutputNetlist(".", "netlist.elaborated.net", 111, transformed);
+            }
         } catch (vtr::VtrError &vtr_error) {
             log_error("Odin-II Failed to parse Verilog / load BLIF file: %s with exit code:%d \n", vtr_error.what(), ERROR_ELABORATION);
         }
@@ -1050,6 +1060,9 @@ struct ParMYSPass : public Pass {
         try {
             optimization(transformed);
             log("Successful Optimization of netlist by Odin-II\n");
+            if (flag_visualize) {
+                graphVizOutputNetlist(".", "netlist.optimized.net", 222, transformed);
+            }
         } catch (vtr::VtrError &vtr_error) {
             log_error("Odin-II Failed to perform netlist optimization %s with exit code:%d \n", vtr_error.what(), ERROR_OPTIMIZATION);
         }
@@ -1058,6 +1071,9 @@ struct ParMYSPass : public Pass {
         try {
             techmap(transformed);
             log("Successful Partial Technology Mapping by Odin-II\n");
+            if (flag_visualize) {
+                graphVizOutputNetlist(".", "netlist.mapped.net", 333, transformed);
+            }
         } catch (vtr::VtrError &vtr_error) {
             log_error("Odin-II Failed to perform partial mapping to target device %s with exit code:%d \n", vtr_error.what(), ERROR_TECHMAP);
         }
diff --git a/parmys-plugin/tests/Makefile b/parmys-plugin/tests/Makefile
index 7d1e20505..59b7f57ad 100644
--- a/parmys-plugin/tests/Makefile
+++ b/parmys-plugin/tests/Makefile
@@ -23,6 +23,6 @@ raygentop_verify = true
 eltwise_layer_verify = true
 
 clean_modules:
-	@find . -name "net.dot" -or -name "*.yosys.blif" | xargs rm -rf
+	@find . -name "*.net.dot" -or -name "*.yosys.blif" | xargs rm -rf
 
 clean: clean_modules
diff --git a/parmys-plugin/tests/raygentop/raygentop.tcl b/parmys-plugin/tests/raygentop/raygentop.tcl
index fbbe78568..0b44ee689 100644
--- a/parmys-plugin/tests/raygentop/raygentop.tcl
+++ b/parmys-plugin/tests/raygentop/raygentop.tcl
@@ -72,7 +72,7 @@ techmap -map +/parmys/aldffe2dff.v
 
 opt -full
 
-parmys -a k6_frac_N10_frac_chain_mem32K_40nm.xml -nopass -c odin_config.xml
+parmys -a k6_frac_N10_frac_chain_mem32K_40nm.xml -nopass -c odin_config.xml -viz
 
 opt -full
 

From 9cbd08fff819da5658ba33722bd28f860c45bd52 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 13:59:51 -0400
Subject: [PATCH 09/56] block mem bug

---
 parmys-plugin/parmys.cc            | 11 ++--
 parmys-plugin/src/BlockMemories.cc | 90 ++++++++++++++++++++++++++----
 2 files changed, 86 insertions(+), 15 deletions(-)

diff --git a/parmys-plugin/parmys.cc b/parmys-plugin/parmys.cc
index a4dddd8aa..ad977b906 100644
--- a/parmys-plugin/parmys.cc
+++ b/parmys-plugin/parmys.cc
@@ -857,7 +857,7 @@ struct ParMYSPass : public Pass {
         log("        loads vtr primitives as modules, if the design uses vtr prmitives then this flag is mandatory for first run\n");
         log("\n");
         log("    -viz\n");
-        log("        visualizes the netlist at 3 different stages: raw, optimized, and mapped.\n");
+        log("        visualizes the netlist at 3 different stages: elaborated, optimized, and mapped.\n");
         log("\n");
     }
     void execute(std::vector<std::string> args, RTLIL::Design *design) override
@@ -1050,7 +1050,8 @@ struct ParMYSPass : public Pass {
             elaborate(transformed);
             log("Successful Elaboration of the design by Odin-II\n");
             if (flag_visualize) {
-                graphVizOutputNetlist(".", "netlist.elaborated.net", 111, transformed);
+                graphVizOutputNetlist(DEFAULT_OUTPUT, "netlist.elaborated.net", 111, transformed);
+                log("Successful visualization of the elaborated netlist\n");
             }
         } catch (vtr::VtrError &vtr_error) {
             log_error("Odin-II Failed to parse Verilog / load BLIF file: %s with exit code:%d \n", vtr_error.what(), ERROR_ELABORATION);
@@ -1061,7 +1062,8 @@ struct ParMYSPass : public Pass {
             optimization(transformed);
             log("Successful Optimization of netlist by Odin-II\n");
             if (flag_visualize) {
-                graphVizOutputNetlist(".", "netlist.optimized.net", 222, transformed);
+                graphVizOutputNetlist(DEFAULT_OUTPUT, "netlist.optimized.net", 222, transformed);
+                log("Successful visualization of the optimized netlist\n");
             }
         } catch (vtr::VtrError &vtr_error) {
             log_error("Odin-II Failed to perform netlist optimization %s with exit code:%d \n", vtr_error.what(), ERROR_OPTIMIZATION);
@@ -1072,7 +1074,8 @@ struct ParMYSPass : public Pass {
             techmap(transformed);
             log("Successful Partial Technology Mapping by Odin-II\n");
             if (flag_visualize) {
-                graphVizOutputNetlist(".", "netlist.mapped.net", 333, transformed);
+                graphVizOutputNetlist(DEFAULT_OUTPUT, "netlist.mapped.net", 333, transformed);
+                log("Successful visualization of the mapped netlist\n");
             }
         } catch (vtr::VtrError &vtr_error) {
             log_error("Odin-II Failed to perform partial mapping to target device %s with exit code:%d \n", vtr_error.what(), ERROR_TECHMAP);
diff --git a/parmys-plugin/src/BlockMemories.cc b/parmys-plugin/src/BlockMemories.cc
index 0f26bbd61..50325efda 100644
--- a/parmys-plugin/src/BlockMemories.cc
+++ b/parmys-plugin/src/BlockMemories.cc
@@ -64,6 +64,7 @@ static void create_r2w_dual_port_ram(block_memory_t *bram, netlist_t *netlist);
 static void create_2rw_dual_port_ram(block_memory_t *bram, netlist_t *netlist);
 static void create_2r2w_dual_port_ram(block_memory_t *bram, netlist_t *netlist);
 static void create_nrmw_dual_port_ram(block_memory_t *bram, netlist_t *netlist);
+static void create_2rw_multiplexed_dual_port_ram(block_memory_t *bram, netlist_t *netlist);
 
 static nnode_t *ymem_to_rom(nnode_t *node, uintptr_t traverse_mark_number);
 static nnode_t *ymem2_to_rom(nnode_t *node, uintptr_t traverse_mark_number);
@@ -831,11 +832,6 @@ static void create_2rw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
         add_pin_to_signal_list(rd_data2, bram->read_data->pins[i + offset]);
     }
 
-    /* delete rd pins since we use corresponding wr_en and zero*/
-    for (i = 0; i < bram->read_en->count; ++i) {
-        delete_npin(bram->read_en->pins[i]);
-    }
-
     /**
      * [NOTE]:
      * Odin-II handle memory block with more than two distint
@@ -855,11 +851,16 @@ static void create_2rw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
             free_signal_list(rd_data2);
 
             /* all ports have different address */
-            create_nrmw_dual_port_ram(bram, netlist);
+            create_2rw_multiplexed_dual_port_ram(bram, netlist);
             return;
         }
     }
 
+    /* delete rd pins since we use corresponding wr_en and zero*/
+    for (i = 0; i < bram->read_en->count; ++i) {
+        delete_npin(bram->read_en->pins[i]);
+    }
+
     /* map matched read data to the out1 */
     signals->out1 = (first_match) ? rd_data1 : rd_data2;
 
@@ -961,11 +962,6 @@ static void create_2r2w_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
         add_pin_to_signal_list(rd_data2, bram->read_data->pins[i + offset]);
     }
 
-    /* delete rd pins since we use corresponding wr_en and zero*/
-    for (i = 0; i < bram->read_en->count; ++i) {
-        delete_npin(bram->read_en->pins[i]);
-    }
-
     /**
      * [NOTE]:
      * Odin-II handle memory block with more than two distint
@@ -994,6 +990,11 @@ static void create_2r2w_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
         return;
     }
 
+    /* delete rd pins since we use corresponding wr_en and zero*/
+    for (i = 0; i < bram->read_en->count; ++i) {
+        delete_npin(bram->read_en->pins[i]);
+    }
+
     /* create a list of dpram ram signals */
     dp_ram_signals *signals = (dp_ram_signals *)vtr::malloc(sizeof(dp_ram_signals));
 
@@ -1130,6 +1131,73 @@ static void create_nrmw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
     free_dp_ram_signals(signals);
 }
 
+/**
+ * (function: create_2rw_multiplexed_dual_port_ram)
+ *
+ * @brief maps a BRAM with two read ports and one write port to a DPRAM:
+ * the read ports are multiplexed on port 1 using read enable, write uses port 2
+ *
+ * @param bram pointer to the block memory (BRAM) to be mapped
+ * @param netlist pointer to the current netlist file
+ */
+static void create_2rw_multiplexed_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
+{
+    int i;
+    nnode_t *old_node = bram->node;
+    int data_width = bram->node->attributes->DBITS;
+    int addr_width = bram->node->attributes->ABITS;
+    int num_rd_ports = old_node->attributes->RD_PORTS;
+    int num_wr_ports = old_node->attributes->WR_PORTS;
+    /* port counts should have been resolved before this function */
+    oassert(num_rd_ports == 2);
+    oassert(num_wr_ports == 1);
+    /* dual port ram signals */
+    dp_ram_signals *signals = (dp_ram_signals *)vtr::calloc(1, sizeof(dp_ram_signals));
+    signal_list_t *selectors = NULL;
+    /* INPUTS */
+    selectors = copy_input_signals(bram->read_en);
+    /* adding the read addr input port (selected by read enable) as address1 */
+    signals->addr1 = split_cascade_port(bram->read_addr, selectors, addr_width, old_node, netlist);
+    free_signal_list(selectors);
+    signals->addr2 = init_signal_list();
+    for (i = 0; i < bram->write_addr->count; ++i) {
+        add_pin_to_signal_list(signals->addr2, bram->write_addr->pins[i]);
+    }
+
+    /* handling clock signals */
+    signals->clk = bram->clk->pins[0];
+    /* we pad the first data port using pad pins */
+    signals->data1 = init_signal_list();
+    for (i = 0; i < data_width; ++i) {
+        add_pin_to_signal_list(signals->data1, get_pad_pin(netlist));
+    }
+    signals->data2 = init_signal_list();
+    for (i = 0; i < data_width; ++i) {
+        add_pin_to_signal_list(signals->data2, bram->write_data->pins[i]);
+    }
+    /* first port does not have data, so the enable is GND */
+    signals->we1 = get_zero_pin(netlist);
+    signal_list_t *we2_signal;
+    signal_list_t *vcc_signals;
+    signals->we2 = bram->write_en->pins[0];
+    /* OUTPUT */
+    /* leaving out1 of dpram null, so it will create a new pins */
+    signals->out1 = NULL;
+    signals->out2 = NULL;
+    /* create a DPRAM node */
+    nnode_t *dpram = create_dual_port_ram(signals, old_node);
+    signal_list_t *dpram_outputs = init_signal_list();
+    for (i = 0; i < data_width; ++i) {
+        add_pin_to_signal_list(dpram_outputs, dpram->output_pins[i]);
+    }
+    /* decode the dpram outputs to the bram read data ports using read enable */
+    decode_out_port(dpram_outputs, bram->read_data, bram->read_en, old_node, netlist);
+    // CLEAN UP
+    cleanup_block_memory_old_node(old_node);
+    free_signal_list(dpram_outputs);
+    free_dp_ram_signals(signals);
+}
+
 /**
  * (function: map_rom_to_mem_hardblocks)
  *

From b390300b98a262422df3eccfb528e467e60d3899 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 14:04:44 -0400
Subject: [PATCH 10/56] warning

---
 parmys-plugin/src/BlockMemories.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/parmys-plugin/src/BlockMemories.cc b/parmys-plugin/src/BlockMemories.cc
index 50325efda..14116e74f 100644
--- a/parmys-plugin/src/BlockMemories.cc
+++ b/parmys-plugin/src/BlockMemories.cc
@@ -1177,8 +1177,6 @@ static void create_2rw_multiplexed_dual_port_ram(block_memory_t *bram, netlist_t
     }
     /* first port does not have data, so the enable is GND */
     signals->we1 = get_zero_pin(netlist);
-    signal_list_t *we2_signal;
-    signal_list_t *vcc_signals;
     signals->we2 = bram->write_en->pins[0];
     /* OUTPUT */
     /* leaving out1 of dpram null, so it will create a new pins */

From 2c0ef3d251845018460150185e626dab3a67f8ea Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 14:05:28 -0400
Subject: [PATCH 11/56] ci

---
 .github/workflows/licensing.yml | 1 -
 Makefile                        | 3 ++-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/licensing.yml b/.github/workflows/licensing.yml
index ddf249b39..8a4181df2 100644
--- a/.github/workflows/licensing.yml
+++ b/.github/workflows/licensing.yml
@@ -33,7 +33,6 @@ jobs:
           ./design_introspection-plugin/tests/selection_to_tcl_list/selection_to_tcl_list.v
           ./third_party/minilitex_ddr_arty/minilitex_ddr_arty.v
           ./third_party/VexRiscv_Lite/VexRiscv_Lite.v
-          ./parmys-plugin/tests/eltwise_layer/eltwise_layer.v
           ./parmys-plugin/tests/raygentop/raygentop.v
           ./parmys-plugin/tests/eltwise_layer/hard_block_include.v
           ./parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
diff --git a/Makefile b/Makefile
index aa1d61923..8e371ba00 100644
--- a/Makefile
+++ b/Makefile
@@ -14,7 +14,8 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-PLUGIN_LIST := fasm xdc params sdc ql-iob design_introspection integrateinv ql-qlf systemverilog uhdm dsp-ff parmys
+#PLUGIN_LIST := fasm xdc params sdc ql-iob design_introspection integrateinv ql-qlf systemverilog uhdm dsp-ff parmys
+PLUGIN_LIST := parmys
 PLUGINS := $(foreach plugin,$(PLUGIN_LIST),$(plugin).so)
 PLUGINS_INSTALL := $(foreach plugin,$(PLUGIN_LIST),install_$(plugin))
 PLUGINS_CLEAN := $(foreach plugin,$(PLUGIN_LIST),clean_$(plugin))

From 1c35cd57478ca7da5c96d69450b4f7e0cd93b45f Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 14:09:11 -0400
Subject: [PATCH 12/56] eltwiselayer rm

---
 .../tests/eltwise_layer/eltwise_layer.v       | 3057 -----------------
 1 file changed, 3057 deletions(-)
 delete mode 100644 parmys-plugin/tests/eltwise_layer/eltwise_layer.v

diff --git a/parmys-plugin/tests/eltwise_layer/eltwise_layer.v b/parmys-plugin/tests/eltwise_layer/eltwise_layer.v
deleted file mode 100644
index 11199fb90..000000000
--- a/parmys-plugin/tests/eltwise_layer/eltwise_layer.v
+++ /dev/null
@@ -1,3057 +0,0 @@
-//////////////////////////////////////////////////////////////////////////////
-// Author: Aman Arora
-//////////////////////////////////////////////////////////////////////////////
-
-`timescale 1ns/1ns
-///////////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////
-// Eltwise layer
-///////////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////
-// Precision BF16
-//Each PE has 1 multiplier, an adder and a subtractor.
-//There are 4 PEs in each compute unit. 
-//There are 6 such compute units in the whole layer.
-//So, total compute throughput is 24 ops per cycle.
-//The "per cycle" is because the adder/sub/mul are
-//pipelined. Although they may be take more than 1 cycle,
-//but in the steady state, one result will come out every cycle.
-//
-//There are 6 BRAMs for each input operand. Each location in a BRAM
-//stores 4 inputs. So, the read bandwidth is 24 elements
-//per cycle. This matches the compute throughput. So, we
-//utilize each PE every cycle. There are 6 BRAMs for output.
-//We can write 4 elements per cycle.
-//
-//There are two modes of operation: 
-// 1. Vector/Matrix mode
-//    In this mode, both operands are matrices/vectors.
-//    They are read from BRAMs (A and B). The operation 
-//    selected (using the op input) is performed. This mode
-//    can be used for operations such as residual add, or 
-//    dropout.
-// 2. Scalar mode
-//    In this mode, one operand is a matrix/vector and the
-//    other operand is a scalar. It could be the mean or 
-//    variance of a normalization layer for example. The 
-//    scalar input is provided from the top-level of the design
-//    so it can be easily modified at runtime.
-//
-//Important inputs:
-//   mode: 
-//      0 -> Both operands (A and B) are matrices/vectors. Result is a matrix/vector.
-//      1 -> Operand A is matrix/vector. Operand B is scalar. Result is a matrix/vector.
-//   op:
-//      00 -> Addition
-//      01 -> Subtraction
-//      10 -> Multiplication
-//
-//The whole design can operate on 24xN matrices.  
-//Typically, to use this design, we'd break a large input
-//matrix into 24 column sections and process the matrix 
-//section by section. The number of rows will be programmed
-//in the "iterations" register in the design.
-
-
-`define BFLOAT16 
-
-// IEEE Half Precision => EXPONENT = 5, MANTISSA = 10
-// BFLOAT16 => EXPONENT = 8, MANTISSA = 7 
-
-`ifdef BFLOAT16
-`define EXPONENT 8
-`define MANTISSA 7
-`else // for ieee half precision fp16
-`define EXPONENT 5
-`define MANTISSA 10
-`endif
-
-`define SIGN 1
-`define DWIDTH (`SIGN+`EXPONENT+`MANTISSA)
-
-`define AWIDTH 10
-`define MEM_SIZE 1024
-`define DESIGN_SIZE 12
-`define CU_SIZE 4
-`define MASK_WIDTH 4
-`define MEM_ACCESS_LATENCY 1
-
-`define REG_DATAWIDTH 32
-`define REG_ADDRWIDTH 8
-`define ITERATIONS_WIDTH 32
-
-`define REG_STDN_ADDR 32'h4
-`define REG_MATRIX_A_ADDR 32'he
-`define REG_MATRIX_B_ADDR 32'h12
-`define REG_MATRIX_C_ADDR 32'h16
-`define REG_VALID_MASK_A_ADDR 32'h20
-`define REG_VALID_MASK_B_ADDR 32'h5c
-
-`define REG_ITERATIONS_ADDR 32'h40
-
-//This is the pipeline depth of the PEs (adder/mult)
-`define PE_PIPELINE_DEPTH 5
-
-module eltwise_layer(
-  input clk,
-  input clk_mem,
-  input resetn,
-  input pe_resetn,
-  input        [`REG_ADDRWIDTH-1:0] PADDR,
-  input                             PWRITE,
-  input                             PSEL,
-  input                             PENABLE,
-  input        [`REG_DATAWIDTH-1:0] PWDATA,
-  output reg   [`REG_DATAWIDTH-1:0] PRDATA,
-  output reg                        PREADY,
-  input [`DWIDTH-1:0] scalar_inp,
-  input mode, // mode==0 -> vector/matrix, mode==1 -> scalar
-  input  [1:0] op, //op==11 -> Mul, op==01 -> Sub, op==00 -> Add
-  input  [7:0] bram_select,
-  input  [`AWIDTH-1:0] bram_addr_ext,
-  output reg [`CU_SIZE*`DWIDTH-1:0] bram_rdata_ext,
-  input  [`CU_SIZE*`DWIDTH-1:0] bram_wdata_ext,
-  input  [`CU_SIZE-1:0] bram_we_ext
-);
-
-
-  wire PCLK;
-  assign PCLK = clk;
-  wire PRESETn;
-  assign PRESETn = resetn;
-  reg start_reg;
-  reg clear_done_reg;
-
-  //Dummy register to sync all other invalid/unimplemented addresses
-  reg [`REG_DATAWIDTH-1:0] reg_dummy;
-  
-  reg [`AWIDTH-1:0] bram_addr_a_0_ext;
-  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_0_ext;
-  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_0_ext;
-  reg [`MASK_WIDTH-1:0] bram_we_a_0_ext;
-    
-  reg [`AWIDTH-1:0] bram_addr_a_2_ext;
-  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_2_ext;
-  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_2_ext;
-  reg [`MASK_WIDTH-1:0] bram_we_a_2_ext;
-    
-  reg [`AWIDTH-1:0] bram_addr_a_4_ext;
-  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_4_ext;
-  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_4_ext;
-  reg [`MASK_WIDTH-1:0] bram_we_a_4_ext;
-
-  reg [`AWIDTH-1:0] bram_addr_a_1_ext;
-  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_1_ext;
-  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_1_ext;
-  reg [`MASK_WIDTH-1:0] bram_we_a_1_ext;
-    
-  reg [`AWIDTH-1:0] bram_addr_a_3_ext;
-  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_3_ext;
-  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_3_ext;
-  reg [`MASK_WIDTH-1:0] bram_we_a_3_ext;
-    
-  reg [`AWIDTH-1:0] bram_addr_a_5_ext;
-  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_5_ext;
-  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_5_ext;
-  reg [`MASK_WIDTH-1:0] bram_we_a_5_ext;
-
-    
-  reg [`AWIDTH-1:0] bram_addr_b_0_ext;
-  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_0_ext;
-  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_0_ext;
-  reg [`MASK_WIDTH-1:0] bram_we_b_0_ext;
-    
-  reg [`AWIDTH-1:0] bram_addr_b_1_ext;
-  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_1_ext;
-  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_1_ext;
-  reg [`MASK_WIDTH-1:0] bram_we_b_1_ext;
-    
-  reg [`AWIDTH-1:0] bram_addr_b_2_ext;
-  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_2_ext;
-  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_2_ext;
-  reg [`MASK_WIDTH-1:0] bram_we_b_2_ext;
-    
-  reg [`AWIDTH-1:0] bram_addr_b_3_ext;
-  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_3_ext;
-  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_3_ext;
-  reg [`MASK_WIDTH-1:0] bram_we_b_3_ext;
-    
-  reg [`AWIDTH-1:0] bram_addr_b_4_ext;
-  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_4_ext;
-  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_4_ext;
-  reg [`MASK_WIDTH-1:0] bram_we_b_4_ext;
-    
-  reg [`AWIDTH-1:0] bram_addr_b_5_ext;
-  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_5_ext;
-  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_5_ext;
-  reg [`MASK_WIDTH-1:0] bram_we_b_5_ext;
-
-  reg [`AWIDTH-1:0] bram_addr_c_0_ext;
-  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_0_ext;
-  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_0_ext;
-  reg [`MASK_WIDTH-1:0] bram_we_c_0_ext;
-    
-  reg [`AWIDTH-1:0] bram_addr_c_1_ext;
-  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_1_ext;
-  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_1_ext;
-  reg [`MASK_WIDTH-1:0] bram_we_c_1_ext;
-    
-  reg [`AWIDTH-1:0] bram_addr_c_2_ext;
-  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_2_ext;
-  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_2_ext;
-  reg [`MASK_WIDTH-1:0] bram_we_c_2_ext;
-    
-  reg [`AWIDTH-1:0] bram_addr_c_3_ext;
-  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_3_ext;
-  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_3_ext;
-  reg [`MASK_WIDTH-1:0] bram_we_c_3_ext;
-    
-  reg [`AWIDTH-1:0] bram_addr_c_4_ext;
-  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_4_ext;
-  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_4_ext;
-  reg [`MASK_WIDTH-1:0] bram_we_c_4_ext;
-    
-  reg [`AWIDTH-1:0] bram_addr_c_5_ext;
-  wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_5_ext;
-  reg [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_5_ext;
-  reg [`MASK_WIDTH-1:0] bram_we_c_5_ext;
-    
-	wire [`AWIDTH-1:0] bram_addr_a_0;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_0;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_0;
-	wire [`MASK_WIDTH-1:0] bram_we_a_0;
-	wire bram_en_a_0;
-    
-	wire [`AWIDTH-1:0] bram_addr_a_2;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_2;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_2;
-	wire [`MASK_WIDTH-1:0] bram_we_a_2;
-	wire bram_en_a_2;
-    
-	wire [`AWIDTH-1:0] bram_addr_a_4;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_4;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_4;
-	wire [`MASK_WIDTH-1:0] bram_we_a_4;
-	wire bram_en_a_4;
-
-	wire [`AWIDTH-1:0] bram_addr_a_1;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_1;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_1;
-	wire [`MASK_WIDTH-1:0] bram_we_a_1;
-	wire bram_en_a_1;
-    
-	wire [`AWIDTH-1:0] bram_addr_a_3;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_3;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_3;
-	wire [`MASK_WIDTH-1:0] bram_we_a_3;
-	wire bram_en_a_3;
-    
-	wire [`AWIDTH-1:0] bram_addr_a_5;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_a_5;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_a_5;
-	wire [`MASK_WIDTH-1:0] bram_we_a_5;
-	wire bram_en_a_5;
-    
-	wire [`AWIDTH-1:0] bram_addr_b_0;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_0;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_0;
-	wire [`MASK_WIDTH-1:0] bram_we_b_0;
-	wire bram_en_b_0;
-    
-	wire [`AWIDTH-1:0] bram_addr_b_1;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_1;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_1;
-	wire [`MASK_WIDTH-1:0] bram_we_b_1;
-	wire bram_en_b_1;
-    
-	wire [`AWIDTH-1:0] bram_addr_b_2;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_2;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_2;
-	wire [`MASK_WIDTH-1:0] bram_we_b_2;
-	wire bram_en_b_2;
-    
-	wire [`AWIDTH-1:0] bram_addr_b_3;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_3;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_3;
-	wire [`MASK_WIDTH-1:0] bram_we_b_3;
-	wire bram_en_b_3;
-
-  wire [`AWIDTH-1:0] bram_addr_b_4;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_4;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_4;
-	wire [`MASK_WIDTH-1:0] bram_we_b_4;
-	wire bram_en_b_4;
-    
-	wire [`AWIDTH-1:0] bram_addr_b_5;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_b_5;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_b_5;
-	wire [`MASK_WIDTH-1:0] bram_we_b_5;
-	wire bram_en_b_5;
-
-	wire [`AWIDTH-1:0] bram_addr_c_0;
-	wire [`AWIDTH-1:0] bram_addr_c_1;
-	wire [`AWIDTH-1:0] bram_addr_c_2;
-	wire [`AWIDTH-1:0] bram_addr_c_3;
-	wire [`AWIDTH-1:0] bram_addr_c_4;
-	wire [`AWIDTH-1:0] bram_addr_c_5;
-
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_0;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_1;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_2;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_3;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_4;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_wdata_c_5;
-
-	wire [`MASK_WIDTH-1:0] bram_we_c_0;
-	wire [`MASK_WIDTH-1:0] bram_we_c_1;
-	wire [`MASK_WIDTH-1:0] bram_we_c_2;
-	wire [`MASK_WIDTH-1:0] bram_we_c_3;
-	wire [`MASK_WIDTH-1:0] bram_we_c_4;
-	wire [`MASK_WIDTH-1:0] bram_we_c_5;
-    
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_0;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_1;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_2;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_3;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_4;
-	wire [`CU_SIZE*`DWIDTH-1:0] bram_rdata_c_5;
-
-  always @ (posedge clk) begin
-    case (bram_select)
-  
-      0: begin
-      bram_addr_a_0_ext <= bram_addr_ext;
-      bram_wdata_a_0_ext <= bram_wdata_ext;
-      bram_we_a_0_ext <= bram_we_ext;
-      bram_rdata_ext <= bram_rdata_a_0_ext;
-      end
-    
-      1: begin
-      bram_addr_a_2_ext <= bram_addr_ext;
-      bram_wdata_a_2_ext <= bram_wdata_ext;
-      bram_we_a_2_ext <= bram_we_ext;
-      bram_rdata_ext <= bram_rdata_a_2_ext;
-      end
-    
-      2: begin
-      bram_addr_a_4_ext <= bram_addr_ext;
-      bram_wdata_a_4_ext <= bram_wdata_ext;
-      bram_we_a_4_ext <= bram_we_ext;
-      bram_rdata_ext <= bram_rdata_a_4_ext;
-      end
-
-      3: begin
-      bram_addr_a_1_ext <= bram_addr_ext;
-      bram_wdata_a_1_ext <= bram_wdata_ext;
-      bram_we_a_1_ext <= bram_we_ext;
-      bram_rdata_ext <= bram_rdata_a_1_ext;
-      end
-    
-      4: begin
-      bram_addr_a_3_ext <= bram_addr_ext;
-      bram_wdata_a_3_ext <= bram_wdata_ext;
-      bram_we_a_3_ext <= bram_we_ext;
-      bram_rdata_ext <= bram_rdata_a_3_ext;
-      end
-    
-      5: begin
-      bram_addr_a_5_ext <= bram_addr_ext;
-      bram_wdata_a_5_ext <= bram_wdata_ext;
-      bram_we_a_5_ext <= bram_we_ext;
-      bram_rdata_ext <= bram_rdata_a_5_ext;
-      end
-    
-      6: begin
-      bram_addr_b_0_ext = bram_addr_ext;
-      bram_wdata_b_0_ext = bram_wdata_ext;
-      bram_we_b_0_ext = bram_we_ext;
-      bram_rdata_ext = bram_rdata_b_0_ext;
-      end
-    
-      7: begin
-      bram_addr_b_1_ext <= bram_addr_ext;
-      bram_wdata_b_1_ext <= bram_wdata_ext;
-      bram_we_b_1_ext <= bram_we_ext;
-      bram_rdata_ext <= bram_rdata_b_1_ext;
-      end
-    
-      8: begin
-      bram_addr_b_2_ext <= bram_addr_ext;
-      bram_wdata_b_2_ext <= bram_wdata_ext;
-      bram_we_b_2_ext <= bram_we_ext;
-      bram_rdata_ext <= bram_rdata_b_2_ext;
-      end
-    
-      9: begin
-      bram_addr_b_3_ext <= bram_addr_ext;
-      bram_wdata_b_3_ext <= bram_wdata_ext;
-      bram_we_b_3_ext <= bram_we_ext;
-      bram_rdata_ext <= bram_rdata_b_3_ext;
-      end
-    
-      10: begin
-      bram_addr_b_4_ext <= bram_addr_ext;
-      bram_wdata_b_4_ext <= bram_wdata_ext;
-      bram_we_b_4_ext <= bram_we_ext;
-      bram_rdata_ext <= bram_rdata_b_4_ext;
-      end
-    
-      11: begin
-      bram_addr_b_5_ext <= bram_addr_ext;
-      bram_wdata_b_5_ext <= bram_wdata_ext;
-      bram_we_b_5_ext <= bram_we_ext;
-      bram_rdata_ext <= bram_rdata_b_5_ext;
-      end
-
-      12: begin
-      bram_addr_c_0_ext <= bram_addr_ext;
-      bram_wdata_c_0_ext <= bram_wdata_ext;
-      bram_we_c_0_ext <= bram_we_ext;
-      bram_rdata_ext <= bram_rdata_c_0_ext;
-      end
-    
-      13: begin
-      bram_addr_c_1_ext <= bram_addr_ext;
-      bram_wdata_c_1_ext <= bram_wdata_ext;
-      bram_we_c_1_ext <= bram_we_ext;
-      bram_rdata_ext <= bram_rdata_c_1_ext;
-      end
-    
-      14: begin
-      bram_addr_c_2_ext <= bram_addr_ext;
-      bram_wdata_c_2_ext <= bram_wdata_ext;
-      bram_we_c_2_ext <= bram_we_ext;
-      bram_rdata_ext <= bram_rdata_c_2_ext;
-      end
-    
-      15: begin
-      bram_addr_c_3_ext <= bram_addr_ext;
-      bram_wdata_c_3_ext <= bram_wdata_ext;
-      bram_we_c_3_ext <= bram_we_ext;
-      bram_rdata_ext <= bram_rdata_c_3_ext;
-      end
-    
-      16: begin
-      bram_addr_c_4_ext <= bram_addr_ext;
-      bram_wdata_c_4_ext <= bram_wdata_ext;
-      bram_we_c_4_ext <= bram_we_ext;
-      bram_rdata_ext <= bram_rdata_c_4_ext;
-      end
-    
-      17: begin
-      bram_addr_c_5_ext <= bram_addr_ext;
-      bram_wdata_c_5_ext <= bram_wdata_ext;
-      bram_we_c_5_ext <= bram_we_ext;
-      bram_rdata_ext <= bram_rdata_c_5_ext;
-      end
-    
-      default: begin
-		bram_addr_c_5_ext <= bram_addr_ext;
-      bram_wdata_c_5_ext <= bram_wdata_ext;
-      bram_we_c_5_ext <= bram_we_ext;
-      bram_rdata_ext <= 0;
-      end
-    endcase 
-  end
-  
-/////////////////////////////////////////////////
-// BRAMs to store matrix A
-/////////////////////////////////////////////////
-
-
-  // BRAM matrix A 0
-ram matrix_A_0(
-  .addr0(bram_addr_a_0),
-  .d0(bram_wdata_a_0), 
-  .we0(bram_we_a_0), 
-  .q0(bram_rdata_a_0), 
-  .addr1(bram_addr_a_0_ext),
-  .d1(bram_wdata_a_0_ext), 
-  .we1(bram_we_a_0_ext), 
-  .q1(bram_rdata_a_0_ext), 
-  .clk(clk_mem));
-  	
-  // BRAM matrix A 2
-ram matrix_A_2(
-  .addr0(bram_addr_a_2),
-  .d0(bram_wdata_a_2), 
-  .we0(bram_we_a_2), 
-  .q0(bram_rdata_a_2), 
-  .addr1(bram_addr_a_2_ext),
-  .d1(bram_wdata_a_2_ext), 
-  .we1(bram_we_a_2_ext), 
-  .q1(bram_rdata_a_2_ext), 
-  .clk(clk_mem));
-  	
-  // BRAM matrix A 4
-ram matrix_A_4(
-  .addr0(bram_addr_a_4),
-  .d0(bram_wdata_a_4), 
-  .we0(bram_we_a_4), 
-  .q0(bram_rdata_a_4), 
-  .addr1(bram_addr_a_4_ext),
-  .d1(bram_wdata_a_4_ext), 
-  .we1(bram_we_a_4_ext), 
-  .q1(bram_rdata_a_4_ext), 
-  .clk(clk_mem));
-
-
-    // BRAM matrix A 1
-ram matrix_A_1(
-  .addr0(bram_addr_a_1),
-  .d0(bram_wdata_a_1), 
-  .we0(bram_we_a_1), 
-  .q0(bram_rdata_a_1), 
-  .addr1(bram_addr_a_1_ext),
-  .d1(bram_wdata_a_1_ext), 
-  .we1(bram_we_a_1_ext), 
-  .q1(bram_rdata_a_1_ext), 
-  .clk(clk_mem));
-  	
-  // BRAM matrix A 3
-ram matrix_A_3(
-  .addr0(bram_addr_a_3),
-  .d0(bram_wdata_a_3), 
-  .we0(bram_we_a_3), 
-  .q0(bram_rdata_a_3), 
-  .addr1(bram_addr_a_3_ext),
-  .d1(bram_wdata_a_3_ext), 
-  .we1(bram_we_a_3_ext), 
-  .q1(bram_rdata_a_3_ext), 
-  .clk(clk_mem));
-  	
-  // BRAM matrix A 5
-ram matrix_A_5(
-  .addr0(bram_addr_a_5),
-  .d0(bram_wdata_a_5), 
-  .we0(bram_we_a_5), 
-  .q0(bram_rdata_a_5), 
-  .addr1(bram_addr_a_5_ext),
-  .d1(bram_wdata_a_5_ext), 
-  .we1(bram_we_a_5_ext), 
-  .q1(bram_rdata_a_5_ext), 
-  .clk(clk_mem));
-
-////////////////////////////////////////////////
-// BRAMs to store matrix B
-/////////////////////////////////////////////////
-
-
-  // BRAM matrix B 0
-ram matrix_B_0(
-  .addr0(bram_addr_b_0),
-  .d0(bram_wdata_b_0), 
-  .we0(bram_we_b_0), 
-  .q0(bram_rdata_b_0), 
-  .addr1(bram_addr_b_0_ext),
-  .d1(bram_wdata_b_0_ext), 
-  .we1(bram_we_b_0_ext), 
-  .q1(bram_rdata_b_0_ext), 
-  .clk(clk_mem));
-  	
-  // BRAM matrix B 1
-ram matrix_B_1(
-  .addr0(bram_addr_b_1),
-  .d0(bram_wdata_b_1), 
-  .we0(bram_we_b_1), 
-  .q0(bram_rdata_b_1), 
-  .addr1(bram_addr_b_1_ext),
-  .d1(bram_wdata_b_1_ext), 
-  .we1(bram_we_b_1_ext), 
-  .q1(bram_rdata_b_1_ext), 
-  .clk(clk_mem));
-  	
-  // BRAM matrix B 2
-ram matrix_B_2(
-  .addr0(bram_addr_b_2),
-  .d0(bram_wdata_b_2), 
-  .we0(bram_we_b_2), 
-  .q0(bram_rdata_b_2), 
-  .addr1(bram_addr_b_2_ext),
-  .d1(bram_wdata_b_2_ext), 
-  .we1(bram_we_b_2_ext), 
-  .q1(bram_rdata_b_2_ext), 
-  .clk(clk_mem));
-
-  	
-  // BRAM matrix B 3
-ram matrix_B_3(
-  .addr0(bram_addr_b_3),
-  .d0(bram_wdata_b_3), 
-  .we0(bram_we_b_3), 
-  .q0(bram_rdata_b_3), 
-  .addr1(bram_addr_b_3_ext),
-  .d1(bram_wdata_b_3_ext), 
-  .we1(bram_we_b_3_ext), 
-  .q1(bram_rdata_b_3_ext), 
-  .clk(clk_mem));
-  	
-  // BRAM matrix B 4
-ram matrix_B_4(
-  .addr0(bram_addr_b_4),
-  .d0(bram_wdata_b_4), 
-  .we0(bram_we_b_4), 
-  .q0(bram_rdata_b_4), 
-  .addr1(bram_addr_b_4_ext),
-  .d1(bram_wdata_b_4_ext), 
-  .we1(bram_we_b_4_ext), 
-  .q1(bram_rdata_b_4_ext), 
-  .clk(clk_mem));
-
-
-  // BRAM matrix B 5
-ram matrix_B_5(
-  .addr0(bram_addr_b_5),
-  .d0(bram_wdata_b_5), 
-  .we0(bram_we_b_5), 
-  .q0(bram_rdata_b_5), 
-  .addr1(bram_addr_b_5_ext),
-  .d1(bram_wdata_b_5_ext), 
-  .we1(bram_we_b_5_ext), 
-  .q1(bram_rdata_b_5_ext), 
-  .clk(clk_mem));
-
-////////////////////////////////////////////////
-// BRAMs to store matrix C
-/////////////////////////////////////////////////
-
-
-  // BRAM matrix C 0
-ram matrix_C_0(
-  .addr0(bram_addr_c_0),
-  .d0(bram_wdata_c_0), 
-  .we0(bram_we_c_0), 
-  .q0(bram_rdata_c_0), 
-  .addr1(bram_addr_c_0_ext),
-  .d1(bram_wdata_c_0_ext), 
-  .we1(bram_we_c_0_ext), 
-  .q1(bram_rdata_c_0_ext), 
-  .clk(clk_mem));
-  	
-  // BRAM matrix C 1
-ram matrix_C_1(
-  .addr0(bram_addr_c_1),
-  .d0(bram_wdata_c_1), 
-  .we0(bram_we_c_1), 
-  .q0(bram_rdata_c_1), 
-  .addr1(bram_addr_c_1_ext),
-  .d1(bram_wdata_c_1_ext), 
-  .we1(bram_we_c_1_ext), 
-  .q1(bram_rdata_c_1_ext), 
-  .clk(clk_mem));
-  	
-  // BRAM matrix C 2
-ram matrix_C_2(
-  .addr0(bram_addr_c_2),
-  .d0(bram_wdata_c_2), 
-  .we0(bram_we_c_2), 
-  .q0(bram_rdata_c_2), 
-  .addr1(bram_addr_c_2_ext),
-  .d1(bram_wdata_c_2_ext), 
-  .we1(bram_we_c_2_ext), 
-  .q1(bram_rdata_c_2_ext), 
-  .clk(clk_mem));
-
-  	
-  // BRAM matrix C 3
-ram matrix_C_3(
-  .addr0(bram_addr_c_3),
-  .d0(bram_wdata_c_3), 
-  .we0(bram_we_c_3), 
-  .q0(bram_rdata_c_3), 
-  .addr1(bram_addr_c_3_ext),
-  .d1(bram_wdata_c_3_ext), 
-  .we1(bram_we_c_3_ext), 
-  .q1(bram_rdata_c_3_ext), 
-  .clk(clk_mem));
-  	
-  // BRAM matrix C 4
-ram matrix_C_4(
-  .addr0(bram_addr_c_4),
-  .d0(bram_wdata_c_4), 
-  .we0(bram_we_c_4), 
-  .q0(bram_rdata_c_4), 
-  .addr1(bram_addr_c_4_ext),
-  .d1(bram_wdata_c_4_ext), 
-  .we1(bram_we_c_4_ext), 
-  .q1(bram_rdata_c_4_ext), 
-  .clk(clk_mem));
-
-
-  // BRAM matrix C 5
-ram matrix_C_5(
-  .addr0(bram_addr_c_5),
-  .d0(bram_wdata_c_5), 
-  .we0(bram_we_c_5), 
-  .q0(bram_rdata_c_5), 
-  .addr1(bram_addr_c_5_ext),
-  .d1(bram_wdata_c_5_ext), 
-  .we1(bram_we_c_5_ext), 
-  .q1(bram_rdata_c_5_ext), 
-  .clk(clk_mem));
-  	
-reg start_eltwise_op;
-wire done_eltwise_op;
-
-reg [3:0] state;
-	
-////////////////////////////////////////////////////////////////
-// Control logic
-////////////////////////////////////////////////////////////////
-	always @( posedge clk) begin
-      if (resetn == 1'b0) begin
-        state <= 4'b0000;
-        start_eltwise_op <= 1'b0;
-      end 
-      else begin
-        case (state)
-
-        4'b0000: begin
-          start_eltwise_op <= 1'b0;
-          if (start_reg == 1'b1) begin
-            state <= 4'b0001;
-          end else begin
-            state <= 4'b0000;
-          end
-        end
-        
-        4'b0001: begin
-          start_eltwise_op <= 1'b1;	      
-          state <= 4'b1010;                    
-        end      
-        
-        4'b1010: begin                 
-          if (done_eltwise_op == 1'b1) begin
-            start_eltwise_op <= 1'b0;
-            state <= 4'b1000;
-          end
-          else begin
-            state <= 4'b1010;
-          end
-        end
-
-       4'b1000: begin
-         if (clear_done_reg == 1'b1) begin
-           state <= 4'b0000;
-         end
-         else begin
-           state <= 4'b1000;
-         end
-       end
-      endcase  
-	end 
-  end
-
-reg [1:0] state_apb;
-`define IDLE     2'b00
-`define W_ENABLE  2'b01
-`define R_ENABLE  2'b10
-
-reg [`AWIDTH-1:0] address_mat_a;
-reg [`AWIDTH-1:0] address_mat_b;
-reg [`AWIDTH-1:0] address_mat_c;
-reg [`MASK_WIDTH-1:0] validity_mask_a;
-reg [`MASK_WIDTH-1:0] validity_mask_b;
-reg [`ITERATIONS_WIDTH-1:0] iterations;
-
-////////////////////////////////////////////////////////////////
-// Configuration logic
-////////////////////////////////////////////////////////////////
-always @(posedge PCLK) begin
-  if (PRESETn == 0) begin
-    state_apb <= `IDLE;
-    PRDATA <= 0;
-    PREADY <= 0;
-    address_mat_a <= 0;
-    address_mat_b <= 0;
-    address_mat_c <= 0;
-    validity_mask_a <= {`MASK_WIDTH{1'b1}};
-    validity_mask_b <= {`MASK_WIDTH{1'b1}};
-  end
-
-  else begin
-    case (state_apb)
-      `IDLE : begin
-        PRDATA <= 0;
-        if (PSEL) begin
-          if (PWRITE) begin
-            state_apb <= `W_ENABLE;
-          end
-          else begin
-            state_apb <= `R_ENABLE;
-          end
-        end
-        PREADY <= 0;
-      end
-
-      `W_ENABLE : begin
-        if (PSEL && PWRITE && PENABLE) begin
-          case (PADDR)
-          `REG_STDN_ADDR       : begin
-                                 start_reg <= PWDATA[0];
-                                 clear_done_reg <= PWDATA[31];
-                                 end
-          `REG_MATRIX_A_ADDR   : address_mat_a <= PWDATA[`AWIDTH-1:0];
-          `REG_MATRIX_B_ADDR   : address_mat_b <= PWDATA[`AWIDTH-1:0];
-          `REG_MATRIX_C_ADDR   : address_mat_c <= PWDATA[`AWIDTH-1:0];
-          `REG_VALID_MASK_A_ADDR: begin
-                                validity_mask_a <= PWDATA[`MASK_WIDTH-1:0];
-                                end
-          `REG_VALID_MASK_B_ADDR: begin
-                                validity_mask_b <= PWDATA[`MASK_WIDTH-1:0];
-                                end
-          `REG_ITERATIONS_ADDR: iterations <= PWDATA[`ITERATIONS_WIDTH-1:0];
-          default : reg_dummy <= PWDATA; //sink writes to a dummy register
-          endcase
-          PREADY <=1;          
-        end
-        state_apb <= `IDLE;
-      end
-
-      `R_ENABLE : begin
-        if (PSEL && !PWRITE && PENABLE) begin
-          PREADY <= 1;
-          case (PADDR)
-          `REG_STDN_ADDR        : PRDATA <= {done_eltwise_op, 30'b0, start_eltwise_op};
-          `REG_MATRIX_A_ADDR    : PRDATA <= address_mat_a;
-          `REG_MATRIX_B_ADDR    : PRDATA <= address_mat_b;
-          `REG_MATRIX_C_ADDR    : PRDATA <= address_mat_c;
-          `REG_VALID_MASK_A_ADDR: PRDATA <= validity_mask_a;
-          `REG_VALID_MASK_B_ADDR: PRDATA <= validity_mask_b;
-          `REG_ITERATIONS_ADDR: PRDATA <= iterations;
-          default : PRDATA <= reg_dummy; //read the dummy register for undefined addresses
-          endcase
-        end
-        state_apb <= `IDLE;
-      end
-      default: begin
-        state_apb <= `IDLE;
-      end
-    endcase
-  end
-end  
-  
-wire reset;
-assign reset = ~resetn;
-wire pe_reset;
-assign pe_reset = ~pe_resetn;
-
-  wire c_data_0_available;
-  wire c_data_1_available;
-  wire c_data_2_available;
-  wire c_data_3_available;
-  wire c_data_4_available;
-  wire c_data_5_available;
-
-  assign bram_wdata_a_0 = {`CU_SIZE*`DWIDTH{1'b0}};
-  assign bram_en_a_0 = 1'b1;
-  assign bram_we_a_0 = {`MASK_WIDTH{1'b0}};
-
-  assign bram_wdata_a_1 = {`CU_SIZE*`DWIDTH{1'b0}};
-  assign bram_en_a_1 = 1'b1;
-  assign bram_we_a_1 = {`MASK_WIDTH{1'b0}};
-
-  assign bram_wdata_a_2 = {`CU_SIZE*`DWIDTH{1'b0}};
-  assign bram_en_a_2 = 1'b1;
-  assign bram_we_a_2 = {`MASK_WIDTH{1'b0}};
-
-  assign bram_wdata_a_3 = {`CU_SIZE*`DWIDTH{1'b0}};
-  assign bram_en_a_3 = 1'b1;
-  assign bram_we_a_3 = {`MASK_WIDTH{1'b0}};
-
-  assign bram_wdata_a_4 = {`CU_SIZE*`DWIDTH{1'b0}};
-  assign bram_en_a_4 = 1'b1;
-  assign bram_we_a_4 = {`MASK_WIDTH{1'b0}};
-
-  assign bram_wdata_a_5 = {`CU_SIZE*`DWIDTH{1'b0}};
-  assign bram_en_a_5 = 1'b1;
-  assign bram_we_a_5 = {`MASK_WIDTH{1'b0}};
-  	
-  assign bram_wdata_b_0 = {`CU_SIZE*`DWIDTH{1'b0}};
-  assign bram_en_b_0 = 1'b1;
-  assign bram_we_b_0 = {`MASK_WIDTH{1'b0}};
-
-  assign bram_wdata_b_1 = {`CU_SIZE*`DWIDTH{1'b0}};
-  assign bram_en_b_1 = 1'b1;
-  assign bram_we_b_1 = {`MASK_WIDTH{1'b0}};
-
-  assign bram_wdata_b_2 = {`CU_SIZE*`DWIDTH{1'b0}};
-  assign bram_en_b_2 = 1'b1;
-  assign bram_we_b_2 = {`MASK_WIDTH{1'b0}};
-
-  assign bram_wdata_b_3 = {`CU_SIZE*`DWIDTH{1'b0}};
-  assign bram_en_b_3 = 1'b1;
-  assign bram_we_b_3 = {`MASK_WIDTH{1'b0}};
-
-  assign bram_wdata_b_4 = {`CU_SIZE*`DWIDTH{1'b0}};
-  assign bram_en_b_4 = 1'b1;
-  assign bram_we_b_4 = {`MASK_WIDTH{1'b0}};
-
-  assign bram_wdata_b_5 = {`CU_SIZE*`DWIDTH{1'b0}};
-  assign bram_en_b_5 = 1'b1;
-  assign bram_we_b_5 = {`MASK_WIDTH{1'b0}};
-
-  assign bram_we_c_0 = (c_data_0_available) ? {`MASK_WIDTH{1'b1}} : {`MASK_WIDTH{1'b0}};  
-  assign bram_we_c_2 = (c_data_2_available) ? {`MASK_WIDTH{1'b1}} : {`MASK_WIDTH{1'b0}};  
-  assign bram_we_c_4 = (c_data_4_available) ? {`MASK_WIDTH{1'b1}} : {`MASK_WIDTH{1'b0}};  
-  assign bram_we_c_1 = (c_data_1_available) ? {`MASK_WIDTH{1'b1}} : {`MASK_WIDTH{1'b0}};  
-  assign bram_we_c_3 = (c_data_3_available) ? {`MASK_WIDTH{1'b1}} : {`MASK_WIDTH{1'b0}};  
-  assign bram_we_c_5 = (c_data_5_available) ? {`MASK_WIDTH{1'b1}} : {`MASK_WIDTH{1'b0}};  
-
-  /////////////////////////////////////////////////
-  // ORing all done signals
-  /////////////////////////////////////////////////
-  wire done_eltwise_op_0;
-  wire done_eltwise_op_1;
-  wire done_eltwise_op_2;
-  wire done_eltwise_op_3;
-  wire done_eltwise_op_4;
-  wire done_eltwise_op_5;
-
-  assign done_eltwise_op = 
-  done_eltwise_op_0 | 
-  done_eltwise_op_1 | 
-  done_eltwise_op_2 | 
-  done_eltwise_op_3 | 
-  done_eltwise_op_4 | 
-  done_eltwise_op_5 ;
-
-  /////////////////////////////////////////////////
-  // Code to allow for scalar mode
-  /////////////////////////////////////////////////
-  
-	wire [`CU_SIZE*`DWIDTH-1:0] b_data_0;
-	wire [`CU_SIZE*`DWIDTH-1:0] b_data_1;
-	wire [`CU_SIZE*`DWIDTH-1:0] b_data_2;
-	wire [`CU_SIZE*`DWIDTH-1:0] b_data_3;
-	wire [`CU_SIZE*`DWIDTH-1:0] b_data_4;
-	wire [`CU_SIZE*`DWIDTH-1:0] b_data_5;
-
-  assign b_data_0 = mode ? bram_rdata_b_0 : {`CU_SIZE{scalar_inp}};
-  assign b_data_1 = mode ? bram_rdata_b_1 : {`CU_SIZE{scalar_inp}};
-  assign b_data_2 = mode ? bram_rdata_b_2 : {`CU_SIZE{scalar_inp}};
-  assign b_data_3 = mode ? bram_rdata_b_3 : {`CU_SIZE{scalar_inp}};
-  assign b_data_4 = mode ? bram_rdata_b_4 : {`CU_SIZE{scalar_inp}};
-  assign b_data_5 = mode ? bram_rdata_b_5 : {`CU_SIZE{scalar_inp}};
-
-  /////////////////////////////////////////////////
-  // Compute Unit 0
-  /////////////////////////////////////////////////
-
-eltwise_cu u_eltwise_cu_0(
-  .clk(clk),
-  .reset(reset),
-  .pe_reset(pe_reset),
-  .start_eltwise_op(start_eltwise_op),
-  .done_eltwise_op(done_eltwise_op_0),
-  .count(iterations),
-  .op(op),
-  .address_mat_a(address_mat_a),
-  .address_mat_b(address_mat_b),
-  .address_mat_c(address_mat_c),
-  .a_data(bram_rdata_a_0),
-  .b_data(b_data_0),
-  .c_data_out(bram_wdata_c_0),
-  .a_addr(bram_addr_a_0),
-  .b_addr(bram_addr_b_0),
-  .c_addr(bram_addr_c_0),
-  .c_data_available(c_data_0_available),
-  .validity_mask_a(4'b1111),
-  .validity_mask_b(4'b1111)
-);
-
-  /////////////////////////////////////////////////
-  // Compute Unit 1
-  /////////////////////////////////////////////////
-
-eltwise_cu u_eltwise_cu_1(
-  .clk(clk),
-  .reset(reset),
-  .pe_reset(pe_reset),
-  .start_eltwise_op(start_eltwise_op),
-  .done_eltwise_op(done_eltwise_op_1),
-  .count(iterations),
-  .op(op),
-  .address_mat_a(address_mat_a),
-  .address_mat_b(address_mat_b),
-  .address_mat_c(address_mat_c),
-  .a_data(bram_rdata_a_1),
-  .b_data(b_data_1),
-  .c_data_out(bram_wdata_c_1),
-  .a_addr(bram_addr_a_1),
-  .b_addr(bram_addr_b_1),
-  .c_addr(bram_addr_c_1),
-  .c_data_available(c_data_1_available),
-  .validity_mask_a(4'b1111),
-  .validity_mask_b(4'b1111)
-);
-
-  /////////////////////////////////////////////////
-  // Compute Unit 2
-  /////////////////////////////////////////////////
-
-eltwise_cu u_eltwise_cu_2(
-  .clk(clk),
-  .reset(reset),
-  .pe_reset(pe_reset),
-  .start_eltwise_op(start_eltwise_op),
-  .done_eltwise_op(done_eltwise_op_2),
-  .count(iterations),
-  .op(op),
-  .address_mat_a(address_mat_a),
-  .address_mat_b(address_mat_b),
-  .address_mat_c(address_mat_c),
-  .a_data(bram_rdata_a_2),
-  .b_data(b_data_2),
-  .c_data_out(bram_wdata_c_2),
-  .a_addr(bram_addr_a_2),
-  .b_addr(bram_addr_b_2),
-  .c_addr(bram_addr_c_2),
-  .c_data_available(c_data_2_available),
-  .validity_mask_a(4'b1111),
-  .validity_mask_b(4'b1111)
-);
-
-  /////////////////////////////////////////////////
-  // Compute Unit 3
-  /////////////////////////////////////////////////
-
-eltwise_cu u_eltwise_cu_3(
-  .clk(clk),
-  .reset(reset),
-  .pe_reset(pe_reset),
-  .start_eltwise_op(start_eltwise_op),
-  .done_eltwise_op(done_eltwise_op_3),
-  .count(iterations),
-  .op(op),
-  .address_mat_a(address_mat_a),
-  .address_mat_b(address_mat_b),
-  .address_mat_c(address_mat_c),
-  .a_data(bram_rdata_a_3),
-  .b_data(b_data_3),
-  .c_data_out(bram_wdata_c_3),
-  .a_addr(bram_addr_a_3),
-  .b_addr(bram_addr_b_3),
-  .c_addr(bram_addr_c_3),
-  .c_data_available(c_data_3_available),
-  .validity_mask_a(4'b1111),
-  .validity_mask_b(4'b1111)
-);
-
-  /////////////////////////////////////////////////
-  // Compute Unit 4
-  /////////////////////////////////////////////////
-
-eltwise_cu u_eltwise_cu_4(
-  .clk(clk),
-  .reset(reset),
-  .pe_reset(pe_reset),
-  .start_eltwise_op(start_eltwise_op),
-  .done_eltwise_op(done_eltwise_op_4),
-  .count(iterations),
-  .op(op),
-  .address_mat_a(address_mat_a),
-  .address_mat_b(address_mat_b),
-  .address_mat_c(address_mat_c),
-  .a_data(bram_rdata_a_4),
-  .b_data(b_data_4),
-  .c_data_out(bram_wdata_c_4),
-  .a_addr(bram_addr_a_4),
-  .b_addr(bram_addr_b_4),
-  .c_addr(bram_addr_c_4),
-  .c_data_available(c_data_4_available),
-  .validity_mask_a(4'b1111),
-  .validity_mask_b(4'b1111)
-);
-
-  /////////////////////////////////////////////////
-  // Compute Unit 5
-  /////////////////////////////////////////////////
-
-eltwise_cu u_eltwise_cu_5(
-  .clk(clk),
-  .reset(reset),
-  .pe_reset(pe_reset),
-  .start_eltwise_op(start_eltwise_op),
-  .done_eltwise_op(done_eltwise_op_5),
-  .count(iterations),
-  .op(op),
-  .address_mat_a(address_mat_a),
-  .address_mat_b(address_mat_b),
-  .address_mat_c(address_mat_c),
-  .a_data(bram_rdata_a_5),
-  .b_data(b_data_5),
-  .c_data_out(bram_wdata_c_5),
-  .a_addr(bram_addr_a_5),
-  .b_addr(bram_addr_b_5),
-  .c_addr(bram_addr_c_5),
-  .c_data_available(c_data_5_available),
-  .validity_mask_a(4'b0011),
-  .validity_mask_b(4'b0011)
-);
-
-endmodule
-
-
-//////////////////////////////////
-//////////////////////////////////
-//Dual port RAM
-//////////////////////////////////
-//////////////////////////////////
-module ram (
-        addr0, 
-        d0, 
-        we0, 
-        q0,  
-        addr1,
-        d1,
-        we1,
-        q1,
-        clk);
-
-input [`AWIDTH-1:0] addr0;
-input [`AWIDTH-1:0] addr1;
-input [`CU_SIZE*`DWIDTH-1:0] d0;
-input [`CU_SIZE*`DWIDTH-1:0] d1;
-input [`CU_SIZE-1:0] we0;
-input [`CU_SIZE-1:0] we1;
-output [`CU_SIZE*`DWIDTH-1:0] q0;
-output [`CU_SIZE*`DWIDTH-1:0] q1;
-input clk;
-
-genvar i; 
-
-generate
-`ifdef QUARTUS
-   for (i=0;i<`CU_SIZE;i=i+1) begin: gen_dpram
-`else
-   for (i=0;i<`CU_SIZE;i=i+1) begin
-`endif
-     dpram_original #(.AWIDTH(`AWIDTH),.DWIDTH(`DWIDTH),.NUM_WORDS(1<<`AWIDTH)) dp1 (.clk(clk),.address_a(addr0),.address_b(addr1),.wren_a(we0[i]),.wren_b(we1[i]),.data_a(d0[i*`DWIDTH +: `DWIDTH]),.data_b(d1[i*`DWIDTH +: `DWIDTH]),.out_a(q0[i*`DWIDTH +: `DWIDTH]),.out_b(q1[i*`DWIDTH +: `DWIDTH]));
-   end
-endgenerate
-
-endmodule
-
-module dpram_original (
-    clk,
-    address_a,
-    address_b,
-    wren_a,
-    wren_b,
-    data_a,
-    data_b,
-    out_a,
-    out_b
-);
-parameter AWIDTH=10;
-parameter NUM_WORDS=1024;
-parameter DWIDTH=32;
-input clk;
-input [(AWIDTH-1):0] address_a;
-input [(AWIDTH-1):0] address_b;
-input  wren_a;
-input  wren_b;
-input [(DWIDTH-1):0] data_a;
-input [(DWIDTH-1):0] data_b;
-output reg [(DWIDTH-1):0] out_a;
-output reg [(DWIDTH-1):0] out_b;
-
-`ifndef hard_mem
-
-reg [DWIDTH-1:0] ram[NUM_WORDS-1:0];
-always @ (posedge clk) begin 
-  if (wren_a) begin
-      ram[address_a] <= data_a;
-  end
-  out_a <= ram[address_a];
-end
-  
-always @ (posedge clk) begin 
-  if (wren_b) begin
-      ram[address_b] <= data_b;
-  end 
-  out_b <= ram[address_b];
-end
-
-`else
-
-defparam u_dual_port_ram.ADDR_WIDTH = AWIDTH;
-defparam u_dual_port_ram.DATA_WIDTH = DWIDTH;
-
-dual_port_ram u_dual_port_ram(
-.addr1(address_a),
-.we1(wren_a),
-.data1(data_a),
-.out1(out_a),
-.addr2(address_b),
-.we2(wren_b),
-.data2(data_b),
-.out2(out_b),
-.clk(clk)
-);
-
-`endif
-endmodule
-
-  
-//////////////////////////////////
-//////////////////////////////////
-// Elementwise compute unit
-//////////////////////////////////
-//////////////////////////////////
-module eltwise_cu(
- clk,
- reset,
- pe_reset,
- start_eltwise_op,
- done_eltwise_op,
- count,
- op,
- address_mat_a,
- address_mat_b,
- address_mat_c,
- a_data,
- b_data,
- c_data_out, 
- a_addr,
- b_addr,
- c_addr,
- c_data_available,
- validity_mask_a,
- validity_mask_b
-);
-
- input clk;
- input reset;
- input pe_reset;
- input start_eltwise_op;
- output done_eltwise_op;
- input [`ITERATIONS_WIDTH-1:0] count;
- input [1:0] op;
- input [`AWIDTH-1:0] address_mat_a;
- input [`AWIDTH-1:0] address_mat_b;
- input [`AWIDTH-1:0] address_mat_c;
- input [`CU_SIZE*`DWIDTH-1:0] a_data;
- input [`CU_SIZE*`DWIDTH-1:0] b_data;
- output [`CU_SIZE*`DWIDTH-1:0] c_data_out;
- output [`AWIDTH-1:0] a_addr;
- output [`AWIDTH-1:0] b_addr;
- output [`AWIDTH-1:0] c_addr;
- output c_data_available;
- input [`MASK_WIDTH-1:0] validity_mask_a;
- input [`MASK_WIDTH-1:0] validity_mask_b;
-
-wire [`DWIDTH-1:0] out0;
-wire [`DWIDTH-1:0] out1;
-wire [`DWIDTH-1:0] out2;
-wire [`DWIDTH-1:0] out3;
-
-wire [`DWIDTH-1:0] a0_data;
-wire [`DWIDTH-1:0] a1_data;
-wire [`DWIDTH-1:0] a2_data;
-wire [`DWIDTH-1:0] a3_data;
-wire [`DWIDTH-1:0] b0_data;
-wire [`DWIDTH-1:0] b1_data;
-wire [`DWIDTH-1:0] b2_data;
-wire [`DWIDTH-1:0] b3_data;
-
-//////////////////////////////////////////////////////////////////////////
-// Logic for done
-//////////////////////////////////////////////////////////////////////////
-wire [7:0] clk_cnt_for_done;
-reg [31:0] clk_cnt;
-reg done_eltwise_op;
-
-assign clk_cnt_for_done = 
-                  `PE_PIPELINE_DEPTH + //This is dependent on the pipeline depth of the PEs
-                  count //The number of iterations asked for this compute unit
-                  ;
-                          
-always @(posedge clk) begin
-  if (reset || ~start_eltwise_op) begin
-    clk_cnt <= 0;
-    done_eltwise_op <= 0;
-  end
-  else if (clk_cnt == clk_cnt_for_done) begin
-    done_eltwise_op <= 1;
-    clk_cnt <= clk_cnt + 1;
-  end
-  else if (done_eltwise_op == 0) begin
-    clk_cnt <= clk_cnt + 1;
-  end    
-  else begin
-    done_eltwise_op <= 0;
-    clk_cnt <= clk_cnt + 1;
-  end
-end
-
-//////////////////////////////////////////////////////////////////////////
-// Instantiation of input logic
-//////////////////////////////////////////////////////////////////////////
-input_logic u_input_logic(
-.clk(clk),
-.reset(reset),
-.start_eltwise_op(start_eltwise_op),
-.count(count),
-.a_addr(a_addr),
-.b_addr(b_addr),
-.address_mat_a(address_mat_a),
-.address_mat_b(address_mat_b),
-.a_data(a_data),
-.b_data(b_data),
-.a0_data(a0_data),
-.a1_data(a1_data),
-.a2_data(a2_data),
-.a3_data(a3_data),
-.b0_data(b0_data),
-.b1_data(b1_data),
-.b2_data(b2_data),
-.b3_data(b3_data),
-.validity_mask_a(validity_mask_a),
-.validity_mask_b(validity_mask_b)
-);
-
-//////////////////////////////////////////////////////////////////////////
-// Instantiation of the output logic
-//////////////////////////////////////////////////////////////////////////
-output_logic u_output_logic(
-.clk(clk),
-.reset(reset),
-.start_eltwise_op(start_eltwise_op),
-.done_eltwise_op(done_eltwise_op),
-.address_mat_c(address_mat_c),
-.c_data_out(c_data_out),
-.c_addr(c_addr),
-.c_data_available(c_data_available),
-.out0(out0),
-.out1(out1),
-.out2(out2),
-.out3(out3)
-);
-
-//////////////////////////////////////////////////////////////////////////
-// Instantiations of the actual PEs
-//////////////////////////////////////////////////////////////////////////
-pe_array u_pe_array(
-.reset(reset),
-.clk(clk),
-.pe_reset(pe_reset),
-.op(op),
-.a0(a0_data), 
-.a1(a1_data), 
-.a2(a2_data), 
-.a3(a3_data),
-.b0(b0_data), 
-.b1(b1_data), 
-.b2(b2_data), 
-.b3(b3_data),
-.out0(out0),
-.out1(out1),
-.out2(out2),
-.out3(out3)
-);
-
-endmodule
-
-//////////////////////////////////////////////////////////////////////////
-// Output logic
-//////////////////////////////////////////////////////////////////////////
-module output_logic(
-clk,
-reset,
-start_eltwise_op,
-done_eltwise_op,
-address_mat_c,
-c_data_out, 
-c_addr,
-c_data_available,
-out0,
-out1,
-out2,
-out3
-);
-
-input clk;
-input reset;
-input start_eltwise_op;
-input done_eltwise_op;
-input [`AWIDTH-1:0] address_mat_c;
-output [`CU_SIZE*`DWIDTH-1:0] c_data_out;
-output [`AWIDTH-1:0] c_addr;
-output c_data_available;
-input [`DWIDTH-1:0] out0;
-input [`DWIDTH-1:0] out1;
-input [`DWIDTH-1:0] out2;
-input [`DWIDTH-1:0] out3;
-
-reg c_data_available;
-reg [`CU_SIZE*`DWIDTH-1:0] c_data_out;
-
-//////////////////////////////////////////////////////////////////////////
-// Logic to capture matrix C data from the PEs and send to RAM
-//////////////////////////////////////////////////////////////////////////
-
-reg [`AWIDTH-1:0] c_addr;
-reg [7:0] cnt;
-
-always @(posedge clk) begin
-  if (reset | ~start_eltwise_op) begin
-    c_data_available <= 1'b0;
-    c_addr <= address_mat_c;
-    c_data_out <= 0;
-    cnt <= 0;
-  end
-  else if (cnt>`PE_PIPELINE_DEPTH) begin
-    c_data_available <= 1'b1;
-    c_addr <= c_addr+1;
-    c_data_out <= {out3, out2, out1, out0};
-    cnt <= cnt + 1;
-  end else begin
-    cnt <= cnt + 1;
-  end 
-end
-
-endmodule
-
-//////////////////////////////////////////////////////////////////////////
-// Data setup
-//////////////////////////////////////////////////////////////////////////
-module input_logic(
-clk,
-reset,
-start_eltwise_op,
-count,
-a_addr,
-b_addr,
-address_mat_a,
-address_mat_b,
-a_data,
-b_data,
-a0_data,
-a1_data,
-a2_data,
-a3_data,
-b0_data,
-b1_data,
-b2_data,
-b3_data,
-validity_mask_a,
-validity_mask_b
-);
-
-input clk;
-input reset;
-input start_eltwise_op;
-input [`ITERATIONS_WIDTH-1:0] count;
-output [`AWIDTH-1:0] a_addr;
-output [`AWIDTH-1:0] b_addr;
-input [`AWIDTH-1:0] address_mat_a;
-input [`AWIDTH-1:0] address_mat_b;
-input [`CU_SIZE*`DWIDTH-1:0] a_data;
-input [`CU_SIZE*`DWIDTH-1:0] b_data;
-output [`DWIDTH-1:0] a0_data;
-output [`DWIDTH-1:0] a1_data;
-output [`DWIDTH-1:0] a2_data;
-output [`DWIDTH-1:0] a3_data;
-output [`DWIDTH-1:0] b0_data;
-output [`DWIDTH-1:0] b1_data;
-output [`DWIDTH-1:0] b2_data;
-output [`DWIDTH-1:0] b3_data;
-input [`MASK_WIDTH-1:0] validity_mask_a;
-input [`MASK_WIDTH-1:0] validity_mask_b;
-
-reg [7:0] iterations;
-
-wire [`DWIDTH-1:0] a0_data;
-wire [`DWIDTH-1:0] a1_data;
-wire [`DWIDTH-1:0] a2_data;
-wire [`DWIDTH-1:0] a3_data;
-wire [`DWIDTH-1:0] b0_data;
-wire [`DWIDTH-1:0] b1_data;
-wire [`DWIDTH-1:0] b2_data;
-wire [`DWIDTH-1:0] b3_data;
-
-//////////////////////////////////////////////////////////////////////////
-// Logic to generate addresses to BRAM A
-//////////////////////////////////////////////////////////////////////////
-reg [`AWIDTH-1:0] a_addr;
-reg a_mem_access; //flag that tells whether the compute unit is trying to access memory or not
-
-always @(posedge clk) begin
-  //else if (clk_cnt >= a_loc*`CU_SIZE+final_mat_mul_size) begin
-  //Writing the line above to avoid multiplication:
-  if (reset || ~start_eltwise_op) begin
-    a_addr <= address_mat_a;
-    a_mem_access <= 0;
-    iterations <= 0;
-  end
-
-  //else if ((clk_cnt >= a_loc*`CU_SIZE) && (clk_cnt < a_loc*`CU_SIZE+final_mat_mul_size)) begin
-  //Writing the line above to avoid multiplication:
-  else if (iterations <= count) begin
-    a_addr <= a_addr + 1;
-    a_mem_access <= 1;
-    iterations <= iterations + 1;
-  end
-end  
-
-//////////////////////////////////////////////////////////////////////////
-// Logic to generate valid signals for data coming from BRAM A
-//////////////////////////////////////////////////////////////////////////
-reg [7:0] a_mem_access_counter;
-always @(posedge clk) begin
-  if (reset || ~start_eltwise_op) begin
-    a_mem_access_counter <= 0;
-  end
-  else if (a_mem_access == 1) begin
-    a_mem_access_counter <= a_mem_access_counter + 1;  
-
-  end
-  else begin
-    a_mem_access_counter <= 0;
-  end
-end
-
-wire bram_rdata_a_valid; //flag that tells whether the data from memory is valid
-assign bram_rdata_a_valid = 
-       ((validity_mask_a[0]==1'b0 && a_mem_access_counter==1) ||
-        (validity_mask_a[1]==1'b0 && a_mem_access_counter==2) ||
-        (validity_mask_a[2]==1'b0 && a_mem_access_counter==3) ||
-        (validity_mask_a[3]==1'b0 && a_mem_access_counter==4)) ?
-        1'b0 : (a_mem_access_counter >= `MEM_ACCESS_LATENCY);
-
-//////////////////////////////////////////////////////////////////////////
-// Logic to delay certain parts of the data received from BRAM A (systolic data setup)
-//////////////////////////////////////////////////////////////////////////
-//Slice data into chunks and qualify it with whether it is valid or not
-assign a0_data = a_data[1*`DWIDTH-1:0*`DWIDTH] & {`DWIDTH{bram_rdata_a_valid}} & {`DWIDTH{validity_mask_a[0]}};
-assign a1_data = a_data[2*`DWIDTH-1:1*`DWIDTH] & {`DWIDTH{bram_rdata_a_valid}} & {`DWIDTH{validity_mask_a[1]}};
-assign a2_data = a_data[3*`DWIDTH-1:2*`DWIDTH] & {`DWIDTH{bram_rdata_a_valid}} & {`DWIDTH{validity_mask_a[2]}};
-assign a3_data = a_data[4*`DWIDTH-1:3*`DWIDTH] & {`DWIDTH{bram_rdata_a_valid}} & {`DWIDTH{validity_mask_a[3]}};
-
-
-//////////////////////////////////////////////////////////////////////////
-// Logic to generate addresses to BRAM B
-//////////////////////////////////////////////////////////////////////////
-reg [`AWIDTH-1:0] b_addr;
-reg b_mem_access; //flag that tells whether the compute unit is trying to access memory or not
-
-always @(posedge clk) begin
-  //else if (clk_cnt >= b_loc*`CU_SIZE+final_mat_mul_size) begin
-  //Writing the line above to avoid multiplication:
-  if (reset || ~start_eltwise_op) begin
-    b_addr <= address_mat_b ;
-    b_mem_access <= 0;
-  end
-  //else if ((clk_cnt >= b_loc*`CU_SIZE) && (clk_cnt < b_loc*`CU_SIZE+final_mat_mul_size)) begin
-  //Writing the line above to avoid multiplication:
-  else if (iterations <= count) begin
-    b_addr <= b_addr + 1;
-    b_mem_access <= 1;
-  end
-end  
-
-//////////////////////////////////////////////////////////////////////////
-// Logic to generate valid signals for data coming from BRAM B
-//////////////////////////////////////////////////////////////////////////
-reg [7:0] b_mem_access_counter;
-always @(posedge clk) begin
-  if (reset || ~start_eltwise_op) begin
-    b_mem_access_counter <= 0;
-  end
-  else if (b_mem_access == 1) begin
-    b_mem_access_counter <= b_mem_access_counter + 1;  
-  end
-  else begin
-    b_mem_access_counter <= 0;
-  end
-end
-
-wire bram_rdata_b_valid; //flag that tells whether the data from memory is valid
-assign bram_rdata_b_valid = 
-       ((validity_mask_b[0]==1'b0 && b_mem_access_counter==1) ||
-        (validity_mask_b[1]==1'b0 && b_mem_access_counter==2) ||
-        (validity_mask_b[2]==1'b0 && b_mem_access_counter==3) ||
-        (validity_mask_b[3]==1'b0 && b_mem_access_counter==4)) ?
-        1'b0 : (b_mem_access_counter >= `MEM_ACCESS_LATENCY);
-
-//Slice data into chunks and qualify it with whether it is valid or not
-assign b0_data = b_data[1*`DWIDTH-1:0*`DWIDTH] & {`DWIDTH{bram_rdata_b_valid}} & {`DWIDTH{validity_mask_b[0]}};
-assign b1_data = b_data[2*`DWIDTH-1:1*`DWIDTH] & {`DWIDTH{bram_rdata_b_valid}} & {`DWIDTH{validity_mask_b[1]}};
-assign b2_data = b_data[3*`DWIDTH-1:2*`DWIDTH] & {`DWIDTH{bram_rdata_b_valid}} & {`DWIDTH{validity_mask_b[2]}};
-assign b3_data = b_data[4*`DWIDTH-1:3*`DWIDTH] & {`DWIDTH{bram_rdata_b_valid}} & {`DWIDTH{validity_mask_b[3]}};
-
-
-endmodule
-
-
-
-//////////////////////////////////////////////////////////////////////////
-// Array of processing elements
-//////////////////////////////////////////////////////////////////////////
-module pe_array(
-reset,
-clk,
-pe_reset,
-op,
-a0, a1, a2, a3,
-b0, b1, b2, b3,
-out0, out1, out2, out3
-);
-
-input clk;
-input reset;
-input pe_reset;
-input [1:0] op;
-input [`DWIDTH-1:0] a0;
-input [`DWIDTH-1:0] a1;
-input [`DWIDTH-1:0] a2;
-input [`DWIDTH-1:0] a3;
-input [`DWIDTH-1:0] b0;
-input [`DWIDTH-1:0] b1;
-input [`DWIDTH-1:0] b2;
-input [`DWIDTH-1:0] b3;
-output [`DWIDTH-1:0] out0;
-output [`DWIDTH-1:0] out1;
-output [`DWIDTH-1:0] out2;
-output [`DWIDTH-1:0] out3;
-
-wire [`DWIDTH-1:0] out0, out1, out2, out3;
-
-wire effective_rst;
-assign effective_rst = reset | pe_reset;
-
-processing_element pe0(.reset(effective_rst), .clk(clk), .in_a(a0), .in_b(b0), .op(op), .out(out0));
-processing_element pe1(.reset(effective_rst), .clk(clk), .in_a(a1), .in_b(b1), .op(op), .out(out1));
-processing_element pe2(.reset(effective_rst), .clk(clk), .in_a(a2), .in_b(b2), .op(op), .out(out2));
-processing_element pe3(.reset(effective_rst), .clk(clk), .in_a(a3), .in_b(b3), .op(op), .out(out3));
-
-endmodule
-
-
-//////////////////////////////////////////////////////////////////////////
-// Processing element (PE)
-//////////////////////////////////////////////////////////////////////////
-module processing_element(
- reset, 
- clk, 
- in_a,
- in_b, 
- op,
- out
- );
-
- input reset;
- input clk;
- input  [`DWIDTH-1:0] in_a;
- input  [`DWIDTH-1:0] in_b;
- input  [1:0] op;
- output [`DWIDTH-1:0] out;
-
- wire [`DWIDTH-1:0] out_mul;
- wire [`DWIDTH-1:0] out_sum;
- wire [`DWIDTH-1:0] out_sub;
-
- assign out = (op == 2'b00) ? out_sum : 
-              (op == 2'b01) ? out_sub :
-              out_mul;
-
- seq_mul u_mul(.a(in_a), .b(in_b), .out(out_mul), .reset(reset), .clk(clk));
- seq_add u_add(.a(in_a), .b(in_b), .out(out_sum), .reset(reset), .clk(clk));
- seq_sub u_sub(.a(in_a), .b(in_b), .out(out_sub), .reset(reset), .clk(clk));
-
-endmodule
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-// Multiply block
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-module seq_mul(a, b, out, reset, clk);
-input [`DWIDTH-1:0] a;
-input [`DWIDTH-1:0] b;
-input reset;
-input clk;
-output [`DWIDTH-1:0] out;
-
-reg [`DWIDTH-1:0] a_flopped;
-reg [`DWIDTH-1:0] b_flopped;
-
-wire [`DWIDTH-1:0] mul_out_temp;
-reg [`DWIDTH-1:0] mul_out_temp_reg;
-
-always @(posedge clk) begin
-  if (reset) begin
-    a_flopped <= 0;
-    b_flopped <= 0;
-  end else begin
-    a_flopped <= a;
-    b_flopped <= b;
-  end
-end
-
-//assign mul_out_temp = a * b;
-`ifdef complex_dsp
-mult_fp_clk_16 mul_u1(.clk(clk), .a(a_flopped), .b(b_flopped), .out(mul_out_temp));
-`else
-FPMult_16 u_FPMult (.clk(clk), .rst(1'b0), .a(a_flopped), .b(b_flopped), .result(mul_out_temp), .flags());
-`endif
-
-always @(posedge clk) begin
-  if (reset) begin
-    mul_out_temp_reg <= 0;
-  end else begin
-    mul_out_temp_reg <= mul_out_temp;
-  end
-end
-
-assign out = mul_out_temp_reg;
-
-endmodule
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-// Addition block
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-module seq_add(a, b, out, reset, clk);
-input [`DWIDTH-1:0] a;
-input [`DWIDTH-1:0] b;
-input reset;
-input clk;
-output [`DWIDTH-1:0] out;
-
-reg [`DWIDTH-1:0] a_flopped;
-reg [`DWIDTH-1:0] b_flopped;
-
-wire [`DWIDTH-1:0] sum_out_temp;
-reg [`DWIDTH-1:0] sum_out_temp_reg;
-
-always @(posedge clk) begin
-  if (reset) begin
-    a_flopped <= 0;
-    b_flopped <= 0;
-  end else begin
-    a_flopped <= a;
-    b_flopped <= b;
-  end
-end
-
-//assign sum_out_temp = a + b;
-`ifdef complex_dsp
-addition_fp_clk_16 add_u1(.clk(clk), .a(a_flopped), .b(b_flopped), .out(sum_out_temp));
-`else
-FPAddSub u_FPAddSub (.clk(clk), .rst(1'b0), .a(a_flopped), .b(b_flopped), .operation(1'b0), .result(sum_out_temp), .flags());
-`endif
-
-always @(posedge clk) begin
-  if (reset) begin
-    sum_out_temp_reg <= 0;
-  end else begin
-    sum_out_temp_reg <= sum_out_temp;
-  end
-end
-
-assign out = sum_out_temp_reg;
-
-endmodule
-
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-// Subtraction block
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-module seq_sub(a, b, out, reset, clk);
-input [`DWIDTH-1:0] a;
-input [`DWIDTH-1:0] b;
-input reset;
-input clk;
-output [`DWIDTH-1:0] out;
-
-reg [`DWIDTH-1:0] a_flopped;
-reg [`DWIDTH-1:0] b_flopped;
-
-wire [`DWIDTH-1:0] sub_out_temp;
-reg [`DWIDTH-1:0] sub_out_temp_reg;
-
-always @(posedge clk) begin
-  if (reset) begin
-    a_flopped <= 0;
-    b_flopped <= 0;
-  end else begin
-    a_flopped <= a;
-    b_flopped <= b;
-  end
-end
-
-//assign sub_out_temp = a - b;
-//Floating point adder has both modes - add and sub.
-//We don't provide the name of the mode here though.
-
-`ifdef complex_dsp
-addition_fp_clk_16 sub_u1(.clk(clk), .a(a_flopped), .b(b_flopped), .out(sub_out_temp));
-`else
-FPAddSub u_FPAddSub2(.clk(clk), .rst(1'b0), .a(a_flopped), .b(b_flopped), .operation(1'b0), .result(sub_out_temp), .flags());
-`endif
-
-always @(posedge clk) begin
-  if (reset) begin
-    sub_out_temp_reg <= 0;
-  end else begin
-    sub_out_temp_reg <= sub_out_temp;
-  end
-end
-
-assign out = sub_out_temp_reg;
-
-endmodule
-
-
-`ifndef complex_dsp
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-// Floating point 16-bit multiplier
-// This is a heavily modified version of:
-// https://github.com/fbrosser/DSP48E1-FP/tree/master/src/FPMult
-// Original author: Fredrik Brosser
-// Abridged by: Samidh Mehta
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-module FPMult_16(
-		clk,
-		rst,
-		a,
-		b,
-		result,
-		flags
-    );
-	
-	// Input Ports
-	input clk ;							// Clock
-	input rst ;							// Reset signal
-	input [`DWIDTH-1:0] a;						// Input A, a 32-bit floating point number
-	input [`DWIDTH-1:0] b;						// Input B, a 32-bit floating point number
-	
-	// Output ports
-	output [`DWIDTH-1:0] result ;					// Product, result of the operation, 32-bit FP number
-	output [4:0] flags ;						// Flags indicating exceptions according to IEEE754
-	
-	// Internal signals
-	wire [`DWIDTH-1:0] Z_int ;					// Product, result of the operation, 32-bit FP number
-	wire [4:0] Flags_int ;						// Flags indicating exceptions according to IEEE754
-	
-	wire Sa ;							// A's sign
-	wire Sb ;							// B's sign
-	wire Sp ;							// Product sign
-	wire [`EXPONENT-1:0] Ea ;					// A's exponent
-	wire [`EXPONENT-1:0] Eb ;					// B's exponent
-	wire [2*`MANTISSA+1:0] Mp ;					// Product mantissa
-	wire [4:0] InputExc ;						// Exceptions in inputs
-	wire [`MANTISSA-1:0] NormM ;					// Normalized mantissa
-	wire [`EXPONENT:0] NormE ;					// Normalized exponent
-	wire [`MANTISSA:0] RoundM ;					// Normalized mantissa
-	wire [`EXPONENT:0] RoundE ;					// Normalized exponent
-	wire [`MANTISSA:0] RoundMP ;					// Normalized mantissa
-	wire [`EXPONENT:0] RoundEP ;					// Normalized exponent
-	wire GRS ;
-
-	//reg [63:0] pipe_0;						// Pipeline register Input->Prep
-	reg [2*`DWIDTH-1:0] pipe_0;					// Pipeline register Input->Prep
-
-	//reg [92:0] pipe_1;						// Pipeline register Prep->Execute
-	//reg [3*`MANTISSA+2*`EXPONENT+7:0] pipe_1;			// Pipeline register Prep->Execute
-	reg [3*`MANTISSA+2*`EXPONENT+18:0] pipe_1;
-
-	//reg [38:0] pipe_2;						// Pipeline register Execute->Normalize
-	reg [`MANTISSA+`EXPONENT+7:0] pipe_2;				// Pipeline register Execute->Normalize
-	
-	//reg [72:0] pipe_3;						// Pipeline register Normalize->Round
-	reg [2*`MANTISSA+2*`EXPONENT+10:0] pipe_3;			// Pipeline register Normalize->Round
-
-	//reg [36:0] pipe_4;						// Pipeline register Round->Output
-	reg [`DWIDTH+4:0] pipe_4;					// Pipeline register Round->Output
-	
-	assign result = pipe_4[`DWIDTH+4:5] ;
-	assign flags = pipe_4[4:0] ;
-	
-	// Prepare the operands for alignment and check for exceptions
-	FPMult_PrepModule PrepModule(clk, rst, pipe_0[2*`DWIDTH-1:`DWIDTH], pipe_0[`DWIDTH-1:0], Sa, Sb, Ea[`EXPONENT-1:0], Eb[`EXPONENT-1:0], Mp[2*`MANTISSA+1:0], InputExc[4:0]) ;
-
-	// Perform (unsigned) mantissa multiplication
-	FPMult_ExecuteModule ExecuteModule(pipe_1[3*`MANTISSA+`EXPONENT*2+7:2*`MANTISSA+2*`EXPONENT+8], pipe_1[2*`MANTISSA+2*`EXPONENT+7:2*`MANTISSA+7], pipe_1[2*`MANTISSA+6:5], pipe_1[2*`MANTISSA+2*`EXPONENT+6:2*`MANTISSA+`EXPONENT+7], pipe_1[2*`MANTISSA+`EXPONENT+6:2*`MANTISSA+7], pipe_1[2*`MANTISSA+2*`EXPONENT+8], pipe_1[2*`MANTISSA+2*`EXPONENT+7], Sp, NormE[`EXPONENT:0], NormM[`MANTISSA-1:0], GRS) ;
-
-	// Round result and if necessary, perform a second (post-rounding) normalization step
-	FPMult_NormalizeModule NormalizeModule(pipe_2[`MANTISSA-1:0], pipe_2[`MANTISSA+`EXPONENT:`MANTISSA], RoundE[`EXPONENT:0], RoundEP[`EXPONENT:0], RoundM[`MANTISSA:0], RoundMP[`MANTISSA:0]) ;		
-
-	// Round result and if necessary, perform a second (post-rounding) normalization step
-	//FPMult_RoundModule RoundModule(pipe_3[47:24], pipe_3[23:0], pipe_3[65:57], pipe_3[56:48], pipe_3[66], pipe_3[67], pipe_3[72:68], Z_int[31:0], Flags_int[4:0]) ;		
-	FPMult_RoundModule RoundModule(pipe_3[2*`MANTISSA+1:`MANTISSA+1], pipe_3[`MANTISSA:0], pipe_3[2*`MANTISSA+2*`EXPONENT+3:2*`MANTISSA+`EXPONENT+3], pipe_3[2*`MANTISSA+`EXPONENT+2:2*`MANTISSA+2], pipe_3[2*`MANTISSA+2*`EXPONENT+4], pipe_3[2*`MANTISSA+2*`EXPONENT+5], pipe_3[2*`MANTISSA+2*`EXPONENT+10:2*`MANTISSA+2*`EXPONENT+6], Z_int[`DWIDTH-1:0], Flags_int[4:0]) ;		
-
-//adding always@ (*) instead of posedge clock to make design combinational
-	always @ (posedge clk) begin	
-		if(rst) begin
-			pipe_0 <= 0;
-			pipe_1 <= 0;
-			pipe_2 <= 0; 
-			pipe_3 <= 0;
-			pipe_4 <= 0;
-		end 
-		else begin		
-			/* PIPE 0
-				[2*`DWIDTH-1:`DWIDTH] A
-				[`DWIDTH-1:0] B
-			*/
-                       pipe_0 <= {a, b} ;
-
-
-			/* PIPE 1
-				[2*`EXPONENT+3*`MANTISSA + 18: 2*`EXPONENT+2*`MANTISSA + 18] //pipe_0[`DWIDTH+`MANTISSA-1:`DWIDTH] , mantissa of A
-				[2*`EXPONENT+2*`MANTISSA + 17 :2*`EXPONENT+2*`MANTISSA + 9] // pipe_0[8:0]
-				[2*`EXPONENT+2*`MANTISSA + 8] Sa
-				[2*`EXPONENT+2*`MANTISSA + 7] Sb
-				[2*`EXPONENT+2*`MANTISSA + 6:`EXPONENT+2*`MANTISSA+7] Ea
-				[`EXPONENT +2*`MANTISSA+6:2*`MANTISSA+7] Eb
-				[2*`MANTISSA+1+5:5] Mp
-				[4:0] InputExc
-			*/
-			//pipe_1 <= {pipe_0[`DWIDTH+`MANTISSA-1:`DWIDTH], pipe_0[`MANTISSA_MUL_SPLIT_LSB-1:0], Sa, Sb, Ea[`EXPONENT-1:0], Eb[`EXPONENT-1:0], Mp[2*`MANTISSA-1:0], InputExc[4:0]} ;
-			pipe_1 <= {pipe_0[`DWIDTH+`MANTISSA-1:`DWIDTH], pipe_0[8:0], Sa, Sb, Ea[`EXPONENT-1:0], Eb[`EXPONENT-1:0], Mp[2*`MANTISSA+1:0], InputExc[4:0]} ;
-			
-			/* PIPE 2
-				[`EXPONENT + `MANTISSA + 7:`EXPONENT + `MANTISSA + 3] InputExc
-				[`EXPONENT + `MANTISSA + 2] GRS
-				[`EXPONENT + `MANTISSA + 1] Sp
-				[`EXPONENT + `MANTISSA:`MANTISSA] NormE
-				[`MANTISSA-1:0] NormM
-			*/
-			pipe_2 <= {pipe_1[4:0], GRS, Sp, NormE[`EXPONENT:0], NormM[`MANTISSA-1:0]} ;
-			/* PIPE 3
-				[2*`EXPONENT+2*`MANTISSA+10:2*`EXPONENT+2*`MANTISSA+6] InputExc
-				[2*`EXPONENT+2*`MANTISSA+5] GRS
-				[2*`EXPONENT+2*`MANTISSA+4] Sp	
-				[2*`EXPONENT+2*`MANTISSA+3:`EXPONENT+2*`MANTISSA+3] RoundE
-				[`EXPONENT+2*`MANTISSA+2:2*`MANTISSA+2] RoundEP
-				[2*`MANTISSA+1:`MANTISSA+1] RoundM
-				[`MANTISSA:0] RoundMP
-			*/
-			pipe_3 <= {pipe_2[`EXPONENT+`MANTISSA+7:`EXPONENT+`MANTISSA+1], RoundE[`EXPONENT:0], RoundEP[`EXPONENT:0], RoundM[`MANTISSA:0], RoundMP[`MANTISSA:0]} ;
-			/* PIPE 4
-				[`DWIDTH+4:5] Z
-				[4:0] Flags
-			*/				
-			pipe_4 <= {Z_int[`DWIDTH-1:0], Flags_int[4:0]} ;
-		end
-	end
-		
-endmodule
-
-
-
-module FPMult_PrepModule (
-		clk,
-		rst,
-		a,
-		b,
-		Sa,
-		Sb,
-		Ea,
-		Eb,
-		Mp,
-		InputExc
-	);
-	
-	// Input ports
-	input clk ;
-	input rst ;
-	input [`DWIDTH-1:0] a ;								// Input A, a 32-bit floating point number
-	input [`DWIDTH-1:0] b ;								// Input B, a 32-bit floating point number
-	
-	// Output ports
-	output Sa ;										// A's sign
-	output Sb ;										// B's sign
-	output [`EXPONENT-1:0] Ea ;								// A's exponent
-	output [`EXPONENT-1:0] Eb ;								// B's exponent
-	output [2*`MANTISSA+1:0] Mp ;							// Mantissa product
-	output [4:0] InputExc ;						// Input numbers are exceptions
-	
-	// Internal signals							// If signal is high...
-	wire ANaN ;										// A is a signalling NaN
-	wire BNaN ;										// B is a signalling NaN
-	wire AInf ;										// A is infinity
-	wire BInf ;										// B is infinity
-    wire [`MANTISSA-1:0] Ma;
-    wire [`MANTISSA-1:0] Mb;
-	
-	assign ANaN = &(a[`DWIDTH-2:`MANTISSA]) &  |(a[`DWIDTH-2:`MANTISSA]) ;			// All one exponent and not all zero mantissa - NaN
-	assign BNaN = &(b[`DWIDTH-2:`MANTISSA]) &  |(b[`MANTISSA-1:0]);			// All one exponent and not all zero mantissa - NaN
-	assign AInf = &(a[`DWIDTH-2:`MANTISSA]) & ~|(a[`DWIDTH-2:`MANTISSA]) ;		// All one exponent and all zero mantissa - Infinity
-	assign BInf = &(b[`DWIDTH-2:`MANTISSA]) & ~|(b[`DWIDTH-2:`MANTISSA]) ;		// All one exponent and all zero mantissa - Infinity
-	
-	// Check for any exceptions and put all flags into exception vector
-	assign InputExc = {(ANaN | BNaN | AInf | BInf), ANaN, BNaN, AInf, BInf} ;
-	//assign InputExc = {(ANaN | ANaN | BNaN |BNaN), ANaN, ANaN, BNaN,BNaN} ;
-	
-	// Take input numbers apart
-	assign Sa = a[`DWIDTH-1] ;							// A's sign
-	assign Sb = b[`DWIDTH-1] ;							// B's sign
-	assign Ea = a[`DWIDTH-2:`MANTISSA];						// Store A's exponent in Ea, unless A is an exception
-	assign Eb = b[`DWIDTH-2:`MANTISSA];						// Store B's exponent in Eb, unless B is an exception	
-//    assign Ma = a[`MANTISSA_MSB:`MANTISSA_LSB];
-  //  assign Mb = b[`MANTISSA_MSB:`MANTISSA_LSB];
-	
-
-
-	//assign Mp = ({4'b0001, a[`MANTISSA-1:0]}*{4'b0001, b[`MANTISSA-1:9]}) ;
-	assign Mp = ({1'b1,a[`MANTISSA-1:0]}*{1'b1, b[`MANTISSA-1:0]}) ;
-
-	
-    //We multiply part of the mantissa here
-    //Full mantissa of A
-    //Bits MANTISSA_MUL_SPLIT_MSB:MANTISSA_MUL_SPLIT_LSB of B
-   // wire [`ACTUAL_MANTISSA-1:0] inp_A;
-   // wire [`ACTUAL_MANTISSA-1:0] inp_B;
-   // assign inp_A = {1'b1, Ma};
-   // assign inp_B = {{(`MANTISSA-(`MANTISSA_MUL_SPLIT_MSB-`MANTISSA_MUL_SPLIT_LSB+1)){1'b0}}, 1'b1, Mb[`MANTISSA_MUL_SPLIT_MSB:`MANTISSA_MUL_SPLIT_LSB]};
-   // DW02_mult #(`ACTUAL_MANTISSA,`ACTUAL_MANTISSA) u_mult(.A(inp_A), .B(inp_B), .TC(1'b0), .PRODUCT(Mp));
-endmodule
-
-
-module FPMult_ExecuteModule(
-		a,
-		b,
-		MpC,
-		Ea,
-		Eb,
-		Sa,
-		Sb,
-		Sp,
-		NormE,
-		NormM,
-		GRS
-    );
-
-	// Input ports
-	input [`MANTISSA-1:0] a ;
-	input [2*`EXPONENT:0] b ;
-	input [2*`MANTISSA+1:0] MpC ;
-	input [`EXPONENT-1:0] Ea ;						// A's exponent
-	input [`EXPONENT-1:0] Eb ;						// B's exponent
-	input Sa ;								// A's sign
-	input Sb ;								// B's sign
-	
-	// Output ports
-	output Sp ;								// Product sign
-	output [`EXPONENT:0] NormE ;													// Normalized exponent
-	output [`MANTISSA-1:0] NormM ;												// Normalized mantissa
-	output GRS ;
-	
-	wire [2*`MANTISSA+1:0] Mp ;
-	
-	assign Sp = (Sa ^ Sb) ;												// Equal signs give a positive product
-	
-   // wire [`ACTUAL_MANTISSA-1:0] inp_a;
-   // wire [`ACTUAL_MANTISSA-1:0] inp_b;
-   // assign inp_a = {1'b1, a};
-   // assign inp_b = {{(`MANTISSA-`MANTISSA_MUL_SPLIT_LSB){1'b0}}, 1'b0, b};
-   // DW02_mult #(`ACTUAL_MANTISSA,`ACTUAL_MANTISSA) u_mult(.A(inp_a), .B(inp_b), .TC(1'b0), .PRODUCT(Mp_temp));
-   // DW01_add #(2*`ACTUAL_MANTISSA) u_add(.A(Mp_temp), .B(MpC<<`MANTISSA_MUL_SPLIT_LSB), .CI(1'b0), .SUM(Mp), .CO());
-
-	//assign Mp = (MpC<<(2*`EXPONENT+1)) + ({4'b0001, a[`MANTISSA-1:0]}*{1'b0, b[2*`EXPONENT:0]}) ;
-	assign Mp = MpC;
-
-
-	assign NormM = (Mp[2*`MANTISSA+1] ? Mp[2*`MANTISSA:`MANTISSA+1] : Mp[2*`MANTISSA-1:`MANTISSA]); 	// Check for overflow
-	assign NormE = (Ea + Eb + Mp[2*`MANTISSA+1]);								// If so, increment exponent
-	
-	assign GRS = ((Mp[`MANTISSA]&(Mp[`MANTISSA+1]))|(|Mp[`MANTISSA-1:0])) ;
-	
-endmodule
-
-module FPMult_NormalizeModule(
-		NormM,
-		NormE,
-		RoundE,
-		RoundEP,
-		RoundM,
-		RoundMP
-    );
-
-	// Input Ports
-	input [`MANTISSA-1:0] NormM ;									// Normalized mantissa
-	input [`EXPONENT:0] NormE ;									// Normalized exponent
-
-	// Output Ports
-	output [`EXPONENT:0] RoundE ;
-	output [`EXPONENT:0] RoundEP ;
-	output [`MANTISSA:0] RoundM ;
-	output [`MANTISSA:0] RoundMP ; 
-	
-// EXPONENT = 5 
-// EXPONENT -1 = 4
-// NEED to subtract 2^4 -1 = 15
-
-wire [`EXPONENT-1 : 0] bias;
-
-assign bias =  ((1<< (`EXPONENT -1)) -1);
-
-	assign RoundE = NormE - bias ;
-	assign RoundEP = NormE - bias -1 ;
-	assign RoundM = NormM ;
-	assign RoundMP = NormM ;
-
-endmodule
-
-module FPMult_RoundModule(
-		RoundM,
-		RoundMP,
-		RoundE,
-		RoundEP,
-		Sp,
-		GRS,
-		InputExc,
-		Z,
-		Flags
-    );
-
-	// Input Ports
-	input [`MANTISSA:0] RoundM ;									// Normalized mantissa
-	input [`MANTISSA:0] RoundMP ;									// Normalized exponent
-	input [`EXPONENT:0] RoundE ;									// Normalized mantissa + 1
-	input [`EXPONENT:0] RoundEP ;									// Normalized exponent + 1
-	input Sp ;												// Product sign
-	input GRS ;
-	input [4:0] InputExc ;
-	
-	// Output Ports
-	output [`DWIDTH-1:0] Z ;										// Final product
-	output [4:0] Flags ;
-	
-	// Internal Signals
-	wire [`EXPONENT:0] FinalE ;									// Rounded exponent
-	wire [`MANTISSA:0] FinalM;
-	wire [`MANTISSA:0] PreShiftM;
-	
-	assign PreShiftM = GRS ? RoundMP : RoundM ;	// Round up if R and (G or S)
-	
-	// Post rounding normalization (potential one bit shift> use shifted mantissa if there is overflow)
-	assign FinalM = (PreShiftM[`MANTISSA] ? {1'b0, PreShiftM[`MANTISSA:1]} : PreShiftM[`MANTISSA:0]) ;
-	
-	assign FinalE = (PreShiftM[`MANTISSA] ? RoundEP : RoundE) ; // Increment exponent if a shift was done
-	
-	assign Z = {Sp, FinalE[`EXPONENT-1:0], FinalM[`MANTISSA-1:0]} ;   // Putting the pieces together
-	assign Flags = InputExc[4:0];
-
-endmodule
-`endif
-
-///////////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////
-// Floating point 16-bit adder
-// This is a heavily modified version of:
-// https://github.com/fbrosser/DSP48E1-FP/tree/master/src/FP_AddSub
-// Original author: Fredrik Brosser
-// Abridged by: Samidh Mehta
-///////////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////
-`ifndef complex_dsp
-
-module FPAddSub(
-		//bf16,
-		clk,
-		rst,
-		a,
-		b,
-		operation,			// 0 add, 1 sub
-		result,
-		flags
-	);
-	//input bf16; //1 for Bfloat16, 0 for IEEE half precision
-
-	// Clock and reset
-	input clk ;										// Clock signal
-	input rst ;										// Reset (active high, resets pipeline registers)
-	
-	// Input ports
-	input [`DWIDTH-1:0] a ;								// Input A, a 32-bit floating point number
-	input [`DWIDTH-1:0] b ;								// Input B, a 32-bit floating point number
-	input operation ;								// Operation select signal
-	
-	// Output ports
-	output [`DWIDTH-1:0] result ;						// Result of the operation
-	output [4:0] flags ;							// Flags indicating exceptions according to IEEE754
-	
-	// Pipeline Registers
-	//reg [79:0] pipe_1;							// Pipeline register PreAlign->Align1
-	reg [2*`EXPONENT + 2*`DWIDTH + 5:0] pipe_1;							// Pipeline register PreAlign->Align1
-
-	//reg [67:0] pipe_2;							// Pipeline register Align1->Align3
-	//reg [2*`EXPONENT+ 2*`MANTISSA + 8:0] pipe_2;							// Pipeline register Align1->Align3
-	wire [2*`EXPONENT+ 2*`MANTISSA + 8:0] pipe_2;
-
-	//reg [76:0] pipe_3;	68						// Pipeline register Align1->Align3
-	reg [2*`EXPONENT+ 2*`MANTISSA + 9:0] pipe_3;							// Pipeline register Align1->Align3
-
-	//reg [69:0] pipe_4;							// Pipeline register Align3->Execute
-	//reg [2*`EXPONENT+ 2*`MANTISSA + 9:0] pipe_4;							// Pipeline register Align3->Execute
-	wire [2*`EXPONENT+ 2*`MANTISSA + 9:0] pipe_4;
-	
-	//reg [51:0] pipe_5;							// Pipeline register Execute->Normalize
-	reg [`DWIDTH+`EXPONENT+11:0] pipe_5;							// Pipeline register Execute->Normalize
-
-	//reg [56:0] pipe_6;							// Pipeline register Nomalize->NormalizeShift1
-	//reg [`DWIDTH+`EXPONENT+16:0] pipe_6;							// Pipeline register Nomalize->NormalizeShift1
-	wire [`DWIDTH+`EXPONENT+16:0] pipe_6;
-
-	//reg [56:0] pipe_7;							// Pipeline register NormalizeShift2->NormalizeShift3
-	//reg [`DWIDTH+`EXPONENT+16:0] pipe_7;							// Pipeline register NormalizeShift2->NormalizeShift3
-	wire [`DWIDTH+`EXPONENT+16:0] pipe_7;
-	//reg [54:0] pipe_8;							// Pipeline register NormalizeShift3->Round
-	reg [`EXPONENT*2+`MANTISSA+15:0] pipe_8;							// Pipeline register NormalizeShift3->Round
-
-	//reg [40:0] pipe_9;							// Pipeline register NormalizeShift3->Round
-	//reg [`DWIDTH+8:0] pipe_9;							// Pipeline register NormalizeShift3->Round
-	wire [`DWIDTH+8:0] pipe_9;
-
-	// Internal wires between modules
-	wire [`DWIDTH-2:0] Aout_0 ;							// A - sign
-	wire [`DWIDTH-2:0] Bout_0 ;							// B - sign
-	wire Opout_0 ;									// A's sign
-	wire Sa_0 ;										// A's sign
-	wire Sb_0 ;										// B's sign
-	wire MaxAB_1 ;									// Indicates the larger of A and B(0/A, 1/B)
-	wire [`EXPONENT-1:0] CExp_1 ;							// Common Exponent
-	wire [`EXPONENT-1:0] Shift_1 ;							// Number of steps to smaller mantissa shift right (align)
-	wire [`MANTISSA-1:0] Mmax_1 ;							// Larger mantissa
-	wire [4:0] InputExc_0 ;						// Input numbers are exceptions
-	wire [2*`EXPONENT-1:0] ShiftDet_0 ;
-	wire [`MANTISSA-1:0] MminS_1 ;						// Smaller mantissa after 0/16 shift
-	wire [`MANTISSA:0] MminS_2 ;						// Smaller mantissa after 0/4/8/12 shift
-	wire [`MANTISSA:0] Mmin_3 ;							// Smaller mantissa after 0/1/2/3 shift
-	wire [`DWIDTH:0] Sum_4 ;
-	wire PSgn_4 ;
-	wire Opr_4 ;
-	wire [`EXPONENT-1:0] Shift_5 ;							// Number of steps to shift sum left (normalize)
-	wire [`DWIDTH:0] SumS_5 ;							// Sum after 0/16 shift
-	wire [`DWIDTH:0] SumS_6 ;							// Sum after 0/16 shift
-	wire [`DWIDTH:0] SumS_7 ;							// Sum after 0/16 shift
-	wire [`MANTISSA-1:0] NormM_8 ;						// Normalized mantissa
-	wire [`EXPONENT:0] NormE_8;							// Adjusted exponent
-	wire ZeroSum_8 ;								// Zero flag
-	wire NegE_8 ;									// Flag indicating negative exponent
-	wire R_8 ;										// Round bit
-	wire S_8 ;										// Final sticky bit
-	wire FG_8 ;										// Final sticky bit
-	wire [`DWIDTH-1:0] P_int ;
-	wire EOF ;
-	
-	// Prepare the operands for alignment and check for exceptions
-	FPAddSub_PrealignModule PrealignModule
-	(	// Inputs
-		a, b, operation,
-		// Outputs
-		Sa_0, Sb_0, ShiftDet_0[2*`EXPONENT-1:0], InputExc_0[4:0], Aout_0[`DWIDTH-2:0], Bout_0[`DWIDTH-2:0], Opout_0) ;
-		
-	// Prepare the operands for alignment and check for exceptions
-	FPAddSub_AlignModule AlignModule
-	(	// Inputs
-		pipe_1[2*`EXPONENT + 2*`DWIDTH + 4: 2*`EXPONENT +`DWIDTH + 6], pipe_1[2*`EXPONENT +`DWIDTH + 5 :  2*`EXPONENT +7], pipe_1[2*`EXPONENT+4:5],
-		// Outputs
-		CExp_1[`EXPONENT-1:0], MaxAB_1, Shift_1[`EXPONENT-1:0], MminS_1[`MANTISSA-1:0], Mmax_1[`MANTISSA-1:0]) ;	
-
-	// Alignment Shift Stage 1
-	FPAddSub_AlignShift1 AlignShift1
-	(  // Inputs
-		//bf16, 
-		pipe_2[`MANTISSA-1:0], pipe_2[`EXPONENT+ 2*`MANTISSA + 4 : 2*`MANTISSA + 7],
-		// Outputs
-		MminS_2[`MANTISSA:0]) ;
-
-	// Alignment Shift Stage 3 and compution of guard and sticky bits
-	FPAddSub_AlignShift2 AlignShift2  
-	(  // Inputs
-		pipe_3[`MANTISSA:0], pipe_3[2*`MANTISSA+7:2*`MANTISSA+6],
-		// Outputs
-		Mmin_3[`MANTISSA:0]) ;
-						
-	// Perform mantissa addition
-	FPAddSub_ExecutionModule ExecutionModule
-	(  // Inputs
-		pipe_4[`MANTISSA*2+5:`MANTISSA+6], pipe_4[`MANTISSA:0], pipe_4[2*`EXPONENT+ 2*`MANTISSA + 8], pipe_4[2*`EXPONENT+ 2*`MANTISSA + 7], pipe_4[2*`EXPONENT+ 2*`MANTISSA + 6], pipe_4[2*`EXPONENT+ 2*`MANTISSA + 9],
-		// Outputs
-		Sum_4[`DWIDTH:0], PSgn_4, Opr_4) ;
-	
-	// Prepare normalization of result
-	FPAddSub_NormalizeModule NormalizeModule
-	(  // Inputs
-		pipe_5[`DWIDTH:0], 
-		// Outputs
-		SumS_5[`DWIDTH:0], Shift_5[4:0]) ;
-					
-	// Normalization Shift Stage 1
-	FPAddSub_NormalizeShift1 NormalizeShift1
-	(  // Inputs
-		pipe_6[`DWIDTH:0], pipe_6[`DWIDTH+`EXPONENT+14:`DWIDTH+`EXPONENT+11],
-		// Outputs
-		SumS_7[`DWIDTH:0]) ;
-		
-	// Normalization Shift Stage 3 and final guard, sticky and round bits
-	FPAddSub_NormalizeShift2 NormalizeShift2
-	(  // Inputs
-		pipe_7[`DWIDTH:0], pipe_7[`DWIDTH+`EXPONENT+5:`DWIDTH+6], pipe_7[`DWIDTH+`EXPONENT+15:`DWIDTH+`EXPONENT+11],
-		// Outputs
-		NormM_8[`MANTISSA-1:0], NormE_8[`EXPONENT:0], ZeroSum_8, NegE_8, R_8, S_8, FG_8) ;
-
-	// Round and put result together
-	FPAddSub_RoundModule RoundModule
-	(  // Inputs
-		 pipe_8[3], pipe_8[4+`EXPONENT:4], pipe_8[`EXPONENT+`MANTISSA+4:5+`EXPONENT], pipe_8[1], pipe_8[0], pipe_8[`EXPONENT*2+`MANTISSA+15], pipe_8[`EXPONENT*2+`MANTISSA+12], pipe_8[`EXPONENT*2+`MANTISSA+11], pipe_8[`EXPONENT*2+`MANTISSA+14], pipe_8[`EXPONENT*2+`MANTISSA+10], 
-		// Outputs
-		P_int[`DWIDTH-1:0], EOF) ;
-	
-	// Check for exceptions
-	FPAddSub_ExceptionModule Exceptionmodule
-	(  // Inputs
-		pipe_9[8+`DWIDTH:9], pipe_9[8], pipe_9[7], pipe_9[6], pipe_9[5:1], pipe_9[0], 
-		// Outputs
-		result[`DWIDTH-1:0], flags[4:0]) ;			
-	
-
-assign pipe_2 = {pipe_1[2*`EXPONENT + 2*`DWIDTH + 5], pipe_1[2*`EXPONENT +6:2*`EXPONENT +5], MaxAB_1, CExp_1[`EXPONENT-1:0], Shift_1[`EXPONENT-1:0], Mmax_1[`MANTISSA-1:0], pipe_1[4:0], MminS_1[`MANTISSA-1:0]} ;
-assign pipe_4 = {pipe_3[2*`EXPONENT+ 2*`MANTISSA + 9:`MANTISSA+1], Mmin_3[`MANTISSA:0]} ;
-assign pipe_6 = {pipe_5[`DWIDTH+`EXPONENT+11], Shift_5[4:0], pipe_5[`DWIDTH+`EXPONENT+10:`DWIDTH+1], SumS_5[`DWIDTH:0]} ;
-assign pipe_7 = {pipe_6[`DWIDTH+`EXPONENT+16:`DWIDTH+1], SumS_7[`DWIDTH:0]} ;
-assign pipe_9 = {P_int[`DWIDTH-1:0], pipe_8[2], pipe_8[1], pipe_8[0], pipe_8[`EXPONENT+`MANTISSA+9:`EXPONENT+`MANTISSA+5], EOF} ;
-
-	always @ (posedge clk) begin	
-		if(rst) begin
-			pipe_1 <= 0;
-			//pipe_2 <= 0;
-			pipe_3 <= 0;
-			//pipe_4 <= 0;
-			pipe_5 <= 0;
-			//pipe_6 <= 0;
-			//pipe_7 <= 0;
-			pipe_8 <= 0;
-			//pipe_9 <= 0;
-		end 
-		else begin
-/* PIPE_1:
-	[2*`EXPONENT + 2*`DWIDTH + 5]  Opout_0
-	[2*`EXPONENT + 2*`DWIDTH + 4: 2*`EXPONENT +`DWIDTH + 6] A_out0
-	[2*`EXPONENT +`DWIDTH + 5 :  2*`EXPONENT +7] Bout_0
-	[2*`EXPONENT +6] Sa_0
-	[2*`EXPONENT +5] Sb_0
-	[2*`EXPONENT +4 : 5] ShiftDet_0
-	[4:0] Input Exc
-*/
-			pipe_1 <= {Opout_0, Aout_0[`DWIDTH-2:0], Bout_0[`DWIDTH-2:0], Sa_0, Sb_0, ShiftDet_0[2*`EXPONENT -1:0], InputExc_0[4:0]} ;	
-/* PIPE_2
-[2*`EXPONENT+ 2*`MANTISSA + 8] operation
-[2*`EXPONENT+ 2*`MANTISSA + 7] Sa_0
-[2*`EXPONENT+ 2*`MANTISSA + 6] Sb_0
-[2*`EXPONENT+ 2*`MANTISSA + 5] MaxAB_0
-[2*`EXPONENT+ 2*`MANTISSA + 4:`EXPONENT+ 2*`MANTISSA + 5] CExp_0
-[`EXPONENT+ 2*`MANTISSA + 4 : 2*`MANTISSA + 5] Shift_0
-[2*`MANTISSA + 4:`MANTISSA + 5] Mmax_0
-[`MANTISSA + 4 : `MANTISSA] InputExc_0
-[`MANTISSA-1:0] MminS_1
-*/
-			//pipe_2 <= {pipe_1[2*`EXPONENT + 2*`DWIDTH + 5], pipe_1[2*`EXPONENT +6:2*`EXPONENT +5], MaxAB_1, CExp_1[`EXPONENT-1:0], Shift_1[`EXPONENT-1:0], Mmax_1[`MANTISSA-1:0], pipe_1[4:0], MminS_1[`MANTISSA-1:0]} ;	
-/* PIPE_3
-[2*`EXPONENT+ 2*`MANTISSA + 9] operation
-[2*`EXPONENT+ 2*`MANTISSA + 8] Sa_0
-[2*`EXPONENT+ 2*`MANTISSA + 7] Sb_0
-[2*`EXPONENT+ 2*`MANTISSA + 6] MaxAB_0
-[2*`EXPONENT+ 2*`MANTISSA + 5:`EXPONENT+ 2*`MANTISSA + 6] CExp_0
-[`EXPONENT+ 2*`MANTISSA + 5 : 2*`MANTISSA + 6] Shift_0
-[2*`MANTISSA + 5:`MANTISSA + 6] Mmax_0
-[`MANTISSA + 5 : `MANTISSA + 1] InputExc_0
-[`MANTISSA:0] MminS_2
-*/
-			pipe_3 <= {pipe_2[2*`EXPONENT+ 2*`MANTISSA + 8:`MANTISSA], MminS_2[`MANTISSA:0]} ;	
-/* PIPE_4
-[2*`EXPONENT+ 2*`MANTISSA + 9] operation
-[2*`EXPONENT+ 2*`MANTISSA + 8] Sa_0
-[2*`EXPONENT+ 2*`MANTISSA + 7] Sb_0
-[2*`EXPONENT+ 2*`MANTISSA + 6] MaxAB_0
-[2*`EXPONENT+ 2*`MANTISSA + 5:`EXPONENT+ 2*`MANTISSA + 6] CExp_0
-[`EXPONENT+ 2*`MANTISSA + 5 : 2*`MANTISSA + 6] Shift_0
-[2*`MANTISSA + 5:`MANTISSA + 6] Mmax_0
-[`MANTISSA + 5 : `MANTISSA + 1] InputExc_0
-[`MANTISSA:0] MminS_3
-*/				
-			//pipe_4 <= {pipe_3[2*`EXPONENT+ 2*`MANTISSA + 9:`MANTISSA+1], Mmin_3[`MANTISSA:0]} ;	
-/* PIPE_5 :
-[`DWIDTH+ `EXPONENT + 11] operation
-[`DWIDTH+ `EXPONENT + 10] PSgn_4
-[`DWIDTH+ `EXPONENT + 9] Opr_4
-[`DWIDTH+ `EXPONENT + 8] Sa_0
-[`DWIDTH+ `EXPONENT + 7] Sb_0
-[`DWIDTH+ `EXPONENT + 6] MaxAB_0
-[`DWIDTH+ `EXPONENT + 5 :`DWIDTH+6] CExp_0
-[`DWIDTH+5:`DWIDTH+1] InputExc_0
-[`DWIDTH:0] Sum_4
-*/					
-			pipe_5 <= {pipe_4[2*`EXPONENT+ 2*`MANTISSA + 9], PSgn_4, Opr_4, pipe_4[2*`EXPONENT+ 2*`MANTISSA + 8:`EXPONENT+ 2*`MANTISSA + 6], pipe_4[`MANTISSA+5:`MANTISSA+1], Sum_4[`DWIDTH:0]} ;
-/* PIPE_6 :
-[`DWIDTH+ `EXPONENT + 16] operation
-[`DWIDTH+ `EXPONENT + 15:`DWIDTH+ `EXPONENT + 11] Shift_5
-[`DWIDTH+ `EXPONENT + 10] PSgn_4
-[`DWIDTH+ `EXPONENT + 9] Opr_4
-[`DWIDTH+ `EXPONENT + 8] Sa_0
-[`DWIDTH+ `EXPONENT + 7] Sb_0
-[`DWIDTH+ `EXPONENT + 6] MaxAB_0
-[`DWIDTH+ `EXPONENT + 5 :`DWIDTH+6] CExp_0
-[`DWIDTH+5:`DWIDTH+1] InputExc_0
-[`DWIDTH:0] Sum_4
-*/				
-			//pipe_6 <= {pipe_5[`DWIDTH+`EXPONENT+11], Shift_5[4:0], pipe_5[`DWIDTH+`EXPONENT+10:`DWIDTH+1], SumS_5[`DWIDTH:0]} ;	
-/* PIPE_7 :
-[`DWIDTH+ `EXPONENT + 16] operation
-[`DWIDTH+ `EXPONENT + 15:`DWIDTH+ `EXPONENT + 11] Shift_5
-[`DWIDTH+ `EXPONENT + 10] PSgn_4
-[`DWIDTH+ `EXPONENT + 9] Opr_4
-[`DWIDTH+ `EXPONENT + 8] Sa_0
-[`DWIDTH+ `EXPONENT + 7] Sb_0
-[`DWIDTH+ `EXPONENT + 6] MaxAB_0
-[`DWIDTH+ `EXPONENT + 5 :`DWIDTH+6] CExp_0
-[`DWIDTH+5:`DWIDTH+1] InputExc_0
-[`DWIDTH:0] Sum_4
-*/						
-			//pipe_7 <= {pipe_6[`DWIDTH+`EXPONENT+16:`DWIDTH+1], SumS_7[`DWIDTH:0]} ;	
-/* PIPE_8:
-[2*`EXPONENT + `MANTISSA + 15] FG_8 
-[2*`EXPONENT + `MANTISSA + 14] operation
-[2*`EXPONENT + `MANTISSA + 13] PSgn_4
-[2*`EXPONENT + `MANTISSA + 12] Sa_0
-[2*`EXPONENT + `MANTISSA + 11] Sb_0
-[2*`EXPONENT + `MANTISSA + 10] MaxAB_0
-[2*`EXPONENT + `MANTISSA + 9:`EXPONENT + `MANTISSA + 10] CExp_0
-[`EXPONENT + `MANTISSA + 9:`EXPONENT + `MANTISSA + 5] InputExc_8
-[`EXPONENT + `MANTISSA + 4 :`EXPONENT + 5] NormM_8 
-[`EXPONENT + 4 :4] NormE_8
-[3] ZeroSum_8
-[2] NegE_8
-[1] R_8
-[0] S_8
-*/				
-			pipe_8 <= {FG_8, pipe_7[`DWIDTH+`EXPONENT+16], pipe_7[`DWIDTH+`EXPONENT+10], pipe_7[`DWIDTH+`EXPONENT+8:`DWIDTH+1], NormM_8[`MANTISSA-1:0], NormE_8[`EXPONENT:0], ZeroSum_8, NegE_8, R_8, S_8} ;	
-/* pipe_9:
-[`DWIDTH + 8 :9] P_int
-[8] NegE_8
-[7] R_8
-[6] S_8
-[5:1] InputExc_8
-[0] EOF
-*/				
-			//pipe_9 <= {P_int[`DWIDTH-1:0], pipe_8[2], pipe_8[1], pipe_8[0], pipe_8[`EXPONENT+`MANTISSA+9:`EXPONENT+`MANTISSA+5], EOF} ;	
-		end
-	end		
-	
-endmodule
-
-
-//
-// Description:	 	The pre-alignment module is responsible for taking the inputs
-//							apart and checking the parts for exceptions.
-//							The exponent difference is also calculated in this module.
-//
-
-
-module FPAddSub_PrealignModule(
-		A,
-		B,
-		operation,
-		Sa,
-		Sb,
-		ShiftDet,
-		InputExc,
-		Aout,
-		Bout,
-		Opout
-	);
-	
-	// Input ports
-	input [`DWIDTH-1:0] A ;										// Input A, a 32-bit floating point number
-	input [`DWIDTH-1:0] B ;										// Input B, a 32-bit floating point number
-	input operation ;
-	
-	// Output ports
-	output Sa ;												// A's sign
-	output Sb ;												// B's sign
-	output [2*`EXPONENT-1:0] ShiftDet ;
-	output [4:0] InputExc ;								// Input numbers are exceptions
-	output [`DWIDTH-2:0] Aout ;
-	output [`DWIDTH-2:0] Bout ;
-	output Opout ;
-	
-	// Internal signals									// If signal is high...
-	wire ANaN ;												// A is a NaN (Not-a-Number)
-	wire BNaN ;												// B is a NaN
-	wire AInf ;												// A is infinity
-	wire BInf ;												// B is infinity
-	wire [`EXPONENT-1:0] DAB ;										// ExpA - ExpB					
-	wire [`EXPONENT-1:0] DBA ;										// ExpB - ExpA	
-	
-	assign ANaN = &(A[`DWIDTH-2:`DWIDTH-1-`EXPONENT]) & |(A[`MANTISSA-1:0]) ;		// All one exponent and not all zero mantissa - NaN
-	assign BNaN = &(B[`DWIDTH-2:`DWIDTH-1-`EXPONENT]) & |(B[`MANTISSA-1:0]);		// All one exponent and not all zero mantissa - NaN
-	assign AInf = &(A[`DWIDTH-2:`DWIDTH-1-`EXPONENT]) & ~|(A[`MANTISSA-1:0]) ;	// All one exponent and all zero mantissa - Infinity
-	assign BInf = &(B[`DWIDTH-2:`DWIDTH-1-`EXPONENT]) & ~|(B[`MANTISSA-1:0]) ;	// All one exponent and all zero mantissa - Infinity
-	
-	// Put all flags into exception vector
-	assign InputExc = {(ANaN | BNaN | AInf | BInf), ANaN, BNaN, AInf, BInf} ;
-	
-	//assign DAB = (A[30:23] - B[30:23]) ;
-	//assign DBA = (B[30:23] - A[30:23]) ;
-	assign DAB = (A[`DWIDTH-2:`MANTISSA] + ~(B[`DWIDTH-2:`MANTISSA]) + 1) ;
-	assign DBA = (B[`DWIDTH-2:`MANTISSA] + ~(A[`DWIDTH-2:`MANTISSA]) + 1) ;
-	
-	assign Sa = A[`DWIDTH-1] ;									// A's sign bit
-	assign Sb = B[`DWIDTH-1] ;									// B's sign	bit
-	assign ShiftDet = {DBA[`EXPONENT-1:0], DAB[`EXPONENT-1:0]} ;		// Shift data
-	assign Opout = operation ;
-	assign Aout = A[`DWIDTH-2:0] ;
-	assign Bout = B[`DWIDTH-2:0] ;
-	
-endmodule
-
-
-//
-// Description:	 	The alignment module determines the larger input operand and
-//							sets the mantissas, shift and common exponent accordingly.
-//
-
-
-module FPAddSub_AlignModule (
-		A,
-		B,
-		ShiftDet,
-		CExp,
-		MaxAB,
-		Shift,
-		Mmin,
-		Mmax
-	);
-	
-	// Input ports
-	input [`DWIDTH-2:0] A ;								// Input A, a 32-bit floating point number
-	input [`DWIDTH-2:0] B ;								// Input B, a 32-bit floating point number
-	input [2*`EXPONENT-1:0] ShiftDet ;
-	
-	// Output ports
-	output [`EXPONENT-1:0] CExp ;							// Common Exponent
-	output MaxAB ;									// Incidates larger of A and B (0/A, 1/B)
-	output [`EXPONENT-1:0] Shift ;							// Number of steps to smaller mantissa shift right
-	output [`MANTISSA-1:0] Mmin ;							// Smaller mantissa 
-	output [`MANTISSA-1:0] Mmax ;							// Larger mantissa
-	
-	// Internal signals
-	//wire BOF ;										// Check for shifting overflow if B is larger
-	//wire AOF ;										// Check for shifting overflow if A is larger
-	
-	assign MaxAB = (A[`DWIDTH-2:0] < B[`DWIDTH-2:0]) ;	
-	//assign BOF = ShiftDet[9:5] < 25 ;		// Cannot shift more than 25 bits
-	//assign AOF = ShiftDet[4:0] < 25 ;		// Cannot shift more than 25 bits
-	
-	// Determine final shift value
-	//assign Shift = MaxAB ? (BOF ? ShiftDet[9:5] : 5'b11001) : (AOF ? ShiftDet[4:0] : 5'b11001) ;
-	
-	assign Shift = MaxAB ? ShiftDet[2*`EXPONENT-1:`EXPONENT] : ShiftDet[`EXPONENT-1:0] ;
-	
-	// Take out smaller mantissa and append shift space
-	assign Mmin = MaxAB ? A[`MANTISSA-1:0] : B[`MANTISSA-1:0] ; 
-	
-	// Take out larger mantissa	
-	assign Mmax = MaxAB ? B[`MANTISSA-1:0]: A[`MANTISSA-1:0] ;	
-	
-	// Common exponent
-	assign CExp = (MaxAB ? B[`MANTISSA+`EXPONENT-1:`MANTISSA] : A[`MANTISSA+`EXPONENT-1:`MANTISSA]) ;		
-	
-endmodule
-
-
-// Description:	 Alignment shift stage 1, performs 16|12|8|4 shift
-//
-
-
-// ONLY THIS MODULE IS HARDCODED for half precision fp16 and bfloat16
-module FPAddSub_AlignShift1(
-		//bf16,
-		MminP,
-		Shift,
-		Mmin
-	);
-	
-	// Input ports
-	//input bf16;
-	input [`MANTISSA-1:0] MminP ;						// Smaller mantissa after 16|12|8|4 shift
-	input [`EXPONENT-3:0] Shift ;						// Shift amount. Last 2 bits of shifting are done in next stage. Hence, we have [`EXPONENT - 2] bits
-	
-	// Output ports
-	output [`MANTISSA:0] Mmin ;						// The smaller mantissa
-	
-
-	wire bf16;
-	assign bf16 = 1'b1; //hardcoding to 1, to avoid ODIN issue. a `ifdef here wasn't working. apparently, nested `ifdefs don't work
-
-	// Internal signals
-	reg	  [`MANTISSA:0]		Lvl1;
-	reg	  [`MANTISSA:0]		Lvl2;
-	wire    [2*`MANTISSA+1:0]    Stage1;	
-	integer           i;                // Loop variable
-
-	always @(*) begin
-		if (bf16 == 1'b1) begin						
-//hardcoding for bfloat16
-	//For bfloat16, we can shift the mantissa by a max of 7 bits since mantissa has a width of 7. 
-	//Hence if either, bit[3]/bit[4]/bit[5]/bit[6]/bit[7] is 1, we can make it 0. This corresponds to bits [5:1] in our updated shift which doesn't contain last 2 bits.
-		//Lvl1 <= (Shift[1]|Shift[2]|Shift[3]|Shift[4]|Shift[5]) ? {temp_0} : {1'b1, MminP};  // MANTISSA + 1 width	
-		Lvl1 <= (|Shift[`EXPONENT-3:1]) ? 'd0 : {1'b1, MminP};  // MANTISSA + 1 width	
-		end
-		else begin
-		//for half precision fp16, 10 bits can be shifted. Hence, only shifts till 10 (01010)can be made. 
-		Lvl1 <= Shift[2] ? 'd0 : {1'b1, MminP};
-		end
-	end
-	
-	assign Stage1 = {Lvl1, Lvl1}; //2*MANTISSA + 2 width
-
-	always @(*) begin    					// Rotate {0 | 4 } bits
-	if(bf16 == 1'b1) begin
-	  case (Shift[0])
-			// Rotate by 0	
-			1'b0: Lvl2 <= Stage1[`MANTISSA:0];       			
-			// Rotate by 4	
-			1'b1: Lvl2 <= Stage1[`MANTISSA+4:4];
-			default: Lvl2 <= Stage1[`MANTISSA+4:4];
-	  endcase
-	end
-	else begin
-	  case (Shift[1:0])					// Rotate {0 | 4 | 8} bits
-			// Rotate by 0	
-			2'b00: Lvl2 <= Stage1[`MANTISSA:0];       			
-			// Rotate by 4	
-			2'b01: Lvl2 <= Stage1[`MANTISSA+4:4];
-			// Rotate by 8
-			2'b10: Lvl2 <= Stage1[`MANTISSA+8:8];
-			// Rotate by 12	
-			2'b11: Lvl2[`MANTISSA: 0] <= 0; 
-			default: Lvl2[`MANTISSA: 0] <= 0; 
-	  endcase
-	end
-	end
-
-	// Assign output to next shift stage
-	assign Mmin = Lvl2;
-	
-endmodule
-
-
-// Description:	 Alignment shift stage 2, performs 3|2|1 shift
-//
-
-
-module FPAddSub_AlignShift2(
-		MminP,
-		Shift,
-		Mmin
-	);
-	
-	// Input ports
-	input [`MANTISSA:0] MminP ;						// Smaller mantissa after 16|12|8|4 shift
-	input [1:0] Shift ;						// Shift amount. Last 2 bits
-	
-	// Output ports
-	output [`MANTISSA:0] Mmin ;						// The smaller mantissa
-	
-	// Internal Signal
-	reg	  [`MANTISSA:0]		Lvl3;
-	wire    [2*`MANTISSA+1:0]    Stage2;	
-	integer           j;               // Loop variable
-	
-	assign Stage2 = {MminP, MminP};
-
-	always @(*) begin    // Rotate {0 | 1 | 2 | 3} bits
-	  case (Shift[1:0])
-			// Rotate by 0
-			2'b00: Lvl3 <= Stage2[`MANTISSA:0];   
-			// Rotate by 1
-			2'b01: Lvl3 <= Stage2[`MANTISSA+1:1];
-			// Rotate by 2
-			2'b10: Lvl3 <= Stage2[`MANTISSA+2:2];
-			// Rotate by 3
-			2'b11: Lvl3 <= Stage2[`MANTISSA+3:3]; 
-	  endcase
-	end
-	
-	// Assign output
-	assign Mmin = Lvl3;						// Take out smaller mantissa				
-
-endmodule
-
-
-//
-// Description:	 Module that executes the addition or subtraction on mantissas.
-//
-
-
-module FPAddSub_ExecutionModule(
-		Mmax,
-		Mmin,
-		Sa,
-		Sb,
-		MaxAB,
-		OpMode,
-		Sum,
-		PSgn,
-		Opr
-    );
-
-	// Input ports
-	input [`MANTISSA-1:0] Mmax ;					// The larger mantissa
-	input [`MANTISSA:0] Mmin ;					// The smaller mantissa
-	input Sa ;								// Sign bit of larger number
-	input Sb ;								// Sign bit of smaller number
-	input MaxAB ;							// Indicates the larger number (0/A, 1/B)
-	input OpMode ;							// Operation to be performed (0/Add, 1/Sub)
-	
-	// Output ports
-	output [`DWIDTH:0] Sum ;					// The result of the operation
-	output PSgn ;							// The sign for the result
-	output Opr ;							// The effective (performed) operation
-
-	wire [`EXPONENT-1:0]temp_1;
-
-	assign Opr = (OpMode^Sa^Sb); 		// Resolve sign to determine operation
-	assign temp_1 = 0;
-	// Perform effective operation
-//SAMIDH_UNSURE 5--> 8
-
-	assign Sum = (OpMode^Sa^Sb) ? ({1'b1, Mmax, temp_1} - {Mmin, temp_1}) : ({1'b1, Mmax, temp_1} + {Mmin, temp_1}) ;
-	
-	// Assign result sign
-	assign PSgn = (MaxAB ? Sb : Sa) ;
-
-endmodule
-
-
-//
-// Description:	 Determine the normalization shift amount and perform 16-shift
-//
-
-
-module FPAddSub_NormalizeModule(
-		Sum,
-		Mmin,
-		Shift
-    );
-
-	// Input ports
-	input [`DWIDTH:0] Sum ;					// Mantissa sum including hidden 1 and GRS
-	
-	// Output ports
-	output [`DWIDTH:0] Mmin ;					// Mantissa after 16|0 shift
-	output [4:0] Shift ;					// Shift amount
-	//Changes in this doesn't matter since even Bfloat16 can't go beyond 7 shift to the mantissa (only 3 bits valid here)  
-	// Determine normalization shift amount by finding leading nought
-	assign Shift =  ( 
-		Sum[16] ? 5'b00000 :	 
-		Sum[15] ? 5'b00001 : 
-		Sum[14] ? 5'b00010 : 
-		Sum[13] ? 5'b00011 : 
-		Sum[12] ? 5'b00100 : 
-		Sum[11] ? 5'b00101 : 
-		Sum[10] ? 5'b00110 : 
-		Sum[9] ? 5'b00111 :
-		Sum[8] ? 5'b01000 :
-		Sum[7] ? 5'b01001 :
-		Sum[6] ? 5'b01010 :
-		Sum[5] ? 5'b01011 :
-		Sum[4] ? 5'b01100 : 5'b01101
-	//	Sum[19] ? 5'b01101 :
-	//	Sum[18] ? 5'b01110 :
-	//	Sum[17] ? 5'b01111 :
-	//	Sum[16] ? 5'b10000 :
-	//	Sum[15] ? 5'b10001 :
-	//	Sum[14] ? 5'b10010 :
-	//	Sum[13] ? 5'b10011 :
-	//	Sum[12] ? 5'b10100 :
-	//	Sum[11] ? 5'b10101 :
-	//	Sum[10] ? 5'b10110 :
-	//	Sum[9] ? 5'b10111 :
-	//	Sum[8] ? 5'b11000 :
-	//	Sum[7] ? 5'b11001 : 5'b11010
-	);
-	
-	reg	  [`DWIDTH:0]		Lvl1;
-	
-	always @(*) begin
-		// Rotate by 16?
-		Lvl1 <= Shift[4] ? {Sum[8:0], 8'b00000000} : Sum; 
-	end
-	
-	// Assign outputs
-	assign Mmin = Lvl1;						// Take out smaller mantissa
-
-endmodule
-
-
-// Description:	 Normalization shift stage 1, performs 12|8|4|3|2|1|0 shift
-//
-//Hardcoding loop start and end values of i. To avoid ODIN limitations. i=`DWIDTH*2+1 wasn't working.
-
-module FPAddSub_NormalizeShift1(
-		MminP,
-		Shift,
-		Mmin
-	);
-	
-	// Input ports
-	input [`DWIDTH:0] MminP ;						// Smaller mantissa after 16|12|8|4 shift
-	input [3:0] Shift ;						// Shift amount
-	
-	// Output ports
-	output [`DWIDTH:0] Mmin ;						// The smaller mantissa
-	
-	reg	  [`DWIDTH:0]		Lvl2;
-	wire    [2*`DWIDTH+1:0]    Stage1;	
-	reg	  [`DWIDTH:0]		Lvl3;
-	wire    [2*`DWIDTH+1:0]    Stage2;	
-	integer           i;               	// Loop variable
-	
-	assign Stage1 = {MminP, MminP};
-
-	always @(*) begin    					// Rotate {0 | 4 | 8 | 12} bits
-	  case (Shift[3:2])
-			// Rotate by 0
-			2'b00: Lvl2 <= Stage1[`DWIDTH:0];       		
-			// Rotate by 4
-			2'b01: Lvl2 <= Stage1[28:13];
-			// Rotate by 8
-			2'b10: Lvl2 <= Stage1[24:9];
-			// Rotate by 12
-			2'b11: Lvl2 <= Stage1[20:5];
-			default: Lvl2 <= Stage1[`DWIDTH:0];
-	  endcase
-	end
-	
-	assign Stage2 = {Lvl2, Lvl2};
-
-	always @(*) begin   				 		// Rotate {0 | 1 | 2 | 3} bits
-	  case (Shift[1:0])
-			// Rotate by 0
-			2'b00: Lvl3 <= Stage2[`DWIDTH:0];
-			// Rotate by 1
-			2'b01: Lvl3 <= Stage2[31:16];
-			// Rotate by 2
-			2'b10: Lvl3 <= Stage2[30:15];
-			// Rotate by 3
-			2'b11: Lvl3 <= Stage2[29:14];
-			default: Lvl3 <= Stage2[`DWIDTH:0];
-	  endcase
-	end
-	
-	// Assign outputs
-	assign Mmin = Lvl3;						// Take out smaller mantissa			
-	
-endmodule
-
-
-// Description:	 Normalization shift stage 2, calculates post-normalization
-//						 mantissa and exponent, as well as the bits used in rounding		
-//
-
-
-module FPAddSub_NormalizeShift2(
-		PSSum,
-		CExp,
-		Shift,
-		NormM,
-		NormE,
-		ZeroSum,
-		NegE,
-		R,
-		S,
-		FG
-	);
-	
-	// Input ports
-	input [`DWIDTH:0] PSSum ;					// The Pre-Shift-Sum
-	input [`EXPONENT-1:0] CExp ;
-	input [4:0] Shift ;					// Amount to be shifted
-
-	// Output ports
-	output [`MANTISSA-1:0] NormM ;				// Normalized mantissa
-	output [`EXPONENT:0] NormE ;					// Adjusted exponent
-	output ZeroSum ;						// Zero flag
-	output NegE ;							// Flag indicating negative exponent
-	output R ;								// Round bit
-	output S ;								// Final sticky bit
-	output FG ;
-
-	// Internal signals
-	wire MSBShift ;						// Flag indicating that a second shift is needed
-	wire [`EXPONENT:0] ExpOF ;					// MSB set in sum indicates overflow
-	wire [`EXPONENT:0] ExpOK ;					// MSB not set, no adjustment
-	
-	// Calculate normalized exponent and mantissa, check for all-zero sum
-	assign MSBShift = PSSum[`DWIDTH] ;		// Check MSB in unnormalized sum
-	assign ZeroSum = ~|PSSum ;			// Check for all zero sum
-	assign ExpOK = CExp - Shift ;		// Adjust exponent for new normalized mantissa
-	assign NegE = ExpOK[`EXPONENT] ;			// Check for exponent overflow
-	assign ExpOF = CExp - Shift + 1'b1 ;		// If MSB set, add one to exponent(x2)
-	assign NormE = MSBShift ? ExpOF : ExpOK ;			// Check for exponent overflow
-	assign NormM = PSSum[`DWIDTH-1:`EXPONENT+1] ;		// The new, normalized mantissa
-	
-	// Also need to compute sticky and round bits for the rounding stage
-	assign FG = PSSum[`EXPONENT] ; 
-	assign R = PSSum[`EXPONENT-1] ;
-	assign S = |PSSum[`EXPONENT-2:0] ;
-	
-endmodule
-
-
-// Description:	 Performs 'Round to nearest, tie to even'-rounding on the
-//						 normalized mantissa according to the G, R, S bits. Calculates
-//						 final result and checks for exponent overflow.
-//
-
-
-module FPAddSub_RoundModule(
-		ZeroSum,
-		NormE,
-		NormM,
-		R,
-		S,
-		G,
-		Sa,
-		Sb,
-		Ctrl,
-		MaxAB,
-		Z,
-		EOF
-    );
-
-	// Input ports
-	input ZeroSum ;					// Sum is zero
-	input [`EXPONENT:0] NormE ;				// Normalized exponent
-	input [`MANTISSA-1:0] NormM ;				// Normalized mantissa
-	input R ;							// Round bit
-	input S ;							// Sticky bit
-	input G ;
-	input Sa ;							// A's sign bit
-	input Sb ;							// B's sign bit
-	input Ctrl ;						// Control bit (operation)
-	input MaxAB ;
-	
-	// Output ports
-	output [`DWIDTH-1:0] Z ;					// Final result
-	output EOF ;
-	
-	// Internal signals
-	wire [`MANTISSA:0] RoundUpM ;			// Rounded up sum with room for overflow
-	wire [`MANTISSA-1:0] RoundM ;				// The final rounded sum
-	wire [`EXPONENT:0] RoundE ;				// Rounded exponent (note extra bit due to poential overflow	)
-	wire RoundUp ;						// Flag indicating that the sum should be rounded up
-        wire FSgn;
-	wire ExpAdd ;						// May have to add 1 to compensate for overflow 
-	wire RoundOF ;						// Rounding overflow
-	
-	wire [`EXPONENT:0]temp_2;
-	assign temp_2 = 0;
-	// The cases where we need to round upwards (= adding one) in Round to nearest, tie to even
-	assign RoundUp = (G & ((R | S) | NormM[0])) ;
-	
-	// Note that in the other cases (rounding down), the sum is already 'rounded'
-	assign RoundUpM = (NormM + 1) ;								// The sum, rounded up by 1
-	assign RoundM = (RoundUp ? RoundUpM[`MANTISSA-1:0] : NormM) ; 	// Compute final mantissa	
-	assign RoundOF = RoundUp & RoundUpM[`MANTISSA] ; 				// Check for overflow when rounding up
-
-	// Calculate post-rounding exponent
-	assign ExpAdd = (RoundOF ? 1'b1 : 1'b0) ; 				// Add 1 to exponent to compensate for overflow
-	assign RoundE = ZeroSum ? temp_2 : (NormE + ExpAdd) ; 							// Final exponent
-
-	// If zero, need to determine sign according to rounding
-	assign FSgn = (ZeroSum & (Sa ^ Sb)) | (ZeroSum ? (Sa & Sb & ~Ctrl) : ((~MaxAB & Sa) | ((Ctrl ^ Sb) & (MaxAB | Sa)))) ;
-
-	// Assign final result
-	assign Z = {FSgn, RoundE[`EXPONENT-1:0], RoundM[`MANTISSA-1:0]} ;
-	
-	// Indicate exponent overflow
-	assign EOF = RoundE[`EXPONENT];
-	
-endmodule
-
-
-//
-// Description:	 Check the final result for exception conditions and set
-//						 flags accordingly.
-//
-
-
-module FPAddSub_ExceptionModule(
-		Z,
-		NegE,
-		R,
-		S,
-		InputExc,
-		EOF,
-		P,
-		Flags
-    );
-	 
-	// Input ports
-	input [`DWIDTH-1:0] Z	;					// Final product
-	input NegE ;						// Negative exponent?
-	input R ;							// Round bit
-	input S ;							// Sticky bit
-	input [4:0] InputExc ;			// Exceptions in inputs A and B
-	input EOF ;
-	
-	// Output ports
-	output [`DWIDTH-1:0] P ;					// Final result
-	output [4:0] Flags ;				// Exception flags
-	
-	// Internal signals
-	wire Overflow ;					// Overflow flag
-	wire Underflow ;					// Underflow flag
-	wire DivideByZero ;				// Divide-by-Zero flag (always 0 in Add/Sub)
-	wire Invalid ;						// Invalid inputs or result
-	wire Inexact ;						// Result is inexact because of rounding
-	
-	// Exception flags
-	
-	// Result is too big to be represented
-	assign Overflow = EOF | InputExc[1] | InputExc[0] ;
-	
-	// Result is too small to be represented
-	assign Underflow = NegE & (R | S);
-	
-	// Infinite result computed exactly from finite operands
-	assign DivideByZero = &(Z[`MANTISSA+`EXPONENT-1:`MANTISSA]) & ~|(Z[`MANTISSA+`EXPONENT-1:`MANTISSA]) & ~InputExc[1] & ~InputExc[0];
-	
-	// Invalid inputs or operation
-	assign Invalid = |(InputExc[4:2]) ;
-	
-	// Inexact answer due to rounding, overflow or underflow
-	assign Inexact = (R | S) | Overflow | Underflow;
-	
-	// Put pieces together to form final result
-	assign P = Z ;
-	
-	// Collect exception flags	
-	assign Flags = {Overflow, Underflow, DivideByZero, Invalid, Inexact} ; 	
-	
-endmodule
-
-`endif
-
-

From 1751c4348d29f619801c1044539056d4b5c59674 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 14:09:56 -0400
Subject: [PATCH 13/56] eltwise_layer.v symlink

---
 parmys-plugin/tests/eltwise_layer/eltwise_layer.v | 1 +
 1 file changed, 1 insertion(+)
 create mode 120000 parmys-plugin/tests/eltwise_layer/eltwise_layer.v

diff --git a/parmys-plugin/tests/eltwise_layer/eltwise_layer.v b/parmys-plugin/tests/eltwise_layer/eltwise_layer.v
new file mode 120000
index 000000000..ff069de4e
--- /dev/null
+++ b/parmys-plugin/tests/eltwise_layer/eltwise_layer.v
@@ -0,0 +1 @@
+../../../third_party/vtr/verilog/eltwise_layer.v
\ No newline at end of file

From dd4e2feea0d7096b350819075c9e73dad117dcff Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 14:12:39 -0400
Subject: [PATCH 14/56] ci

---
 .github/workflows/licensing.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/licensing.yml b/.github/workflows/licensing.yml
index 8a4181df2..2e92bfac0 100644
--- a/.github/workflows/licensing.yml
+++ b/.github/workflows/licensing.yml
@@ -33,6 +33,7 @@ jobs:
           ./design_introspection-plugin/tests/selection_to_tcl_list/selection_to_tcl_list.v
           ./third_party/minilitex_ddr_arty/minilitex_ddr_arty.v
           ./third_party/VexRiscv_Lite/VexRiscv_Lite.v
+          ./third_party/vtr/verilog/eltwise_layer.v
           ./parmys-plugin/tests/raygentop/raygentop.v
           ./parmys-plugin/tests/eltwise_layer/hard_block_include.v
           ./parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml

From cb5432a806ccc80e9d9d3a1f2fd9e44973dbe2bd Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 14:14:11 -0400
Subject: [PATCH 15/56] rm files

---
 .../tests/eltwise_layer/hard_block_include.v  |    3 -
 ...cN10LB_mem20K_complexDSP_customSB_22nm.xml | 3246 -----------------
 .../k6_frac_N10_frac_chain_mem32K_40nm.xml    | 1505 --------
 parmys-plugin/tests/raygentop/raygentop.v     | 2978 ---------------
 4 files changed, 7732 deletions(-)
 delete mode 100644 parmys-plugin/tests/eltwise_layer/hard_block_include.v
 delete mode 100644 parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
 delete mode 100644 parmys-plugin/tests/raygentop/k6_frac_N10_frac_chain_mem32K_40nm.xml
 delete mode 100644 parmys-plugin/tests/raygentop/raygentop.v

diff --git a/parmys-plugin/tests/eltwise_layer/hard_block_include.v b/parmys-plugin/tests/eltwise_layer/hard_block_include.v
deleted file mode 100644
index cc4d502c5..000000000
--- a/parmys-plugin/tests/eltwise_layer/hard_block_include.v
+++ /dev/null
@@ -1,3 +0,0 @@
-`define complex_dsp
-`define hard_mem
-
diff --git a/parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml b/parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
deleted file mode 100644
index 8170d72b0..000000000
--- a/parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
+++ /dev/null
@@ -1,3246 +0,0 @@
-<!--
-    This is the architecture file for a modern Intel FPGA. The blocks (logic, RAM, DSP)
-    are Agilex-like, but the routing architecture is similar to Stratix IV. It is based
-    off the Stratix-10-like Architecture discussed in [1], the Agilex-like Architecture
-    mentioned in [6] and Stratix-IV-like Architecture mentioned in [5].
-
-    The delays and areas of various components in this arch come from COFFE [2]
-    runs using a 22nm technology node [3].
-
-    ##############################
-    Parameters
-    ##############################
-    Parameter | Value | Definition
-    __________|_______|______________________________
-    N         |    10 | Number of BLEs per cluster
-    W         |   300 | Channel width
-    L         |  4,16 | Wire segment length
-    I         |    60 | Number of cluster inputs
-    O         |    40 | Number of cluster outputs
-    K         |     6 | LUT size
-    Fs        |     3 | Switch block flexibility
-    Fcin      |  0.15 | Cluster input flexibility
-    Fcout     |   0.1 | Cluster output flexibility
-    Fclocal   |   0.5 | Local input crossbar population
-
-    ##############################
-    Logic Cluster
-    ##############################
-    This architecture has 10 ALMs (or FLEs: Fracturable Logic Elements) per Logic Cluster
-    (or LAB or CLB), where each ALM is a 6-LUT fracturable into
-    two 5-LUTs. The ALM has 8 inputs and 4 optionally registered outputs.The two 5-LUTs should
-    share at least two inputs. Each two ALM outputs are logically equivalent, which means any
-    output signal that can reach ALM.out[0] can reach ALM.out[1] and the same thing for
-    ALM.out[2] and ALM.out[3]. The ALMs in this architecture have an arithmetic mode
-    where each 5-LUT is fractured into two 4-LUTs, resulting in a total of four 4-LUTs and two
-    bits of addition per ALM. This architecture has a single carry chain that spans the 10 ALMs
-    in the LAB.
-
-    The LAB (or Logic Cluster or CLB) has 60 inputs and 40 outputs. Two outputs of each ALM are fed 
-    to the right and left LAB using direct links and are also fed back to the LAB as feedback connections 
-    sharing the 60 input ports with the signals coming from the routing channels.
-
-    The LAB has a 50% sparsely populated input crossbar.
-    
-    ##############################
-    DSP Slice
-    ##############################
-    This architecture has a DSP block that supports the following modes:
-
-    Fixed point modes:
-    _________________
-    1. 27x27 fixed point multiplier (multiply)
-    2. 27x27 fixed point mac (mac_int_27x27)
-    3. Two 18x19 fixed point multipliers (multiply)
-    4. Two 18x19 fixed point macs (mac_int_18x19)
-    5. Four 9x9 fixed point multipliers (multiply)
-    6. Four 9x9 fixed point macs (mac_int_9x9)
-    7. 27x27 plus 64 mode (mult_add_mode_27_27_64/mult_add_int_27x27). 27 * 27 + 64 -> 64. result = ax * ay + bx + chainin. chainout = result 
-    8. 18x19 sum-of-2 mode (sop_2_mode/int_sop_2) result = (bx * by) + (ax * ay) + chainin. chainout = result    
-    9. 18x19 plus 36 mode (mult_add_mode_18_19_36/mult_add_int_18x19). 18 * 19 + 36 -> 64. result = ax * ay + bx + chainin. chainout = result 
-    10. 9x9 sum-of-4 mode (sop_4_mode/int_sop_4) result = (dx * dy) + (cx * cy) + (bx * by) + (ax * ay) + chainin. chainout = result 
-    11. 9x9 sum-of-4 accum mode (sop_4_accum_mode/int_sop_accum_4) result = (dx * dy) + (cx * cy) + (bx * by) + (ax * ay) + chainin + accumulator. chainout = result 
-
-    Floating point modes:
-    ____________________
-
-    IMPORTANT:
-    The precisions supported are IEEE floating point 32-bit, IEEE floating point 16-bit and
-    Brain floating point (BF16). In the 16-bit mode descriptions, wherever "fp16" is used, it
-    refers to either IEEE floating point 16-bit or BF16. There are mode bits on the DSP slice
-    that can be used to differentiate between them. Doing this saves the effort of explicitly
-    specifying all the 16-bit modes twice in this file. 
-    Since the goal is architectural exploration and not functional simulation, the mode bits 
-    can be specified to any random value while instantiating the DSP slice in a Verilog benchmark.
-
-    1A. One fp32 multiplier (mult_fp_32)
-    1B. One fp32 multiplier, clocked (mult_fp_clk_32)
-    2A. One fp32 adder/subtractor (addition_fp_32)
-    2B. One fp32 adder/subtractor, clocked (addition_fp_clk_32)
-    3. One fp32 mac (mac_fp_32)
-    4A. Two fp16 multipliers (mult_fp_16)
-    4B. Two fp16 multipliers, clocked (mult_fp_clk_16)
-    5A. Two fp16 adders/subtractors (addition_fp_16)
-    5B. Two fp16 adders/subtractors, clocked (addition_fp_clk_16)
-    6. Two fp16 macs (mac_fp_16)
-    7. floating point fp16 sum-of-products mode (result = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b. chainout = third_inp or result) (fp16_sum_of_products_mode/fp16_mult_add)
-    8. floating point fp16 sum-of-2 mult mode (result = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b + fp32 chainin or third inp. chainout = third_inp or result) (fp16_sum_of_products_2_mult_mode/fp16_sop2_mult)
-    9. floating point fp16 sum-of-2 accum mode (result = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b + accumulator. chainout = result) (fp16_sum_of_products_2_accum_mode/fp16_sop2_accum)
-    10. floating point fp16 mult, fp32 add mode (chainout = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b. result = chainin + third_inp) (fp16_mult_fp32_add/fp16_mult_fp32_add)
-    11. floating point fp16 mult, fp32 accum mode (chainout = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b. result = third_inp + accumulator) (fp16_mult_fp32_accum/fp16_mult_fp32_accum)
-    12. floating point fp32 mult_then_add mode (result = fp32_mult_a * fp32_mult_b + chainin. chainout = third_inp or result) (fp32_mult_then_add/fp32_mult_then_add)
-    13. floating point fp32 mult_add mode (chainout = fp32_mult_a * fp32_mult_b. chainout = third_inp + chainin) (fp32_mult_add/fp32_mult_add)
-
-    The DSP block was designed in Verilog and COFFE's [2] hybrid flow was used to generate
-    area and delay results. The standard cell library used was Cadence GPDK 45nm (gsclib045_svt_v4.4)
-    and area/delay scaling euqations from [4] were used.
-
-    A 50% sparsely populated input crossbar was added to the DSP block but is commented out.
-    It was leading to a failure in in VPR. See the discussion on this commit: 
-    https://github.com/verilog-to-routing/vtr-verilog-to-routing/commit/ea7acf1582ece35e892c26b756aa302d2e12ddb2
-
-    Once this is fixed, the input crossbar code can be enabled.
-
-    ##############################
-    Memory Blocks
-    ##############################
-    The architecture also has a 20Kb memory blocks (or M20k or BRAM) that has true and simple dual port modes. 
-    In simple dual port mode the memory can be configured in the following modes: 512x40, 1024x20 and 2048x10,
-    while in true dual port mode it can be configured as: 1024x20 and 2048x10.
-
-    The BRAM has registered inputs and outputs. See details on how the delays for this block were 
-    obtained, in the comments before the specification of the BRAM primitive, towards the end of this file.
-
-    The BRAM doesn't have an input crossbar. Adding an input crossbar was leading to a 
-    seg fault in VPR, likely because of https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1475
-
-    Once this is fixed, an input crossbar can be added. The input crossbar delay from COFFE was: 29.47ps
-
-    ##############################
-    Routing/Interconnect
-    ##############################
-    The routing channel width is 300. Note that the channel width isn't specified directly in this arch file. 
-    Switch pattern calculations assume that value. During experiments, channel width can be specified using 
-    the command line switch `route_chan_width`.
-    The architecture uses unidirectional routing with wire segments of length 4 (260 out of 300 wires) and 
-    length 16 (40 out of 300 wires). The length 16 wires do not directly connect to block pins and are only 
-    accessible from the length 4 wires. Switches appear after every 4 blocks on the length 16 wires. 
-    The switch blocks use a custom switching pattern based on the Stratix-IV-like architecture used in the 
-    Titan flow [5]. 
-
-    ##############################
-    I/Os
-    ##############################
-    I/O pads are arranged along the perimeter of the FPGA. No area values provided for the I/Os.
-
-    ##############################
-    Comments on similarities and differences with Intel FPGA architecture.
-    ##############################
-    The main parameters of the logic blocks, DSPs and RAMs are similar to Intel FPGAs. But here are
-    some important points:
-    1. The DSP slice supports lower precision modes - int8 (actually 9x9) and 16-bit floating point
-       (IEEE half-precision and bfloat16). These modes are present in Intel Agilex FPGA DSPs.
-    2. DSPs are chained in vertical direction (chainin-chainout connections for output cascading
-       and scanin-scanout connections for input cascading). This is a common feature
-       in modern FPGAs.
-    3. There are no registers on the interconnect/routing wires in this architecture. That is a main
-       feature in the Stratix10 and Agilex families of Intel FPGA (it's called HyperFlex by Intel).
-    4. The architecture doesn't have sectors. All blocks are laid out in columns on the entire chip.
-       Most modern Intel FPGAs have sector based layout.
-    5. The IOs are on the perimeter, instead of being arranged in columns. Modern FPGAs arrange I/Os in
-       columns.
-    6. The routing architecture is similar to Stratix IV. There are wire segments of L=4 
-       and L=16. And a custom switch pattern (not a standard wilton switch) is used. 
-
-    [1] M. Eldafrawy, A. Boutros, S. Yazdanshenas, and V. Betz, "FPGA Logic Block Architectures for
-        Efficient Deep Learning Inference" in ACM TRETS, 2020
-    [2] S. Yazdanshenas, and V. Betz, "COFFE 2: Automatic Modelling and Optimization of
-        Complex and Heterogeneous FPGA Architectures" in ACM TRETS, 2019. 
-    [3] PTM High Performance 22nm Metal Gate / High-K / Strained-Si 22NM_BULK_HP, from http://ptm.asu.edu/
-        See: https://github.com/vaughnbetz/COFFE/blob/master/spice_models/ptm_22nm_bulk_hp.l
-    [4] A. Stillmaker and B. Baas, "Scaling equations for the accurate prediction of CMOS device 
-        performance from 180 nm to 7 nm" in Integration, the VLSI Journal (2017)
-    [5] K. E. Murray et al., “Timing-Driven Titan: Enabling Large Benchmarks and Exploring the Gap between 
-        Academic and Commercial CAD,” TRETS 2015.
-    [6] A. Arora et al., "Tensor Slices to the Rescue: Supercharging ML Acceleration on FPGAs", ISFPGA 2020.
--->
-
-<architecture>
-  <!-- 
-         ODIN II specific config begins 
-         Describes the types of user-specified netlist blocks (in blif, this corresponds to 
-         ".model [type_of_block]") that this architecture supports.
-
-         Note: Basic LUTs, I/Os, and flip-flops are not included here as there are 
-         already special structures in blif (.names, .input, .output, and .latch) 
-         that describe them.
-    -->
-  <models>
-    <model name="single_port_ram">
-      <input_ports>
-        <port name="we" clock="clk" combinational_sink_ports="out"/>
-        <!-- control -->
-        <port name="addr" clock="clk" combinational_sink_ports="out"/>
-        <!-- address lines -->
-        <port name="data" clock="clk" combinational_sink_ports="out"/>
-        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
-        <port name="clk" is_clock="1"/>
-        <!-- memories are often clocked -->
-      </input_ports>
-      <output_ports>
-        <port name="out" clock="clk"/>
-        <!-- output can be broken down into smaller bit widths minimum size 1 -->
-      </output_ports>
-    </model>
-    <model name="dual_port_ram">
-      <input_ports>
-        <port name="we1" clock="clk" combinational_sink_ports="out1"/>
-        <!-- write enable -->
-        <port name="we2" clock="clk" combinational_sink_ports="out2"/>
-        <!-- write enable -->
-        <port name="addr1" clock="clk" combinational_sink_ports="out1"/>
-        <!-- address lines -->
-        <port name="addr2" clock="clk" combinational_sink_ports="out2"/>
-        <!-- address lines -->
-        <port name="data1" clock="clk" combinational_sink_ports="out1"/>
-        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
-        <port name="data2" clock="clk" combinational_sink_ports="out2"/>
-        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
-        <port name="clk" is_clock="1"/>
-        <!-- memories are often clocked -->
-      </input_ports>
-      <output_ports>
-        <port name="out1" clock="clk"/>
-        <!-- output can be broken down into smaller bit widths minimum size 1 -->
-        <port name="out2" clock="clk"/>
-        <!-- output can be broken down into smaller bit widths minimum size 1 -->
-      </output_ports>
-    </model>
-    <!-- Used inside DSPs. 
-         Fixed point multiplication.
-         ODIN infers these when * sign appears in RTL. -->
-    <model name="multiply">
-      <input_ports>
-        <port name="a" combinational_sink_ports="out"/>
-        <port name="b" combinational_sink_ports="out"/>
-      </input_ports>
-      <output_ports>
-        <port name="out"/>
-      </output_ports>
-    </model>
-    <!-- Used inside DSPs. 
-         Floating point multiplication. -->
-    <model name="mult_fp_16">
-      <input_ports>
-        <port name="a" combinational_sink_ports="out"/>
-        <port name="b" combinational_sink_ports="out"/>
-      </input_ports>
-      <output_ports>
-        <port name="out"/>
-      </output_ports>
-    </model>   
-    <model name="mult_fp_32">
-      <input_ports>
-        <port name="a" combinational_sink_ports="out"/>
-        <port name="b" combinational_sink_ports="out"/>
-      </input_ports>
-      <output_ports>
-        <port name="out"/>
-      </output_ports>
-    </model>   
-    <model name="mult_fp_clk_16">
-      <input_ports>
-        <port name="clk" is_clock="1"/>  
-        <port name="a" clock="clk" combinational_sink_ports="out"/>
-        <port name="b" clock="clk" combinational_sink_ports="out"/>
-      </input_ports>
-      <output_ports>
-        <port name="out" clock="clk"/>
-      </output_ports>
-    </model>
-    <model name="mult_fp_clk_32">
-      <input_ports>
-        <port name="clk" is_clock="1"/>  
-        <port name="a" clock="clk" combinational_sink_ports="out"/>
-        <port name="b" clock="clk" combinational_sink_ports="out"/>
-      </input_ports>
-      <output_ports>
-        <port name="out" clock="clk"/>
-      </output_ports>
-    </model>
-    <!-- Only used inside CLBs for 1-bit adder.
-         ODIN infers these when + sign appears in RTL.
-         Can't use this inside DSP slice
-         because ODIN gets confused and starts to connect multi
-         bit adders and single bit adders in different PBs -->
-    <model name="adder">
-      <input_ports>
-        <port name="a" combinational_sink_ports="cout sumout"/>
-        <port name="b" combinational_sink_ports="cout sumout"/>
-        <port name="cin" combinational_sink_ports="cout sumout"/>
-      </input_ports>
-      <output_ports>
-        <port name="cout"/>
-        <port name="sumout"/>
-      </output_ports>
-    </model>
-    <!-- Multi bit floating point adder inside DSP slices -->
-    <model name="addition_fp_16">
-      <input_ports>
-        <port name="a" combinational_sink_ports="out"/>
-        <port name="b" combinational_sink_ports="out"/>
-      </input_ports>
-      <output_ports>
-        <port name="out"/>
-      </output_ports>
-    </model>
-    <model name="addition_fp_32">
-      <input_ports>
-        <port name="a" combinational_sink_ports="out"/>
-        <port name="b" combinational_sink_ports="out"/>
-      </input_ports>
-      <output_ports>
-        <port name="out"/>
-      </output_ports>
-    </model>
-    <model name="addition_fp_clk_16">
-      <input_ports>
-        <port name="clk" is_clock="1"/>  
-        <port name="a" clock="clk" combinational_sink_ports="out"/>
-        <port name="b" clock="clk" combinational_sink_ports="out"/>
-      </input_ports>
-      <output_ports>
-        <port name="out" clock="clk"/>
-      </output_ports>
-    </model>
-    <model name="addition_fp_clk_32">
-      <input_ports>
-        <port name="clk" is_clock="1"/>  
-        <port name="a" clock="clk" combinational_sink_ports="out"/>
-        <port name="b" clock="clk" combinational_sink_ports="out"/>
-      </input_ports>
-      <output_ports>
-        <port name="out" clock="clk"/>
-      </output_ports>
-    </model>
-    <!--A mode in DSP slice-->
-    <model name="int_sop_2">
-      <input_ports>
-        <port name="clk" is_clock="1"/>  
-        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="ax" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="ay" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="bx" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="by" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
-      </input_ports>
-      <output_ports>
-        <port name="result" clock="clk"/>
-        <port name="chainout"/>
-      </output_ports>
-    </model>
-    <!--A mode in DSP slice-->
-    <model name="mult_add_int_27x27">
-      <input_ports>
-        <port name="clk" is_clock="1"/>  
-        <port name="reset" clock="clk" combinational_sink_ports="result scanout chainout"/>
-        <port name="mode_sigs" clock="clk" combinational_sink_ports="result scanout chainout"/>
-        <port name="ax" clock="clk" combinational_sink_ports="result scanout chainout"/>
-        <port name="ay" clock="clk" combinational_sink_ports="result scanout chainout"/>
-        <port name="bx" clock="clk" combinational_sink_ports="result scanout chainout"/>
-        <port name="chainin" clock="clk" combinational_sink_ports="result scanout chainout"/>
-        <port name="scanin" clock="clk" combinational_sink_ports="result scanout chainout"/>
-      </input_ports>
-      <output_ports>
-        <port name="result" clock="clk"/>
-        <port name="chainout"/>
-        <port name="scanout"/>
-      </output_ports>
-    </model>
-    <model name="mult_add_int_18x19">
-      <input_ports>
-        <port name="clk" is_clock="1"/>  
-        <port name="reset" clock="clk" combinational_sink_ports="result scanout chainout"/>
-        <port name="mode_sigs" clock="clk" combinational_sink_ports="result scanout chainout"/>
-        <port name="ax" clock="clk" combinational_sink_ports="result scanout chainout"/>
-        <port name="ay" clock="clk" combinational_sink_ports="result scanout chainout"/>
-        <port name="bx" clock="clk" combinational_sink_ports="result scanout chainout"/>
-        <port name="chainin" clock="clk" combinational_sink_ports="result scanout chainout"/>
-        <port name="scanin" clock="clk" combinational_sink_ports="result scanout chainout"/>
-      </input_ports>
-      <output_ports>
-        <port name="result" clock="clk"/>
-        <port name="chainout"/>
-        <port name="scanout"/>
-      </output_ports>
-    </model>
-    <!--A mode in DSP slice-->
-    <model name="int_sop_4">
-      <input_ports>
-        <port name="clk" is_clock="1"/>  
-        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="ax" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="ay" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="bx" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="by" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="cx" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="cy" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="dx" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="dy" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
-      </input_ports>
-      <output_ports>
-        <port name="result" clock="clk"/>
-        <port name="chainout"/>
-      </output_ports>
-    </model>
-    <!--A mode in DSP slice-->
-    <model name="int_sop_accum_4">
-      <input_ports>
-        <port name="clk" is_clock="1"/>  
-        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="ax" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="ay" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="bx" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="by" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="cx" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="cy" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="dx" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="dy" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
-      </input_ports>
-      <output_ports>
-        <port name="result" clock="clk"/>
-        <port name="chainout"/>
-      </output_ports>
-    </model>
-    <!-- Floating point MAC inside DSP slices -->
-    <model name="mac_fp_16">
-      <input_ports>
-        <port name="clk" is_clock="1"/>  
-        <port name="reset" clock="clk" combinational_sink_ports="out"/>
-        <port name="a" clock="clk" combinational_sink_ports="out"/>
-        <port name="b" clock="clk" combinational_sink_ports="out"/>
-      </input_ports>
-      <output_ports>
-        <port name="out" clock="clk"/>
-      </output_ports>
-    </model>
-    <model name="mac_fp_32">
-      <input_ports>
-        <port name="clk" is_clock="1"/>  
-        <port name="reset" clock="clk" combinational_sink_ports="out"/>
-        <port name="a" clock="clk" combinational_sink_ports="out"/>
-        <port name="b" clock="clk" combinational_sink_ports="out"/>
-      </input_ports>
-      <output_ports>
-        <port name="out" clock="clk"/>
-      </output_ports>
-    </model>
-    <!-- Fixed point MAC inside DSP slices -->
-    <model name="mac_int_27x27">
-      <input_ports>
-        <port name="clk" is_clock="1"/>  
-        <port name="reset" clock="clk" combinational_sink_ports="out"/>
-        <port name="a" clock="clk" combinational_sink_ports="out"/>
-        <port name="b" clock="clk" combinational_sink_ports="out"/>
-      </input_ports>
-      <output_ports>
-        <port name="out" clock="clk"/>
-      </output_ports>
-    </model>
-    <model name="mac_int_18x19">
-      <input_ports>
-        <port name="clk" is_clock="1"/>  
-        <port name="reset" clock="clk" combinational_sink_ports="out"/>
-        <port name="a" clock="clk" combinational_sink_ports="out"/>
-        <port name="b" clock="clk" combinational_sink_ports="out"/>
-      </input_ports>
-      <output_ports>
-        <port name="out" clock="clk"/>
-      </output_ports>
-    </model>
-    <model name="mac_int_9x9">
-      <input_ports>
-        <port name="clk" is_clock="1"/>  
-        <port name="reset" clock="clk" combinational_sink_ports="out"/>
-        <port name="a" clock="clk" combinational_sink_ports="out"/>
-        <port name="b" clock="clk" combinational_sink_ports="out"/>
-      </input_ports>
-      <output_ports>
-        <port name="out" clock="clk"/>
-      </output_ports>
-    </model>
-    <!--A mode in DSP slice-->
-    <model name="fp16_mult_add">
-      <input_ports>
-        <port name="clk" is_clock="1"/>  
-        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="top_a" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="top_b" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="bot_a" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="bot_b" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="fp32_in" clock="clk" combinational_sink_ports="result chainout"/>
-      </input_ports>
-      <output_ports>
-        <port name="result" clock="clk"/>
-        <port name="chainout"/>
-      </output_ports>
-    </model> 
-    <!--A mode in DSP slice-->
-    <model name="fp16_sop2_mult">
-      <input_ports>
-        <port name="clk" is_clock="1"/>  
-        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="top_a" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="top_b" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="bot_a" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="bot_b" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="fp32_in" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
-      </input_ports>
-      <output_ports>
-        <port name="result" clock="clk"/>
-        <port name="chainout"/>
-      </output_ports>
-    </model> 
-    <!--A mode in DSP slice-->
-    <model name="fp16_sop2_accum">
-      <input_ports>
-        <port name="clk" is_clock="1"/>  
-        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="top_a" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="top_b" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="bot_a" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="bot_b" clock="clk" combinational_sink_ports="result chainout"/>
-      </input_ports>
-      <output_ports>
-        <port name="result" clock="clk"/>
-        <port name="chainout"/>
-      </output_ports>
-    </model>
-    <!--A mode in DSP slice-->
-    <model name="fp16_mult_fp32_add">
-      <input_ports>
-        <port name="clk" is_clock="1"/>  
-        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="top_a" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="top_b" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="bot_a" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="bot_b" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="fp32_in" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
-      </input_ports>
-      <output_ports>
-        <port name="result" clock="clk"/>
-        <port name="chainout"/>
-      </output_ports>
-    </model>    
-    <!--A mode in DSP slice-->
-    <model name="fp16_mult_fp32_accum">
-      <input_ports>
-        <port name="clk" is_clock="1"/>  
-        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="top_a" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="top_b" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="bot_a" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="bot_b" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="fp32_in" clock="clk" combinational_sink_ports="result chainout"/>
-      </input_ports>
-      <output_ports>
-        <port name="result" clock="clk"/>
-        <port name="chainout"/>
-      </output_ports>
-    </model>
-    <!--A mode in DSP slice-->
-    <model name="fp32_mult_add">
-      <input_ports>
-        <port name="clk" is_clock="1"/>  
-        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="a" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="b" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="fp32_in" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
-      </input_ports>
-      <output_ports>
-        <port name="result" clock="clk"/>
-        <port name="chainout"/>
-      </output_ports>
-    </model> 
-    <!--A mode in DSP slice-->
-    <model name="fp32_mult_then_add">
-      <input_ports>
-        <port name="clk" is_clock="1"/>  
-        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="mode_sigs" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="a" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="b" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="fp32_in" clock="clk" combinational_sink_ports="result chainout"/>
-        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
-      </input_ports>
-      <output_ports>
-        <port name="result" clock="clk"/>
-        <port name="chainout"/>
-      </output_ports>
-    </model> 
-  </models>
-  <tiles>
-    <tile name="io" area="0">
-      <sub_tile name="io" capacity="8">
-        <equivalent_sites>
-          <site pb_type="io" pin_mapping="direct"/>
-        </equivalent_sites>
-        <input name="outpad" num_pins="1"/>
-        <output name="inpad" num_pins="1"/>
-        <clock name="clock" num_pins="1"/>
-        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
-        <pinlocations pattern="custom">
-          <loc side="left">io.outpad io.inpad io.clock</loc>
-          <loc side="top">io.outpad io.inpad io.clock</loc>
-          <loc side="right">io.outpad io.inpad io.clock</loc>
-          <loc side="bottom">io.outpad io.inpad io.clock</loc>
-        </pinlocations>
-      </sub_tile>
-    </tile>
-    <tile name="clb" height="1" width="1" area="27905">
-      <sub_tile name="clb">
-        <equivalent_sites>
-          <site pb_type="clb" pin_mapping="direct"/>
-        </equivalent_sites>
-        <input name="I1" num_pins="15" equivalent="full"/>
-        <input name="I2" num_pins="15" equivalent="full"/>
-        <input name="I3" num_pins="15" equivalent="full"/>
-        <input name="I4" num_pins="15" equivalent="full"/>
-        <input name="cin" num_pins="1"/>
-        <output name="O" num_pins="40" equivalent="none"/>
-        <output name="cout" num_pins="1"/>
-        <clock name="clk" num_pins="1"/>
-        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
-          <fc_override port_name="cin" fc_type="frac" fc_val="0"/>
-          <fc_override port_name="cout" fc_type="frac" fc_val="0"/>
-          <!-- clock pins do not connect to local routing -->
-          <fc_override fc_type="frac" fc_val="0" port_name="clk"/>
-        </fc>
-        <pinlocations pattern="spread"/>
-      </sub_tile>
-    </tile>
-    <tile name="dsp_top" height="4" width="1" area="253779">
-      <sub_tile name="dsp_top">
-        <equivalent_sites>
-          <site pb_type="dsp_top" pin_mapping="direct"/>
-        </equivalent_sites>
-        <input name="reset" num_pins="1" is_non_clock_global="true"/>
-        <input name="dsp_I1" num_pins="64" />
-        <input name="dsp_I2" num_pins="64" />
-        <input name="chainin" num_pins="64"/>
-        <input name="scanin" num_pins="27"/>
-        <output name="result" num_pins="74"/>
-        <output name="chainout" num_pins="64"/>
-        <output name="scanout" num_pins="27"/>
-        <clock name="clk" num_pins="1"/>
-        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
-          <!-- clock pins and chain ports do not connect to local routing -->
-          <fc_override port_name="clk" fc_type="frac" fc_val="0"/>
-          <fc_override port_name="chainin" fc_type="frac" fc_val="0"/>
-          <fc_override port_name="chainout" fc_type="frac" fc_val="0"/>
-          <fc_override port_name="scanin" fc_type="frac" fc_val="0"/>
-          <fc_override port_name="scanout" fc_type="frac" fc_val="0"/>
-        </fc>
-        <pinlocations pattern="custom">
-        	  <loc side="left" yoffset="0">dsp_top.dsp_I1[31:0]</loc>
-	          <loc side="right" yoffset="1">dsp_top.dsp_I1[63:32]</loc>
-	          <loc side="left" yoffset="2">dsp_top.dsp_I2[31:0]</loc>
-	          <loc side="right" yoffset="3">dsp_top.dsp_I2[63:32]</loc>
-	          <loc side="top">dsp_top.chainin dsp_top.scanin</loc>
-	          <loc side="bottom">dsp_top.chainout dsp_top.scanout</loc>
-	          <loc side="right" yoffset="0">dsp_top.result[17:0] dsp_top.clk</loc>
-	          <loc side="left" yoffset="1">dsp_top.result[36:18]</loc>
-	          <loc side="right" yoffset="2">dsp_top.result[55:37] </loc>
-	          <loc side="left" yoffset="3">dsp_top.result[73:56] dsp_top.reset</loc>
-        </pinlocations>
-      </sub_tile>
-    </tile>
-    <tile name="memory" height="2" width="1" area="137668">
-      <sub_tile name="memory">
-        <equivalent_sites>
-          <site pb_type="memory" pin_mapping="direct"/>
-        </equivalent_sites>
-      <input name="addr1" num_pins="11"/>
-      <input name="addr2" num_pins="11"/>
-      <input name="data" num_pins="40"/>
-      <input name="we1" num_pins="1"/>
-      <input name="we2" num_pins="1"/>
-      <output name="out" num_pins="40"/>
-      <clock name="clk" num_pins="1"/>
-        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
-          <fc_override fc_type="frac" fc_val="0" port_name="clk"/>
-        </fc>  
-        <pinlocations pattern="spread"/>
-      </sub_tile>
-    </tile>
-  </tiles>
-  <!-- ODIN II specific config ends -->
-  <layout>
-    <!-- Physical descriptions begin -->
-    <auto_layout aspect_ratio="1.0">
-      <perimeter type="io" priority="101"/>
-      <corners type="EMPTY" priority="102"/>
-      <fill type="clb" priority="10"/>
-      <col type="dsp_top" startx="6" starty="1" repeatx="16" priority="20"/>
-      <col type="memory" startx="2" starty="1" repeatx="16" priority="20"/>
-    </auto_layout>
-    <!--
-    <fixed_layout name="mylayout" width="178" height="82">
-      <perimeter type="io" priority="101"/>
-      <corners type="EMPTY" priority="102"/>
-      
-      <col type="dsp_top"  startx="1"  starty="1"  priority="100"/>
-      <col type="clb"  startx="2"  starty="1"  priority="100"/>
-      <col type="clb"  startx="3"  starty="1"  priority="100"/>
-      <col type="dsp_top"  startx="4"  starty="1"  priority="100"/>
-      <col type="clb"  startx="5"  starty="1"  priority="100"/>
-      <col type="clb"  startx="6"  starty="1"  priority="100"/>
-      <col type="dsp_top"  startx="7"  starty="1"  priority="100"/>
-      <col type="clb"  startx="8"  starty="1"  priority="100"/>
-      <col type="clb"  startx="9"  starty="1"  priority="100"/>
-      <col type="dsp_top"  startx="10"  starty="1"  priority="100"/>
-      <col type="clb"  startx="11"  starty="1"  priority="100"/>
-      <col type="clb"  startx="12"  starty="1"  priority="100"/>
-      <col type="dsp_top"  startx="13"  starty="1"  priority="100"/>
-
-      <region type="clb" startx="14"   endx="88"   starty="1" incrx="5"  priority="20"/>
-      <region type="clb" startx="15"   endx="88"   starty="1" incrx="5"  priority="20"/>
-      <region type="clb" startx="16"   endx="88"   starty="1" incrx="5"  priority="20"/>
-      <region type="dsp_top" startx="17"   endx="88"   starty="1" incrx="5"  priority="20"/>
-      <region type="memory" startx="18"   endx="88"   starty="1" incrx="5"  priority="20"/>
-
-      <region type="memory" startx="89"   endx="163"   starty="1" incrx="5"  priority="20"/>
-      <region type="dsp_top" startx="90"   endx="163"   starty="1" incrx="5"  priority="20"/>
-      <region type="clb" startx="91"   endx="163"   starty="1" incrx="5"  priority="20"/>
-      <region type="clb" startx="92"   endx="163"   starty="1" incrx="5"  priority="20"/>
-      <region type="clb" startx="93"   endx="163"   starty="1" incrx="5"  priority="20"/>
-
-      <col type="dsp_top"  startx="164"  starty="1"  priority="20"/>
-      <col type="clb"  startx="165"  starty="1"  priority="1"/>
-      <col type="clb"  startx="166"  starty="1"  priority="1"/>
-      <col type="dsp_top"  startx="167"  starty="1"  priority="20"/>
-      <col type="clb"  startx="168"  starty="1"  priority="1"/>
-      <col type="clb"  startx="169"  starty="1"  priority="1"/>
-      <col type="dsp_top"  startx="170"  starty="1"  priority="20"/>
-      <col type="clb"  startx="171"  starty="1"  priority="1"/>
-      <col type="clb"  startx="172"  starty="1"  priority="1"/>
-      <col type="dsp_top"  startx="173"  starty="1"  priority="20"/>
-      <col type="clb"  startx="174"  starty="1"  priority="1"/>
-      <col type="clb"  startx="175"  starty="1"  priority="1"/>
-      <col type="dsp_top"  startx="176"  starty="1"  priority="20"/>
-    </fixed_layout> 
-    -->
-  </layout>
-  <device>
-    <sizing R_minW_nmos="13090" R_minW_pmos="19086.83"/>
-    <area grid_logic_tile_area="0"/>
-    <chan_width_distr>
-      <x distr="uniform" peak="1.000000"/>
-      <y distr="uniform" peak="1.000000"/>
-    </chan_width_distr>
-    <switch_block type="custom"/>
-    <connection_block input_switch_name="ipin_cblock"/>
-  </device>
-  <switchlist>
-    <switch type="mux" name="L4_driver" R="0.0" Cin="0.0" Cout="0.0" Tdel="207.9e-12" mux_trans_size="2.377" buf_size="35.69"/>
-    <!-- Delay of L16 driver is scaled from L4 by a factor of 1.5x (based on numbers from the Titan Stratix IV architecture file)
-	 Area numbers will not be totally accurate because of the same buf_size -->
-    <switch type="mux" name="L16_driver" R="0.0" Cin="0.0" Cout="0.0" Tdel="312.9e-12" mux_trans_size="2.377" buf_size="35.69"/> 
-    <switch type="mux" name="ipin_cblock" R="0.0" Cout="0.0" Cin="0.0" Tdel="130e-12" mux_trans_size="1.508" buf_size="11.71"/>
-  </switchlist>
-  <segmentlist>
-    <segment name="L4" freq="260" length="4" type="unidir" Rmetal="0.0" Cmetal="0.0">
-      <mux name="L4_driver"/>
-      <sb type="pattern">1 1 1 1 1</sb>
-      <cb type="pattern">1 1 1 1</cb>
-    </segment>
-    <segment name="L16" freq="40" length="16" type="unidir" Rmetal="0.0" Cmetal="0.0">
-      <mux name="L16_driver"/>
-      <!-- Vias from the top of the metal stack (global layers, where the long wires are 
-           implemented) down to the middle/bottom of the metal stack (semi-global layers, 
-           where the short wires are implemented) are expensive and restrictive.
-           As a result Startix IV only places long wire switch blocks every 4 LABs -->
-      <sb type="pattern">1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1</sb>
-      <!-- For the same reasons, long wires do not connect to block pins in Stratix IV -->
-      <cb type="pattern">0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0</cb>
-    </segment>
-  </segmentlist>
-  <directlist>
-    <!-- Direct connect from one LAB to the LAB directly below it (carry chain) -->
-    <direct name="adder_carry" from_pin="clb.cout" to_pin="clb.cin" x_offset="0" y_offset="-1" z_offset="0"/>
-    <!-- Direct connect from one DSP to the DSP directly below it -->
-    <direct name="dsp_out_chain" from_pin="dsp_top.chainout" from_side="bottom" to_pin="dsp_top.chainin" to_side="top" x_offset="0" y_offset="-4" z_offset="0"/>
-    <direct name="dsp_in_chain" from_pin="dsp_top.scanout" from_side="bottom" to_pin="dsp_top.scanin" to_side="top" x_offset="0" y_offset="-4" z_offset="0"/>
-  </directlist>
-  <complexblocklist>
-    <!-- Define I/O pads begin -->
-    <!-- Not sure of the area of an I/O (varies widely), and it's not relevant to the design of the FPGA core, so we're setting it to 0. -->
-    <pb_type name="io">
-      <input name="outpad" num_pins="1"/>
-      <output name="inpad" num_pins="1"/>
-      <clock name="clock" num_pins="1"/>
-      <!-- IOs can operate as either inputs or outputs.
-	     Delays below come from Ian Kuon. They are small, so they should be interpreted as
-	     the delays to and from registers in the I/O (and generally I/Os are registered 
-	     today and that is when you timing analyze them.
-	     -->
-      <mode name="inpad">
-        <pb_type name="inpad" blif_model=".input" num_pb="1">
-          <output name="inpad" num_pins="1"/>
-        </pb_type>
-        <interconnect>
-          <direct name="inpad" input="inpad.inpad" output="io.inpad">
-            <delay_constant max="4.243e-11" in_port="inpad.inpad" out_port="io.inpad"/>
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="outpad">
-        <pb_type name="outpad" blif_model=".output" num_pb="1">
-          <input name="outpad" num_pins="1"/>
-        </pb_type>
-        <interconnect>
-          <direct name="outpad" input="io.outpad" output="outpad.outpad">
-            <delay_constant max="1.394e-11" in_port="io.outpad" out_port="outpad.outpad"/>
-          </direct>
-        </interconnect>
-      </mode>
-      <!-- Every input pin is driven by 15% of the tracks in a channel, every output pin is driven by 10% of the tracks in a channel -->
-      <!-- IOs go on the periphery of the FPGA, for consistency, 
-          make it physically equivalent on all sides so that only one definition of I/Os is needed.
-          If I do not make a physically equivalent definition, then I need to define 4 different I/Os, one for each side of the FPGA
-        -->
-      <!-- Place I/Os on the sides of the FPGA -->
-    </pb_type>
-    <!-- Define I/O pads ends -->
-    <!-- Define general purpose logic block (CLB) begin -->
-    <pb_type name="clb">
-      <input name="I1" num_pins="15" equivalent="full"/>
-      <input name="I2" num_pins="15" equivalent="full"/>
-      <input name="I3" num_pins="15" equivalent="full"/>
-      <input name="I4" num_pins="15" equivalent="full"/>
-      <input name="cin" num_pins="1"/>
-      <output name="O" num_pins="40" equivalent="none"/>
-      <output name="cout" num_pins="1"/>
-      <clock name="clk" num_pins="1"/>
-      <pb_type name="lab" num_pb="1">
-        <input name="I1" num_pins="15"/>
-        <input name="I2" num_pins="15"/>
-        <input name="I3" num_pins="15"/>
-        <input name="I4" num_pins="15"/>
-        <input name="cin" num_pins="1"/>
-        <output name="O" num_pins="40"/>
-        <output name="cout" num_pins="1"/>
-        <clock name="clk" num_pins="1"/>
-        <!-- Describe fracturable logic element.  
-                 Each fracturable logic element has a 6-LUT that can alternatively operate as two 5-LUTs with shared inputs. 
-                 The outputs of the fracturable logic element can be optionally registered
-            -->
-        <pb_type name="fle" num_pb="10">
-          <input name="in" num_pins="8"/>
-          <input name="cin" num_pins="1"/>
-          <output name="out" num_pins="4"/>
-          <output name="cout" num_pins="1"/>
-          <clock name="clk" num_pins="1"/>
-          <!-- 
-                    The ALM inputs are as follows:
-                            A -> fle[0]
-                            B -> fle[1]
-                            C -> fle[2]
-                            D -> fle[3]
-                            E -> fle[4]
-                            F -> fle[5]
-                            G -> fle[6]
-                            H -> fle[7]
-              -->
-          <mode name="n2_lut5">
-            <pb_type name="ble5" num_pb="2">
-              <input name="in" num_pins="5"/>
-              <input name="cin" num_pins="1"/>
-              <output name="out" num_pins="2"/>
-              <output name="cout" num_pins="1"/>
-              <clock name="clk" num_pins="1"/>
-              <mode name="blut5">
-                <pb_type name="flut5" num_pb="1">
-                  <input name="in" num_pins="5"/>
-                  <output name="out" num_pins="2"/>
-                  <clock name="clk" num_pins="1"/>
-                  <!-- Regular LUT mode -->
-                  <pb_type name="lut5" blif_model=".names" num_pb="1" class="lut">
-                    <input name="in" num_pins="5" port_class="lut_in"/>
-                    <output name="out" num_pins="1" port_class="lut_out"/>
-                    <!-- LUT timing using delay matrix -->
-                    <!-- These are the physical delay inputs on a Stratix 10 LUT but because VPR cannot do LUT rebalancing,
-                             we instead take the average of these numbers to get more stable results
-                             note that those are the same delays for inputs A - E as the ones used for the 6-LUT, however, we have 
-                             subtracted the delay of the last mux stage to get the delay of inputs A - E till the 5-LUT output
-                             210.96e-12
-                             206.85e-12
-                             143.46e-12
-                             136.94e-12
-                             68.12e-12
-                          -->
-                    <delay_matrix type="max" in_port="lut5.in" out_port="lut5.out">
-                            153.27e-12
-                            153.27e-12
-                            153.27e-12
-                            153.27e-12
-                            153.27e-12
-                        </delay_matrix>
-                  </pb_type>
-                  <pb_type name="ff" blif_model=".latch" num_pb="2" class="flipflop">
-                    <input name="D" num_pins="1" port_class="D"/>
-                    <output name="Q" num_pins="1" port_class="Q"/>
-                    <clock name="clk" num_pins="1" port_class="clock"/>
-                    <T_setup value="18.91e-12" port="ff.D" clock="clk"/>
-                    <T_clock_to_Q max="60.32e-12" port="ff.Q" clock="clk"/>
-                  </pb_type>
-                  <interconnect>
-                    <direct name="lut5_in" input="flut5.in" output="lut5.in"/>
-                    <direct name="reg_in" input="flut5.in[0]" output="ff[0].D"/>
-                    <direct name="lut5_ff" input="lut5.out" output="ff[1].D">
-                      <delay_constant max="18.96e-12" in_port="lut5.out" out_port="ff[1].D"/>
-                      <pack_pattern name="ble5" in_port="lut5.out" out_port="ff[1].D"/>
-                    </direct>
-                    <complete name="clock" input="flut5.clk" output="ff.clk"/>
-                    <complete name="out_mux" input="ff.Q lut5.out" output="flut5.out">
-                      <delay_constant max="39.85e-12" in_port="lut5.out" out_port="flut5.out"/>
-                      <delay_constant max="39.85e-12" in_port="ff.Q" out_port="flut5.out"/>
-                    </complete>
-                  </interconnect>
-                </pb_type>
-                <interconnect>
-                  <direct name="direct1" input="ble5.in" output="flut5.in"/>
-                  <direct name="direct2" input="ble5.clk" output="flut5.clk"/>
-                  <direct name="direct3" input="flut5.out" output="ble5.out"/>
-                </interconnect>
-              </mode>
-              <mode name="arithmetic">
-                <pb_type name="arithmetic" num_pb="1">
-                  <input name="in" num_pins="4"/>
-                  <input name="cin" num_pins="1"/>
-                  <output name="out" num_pins="2"/>
-                  <output name="cout" num_pins="1"/>
-                  <clock name="clk" num_pins="1"/>
-                  <!-- Special dual-LUT mode that drives adder only -->
-                  <pb_type name="lut4" blif_model=".names" num_pb="2" class="lut">
-                    <input name="in" num_pins="4" port_class="lut_in"/>
-                    <output name="out" num_pins="1" port_class="lut_out"/>
-                    <!-- LUT timing using delay matrix -->
-                    <!-- These are the physical delay inputs on a Stratix 10 LUT but because VPR cannot do LUT rebalancing,
-                           we instead take the average of these numbers to get more stable results
-                           note that those are the same delays for inputs A - E as the ones used for the 6-LUT, however, we have 
-                           subtracted the delay of the last mux stage to get the delay of inputs A - E till the 5-LUT output
-                             168.12e-12
-                             164.02e-12
-                             100.63e-12
-                             94.11e-12
-                          -->
-                    <delay_matrix type="max" in_port="lut4.in" out_port="lut4.out">
-                            131.72e-12
-                            131.72e-12
-                            131.72e-12
-                            131.72e-12
-                        </delay_matrix>
-                  </pb_type>
-                  <pb_type name="adder" blif_model=".subckt adder" num_pb="1">
-                    <input name="a" num_pins="1"/>
-                    <input name="b" num_pins="1"/>
-                    <input name="cin" num_pins="1"/>
-                    <output name="cout" num_pins="1"/>
-                    <output name="sumout" num_pins="1"/>
-                    <delay_constant max="68.74e-12" in_port="adder.a" out_port="adder.sumout"/>
-                    <delay_constant max="68.74e-12" in_port="adder.b" out_port="adder.sumout"/>
-                    <delay_constant max="35.46e-12" in_port="adder.cin" out_port="adder.sumout"/>
-                    <delay_constant max="49.32e-12" in_port="adder.a" out_port="adder.cout"/>
-                    <delay_constant max="49.32e-12" in_port="adder.b" out_port="adder.cout"/>
-                    <delay_constant max="25.56e-12" in_port="adder.cin" out_port="adder.cout"/>
-                  </pb_type>
-                  <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
-                    <input name="D" num_pins="1" port_class="D"/>
-                    <output name="Q" num_pins="1" port_class="Q"/>
-                    <clock name="clk" num_pins="1" port_class="clock"/>
-                    <T_setup value="18.91e-12" port="ff.D" clock="clk"/>
-                    <T_clock_to_Q max="60.32e-12" port="ff.Q" clock="clk"/>
-                  </pb_type>
-                  <interconnect>
-                    <direct name="clock" input="arithmetic.clk" output="ff.clk"/>
-                    <direct name="lut4_in1" input="arithmetic.in" output="lut4[0].in"/>
-                    <direct name="lut4_in2" input="arithmetic.in" output="lut4[1].in"/>
-                    <direct name="lut_to_add1" input="lut4[0:0].out" output="adder.a"/>
-                    <direct name="lut_to_add2" input="lut4[1:1].out" output="adder.b"/>
-                    <direct name="add_to_ff" input="adder.sumout" output="ff.D">
-                      <delay_constant max="18.96e-12" in_port="adder.sumout" out_port="ff.D"/>
-                      <!--pack_pattern name="chain" in_port="adder.sumout" out_port="ff.D"/-->
-                    </direct>
-                    <direct name="carry_in" input="arithmetic.cin" output="adder.cin">
-                      <pack_pattern name="chain" in_port="arithmetic.cin" out_port="adder.cin"/>
-                    </direct>
-                    <direct name="carry_out" input="adder.cout" output="arithmetic.cout">
-                      <pack_pattern name="chain" in_port="adder.cout" out_port="arithmetic.cout"/>
-                    </direct>
-                    <complete name="sumout" input="ff.Q adder.sumout" output="arithmetic.out">
-                      <delay_constant max="39.85e-12" in_port="adder.sumout" out_port="arithmetic.out"/>
-                      <delay_constant max="39.85e-12" in_port="ff.Q" out_port="arithmetic.out"/>
-                    </complete>
-                  </interconnect>
-                </pb_type>
-                <interconnect>
-                  <direct name="direct1" input="ble5.in[3:0]" output="arithmetic.in"/>
-                  <direct name="carry_in" input="ble5.cin" output="arithmetic.cin">
-                    <pack_pattern name="chain" in_port="ble5.cin" out_port="arithmetic.cin"/>
-                  </direct>
-                  <direct name="carry_out" input="arithmetic.cout" output="ble5.cout">
-                    <pack_pattern name="chain" in_port="arithmetic.cout" out_port="ble5.cout"/>
-                  </direct>
-                  <direct name="direct2" input="ble5.clk" output="arithmetic.clk"/>
-                  <direct name="direct3" input="arithmetic.out" output="ble5.out"/>
-                </interconnect>
-              </mode>
-            </pb_type>
-            <interconnect>
-              <!-- Shared inputs between the two 5-LUTs -->
-              <complete name="lut5_reg1" input="fle.in[0]" output="ble5[0].in[0] ble5[1].in[1]"/>
-              <complete name="lut5_reg2" input="fle.in[1]" output="ble5[0].in[1] ble5[1].in[0]"/>
-              <!-- Rest of the 5-LUT inputs -->
-              <direct name="lut5_inputs_1" input="fle.in[4:2]" output="ble5[0].in[4:2]"/>
-              <direct name="lut5_inputs_22" input="fle.in[7:5]" output="ble5[1].in[4:2]"/>
-              <direct name="lut5_outputs_1" input="ble5[0].out" output="fle.out[1:0]"/>
-              <direct name="lut5_outputs_2" input="ble5[1].out" output="fle.out[3:2]"/>
-              <direct name="carry_in" input="fle.cin" output="ble5[0].cin">
-                <pack_pattern name="chain" in_port="fle.cin" out_port="ble5[0].cin"/>
-              </direct>
-              <direct name="carry_out" input="ble5[1].cout" output="fle.cout">
-                <pack_pattern name="chain" in_port="ble5[1].cout" out_port="fle.cout"/>
-              </direct>
-              <direct name="carry_link" input="ble5[0].cout" output="ble5[1].cin">
-                <pack_pattern name="chain" in_port="ble5[0].cout" out_port="ble5[1].cout"/>
-              </direct>
-              <complete name="clock" input="fle.clk" output="ble5[1:0].clk"/>
-            </interconnect>
-          </mode>
-          <!-- n2_lut5 -->
-          <mode name="n1_lut6">
-            <pb_type name="ble6" num_pb="1">
-              <input name="in" num_pins="6"/>
-              <output name="out" num_pins="4"/>
-              <clock name="clk" num_pins="1"/>
-              <pb_type name="lut6" blif_model=".names" num_pb="1" class="lut">
-                <input name="in" num_pins="6" port_class="lut_in"/>
-                <output name="out" num_pins="1" port_class="lut_out"/>
-                <!-- LUT timing using delay matrix -->
-                <!-- These are the physical delay inputs on a Stratix 10 LUT but because VPR cannot do LUT rebalancing,
-                           we instead take the average of these numbers to get more stable results
-                           257.8e-12
-                           253.69e-12
-                           190.3e-12
-                           183.78e-12
-                           114.96e-12
-                           77.18e-12
-                      -->
-                <delay_matrix type="max" in_port="lut6.in" out_port="lut6.out">
-                        179.6e-12
-                        179.6e-12
-                        179.6e-12
-                        179.6e-12
-                        179.6e-12
-                        179.6e-12
-                    </delay_matrix>
-              </pb_type>
-              <pb_type name="ff" blif_model=".latch" num_pb="2" class="flipflop">
-                <input name="D" num_pins="1" port_class="D"/>
-                <output name="Q" num_pins="1" port_class="Q"/>
-                <clock name="clk" num_pins="1" port_class="clock"/>
-                <T_setup value="18.91e-12" port="ff.D" clock="clk"/>
-                <T_clock_to_Q max="60.32e-12" port="ff.Q" clock="clk"/>
-              </pb_type>
-              <interconnect>
-                <direct name="lut6_inputs" input="ble6.in" output="lut6.in"/>
-                <direct name="lut6_ff" input="lut6.out" output="ff[1].D">
-                  <delay_constant max="18.96e-12" in_port="lut6.out" out_port="ff[1].D"/>
-                  <pack_pattern name="ble6" in_port="lut6.out" out_port="ff[1].D"/>
-                </direct>
-                <complete name="clock" input="ble6.clk" output="ff.clk"/>
-                <direct name="input_to_ff" input="ble6.in[0]" output="ff[0].D"/>
-                <mux name="mux1" input="ff[0].Q lut6.out" output="ble6.out[0]">
-                  <delay_constant max="39.85e-12" in_port="lut6.out" out_port="ble6.out[0]"/>
-                  <delay_constant max="39.85e-12" in_port="ff[0].Q" out_port="ble6.out[0]"/>
-                </mux>
-                <!-- This mux is the same as mux1 but connected to output 2 -->
-                <mux name="mux2" input="ff[0].Q lut6.out" output="ble6.out[1]">
-                  <delay_constant max="39.85e-12" in_port="lut6.out" out_port="ble6.out[1]"/>
-                  <delay_constant max="39.85e-12" in_port="ff[0].Q" out_port="ble6.out[1]"/>
-                </mux>
-                <mux name="mux3" input="ff[1].Q lut6.out" output="ble6.out[2]">
-                  <delay_constant max="39.85e-12" in_port="lut6.out" out_port="ble6.out[2]"/>
-                  <delay_constant max="39.85e-12" in_port="ff[1].Q" out_port="ble6.out[2]"/>
-                </mux>
-                <!-- This mux is the same as mux2 but connected to output 3 -->
-                <mux name="mux4" input="ff[1].Q lut6.out" output="ble6.out[3]">
-                  <delay_constant max="39.85e-12" in_port="lut6.out" out_port="ble6.out[3]"/>
-                  <delay_constant max="39.85e-12" in_port="ff[1].Q" out_port="ble6.out[3]"/>
-                </mux>
-              </interconnect>
-            </pb_type>
-            <interconnect>
-              <!-- ble6 takes inputs A, B, C, D, E, & F; where F is fle[7] -->
-              <direct name="lut6_inputs1" input="fle.in[4:0]" output="ble6.in[4:0]"/>
-              <direct name="lut6_inputs2" input="fle.in[7]" output="ble6.in[5]"/>
-              <direct name="direct2" input="ble6.out" output="fle.out"/>
-              <direct name="direct4" input="fle.clk" output="ble6.clk"/>
-            </interconnect>
-          </mode>
-          <!-- n1_lut6 -->
-        </pb_type>
-        <interconnect>
-          <!-- 50% sparsely populated local routing -->
-          <!-- This 50% sparsity pattern divides the cluster inputs and local feedbacks into four groups, 
-               and then selects two of the four groups to feed each LUT input. This means half of the cluster 
-               inputs and local feedbacks can feed each LUT input. There is partial overlap in the inputs that 
-               feed the various LUT inputs, which helps routability vs. simply having half the cluster inputs 
-               feed one set of half the LUT inputs and the other half of cluster inputs feed the other set of 
-               LUT inputs. This pattern is used by Stratix (I - 10) architectures. -->
-          <complete name="lutA" input="lab.I4 lab.I3" output="fle[9:0].in[0:0]">
-            <delay_constant max="74.71e-12" in_port="lab.I4" out_port="fle.in[0:0]"/>
-            <delay_constant max="74.71e-12" in_port="lab.I3" out_port="fle.in[0:0]"/>
-          </complete>
-          <complete name="lutB" input="lab.I3 lab.I2" output="fle[9:0].in[1:1]">
-            <delay_constant max="74.71e-12" in_port="lab.I3" out_port="fle.in[1:1]"/>
-            <delay_constant max="74.71e-12" in_port="lab.I2" out_port="fle.in[1:1]"/>
-          </complete>
-          <complete name="lutC" input="lab.I2 lab.I1" output="fle[9:0].in[2:2]">
-            <delay_constant max="74.71e-12" in_port="lab.I2" out_port="fle.in[2:2]"/>
-            <delay_constant max="74.71e-12" in_port="lab.I1" out_port="fle.in[2:2]"/>
-          </complete>
-          <complete name="lutD" input="lab.I4 lab.I2" output="fle[9:0].in[3:3]">
-            <delay_constant max="74.71e-12" in_port="lab.I4" out_port="fle.in[3:3]"/>
-            <delay_constant max="74.71e-12" in_port="lab.I2" out_port="fle.in[3:3]"/>
-          </complete>
-          <complete name="lutE" input="lab.I3 lab.I1" output="fle[9:0].in[4:4]">
-            <delay_constant max="74.71e-12" in_port="lab.I3" out_port="fle.in[4:4]"/>
-            <delay_constant max="74.71e-12" in_port="lab.I1" out_port="fle.in[4:4]"/>
-          </complete>
-          <complete name="lutF" input="lab.I4 lab.I1" output="fle[9:0].in[5:5]">
-            <delay_constant max="74.71e-12" in_port="lab.I4" out_port="fle.in[5:5]"/>
-            <delay_constant max="74.71e-12" in_port="lab.I1" out_port="fle.in[5:5]"/>
-          </complete>
-          <complete name="lutG" input="lab.I4 lab.I3" output="fle[9:0].in[6:6]">
-            <delay_constant max="74.71e-12" in_port="lab.I4" out_port="fle.in[6:6]"/>
-            <delay_constant max="74.71e-12" in_port="lab.I3" out_port="fle.in[6:6]"/>
-          </complete>
-          <complete name="lutH" input="lab.I3 lab.I2" output="fle[9:0].in[7:7]">
-            <delay_constant max="74.71e-12" in_port="lab.I3" out_port="fle.in[7:7]"/>
-            <delay_constant max="74.71e-12" in_port="lab.I2" out_port="fle.in[7:7]"/>
-          </complete>
-          <complete name="clks" input="lab.clk" output="fle[9:0].clk"/>
-          <!-- This way of specifying direct connection to clb outputs is important because this architecture uses automatic spreading of opins.  
-                     By grouping to output pins in this fashion, if a logic block is completely filled by 6-LUTs, 
-                     then the outputs those 6-LUTs take get evenly distributed across all four sides of the CLB instead of clumped on two sides (which is what happens with a more
-                     naive specification).
-              -->
-          <direct name="labouts1" input="fle[9:0].out[0]" output="lab.O[9:0]"/>
-          <direct name="labouts2" input="fle[9:0].out[1]" output="lab.O[19:10]"/>
-          <direct name="labouts3" input="fle[9:0].out[2]" output="lab.O[29:20]"/>
-          <direct name="labouts4" input="fle[9:0].out[3]" output="lab.O[39:30]"/>
-          <!-- Carry chain links -->
-          <direct name="carry_in" input="lab.cin" output="fle[0:0].cin">
-            <!-- Put all inter-block carry chain delay on this one edge -->
-            <delay_constant max="18.47e-12" in_port="lab.cin" out_port="fle[0:0].cin"/>
-            <pack_pattern name="chain" in_port="lab.cin" out_port="fle[0:0].cin"/>
-          </direct>
-          <direct name="carry_out" input="fle[9:9].cout" output="lab.cout">
-            <pack_pattern name="chain" in_port="fle[9:9].cout" out_port="lab.cout"/>
-          </direct>
-          <direct name="carry_link" input="fle[8:0].cout" output="fle[9:1].cin">
-            <pack_pattern name="chain" in_port="fle[8:0].cout" out_port="fle[9:1].cin"/>
-          </direct>
-        </interconnect>
-      </pb_type>
-      <interconnect>
-        <direct name="carry_in" input="clb.cin" output="lab.cin"/>
-        <direct name="carry_out" input="lab.cout" output="clb.cout"/>
-        <direct name="clock" input="clb.clk" output="lab.clk"/>
-        <complete name="Input_feedback_I1" input="clb.I1 lab.O[4:0]" output="lab.I1"/>
-        <complete name="Input_feedback_I2" input="clb.I2 lab.O[24:20]" output="lab.I2"/>
-        <complete name="Input_feedback_I3" input="clb.I3 lab.O[9:5]" output="lab.I3"/>
-        <complete name="Input_feedback_I4" input="clb.I4 lab.O[29:25]" output="lab.I4"/>
-        <!--
-        <direct name="Input_I1" input="clb.I1" output="lab.I1"/>
-        <direct name="Input_I2" input="clb.I2" output="lab.I2"/>
-        <direct name="Input_I3" input="clb.I3" output="lab.I3"/>
-        <direct name="Input_I4" input="clb.I4" output="lab.I4"/>
-        -->
-        <direct name="output" input="lab.O" output="clb.O"/>
-      </interconnect>
-    </pb_type>
-    <!-- Define general purpose logic block (CLB) ends -->
-
-    <!-- Define DSP slice begin -->
-    <pb_type name="dsp_top">
-      <input name="reset" num_pins="1" is_non_clock_global="true"/>
-      <input name="dsp_I1" num_pins="64" />
-      <input name="dsp_I2" num_pins="64" />
-      <input name="chainin" num_pins="64"/>
-      <input name="scanin" num_pins="27"/>
-      <output name="result" num_pins="74"/>
-      <output name="chainout" num_pins="64"/>
-      <output name="scanout" num_pins="27"/>
-      <clock name="clk" num_pins="1"/>
-
-    <pb_type name="dsp" num_pb="1">
-      <input name="reset" num_pins="1"/>
-      <input name="dsp_I1" num_pins="64"/>
-      <input name="dsp_I2" num_pins="64"/>
-      <input name="chainin" num_pins="64"/>
-      <input name="scanin" num_pins="27"/>
-      <output name="result" num_pins="74"/>
-      <output name="chainout" num_pins="64"/>
-      <output name="scanout" num_pins="27"/>
-      <clock name="clk" num_pins="1"/>
-
-      <pb_type name="dsp_pb" num_pb="1">
-        <input name="reset" num_pins="1"/>
-        <input name="mode_sigs" num_pins="12"/>
-        <input name="datain" num_pins="116"/>
-        <input name="chainin" num_pins="64"/>
-        <input name="scanin" num_pins="27"/>
-        <output name="result" num_pins="74"/>
-        <output name="chainout" num_pins="64"/>
-        <output name="scanout" num_pins="27"/>
-        <clock name="clk" num_pins="1"/>
-
-        <!-- fixed-point multiplier mode (1 27x27 multiplier) result = ax*ay -->
-        <mode name="one_mult_27x27">
-          <pb_type name="one_mult_27x27" num_pb="1">
-            <input name="a" num_pins="27"/>
-            <input name="b" num_pins="27"/>
-            <output name="out" num_pins="54"/>
-            <pb_type name="mult_27x27" blif_model=".subckt multiply" num_pb="1">
-              <input name="a" num_pins="27"/>
-              <input name="b" num_pins="27"/>
-              <output name="out" num_pins="54"/>
-              <delay_constant max="2.14e-9" in_port="mult_27x27.a" out_port="mult_27x27.out"/>
-              <delay_constant max="2.14e-9" in_port="mult_27x27.b" out_port="mult_27x27.out"/>
-            </pb_type>
-            <interconnect>
-              <direct name="a2a" input="one_mult_27x27.a" output="mult_27x27.a">
-              </direct>
-              <direct name="b2b" input="one_mult_27x27.b" output="mult_27x27.b">
-              </direct>
-              <direct name="out2out" input="mult_27x27.out" output="one_mult_27x27.out">
-              </direct>
-            </interconnect>
-          </pb_type>
-          <interconnect>
-            <direct name="datain2a" input="dsp_pb.datain[26:0]" output="one_mult_27x27.a">
-            </direct>
-            <direct name="datain2b" input="dsp_pb.datain[53:27]" output="one_mult_27x27.b">
-            </direct>
-            <direct name="out2dataout" input="one_mult_27x27.out" output="dsp_pb.result[53:0]">
-            </direct>
-          </interconnect>
-        </mode>
-
-        <!-- fixed-point multiplier mode (2 18x19 multipliers) result[some:bits] = ax*ay, result[other:bits] = bx*by -->
-        <mode name="two_mult_18x19">
-          <pb_type name="two_mult_18x19" num_pb="2">
-            <input name="a" num_pins="18"/>
-            <input name="b" num_pins="19"/>
-            <output name="out" num_pins="37"/>
-            <pb_type name="mult_18x19" blif_model=".subckt multiply" num_pb="1">
-              <input name="a" num_pins="18"/>
-              <input name="b" num_pins="19"/>
-              <output name="out" num_pins="37"/>
-              <delay_constant max="2.14e-9" in_port="mult_18x19.a" out_port="mult_18x19.out"/>
-              <delay_constant max="2.14e-9" in_port="mult_18x19.b" out_port="mult_18x19.out"/>
-            </pb_type>
-            <interconnect>
-              <direct name="a2a" input="two_mult_18x19.a" output="mult_18x19.a">
-                 </direct>
-              <direct name="b2b" input="two_mult_18x19.b" output="mult_18x19.b">
-                 </direct>
-              <direct name="out2out" input="mult_18x19.out" output="two_mult_18x19.out">
-                 </direct>
-            </interconnect>
-          </pb_type>
-          <interconnect>
-            <direct name="datain2a1" input="dsp_pb.datain[17:0]" output="two_mult_18x19[0].a">
-            </direct>
-            <direct name="datain2b1" input="dsp_pb.datain[36:18]" output="two_mult_18x19[0].b">
-            </direct>
-            <direct name="datain2a2" input="dsp_pb.datain[54:37]" output="two_mult_18x19[1].a">
-            </direct>
-            <direct name="datain2b2" input="dsp_pb.datain[73:55]" output="two_mult_18x19[1].b">
-            </direct>
-            <direct name="out2result" input="two_mult_18x19.out" output="dsp_pb.result[73:0]">
-            </direct>
-          </interconnect>
-        </mode>
-
-        <!-- Fixed point multiplier mode (4 9x9 multipliers) result = ax*ay -->
-        <mode name="mult_9x9_fixed_pt_mode">
-          <pb_type name="mult_9x9_fixed_pt" blif_model=".subckt multiply" num_pb="4">
-            <input name="a" num_pins="9"/>
-            <input name="b" num_pins="9"/>
-            <output name="out" num_pins="18"/>
-
-            <delay_constant max="2.14e-9" in_port="mult_9x9_fixed_pt.a" out_port="mult_9x9_fixed_pt.out"/>
-            <delay_constant max="2.14e-9" in_port="mult_9x9_fixed_pt.b" out_port="mult_9x9_fixed_pt.out"/>
-          </pb_type>
-          <interconnect>
-            <direct name="atoa0" input="dsp_pb.datain[8:0]" output="mult_9x9_fixed_pt[0].a"/>
-            <direct name="btob0" input="dsp_pb.datain[17:9]" output="mult_9x9_fixed_pt[0].b"/>
-            <direct name="atoa1" input="dsp_pb.datain[26:18]" output="mult_9x9_fixed_pt[1].a"/>
-            <direct name="btob1" input="dsp_pb.datain[35:27]" output="mult_9x9_fixed_pt[1].b"/>
-            <direct name="atoa2" input="dsp_pb.datain[44:36]" output="mult_9x9_fixed_pt[2].a"/>
-            <direct name="btob2" input="dsp_pb.datain[53:45]" output="mult_9x9_fixed_pt[2].b"/>
-            <direct name="atoa3" input="dsp_pb.datain[62:54]" output="mult_9x9_fixed_pt[3].a"/>
-            <direct name="btob3" input="dsp_pb.datain[71:63]" output="mult_9x9_fixed_pt[3].b"/>
-            <direct name="sumouttosumout0" input="mult_9x9_fixed_pt[0].out" output="dsp_pb.result[17:0]"/>
-            <direct name="sumouttosumout1" input="mult_9x9_fixed_pt[1].out" output="dsp_pb.result[35:18]"/>
-            <direct name="sumouttosumout2" input="mult_9x9_fixed_pt[2].out" output="dsp_pb.result[53:36]"/>
-            <direct name="sumouttosumout3" input="mult_9x9_fixed_pt[3].out" output="dsp_pb.result[71:54]"/>
-          </interconnect>
-        </mode>
-
-        <!-- fixed-point multiplier-add-sum mode result = (bx * by) + (ax * ay) + chainin. chainout = result -->
-        <mode name="sop_2_mode">
-          <pb_type name="sop_2" num_pb="1" blif_model=".subckt int_sop_2">
-            <input name="reset" num_pins="1"/>
-            <input name="mode_sigs" num_pins="12"/>
-            <input name="ax" num_pins="18"/>
-            <input name="ay" num_pins="19"/>
-            <input name="bx" num_pins="18"/>
-            <input name="by" num_pins="19"/>
-            <input name="chainin" num_pins="37"/>
-            <output name="result" num_pins="37"/>
-            <output name="chainout" num_pins="37"/>
-            <clock name="clk" num_pins="1"/>
-
-            <delay_constant max="2.14e-9" in_port="sop_2.reset" out_port="sop_2.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_2.mode_sigs" out_port="sop_2.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_2.ax" out_port="sop_2.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_2.ay" out_port="sop_2.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_2.bx" out_port="sop_2.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_2.by" out_port="sop_2.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_2.chainin" out_port="sop_2.result"/>
-
-            <delay_constant max="2.14e-9" in_port="sop_2.reset" out_port="sop_2.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_2.mode_sigs" out_port="sop_2.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_2.ax" out_port="sop_2.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_2.ay" out_port="sop_2.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_2.bx" out_port="sop_2.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_2.by" out_port="sop_2.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_2.chainin" out_port="sop_2.chainout"/>
-
-            <T_setup value="18.91e-12" port="sop_2.mode_sigs" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_2.ax" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_2.ay" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_2.bx" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_2.by" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_2.chainin" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_2.reset" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_2.result" clock="clk"/>
-
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.mode_sigs" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.ax" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.ay" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.bx" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.by" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.chainin" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.reset" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_2.result" clock="clk"/>
-          </pb_type>
-          <interconnect>
-            <direct name="clk" input="dsp_pb.clk" output="sop_2.clk">
-            </direct>
-            <direct name="reset" input="dsp_pb.reset" output="sop_2.reset">
-            </direct>
-            <direct name="modesigs" input="dsp_pb.mode_sigs" output="sop_2.mode_sigs">
-            </direct>
-            <direct name="datain2ax" input="dsp_pb.datain[17:0]" output="sop_2.ax">
-            </direct>
-            <direct name="datain2ay" input="dsp_pb.datain[36:18]" output="sop_2.ay">
-            </direct>
-            <direct name="datain2bx" input="dsp_pb.datain[54:37]" output="sop_2.bx">
-            </direct>
-            <direct name="datain2by" input="dsp_pb.datain[73:55]" output="sop_2.by">
-            </direct>
-            <direct name="chainin"   input="dsp_pb.chainin[36:0]" output="sop_2.chainin">
-            </direct>
-            <direct name="dataout2result" input="sop_2.result" output="dsp_pb.result[36:0]">
-            </direct>
-            <direct name="chainout" input="sop_2.chainout" output="dsp_pb.chainout[36:0]">
-            </direct>
-          </interconnect>
-        </mode>
-
-        <!-- fixed-point multiplier-add-sum mode result = (ax * ay) + bx + chainin. chainout = result. with scanin-scanout support -->
-        <mode name="mult_add_mode_18_19_36">
-          <pb_type name="mult_add" num_pb="1" blif_model=".subckt mult_add_int_18x19">
-            <input name="reset" num_pins="1"/>
-            <input name="mode_sigs" num_pins="12"/>
-            <input name="ax" num_pins="18"/>
-            <input name="ay" num_pins="19"/>
-            <input name="bx" num_pins="36"/>
-            <input name="chainin" num_pins="64"/>
-            <input name="scanin" num_pins="19"/>
-            <output name="result" num_pins="64"/>
-            <output name="chainout" num_pins="64"/>
-            <output name="scanout" num_pins="19"/>
-            <clock name="clk" num_pins="1"/>
-
-            <delay_constant max="2.14e-9" in_port="mult_add.reset" out_port="mult_add.result"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.mode_sigs" out_port="mult_add.result"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.ax" out_port="mult_add.result"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.ay" out_port="mult_add.result"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.bx" out_port="mult_add.result"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.chainin" out_port="mult_add.result"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.scanin" out_port="mult_add.result"/>
-
-            <delay_constant max="2.14e-9" in_port="mult_add.reset" out_port="mult_add.chainout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.mode_sigs" out_port="mult_add.chainout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.ax" out_port="mult_add.chainout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.ay" out_port="mult_add.chainout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.bx" out_port="mult_add.chainout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.chainin" out_port="mult_add.chainout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.scanin" out_port="mult_add.chainout"/>
-
-            <delay_constant max="2.14e-9" in_port="mult_add.reset" out_port="mult_add.scanout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.mode_sigs" out_port="mult_add.scanout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.ax" out_port="mult_add.scanout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.ay" out_port="mult_add.scanout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.bx" out_port="mult_add.scanout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.chainin" out_port="mult_add.scanout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.scanin" out_port="mult_add.scanout"/>
-
-            <T_setup value="18.91e-12" port="mult_add.mode_sigs" clock="clk"/>
-            <T_setup value="18.91e-12" port="mult_add.ax" clock="clk"/>
-            <T_setup value="18.91e-12" port="mult_add.ay" clock="clk"/>
-            <T_setup value="18.91e-12" port="mult_add.bx" clock="clk"/>
-            <T_setup value="18.91e-12" port="mult_add.chainin" clock="clk"/>
-            <T_setup value="18.91e-12" port="mult_add.scanin" clock="clk"/>
-            <T_setup value="18.91e-12" port="mult_add.reset" clock="clk"/>
-            <T_setup value="18.91e-12" port="mult_add.result" clock="clk"/>
-
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.mode_sigs" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.ax" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.ay" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.bx" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.chainin" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.scanin" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.reset" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.result" clock="clk"/>
-          </pb_type>
-          <interconnect>
-            <direct name="clk" input="dsp_pb.clk" output="mult_add.clk">
-            </direct>
-            <direct name="reset" input="dsp_pb.reset" output="mult_add.reset">
-            </direct>
-            <direct name="modesigs" input="dsp_pb.mode_sigs" output="mult_add.mode_sigs">
-            </direct>
-            <direct name="datain2ax" input="dsp_pb.datain[17:0]" output="mult_add.ax">
-            </direct>
-            <direct name="datain2ay" input="dsp_pb.datain[36:18]" output="mult_add.ay">
-            </direct>
-            <direct name="datain2bx" input="dsp_pb.datain[72:37]" output="mult_add.bx">
-            </direct>
-            <direct name="chainin"   input="dsp_pb.chainin[63:0]" output="mult_add.chainin">
-            </direct>
-            <direct name="scanin"   input="dsp_pb.scanin[18:0]" output="mult_add.scanin">
-            </direct>
-            <direct name="dataout2result" input="mult_add.result" output="dsp_pb.result[63:0]">
-            </direct>
-            <direct name="chainout" input="mult_add.chainout" output="dsp_pb.chainout[63:0]">
-            </direct>
-            <direct name="scanout" input="mult_add.scanout" output="dsp_pb.scanout[18:0]">
-            </direct>
-          </interconnect>
-        </mode>
-
-        <!-- fixed-point multiplier-add-sum mode result = (ax * ay) + bx + chainin. chainout = result. with scanin-scanout support -->
-        <mode name="mult_add_mode_27_27_64">
-          <pb_type name="mult_add" num_pb="1" blif_model=".subckt mult_add_int_27x27">
-            <input name="reset" num_pins="1"/>
-            <input name="mode_sigs" num_pins="12"/>
-            <input name="ax" num_pins="27"/>
-            <input name="ay" num_pins="27"/>
-            <input name="bx" num_pins="36"/>
-            <input name="chainin" num_pins="64"/>
-            <input name="scanin" num_pins="27"/>
-            <output name="result" num_pins="64"/>
-            <output name="chainout" num_pins="64"/>
-            <output name="scanout" num_pins="27"/>
-            <clock name="clk" num_pins="1"/>
-
-            <delay_constant max="2.14e-9" in_port="mult_add.reset" out_port="mult_add.result"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.mode_sigs" out_port="mult_add.result"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.ax" out_port="mult_add.result"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.ay" out_port="mult_add.result"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.bx" out_port="mult_add.result"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.chainin" out_port="mult_add.result"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.scanin" out_port="mult_add.result"/>
-
-            <delay_constant max="2.14e-9" in_port="mult_add.reset" out_port="mult_add.chainout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.mode_sigs" out_port="mult_add.chainout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.ax" out_port="mult_add.chainout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.ay" out_port="mult_add.chainout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.bx" out_port="mult_add.chainout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.chainin" out_port="mult_add.chainout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.scanin" out_port="mult_add.chainout"/>
-
-            <delay_constant max="2.14e-9" in_port="mult_add.reset" out_port="mult_add.scanout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.mode_sigs" out_port="mult_add.scanout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.ax" out_port="mult_add.scanout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.ay" out_port="mult_add.scanout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.bx" out_port="mult_add.scanout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.chainin" out_port="mult_add.scanout"/>
-            <delay_constant max="2.14e-9" in_port="mult_add.scanin" out_port="mult_add.scanout"/>
-
-            <T_setup value="18.91e-12" port="mult_add.mode_sigs" clock="clk"/>
-            <T_setup value="18.91e-12" port="mult_add.ax" clock="clk"/>
-            <T_setup value="18.91e-12" port="mult_add.ay" clock="clk"/>
-            <T_setup value="18.91e-12" port="mult_add.bx" clock="clk"/>
-            <T_setup value="18.91e-12" port="mult_add.chainin" clock="clk"/>
-            <T_setup value="18.91e-12" port="mult_add.scanin" clock="clk"/>
-            <T_setup value="18.91e-12" port="mult_add.reset" clock="clk"/>
-            <T_setup value="18.91e-12" port="mult_add.result" clock="clk"/>
-
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.mode_sigs" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.ax" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.ay" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.bx" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.chainin" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.scanin" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.reset" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_add.result" clock="clk"/>
-          </pb_type>
-          <interconnect>
-            <direct name="clk" input="dsp_pb.clk" output="mult_add.clk">
-            </direct>
-            <direct name="reset" input="dsp_pb.reset" output="mult_add.reset">
-            </direct>
-            <direct name="modesigs" input="dsp_pb.mode_sigs" output="mult_add.mode_sigs">
-            </direct>
-            <direct name="datain2ax" input="dsp_pb.datain[26:0]" output="mult_add.ax">
-            </direct>
-            <direct name="datain2ay" input="dsp_pb.datain[53:27]" output="mult_add.ay">
-            </direct>
-            <direct name="datain2bx" input="dsp_pb.datain[89:54]" output="mult_add.bx">
-            </direct>
-            <direct name="chainin"   input="dsp_pb.chainin[63:0]" output="mult_add.chainin">
-            </direct>
-            <direct name="scanin"   input="dsp_pb.scanin[26:0]" output="mult_add.scanin">
-            </direct>
-            <direct name="dataout2result" input="mult_add.result" output="dsp_pb.result[63:0]">
-            </direct>
-            <direct name="chainout" input="mult_add.chainout" output="dsp_pb.chainout[63:0]">
-            </direct>
-            <direct name="scanout" input="mult_add.scanout" output="dsp_pb.scanout[26:0]">
-            </direct>
-          </interconnect>
-        </mode>
-
-        <!-- fixed-point sum-of-4 mode result = (dx * dy) + (cx * cy) + (bx * by) + (ax * ay) + chainin. chainout = result -->
-        <mode name="sop_4_mode">
-          <pb_type name="sop_4" num_pb="1" blif_model=".subckt int_sop_4">
-            <input name="reset" num_pins="1"/>
-            <input name="mode_sigs" num_pins="12"/>
-            <input name="ax" num_pins="9"/>
-            <input name="ay" num_pins="9"/>
-            <input name="bx" num_pins="9"/>
-            <input name="by" num_pins="9"/>
-            <input name="cx" num_pins="9"/>
-            <input name="cy" num_pins="9"/>
-            <input name="dx" num_pins="9"/>
-            <input name="dy" num_pins="9"/>
-            <input name="chainin" num_pins="64"/>
-            <output name="result" num_pins="64"/>
-            <output name="chainout" num_pins="64"/>
-            <clock name="clk" num_pins="1"/>
-
-            <delay_constant max="2.14e-9" in_port="sop_4.reset" out_port="sop_4.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.mode_sigs" out_port="sop_4.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.ax" out_port="sop_4.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.ay" out_port="sop_4.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.bx" out_port="sop_4.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.by" out_port="sop_4.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.cx" out_port="sop_4.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.cy" out_port="sop_4.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.dx" out_port="sop_4.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.dy" out_port="sop_4.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.chainin" out_port="sop_4.result"/>
-
-            <delay_constant max="2.14e-9" in_port="sop_4.reset" out_port="sop_4.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.mode_sigs" out_port="sop_4.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.ax" out_port="sop_4.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.ay" out_port="sop_4.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.bx" out_port="sop_4.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.by" out_port="sop_4.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.cx" out_port="sop_4.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.cy" out_port="sop_4.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.dx" out_port="sop_4.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.dy" out_port="sop_4.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.chainin" out_port="sop_4.chainout"/>
-
-            <T_setup value="18.91e-12" port="sop_4.mode_sigs" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.ax" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.ay" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.bx" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.by" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.cx" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.cy" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.dx" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.dy" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.chainin" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.reset" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.result" clock="clk"/>
-
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.mode_sigs" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.ax" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.ay" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.bx" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.by" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.cx" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.cy" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.dx" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.dy" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.chainin" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.reset" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.result" clock="clk"/>
-          </pb_type>
-          <interconnect>
-            <direct name="clk" input="dsp_pb.clk" output="sop_4.clk">
-            </direct>
-            <direct name="reset" input="dsp_pb.reset" output="sop_4.reset">
-            </direct>
-            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="sop_4.mode_sigs">
-            </direct>
-            <direct name="datain2ax" input="dsp_pb.datain[8:0]" output="sop_4.ax">
-            </direct>
-            <direct name="datain2ay" input="dsp_pb.datain[17:9]" output="sop_4.ay">
-            </direct>
-            <direct name="datain2bx" input="dsp_pb.datain[26:18]" output="sop_4.bx">
-            </direct>
-            <direct name="datain2by" input="dsp_pb.datain[35:27]" output="sop_4.by">
-            </direct>
-            <direct name="datain2cx" input="dsp_pb.datain[44:36]" output="sop_4.cx">
-            </direct>
-            <direct name="datain2cy" input="dsp_pb.datain[53:45]" output="sop_4.cy">
-            </direct>
-            <direct name="datain2dx" input="dsp_pb.datain[62:54]" output="sop_4.dx">
-            </direct>
-            <direct name="datain2dy" input="dsp_pb.datain[71:63]" output="sop_4.dy">
-            </direct>
-            <direct name="chainin"   input="dsp_pb.chainin[63:0]" output="sop_4.chainin">
-            </direct>
-            <direct name="dataout2result" input="sop_4.result" output="dsp_pb.result[63:0]">
-            </direct>
-            <direct name="chainout" input="sop_4.chainout" output="dsp_pb.chainout[63:0]">
-            </direct>
-          </interconnect>
-        </mode>
-
-        <!-- fixed-point sum-of-4 accum mode result = (dx * dy) + (cx * cy) + (bx * by) + (ax * ay) + chainin + accumulator. chainout = result -->
-        <mode name="sop_4_accum_mode">
-          <pb_type name="sop_4" num_pb="1" blif_model=".subckt int_sop_accum_4">
-            <input name="reset" num_pins="1"/>
-            <input name="mode_sigs" num_pins="12"/>
-            <input name="ax" num_pins="9"/>
-            <input name="ay" num_pins="9"/>
-            <input name="bx" num_pins="9"/>
-            <input name="by" num_pins="9"/>
-            <input name="cx" num_pins="9"/>
-            <input name="cy" num_pins="9"/>
-            <input name="dx" num_pins="9"/>
-            <input name="dy" num_pins="9"/>
-            <input name="chainin" num_pins="64"/>
-            <output name="result" num_pins="64"/>
-            <output name="chainout" num_pins="64"/>
-            <clock name="clk" num_pins="1"/>
-
-            <delay_constant max="2.14e-9" in_port="sop_4.mode_sigs" out_port="sop_4.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.ax" out_port="sop_4.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.ay" out_port="sop_4.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.bx" out_port="sop_4.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.by" out_port="sop_4.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.cx" out_port="sop_4.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.cy" out_port="sop_4.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.dx" out_port="sop_4.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.dy" out_port="sop_4.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.chainin" out_port="sop_4.result"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.reset" out_port="sop_4.result"/>
-
-            <delay_constant max="2.14e-9" in_port="sop_4.mode_sigs" out_port="sop_4.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.ax" out_port="sop_4.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.ay" out_port="sop_4.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.bx" out_port="sop_4.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.by" out_port="sop_4.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.cx" out_port="sop_4.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.cy" out_port="sop_4.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.dx" out_port="sop_4.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.dy" out_port="sop_4.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.chainin" out_port="sop_4.chainout"/>
-            <delay_constant max="2.14e-9" in_port="sop_4.reset" out_port="sop_4.chainout"/>
-
-            <T_setup value="18.91e-12" port="sop_4.mode_sigs" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.ax" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.ay" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.bx" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.by" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.cx" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.cy" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.dx" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.dy" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.chainin" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.reset" clock="clk"/>
-            <T_setup value="18.91e-12" port="sop_4.result" clock="clk"/>
-
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.mode_sigs" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.ax" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.ay" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.bx" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.by" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.cx" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.cy" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.dx" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.dy" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.chainin" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.reset" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="sop_4.result" clock="clk"/>
-          </pb_type>
-          <interconnect>
-            <direct name="clk" input="dsp_pb.clk" output="sop_4.clk">
-            </direct>
-            <direct name="reset" input="dsp_pb.reset" output="sop_4.reset">
-            </direct>
-            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="sop_4.mode_sigs">
-            </direct>
-            <direct name="datain2ax" input="dsp_pb.datain[8:0]" output="sop_4.ax">
-            </direct>
-            <direct name="datain2ay" input="dsp_pb.datain[17:9]" output="sop_4.ay">
-            </direct>
-            <direct name="datain2bx" input="dsp_pb.datain[26:18]" output="sop_4.bx">
-            </direct>
-            <direct name="datain2by" input="dsp_pb.datain[35:27]" output="sop_4.by">
-            </direct>
-            <direct name="datain2cx" input="dsp_pb.datain[44:36]" output="sop_4.cx">
-            </direct>
-            <direct name="datain2cy" input="dsp_pb.datain[53:45]" output="sop_4.cy">
-            </direct>
-            <direct name="datain2dx" input="dsp_pb.datain[62:54]" output="sop_4.dx">
-            </direct>
-            <direct name="datain2dy" input="dsp_pb.datain[71:63]" output="sop_4.dy">
-            </direct>
-            <direct name="chainin"   input="dsp_pb.chainin[63:0]" output="sop_4.chainin">
-            </direct>
-            <direct name="dataout2result" input="sop_4.result" output="dsp_pb.result[63:0]">
-            </direct>
-            <direct name="chainout" input="sop_4.chainout" output="dsp_pb.chainout[63:0]">
-            </direct>
-          </interconnect>
-        </mode>
-
-        <!-- Fixed point mac mode (result = a*b + accumulated value-->
-        <mode name="mac_27x27_fixed_pt_mode">
-          <pb_type name="mac_27x27_fixed_pt" blif_model=".subckt mac_int_27x27" num_pb="1">
-            <input name="reset" num_pins="1"/>
-            <input name="a" num_pins="27"/>
-            <input name="b" num_pins="27"/>
-            <output name="out" num_pins="54"/>
-            <clock name="clk" num_pins="1"/>
-
-            <delay_constant max="2.14e-9" in_port="mac_27x27_fixed_pt.a" out_port="mac_27x27_fixed_pt.out"/>
-            <delay_constant max="2.14e-9" in_port="mac_27x27_fixed_pt.b" out_port="mac_27x27_fixed_pt.out"/>
-            <delay_constant max="2.14e-9" in_port="mac_27x27_fixed_pt.reset" out_port="mac_27x27_fixed_pt.out"/>
-
-            <T_setup value="18.91e-12" port="mac_27x27_fixed_pt.reset" clock="clk"/>
-            <T_setup value="18.91e-12" port="mac_27x27_fixed_pt.a" clock="clk"/>
-            <T_setup value="18.91e-12" port="mac_27x27_fixed_pt.b" clock="clk"/>
-            <T_setup value="18.91e-12" port="mac_27x27_fixed_pt.out" clock="clk"/>
-
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_27x27_fixed_pt.reset" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_27x27_fixed_pt.a" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_27x27_fixed_pt.b" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_27x27_fixed_pt.out" clock="clk"/>
-          </pb_type>
-          <interconnect>
-            <direct name="reset" input="dsp_pb.reset" output="mac_27x27_fixed_pt.reset"/>
-            <direct name="clk" input="dsp_pb.clk" output="mac_27x27_fixed_pt.clk"/>
-            <direct name="atoa" input="dsp_pb.datain[26:0]" output="mac_27x27_fixed_pt.a"/>
-            <direct name="btob" input="dsp_pb.datain[53:27]" output="mac_27x27_fixed_pt.b"/>
-            <direct name="sumouttosumout" input="mac_27x27_fixed_pt.out" output="dsp_pb.result[53:0]"/>
-          </interconnect>
-        </mode>
-
-        <!-- Fixed point mac mode (result = a*b + accumulated value-->
-        <mode name="mac_18x19_fixed_pt_mode">
-          <pb_type name="mac_fixed_pt" blif_model=".subckt mac_int_18x19" num_pb="2">
-            <input name="reset" num_pins="1"/>
-            <input name="a" num_pins="18"/>
-            <input name="b" num_pins="19"/>
-            <output name="out" num_pins="37"/>
-            <clock name="clk" num_pins="1"/>
-
-            <delay_constant max="2.14e-9" in_port="mac_fixed_pt.a" out_port="mac_fixed_pt.out"/>
-            <delay_constant max="2.14e-9" in_port="mac_fixed_pt.b" out_port="mac_fixed_pt.out"/>
-            <delay_constant max="2.14e-9" in_port="mac_fixed_pt.reset" out_port="mac_fixed_pt.out"/>
-
-            <T_setup value="18.91e-12" port="mac_fixed_pt.reset" clock="clk"/>
-            <T_setup value="18.91e-12" port="mac_fixed_pt.a" clock="clk"/>
-            <T_setup value="18.91e-12" port="mac_fixed_pt.b" clock="clk"/>
-            <T_setup value="18.91e-12" port="mac_fixed_pt.out" clock="clk"/>
-
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fixed_pt.reset" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fixed_pt.a" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fixed_pt.b" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fixed_pt.out" clock="clk"/>
-          </pb_type>
-          <interconnect>
-            <direct name="reset0" input="dsp_pb.reset" output="mac_fixed_pt[0].reset"/>
-            <direct name="reset1" input="dsp_pb.reset" output="mac_fixed_pt[1].reset"/>
-            <direct name="clk0" input="dsp_pb.clk" output="mac_fixed_pt[0].clk"/>
-            <direct name="clk1" input="dsp_pb.clk" output="mac_fixed_pt[1].clk"/>
-            <direct name="atoa0" input="dsp_pb.datain[17:0]" output="mac_fixed_pt[0].a"/>
-            <direct name="btob0" input="dsp_pb.datain[36:18]" output="mac_fixed_pt[0].b"/>
-            <direct name="atoa1" input="dsp_pb.datain[54:37]" output="mac_fixed_pt[1].a"/>
-            <direct name="btob1" input="dsp_pb.datain[73:55]" output="mac_fixed_pt[1].b"/>
-            <direct name="sumouttosumout0" input="mac_fixed_pt[0].out" output="dsp_pb.result[36:0]"/>
-            <direct name="sumouttosumout1" input="mac_fixed_pt[1].out" output="dsp_pb.result[73:37]"/>
-          </interconnect>
-        </mode>
-
-        <!-- Fixed point mac mode (result = a*b + accumulated value-->
-        <mode name="mac_9x9_fixed_pt_mode">
-          <pb_type name="mac_9x9_fixed_pt" blif_model=".subckt mac_int_9x9" num_pb="4">
-            <input name="reset" num_pins="1"/>
-            <input name="a" num_pins="9"/>
-            <input name="b" num_pins="9"/>
-            <output name="out" num_pins="18"/>
-            <clock name="clk" num_pins="1"/>
-
-            <delay_constant max="2.14e-9" in_port="mac_9x9_fixed_pt.a" out_port="mac_9x9_fixed_pt.out"/>
-            <delay_constant max="2.14e-9" in_port="mac_9x9_fixed_pt.b" out_port="mac_9x9_fixed_pt.out"/>
-            <delay_constant max="2.14e-9" in_port="mac_9x9_fixed_pt.reset" out_port="mac_9x9_fixed_pt.out"/>
-
-            <T_setup value="18.91e-12" port="mac_9x9_fixed_pt.reset" clock="clk"/>
-            <T_setup value="18.91e-12" port="mac_9x9_fixed_pt.a" clock="clk"/>
-            <T_setup value="18.91e-12" port="mac_9x9_fixed_pt.b" clock="clk"/>
-            <T_setup value="18.91e-12" port="mac_9x9_fixed_pt.out" clock="clk"/>
-
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_9x9_fixed_pt.reset" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_9x9_fixed_pt.a" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_9x9_fixed_pt.b" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_9x9_fixed_pt.out" clock="clk"/>
-          </pb_type>
-          <interconnect>
-            <direct name="reset0" input="dsp_pb.reset" output="mac_9x9_fixed_pt[0].reset"/>
-            <direct name="reset1" input="dsp_pb.reset" output="mac_9x9_fixed_pt[1].reset"/>
-            <direct name="reset2" input="dsp_pb.reset" output="mac_9x9_fixed_pt[2].reset"/>
-            <direct name="reset3" input="dsp_pb.reset" output="mac_9x9_fixed_pt[3].reset"/>
-            <direct name="clk0" input="dsp_pb.clk" output="mac_9x9_fixed_pt[0].clk"/>
-            <direct name="clk1" input="dsp_pb.clk" output="mac_9x9_fixed_pt[1].clk"/>
-            <direct name="clk2" input="dsp_pb.clk" output="mac_9x9_fixed_pt[2].clk"/>
-            <direct name="clk3" input="dsp_pb.clk" output="mac_9x9_fixed_pt[3].clk"/>
-            <direct name="atoa0" input="dsp_pb.datain[8:0]" output="mac_9x9_fixed_pt[0].a"/>
-            <direct name="btob0" input="dsp_pb.datain[17:9]" output="mac_9x9_fixed_pt[0].b"/>
-            <direct name="atoa1" input="dsp_pb.datain[26:18]" output="mac_9x9_fixed_pt[1].a"/>
-            <direct name="btob1" input="dsp_pb.datain[35:27]" output="mac_9x9_fixed_pt[1].b"/>
-            <direct name="atoa2" input="dsp_pb.datain[44:36]" output="mac_9x9_fixed_pt[2].a"/>
-            <direct name="btob2" input="dsp_pb.datain[53:45]" output="mac_9x9_fixed_pt[2].b"/>
-            <direct name="atoa3" input="dsp_pb.datain[62:54]" output="mac_9x9_fixed_pt[3].a"/>
-            <direct name="btob3" input="dsp_pb.datain[71:63]" output="mac_9x9_fixed_pt[3].b"/>
-            <direct name="sumouttosumout0" input="mac_9x9_fixed_pt[0].out" output="dsp_pb.result[17:0]"/>
-            <direct name="sumouttosumout1" input="mac_9x9_fixed_pt[1].out" output="dsp_pb.result[35:18]"/>
-            <direct name="sumouttosumout2" input="mac_9x9_fixed_pt[2].out" output="dsp_pb.result[53:36]"/>
-            <direct name="sumouttosumout3" input="mac_9x9_fixed_pt[3].out" output="dsp_pb.result[71:54]"/>
-          </interconnect>
-        </mode>
-
-        <!-- floating point multiplier mode (result = a * b)-->
-        <mode name="mult_fp32_mode">
-          <pb_type name="mult_fp32" blif_model=".subckt mult_fp_32" num_pb="1">
-            <input name="a" num_pins="32"/>
-            <input name="b" num_pins="32"/>
-            <output name="out" num_pins="32"/>
-
-            <delay_constant max="2.56e-9" in_port="mult_fp32.a" out_port="mult_fp32.out"/>
-            <delay_constant max="2.56e-9" in_port="mult_fp32.b" out_port="mult_fp32.out"/>
-          </pb_type>
-          <interconnect>
-            <direct name="a2a" input="dsp_pb.datain[31:0]" output="mult_fp32.a">
-            </direct>
-            <direct name="b2b" input="dsp_pb.datain[63:32]" output="mult_fp32.b">
-            </direct>
-            <direct name="out2out" input="mult_fp32.out" output="dsp_pb.result[31:0]">
-            </direct>
-          </interconnect>
-        </mode>
-
-        <!-- floating point multiplier mode (result = a * b)-->
-        <mode name="mult_fp16_mode">
-          <pb_type name="mult_fp16" blif_model=".subckt mult_fp_16" num_pb="2">
-            <input name="a" num_pins="16"/>
-            <input name="b" num_pins="16"/>
-            <output name="out" num_pins="16"/>
-
-            <delay_constant max="2.56e-9" in_port="mult_fp16.a" out_port="mult_fp16.out"/>
-            <delay_constant max="2.56e-9" in_port="mult_fp16.b" out_port="mult_fp16.out"/>
-          </pb_type>
-          <interconnect>
-            <direct name="a2a0" input="dsp_pb.datain[15:0]" output="mult_fp16[0].a"></direct>
-            <direct name="a2a1" input="dsp_pb.datain[31:16]" output="mult_fp16[1].a"></direct>
-            <direct name="b2b0" input="dsp_pb.datain[47:32]" output="mult_fp16[0].b"></direct>
-            <direct name="b2b1" input="dsp_pb.datain[63:48]" output="mult_fp16[1].b"></direct>
-            <direct name="out2out0" input="mult_fp16[0].out" output="dsp_pb.result[15:0]"></direct>
-            <direct name="out2out1" input="mult_fp16[1].out" output="dsp_pb.result[31:16]"></direct>
-          </interconnect>
-        </mode>
-
-        <!-- floating point adder mode (result = a + b)-->
-        <mode name="adder_fp32_mode"> 
-          <pb_type name="adder_fp32" blif_model=".subckt addition_fp_32" num_pb="1">
-            <input name="a" num_pins="32"/>
-            <input name="b" num_pins="32"/>
-            <output name="out" num_pins="32"/>
-
-            <delay_constant max="2.56e-9" in_port="adder_fp32.a" out_port="adder_fp32.out"/>
-            <delay_constant max="2.56e-9" in_port="adder_fp32.b" out_port="adder_fp32.out"/>
-          </pb_type>
-          <interconnect>
-            <direct name="atoa" input="dsp_pb.datain[31:0]" output="adder_fp32.a">
-            </direct>
-            <direct name="btob" input="dsp_pb.datain[63:32]" output="adder_fp32.b">
-            </direct>
-            <direct name="sumouttosumout" input="adder_fp32.out" output="dsp_pb.result[31:0]">
-            </direct>
-          </interconnect>
-        </mode>
-
-        <!-- floating point adder mode (result = a + b)-->
-        <mode name="adder_fp16_mode">
-          <pb_type name="adder_fp16" blif_model=".subckt addition_fp_16" num_pb="2">
-            <input name="a" num_pins="16"/>
-            <input name="b" num_pins="16"/>
-            <output name="out" num_pins="16"/>
-
-            <delay_constant max="2.56e-9" in_port="adder_fp16.a" out_port="adder_fp16.out"/>
-            <delay_constant max="2.56e-9" in_port="adder_fp16.b" out_port="adder_fp16.out"/>
-          </pb_type>
-          <interconnect>
-            <direct name="a2a0" input="dsp_pb.datain[15:0]" output="adder_fp16[0].a"></direct>
-            <direct name="a2a1" input="dsp_pb.datain[31:16]" output="adder_fp16[1].a"></direct>
-            <direct name="b2b0" input="dsp_pb.datain[47:32]" output="adder_fp16[0].b"></direct>
-            <direct name="b2b1" input="dsp_pb.datain[63:48]" output="adder_fp16[1].b"></direct>
-            <direct name="out2out0" input="adder_fp16[0].out" output="dsp_pb.result[15:0]"></direct>
-            <direct name="out2out1" input="adder_fp16[1].out" output="dsp_pb.result[31:16]"></direct>
-          </interconnect>
-        </mode>
-
-        <!-- clocked floating point multiplier mode (result = a * b)-->
-        <mode name="mult_fp32_clocked_mode">
-          <pb_type name="mult_fp32" blif_model=".subckt mult_fp_clk_32" num_pb="1">
-            <input name="a" num_pins="32"/>
-            <input name="b" num_pins="32"/>
-            <output name="out" num_pins="32"/>
-            <clock name="clk" num_pins="1"/>
-
-            <delay_constant max="2.56e-9" in_port="mult_fp32.a" out_port="mult_fp32.out"/>
-            <delay_constant max="2.56e-9" in_port="mult_fp32.b" out_port="mult_fp32.out"/>
-
-            <T_setup value="18.91e-12" port="mult_fp32.a" clock="clk"/>
-            <T_setup value="18.91e-12" port="mult_fp32.b" clock="clk"/>
-            <T_setup value="18.91e-12" port="mult_fp32.out" clock="clk"/>
-
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_fp32.a" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_fp32.b" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_fp32.out" clock="clk"/>
-          </pb_type>
-          <interconnect>
-            <direct name="clk" input="dsp_pb.clk" output="mult_fp32.clk"/>
-            <direct name="a2a" input="dsp_pb.datain[31:0]" output="mult_fp32.a">
-            </direct>
-            <direct name="b2b" input="dsp_pb.datain[63:32]" output="mult_fp32.b">
-            </direct>
-            <direct name="out2out" input="mult_fp32.out" output="dsp_pb.result[31:0]">
-            </direct>
-          </interconnect>
-        </mode>
-
-        <!-- clocked floating point multiplier mode (result = a * b)-->
-        <mode name="mult_fp16_clocked_mode">
-          <pb_type name="mult_fp16" blif_model=".subckt mult_fp_clk_16" num_pb="2">
-            <input name="a" num_pins="16"/>
-            <input name="b" num_pins="16"/>
-            <output name="out" num_pins="16"/>
-            <clock name="clk" num_pins="1"/>
-
-            <delay_constant max="2.56e-9" in_port="mult_fp16.a" out_port="mult_fp16.out"/>
-            <delay_constant max="2.56e-9" in_port="mult_fp16.b" out_port="mult_fp16.out"/>
-
-            <T_setup value="18.91e-12" port="mult_fp16.a" clock="clk"/>
-            <T_setup value="18.91e-12" port="mult_fp16.b" clock="clk"/>
-            <T_setup value="18.91e-12" port="mult_fp16.out" clock="clk"/>
-
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_fp16.a" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_fp16.b" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mult_fp16.out" clock="clk"/>
-          </pb_type>
-          <interconnect>
-            <direct name="clk0" input="dsp_pb.clk" output="mult_fp16[0].clk"/>
-            <direct name="clk1" input="dsp_pb.clk" output="mult_fp16[1].clk"/>
-            <direct name="a2a0" input="dsp_pb.datain[15:0]" output="mult_fp16[0].a"></direct>
-            <direct name="a2a1" input="dsp_pb.datain[31:16]" output="mult_fp16[1].a"></direct>
-            <direct name="b2b0" input="dsp_pb.datain[47:32]" output="mult_fp16[0].b"></direct>
-            <direct name="b2b1" input="dsp_pb.datain[63:48]" output="mult_fp16[1].b"></direct>
-            <direct name="out2out0" input="mult_fp16[0].out" output="dsp_pb.result[15:0]"></direct>
-            <direct name="out2out1" input="mult_fp16[1].out" output="dsp_pb.result[31:16]"></direct>
-          </interconnect>
-        </mode>
-
-        <!-- clocked floating point adder mode (result = a + b)-->
-        <mode name="adder_fp32_clocked_mode"> 
-          <pb_type name="adder_fp32" blif_model=".subckt addition_fp_clk_32" num_pb="1">
-            <input name="a" num_pins="32"/>
-            <input name="b" num_pins="32"/>
-            <output name="out" num_pins="32"/>
-            <clock name="clk" num_pins="1"/>
-
-            <delay_constant max="2.56e-9" in_port="adder_fp32.a" out_port="adder_fp32.out"/>
-            <delay_constant max="2.56e-9" in_port="adder_fp32.b" out_port="adder_fp32.out"/>
-
-            <T_setup value="18.91e-12" port="adder_fp32.a" clock="clk"/>
-            <T_setup value="18.91e-12" port="adder_fp32.b" clock="clk"/>
-            <T_setup value="18.91e-12" port="adder_fp32.out" clock="clk"/>
-
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="adder_fp32.a" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="adder_fp32.b" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="adder_fp32.out" clock="clk"/>
-          </pb_type>
-          <interconnect>
-            <direct name="clk" input="dsp_pb.clk" output="adder_fp32.clk"/>
-            <direct name="atoa" input="dsp_pb.datain[31:0]" output="adder_fp32.a">
-            </direct>
-            <direct name="btob" input="dsp_pb.datain[63:32]" output="adder_fp32.b">
-            </direct>
-            <direct name="sumouttosumout" input="adder_fp32.out" output="dsp_pb.result[31:0]">
-            </direct>
-          </interconnect>
-        </mode>
-
-        <!-- clocked floating point adder mode (result = a + b)-->
-        <mode name="adder_fp16_clocked_mode">
-          <pb_type name="adder_fp16" blif_model=".subckt addition_fp_clk_16" num_pb="2">
-            <input name="a" num_pins="16"/>
-            <input name="b" num_pins="16"/>
-            <output name="out" num_pins="16"/>
-            <clock name="clk" num_pins="1"/>
-
-            <delay_constant max="2.56e-9" in_port="adder_fp16.a" out_port="adder_fp16.out"/>
-            <delay_constant max="2.56e-9" in_port="adder_fp16.b" out_port="adder_fp16.out"/>
-
-            <T_setup value="18.91e-12" port="adder_fp16.a" clock="clk"/>
-            <T_setup value="18.91e-12" port="adder_fp16.b" clock="clk"/>
-            <T_setup value="18.91e-12" port="adder_fp16.out" clock="clk"/>
-
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="adder_fp16.a" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="adder_fp16.b" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="adder_fp16.out" clock="clk"/>
-          </pb_type>
-          <interconnect>
-            <direct name="clk0" input="dsp_pb.clk" output="adder_fp16[0].clk"/>
-            <direct name="clk1" input="dsp_pb.clk" output="adder_fp16[1].clk"/>
-            <direct name="a2a0" input="dsp_pb.datain[15:0]" output="adder_fp16[0].a"></direct>
-            <direct name="a2a1" input="dsp_pb.datain[31:16]" output="adder_fp16[1].a"></direct>
-            <direct name="b2b0" input="dsp_pb.datain[47:32]" output="adder_fp16[0].b"></direct>
-            <direct name="b2b1" input="dsp_pb.datain[63:48]" output="adder_fp16[1].b"></direct>
-            <direct name="out2out0" input="adder_fp16[0].out" output="dsp_pb.result[15:0]"></direct>
-            <direct name="out2out1" input="adder_fp16[1].out" output="dsp_pb.result[31:16]"></direct>
-          </interconnect>
-        </mode>
-
-        <!-- floating point mac mode (result = a*b + accumulated value-->
-        <mode name="mac_fp32_mode">
-          <pb_type name="mac_fp32" blif_model=".subckt mac_fp_32" num_pb="1">
-            <input name="reset" num_pins="1"/>
-            <input name="a" num_pins="32"/>
-            <input name="b" num_pins="32"/>
-            <output name="out" num_pins="32"/>
-            <clock name="clk" num_pins="1"/>
-
-            <delay_constant max="2.56e-9" in_port="mac_fp32.a" out_port="mac_fp32.out"/>
-            <delay_constant max="2.56e-9" in_port="mac_fp32.b" out_port="mac_fp32.out"/>
-            <delay_constant max="2.56e-9" in_port="mac_fp32.reset" out_port="mac_fp32.out"/>
-
-            <T_setup value="18.91e-12" port="mac_fp32.reset" clock="clk"/>
-            <T_setup value="18.91e-12" port="mac_fp32.a" clock="clk"/>
-            <T_setup value="18.91e-12" port="mac_fp32.b" clock="clk"/>
-            <T_setup value="18.91e-12" port="mac_fp32.out" clock="clk"/>
-
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp32.reset" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp32.a" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp32.b" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp32.out" clock="clk"/>
-          </pb_type>
-          <interconnect>
-            <direct name="reset" input="dsp_pb.reset" output="mac_fp32.reset"/>
-            <direct name="clk" input="dsp_pb.clk" output="mac_fp32.clk"/>
-            <direct name="atoa" input="dsp_pb.datain[31:0]" output="mac_fp32.a">
-            </direct>
-            <direct name="btob" input="dsp_pb.datain[63:32]" output="mac_fp32.b">
-            </direct>
-            <direct name="sumouttosumout" input="mac_fp32.out" output="dsp_pb.result[31:0]">
-            </direct>
-          </interconnect>
-        </mode>
-
-        <!-- floating point 16-bit mac mode (result = a*b + accumulated value-->
-        <mode name="mac_fp16_mode">
-          <pb_type name="mac_fp16" blif_model=".subckt mac_fp_16" num_pb="2">
-            <input name="reset" num_pins="1"/>
-            <input name="a" num_pins="16"/>
-            <input name="b" num_pins="16"/>
-            <output name="out" num_pins="16"/>
-            <clock name="clk" num_pins="1"/>
-
-            <delay_constant max="2.56e-9" in_port="mac_fp16.a" out_port="mac_fp16.out"/>
-            <delay_constant max="2.56e-9" in_port="mac_fp16.b" out_port="mac_fp16.out"/>
-            <delay_constant max="2.56e-9" in_port="mac_fp16.reset" out_port="mac_fp16.out"/>
-
-            <T_setup value="18.91e-12" port="mac_fp16.reset" clock="clk"/>
-            <T_setup value="18.91e-12" port="mac_fp16.a" clock="clk"/>
-            <T_setup value="18.91e-12" port="mac_fp16.b" clock="clk"/>
-            <T_setup value="18.91e-12" port="mac_fp16.out" clock="clk"/>
-
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp16.reset" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp16.a" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp16.b" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="mac_fp16.out" clock="clk"/>
-          </pb_type>
-          <interconnect>
-            <direct name="reset0" input="dsp_pb.reset" output="mac_fp16[0].reset"></direct>
-            <direct name="reset1" input="dsp_pb.reset" output="mac_fp16[1].reset"></direct>
-            <direct name="clk0" input="dsp_pb.clk" output="mac_fp16[0].clk"></direct>
-            <direct name="clk1" input="dsp_pb.clk" output="mac_fp16[1].clk"></direct>
-            <direct name="atoa0" input="dsp_pb.datain[15:0]"  output="mac_fp16[0].a"></direct>
-            <direct name="atoa1" input="dsp_pb.datain[31:16]" output="mac_fp16[1].a"></direct>
-            <direct name="btob0" input="dsp_pb.datain[47:32]" output="mac_fp16[0].b"></direct>
-            <direct name="btob1" input="dsp_pb.datain[63:48]" output="mac_fp16[1].b"></direct>
-            <direct name="sumouttosumout0" input="mac_fp16[0].out" output="dsp_pb.result[15:0]"></direct>
-            <direct name="sumouttosumout1" input="mac_fp16[1].out" output="dsp_pb.result[31:16]"></direct>
-          </interconnect>
-        </mode>
-
-        <!-- floating point fp16 sum-of-2 mult mode (result = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b). chainout = third_inp or result-->
-        <mode name="fp16_sum_of_products_mode"> 
-          <pb_type name="fp16_sum_of_2_mult" blif_model=".subckt fp16_mult_add" num_pb="1">
-            <input name="reset" num_pins="1"/>
-            <input name="mode_sigs" num_pins="12"/>
-            <input name="top_a" num_pins="16"/>
-            <input name="top_b" num_pins="16"/>
-            <input name="bot_a" num_pins="16"/>
-            <input name="bot_b" num_pins="16"/>
-            <input name="fp32_in" num_pins="32"/>
-            <output name="result" num_pins="32"/>
-            <output name="chainout" num_pins="32"/>
-            <clock name="clk" num_pins="1"/>
-
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.reset" out_port="fp16_sum_of_2_mult.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.mode_sigs" out_port="fp16_sum_of_2_mult.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_a" out_port="fp16_sum_of_2_mult.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_b" out_port="fp16_sum_of_2_mult.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_a" out_port="fp16_sum_of_2_mult.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_b" out_port="fp16_sum_of_2_mult.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.fp32_in" out_port="fp16_sum_of_2_mult.result"/>
-
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.reset" out_port="fp16_sum_of_2_mult.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.mode_sigs" out_port="fp16_sum_of_2_mult.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_a" out_port="fp16_sum_of_2_mult.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_b" out_port="fp16_sum_of_2_mult.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_a" out_port="fp16_sum_of_2_mult.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_b" out_port="fp16_sum_of_2_mult.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.fp32_in" out_port="fp16_sum_of_2_mult.chainout"/>
-
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.mode_sigs" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.top_a" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.top_b" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.bot_a" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.bot_b" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.reset" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.fp32_in" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.result" clock="clk"/>
-
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.mode_sigs" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.top_a" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.top_b" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.bot_a" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.bot_b" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.reset" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.fp32_in" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.result" clock="clk"/>
-          </pb_type>
-          <interconnect>
-            <direct name="reset" input="dsp_pb.reset" output="fp16_sum_of_2_mult.reset"/>
-            <direct name="clk" input="dsp_pb.clk" output="fp16_sum_of_2_mult.clk"/>
-            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="fp16_sum_of_2_mult.mode_sigs">
-            </direct>
-            <direct name="atoa_top" input="dsp_pb.datain[15:0]" output="fp16_sum_of_2_mult.top_a">
-            </direct>
-            <direct name="btob_top" input="dsp_pb.datain[31:16]" output="fp16_sum_of_2_mult.top_b">
-            </direct>
-            <direct name="result_top" input="fp16_sum_of_2_mult.result" output="dsp_pb.result[31:0]">
-            </direct>
-            <direct name="atoa_bot" input="dsp_pb.datain[47:32]" output="fp16_sum_of_2_mult.bot_a">
-            </direct>
-            <direct name="btob_bot" input="dsp_pb.datain[63:48]" output="fp16_sum_of_2_mult.bot_b">
-            </direct>
-            <direct name="result_bot" input="fp16_sum_of_2_mult.result" output="dsp_pb.result[63:32]">
-            </direct>
-            <direct name="fp32in" input="dsp_pb.datain[95:64]" output="fp16_sum_of_2_mult.fp32_in">
-            </direct>
-            <direct name="chainout" input="fp16_sum_of_2_mult.chainout" output="dsp_pb.chainout[31:0]">
-            </direct>
-          </interconnect>
-        </mode>  
-
-        <!-- floating point fp16 sum-of-2 mult mode (result = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b + fp32 chainin or third inp. chainout = third_inp or result)-->
-        <mode name="fp16_sum_of_products_2_mult_mode"> 
-          <pb_type name="fp16_sum_of_2_mult" blif_model=".subckt fp16_sop2_mult" num_pb="1">
-            <input name="reset" num_pins="1"/>
-            <input name="mode_sigs" num_pins="12"/>
-            <input name="top_a" num_pins="16"/>
-            <input name="top_b" num_pins="16"/>
-            <input name="bot_a" num_pins="16"/>
-            <input name="bot_b" num_pins="16"/>
-            <input name="fp32_in" num_pins="32"/>
-            <input name="chainin" num_pins="32"/>
-            <output name="result" num_pins="32"/>
-            <output name="chainout" num_pins="32"/>
-            <clock name="clk" num_pins="1"/>
-
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.reset" out_port="fp16_sum_of_2_mult.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.mode_sigs" out_port="fp16_sum_of_2_mult.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_a" out_port="fp16_sum_of_2_mult.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_b" out_port="fp16_sum_of_2_mult.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_a" out_port="fp16_sum_of_2_mult.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_b" out_port="fp16_sum_of_2_mult.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.chainin" out_port="fp16_sum_of_2_mult.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.fp32_in" out_port="fp16_sum_of_2_mult.result"/>
-
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.reset" out_port="fp16_sum_of_2_mult.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.mode_sigs" out_port="fp16_sum_of_2_mult.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_a" out_port="fp16_sum_of_2_mult.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.top_b" out_port="fp16_sum_of_2_mult.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_a" out_port="fp16_sum_of_2_mult.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.bot_b" out_port="fp16_sum_of_2_mult.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.chainin" out_port="fp16_sum_of_2_mult.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_mult.fp32_in" out_port="fp16_sum_of_2_mult.chainout"/>
-
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.mode_sigs" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.top_a" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.top_b" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.bot_a" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.bot_b" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.reset" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.fp32_in" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.chainin" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_mult.result" clock="clk"/>
-
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.mode_sigs" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.top_a" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.top_b" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.bot_a" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.bot_b" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.reset" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.fp32_in" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.chainin" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_mult.result" clock="clk"/>
-          </pb_type>
-          <interconnect>
-            <direct name="reset" input="dsp_pb.reset" output="fp16_sum_of_2_mult.reset"/>
-            <direct name="clk" input="dsp_pb.clk" output="fp16_sum_of_2_mult.clk"/>
-            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="fp16_sum_of_2_mult.mode_sigs">
-            </direct>
-            <direct name="atoa_top" input="dsp_pb.datain[15:0]" output="fp16_sum_of_2_mult.top_a">
-            </direct>
-            <direct name="btob_top" input="dsp_pb.datain[31:16]" output="fp16_sum_of_2_mult.top_b">
-            </direct>
-            <direct name="result_top" input="fp16_sum_of_2_mult.result" output="dsp_pb.result[31:0]">
-            </direct>
-            <direct name="atoa_bot" input="dsp_pb.datain[47:32]" output="fp16_sum_of_2_mult.bot_a">
-            </direct>
-            <direct name="btob_bot" input="dsp_pb.datain[63:48]" output="fp16_sum_of_2_mult.bot_b">
-            </direct>
-            <direct name="result_bot" input="fp16_sum_of_2_mult.result" output="dsp_pb.result[63:32]">
-            </direct>
-            <direct name="chainin" input="dsp_pb.chainin[31:0]" output="fp16_sum_of_2_mult.chainin">
-            </direct>
-            <direct name="fp32in" input="dsp_pb.datain[95:64]" output="fp16_sum_of_2_mult.fp32_in">
-            </direct>
-            <direct name="chainout" input="fp16_sum_of_2_mult.chainout" output="dsp_pb.chainout[31:0]">
-            </direct>
-          </interconnect>
-        </mode>        
-
-        <!-- floating point fp16 sum-of-2 accum mode (result = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b + accumulator. chainout = result)-->
-        <mode name="fp16_sum_of_products_2_accum_mode"> 
-          <pb_type name="fp16_sum_of_2_accum" blif_model=".subckt fp16_sop2_accum" num_pb="1">
-            <input name="mode_sigs" num_pins="12"/>
-            <input name="reset" num_pins="1"/>
-            <input name="top_a" num_pins="16"/>
-            <input name="top_b" num_pins="16"/>
-            <input name="bot_a" num_pins="16"/>
-            <input name="bot_b" num_pins="16"/>
-            <output name="result" num_pins="32"/>
-            <output name="chainout" num_pins="32"/>
-            <clock name="clk" num_pins="1"/>
-
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.mode_sigs" out_port="fp16_sum_of_2_accum.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.top_a" out_port="fp16_sum_of_2_accum.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.top_b" out_port="fp16_sum_of_2_accum.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.bot_a" out_port="fp16_sum_of_2_accum.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.bot_b" out_port="fp16_sum_of_2_accum.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.reset" out_port="fp16_sum_of_2_accum.result"/>
-
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.mode_sigs" out_port="fp16_sum_of_2_accum.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.top_a" out_port="fp16_sum_of_2_accum.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.top_b" out_port="fp16_sum_of_2_accum.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.bot_a" out_port="fp16_sum_of_2_accum.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.bot_b" out_port="fp16_sum_of_2_accum.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_sum_of_2_accum.reset" out_port="fp16_sum_of_2_accum.chainout"/>
-
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_accum.mode_sigs" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_accum.top_a" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_accum.top_b" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_accum.bot_a" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_accum.bot_b" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_accum.reset" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_sum_of_2_accum.result" clock="clk"/>
-
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_accum.mode_sigs" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_accum.top_a" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_accum.top_b" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_accum.bot_a" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_accum.bot_b" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_accum.reset" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_sum_of_2_accum.result" clock="clk"/>
-          </pb_type>
-          <interconnect>
-            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="fp16_sum_of_2_accum.mode_sigs"/>
-            <direct name="reset" input="dsp_pb.reset" output="fp16_sum_of_2_accum.reset"/>
-            <direct name="clk" input="dsp_pb.clk" output="fp16_sum_of_2_accum.clk"/>
-            <direct name="atoa_top" input="dsp_pb.datain[15:0]" output="fp16_sum_of_2_accum.top_a">
-            </direct>
-            <direct name="btob_top" input="dsp_pb.datain[31:16]" output="fp16_sum_of_2_accum.top_b">
-            </direct>
-            <direct name="result_top" input="fp16_sum_of_2_accum.result" output="dsp_pb.result[31:0]">
-            </direct>
-            <direct name="atoa_bot" input="dsp_pb.datain[47:32]" output="fp16_sum_of_2_accum.bot_a">
-            </direct>
-            <direct name="btob_bot" input="dsp_pb.datain[63:48]" output="fp16_sum_of_2_accum.bot_b">
-            </direct>
-            <direct name="result_bot" input="fp16_sum_of_2_accum.result" output="dsp_pb.result[63:32]">
-            </direct>
-            <direct name="chainout" input="fp16_sum_of_2_accum.chainout" output="dsp_pb.chainout[31:0]">
-            </direct>
-          </interconnect>
-        </mode>        
-
-        <!-- floating point fp16 mult, fp32 add mode (chainout = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b. result = chainin + third_inp)-->
-        <mode name="fp16_mult_fp32_add"> 
-          <pb_type name="fp16_mult_fp32_add" blif_model=".subckt fp16_mult_fp32_add" num_pb="1">
-            <input name="mode_sigs" num_pins="12"/>
-            <input name="reset" num_pins="1"/>
-            <input name="top_a" num_pins="16"/>
-            <input name="top_b" num_pins="16"/>
-            <input name="bot_a" num_pins="16"/>
-            <input name="bot_b" num_pins="16"/>
-            <input name="fp32_in" num_pins="32"/>
-            <input name="chainin" num_pins="32"/>
-            <output name="result" num_pins="32"/>
-            <output name="chainout" num_pins="32"/>
-            <clock name="clk" num_pins="1"/>
-
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.reset" out_port="fp16_mult_fp32_add.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.mode_sigs" out_port="fp16_mult_fp32_add.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.top_a" out_port="fp16_mult_fp32_add.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.top_b" out_port="fp16_mult_fp32_add.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.bot_a" out_port="fp16_mult_fp32_add.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.bot_b" out_port="fp16_mult_fp32_add.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.chainin" out_port="fp16_mult_fp32_add.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.fp32_in" out_port="fp16_mult_fp32_add.result"/>
-
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.reset" out_port="fp16_mult_fp32_add.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.mode_sigs" out_port="fp16_mult_fp32_add.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.top_a" out_port="fp16_mult_fp32_add.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.top_b" out_port="fp16_mult_fp32_add.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.bot_a" out_port="fp16_mult_fp32_add.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.bot_b" out_port="fp16_mult_fp32_add.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.chainin" out_port="fp16_mult_fp32_add.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_add.fp32_in" out_port="fp16_mult_fp32_add.chainout"/>
-
-            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.mode_sigs" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.top_a" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.top_b" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.bot_a" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.bot_b" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.reset" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.chainin" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.fp32_in" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_mult_fp32_add.result" clock="clk"/>
-
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.mode_sigs" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.top_a" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.top_b" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.bot_a" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.bot_b" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.reset" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.chainin" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.fp32_in" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_add.result" clock="clk"/>
-          </pb_type>
-          <interconnect>
-            <direct name="clk" input="dsp_pb.clk" output="fp16_mult_fp32_add.clk"/>
-            <direct name="reset" input="dsp_pb.reset" output="fp16_mult_fp32_add.reset"/>
-            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="fp16_mult_fp32_add.mode_sigs">
-            </direct>
-            <direct name="atoa_top" input="dsp_pb.datain[15:0]" output="fp16_mult_fp32_add.top_a">
-            </direct>
-            <direct name="btob_top" input="dsp_pb.datain[31:16]" output="fp16_mult_fp32_add.top_b">
-            </direct>
-            <direct name="result_top" input="fp16_mult_fp32_add.result" output="dsp_pb.result[31:0]">
-            </direct>
-            <direct name="atoa_bot" input="dsp_pb.datain[47:32]" output="fp16_mult_fp32_add.bot_a">
-            </direct>
-            <direct name="btob_bot" input="dsp_pb.datain[63:48]" output="fp16_mult_fp32_add.bot_b">
-            </direct>
-            <direct name="result_bot" input="fp16_mult_fp32_add.result" output="dsp_pb.result[63:32]">
-            </direct>
-            <direct name="chainin" input="dsp_pb.chainin[31:0]" output="fp16_mult_fp32_add.chainin">
-            </direct>
-            <direct name="fp32in" input="dsp_pb.datain[95:64]]" output="fp16_mult_fp32_add.fp32_in">
-            </direct>
-            <direct name="chainout" input="fp16_mult_fp32_add.chainout" output="dsp_pb.chainout[31:0]">
-            </direct>
-          </interconnect>
-        </mode>      
-
-        <!-- floating point fp16 mult, fp32 accum mode (chainout = fp16_mult_top_a * fp16_mult_top_b + fp16_mult_bot_a * fp16_mult_bot_b. result = third_inp + accumulator)-->
-        <mode name="fp16_mult_fp32_accum"> 
-          <pb_type name="fp16_mult_fp32_accum" blif_model=".subckt fp16_mult_fp32_accum" num_pb="1">
-            <input name="mode_sigs" num_pins="12"/>
-            <input name="reset" num_pins="1"/>
-            <input name="top_a" num_pins="16"/>
-            <input name="top_b" num_pins="16"/>
-            <input name="bot_a" num_pins="16"/>
-            <input name="bot_b" num_pins="16"/>
-            <input name="fp32_in" num_pins="32"/>
-            <output name="result" num_pins="32"/>
-            <output name="chainout" num_pins="32"/>
-            <clock name="clk" num_pins="1"/>
-
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.mode_sigs" out_port="fp16_mult_fp32_accum.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.top_a" out_port="fp16_mult_fp32_accum.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.top_b" out_port="fp16_mult_fp32_accum.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.bot_a" out_port="fp16_mult_fp32_accum.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.bot_b" out_port="fp16_mult_fp32_accum.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.fp32_in" out_port="fp16_mult_fp32_accum.result"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.reset" out_port="fp16_mult_fp32_accum.result"/>
-
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.mode_sigs" out_port="fp16_mult_fp32_accum.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.top_a" out_port="fp16_mult_fp32_accum.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.top_b" out_port="fp16_mult_fp32_accum.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.bot_a" out_port="fp16_mult_fp32_accum.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.bot_b" out_port="fp16_mult_fp32_accum.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.fp32_in" out_port="fp16_mult_fp32_accum.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp16_mult_fp32_accum.reset" out_port="fp16_mult_fp32_accum.chainout"/>
-
-            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.mode_sigs" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.reset" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.top_a" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.top_b" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.bot_a" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.bot_b" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.fp32_in" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp16_mult_fp32_accum.result" clock="clk"/>
-
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.mode_sigs" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.reset" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.top_a" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.top_b" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.bot_a" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.bot_b" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.fp32_in" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp16_mult_fp32_accum.result" clock="clk"/>
-          </pb_type>
-          <interconnect>
-            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="fp16_mult_fp32_accum.mode_sigs"/>
-            <direct name="reset" input="dsp_pb.reset" output="fp16_mult_fp32_accum.reset"/>
-            <direct name="clk" input="dsp_pb.clk" output="fp16_mult_fp32_accum.clk"/>
-            <direct name="atoa_top" input="dsp_pb.datain[15:0]" output="fp16_mult_fp32_accum.top_a">
-            </direct>
-            <direct name="btob_top" input="dsp_pb.datain[31:16]" output="fp16_mult_fp32_accum.top_b">
-            </direct>
-            <direct name="result_top" input="fp16_mult_fp32_accum.result" output="dsp_pb.result[31:0]">
-            </direct>
-            <direct name="atoa_bot" input="dsp_pb.datain[47:32]" output="fp16_mult_fp32_accum.bot_a">
-            </direct>
-            <direct name="btob_bot" input="dsp_pb.datain[63:48]" output="fp16_mult_fp32_accum.bot_b">
-            </direct>
-            <direct name="result_bot" input="fp16_mult_fp32_accum.result" output="dsp_pb.result[63:32]">
-            </direct>
-            <direct name="fp32in" input="dsp_pb.datain[95:64]" output="fp16_mult_fp32_accum.fp32_in">
-            </direct>
-            <direct name="chainout" input="fp16_mult_fp32_accum.chainout" output="dsp_pb.chainout[31:0]">
-            </direct>
-          </interconnect>
-        </mode>      
-
-        <!-- floating point fp32 mult_then_add mode (result = fp32_mult_a * fp32_mult_b + chainin. chainout = third_inp or result) -->
-        <mode name="fp32_mult_then_add"> 
-          <pb_type name="fp32_mult_then_add" blif_model=".subckt fp32_mult_then_add" num_pb="1">
-            <input name="reset" num_pins="1"/>
-            <input name="mode_sigs" num_pins="12"/>
-            <input name="a" num_pins="32"/>
-            <input name="b" num_pins="32"/>
-            <input name="fp32_in" num_pins="32"/>
-            <input name="chainin" num_pins="32"/>
-            <output name="result" num_pins="32"/>
-            <output name="chainout" num_pins="32"/>
-            <clock name="clk" num_pins="1"/>
-
-            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.reset" out_port="fp32_mult_then_add.result"/>
-            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.mode_sigs" out_port="fp32_mult_then_add.result"/>
-            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.a" out_port="fp32_mult_then_add.result"/>
-            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.b" out_port="fp32_mult_then_add.result"/>
-            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.chainin" out_port="fp32_mult_then_add.result"/>
-            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.fp32_in" out_port="fp32_mult_then_add.result"/>
-
-            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.reset" out_port="fp32_mult_then_add.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.mode_sigs" out_port="fp32_mult_then_add.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.a" out_port="fp32_mult_then_add.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.b" out_port="fp32_mult_then_add.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.chainin" out_port="fp32_mult_then_add.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp32_mult_then_add.fp32_in" out_port="fp32_mult_then_add.chainout"/>
-
-            <T_setup value="18.91e-12" port="fp32_mult_then_add.reset" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp32_mult_then_add.mode_sigs" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp32_mult_then_add.a" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp32_mult_then_add.b" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp32_mult_then_add.chainin" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp32_mult_then_add.fp32_in" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp32_mult_then_add.result" clock="clk"/>
-
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_then_add.reset" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_then_add.mode_sigs" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_then_add.a" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_then_add.b" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_then_add.chainin" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_then_add.fp32_in" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_then_add.result" clock="clk"/>
-          </pb_type>
-          <interconnect>
-            <direct name="reset" input="dsp_pb.reset" output="fp32_mult_then_add.reset"/>
-            <direct name="clk" input="dsp_pb.clk" output="fp32_mult_then_add.clk"/>
-            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="fp32_mult_then_add.mode_sigs">
-            </direct>
-            <direct name="atoa" input="dsp_pb.datain[31:0]" output="fp32_mult_then_add.a">
-            </direct>
-            <direct name="btob" input="dsp_pb.datain[63:32]" output="fp32_mult_then_add.b">
-            </direct>
-            <direct name="result" input="fp32_mult_then_add.result" output="dsp_pb.result[31:0]">
-            </direct>
-            <direct name="chainin" input="dsp_pb.chainin[31:0]" output="fp32_mult_then_add.chainin">
-            </direct>
-            <direct name="fp32in" input="dsp_pb.datain[95:64]]" output="fp32_mult_then_add.fp32_in">
-            </direct>
-            <direct name="chainout" input="fp32_mult_then_add.chainout" output="dsp_pb.chainout[31:0]">
-            </direct>
-          </interconnect>
-        </mode>      
-
-        <!-- floating point fp32 mult_add mode (chainout = fp32_mult_a * fp32_mult_b. chainout = third_inp + chainin)-->
-        <mode name="fp32_mult_add"> 
-          <pb_type name="fp32_mult_add" blif_model=".subckt fp32_mult_add" num_pb="1">
-            <input name="reset" num_pins="1"/>
-            <input name="mode_sigs" num_pins="12"/>
-            <input name="a" num_pins="32"/>
-            <input name="b" num_pins="32"/>
-            <input name="fp32_in" num_pins="32"/>
-            <input name="chainin" num_pins="32"/>
-            <output name="result" num_pins="32"/>
-            <output name="chainout" num_pins="32"/>
-            <clock name="clk" num_pins="1"/>
-
-            <delay_constant max="2.56e-9" in_port="fp32_mult_add.reset" out_port="fp32_mult_add.result"/>
-            <delay_constant max="2.56e-9" in_port="fp32_mult_add.mode_sigs" out_port="fp32_mult_add.result"/>
-            <delay_constant max="2.56e-9" in_port="fp32_mult_add.a" out_port="fp32_mult_add.result"/>
-            <delay_constant max="2.56e-9" in_port="fp32_mult_add.b" out_port="fp32_mult_add.result"/>
-            <delay_constant max="2.56e-9" in_port="fp32_mult_add.chainin" out_port="fp32_mult_add.result"/>
-            <delay_constant max="2.56e-9" in_port="fp32_mult_add.fp32_in" out_port="fp32_mult_add.result"/>
-
-            <delay_constant max="2.56e-9" in_port="fp32_mult_add.reset" out_port="fp32_mult_add.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp32_mult_add.mode_sigs" out_port="fp32_mult_add.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp32_mult_add.a" out_port="fp32_mult_add.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp32_mult_add.b" out_port="fp32_mult_add.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp32_mult_add.chainin" out_port="fp32_mult_add.chainout"/>
-            <delay_constant max="2.56e-9" in_port="fp32_mult_add.fp32_in" out_port="fp32_mult_add.chainout"/>
-
-            <T_setup value="18.91e-12" port="fp32_mult_add.reset" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp32_mult_add.mode_sigs" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp32_mult_add.a" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp32_mult_add.b" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp32_mult_add.chainin" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp32_mult_add.fp32_in" clock="clk"/>
-            <T_setup value="18.91e-12" port="fp32_mult_add.result" clock="clk"/>
-
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_add.reset" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_add.mode_sigs" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_add.a" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_add.b" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_add.chainin" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_add.fp32_in" clock="clk"/>
-            <T_clock_to_Q max="60.32e-12" min="60.32e-12" port="fp32_mult_add.result" clock="clk"/>
-          </pb_type>
-          <interconnect>
-            <direct name="reset" input="dsp_pb.reset" output="fp32_mult_add.reset"/>
-            <direct name="clk" input="dsp_pb.clk" output="fp32_mult_add.clk"/>
-            <direct name="mode_sigs" input="dsp_pb.mode_sigs" output="fp32_mult_add.mode_sigs">
-            </direct>
-            <direct name="atoa" input="dsp_pb.datain[31:0]" output="fp32_mult_add.a">
-            </direct>
-            <direct name="btob" input="dsp_pb.datain[63:32]" output="fp32_mult_add.b">
-            </direct>
-            <direct name="result" input="fp32_mult_add.result" output="dsp_pb.result[31:0]">
-            </direct>
-            <direct name="chainin" input="dsp_pb.chainin[31:0]" output="fp32_mult_add.chainin">
-            </direct>
-            <direct name="fp32in" input="dsp_pb.datain[95:64]]" output="fp32_mult_add.fp32_in">
-            </direct>
-            <direct name="chainout" input="fp32_mult_add.chainout" output="dsp_pb.chainout[31:0]">
-            </direct>
-          </interconnect>
-        </mode>      
-      </pb_type>
-
-      <interconnect>
-        <direct name="mode_sigs" input="dsp.dsp_I1[11:0]" output ="dsp_pb.mode_sigs"/>
-        <direct name="datain1" input="dsp.dsp_I1[63:12]" output ="dsp_pb.datain[51:0]"/>
-        <direct name="datain2" input="dsp.dsp_I2" output ="dsp_pb.datain[115:52]"/>
-        <direct name="reset" input="dsp.reset" output="dsp_pb.reset"></direct>
-        <direct name="chainin" input="dsp.chainin"    output="dsp_pb.chainin"></direct>
-        <direct name="chainout" input="dsp_pb.chainout" output="dsp.chainout"></direct>
-        <direct name="scanin" input="dsp.scanin"    output="dsp_pb.scanin"></direct>
-        <direct name="scanout" input="dsp_pb.scanout" output="dsp.scanout"></direct>
-        <direct name="result" input="dsp_pb.result" output="dsp.result"></direct>
-        <direct name="clk" input="dsp.clk" output="dsp_pb.clk"></direct>
-      </interconnect>  
-
-    </pb_type>
-    <interconnect>
-        <!--50% sparse crossbar means 50% of the lines can reach an actual input of the dsp 
-        We do this by splitting inputs into two buckets and having two full crossbars-->
-        <!--
-       <complete name="first_half" input="dsp_top.dsp_I1" output="dsp.dsp_I1">
-            <delay_constant max="333e-12" in_port="dsp_top.dsp_I1" out_port="dsp.dsp_I1"/>
-       </complete>
-
-        <complete name="second_half" input="dsp_top.dsp_I2" output="dsp.dsp_I2">
-            <delay_constant max="333e-12" in_port="dsp_top.dsp_I2" out_port="dsp.dsp_I2"/>
-        </complete>
-        -->
-        <direct name="enable" input="dsp_top.dsp_I1[0]" output ="dsp.dsp_I1[0]"/>
-        <direct name="loadconst" input="dsp_top.dsp_I1[1]" output ="dsp.dsp_I1[1]"/>
-        <direct name="accumulate" input="dsp_top.dsp_I1[2]" output ="dsp.dsp_I1[2]"/>
-        <direct name="negate" input="dsp_top.dsp_I1[3]" output ="dsp.dsp_I1[3]"/>
-        <direct name="sub" input="dsp_top.dsp_I1[4]" output ="dsp.dsp_I1[4]"/>
-        <direct name="mode" input="dsp_top.dsp_I1[7:5]" output ="dsp.dsp_I1[7:5]"/>
-        <direct name="mux9_select" input="dsp_top.dsp_I1[8]" output ="dsp.dsp_I1[8]"/>
-        <direct name="internal_coeffa" input="dsp_top.dsp_I1[9]" output ="dsp.dsp_I1[9]"/>
-        <direct name="internal_coeffb" input="dsp_top.dsp_I1[10]" output ="dsp.dsp_I1[10]"/>
-        <direct name="datain1" input="dsp_top.dsp_I1[63:11]" output ="dsp.dsp_I1[63:11]"/>
-        <direct name="datain2" input="dsp_top.dsp_I2" output ="dsp.dsp_I2"/>
- 
-        <direct name="reset" input="dsp_top.reset" output="dsp.reset"></direct>
-        <direct name="chainin" input="dsp_top.chainin" output="dsp.chainin">
-            <delay_constant max="1179e-12" in_port="dsp_top.chainin" out_port="dsp.chainin"/>
-        </direct>
-        <direct name="chainout" input="dsp.chainout" output="dsp_top.chainout">
-            <delay_constant max="1179e-12" in_port="dsp.chainout" out_port="dsp_top.chainout"/>
-        </direct>
-        <direct name="scanin" input="dsp_top.scanin" output="dsp.scanin">
-            <delay_constant max="1179e-12" in_port="dsp_top.scanin" out_port="dsp.scanin"/>
-        </direct>
-        <direct name="scanout" input="dsp.scanout" output="dsp_top.scanout">
-            <delay_constant max="1179e-12" in_port="dsp.scanout" out_port="dsp_top.scanout"/>
-        </direct>
-        <direct name="result" input="dsp.result" output="dsp_top.result"></direct>
-        <direct name="clk" input="dsp_top.clk" output="dsp.clk"></direct>
-    </interconnect>
-    </pb_type>
-    <!-- Define DSP slice end -->
-
-
-    <!-- Define fracturable memory begin -->
-    <!-- 
-    RAM blocks always have registered inputs. The input FFs appear before the address decoder & wordline driver,
-    and after the local input crossbar & level shifter.
-    RAM blocks optionally have registered outputs. The output FFs (if present) appear after the output crossbar.
-    If BRAM doesn't have registered outputs, then T_clk_to_q is the whole delay of the read/write operation.
-    If BRAM does have registered output, then T_clk_to_q is just the FF clk_to_q and then delay_constant
-    can be used to specify the whole delay of the read/write operation.
-
-    This RAM block has registered outputs.
-
-    The area and delay values of this RAM block were obtained (indirectly) from COFFE simulations.
-    COFFE only support widths and depths that are powers of 2. For M20K (20 Kilobit BRAM), we need
-    the width to be 40 bits and depth to be 512 (for the logically widest mode: 512x40). We can't
-    simulate these dimensions directly in COFFE. So, we simulated and obtained the results for M32K
-    (32 Kilobits BRAM) and (16 Kilobits BRAM). Then we interpolated the results.
-    For delay, a linear interpolation was used, based on the size of the Memory (16K->20K->32K).
-    For area, the value was calculated using two interpolations: (1) port based (change in number of 
-    ports in going from 16K->20K->32K) and (2) number of bits based (change in number of bits in
-    going from 16K->20K->32K). The interpolation that resulted in the larger area was picked.
-    
-
-    Here are the equations used to calculate the delays based on COFFE results:
-    T_setup (inputs) = T_level_shifter + T_register_micro_setup = 32.3ps + 18.91ps = 51.21ps
-    T_clk_to_q (inputs) = T_register_micro_clk_to_q = 60.32ps
-    T_setup (outputs) = T_register_micro_setup = 18.91ps 
-    T_clk_to_q (outputs) = T_register_micro_clk_to_q = 60.32ps
-
-    (Register setup and clk_to_q timings are actually from the FF used in the logic cluster.)
-
-    T_read = T1 + T2 + T3
-    = max (Row decoder, Pre-charge time) + (Wordline driver + Bit line delay) + (Sense amp + Output crossbar)
-
-    * Bit line delay is included in self.RAM.samp.delay time in COFFE. The Sense amp delay is actually
-    self.RAM.samp_part2.delay
-
-    T_write = T1 + T2 + T3
-    = max (Row decoder, Pre-charge time) + (Wordline driver) + (Write driver)
-
-    delay_constant values model the internal limits of a block (the combinatorial delay).
-    delay_constant = max (T_read, T_write) 
-
-	  Overall internal delay of the RAM is T_clk_to_q (inputs) + delay_constant + T_setup (outputs)
-    -->
-    <pb_type name="memory">
-      <input name="addr1" num_pins="11"/>
-      <input name="addr2" num_pins="11"/>
-      <input name="data" num_pins="40"/>
-      <input name="we1" num_pins="1"/>
-      <input name="we2" num_pins="1"/>
-      <output name="out" num_pins="40"/>
-      <clock name="clk" num_pins="1"/>
-      <!-- Specify single port mode first -->
-      <mode name="mem_512x40_sp">
-        <pb_type name="mem_512x40_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
-          <input name="addr" num_pins="9" port_class="address"/>
-          <input name="data" num_pins="40" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="40" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-
-          <T_setup value="51.12e-12" port="mem_512x40_sp.addr" clock="clk"/>
-          <T_setup value="51.12e-12" port="mem_512x40_sp.data" clock="clk"/>
-          <T_setup value="51.12e-12" port="mem_512x40_sp.we" clock="clk"/>
-          <T_setup value="18.91e-12" port="mem_512x40_sp.out" clock="clk"/>
-
-          <T_clock_to_Q max="60.32e-12" port="mem_512x40_sp.addr" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_512x40_sp.data" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_512x40_sp.we" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_512x40_sp.out" clock="clk"/>
-
-          <delay_constant max="0.852e-9" in_port="mem_512x40_sp.addr" out_port="mem_512x40_sp.out"/>
-          <delay_constant max="0.852e-9" in_port="mem_512x40_sp.data" out_port="mem_512x40_sp.out"/>
-          <delay_constant max="0.852e-9" in_port="mem_512x40_sp.we"   out_port="mem_512x40_sp.out"/>
-
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[8:0]" output="mem_512x40_sp.addr">
-          </direct>
-          <direct name="data1" input="memory.data" output="mem_512x40_sp.data">
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_512x40_sp.we">
-          </direct>
-          <direct name="dataout1" input="mem_512x40_sp.out" output="memory.out">
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_512x40_sp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-
-      <mode name="mem_1024x20_sp">
-        <pb_type name="mem_1024x20_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
-          <input name="addr" num_pins="10" port_class="address"/>
-          <input name="data" num_pins="20" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="20" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-
-          <T_setup value="51.12e-12" port="mem_1024x20_sp.addr" clock="clk"/>
-          <T_setup value="51.12e-12" port="mem_1024x20_sp.data" clock="clk"/>
-          <T_setup value="51.12e-12" port="mem_1024x20_sp.we" clock="clk"/>
-          <T_setup value="18.91e-12" port="mem_1024x20_sp.out" clock="clk"/>
-
-          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_sp.addr" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_sp.data" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_sp.we" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_sp.out" clock="clk"/>
-
-          <delay_constant max="0.852e-9" in_port="mem_1024x20_sp.addr" out_port="mem_1024x20_sp.out"/>
-          <delay_constant max="0.852e-9" in_port="mem_1024x20_sp.data" out_port="mem_1024x20_sp.out"/>
-          <delay_constant max="0.852e-9" in_port="mem_1024x20_sp.we"   out_port="mem_1024x20_sp.out"/>
-
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x20_sp.addr">
-          </direct>
-          <direct name="data1" input="memory.data[19:0]" output="mem_1024x20_sp.data">
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_1024x20_sp.we">
-          </direct>
-          <direct name="dataout1" input="mem_1024x20_sp.out" output="memory.out[19:0]">
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_1024x20_sp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-
-      <mode name="mem_2048x10_sp">
-        <pb_type name="mem_2048x10_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
-          <input name="addr" num_pins="11" port_class="address"/>
-          <input name="data" num_pins="10" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="10" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-
-          <T_setup value="51.12e-12" port="mem_2048x10_sp.addr" clock="clk"/>
-          <T_setup value="51.12e-12" port="mem_2048x10_sp.data" clock="clk"/>
-          <T_setup value="51.12e-12" port="mem_2048x10_sp.we" clock="clk"/>
-          <T_setup value="18.91e-12" port="mem_2048x10_sp.out" clock="clk"/>
-
-          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_sp.addr" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_sp.data" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_sp.we" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_sp.out" clock="clk"/>
-
-          <delay_constant max="0.852e-9" in_port="mem_2048x10_sp.addr" out_port="mem_2048x10_sp.out"/>
-          <delay_constant max="0.852e-9" in_port="mem_2048x10_sp.data" out_port="mem_2048x10_sp.out"/>
-          <delay_constant max="0.852e-9" in_port="mem_2048x10_sp.we"   out_port="mem_2048x10_sp.out"/>
-
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x10_sp.addr">
-          </direct>
-          <direct name="data1" input="memory.data[9:0]" output="mem_2048x10_sp.data">
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_2048x10_sp.we">
-          </direct>
-          <direct name="dataout1" input="mem_2048x10_sp.out" output="memory.out[9:0]">
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_2048x10_sp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-
-      <!-- Specify true dual port mode next -->
-      <mode name="mem_1024x20_dp">
-        <pb_type name="mem_1024x20_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
-          <input name="addr1" num_pins="10" port_class="address1"/>
-          <input name="addr2" num_pins="10" port_class="address2"/>
-          <input name="data1" num_pins="20" port_class="data_in1"/>
-          <input name="data2" num_pins="20" port_class="data_in2"/>
-          <input name="we1" num_pins="1" port_class="write_en1"/>
-          <input name="we2" num_pins="1" port_class="write_en2"/>
-          <output name="out1" num_pins="20" port_class="data_out1"/>
-          <output name="out2" num_pins="20" port_class="data_out2"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-
-          <T_setup value="51.12e-12" port="mem_1024x20_dp.addr1" clock="clk"/>
-          <T_setup value="51.12e-12" port="mem_1024x20_dp.data1" clock="clk"/>
-          <T_setup value="51.12e-12" port="mem_1024x20_dp.we1" clock="clk"/>
-          <T_setup value="51.12e-12" port="mem_1024x20_dp.addr2" clock="clk"/>
-          <T_setup value="51.12e-12" port="mem_1024x20_dp.data2" clock="clk"/>
-          <T_setup value="51.12e-12" port="mem_1024x20_dp.we2" clock="clk"/>
-          <T_setup value="18.91e-12" port="mem_1024x20_dp.out1" clock="clk"/>
-          <T_setup value="18.91e-12" port="mem_1024x20_dp.out2" clock="clk"/>
-
-          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.addr1" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.data1" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.we1" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.addr2" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.data2" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.we2" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.out1" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_1024x20_dp.out2" clock="clk"/>
-
-          <delay_constant max="0.852e-9" in_port="mem_1024x20_dp.addr1" out_port="mem_1024x20_dp.out1"/>
-          <delay_constant max="0.852e-9" in_port="mem_1024x20_dp.data1" out_port="mem_1024x20_dp.out1"/>
-          <delay_constant max="0.852e-9" in_port="mem_1024x20_dp.we1" out_port="mem_1024x20_dp.out1"/>
-          <delay_constant max="0.852e-9" in_port="mem_1024x20_dp.addr2" out_port="mem_1024x20_dp.out2"/>
-          <delay_constant max="0.852e-9" in_port="mem_1024x20_dp.data2" out_port="mem_1024x20_dp.out2"/>
-          <delay_constant max="0.852e-9" in_port="mem_1024x20_dp.we2" out_port="mem_1024x20_dp.out2"/>
-
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x20_dp.addr1">
-          </direct>
-          <direct name="address2" input="memory.addr2[9:0]" output="mem_1024x20_dp.addr2">
-          </direct>
-          <direct name="data1" input="memory.data[19:0]" output="mem_1024x20_dp.data1">
-          </direct>
-          <direct name="data2" input="memory.data[39:20]" output="mem_1024x20_dp.data2">
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_1024x20_dp.we1">
-          </direct>
-          <direct name="writeen2" input="memory.we2" output="mem_1024x20_dp.we2">
-          </direct>
-          <direct name="dataout1" input="mem_1024x20_dp.out1" output="memory.out[19:0]">
-          </direct>
-          <direct name="dataout2" input="mem_1024x20_dp.out2" output="memory.out[39:20]">
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_1024x20_dp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-
-      <mode name="mem_2048x10_dp">
-        <pb_type name="mem_2048x10_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
-          <input name="addr1" num_pins="11" port_class="address1"/>
-          <input name="addr2" num_pins="11" port_class="address2"/>
-          <input name="data1" num_pins="10" port_class="data_in1"/>
-          <input name="data2" num_pins="10" port_class="data_in2"/>
-          <input name="we1" num_pins="1" port_class="write_en1"/>
-          <input name="we2" num_pins="1" port_class="write_en2"/>
-          <output name="out1" num_pins="10" port_class="data_out1"/>
-          <output name="out2" num_pins="10" port_class="data_out2"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-
-          <T_setup value="51.12e-12" port="mem_2048x10_dp.addr1" clock="clk"/>
-          <T_setup value="51.12e-12" port="mem_2048x10_dp.data1" clock="clk"/>
-          <T_setup value="51.12e-12" port="mem_2048x10_dp.we1" clock="clk"/>
-          <T_setup value="51.12e-12" port="mem_2048x10_dp.addr2" clock="clk"/>
-          <T_setup value="51.12e-12" port="mem_2048x10_dp.data2" clock="clk"/>
-          <T_setup value="51.12e-12" port="mem_2048x10_dp.we2" clock="clk"/>
-          <T_setup value="18.91e-12" port="mem_2048x10_dp.out1" clock="clk"/>
-          <T_setup value="18.91e-12" port="mem_2048x10_dp.out2" clock="clk"/>
-
-          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.addr1" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.data1" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.we1" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.addr2" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.data2" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.we2" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.out1" clock="clk"/>
-          <T_clock_to_Q max="60.32e-12" port="mem_2048x10_dp.out2" clock="clk"/>
-
-          <delay_constant max="0.852e-9" in_port="mem_2048x10_dp.addr1" out_port="mem_2048x10_dp.out1"/>
-          <delay_constant max="0.852e-9" in_port="mem_2048x10_dp.data1" out_port="mem_2048x10_dp.out1"/>
-          <delay_constant max="0.852e-9" in_port="mem_2048x10_dp.we1" out_port="mem_2048x10_dp.out1"/>
-          <delay_constant max="0.852e-9" in_port="mem_2048x10_dp.addr2" out_port="mem_2048x10_dp.out2"/>
-          <delay_constant max="0.852e-9" in_port="mem_2048x10_dp.data2" out_port="mem_2048x10_dp.out2"/>
-          <delay_constant max="0.852e-9" in_port="mem_2048x10_dp.we2" out_port="mem_2048x10_dp.out2"/>
-
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x10_dp.addr1">
-          </direct>
-          <direct name="address2" input="memory.addr2[10:0]" output="mem_2048x10_dp.addr2">
-          </direct>
-          <direct name="data1" input="memory.data[9:0]" output="mem_2048x10_dp.data1">
-          </direct>
-          <direct name="data2" input="memory.data[19:10]" output="mem_2048x10_dp.data2">
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_2048x10_dp.we1">
-          </direct>
-          <direct name="writeen2" input="memory.we2" output="mem_2048x10_dp.we2">
-          </direct>
-          <direct name="dataout1" input="mem_2048x10_dp.out1" output="memory.out[9:0]">
-          </direct>
-          <direct name="dataout2" input="mem_2048x10_dp.out2" output="memory.out[19:10]">
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_2048x10_dp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-    </pb_type>
-    <!-- Define fracturable memory end -->
-  </complexblocklist>
-
-  <switchblocklist>
-    <!-- Stratix IV uses a uni-directional routing architecture with a Driver Input Mux (DIM) size of 12 (i.e.
-           each wire can be driven by one of 12 block/outputs or wires) for the L4s.
-           
-           In the Stratix IV architecture the long wires (L16 here) are accessible only from the short wires, 
-           and are not connected to the block pins (i.e. connection blocks). Furthermore, they only connect 
-           to switch blocks every 4 LABs (to avoid expensive deep via stacks).
-           We approximate the L16 DIM size as 40:1 (in reality it is a pair of 20:1 (?) muxes with a 2:1 swap mux
-           in front, which has nearly the same connectivity as a full 40:1).
-
-           L4 wires
-           ================
-           At a channel width of 300 there are 260 L4/L4prime wires. At an effective Fc_out of 0.075 
-           and 40 LAB outputs this yeilds:
-
-                40 * 2 = 80 outputs per channel  [2 LABs per-channel]
-
-                80 * 0.075 = 6 outputs drive each L4 wire [output connection block]
-
-           This leaves:
-
-                12 - 6 = 6 inputs to the DIMs from other routing wires [switch block]
-
-           Since L4s connect at every switch block, there are:
-
-                260 L16 wires per channel + direction which can drive wires at a particular switchblock
-                (via switchpoints 0, 1, 2, 3)
-
-           And for each direction (260 wires) only:
-
-               260 / 4 = 65 wires starting/ending per channel + direction at each switch block
-               (i.e. from each direction, north/south/east/west, there are 32 L4s starting, and 32 L4s ending; + 1 wire for the 65th)
-
-           Which we allocate as follows:
-
-                L4
-                =====
-                straight-through connection: 2 (from L4 or L16)
-                clock-wise turn            : 2 (from L4 or L16)
-                counter-clock-wise turn    : 2 (from L4 or L16)
-
-           L16 wires
-           =========
-           At a channel width of 300 there are 40 L16 wires (20 in each direction), which do not connect to the input/output connection blocks.
-           This leaves 40 inputs to the DIM to select from routing wires (long wires use larger DIMs to improve reachability,
-           the area cost is relatively small since they are so rare).
-
-           Since L16s only connect at every 4th switch block there are:
-
-                40 / 4 = 10 L16 wires per channel (5 in each direction) which can drive wires at a particular switchblock
-                (via switchpoints 0, 4, 8, 12)
-
-           And for each direction (20 wires) only:
-
-               40 / 16 = 2.25 => 2 wires starting/ending per channel + direction at each switch block
-               (i.e. from each direction, north/south/east/west, there is one L16 starting, and one L16 ending)
-           
-           We assign the 40 DIM inputs as follows:
-
-                L16
-                =====
-                straight-through connection:  3 (from L16)
-                straight-through connection: 11 (from L4)
-                clock-wise turn            :  3 (from L16)
-                clock-wise turn            : 10 (from L4)
-                counter clock-wise turn    :  3 (from L16)
-                counter clock-wise turn    : 10 (from L4)
-
-           Switch pattern
-           ==============
-           This switch block is based on the Wilton switch block (see Page 103 of Steve Wilton's PhD Thesis 
-           "Architecture and Algorithms for Field-Programmable Gate Arrays with Embedded Memory", 1997):
-
-                left-to-top: W - t
-                top-to-right: t + 1
-                right-to-bottom: 2*W - 2 - t
-                bottom-to-left: t + 1
-                left-to-right: t
-                top-to-bottom: t
-
-           Since Wilton assumed bidirection routing (while we use unidirectional routing),
-           we mirror the clock-wise turns to match the conter-clock-wise specification.
-           -->
-    <switchblock name="wilton_turn_clockwise_core" type="unidir">
-      <switchblock_location type="CORE"/>
-      <switchfuncs>
-        <!-- Clock-wise turns -->
-        <func type="tl" formula="W-t"/>
-        <!-- top to left -->
-        <func type="rt" formula="t+1"/>
-        <!-- right to top -->
-        <func type="br" formula="2*W-2-t"/>
-        <!-- bottom to right -->
-        <func type="lb" formula="t+1"/>
-        <!-- left to bottom -->
-      </switchfuncs>
-      <!-- L16 drivers -->
-      <wireconn num_conns="3*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
-      <wireconn num_conns="10*to" from_type="L4" from_switchpoint="0" to_type="L16" to_switchpoint="0"/>
-      <!-- L4 drivers 
-
-               Driving from L16 (few) to L4 (many) preferr driving from end-point of L16, although since there are many they will
-               all be multiply connected.
-               
-               Driving from L4 (many) to L4 (many) shuffle the switchpoints so the L4's are driven from a variety of switchpoints.
-               Since the actual number L4s starting/ending are equal, using 'fixed' from_order would mean only switchpoint 0 -> 0
-               connections. A 'shuffled' order will mix-up the from switchpoints for more diversity.
-               -->
-      <wireconn num_conns="2*to" from_order="shuffled">
-        <from type="L16" switchpoint="0,12,8,4"/>
-        <from type="L4" switchpoint="0,1,2,3"/>
-        <to type="L4" switchpoint="0"/>
-      </wireconn>
-    </switchblock>
-    <switchblock name="wilton_turn_counter_clockwise_core" type="unidir">
-      <switchblock_location type="CORE"/>
-      <switchfuncs>
-        <!-- Counter-clock-wise turns -->
-        <func type="lt" formula="W-t"/>
-        <!-- left to top -->
-        <func type="tr" formula="t+1"/>
-        <!-- top to right -->
-        <func type="rb" formula="2*W-2-t"/>
-        <!-- right to bottom -->
-        <func type="bl" formula="t+1"/>
-        <!-- bottom to left -->
-      </switchfuncs>
-      <!-- L16 drivers -->
-      <wireconn num_conns="3*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
-      <wireconn num_conns="10*to" from_type="L4" from_switchpoint="0" to_type="L16" to_switchpoint="0"/>
-      <!-- L4 drivers 
-
-               Driving from L16 (few) to L4 (many) preferr driving from end-point of L16, although since there are many they will
-               all be multiply connected.
-               
-               Driving from L4 (many) to L4 (many) shuffle the switchpoints so the L4's are driven from a variety of switchpoints.
-               Since the actual number L4s starting/ending are equal, using 'fixed' from_order would mean only switchpoint 0 -> 0
-               connections. A 'shuffled' order will mix-up the from switchpoints for more diversity.
-
-               Note that a different from_switchpoints ordering is used to ensure a different shuffling occurs compared to 
-               wilton_turn_clockwise_core.
-               -->
-      <wireconn num_conns="2*to" from_order="shuffled">
-        <from type="L16" switchpoint="0,12,8,4"/>
-        <from type="L4" switchpoint="0,1,2,3"/>
-        <to type="L4" switchpoint="0"/>
-      </wireconn>
-    </switchblock>
-    <switchblock name="wilton_straight" type="unidir">
-      <switchblock_location type="EVERYWHERE"/>
-      <switchfuncs>
-        <!-- Straight -->
-        <func type="lr" formula="t"/>
-        <!-- left to right -->
-        <func type="tb" formula="t"/>
-        <!-- top to bottom -->
-        <func type="rl" formula="t"/>
-        <!-- right to left -->
-        <func type="bt" formula="t"/>
-        <!-- bottom to top -->
-      </switchfuncs>
-      <!-- L16 Drivers 
-                Note that we order the switchpoints in order of preference, since VPR currently
-                iterates through the source sets in order, such that we connect first to wires
-                ending at the switchblock (switchpoint 0), and then fallback to switchpoints
-                in decreasing distance from the drive point (if we have more to's than from's
-                it then wraps around).
-
-                Note also that we multiply the number of expected connections by 'to', since while usually
-                there is only one 'to' wire, ocasionally there may be more, and we want to ensure they all
-                get the same number of connections.
-
-                For L16->L16:
-                  We allow any valid switchpoint to be used as the 'from' point.
-                  Allow 'low' switchpoints like '4' may seem counter-intuitive (i.e. why not use a cheaper L4)
-                  this makes it easier to bypass once on the L16 network (e.g. to get around congestion).
-           -->
-      <wireconn num_conns="3*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
-      <wireconn num_conns="11*to" from_type="L4" from_switchpoint="0,3,2,1" to_type="L16" to_switchpoint="0"/>
-      <!-- L4 Drivers -->
-      <wireconn num_conns="2*to" from_order="shuffled">
-        <from type="L16" switchpoint="0,12,8,4"/>
-        <from type="L4" switchpoint="0"/>
-        <to type="L4" switchpoint="0"/>
-      </wireconn>
-      <!--<wireconn num_conns="1*to" from_type="L4" from_switchpoint="0" to_type="L4" to_switchpoint="0"/>-->
-      <!--<wireconn num_conns="1*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L4" to_switchpoint="0"/>-->
-    </switchblock>
-    <switchblock name="wilton_straight_corner" type="unidir">
-      <!-- Same as wilton straight, but turning around a corner -->
-      <switchblock_location type="CORNER"/>
-      <switchfuncs>
-        <!-- Counter-clock-wise turns -->
-        <func type="lt" formula="t"/>
-        <!-- left to top -->
-        <func type="tr" formula="t"/>
-        <!-- top to right -->
-        <func type="rb" formula="t"/>
-        <!-- right to bottom -->
-        <func type="bl" formula="t"/>
-        <!-- bottom to left -->
-        <!-- Clock-wise turns -->
-        <func type="tl" formula="t"/>
-        <!-- top to left -->
-        <func type="rt" formula="t"/>
-        <!-- right to top -->
-        <func type="br" formula="t"/>
-        <!-- bottom to right -->
-        <func type="lb" formula="t"/>
-        <!-- left to bottom -->
-      </switchfuncs>
-      <!-- L16 Drivers -->
-      <wireconn num_conns="3*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
-      <wireconn num_conns="11*to" from_type="L4" from_switchpoint="0,3,2,1" to_type="L16" to_switchpoint="0"/>
-      <!-- L4 Drivers -->
-      <wireconn num_conns="2*to" from_order="shuffled">
-        <from type="L16" switchpoint="0,12,8,4"/>
-        <from type="L4" switchpoint="0"/>
-        <to type="L4" switchpoint="0"/>
-      </wireconn>
-      <!--<wireconn num_conns="1*to" from_type="L4" from_switchpoint="0" to_type="L4" to_switchpoint="0"/>-->
-      <!--<wireconn num_conns="1*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L4" to_switchpoint="0"/>-->
-    </switchblock>
-    <switchblock name="wilton_turn_fringe" type="unidir">
-      <!-- Non-corner perimeter SBs -->
-      <switchblock_location type="FRINGE"/>
-      <switchfuncs>
-        <!-- Counter-clock-wise turns -->
-        <func type="lt" formula="W-t"/>
-        <!-- left to top -->
-        <func type="tr" formula="t+1"/>
-        <!-- top to right -->
-        <func type="rb" formula="2*W-2-t"/>
-        <!-- right to bottom -->
-        <func type="bl" formula="t+1"/>
-        <!-- bottom to left -->
-        <!-- Clock-wise turns -->
-        <func type="tl" formula="W-t"/>
-        <!-- top to left -->
-        <func type="rt" formula="t+1"/>
-        <!-- right to top -->
-        <func type="br" formula="2*W-2-t"/>
-        <!-- bottom to right -->
-        <func type="lb" formula="t+1"/>
-        <!-- left to bottom -->
-      </switchfuncs>
-      <!-- We use 'max' style connections here to ensure there are no dangling wires, otherwise like core turns -->
-      <!-- L16 drivers -->
-      <wireconn num_conns="3*max(from,to)" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
-      <wireconn num_conns="21*max(from,to)" from_type="L4" from_switchpoint="0" to_type="L16" to_switchpoint="0"/>
-      <!-- L4 drivers -->
-      <wireconn num_conns="1*max(from,to)" from_type="L16" from_switchpoint="0,12,8,4" from_order="fixed" to_type="L4" to_switchpoint="0"/>
-      <wireconn num_conns="1*max(from,to)" from_type="L4" from_switchpoint="0,1,2,3" from_order="shuffled" to_type="L4" to_switchpoint="0"/>
-    </switchblock>
-  </switchblocklist>
-
-  <clocks>
-    <clock buffer_size="auto" C_wire="2.5e-10"/>
-  </clocks>
-</architecture>
-
-
diff --git a/parmys-plugin/tests/raygentop/k6_frac_N10_frac_chain_mem32K_40nm.xml b/parmys-plugin/tests/raygentop/k6_frac_N10_frac_chain_mem32K_40nm.xml
deleted file mode 100644
index b8d26348a..000000000
--- a/parmys-plugin/tests/raygentop/k6_frac_N10_frac_chain_mem32K_40nm.xml
+++ /dev/null
@@ -1,1505 +0,0 @@
-<!-- 
-  Flagship Heterogeneous Architecture with Carry Chains for VTR 7.0.
-
-  - 40 nm technology
-  - General purpose logic block: 
-    K = 6, N = 10, fracturable 6 LUTs (can operate as one 6-LUT or two 5-LUTs with all 5 inputs shared) 
-    with optionally registered outputs
-    Each 5-LUT has an arithemtic mode that converts it to a single-bit adder with both inputs driven by 4-LUTs (both 4-LUTs share all 4 inputs)
-    Carry chain links to vertically adjacent logic blocks
-  - Memory size 32 Kbits, memory aspect ratios vary from a data width of 1 to data width of 64.  
-    Height = 6, found on every (8n+2)th column
-  - Multiplier modes: one 36x36, two 18x18, each 18x18 can also operate as two 9x9.  
-    Height = 4, found on every (8n+6)th column
-  - Routing architecture: L = 4, fc_in = 0.15, Fc_out = 0.1
-
-  Details on Modelling:
-
-  The electrical design of the architecture described here is NOT from an 
-  optimized, SPICED architecture.  Instead, we attempt to create a reasonable 
-  architecture file by using an existing commercial FPGA to approximate the area, 
-  delay, and power of the underlying components. This is combined with a reasonable 40 nm 
-  model of wiring and circuit design for low-level routing components, where available.
-  The resulting architecture has delays that roughly match a commercial 40 nm FPGA, but also 
-  has wiring electrical parameters that allow the wire lengths and switch patterns to be 
-  modified and you will still get reasonable delay results for the new architecture.
-  The following describes, in detail, how we obtained the various electrical values for this 
-  architecture.
-
-  Rmin for nmos and pmos, routing buffer sizes, and I/O pad delays are from the ifar 
-  architecture created by Ian Kuon: K06 N10 45nm fc 0.15 area-delay optimized architecture. 
-  (n10k06l04.fc15.area1delay1.cmos45nm.bptm.cmos45nm.xml)      
-  This routing architecture was optimized for 45 nm, and we have scaled it linearly to 40 nm to 
-  match the overall target (a 40 nm FPGA).
-
-  We obtain maximum delay numbers by measuring delays of routing, soft logic blocks, 
-  memories, and multipliers from test circuits on a Stratix IV GX device 
-  (EP4SGX230DF29C2X, i.e. fastest speed grade). Minimum delay values are calculated based on the
-  ratios between maximum and minimum values in Stratix IV GX device. For routing, we took the 
-  average delay of H4 and V4 wires.  Rmetal and Cmetal values for the routing wires were obtained 
-  from work done by Charles Chiasson. We use a 96 nm half-pitch (corresponding to mid-level metal 
-  stack 40 nm routing) and take the R and C data from the ITRS roadmap. 
-
-  For the general purpose logic block, we assume that the area and delays of the Stratix IV 
-  crossbar is close enough to the crossbar modelled here.  We use 40 inputs and 20 feedback lines in 
-  the cluster and a full crossbar, leading to 53:1 multiplexers in front of each BLE input.
-  Stratix IV uses 52 inputs and 20 feedback lines, but only a half-populated crossbar, leading to 
-  36:1 multiplexers.  We require 60 such multiplexers, while Stratix IV requires 88 for its more
-  complex fracturable BLEs + the extra control signals. We justify this rough approximation as follows: 
-  The Stratix IV crossbar has more inputs (72 vs. 60) and 
-  outputs (88 vs. 60) than our full crossbar which should increase its area and delay, but the 
-  Stratix IV crossbar is also 50% sparse (each mux is 36:1 instead of 53:1) which should reduce its 
-  area and delay.  The total number of crossbar switch points is roughly similar between the two 
-  architectures (3160 for SIV and 3600 for the academic architecture below), so we use the area 
-  & delay of the Stratix IV crossbar as a rough approximation of our crossbar.
-
-  For LUTs, we include LUT 
-  delays measured from Stratix IV which is dependant on the input used (ie. some 
-  LUT inputs are faster than others).  The CAD tools at the time of VTR 7 does 
-  not consider differences in LUT input delays.
-
-  Adder delays obtained as approximate values from a Stratix IV EP4SE230F29C3 device.  
-  Delay obtained by compiling a 256 bit adder (registered inputs and outputs, 
-  all pins except clock virtual) then measuring the delays in chip-planner, 
-  sumout delay = 0.271ns to 0.348 ns, intra-block carry delay = 0.011 ns, 
-  inter-block carry delay = 0.327 ns.  Given this data, I will approximate 
-  sumout 0.3 ns, intra-block carry-delay = 0.01 ns, and 
-  inter-block carry-delay = 0.16 ns (since Altera inter-block carry delay has 
-  overhead that we don't have, I'll approximate the delay of a simpler chain at 
-  one half what they have.  This is very rough, anything from 0.01ns to 0.327ns 
-  can be justified).
-
-  Logic block area numbers obtained by scaling overall tile area of a 65nm 
-  Stratix III device, (as given in Wong, Betz and Rose, FPGA 2011) to 40 nm, then subtracting out 
-  routing area at a channel width of 300. We use a channel width of 300 because it can route 
-  all the VTR 6.0 benchmark circuits with an approximately 20% safety margin, and is also close to the
-  total channel width of Stratix IV. Hence this channel width is close to the commercial practice of
-  choosing a width that provides high routability. The architecture can be routed at different channel
-  widths, but we estimate the tile size and hence the physical length of routing wires assuming
-  a channel width of 300.
-
-  Sanity checks employed:
-    1.  We confirmed the routing buffer delay is ~1/3rd of total routing delay at L = 4. This matches 
-        common electrical design.
-
-
-  Authors: Jason Luu, Jeff Goeders, Vaughn Betz
--->
-<architecture>
-  <!-- 
-       ODIN II specific config begins 
-       Describes the types of user-specified netlist blocks (in blif, this corresponds to 
-       ".model [type_of_block]") that this architecture supports.
-
-       Note: Basic LUTs, I/Os, and flip-flops are not included here as there are 
-       already special structures in blif (.names, .input, .output, and .latch) 
-       that describe them.
-  -->
-  <models>
-    <model name="multiply">
-      <input_ports>
-        <port name="a" combinational_sink_ports="out"/>
-        <port name="b" combinational_sink_ports="out"/>
-      </input_ports>
-      <output_ports>
-        <port name="out"/>
-      </output_ports>
-    </model>
-    <model name="single_port_ram">
-      <input_ports>
-        <port name="we" clock="clk"/>
-        <!-- control -->
-        <port name="addr" clock="clk"/>
-        <!-- address lines -->
-        <port name="data" clock="clk"/>
-        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
-        <port name="clk" is_clock="1"/>
-        <!-- memories are often clocked -->
-      </input_ports>
-      <output_ports>
-        <port name="out" clock="clk"/>
-        <!-- output can be broken down into smaller bit widths minimum size 1 -->
-      </output_ports>
-    </model>
-    <model name="dual_port_ram">
-      <input_ports>
-        <port name="we1" clock="clk"/>
-        <!-- write enable -->
-        <port name="we2" clock="clk"/>
-        <!-- write enable -->
-        <port name="addr1" clock="clk"/>
-        <!-- address lines -->
-        <port name="addr2" clock="clk"/>
-        <!-- address lines -->
-        <port name="data1" clock="clk"/>
-        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
-        <port name="data2" clock="clk"/>
-        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
-        <port name="clk" is_clock="1"/>
-        <!-- memories are often clocked -->
-      </input_ports>
-      <output_ports>
-        <port name="out1" clock="clk"/>
-        <!-- output can be broken down into smaller bit widths minimum size 1 -->
-        <port name="out2" clock="clk"/>
-        <!-- output can be broken down into smaller bit widths minimum size 1 -->
-      </output_ports>
-    </model>
-    <model name="adder">
-      <input_ports>
-        <port name="a" combinational_sink_ports="sumout cout"/>
-        <port name="b" combinational_sink_ports="sumout cout"/>
-        <port name="cin" combinational_sink_ports="sumout cout"/>
-      </input_ports>
-      <output_ports>
-        <port name="cout"/>
-        <port name="sumout"/>
-      </output_ports>
-    </model>
-  </models>
-  <tiles>
-    <tile name="io" area="0">
-      <sub_tile name="io" capacity="8">
-        <equivalent_sites>
-          <site pb_type="io" pin_mapping="direct"/>
-        </equivalent_sites>
-        <input name="outpad" num_pins="1"/>
-        <output name="inpad" num_pins="1"/>
-        <clock name="clock" num_pins="1"/>
-        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
-        <pinlocations pattern="custom">
-          <loc side="left">io.outpad io.inpad io.clock</loc>
-          <loc side="top">io.outpad io.inpad io.clock</loc>
-          <loc side="right">io.outpad io.inpad io.clock</loc>
-          <loc side="bottom">io.outpad io.inpad io.clock</loc>
-        </pinlocations>
-      </sub_tile>
-    </tile>
-    <tile name="clb" area="53894">
-      <sub_tile name="clb">
-        <equivalent_sites>
-          <site pb_type="clb" pin_mapping="direct"/>
-        </equivalent_sites>
-        <input name="I" num_pins="40" equivalent="full"/>
-        <input name="cin" num_pins="1"/>
-        <output name="O" num_pins="20" equivalent="none"/>
-        <output name="cout" num_pins="1"/>
-        <clock name="clk" num_pins="1"/>
-        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
-          <fc_override port_name="cin" fc_type="frac" fc_val="0"/>
-          <fc_override port_name="cout" fc_type="frac" fc_val="0"/>
-        </fc>
-        <pinlocations pattern="spread"/>
-      </sub_tile>
-    </tile>
-    <tile name="mult_36" height="4" area="396000">
-      <sub_tile name="mult_36">
-        <equivalent_sites>
-          <site pb_type="mult_36" pin_mapping="direct"/>
-        </equivalent_sites>
-        <input name="a" num_pins="36"/>
-        <input name="b" num_pins="36"/>
-        <output name="out" num_pins="72"/>
-        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
-        <pinlocations pattern="spread"/>
-      </sub_tile>
-    </tile>
-    <tile name="memory" height="6" area="548000">
-      <sub_tile name="memory">
-        <equivalent_sites>
-          <site pb_type="memory" pin_mapping="direct"/>
-        </equivalent_sites>
-        <input name="addr1" num_pins="15"/>
-        <input name="addr2" num_pins="15"/>
-        <input name="data" num_pins="64"/>
-        <input name="we1" num_pins="1"/>
-        <input name="we2" num_pins="1"/>
-        <output name="out" num_pins="64"/>
-        <clock name="clk" num_pins="1"/>
-        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
-        <pinlocations pattern="spread"/>
-      </sub_tile>
-    </tile>
-  </tiles>
-  <!-- ODIN II specific config ends -->
-  <!-- Physical descriptions begin -->
-  <layout>
-    <auto_layout aspect_ratio="1.0">
-      <!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
-      <perimeter type="io" priority="100"/>
-      <corners type="EMPTY" priority="101"/>
-      <!--Fill with 'clb'-->
-      <fill type="clb" priority="10"/>
-      <!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
-      <col type="mult_36" startx="6" starty="1" repeatx="8" priority="20"/>
-      <col type="EMPTY" startx="6" repeatx="8" starty="1" priority="19"/>
-      <!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
-      <col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
-      <col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
-    </auto_layout>
-  </layout>
-  <device>
-    <!-- VB & JL: Using Ian Kuon's transistor sizing and drive strength data for routing, at 40 nm. Ian used BPTM 
-			     models. We are modifying the delay values however, to include metal C and R, which allows more architecture
-			     experimentation. We are also modifying the relative resistance of PMOS to be 1.8x that of NMOS
-			     (vs. Ian's 3x) as 1.8x lines up with Jeff G's data from a 45 nm process (and is more typical of 
-			     45 nm in general). I'm upping the Rmin_nmos from Ian's just over 6k to nearly 9k, and dropping 
-			     RminW_pmos from 18k to 16k to hit this 1.8x ratio, while keeping the delays of buffers approximately
-			     lined up with Stratix IV. 
-			     We are using Jeff G.'s capacitance data for 45 nm (in tech/ptm_45nm).
-			     Jeff's tables list C in for transistors with widths in multiples of the minimum feature size (45 nm).
-			     The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply drive strength sizes in this file
-	                     by 2.5x when looking up in Jeff's tables.
-			     The delay values are lined up with Stratix IV, which has an architecture similar to this
-			     proposed FPGA, and which is also 40 nm 
-			     C_ipin_cblock: input capacitance of a track buffer, which VPR assumes is a single-stage
-			     4x minimum drive strength buffer. -->
-    <sizing R_minW_nmos="8926" R_minW_pmos="16067"/>
-    <!-- The grid_logic_tile_area below will be used for all blocks that do not explicitly set their own (non-routing)
-     	  area; set to 0 since we explicitly set the area of all blocks currently in this architecture file.
-	    -->
-    <area grid_logic_tile_area="0"/>
-    <chan_width_distr>
-      <x distr="uniform" peak="1.000000"/>
-      <y distr="uniform" peak="1.000000"/>
-    </chan_width_distr>
-    <switch_block type="wilton" fs="3"/>
-    <connection_block input_switch_name="ipin_cblock"/>
-  </device>
-  <switchlist>
-    <!-- VB: the mux_trans_size and buf_size data below is in minimum width transistor *areas*, assuming the purple
-           book area formula. This means the mux transistors are about 5x minimum drive strength.
-           We assume the first stage of the buffer is 3x min drive strength to be reasonable given the large 
-           mux transistors, and this gives a reasonable stage ratio of a bit over 5x to the second stage. We assume
-           the n and p transistors in the first stage are equal-sized to lower the buffer trip point, since it's fed
-           by a pass transistor mux. We can then reverse engineer the buffer second stage to hit the specified 
-           buf_size (really buffer area) - 16.2x minimum drive nmos and 1.8*16.2 = 29.2x minimum drive.
-           I then took the data from Jeff G.'s PTM modeling of 45 nm to get the Cin (gate of first stage) and Cout 
-           (diff of second stage) listed below.  Jeff's models are in tech/ptm_45nm, and are in min feature multiples.
-           The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply the drive strength sizes above by 
-           2.5x when looking up in Jeff's tables.
-           Finally, we choose a switch delay (58 ps) that leads to length 4 wires having a delay equal to that of SIV of 126 ps.
-           This also leads to the switch being 46% of the total wire delay, which is reasonable. -->
-    <switch type="mux" name="0" R="551" Cin=".77e-15" Cout="4e-15" Tdel="58e-12" mux_trans_size="2.630740" buf_size="27.645901"/>
-    <!--switch ipin_cblock resistance set to yeild for 4x minimum drive strength buffer-->
-    <switch type="mux" name="ipin_cblock" R="2231.5" Cout="0." Cin="1.47e-15" Tdel="7.247000e-11" mux_trans_size="1.222260" buf_size="auto"/>
-  </switchlist>
-  <segmentlist>
-    <!--- VB & JL: using ITRS metal stack data, 96 nm half pitch wires, which are intermediate metal width/space.  
-             With the 96 nm half pitch, such wires would take 60 um of height, vs. a 90 nm high (approximated as square) Stratix IV tile so this seems
-             reasonable. Using a tile length of 90 nm, corresponding to the length of a Stratix IV tile if it were square. -->
-    <segment freq="1.000000" length="4" type="unidir" Rmetal="101" Cmetal="22.5e-15">
-      <mux name="0"/>
-      <sb type="pattern">1 1 1 1 1</sb>
-      <cb type="pattern">1 1 1 1</cb>
-    </segment>
-  </segmentlist>
-  <directlist>
-    <direct name="adder_carry" from_pin="clb.cout" to_pin="clb.cin" x_offset="0" y_offset="-1" z_offset="0"/>
-  </directlist>
-  <complexblocklist>
-    <!-- Define I/O pads begin -->
-    <!-- Capacity is a unique property of I/Os, it is the maximum number of I/Os that can be placed at the same (X,Y) location on the FPGA -->
-    <!-- Not sure of the area of an I/O (varies widely), and it's not relevant to the design of the FPGA core, so we're setting it to 0. -->
-    <pb_type name="io">
-      <input name="outpad" num_pins="1"/>
-      <output name="inpad" num_pins="1"/>
-      <clock name="clock" num_pins="1"/>
-      <!-- IOs can operate as either inputs or outputs.
-	     Maximum delays below come from Ian Kuon. They are small, so they should be interpreted as
-	     the delays to and from registers in the I/O (and generally I/Os are registered 
-	     today and that is when you timing analyze them.
-
-		 Minimum delays are retrieved using a ratio of maximum and minimum times as seen in Quartus II
-		 in Stratix IV. The ratio of minimum value/maximum value is as follows:
-			inpad delay:  0.9239
-			outpad delay: 0.9545
-
-	     -->
-      <mode name="inpad">
-        <pb_type name="inpad" blif_model=".input" num_pb="1">
-          <output name="inpad" num_pins="1"/>
-        </pb_type>
-        <interconnect>
-          <direct name="inpad" input="inpad.inpad" output="io.inpad">
-            <delay_constant max="4.243e-11" min="3.92e-11" in_port="inpad.inpad" out_port="io.inpad"/>
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="outpad">
-        <pb_type name="outpad" blif_model=".output" num_pb="1">
-          <input name="outpad" num_pins="1"/>
-        </pb_type>
-        <interconnect>
-          <direct name="outpad" input="io.outpad" output="outpad.outpad">
-            <delay_constant max="1.394e-11" min="1.331e-11" in_port="io.outpad" out_port="outpad.outpad"/>
-          </direct>
-        </interconnect>
-      </mode>
-      <!-- Every input pin is driven by 15% of the tracks in a channel, every output pin is driven by 10% of the tracks in a channel -->
-      <!-- IOs go on the periphery of the FPGA, for consistency, 
-          make it physically equivalent on all sides so that only one definition of I/Os is needed.
-          If I do not make a physically equivalent definition, then I need to define 4 different I/Os, one for each side of the FPGA
-        -->
-      <!-- Place I/Os on the sides of the FPGA -->
-      <power method="ignore"/>
-    </pb_type>
-    <!-- Define I/O pads ends -->
-    <!-- Define general purpose logic block (CLB) begin -->
-    <!--- Area calculation: Total Stratix IV tile area is about 8100 um^2, and a minimum width transistor 
-	   area is 60 L^2 yields a tile area of 84375 MWTAs.
-	   Routing at W=300 is 30481 MWTAs, leaving us with a total of 53000 MWTAs for logic block area 
-	   This means that only 37% of our area is in the general routing, and 63% is inside the logic
-	   block. Note that the crossbar / local interconnect is considered part of the logic block
-	   area in this analysis. That is a lower proportion of of routing area than most academics
-	   assume, but note that the total routing area really includes the crossbar, which would push
-	   routing area up significantly, we estimate into the ~70% range. 
-	   -->
-    <pb_type name="clb">
-      <input name="I" num_pins="40" equivalent="full"/>
-      <input name="cin" num_pins="1"/>
-      <output name="O" num_pins="20" equivalent="none"/>
-      <output name="cout" num_pins="1"/>
-      <clock name="clk" num_pins="1"/>
-      <!-- Describe fracturable logic element.  
-             Each fracturable logic element has a 6-LUT that can alternatively operate as two 5-LUTs with shared inputs. 
-             The outputs of the fracturable logic element can be optionally registered
-        -->
-      <pb_type name="fle" num_pb="10">
-        <input name="in" num_pins="6"/>
-        <input name="cin" num_pins="1"/>
-        <output name="out" num_pins="2"/>
-        <output name="cout" num_pins="1"/>
-        <clock name="clk" num_pins="1"/>
-        <mode name="n2_lut5">
-          <pb_type name="lut5inter" num_pb="1">
-            <input name="in" num_pins="5"/>
-            <input name="cin" num_pins="1"/>
-            <output name="out" num_pins="2"/>
-            <output name="cout" num_pins="1"/>
-            <clock name="clk" num_pins="1"/>
-            <pb_type name="ble5" num_pb="2">
-              <input name="in" num_pins="5"/>
-              <input name="cin" num_pins="1"/>
-              <output name="out" num_pins="1"/>
-              <output name="cout" num_pins="1"/>
-              <clock name="clk" num_pins="1"/>
-              <mode name="blut5">
-                <pb_type name="flut5" num_pb="1">
-                  <input name="in" num_pins="5"/>
-                  <output name="out" num_pins="1"/>
-                  <clock name="clk" num_pins="1"/>
-                  <!-- Regular LUT mode -->
-                  <pb_type name="lut5" blif_model=".names" num_pb="1" class="lut">
-                    <input name="in" num_pins="5" port_class="lut_in"/>
-                    <output name="out" num_pins="1" port_class="lut_out"/>
-                    <!-- LUT timing using delay matrix -->
-                    <!-- These are the physical maximum delay inputs on a Stratix IV LUT but because VPR cannot do LUT rebalancing,
-                           we instead take the average of these numbers to get more stable results
-                        82e-12
-                        173e-12
-                        261e-12
-                        263e-12
-                        398e-12
-							The minimum delay/maximum delay ratio is 0.7395 in QII on Stratix IV. Hence, the minimum delay
-						is 0.7295 * the average of the maximum numbers
-                        -->
-                    <delay_matrix type="max" in_port="lut5.in" out_port="lut5.out">
-                        235e-12
-                        235e-12
-                        235e-12
-                        235e-12
-                        235e-12
-                      </delay_matrix>
-                    <delay_matrix type="min" in_port="lut5.in" out_port="lut5.out">
-                        174e-12
-                        174e-12
-                        174e-12
-                        174e-12
-                        174e-12
-                      </delay_matrix>
-                  </pb_type>
-                  <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
-                    <input name="D" num_pins="1" port_class="D"/>
-                    <output name="Q" num_pins="1" port_class="Q"/>
-                    <clock name="clk" num_pins="1" port_class="clock"/>
-                    <T_setup value="66e-12" port="ff.D" clock="clk"/>
-                    <T_hold value="37e-12" port="ff.D" clock="clk"/>
-                    <T_clock_to_Q max="124e-12" min="60e-12" port="ff.Q" clock="clk"/>
-                  </pb_type>
-                  <interconnect>
-                    <direct name="direct1" input="flut5.in" output="lut5.in"/>
-                    <direct name="direct2" input="lut5.out" output="ff.D">
-                      <pack_pattern name="ble5" in_port="lut5.out" out_port="ff.D"/>
-                    </direct>
-                    <direct name="direct3" input="flut5.clk" output="ff.clk"/>
-                    <mux name="mux1" input="ff.Q lut5.out" output="flut5.out">
-                      <delay_constant max="25e-12" min="24e-12" in_port="lut5.out" out_port="flut5.out"/>
-                      <delay_constant max="45e-12" min="27e-12" in_port="ff.Q" out_port="flut5.out"/>
-                    </mux>
-                  </interconnect>
-                </pb_type>
-                <interconnect>
-                  <direct name="direct1" input="ble5.in" output="flut5.in"/>
-                  <direct name="direct2" input="ble5.clk" output="flut5.clk"/>
-                  <direct name="direct3" input="flut5.out" output="ble5.out"/>
-                </interconnect>
-              </mode>
-              <mode name="arithmetic">
-                <pb_type name="arithmetic" num_pb="1">
-                  <input name="in" num_pins="4"/>
-                  <input name="cin" num_pins="1"/>
-                  <output name="out" num_pins="1"/>
-                  <output name="cout" num_pins="1"/>
-                  <clock name="clk" num_pins="1"/>
-                  <!-- Special dual-LUT mode that drives adder only -->
-                  <pb_type name="lut4" blif_model=".names" num_pb="2" class="lut">
-                    <input name="in" num_pins="4" port_class="lut_in"/>
-                    <output name="out" num_pins="1" port_class="lut_out"/>
-                    <!-- LUT timing using delay matrix -->
-                    <!-- These are the physical delay inputs on a Stratix IV LUT but because VPR cannot do LUT rebalancing,
-                             we instead take the average of these numbers to get more stable results
-                        82e-12
-                        173e-12
-                        261e-12
-                        263e-12
-							The minimum delay/maximum delay ratio is 0.7395 in QII on Stratix IV. Hence, the minimum delay
-						is 0.7295 * the average of the maximum numbers
-                        -->
-                    <delay_matrix type="max" in_port="lut4.in" out_port="lut4.out">
-                        195e-12
-                        195e-12
-                        195e-12
-                        195e-12
-                      </delay_matrix>
-                    <delay_matrix type="min" in_port="lut4.in" out_port="lut4.out">
-                        144e-12
-                        144e-12
-                        144e-12
-                        144e-12
-                      </delay_matrix>
-                  </pb_type>
-                  <!-- The ratio between minimum and maximum delays in StratixIV for data ports to sumout
-							is 0.6809 and cin to sumout is 0.6969-->
-                  <pb_type name="adder" blif_model=".subckt adder" num_pb="1">
-                    <input name="a" num_pins="1"/>
-                    <input name="b" num_pins="1"/>
-                    <input name="cin" num_pins="1"/>
-                    <output name="cout" num_pins="1"/>
-                    <output name="sumout" num_pins="1"/>
-                    <delay_constant max="0.3e-9" min="0.2043e-9" in_port="adder.a" out_port="adder.sumout"/>
-                    <delay_constant max="0.3e-9" min="0.2043e-9" in_port="adder.b" out_port="adder.sumout"/>
-                    <delay_constant max="0.3e-9" min="0.2043e-9" in_port="adder.cin" out_port="adder.sumout"/>
-                    <delay_constant max="0.3e-9" min="0.2043e-9" in_port="adder.a" out_port="adder.cout"/>
-                    <delay_constant max="0.3e-9" min="0.2043e-9" in_port="adder.b" out_port="adder.cout"/>
-                    <delay_constant max="0.01e-9" min="6.9797e-12" in_port="adder.cin" out_port="adder.cout"/>
-                  </pb_type>
-                  <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
-                    <input name="D" num_pins="1" port_class="D"/>
-                    <output name="Q" num_pins="1" port_class="Q"/>
-                    <clock name="clk" num_pins="1" port_class="clock"/>
-                    <T_setup value="66e-12" port="ff.D" clock="clk"/>
-                    <T_hold value="37e-12" port="ff.D" clock="clk"/>
-                    <T_clock_to_Q max="124e-12" min="60e-12" port="ff.Q" clock="clk"/>
-                  </pb_type>
-                  <interconnect>
-                    <direct name="clock" input="arithmetic.clk" output="ff.clk"/>
-                    <direct name="lut_in1" input="arithmetic.in[3:0]" output="lut4[0:0].in[3:0]"/>
-                    <direct name="lut_in2" input="arithmetic.in[3:0]" output="lut4[1:1].in[3:0]"/>
-                    <direct name="lut_to_add1" input="lut4[0:0].out" output="adder.a">
-                      </direct>
-                    <direct name="lut_to_add2" input="lut4[1:1].out" output="adder.b">
-                      </direct>
-                    <direct name="add_to_ff" input="adder.sumout" output="ff.D">
-                      <pack_pattern name="chain" in_port="adder.sumout" out_port="ff.D"/>
-                    </direct>
-                    <direct name="carry_in" input="arithmetic.cin" output="adder.cin">
-                      <pack_pattern name="chain" in_port="arithmetic.cin" out_port="adder.cin"/>
-                    </direct>
-                    <direct name="carry_out" input="adder.cout" output="arithmetic.cout">
-                      <pack_pattern name="chain" in_port="adder.cout" out_port="arithmetic.cout"/>
-                    </direct>
-                    <mux name="sumout" input="ff.Q adder.sumout" output="arithmetic.out">
-                      <delay_constant max="25e-12" min="24e-12" in_port="adder.sumout" out_port="arithmetic.out"/>
-                      <delay_constant max="45e-12" min="27e-12" in_port="ff.Q" out_port="arithmetic.out"/>
-                    </mux>
-                  </interconnect>
-                </pb_type>
-                <interconnect>
-                  <direct name="direct1" input="ble5.in[3:0]" output="arithmetic.in"/>
-                  <direct name="carry_in" input="ble5.cin" output="arithmetic.cin">
-                    <pack_pattern name="chain" in_port="ble5.cin" out_port="arithmetic.cin"/>
-                  </direct>
-                  <direct name="carry_out" input="arithmetic.cout" output="ble5.cout">
-                    <pack_pattern name="chain" in_port="arithmetic.cout" out_port="ble5.cout"/>
-                  </direct>
-                  <direct name="direct2" input="ble5.clk" output="arithmetic.clk"/>
-                  <direct name="direct3" input="arithmetic.out" output="ble5.out"/>
-                </interconnect>
-              </mode>
-            </pb_type>
-            <interconnect>
-              <direct name="direct1" input="lut5inter.in" output="ble5[0:0].in"/>
-              <direct name="direct2" input="lut5inter.in" output="ble5[1:1].in"/>
-              <direct name="direct3" input="ble5[1:0].out" output="lut5inter.out"/>
-              <direct name="carry_in" input="lut5inter.cin" output="ble5[0:0].cin">
-                <pack_pattern name="chain" in_port="lut5inter.cin" out_port="ble5[0:0].cin"/>
-              </direct>
-              <direct name="carry_out" input="ble5[1:1].cout" output="lut5inter.cout">
-                <pack_pattern name="chain" in_port="ble5[1:1].cout" out_port="lut5inter.cout"/>
-              </direct>
-              <direct name="carry_link" input="ble5[0:0].cout" output="ble5[1:1].cin">
-                <pack_pattern name="chain" in_port="ble5[0:0].cout" out_port="ble5[1:1].cout"/>
-              </direct>
-              <complete name="complete1" input="lut5inter.clk" output="ble5[1:0].clk"/>
-            </interconnect>
-          </pb_type>
-          <interconnect>
-            <direct name="direct1" input="fle.in[4:0]" output="lut5inter.in"/>
-            <direct name="direct2" input="lut5inter.out" output="fle.out"/>
-            <direct name="direct3" input="fle.clk" output="lut5inter.clk"/>
-            <direct name="carry_in" input="fle.cin" output="lut5inter.cin">
-              <pack_pattern name="chain" in_port="fle.cin" out_port="lut5inter.cin"/>
-            </direct>
-            <direct name="carry_out" input="lut5inter.cout" output="fle.cout">
-              <pack_pattern name="chain" in_port="lut5inter.cout" out_port="fle.cout"/>
-            </direct>
-          </interconnect>
-        </mode>
-        <!-- n2_lut5 -->
-        <mode name="n1_lut6">
-          <pb_type name="ble6" num_pb="1">
-            <input name="in" num_pins="6"/>
-            <output name="out" num_pins="1"/>
-            <clock name="clk" num_pins="1"/>
-            <pb_type name="lut6" blif_model=".names" num_pb="1" class="lut">
-              <input name="in" num_pins="6" port_class="lut_in"/>
-              <output name="out" num_pins="1" port_class="lut_out"/>
-              <!-- LUT timing using delay matrix -->
-              <!-- These are the physical delay inputs on a Stratix IV LUT but because VPR cannot do LUT rebalancing,
-                       we instead take the average of these numbers to get more stable results
-                  82e-12
-                  173e-12
-                  261e-12
-                  263e-12
-                  398e-12
-                  397e-12
-
-					The minimum delay/maximum delay ratio is 0.7395 in QII on Stratix IV. Hence, the minimum delay
-						is 0.7295 * the average of the maximum numbers
-                  -->
-              <delay_matrix type="max" in_port="lut6.in" out_port="lut6.out">
-                  261e-12
-                  261e-12
-                  261e-12
-                  261e-12
-                  261e-12
-                  261e-12
-                </delay_matrix>
-              <delay_matrix type="min" in_port="lut6.in" out_port="lut6.out">
-                  174e-12
-                  174e-12
-                  174e-12
-                  174e-12
-                  174e-12
-                  174e-12
-                </delay_matrix>
-            </pb_type>
-            <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
-              <input name="D" num_pins="1" port_class="D"/>
-              <output name="Q" num_pins="1" port_class="Q"/>
-              <clock name="clk" num_pins="1" port_class="clock"/>
-              <T_setup value="66e-12" port="ff.D" clock="clk"/>
-              <T_hold value="37e-12" port="ff.D" clock="clk"/>
-              <T_clock_to_Q max="124e-12" min="60e-12" port="ff.Q" clock="clk"/>
-            </pb_type>
-            <interconnect>
-              <direct name="direct1" input="ble6.in" output="lut6[0:0].in"/>
-              <direct name="direct2" input="lut6.out" output="ff.D">
-                <pack_pattern name="ble6" in_port="lut6.out" out_port="ff.D"/>
-              </direct>
-              <direct name="direct3" input="ble6.clk" output="ff.clk"/>
-              <mux name="mux1" input="ff.Q lut6.out" output="ble6.out">
-                <delay_constant max="25e-12" min="24e-12" in_port="lut6.out" out_port="ble6.out"/>
-                <delay_constant max="45e-12" min="27e-12" in_port="ff.Q" out_port="ble6.out"/>
-              </mux>
-            </interconnect>
-          </pb_type>
-          <interconnect>
-            <direct name="direct1" input="fle.in" output="ble6.in"/>
-            <direct name="direct2" input="ble6.out" output="fle.out[0:0]"/>
-            <direct name="direct3" input="fle.clk" output="ble6.clk"/>
-          </interconnect>
-        </mode>
-        <!-- n1_lut6 -->
-      </pb_type>
-      <interconnect>
-        <!-- We use a full crossbar to get logical equivalence at inputs of CLB 
-           The delays below come from Stratix IV. the delay through a connection block
-           input mux + the crossbar in Stratix IV is 167 ps. We already have a 72 ps 
-           delay on the connection block input mux (modeled by Ian Kuon), so the remaining
-           delay within the crossbar is 95 ps. 
-		   For the minimum delays, we have the delay through a connection block input mux +
-		   the crossbar in Stratix IV is 144. Subtracting the 72 ps leaves 72 ps remaining.
-           The max delays of cluster feedbacks in Stratix IV is 100 ps, when driven by a LUT.
-           Since all our outputs LUT outputs go to a BLE output, and have a delay of 
-           25 ps to do so, we subtract 25 ps from the 100 ps delay of a feedback
-           to get the part that should be marked on the crossbar. For the minimum delay,
-  		   the value in Stratix IV is 93 ps, subtracting the 24 ps leaves 69 ps.-->
-        <complete name="crossbar" input="clb.I fle[9:0].out" output="fle[9:0].in">
-          <delay_constant max="95e-12" min="72e-12" in_port="clb.I" out_port="fle[9:0].in"/>
-          <delay_constant max="75e-12" min="69e-12" in_port="fle[9:0].out" out_port="fle[9:0].in"/>
-        </complete>
-        <complete name="clks" input="clb.clk" output="fle[9:0].clk">
-          </complete>
-        <!-- This way of specifying direct connection to clb outputs is important because this architecture uses automatic spreading of opins.  
-                 By grouping to output pins in this fashion, if a logic block is completely filled by 6-LUTs, 
-                 then the outputs those 6-LUTs take get evenly distributed across all four sides of the CLB instead of clumped on two sides (which is what happens with a more
-                 naive specification).
-          -->
-        <direct name="clbouts1" input="fle[9:0].out[0:0]" output="clb.O[9:0]"/>
-        <direct name="clbouts2" input="fle[9:0].out[1:1]" output="clb.O[19:10]"/>
-        <!-- Carry chain links -->
-        <direct name="carry_in" input="clb.cin" output="fle[0:0].cin">
-          <!-- Put all inter-block carry chain delay on this one edge -->
-          <delay_constant max="0.16e-9" min="0.11e-9" in_port="clb.cin" out_port="fle[0:0].cin"/>
-          <pack_pattern name="chain" in_port="clb.cin" out_port="fle[0:0].cin"/>
-        </direct>
-        <direct name="carry_out" input="fle[9:9].cout" output="clb.cout">
-          <pack_pattern name="chain" in_port="fle[9:9].cout" out_port="clb.cout"/>
-        </direct>
-        <direct name="carry_link" input="fle[8:0].cout" output="fle[9:1].cin">
-          <pack_pattern name="chain" in_port="fle[8:0].cout" out_port="fle[9:1].cin"/>
-        </direct>
-      </interconnect>
-    </pb_type>
-    <!-- Define general purpose logic block (CLB) ends -->
-    <!-- Define fracturable multiplier begin -->
-    <!-- This multiplier can operate as a 36x36 multiplier that can fracture to two 18x18 multipliers each of which can further fracture to two 9x9 multipliers 
-	   For delay modelling, the 36x36 DSP multiplier in Stratix IV has a maximum delay of 1.523 ns + 1.93 ns
-	    = 3.45 ns. The average difference between the maximum and minimum values of a dsp mac out in 36 bit multiply
-		mode is 0.51. Hence, the minimum delay is modeled as 0.776 ns + 0.984 ns = 1.760 ns.
- 		The 18x18 mode doesn't need to sum four 18x18 multipliers, so it is a bit
-	   faster: 1.523 ns for the multiplier, and 1.09 ns for the multiplier output block.
-	    For the input and output interconnect delays, unlike Stratix IV, we don't
-	   have any routing/logic flexibility (crossbars) at the inputs.  There is some output muxing
-	   in Stratix IV and this architecture to select which multiplier outputs should go out (e.g.
-	   9x9 outputs, 18x18 or 36x36) so those are very close between the two architectures. 
-	   We take the conservative (slightly pessimistic)
-           approach modelling the input as the same as the Stratix IV input delay and the output delay the same as the Stratix IV DSP out delay.
-		   
-	   We estimate block area by using the published Stratix III data (which is architecturally identical to Stratix IV)
-	      (H. Wong, V. Betz and J. Rose, "Comparing FPGA vs. Custom CMOS and the Impact on Processor Microarchitecture", FPGA 2011) of 0.2623 
-		  mm^2 and scaling from 65 to 40 nm to obtain 0.0993 mm^2. That area is for a DSP block with approximately 2x the functionality of 
-		  the block we use (can implement two 36x36 multiplies instead of our 1, eight 18x18 multiplies instead of our 4, etc.). Hence we 
-		  divide the area by 2 to obtain 0.0497 mm^2. One minimum-width transistor units = 60 L^2 (where L = 40 nm), so is 518,000 MWTUS. 
-		  That area includes routing and the connection block input muxes.  Our DSP block is four 
-		  rows high, and hence includes four horizontal routing channel segments and four vertical ones, which is 4x the routing of a logic 
-		  block (single tile). It also includes 3.6x the outputs of a logic block, and 1.8x the inputs. Hence a slight overestimate of the routing
-		  area associated with our DSP block is four times that of a logic tile, where the routing area of a logic tile was calculated above (at W = 300)
-		  as 30481 MWTAs. Hence the (core, non-routing) area our DSP block is approximately 518,000 - 4 * 30,481 = 396,000 MWTUs.
-      -->
-    <pb_type name="mult_36">
-      <input name="a" num_pins="36"/>
-      <input name="b" num_pins="36"/>
-      <output name="out" num_pins="72"/>
-      <mode name="two_divisible_mult_18x18">
-        <pb_type name="divisible_mult_18x18" num_pb="2">
-          <input name="a" num_pins="18"/>
-          <input name="b" num_pins="18"/>
-          <output name="out" num_pins="36"/>
-          <!-- Model 9x9 delay and 18x18 delay as the same.  9x9 could be faster, but in Stratix IV
-	          isn't, presumably because the multiplier layout is really optimized for 18x18.
-		-->
-          <mode name="two_mult_9x9">
-            <pb_type name="mult_9x9_slice" num_pb="2">
-              <input name="A_cfg" num_pins="9"/>
-              <input name="B_cfg" num_pins="9"/>
-              <output name="OUT_cfg" num_pins="18"/>
-              <pb_type name="mult_9x9" blif_model=".subckt multiply" num_pb="1">
-                <input name="a" num_pins="9"/>
-                <input name="b" num_pins="9"/>
-                <output name="out" num_pins="18"/>
-                <delay_constant max="1.523e-9" min="0.776e-9" in_port="mult_9x9.a" out_port="mult_9x9.out"/>
-                <delay_constant max="1.523e-9" min="0.776e-9" in_port="mult_9x9.b" out_port="mult_9x9.out"/>
-              </pb_type>
-              <interconnect>
-                <direct name="a2a" input="mult_9x9_slice.A_cfg" output="mult_9x9.a">
-                </direct>
-                <direct name="b2b" input="mult_9x9_slice.B_cfg" output="mult_9x9.b">
-                </direct>
-                <direct name="out2out" input="mult_9x9.out" output="mult_9x9_slice.OUT_cfg">
-                </direct>
-              </interconnect>
-              <power method="pin-toggle">
-                <port name="A_cfg" energy_per_toggle="1.45e-12"/>
-                <port name="B_cfg" energy_per_toggle="1.45e-12"/>
-                <static_power power_per_instance="0.0"/>
-              </power>
-            </pb_type>
-            <interconnect>
-              <direct name="a2a" input="divisible_mult_18x18.a" output="mult_9x9_slice[1:0].A_cfg">
-              </direct>
-              <direct name="b2b" input="divisible_mult_18x18.b" output="mult_9x9_slice[1:0].B_cfg">
-              </direct>
-              <direct name="out2out" input="mult_9x9_slice[1:0].OUT_cfg" output="divisible_mult_18x18.out">
-              </direct>
-            </interconnect>
-          </mode>
-          <mode name="mult_18x18">
-            <pb_type name="mult_18x18_slice" num_pb="1">
-              <input name="A_cfg" num_pins="18"/>
-              <input name="B_cfg" num_pins="18"/>
-              <output name="OUT_cfg" num_pins="36"/>
-              <pb_type name="mult_18x18" blif_model=".subckt multiply" num_pb="1">
-                <input name="a" num_pins="18"/>
-                <input name="b" num_pins="18"/>
-                <output name="out" num_pins="36"/>
-                <delay_constant max="1.523e-9" min="0.776e-9" in_port="mult_18x18.a" out_port="mult_18x18.out"/>
-                <delay_constant max="1.523e-9" min="0.776e-9" in_port="mult_18x18.b" out_port="mult_18x18.out"/>
-              </pb_type>
-              <interconnect>
-                <direct name="a2a" input="mult_18x18_slice.A_cfg" output="mult_18x18.a">
-                </direct>
-                <direct name="b2b" input="mult_18x18_slice.B_cfg" output="mult_18x18.b">
-                </direct>
-                <direct name="out2out" input="mult_18x18.out" output="mult_18x18_slice.OUT_cfg">
-                </direct>
-              </interconnect>
-              <power method="pin-toggle">
-                <port name="A_cfg" energy_per_toggle="1.09e-12"/>
-                <port name="B_cfg" energy_per_toggle="1.09e-12"/>
-                <static_power power_per_instance="0.0"/>
-              </power>
-            </pb_type>
-            <interconnect>
-              <direct name="a2a" input="divisible_mult_18x18.a" output="mult_18x18_slice.A_cfg">
-              </direct>
-              <direct name="b2b" input="divisible_mult_18x18.b" output="mult_18x18_slice.B_cfg">
-              </direct>
-              <direct name="out2out" input="mult_18x18_slice.OUT_cfg" output="divisible_mult_18x18.out">
-              </direct>
-            </interconnect>
-          </mode>
-          <power method="sum-of-children"/>
-        </pb_type>
-        <interconnect>
-          <!-- Stratix IV input delay of 207ps is conservative for this architecture because this architecture does not have an input crossbar in the multiplier. 
-		   Subtract 72.5 ps delay, which is already in the connection block input mux, leading 134 ps
-				The interconnect difference for DSP blocks is 0.5523, which leads to a minimum delay of 74 ps
-              -->
-          <direct name="a2a" input="mult_36.a" output="divisible_mult_18x18[1:0].a">
-            <delay_constant max="134e-12" min="74e-12" in_port="mult_36.a" out_port="divisible_mult_18x18[1:0].a"/>
-          </direct>
-          <direct name="b2b" input="mult_36.b" output="divisible_mult_18x18[1:0].b">
-            <delay_constant max="134e-12" min="74e-12" in_port="mult_36.b" out_port="divisible_mult_18x18[1:0].b"/>
-          </direct>
-          <direct name="out2out" input="divisible_mult_18x18[1:0].out" output="mult_36.out">
-            <delay_constant max="1.09e-9" min="74e-12" in_port="divisible_mult_18x18[1:0].out" out_port="mult_36.out"/>
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mult_36x36">
-        <pb_type name="mult_36x36_slice" num_pb="1">
-          <input name="A_cfg" num_pins="36"/>
-          <input name="B_cfg" num_pins="36"/>
-          <output name="OUT_cfg" num_pins="72"/>
-          <pb_type name="mult_36x36" blif_model=".subckt multiply" num_pb="1">
-            <input name="a" num_pins="36"/>
-            <input name="b" num_pins="36"/>
-            <output name="out" num_pins="72"/>
-            <delay_constant max="1.523e-9" min="0.776e-9" in_port="mult_36x36.a" out_port="mult_36x36.out"/>
-            <delay_constant max="1.523e-9" min="0.776e-9" in_port="mult_36x36.b" out_port="mult_36x36.out"/>
-          </pb_type>
-          <interconnect>
-            <direct name="a2a" input="mult_36x36_slice.A_cfg" output="mult_36x36.a">
-            </direct>
-            <direct name="b2b" input="mult_36x36_slice.B_cfg" output="mult_36x36.b">
-            </direct>
-            <direct name="out2out" input="mult_36x36.out" output="mult_36x36_slice.OUT_cfg">
-            </direct>
-          </interconnect>
-          <power method="pin-toggle">
-            <port name="A_cfg" energy_per_toggle="2.13e-12"/>
-            <port name="B_cfg" energy_per_toggle="2.13e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <!-- Stratix IV input delay of 207ps is conservative for this architecture because this architecture does not have an input crossbar in the multiplier. 
-		   Subtract 72.5 ps delay, which is already in the connection block input mux, leading
-		   to a 134 ps delay.
-				The interconnect difference for DSP blocks is 0.5523, which leads to a minimum delay of 74 ps
-              -->
-          <direct name="a2a" input="mult_36.a" output="mult_36x36_slice.A_cfg">
-            <delay_constant max="134e-12" min="74e-12" in_port="mult_36.a" out_port="mult_36x36_slice.A_cfg"/>
-          </direct>
-          <direct name="b2b" input="mult_36.b" output="mult_36x36_slice.B_cfg">
-            <delay_constant max="134e-12" min="74e-12" in_port="mult_36.b" out_port="mult_36x36_slice.B_cfg"/>
-          </direct>
-          <direct name="out2out" input="mult_36x36_slice.OUT_cfg" output="mult_36.out">
-            <delay_constant max="1.93e-9" min="74e-12" in_port="mult_36x36_slice.OUT_cfg" out_port="mult_36.out"/>
-          </direct>
-        </interconnect>
-      </mode>
-      <!-- Place this multiplier block every 8 columns from (and including) the sixth column -->
-      <power method="sum-of-children"/>
-    </pb_type>
-    <!-- Define fracturable multiplier end -->
-    <!-- Define fracturable memory begin -->
-    <!-- 32 Kb Memory that can operate from 512x64 to 32Kx1 for single-port mode and 1024x32 to 32Kx1 for dual-port mode.  
-           Area and max delay based off Stratix IV 9K and 144K memories (delay from linear interpolation, Tsu(483 ps, 636 ps) Tco(1084ps, 1969ps)).  
-
-		   uTh/Tsu ratio = 0.468, uTco min/max ratio = 0.97
-           uTh = 226ps, 298ps
-           Min uTco = 1051ps, 1909ps
-           Max input delay = 204ps (from Stratix IV LAB line) - 72ps (this architecture does not lump connection box delay in internal delay)
-           Max output delay = M9K buffer 50ps. 
-		   Min input delay = 160ps (from Stratix IV Lab line) - 72ps
-		   Min output delay = 46ps (M9K buffer min/max ratio = 0.9286)
-		   
-		   Area is obtained by appropriately scaling and adjusting the published Stratix III (which is architecturally identical to Stratix IV)
-		   data from H. Wong, V. Betz and J. Rose, "Comparing FPGA vs. Custom CMOS and the Impact on Processor Microarchitecture", FPGA 2011.
-		   Linearly interpolating (by bit count) between the M9k and M144k areas to obtain an M32k (our RAM size) point yields a 65 nm area of
-		   of 0.153 mm^2. Interpolating based on port count between the RAMs would instead yield an area of 0.209 mm^2 for our 32 kB RAM; since 
-		   bit count accounts for more area than ports for a RAM this size we choose the bit count interpolation; however, since the port interpolation
-		   is not radically different this also gives us confidence that interpolating based on bits is OK, but slightly underpredicts area.
-		   Scaling to 40 nm^2 yields .0579 mm^2, and converting to MWTUs at 60 L^2 / MWTU yields 604,000 MWTUs. This includes routing. A Stratix IV
-		   M9K RAM is one row high and hence has one routing tile (one horizonal and one vertical routing segment area). An M144k RAM has 8 such tiles.
-		   Linearly interpolating on
-		   bits to 32 kb yields 2.2 routing tiles incorporated in the area number above. The inter-block routing represents 30% of the area of a logic 
-		   tile according to D. Lewis et al, "Architectural Enhancements in Stratix V," FPGA 2013. Hence we should subtract 0.3 * 2.2 * 84,375 MWTUs to
-		   obtain a RAM core area (not including inter-block routing) of 548,000 MWTU areas for our 32 kb RAM in a 40 nm process.
-      -->
-    <pb_type name="memory">
-      <input name="addr1" num_pins="15"/>
-      <input name="addr2" num_pins="15"/>
-      <input name="data" num_pins="64"/>
-      <input name="we1" num_pins="1"/>
-      <input name="we2" num_pins="1"/>
-      <output name="out" num_pins="64"/>
-      <clock name="clk" num_pins="1"/>
-      <!-- Specify single port mode first -->
-      <mode name="mem_512x64_sp">
-        <pb_type name="mem_512x64_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
-          <input name="addr" num_pins="9" port_class="address"/>
-          <input name="data" num_pins="64" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="64" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_512x64_sp.addr" clock="clk"/>
-          <T_setup value="509e-12" port="mem_512x64_sp.data" clock="clk"/>
-          <T_setup value="509e-12" port="mem_512x64_sp.we" clock="clk"/>
-          <T_hold value="238e-12" port="mem_512x64_sp.addr" clock="clk"/>
-          <T_hold value="238e-12" port="mem_512x64_sp.data" clock="clk"/>
-          <T_hold value="238e-12" port="mem_512x64_sp.we" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_512x64_sp.out" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="9.0e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[8:0]" output="mem_512x64_sp.addr">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[8:0]" out_port="mem_512x64_sp.addr"/>
-          </direct>
-          <direct name="data1" input="memory.data[63:0]" output="mem_512x64_sp.data">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[63:0]" out_port="mem_512x64_sp.data"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_512x64_sp.we">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_512x64_sp.we"/>
-          </direct>
-          <direct name="dataout1" input="mem_512x64_sp.out" output="memory.out[63:0]">
-            <delay_constant max="50e-12" min="46e-12" in_port="mem_512x64_sp.out" out_port="memory.out[63:0]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_512x64_sp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_1024x32_sp">
-        <pb_type name="mem_1024x32_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
-          <input name="addr" num_pins="10" port_class="address"/>
-          <input name="data" num_pins="32" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="32" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_1024x32_sp.addr" clock="clk"/>
-          <T_setup value="509e-12" port="mem_1024x32_sp.data" clock="clk"/>
-          <T_setup value="509e-12" port="mem_1024x32_sp.we" clock="clk"/>
-          <T_hold value="238e-12" port="mem_1024x32_sp.addr" clock="clk"/>
-          <T_hold value="238e-12" port="mem_1024x32_sp.data" clock="clk"/>
-          <T_hold value="238e-12" port="mem_1024x32_sp.we" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_1024x32_sp.out" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="9.0e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x32_sp.addr">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[9:0]" out_port="mem_1024x32_sp.addr"/>
-          </direct>
-          <direct name="data1" input="memory.data[31:0]" output="mem_1024x32_sp.data">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[31:0]" out_port="mem_1024x32_sp.data"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_1024x32_sp.we">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_1024x32_sp.we"/>
-          </direct>
-          <direct name="dataout1" input="mem_1024x32_sp.out" output="memory.out[31:0]">
-            <delay_constant max="50e-12" min="46e-12" in_port="mem_1024x32_sp.out" out_port="memory.out[31:0]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_1024x32_sp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_2048x16_sp">
-        <pb_type name="mem_2048x16_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
-          <input name="addr" num_pins="11" port_class="address"/>
-          <input name="data" num_pins="16" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="16" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_2048x16_sp.addr" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x16_sp.data" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x16_sp.we" clock="clk"/>
-          <T_hold value="238e-12" port="mem_2048x16_sp.addr" clock="clk"/>
-          <T_hold value="238e-12" port="mem_2048x16_sp.data" clock="clk"/>
-          <T_hold value="238e-12" port="mem_2048x16_sp.we" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_2048x16_sp.out" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="9.0e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x16_sp.addr">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[10:0]" out_port="mem_2048x16_sp.addr"/>
-          </direct>
-          <direct name="data1" input="memory.data[15:0]" output="mem_2048x16_sp.data">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[15:0]" out_port="mem_2048x16_sp.data"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_2048x16_sp.we">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_2048x16_sp.we"/>
-          </direct>
-          <direct name="dataout1" input="mem_2048x16_sp.out" output="memory.out[15:0]">
-            <delay_constant max="50e-12" min="46e-12" in_port="mem_2048x16_sp.out" out_port="memory.out[15:0]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_2048x16_sp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_4096x8_sp">
-        <pb_type name="mem_4096x8_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
-          <input name="addr" num_pins="12" port_class="address"/>
-          <input name="data" num_pins="8" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="8" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_4096x8_sp.addr" clock="clk"/>
-          <T_setup value="509e-12" port="mem_4096x8_sp.data" clock="clk"/>
-          <T_setup value="509e-12" port="mem_4096x8_sp.we" clock="clk"/>
-          <T_hold value="238e-12" port="mem_4096x8_sp.addr" clock="clk"/>
-          <T_hold value="238e-12" port="mem_4096x8_sp.data" clock="clk"/>
-          <T_hold value="238e-12" port="mem_4096x8_sp.we" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_4096x8_sp.out" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="9.0e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[11:0]" output="mem_4096x8_sp.addr">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[11:0]" out_port="mem_4096x8_sp.addr"/>
-          </direct>
-          <direct name="data1" input="memory.data[7:0]" output="mem_4096x8_sp.data">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[7:0]" out_port="mem_4096x8_sp.data"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_4096x8_sp.we">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_4096x8_sp.we"/>
-          </direct>
-          <direct name="dataout1" input="mem_4096x8_sp.out" output="memory.out[7:0]">
-            <delay_constant max="50e-12" min="46e-12" in_port="mem_4096x8_sp.out" out_port="memory.out[7:0]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_4096x8_sp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_8192x4_sp">
-        <pb_type name="mem_8192x4_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
-          <input name="addr" num_pins="13" port_class="address"/>
-          <input name="data" num_pins="4" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="4" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_8192x4_sp.addr" clock="clk"/>
-          <T_setup value="509e-12" port="mem_8192x4_sp.data" clock="clk"/>
-          <T_setup value="509e-12" port="mem_8192x4_sp.we" clock="clk"/>
-          <T_hold value="238e-12" port="mem_8192x4_sp.addr" clock="clk"/>
-          <T_hold value="238e-12" port="mem_8192x4_sp.data" clock="clk"/>
-          <T_hold value="238e-12" port="mem_8192x4_sp.we" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_8192x4_sp.out" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="9.0e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[12:0]" output="mem_8192x4_sp.addr">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[12:0]" out_port="mem_8192x4_sp.addr"/>
-          </direct>
-          <direct name="data1" input="memory.data[3:0]" output="mem_8192x4_sp.data">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[3:0]" out_port="mem_8192x4_sp.data"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_8192x4_sp.we">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_8192x4_sp.we"/>
-          </direct>
-          <direct name="dataout1" input="mem_8192x4_sp.out" output="memory.out[3:0]">
-            <delay_constant max="50e-12" min="46e-12" in_port="mem_8192x4_sp.out" out_port="memory.out[3:0]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_8192x4_sp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_16384x2_sp">
-        <pb_type name="mem_16384x2_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
-          <input name="addr" num_pins="14" port_class="address"/>
-          <input name="data" num_pins="2" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="2" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_16384x2_sp.addr" clock="clk"/>
-          <T_setup value="509e-12" port="mem_16384x2_sp.data" clock="clk"/>
-          <T_setup value="509e-12" port="mem_16384x2_sp.we" clock="clk"/>
-          <T_hold value="238e-12" port="mem_16384x2_sp.addr" clock="clk"/>
-          <T_hold value="238e-12" port="mem_16384x2_sp.data" clock="clk"/>
-          <T_hold value="238e-12" port="mem_16384x2_sp.we" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_16384x2_sp.out" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="9.0e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[13:0]" output="mem_16384x2_sp.addr">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[13:0]" out_port="mem_16384x2_sp.addr"/>
-          </direct>
-          <direct name="data1" input="memory.data[1:0]" output="mem_16384x2_sp.data">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[1:0]" out_port="mem_16384x2_sp.data"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_16384x2_sp.we">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_16384x2_sp.we"/>
-          </direct>
-          <direct name="dataout1" input="mem_16384x2_sp.out" output="memory.out[1:0]">
-            <delay_constant max="50e-12" min="46e-12" in_port="mem_16384x2_sp.out" out_port="memory.out[1:0]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_16384x2_sp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_32768x1_sp">
-        <pb_type name="mem_32768x1_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
-          <input name="addr" num_pins="15" port_class="address"/>
-          <input name="data" num_pins="1" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="1" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_32768x1_sp.addr" clock="clk"/>
-          <T_setup value="509e-12" port="mem_32768x1_sp.data" clock="clk"/>
-          <T_setup value="509e-12" port="mem_32768x1_sp.we" clock="clk"/>
-          <T_hold value="238e-12" port="mem_32768x1_sp.addr" clock="clk"/>
-          <T_hold value="238e-12" port="mem_32768x1_sp.data" clock="clk"/>
-          <T_hold value="238e-12" port="mem_32768x1_sp.we" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_32768x1_sp.out" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="9.0e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[14:0]" output="mem_32768x1_sp.addr">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[14:0]" out_port="mem_32768x1_sp.addr"/>
-          </direct>
-          <direct name="data1" input="memory.data[0:0]" output="mem_32768x1_sp.data">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[0:0]" out_port="mem_32768x1_sp.data"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_32768x1_sp.we">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_32768x1_sp.we"/>
-          </direct>
-          <direct name="dataout1" input="mem_32768x1_sp.out" output="memory.out[0:0]">
-            <delay_constant max="50e-12" min="46e-12" in_port="mem_32768x1_sp.out" out_port="memory.out[0:0]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_32768x1_sp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <!-- Specify true dual port mode next -->
-      <mode name="mem_1024x32_dp">
-        <pb_type name="mem_1024x32_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
-          <input name="addr1" num_pins="10" port_class="address1"/>
-          <input name="addr2" num_pins="10" port_class="address2"/>
-          <input name="data1" num_pins="32" port_class="data_in1"/>
-          <input name="data2" num_pins="32" port_class="data_in2"/>
-          <input name="we1" num_pins="1" port_class="write_en1"/>
-          <input name="we2" num_pins="1" port_class="write_en2"/>
-          <output name="out1" num_pins="32" port_class="data_out1"/>
-          <output name="out2" num_pins="32" port_class="data_out2"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_1024x32_dp.addr1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_1024x32_dp.data1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_1024x32_dp.we1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_1024x32_dp.addr2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_1024x32_dp.data2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_1024x32_dp.we2" clock="clk"/>
-          <T_hold value="238e-12" port="mem_1024x32_dp.addr1" clock="clk"/>
-          <T_hold value="238e-12" port="mem_1024x32_dp.data1" clock="clk"/>
-          <T_hold value="238e-12" port="mem_1024x32_dp.we1" clock="clk"/>
-          <T_hold value="238e-12" port="mem_1024x32_dp.addr2" clock="clk"/>
-          <T_hold value="238e-12" port="mem_1024x32_dp.data2" clock="clk"/>
-          <T_hold value="238e-12" port="mem_1024x32_dp.we2" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_1024x32_dp.out1" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_1024x32_dp.out2" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="17.9e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x32_dp.addr1">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[9:0]" out_port="mem_1024x32_dp.addr1"/>
-          </direct>
-          <direct name="address2" input="memory.addr2[9:0]" output="mem_1024x32_dp.addr2">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr2[9:0]" out_port="mem_1024x32_dp.addr2"/>
-          </direct>
-          <direct name="data1" input="memory.data[31:0]" output="mem_1024x32_dp.data1">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[31:0]" out_port="mem_1024x32_dp.data1"/>
-          </direct>
-          <direct name="data2" input="memory.data[63:32]" output="mem_1024x32_dp.data2">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[63:32]" out_port="mem_1024x32_dp.data2"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_1024x32_dp.we1">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_1024x32_dp.we1"/>
-          </direct>
-          <direct name="writeen2" input="memory.we2" output="mem_1024x32_dp.we2">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.we2" out_port="mem_1024x32_dp.we2"/>
-          </direct>
-          <direct name="dataout1" input="mem_1024x32_dp.out1" output="memory.out[31:0]">
-            <delay_constant max="50e-12" min="46e-12" in_port="mem_1024x32_dp.out1" out_port="memory.out[31:0]"/>
-          </direct>
-          <direct name="dataout2" input="mem_1024x32_dp.out2" output="memory.out[63:32]">
-            <delay_constant max="50e-12" min="46e-12" in_port="mem_1024x32_dp.out2" out_port="memory.out[63:32]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_1024x32_dp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_2048x16_dp">
-        <pb_type name="mem_2048x16_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
-          <input name="addr1" num_pins="11" port_class="address1"/>
-          <input name="addr2" num_pins="11" port_class="address2"/>
-          <input name="data1" num_pins="16" port_class="data_in1"/>
-          <input name="data2" num_pins="16" port_class="data_in2"/>
-          <input name="we1" num_pins="1" port_class="write_en1"/>
-          <input name="we2" num_pins="1" port_class="write_en2"/>
-          <output name="out1" num_pins="16" port_class="data_out1"/>
-          <output name="out2" num_pins="16" port_class="data_out2"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_2048x16_dp.addr1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x16_dp.data1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x16_dp.we1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x16_dp.addr2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x16_dp.data2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x16_dp.we2" clock="clk"/>
-          <T_hold value="238e-12" port="mem_2048x16_dp.addr1" clock="clk"/>
-          <T_hold value="238e-12" port="mem_2048x16_dp.data1" clock="clk"/>
-          <T_hold value="238e-12" port="mem_2048x16_dp.we1" clock="clk"/>
-          <T_hold value="238e-12" port="mem_2048x16_dp.addr2" clock="clk"/>
-          <T_hold value="238e-12" port="mem_2048x16_dp.data2" clock="clk"/>
-          <T_hold value="238e-12" port="mem_2048x16_dp.we2" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_2048x16_dp.out1" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_2048x16_dp.out2" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="17.9e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x16_dp.addr1">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[10:0]" out_port="mem_2048x16_dp.addr1"/>
-          </direct>
-          <direct name="address2" input="memory.addr2[10:0]" output="mem_2048x16_dp.addr2">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr2[10:0]" out_port="mem_2048x16_dp.addr2"/>
-          </direct>
-          <direct name="data1" input="memory.data[15:0]" output="mem_2048x16_dp.data1">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[15:0]" out_port="mem_2048x16_dp.data1"/>
-          </direct>
-          <direct name="data2" input="memory.data[31:16]" output="mem_2048x16_dp.data2">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[31:16]" out_port="mem_2048x16_dp.data2"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_2048x16_dp.we1">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_2048x16_dp.we1"/>
-          </direct>
-          <direct name="writeen2" input="memory.we2" output="mem_2048x16_dp.we2">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.we2" out_port="mem_2048x16_dp.we2"/>
-          </direct>
-          <direct name="dataout1" input="mem_2048x16_dp.out1" output="memory.out[15:0]">
-            <delay_constant max="50e-12" min="46e-12" in_port="mem_2048x16_dp.out1" out_port="memory.out[15:0]"/>
-          </direct>
-          <direct name="dataout2" input="mem_2048x16_dp.out2" output="memory.out[31:16]">
-            <delay_constant max="50e-12" min="46e-12" in_port="mem_2048x16_dp.out2" out_port="memory.out[31:16]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_2048x16_dp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_2048x8_dp">
-        <pb_type name="mem_2048x8_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
-          <input name="addr1" num_pins="12" port_class="address1"/>
-          <input name="addr2" num_pins="12" port_class="address2"/>
-          <input name="data1" num_pins="8" port_class="data_in1"/>
-          <input name="data2" num_pins="8" port_class="data_in2"/>
-          <input name="we1" num_pins="1" port_class="write_en1"/>
-          <input name="we2" num_pins="1" port_class="write_en2"/>
-          <output name="out1" num_pins="8" port_class="data_out1"/>
-          <output name="out2" num_pins="8" port_class="data_out2"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_2048x8_dp.addr1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x8_dp.data1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x8_dp.we1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x8_dp.addr2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x8_dp.data2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x8_dp.we2" clock="clk"/>
-          <T_hold value="238e-12" port="mem_2048x8_dp.addr1" clock="clk"/>
-          <T_hold value="238e-12" port="mem_2048x8_dp.data1" clock="clk"/>
-          <T_hold value="238e-12" port="mem_2048x8_dp.we1" clock="clk"/>
-          <T_hold value="238e-12" port="mem_2048x8_dp.addr2" clock="clk"/>
-          <T_hold value="238e-12" port="mem_2048x8_dp.data2" clock="clk"/>
-          <T_hold value="238e-12" port="mem_2048x8_dp.we2" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_2048x8_dp.out1" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_2048x8_dp.out2" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="17.9e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[11:0]" output="mem_2048x8_dp.addr1">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[11:0]" out_port="mem_2048x8_dp.addr1"/>
-          </direct>
-          <direct name="address2" input="memory.addr2[11:0]" output="mem_2048x8_dp.addr2">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr2[11:0]" out_port="mem_2048x8_dp.addr2"/>
-          </direct>
-          <direct name="data1" input="memory.data[7:0]" output="mem_2048x8_dp.data1">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[7:0]" out_port="mem_2048x8_dp.data1"/>
-          </direct>
-          <direct name="data2" input="memory.data[15:8]" output="mem_2048x8_dp.data2">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[15:8]" out_port="mem_2048x8_dp.data2"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_2048x8_dp.we1">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_2048x8_dp.we1"/>
-          </direct>
-          <direct name="writeen2" input="memory.we2" output="mem_2048x8_dp.we2">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.we2" out_port="mem_2048x8_dp.we2"/>
-          </direct>
-          <direct name="dataout1" input="mem_2048x8_dp.out1" output="memory.out[7:0]">
-            <delay_constant max="50e-12" min="46e-12" in_port="mem_2048x8_dp.out1" out_port="memory.out[7:0]"/>
-          </direct>
-          <direct name="dataout2" input="mem_2048x8_dp.out2" output="memory.out[15:8]">
-            <delay_constant max="50e-12" min="46e-12" in_port="mem_2048x8_dp.out2" out_port="memory.out[15:8]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_2048x8_dp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_8192x4_dp">
-        <pb_type name="mem_8192x4_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
-          <input name="addr1" num_pins="13" port_class="address1"/>
-          <input name="addr2" num_pins="13" port_class="address2"/>
-          <input name="data1" num_pins="4" port_class="data_in1"/>
-          <input name="data2" num_pins="4" port_class="data_in2"/>
-          <input name="we1" num_pins="1" port_class="write_en1"/>
-          <input name="we2" num_pins="1" port_class="write_en2"/>
-          <output name="out1" num_pins="4" port_class="data_out1"/>
-          <output name="out2" num_pins="4" port_class="data_out2"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_8192x4_dp.addr1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_8192x4_dp.data1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_8192x4_dp.we1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_8192x4_dp.addr2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_8192x4_dp.data2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_8192x4_dp.we2" clock="clk"/>
-          <T_hold value="238e-12" port="mem_8192x4_dp.addr1" clock="clk"/>
-          <T_hold value="238e-12" port="mem_8192x4_dp.data1" clock="clk"/>
-          <T_hold value="238e-12" port="mem_8192x4_dp.we1" clock="clk"/>
-          <T_hold value="238e-12" port="mem_8192x4_dp.addr2" clock="clk"/>
-          <T_hold value="238e-12" port="mem_8192x4_dp.data2" clock="clk"/>
-          <T_hold value="238e-12" port="mem_8192x4_dp.we2" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_8192x4_dp.out1" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_8192x4_dp.out2" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="17.9e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[12:0]" output="mem_8192x4_dp.addr1">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[12:0]" out_port="mem_8192x4_dp.addr1"/>
-          </direct>
-          <direct name="address2" input="memory.addr2[12:0]" output="mem_8192x4_dp.addr2">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr2[12:0]" out_port="mem_8192x4_dp.addr2"/>
-          </direct>
-          <direct name="data1" input="memory.data[3:0]" output="mem_8192x4_dp.data1">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[3:0]" out_port="mem_8192x4_dp.data1"/>
-          </direct>
-          <direct name="data2" input="memory.data[7:4]" output="mem_8192x4_dp.data2">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[7:4]" out_port="mem_8192x4_dp.data2"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_8192x4_dp.we1">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_8192x4_dp.we1"/>
-          </direct>
-          <direct name="writeen2" input="memory.we2" output="mem_8192x4_dp.we2">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.we2" out_port="mem_8192x4_dp.we2"/>
-          </direct>
-          <direct name="dataout1" input="mem_8192x4_dp.out1" output="memory.out[3:0]">
-            <delay_constant max="50e-12" min="46e-12" in_port="mem_8192x4_dp.out1" out_port="memory.out[3:0]"/>
-          </direct>
-          <direct name="dataout2" input="mem_8192x4_dp.out2" output="memory.out[7:4]">
-            <delay_constant max="50e-12" min="46e-12" in_port="mem_8192x4_dp.out2" out_port="memory.out[7:4]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_8192x4_dp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_16384x2_dp">
-        <pb_type name="mem_16384x2_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
-          <input name="addr1" num_pins="14" port_class="address1"/>
-          <input name="addr2" num_pins="14" port_class="address2"/>
-          <input name="data1" num_pins="2" port_class="data_in1"/>
-          <input name="data2" num_pins="2" port_class="data_in2"/>
-          <input name="we1" num_pins="1" port_class="write_en1"/>
-          <input name="we2" num_pins="1" port_class="write_en2"/>
-          <output name="out1" num_pins="2" port_class="data_out1"/>
-          <output name="out2" num_pins="2" port_class="data_out2"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_16384x2_dp.addr1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_16384x2_dp.data1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_16384x2_dp.we1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_16384x2_dp.addr2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_16384x2_dp.data2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_16384x2_dp.we2" clock="clk"/>
-          <T_hold value="238e-12" port="mem_16384x2_dp.addr1" clock="clk"/>
-          <T_hold value="238e-12" port="mem_16384x2_dp.data1" clock="clk"/>
-          <T_hold value="238e-12" port="mem_16384x2_dp.we1" clock="clk"/>
-          <T_hold value="238e-12" port="mem_16384x2_dp.addr2" clock="clk"/>
-          <T_hold value="238e-12" port="mem_16384x2_dp.data2" clock="clk"/>
-          <T_hold value="238e-12" port="mem_16384x2_dp.we2" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_16384x2_dp.out1" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_16384x2_dp.out2" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="17.9e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[13:0]" output="mem_16384x2_dp.addr1">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[13:0]" out_port="mem_16384x2_dp.addr1"/>
-          </direct>
-          <direct name="address2" input="memory.addr2[13:0]" output="mem_16384x2_dp.addr2">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr2[13:0]" out_port="mem_16384x2_dp.addr2"/>
-          </direct>
-          <direct name="data1" input="memory.data[1:0]" output="mem_16384x2_dp.data1">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[1:0]" out_port="mem_16384x2_dp.data1"/>
-          </direct>
-          <direct name="data2" input="memory.data[3:2]" output="mem_16384x2_dp.data2">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[3:2]" out_port="mem_16384x2_dp.data2"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_16384x2_dp.we1">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_16384x2_dp.we1"/>
-          </direct>
-          <direct name="writeen2" input="memory.we2" output="mem_16384x2_dp.we2">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.we2" out_port="mem_16384x2_dp.we2"/>
-          </direct>
-          <direct name="dataout1" input="mem_16384x2_dp.out1" output="memory.out[1:0]">
-            <delay_constant max="50e-12" min="46e-12" in_port="mem_16384x2_dp.out1" out_port="memory.out[1:0]"/>
-          </direct>
-          <direct name="dataout2" input="mem_16384x2_dp.out2" output="memory.out[3:2]">
-            <delay_constant max="50e-12" min="46e-12" in_port="mem_16384x2_dp.out2" out_port="memory.out[3:2]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_16384x2_dp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_32768x1_dp">
-        <pb_type name="mem_32768x1_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
-          <input name="addr1" num_pins="15" port_class="address1"/>
-          <input name="addr2" num_pins="15" port_class="address2"/>
-          <input name="data1" num_pins="1" port_class="data_in1"/>
-          <input name="data2" num_pins="1" port_class="data_in2"/>
-          <input name="we1" num_pins="1" port_class="write_en1"/>
-          <input name="we2" num_pins="1" port_class="write_en2"/>
-          <output name="out1" num_pins="1" port_class="data_out1"/>
-          <output name="out2" num_pins="1" port_class="data_out2"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_32768x1_dp.addr1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_32768x1_dp.data1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_32768x1_dp.we1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_32768x1_dp.addr2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_32768x1_dp.data2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_32768x1_dp.we2" clock="clk"/>
-          <T_hold value="238e-12" port="mem_32768x1_dp.addr1" clock="clk"/>
-          <T_hold value="238e-12" port="mem_32768x1_dp.data1" clock="clk"/>
-          <T_hold value="238e-12" port="mem_32768x1_dp.we1" clock="clk"/>
-          <T_hold value="238e-12" port="mem_32768x1_dp.addr2" clock="clk"/>
-          <T_hold value="238e-12" port="mem_32768x1_dp.data2" clock="clk"/>
-          <T_hold value="238e-12" port="mem_32768x1_dp.we2" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_32768x1_dp.out1" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" min="1.196e-9" port="mem_32768x1_dp.out2" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="17.9e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[14:0]" output="mem_32768x1_dp.addr1">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr1[14:0]" out_port="mem_32768x1_dp.addr1"/>
-          </direct>
-          <direct name="address2" input="memory.addr2[14:0]" output="mem_32768x1_dp.addr2">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.addr2[14:0]" out_port="mem_32768x1_dp.addr2"/>
-          </direct>
-          <direct name="data1" input="memory.data[0:0]" output="mem_32768x1_dp.data1">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[0:0]" out_port="mem_32768x1_dp.data1"/>
-          </direct>
-          <direct name="data2" input="memory.data[1:1]" output="mem_32768x1_dp.data2">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.data[1:1]" out_port="mem_32768x1_dp.data2"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_32768x1_dp.we1">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.we1" out_port="mem_32768x1_dp.we1"/>
-          </direct>
-          <direct name="writeen2" input="memory.we2" output="mem_32768x1_dp.we2">
-            <delay_constant max="132e-12" min="88e-12" in_port="memory.we2" out_port="mem_32768x1_dp.we2"/>
-          </direct>
-          <direct name="dataout1" input="mem_32768x1_dp.out1" output="memory.out[0:0]">
-            <delay_constant max="50e-12" min="46e-12" in_port="mem_32768x1_dp.out1" out_port="memory.out[0:0]"/>
-          </direct>
-          <direct name="dataout2" input="mem_32768x1_dp.out2" output="memory.out[1:1]">
-            <delay_constant max="50e-12" min="46e-12" in_port="mem_32768x1_dp.out2" out_port="memory.out[1:1]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_32768x1_dp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <!-- Every input pin is driven by 15% of the tracks in a channel, every output pin is driven by 10% of the tracks in a channel -->
-      <!-- Place this memory block every 8 columns from (and including) the second column -->
-      <power method="sum-of-children"/>
-    </pb_type>
-    <!-- Define fracturable memory end -->
-  </complexblocklist>
-  <power>
-    <local_interconnect C_wire="2.5e-10"/>
-    <mux_transistor_size mux_transistor_size="3"/>
-    <FF_size FF_size="4"/>
-    <LUT_transistor_size LUT_transistor_size="4"/>
-  </power>
-  <clocks>
-    <clock buffer_size="auto" C_wire="2.5e-10"/>
-  </clocks>
-</architecture>
diff --git a/parmys-plugin/tests/raygentop/raygentop.v b/parmys-plugin/tests/raygentop/raygentop.v
deleted file mode 100644
index 256b3aead..000000000
--- a/parmys-plugin/tests/raygentop/raygentop.v
+++ /dev/null
@@ -1,2978 +0,0 @@
- module paj_raygentop_hierarchy_no_mem (rgwant_addr, rgwant_data, rgread_ready, rgaddr_ready, rgdata_ready, rgwant_read, rgdatain, rgdataout, rgaddrin, rgCont, rgStat, rgCfgData, rgwant_CfgData, rgCfgData_ready, tm3_sram_data_in, tm3_sram_data_out, tm3_sram_addr, tm3_sram_we, tm3_sram_oe, tm3_sram_adsp, clk, fbdata, fbdatavalid, fbnextscanline, raygroup01, raygroupvalid01, busy01, raygroup10, raygroupvalid10, busy10, globalreset, rgData, rgAddr, rgWE, rgAddrValid, rgDone, rgResultData, rgResultReady, rgResultSource);
-
-    output rgwant_addr; 
-    wire rgwant_addr;
-    output rgwant_data; 
-    wire rgwant_data;
-    output rgread_ready; 
-    wire rgread_ready;
-    input rgaddr_ready; 
-    input rgdata_ready; 
-
-    input rgwant_read; 
-    input[63:0] rgdatain; 
-    output[63:0] rgdataout; 
-    wire[63:0] rgdataout;
-    input[17:0] rgaddrin; 
-    input[31:0] rgCont; 
-    output[31:0] rgStat; 
-    wire[31:0] rgStat;
-    input[31:0] rgCfgData; 
-    output rgwant_CfgData; 
-    wire rgwant_CfgData;
-    input rgCfgData_ready; 
-
-    input[63:0] tm3_sram_data_in; 
-    wire[63:0] tm3_sram_data_in;
-    output[63:0] tm3_sram_data_out; 
-    wire[63:0] tm3_sram_data_out;
-    wire[63:0] tm3_sram_data_xhdl0;
-    output[18:0] tm3_sram_addr; 
-    wire[18:0] tm3_sram_addr;
-    output[7:0] tm3_sram_we; 
-    wire[7:0] tm3_sram_we;
-    output[1:0] tm3_sram_oe; 
-    wire[1:0] tm3_sram_oe;
-    output tm3_sram_adsp; 
-    wire tm3_sram_adsp;
-    input clk; 
-
-    output[63:0] fbdata; 
-    wire[63:0] fbdata;
-    output fbdatavalid; 
-    wire fbdatavalid;
-    input fbnextscanline; 
-    output[1:0] raygroup01; 
-    wire[1:0] raygroup01;
-    output raygroupvalid01; 
-    wire raygroupvalid01;
-    input busy01; 
-    output[1:0] raygroup10; 
-    wire[1:0] raygroup10;
-
-    output raygroupvalid10; 
-    wire raygroupvalid10;
-    input busy10; 
-    input globalreset; 
-    output[31:0] rgData; 
-    wire[31:0] rgData;
-    output[3:0] rgAddr; 
-    wire[3:0] rgAddr;
-    output[2:0] rgWE; 
-    wire[2:0] rgWE;
-    output rgAddrValid; 
-    wire rgAddrValid;
-
-    input rgDone; 
-    input[31:0] rgResultData; 
-    input rgResultReady; 
-    input[1:0] rgResultSource; 
-
-    wire[2:0] statepeek2; 
-    wire as01; 
-    wire ack01; 
-
-    wire[3:0] addr01; 
-    wire[47:0] dir01; 
-    wire[47:0] dir; 
-    wire[47:0] sramdatal; 
-    wire wantDir; 
-    wire dirReady; 
-    wire dirReadyl; 
-    wire[14:0] address; 
-    wire[30:0] cyclecounter; 
-
-    wire nas01; 
-    wire nas10; 
-    wire go; 
-    reg page; 
-    wire[2:0] statepeekct; 
-    // result Signals
-    wire valid01; 
-    wire valid10; 
-    wire[15:0] id01a; 
-    wire[15:0] id01b; 
-    wire[15:0] id01c; 
-    wire[15:0] id10a; 
-
-    wire[15:0] id10b; 
-    wire[15:0] id10c; 
-    wire hit01a; 
-    wire hit01b; 
-    wire hit01c; 
-    wire hit10a; 
-    wire hit10b; 
-    wire hit10c; 
-    wire[7:0] u01a; 
-    wire[7:0] u01b; 
-    wire[7:0] u01c; 
-    wire[7:0] v01a; 
-
-    wire[7:0] v01b; 
-    wire[7:0] v01c; 
-    wire[7:0] u10a; 
-    wire[7:0] u10b; 
-    wire[7:0] u10c; 
-    wire[7:0] v10a; 
-    wire[7:0] v10b; 
-    wire[7:0] v10c; 
-    wire wantwriteback; 
-    wire writebackack; 
-    wire[63:0] writebackdata; 
-    wire[17:0] writebackaddr; 
-
-    wire[17:0] nextaddr01; 
-    // Shading Signals
-    wire[63:0] shadedata; 
-    wire[15:0] triID; 
-    wire wantshadedata; 
-    wire shadedataready; 
-    // CfgData Signals
-    wire[27:0] origx; 
-    wire[27:0] origy; 
-    wire[27:0] origz; 
-    wire[15:0] m11; 
-    wire[15:0] m12; 
-
-    wire[15:0] m13; 
-    wire[15:0] m21; 
-    wire[15:0] m22; 
-    wire[15:0] m23; 
-    wire[15:0] m31; 
-    wire[15:0] m32; 
-    wire[15:0] m33; 
-    wire[20:0] bkcolour; 
-    // Texture signals
-    wire[20:0] texinfo; 
-    wire[3:0] texaddr; 
-    wire[63:0] texel; 
-
-    wire[17:0] texeladdr; 
-    wire wanttexel; 
-    wire texelready; 
-    // Frame Buffer Read Signals
-    wire fbpage; 
-    // debug signals
-    wire wantcfg; 
-    wire debugglobalreset; 
-
-    assign rgwant_CfgData = wantcfg ;
-
-    onlyonecycle onlyeonecycleinst (rgCont[0], go, globalreset, clk); 
-
-    always @(posedge clk)
-    begin
-       if (globalreset == 1'b1)
-       begin
-          page <= 1'b1 ; // Reset to 1 such that first flip sets to 0
-       end
-       else
-
-       begin
-          page <= ~page ; 
-       end 
-    end 
-    assign fbpage = ~page ;
-
-    matmult matmultinst(sramdatal[47:32], sramdatal[31:16], sramdatal[15:0], m11, m12, m13, m21, m22, m23, m31, m32, m33, dir[47:32], dir[31:16], dir[15:0], clk); 
-
-    delay1x3 dir01delay(dirReady, dirReadyl, clk); 
-    rgconfigmemory ConfigMemoryInst (rgCfgData[31:28], rgCfgData[27:0], rgCfgData_ready, wantcfg, origx, origy, origz, m11, m12, m13, m21, m22, m23, m31, m32, m33, bkcolour, texinfo, globalreset, clk); 
-
-    rgsramcontroller sramcont (rgwant_addr, rgaddr_ready, rgaddrin, rgwant_data, rgdata_ready, rgdatain, rgwant_read, rgread_ready, rgdataout, dirReady, wantDir, sramdatal, address, wantwriteback, writebackack, writebackdata, writebackaddr, fbdata, fbnextscanline, fbdatavalid, fbpage, shadedata, triID, wantshadedata, shadedataready, texeladdr, texel, wanttexel, texelready, tm3_sram_data_in, tm3_sram_data_out, tm3_sram_addr, tm3_sram_we, tm3_sram_oe, tm3_sram_adsp, globalreset, clk);
-    raysend raysendinst (as01, ack01, addr01, dir01, origx, origy, origz, rgData, rgAddr, rgWE, rgAddrValid, rgDone, globalreset, clk, statepeek2); 
-
-    raygencont  raygencontinst(go, rgCont[15:1], rgStat[31], cyclecounter, nextaddr01, nas01, nas10, page, dirReadyl, wantDir, dir, address, as01, addr01, ack01, dir01, raygroup01, raygroupvalid01, busy01, raygroup10, raygroupvalid10, busy10, globalreset, clk, statepeekct); 
-    resultrecieve resultrecieveinst (valid01, valid10, id01a, id01b, id01c, id10a, id10b, id10c, hit01a, hit01b, hit01c, hit10a, hit10b, hit10c, u01a, u01b, u01c, v01a, v01b, v01c, u10a, u10b, u10c, v10a, v10b, v10c, rgResultData, rgResultReady, rgResultSource, globalreset, clk); 
-    assign debugglobalreset = globalreset | go ;
-    resultwriter resultwriteinst (valid01, valid10, id01a, id01b, id01c, id10a, id10b, id10c, hit01a, hit01b, hit01c, hit10a, hit10b, hit10c, u01a, u01b, u01c, v01a, v01b, v01c, u10a, u10b, u10c, v10a, v10b, v10c, nextaddr01, nas01, nas10, bkcolour, shadedata, triID, wantshadedata, shadedataready, texinfo, texaddr, texeladdr, texel, wanttexel, texelready, writebackdata, writebackaddr, wantwriteback, writebackack, debugglobalreset, clk);
-    assign rgStat[30:0] = cyclecounter ;
- endmodule
-
-
-module delay1x3 (datain, dataout, clk);
-
-    input datain; 
-    output dataout; 
-    wire dataout;
-    input clk; 
-
-    reg buff0; 
-    reg buff1; 
-    reg buff2; 
-
-    assign dataout = buff2 ;
-
-    always @(posedge clk)
-    begin
-/* PAJ expanded for loop to hard definition the size of `depth */
-       buff0 <= datain ; 
-		buff1 <= buff0;
-		buff2 <= buff1;
-    end 
- endmodule
-
-
-    
-
-    
-    
- // A debugging circuit that allows a single cycle pulse to be 
- // generated by through the ports package
- module onlyonecycle (trigger, output_xhdl0, globalreset, clk);
-
-    input trigger; 
-    output output_xhdl0; 
-    reg output_xhdl0;
-    input globalreset; 
-    input clk; 
-
-    reg[1:0] state; 
-    reg[1:0] next_state; 
-    reg count; 
-    reg temp_count; 
-
-    always @(posedge clk)
-    begin
-       if (globalreset == 1'b1)
-       begin
-          state <= 0 ; 
-          count <= 0 ; 
-
-       end
-       else
-       begin
-          state <= next_state ; 
-		count <= temp_count;
-       end 
-    end 
-
-    always @(state or trigger or count)
-    begin
-       case (state)
-          0 :
-                   begin
-       				  output_xhdl0 = 1'b0 ; 
-                      if (trigger == 1'b1)
-                      begin
-                         next_state = 1 ; 
-                      end
-                      else
-                      begin
-                         next_state = 0 ; 
-                      end 
-                         temp_count = 1 - 1 ; 
-                   end
-          1 :
-                   begin
-                      output_xhdl0 = 1'b1 ; 
-                      if (count == 0)
-                      begin
-                         next_state = 2 ; 
-                      end
-                      else
-
-                      begin
-
-                         next_state = 1 ; 
-                      end 
-                         temp_count = count - 1 ; 
-                   end
-          2 :
-                   begin
-       				  output_xhdl0 = 1'b0 ; 
-                      if (trigger == 1'b0)
-                      begin
-                         next_state = 0 ; 
-                      end
-                      else
-                      begin
-                         next_state = 2 ; 
-
-                      end 
-                   end
-       endcase 
-    end 
- endmodule
-
-module matmult (Ax, Ay, Az, m11, m12, m13, m21, m22, m23, m31, m32, m33, Cx, Cy, Cz, clk);
-
-    input[16 - 1:0] Ax; 
-    input[16 - 1:0] Ay; 
-    input[16 - 1:0] Az; 
-    input[16 - 1:0] m11; 
-    input[16 - 1:0] m12; 
-
-    input[16 - 1:0] m13; 
-    input[16 - 1:0] m21; 
-    input[16 - 1:0] m22; 
-    input[16 - 1:0] m23; 
-    input[16 - 1:0] m31; 
-    input[16 - 1:0] m32; 
-    input[16 - 1:0] m33; 
-    output[16 - 1:0] Cx; 
-    reg[16 - 1:0] Cx;
-    output[16 - 1:0] Cy; 
-    reg[16 - 1:0] Cy;
-    output[16 - 1:0] Cz; 
-
-    reg[16 - 1:0] Cz;
-    input clk; 
-
-    reg[16 + 16 - 1:0] am11; 
-    reg[16 + 16 - 1:0] am12; 
-    reg[16 + 16 - 1:0] am13; 
-    reg[16 + 16 - 1:0] am21; 
-    reg[16 + 16 - 1:0] am22; 
-    reg[16 + 16 - 1:0] am23; 
-    reg[16 + 16 - 1:0] am31; 
-    reg[16 + 16 - 1:0] am32; 
-    reg[16 + 16 - 1:0] am33; 
-
-
-    always @(posedge clk)
-    begin
-       am11 <= Ax * m11 ; 
-       am12 <= Ay * m12 ; 
-       am13 <= Az * m13 ; 
-       am21 <= Ax * m21 ; 
-       am22 <= Ay * m22 ; 
-       am23 <= Az * m23 ; 
-       am31 <= Ax * m31 ; 
-       am32 <= Ay * m32 ; 
-       am33 <= Az * m33 ; 
-
-       //      Cx <= (am11 + am12 + am13) (`widthA+`widthB-2 downto `widthB-1);
-       //      Cy <= (am21 + am22 + am23) (`widthA+`widthB-2 downto `widthB-1);
-       //      Cz <= (am31 + am32 + am33) (`widthA+`widthB-2 downto `widthB-1);
-       Cx <= (am11[16+16-2:16-1] + am12[16+16-2:16-1] + am13[16+16-2:16-1]) ; 
-       Cy <= (am21[16+16-2:16-1] + am22[16+16-2:16-1] + am23[16+16-2:16-1]); 
-       Cz <= (am31[16+16-2:16-1] + am32[16+16-2:16-1] + am33[16+16-2:16-1]) ;  
-    end 
- endmodule
-
-    
-    
-
-module rgconfigmemory (CfgAddr, CfgData, CfgData_Ready, want_CfgData, origx, origy, origz, m11, m12, m13, m21, m22, m23, m31, m32, m33, bkcolour, texinfo, globalreset, clk);
-
-
-    input[3:0] CfgAddr; 
-    input[27:0] CfgData; 
-    input CfgData_Ready; 
-    output want_CfgData; 
-    reg want_CfgData;
-    output[27:0] origx; 
-    reg[27:0] origx;
-    output[27:0] origy; 
-    reg[27:0] origy;
-    output[27:0] origz; 
-    reg[27:0] origz;
-    output[15:0] m11; 
-    reg[15:0] m11;
-    output[15:0] m12; 
-    reg[15:0] m12;
-    output[15:0] m13; 
-    reg[15:0] m13;
-    output[15:0] m21; 
-    reg[15:0] m21;
-    output[15:0] m22; 
-    reg[15:0] m22;
-    output[15:0] m23; 
-    reg[15:0] m23;
-    output[15:0] m31; 
-    reg[15:0] m31;
-    output[15:0] m32; 
-    reg[15:0] m32;
-    output[15:0] m33; 
-    reg[15:0] m33;
-    output[20:0] bkcolour; 
-    reg[20:0] bkcolour;
-    output[20:0] texinfo; 
-
-    wire[20:0] texinfo;
-    input globalreset; 
-    input clk; 
-
-    reg state; 
-    reg next_state; 
-    wire we; 
-
-    reg[27:0] temp_origx;
-    reg[27:0] temp_origy;
-    reg[27:0] temp_origz;
-    reg[15:0] temp_m11;
-    reg[15:0] temp_m12;
-    reg[15:0] temp_m13;
-    reg[15:0] temp_m21;
-    reg[15:0] temp_m22;
-    reg[15:0] temp_m23;
-    reg[15:0] temp_m31;
-    reg[15:0] temp_m32;
-    reg[15:0] temp_m33;
-    reg[20:0] temp_bkcolour;
-
-    // <<X-HDL>> Can't find translated component 'spram'. Module name may not match
-    spram21x4 spraminst(we, texinfo, CfgData[20:0], clk); 
-    assign we = ((CfgData_Ready == 1'b1) & (CfgAddr == 4'b1110)) ? 1'b1 : 1'b0 ;
-
-    always @(posedge clk)
-    begin
-       if (globalreset == 1'b1)
-       begin
-          state <= 0 ; 
-          origx <= 0;
-          origy <= 0;
-
-          origz <= 0;
-          m11 <= 1;
-          m12 <= 0;
-          m13 <= 0;
-          m21 <= 0;
-          m22 <= 1;
-          m23 <= 0;
-          m31 <= 0;
-          m32 <= 0;
-         m33 <= 1;
-          bkcolour <= 0;
-       end
-       else
-       begin
-          state <= next_state ; 
-          origx <= temp_origx;
-          origy <= temp_origy;
-          origz <= temp_origz;
-          m11 <= temp_m11;
-          m12 <= temp_m12;
-          m13 <= temp_m13;
-          m21 <= temp_m21;
-          m22 <= temp_m22;
-          m23 <= temp_m23;
-          m31 <= temp_m31;
-          m32 <= temp_m32;
-         m33 <= temp_m33;
-          bkcolour <= bkcolour;
-       end 
-    end 
-
-    always @(state or CfgData_Ready)
-    begin
-       case (state)
-          0 :
-                   begin
-                      want_CfgData = 1'b1 ; 
-                      if (CfgData_Ready == 1'b1)
-                      begin
-                         next_state = 1 ; 
-                      end
-
-                      else
-                      begin
-                         next_state = 0 ; 
-                      end 
-
-              if ((CfgData_Ready == 1'b1) && (CfgAddr == 4'b0001))
-                        begin
-											temp_origx = CfgData ; 
-						end
-                        else if ((CfgData_Ready == 1'b1) && (CfgAddr == 4'b0010))
-                        begin
-                                           temp_origy = CfgData ; 
-						end
-                        else if ((CfgData_Ready == 1'b1) && (CfgAddr == 4'b0011))
-                        begin
-                                           temp_origz = CfgData ; 
-						end
-                        else if ((CfgData_Ready == 1'b1) && (CfgAddr == 4'b0100))
-                        begin
-                                           temp_m11 = CfgData[15:0] ; 
-						end
-                        else if ((CfgData_Ready == 1'b1) && (CfgAddr == 4'b0101))
-                        begin
-                                           temp_m12 = CfgData[15:0] ; 
-						end
-                        else if ((CfgData_Ready == 1'b1) && (CfgAddr == 4'b0110))
-                        begin
-                                           temp_m13 = CfgData[15:0] ; 
-						end
-                        else if ((CfgData_Ready == 1'b1) && (CfgAddr == 4'b0111))
-                        begin
-                                           temp_m21 = CfgData[15:0] ; 
-						end
-                        else if ((CfgData_Ready == 1'b1) && (CfgAddr == 4'b1000))
-                        begin
-                                           temp_m22 = CfgData[15:0] ; 
-						end
-                        else if ((CfgData_Ready == 1'b1) && (CfgAddr == 4'b1001))
-                        begin
-                                           temp_m23 = CfgData[15:0] ; 
-						end
-                        else if ((CfgData_Ready == 1'b1) && (CfgAddr == 4'b1010))
-                        begin
-                                           temp_m31 = CfgData[15:0] ; 
-						end
-                        else if ((CfgData_Ready == 1'b1) && (CfgAddr == 4'b1011))
-                        begin
-                                           temp_m32 = CfgData[15:0] ; 
-						end
-                        else if ((CfgData_Ready == 1'b1) && (CfgAddr == 4'b1100))
-                        begin
-                                           temp_m33 = CfgData[15:0] ; 
-						end
-                        else if ((CfgData_Ready == 1'b1) && (CfgAddr == 4'b1101))
-                        begin
-                                           temp_bkcolour = CfgData[20:0] ; 
-						end
-                   end
-          1 :
-                   begin
-                      want_CfgData = 1'b0 ; 
-                      if (CfgData_Ready == 1'b0)
-                      begin
-                         next_state = 0 ; 
-                      end
-
-                      else
-                      begin
-                         next_state = 1 ; 
-                      end 
-                   end
-       endcase 
-    end 
- endmodule
-
-    
-    
- module spram21x4 (we, dataout, datain, clk);
-
-    input we; 
-    output[21 - 1:0] dataout; 
-    wire[21 - 1:0] dataout;
-    input[21 - 1:0] datain; 
-    input clk; 
-
-	reg [7:0] addr;
-	
-	always @ (posedge clk)
-	begin
-	 addr[0] <= we;
-	 addr [1] <= addr[0];
-	 addr [2] <= addr[1];
-	 addr [3] <= addr[2];
-	 addr [4] <= addr[3];
-	 addr [5] <= addr[4];
-	 addr [6] <= addr[5];
-	 addr [7] <= addr[6];
-	 end
-//changed to odin 2 ram specifications
-
-defparam new_ram.ADDR_WIDTH = 8;
-defparam new_ram.DATA_WIDTH = 21;
-single_port_ram new_ram(
-  .clk (clk),
-  .we(we),
-  .data(datain),
-  .out(dataout),
-  .addr(addr)
-  );
-  
-  
- endmodule
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-
-module rgsramcontroller (want_addr, addr_ready, addrin, want_data, data_ready, datain, want_read, read_ready, dataout, dirReady, wantDir, sramdatal, addr, wantwriteback, writebackack, writebackdata, writebackaddr, fbdata, fbnextscanline, fbdatavalid, fbpage, shadedata, triID, wantshadedata, shadedataready, texeladdr, texel, wanttexel, texelready, tm3_sram_data_in, tm3_sram_data_out, tm3_sram_addr, tm3_sram_we, tm3_sram_oe, tm3_sram_adsp, globalreset, clk);
-
-    output want_addr; 
-    reg want_addr;
-    input addr_ready; 
-    input[17:0] addrin; 
-    output want_data; 
-    reg want_data;
-    input data_ready; 
-    input[63:0] datain; 
-    input want_read; 
-    output read_ready; 
-
-    reg read_ready;
-    output[63:0] dataout; 
-    wire[63:0] dataout;
-    output dirReady; 
-    reg dirReady;
-    input wantDir; 
-    output[47:0] sramdatal; 
-    reg[47:0] sramdatal;
-    output[14:0] addr; 
-    wire[14:0] addr;
-    input wantwriteback; 
-    output writebackack; 
-
-    reg writebackack;
-    input[63:0] writebackdata; 
-    input[17:0] writebackaddr; 
-    output[63:0] fbdata; 
-    reg[63:0] fbdata;
-    input fbnextscanline; 
-    output fbdatavalid; 
-    reg fbdatavalid;
-    input fbpage; 
-    output[63:0] shadedata; 
-    wire[63:0] shadedata;
-    input[15:0] triID; 
-
-    input wantshadedata; 
-    output shadedataready; 
-    reg shadedataready;
-    input[17:0] texeladdr; 
-    output[63:0] texel; 
-    wire[63:0] texel;
-    input wanttexel; 
-    output texelready; 
-    reg texelready;
-    input[63:0] tm3_sram_data_in; 
-    wire[63:0] tm3_sram_data_in;
-    output[63:0] tm3_sram_data_out; 
-    wire[63:0] tm3_sram_data_out;
-    reg[63:0] tm3_sram_data_xhdl0;
-
-    output[18:0] tm3_sram_addr; 
-    reg[18:0] tm3_sram_addr;
-    output[7:0] tm3_sram_we; 
-    reg[7:0] tm3_sram_we;
-    output[1:0] tm3_sram_oe; 
-    reg[1:0] tm3_sram_oe;
-    output tm3_sram_adsp; 
-    reg tm3_sram_adsp;
-    input globalreset; 
-    input clk; 
-
-    reg[3:0] state; 
-    reg[3:0] next_state; 
-    reg[17:0] waddress; 
-    reg[14:0] faddress; 
-    reg[6:0] fcount; 
-    reg fbdatavalidl; 
-
-    reg[17:0] temp_waddress; 
-    reg[14:0] temp_faddress; 
-    reg[6:0] temp_fcount; 
-    reg temp_fbdatavalidl; 
-    reg temp_texelready;
-    reg temp_shadedataready;
-
-    assign tm3_sram_data_out = tm3_sram_data_xhdl0;
-
-    assign dataout = tm3_sram_data_in ;
-    assign addr = tm3_sram_data_in[62:48] ;
-    assign shadedata = tm3_sram_data_in ;
-    assign texel = tm3_sram_data_in ;
-
-    always @(posedge clk)
-    begin
-       if (globalreset == 1'b1)
-       begin
-
-          state <= 0 ; 
-          waddress <= 0;
-          faddress <= 0;
-          fcount <= 7'b1101011 ; 
-          fbdatavalid <= 1'b0 ; 
-          fbdatavalidl <= 1'b0 ; 
-          shadedataready <= 1'b0 ; 
-          texelready <= 1'b0 ; 
-          sramdatal <= 0;
-          fbdata <= 0;
-       end
-       else
-
-       begin
-          state <= next_state ; 
-          sramdatal <= tm3_sram_data_in[47:0] ; 
-          fbdata <= tm3_sram_data_in ; 
-          fbdatavalid <= fbdatavalidl ; 
-
-fbdatavalidl <= temp_fbdatavalidl;
-texelready <= temp_texelready;
-shadedataready <= temp_shadedataready;
-fcount <= temp_fcount;
-faddress <= temp_faddress;
-waddress <= temp_waddress;
-
-       end 
-    end 
-
-    always @(state or addr_ready or data_ready or waddress or datain or wantDir or 
-             want_read or wantwriteback or writebackdata or writebackaddr or 
-             fcount or fbpage or faddress or fbnextscanline or triID or wantshadedata or 
-             wanttexel or texeladdr)
-
-    begin
-       case (state)
-
-          0 :
-                   begin
-				       tm3_sram_we = 8'b11111111 ; 
-				       tm3_sram_oe = 2'b01 ; 
-				       tm3_sram_adsp = 1'b0 ; 
-				       tm3_sram_data_xhdl0 = 0;
-				       tm3_sram_addr = {1'b0, waddress} ; 
-				       want_addr = 1'b1 ; 
-				       want_data = 1'b1 ; 
-				       read_ready = 1'b1 ; 
-				       dirReady = 1'b0 ; 
-				       writebackack = 1'b0 ; 
-                      if (addr_ready == 1'b1)
-                      begin
-                         next_state = 1 ; 
-                      end
-                      else if (want_read == 1'b1)
-                      begin
-                         next_state = 2 ; 
-                      end
-                      else if (data_ready == 1'b1)
-                      begin
-
-                         next_state = 3 ; 
-                      end
-                      else if (wantDir == 1'b1)
-                      begin
-                         next_state = 5 ; 
-                      end
-                      else if (wantwriteback == 1'b1)
-                      begin
-                         next_state = 6 ; 
-                      end
-                      else if (wantshadedata == 1'b1)
-                      begin
-
-                         next_state = 9 ; 
-                      end
-                      else if (wanttexel == 1'b1)
-                      begin
-                         next_state = 10 ; 
-                      end
-                      else if (fcount != 0)
-                      begin
-                         next_state = 7 ; 
-                      end
-                      else if (fbnextscanline == 1'b1)
-                      begin
-
-                         next_state = 8 ; 
-                      end
-                      else
-                      begin
-                         next_state = 0 ; 
-                      end 
-				          temp_fbdatavalidl = 1'b0 ; 
-				          temp_shadedataready = 1'b0 ; 
-				          temp_texelready = 1'b0 ; 
-                         if (addr_ready == 1'b1)
-
-                         begin
-                            temp_waddress = addrin ; 
-                         end 
-
-                   end
-          1 :
-                   begin
-				       tm3_sram_we = 8'b11111111 ; 
-				       tm3_sram_oe = 2'b01 ; 
-				       tm3_sram_adsp = 1'b0 ; 
-				       tm3_sram_data_xhdl0 = 0;
-				       tm3_sram_addr = {1'b0, waddress} ; 
-				       want_data = 1'b1 ; 
-				       read_ready = 1'b1 ; 
-				       dirReady = 1'b0 ; 
-				       writebackack = 1'b0 ; 
-                      want_addr = 1'b0 ; 
-                      if (addr_ready == 1'b0)
-                      begin
-                         next_state = 0 ; 
-
-                      end
-                      else
-                      begin
-                         next_state = 1 ; 
-                      end 
-                   end
-          2 :
-                   begin
-				       tm3_sram_we = 8'b11111111 ; 
-				       tm3_sram_oe = 2'b01 ; 
-				       tm3_sram_adsp = 1'b0 ; 
-				       tm3_sram_data_xhdl0 = 0;
-				       tm3_sram_addr = {1'b0, waddress} ; 
-				       want_addr = 1'b1 ; 
-				       want_data = 1'b1 ; 
-				       dirReady = 1'b0 ; 
-				       writebackack = 1'b0 ; 
-
-                      read_ready = 1'b0 ; 
-                      if (want_read == 1'b0)
-                      begin
-                         next_state = 0 ; 
-                      end
-                      else
-                      begin
-                         next_state = 2 ; 
-                      end 
-
-				          temp_fbdatavalidl = 1'b0 ; 
-				          temp_shadedataready = 1'b0 ; 
-				          temp_texelready = 1'b0 ; 
-                         if (want_read == 1'b0)
-                         begin
-
-                            temp_waddress = waddress + 1 ; 
-                         end 
-
-                   end
-          3 :
-                   begin
-				       tm3_sram_addr = {1'b0, waddress} ; 
-				       want_addr = 1'b1 ; 
-				       read_ready = 1'b1 ; 
-				       dirReady = 1'b0 ; 
-				       writebackack = 1'b0 ; 
-                      tm3_sram_data_xhdl0 = datain ; 
-                      tm3_sram_we = 8'b00000000 ; 
-
-
-                   tm3_sram_oe = 2'b11 ; 
-                      tm3_sram_adsp = 1'b0 ; 
-                      want_data = 1'b0 ; 
-                      next_state = 4 ; 
-
-				          temp_fbdatavalidl = 1'b0 ; 
-				          temp_shadedataready = 1'b0 ; 
-				          temp_texelready = 1'b0 ; 
-                         temp_waddress = waddress + 1 ; 
-
-                   end
-          4 :
-                   begin
-				       tm3_sram_we = 8'b11111111 ; 
-				       tm3_sram_oe = 2'b01 ; 
-				       tm3_sram_adsp = 1'b0 ; 
-				       tm3_sram_data_xhdl0 = 0;
-				       tm3_sram_addr = {1'b0, waddress} ; 
-				       want_addr = 1'b1 ; 
-				       read_ready = 1'b1 ; 
-				       dirReady = 1'b0 ; 
-				       writebackack = 1'b0 ; 
-                      if (data_ready == 1'b0)
-                      begin
-
-                         next_state = 0 ; 
-                      end
-                      else
-                      begin
-                         next_state = 4 ; 
-                      end 
-                      want_data = 1'b0 ; 
-                   end
-
-          5 :
-                   begin
-				       tm3_sram_we = 8'b11111111 ; 
-				       tm3_sram_oe = 2'b01 ; 
-				       tm3_sram_adsp = 1'b0 ; 
-				       tm3_sram_data_xhdl0 = 0;
-				       tm3_sram_addr = {1'b0, waddress} ; 
-				       want_addr = 1'b1 ; 
-				       want_data = 1'b1 ; 
-				       read_ready = 1'b1 ; 
-				       writebackack = 1'b0 ; 
-
-                     dirReady = 1'b1 ; 
-                      if (wantDir == 1'b0)
-                      begin
-                         next_state = 0 ; 
-
-                      end
-                      else
-                      begin
-                         next_state = 5 ; 
-                      end 
-
-				          temp_fbdatavalidl = 1'b0 ; 
-				          temp_shadedataready = 1'b0 ; 
-				          temp_texelready = 1'b0 ; 
-                         if (wantDir == 1'b0)
-                         begin
-                            temp_waddress = waddress + 1 ; 
-                         end 
-
-                   end
-          6 :
-                   begin
-				       want_addr = 1'b1 ; 
-				       want_data = 1'b1 ; 
-				       read_ready = 1'b1 ; 
-				       dirReady = 1'b0 ; 
-
-                      tm3_sram_data_xhdl0 = writebackdata ; 
-                      tm3_sram_we = 8'b00000000 ; 
-                      tm3_sram_oe = 2'b11 ; 
-                      tm3_sram_adsp = 1'b0 ; 
-                      tm3_sram_addr = {1'b0, writebackaddr} ; 
-                      writebackack = 1'b1 ; 
-                      next_state = 0 ; 
-                   end
-
-          7 :
-                   begin
-				       tm3_sram_we = 8'b11111111 ; 
-				       tm3_sram_oe = 2'b01 ; 
-				       tm3_sram_adsp = 1'b0 ; 
-				       tm3_sram_data_xhdl0 = 0;
-				       want_addr = 1'b1 ; 
-				       want_data = 1'b1 ; 
-				       read_ready = 1'b1 ; 
-				       dirReady = 1'b0 ; 
-				       writebackack = 1'b0 ; 
-                      tm3_sram_addr = {3'b011, fbpage, faddress} ; 
-                      if ((fcount == 1) | (addr_ready == 1'b1) | (want_read == 1'b1) | (data_ready == 1'b1) | (wantDir == 1'b1) | (wantwriteback == 1'b1))
-                      begin
-                         next_state = 0 ; 
-
-                      end
-                      else
-                      begin
-                         next_state = 7 ; 
-                      end 
-
-
-				          temp_shadedataready = 1'b0 ; 
-				          temp_texelready = 1'b0 ; 
-                         temp_fbdatavalidl = 1'b1 ; 
-                         if (fcount != 0)
-                         begin
-                            temp_faddress = faddress + 1 ; 
-                            temp_fcount = fcount - 1 ; 
-                         end 
-
-                   end
-          8 :
-                   begin
-				       tm3_sram_we = 8'b11111111 ; 
-				       tm3_sram_oe = 2'b01 ; 
-				       tm3_sram_adsp = 1'b0 ; 
-				       tm3_sram_data_xhdl0 = 0;
-				       tm3_sram_addr = {1'b0, waddress} ; 
-				       want_addr = 1'b1 ; 
-				       want_data = 1'b1 ; 
-				       read_ready = 1'b1 ; 
-				       dirReady = 1'b0 ; 
-				       writebackack = 1'b0 ; 
-                      next_state = 7 ; 
-
-				   				          temp_fbdatavalidl = 1'b0 ; 
-				          temp_shadedataready = 1'b0 ; 
-				          temp_texelready = 1'b0 ; 
-                         temp_fcount = 7'b1101011 ; 
-                         if (faddress == 25680)
-                         begin
-                            temp_faddress = 0;
-                         end 
-                   end
-          9 :
-                   begin
-				       tm3_sram_we = 8'b11111111 ; 
-				       tm3_sram_oe = 2'b01 ; 
-				       tm3_sram_adsp = 1'b0 ; 
-				       tm3_sram_data_xhdl0 = 0;
-				       want_addr = 1'b1 ; 
-				       want_data = 1'b1 ; 
-				       read_ready = 1'b1 ; 
-				       dirReady = 1'b0 ; 
-				       writebackack = 1'b0 ; 
-                      tm3_sram_addr = {3'b010, triID} ; 
-                      next_state = 0 ; 
-
-				          temp_fbdatavalidl = 1'b0 ; 
-				          temp_texelready = 1'b0 ; 
-                         temp_shadedataready = 1'b1 ; 
-                   end
-
-          10 :
-                   begin
-				       tm3_sram_we = 8'b11111111 ; 
-				       tm3_sram_oe = 2'b01 ; 
-				       tm3_sram_adsp = 1'b0 ; 
-				       tm3_sram_data_xhdl0 = 0;
-				       want_addr = 1'b1 ; 
-				       want_data = 1'b1 ; 
-				       read_ready = 1'b1 ; 
-				       dirReady = 1'b0 ; 
-				       writebackack = 1'b0 ; 
-                      tm3_sram_addr = {1'b0, texeladdr} ; 
-                      next_state = 0 ; 
-
-				          temp_fbdatavalidl = 1'b0 ; 
-				          temp_shadedataready = 1'b0 ; 
-                         temp_texelready = 1'b1 ; 
-                   end
-       endcase 
-    end 
- endmodule
-
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-
- module raysend (as, ack, addr, dir, origx, origy, origz, rgData, rgAddr, rgWE, rgAddrValid, rgDone, globalreset, clk, statepeek);
-
-    input as; 
-    output ack; 
-    reg ack;
-    input[3:0] addr; 
-    input[47:0] dir; 
-    input[27:0] origx; 
-    input[27:0] origy; 
-    input[27:0] origz; 
-    output[31:0] rgData; 
-    reg[31:0] rgData;
-
-    output[3:0] rgAddr; 
-    reg[3:0] rgAddr;
-    output[2:0] rgWE; 
-    reg[2:0] rgWE;
-    output rgAddrValid; 
-    reg rgAddrValid;
-    input rgDone; 
-    input globalreset; 
-    input clk; 
-    output[2:0] statepeek; 
-    reg[2:0] statepeek;
-
-    reg[3:0] state; 
-    reg[3:0] next_state; 
-
-
-
-    reg[31:0] temp_rgData;
-    reg[2:0] temp_rgWE; 
-    reg temp_rgAddrValid;
-    reg temp_ack;
-    reg[3:0] temp_rgAddr; 
-
-    always @(posedge clk)
-    begin
-       if (globalreset == 1'b1)
-       begin
-          state <= 0 ; 
-          ack <= 1'b0 ; 
-          rgWE <= 3'b000 ; 
-          rgData <= 0;
-          rgAddrValid <= 1'b0 ; 
-          rgAddr <= 0;
-       end
-       else
-       begin
-          state <= next_state ; 
-
-rgData <= temp_rgData;
-rgWE <= temp_rgWE;
-rgAddrValid <= temp_rgAddrValid;
-ack <= temp_ack;
-rgAddr <= temp_rgAddr;
-
-       end 
-    end 
-
-    always @(state or ack or as or rgDone)
-    begin
-
-       case (state)
-          0 :
-                   begin
-                      if ((as == 1'b1) & (ack == 1'b0))
-                      begin
-                         next_state = 1 ; 
-                      end
-                      else
-                      begin
-                         next_state = 0 ; 
-                      end 
-                      statepeek = 3'b001 ; 
-
-                         if ((as == 1'b1) & (ack == 1'b0))
-                         begin
-                            temp_rgData = {4'b0000, origx} ; 
-                            temp_rgWE = 3'b001 ; 
-                            temp_rgAddrValid = 1'b1 ; 
-                            temp_rgAddr = addr ; 
-                         end 
-                         if (as == 1'b0 & ack == 1'b1)
-                         begin
-                            temp_ack = 1'b0 ; 
-                         end 
-
-                   end
-          1 :
-                   begin
-                      if (rgDone == 1'b1)
-                      begin
-                         next_state = 6 ; 
-                      end
-                      else
-                      begin
-                         next_state = 1 ; 
-                      end 
-                      statepeek = 3'b010 ; 
-
-                         if (rgDone == 1'b1)
-                         begin
-                            temp_rgAddrValid = 1'b0 ; 
-                         end 
-
-                   end
-          2 :
-                   begin
-                      if (rgDone == 1'b1)
-                      begin
-                         next_state = 7 ; 
-                      end
-                      else
-                      begin
-                         next_state = 2 ; 
-                      end 
-                      statepeek = 3'b011 ; 
-
-                         if (rgDone == 1'b1)
-                         begin
-                            temp_rgAddrValid = 1'b0 ; 
-                         end 
-
-                   end
-           3 :
-                   begin
-                      if (rgDone == 1'b1)
-                      begin
-                         next_state = 8 ; 
-                      end
-                      else
-                      begin
-                         next_state = 3 ; 
-                      end 
-                      statepeek = 3'b100 ; 
-
-                         if (rgDone == 1'b1)
-                         begin
-                            temp_rgAddrValid = 1'b0 ; 
-                         end 
-
-                   end
-         4 :
-                   begin
-                      if (rgDone == 1'b1)
-                       begin
-                         next_state = 9 ; 
-                      end
-                      else
-                      begin
-                         next_state = 4 ; 
-                      end 
-                      statepeek = 3'b101 ; 
-
-                         if (rgDone == 1'b1)
-                         begin
-                            temp_rgAddrValid = 1'b0 ; 
-                         end 
-                   end
-
-          5 :
-                   begin
-                      if (rgDone == 1'b1)
-                      begin
-                         next_state = 0 ; 
-                      end
-                      else
-                      begin
-                         next_state = 5 ; 
-                      end 
-                      statepeek = 3'b110 ; 
-
-                         temp_ack = 1'b1 ; 
-                         if (rgDone == 1'b1)
-                         begin
-                            temp_rgAddrValid = 1'b0 ; 
-                         end 
-
-                   end
-
-          6 :
-                   begin
-                      next_state = 2 ; 
-
-                         temp_rgData = {4'b0000, origy} ; 
-                         temp_rgWE = 3'b010 ; 
-                         temp_rgAddrValid = 1'b1 ; 
-
-                   end
-          7 :
-                   begin
-                      next_state = 3 ; 
-
-                         temp_rgData = {4'b0000, origz} ; 
-                         temp_rgWE = 3'b011 ; 
-                         temp_rgAddrValid = 1'b1 ; 
-                   end
-          8 :
-                   begin
-                      next_state = 4 ; 
-
-                         temp_rgData = {dir[31:16], dir[47:32]} ; 
-                         temp_rgWE = 3'b100 ; 
-                         temp_rgAddrValid = 1'b1 ; 
-                   end
-           9 :
-                   begin
-                      next_state = 5 ; 
-
-                         temp_rgData = {16'b0000000000000000, dir[15:0]} ; 
-                          temp_rgWE = 3'b101 ; 
-                         temp_rgAddrValid = 1'b1 ; 
-                   end
-       endcase 
-    end 
- endmodule
-
-    
-    
-    
-    
-    
-
- module raygencont (go, initcount, busyout, cycles, nextaddr, nas0, nas1, page, dirReady, wantDir, dirIn, addrIn, as, addr, ack, dir, raygroup0, raygroupvalid0, busy0, raygroup1, raygroupvalid1, busy1, globalreset, clk, statepeek);
-
-    input go; 
-    input[14:0] initcount; 
-    output busyout; 
-    wire busyout;
-    reg temp_busyout;
-    output[30:0] cycles; 
-    reg[30:0] cycles;
-    output[17:0] nextaddr; 
-    wire[17:0] nextaddr;
-    output nas0; 
-
-    wire nas0;
-    reg temp_nas0;
-    output nas1; 
-    wire nas1;
-    reg temp_nas1;
-    input page; 
-    input dirReady; 
-    output wantDir; 
-    reg wantDir;
-    input[47:0] dirIn; 
-    input[14:0] addrIn; 
-    output as; 
-    reg as;
-    output[3:0] addr; 
-
-    reg[3:0] addr;
-    input ack; 
-    output[47:0] dir; 
-    reg[47:0] dir;
-    output[1:0] raygroup0; 
-    wire[1:0] raygroup0;
-    output raygroupvalid0; 
-    reg raygroupvalid0;
-    input busy0; 
-    output[1:0] raygroup1; 
-    wire[1:0] raygroup1;
-    output raygroupvalid1; 
-
-    reg raygroupvalid1;
-    input busy1; 
-    input globalreset; 
-    input clk; 
-    output[2:0] statepeek; 
-    reg[2:0] statepeek;
-
-
-    reg[2:0] state; 
-    reg[2:0] next_state; 
-    reg[14:0] count; 
-    reg first; 
-    reg[17:0] destaddr; 
-    wire[1:0] busy; 
-    reg[1:0] loaded; 
-    reg[1:0] groupID; 
-    reg active; 
-
-    reg[47:0] temp_dir;
-    reg[30:0] temp_cycles;
-    reg[1:0] temp_addr;
-    reg[1:0] temp_loaded; 
-    reg[1:0] temp_groupID; 
-    reg[14:0] temp_count; 
-    reg temp_active; 
-    reg temp_raygroupvalid1;
-    reg temp_raygroupvalid0;
-
-    assign busy = {busy1, busy0} ;
-
-    always @(posedge clk)
-    begin
-
-       if (globalreset == 1'b1)
-
-       begin
-          state <= 0 ; 
-          cycles <= 0;
-          dir <= 0;
-          addr[1:0] <= 2'b00 ; 
-          groupID <= 2'b00 ; 
-          count <= 0;
-          first <= 1'b0 ; 
-          destaddr <= 0;
-          raygroupvalid0 <= 1'b0 ; 
-          raygroupvalid1 <= 1'b0 ; 
-          loaded <= 2'b00 ; 
-
-          active <= 1'b0 ; 
-       end
-       else
-       begin
-    	addr[3:2] <= (active == 1'b0) ? {1'b0, groupID[0]} : {1'b1, groupID[1]} ;
-	addr[1:0] <= temp_addr[1:0];
-        state <= next_state ; 
-
-	dir <= temp_dir;
-	cycles <= temp_cycles;
-	loaded <= temp_loaded;	
-	groupID <= temp_groupID;
-	count <= temp_count;
-	active <= temp_active;
-	raygroupvalid0 <= temp_raygroupvalid0;
-	raygroupvalid1 <= temp_raygroupvalid1;
-
-       end 
-    end 
-
-    assign raygroup0 = {1'b0, groupID[0]} ;
-    assign raygroup1 = {1'b1, groupID[1]} ;
-    assign nextaddr = {2'b11, page, addrIn} ;
-    assign busyout = temp_busyout;
-    assign nas0 = temp_nas0;
-    assign nas1 = temp_nas1;
-
-    always @(state or go or ack or busy or dirReady or addr or count or loaded)
-    begin
-       case (state)
-          0 :
-                   begin
-       				as = 1'b0 ; 
-       				wantDir = 1'b0 ; 
-                      if (go == 1'b1)
-                      begin
-                         next_state = 1 ; 
-                      end
-                      else
-                      begin
-                         next_state = 0 ; 
-                      end 
-                      statepeek = 3'b001 ; 
-						temp_busyout = 1'b0;
-						temp_nas0 = 1'b0;
-						temp_nas1 = 1'b0;
-
-
-                         if (go == 1'b1)
-                         begin
-                            temp_cycles = 0;
-                         end 
-                         temp_addr[1:0] = 2'b00 ; 
-                         temp_loaded = 2'b00 ; 
-                         temp_groupID = 2'b00 ; 
-                         temp_count = initcount ; 
-                         temp_active = 1'b0 ; 
-
-                   end
-          1 :
-                   begin
-                      as = dirReady ; 
-                      wantDir = 1'b1 ; 
-                      if (dirReady == 1'b1)
-                      begin
-                         next_state = 2 ; 
-                      end
-                      else
-                      begin
-                         next_state = 1 ; 
-                      end 
-                     statepeek = 3'b010 ; 
-						temp_busyout = 1'b1;
-    				if (addr[1:0] == 2'b00 & dirReady == 1'b1 & active == 1'b0) 
-					begin
-						 temp_nas0 = 1'b1;
-						 temp_nas1 = 1'b1;
-					end
-
-                         temp_dir = dirIn ; 
-                         if (dirReady == 1'b1 & addr[1:0] == 2'b10)
-                         begin
-                            if (active == 1'b0)
-                            begin
-                               temp_loaded[0] = 1'b1 ; 
-                            end
-                            else
-                            begin
-                               temp_loaded[1] = 1'b1 ; 
-                            end 
-                         end 
-             temp_cycles = cycles + 1 ; 
-
-
-                   end
-          2 :
-                   begin
-                      wantDir = 1'b0 ; 
-                      as = 1'b1 ; 
-                      if ((ack == 1'b1) & (addr[1:0] != 2'b10))
-                      begin
-                         next_state = 1 ; 
-                      end
-                      else if (ack == 1'b1)
-                      begin
-                         if ((loaded[0]) == 1'b1 & (busy[0]) == 1'b0)
-                         begin
-                            next_state = 3 ; 
-                         end
-                         else if ((loaded[1]) == 1'b1 & (busy[1]) == 1'b0)
-                         begin
-                            next_state = 4 ; 
-                         end
-                         else if (loaded != 2'b11)
-                         begin
-
-                            next_state = 1 ; 
-                         end
-                         else
-                         begin
-                            next_state = 2 ; 
-                         end 
-                      end
-                      else
-                      begin
-                         next_state = 2 ; 
-                      end 
-                      statepeek = 3'b011 ; 
-						temp_busyout = 1'b1;
-						temp_nas0 = 1'b0;
-						temp_nas1 = 1'b0;
-
-                         if ((ack == 1'b1) & (addr[1:0] != 2'b10))
-                         begin
-                            temp_addr[1:0] = addr[1:0] + 2'b01 ; 
-                         end 
-                         else if ((ack == 1'b1) & addr[1:0] == 2'b10)
-                         begin
-                            if ((loaded[0]) == 1'b1 & (busy[0]) == 1'b0)
-                            begin
-                               temp_raygroupvalid0 = 1'b1 ; 
-                            end
-                            else if ((loaded[1]) == 1'b1 & (busy[1]) == 1'b0)
-                            begin
-
-                               temp_raygroupvalid1 = 1'b1 ; 
-                            end
-                            else if ((loaded[0]) == 1'b0)
-                            begin
-                               temp_active = 1'b0 ; 
-                               temp_addr[1:0] = 2'b00 ; 
-                            end
-                            else if ((loaded[1]) == 1'b0)
-                            begin
-                               temp_active = 1'b1 ; 
-                               temp_addr[1:0] = 2'b00 ; 
-                            end 
-                         end 
-
-             temp_cycles = cycles + 1 ; 
-                   end
-          4 :
-                   begin
-                      if ((busy[1]) == 1'b0)
-                      begin
-                         next_state = 4 ; 
-                      end
-                      else if ((loaded[0]) == 1'b1 & (busy[0]) == 1'b0)
-                      begin
-                         next_state = 3 ; 
-                      end
-                      else if (count > 0)
-                      begin
-
-                         next_state = 1 ; 
-                      end
-                      else
-                      begin
-                         next_state = 0 ; 
-                      end 
-                      statepeek = 3'b101 ; 
-						temp_busyout = 1'b1;
-						temp_nas0 = 1'b0;
-						temp_nas1 = 1'b0;
-
-                     if ((busy[1]) == 1'b1)
-                         begin
-                            temp_groupID[1] = ~groupID[1] ; 
-                            temp_raygroupvalid1 = 1'b0 ; 
-                            temp_count = count - 1 ; 
-                            if ((loaded[0]) == 1'b1 & (busy[0]) == 1'b0)
-                            begin
-                               temp_raygroupvalid0 = 1'b1 ; 
-                            end
-
-                            else if ((loaded[0]) == 1'b0)
-                            begin
-                               temp_active = 1'b0 ; 
-                            end
-                            else
-                            begin
-                               temp_active = 1'b1 ; 
-                            end 
-                         end 
-                         temp_loaded[1] = 1'b0 ; 
-                         temp_addr[1:0] = 2'b00 ; 
-
-             temp_cycles = cycles + 1 ; 
-                   end
-          3 :
-                   begin
-                      if ((busy[0]) == 1'b0)
-                      begin
-                         next_state = 3 ; 
-
-                      end
-                      else if ((loaded[1]) == 1'b1 & (busy[1]) == 1'b0)
-                      begin
-                         next_state = 4 ; 
-                      end
-                      else if (count > 0)
-                      begin
-                         next_state = 1 ; 
-                      end
-                      else
-                      begin
-                         next_state = 0 ; 
-
-                      end 
-                      statepeek = 3'b100 ; 
-						temp_busyout = 1'b1;
-						temp_nas0 = 1'b0;
-						temp_nas1 = 1'b0;
-
-                         if ((busy[0]) == 1'b1)
-                         begin
-                            temp_groupID[0] = ~groupID[0] ; 
-                            temp_raygroupvalid0 = 1'b0 ; 
-                            temp_count = count - 1 ; 
-                            if ((loaded[1]) == 1'b1 & (busy[1]) == 1'b0)
-                            begin
-                               temp_raygroupvalid1 = 1'b1 ; 
-
-                            end
-                            else if ((loaded[1]) == 1'b0)
-                            begin
-                               temp_active = 1'b1 ; 
-                            end
-                            else
-                            begin
-                               temp_active = 1'b0 ; 
-                            end 
-                         end 
-                         temp_loaded[0] = 1'b0 ; 
-                         temp_addr[1:0] = 2'b00 ; 
-
-
-             temp_cycles = cycles + 1 ; 
-                   end
-       endcase 
-    end 
- endmodule
-    
-    
-    
-    
-    
-    
-    
-
- module resultrecieve (valid01, valid10, id01a, id01b, id01c, id10a, id10b, id10c, hit01a, hit01b, hit01c, hit10a, hit10b, hit10c, u01a, u01b, u01c, v01a, v01b, v01c, u10a, u10b, u10c, v10a, v10b, v10c, rgResultData, rgResultReady, rgResultSource, globalreset, clk);
-
-    output valid01; 
-    reg valid01;
-    output valid10; 
-    reg valid10;
-    output[15:0] id01a; 
-    reg[15:0] id01a;
-    output[15:0] id01b; 
-    reg[15:0] id01b;
-    output[15:0] id01c; 
-    reg[15:0] id01c;
-
-    output[15:0] id10a; 
-    reg[15:0] id10a;
-    output[15:0] id10b; 
-    reg[15:0] id10b;
-    output[15:0] id10c; 
-    reg[15:0] id10c;
-    output hit01a; 
-    reg hit01a;
-    output hit01b; 
-    reg hit01b;
-    output hit01c; 
-    reg hit01c;
-
-    output hit10a; 
-    reg hit10a;
-    output hit10b; 
-    reg hit10b;
-    output hit10c; 
-    reg hit10c;
-    output[7:0] u01a; 
-    reg[7:0] u01a;
-    output[7:0] u01b; 
-    reg[7:0] u01b;
-    output[7:0] u01c; 
-    reg[7:0] u01c;
-
-    output[7:0] v01a; 
-    reg[7:0] v01a;
-    output[7:0] v01b; 
-    reg[7:0] v01b;
-    output[7:0] v01c; 
-    reg[7:0] v01c;
-    output[7:0] u10a; 
-    reg[7:0] u10a;
-    output[7:0] u10b; 
-    reg[7:0] u10b;
-    output[7:0] u10c; 
-    reg[7:0] u10c;
-
-    output[7:0] v10a; 
-    reg[7:0] v10a;
-    output[7:0] v10b; 
-    reg[7:0] v10b;
-    output[7:0] v10c; 
-    reg[7:0] v10c;
-    input[31:0] rgResultData; 
-    input rgResultReady; 
-    input[1:0] rgResultSource; 
-    input globalreset; 
-    input clk; 
-
-    reg temp_valid01;
-    reg temp_valid10;
-    reg[15:0] temp_id01a;
-    reg[15:0] temp_id01b;
-    reg[15:0] temp_id01c;
-    reg[15:0] temp_id10a;
-    reg[15:0] temp_id10b;
-    reg[15:0] temp_id10c;
-    reg temp_hit01a;
-    reg temp_hit01b;
-    reg temp_hit01c;
-    reg temp_hit10a;
-    reg temp_hit10b;
-    reg temp_hit10c;
-    reg[7:0] temp_u01a;
-    reg[7:0] temp_u01b;
-    reg[7:0] temp_u01c;
-    reg[7:0] temp_v01a;
-    reg[7:0] temp_v01b;
-    reg[7:0] temp_v01c;
-    reg[7:0] temp_u10a;
-    reg[7:0] temp_u10b;
-    reg[7:0] temp_u10c;
-    reg[7:0] temp_v10a;
-    reg[7:0] temp_v10b;
-    reg[7:0] temp_v10c;
-
-
-    reg[2:0] state; 
-    reg[2:0] next_state; 
-
-    always @(posedge clk)
-    begin
-       if (globalreset == 1'b1)
-       begin
-          state <= 0 ; 
-          valid01 <= 1'b0 ; 
-          valid10 <= 1'b0 ; 
-          hit01a <= 1'b0 ; 
-          hit01b <= 1'b0 ; 
-          hit01c <= 1'b0 ; 
-          hit10a <= 1'b0 ; 
-          hit10b <= 1'b0 ; 
-          hit10c <= 1'b0 ; 
-          id01a <= 0;
-
-          id01b <= 0;
-          id01c <= 0;
-          id10a <= 0;
-          id10b <= 0;
-          id10c <= 0;
-          u01a <= 0;
-          u01b <= 0;
-          u01c <= 0;
-          v01a <= 0;
-          v01b <= 0;
-          v01c <= 0;
-          u10a <= 0;
-
-          u10b <= 0;
-          u10c <= 0;
-          v10a <= 0;
-          v10b <= 0;
-          v10c <= 0;
-       end
-       else
-       begin
-          state <= next_state ; 
-
-valid01 <= temp_valid01;
-valid10 <= temp_valid10;
-id01a <= temp_id01a;
-id01b <= temp_id01b;
-id01c <= temp_id01c;
-hit01a <= temp_hit01a;
-hit01b <= temp_hit01b;
-hit01c <= temp_hit01c;
-u01a <= temp_u01a;
-u01b <= temp_u01b;
-u01c <= temp_u01c;
-u10a <= temp_u10a;
-u10b <= temp_u10b;
-u10c <= temp_u10c;
-v01a <= temp_v01a;
-v01b <= temp_v01b;
-v01c <= temp_v01c;
-v10a <= temp_v10a;
-v10b <= temp_v10b;
-v10c <= temp_v10c;
-hit10a <= temp_hit10a;
-hit10b <= temp_hit10b;
-hit10c <= temp_hit10c;
-       end 
-    end 
-
-
-    always @(state or rgResultReady or rgResultSource)
-    begin
-       case (state)
-          0 :
-                   begin
-                      if (rgResultReady == 1'b1 & rgResultSource == 2'b01)
-                      begin
-                         next_state = 1 ; 
-                      end
-                      else if (rgResultReady == 1'b1 & rgResultSource == 2'b10)
-                      begin
-
-                         next_state = 4 ; 
-                      end
-                      else
-                      begin
-                         next_state = 0 ; 
-                      end 
-
-
-			temp_valid01 = 1'b0 ; 
-				          temp_valid10 = 1'b0 ; 
-                         if (rgResultReady == 1'b1 & rgResultSource == 2'b01)
-                         begin
-                            temp_id01a = rgResultData[31:16] ; 
-                            temp_id01b = rgResultData[15:0] ; 
-                         end
-                         else if (rgResultReady == 1'b1 & rgResultSource == 2'b10)
-                         begin
-                            temp_id10a = rgResultData[31:16] ; 
-                            temp_id10b = rgResultData[15:0] ; 
-                         end 
-
-                   end
-
-          1 :
-                   begin
-                      next_state = 2 ; 
-
-			temp_valid01 = 1'b0 ; 
-				          temp_valid10 = 1'b0 ; 
-                         temp_id01c = rgResultData[15:0] ; 
-                         temp_hit01a = rgResultData[18] ; 
-                         temp_hit01b = rgResultData[17] ; 
-                         temp_hit01c = rgResultData[16] ; 
-
-                   end
-          2 :
-
-                   begin
-                      next_state = 3 ; 
-
-			temp_valid01 = 1'b0 ; 
-				          temp_valid10 = 1'b0 ; 
-                         temp_u01a = rgResultData[23:16] ; 
-                         temp_u01b = rgResultData[15:8] ; 
-                         temp_u01c = rgResultData[7:0] ; 
-
-                   end
-          3 :
-                   begin
-                      next_state = 0 ; 
-
-				          temp_valid10 = 1'b0 ; 
-                         temp_v01a = rgResultData[23:16] ; 
-                         temp_v01b = rgResultData[15:8] ; 
-                         temp_v01c = rgResultData[7:0] ; 
-                         temp_valid01 = 1'b1 ; 
-
-                   end
-          4 :
-                   begin
-                      next_state = 5 ; 
-
-          				temp_valid01 = 1'b0 ; 
-				          temp_valid10 = 1'b0 ; 
-                         temp_id10c = rgResultData[15:0] ; 
-
-                         temp_hit10a = rgResultData[18] ; 
-                         temp_hit10b = rgResultData[17] ; 
-                         temp_hit10c = rgResultData[16] ; 
-
-                   end
-          5 :
-
-                   begin
-                      next_state = 6 ; 
-
-          				temp_valid01 = 1'b0 ; 
-				          temp_valid10 = 1'b0 ; 
-                         temp_u10a = rgResultData[23:16] ; 
-                         temp_u10b = rgResultData[15:8] ; 
-                         temp_u10c = rgResultData[7:0] ; 
-
-                   end
-          6 :
-                   begin
-                      next_state = 0 ; 
-
-      				temp_valid01 = 1'b0 ; 
-                         temp_v10a = rgResultData[23:16] ; 
-                         temp_v10b = rgResultData[15:8] ; 
-                         temp_v10c = rgResultData[7:0] ; 
-                         temp_valid10 = 1'b1 ; 
-
-                   end
-       endcase 
-    end 
- endmodule
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-
- module resultwriter (valid01, valid10, id01a, id01b, id01c, id10a, id10b, id10c, hit01a, hit01b, hit01c, hit10a, hit10b, hit10c, u01a, u01b, u01c, v01a, v01b, v01c, u10a, u10b, u10c, v10a, v10b, v10c, addr, as01, as10, bkcolour, shadedata, triID, wantshadedata, shadedataready, texinfo, texaddr, texeladdr, texel, wanttexel, texelready, dataout, addrout, write, ack, globalreset, clk);
-
-    input valid01; 
-    input valid10; 
-    input[15:0] id01a; 
-    input[15:0] id01b; 
-    input[15:0] id01c; 
-    input[15:0] id10a; 
-    input[15:0] id10b; 
-    input[15:0] id10c; 
-
-    input hit01a; 
-    input hit01b; 
-    input hit01c; 
-    input hit10a; 
-    input hit10b; 
-    input hit10c; 
-    input[7:0] u01a; 
-    input[7:0] u01b; 
-    input[7:0] u01c; 
-    input[7:0] v01a; 
-    input[7:0] v01b; 
-    input[7:0] v01c; 
-
-    input[7:0] u10a; 
-    input[7:0] u10b; 
-    input[7:0] u10c; 
-    input[7:0] v10a; 
-    input[7:0] v10b; 
-    input[7:0] v10c; 
-    input[17:0] addr; 
-    input as01; 
-    input as10; 
-    input[20:0] bkcolour; 
-    input[63:0] shadedata; 
-    output[15:0] triID; 
-
-    reg[15:0] triID;
-    output wantshadedata; 
-    reg wantshadedata;
-    input shadedataready; 
-    input[20:0] texinfo; 
-    output[3:0] texaddr; 
-    wire[3:0] texaddr;
-    output[17:0] texeladdr; 
-    wire[17:0] texeladdr;
-    input[63:0] texel; 
-    output wanttexel; 
-    reg wanttexel;
-
-    input texelready; 
-    output[63:0] dataout; 
-    // PAJ see lower note wire[63:0] dataout;
-    reg[63:0] dataout;
-    output[17:0] addrout; 
-    wire[17:0] addrout;
-    output write; 
-    wire write;
-    reg temp_write;
-    input ack; 
-    input globalreset; 
-    input clk; 
-
-    reg[3:0] state; 
-    reg[3:0] next_state; 
-    reg pending01; 
-    reg pending10; 
-    reg process01; 
-    wire[17:0] addrout01; 
-    wire[17:0] addrout10; 
-    wire shiften01; 
-    wire shiften10; 
-    reg temp_shiften01; 
-    reg temp_shiften10; 
-    reg[20:0] shadedataa; 
-    reg[20:0] shadedatab; 
-    reg[20:0] shadedatac; 
-    wire hita; 
-    wire hitb; 
-    wire hitc; 
-
-    reg[2:0] selectuv; 
-    wire[6:0] blr; 
-    wire[6:0] blg; 
-    wire[6:0] blb; 
-    reg texmap; 
-    reg lmenable; 
-    wire[1:0] texelselect; 
-    wire[6:0] texelr; 
-    wire[6:0] texelg; 
-    wire[6:0] texelb; 
-    reg[20:0] texinfol; 
-
-    reg temp_pending01; 
-    reg temp_pending10; 
-    reg temp_process01; 
-    reg temp_texmap; 
-    reg[20:0] temp_texinfol; 
-    reg[20:0] temp_shadedataa; 
-    reg[20:0] temp_shadedatab; 
-    reg[20:0] temp_shadedatac; 
-
-    col16to21 col16to21inst (texel, texelselect, texelr, texelg, texelb); 
-    linearmap linearmapinst (blb, blg, texinfol[17:0], texeladdr, texelselect, texinfol[20:18], lmenable, clk); 
-    bilinearintrp bilinearimp (u01a, u01b, u01c, v01a, v01b, v01c, u10a, u10b, u10c, v10a, v10b, v10c, selectuv, shadedata[41:35], shadedata[62:56], shadedata[20:14], shadedata[34:28], shadedata[55:49], shadedata[13:7], shadedata[27:21], shadedata[48:42], shadedata[6:0], blr, blg, blb, clk); 
-    fifo3 fifo3insta (addr, as01, addrout01, shiften01, globalreset, clk); 
-    fifo3 fifo3instb (addr, as10, addrout10, shiften10, globalreset, clk); 
-    assign hita = (hit01a & process01) | (hit10a & ~process01) ;
-    assign hitb = (hit01b & process01) | (hit10b & ~process01) ;
-    assign hitc = (hit01c & process01) | (hit10c & ~process01) ;
-    assign texaddr = shadedata[59:56] ;
-    assign shiften01 = temp_shiften01;
-    assign shiften10 = temp_shiften10;
-    assign write = temp_write;
-
-
-    always @(posedge clk)
-    begin
-       if (globalreset == 1'b1)
-       begin
-          state <= 0 ; 
-          pending01 <= 1'b0 ; 
-          pending10 <= 1'b0 ; 
-          shadedataa <= 0;
-          shadedatab <= 0;
-          shadedatac <= 0;
-          process01 <= 1'b0 ; 
-          texmap <= 1'b0 ; 
-
-          texinfol <= 0;
-       end
-       else
-       begin
-          state <= next_state ; 
-
-process01 <= temp_process01;
-pending01 <= temp_pending01;
-pending10 <= temp_pending10;
-texmap <= temp_texmap;
-texinfol <= temp_texinfol;
-shadedataa <= temp_shadedataa;
-shadedatab <= temp_shadedatab;
-shadedatac <= temp_shadedatac;
-
-    dataout <= {1'b0, 
-					shadedataa[20],
-					shadedataa[19],
-					shadedataa[18],
-					shadedataa[17],
-					shadedataa[16],
-					shadedataa[15],
-					shadedataa[14],
-					shadedataa[13],
-					shadedataa[12],
-					shadedataa[11],
-					shadedataa[10],
-					shadedataa[9],
-					shadedataa[8],
-					shadedataa[7],
-					shadedataa[6],
-					shadedataa[5],
-					shadedataa[4],
-					shadedataa[3],
-					shadedataa[2],
-					shadedataa[1],
-					shadedataa[0],
-					shadedatab[20],
-					shadedatab[19],
-					shadedatab[18],
-					shadedatab[17],
-					shadedatab[16],
-					shadedatab[15],
-					shadedatab[14],
-					shadedatab[13],
-					shadedatab[12],
-					shadedatab[11],
-					shadedatab[10],
-					shadedatab[9],
-					shadedatab[8],
-					shadedatab[7],
-					shadedatab[6],
-					shadedatab[5],
-					shadedatab[4],
-					shadedatab[3],
-					shadedatab[2],
-					shadedatab[1],
-					shadedatab[0],
-					shadedatac[20],
-					shadedatac[19],
-					shadedatac[18],
-					shadedatac[17],
-					shadedatac[16],
-					shadedatac[15],
-					shadedatac[14],
-					shadedatac[13],
-					shadedatac[12],
-					shadedatac[11],
-					shadedatac[10],
-					shadedatac[9],
-					shadedatac[8],
-					shadedatac[7],
-					shadedatac[6],
-					shadedatac[5],
-					shadedatac[4],
-					shadedatac[3],
-					shadedatac[2],
-					shadedatac[1],
-					shadedatac[0]} ;
-       end 
-//    end 
-// PAJ used to be assign, but weird error, so added as register   assign dataout = {1'b0, 
-    end 
-    assign addrout = (process01 == 1'b1) ? addrout01 : addrout10 ;
-
-    always @(state or process01 or pending10 or ack or shadedataready or id01a or 
-             id01b or id01c or id10a or id10b or id10c or selectuv or hita or 
-             hitb or hitc or shadedata or pending01 or texmap or texelready)
-    begin
-       case (state)
-          0 :
-                   begin
-				       wantshadedata = 1'b0 ; 
-   				       triID = 0;
-				       selectuv = 0;
-				       lmenable = 1'b0 ; 
-				       wanttexel = 1'b0 ; 
-                      if (pending01 == 1'b1 | pending10 == 1'b1)
-                      begin
-                         next_state = 2 ; 
-                      end
-                      else
-
-                      begin
-                         next_state = 0 ; 
-                      end 
-	          if (valid01 == 1'b1)
-				          begin
-				             temp_pending01 = 1'b1 ; 
-				          end 
-				          if (valid10 == 1'b1)
-				          begin
-				             temp_pending10 = 1'b1 ; 
-				          end 
-                         temp_process01 = pending01 ; 
-
-							temp_shiften01 = 1'b0;
-							temp_shiften10 = 1'b0;
-					    	temp_write = 1'b0;
-                   end
-          2 :
-                   begin
-				       lmenable = 1'b0 ; 
-				       wanttexel = 1'b0 ; 
-                      wantshadedata = 1'b1 ; 
-                      selectuv[2] = ~process01 ; 
-                      selectuv[1:0] = 2'b00 ; 
-                      if (process01 == 1'b1)
-                      begin
-                         triID = id01a ; 
-
-                      end
-                      else
-                      begin
-                         triID = id10a ; 
-                      end 
-                      if (shadedataready == 1'b1)
-                      begin
-                         if (hita == 1'b1 & ((shadedata[63]) == 1'b1 | shadedata[63:62] == 2'b01))
-                         begin
-                            next_state = 3 ; 
-                         end
-                         else
-
-                         begin
-                            next_state = 4 ; 
-                         end 
-                      end
-                      else
-                      begin
-                         next_state = 2 ; 
-                      end 
-
-	          if (valid01 == 1'b1)
-				          begin
-				             temp_pending01 = 1'b1 ; 
-				          end 
-				          if (valid10 == 1'b1)
-				          begin
-				             temp_pending10 = 1'b1 ; 
-				          end 
-
-                         if (hita == 1'b1)
-                         begin
-                            temp_shadedataa = shadedata[20:0] ; 
-                            temp_texmap = (~shadedata[63]) & shadedata[62] ; 
-                         end
-                         else
-                         begin
-                            temp_shadedataa = bkcolour ; 
-                         end 
-
-							temp_shiften01 = 1'b0;
-							temp_shiften10 = 1'b0;
-					    	temp_write = 1'b0;
-                   end
-          3 :
-                   begin
-				       wantshadedata = 1'b0 ; 
-   				       triID = 0;
-				       lmenable = 1'b0 ; 
-				       wanttexel = 1'b0 ; 
-                      selectuv[2] = ~process01 ; 
-
-                      selectuv[1:0] = 2'b00 ; 
-                      next_state = 8 ; 
-
-	          if (valid01 == 1'b1)
-				          begin
-				             temp_pending01 = 1'b1 ; 
-				          end 
-				          if (valid10 == 1'b1)
-				          begin
-				             temp_pending10 = 1'b1 ; 
-				          end 
-                         temp_texinfol = texinfo ; 
-
-							temp_shiften01 = 1'b0;
-							temp_shiften10 = 1'b0;
-					    	temp_write = 1'b0;
-
-                   end
-          8 :
-                   begin
-				       wantshadedata = 1'b0 ; 
-   				       triID = 0;
-				       wanttexel = 1'b0 ; 
-                      selectuv[2] = ~process01 ; 
-                      selectuv[1:0] = 2'b00 ; 
-                      lmenable = 1'b1 ; 
-                      if (texmap == 1'b1)
-                      begin
-
-                         next_state = 11 ; 
-                      end
-                      else
-                      begin
-                         next_state = 4 ; 
-                      end 
-
-	          if (valid01 == 1'b1)
-				          begin
-				             temp_pending01 = 1'b1 ; 
-				          end 
-				          if (valid10 == 1'b1)
-				          begin
-				             temp_pending10 = 1'b1 ; 
-				          end 
-                         temp_shadedataa[6:0] = blb ; 
-                         temp_shadedataa[13:7] = blg ; 
-                         temp_shadedataa[20:14] = blr ; 
-
-							temp_shiften01 = 1'b0;
-							temp_shiften10 = 1'b0;
-					    	temp_write = 1'b0;
-                   end
-          11 :
-                   begin
-				       wantshadedata = 1'b0 ; 
-   				       triID = 0;
-				       selectuv = 0;
-				       lmenable = 1'b0 ; 
-
-                      wanttexel = 1'b1 ; 
-                      if (texelready == 1'b1)
-                      begin
-                         next_state = 4 ; 
-                      end
-                      else
-                      begin
-                         next_state = 11 ; 
-                      end 
-
-	          if (valid01 == 1'b1)
-				          begin
-				             temp_pending01 = 1'b1 ; 
-				          end 
-				          if (valid10 == 1'b1)
-				          begin
-				             temp_pending10 = 1'b1 ; 
-				          end 
-
-                         temp_shadedataa[6:0] = texelb ; 
-                         temp_shadedataa[13:7] = texelg ; 
-                         temp_shadedataa[20:14] = texelr ; 
-
-							temp_shiften01 = 1'b0;
-							temp_shiften10 = 1'b0;
-					    	temp_write = 1'b0;
-                   end
-          12 :
-                   begin
-				       wantshadedata = 1'b0 ; 
-   				       triID = 0;
-				       selectuv = 0;
-				       lmenable = 1'b0 ; 
-
-                      wanttexel = 1'b1 ; 
-                      if (texelready == 1'b1)
-                      begin
-                         next_state = 5 ; 
-                      end
-                      else
-                      begin
-                         next_state = 12 ; 
-                      end 
-
-	          if (valid01 == 1'b1)
-				          begin
-				             temp_pending01 = 1'b1 ; 
-				          end 
-				          if (valid10 == 1'b1)
-				          begin
-				             temp_pending10 = 1'b1 ; 
-				          end 
-                         temp_shadedatab[6:0] = texelb ; 
-                         temp_shadedatab[13:7] = texelg ; 
-                         temp_shadedatab[20:14] = texelr ; 
-
-							temp_shiften01 = 1'b0;
-							temp_shiften10 = 1'b0;
-					    	temp_write = 1'b0;
-                   end
-          13 :
-                   begin
-				       wantshadedata = 1'b0 ; 
-   				       triID = 0;
-				       selectuv = 0;
-				       lmenable = 1'b0 ; 
-
-                      wanttexel = 1'b1 ; 
-                      if (texelready == 1'b1)
-                      begin
-                         next_state = 1 ; 
-                      end
-                      else
-                      begin
-                         next_state = 13 ; 
-                      end 
-
-	          if (valid01 == 1'b1)
-				          begin
-				             temp_pending01 = 1'b1 ; 
-				          end 
-				          if (valid10 == 1'b1)
-				          begin
-				             temp_pending10 = 1'b1 ; 
-				          end 
-
-                         temp_shadedatac[6:0] = texelb ; 
-                         temp_shadedatac[13:7] = texelg ; 
-                         temp_shadedatac[20:14] = texelr ; 
-
-                   end
-          6 :
-                   begin
-				       wantshadedata = 1'b0 ; 
-   				       triID = 0;
-				       lmenable = 1'b0 ; 
-				       wanttexel = 1'b0 ; 
-
-                      selectuv[2] = ~process01 ; 
-                      selectuv[1:0] = 2'b01 ; 
-                      next_state = 9 ; 
-
-	          if (valid01 == 1'b1)
-				          begin
-				             temp_pending01 = 1'b1 ; 
-				          end 
-				          if (valid10 == 1'b1)
-				          begin
-				             temp_pending10 = 1'b1 ; 
-				          end 
-                         temp_texinfol = texinfo ; 
-
-							temp_shiften01 = 1'b0;
-							temp_shiften10 = 1'b0;
-					    	temp_write = 1'b0;
-                   end
-          9 :
-                   begin
-				       wantshadedata = 1'b0 ; 
-   				       triID = 0;
-				       wanttexel = 1'b0 ; 
-                      selectuv[2] = ~process01 ; 
-                      selectuv[1:0] = 2'b01 ; 
-                      lmenable = 1'b1 ; 
-                      if (texmap == 1'b1)
-                      begin
-                         next_state = 12 ; 
-
-                      end
-                      else
-                      begin
-                         next_state = 5 ; 
-                      end 
-
-	          if (valid01 == 1'b1)
-				          begin
-				             temp_pending01 = 1'b1 ; 
-				          end 
-				          if (valid10 == 1'b1)
-				          begin
-				             temp_pending10 = 1'b1 ; 
-				          end 
-
-                         temp_shadedatab[6:0] = blb ; 
-                         temp_shadedatab[13:7] = blg ; 
-                         temp_shadedatab[20:14] = blr ; 
-
-							temp_shiften01 = 1'b0;
-							temp_shiften10 = 1'b0;
-					    	temp_write = 1'b0;
-                   end
-          7 :
-                   begin
-				       wantshadedata = 1'b0 ; 
-   				       triID = 0;
-				       lmenable = 1'b0 ; 
-				       wanttexel = 1'b0 ; 
-                      selectuv[2] = ~process01 ; 
-                      selectuv[1:0] = 2'b10 ; 
-                      next_state = 10 ; 
-
-	          if (valid01 == 1'b1)
-				          begin
-				             temp_pending01 = 1'b1 ; 
-				          end 
-				          if (valid10 == 1'b1)
-				          begin
-				             temp_pending10 = 1'b1 ; 
-				          end 
-                         temp_texinfol = texinfo ; 
-
-							temp_shiften01 = 1'b0;
-							temp_shiften10 = 1'b0;
-					    	temp_write = 1'b0;
-                   end
-
-          10 :
-                   begin
-				       wantshadedata = 1'b0 ; 
-   				       triID = 0;
-				       wanttexel = 1'b0 ; 
-                      selectuv[2] = ~process01 ; 
-                      selectuv[1:0] = 2'b10 ; 
-                      if (texmap == 1'b1)
-                      begin
-                         next_state = 13 ; 
-                      end
-                      else
-                      begin
-                         next_state = 1 ; 
-                      end 
-
-                      lmenable = 1'b1 ; 
-
-	          if (valid01 == 1'b1)
-				          begin
-				             temp_pending01 = 1'b1 ; 
-				          end 
-				          if (valid10 == 1'b1)
-				          begin
-				             temp_pending10 = 1'b1 ; 
-				          end 
-                         temp_shadedatac[6:0] = blb ; 
-                         temp_shadedatac[13:7] = blg ; 
-                         temp_shadedatac[20:14] = blr ; 
-
-							temp_shiften01 = 1'b0;
-							temp_shiften10 = 1'b0;
-					    	temp_write = 1'b0;
-                   end
-          4 :
-                   begin
-				       wantshadedata = 1'b0 ; 
-				       lmenable = 1'b0 ; 
-				       wanttexel = 1'b0 ; 
-                      selectuv[2] = ~process01 ; 
-                      selectuv[1:0] = 2'b01 ; 
-                      if (process01 == 1'b1)
-                      begin
-                         triID = id01b ; 
-                      end
-                      else
-                      begin
-
-                         triID = id10b ; 
-                      end 
-                      if (shadedataready == 1'b1)
-                      begin
-                         if (hitb == 1'b1 & ((shadedata[63]) == 1'b1 | shadedata[63:62] == 2'b01))
-                         begin
-                            next_state = 6 ; 
-                         end
-                         else
-                         begin
-                            next_state = 5 ; 
-                         end 
-
-                      end
-                      else
-                      begin
-                         next_state = 4 ; 
-                      end 
-
-	          if (valid01 == 1'b1)
-				          begin
-				             temp_pending01 = 1'b1 ; 
-				          end 
-				          if (valid10 == 1'b1)
-				          begin
-				             temp_pending10 = 1'b1 ; 
-				          end 
-
-                         if (hitb == 1'b1)
-                         begin
-                            temp_shadedatab = shadedata[20:0] ; 
-                            temp_texmap = (~shadedata[63]) & shadedata[62] ; 
-                         end
-                         else
-                         begin
-                            temp_shadedatab = bkcolour ; 
-                         end 
-
-							temp_shiften01 = 1'b0;
-							temp_shiften10 = 1'b0;
-					    	temp_write = 1'b0;
-                   end
-          5 :
-                   begin
-				       lmenable = 1'b0 ; 
-				       wanttexel = 1'b0 ; 
-                      wantshadedata = 1'b1 ; 
-                      selectuv[2] = ~process01 ; 
-                      selectuv[1:0] = 2'b10 ; 
-                      if (process01 == 1'b1)
-
-                      begin
-                         triID = id01c ; 
-                      end
-                      else
-                      begin
-                         triID = id10c ; 
-                      end 
-                      if (shadedataready == 1'b1)
-                      begin
-                         if (hitc == 1'b1 & ((shadedata[63]) == 1'b1 | shadedata[63:62] == 2'b01))
-                         begin
-                            next_state = 7 ; 
-
-                         end
-                         else
-                         begin
-                            next_state = 1 ; 
-                         end 
-                      end
-                      else
-                      begin
-                         next_state = 5 ; 
-                      end 
-
-	          if (valid01 == 1'b1)
-				          begin
-				             temp_pending01 = 1'b1 ; 
-				          end 
-				          if (valid10 == 1'b1)
-				          begin
-				             temp_pending10 = 1'b1 ; 
-				          end 
-
-                         if (hitc == 1'b1)
-                          begin
-                            temp_shadedatac = shadedata[20:0] ; 
-                            temp_texmap = (~shadedata[63]) & shadedata[62] ; 
-                         end
-                         else
-                         begin
-                            temp_shadedatac = bkcolour ; 
-                         end 
-
-							temp_shiften01 = 1'b0;
-							temp_shiften10 = 1'b0;
-					    	temp_write = 1'b0;
-                   end
-          1 :
-
-                   begin
-				       wantshadedata = 1'b0 ; 
-   				       triID = 0;
-				       selectuv = 0;
-				       lmenable = 1'b0 ; 
-				       wanttexel = 1'b0 ; 
-                      if (ack == 1'b1)
-                      begin
-                         next_state = 0 ; 
-                      end
-                      else
-                      begin
-                         next_state = 1 ; 
-                      end 
-
-                         if (ack == 1'b1 & process01 == 1'b1)
-                         begin
-                            temp_pending01 = 1'b0 ; 
-                         end
-
-                          else if (ack == 1'b1 & process01 == 1'b0)
-                         begin
-                            temp_pending10 = 1'b0 ; 
-                         end 
-
-    				if (process01 == 1'b1 &  ack == 1'b1)
-						begin
-							temp_shiften01 = 1'b1;
-							temp_shiften10 = 1'b1;
-						end
-					    temp_write = 1'b1;
-                   end
-       endcase 
-    end 
- endmodule
- //////////////////////////////////////////////////////////////////////////////////////////////
- //
- // Verilog file generated by X-HDL - Revision 3.2.38  Jan. 9, 2004 
- // Sun Feb  8 14:14:35 2004
- //
- //      Input file         : G:/jamieson/VERILOG_BENCHMARKS/RAYTRACE/col16to21.vhd
- //      Design name        : col16to21
- //      Author             : 
- //      Company            : 
- //
- //      Description        : 
- //
- //
- //////////////////////////////////////////////////////////////////////////////////////////////
- //
- module col16to21 (dataline, texelselect, r, g, b);
-
-    input[63:0] dataline; 
-    input[1:0] texelselect; 
-    output[6:0] r; 
-    wire[6:0] r;
-    output[6:0] g; 
-    wire[6:0] g;
-    output[6:0] b; 
-    wire[6:0] b;
-
-    reg[15:0] col16; 
-
-    always @(dataline or texelselect)
-    begin
-       case (texelselect)
-          2'b00 :
-                   begin
-                      col16 = dataline[15:0] ; 
-                   end
-          2'b01 :
-                   begin
-                      col16 = dataline[31:16] ; 
-                   end
-          2'b10 :
-                   begin
-                      col16 = dataline[47:32] ; 
-                   end
-          2'b11 :
-                   begin
-                      col16 = dataline[63:48] ; 
-                   end
-       endcase 
-    end 
-    assign r = {col16[15:10], 1'b0} ;
-    assign g = {col16[9:5], 2'b00} ;
-    assign b = {col16[4:0], 2'b00} ;
- endmodule
- module linearmap (u, v, start, addr, texelselect, factor, enable, clk);
-
-    input[6:0] u; 
-    input[6:0] v; 
-    input[17:0] start; 
-    output[17:0] addr; 
-    reg[17:0] addr;
-    output[1:0] texelselect; 
-    wire[1:0] texelselect;
-
-    input[2:0] factor; 
-    input enable; 
-    input clk; 
-
-    reg[6:0] ul; 
-    reg[6:0] vl; 
-
-    assign texelselect = ul[1:0] ;
-
-    always @(posedge clk)
-    begin
-       if (enable == 1'b1)
-       begin
-          ul <= u ; 
-          vl <= v ; 
-       end 
-       else
-       begin
-          ul <= ul ; 
-          vl <= vl ; 
-       end 
-       case (factor)
-          3'b000 :
-                   begin
-                      addr <= start + ({13'b0000000000000, ul[6:2]}) + ({11'b00000000000, vl}) ; 
-                   end
-          3'b001 :
-                   begin
-                      addr <= start + ({13'b0000000000000, ul[6:2]}) + ({10'b0000000000, vl, 1'b0}) ; 
-
-                   end
-          3'b010 :
-                   begin
-                      addr <= start + ({13'b0000000000000, ul[6:2]}) + ({9'b000000000, vl, 2'b00}) ; 
-                   end
-          3'b011 :
-                   begin
-                      addr <= start + ({13'b0000000000000, ul[6:2]}) + ({8'b00000000, vl, 3'b000}) ; 
-                   end
-          3'b100 :
-                   begin
-                      addr <= start + ({13'b0000000000000, ul[6:2]}) + ({7'b0000000, vl, 4'b0000}) ; 
-
-                   end
-          3'b101 :
-                   begin
-                      addr <= start + ({13'b0000000000000, ul[6:2]}) + ({6'b000000, vl, 5'b00000}) ; 
-                   end
-          3'b110 :
-                   begin
-                      addr <= start + ({13'b0000000000000, ul[6:2]}) + ({5'b00000, vl, 6'b000000}) ; 
-                   end
-          3'b111 :
-                   begin
-                      addr <= start + ({13'b0000000000000, ul[6:2]}) + ({4'b0000, vl, 7'b0000000}) ; 
-
-                   end
-       endcase  
-    end 
- endmodule
-     module bilinearintrp (u01a, u01b, u01c, v01a, v01b, v01c, u10a, u10b, u10c, v10a, v10b, v10c, selectuv, ru, rv, rw, gu, gv, gw, bu, bv, bw, r, g, b, clk);
-
-        input[7:0] u01a; 
-        input[7:0] u01b; 
-        input[7:0] u01c; 
-        input[7:0] v01a; 
-        input[7:0] v01b; 
-        input[7:0] v01c; 
-        input[7:0] u10a; 
-        input[7:0] u10b; 
-        input[7:0] u10c; 
-        input[7:0] v10a; 
-        input[7:0] v10b; 
-        input[7:0] v10c; 
-        input[2:0] selectuv; 
-        input[6:0] ru; 
-        input[6:0] rv; 
-        input[6:0] rw; 
-        input[6:0] gu; 
-        input[6:0] gv; 
-        input[6:0] gw; 
-        input[6:0] bu; 
-        input[6:0] bv; 
-        input[6:0] bw; 
-        output[6:0] r; 
-        wire[6:0] r;
-        output[6:0] g; 
-        wire[6:0] g;
-        output[6:0] b; 
-        wire[6:0] b;
-        input clk; 
-
-        reg[7:0] u; 
-        reg[7:0] v; 
-        reg[7:0] ul; 
-        reg[7:0] vl; 
-        reg[7:0] wl; 
-        reg[14:0] i1b; 
-        reg[14:0] i2b; 
-        reg[14:0] i3b; 
-        reg[14:0] i1g; 
-        reg[14:0] i2g; 
-        reg[14:0] i3g; 
-        reg[14:0] i1r; 
-        reg[14:0] i2r; 
-        reg[14:0] i3r; 
-        reg[6:0] rul; 
-        reg[6:0] rvl; 
-        reg[6:0] rwl; 
-        reg[6:0] gul; 
-        reg[6:0] gvl; 
-        reg[6:0] gwl; 
-        reg[6:0] bul; 
-        reg[6:0] bvl; 
-        reg[6:0] bwl; 
-
-        always @(selectuv or u01a or u01b or u01c or v01a or v01b or v01c or u10a or 
-                 u10b or u10c or v10a or v10b or v10c)
-        begin
-           case (selectuv)
-              3'b000 :
-                       begin
-                          u = u01a ; 
-                          v = v01a ; 
-                       end
-              3'b001 :
-                       begin
-                          u = u01b ; 
-						 v = v01b ; 
-                       end
-              3'b010 :
-                       begin
-                          u = u01c ; 
-                          v = v01c ; 
-                       end
-              3'b100 :
-                       begin
-                          u = u10a ; 
-                          v = v10a ; 
-                       end
-              3'b101 :
-                       begin
-                          u = u10b ; 
-                          v = v10b ; 
-                       end
-              3'b110 :
-                       begin
-                          u = u10c ; 
-                          v = v10c ; 
-                       end
-              default :
-                       begin
-                          u = 0;
-                          v = 0;
-                       end
-           endcase 
-        end 
-
-        always @(posedge clk)
-        begin
-           wl <= 8'b11111111 - u - v ; 
-           ul <= u ; 
-           vl <= v ; 
-           rul <= ru ; 
-           rvl <= rv ; 
-           rwl <= rw ; 
-           gul <= gu ; 
-           gvl <= gv ; 
-           gwl <= gw ; 
-           bul <= bu ; 
-           bvl <= bv ; 
-           bwl <= bw ; 
-           i1r <= ul * rul ; 
-           i2r <= vl * rvl ; 
-           i3r <= wl * rwl ; 
-           i1g <= ul * gul ; 
-           i2g <= vl * gvl ; 
-           i3g <= wl * gwl ; 
-           i1b <= ul * bul ; 
-           i2b <= vl * bvl ; 
-           i3b <= wl * bwl ;  
-        end 
-        assign r = (i1r + i2r + i3r) ;
-        assign g = (i1g + i2g + i3g) ;
-        assign b = (i1b + i2b + i3b) ;
-     endmodule
-
-
-
-module fifo3 (datain, writeen, dataout, shiften, globalreset, clk);
-
-    input[18 - 1:0] datain; 
-    input writeen; 
-    output[18 - 1:0] dataout; 
-    wire[18 - 1:0] dataout;
-    input shiften; 
-    input globalreset; 
-    input clk; 
-
-    reg[18 - 1:0] data0; 
-    reg[18 - 1:0] data1; 
-    reg[18 - 1:0] data2; 
-
-    reg[1:0] pos; 
-
-    assign dataout = data0 ;
-
-    always @(posedge clk)
-    begin
-       if (globalreset == 1'b1)
-       begin
-          pos <= 2'b00 ; 
-          data0 <= 0 ; 
-          data1 <= 0 ; 
-          data2 <= 0 ; 
-       end
-       else
-       begin
-          if (writeen == 1'b1 & shiften == 1'b1)
-          begin
-             case (pos)
-                2'b00 :
-                         begin
-                            data0 <= 0 ; 
-                            data1 <= 0 ; 
-                            data2 <= 0 ; 
-                         end
-
-                2'b01 :
-                         begin
-                            data0 <= datain ; 
-                            data1 <= 0 ; 
-                            data2 <= 0 ; 
-                         end
-                2'b10 :
-                         begin
-                            data0 <= data1 ; 
-                            data1 <= datain ; 
-                            data2 <= 0 ; 
-                         end
-
-                2'b11 :
-                         begin
-                            data0 <= data1 ; 
-                            data1 <= data2 ; 
-                            data2 <= datain ; 
-                         end
-             endcase 
-          end
-          else if (shiften == 1'b1)
-          begin
-             data0 <= data1 ; 
-             data1 <= data2 ; 
-             pos <= pos - 1 ; 
-          end
-          else if (writeen == 1'b1)
-          begin
-             case (pos)
-                2'b00 :
-                         begin
-                            data0 <= datain ; 
-                         end
-                2'b01 :
-    					begin
-                            data1 <= datain ; 
-                         end
-                2'b10 :
-                         begin
-                            data2 <= datain ; 
-                         end
-             endcase 
-             pos <= pos + 1 ; 
-          end 
-       end 
-    end 
- endmodule
-

From 26d337263335a1ec765990b31942b6091da7943f Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 14:17:56 -0400
Subject: [PATCH 16/56] files symlink

---
 .github/workflows/licensing.yml               |   9 +-
 parmys-plugin/techlibs/vtr_primitives.v       | 329 ------------------
 .../tests/eltwise_layer/hard_block_include.v  |   1 +
 ...cN10LB_mem20K_complexDSP_customSB_22nm.xml |   1 +
 .../k6_frac_N10_frac_chain_mem32K_40nm.xml    |   1 +
 parmys-plugin/tests/raygentop/raygentop.v     |   1 +
 6 files changed, 8 insertions(+), 334 deletions(-)
 delete mode 100644 parmys-plugin/techlibs/vtr_primitives.v
 create mode 120000 parmys-plugin/tests/eltwise_layer/hard_block_include.v
 create mode 120000 parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
 create mode 120000 parmys-plugin/tests/raygentop/k6_frac_N10_frac_chain_mem32K_40nm.xml
 create mode 120000 parmys-plugin/tests/raygentop/raygentop.v

diff --git a/.github/workflows/licensing.yml b/.github/workflows/licensing.yml
index 2e92bfac0..b459b282a 100644
--- a/.github/workflows/licensing.yml
+++ b/.github/workflows/licensing.yml
@@ -34,11 +34,10 @@ jobs:
           ./third_party/minilitex_ddr_arty/minilitex_ddr_arty.v
           ./third_party/VexRiscv_Lite/VexRiscv_Lite.v
           ./third_party/vtr/verilog/eltwise_layer.v
-          ./parmys-plugin/tests/raygentop/raygentop.v
-          ./parmys-plugin/tests/eltwise_layer/hard_block_include.v
-          ./parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
-          ./parmys-plugin/tests/raygentop/k6_frac_N10_frac_chain_mem32K_40nm.xml
-          ./parmys-plugin/techlibs/vtr_primitives.v
+          ./third_party/vtr/verilog/raygentop.v
+          ./third_party/vtr/verilog/hard_block_include.v
+          ./third_party/vtr/arch/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
+          ./third_party/vtr/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml
         third_party: |
           ./third_party/googletest/
           ./third_party/libargparse/
diff --git a/parmys-plugin/techlibs/vtr_primitives.v b/parmys-plugin/techlibs/vtr_primitives.v
deleted file mode 100644
index 678af1ccd..000000000
--- a/parmys-plugin/techlibs/vtr_primitives.v
+++ /dev/null
@@ -1,329 +0,0 @@
-`timescale 1ps/1ps
-//Overivew
-//========
-//This file contains the verilog primitives produced by VPR's
-//post-synthesis netlist writer.
-//
-//If you wish to do back-annotated timing simulation you will need
-//to link with this file during simulation.
-//
-//To ensure currect result when performing back-annoatation with 
-//Modelsim see the notes at the end of this comment.
-//
-//Specifying Timing Edges
-//=======================
-//To perform timing back-annotation the simulator must know the delay 
-//dependancies (timing edges) between the ports on each primitive.
-//
-//During back-annotation the simulator will attempt to annotate SDF delay
-//values onto the timing edges.  It should give a warning if was unable
-//to find a matching edge.
-//
-//
-//In Verilog timing edges are specified using a specify block (delimited by the
-//'specify' and 'endspecify' keywords.
-//
-//Inside the specify block a set of specify statements are used to describe
-//the timing edges.  For example consider:
-//
-//  input [1:0] in;
-//  output [1:0] out;
-//  specify
-//      (in[0] => out[0]) = "";
-//      (in[1] => out[1]) = "";
-//  endspecify
-//
-//This states that there are the following timing edges (dependancies):
-//  * from in[0] to out[0]
-//  * from in[1] to out[1]
-//
-//We could (according to the Verilog standard) equivalently have used:
-//
-//  input [1:0] in;
-//  output [1:0] out;
-//  specify
-//      (in => out) = "";
-//  endspecify
-//
-//However NOT ALL SIMULATORS TREAT MULTIBIT SPECIFY STATEMENTS CORRECTLY,
-//at least by default (in particular ModelSim, see notes below).
-//
-//The previous examples use the 'parrallel connection' operator '=>', which
-//creates parallel edges between the two operands (i.e. bit 0 to bit 0, bit
-//1 to bit 1 etc.).  Note that both operands must have the same bit-width. 
-//
-//Verilog also supports the 'full connection' operator '*>' which will create
-//a fully connected set of edges (e.g. from all-to-all). It does not require
-//both operands to have the same bit-width. For example:
-//
-//  input [1:0] in;
-//  output [2:0] out;
-//  specify
-//      (in *> out) = "";
-//  endspecify
-//
-//states that there are the following timing edges (dependancies):
-//  * from in[0] to out[0]
-//  * from in[0] to out[1]
-//  * from in[0] to out[2]
-//  * from in[1] to out[0]
-//  * from in[1] to out[1]
-//  * from in[1] to out[2]
-//
-//For more details on specify blocks see Section 14 "Specify Blocks" of the
-//Verilog standard (IEEE 1364-2005).
-//
-//Back-annotation with Modelsim
-//=============================
-//
-//Ensuring Multi-bit Specifies are Handled Correctly: Bit-blasting
-//----------------------------------------------------------------
-//
-//ModelSim (tested on Modelsim SE 10.4c) ignores multi-bit specify statements
-//by default.
-//
-//This causes SDF annotation errors such as:
-//
-//  vsim-SDF-3261: Failed to find matching specify module path
-//
-//To force Modelsim to correctly interpret multi-bit specify statements you
-//should provide the '+bitblast' option to the vsim executable.
-//This forces it to apply specify statements using multi-bit operands to
-//each bit of the operand (i.e. according to the Verilog standard).
-//
-//Confirming back-annotation is occuring correctly
-//------------------------------------------------
-//
-//Another useful option is '+sdf_verbose' which produces extra output about
-//SDF annotation, which can be used to verify annotation occured correctly.
-//
-//For example:
-//
-//      Summary of Verilog design objects annotated: 
-//      
-//           Module path delays =          5
-//      
-//       ******************************************************************************
-//      
-//       Summary of constructs read: 
-//      
-//                 IOPATH =          5
-//
-//shows that all 5 IOPATH constructs in the SDF were annotated to the verilog
-//design.
-//
-//Example vsim Command Line
-//--------------------------
-//The following is an example command-line to vsim (where 'tb' is the name of your
-//testbench):
-//
-//  vsim -t 1ps -L rtl_work -L work -voptargs="+acc" +sdf_verbose +bitblast tb
-
-
-
-
-//K-input Look-Up Table
-module LUT_K #(
-    //The Look-up Table size (number of inputs)
-    parameter K = 1, 
-
-    //The lut mask.  
-    //Left-most (MSB) bit corresponds to all inputs logic one. 
-    //Defaults to always false.
-    parameter LUT_MASK={2**K{1'b0}} 
-) (
-    input [K-1:0] in,
-    output out
-);
-
-    specify
-        (in *> out) = "";
-    endspecify
-
-    assign out = LUT_MASK[in];
-
-endmodule
-
-//D-FlipFlop module
-module DFF #(
-    parameter INITIAL_VALUE=1'b0    
-) (
-    input clk,
-    input D,
-    output reg Q
-);
-
-    specify
-        (clk => Q) = "";
-        $setup(D, posedge clk, "");
-        $hold(posedge clk, D, "");
-    endspecify
-
-    initial begin
-        Q <= INITIAL_VALUE;
-    end
-
-    always@(posedge clk) begin
-        Q <= D;
-    end
-endmodule
-
-//Routing fpga_interconnect module
-module fpga_interconnect(
-    input datain,
-    output dataout
-);
-
-    specify
-        (datain=>dataout)="";
-    endspecify
-
-    assign dataout = datain;
-
-endmodule
-
-
-//2-to-1 mux module
-module mux(
-    input select,
-    input x,
-    input y,
-    output z
-);
-
-    assign z = (x & ~select) | (y & select);
-
-endmodule
-
-//n-bit adder
-module adder #(
-    parameter WIDTH = 1   
-) (
-    input [WIDTH-1:0] a, 
-    input [WIDTH-1:0] b, 
-    input cin, 
-    output cout, 
-    output [WIDTH-1:0] sumout);
-
-   specify
-      (a*>sumout)="";
-      (b*>sumout)="";
-      (cin*>sumout)="";
-      (a*>cout)="";
-      (b*>cout)="";
-      (cin=>cout)="";
-   endspecify
-   
-   assign {cout, sumout} = a + b + cin;
-   
-endmodule
-   
-//nxn multiplier module
-module multiply #(
-    //The width of input signals
-    parameter WIDTH = 1
-) (
-    input [WIDTH-1:0] a,
-    input [WIDTH-1:0] b,
-    output [2*WIDTH-1:0] out
-);
-
-    specify
-        (a *> out) = "";
-        (b *> out) = "";
-    endspecify
-
-    assign out = a * b;
-
-endmodule // mult
-
-//single_port_ram module
-(* keep_hierarchy *)
-module single_port_ram #(
-    parameter ADDR_WIDTH = 1,
-    parameter DATA_WIDTH = 1
-) (
-    input clk,
-    input [ADDR_WIDTH-1:0] addr,
-    input [DATA_WIDTH-1:0] data,
-    input we,
-    output reg [DATA_WIDTH-1:0] out
-);
-
-    localparam MEM_DEPTH = 2 ** ADDR_WIDTH;
-
-    reg [DATA_WIDTH-1:0] Mem[MEM_DEPTH-1:0];
-
-    specify
-        (clk*>out)="";
-        $setup(addr, posedge clk, "");
-        $setup(data, posedge clk, "");
-        $setup(we, posedge clk, "");
-        $hold(posedge clk, addr, "");
-        $hold(posedge clk, data, "");
-        $hold(posedge clk, we, "");
-    endspecify
-   
-    always@(posedge clk) begin
-        if(we) begin
-            Mem[addr] = data;
-        end
-    	out = Mem[addr]; //New data read-during write behaviour (blocking assignments)
-    end
-   
-endmodule // single_port_RAM
-
-//dual_port_ram module
-(* keep_hierarchy *)
-module dual_port_ram #(
-    parameter ADDR_WIDTH = 1,
-    parameter DATA_WIDTH = 1
-) (
-    input clk,
-
-    input [ADDR_WIDTH-1:0] addr1,
-    input [ADDR_WIDTH-1:0] addr2,
-    input [DATA_WIDTH-1:0] data1,
-    input [DATA_WIDTH-1:0] data2,
-    input we1,
-    input we2,
-    output reg [DATA_WIDTH-1:0] out1,
-    output reg [DATA_WIDTH-1:0] out2
-);
-
-    localparam MEM_DEPTH = 2 ** ADDR_WIDTH;
-
-    reg [DATA_WIDTH-1:0] Mem[MEM_DEPTH-1:0];
-
-    specify
-        (clk*>out1)="";
-        (clk*>out2)="";
-        $setup(addr1, posedge clk, "");
-        $setup(addr2, posedge clk, "");
-        $setup(data1, posedge clk, "");
-        $setup(data2, posedge clk, "");
-        $setup(we1, posedge clk, "");
-        $setup(we2, posedge clk, "");
-        $hold(posedge clk, addr1, "");
-        $hold(posedge clk, addr2, "");
-        $hold(posedge clk, data1, "");
-        $hold(posedge clk, data2, "");
-        $hold(posedge clk, we1, "");
-        $hold(posedge clk, we2, "");
-    endspecify
-   
-    always@(posedge clk) begin //Port 1
-        if(we1) begin
-            Mem[addr1] = data1;
-        end
-        out1 = Mem[addr1]; //New data read-during write behaviour (blocking assignments)
-    end
-
-    always@(posedge clk) begin //Port 2
-        if(we2) begin
-            Mem[addr2] = data2;
-        end
-        out2 = Mem[addr2]; //New data read-during write behaviour (blocking assignments)
-    end
-   
-endmodule // dual_port_ram
diff --git a/parmys-plugin/tests/eltwise_layer/hard_block_include.v b/parmys-plugin/tests/eltwise_layer/hard_block_include.v
new file mode 120000
index 000000000..04689e506
--- /dev/null
+++ b/parmys-plugin/tests/eltwise_layer/hard_block_include.v
@@ -0,0 +1 @@
+../../../third_party/vtr/verilog/hard_block_include.v
\ No newline at end of file
diff --git a/parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml b/parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
new file mode 120000
index 000000000..8456a8597
--- /dev/null
+++ b/parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
@@ -0,0 +1 @@
+../../../third_party/vtr/arch/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
\ No newline at end of file
diff --git a/parmys-plugin/tests/raygentop/k6_frac_N10_frac_chain_mem32K_40nm.xml b/parmys-plugin/tests/raygentop/k6_frac_N10_frac_chain_mem32K_40nm.xml
new file mode 120000
index 000000000..af1bf2426
--- /dev/null
+++ b/parmys-plugin/tests/raygentop/k6_frac_N10_frac_chain_mem32K_40nm.xml
@@ -0,0 +1 @@
+../../../third_party/vtr/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml
\ No newline at end of file
diff --git a/parmys-plugin/tests/raygentop/raygentop.v b/parmys-plugin/tests/raygentop/raygentop.v
new file mode 120000
index 000000000..125e9fc84
--- /dev/null
+++ b/parmys-plugin/tests/raygentop/raygentop.v
@@ -0,0 +1 @@
+../../../third_party/vtr/verilog/raygentop.v
\ No newline at end of file

From 4503b032bec09273338d7eb99dc2d2fa52e5109e Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 14:19:43 -0400
Subject: [PATCH 17/56] primitives symlink

---
 parmys-plugin/techlibs/vtr_primitives.v            | 1 +
 third_party/vtr/{vtr_primitives.v => primitives.v} | 0
 2 files changed, 1 insertion(+)
 create mode 120000 parmys-plugin/techlibs/vtr_primitives.v
 rename third_party/vtr/{vtr_primitives.v => primitives.v} (100%)

diff --git a/parmys-plugin/techlibs/vtr_primitives.v b/parmys-plugin/techlibs/vtr_primitives.v
new file mode 120000
index 000000000..b6739f726
--- /dev/null
+++ b/parmys-plugin/techlibs/vtr_primitives.v
@@ -0,0 +1 @@
+../../third_party/vtr/primitives.v
\ No newline at end of file
diff --git a/third_party/vtr/vtr_primitives.v b/third_party/vtr/primitives.v
similarity index 100%
rename from third_party/vtr/vtr_primitives.v
rename to third_party/vtr/primitives.v

From ddd1cc272a90a7cd4e1748186d070efa89218b28 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 14:20:51 -0400
Subject: [PATCH 18/56] ci

---
 .github/workflows/licensing.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/licensing.yml b/.github/workflows/licensing.yml
index b459b282a..421c157d8 100644
--- a/.github/workflows/licensing.yml
+++ b/.github/workflows/licensing.yml
@@ -38,6 +38,7 @@ jobs:
           ./third_party/vtr/verilog/hard_block_include.v
           ./third_party/vtr/arch/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
           ./third_party/vtr/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml
+          ./third_party/vtr/primitives.v
         third_party: |
           ./third_party/googletest/
           ./third_party/libargparse/

From 736593c1d8f335e90a0354125e6e78b59ed741e1 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 15:30:13 -0400
Subject: [PATCH 19/56] VexRiscv_Lite test

---
 parmys-plugin/include/partial_map.h           |   1 +
 parmys-plugin/parmys_update.cc                |  36 ++-
 parmys-plugin/parmys_update.hpp               |   1 +
 parmys-plugin/src/partial_map.cc              | 214 +++++++++++++++++-
 parmys-plugin/tests/Makefile                  |   2 +
 .../tests/VexRiscv_Lite/VexRiscv_Lite.tcl     |  95 ++++++++
 .../tests/VexRiscv_Lite/VexRiscv_Lite.v       |   1 +
 .../k6_frac_N10_frac_chain_mem32K_40nm.xml    |   1 +
 .../tests/VexRiscv_Lite/odin_config.xml       |  40 ++++
 9 files changed, 386 insertions(+), 5 deletions(-)
 create mode 100644 parmys-plugin/tests/VexRiscv_Lite/VexRiscv_Lite.tcl
 create mode 120000 parmys-plugin/tests/VexRiscv_Lite/VexRiscv_Lite.v
 create mode 120000 parmys-plugin/tests/VexRiscv_Lite/k6_frac_N10_frac_chain_mem32K_40nm.xml
 create mode 100644 parmys-plugin/tests/VexRiscv_Lite/odin_config.xml

diff --git a/parmys-plugin/include/partial_map.h b/parmys-plugin/include/partial_map.h
index a7be967f3..29590262a 100644
--- a/parmys-plugin/include/partial_map.h
+++ b/parmys-plugin/include/partial_map.h
@@ -30,5 +30,6 @@ void partial_map_top(netlist_t *netlist);
 void instantiate_add_w_carry(nnode_t *node, short mark, netlist_t *netlist);
 void instantiate_multi_port_mux(nnode_t *node, short mark, netlist_t *netlist);
 void depth_first_traversal_to_partial_map(short marker_value, netlist_t *netlist);
+void instantiate_multi_port_n_bits_mux(nnode_t* node, short mark, netlist_t* /*netlist*/);
 
 #endif
diff --git a/parmys-plugin/parmys_update.cc b/parmys-plugin/parmys_update.cc
index 25fca5991..21a52e278 100644
--- a/parmys-plugin/parmys_update.cc
+++ b/parmys-plugin/parmys_update.cc
@@ -289,7 +289,8 @@ void cell_node(nnode_t *node, short /*traverse_number*/, Yosys::Module *module,
         break;
 
     case SMUX_2:
-        Yosys::log_error("SMUX_2\n");
+        define_SMUX_function_yosys(node, module);
+        // Yosys::log_error("SMUX_2\n");
         break;
 
     case FF_NODE:
@@ -517,4 +518,37 @@ void define_logical_function_yosys(nnode_t *node, Yosys::Module *module)
         cell->parameters[Yosys::ID::Y_WIDTH] = Yosys::RTLIL::Const(int(node->num_output_pins));
         cell->parameters[Yosys::ID::A_SIGNED] = Yosys::RTLIL::Const(false);
     }
+}
+
+void define_SMUX_function_yosys(nnode_t *node, Yosys::Module *module)
+{
+    // Translate a SMUX_2 node into a 1-bit Yosys $mux cell; input pins are read in the order s, a, b.
+    Yosys::RTLIL::SigSpec input_sig_A, input_sig_B, input_sig_S, output_sig;
+    oassert(node->num_input_pins == 3); // s a b
+
+    nnet_t *s_net = node->input_pins[0]->net;
+    Yosys::Wire *s_wire = wire_net_driver(module, node, s_net, 0);
+    input_sig_S.append(s_wire);
+
+    nnet_t *a_net = node->input_pins[1]->net;
+    Yosys::Wire *a_wire = wire_net_driver(module, node, a_net, 0);
+    input_sig_A.append(a_wire);
+
+    nnet_t *b_net = node->input_pins[2]->net;
+    Yosys::Wire *b_wire = wire_net_driver(module, node, b_net, 0);
+    input_sig_B.append(b_wire);
+
+    oassert(node->num_output_pins == 1); // y
+    Yosys::RTLIL::Wire *out_wire = to_wire(node->name, module);
+    output_sig.append(out_wire);
+
+    // Yosys $mux drives Y with B when S is 1 and with A otherwise.
+    Yosys::IdString celltype = ID($mux);
+
+    Yosys::RTLIL::Cell *cell = module->addCell(NEW_ID, celltype);
+    cell->parameters[Yosys::ID::WIDTH] = Yosys::RTLIL::Const(int(1));
+    cell->setPort(Yosys::ID::S, input_sig_S);
+    cell->setPort(Yosys::ID::A, input_sig_A);
+    cell->setPort(Yosys::ID::B, input_sig_B);
+    cell->setPort(Yosys::ID::Y, output_sig);
 }
\ No newline at end of file
diff --git a/parmys-plugin/parmys_update.hpp b/parmys-plugin/parmys_update.hpp
index ecdb00112..156b1b63f 100644
--- a/parmys-plugin/parmys_update.hpp
+++ b/parmys-plugin/parmys_update.hpp
@@ -25,6 +25,7 @@
 void define_logical_function_yosys(nnode_t *node, Yosys::Module *module);
 void update_design(Yosys::Design *design, netlist_t *netlist);
 void define_MUX_function_yosys(nnode_t *node, Yosys::Module *module);
+void define_SMUX_function_yosys(nnode_t *node, Yosys::Module *module);
 void define_FF_yosys(nnode_t *node, Yosys::Module *module);
 
 #endif //__DESIGN_UPDATE_H__
\ No newline at end of file
diff --git a/parmys-plugin/src/partial_map.cc b/parmys-plugin/src/partial_map.cc
index 15bf7bd40..78da16a83 100644
--- a/parmys-plugin/src/partial_map.cc
+++ b/parmys-plugin/src/partial_map.cc
@@ -214,9 +214,9 @@ void partial_map_node(nnode_t *node, short traverse_number, netlist_t *netlist)
     case MULTI_PORT_MUX:
         instantiate_multi_port_mux(node, traverse_number, netlist);
         break;
-    // case MULTIPORT_nBIT_SMUX:
-    //     instantiate_multi_port_n_bits_mux(node, traverse_number, netlist);
-    //     break;
+    case MULTIPORT_nBIT_SMUX:
+        instantiate_multi_port_n_bits_mux(node, traverse_number, netlist);
+        break;
     case MULTIPLY: {
         mixer->partial_map_node(node, traverse_number, netlist);
         break;
@@ -264,7 +264,7 @@ void partial_map_node(nnode_t *node, short traverse_number, netlist_t *netlist)
     case CASE_NOT_EQUAL:
     case DIVIDE:
     case MODULO:
-    case MULTIPORT_nBIT_SMUX:
+    // case MULTIPORT_nBIT_SMUX:
     default:
         error_message(NETLIST, node->loc, "%s", "Partial map: node should have been converted to softer version.");
         break;
@@ -322,6 +322,212 @@ void instantiate_multi_port_mux(nnode_t *node, short mark, netlist_t * /*netlist
     free_nnode(node);
 }
 
+/**
+ * (function: transform_to_single_bit_mux_nodes)
+ *
+ * @brief split a multi-bit mux node into one mux node of the same
+ * type per output pin, each with 1-bit data inputs and a 1-bit output
+ * @param node pointing to the mux node; it is freed before returning
+ * @param traverse_mark_number traversal mark the node must carry; copied to every new node
+ * @param netlist unused
+ * @return heap array holding one new node per output pin of the input node
+ */
+nnode_t **transform_to_single_bit_mux_nodes(nnode_t *node, uintptr_t traverse_mark_number, netlist_t * /* netlist */)
+{
+    oassert(node->traverse_visited == traverse_mark_number);
+
+    int i, j;
+    /* to check all mux inputs have the same width(except [0] which is selector) */
+    for (i = 2; i < node->num_input_port_sizes; i++) {
+        oassert(node->input_port_sizes[i] == node->input_port_sizes[1]);
+    }
+
+    int selector_width = node->input_port_sizes[0];
+    int num_input_ports = node->num_input_port_sizes;
+    int num_mux_nodes = node->num_output_pins;
+
+    nnode_t **mux_node = (nnode_t **)vtr::calloc(num_mux_nodes, sizeof(nnode_t *));
+
+    /**
+     * input_port[0] -> SEL
+     * input_pin[SEL_WIDTH..n] -> MUX inputs
+     * output_pin[0..n-1] -> MUX outputs
+     */
+    for (i = 0; i < num_mux_nodes; i++) {
+        mux_node[i] = allocate_nnode(node->loc);
+
+        mux_node[i]->type = node->type;
+        mux_node[i]->traverse_visited = traverse_mark_number;
+
+        /* Name the mux based on the name of its output pin */
+        // const char* mux_base_name = node_name_based_on_op(mux_node[i]);
+        // mux_node[i]->name = (char*)vtr::malloc(sizeof(char) * (strlen(node->output_pins[i]->name) + strlen(mux_base_name) + 2));
+        // odin_sprintf(mux_node[i]->name, "%s_%s", node->output_pins[i]->name, mux_base_name);
+        mux_node[i]->name = node_name(mux_node[i], node->name);
+
+        add_input_port_information(mux_node[i], selector_width);
+        allocate_more_input_pins(mux_node[i], selector_width);
+
+        if (i == num_mux_nodes - 1) {
+            /**
+             * remap the SEL pins from the mux node
+             * to the last splitted mux node
+             */
+            for (j = 0; j < selector_width; j++) {
+                remap_pin_to_new_node(node->input_pins[j], mux_node[i], j);
+            }
+
+        } else {
+            /* add a copy of SEL pins from the mux node to the splitted mux nodes */
+            for (j = 0; j < selector_width; j++) {
+                add_input_pin_to_node(mux_node[i], copy_input_npin(node->input_pins[j]), j);
+            }
+        }
+
+        /**
+         * remap bit i of every data input port from the original mux node
+         * to the i-th single-bit mux node; the original node is freed below
+         **/
+        int acc_port_sizes = selector_width;
+        for (j = 1; j < num_input_ports; j++) {
+            add_input_port_information(mux_node[i], 1);
+            allocate_more_input_pins(mux_node[i], 1);
+
+            remap_pin_to_new_node(node->input_pins[i + acc_port_sizes], mux_node[i], selector_width + j - 1);
+            acc_port_sizes += node->input_port_sizes[j];
+        }
+
+        /* output */
+        add_output_port_information(mux_node[i], 1);
+        allocate_more_output_pins(mux_node[i], 1);
+
+        remap_pin_to_new_node(node->output_pins[i], mux_node[i], 0);
+    }
+
+    // CLEAN UP
+    free_nnode(node);
+
+    return mux_node;
+}
+
+/**
+ * (function: instantiate_multi_port_n_bits_mux)
+ *
+ * @brief Decodes a multiport n-bit multiplexer into
+ * stages of single-bit 2-input muxes (SMUX_2)
+ *
+ * [NOTE]: Selector should be the first port
+ *
+ * @param node pointing to the mux node
+ * @param mark unique traversal mark for blif elaboration pass
+ * @param netlist pointer to the current netlist file
+ */
+void instantiate_multi_port_n_bits_mux(nnode_t *node, short mark, netlist_t *netlist)
+{
+    int i, j;
+
+    char *name = vtr::strdup(node->name);
+    int num_single_muxes = node->num_output_pins;
+    /* This splits the multiport n bit mux node into multiport 1 bit muxes */
+    nnode_t **single_bit_muxes = transform_to_single_bit_mux_nodes(node, mark, netlist);
+
+    int cnt;
+    /* iterating over single bit muxes that have multiple (>2) ports to turn them into 2-mux */
+    for (cnt = 0; cnt < num_single_muxes; cnt++) {
+        nnode_t *single_bit_mux = single_bit_muxes[cnt];
+
+        /* keeping the information of each single bit mux */
+        int num_expressions = single_bit_mux->num_input_port_sizes - 1;
+        int port_offset = single_bit_mux->input_port_sizes[0];
+        int selector_width = single_bit_mux->input_port_sizes[0];
+
+        /* already a 2-input mux with a 1-bit selector: only retag it as SMUX_2 and rename it */
+        if (selector_width == 1 && num_expressions == 2) {
+            single_bit_mux->type = SMUX_2;
+            if (single_bit_mux->name)
+                vtr::free(single_bit_mux->name);
+
+            single_bit_mux->name = node_name(single_bit_mux, name);
+        } else {
+            nnode_t ***muxes = (nnode_t ***)vtr::calloc(selector_width, sizeof(nnode_t **));
+            /* to keep the internal output signals for future usage */
+            signal_list_t **output_signals = (signal_list_t **)vtr::calloc(selector_width, sizeof(signal_list_t *));
+            /* creating multiple stages to decode single bit mux into 2-mux */
+            for (i = 0; i < selector_width; i++) {
+                /* num of muxes in each stage */
+                int num_of_muxes = shift_left_value_with_overflow_check(0x1, selector_width - (i + 1), single_bit_mux->loc);
+                muxes[i] = (nnode_t **)vtr::calloc(num_of_muxes, sizeof(nnode_t *));
+                output_signals[i] = init_signal_list();
+
+                // stage i is steered by selector pin (selector_width - i - 1)
+                npin_t *selector_pin = single_bit_mux->input_pins[selector_width - i - 1];
+
+                /* iterating over each single bit 2-mux to connect inputs */
+                for (j = 0; j < num_of_muxes; j++) {
+
+                    muxes[i][j] = make_2port_gate(SMUX_2, 1, 2, 1, single_bit_mux, mark);
+
+                    if (j != num_of_muxes - 1)
+                        add_input_pin_to_node(muxes[i][j], copy_input_npin(selector_pin), 0);
+                    else
+                        remap_pin_to_new_node(selector_pin, muxes[i][j], 0);
+
+                    /* connecting the single bit mux input pins into decoded 2-Muxes */
+                    if (i == 0) {
+                        remap_pin_to_new_node(single_bit_mux->input_pins[port_offset + j], muxes[i][j], 1);
+
+                        remap_pin_to_new_node(single_bit_mux->input_pins[port_offset + j + (num_expressions / 2)], muxes[i][j], 2);
+                    }
+                    /* connecting the outputs of internal 2-muxes to next level 2-muxes as input */
+                    else {
+                        add_input_pin_to_node(muxes[i][j], output_signals[i - 1]->pins[j], 1);
+
+                        add_input_pin_to_node(muxes[i][j], output_signals[i - 1]->pins[j + num_of_muxes], 2);
+                    }
+
+                    // Connect output pin to related input pin
+                    if (i != selector_width - 1) {
+                        npin_t *new_pin1 = allocate_npin();
+                        npin_t *new_pin2 = allocate_npin();
+                        nnet_t *new_net = allocate_nnet();
+                        new_net->name = make_full_ref_name(NULL, NULL, NULL, muxes[i][j]->name, j);
+                        /* hook the output pin into the node */
+                        add_output_pin_to_node(muxes[i][j], new_pin1, 0);
+                        /* hook up new pin 1 into the new net */
+                        add_driver_pin_to_net(new_net, new_pin1);
+                        /* hook up the new pin 2 to this new net */
+                        add_fanout_pin_to_net(new_net, new_pin2);
+
+                        // Storing the output pins of the current mux stage as the input of the next one
+                        add_pin_to_signal_list(output_signals[i], new_pin2);
+
+                    } else {
+                        remap_pin_to_new_node(single_bit_mux->output_pins[j], muxes[i][j], 0);
+                    }
+                }
+            }
+
+            // CLEAN UP per single mux
+            for (i = 0; i < selector_width; i++) {
+                vtr::free(muxes[i]);
+            }
+            vtr::free(muxes);
+
+            for (i = 0; i < selector_width; i++) {
+                free_signal_list(output_signals[i]);
+            }
+            vtr::free(output_signals);
+
+            // to free each single mux node
+            free_nnode(single_bit_mux);
+        }
+    }
+
+    // CLEAN UP
+    vtr::free(single_bit_muxes);
+    vtr::free(name);
+}
+
 /*---------------------------------------------------------------------------------------------
  * (function: instantiate_not_logic )
  *-------------------------------------------------------------------------------------------*/
diff --git a/parmys-plugin/tests/Makefile b/parmys-plugin/tests/Makefile
index 59b7f57ad..311c89e77 100644
--- a/parmys-plugin/tests/Makefile
+++ b/parmys-plugin/tests/Makefile
@@ -16,11 +16,13 @@
 
 TESTS = raygentop \
         eltwise_layer \
+		VexRiscv_Lite \
         
 include $(shell pwd)/../../Makefile_test.common
 
 raygentop_verify = true
 eltwise_layer_verify = true
+VexRiscv_Lite_verify = true
 
 clean_modules:
 	@find . -name "*.net.dot" -or -name "*.yosys.blif" | xargs rm -rf
diff --git a/parmys-plugin/tests/VexRiscv_Lite/VexRiscv_Lite.tcl b/parmys-plugin/tests/VexRiscv_Lite/VexRiscv_Lite.tcl
new file mode 100644
index 000000000..01b5ec73c
--- /dev/null
+++ b/parmys-plugin/tests/VexRiscv_Lite/VexRiscv_Lite.tcl
@@ -0,0 +1,95 @@
+yosys -import
+
+plugin -i parmys
+
+yosys -import
+
+read_verilog -nomem2reg +/parmys/vtr_primitives.v
+
+setattr -mod -set keep_hierarchy 1 single_port_ram
+
+setattr -mod -set keep_hierarchy 1 dual_port_ram
+
+
+puts "Using parmys as partial mapper"
+
+
+parmys_arch -a k6_frac_N10_frac_chain_mem32K_40nm.xml
+
+
+read_verilog -sv -nolatches VexRiscv_Lite.v
+
+
+# Check that there are no combinational loops
+
+scc -select
+
+select -assert-none %
+
+select -clear
+
+
+hierarchy -check -auto-top -purge_lib
+
+
+opt_expr
+
+opt_clean
+
+check
+
+opt -nodffe -nosdff
+
+procs -norom
+
+fsm
+
+opt
+
+wreduce
+
+peepopt
+
+opt_clean
+
+share
+
+opt -full
+
+memory -nomap
+
+flatten
+
+opt -full
+
+techmap -map +/parmys/adff2dff.v
+
+techmap -map +/parmys/adffe2dff.v
+
+techmap -map +/parmys/aldff2dff.v
+
+techmap -map +/parmys/aldffe2dff.v
+
+opt -full
+
+parmys -a k6_frac_N10_frac_chain_mem32K_40nm.xml -nopass -c odin_config.xml
+
+opt -full
+
+techmap 
+
+opt -fast
+
+dffunmap
+
+opt -fast -noff
+
+
+tee -o /dev/stdout stat
+
+hierarchy -check -auto-top -purge_lib
+
+check -assert
+
+write_blif -true + vcc -false + gnd -undef + unconn -blackbox VexRiscv_Lite.yosys.blif
+
diff --git a/parmys-plugin/tests/VexRiscv_Lite/VexRiscv_Lite.v b/parmys-plugin/tests/VexRiscv_Lite/VexRiscv_Lite.v
new file mode 120000
index 000000000..ede3e75e3
--- /dev/null
+++ b/parmys-plugin/tests/VexRiscv_Lite/VexRiscv_Lite.v
@@ -0,0 +1 @@
+../../../third_party/VexRiscv_Lite/VexRiscv_Lite.v
\ No newline at end of file
diff --git a/parmys-plugin/tests/VexRiscv_Lite/k6_frac_N10_frac_chain_mem32K_40nm.xml b/parmys-plugin/tests/VexRiscv_Lite/k6_frac_N10_frac_chain_mem32K_40nm.xml
new file mode 120000
index 000000000..af1bf2426
--- /dev/null
+++ b/parmys-plugin/tests/VexRiscv_Lite/k6_frac_N10_frac_chain_mem32K_40nm.xml
@@ -0,0 +1 @@
+../../../third_party/vtr/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml
\ No newline at end of file
diff --git a/parmys-plugin/tests/VexRiscv_Lite/odin_config.xml b/parmys-plugin/tests/VexRiscv_Lite/odin_config.xml
new file mode 100644
index 000000000..2edfb591c
--- /dev/null
+++ b/parmys-plugin/tests/VexRiscv_Lite/odin_config.xml
@@ -0,0 +1,40 @@
+<!--
+# Copyright 2022 Daniel Khadivi
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+-->
+<config>
+	<inputs>
+		<input_type>Verilog</input_type>
+		<input_path_and_name>VexRiscv_Lite.v</input_path_and_name>
+	</inputs>
+	<output>
+		<output_type>blif</output_type>
+		<output_path_and_name>VexRiscv_Lite.yosys.blif</output_path_and_name>
+		<target>
+			<arch_file>k6_frac_N10_frac_chain_mem32K_40nm.xml</arch_file>
+		</target>
+	</output>
+	<optimizations>
+		<multiply size="3" fixed="1" fracture="0" padding="-1" />
+		<memory split_memory_width="1" split_memory_depth="15" />
+		<adder size="0" threshold_size="1" />
+	</optimizations>
+	<debug_outputs>
+		<debug_output_path>.</debug_output_path>
+		<output_ast_graphs>0</output_ast_graphs>
+		<output_netlist_graphs>0</output_netlist_graphs>
+	</debug_outputs>
+</config>
\ No newline at end of file

From 38d0795ba964a82ad3906d5d0e73af0b5541a926 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 15:34:24 -0400
Subject: [PATCH 20/56] ci

---
 parmys-plugin/include/partial_map.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parmys-plugin/include/partial_map.h b/parmys-plugin/include/partial_map.h
index 29590262a..f01f0640a 100644
--- a/parmys-plugin/include/partial_map.h
+++ b/parmys-plugin/include/partial_map.h
@@ -30,6 +30,6 @@ void partial_map_top(netlist_t *netlist);
 void instantiate_add_w_carry(nnode_t *node, short mark, netlist_t *netlist);
 void instantiate_multi_port_mux(nnode_t *node, short mark, netlist_t *netlist);
 void depth_first_traversal_to_partial_map(short marker_value, netlist_t *netlist);
-void instantiate_multi_port_n_bits_mux(nnode_t* node, short mark, netlist_t* /*netlist*/);
+void instantiate_multi_port_n_bits_mux(nnode_t *node, short mark, netlist_t * /*netlist*/);
 
 #endif

From ba3bc5c268b947b7cf63f2197c106738c7ae4360 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 15:39:55 -0400
Subject: [PATCH 21/56] finalized

---
 Makefile | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 8e371ba00..aa1d61923 100644
--- a/Makefile
+++ b/Makefile
@@ -14,8 +14,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-#PLUGIN_LIST := fasm xdc params sdc ql-iob design_introspection integrateinv ql-qlf systemverilog uhdm dsp-ff parmys
-PLUGIN_LIST := parmys
+PLUGIN_LIST := fasm xdc params sdc ql-iob design_introspection integrateinv ql-qlf systemverilog uhdm dsp-ff parmys
 PLUGINS := $(foreach plugin,$(PLUGIN_LIST),$(plugin).so)
 PLUGINS_INSTALL := $(foreach plugin,$(PLUGIN_LIST),install_$(plugin))
 PLUGINS_CLEAN := $(foreach plugin,$(PLUGIN_LIST),clean_$(plugin))

From 2d03233b6e6d0b4645ce7d0c3cbfc6caa2a13183 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 18:21:00 -0400
Subject: [PATCH 22/56] mips32r1_core submodule added

---
 .gitmodules               | 3 +++
 third_party/mips32r1_core | 1 +
 2 files changed, 4 insertions(+)
 create mode 160000 third_party/mips32r1_core

diff --git a/.gitmodules b/.gitmodules
index e27183eb6..840df63ce 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -10,3 +10,6 @@
 [submodule "third_party/libargparse"]
 	path = third_party/libargparse
 	url = https://github.com/kmurray/libargparse.git
+[submodule "third_party/mips32r1_core"]
+	path = third_party/mips32r1_core
+	url = https://github.com/grantae/mips32r1_core.git
diff --git a/third_party/mips32r1_core b/third_party/mips32r1_core
new file mode 160000
index 000000000..2dea2c58a
--- /dev/null
+++ b/third_party/mips32r1_core
@@ -0,0 +1 @@
+Subproject commit 2dea2c58abefd46ab6200449420d72454d496955

From c24e691df1d15decd3cd0b145ed17af76ed2f80d Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 18:21:54 -0400
Subject: [PATCH 23/56] mips32r1_core test added

---
 Makefile                                      |  3 +-
 parmys-plugin/src/BlockMemories.cc            |  4 +-
 parmys-plugin/tests/Makefile                  |  2 +
 .../tests/VexRiscv_Lite/VexRiscv_Lite.tcl     |  4 +-
 .../tests/eltwise_layer/eltwise_layer.tcl     |  4 +-
 .../k6_frac_N10_frac_chain_mem32K_40nm.xml    |  1 +
 .../tests/mips32r1_core/mips32r1_core.tcl     | 95 +++++++++++++++++++
 .../tests/mips32r1_core/mips32r1_core.v       | 35 +++++++
 .../tests/mips32r1_core/odin_config.xml       | 40 ++++++++
 parmys-plugin/tests/raygentop/raygentop.tcl   |  4 +-
 10 files changed, 183 insertions(+), 9 deletions(-)
 create mode 120000 parmys-plugin/tests/mips32r1_core/k6_frac_N10_frac_chain_mem32K_40nm.xml
 create mode 100644 parmys-plugin/tests/mips32r1_core/mips32r1_core.tcl
 create mode 100644 parmys-plugin/tests/mips32r1_core/mips32r1_core.v
 create mode 100644 parmys-plugin/tests/mips32r1_core/odin_config.xml

diff --git a/Makefile b/Makefile
index aa1d61923..8e371ba00 100644
--- a/Makefile
+++ b/Makefile
@@ -14,7 +14,8 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-PLUGIN_LIST := fasm xdc params sdc ql-iob design_introspection integrateinv ql-qlf systemverilog uhdm dsp-ff parmys
+#PLUGIN_LIST := fasm xdc params sdc ql-iob design_introspection integrateinv ql-qlf systemverilog uhdm dsp-ff parmys
+PLUGIN_LIST := parmys
 PLUGINS := $(foreach plugin,$(PLUGIN_LIST),$(plugin).so)
 PLUGINS_INSTALL := $(foreach plugin,$(PLUGIN_LIST),install_$(plugin))
 PLUGINS_CLEAN := $(foreach plugin,$(PLUGIN_LIST),clean_$(plugin))
diff --git a/parmys-plugin/src/BlockMemories.cc b/parmys-plugin/src/BlockMemories.cc
index 14116e74f..f709163be 100644
--- a/parmys-plugin/src/BlockMemories.cc
+++ b/parmys-plugin/src/BlockMemories.cc
@@ -1064,8 +1064,8 @@ static void create_nrmw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
     int num_wr_ports = old_node->attributes->WR_PORTS;
 
     /* should have been resovled before this function */
-    oassert(num_rd_ports > 2);
-    oassert(num_wr_ports > 2);
+    oassert(num_rd_ports > 1);
+    oassert(num_wr_ports > 1);
 
     /* dual port ram signals */
     dp_ram_signals *signals = (dp_ram_signals *)vtr::calloc(1, sizeof(dp_ram_signals));
diff --git a/parmys-plugin/tests/Makefile b/parmys-plugin/tests/Makefile
index 311c89e77..5c1f63d8a 100644
--- a/parmys-plugin/tests/Makefile
+++ b/parmys-plugin/tests/Makefile
@@ -17,12 +17,14 @@
 TESTS = raygentop \
         eltwise_layer \
 		VexRiscv_Lite \
+		mips32r1_core \
         
 include $(shell pwd)/../../Makefile_test.common
 
 raygentop_verify = true
 eltwise_layer_verify = true
 VexRiscv_Lite_verify = true
+mips32r1_core_verify = true
 
 clean_modules:
 	@find . -name "*.net.dot" -or -name "*.yosys.blif" | xargs rm -rf
diff --git a/parmys-plugin/tests/VexRiscv_Lite/VexRiscv_Lite.tcl b/parmys-plugin/tests/VexRiscv_Lite/VexRiscv_Lite.tcl
index 01b5ec73c..4b026f886 100644
--- a/parmys-plugin/tests/VexRiscv_Lite/VexRiscv_Lite.tcl
+++ b/parmys-plugin/tests/VexRiscv_Lite/VexRiscv_Lite.tcl
@@ -17,7 +17,7 @@ puts "Using parmys as partial mapper"
 parmys_arch -a k6_frac_N10_frac_chain_mem32K_40nm.xml
 
 
-read_verilog -sv -nolatches VexRiscv_Lite.v
+read_verilog -sv -nolatches $::env(DESIGN_TOP).v
 
 
 # Check that there are no combinational loops
@@ -91,5 +91,5 @@ hierarchy -check -auto-top -purge_lib
 
 check -assert
 
-write_blif -true + vcc -false + gnd -undef + unconn -blackbox VexRiscv_Lite.yosys.blif
+write_blif -true + vcc -false + gnd -undef + unconn -blackbox $::env(DESIGN_TOP).yosys.blif
 
diff --git a/parmys-plugin/tests/eltwise_layer/eltwise_layer.tcl b/parmys-plugin/tests/eltwise_layer/eltwise_layer.tcl
index b9b22ef10..832f287c6 100644
--- a/parmys-plugin/tests/eltwise_layer/eltwise_layer.tcl
+++ b/parmys-plugin/tests/eltwise_layer/eltwise_layer.tcl
@@ -14,7 +14,7 @@ puts "Using parmys as partial mapper"
 
 parmys_arch -a k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
 
-read_verilog -sv -nolatches hard_block_include.v eltwise_layer.v
+read_verilog -sv -nolatches hard_block_include.v $::env(DESIGN_TOP).v
 
 
 # Check that there are no combinational loops
@@ -83,5 +83,5 @@ tee -o /dev/stdout stat
 
 hierarchy -check -auto-top -purge_lib
 
-write_blif -true + vcc -false + gnd -undef + unconn -blackbox eltwise_layer.yosys.blif
+write_blif -true + vcc -false + gnd -undef + unconn -blackbox $::env(DESIGN_TOP).yosys.blif
 
diff --git a/parmys-plugin/tests/mips32r1_core/k6_frac_N10_frac_chain_mem32K_40nm.xml b/parmys-plugin/tests/mips32r1_core/k6_frac_N10_frac_chain_mem32K_40nm.xml
new file mode 120000
index 000000000..af1bf2426
--- /dev/null
+++ b/parmys-plugin/tests/mips32r1_core/k6_frac_N10_frac_chain_mem32K_40nm.xml
@@ -0,0 +1 @@
+../../../third_party/vtr/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml
\ No newline at end of file
diff --git a/parmys-plugin/tests/mips32r1_core/mips32r1_core.tcl b/parmys-plugin/tests/mips32r1_core/mips32r1_core.tcl
new file mode 100644
index 000000000..4b026f886
--- /dev/null
+++ b/parmys-plugin/tests/mips32r1_core/mips32r1_core.tcl
@@ -0,0 +1,95 @@
+yosys -import
+
+plugin -i parmys
+
+yosys -import
+
+read_verilog -nomem2reg +/parmys/vtr_primitives.v
+
+setattr -mod -set keep_hierarchy 1 single_port_ram
+
+setattr -mod -set keep_hierarchy 1 dual_port_ram
+
+
+puts "Using parmys as partial mapper"
+
+
+parmys_arch -a k6_frac_N10_frac_chain_mem32K_40nm.xml
+
+
+read_verilog -sv -nolatches $::env(DESIGN_TOP).v
+
+
+# Check that there are no combinational loops
+
+scc -select
+
+select -assert-none %
+
+select -clear
+
+
+hierarchy -check -auto-top -purge_lib
+
+
+opt_expr
+
+opt_clean
+
+check
+
+opt -nodffe -nosdff
+
+procs -norom
+
+fsm
+
+opt
+
+wreduce
+
+peepopt
+
+opt_clean
+
+share
+
+opt -full
+
+memory -nomap
+
+flatten
+
+opt -full
+
+techmap -map +/parmys/adff2dff.v
+
+techmap -map +/parmys/adffe2dff.v
+
+techmap -map +/parmys/aldff2dff.v
+
+techmap -map +/parmys/aldffe2dff.v
+
+opt -full
+
+parmys -a k6_frac_N10_frac_chain_mem32K_40nm.xml -nopass -c odin_config.xml
+
+opt -full
+
+techmap 
+
+opt -fast
+
+dffunmap
+
+opt -fast -noff
+
+
+tee -o /dev/stdout stat
+
+hierarchy -check -auto-top -purge_lib
+
+check -assert
+
+write_blif -true + vcc -false + gnd -undef + unconn -blackbox $::env(DESIGN_TOP).yosys.blif
+
diff --git a/parmys-plugin/tests/mips32r1_core/mips32r1_core.v b/parmys-plugin/tests/mips32r1_core/mips32r1_core.v
new file mode 100644
index 000000000..36dff2bda
--- /dev/null
+++ b/parmys-plugin/tests/mips32r1_core/mips32r1_core.v
@@ -0,0 +1,35 @@
+// Copyright 2022 F4PGA Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+`include "../../../third_party/mips32r1_core/mips32r1/Add.v"
+`include "../../../third_party/mips32r1_core/mips32r1/Compare.v"
+`include "../../../third_party/mips32r1_core/mips32r1/CPZero.v"
+`include "../../../third_party/mips32r1_core/mips32r1/EXMEM_Stage.v"
+`include "../../../third_party/mips32r1_core/mips32r1/IDEX_Stage.v"
+`include "../../../third_party/mips32r1_core/mips32r1/MemControl.v"
+`include "../../../third_party/mips32r1_core/mips32r1/MIPS_Parameters.v"
+`include "../../../third_party/mips32r1_core/mips32r1/Mux4.v"
+`include "../../../third_party/mips32r1_core/mips32r1/RegisterFile.v"
+`include "../../../third_party/mips32r1_core/mips32r1/TrapDetect.v"
+`include "../../../third_party/mips32r1_core/mips32r1/ALU.v"
+`include "../../../third_party/mips32r1_core/mips32r1/Control.v"
+`include "../../../third_party/mips32r1_core/mips32r1/Divide.v"
+`include "../../../third_party/mips32r1_core/mips32r1/Hazard_Detection.v"
+`include "../../../third_party/mips32r1_core/mips32r1/IFID_Stage.v"
+`include "../../../third_party/mips32r1_core/mips32r1/MEMWB_Stage.v"
+`include "../../../third_party/mips32r1_core/mips32r1/Mux2.v"
+`include "../../../third_party/mips32r1_core/mips32r1/Processor.v"
+`include "../../../third_party/mips32r1_core/mips32r1/Register.v"
\ No newline at end of file
diff --git a/parmys-plugin/tests/mips32r1_core/odin_config.xml b/parmys-plugin/tests/mips32r1_core/odin_config.xml
new file mode 100644
index 000000000..2edfb591c
--- /dev/null
+++ b/parmys-plugin/tests/mips32r1_core/odin_config.xml
@@ -0,0 +1,40 @@
+<!--
+# Copyright 2022 Daniel Khadivi
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+-->
+<config>
+	<inputs>
+		<input_type>Verilog</input_type>
+		<input_path_and_name>mips32r1_core.v</input_path_and_name>
+	</inputs>
+	<output>
+		<output_type>blif</output_type>
+		<output_path_and_name>mips32r1_core.yosys.blif</output_path_and_name>
+		<target>
+			<arch_file>k6_frac_N10_frac_chain_mem32K_40nm.xml</arch_file>
+		</target>
+	</output>
+	<optimizations>
+		<multiply size="3" fixed="1" fracture="0" padding="-1" />
+		<memory split_memory_width="1" split_memory_depth="15" />
+		<adder size="0" threshold_size="1" />
+	</optimizations>
+	<debug_outputs>
+		<debug_output_path>.</debug_output_path>
+		<output_ast_graphs>0</output_ast_graphs>
+		<output_netlist_graphs>0</output_netlist_graphs>
+	</debug_outputs>
+</config>
\ No newline at end of file
diff --git a/parmys-plugin/tests/raygentop/raygentop.tcl b/parmys-plugin/tests/raygentop/raygentop.tcl
index 0b44ee689..b5463bb96 100644
--- a/parmys-plugin/tests/raygentop/raygentop.tcl
+++ b/parmys-plugin/tests/raygentop/raygentop.tcl
@@ -17,7 +17,7 @@ puts "Using parmys as partial mapper"
 parmys_arch -a k6_frac_N10_frac_chain_mem32K_40nm.xml
 
 
-read_verilog -sv -nolatches raygentop.v
+read_verilog -sv -nolatches $::env(DESIGN_TOP).v
 
 
 # Check that there are no combinational loops
@@ -89,5 +89,5 @@ tee -o /dev/stdout stat
 
 hierarchy -check -auto-top -purge_lib
 
-write_blif -true + vcc -false + gnd -undef + unconn -blackbox raygentop.yosys.blif
+write_blif -true + vcc -false + gnd -undef + unconn -blackbox $::env(DESIGN_TOP).yosys.blif
 

From 8277a06ba5358768e60ddd504223cafed299d246 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 14 Dec 2022 18:28:03 -0400
Subject: [PATCH 24/56] all together

---
 .github/workflows/licensing.yml | 1 +
 Makefile                        | 3 +--
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/licensing.yml b/.github/workflows/licensing.yml
index 421c157d8..0a5c6f1e1 100644
--- a/.github/workflows/licensing.yml
+++ b/.github/workflows/licensing.yml
@@ -44,3 +44,4 @@ jobs:
           ./third_party/libargparse/
           ./third_party/pugixml/
           ./third_party/vtr/
+          ./third_party/mips32r1_core/
diff --git a/Makefile b/Makefile
index 8e371ba00..aa1d61923 100644
--- a/Makefile
+++ b/Makefile
@@ -14,8 +14,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-#PLUGIN_LIST := fasm xdc params sdc ql-iob design_introspection integrateinv ql-qlf systemverilog uhdm dsp-ff parmys
-PLUGIN_LIST := parmys
+PLUGIN_LIST := fasm xdc params sdc ql-iob design_introspection integrateinv ql-qlf systemverilog uhdm dsp-ff parmys
 PLUGINS := $(foreach plugin,$(PLUGIN_LIST),$(plugin).so)
 PLUGINS_INSTALL := $(foreach plugin,$(PLUGIN_LIST),install_$(plugin))
 PLUGINS_CLEAN := $(foreach plugin,$(PLUGIN_LIST),clean_$(plugin))

From 4fe1f8fceb5a9af8918ef8f634cde69e8f654764 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Tue, 20 Dec 2022 11:16:33 -0400
Subject: [PATCH 25/56] no libargparse

---
 .gitmodules             | 3 ---
 third_party/libargparse | 1 -
 2 files changed, 4 deletions(-)
 delete mode 160000 third_party/libargparse

diff --git a/.gitmodules b/.gitmodules
index 840df63ce..4a0680aa3 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -7,9 +7,6 @@
 [submodule "third_party/pugixml"]
 	path = third_party/pugixml
 	url = https://github.com/zeux/pugixml.git
-[submodule "third_party/libargparse"]
-	path = third_party/libargparse
-	url = https://github.com/kmurray/libargparse.git
 [submodule "third_party/mips32r1_core"]
 	path = third_party/mips32r1_core
 	url = https://github.com/grantae/mips32r1_core.git
diff --git a/third_party/libargparse b/third_party/libargparse
deleted file mode 160000
index ee74d1b53..000000000
--- a/third_party/libargparse
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit ee74d1b53bd680748af14e737378de57e2a0a954

From 03b7b7929e58cf691e2b6b55ffe8db8d1f0f0cc4 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Tue, 20 Dec 2022 11:16:52 -0400
Subject: [PATCH 26/56] licensing resolved

---
 parmys-plugin/Makefile                        |  26 ++--
 parmys-plugin/include/Hashtable.hpp           |  49 --------
 parmys-plugin/include/{adders.h => adder.h}   |  37 +++---
 parmys-plugin/include/ast_util.h              |  23 +++-
 .../{BlockMemories.hpp => block_memory.h}     |  41 +++---
 parmys-plugin/include/config_t.h              |  23 +++-
 parmys-plugin/include/hard_block.h            |  32 +++++
 parmys-plugin/include/hard_blocks.h           |  38 ------
 ...LogicMixer.hpp => hard_soft_logic_mixer.h} |  38 +++---
 parmys-plugin/include/hash_table.h            |  44 +++++++
 .../include/{memories.h => memory.h}          |  38 +++---
 ...Optimization.hpp => mixing_optimization.h} |  37 +++---
 .../include/{multipliers.h => multiplier.h}   |  37 +++---
 parmys-plugin/include/netlist_check.h         |  23 +++-
 parmys-plugin/include/netlist_cleanup.h       |  23 +++-
 parmys-plugin/include/netlist_statistic.h     |  23 +++-
 parmys-plugin/include/netlist_utils.h         |  23 +++-
 parmys-plugin/include/netlist_visualizer.h    |  23 +++-
 .../{node_creation_library.h => node_utils.h} |  23 +++-
 parmys-plugin/include/odin_error.h            |  23 +++-
 parmys-plugin/include/odin_globals.h          |  27 +++-
 parmys-plugin/include/odin_ii.h               |  23 +++-
 parmys-plugin/include/odin_types.h            | 118 ++++++------------
 parmys-plugin/include/odin_util.h             |  23 +++-
 parmys-plugin/include/partial_map.h           |  38 +++---
 parmys-plugin/include/read_xml_config_file.h  |  38 +++---
 parmys-plugin/include/string_cache.h          |  32 +++--
 parmys-plugin/include/subtractions.h          |  40 ------
 parmys-plugin/include/subtractor.h            |  35 ++++++
 parmys-plugin/parmys.cc                       |  22 ++--
 parmys-plugin/parmys_arch.cc                  |   2 +-
 parmys-plugin/parmys_resolve.cc               |  13 +-
 parmys-plugin/parmys_resolve.hpp              |   9 +-
 parmys-plugin/parmys_update.cc                |  11 +-
 parmys-plugin/parmys_update.hpp               |   8 +-
 parmys-plugin/parmys_utils.cc                 |   2 +-
 parmys-plugin/parmys_utils.hpp                |   8 +-
 parmys-plugin/src/Hashtable.cc                |  60 ---------
 parmys-plugin/src/{adders.cc => adder.cc}     |  40 +++---
 parmys-plugin/src/ast_util.cc                 |  31 ++---
 .../src/{BlockMemories.cc => block_memory.cc} |  51 +++-----
 parmys-plugin/src/enum_str.cc                 |  17 +++
 .../src/{hard_blocks.cc => hard_block.cc}     |  36 +++---
 ...LogicMixer.cc => hard_soft_logic_mixer.cc} |  35 +++---
 parmys-plugin/src/hash_table.cc               |  55 ++++++++
 parmys-plugin/src/{memories.cc => memory.cc}  |  38 +++---
 ...Optimization.cc => mixing_optimization.cc} |  40 +++---
 .../src/{multipliers.cc => multiplier.cc}     |  38 +++---
 parmys-plugin/src/netlist_check.cc            |  30 ++---
 parmys-plugin/src/netlist_cleanup.cc          |  31 ++---
 parmys-plugin/src/netlist_statistic.cc        |  19 ++-
 parmys-plugin/src/netlist_utils.cc            |  33 +++--
 parmys-plugin/src/netlist_visualizer.cc       |  31 ++---
 ...node_creation_library.cc => node_utils.cc} |  33 +++--
 parmys-plugin/src/odin_error.cc               |  17 +++
 parmys-plugin/src/odin_ii.cc                  |  34 +++--
 parmys-plugin/src/odin_util.cc                |  31 ++---
 parmys-plugin/src/partial_map.cc              |  47 +++----
 parmys-plugin/src/read_xml_config_file.cc     |  37 +++---
 parmys-plugin/src/string_cache.cc             |  18 ++-
 .../src/{subtractions.cc => subtractor.cc}    |  38 +++---
 parmys-plugin/techlibs/adffe2dff.v            |   1 +
 parmys-plugin/techlibs/aldff2dff.v            |   4 +-
 parmys-plugin/techlibs/aldffe2dff.v           |   4 +-
 parmys-plugin/tests/Makefile                  |   2 +-
 .../tests/VexRiscv_Lite/odin_config.xml       |   2 +-
 .../tests/eltwise_layer/odin_config.xml       |   2 +-
 .../tests/mips32r1_core/mips32r1_core.v       |   2 +-
 .../tests/mips32r1_core/odin_config.xml       |   2 +-
 parmys-plugin/tests/raygentop/odin_config.xml |   2 +-
 70 files changed, 979 insertions(+), 955 deletions(-)
 delete mode 100644 parmys-plugin/include/Hashtable.hpp
 rename parmys-plugin/include/{adders.h => adder.h} (62%)
 rename parmys-plugin/include/{BlockMemories.hpp => block_memory.h} (64%)
 create mode 100644 parmys-plugin/include/hard_block.h
 delete mode 100644 parmys-plugin/include/hard_blocks.h
 rename parmys-plugin/include/{HardSoftLogicMixer.hpp => hard_soft_logic_mixer.h} (70%)
 create mode 100644 parmys-plugin/include/hash_table.h
 rename parmys-plugin/include/{memories.h => memory.h} (70%)
 rename parmys-plugin/include/{MixingOptimization.hpp => mixing_optimization.h} (83%)
 rename parmys-plugin/include/{multipliers.h => multiplier.h} (57%)
 rename parmys-plugin/include/{node_creation_library.h => node_utils.h} (61%)
 delete mode 100644 parmys-plugin/include/subtractions.h
 create mode 100644 parmys-plugin/include/subtractor.h
 delete mode 100644 parmys-plugin/src/Hashtable.cc
 rename parmys-plugin/src/{adders.cc => adder.cc} (97%)
 rename parmys-plugin/src/{BlockMemories.cc => block_memory.cc} (97%)
 rename parmys-plugin/src/{hard_blocks.cc => hard_block.cc} (90%)
 rename parmys-plugin/src/{HardSoftLogicMixer.cc => hard_soft_logic_mixer.cc} (57%)
 create mode 100644 parmys-plugin/src/hash_table.cc
 rename parmys-plugin/src/{memories.cc => memory.cc} (98%)
 rename parmys-plugin/src/{MixingOptimization.cc => mixing_optimization.cc} (81%)
 rename parmys-plugin/src/{multipliers.cc => multiplier.cc} (98%)
 rename parmys-plugin/src/{node_creation_library.cc => node_utils.cc} (92%)
 rename parmys-plugin/src/{subtractions.cc => subtractor.cc} (97%)

diff --git a/parmys-plugin/Makefile b/parmys-plugin/Makefile
index ef8ffea19..897816c7a 100644
--- a/parmys-plugin/Makefile
+++ b/parmys-plugin/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2022 Daniel Khadivi
+# Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -22,9 +22,9 @@ SOURCES = parmys.cc \
 		  parmys_update.cc \
 		  parmys_utils.cc \
 		  parmys_resolve.cc \
-		  src/adders.cc \
+		  src/adder.cc \
 		  src/enum_str.cc \
-		  src/MixingOptimization.cc \
+		  src/mixing_optimization.cc \
 		  src/read_xml_config_file.cc \
 		  src/odin_error.cc \
 		  src/odin_util.cc \
@@ -32,18 +32,18 @@ SOURCES = parmys.cc \
 		  src/netlist_utils.cc \
 		  src/netlist_check.cc \
 		  src/netlist_cleanup.cc \
-		  src/node_creation_library.cc \
-		  src/multipliers.cc \
-		  src/subtractions.cc \
-		  src/HardSoftLogicMixer.cc \
+		  src/node_utils.cc \
+		  src/multiplier.cc \
+		  src/subtractor.cc \
+		  src/hard_soft_logic_mixer.cc \
 		  src/odin_ii.cc \
 		  src/string_cache.cc \
 		  src/partial_map.cc \
-		  src/hard_blocks.cc \
-		  src/BlockMemories.cc \
-		  src/memories.cc \
+		  src/hard_block.cc \
+		  src/block_memory.cc \
+		  src/memory.cc \
 		  src/netlist_visualizer.cc \
-		  src/Hashtable.cc \
+		  src/hash_table.cc \
 		  src/ast_util.cc \
 		  ../third_party/vtr/libs/vtrutil/src/vtr_util.cc \
 		  ../third_party/vtr/libs/vtrutil/src/vtr_token.cc \
@@ -57,9 +57,6 @@ SOURCES = parmys.cc \
 		  ../third_party/vtr/libs/vtrutil/src/vtr_assert.cc \
 		  ../third_party/vtr/libs/log/src/log.cc \
 		  ../third_party/pugixml/src/pugixml.cpp \
-		  ../third_party/libargparse/src/argparse.cpp \
-		  ../third_party/libargparse/src/argparse_formatter.cpp \
-		  ../third_party/libargparse/src/argparse_util.cpp \
 		  ../third_party/vtr/libs/rtlnumber/src/rtl_int.cc \
 		  ../third_party/vtr/libs/rtlnumber/src/rtl_utils.cc \
 		  ../third_party/vtr/libs/pugiutil/src/pugixml_loc.cc \
@@ -78,7 +75,6 @@ include ../Makefile_plugin.common
 
 CXXFLAGS += -I./include
 CXXFLAGS += -I../third_party/pugixml/src
-CXXFLAGS += -I../third_party/libargparse/src
 CXXFLAGS += -I../third_party/vtr/libs/archfpga/src
 CXXFLAGS += -I../third_party/vtr/libs/log/src
 CXXFLAGS += -I../third_party/vtr/libs/pugiutil/src
diff --git a/parmys-plugin/include/Hashtable.hpp b/parmys-plugin/include/Hashtable.hpp
deleted file mode 100644
index 6cf79436d..000000000
--- a/parmys-plugin/include/Hashtable.hpp
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-#ifndef HASHTABLE_H
-#define HASHTABLE_H
-
-#include <stdint.h>
-#include <stdlib.h>
-#include <string>
-#include <unordered_map>
-
-class Hashtable
-{
-  private:
-    std::unordered_map<std::string, void *> my_map;
-
-  public:
-    // Adds an item to the hashtable.
-    void add(std::string key, void *item);
-    // Removes an item from the hashtable. If the item is not present, a null pointer is returned.
-    void *remove(std::string key);
-    // Gets an item from the hashtable without removing it. If the item is not present, a null pointer is returned.
-    void *get(std::string key);
-    // Check to see if the hashtable is empty.
-    bool is_empty();
-    // calls free on each item.
-    void destroy_free_items();
-};
-
-#endif
diff --git a/parmys-plugin/include/adders.h b/parmys-plugin/include/adder.h
similarity index 62%
rename from parmys-plugin/include/adders.h
rename to parmys-plugin/include/adder.h
index 4e228a804..3b2c7222d 100644
--- a/parmys-plugin/include/adders.h
+++ b/parmys-plugin/include/adder.h
@@ -1,27 +1,22 @@
 /*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
-#ifndef ADDERS_H
-#define ADDERS_H
+#ifndef _ADDER_H_
+#define _ADDER_H_
 
 #include "odin_types.h"
 #include "read_xml_arch_file.h"
@@ -69,4 +64,4 @@ int match_pins(nnode_t *node, nnode_t *next_node);
 void instantiate_add_w_carry_block(int *width, nnode_t *node, short mark, netlist_t *netlist, short subtraction);
 nnode_t *check_missing_ports(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist);
 
-#endif // ADDERS_H
+#endif // _ADDER_H_
diff --git a/parmys-plugin/include/ast_util.h b/parmys-plugin/include/ast_util.h
index af19d9637..1f3ab2e8e 100644
--- a/parmys-plugin/include/ast_util.h
+++ b/parmys-plugin/include/ast_util.h
@@ -1,9 +1,26 @@
-#ifndef AST_UTIL_H
-#define AST_UTIL_H
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef _AST_UTIL_H_
+#define _AST_UTIL_H_
 
 #include "odin_types.h"
 
 ast_node_t *create_node_w_type(ids id, loc_t loc);
 ast_node_t *create_tree_node_id(char *string, loc_t loc);
 
-#endif
+#endif // _AST_UTIL_H_
diff --git a/parmys-plugin/include/BlockMemories.hpp b/parmys-plugin/include/block_memory.h
similarity index 64%
rename from parmys-plugin/include/BlockMemories.hpp
rename to parmys-plugin/include/block_memory.h
index 76996b33e..0b2330d09 100644
--- a/parmys-plugin/include/BlockMemories.hpp
+++ b/parmys-plugin/include/block_memory.h
@@ -1,33 +1,22 @@
-/**
- * Copyright (c) 2021 Seyed Alireza Damghani (sdamghann@gmail.com)
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  *
- * @file This file includes the definition of the basic structure
- * used in Odin-II Block Memory resolving process. Moreover, it
- * provides the declaration of the related public routines.
+ * SPDX-License-Identifier: Apache-2.0
  */
-#ifndef _BLOCK_MEMORIES_H_
-#define _BLOCK_MEMORIES_H_
+#ifndef _BLOCK_MEMORY_H_
+#define _BLOCK_MEMORY_H_
 
 #include <unordered_map>
 
@@ -97,4 +86,4 @@ extern void resolve_rom_node(nnode_t *node, uintptr_t traverse_mark_number, netl
 
 extern void iterate_block_memories(netlist_t *netlist);
 
-#endif // _BLOCK_MEMORIES_H_
+#endif // _BLOCK_MEMORY_H_
diff --git a/parmys-plugin/include/config_t.h b/parmys-plugin/include/config_t.h
index 717ef41b2..9b3ac396b 100644
--- a/parmys-plugin/include/config_t.h
+++ b/parmys-plugin/include/config_t.h
@@ -1,5 +1,22 @@
-#ifndef CONFIG_T_H
-#define CONFIG_T_H
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef _CONFIG_T_H_
+#define _CONFIG_T_H_
 
 #include "odin_types.h"
 #include <string>
@@ -44,4 +61,4 @@ struct config_t {
 
 extern config_t configuration;
 
-#endif
+#endif // _CONFIG_T_H_
diff --git a/parmys-plugin/include/hard_block.h b/parmys-plugin/include/hard_block.h
new file mode 100644
index 000000000..6976ab1a5
--- /dev/null
+++ b/parmys-plugin/include/hard_block.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef _HARD_BLOCK_H_
+#define _HARD_BLOCK_H_
+
+#include "odin_types.h"
+
+extern STRING_CACHE *hard_block_names;
+
+void register_hard_blocks();
+t_model *find_hard_block(const char *name);
+void cell_hard_block(nnode_t *node, Yosys::Module *module, netlist_t *netlist, Yosys::Design *design);
+void output_hard_blocks_yosys(Yosys::Design *design);
+void instantiate_hard_block(nnode_t *node, short mark, netlist_t *netlist);
+t_model_ports *get_model_port(t_model_ports *ports, const char *name);
+
+#endif // _HARD_BLOCK_H_
diff --git a/parmys-plugin/include/hard_blocks.h b/parmys-plugin/include/hard_blocks.h
deleted file mode 100644
index 726a0e23e..000000000
--- a/parmys-plugin/include/hard_blocks.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef HARD_BLOCKS_H
-#define HARD_BLOCKS_H
-
-#include "odin_types.h"
-
-extern STRING_CACHE *hard_block_names;
-
-void register_hard_blocks();
-t_model *find_hard_block(const char *name);
-void cell_hard_block(nnode_t *node, Yosys::Module *module, netlist_t *netlist, Yosys::Design *design);
-void output_hard_blocks_yosys(Yosys::Design *design);
-void instantiate_hard_block(nnode_t *node, short mark, netlist_t *netlist);
-t_model_ports *get_model_port(t_model_ports *ports, const char *name);
-
-#endif
diff --git a/parmys-plugin/include/HardSoftLogicMixer.hpp b/parmys-plugin/include/hard_soft_logic_mixer.h
similarity index 70%
rename from parmys-plugin/include/HardSoftLogicMixer.hpp
rename to parmys-plugin/include/hard_soft_logic_mixer.h
index 21d431359..5e0283d00 100644
--- a/parmys-plugin/include/HardSoftLogicMixer.hpp
+++ b/parmys-plugin/include/hard_soft_logic_mixer.h
@@ -1,30 +1,24 @@
 /*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
+#ifndef _HARD_SOFT_LOGIC_MIXER_H_
+#define _HARD_SOFT_LOGIC_MIXER_H_
 
-#ifndef HARD_SOFT_LOGIC_MIXER_HPP
-#define HARD_SOFT_LOGIC_MIXER_HPP
-
-#include "MixingOptimization.hpp"
+#include "mixing_optimization.h"
 #include "odin_types.h" // netlist_t, config_t
 
 class HardSoftLogicMixer
diff --git a/parmys-plugin/include/hash_table.h b/parmys-plugin/include/hash_table.h
new file mode 100644
index 000000000..323b8d8ef
--- /dev/null
+++ b/parmys-plugin/include/hash_table.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef _HASH_TABLE_H_
+#define _HASH_TABLE_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string>
+#include <unordered_map>
+
+class Hashtable
+{
+  private:
+    std::unordered_map<std::string, void *> my_map;
+
+  public:
+    // Adds an item to the hashtable.
+    void add(std::string key, void *item);
+    // Removes an item from the hashtable. If the item is not present, a null pointer is returned.
+    void *remove(std::string key);
+    // Gets an item from the hashtable without removing it. If the item is not present, a null pointer is returned.
+    void *get(std::string key);
+    // Check to see if the hashtable is empty.
+    bool is_empty();
+    // calls free on each item.
+    void destroy_free_items();
+};
+
+#endif // _HASH_TABLE_H_
diff --git a/parmys-plugin/include/memories.h b/parmys-plugin/include/memory.h
similarity index 70%
rename from parmys-plugin/include/memories.h
rename to parmys-plugin/include/memory.h
index c0550094b..3b39096f5 100644
--- a/parmys-plugin/include/memories.h
+++ b/parmys-plugin/include/memory.h
@@ -1,28 +1,22 @@
 /*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
-
-#ifndef MEMORIES_H
-#define MEMORIES_H
+#ifndef _MEMORY_H_
+#define _MEMORY_H_
 
 #include "odin_types.h"
 
@@ -103,4 +97,4 @@ extern void register_memory_model(nnode_t *mem);
 extern void resolve_single_port_ram(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist);
 extern void resolve_dual_port_ram(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist);
 
-#endif // MEMORIES_H
+#endif // _MEMORY_H_
diff --git a/parmys-plugin/include/MixingOptimization.hpp b/parmys-plugin/include/mixing_optimization.h
similarity index 83%
rename from parmys-plugin/include/MixingOptimization.hpp
rename to parmys-plugin/include/mixing_optimization.h
index 717030fe6..65cc11b57 100644
--- a/parmys-plugin/include/MixingOptimization.hpp
+++ b/parmys-plugin/include/mixing_optimization.h
@@ -1,27 +1,22 @@
 /*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
-#ifndef MIXING_OPTIMIZATION_HPP
-#define MIXING_OPTIMIZATION_HPP
+#ifndef _MIXING_OPTIMIZATION_H_
+#define _MIXING_OPTIMIZATION_H_
 #include "odin_types.h" // netlist_t, config_t
 
 class HardSoftLogicMixer;
@@ -219,4 +214,4 @@ class MultsOpt : public MixingOpt
     virtual bool hardenable(nnode_t *);
 };
 
-#endif
+#endif // _MIXING_OPTIMIZATION_H_
diff --git a/parmys-plugin/include/multipliers.h b/parmys-plugin/include/multiplier.h
similarity index 57%
rename from parmys-plugin/include/multipliers.h
rename to parmys-plugin/include/multiplier.h
index 2c6112c4e..5f9fd4e16 100644
--- a/parmys-plugin/include/multipliers.h
+++ b/parmys-plugin/include/multiplier.h
@@ -1,27 +1,22 @@
 /*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
-#ifndef MULTIPLIERS_H
-#define MULTIPLIERS_H
+#ifndef _MULTIPLIER_H_
+#define _MULTIPLIER_H_
 
 #include "odin_types.h"
 #include "read_xml_arch_file.h"
@@ -61,4 +56,4 @@ extern void check_multiplier_port_size(nnode_t *node);
 extern void clean_multipliers();
 extern void free_multipliers();
 
-#endif // MULTIPLIERS_H
+#endif // _MULTIPLIER_H_
diff --git a/parmys-plugin/include/netlist_check.h b/parmys-plugin/include/netlist_check.h
index 79f3c7ff9..9ac2ce8f8 100644
--- a/parmys-plugin/include/netlist_check.h
+++ b/parmys-plugin/include/netlist_check.h
@@ -1,6 +1,23 @@
-#ifndef NETLIST_CHECK_H
-#define NETLIST_CHECK_H
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef _NETLIST_CHECK_H_
+#define _NETLIST_CHECK_H_
 
 void check_netlist(netlist_t *netlist);
 
-#endif
+#endif // _NETLIST_CHECK_H_
diff --git a/parmys-plugin/include/netlist_cleanup.h b/parmys-plugin/include/netlist_cleanup.h
index a94d6df71..71a66b187 100644
--- a/parmys-plugin/include/netlist_cleanup.h
+++ b/parmys-plugin/include/netlist_cleanup.h
@@ -1,6 +1,23 @@
-#ifndef NETLIST_CLEANUP_H
-#define NETLIST_CLEANUP_H
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef _NETLIST_CLEANUP_H_
+#define _NETLIST_CLEANUP_H_
 
 void remove_unused_logic(netlist_t *netlist);
 
-#endif
+#endif // _NETLIST_CLEANUP_H_
diff --git a/parmys-plugin/include/netlist_statistic.h b/parmys-plugin/include/netlist_statistic.h
index fc7c4b752..91e7a516d 100644
--- a/parmys-plugin/include/netlist_statistic.h
+++ b/parmys-plugin/include/netlist_statistic.h
@@ -1,5 +1,22 @@
-#ifndef NETLIST_STATISTIC_HPP
-#define NETLIST_STATISTIC_HPP
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef _NETLIST_STATISTIC_H_
+#define _NETLIST_STATISTIC_H_
 
 #include "netlist_utils.h"
 
@@ -22,4 +39,4 @@ void compute_statistics(netlist_t *netlist, bool display);
  */
 void mixing_optimization_stats(nnode_t *node, netlist_t *netlist);
 
-#endif // NETLIST_STATISTIC_HPP
+#endif // _NETLIST_STATISTIC_H_
diff --git a/parmys-plugin/include/netlist_utils.h b/parmys-plugin/include/netlist_utils.h
index 5e5958e06..38daf05e7 100644
--- a/parmys-plugin/include/netlist_utils.h
+++ b/parmys-plugin/include/netlist_utils.h
@@ -1,5 +1,22 @@
-#ifndef NETLIST_UTILS_H_FUNCTIONS
-#define NETLIST_UTILS_H_FUNCTIONS
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef _NETLIST_UTILS_H_
+#define _NETLIST_UTILS_H_
 
 #include "odin_types.h"
 
@@ -70,4 +87,4 @@ void remove_fanout_pins_from_net(nnet_t *net, npin_t *pin, int id);
 extern void equalize_ports_size(nnode_t *&node, uintptr_t traverse_mark_number, netlist_t *netlist);
 extern void delete_npin(npin_t *pin);
 
-#endif
+#endif // _NETLIST_UTILS_H_
diff --git a/parmys-plugin/include/netlist_visualizer.h b/parmys-plugin/include/netlist_visualizer.h
index 529a5b6fe..545653352 100644
--- a/parmys-plugin/include/netlist_visualizer.h
+++ b/parmys-plugin/include/netlist_visualizer.h
@@ -1,5 +1,22 @@
-#ifndef NETLIST_VISUALIZER_H
-#define NETLIST_VISUALIZER_H
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef _NETLIST_VISUALIZER_H_
+#define _NETLIST_VISUALIZER_H_
 
 #include "odin_types.h"
 #include <string>
@@ -7,4 +24,4 @@
 void graphVizOutputNetlist(std::string path, const char *name, uintptr_t marker_value, netlist_t *input_netlist);
 void graphVizOutputCombinationalNet(std::string path, const char *name, uintptr_t marker_value, nnode_t *current_node);
 
-#endif
+#endif // _NETLIST_VISUALIZER_H_
diff --git a/parmys-plugin/include/node_creation_library.h b/parmys-plugin/include/node_utils.h
similarity index 61%
rename from parmys-plugin/include/node_creation_library.h
rename to parmys-plugin/include/node_utils.h
index 865a17501..a456e0dd3 100644
--- a/parmys-plugin/include/node_creation_library.h
+++ b/parmys-plugin/include/node_utils.h
@@ -1,5 +1,22 @@
-#ifndef NODE_CREATION_LIBRARY_H
-#define NODE_CREATION_LIBRARY_H
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef _NODE_UTILS_H_
+#define _NODE_UTILS_H_
 
 #include "odin_types.h"
 
@@ -21,4 +38,4 @@ const char *edge_type_blif_str(edge_type_e edge_type, loc_t loc);
 
 extern nnode_t *make_multiport_smux(signal_list_t **inputs, signal_list_t *selector, int num_muxed_inputs, signal_list_t *outs, nnode_t *node,
                                     netlist_t *netlist);
-#endif
+#endif // _NODE_UTILS_H_
diff --git a/parmys-plugin/include/odin_error.h b/parmys-plugin/include/odin_error.h
index cc7930f88..6ad2681c2 100644
--- a/parmys-plugin/include/odin_error.h
+++ b/parmys-plugin/include/odin_error.h
@@ -1,5 +1,22 @@
-#ifndef ODIN_ERROR_H
-#define ODIN_ERROR_H
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef _ODIN_ERROR_H_
+#define _ODIN_ERROR_H_
 
 #include <cstdio>
 #include <cstdlib>
@@ -53,4 +70,4 @@ void _log_message(odin_error error_type, loc_t loc, bool soft_error, const char
 
 void verify_delayed_error(odin_error error_type);
 
-#endif
+#endif // _ODIN_ERROR_H_
diff --git a/parmys-plugin/include/odin_globals.h b/parmys-plugin/include/odin_globals.h
index cd117e447..9670a12d9 100644
--- a/parmys-plugin/include/odin_globals.h
+++ b/parmys-plugin/include/odin_globals.h
@@ -1,8 +1,25 @@
-#ifndef GLOBALS_H
-#define GLOBALS_H
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef _ODIN_GLOBALS_H_
+#define _ODIN_GLOBALS_H_
 
-#include "HardSoftLogicMixer.hpp"
-#include "Hashtable.hpp"
+#include "hard_soft_logic_mixer.h"
+#include "hash_table.h"
 #include "config_t.h"
 #include "odin_types.h"
 #include "read_xml_arch_file.h"
@@ -45,4 +62,4 @@ extern bool coarsen_cleanup;
 extern strmap<file_type_e> file_type_strmap;
 extern strmap<operation_list> yosys_subckt_strmap;
 
-#endif
+#endif // _ODIN_GLOBALS_H_
diff --git a/parmys-plugin/include/odin_ii.h b/parmys-plugin/include/odin_ii.h
index ebd03b8d8..0e563b9e9 100644
--- a/parmys-plugin/include/odin_ii.h
+++ b/parmys-plugin/include/odin_ii.h
@@ -1,5 +1,22 @@
-#ifndef ODIN_II_H
-#define ODIN_II_H
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef _ODIN_II_H_
+#define _ODIN_II_H_
 
 #include "odin_types.h"
 /* Odin-II exit status enumerator */
@@ -7,4 +24,4 @@ enum ODIN_ERROR_CODE { ERROR_INITIALIZATION, ERROR_PARSE_CONFIG, ERROR_PARSE_ARC
 
 void set_default_config();
 
-#endif
+#endif // _ODIN_II_H_
diff --git a/parmys-plugin/include/odin_types.h b/parmys-plugin/include/odin_types.h
index 2ea6b8a53..32f76e457 100644
--- a/parmys-plugin/include/odin_types.h
+++ b/parmys-plugin/include/odin_types.h
@@ -1,30 +1,23 @@
-#ifndef ODIN_TYPES_H
-#define ODIN_TYPES_H
 /*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
+#ifndef _ODIN_TYPES_H_
+#define _ODIN_TYPES_H_
 
-#include "argparse_value.hpp"
 #include "odin_error.h"
 #include "read_xml_arch_file.h"
 #include "string_cache.h"
@@ -90,75 +83,38 @@ struct global_args_t {
     // Current path Odin-II is running
     std::string current_path;
 
-    argparse::ArgValue<std::string> config_file;
-    argparse::ArgValue<std::vector<std::string>> verilog_files;
-    argparse::ArgValue<std::string> blif_file;
-    argparse::ArgValue<std::string> output_file;
-    argparse::ArgValue<std::string> arch_file;   // Name of the FPGA architecture file
-    argparse::ArgValue<std::string> tcl_file;    // TCL file to be run by yosys elaborator
-    argparse::ArgValue<std::string> elaborator;  // Name of the external elaborator tool, currently Yosys is supported, default is Odin
-    argparse::ArgValue<bool> permissive;         // turn possible_errors into warnings
-    argparse::ArgValue<bool> print_parse_tokens; // print the tokens as they are parsed byt the parser
+    std::string config_file;
+    std::vector<std::string> verilog_files;
+    std::string blif_file;
+    std::string output_file;
+    std::string arch_file;   // Name of the FPGA architecture file
+    std::string tcl_file;    // TCL file to be run by yosys elaborator
+    std::string elaborator;  // Name of the external elaborator tool, currently Yosys is supported, default is Odin
+    bool permissive;         // turn possible_errors into warnings
+    bool print_parse_tokens; // print the tokens as they are parsed by the parser
 
-    argparse::ArgValue<std::string> high_level_block; // Legacy option, no longer used
+    std::string high_level_block; // Legacy option, no longer used
 
-    argparse::ArgValue<std::string> top_level_module_name; // force the name of the top level module desired
+    std::string top_level_module_name; // force the name of the top level module desired
 
-    argparse::ArgValue<bool> write_netlist_as_dot;
-    argparse::ArgValue<bool> write_ast_as_dot;
-    argparse::ArgValue<bool> all_warnings;
-    argparse::ArgValue<bool> show_help;
+    bool write_netlist_as_dot;
+    bool write_ast_as_dot;
+    bool all_warnings;
+    bool show_help;
 
-    //    argparse::ArgValue<bool> fflegalize;     // makes flip-flops rising edge sensitive
-    argparse::ArgValue<bool> coarsen; // tells Odin-II that the input blif is coarse-grain
-                                      //    argparse::ArgValue<bool> show_yosys_log; // Show Yosys output logs into the standard output stream
+    //    bool fflegalize;     // makes flip-flops rising edge sensitive
+    bool coarsen; // tells Odin-II that the input blif is coarse-grain
+                                      //    bool show_yosys_log; // Show Yosys output logs into the standard output stream
 
-    argparse::ArgValue<std::string> adder_def; // DEPRECATED
+    std::string adder_def; // DEPRECATED
 
     // defines if the first cin of an adder/subtractor is connected to a global gnd/vdd
     // or generated using a dummy adder with both inputs set to gnd/vdd
-    argparse::ArgValue<bool> adder_cin_global;
-
-    /////////////////////
-    // For simulation.
-    /////////////////////
-    // Generate this number of random vectors.
-    argparse::ArgValue<int> sim_num_test_vectors;
-    // Input vectors to simulate instead of generating vectors.
-    argparse::ArgValue<std::string> sim_vector_input_file;
-    // Existing output vectors to verify against.
-    argparse::ArgValue<std::string> sim_vector_output_file;
-    // Simulation output Directory
-    argparse::ArgValue<std::string> sim_directory;
-    // Tells the simulator whether or not to generate random vectors which include the unknown logic value.
-    argparse::ArgValue<bool> sim_generate_three_valued_logic;
-    // Output both falling and rising edges in the output_vectors file. (DEFAULT)
-    argparse::ArgValue<bool> sim_output_both_edges;
-    // Request to read mif file input
-    argparse::ArgValue<bool> read_mif_input;
-    // Additional pins, nets, and nodes to output.
-    argparse::ArgValue<std::vector<std::string>> sim_additional_pins;
-    // Comma-separated list of primary input pins to hold high for all cycles but the first.
-    argparse::ArgValue<std::vector<std::string>> sim_hold_high;
-    // Comma-separated list of primary input pins to hold low for all cycles but the first.
-    argparse::ArgValue<std::vector<std::string>> sim_hold_low;
-    // target coverage
-    argparse::ArgValue<double> sim_min_coverage;
-    // simulate until best coverage is achieved
-    argparse::ArgValue<bool> sim_achieve_best;
-
-    argparse::ArgValue<int> parralelized_simulation;
-    argparse::ArgValue<bool> parralelized_simulation_in_batch;
-    // deprecated since this should be defined when compiled
-    argparse::ArgValue<int> sim_initial_value;
-    // The seed for creating random simulation vector
-    argparse::ArgValue<int> sim_random_seed;
-
-    argparse::ArgValue<bool> interactive_simulation;
+    bool adder_cin_global;
 
     // Arguments for mixing hard and soft logic
-    argparse::ArgValue<int> exact_mults;
-    argparse::ArgValue<float> mults_ratio;
+    int exact_mults;
+    float mults_ratio;
 };
 
 extern const char *ZERO_GND_ZERO;
@@ -668,4 +624,4 @@ struct netlist_t {
     Yosys::Design *design;
 };
 
-#endif
+#endif // _ODIN_TYPES_H_
diff --git a/parmys-plugin/include/odin_util.h b/parmys-plugin/include/odin_util.h
index 8dd23404f..05583595f 100644
--- a/parmys-plugin/include/odin_util.h
+++ b/parmys-plugin/include/odin_util.h
@@ -1,5 +1,22 @@
-#ifndef ODIN_UTIL_H
-#define ODIN_UTIL_H
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef _ODIN_UTIL_H_
+#define _ODIN_UTIL_H_
 
 #include <string>
 
@@ -24,4 +41,4 @@ int odin_sprintf(char *s, const char *format, ...);
 
 void passed_verify_i_o_availabilty(nnode_t *node, int expected_input_size, int expected_output_size, const char *current_src, int line_src);
 
-#endif
+#endif // _ODIN_UTIL_H_
diff --git a/parmys-plugin/include/partial_map.h b/parmys-plugin/include/partial_map.h
index f01f0640a..6371fe0e8 100644
--- a/parmys-plugin/include/partial_map.h
+++ b/parmys-plugin/include/partial_map.h
@@ -1,30 +1,22 @@
 /*
- * Copyright (c) 2009 Peter Andrew Jamieson (jamieson.peter@gmail.com)
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
-
-#ifndef PARTIAL_MAP_H
-#define PARTIAL_MAP_H
+#ifndef _PARTIAL_MAP_H_
+#define _PARTIAL_MAP_H_
 
 void partial_map_top(netlist_t *netlist);
 void instantiate_add_w_carry(nnode_t *node, short mark, netlist_t *netlist);
@@ -32,4 +24,4 @@ void instantiate_multi_port_mux(nnode_t *node, short mark, netlist_t *netlist);
 void depth_first_traversal_to_partial_map(short marker_value, netlist_t *netlist);
 void instantiate_multi_port_n_bits_mux(nnode_t *node, short mark, netlist_t * /*netlist*/);
 
-#endif
+#endif // _PARTIAL_MAP_H_
diff --git a/parmys-plugin/include/read_xml_config_file.h b/parmys-plugin/include/read_xml_config_file.h
index 58ee4403e..02f7a960f 100644
--- a/parmys-plugin/include/read_xml_config_file.h
+++ b/parmys-plugin/include/read_xml_config_file.h
@@ -1,32 +1,24 @@
 /*
- * Copyright (c) 2009 Peter Andrew Jamieson (jamieson.peter@gmail.com)
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
-
-#ifndef READ_XML_CONFIG_FILE_H
-#define READ_XML_CONFIG_FILE_H
+#ifndef _READ_XML_CONFIG_FILE_H_
+#define _READ_XML_CONFIG_FILE_H_
 
 #include "odin_types.h"
 
 extern void read_config_file(const char *file_name);
-#endif
+#endif // _READ_XML_CONFIG_FILE_H_
diff --git a/parmys-plugin/include/string_cache.h b/parmys-plugin/include/string_cache.h
index 3552d61c2..bba9291b2 100644
--- a/parmys-plugin/include/string_cache.h
+++ b/parmys-plugin/include/string_cache.h
@@ -1,24 +1,22 @@
-#ifndef __STRING_CACHE_H__
-#define __STRING_CACHE_H__
-
 /*
- * Copyright (c) 2001 Vladimir Dergachev (volodya@users.sourceforge.net)
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- *    This source code is free software; you can redistribute it
- *    and/or modify it in source code form under the terms of the GNU
- *    General Public License as published by the Free Software
- *    Foundation; either version 2 of the License, or (at your option)
- *    any later version.
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
- *    This program is distributed in the hope that it will be useful,
- *    but WITHOUT ANY WARRANTY; without even the implied warranty of
- *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *    GNU General Public License for more details.
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  *
- *    You should have received a copy of the GNU General Public License
- *    along with this program; if not, write to the Free Software
- *    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * SPDX-License-Identifier: Apache-2.0
  */
+#ifndef _STRING_CACHE_H_
+#define _STRING_CACHE_H_
 
 struct STRING_CACHE {
     long size;
@@ -43,4 +41,4 @@ void *sc_do_alloc(long, long);
 /* free the cache */
 STRING_CACHE *sc_free_string_cache(STRING_CACHE *sc);
 
-#endif
+#endif // _STRING_CACHE_H_
diff --git a/parmys-plugin/include/subtractions.h b/parmys-plugin/include/subtractions.h
deleted file mode 100644
index 690540d6a..000000000
--- a/parmys-plugin/include/subtractions.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-#ifndef SUBS_H
-#define SUBS_H
-
-#include "adders.h"
-#include "read_xml_arch_file.h"
-
-extern vtr::t_linked_vptr *sub_list;
-extern vtr::t_linked_vptr *sub_chain_list;
-
-extern void report_sub_distribution();
-extern void declare_hard_adder_for_sub(nnode_t *node);
-extern void instantiate_hard_adder_subtraction(nnode_t *node, short mark, netlist_t *netlist);
-extern void split_adder_for_sub(nnode_t *node, int a, int b, int sizea, int sizeb, int cin, int cout, int count, netlist_t *netlist);
-extern void iterate_adders_for_sub(netlist_t *netlist);
-extern void instantiate_sub_w_borrow_block(nnode_t *node, short traverse_mark_number, netlist_t *netlist);
-extern void clean_adders_for_sub();
-
-#endif // SUBS_H
diff --git a/parmys-plugin/include/subtractor.h b/parmys-plugin/include/subtractor.h
new file mode 100644
index 000000000..281e659ca
--- /dev/null
+++ b/parmys-plugin/include/subtractor.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef _SUBTRACTOR_H_
+#define _SUBTRACTOR_H_
+
+#include "adder.h"
+#include "read_xml_arch_file.h"
+
+extern vtr::t_linked_vptr *sub_list;
+extern vtr::t_linked_vptr *sub_chain_list;
+
+extern void report_sub_distribution();
+extern void declare_hard_adder_for_sub(nnode_t *node);
+extern void instantiate_hard_adder_subtraction(nnode_t *node, short mark, netlist_t *netlist);
+extern void split_adder_for_sub(nnode_t *node, int a, int b, int sizea, int sizeb, int cin, int cout, int count, netlist_t *netlist);
+extern void iterate_adders_for_sub(netlist_t *netlist);
+extern void instantiate_sub_w_borrow_block(nnode_t *node, short traverse_mark_number, netlist_t *netlist);
+extern void clean_adders_for_sub();
+
+#endif // _SUBTRACTOR_H_
diff --git a/parmys-plugin/parmys.cc b/parmys-plugin/parmys.cc
index ad977b906..11303768c 100644
--- a/parmys-plugin/parmys.cc
+++ b/parmys-plugin/parmys.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright 2022 Daniel Khadivi
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -37,17 +37,17 @@
 
 #include "parmys_resolve.hpp"
 
-#include "BlockMemories.hpp"
-#include "adders.h"
+#include "block_memory.h"
+#include "adder.h"
 #include "arch_util.h"
-#include "hard_blocks.h"
-#include "memories.h"
-#include "multipliers.h"
+#include "hard_block.h"
+#include "memory.h"
+#include "multiplier.h"
 #include "netlist_cleanup.h"
 #include "netlist_statistic.h"
 #include "netlist_visualizer.h"
 #include "read_xml_config_file.h"
-#include "subtractions.h"
+#include "subtractor.h"
 
 #include "ast_util.h"
 #include "parmys_update.hpp"
@@ -872,8 +872,8 @@ struct ParMYSPass : public Pass {
         std::string top_module_name;
         std::string DEFAULT_OUTPUT(".");
 
-        global_args.exact_mults.set(-1, argparse::Provenance::DEFAULT);
-        global_args.mults_ratio.set(-1.0, argparse::Provenance::DEFAULT);
+        global_args.exact_mults = -1;
+        global_args.mults_ratio = -1.0;
 
         log_header(design, "Starting parmys pass.\n");
 
@@ -906,11 +906,11 @@ struct ParMYSPass : public Pass {
                 continue;
             }
             if (args[argidx] == "-exact_mults" && argidx + 1 < args.size()) {
-                global_args.exact_mults.set(atoi(args[++argidx].c_str()), argparse::Provenance::SPECIFIED);
+                global_args.exact_mults = atoi(args[++argidx].c_str());
                 continue;
             }
             if (args[argidx] == "-mults_ratio" && argidx + 1 < args.size()) {
-                global_args.mults_ratio.set(atof(args[++argidx].c_str()), argparse::Provenance::SPECIFIED);
+                global_args.mults_ratio = atof(args[++argidx].c_str());
                 continue;
             }
         }
diff --git a/parmys-plugin/parmys_arch.cc b/parmys-plugin/parmys_arch.cc
index 21672bdbb..d297e9f40 100644
--- a/parmys-plugin/parmys_arch.cc
+++ b/parmys-plugin/parmys_arch.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright 2022 Daniel Khadivi
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/parmys-plugin/parmys_resolve.cc b/parmys-plugin/parmys_resolve.cc
index 7b4aabd4d..a547a56f1 100644
--- a/parmys-plugin/parmys_resolve.cc
+++ b/parmys-plugin/parmys_resolve.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright 2022 Daniel Khadivi
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,7 +15,6 @@
  *
  * SPDX-License-Identifier: Apache-2.0
  */
-
 #include "odin_globals.h"
 #include "odin_types.h"
 
@@ -23,12 +22,12 @@
 
 #include "netlist_utils.h"
 
-#include "BlockMemories.hpp"
-#include "adders.h"
-#include "memories.h"
-#include "multipliers.h"
+#include "block_memory.h"
+#include "adder.h"
+#include "memory.h"
+#include "multiplier.h"
 #include "parmys_resolve.hpp"
-#include "subtractions.h"
+#include "subtractor.h"
 
 #include "vtr_util.h"
 
diff --git a/parmys-plugin/parmys_resolve.hpp b/parmys-plugin/parmys_resolve.hpp
index 52c2a9beb..5c93c43e4 100644
--- a/parmys-plugin/parmys_resolve.hpp
+++ b/parmys-plugin/parmys_resolve.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright 2022 Daniel Khadivi
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,12 +15,11 @@
  *
  * SPDX-License-Identifier: Apache-2.0
  */
-
-#ifndef __RESOLVE_H
-#define __RESOLVE_H
+#ifndef _PARMYS_RESOLVE_HPP_
+#define _PARMYS_RESOLVE_HPP_
 
 #define DEFAULT_CLOCK_NAME "GLOBAL_SIM_BASE_CLK"
 
 void resolve_top(netlist_t* netlist);
 
-#endif
+#endif // _PARMYS_RESOLVE_HPP_
diff --git a/parmys-plugin/parmys_update.cc b/parmys-plugin/parmys_update.cc
index 21a52e278..cd3622a61 100644
--- a/parmys-plugin/parmys_update.cc
+++ b/parmys-plugin/parmys_update.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright 2022 Daniel Khadivi
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,7 +15,6 @@
  *
  * SPDX-License-Identifier: Apache-2.0
  */
-
 #include <string.h>
 
 #include "odin_globals.h"
@@ -24,11 +23,11 @@
 #include "vtr_memory.h"
 #include "vtr_util.h"
 
-#include "node_creation_library.h"
+#include "node_utils.h"
 
-#include "adders.h"
-#include "hard_blocks.h"
-#include "multipliers.h"
+#include "adder.h"
+#include "hard_block.h"
+#include "multiplier.h"
 
 #include "kernel/rtlil.h"
 #include "parmys_update.hpp"
diff --git a/parmys-plugin/parmys_update.hpp b/parmys-plugin/parmys_update.hpp
index 156b1b63f..9ace088c9 100644
--- a/parmys-plugin/parmys_update.hpp
+++ b/parmys-plugin/parmys_update.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright 2022 Daniel Khadivi
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,8 +15,8 @@
  *
  * SPDX-License-Identifier: Apache-2.0
  */
-#ifndef __DESIGN_UPDATE_H__
-#define __DESIGN_UPDATE_H__
+#ifndef _PARMYS_UPDATE_HPP_
+#define _PARMYS_UPDATE_HPP_
 
 #include "odin_types.h"
 
@@ -28,4 +28,4 @@ void define_MUX_function_yosys(nnode_t *node, Yosys::Module *module);
 void define_SMUX_function_yosys(nnode_t *node, Yosys::Module *module);
 void define_FF_yosys(nnode_t *node, Yosys::Module *module);
 
-#endif //__DESIGN_UPDATE_H__
\ No newline at end of file
+#endif // _PARMYS_UPDATE_HPP_
diff --git a/parmys-plugin/parmys_utils.cc b/parmys-plugin/parmys_utils.cc
index fb43e2f20..7ed397973 100644
--- a/parmys-plugin/parmys_utils.cc
+++ b/parmys-plugin/parmys_utils.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright 2022 Daniel Khadivi
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/parmys-plugin/parmys_utils.hpp b/parmys-plugin/parmys_utils.hpp
index 44433b13b..ae884353c 100644
--- a/parmys-plugin/parmys_utils.hpp
+++ b/parmys-plugin/parmys_utils.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright 2022 Daniel Khadivi
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,8 +15,8 @@
  *
  * SPDX-License-Identifier: Apache-2.0
  */
-#ifndef __YOSYS_UTILS_H__
-#define __YOSYS_UTILS_H__
+#ifndef _PARMYS_UTILS_HPP_
+#define _PARMYS_UTILS_HPP_
 
 #include "odin_types.h"
 
@@ -28,4 +28,4 @@ void handle_cell_wideports_cache(Yosys::hashlib::dict<Yosys::RTLIL::IdString, Yo
                                  Yosys::Design *design, Yosys::Module *module, Yosys::Cell *cell);
 void handle_wideports_cache(Yosys::hashlib::dict<Yosys::RTLIL::IdString, std::pair<int, bool>> *wideports_cache, Yosys::Module *module);
 
-#endif //__YOSYS_UTILS_H__
\ No newline at end of file
+#endif // _PARMYS_UTILS_HPP_
diff --git a/parmys-plugin/src/Hashtable.cc b/parmys-plugin/src/Hashtable.cc
deleted file mode 100644
index 6fac26204..000000000
--- a/parmys-plugin/src/Hashtable.cc
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "Hashtable.hpp"
-#include "odin_types.h"
-#include "vtr_memory.h"
-
-void Hashtable::destroy_free_items()
-{
-    for (auto kv : my_map)
-        vtr::free(kv.second);
-}
-
-void Hashtable::add(std::string key, void *item) { this->my_map.emplace(key, item); }
-
-void *Hashtable::remove(std::string key)
-{
-    void *value = NULL;
-    auto v = this->my_map.find(key);
-    if (v != this->my_map.end()) {
-        value = v->second;
-        this->my_map.erase(v);
-    }
-    return value;
-}
-
-void *Hashtable::get(std::string key)
-{
-    void *value = NULL;
-    auto v = this->my_map.find(key);
-    if (v != this->my_map.end())
-        value = v->second;
-
-    return value;
-}
-
-bool Hashtable::is_empty() { return my_map.empty(); }
diff --git a/parmys-plugin/src/adders.cc b/parmys-plugin/src/adder.cc
similarity index 97%
rename from parmys-plugin/src/adders.cc
rename to parmys-plugin/src/adder.cc
index 500757044..b475ba3d5 100644
--- a/parmys-plugin/src/adders.cc
+++ b/parmys-plugin/src/adder.cc
@@ -1,34 +1,28 @@
 /*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
-
-#include "adders.h"
-#include "multipliers.h"
+#include "adder.h"
+#include "multiplier.h"
 #include "netlist_utils.h"
-#include "node_creation_library.h"
+#include "node_utils.h"
 #include "odin_globals.h"
 #include "odin_types.h"
 #include "odin_util.h"
-#include "subtractions.h"
+#include "subtractor.h"
 #include <string.h>
 
 #include "vtr_memory.h"
diff --git a/parmys-plugin/src/ast_util.cc b/parmys-plugin/src/ast_util.cc
index db15ce510..6eab5f864 100644
--- a/parmys-plugin/src/ast_util.cc
+++ b/parmys-plugin/src/ast_util.cc
@@ -1,26 +1,19 @@
 /*
- * Copyright (c) 2009 Peter Andrew Jamieson (jamieson.peter@gmail.com)
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
 #include "odin_globals.h"
 #include "odin_types.h"
diff --git a/parmys-plugin/src/BlockMemories.cc b/parmys-plugin/src/block_memory.cc
similarity index 97%
rename from parmys-plugin/src/BlockMemories.cc
rename to parmys-plugin/src/block_memory.cc
index f709163be..9bacd4060 100644
--- a/parmys-plugin/src/BlockMemories.cc
+++ b/parmys-plugin/src/block_memory.cc
@@ -1,47 +1,28 @@
-/**
- * Copyright (c) 2021 Seyed Alireza Damghani (sdamghann@gmail.com)
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  *
- * @file: This file includes the definitions of the routines to map
- * block memories to VTR compatible memory types, i.e., Single Port RAM
- * and Dual Port RAM. The definition of block memory and read-only memory
- * is provided in techlib directory in the Odin-II root directory.
- * Basically, a memory block with both read and write accesses that has a
- * separate port for each operation is called BRAM. While following the
- * same definition, a read-only memory block is referred to as a BRAM that
- * has only read access (even multiple accesses). This function also
- * includes ymem block support which somehow represents the Yosys internal
- * memory cell.
+ * SPDX-License-Identifier: Apache-2.0
  */
-
 #include "odin_util.h"
 #include <string.h>
 
-#include "BlockMemories.hpp"
-#include "hard_blocks.h"
-#include "memories.h"
+#include "block_memory.h"
+#include "hard_block.h"
+#include "memory.h"
 #include "netlist_utils.h"
-#include "node_creation_library.h"
+#include "node_utils.h"
 #include "partial_map.h"
 #include "vtr_memory.h"
 #include "vtr_util.h"
diff --git a/parmys-plugin/src/enum_str.cc b/parmys-plugin/src/enum_str.cc
index 3134fa5d6..5460ddf7b 100644
--- a/parmys-plugin/src/enum_str.cc
+++ b/parmys-plugin/src/enum_str.cc
@@ -1,3 +1,20 @@
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
 #include "odin_types.h"
 
 const char *edge_type_e_STR[] = {
diff --git a/parmys-plugin/src/hard_blocks.cc b/parmys-plugin/src/hard_block.cc
similarity index 90%
rename from parmys-plugin/src/hard_blocks.cc
rename to parmys-plugin/src/hard_block.cc
index 2d1bb2253..0c9ca2dcd 100644
--- a/parmys-plugin/src/hard_blocks.cc
+++ b/parmys-plugin/src/hard_block.cc
@@ -1,30 +1,24 @@
 /*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
-
 #include <stdlib.h>
 
-#include "hard_blocks.h"
-#include "memories.h"
+#include "hard_block.h"
+#include "memory.h"
 #include "netlist_utils.h"
 #include "odin_globals.h"
 #include "odin_types.h"
diff --git a/parmys-plugin/src/HardSoftLogicMixer.cc b/parmys-plugin/src/hard_soft_logic_mixer.cc
similarity index 57%
rename from parmys-plugin/src/HardSoftLogicMixer.cc
rename to parmys-plugin/src/hard_soft_logic_mixer.cc
index ec7dce087..674e840b3 100644
--- a/parmys-plugin/src/HardSoftLogicMixer.cc
+++ b/parmys-plugin/src/hard_soft_logic_mixer.cc
@@ -1,31 +1,26 @@
 /*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
-#include "HardSoftLogicMixer.hpp"
+#include "hard_soft_logic_mixer.h"
 
 #include <stdint.h> // INT_MAX
 #include <vector>
 
-#include "multipliers.h" // instantiate_simple_soft_multiplier
+#include "multiplier.h" // instantiate_simple_soft_multiplier
 #include "odin_error.h"  // error_message
 
 HardSoftLogicMixer::HardSoftLogicMixer()
diff --git a/parmys-plugin/src/hash_table.cc b/parmys-plugin/src/hash_table.cc
new file mode 100644
index 000000000..102999de8
--- /dev/null
+++ b/parmys-plugin/src/hash_table.cc
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "hash_table.h"
+#include "odin_types.h"
+#include "vtr_memory.h"
+
+void Hashtable::destroy_free_items() // vtr::free()s every stored item; the map entries themselves are kept
+{
+    for (const auto &kv : my_map) // bind by reference: no per-entry copy just to read the pointer
+        vtr::free(kv.second);
+}
+
+void Hashtable::add(std::string key, void *item) { my_map.emplace(key, item); } // stores item under key via emplace()
+
+void *Hashtable::remove(std::string key)
+{
+    auto entry = this->my_map.find(key);
+    if (entry == this->my_map.end())
+        return nullptr; // key absent: nothing to remove
+
+    void *item = entry->second; // read the value before erase() invalidates the iterator
+    this->my_map.erase(entry);
+    return item; // the item is handed back to the caller, not freed here
+}
+
+void *Hashtable::get(std::string key)
+{
+    // Pure lookup: the map is not modified; a missing key yields a null pointer.
+    auto entry = this->my_map.find(key);
+    if (entry == this->my_map.end())
+        return nullptr;
+
+    return entry->second;
+}
+
+bool Hashtable::is_empty() { return this->my_map.empty(); } // true when no items are stored
diff --git a/parmys-plugin/src/memories.cc b/parmys-plugin/src/memory.cc
similarity index 98%
rename from parmys-plugin/src/memories.cc
rename to parmys-plugin/src/memory.cc
index 675da7b18..fda6bf0c8 100644
--- a/parmys-plugin/src/memories.cc
+++ b/parmys-plugin/src/memory.cc
@@ -1,26 +1,20 @@
 /*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
-
 #include "ast_util.h"
 #include "odin_globals.h"
 #include "odin_types.h"
@@ -28,10 +22,10 @@
 #include <math.h>
 #include <string.h>
 
-#include "hard_blocks.h"
-#include "memories.h"
+#include "hard_block.h"
+#include "memory.h"
 #include "netlist_utils.h"
-#include "node_creation_library.h"
+#include "node_utils.h"
 #include "partial_map.h"
 #include "vtr_memory.h"
 #include "vtr_util.h"
diff --git a/parmys-plugin/src/MixingOptimization.cc b/parmys-plugin/src/mixing_optimization.cc
similarity index 81%
rename from parmys-plugin/src/MixingOptimization.cc
rename to parmys-plugin/src/mixing_optimization.cc
index 293238c4b..c4e0148b3 100644
--- a/parmys-plugin/src/MixingOptimization.cc
+++ b/parmys-plugin/src/mixing_optimization.cc
@@ -1,34 +1,28 @@
 /*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
-
-#include "MixingOptimization.hpp"
+#include "mixing_optimization.h"
 
 #include <stdint.h> // INT_MAX
 #include <vector>
 
-#include "HardSoftLogicMixer.hpp" // HardSoftLogicMixer
-#include "adders.h"               // hard_adders
-#include "multipliers.h"          // instantiate_simple_soft_multiplier
+#include "hard_soft_logic_mixer.h" // HardSoftLogicMixer
+#include "adder.h"               // hard_adders
+#include "multiplier.h"          // instantiate_simple_soft_multiplier
 #include "netlist_statistic.h"    // mixing_optimization_stats
 #include "odin_error.h"           // error_message
 
diff --git a/parmys-plugin/src/multipliers.cc b/parmys-plugin/src/multiplier.cc
similarity index 98%
rename from parmys-plugin/src/multipliers.cc
rename to parmys-plugin/src/multiplier.cc
index 424e92dda..25841d676 100644
--- a/parmys-plugin/src/multipliers.cc
+++ b/parmys-plugin/src/multiplier.cc
@@ -1,29 +1,23 @@
 /*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
-
-#include "multipliers.h"
+#include "multiplier.h"
 #include "netlist_utils.h"
-#include "node_creation_library.h"
+#include "node_utils.h"
 #include "odin_globals.h"
 #include "odin_types.h"
 #include "odin_util.h"
@@ -36,7 +30,7 @@
 #include <string.h>
 #include <string>
 
-#include "adders.h"
+#include "adder.h"
 
 #include "vtr_list.h"
 #include "vtr_memory.h"
diff --git a/parmys-plugin/src/netlist_check.cc b/parmys-plugin/src/netlist_check.cc
index 92e945feb..58f21110c 100644
--- a/parmys-plugin/src/netlist_check.cc
+++ b/parmys-plugin/src/netlist_check.cc
@@ -1,25 +1,19 @@
 /*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
 #include "odin_globals.h"
 #include "odin_types.h"
diff --git a/parmys-plugin/src/netlist_cleanup.cc b/parmys-plugin/src/netlist_cleanup.cc
index d9458b479..914807c5b 100644
--- a/parmys-plugin/src/netlist_cleanup.cc
+++ b/parmys-plugin/src/netlist_cleanup.cc
@@ -1,24 +1,19 @@
 /*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
 #include "odin_globals.h"
 #include "odin_types.h"
diff --git a/parmys-plugin/src/netlist_statistic.cc b/parmys-plugin/src/netlist_statistic.cc
index b65f3f004..b65fef626 100644
--- a/parmys-plugin/src/netlist_statistic.cc
+++ b/parmys-plugin/src/netlist_statistic.cc
@@ -1,7 +1,24 @@
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
 #include <algorithm>
 
 /* for hb */
-#include "multipliers.h"
+#include "multiplier.h"
 
 #include "netlist_statistic.h"
 #include "odin_globals.h"
diff --git a/parmys-plugin/src/netlist_utils.cc b/parmys-plugin/src/netlist_utils.cc
index 2ac86ef22..a7b9049e5 100644
--- a/parmys-plugin/src/netlist_utils.cc
+++ b/parmys-plugin/src/netlist_utils.cc
@@ -1,24 +1,19 @@
 /*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
 #include "odin_globals.h"
 #include "odin_types.h"
@@ -29,7 +24,7 @@
 #include <string.h>
 
 #include "netlist_utils.h"
-#include "node_creation_library.h"
+#include "node_utils.h"
 #include "odin_util.h"
 #include "vtr_memory.h"
 #include "vtr_util.h"
diff --git a/parmys-plugin/src/netlist_visualizer.cc b/parmys-plugin/src/netlist_visualizer.cc
index c7e36d2b3..a4b01ce79 100644
--- a/parmys-plugin/src/netlist_visualizer.cc
+++ b/parmys-plugin/src/netlist_visualizer.cc
@@ -1,26 +1,19 @@
 /*
- * Copyright (c) 2009 Peter Andrew Jamieson (jamieson.peter@gmail.com)
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/parmys-plugin/src/node_creation_library.cc b/parmys-plugin/src/node_utils.cc
similarity index 92%
rename from parmys-plugin/src/node_creation_library.cc
rename to parmys-plugin/src/node_utils.cc
index 45a0b62df..8db010a35 100644
--- a/parmys-plugin/src/node_creation_library.cc
+++ b/parmys-plugin/src/node_utils.cc
@@ -1,26 +1,21 @@
 /*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
-#include "node_creation_library.h"
+#include "node_utils.h"
 #include "netlist_utils.h"
 #include "odin_globals.h"
 #include "odin_types.h"
diff --git a/parmys-plugin/src/odin_error.cc b/parmys-plugin/src/odin_error.cc
index ff4365baa..411a47c79 100644
--- a/parmys-plugin/src/odin_error.cc
+++ b/parmys-plugin/src/odin_error.cc
@@ -1,3 +1,20 @@
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
 #include "odin_error.h"
 #include "config_t.h"
 
diff --git a/parmys-plugin/src/odin_ii.cc b/parmys-plugin/src/odin_ii.cc
index 7a7679bb4..e53f19b3a 100644
--- a/parmys-plugin/src/odin_ii.cc
+++ b/parmys-plugin/src/odin_ii.cc
@@ -1,34 +1,28 @@
 /*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
 #include <sstream>
 
-#include "argparse.hpp"
 #include "odin_ii.h"
 
 #include "odin_globals.h"
 #include "odin_types.h"
 
-#include "HardSoftLogicMixer.hpp"
+#include "hard_soft_logic_mixer.h"
 #include "vtr_path.h"
 
 #define DEFAULT_OUTPUT "."
diff --git a/parmys-plugin/src/odin_util.cc b/parmys-plugin/src/odin_util.cc
index dd29c3d27..88d093e3c 100644
--- a/parmys-plugin/src/odin_util.cc
+++ b/parmys-plugin/src/odin_util.cc
@@ -1,26 +1,19 @@
 /*
- * Copyright (c) 2009 Peter Andrew Jamieson (jamieson.peter@gmail.com)
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
 #include "odin_globals.h"
 #include "odin_types.h"
diff --git a/parmys-plugin/src/partial_map.cc b/parmys-plugin/src/partial_map.cc
index 78da16a83..f2e24d875 100644
--- a/parmys-plugin/src/partial_map.cc
+++ b/parmys-plugin/src/partial_map.cc
@@ -1,30 +1,19 @@
-/**
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  *
- * @file: this file includes the instantiation process of Odin-II
- * supported cells. Technically, the Odin-II partial mapper transforms
- * netlist to a target device technology dependent cells. The partial
- * decides the hard/soft logic inference of logic blocks according to
- * the target architecture and specified threshold in command arguments.
+ * SPDX-License-Identifier: Apache-2.0
  */
 #include "odin_globals.h"
 #include "odin_types.h"
@@ -32,16 +21,16 @@
 #include <string.h>
 
 #include "netlist_utils.h"
-#include "node_creation_library.h"
+#include "node_utils.h"
 #include "odin_util.h"
 
-#include "adders.h"
-#include "hard_blocks.h"
+#include "adder.h"
+#include "hard_block.h"
 
-#include "memories.h"
+#include "memory.h"
 
 #include "partial_map.h"
-#include "subtractions.h"
+#include "subtractor.h"
 #include "vtr_memory.h"
 #include "vtr_util.h"
 
diff --git a/parmys-plugin/src/read_xml_config_file.cc b/parmys-plugin/src/read_xml_config_file.cc
index eba55eb9c..b3f10c8df 100644
--- a/parmys-plugin/src/read_xml_config_file.cc
+++ b/parmys-plugin/src/read_xml_config_file.cc
@@ -1,24 +1,19 @@
 /*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
 #include "read_xml_config_file.h"
 #include "odin_globals.h"
@@ -127,7 +122,7 @@ void read_outputs(pugi::xml_node a_node, const pugiutil::loc_data &loc_data)
 
     child = get_single_child(a_node, "output_path_and_name", loc_data, OPTIONAL);
     if (child != NULL) {
-        global_args.output_file.set(child.child_value(), argparse::Provenance::SPECIFIED);
+        global_args.output_file = child.child_value();
     }
 
     child = get_single_child(a_node, "target", loc_data, OPTIONAL);
@@ -135,10 +130,10 @@ void read_outputs(pugi::xml_node a_node, const pugiutil::loc_data &loc_data)
         child = get_single_child(child, "arch_file", loc_data, OPTIONAL);
         if (child != NULL) {
             /* Two arch files specified? */
-            if (global_args.arch_file.value() != "") {
+            if (global_args.arch_file != "") {
                 error_message(PARSE_ARGS, unknown_location, "%s", "Error: Arch file specified in config file AND command line\n");
             }
-            global_args.arch_file.set(child.child_value(), argparse::Provenance::SPECIFIED);
+            global_args.arch_file = child.child_value();
         }
     }
     return;
diff --git a/parmys-plugin/src/string_cache.cc b/parmys-plugin/src/string_cache.cc
index 7a81fec0b..038a4ab6f 100644
--- a/parmys-plugin/src/string_cache.cc
+++ b/parmys-plugin/src/string_cache.cc
@@ -1,4 +1,20 @@
-// Not PJs code, but very useful and used everywhere */
+/*
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
 #include "string_cache.h"
 #include "vtr_memory.h"
 #include "vtr_util.h"
diff --git a/parmys-plugin/src/subtractions.cc b/parmys-plugin/src/subtractor.cc
similarity index 97%
rename from parmys-plugin/src/subtractions.cc
rename to parmys-plugin/src/subtractor.cc
index b9dcf6b85..ac96c86be 100644
--- a/parmys-plugin/src/subtractions.cc
+++ b/parmys-plugin/src/subtractor.cc
@@ -1,30 +1,24 @@
 /*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
+ * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
  */
-
-#include "subtractions.h"
-#include "adders.h"
+#include "subtractor.h"
+#include "adder.h"
 #include "netlist_utils.h"
-#include "node_creation_library.h"
+#include "node_utils.h"
 #include "odin_globals.h"
 #include "odin_types.h"
 #include "odin_util.h"
diff --git a/parmys-plugin/techlibs/adffe2dff.v b/parmys-plugin/techlibs/adffe2dff.v
index cb9f35c83..07f3ce474 100644
--- a/parmys-plugin/techlibs/adffe2dff.v
+++ b/parmys-plugin/techlibs/adffe2dff.v
@@ -1,6 +1,7 @@
 // yosys -- Yosys Open SYnthesis Suite
 //
 // Copyright (C) 2012  Claire Xenia Wolf <claire@yosyshq.com>
+// Copyright (C) 2022  CAS—Atlantic (University of New Brunswick, CASA)
 //
 // Permission to use, copy, modify, and/or distribute this software for any
 // purpose with or without fee is hereby granted, provided that the above
diff --git a/parmys-plugin/techlibs/aldff2dff.v b/parmys-plugin/techlibs/aldff2dff.v
index f8ab2281c..b7ad1fc59 100644
--- a/parmys-plugin/techlibs/aldff2dff.v
+++ b/parmys-plugin/techlibs/aldff2dff.v
@@ -1,7 +1,7 @@
 // yosys -- Yosys Open SYnthesis Suite
 //
 // Copyright (C) 2012  Claire Xenia Wolf <claire@yosyshq.com>
-// Copyright (C) 2022  Daniel Khadivi
+// Copyright (C) 2022  CAS—Atlantic (University of New Brunswick, CASA)
 //
 // Permission to use, copy, modify, and/or distribute this software for any
 // purpose with or without fee is hereby granted, provided that the above
@@ -15,6 +15,8 @@
 // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 //
+// Modified version of adff2dff for aldff
+//
 // SPDX-License-Identifier: ISC
 
 (* techmap_celltype = "$aldff" *)
diff --git a/parmys-plugin/techlibs/aldffe2dff.v b/parmys-plugin/techlibs/aldffe2dff.v
index 613d646da..c0d0cf0de 100644
--- a/parmys-plugin/techlibs/aldffe2dff.v
+++ b/parmys-plugin/techlibs/aldffe2dff.v
@@ -1,7 +1,7 @@
 // yosys -- Yosys Open SYnthesis Suite
 //
 // Copyright (C) 2012  Claire Xenia Wolf <claire@yosyshq.com>
-// Copyright (C) 2022  Daniel Khadivi
+// Copyright (C) 2022  CAS—Atlantic (University of New Brunswick, CASA)
 //
 // Permission to use, copy, modify, and/or distribute this software for any
 // purpose with or without fee is hereby granted, provided that the above
@@ -15,6 +15,8 @@
 // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 //
+// Modified version of adff2dff for aldffe
+//
 // SPDX-License-Identifier: ISC
 
 (* techmap_celltype = "$aldffe" *)
diff --git a/parmys-plugin/tests/Makefile b/parmys-plugin/tests/Makefile
index 5c1f63d8a..758361f7b 100644
--- a/parmys-plugin/tests/Makefile
+++ b/parmys-plugin/tests/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2020-2022 F4PGA Authors
+# Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/parmys-plugin/tests/VexRiscv_Lite/odin_config.xml b/parmys-plugin/tests/VexRiscv_Lite/odin_config.xml
index 2edfb591c..472ee2cac 100644
--- a/parmys-plugin/tests/VexRiscv_Lite/odin_config.xml
+++ b/parmys-plugin/tests/VexRiscv_Lite/odin_config.xml
@@ -1,5 +1,5 @@
 <!--
-# Copyright 2022 Daniel Khadivi
+# Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/parmys-plugin/tests/eltwise_layer/odin_config.xml b/parmys-plugin/tests/eltwise_layer/odin_config.xml
index 07a54fea6..a9612d67d 100644
--- a/parmys-plugin/tests/eltwise_layer/odin_config.xml
+++ b/parmys-plugin/tests/eltwise_layer/odin_config.xml
@@ -1,5 +1,5 @@
 <!--
-# Copyright 2022 Daniel Khadivi
+# Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/parmys-plugin/tests/mips32r1_core/mips32r1_core.v b/parmys-plugin/tests/mips32r1_core/mips32r1_core.v
index 36dff2bda..2f79fbb9e 100644
--- a/parmys-plugin/tests/mips32r1_core/mips32r1_core.v
+++ b/parmys-plugin/tests/mips32r1_core/mips32r1_core.v
@@ -1,4 +1,4 @@
-// Copyright 2022 F4PGA Authors
+// Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/parmys-plugin/tests/mips32r1_core/odin_config.xml b/parmys-plugin/tests/mips32r1_core/odin_config.xml
index 2edfb591c..472ee2cac 100644
--- a/parmys-plugin/tests/mips32r1_core/odin_config.xml
+++ b/parmys-plugin/tests/mips32r1_core/odin_config.xml
@@ -1,5 +1,5 @@
 <!--
-# Copyright 2022 Daniel Khadivi
+# Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/parmys-plugin/tests/raygentop/odin_config.xml b/parmys-plugin/tests/raygentop/odin_config.xml
index 2edfb591c..472ee2cac 100644
--- a/parmys-plugin/tests/raygentop/odin_config.xml
+++ b/parmys-plugin/tests/raygentop/odin_config.xml
@@ -1,5 +1,5 @@
 <!--
-# Copyright 2022 Daniel Khadivi
+# Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

From 30cc0de44bfb8df340911a3e458b049c08a20a10 Mon Sep 17 00:00:00 2001
From: Dani <17553473+poname@users.noreply.github.com>
Date: Tue, 20 Dec 2022 13:55:44 -0400
Subject: [PATCH 27/56] Pr issues (#1)

* for loop syntax improved

* namespace refined

* just parmys ci for now

* reformatted

* all ci
---
 parmys-plugin/Makefile                     |   1 -
 parmys-plugin/include/netlist_check.h      |  23 -
 parmys-plugin/include/odin_globals.h       |   2 +-
 parmys-plugin/include/odin_types.h         |   2 +-
 parmys-plugin/parmys.cc                    |  85 +--
 parmys-plugin/parmys_arch.cc               |  15 +-
 parmys-plugin/parmys_resolve.cc            |  15 +-
 parmys-plugin/parmys_resolve.hpp           |   2 +-
 parmys-plugin/parmys_update.cc             | 238 ++++---
 parmys-plugin/parmys_update.hpp            |  12 +-
 parmys-plugin/parmys_utils.cc              |  55 +-
 parmys-plugin/parmys_utils.hpp             |  16 +-
 parmys-plugin/src/adder.cc                 | 101 ++-
 parmys-plugin/src/block_memory.cc          | 285 ++++----
 parmys-plugin/src/hard_block.cc            |   4 +-
 parmys-plugin/src/hard_soft_logic_mixer.cc |   2 +-
 parmys-plugin/src/memory.cc                | 124 ++--
 parmys-plugin/src/mixing_optimization.cc   |   8 +-
 parmys-plugin/src/multiplier.cc            | 139 ++--
 parmys-plugin/src/netlist_check.cc         | 735 ---------------------
 parmys-plugin/src/netlist_cleanup.cc       |  17 +-
 parmys-plugin/src/netlist_utils.cc         |  93 +--
 parmys-plugin/src/netlist_visualizer.cc    |  17 +-
 parmys-plugin/src/node_utils.cc            |  14 +-
 parmys-plugin/src/partial_map.cc           |  89 +--
 parmys-plugin/src/string_cache.cc          |   7 +-
 parmys-plugin/src/subtractor.cc            |  63 +-
 27 files changed, 647 insertions(+), 1517 deletions(-)
 delete mode 100644 parmys-plugin/include/netlist_check.h
 delete mode 100644 parmys-plugin/src/netlist_check.cc

diff --git a/parmys-plugin/Makefile b/parmys-plugin/Makefile
index 897816c7a..5ca2525a4 100644
--- a/parmys-plugin/Makefile
+++ b/parmys-plugin/Makefile
@@ -30,7 +30,6 @@ SOURCES = parmys.cc \
 		  src/odin_util.cc \
 		  src/netlist_statistic.cc \
 		  src/netlist_utils.cc \
-		  src/netlist_check.cc \
 		  src/netlist_cleanup.cc \
 		  src/node_utils.cc \
 		  src/multiplier.cc \
diff --git a/parmys-plugin/include/netlist_check.h b/parmys-plugin/include/netlist_check.h
deleted file mode 100644
index 9ac2ce8f8..000000000
--- a/parmys-plugin/include/netlist_check.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- */
-#ifndef _NETLIST_CHECK_H_
-#define _NETLIST_CHECK_H_
-
-void check_netlist(netlist_t *netlist);
-
-#endif // _NETLIST_CHECK_H_
diff --git a/parmys-plugin/include/odin_globals.h b/parmys-plugin/include/odin_globals.h
index 9670a12d9..fa00ecb05 100644
--- a/parmys-plugin/include/odin_globals.h
+++ b/parmys-plugin/include/odin_globals.h
@@ -18,9 +18,9 @@
 #ifndef _ODIN_GLOBALS_H_
 #define _ODIN_GLOBALS_H_
 
+#include "config_t.h"
 #include "hard_soft_logic_mixer.h"
 #include "hash_table.h"
-#include "config_t.h"
 #include "odin_types.h"
 #include "read_xml_arch_file.h"
 #include "string_cache.h"
diff --git a/parmys-plugin/include/odin_types.h b/parmys-plugin/include/odin_types.h
index 32f76e457..5f25f9cdf 100644
--- a/parmys-plugin/include/odin_types.h
+++ b/parmys-plugin/include/odin_types.h
@@ -104,7 +104,7 @@ struct global_args_t {
 
     //    bool fflegalize;     // makes flip-flops rising edge sensitive
     bool coarsen; // tells Odin-II that the input blif is coarse-grain
-                                      //    bool show_yosys_log; // Show Yosys output logs into the standard output stream
+                  //    bool show_yosys_log; // Show Yosys output logs into the standard output stream
 
     std::string adder_def; // DEPRECATED
 
diff --git a/parmys-plugin/parmys.cc b/parmys-plugin/parmys.cc
index 11303768c..05a0f0960 100644
--- a/parmys-plugin/parmys.cc
+++ b/parmys-plugin/parmys.cc
@@ -29,17 +29,15 @@
 #include "vtr_path.h"
 #include "vtr_util.h"
 
-#include "netlist_check.h"
-
 #include "partial_map.h"
 
 #include "netlist_visualizer.h"
 
 #include "parmys_resolve.hpp"
 
-#include "block_memory.h"
 #include "adder.h"
 #include "arch_util.h"
+#include "block_memory.h"
 #include "hard_block.h"
 #include "memory.h"
 #include "multiplier.h"
@@ -71,30 +69,32 @@ struct ParMYSPass : public Pass {
 
     static void hook_up_nets(netlist_t *odin_netlist, Hashtable *output_nets_hash)
     {
-        nnode_t **node_sets[] = {odin_netlist->internal_nodes, odin_netlist->ff_nodes, odin_netlist->top_output_nodes};
-        int counts[] = {odin_netlist->num_internal_nodes, odin_netlist->num_ff_nodes, odin_netlist->num_top_output_nodes};
-        int num_sets = 3;
-
-        int i;
-        for (i = 0; i < num_sets; i++) {
-            int j;
-            for (j = 0; j < counts[i]; j++) {
-                nnode_t *node = node_sets[i][j];
-                hook_up_node(node, output_nets_hash);
-            }
+        for (int i = 0; i < odin_netlist->num_internal_nodes; i++) {
+            nnode_t *node = odin_netlist->internal_nodes[i];
+            hook_up_node(node, output_nets_hash);
+        }
+
+        for (int i = 0; i < odin_netlist->num_ff_nodes; i++) {
+            nnode_t *node = odin_netlist->ff_nodes[i];
+            hook_up_node(node, output_nets_hash);
+        }
+
+        for (int i = 0; i < odin_netlist->num_top_output_nodes; i++) {
+            nnode_t *node = odin_netlist->top_output_nodes[i];
+            hook_up_node(node, output_nets_hash);
         }
     }
 
     static void hook_up_node(nnode_t *node, Hashtable *output_nets_hash)
     {
-        int j;
-        for (j = 0; j < node->num_input_pins; j++) {
+        for (int j = 0; j < node->num_input_pins; j++) {
             npin_t *input_pin = node->input_pins[j];
 
             nnet_t *output_net = (nnet_t *)output_nets_hash->get(input_pin->name);
 
-            if (!output_net)
+            if (!output_net) {
                 log_error("Error: Could not hook up the pin %s: not available, related node: %s.", input_pin->name, node->name);
+            }
             add_fanout_pin_to_net(output_net, input_pin);
         }
     }
@@ -300,7 +300,7 @@ struct ParMYSPass : public Pass {
         }
     }
 
-    static operation_list from_yosys_type(Yosys::RTLIL::IdString type)
+    static operation_list from_yosys_type(RTLIL::IdString type)
     {
         if (type == ID($add)) {
             return ADD;
@@ -342,7 +342,7 @@ struct ParMYSPass : public Pass {
             return DPRAM;
         }
 
-        if (Yosys::RTLIL::builtin_ff_cell_types().count(type)) {
+        if (RTLIL::builtin_ff_cell_types().count(type)) {
             return SKIP;
         }
 
@@ -369,10 +369,13 @@ struct ParMYSPass : public Pass {
 
         for (auto module : design->modules()) {
 
-            if (module->processes.size() != 0)
+            if (module->processes.size() != 0) {
                 log_error("Found unmapped processes in module %s: unmapped processes are not supported in parmys pass!\n", log_id(module->name));
-            if (module->memories.size() != 0)
+            }
+
+            if (module->memories.size() != 0) {
                 log_error("Found unmapped memories in module %s: unmapped memories are not supported in parmys pass!\n", log_id(module->name));
+            }
 
             if (module->name == RTLIL::escape_id(top_module_name)) {
                 top_module_name.clear();
@@ -426,7 +429,7 @@ struct ParMYSPass : public Pass {
             nnode_t *new_node = allocate_nnode(my_location);
 
             for (auto &param : cell->parameters) {
-                new_node->cell_parameters[Yosys::RTLIL::IdString(param.first)] = Yosys::Const(param.second);
+                new_node->cell_parameters[RTLIL::IdString(param.first)] = Const(param.second);
             }
 
             new_node->related_ast_node = NULL;
@@ -992,12 +995,15 @@ struct ParMYSPass : public Pass {
             Pass::call(design, "opt -full");
         }
 
-        if (design->top_module()->processes.size() != 0)
+        if (design->top_module()->processes.size() != 0) {
             log_error("Found unmapped processes in top module %s: unmapped processes are not supported in parmys pass!\n",
                       log_id(design->top_module()->name));
-        if (design->top_module()->memories.size() != 0)
+        }
+
+        if (design->top_module()->memories.size() != 0) {
             log_error("Found unmapped memories in module %s: unmapped memories are not supported in parmys pass!\n",
                       log_id(design->top_module()->name));
+        }
 
         design->sort();
 
@@ -1013,7 +1019,7 @@ struct ParMYSPass : public Pass {
 
                 bb.name = str(bb_module->name);
 
-                std::map<int, Yosys::RTLIL::Wire *> inputs, outputs;
+                std::map<int, RTLIL::Wire *> inputs, outputs;
 
                 for (auto wire : bb_module->wires()) {
                     if (wire->port_input)
@@ -1023,15 +1029,15 @@ struct ParMYSPass : public Pass {
                 }
 
                 for (auto &it : inputs) {
-                    Yosys::RTLIL::Wire *wire = it.second;
+                    RTLIL::Wire *wire = it.second;
                     for (int i = 0; i < wire->width; i++)
-                        bb.inputs.push_back(str(Yosys::RTLIL::SigSpec(wire, i)));
+                        bb.inputs.push_back(str(RTLIL::SigSpec(wire, i)));
                 }
 
                 for (auto &it : outputs) {
-                    Yosys::RTLIL::Wire *wire = it.second;
+                    RTLIL::Wire *wire = it.second;
                     for (int i = 0; i < wire->width; i++)
-                        bb.outputs.push_back(str(Yosys::RTLIL::SigSpec(wire, i)));
+                        bb.outputs.push_back(str(RTLIL::SigSpec(wire, i)));
                 }
 
                 black_boxes.push_back(bb);
@@ -1095,21 +1101,22 @@ struct ParMYSPass : public Pass {
         }
 
         for (auto bb_module : black_boxes) {
-            Yosys::Module *module = nullptr;
-            Yosys::hashlib::dict<Yosys::IdString, std::pair<int, bool>> wideports_cache;
+            Module *module = nullptr;
+            hashlib::dict<IdString, std::pair<int, bool>> wideports_cache;
 
-            module = new Yosys::Module;
+            module = new Module;
             module->name = RTLIL::escape_id(bb_module.name);
 
-            if (design->module(module->name))
-                log_error("Duplicate definition of module %s!\n", Yosys::log_id(module->name));
+            if (design->module(module->name)) {
+                log_error("Duplicate definition of module %s!\n", log_id(module->name));
+            }
 
             design->add(module);
 
             for (auto b_wire : bb_module.inputs) {
-                Yosys::RTLIL::Wire *wire = to_wire(b_wire, module);
+                RTLIL::Wire *wire = to_wire(b_wire, module);
                 wire->port_input = true;
-                std::pair<Yosys::RTLIL::IdString, int> wp = wideports_split(Yosys::RTLIL::unescape_id(b_wire));
+                std::pair<RTLIL::IdString, int> wp = wideports_split(RTLIL::unescape_id(b_wire));
                 if (!wp.first.empty() && wp.second >= 0) {
                     wideports_cache[wp.first].first = std::max(wideports_cache[wp.first].first, wp.second + 1);
                     wideports_cache[wp.first].second = true;
@@ -1117,9 +1124,9 @@ struct ParMYSPass : public Pass {
             }
 
             for (auto b_wire : bb_module.outputs) {
-                Yosys::RTLIL::Wire *wire = to_wire(Yosys::RTLIL::unescape_id(b_wire), module);
+                RTLIL::Wire *wire = to_wire(RTLIL::unescape_id(b_wire), module);
                 wire->port_output = true;
-                std::pair<Yosys::RTLIL::IdString, int> wp = wideports_split(Yosys::RTLIL::unescape_id(b_wire));
+                std::pair<RTLIL::IdString, int> wp = wideports_split(RTLIL::unescape_id(b_wire));
                 if (!wp.first.empty() && wp.second >= 0) {
                     wideports_cache[wp.first].first = std::max(wideports_cache[wp.first].first, wp.second + 1);
                     wideports_cache[wp.first].second = false;
@@ -1131,7 +1138,7 @@ struct ParMYSPass : public Pass {
             module->fixup_ports();
             wideports_cache.clear();
 
-            module->attributes[Yosys::ID::blackbox] = Yosys::RTLIL::Const(1);
+            module->attributes[ID::blackbox] = RTLIL::Const(1);
         }
 
         update_design(design, transformed);
diff --git a/parmys-plugin/parmys_arch.cc b/parmys-plugin/parmys_arch.cc
index d297e9f40..3b005e7e3 100644
--- a/parmys-plugin/parmys_arch.cc
+++ b/parmys-plugin/parmys_arch.cc
@@ -35,17 +35,18 @@ struct ParmysArchPass : public Pass {
         module = new Module;
         module->name = RTLIL::escape_id(hb->name);
 
-        if (design->module(module->name))
-            Yosys::log_error("Duplicate definition of module %s!\n", log_id(module->name));
+        if (design->module(module->name)) {
+            log_error("Duplicate definition of module %s!\n", log_id(module->name));
+        }
         design->add(module);
 
         t_model_ports *input_port = hb->inputs;
         while (input_port) {
             for (int i = 0; i < input_port->size; i++) {
                 std::string w_name = stringf("%s[%d]", input_port->name, i);
-                Yosys::RTLIL::Wire *wire = to_wire(w_name, module);
+                RTLIL::Wire *wire = to_wire(w_name, module);
                 wire->port_input = true;
-                std::pair<Yosys::RTLIL::IdString, int> wp = wideports_split(w_name);
+                std::pair<RTLIL::IdString, int> wp = wideports_split(w_name);
                 if (!wp.first.empty() && wp.second >= 0) {
                     wideports_cache[wp.first].first = std::max(wideports_cache[wp.first].first, wp.second + 1);
                     wideports_cache[wp.first].second = true;
@@ -59,9 +60,9 @@ struct ParmysArchPass : public Pass {
         while (output_port) {
             for (int i = 0; i < output_port->size; i++) {
                 std::string w_name = stringf("%s[%d]", output_port->name, i);
-                Yosys::RTLIL::Wire *wire = to_wire(w_name, module);
+                RTLIL::Wire *wire = to_wire(w_name, module);
                 wire->port_output = true;
-                std::pair<Yosys::RTLIL::IdString, int> wp = wideports_split(w_name);
+                std::pair<RTLIL::IdString, int> wp = wideports_split(w_name);
                 if (!wp.first.empty() && wp.second >= 0) {
                     wideports_cache[wp.first].first = std::max(wideports_cache[wp.first].first, wp.second + 1);
                     wideports_cache[wp.first].second = false;
@@ -131,4 +132,4 @@ struct ParmysArchPass : public Pass {
 
 } ParmysArchPass;
 
-PRIVATE_NAMESPACE_END
\ No newline at end of file
+PRIVATE_NAMESPACE_END
diff --git a/parmys-plugin/parmys_resolve.cc b/parmys-plugin/parmys_resolve.cc
index a547a56f1..85af152ac 100644
--- a/parmys-plugin/parmys_resolve.cc
+++ b/parmys-plugin/parmys_resolve.cc
@@ -22,8 +22,8 @@
 
 #include "netlist_utils.h"
 
-#include "block_memory.h"
 #include "adder.h"
+#include "block_memory.h"
 #include "memory.h"
 #include "multiplier.h"
 #include "parmys_resolve.hpp"
@@ -31,6 +31,8 @@
 
 #include "vtr_util.h"
 
+USING_YOSYS_NAMESPACE
+
 void dfs_resolve(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist);
 
 void resolve_node(nnode_t *node, short traverse_mark_number, netlist_t *netlist);
@@ -65,17 +67,15 @@ void resolve_top(netlist_t *netlist)
 
 void dfs_resolve(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist)
 {
-    int i, j;
-
     if (node->traverse_visited != traverse_mark_number) {
 
         node->traverse_visited = traverse_mark_number;
 
-        for (i = 0; i < node->num_output_pins; i++) {
+        for (int i = 0; i < node->num_output_pins; i++) {
             if (node->output_pins[i]->net) {
                 nnet_t *next_net = node->output_pins[i]->net;
                 if (next_net->fanout_pins) {
-                    for (j = 0; j < next_net->num_fanout_pins; j++) {
+                    for (int j = 0; j < next_net->num_fanout_pins; j++) {
                         if (next_net->fanout_pins[j]) {
                             if (next_net->fanout_pins[j]->node) {
                                 dfs_resolve(next_net->fanout_pins[j]->node, traverse_mark_number, netlist);
@@ -207,14 +207,13 @@ static void resolve_memory_nodes(nnode_t *node, uintptr_t traverse_mark_number,
 
 static void look_for_clocks(netlist_t *netlist)
 {
-    int i;
-    for (i = 0; i < netlist->num_top_input_nodes; i++) {
+    for (int i = 0; i < netlist->num_top_input_nodes; i++) {
         nnode_t *input_node = netlist->top_input_nodes[i];
         if (!strcmp(input_node->name, DEFAULT_CLOCK_NAME))
             input_node->type = CLOCK_NODE;
     }
 
-    for (i = 0; i < netlist->num_ff_nodes; i++) {
+    for (int i = 0; i < netlist->num_ff_nodes; i++) {
         oassert(netlist->ff_nodes[i]->input_pins[1]->net->num_driver_pins == 1);
         nnode_t *node = netlist->ff_nodes[i]->input_pins[1]->net->driver_pins[0]->node;
 
diff --git a/parmys-plugin/parmys_resolve.hpp b/parmys-plugin/parmys_resolve.hpp
index 5c93c43e4..07ad9aa06 100644
--- a/parmys-plugin/parmys_resolve.hpp
+++ b/parmys-plugin/parmys_resolve.hpp
@@ -20,6 +20,6 @@
 
 #define DEFAULT_CLOCK_NAME "GLOBAL_SIM_BASE_CLK"
 
-void resolve_top(netlist_t* netlist);
+void resolve_top(netlist_t *netlist);
 
 #endif // _PARMYS_RESOLVE_HPP_
diff --git a/parmys-plugin/parmys_update.cc b/parmys-plugin/parmys_update.cc
index cd3622a61..ff91db1c9 100644
--- a/parmys-plugin/parmys_update.cc
+++ b/parmys-plugin/parmys_update.cc
@@ -33,12 +33,13 @@
 #include "parmys_update.hpp"
 #include "parmys_utils.hpp"
 
-static void depth_first_traversal_to_design(short marker_value, Yosys::Module *module, netlist_t *netlist, Yosys::Design *design);
-static void depth_traverse_update_design(nnode_t *node, uintptr_t traverse_mark_number, Yosys::Module *module, netlist_t *netlist,
-                                         Yosys::Design *design);
-static void cell_node(nnode_t *node, short /*traverse_number*/, Yosys::Module *module, netlist_t *netlist, Yosys::Design *design);
+USING_YOSYS_NAMESPACE
 
-Yosys::Wire *wire_net_driver(Yosys::Module *module, nnode_t *node, nnet_t *net, long driver_idx)
+static void depth_first_traversal_to_design(short marker_value, Module *module, netlist_t *netlist, Design *design);
+static void depth_traverse_update_design(nnode_t *node, uintptr_t traverse_mark_number, Module *module, netlist_t *netlist, Design *design);
+static void cell_node(nnode_t *node, short /*traverse_number*/, Module *module, netlist_t *netlist, Design *design);
+
+Wire *wire_net_driver(Module *module, nnode_t *node, nnet_t *net, long driver_idx)
 {
     oassert(driver_idx < net->num_driver_pins);
     npin_t *driver = net->driver_pins[driver_idx];
@@ -60,7 +61,7 @@ Yosys::Wire *wire_net_driver(Yosys::Module *module, nnode_t *node, nnet_t *net,
     return to_wire(wire_name, module);
 }
 
-Yosys::Wire *wire_input_single_driver(Yosys::Module *module, nnode_t *node, long pin_idx)
+Wire *wire_input_single_driver(Module *module, nnode_t *node, long pin_idx)
 {
     oassert(pin_idx < node->num_input_pins);
     nnet_t *net = node->input_pins[pin_idx]->net;
@@ -72,63 +73,64 @@ Yosys::Wire *wire_input_single_driver(Yosys::Module *module, nnode_t *node, long
     }
 }
 
-Yosys::Wire *wire_output_pin(Yosys::Module *module, nnode_t *node)
+Wire *wire_output_pin(Module *module, nnode_t *node)
 {
-    Yosys::RTLIL::IdString wire_name(Yosys::stringf("\\%s", node->name));
-    Yosys::RTLIL::Wire *wire = module->wire(wire_name);
+    RTLIL::IdString wire_name(stringf("\\%s", node->name));
+    RTLIL::Wire *wire = module->wire(wire_name);
     if (wire == nullptr)
         wire = module->addWire(wire_name);
 
     return wire;
 }
 
-void update_design(Yosys::Design *design, netlist_t *netlist)
+void update_design(Design *design, netlist_t *netlist)
 {
-    Yosys::RTLIL::Module *module = nullptr;
+    RTLIL::Module *module = nullptr;
     std::string err_reason;
     int blif_maxnum = 0;
 
-    Yosys::hashlib::dict<Yosys::RTLIL::IdString, std::pair<int, bool>> wideports_cache;
+    hashlib::dict<RTLIL::IdString, std::pair<int, bool>> wideports_cache;
 
-    module = new Yosys::RTLIL::Module;
-    module->name = Yosys::RTLIL::escape_id(strtok(netlist->identifier, " \t\r\n"));
+    module = new RTLIL::Module;
+    module->name = RTLIL::escape_id(strtok(netlist->identifier, " \t\r\n"));
 
-    if (design->module(module->name))
-        Yosys::log_error("Duplicate definition of module %s\n", Yosys::log_id(module->name));
+    if (design->module(module->name)) {
+        log_error("Duplicate definition of module %s\n", log_id(module->name));
+    }
     design->add(module);
 
-    Yosys::RTLIL::SigSpec undef;
+    RTLIL::SigSpec undef;
     undef.append(to_wire("$undef", module));
-    module->connect(Yosys::RTLIL::SigSig(undef, Yosys::RTLIL::State::Sx));
-    Yosys::RTLIL::SigSpec vcc;
+    module->connect(RTLIL::SigSig(undef, RTLIL::State::Sx));
+    RTLIL::SigSpec vcc;
     vcc.append(to_wire("$true", module));
     // vcc.append(module->wire(ID($true)));
-    module->connect(Yosys::RTLIL::SigSig(vcc, Yosys::RTLIL::State::S1));
-    Yosys::RTLIL::SigSpec gnd;
+    module->connect(RTLIL::SigSig(vcc, RTLIL::State::S1));
+    RTLIL::SigSpec gnd;
     gnd.append(to_wire("$false", module));
-    module->connect(Yosys::RTLIL::SigSig(gnd, Yosys::RTLIL::State::S0));
+    module->connect(RTLIL::SigSig(gnd, RTLIL::State::S0));
 
-    for (long i = 0; i < netlist->num_top_input_nodes; i++) {
+    for (int i = 0; i < netlist->num_top_input_nodes; i++) {
         nnode_t *top_input_node = netlist->top_input_nodes[i];
-        Yosys::RTLIL::Wire *wire = to_wire(top_input_node->name, module);
+        RTLIL::Wire *wire = to_wire(top_input_node->name, module);
         wire->port_input = true;
 
-        std::pair<Yosys::RTLIL::IdString, int> wp = wideports_split(top_input_node->name);
+        std::pair<RTLIL::IdString, int> wp = wideports_split(top_input_node->name);
         if (!wp.first.empty() && wp.second >= 0) {
             wideports_cache[wp.first].first = std::max(wideports_cache[wp.first].first, wp.second + 1);
             wideports_cache[wp.first].second = true;
         }
     }
 
-    for (long i = 0; i < netlist->num_top_output_nodes; i++) {
+    for (int i = 0; i < netlist->num_top_output_nodes; i++) {
         nnode_t *top_output_node = netlist->top_output_nodes[i];
         if (!top_output_node->input_pins[0]->net->num_driver_pins) {
-            Yosys::log_warning("This output is undriven (%s) and will be removed\n", top_output_node->name);
+            log_warning("This output is undriven (%s) and will be removed\n", top_output_node->name);
         } else {
-            Yosys::RTLIL::Wire *wire = to_wire(top_output_node->name, module);
+            RTLIL::Wire *wire = to_wire(top_output_node->name, module);
             wire->port_output = true;
 
-            std::pair<Yosys::RTLIL::IdString, int> wp = wideports_split(top_output_node->name);
+            std::pair<RTLIL::IdString, int> wp = wideports_split(top_output_node->name);
             if (!wp.first.empty() && wp.second >= 0) {
                 wideports_cache[wp.first].first = std::max(wideports_cache[wp.first].first, wp.second + 1);
                 wideports_cache[wp.first].second = false;
@@ -139,16 +141,16 @@ void update_design(Yosys::Design *design, netlist_t *netlist)
     depth_first_traversal_to_design(100, module, netlist, design);
 
     /* connect all the outputs up to the last gate */
-    for (long i = 0; i < netlist->num_top_output_nodes; i++) {
+    for (int i = 0; i < netlist->num_top_output_nodes; i++) {
         nnode_t *node = netlist->top_output_nodes[i];
 
         if (node->input_pins[0]->net->num_fanout_pins > 0) {
             nnet_t *net = node->input_pins[0]->net;
             for (int j = 0; j < net->num_driver_pins; j++) {
-                Yosys::Wire *driver_wire = wire_net_driver(module, node, net, j);
-                Yosys::Wire *out_wire = to_wire(node->name, module);
+                Wire *driver_wire = wire_net_driver(module, node, net, j);
+                Wire *out_wire = to_wire(node->name, module);
 
-                Yosys::RTLIL::SigSpec input_sig, output_sig;
+                RTLIL::SigSpec input_sig, output_sig;
                 input_sig.append(driver_wire);
                 output_sig.append(out_wire);
 
@@ -164,30 +166,30 @@ void update_design(Yosys::Design *design, netlist_t *netlist)
 
     bool run_clean = true;
     if (run_clean) {
-        Yosys::Const buffer_lut(std::vector<Yosys::RTLIL::State>({Yosys::State::S0, Yosys::State::S1}));
-        std::vector<Yosys::Cell *> remove_cells;
+        Const buffer_lut(std::vector<RTLIL::State>({State::S0, State::S1}));
+        std::vector<Cell *> remove_cells;
 
         for (auto cell : module->cells())
-            if (cell->type == ID($lut) && cell->getParam(Yosys::ID::LUT) == buffer_lut) {
-                module->connect(cell->getPort(Yosys::ID::Y), cell->getPort(Yosys::ID::A));
+            if (cell->type == ID($lut) && cell->getParam(ID::LUT) == buffer_lut) {
+                module->connect(cell->getPort(ID::Y), cell->getPort(ID::A));
                 remove_cells.push_back(cell);
             }
 
         for (auto cell : remove_cells)
             module->remove(cell);
 
-        Yosys::Wire *true_wire = module->wire(ID($true));
-        Yosys::Wire *false_wire = module->wire(ID($false));
-        Yosys::Wire *undef_wire = module->wire(ID($undef));
+        Wire *true_wire = module->wire(ID($true));
+        Wire *false_wire = module->wire(ID($false));
+        Wire *undef_wire = module->wire(ID($undef));
 
         if (true_wire != nullptr)
-            module->rename(true_wire, Yosys::stringf("$true$%d", ++blif_maxnum));
+            module->rename(true_wire, stringf("$true$%d", ++blif_maxnum));
 
         if (false_wire != nullptr)
-            module->rename(false_wire, Yosys::stringf("$false$%d", ++blif_maxnum));
+            module->rename(false_wire, stringf("$false$%d", ++blif_maxnum));
 
         if (undef_wire != nullptr)
-            module->rename(undef_wire, Yosys::stringf("$undef$%d", ++blif_maxnum));
+            module->rename(undef_wire, stringf("$undef$%d", ++blif_maxnum));
 
         blif_maxnum = 0;
     }
@@ -200,10 +202,8 @@ void update_design(Yosys::Design *design, netlist_t *netlist)
     module = nullptr;
 }
 
-void depth_first_traversal_to_design(short marker_value, Yosys::Module *module, netlist_t *netlist, Yosys::Design *design)
+void depth_first_traversal_to_design(short marker_value, Module *module, netlist_t *netlist, Design *design)
 {
-    int i;
-
     if (!coarsen_cleanup) {
         netlist->gnd_node->name = vtr::strdup("$false");
         netlist->vcc_node->name = vtr::strdup("$true");
@@ -214,16 +214,15 @@ void depth_first_traversal_to_design(short marker_value, Yosys::Module *module,
     depth_traverse_update_design(netlist->vcc_node, marker_value, module, netlist, design);
     depth_traverse_update_design(netlist->pad_node, marker_value, module, netlist, design);
 
-    for (i = 0; i < netlist->num_top_input_nodes; i++) {
+    for (int i = 0; i < netlist->num_top_input_nodes; i++) {
         if (netlist->top_input_nodes[i] != NULL) {
             depth_traverse_update_design(netlist->top_input_nodes[i], marker_value, module, netlist, design);
         }
     }
 }
 
-void depth_traverse_update_design(nnode_t *node, uintptr_t traverse_mark_number, Yosys::Module *module, netlist_t *netlist, Yosys::Design *design)
+void depth_traverse_update_design(nnode_t *node, uintptr_t traverse_mark_number, Module *module, netlist_t *netlist, Design *design)
 {
-    int i, j;
     nnode_t *next_node;
     nnet_t *next_net;
 
@@ -234,12 +233,12 @@ void depth_traverse_update_design(nnode_t *node, uintptr_t traverse_mark_number,
 
         node->traverse_visited = traverse_mark_number;
 
-        for (i = 0; i < node->num_output_pins; i++) {
+        for (int i = 0; i < node->num_output_pins; i++) {
             if (node->output_pins[i]->net == NULL)
                 continue;
 
             next_net = node->output_pins[i]->net;
-            for (j = 0; j < next_net->num_fanout_pins; j++) {
+            for (int j = 0; j < next_net->num_fanout_pins; j++) {
                 if (next_net->fanout_pins[j] == NULL)
                     continue;
 
@@ -253,20 +252,20 @@ void depth_traverse_update_design(nnode_t *node, uintptr_t traverse_mark_number,
     }
 }
 
-void cell_node(nnode_t *node, short /*traverse_number*/, Yosys::Module *module, netlist_t *netlist, Yosys::Design *design)
+void cell_node(nnode_t *node, short /*traverse_number*/, Module *module, netlist_t *netlist, Design *design)
 {
     switch (node->type) {
     case GT:
-        Yosys::log_error("GT\n");
+        log_error("GT\n");
         break;
     case LT:
-        Yosys::log_error("LT\n");
+        log_error("LT\n");
         break;
     case BITWISE_NOT:
-        Yosys::log_error("BITWISE_NOT\n");
+        log_error("BITWISE_NOT\n");
         break;
     case BUF_NODE:
-        Yosys::log_error("BUF_NODE\n");
+        log_error("BUF_NODE\n");
         break;
     case LOGICAL_OR:
     case LOGICAL_AND:
@@ -281,7 +280,7 @@ void cell_node(nnode_t *node, short /*traverse_number*/, Yosys::Module *module,
     case LOGICAL_NAND:
     case LOGICAL_EQUAL:
     case NOT_EQUAL:
-        Yosys::log_error("LOGICAL_\n");
+        log_error("LOGICAL_\n");
         break;
     case MUX_2:
         define_MUX_function_yosys(node, module);
@@ -289,7 +288,6 @@ void cell_node(nnode_t *node, short /*traverse_number*/, Yosys::Module *module,
 
     case SMUX_2:
         define_SMUX_function_yosys(node, module);
-        // Yosys::log_error("SMUX_2\n");
         break;
 
     case FF_NODE:
@@ -316,7 +314,7 @@ void cell_node(nnode_t *node, short /*traverse_number*/, Yosys::Module *module,
         cell_hard_block(node, module, netlist, design);
         break;
     case CLOCK_NODE:
-        Yosys::log_error("CLOCK\n");
+        log_error("CLOCK\n");
         break;
     case INPUT_NODE:
     case OUTPUT_NODE:
@@ -345,25 +343,25 @@ void cell_node(nnode_t *node, short /*traverse_number*/, Yosys::Module *module,
     case GTE:
     case LTE:
     default:
-        Yosys::log_error("node should have been converted to softer version.");
+        log_error("node should have been converted to softer version.");
         break;
     }
 }
 
-void define_FF_yosys(nnode_t *node, Yosys::Module *module)
+void define_FF_yosys(nnode_t *node, Module *module)
 {
-    Yosys::Wire *d = wire_input_single_driver(module, node, 0);
-    Yosys::Wire *q = wire_output_pin(module, node);
+    Wire *d = wire_input_single_driver(module, node, 0);
+    Wire *q = wire_output_pin(module, node);
     const char *clk_edge_type_str = edge_type_blif_str(node->attributes->clk_edge_type, node->loc);
     char *edge = vtr::strdup(clk_edge_type_str);
-    Yosys::Wire *clock = wire_input_single_driver(module, node, 1);
+    Wire *clock = wire_input_single_driver(module, node, 1);
 
     if (clock == nullptr && edge != nullptr) {
         edge = nullptr;
     }
 
     if (node->initial_value == init_value_e::_0 || node->initial_value == init_value_e::_1)
-        q->attributes[Yosys::ID::init] = Yosys::Const(node->initial_value, 1);
+        q->attributes[ID::init] = Const(node->initial_value, 1);
 
     if (clock == nullptr)
         goto no_latch_clock;
@@ -382,74 +380,74 @@ void define_FF_yosys(nnode_t *node, Yosys::Module *module)
     }
 }
 
-void define_MUX_function_yosys(nnode_t *node, Yosys::Module *module)
+void define_MUX_function_yosys(nnode_t *node, Module *module)
 {
     oassert(node->num_output_pins == 1);
     oassert(node->num_input_port_sizes == 2);
     oassert(node->input_port_sizes[0] == node->input_port_sizes[1]);
 
-    Yosys::RTLIL::SigSpec input_sig_A, input_sig_B, buf_sig_M, output_sig;
+    RTLIL::SigSpec input_sig_A, input_sig_B, buf_sig_M, output_sig;
 
     for (int i = 0; i < node->input_port_sizes[0]; i++) {
         nnet_t *input_net = node->input_pins[i]->net;
-        Yosys::Wire *driver_wire = wire_net_driver(module, node, input_net, 0);
+        Wire *driver_wire = wire_net_driver(module, node, input_net, 0);
 
         input_sig_A.append(driver_wire);
     }
 
     for (int i = node->input_port_sizes[0]; i < node->num_input_pins; i++) {
         nnet_t *input_net = node->input_pins[i]->net;
-        Yosys::Wire *driver_wire = wire_net_driver(module, node, input_net, 0);
+        Wire *driver_wire = wire_net_driver(module, node, input_net, 0);
 
         input_sig_B.append(driver_wire);
     }
 
     for (int i = 0; i < node->input_port_sizes[0]; i++) {
         std::string mid_buf_name = op_node_name(BUF_NODE, node->name);
-        Yosys::RTLIL::Wire *buf_wire = to_wire(mid_buf_name, module);
+        RTLIL::Wire *buf_wire = to_wire(mid_buf_name, module);
         buf_sig_M.append(buf_wire);
     }
 
-    Yosys::RTLIL::Wire *out_wire = to_wire(node->name, module);
+    RTLIL::Wire *out_wire = to_wire(node->name, module);
     output_sig.append(out_wire);
 
-    Yosys::IdString celltype_1 = ID($and);
-    Yosys::RTLIL::Cell *cell_1 = module->addCell(NEW_ID, celltype_1);
-    cell_1->setPort(Yosys::ID::A, input_sig_A);
-    cell_1->parameters[Yosys::ID::A_WIDTH] = Yosys::RTLIL::Const(int(node->input_port_sizes[0]));
-    cell_1->parameters[Yosys::ID::A_SIGNED] = Yosys::RTLIL::Const(false);
-    cell_1->setPort(Yosys::ID::B, input_sig_B);
-    cell_1->parameters[Yosys::ID::B_WIDTH] = Yosys::RTLIL::Const(int(node->input_port_sizes[1]));
-    cell_1->parameters[Yosys::ID::B_SIGNED] = Yosys::RTLIL::Const(false);
-    cell_1->setPort(Yosys::ID::Y, buf_sig_M);
-    cell_1->parameters[Yosys::ID::Y_WIDTH] = Yosys::RTLIL::Const(int(node->input_port_sizes[0]));
-
-    Yosys::IdString celltype_2 = ID($reduce_or);
-    Yosys::RTLIL::Cell *cell_2 = module->addCell(NEW_ID, celltype_2);
-    cell_2->setPort(Yosys::ID::A, buf_sig_M);
-    cell_2->parameters[Yosys::ID::A_WIDTH] = Yosys::RTLIL::Const(int(node->input_port_sizes[0]));
-    cell_2->parameters[Yosys::ID::A_SIGNED] = Yosys::RTLIL::Const(false);
-    cell_2->setPort(Yosys::ID::Y, output_sig);
-    cell_2->parameters[Yosys::ID::Y_WIDTH] = Yosys::RTLIL::Const(int(node->num_output_pins));
+    IdString celltype_1 = ID($and);
+    RTLIL::Cell *cell_1 = module->addCell(NEW_ID, celltype_1);
+    cell_1->setPort(ID::A, input_sig_A);
+    cell_1->parameters[ID::A_WIDTH] = RTLIL::Const(int(node->input_port_sizes[0]));
+    cell_1->parameters[ID::A_SIGNED] = RTLIL::Const(false);
+    cell_1->setPort(ID::B, input_sig_B);
+    cell_1->parameters[ID::B_WIDTH] = RTLIL::Const(int(node->input_port_sizes[1]));
+    cell_1->parameters[ID::B_SIGNED] = RTLIL::Const(false);
+    cell_1->setPort(ID::Y, buf_sig_M);
+    cell_1->parameters[ID::Y_WIDTH] = RTLIL::Const(int(node->input_port_sizes[0]));
+
+    IdString celltype_2 = ID($reduce_or);
+    RTLIL::Cell *cell_2 = module->addCell(NEW_ID, celltype_2);
+    cell_2->setPort(ID::A, buf_sig_M);
+    cell_2->parameters[ID::A_WIDTH] = RTLIL::Const(int(node->input_port_sizes[0]));
+    cell_2->parameters[ID::A_SIGNED] = RTLIL::Const(false);
+    cell_2->setPort(ID::Y, output_sig);
+    cell_2->parameters[ID::Y_WIDTH] = RTLIL::Const(int(node->num_output_pins));
 }
 
-void define_logical_function_yosys(nnode_t *node, Yosys::Module *module)
+void define_logical_function_yosys(nnode_t *node, Module *module)
 {
-    Yosys::RTLIL::SigSpec input_sig, output_sig;
+    RTLIL::SigSpec input_sig, output_sig;
 
     for (int i = 0; i < node->num_input_pins; i++) {
         nnet_t *input_net = node->input_pins[i]->net;
-        Yosys::Wire *driver_wire = wire_net_driver(module, node, input_net, 0); // 0 TODO?
+        Wire *driver_wire = wire_net_driver(module, node, input_net, 0); // 0 TODO?
 
         input_sig.append(driver_wire);
     }
 
-    Yosys::RTLIL::Wire *out_wire = to_wire(node->name, module);
+    RTLIL::Wire *out_wire = to_wire(node->name, module);
     output_sig.append(out_wire);
 
     oassert(node->num_output_pins == 1);
 
-    Yosys::IdString celltype;
+    IdString celltype;
 
     /* print out the blif definition of this gate */
     switch (node->type) {
@@ -496,58 +494,58 @@ void define_logical_function_yosys(nnode_t *node, Yosys::Module *module)
         break;
     }
 
-    Yosys::RTLIL::Cell *cell = module->addCell(NEW_ID, celltype);
+    RTLIL::Cell *cell = module->addCell(NEW_ID, celltype);
 
-    cell->setPort(Yosys::ID::A, input_sig);
-    cell->setPort(Yosys::ID::Y, output_sig);
+    cell->setPort(ID::A, input_sig);
+    cell->setPort(ID::Y, output_sig);
 
     if (node->type == CARRY_FUNC) {
-        cell->parameters[Yosys::ID::WIDTH] = Yosys::RTLIL::Const(input_sig.size());
-        cell->parameters[Yosys::ID::LUT] = Yosys::RTLIL::Const(Yosys::RTLIL::State::Sx, 1 << input_sig.size());
-        Yosys::RTLIL::Const *lutptr = NULL;
-        lutptr = &cell->parameters.at(Yosys::ID::LUT);
+        cell->parameters[ID::WIDTH] = RTLIL::Const(input_sig.size());
+        cell->parameters[ID::LUT] = RTLIL::Const(RTLIL::State::Sx, 1 << input_sig.size());
+        RTLIL::Const *lutptr = NULL;
+        lutptr = &cell->parameters.at(ID::LUT);
         for (int i = 0; i < (1 << node->num_input_pins); i++) {
             if (i == 3 || i == 5 || i == 6 || i == 7) //"011 1\n101 1\n110 1\n111 1\n"
-                lutptr->bits.at(i) = Yosys::RTLIL::State::S1;
+                lutptr->bits.at(i) = RTLIL::State::S1;
             else
-                lutptr->bits.at(i) = Yosys::RTLIL::State::S0;
+                lutptr->bits.at(i) = RTLIL::State::S0;
         }
     } else {
-        cell->parameters[Yosys::ID::A_WIDTH] = Yosys::RTLIL::Const(int(node->num_input_pins));
-        cell->parameters[Yosys::ID::Y_WIDTH] = Yosys::RTLIL::Const(int(node->num_output_pins));
-        cell->parameters[Yosys::ID::A_SIGNED] = Yosys::RTLIL::Const(false);
+        cell->parameters[ID::A_WIDTH] = RTLIL::Const(int(node->num_input_pins));
+        cell->parameters[ID::Y_WIDTH] = RTLIL::Const(int(node->num_output_pins));
+        cell->parameters[ID::A_SIGNED] = RTLIL::Const(false);
     }
 }
 
-void define_SMUX_function_yosys(nnode_t *node, Yosys::Module *module)
+void define_SMUX_function_yosys(nnode_t *node, Module *module)
 {
-    Yosys::RTLIL::SigSpec input_sig_A, input_sig_B, input_sig_S, output_sig;
+    RTLIL::SigSpec input_sig_A, input_sig_B, input_sig_S, output_sig;
 
     oassert(node->num_input_pins == 3); // s a b
 
     nnet_t *s_net = node->input_pins[0]->net;
-    Yosys::Wire *s_wire = wire_net_driver(module, node, s_net, 0);
+    Wire *s_wire = wire_net_driver(module, node, s_net, 0);
     input_sig_S.append(s_wire);
 
     nnet_t *a_net = node->input_pins[1]->net;
-    Yosys::Wire *a_wire = wire_net_driver(module, node, a_net, 0);
+    Wire *a_wire = wire_net_driver(module, node, a_net, 0);
     input_sig_A.append(a_wire);
 
     nnet_t *b_net = node->input_pins[2]->net;
-    Yosys::Wire *b_wire = wire_net_driver(module, node, b_net, 0);
+    Wire *b_wire = wire_net_driver(module, node, b_net, 0);
     input_sig_B.append(b_wire);
 
     oassert(node->num_output_pins == 1); // y
-    Yosys::RTLIL::Wire *out_wire = to_wire(node->name, module);
+    RTLIL::Wire *out_wire = to_wire(node->name, module);
     output_sig.append(out_wire);
 
-    Yosys::IdString celltype = ID($mux);
+    IdString celltype = ID($mux);
     ;
 
-    Yosys::RTLIL::Cell *cell = module->addCell(NEW_ID, celltype);
-    cell->parameters[Yosys::ID::WIDTH] = Yosys::RTLIL::Const(int(1));
-    cell->setPort(Yosys::ID::S, input_sig_S);
-    cell->setPort(Yosys::ID::A, input_sig_A);
-    cell->setPort(Yosys::ID::B, input_sig_B);
-    cell->setPort(Yosys::ID::Y, output_sig);
-}
\ No newline at end of file
+    RTLIL::Cell *cell = module->addCell(NEW_ID, celltype);
+    cell->parameters[ID::WIDTH] = RTLIL::Const(int(1));
+    cell->setPort(ID::S, input_sig_S);
+    cell->setPort(ID::A, input_sig_A);
+    cell->setPort(ID::B, input_sig_B);
+    cell->setPort(ID::Y, output_sig);
+}
diff --git a/parmys-plugin/parmys_update.hpp b/parmys-plugin/parmys_update.hpp
index 9ace088c9..bfdcd95cc 100644
--- a/parmys-plugin/parmys_update.hpp
+++ b/parmys-plugin/parmys_update.hpp
@@ -20,12 +20,14 @@
 
 #include "odin_types.h"
 
+USING_YOSYS_NAMESPACE
+
 #define DEFAULT_CLOCK_NAME "GLOBAL_SIM_BASE_CLK"
 
-void define_logical_function_yosys(nnode_t *node, Yosys::Module *module);
-void update_design(Yosys::Design *design, netlist_t *netlist);
-void define_MUX_function_yosys(nnode_t *node, Yosys::Module *module);
-void define_SMUX_function_yosys(nnode_t *node, Yosys::Module *module);
-void define_FF_yosys(nnode_t *node, Yosys::Module *module);
+void define_logical_function_yosys(nnode_t *node, Module *module);
+void update_design(Design *design, netlist_t *netlist);
+void define_MUX_function_yosys(nnode_t *node, Module *module);
+void define_SMUX_function_yosys(nnode_t *node, Module *module);
+void define_FF_yosys(nnode_t *node, Module *module);
 
 #endif //_PARMYS_UPDATE_HPP_
\ No newline at end of file
diff --git a/parmys-plugin/parmys_utils.cc b/parmys-plugin/parmys_utils.cc
index 7ed397973..74f133fbe 100644
--- a/parmys-plugin/parmys_utils.cc
+++ b/parmys-plugin/parmys_utils.cc
@@ -17,10 +17,12 @@
  */
 #include "parmys_utils.hpp"
 
-Yosys::Wire *to_wire(std::string wire_name, Yosys::Module *module)
+USING_YOSYS_NAMESPACE
+
+Wire *to_wire(std::string wire_name, Module *module)
 {
-    Yosys::IdString wire_id = Yosys::RTLIL::escape_id(wire_name);
-    Yosys::Wire *wire = module->wire(wire_id);
+    IdString wire_id = RTLIL::escape_id(wire_name);
+    Wire *wire = module->wire(wire_id);
 
     if (wire == nullptr)
         wire = module->addWire(wire_id);
@@ -28,14 +30,14 @@ Yosys::Wire *to_wire(std::string wire_name, Yosys::Module *module)
     return wire;
 }
 
-std::pair<Yosys::RTLIL::IdString, int> wideports_split(std::string name)
+std::pair<RTLIL::IdString, int> wideports_split(std::string name)
 {
     int pos = -1;
 
     if (name.empty() || name.back() != ']')
         goto failed;
 
-    for (int i = 0; i + 1 < Yosys::GetSize(name); i++) {
+    for (int i = 0; i + 1 < GetSize(name); i++) {
         if (name[i] == '[')
             pos = i;
         else if (name[i] != '-' && (name[i] < '0' || name[i] > '9'))
@@ -49,49 +51,48 @@ std::pair<Yosys::RTLIL::IdString, int> wideports_split(std::string name)
     }
 
     if (pos >= 0)
-        return std::pair<Yosys::RTLIL::IdString, int>("\\" + name.substr(0, pos), atoi(name.c_str() + pos + 1));
+        return std::pair<RTLIL::IdString, int>("\\" + name.substr(0, pos), atoi(name.c_str() + pos + 1));
 
 failed:
-    return std::pair<Yosys::RTLIL::IdString, int>(Yosys::RTLIL::IdString(), 0);
+    return std::pair<RTLIL::IdString, int>(RTLIL::IdString(), 0);
 }
 
-const std::string str(Yosys::RTLIL::SigBit sig)
+const std::string str(RTLIL::SigBit sig)
 {
     // cstr_bits_seen.insert(sig);
 
     if (sig.wire == NULL) {
-        if (sig == Yosys::RTLIL::State::S0)
+        if (sig == RTLIL::State::S0)
             return "$false";
-        if (sig == Yosys::RTLIL::State::S1)
+        if (sig == RTLIL::State::S1)
             return "$true";
         return "$undef";
     }
 
-    std::string str = Yosys::RTLIL::unescape_id(sig.wire->name);
+    std::string str = RTLIL::unescape_id(sig.wire->name);
     for (size_t i = 0; i < str.size(); i++)
         if (str[i] == '#' || str[i] == '=' || str[i] == '<' || str[i] == '>')
             str[i] = '?';
 
     if (sig.wire->width != 1)
-        str +=
-          Yosys::stringf("[%d]", sig.wire->upto ? sig.wire->start_offset + sig.wire->width - sig.offset - 1 : sig.wire->start_offset + sig.offset);
+        str += stringf("[%d]", sig.wire->upto ? sig.wire->start_offset + sig.wire->width - sig.offset - 1 : sig.wire->start_offset + sig.offset);
 
     return str;
 }
 
-const std::string str(Yosys::RTLIL::IdString id)
+const std::string str(RTLIL::IdString id)
 {
-    std::string str = Yosys::RTLIL::unescape_id(id);
+    std::string str = RTLIL::unescape_id(id);
     for (size_t i = 0; i < str.size(); i++)
         if (str[i] == '#' || str[i] == '=' || str[i] == '<' || str[i] == '>')
             str[i] = '?';
     return str;
 }
 
-void handle_cell_wideports_cache(Yosys::hashlib::dict<Yosys::RTLIL::IdString, Yosys::hashlib::dict<int, Yosys::SigBit>> *cell_wideports_cache,
-                                 Yosys::Design *design, Yosys::Module *module, Yosys::Cell *cell)
+void handle_cell_wideports_cache(hashlib::dict<RTLIL::IdString, hashlib::dict<int, SigBit>> *cell_wideports_cache, Design *design, Module *module,
+                                 Cell *cell)
 {
-    Yosys::RTLIL::Module *cell_mod = design->module(cell->type);
+    RTLIL::Module *cell_mod = design->module(cell->type);
     for (auto &it : *cell_wideports_cache) {
         int width = 0;
         int offset = 0;
@@ -100,7 +101,7 @@ void handle_cell_wideports_cache(Yosys::hashlib::dict<Yosys::RTLIL::IdString, Yo
             width = std::max(width, b.first + 1);
 
         if (cell_mod) {
-            Yosys::Wire *cell_port = cell_mod->wire(it.first);
+            Wire *cell_port = cell_mod->wire(it.first);
             if (cell_port && (cell_port->port_input || cell_port->port_output)) {
                 offset = cell_port->start_offset;
                 upto = cell_port->upto;
@@ -108,7 +109,7 @@ void handle_cell_wideports_cache(Yosys::hashlib::dict<Yosys::RTLIL::IdString, Yo
             }
         }
 
-        Yosys::SigSpec sig;
+        SigSpec sig;
 
         for (int i = 0; i < width; i++) {
             int idx = offset + (upto ? width - 1 - i : i);
@@ -122,28 +123,28 @@ void handle_cell_wideports_cache(Yosys::hashlib::dict<Yosys::RTLIL::IdString, Yo
     }
 }
 
-void handle_wideports_cache(Yosys::hashlib::dict<Yosys::RTLIL::IdString, std::pair<int, bool>> *wideports_cache, Yosys::Module *module)
+void handle_wideports_cache(hashlib::dict<RTLIL::IdString, std::pair<int, bool>> *wideports_cache, Module *module)
 {
     for (auto &wp : *wideports_cache) {
         auto name = wp.first;
         int width = wp.second.first;
         bool isinput = wp.second.second;
 
-        Yosys::RTLIL::Wire *wire = module->addWire(name, width);
+        RTLIL::Wire *wire = module->addWire(name, width);
         wire->port_input = isinput;
         wire->port_output = !isinput;
 
         for (int i = 0; i < width; i++) {
-            Yosys::RTLIL::IdString other_name = name.str() + Yosys::stringf("[%d]", i);
-            Yosys::RTLIL::Wire *other_wire = module->wire(other_name);
+            RTLIL::IdString other_name = name.str() + stringf("[%d]", i);
+            RTLIL::Wire *other_wire = module->wire(other_name);
             if (other_wire) {
                 other_wire->port_input = false;
                 other_wire->port_output = false;
                 if (isinput)
-                    module->connect(other_wire, Yosys::SigSpec(wire, i));
+                    module->connect(other_wire, SigSpec(wire, i));
                 else
-                    module->connect(Yosys::SigSpec(wire, i), other_wire);
+                    module->connect(SigSpec(wire, i), other_wire);
             }
         }
     }
-}
\ No newline at end of file
+}
diff --git a/parmys-plugin/parmys_utils.hpp b/parmys-plugin/parmys_utils.hpp
index ae884353c..c40e309c4 100644
--- a/parmys-plugin/parmys_utils.hpp
+++ b/parmys-plugin/parmys_utils.hpp
@@ -20,12 +20,14 @@
 
 #include "odin_types.h"
 
-Yosys::Wire *to_wire(std::string wire_name, Yosys::Module *module);
-std::pair<Yosys::RTLIL::IdString, int> wideports_split(std::string name);
-const std::string str(Yosys::RTLIL::SigBit sig);
-const std::string str(Yosys::RTLIL::IdString id);
-void handle_cell_wideports_cache(Yosys::hashlib::dict<Yosys::RTLIL::IdString, Yosys::hashlib::dict<int, Yosys::SigBit>> *cell_wideports_cache,
-                                 Yosys::Design *design, Yosys::Module *module, Yosys::Cell *cell);
-void handle_wideports_cache(Yosys::hashlib::dict<Yosys::RTLIL::IdString, std::pair<int, bool>> *wideports_cache, Yosys::Module *module);
+USING_YOSYS_NAMESPACE
+
+Wire *to_wire(std::string wire_name, Module *module);
+std::pair<RTLIL::IdString, int> wideports_split(std::string name);
+const std::string str(RTLIL::SigBit sig);
+const std::string str(RTLIL::IdString id);
+void handle_cell_wideports_cache(hashlib::dict<RTLIL::IdString, hashlib::dict<int, SigBit>> *cell_wideports_cache, Design *design, Module *module,
+                                 Cell *cell);
+void handle_wideports_cache(hashlib::dict<RTLIL::IdString, std::pair<int, bool>> *wideports_cache, Module *module);
 
 #endif //_PARMYS_UTILS_HPP_
\ No newline at end of file
diff --git a/parmys-plugin/src/adder.cc b/parmys-plugin/src/adder.cc
index b475ba3d5..db8a0e3ba 100644
--- a/parmys-plugin/src/adder.cc
+++ b/parmys-plugin/src/adder.cc
@@ -153,7 +153,7 @@ void declare_hard_adder(nnode_t *node)
 void instantiate_hard_adder(nnode_t *node, short mark, netlist_t * /*netlist*/)
 {
     char *new_name;
-    int len, sanity, i;
+    int len, sanity;
 
     declare_hard_adder(node);
 
@@ -175,7 +175,7 @@ void instantiate_hard_adder(nnode_t *node, short mark, netlist_t * /*netlist*/)
         oassert(false);
 
     /* Give names to the output pins */
-    for (i = 0; i < node->num_output_pins; i++) {
+    for (int i = 0; i < node->num_output_pins; i++) {
         if (node->output_pins[i]->name == NULL) {
             len = strlen(node->name) + 20; /* 6 chars for pin idx */
             new_name = (char *)vtr::malloc(len);
@@ -372,7 +372,6 @@ void define_add_function_yosys(nnode_t *node, Yosys::Module *module, Yosys::Desi
  *---------------------------------------------------------------------*/
 void init_split_adder(nnode_t *node, nnode_t *ptr, int a, int sizea, int b, int sizeb, int cin, int cout, int index, int flag, netlist_t *netlist)
 {
-    int i;
     int flaga = 0, flagb = 0;
     int current_sizea, current_sizeb;
     int aa = 0, bb = 0, num = 0;
@@ -449,27 +448,27 @@ void init_split_adder(nnode_t *node, nnode_t *ptr, int a, int sizea, int b, int
     ptr->input_pins = (npin_t **)vtr::malloc(sizeof(void *) * (current_sizea + current_sizeb + cin));
     // if flaga or flagb = 1, the input pins should be empty.
     if (flaga == 1) {
-        for (i = 0; i < current_sizea; i++)
+        for (int i = 0; i < current_sizea; i++)
             ptr->input_pins[i] = NULL;
     } else if (flaga == 2) {
         if (index == 0) {
             ptr->input_pins[0] = NULL;
             if (sizea > 1) {
-                for (i = 1; i < aa; i++) {
+                for (int i = 1; i < aa; i++) {
                     ptr->input_pins[i] = node->input_pins[i + index * sizea - 1];
                     ptr->input_pins[i]->node = ptr;
                     ptr->input_pins[i]->pin_node_idx = i;
                 }
-                for (i = 0; i < (sizea - aa); i++)
+                for (int i = 0; i < (sizea - aa); i++)
                     ptr->input_pins[i + aa] = NULL;
             }
         } else {
-            for (i = 0; i < aa; i++) {
+            for (int i = 0; i < aa; i++) {
                 ptr->input_pins[i] = node->input_pins[i + index * sizea - 1];
                 ptr->input_pins[i]->node = ptr;
                 ptr->input_pins[i]->pin_node_idx = i;
             }
-            for (i = 0; i < (sizea - aa); i++)
+            for (int i = 0; i < (sizea - aa); i++)
                 ptr->input_pins[i + aa] = NULL;
         }
     } else {
@@ -477,14 +476,14 @@ void init_split_adder(nnode_t *node, nnode_t *ptr, int a, int sizea, int b, int
             if (flag == 0) {
                 ptr->input_pins[0] = NULL;
                 if (current_sizea > 1) {
-                    for (i = 1; i < current_sizea; i++) {
+                    for (int i = 1; i < current_sizea; i++) {
                         ptr->input_pins[i] = node->input_pins[i - 1];
                         ptr->input_pins[i]->node = ptr;
                         ptr->input_pins[i]->pin_node_idx = i;
                     }
                 }
             } else {
-                for (i = 0; i < current_sizea; i++) {
+                for (int i = 0; i < current_sizea; i++) {
                     ptr->input_pins[i] = node->input_pins[i];
                     ptr->input_pins[i]->node = ptr;
                     ptr->input_pins[i]->pin_node_idx = i;
@@ -492,7 +491,7 @@ void init_split_adder(nnode_t *node, nnode_t *ptr, int a, int sizea, int b, int
             }
         } else {
             if (flag == 0) {
-                for (i = 0; i < current_sizea; i++) {
+                for (int i = 0; i < current_sizea; i++) {
                     // use the offset to compensate for the dummy adder added at start of the chain
                     ptr->input_pins[i] = node->input_pins[i + index * sizea - offset];
                     ptr->input_pins[i]->node = ptr;
@@ -503,7 +502,7 @@ void init_split_adder(nnode_t *node, nnode_t *ptr, int a, int sizea, int b, int
                     connect_nodes(netlist->gnd_node, 0, ptr, 0);
                 else {
                     num = node->input_port_sizes[0];
-                    for (i = 0; i < current_sizea; i++) {
+                    for (int i = 0; i < current_sizea; i++) {
                         ptr->input_pins[i] = node->input_pins[i + num - current_sizea];
                         ptr->input_pins[i]->node = ptr;
                         ptr->input_pins[i]->pin_node_idx = i;
@@ -514,27 +513,27 @@ void init_split_adder(nnode_t *node, nnode_t *ptr, int a, int sizea, int b, int
     }
 
     if (flagb == 1) {
-        for (i = 0; i < current_sizeb; i++)
+        for (int i = 0; i < current_sizeb; i++)
             ptr->input_pins[i + current_sizeb] = NULL;
     } else if (flagb == 2) {
         if (index == 0) {
             ptr->input_pins[sizea] = NULL;
             if (current_sizeb > 1) {
-                for (i = 1; i < bb; i++) {
+                for (int i = 1; i < bb; i++) {
                     ptr->input_pins[i + current_sizea] = node->input_pins[i + a + index * sizeb - 1];
                     ptr->input_pins[i + current_sizea]->node = ptr;
                     ptr->input_pins[i + current_sizea]->pin_node_idx = i + current_sizea;
                 }
-                for (i = 0; i < (sizeb - bb); i++)
+                for (int i = 0; i < (sizeb - bb); i++)
                     ptr->input_pins[i + current_sizea + bb] = NULL;
             }
         } else {
-            for (i = 0; i < bb; i++) {
+            for (int i = 0; i < bb; i++) {
                 ptr->input_pins[i + current_sizea] = node->input_pins[i + a + index * sizeb - 1];
                 ptr->input_pins[i + current_sizea]->node = ptr;
                 ptr->input_pins[i + current_sizea]->pin_node_idx = i + current_sizea;
             }
-            for (i = 0; i < (sizeb - bb); i++)
+            for (int i = 0; i < (sizeb - bb); i++)
                 ptr->input_pins[i + current_sizea + bb] = NULL;
         }
     } else {
@@ -542,14 +541,14 @@ void init_split_adder(nnode_t *node, nnode_t *ptr, int a, int sizea, int b, int
             if (flag == 0) {
                 ptr->input_pins[sizea] = NULL;
                 if (current_sizeb > 1) {
-                    for (i = 1; i < current_sizeb; i++) {
+                    for (int i = 1; i < current_sizeb; i++) {
                         ptr->input_pins[i + current_sizea] = node->input_pins[i + a + index * sizeb - 1];
                         ptr->input_pins[i + current_sizea]->node = ptr;
                         ptr->input_pins[i + current_sizea]->pin_node_idx = i + current_sizea;
                     }
                 }
             } else {
-                for (i = 0; i < current_sizeb; i++) {
+                for (int i = 0; i < current_sizeb; i++) {
                     ptr->input_pins[i + current_sizea] = node->input_pins[i + a];
                     ptr->input_pins[i + current_sizea]->node = ptr;
                     ptr->input_pins[i + current_sizea]->pin_node_idx = i + current_sizea;
@@ -557,7 +556,7 @@ void init_split_adder(nnode_t *node, nnode_t *ptr, int a, int sizea, int b, int
             }
         } else {
             if (flag == 0) {
-                for (i = 0; i < current_sizeb; i++) {
+                for (int i = 0; i < current_sizeb; i++) {
                     ptr->input_pins[i + current_sizea] = node->input_pins[i + a + index * sizeb - offset];
                     ptr->input_pins[i + current_sizea]->node = ptr;
                     ptr->input_pins[i + current_sizea]->pin_node_idx = i + current_sizea;
@@ -567,7 +566,7 @@ void init_split_adder(nnode_t *node, nnode_t *ptr, int a, int sizea, int b, int
                     connect_nodes(netlist->gnd_node, 0, ptr, current_sizea);
                 else {
                     num = node->input_port_sizes[0] + node->input_port_sizes[1];
-                    for (i = 0; i < current_sizeb; i++) {
+                    for (int i = 0; i < current_sizeb; i++) {
                         ptr->input_pins[i + current_sizea] = node->input_pins[i + num - current_sizeb];
                         ptr->input_pins[i + current_sizea]->node = ptr;
                         ptr->input_pins[i + current_sizea]->pin_node_idx = i + current_sizea;
@@ -578,7 +577,7 @@ void init_split_adder(nnode_t *node, nnode_t *ptr, int a, int sizea, int b, int
     }
 
     /* Carry_in should be NULL*/
-    for (i = 0; i < cin; i++) {
+    for (int i = 0; i < cin; i++) {
         ptr->input_pins[i + current_sizea + current_sizeb] = NULL;
     }
 
@@ -591,7 +590,7 @@ void init_split_adder(nnode_t *node, nnode_t *ptr, int a, int sizea, int b, int
 
     ptr->num_output_pins = output;
     ptr->output_pins = (npin_t **)vtr::malloc(sizeof(void *) * output);
-    for (i = 0; i < output; i++)
+    for (int i = 0; i < output; i++)
         ptr->output_pins[i] = NULL;
 
     return;
@@ -613,7 +612,6 @@ void init_split_adder(nnode_t *node, nnode_t *ptr, int a, int sizea, int b, int
 void split_adder(nnode_t *nodeo, int a, int b, int sizea, int sizeb, int cin, int cout, int count, netlist_t *netlist)
 {
     nnode_t **node;
-    int i, j;
     int num, lefta = 0, leftb = 0;
     int max_num = 0;
     int flag = 0;
@@ -629,7 +627,7 @@ void split_adder(nnode_t *nodeo, int a, int b, int sizea, int sizeb, int cin, in
 
     node = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * (count));
 
-    for (i = 0; i < count; i++) {
+    for (int i = 0; i < count; i++) {
         node[i] = allocate_nnode(nodeo->loc);
         node[i]->name = (char *)vtr::malloc(strlen(nodeo->name) + 20);
         odin_sprintf(node[i]->name, "%s-%d", nodeo->name, i);
@@ -699,9 +697,9 @@ void split_adder(nnode_t *nodeo, int a, int b, int sizea, int sizeb, int cin, in
     }
 
     // if any input pins beside first cin pins are NULL, connect those pins to unconn
-    for (i = 0; i < count; i++) {
+    for (int i = 0; i < count; i++) {
         num = node[i]->num_input_pins;
-        for (j = 0; j < num - 1; j++) {
+        for (int j = 0; j < num - 1; j++) {
             if (node[i]->input_pins[j] == NULL)
                 connect_nodes(netlist->pad_node, 0, node[i], j);
         }
@@ -713,13 +711,13 @@ void split_adder(nnode_t *nodeo, int a, int b, int sizea, int sizeb, int cin, in
     }
 
     // connect cout to next cin
-    for (i = 1; i < count; i++)
+    for (int i = 1; i < count; i++)
         connect_nodes(node[i - 1], 0, node[i], (node[i]->num_input_pins - 1));
 
     // remap the output pins of each adder to nodeo
     if (count == 1) {
         if (flag == 0) {
-            for (j = 0; j < node[0]->num_output_pins - 2; j++) {
+            for (int j = 0; j < node[0]->num_output_pins - 2; j++) {
                 if (j < nodeo->num_output_pins)
                     remap_pin_to_new_node(nodeo->output_pins[j], node[0], j + 2);
                 else {
@@ -731,22 +729,22 @@ void split_adder(nnode_t *nodeo, int a, int b, int sizea, int sizeb, int cin, in
                 node[0]->output_pins[0]->name = append_string("", "%s~dummy_output~%d~%d", node[0]->name, 0, 0);
             }
         } else {
-            for (j = 0; j < node[0]->num_output_pins - 1; j++)
+            for (int j = 0; j < node[0]->num_output_pins - 1; j++)
                 remap_pin_to_new_node(nodeo->output_pins[j], node[0], j + 1);
             remap_pin_to_new_node(nodeo->output_pins[nodeo->num_output_pins - 1], node[0], 0);
         }
     } else {
         // First adder
-        for (j = 0; j < node[0]->num_output_pins - 2; j++)
+        for (int j = 0; j < node[0]->num_output_pins - 2; j++)
             remap_pin_to_new_node(nodeo->output_pins[j], node[0], j + 2);
         // if a dummy adder is added (offset = 1) start from the second adder)
-        for (i = offset; i < count - 1; i++) {
-            for (j = 0; j < node[i]->num_output_pins - 1; j++)
+        for (int i = offset; i < count - 1; i++) {
+            for (int j = 0; j < node[i]->num_output_pins - 1; j++)
                 remap_pin_to_new_node(nodeo->output_pins[i * sizea + j - offset], node[i], j + 1);
         }
         // Last adder
         if (flag == 0) {
-            for (j = 0; j < node[count - 1]->num_output_pins - 1; j++) {
+            for (int j = 0; j < node[count - 1]->num_output_pins - 1; j++) {
                 // if a dummy adder is added to this chain (offset = 1), adjust the index of the adder using the offset constant
                 if (((count - 1) * sizea + j - offset) < nodeo->num_output_pins)
                     remap_pin_to_new_node(nodeo->output_pins[(count - 1) * sizea + j - offset], node[count - 1], j + 1);
@@ -761,7 +759,7 @@ void split_adder(nnode_t *nodeo, int a, int b, int sizea, int sizeb, int cin, in
             // Pad outputs with a unique and descriptive name to avoid collisions.
             node[count - 1]->output_pins[0]->name = append_string("", "%s~dummy_output~%d~%d", node[count - 1]->name, count - 1, 0);
         } else {
-            for (j = 0; j < node[count - 1]->num_output_pins - 1; j++)
+            for (int j = 0; j < node[count - 1]->num_output_pins - 1; j++)
                 // if(((count - 1) * sizea + j - 1) < nodeo->num_output_pins)
                 remap_pin_to_new_node(nodeo->output_pins[(count - 1) * sizea + j - 1], node[count - 1], j + 1);
             if (nodeo->output_pins[nodeo->num_output_pins - 1] != NULL)
@@ -774,8 +772,8 @@ void split_adder(nnode_t *nodeo, int a, int b, int sizea, int sizeb, int cin, in
         }
     }
 
-    for (i = offset; configuration.coarsen && i < count - 1; i++) {
-        for (j = 0; j < node[i]->num_output_pins - 1; j++) {
+    for (int i = offset; configuration.coarsen && i < count - 1; i++) {
+        for (int j = 0; j < node[i]->num_output_pins - 1; j++) {
             char *new_output_pin_name = (char *)vtr::malloc((strlen(node[i]->name) + 20) * sizeof(char)); /* 6 chars for pin idx */
             odin_sprintf(new_output_pin_name, "%s[1]", node[i]->name);
             node[i]->output_pins[1]->name = new_output_pin_name;
@@ -1027,13 +1025,11 @@ int match_ports(nnode_t *node, nnode_t *next_node, operation_list oper)
  *-----------------------------------------------------------------------*/
 void traverse_operation_node(ast_node_t *node, char *component[], operation_list op, int *mark)
 {
-    long i;
-
     if (node == NULL)
         return;
 
     if (node->types.operation.op == op) {
-        for (i = 0; i < node->num_children; i++) {
+        for (long i = 0; i < node->num_children; i++) {
             *mark = 0;
             if (node->children[i]->type != IDENTIFIERS && node->children[i]->type != NUMBERS) {
                 *mark = 1;
@@ -1079,9 +1075,9 @@ void remove_list_node(t_linked_vptr *pre, t_linked_vptr *next)
  *-------------------------------------------------------------------------*/
 void remove_fanout_pins(nnode_t *node)
 {
-    int i, j, k, idx;
-    for (i = 0; i < node->num_input_pins; i++) {
-        idx = node->input_pins[i]->unique_id;
+    for (int i = 0; i < node->num_input_pins; i++) {
+        int j, k;
+        int idx = node->input_pins[i]->unique_id;
         for (j = 0; j < node->input_pins[i]->net->num_fanout_pins; j++) {
             if (node->input_pins[i]->net->fanout_pins[j]->unique_id == idx)
                 break;
@@ -1100,13 +1096,12 @@ void remove_fanout_pins(nnode_t *node)
  *-------------------------------------------------------------------------*/
 void reallocate_pins(nnode_t *node, nnode_t *next_node)
 {
-    int i, j;
     int pin_idx;
     nnode_t *input_node = NULL;
     nnet_t *net = NULL;
     npin_t *pin = NULL;
-    for (i = 0; i < next_node->num_output_pins; i++) {
-        for (j = 0; j < next_node->output_pins[i]->net->num_fanout_pins; j++) {
+    for (int i = 0; i < next_node->num_output_pins; i++) {
+        for (int j = 0; j < next_node->output_pins[i]->net->num_fanout_pins; j++) {
             if (next_node->output_pins[i]->net->fanout_pins[j]->node != NULL) {
                 input_node = next_node->output_pins[i]->net->fanout_pins[j]->node;
                 net = node->output_pins[i]->net;
@@ -1294,14 +1289,13 @@ void instantiate_add_w_carry_block(int *width, nnode_t *node, short mark, netlis
  *-----------------------------------------------------------------------*/
 static void cleanup_add_old_node(nnode_t *nodeo, netlist_t *netlist)
 {
-    int i;
     /* Disconnecting input pins from the old node side */
-    for (i = 0; i < nodeo->num_input_pins; i++) {
+    for (int i = 0; i < nodeo->num_input_pins; i++) {
         nodeo->input_pins[i] = NULL;
     }
 
     /* connecting the extra output pins to the gnd node */
-    for (i = 0; i < nodeo->num_output_pins; i++) {
+    for (int i = 0; i < nodeo->num_output_pins; i++) {
         npin_t *output_pin = nodeo->output_pins[i];
 
         if (output_pin && output_pin->node) {
@@ -1348,7 +1342,6 @@ nnode_t *check_missing_ports(nnode_t *node, uintptr_t traverse_mark_number, netl
 
     /* check for operations that has 2 operands */
     if (num_input_port == 2) {
-        int i;
         int in_port1_size = node->input_port_sizes[0];
         int in_port2_size = node->input_port_sizes[1];
         int out_port_size = (in_port1_size >= in_port2_size) ? in_port1_size + 1 : in_port2_size + 1;
@@ -1358,11 +1351,11 @@ nnode_t *check_missing_ports(nnode_t *node, uintptr_t traverse_mark_number, netl
         /* copy attributes */
         copy_attribute(new_node->attributes, node->attributes);
 
-        for (i = 0; i < in_port1_size; i++) {
+        for (int i = 0; i < in_port1_size; i++) {
             remap_pin_to_new_node(node->input_pins[i], new_node, i);
         }
 
-        for (i = 0; i < in_port2_size; i++) {
+        for (int i = 0; i < in_port2_size; i++) {
             remap_pin_to_new_node(node->input_pins[i + in_port1_size], new_node, i + in_port1_size);
         }
 
@@ -1374,7 +1367,7 @@ nnode_t *check_missing_ports(nnode_t *node, uintptr_t traverse_mark_number, netl
         add_input_pin_to_node(new_node, cin_pin, new_node->num_input_pins - 1);
 
         // moving the output pins to the new node
-        for (i = 0; i < out_port_size; i++) {
+        for (int i = 0; i < out_port_size; i++) {
             if (i < node->num_output_pins) {
                 remap_pin_to_new_node(node->output_pins[i], new_node, i);
             } else {
@@ -1395,7 +1388,7 @@ nnode_t *check_missing_ports(nnode_t *node, uintptr_t traverse_mark_number, netl
          * if number of output pins is greater than the max of input pins,
          * here we connect the exceeded pins to the GND
          */
-        for (i = out_port_size; i < node->num_output_pins; i++) {
+        for (int i = out_port_size; i < node->num_output_pins; i++) {
             /* creating a buf node */
             nnode_t *buf_node = make_1port_gate(BUF_NODE, 1, 1, node, traverse_mark_number);
             /* adding the GND input pin to the buf node */
diff --git a/parmys-plugin/src/block_memory.cc b/parmys-plugin/src/block_memory.cc
index 9bacd4060..1b600771a 100644
--- a/parmys-plugin/src/block_memory.cc
+++ b/parmys-plugin/src/block_memory.cc
@@ -83,7 +83,6 @@ void init_block_memory_index()
  */
 static block_memory_t *init_block_memory(nnode_t *node, netlist_t * /* netlist */)
 {
-    int i, offset;
     block_memory_t *bram = (block_memory_t *)vtr::malloc(sizeof(block_memory_t));
 
     /**
@@ -111,51 +110,51 @@ static block_memory_t *init_block_memory(nnode_t *node, netlist_t * /* netlist *
     /* INPUT */
 
     /* CLK */
-    offset = 0;
+    int CLK_offset = 0;
     bram->clk = init_signal_list();
-    add_pin_to_signal_list(bram->clk, node->input_pins[offset]);
+    add_pin_to_signal_list(bram->clk, node->input_pins[CLK_offset]);
 
     /* read address pins */
-    offset += CLK_width;
+    int RD_ADDR_offset = CLK_offset + CLK_width;
     bram->read_addr = init_signal_list();
-    for (i = 0; i < RD_ADDR_width; ++i) {
-        add_pin_to_signal_list(bram->read_addr, node->input_pins[i + offset]);
+    for (int i = 0; i < RD_ADDR_width; ++i) {
+        add_pin_to_signal_list(bram->read_addr, node->input_pins[RD_ADDR_offset + i]);
     }
 
     /* read enable pins */
-    offset += RD_ADDR_width;
+    int RD_ENABLE_offset = RD_ADDR_offset + RD_ADDR_width;
     bram->read_en = init_signal_list();
-    for (i = 0; i < RD_ENABLE_width; ++i) {
-        add_pin_to_signal_list(bram->read_en, node->input_pins[i + offset]);
+    for (int i = 0; i < RD_ENABLE_width; ++i) {
+        add_pin_to_signal_list(bram->read_en, node->input_pins[RD_ENABLE_offset + i]);
     }
 
     /* write addr pins */
-    offset += RD_ENABLE_width;
+    int WR_ADDR_offset = RD_ENABLE_offset + RD_ENABLE_width;
     bram->write_addr = init_signal_list();
-    for (i = 0; i < WR_ADDR_width; ++i) {
-        add_pin_to_signal_list(bram->write_addr, node->input_pins[i + offset]);
+    for (int i = 0; i < WR_ADDR_width; ++i) {
+        add_pin_to_signal_list(bram->write_addr, node->input_pins[WR_ADDR_offset + i]);
     }
 
     /* write data pins */
-    offset += WR_ADDR_width;
+    int WR_DATA_offset = WR_ADDR_offset + WR_ADDR_width;
     bram->write_data = init_signal_list();
-    for (i = 0; i < WR_DATA_width; ++i) {
-        add_pin_to_signal_list(bram->write_data, node->input_pins[i + offset]);
+    for (int i = 0; i < WR_DATA_width; ++i) {
+        add_pin_to_signal_list(bram->write_data, node->input_pins[WR_DATA_offset + i]);
     }
 
     /* write enable clk pins */
-    offset += WR_DATA_width;
+    int WR_ENABLE_offset = WR_DATA_offset + WR_DATA_width;
     bram->write_en = init_signal_list();
-    for (i = 0; i < WR_ENABLE_width; ++i) {
-        add_pin_to_signal_list(bram->write_en, node->input_pins[i + offset]);
+    for (int i = 0; i < WR_ENABLE_width; ++i) {
+        add_pin_to_signal_list(bram->write_en, node->input_pins[WR_ENABLE_offset + i]);
     }
 
     /* OUTPUT */
     /* read clk pins */
-    offset = 0;
+    int RD_DATA_offset = 0;
     bram->read_data = init_signal_list();
-    for (i = 0; i < RD_DATA_width; ++i) {
-        add_pin_to_signal_list(bram->read_data, node->output_pins[i + offset]);
+    for (int i = 0; i < RD_DATA_width; ++i) {
+        add_pin_to_signal_list(bram->read_data, node->output_pins[RD_DATA_offset + i]);
     }
 
     /* creating new node since we need to reorder some input port for each inferenece mode */
@@ -183,7 +182,6 @@ static block_memory_t *init_block_memory(nnode_t *node, netlist_t * /* netlist *
  */
 static block_memory_t *init_read_only_memory(nnode_t *node, netlist_t *netlist)
 {
-    int i, offset;
     block_memory_t *rom = (block_memory_t *)vtr::malloc(sizeof(block_memory_t));
 
     /**
@@ -205,35 +203,35 @@ static block_memory_t *init_read_only_memory(nnode_t *node, netlist_t *netlist)
 
     /* INPUT */
     /* CLK */
-    offset = 0;
+    int CLK_offset = 0;
     rom->clk = init_signal_list();
-    add_pin_to_signal_list(rom->clk, node->input_pins[offset]);
+    add_pin_to_signal_list(rom->clk, node->input_pins[CLK_offset]);
 
     /* read address pins */
-    offset += CLK_width;
+    int RD_ADDR_offset = CLK_offset + CLK_width;
     rom->read_addr = init_signal_list();
-    for (i = 0; i < RD_ADDR_width; ++i) {
-        add_pin_to_signal_list(rom->read_addr, node->input_pins[i + offset]);
+    for (int i = 0; i < RD_ADDR_width; ++i) {
+        add_pin_to_signal_list(rom->read_addr, node->input_pins[RD_ADDR_offset + i]);
     }
 
     /* read enable pins */
-    offset += RD_ADDR_width;
+    int RD_ENABLE_offset = RD_ADDR_offset + RD_ADDR_width;
     rom->read_en = init_signal_list();
-    for (i = 0; i < RD_ENABLE_width; ++i) {
-        add_pin_to_signal_list(rom->read_en, node->input_pins[i + offset]);
+    for (int i = 0; i < RD_ENABLE_width; ++i) {
+        add_pin_to_signal_list(rom->read_en, node->input_pins[RD_ENABLE_offset + i]);
     }
 
     /* OUTPUT */
-    offset = 0;
+    int RD_DATA_offset = 0;
     rom->read_data = init_signal_list();
-    for (i = 0; i < RD_DATA_width; ++i) {
-        add_pin_to_signal_list(rom->read_data, node->output_pins[i + offset]);
+    for (int i = 0; i < RD_DATA_width; ++i) {
+        add_pin_to_signal_list(rom->read_data, node->output_pins[RD_DATA_offset + i]);
     }
 
     /* PAD DATA IN */
     /* we pad the data_in port for rom using pad pins */
     rom->write_data = init_signal_list();
-    for (i = 0; i < WR_DATA_width; ++i) {
+    for (int i = 0; i < WR_DATA_width; ++i) {
         add_pin_to_signal_list(rom->write_data, get_pad_pin(netlist));
     }
 
@@ -321,7 +319,6 @@ static void create_2r_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
 {
     nnode_t *old_node = bram->node;
 
-    int i, offset;
     int data_width = bram->node->attributes->DBITS;
     int addr_width = bram->node->attributes->ABITS;
     int num_rd_ports = old_node->attributes->RD_PORTS;
@@ -338,13 +335,13 @@ static void create_2r_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
 
     /* split read addr and add the first half to the addr1 */
     signals->addr1 = init_signal_list();
-    for (i = 0; i < addr_width; ++i) {
+    for (int i = 0; i < addr_width; ++i) {
         add_pin_to_signal_list(signals->addr1, bram->read_addr->pins[i]);
     }
 
     /* add pad pins as data1 */
     signals->data1 = init_signal_list();
-    for (i = 0; i < data_width; ++i) {
+    for (int i = 0; i < data_width; ++i) {
         add_pin_to_signal_list(signals->data1, get_pad_pin(netlist));
     }
 
@@ -357,15 +354,15 @@ static void create_2r_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
 
     /* split read data and add the first half to the out1 */
     signals->out1 = init_signal_list();
-    for (i = 0; i < data_width; ++i) {
+    for (int i = 0; i < data_width; ++i) {
         add_pin_to_signal_list(signals->out1, bram->read_data->pins[i]);
     }
 
     /* add the second half of the read addr to addr2 */
-    offset = addr_width;
+    int addr_2_offset = addr_width;
     signals->addr2 = init_signal_list();
-    for (i = 0; i < addr_width; ++i) {
-        add_pin_to_signal_list(signals->addr2, bram->read_addr->pins[i + offset]);
+    for (int i = 0; i < addr_width; ++i) {
+        add_pin_to_signal_list(signals->addr2, bram->read_addr->pins[addr_2_offset + i]);
     }
 
     /* there is no write data to set any we, so it will be connected to GND */
@@ -374,15 +371,15 @@ static void create_2r_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
 
     /* add the second half of the write data to data2 */
     signals->data2 = init_signal_list();
-    for (i = 0; i < data_width; ++i) {
+    for (int i = 0; i < data_width; ++i) {
         add_pin_to_signal_list(signals->data2, get_pad_pin(netlist));
     }
 
     /* add the second half of the read data to out2 */
-    offset = data_width;
+    int out_2_offset = data_width;
     signals->out2 = init_signal_list();
-    for (i = 0; i < data_width; ++i) {
-        add_pin_to_signal_list(signals->out2, bram->read_data->pins[i + offset]);
+    for (int i = 0; i < data_width; ++i) {
+        add_pin_to_signal_list(signals->out2, bram->read_data->pins[out_2_offset + i]);
     }
 
     /* create a new dual port ram */
@@ -469,7 +466,6 @@ static void create_nr_single_port_ram(block_memory_t *rom, netlist_t *netlist)
  */
 static void create_rw_single_port_ram(block_memory_t *bram, netlist_t * /* netlist */)
 {
-    int i;
     nnode_t *old_node = bram->node;
     int num_rd_ports = old_node->attributes->RD_PORTS;
     int num_wr_ports = old_node->attributes->WR_PORTS;
@@ -482,7 +478,7 @@ static void create_rw_single_port_ram(block_memory_t *bram, netlist_t * /* netli
     sp_ram_signals *signals = (sp_ram_signals *)vtr::calloc(1, sizeof(dp_ram_signals));
 
     /* the wr addr will be deleted since we do not need it anymore */
-    for (i = 0; i < bram->write_addr->count; ++i) {
+    for (int i = 0; i < bram->write_addr->count; ++i) {
         npin_t *wr_addr_pin = bram->write_addr->pins[i];
         /* delete pin */
         delete_npin(wr_addr_pin);
@@ -502,7 +498,7 @@ static void create_rw_single_port_ram(block_memory_t *bram, netlist_t * /* netli
     signals->data = bram->write_data;
 
     /* the rd enables will be deleted since we do not need it anymore */
-    for (i = 0; i < bram->read_en->count; ++i) {
+    for (int i = 0; i < bram->read_en->count; ++i) {
         npin_t *rd_en_pin = bram->read_en->pins[i];
         /* delete pin */
         delete_npin(rd_en_pin);
@@ -512,7 +508,7 @@ static void create_rw_single_port_ram(block_memory_t *bram, netlist_t * /* netli
     if (bram->write_en->count > 1) {
         /* need to OR all write enable since we1 should be one bit in single port ram */
         // bram->write_en = make_chain(LOGICAL_OR, bram->write_en, old_node);
-        for (i = 1; i < bram->write_en->count; ++i) {
+        for (int i = 1; i < bram->write_en->count; ++i) {
             delete_npin(bram->write_en->pins[i]);
         }
     }
@@ -541,7 +537,6 @@ static void create_rw_single_port_ram(block_memory_t *bram, netlist_t * /* netli
  */
 static void create_rw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
 {
-    int i;
     nnode_t *old_node = bram->node;
     int num_rd_ports = old_node->attributes->RD_PORTS;
     int num_wr_ports = old_node->attributes->WR_PORTS;
@@ -565,12 +560,12 @@ static void create_rw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
 
     /* we pad the second data port using pad pins */
     signal_list_t *pad_signals = init_signal_list();
-    for (i = 0; i < bram->write_data->count; ++i) {
+    for (int i = 0; i < bram->write_data->count; ++i) {
         add_pin_to_signal_list(pad_signals, get_pad_pin(netlist));
     }
     signals->data1 = pad_signals;
 
-    for (i = 0; i < bram->read_en->count; ++i) {
+    for (int i = 0; i < bram->read_en->count; ++i) {
         /* delete all read enable pins, since no need to write from addr1 */
         delete_npin(bram->read_en->pins[0]);
     }
@@ -586,7 +581,7 @@ static void create_rw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
     /* leave second output port unconnected */
     int offset = bram->read_data->count;
     signal_list_t *out2_signals = init_signal_list();
-    for (i = 0; i < bram->read_data->count; i++) {
+    for (int i = 0; i < bram->read_data->count; i++) {
         // specify the output pin
         npin_t *new_pin1 = allocate_npin();
         npin_t *new_pin2 = allocate_npin();
@@ -625,7 +620,6 @@ static void create_r2w_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
 {
     nnode_t *old_node = bram->node;
 
-    int i, offset;
     int data_width = bram->node->attributes->DBITS;
     int addr_width = bram->node->attributes->ABITS;
     int num_rd_ports = old_node->attributes->RD_PORTS;
@@ -640,29 +634,29 @@ static void create_r2w_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
 
     /* add read address as addr1 to dpram signal lists */
     signals->addr1 = init_signal_list();
-    for (i = 0; i < bram->read_addr->count; ++i) {
+    for (int i = 0; i < bram->read_addr->count; ++i) {
         add_pin_to_signal_list(signals->addr1, bram->read_addr->pins[i]);
     }
 
     /* split wr_addr, wr_data and wr_en ports */
-    offset = addr_width;
+    int addr_2_offset = addr_width;
     signal_list_t *wr_addr1 = init_signal_list();
     signal_list_t *wr_addr2 = init_signal_list();
-    for (i = 0; i < addr_width; ++i) {
+    for (int i = 0; i < addr_width; ++i) {
         add_pin_to_signal_list(wr_addr1, bram->write_addr->pins[i]);
-        add_pin_to_signal_list(wr_addr2, bram->write_addr->pins[i + offset]);
+        add_pin_to_signal_list(wr_addr2, bram->write_addr->pins[addr_2_offset + i]);
     }
 
     oassert(bram->write_en->count == 2);
     npin_t *wr_en1 = bram->write_en->pins[0];
     npin_t *wr_en2 = bram->write_en->pins[1];
 
-    offset = data_width;
+    int wr_data_2_offset = data_width;
     signal_list_t *wr_data1 = init_signal_list();
     signal_list_t *wr_data2 = init_signal_list();
-    for (i = 0; i < data_width; ++i) {
+    for (int i = 0; i < data_width; ++i) {
         add_pin_to_signal_list(wr_data1, bram->write_data->pins[i]);
-        add_pin_to_signal_list(wr_data2, bram->write_data->pins[i + offset]);
+        add_pin_to_signal_list(wr_data2, bram->write_data->pins[wr_data_2_offset + i]);
     }
 
     /**
@@ -705,7 +699,7 @@ static void create_r2w_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
 
     /* map read data to the out1 */
     signals->out1 = init_signal_list();
-    for (i = 0; i < bram->read_data->count; ++i) {
+    for (int i = 0; i < bram->read_data->count; ++i) {
         add_pin_to_signal_list(signals->out1, bram->read_data->pins[i]);
     }
 
@@ -721,7 +715,7 @@ static void create_r2w_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
 
     /* out2 will be unconnected */
     signals->out2 = init_signal_list();
-    for (i = 0; i < signals->out1->count; ++i) {
+    for (int i = 0; i < signals->out1->count; ++i) {
         /* create the clk node's output pin */
         npin_t *new_pin1 = allocate_npin();
         npin_t *new_pin2 = allocate_npin();
@@ -740,7 +734,7 @@ static void create_r2w_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
 
     // CLEAN UP
     /* free matched wr addr pins since they are as the same as read addr */
-    for (i = 0; i < addr_width; ++i) {
+    for (int i = 0; i < addr_width; ++i) {
         npin_t *pin = (first_match) ? wr_addr1->pins[i] : wr_addr2->pins[i];
         /* delete pin */
         delete_npin(pin);
@@ -764,7 +758,6 @@ static void create_2rw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
 {
     nnode_t *old_node = bram->node;
 
-    int i, offset;
     int data_width = bram->node->attributes->DBITS;
     int addr_width = bram->node->attributes->ABITS;
     int num_rd_ports = old_node->attributes->RD_PORTS;
@@ -779,7 +772,7 @@ static void create_2rw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
 
     /* add write address as addr1 to dpram signal lists */
     signals->addr1 = init_signal_list();
-    for (i = 0; i < bram->write_addr->count; ++i) {
+    for (int i = 0; i < bram->write_addr->count; ++i) {
         add_pin_to_signal_list(signals->addr1, bram->write_addr->pins[i]);
     }
 
@@ -788,7 +781,7 @@ static void create_2rw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
      * As a result, the corresponding write data will be mapped to data1
      */
     signals->data1 = init_signal_list();
-    for (i = 0; i < data_width; ++i) {
+    for (int i = 0; i < data_width; ++i) {
         add_pin_to_signal_list(signals->data1, bram->write_data->pins[i]);
     }
 
@@ -797,20 +790,20 @@ static void create_2rw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
     signals->we1 = bram->write_en->pins[0];
 
     /* split rd_addr, rd_data ports */
-    offset = addr_width;
+    int rd_addr_2_offset = addr_width;
     signal_list_t *rd_addr1 = init_signal_list();
     signal_list_t *rd_addr2 = init_signal_list();
-    for (i = 0; i < addr_width; ++i) {
+    for (int i = 0; i < addr_width; ++i) {
         add_pin_to_signal_list(rd_addr1, bram->read_addr->pins[i]);
-        add_pin_to_signal_list(rd_addr2, bram->read_addr->pins[i + offset]);
+        add_pin_to_signal_list(rd_addr2, bram->read_addr->pins[rd_addr_2_offset + i]);
     }
 
-    offset = data_width;
+    int rd_data_2_offset = data_width;
     signal_list_t *rd_data1 = init_signal_list();
     signal_list_t *rd_data2 = init_signal_list();
-    for (i = 0; i < data_width; ++i) {
+    for (int i = 0; i < data_width; ++i) {
         add_pin_to_signal_list(rd_data1, bram->read_data->pins[i]);
-        add_pin_to_signal_list(rd_data2, bram->read_data->pins[i + offset]);
+        add_pin_to_signal_list(rd_data2, bram->read_data->pins[rd_data_2_offset + i]);
     }
 
     /**
@@ -838,7 +831,7 @@ static void create_2rw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
     }
 
     /* delete rd pins since we use corresponding wr_en and zero*/
-    for (i = 0; i < bram->read_en->count; ++i) {
+    for (int i = 0; i < bram->read_en->count; ++i) {
         delete_npin(bram->read_en->pins[i]);
     }
 
@@ -857,7 +850,7 @@ static void create_2rw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
 
     /* the rest of write data pin is for data2 */
     signals->data2 = init_signal_list();
-    for (i = 0; i < data_width; ++i) {
+    for (int i = 0; i < data_width; ++i) {
         add_pin_to_signal_list(signals->data2, get_pad_pin(netlist));
     }
 
@@ -869,7 +862,7 @@ static void create_2rw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
 
     // CLEAN UP
     /* free matched rd addr pins since they are as the same as write addr */
-    for (i = 0; i < addr_width; ++i) {
+    for (int i = 0; i < addr_width; ++i) {
         npin_t *pin = (first_match) ? rd_addr1->pins[i] : rd_addr2->pins[i];
         /* delete pin */
         delete_npin(pin);
@@ -893,7 +886,6 @@ static void create_2r2w_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
 {
     nnode_t *old_node = bram->node;
 
-    int i, offset;
     int data_width = bram->node->attributes->DBITS;
     int addr_width = bram->node->attributes->ABITS;
     int num_rd_ports = old_node->attributes->RD_PORTS;
@@ -906,41 +898,41 @@ static void create_2r2w_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
     oassert(bram->read_data->count == 2 * data_width);
 
     /* split wr_addr, wr_data and wr_en ports */
-    offset = addr_width;
+    int wr_addr_2_base = addr_width;
     signal_list_t *wr_addr1 = init_signal_list();
     signal_list_t *wr_addr2 = init_signal_list();
-    for (i = 0; i < addr_width; ++i) {
+    for (int i = 0; i < addr_width; ++i) {
         add_pin_to_signal_list(wr_addr1, bram->write_addr->pins[i]);
-        add_pin_to_signal_list(wr_addr2, bram->write_addr->pins[i + offset]);
+        add_pin_to_signal_list(wr_addr2, bram->write_addr->pins[wr_addr_2_base + i]);
     }
 
     oassert(bram->write_en->count == 2);
     npin_t *wr_en1 = bram->write_en->pins[0];
     npin_t *wr_en2 = bram->write_en->pins[1];
 
-    offset = data_width;
+    int wr_data_2_base = data_width;
     signal_list_t *wr_data1 = init_signal_list();
     signal_list_t *wr_data2 = init_signal_list();
-    for (i = 0; i < data_width; ++i) {
+    for (int i = 0; i < data_width; ++i) {
         add_pin_to_signal_list(wr_data1, bram->write_data->pins[i]);
-        add_pin_to_signal_list(wr_data2, bram->write_data->pins[i + offset]);
+        add_pin_to_signal_list(wr_data2, bram->write_data->pins[wr_data_2_base + i]);
     }
 
     /* split rd_addr, rd_data ports */
-    offset = addr_width;
+    int rd_addr_2_base = addr_width;
     signal_list_t *rd_addr1 = init_signal_list();
     signal_list_t *rd_addr2 = init_signal_list();
-    for (i = 0; i < addr_width; ++i) {
+    for (int i = 0; i < addr_width; ++i) {
         add_pin_to_signal_list(rd_addr1, bram->read_addr->pins[i]);
-        add_pin_to_signal_list(rd_addr2, bram->read_addr->pins[i + offset]);
+        add_pin_to_signal_list(rd_addr2, bram->read_addr->pins[rd_addr_2_base + i]);
     }
 
-    offset = data_width;
+    int rd_data_2_base = data_width;
     signal_list_t *rd_data1 = init_signal_list();
     signal_list_t *rd_data2 = init_signal_list();
-    for (i = 0; i < data_width; ++i) {
+    for (int i = 0; i < data_width; ++i) {
         add_pin_to_signal_list(rd_data1, bram->read_data->pins[i]);
-        add_pin_to_signal_list(rd_data2, bram->read_data->pins[i + offset]);
+        add_pin_to_signal_list(rd_data2, bram->read_data->pins[rd_data_2_base + i]);
     }
 
     /**
@@ -972,7 +964,7 @@ static void create_2r2w_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
     }
 
     /* delete rd pins since we use corresponding wr_en and zero*/
-    for (i = 0; i < bram->read_en->count; ++i) {
+    for (int i = 0; i < bram->read_en->count; ++i) {
         delete_npin(bram->read_en->pins[i]);
     }
 
@@ -1013,7 +1005,7 @@ static void create_2r2w_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
     /* at this point wr_addr and rd_addr must be the same */
     oassert(bram->read_addr->count == bram->write_addr->count);
     /* the wr addr will be deleted since we do not need it anymore */
-    for (i = 0; i < bram->write_addr->count; ++i) {
+    for (int i = 0; i < bram->write_addr->count; ++i) {
         npin_t *wr_addr_pin = bram->write_addr->pins[i];
         /* delete pin */
         delete_npin(wr_addr_pin);
@@ -1037,7 +1029,6 @@ static void create_2r2w_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
  */
 static void create_nrmw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
 {
-    int i;
     nnode_t *old_node = bram->node;
     int data_width = bram->node->attributes->DBITS;
     int addr_width = bram->node->attributes->ABITS;
@@ -1068,7 +1059,7 @@ static void create_nrmw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
 
     /* we pad the first data port using pad pins */
     signals->data1 = init_signal_list();
-    for (i = 0; i < data_width; ++i) {
+    for (int i = 0; i < data_width; ++i) {
         add_pin_to_signal_list(signals->data1, get_pad_pin(netlist));
     }
     selectors = copy_input_signals(bram->write_en);
@@ -1081,7 +1072,7 @@ static void create_nrmw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
 
     /* create vcc signas as the value of we2 when the write_en pins are active */
     signal_list_t *vcc_signals = init_signal_list();
-    for (i = 0; i < num_wr_ports; ++i) {
+    for (int i = 0; i < num_wr_ports; ++i) {
         add_pin_to_signal_list(vcc_signals, get_one_pin(netlist));
     }
     signal_list_t *we2_signal = split_cascade_port(vcc_signals, bram->write_en, 1, old_node, netlist);
@@ -1097,7 +1088,7 @@ static void create_nrmw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
     nnode_t *dpram = create_dual_port_ram(signals, old_node);
 
     signal_list_t *dpram_outputs = init_signal_list();
-    for (i = 0; i < data_width; ++i) {
+    for (int i = 0; i < data_width; ++i) {
         add_pin_to_signal_list(dpram_outputs, dpram->output_pins[i]);
     }
 
@@ -1123,7 +1114,6 @@ static void create_nrmw_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
  */
 static void create_2rw_multiplexed_dual_port_ram(block_memory_t *bram, netlist_t *netlist)
 {
-    int i;
     nnode_t *old_node = bram->node;
     int data_width = bram->node->attributes->DBITS;
     int addr_width = bram->node->attributes->ABITS;
@@ -1141,7 +1131,7 @@ static void create_2rw_multiplexed_dual_port_ram(block_memory_t *bram, netlist_t
     signals->addr1 = split_cascade_port(bram->read_addr, selectors, addr_width, old_node, netlist);
     free_signal_list(selectors);
     signals->addr2 = init_signal_list();
-    for (i = 0; i < bram->write_addr->count; ++i) {
+    for (int i = 0; i < bram->write_addr->count; ++i) {
         add_pin_to_signal_list(signals->addr2, bram->write_addr->pins[i]);
     }
 
@@ -1149,11 +1139,11 @@ static void create_2rw_multiplexed_dual_port_ram(block_memory_t *bram, netlist_t
     signals->clk = bram->clk->pins[0];
     /* we pad the first data port using pad pins */
     signals->data1 = init_signal_list();
-    for (i = 0; i < data_width; ++i) {
+    for (int i = 0; i < data_width; ++i) {
         add_pin_to_signal_list(signals->data1, get_pad_pin(netlist));
     }
     signals->data2 = init_signal_list();
-    for (i = 0; i < data_width; ++i) {
+    for (int i = 0; i < data_width; ++i) {
         add_pin_to_signal_list(signals->data2, bram->write_data->pins[i]);
     }
     /* first port does not have data, so the enable is GND */
@@ -1166,7 +1156,7 @@ static void create_2rw_multiplexed_dual_port_ram(block_memory_t *bram, netlist_t
     /* create a DPRAM node */
     nnode_t *dpram = create_dual_port_ram(signals, old_node);
     signal_list_t *dpram_outputs = init_signal_list();
-    for (i = 0; i < data_width; ++i) {
+    for (int i = 0; i < data_width; ++i) {
         add_pin_to_signal_list(dpram_outputs, dpram->output_pins[i]);
     }
     /* decode the spram outputs to the n bram output ports */
@@ -1459,7 +1449,6 @@ static nnode_t *ymem_to_rom(nnode_t *node, uintptr_t traverse_mark_number)
 {
     oassert(node->traverse_visited == traverse_mark_number);
 
-    int i;
     int offset, new_offset = 0;
     int addr_width = node->attributes->ABITS;
     int data_width = node->attributes->DBITS;
@@ -1494,7 +1483,7 @@ static nnode_t *ymem_to_rom(nnode_t *node, uintptr_t traverse_mark_number)
     offset = RD_ADDR_width;
     add_input_port_information(transformed_mem, 1);
     allocate_more_input_pins(transformed_mem, 1);
-    for (i = 0; i < RD_CLK_width; i++) {
+    for (int i = 0; i < RD_CLK_width; i++) {
         if (i == 0) {
             remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, 0);
         } else {
@@ -1509,7 +1498,7 @@ static nnode_t *ymem_to_rom(nnode_t *node, uintptr_t traverse_mark_number)
     oassert(RD_ADDR_width == num_rd_ports * addr_width);
     add_input_port_information(transformed_mem, RD_ADDR_width);
     allocate_more_input_pins(transformed_mem, RD_ADDR_width);
-    for (i = 0; i < RD_ADDR_width; i++) {
+    for (int i = 0; i < RD_ADDR_width; i++) {
         remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
     }
     new_offset += RD_ADDR_width;
@@ -1519,7 +1508,7 @@ static nnode_t *ymem_to_rom(nnode_t *node, uintptr_t traverse_mark_number)
     oassert(RD_ENABLE_width == num_rd_ports);
     add_input_port_information(transformed_mem, RD_ENABLE_width);
     allocate_more_input_pins(transformed_mem, RD_ENABLE_width);
-    for (i = 0; i < RD_ENABLE_width; i++) {
+    for (int i = 0; i < RD_ENABLE_width; i++) {
         remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
     }
     new_offset += RD_ENABLE_width;
@@ -1529,7 +1518,7 @@ static nnode_t *ymem_to_rom(nnode_t *node, uintptr_t traverse_mark_number)
     oassert(RD_DATA_width == num_rd_ports * data_width);
     add_output_port_information(transformed_mem, RD_DATA_width);
     allocate_more_output_pins(transformed_mem, RD_DATA_width);
-    for (i = 0; i < RD_DATA_width; i++) {
+    for (int i = 0; i < RD_DATA_width; i++) {
         remap_pin_to_new_node(node->output_pins[i + offset], transformed_mem, i);
     }
 
@@ -1542,7 +1531,6 @@ static nnode_t *ymem2_to_rom(nnode_t *node, uintptr_t traverse_mark_number)
 {
     oassert(node->traverse_visited == traverse_mark_number);
 
-    int i;
     int offset, new_offset = 0;
     int addr_width = node->attributes->ABITS;
     int data_width = node->attributes->DBITS;
@@ -1569,19 +1557,19 @@ static nnode_t *ymem2_to_rom(nnode_t *node, uintptr_t traverse_mark_number)
 
     /* ARST */
     offset = RD_ADDR_width;
-    for (i = 0; i < RD_ARST_width; i++) {
+    for (int i = 0; i < RD_ARST_width; i++) {
         delete_npin(node->input_pins[offset + i]);
     }
     /* SRST */
     offset = RD_ADDR_width + RD_ARST_width + RD_CLK_width + RD_ENABLE_width;
-    for (i = 0; i < RD_SRST_width; i++) {
+    for (int i = 0; i < RD_SRST_width; i++) {
         delete_npin(node->input_pins[offset + i]);
     }
     /* CLK */
     offset = RD_ADDR_width + RD_ARST_width;
     add_input_port_information(transformed_mem, 1);
     allocate_more_input_pins(transformed_mem, 1);
-    for (i = 0; i < RD_CLK_width; i++) {
+    for (int i = 0; i < RD_CLK_width; i++) {
         if (i == 0) {
             remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, 0);
         } else {
@@ -1596,7 +1584,7 @@ static nnode_t *ymem2_to_rom(nnode_t *node, uintptr_t traverse_mark_number)
     oassert(RD_ADDR_width == num_rd_ports * addr_width);
     add_input_port_information(transformed_mem, RD_ADDR_width);
     allocate_more_input_pins(transformed_mem, RD_ADDR_width);
-    for (i = 0; i < RD_ADDR_width; i++) {
+    for (int i = 0; i < RD_ADDR_width; i++) {
         remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
     }
     new_offset += RD_ADDR_width;
@@ -1606,7 +1594,7 @@ static nnode_t *ymem2_to_rom(nnode_t *node, uintptr_t traverse_mark_number)
     oassert(RD_ENABLE_width == num_rd_ports);
     add_input_port_information(transformed_mem, RD_ENABLE_width);
     allocate_more_input_pins(transformed_mem, RD_ENABLE_width);
-    for (i = 0; i < RD_ENABLE_width; i++) {
+    for (int i = 0; i < RD_ENABLE_width; i++) {
         remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
     }
     new_offset += RD_ENABLE_width;
@@ -1616,7 +1604,7 @@ static nnode_t *ymem2_to_rom(nnode_t *node, uintptr_t traverse_mark_number)
     oassert(RD_DATA_width == num_rd_ports * data_width);
     add_output_port_information(transformed_mem, RD_DATA_width);
     allocate_more_output_pins(transformed_mem, RD_DATA_width);
-    for (i = 0; i < RD_DATA_width; i++) {
+    for (int i = 0; i < RD_DATA_width; i++) {
         remap_pin_to_new_node(node->output_pins[i + offset], transformed_mem, i);
     }
 
@@ -1637,7 +1625,6 @@ static nnode_t *ymem_to_bram(nnode_t *node, uintptr_t traverse_mark_number)
 {
     oassert(node->traverse_visited == traverse_mark_number);
 
-    int i;
     int offset, new_offset = 0;
     int addr_width = node->attributes->ABITS;
     int data_width = node->attributes->DBITS;
@@ -1681,7 +1668,7 @@ static nnode_t *ymem_to_bram(nnode_t *node, uintptr_t traverse_mark_number)
     offset = RD_ADDR_width + RD_CLK_width + RD_ENABLE_width + WR_ADDR_width;
     add_input_port_information(transformed_mem, 1);
     allocate_more_input_pins(transformed_mem, 1);
-    for (i = 0; i < WR_CLK_width; i++) {
+    for (int i = 0; i < WR_CLK_width; i++) {
         if (i == 0) {
             remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, 0);
         } else {
@@ -1696,7 +1683,7 @@ static nnode_t *ymem_to_bram(nnode_t *node, uintptr_t traverse_mark_number)
     oassert(RD_ADDR_width == num_rd_ports * addr_width);
     add_input_port_information(transformed_mem, RD_ADDR_width);
     allocate_more_input_pins(transformed_mem, RD_ADDR_width);
-    for (i = 0; i < RD_ADDR_width; i++) {
+    for (int i = 0; i < RD_ADDR_width; i++) {
         remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
     }
     new_offset += RD_ADDR_width;
@@ -1704,7 +1691,7 @@ static nnode_t *ymem_to_bram(nnode_t *node, uintptr_t traverse_mark_number)
     /* RD_CLK */
     offset = RD_ADDR_width;
     oassert(RD_CLK_width == num_rd_ports);
-    for (i = 0; i < RD_CLK_width; i++) {
+    for (int i = 0; i < RD_CLK_width; i++) {
         delete_npin(node->input_pins[i + offset]);
     }
 
@@ -1713,7 +1700,7 @@ static nnode_t *ymem_to_bram(nnode_t *node, uintptr_t traverse_mark_number)
     oassert(RD_ENABLE_width == num_rd_ports);
     add_input_port_information(transformed_mem, RD_ENABLE_width);
     allocate_more_input_pins(transformed_mem, RD_ENABLE_width);
-    for (i = 0; i < RD_ENABLE_width; i++) {
+    for (int i = 0; i < RD_ENABLE_width; i++) {
         remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
     }
     new_offset += RD_ENABLE_width;
@@ -1723,7 +1710,7 @@ static nnode_t *ymem_to_bram(nnode_t *node, uintptr_t traverse_mark_number)
     oassert(WR_ADDR_width == num_wr_ports * addr_width);
     add_input_port_information(transformed_mem, WR_ADDR_width);
     allocate_more_input_pins(transformed_mem, WR_ADDR_width);
-    for (i = 0; i < WR_ADDR_width; i++) {
+    for (int i = 0; i < WR_ADDR_width; i++) {
         remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
     }
     new_offset += WR_ADDR_width;
@@ -1733,7 +1720,7 @@ static nnode_t *ymem_to_bram(nnode_t *node, uintptr_t traverse_mark_number)
     oassert(WR_DATA_width == num_wr_ports * data_width);
     add_input_port_information(transformed_mem, WR_DATA_width);
     allocate_more_input_pins(transformed_mem, WR_DATA_width);
-    for (i = 0; i < WR_DATA_width; i++) {
+    for (int i = 0; i < WR_DATA_width; i++) {
         remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
     }
     new_offset += WR_DATA_width;
@@ -1743,7 +1730,7 @@ static nnode_t *ymem_to_bram(nnode_t *node, uintptr_t traverse_mark_number)
     oassert(WR_ENABLE_width == num_wr_ports * data_width);
     add_input_port_information(transformed_mem, num_wr_ports);
     allocate_more_input_pins(transformed_mem, num_wr_ports);
-    for (i = 0; i < WR_ENABLE_width; i++) {
+    for (int i = 0; i < WR_ENABLE_width; i++) {
         if (i % data_width == 0)
             remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, new_offset++);
         else
@@ -1755,7 +1742,7 @@ static nnode_t *ymem_to_bram(nnode_t *node, uintptr_t traverse_mark_number)
     oassert(RD_DATA_width == num_rd_ports * data_width);
     add_output_port_information(transformed_mem, RD_DATA_width);
     allocate_more_output_pins(transformed_mem, RD_DATA_width);
-    for (i = 0; i < RD_DATA_width; i++) {
+    for (int i = 0; i < RD_DATA_width; i++) {
         remap_pin_to_new_node(node->output_pins[i + offset], transformed_mem, i);
     }
 
@@ -1769,7 +1756,6 @@ static nnode_t *ymem2_to_bram(nnode_t *node, uintptr_t traverse_mark_number)
 {
     oassert(node->traverse_visited == traverse_mark_number);
 
-    int i;
     int offset, new_offset = 0;
     int addr_width = node->attributes->ABITS;
     int data_width = node->attributes->DBITS;
@@ -1801,12 +1787,12 @@ static nnode_t *ymem2_to_bram(nnode_t *node, uintptr_t traverse_mark_number)
 
     /* ARST */
     offset = RD_ADDR_width;
-    for (i = 0; i < RD_ARST_width; i++) {
+    for (int i = 0; i < RD_ARST_width; i++) {
         delete_npin(node->input_pins[offset + i]);
     }
     /* SRST */
     offset = RD_ADDR_width + RD_ARST_width + RD_CLK_width + RD_ENABLE_width;
-    for (i = 0; i < RD_SRST_width; i++) {
+    for (int i = 0; i < RD_SRST_width; i++) {
         delete_npin(node->input_pins[offset + i]);
     }
 
@@ -1814,7 +1800,7 @@ static nnode_t *ymem2_to_bram(nnode_t *node, uintptr_t traverse_mark_number)
     offset = RD_ADDR_width + RD_ARST_width + RD_CLK_width + RD_ENABLE_width + RD_SRST_width + WR_ADDR_width;
     add_input_port_information(transformed_mem, 1);
     allocate_more_input_pins(transformed_mem, 1);
-    for (i = 0; i < WR_CLK_width; i++) {
+    for (int i = 0; i < WR_CLK_width; i++) {
         if (i == 0) {
             remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, 0);
         } else {
@@ -1829,7 +1815,7 @@ static nnode_t *ymem2_to_bram(nnode_t *node, uintptr_t traverse_mark_number)
     oassert(RD_ADDR_width == num_rd_ports * addr_width);
     add_input_port_information(transformed_mem, RD_ADDR_width);
     allocate_more_input_pins(transformed_mem, RD_ADDR_width);
-    for (i = 0; i < RD_ADDR_width; i++) {
+    for (int i = 0; i < RD_ADDR_width; i++) {
         remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
     }
     new_offset += RD_ADDR_width;
@@ -1837,7 +1823,7 @@ static nnode_t *ymem2_to_bram(nnode_t *node, uintptr_t traverse_mark_number)
     /* RD_CLK */
     offset = RD_ADDR_width + RD_ARST_width;
     oassert(RD_CLK_width == num_rd_ports);
-    for (i = 0; i < RD_CLK_width; i++) {
+    for (int i = 0; i < RD_CLK_width; i++) {
         delete_npin(node->input_pins[i + offset]);
     }
 
@@ -1846,7 +1832,7 @@ static nnode_t *ymem2_to_bram(nnode_t *node, uintptr_t traverse_mark_number)
     oassert(RD_ENABLE_width == num_rd_ports);
     add_input_port_information(transformed_mem, RD_ENABLE_width);
     allocate_more_input_pins(transformed_mem, RD_ENABLE_width);
-    for (i = 0; i < RD_ENABLE_width; i++) {
+    for (int i = 0; i < RD_ENABLE_width; i++) {
         remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
     }
     new_offset += RD_ENABLE_width;
@@ -1856,7 +1842,7 @@ static nnode_t *ymem2_to_bram(nnode_t *node, uintptr_t traverse_mark_number)
     oassert(WR_ADDR_width == num_wr_ports * addr_width);
     add_input_port_information(transformed_mem, WR_ADDR_width);
     allocate_more_input_pins(transformed_mem, WR_ADDR_width);
-    for (i = 0; i < WR_ADDR_width; i++) {
+    for (int i = 0; i < WR_ADDR_width; i++) {
         remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
     }
     new_offset += WR_ADDR_width;
@@ -1866,7 +1852,7 @@ static nnode_t *ymem2_to_bram(nnode_t *node, uintptr_t traverse_mark_number)
     oassert(WR_DATA_width == num_wr_ports * data_width);
     add_input_port_information(transformed_mem, WR_DATA_width);
     allocate_more_input_pins(transformed_mem, WR_DATA_width);
-    for (i = 0; i < WR_DATA_width; i++) {
+    for (int i = 0; i < WR_DATA_width; i++) {
         remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, i + new_offset);
     }
     new_offset += WR_DATA_width;
@@ -1876,7 +1862,7 @@ static nnode_t *ymem2_to_bram(nnode_t *node, uintptr_t traverse_mark_number)
     oassert(WR_ENABLE_width == num_wr_ports * data_width);
     add_input_port_information(transformed_mem, num_wr_ports);
     allocate_more_input_pins(transformed_mem, num_wr_ports);
-    for (i = 0; i < WR_ENABLE_width; i++) {
+    for (int i = 0; i < WR_ENABLE_width; i++) {
         if (i % data_width == 0)
             remap_pin_to_new_node(node->input_pins[i + offset], transformed_mem, new_offset++);
         else
@@ -1888,7 +1874,7 @@ static nnode_t *ymem2_to_bram(nnode_t *node, uintptr_t traverse_mark_number)
     oassert(RD_DATA_width == num_rd_ports * data_width);
     add_output_port_information(transformed_mem, RD_DATA_width);
     allocate_more_output_pins(transformed_mem, RD_DATA_width);
-    for (i = 0; i < RD_DATA_width; i++) {
+    for (int i = 0; i < RD_DATA_width; i++) {
         remap_pin_to_new_node(node->output_pins[i + offset], transformed_mem, i);
     }
 
@@ -1978,7 +1964,6 @@ static signal_list_t *split_cascade_port(signal_list_t *signalvar, signal_list_t
     /* validate signals list size */
     oassert(signalvar->count % desired_width == 0);
 
-    int i, j;
     int num_chunk = signalvar->count / desired_width;
     signal_list_t *return_value = NULL;
     /* validate selector size */
@@ -1990,25 +1975,25 @@ static signal_list_t *split_cascade_port(signal_list_t *signalvar, signal_list_t
     /* create cascaded multiplexers */
     nnode_t **muxes = (nnode_t **)vtr::calloc(num_chunk, sizeof(nnode_t *));
     signal_list_t **internal_outputs = (signal_list_t **)vtr::calloc(num_chunk, sizeof(signal_list_t *));
-    for (i = 0; i < num_chunk; ++i) {
+    for (int i = 0; i < num_chunk; ++i) {
         /* mux inputs */
         signal_list_t **mux_inputs = (signal_list_t **)vtr::calloc(2, sizeof(signal_list_t *));
         mux_inputs[0] = init_signal_list();
         if (i == 0) {
             /* the first port of the first mux should be driven by PAD node */
-            for (j = 0; j < desired_width; j++) {
+            for (int j = 0; j < desired_width; j++) {
                 add_pin_to_signal_list(mux_inputs[0], get_pad_pin(netlist));
             }
         } else {
             /* the first port of the rest muxe should be driven by previous mux output */
-            for (j = 0; j < desired_width; j++) {
+            for (int j = 0; j < desired_width; j++) {
                 add_pin_to_signal_list(mux_inputs[0], internal_outputs[i - 1]->pins[j]);
             }
         }
 
         /* hook the splitted signals[i] as the second mux input */
         mux_inputs[1] = init_signal_list();
-        for (j = 0; j < desired_width; j++) {
+        for (int j = 0; j < desired_width; j++) {
             add_pin_to_signal_list(mux_inputs[1], splitted_signals[i]->pins[j]);
         }
 
@@ -2026,7 +2011,7 @@ static signal_list_t *split_cascade_port(signal_list_t *signalvar, signal_list_t
 
         /* initialize the internal outputs */
         internal_outputs[i] = init_signal_list();
-        for (j = 0; j < desired_width; j++) {
+        for (int j = 0; j < desired_width; j++) {
             npin_t *output_pin = muxes[i]->output_pins[j];
             nnet_t *output_net = output_pin->net;
             /* add new fanout */
@@ -2045,13 +2030,13 @@ static signal_list_t *split_cascade_port(signal_list_t *signalvar, signal_list_t
     return_value = internal_outputs[num_chunk - 1];
 
     // CLEAN UP
-    for (i = 0; i < num_chunk; ++i) {
+    for (int i = 0; i < num_chunk; ++i) {
         free_signal_list(splitted_signals[i]);
     }
     vtr::free(splitted_signals);
 
     /* free internal output signal list expect the last one since it is the return value */
-    for (i = 0; i < num_chunk - 1; ++i) {
+    for (int i = 0; i < num_chunk - 1; ++i) {
         free_signal_list(internal_outputs[i]);
     }
     vtr::free(internal_outputs);
@@ -2078,7 +2063,6 @@ static void decode_out_port(signal_list_t *src, signal_list_t *outs, signal_list
     oassert(width != 0);
     oassert(outs->count % width == 0);
 
-    int i, j;
     int num_chunk = outs->count / width;
 
     /* initialize splitted signals */
@@ -2090,7 +2074,7 @@ static void decode_out_port(signal_list_t *src, signal_list_t *outs, signal_list
     /* adding fanout pins to src pin nets */
     signal_list_t **src_nets_fanouts = (signal_list_t **)vtr::calloc(width, sizeof(signal_list_t *));
     /* create the n fanout pin for src pins, since they are output pins of a memory */
-    for (i = 0; i < width; ++i) {
+    for (int i = 0; i < width; ++i) {
         npin_t *src_pin = src->pins[i];
         /* validate that it is output */
         oassert(src_pin->type == OUTPUT);
@@ -2098,7 +2082,7 @@ static void decode_out_port(signal_list_t *src, signal_list_t *outs, signal_list
         /* init the related sig list */
         src_nets_fanouts[i] = init_signal_list();
         /* add fanouts */
-        for (j = 0; j < num_chunk; j++) {
+        for (int j = 0; j < num_chunk; j++) {
             npin_t *new_pin = allocate_npin();
             /* adding fanout pin to the src_pin net */
             add_fanout_pin_to_net(src_pin->net, new_pin);
@@ -2109,18 +2093,18 @@ static void decode_out_port(signal_list_t *src, signal_list_t *outs, signal_list
 
     /* create multiplexers */
     nnode_t **muxes = (nnode_t **)vtr::calloc(num_chunk, sizeof(nnode_t *));
-    for (i = 0; i < num_chunk; ++i) {
+    for (int i = 0; i < num_chunk; ++i) {
         /* mux inputs */
         signal_list_t **mux_inputs = (signal_list_t **)vtr::calloc(2, sizeof(signal_list_t *));
         mux_inputs[0] = init_signal_list();
         /* the first port of the first mux should be driven by PAD node */
-        for (j = 0; j < width; j++) {
+        for (int j = 0; j < width; j++) {
             add_pin_to_signal_list(mux_inputs[0], get_pad_pin(netlist));
         }
 
         /* hook the splitted signals[i] as the second mux input */
         mux_inputs[1] = init_signal_list();
-        for (j = 0; j < width; j++) {
+        for (int j = 0; j < width; j++) {
             add_pin_to_signal_list(mux_inputs[1], src_nets_fanouts[j]->pins[i]);
         }
 
@@ -2143,11 +2127,11 @@ static void decode_out_port(signal_list_t *src, signal_list_t *outs, signal_list
     }
 
     // CLEAN UP
-    for (i = 0; i < num_chunk; ++i) {
+    for (int i = 0; i < num_chunk; ++i) {
         free_signal_list(splitted_signals[i]);
     }
     vtr::free(splitted_signals);
-    for (i = 0; i < width; ++i) {
+    for (int i = 0; i < width; ++i) {
         free_signal_list(src_nets_fanouts[i]);
     }
     vtr::free(src_nets_fanouts);
@@ -2163,15 +2147,14 @@ static void decode_out_port(signal_list_t *src, signal_list_t *outs, signal_list
  */
 static void cleanup_block_memory_old_node(nnode_t *old_node)
 {
-    int i;
-    for (i = 0; i < old_node->num_input_pins; ++i) {
+    for (int i = 0; i < old_node->num_input_pins; ++i) {
         npin_t *pin = old_node->input_pins[i];
 
         if (pin)
             old_node->input_pins[i] = NULL;
     }
 
-    for (i = 0; i < old_node->num_output_pins; ++i) {
+    for (int i = 0; i < old_node->num_output_pins; ++i) {
         npin_t *pin = old_node->output_pins[i];
 
         if (pin)
diff --git a/parmys-plugin/src/hard_block.cc b/parmys-plugin/src/hard_block.cc
index 0c9ca2dcd..f2735a3fd 100644
--- a/parmys-plugin/src/hard_block.cc
+++ b/parmys-plugin/src/hard_block.cc
@@ -286,11 +286,11 @@ void output_hard_blocks_yosys(Yosys::Design *design)
 
 void instantiate_hard_block(nnode_t *node, short mark, netlist_t * /*netlist*/)
 {
-    int i, port, index;
+    int port, index;
 
     port = index = 0;
     /* Give names to the output pins */
-    for (i = 0; i < node->num_output_pins; i++) {
+    for (int i = 0; i < node->num_output_pins; i++) {
         if (node->output_pins[i]->name == NULL)
             node->output_pins[i]->name = make_full_ref_name(node->name, NULL, NULL, node->output_pins[i]->mapping, i);
         // node->output_pins[i]->name = make_full_ref_name(node->name, NULL, NULL, node->output_pins[i]->mapping,
diff --git a/parmys-plugin/src/hard_soft_logic_mixer.cc b/parmys-plugin/src/hard_soft_logic_mixer.cc
index 674e840b3..3ac8571f9 100644
--- a/parmys-plugin/src/hard_soft_logic_mixer.cc
+++ b/parmys-plugin/src/hard_soft_logic_mixer.cc
@@ -21,7 +21,7 @@
 #include <vector>
 
 #include "multiplier.h" // instantiate_simple_soft_multiplier
-#include "odin_error.h"  // error_message
+#include "odin_error.h" // error_message
 
 HardSoftLogicMixer::HardSoftLogicMixer()
 {
diff --git a/parmys-plugin/src/memory.cc b/parmys-plugin/src/memory.cc
index fda6bf0c8..0038decc8 100644
--- a/parmys-plugin/src/memory.cc
+++ b/parmys-plugin/src/memory.cc
@@ -129,11 +129,10 @@ void copy_input_port_to_memory(nnode_t *node, signal_list_t *signalsvar, const c
  */
 void remap_input_port_to_memory(nnode_t *node, signal_list_t *signals, const char *port_name)
 {
-    int i;
     int j = node->num_input_pins;
 
     // Make sure the port is not already assigned.
-    for (i = 0; i < j; i++) {
+    for (int i = 0; i < j; i++) {
         npin_t *pin = node->input_pins[i];
         if (!strcmp(pin->mapping, port_name)) {
             error_message(NETLIST, node->loc, "Attempted to reassign output port %s to memory %s.", port_name, node->name);
@@ -145,7 +144,7 @@ void remap_input_port_to_memory(nnode_t *node, signal_list_t *signals, const cha
     add_input_port_information(node, signals->count);
 
     // Add the new port.
-    for (i = 0; i < signals->count; i++, j++) {
+    for (int i = 0; i < signals->count; i++, j++) {
         npin_t *pin = signals->pins[i];
         if (strcmp(pin->mapping, port_name)) {
             if (pin->mapping)
@@ -163,11 +162,10 @@ void remap_input_port_to_memory(nnode_t *node, signal_list_t *signals, const cha
  */
 void add_input_port_to_memory(nnode_t *node, signal_list_t *signalsvar, const char *port_name)
 {
-    int i;
     int j = node->num_input_pins;
 
     // Make sure the port is not already assigned.
-    for (i = 0; i < j; i++) {
+    for (int i = 0; i < j; i++) {
         npin_t *pin = node->input_pins[i];
         if (!strcmp(pin->mapping, port_name)) {
             error_message(NETLIST, node->loc, "Attempted to reassign input port %s to memory %s.", port_name, node->name);
@@ -179,7 +177,7 @@ void add_input_port_to_memory(nnode_t *node, signal_list_t *signalsvar, const ch
     add_input_port_information(node, signalsvar->count);
 
     // Add the new port.
-    for (i = 0; i < signalsvar->count; i++, j++) {
+    for (int i = 0; i < signalsvar->count; i++, j++) {
         npin_t *pin = signalsvar->pins[i];
         if (pin->mapping) {
             vtr::free(pin->mapping);
@@ -196,12 +194,11 @@ void add_input_port_to_memory(nnode_t *node, signal_list_t *signalsvar, const ch
  */
 void add_output_port_to_memory(nnode_t *node, signal_list_t *signals, const char *port_name)
 {
-    int i;
     int j = node->num_output_pins;
 
     // Make sure the port is not already assigned.
     // TODO: more complicated logic needs to be implementd this is temporary solution
-    for (i = 0; i < j; i++) {
+    for (int i = 0; i < j; i++) {
         npin_t *pin = node->output_pins[i];
         if (!strcmp(pin->mapping, port_name)) {
             error_message(NETLIST, node->loc, "Attempted to reassign output port %s to node %s.", port_name, node->name);
@@ -214,7 +211,7 @@ void add_output_port_to_memory(nnode_t *node, signal_list_t *signals, const char
     add_output_port_information(node, signals->count);
 
     // Add the new port.
-    for (i = 0; i < signals->count; i++, j++) {
+    for (int i = 0; i < signals->count; i++, j++) {
         npin_t *pin = signals->pins[i];
         if (pin->mapping) {
             vtr::free(pin->mapping);
@@ -322,9 +319,8 @@ void split_sp_memory_depth(nnode_t *node, int split_size)
         return;
     }
 
-    int i;
     signal_list_t *new_addr = init_signal_list();
-    for (i = 1; i < signals->addr->count; i++)
+    for (int i = 1; i < signals->addr->count; i++)
         add_pin_to_signal_list(new_addr, signals->addr->pins[i]);
 
     /* Create the new memory node */
@@ -388,7 +384,7 @@ void split_sp_memory_depth(nnode_t *node, int split_size)
     add_output_port_information(new_mem_node2, signals->out->count);
 
     /* Copy over the output pins for the new memory */
-    for (i = 0; i < signals->data->count; i++) {
+    for (int i = 0; i < signals->data->count; i++) {
         nnode_t *mux = make_2port_gate(MUX_2, 2, 2, 1, new_mem_node1, new_mem_node1->traverse_visited);
         nnode_t *not_g = make_not_gate(new_mem_node1, new_mem_node1->traverse_visited);
         add_input_pin_to_node(mux, copy_input_npin(signals->addr->pins[0]), 0);
@@ -450,12 +446,11 @@ void split_dp_memory_depth(nnode_t *node, int split_size)
 
     signal_list_t *new_addr1 = init_signal_list();
 
-    int i;
-    for (i = 1; i < signals->addr1->count; i++)
+    for (int i = 1; i < signals->addr1->count; i++)
         add_pin_to_signal_list(new_addr1, signals->addr1->pins[i]);
 
     signal_list_t *new_addr2 = init_signal_list();
-    for (i = 1; i < signals->addr2->count; i++)
+    for (int i = 1; i < signals->addr2->count; i++)
         add_pin_to_signal_list(new_addr2, signals->addr2->pins[i]);
 
     /* Create the new memory node */
@@ -542,7 +537,7 @@ void split_dp_memory_depth(nnode_t *node, int split_size)
     add_output_port_information(new_mem_node2, signals->out2->count);
 
     /* Copy over the output pins for the new memory */
-    for (i = 0; i < signals->data1->count; i++) {
+    for (int i = 0; i < signals->data1->count; i++) {
         nnode_t *mux = make_2port_gate(MUX_2, 2, 2, 1, new_mem_node1, new_mem_node1->traverse_visited);
         nnode_t *not_g = make_not_gate(new_mem_node1, new_mem_node1->traverse_visited);
         add_input_pin_to_node(mux, copy_input_npin(signals->addr1->pins[0]), 0);
@@ -575,7 +570,7 @@ void split_dp_memory_depth(nnode_t *node, int split_size)
     }
 
     /* Copy over the output pins for the new memory */
-    for (i = 0; i < signals->data1->count; i++) {
+    for (int i = 0; i < signals->data1->count; i++) {
         nnode_t *mux = make_2port_gate(MUX_2, 2, 2, 1, new_mem_node1, new_mem_node1->traverse_visited);
         nnode_t *not_g = make_not_gate(new_mem_node1, new_mem_node1->traverse_visited);
         add_input_pin_to_node(mux, copy_input_npin(signals->addr2->pins[0]), 0);
@@ -637,10 +632,9 @@ void split_sp_memory_width(nnode_t *node, int target_size)
         // If we don't need to split, put the original node back.
         sp_memory_list = insert_in_vptr_list(sp_memory_list, node);
     } else {
-        int i;
         int data_pins_moved = 0;
         int output_pins_moved = 0;
-        for (i = 0; i < num_memories; i++) {
+        for (int i = 0; i < num_memories; i++) {
             nnode_t *new_node = allocate_nnode(node->loc);
             new_node->name = append_string(node->name, "-%d", i);
             sp_memory_list = insert_in_vptr_list(sp_memory_list, new_node);
@@ -651,21 +645,19 @@ void split_sp_memory_width(nnode_t *node, int target_size)
             new_node->traverse_visited = node->traverse_visited;
             new_node->node_data = NULL;
 
-            int j;
-            for (j = 0; j < node->num_input_port_sizes; j++)
+            for (int j = 0; j < node->num_input_port_sizes; j++)
                 add_input_port_information(new_node, 0);
 
             add_output_port_information(new_node, 0);
 
             int index = 0;
             int old_index = 0;
-            for (j = 0; j < node->num_input_port_sizes; j++) {
+            for (int j = 0; j < node->num_input_port_sizes; j++) {
                 // Move this node's share of data pins out of the data port of the original node.
                 if (j == data_port_number) {
                     // Skip over data pins we've already moved.
                     old_index += data_pins_moved;
-                    int k;
-                    for (k = 0; k < target_size && data_pins_moved < data_port_size; k++) {
+                    for (int k = 0; k < target_size && data_pins_moved < data_port_size; k++) {
                         allocate_more_input_pins(new_node, 1);
                         new_node->input_port_sizes[j]++;
                         remap_pin_to_new_node(node->input_pins[old_index], new_node, index);
@@ -677,8 +669,7 @@ void split_sp_memory_width(nnode_t *node, int target_size)
                     // Skip over pins we have yet to copy.
                     old_index += remaining_data_pins;
                 } else {
-                    int k;
-                    for (k = 0; k < node->input_port_sizes[j]; k++) {
+                    for (int k = 0; k < node->input_port_sizes[j]; k++) {
                         allocate_more_input_pins(new_node, 1);
                         new_node->input_port_sizes[j]++;
                         // Copy pins for all but the last memory. the last one get the original pins moved to it.
@@ -696,8 +687,7 @@ void split_sp_memory_width(nnode_t *node, int target_size)
             old_index = 0;
             old_index += output_pins_moved;
 
-            int k;
-            for (k = 0; k < target_size && output_pins_moved < data_port_size; k++) {
+            for (int k = 0; k < target_size && output_pins_moved < data_port_size; k++) {
                 allocate_more_output_pins(new_node, 1);
                 new_node->output_port_sizes[0]++;
                 remap_pin_to_new_node(node->output_pins[old_index], new_node, index);
@@ -749,12 +739,11 @@ void split_dp_memory_width(nnode_t *node, int target_size)
         // If we're not splitting, put the original memory node back.
         dp_memory_list = insert_in_vptr_list(dp_memory_list, node);
     } else {
-        int i;
         int data1_pins_moved = 0;
         int data2_pins_moved = 0;
         int out1_pins_moved = 0;
         int out2_pins_moved = 0;
-        for (i = 0; i < num_memories; i++) {
+        for (int i = 0; i < num_memories; i++) {
             nnode_t *new_node = allocate_nnode(node->loc);
             new_node->name = append_string(node->name, "-%d", i);
             dp_memory_list = insert_in_vptr_list(dp_memory_list, new_node);
@@ -765,19 +754,17 @@ void split_dp_memory_width(nnode_t *node, int target_size)
             new_node->traverse_visited = node->traverse_visited;
             new_node->node_data = NULL;
 
-            int j;
-            for (j = 0; j < node->num_input_port_sizes; j++)
+            for (int j = 0; j < node->num_input_port_sizes; j++)
                 add_input_port_information(new_node, 0);
 
             int index = 0;
             int old_index = 0;
-            for (j = 0; j < node->num_input_port_sizes; j++) {
+            for (int j = 0; j < node->num_input_port_sizes; j++) {
                 // Move this node's share of data pins out of the data port of the original node.
                 if (j == data1_port_number) {
                     // Skip over data pins we've already moved.
                     old_index += data1_pins_moved;
-                    int k;
-                    for (k = 0; k < target_size && data1_pins_moved < data1_port_size; k++) {
+                    for (int k = 0; k < target_size && data1_pins_moved < data1_port_size; k++) {
                         allocate_more_input_pins(new_node, 1);
                         new_node->input_port_sizes[j]++;
                         remap_pin_to_new_node(node->input_pins[old_index], new_node, index);
@@ -791,8 +778,7 @@ void split_dp_memory_width(nnode_t *node, int target_size)
                 } else if (j == data2_port_number) {
                     // Skip over data pins we've already moved.
                     old_index += data2_pins_moved;
-                    int k;
-                    for (k = 0; k < target_size && data2_pins_moved < data2_port_size; k++) {
+                    for (int k = 0; k < target_size && data2_pins_moved < data2_port_size; k++) {
                         allocate_more_input_pins(new_node, 1);
                         new_node->input_port_sizes[j]++;
                         remap_pin_to_new_node(node->input_pins[old_index], new_node, index);
@@ -804,8 +790,7 @@ void split_dp_memory_width(nnode_t *node, int target_size)
                     // Skip over pins we have yet to copy.
                     old_index += remaining_data_pins;
                 } else {
-                    int k;
-                    for (k = 0; k < node->input_port_sizes[j]; k++) {
+                    for (int k = 0; k < node->input_port_sizes[j]; k++) {
                         allocate_more_input_pins(new_node, 1);
                         new_node->input_port_sizes[j]++;
                         // Copy pins for all but the last memory. the last one get the original pins moved to it.
@@ -819,18 +804,17 @@ void split_dp_memory_width(nnode_t *node, int target_size)
                 }
             }
 
-            for (j = 0; j < node->num_output_port_sizes; j++)
+            for (int j = 0; j < node->num_output_port_sizes; j++)
                 add_output_port_information(new_node, 0);
 
             index = 0;
             old_index = 0;
-            for (j = 0; j < node->num_output_port_sizes; j++) {
+            for (int j = 0; j < node->num_output_port_sizes; j++) {
                 // Move this node's share of data pins out of the data port of the original node.
                 if (j == out1_port_number) {
                     // Skip over data pins we've already moved.
                     old_index += out1_pins_moved;
-                    int k;
-                    for (k = 0; k < target_size && out1_pins_moved < out1_port_size; k++) {
+                    for (int k = 0; k < target_size && out1_pins_moved < out1_port_size; k++) {
                         allocate_more_output_pins(new_node, 1);
                         new_node->output_port_sizes[j]++;
                         remap_pin_to_new_node(node->output_pins[old_index], new_node, index);
@@ -844,8 +828,7 @@ void split_dp_memory_width(nnode_t *node, int target_size)
                 } else if (j == out2_port_number) {
                     // Skip over data pins we've already moved.
                     old_index += out2_pins_moved;
-                    int k;
-                    for (k = 0; k < target_size && out2_pins_moved < out2_port_size; k++) {
+                    for (int k = 0; k < target_size && out2_pins_moved < out2_port_size; k++) {
                         allocate_more_output_pins(new_node, 1);
                         new_node->output_port_sizes[j]++;
                         remap_pin_to_new_node(node->output_pins[old_index], new_node, index);
@@ -1152,11 +1135,10 @@ void pad_memory_output_port(nnode_t *node, netlist_t * /*netlist*/, t_model *mod
         allocate_more_output_pins(node, diff);
 
         // Shift other pins to the right, if any.
-        int i;
-        for (i = node->num_output_pins - 1; i >= port_index + target_size; i--)
+        for (int i = node->num_output_pins - 1; i >= port_index + target_size; i--)
             move_output_pin(node, i - diff, i);
 
-        for (i = port_index + port_size; i < port_index + target_size; i++) {
+        for (int i = port_index + port_size; i < port_index + target_size; i++) {
             // Add new pins to the higher order spots.
             npin_t *new_pin = allocate_npin();
             // Pad outputs with a unique and descriptive name to avoid collisions.
@@ -1196,11 +1178,10 @@ void pad_memory_input_port(nnode_t *node, netlist_t *netlist, t_model *model, co
         allocate_more_input_pins(node, diff);
 
         // Shift other pins to the right, if any.
-        int i;
-        for (i = node->num_input_pins - 1; i >= port_index + target_size; i--)
+        for (int i = node->num_input_pins - 1; i >= port_index + target_size; i--)
             move_input_pin(node, i - diff, i);
 
-        for (i = port_index + port_size; i < port_index + target_size; i++) {
+        for (int i = port_index + port_size; i < port_index + target_size; i++) {
             add_input_pin_to_node(node, get_pad_pin(netlist), i);
             if (node->input_pins[i]->mapping) {
                 vtr::free(node->input_pins[i]->mapping);
@@ -1337,8 +1318,7 @@ sp_ram_signals *get_sp_ram_signals(nnode_t *node)
     signals->we = NULL;
     signals->clk = NULL;
 
-    int i;
-    for (i = 0; i < node->num_input_pins; i++) {
+    for (int i = 0; i < node->num_input_pins; i++) {
         npin_t *pin = node->input_pins[i];
         if (!strcmp(pin->mapping, "addr"))
             add_pin_to_signal_list(signals->addr, pin);
@@ -1358,7 +1338,7 @@ sp_ram_signals *get_sp_ram_signals(nnode_t *node)
     oassert(signals->data->count >= 1);
     oassert(signals->data->count == node->num_output_pins);
 
-    for (i = 0; i < node->num_output_pins; i++) {
+    for (int i = 0; i < node->num_output_pins; i++) {
         npin_t *pin = node->output_pins[i];
         if (!strcmp(pin->mapping, "out"))
             add_pin_to_signal_list(signals->out, pin);
@@ -1398,8 +1378,7 @@ dp_ram_signals *get_dp_ram_signals(nnode_t *node)
     signals->we2 = NULL;
     signals->clk = NULL;
 
-    int i;
-    for (i = 0; i < node->num_input_pins; i++) {
+    for (int i = 0; i < node->num_input_pins; i++) {
         npin_t *pin = node->input_pins[i];
         if (!strcmp(pin->mapping, "addr1"))
             add_pin_to_signal_list(signals->addr1, pin);
@@ -1429,7 +1408,7 @@ dp_ram_signals *get_dp_ram_signals(nnode_t *node)
     oassert(signals->data1->count + signals->data2->count == node->num_output_pins);
 
     // Separate output signals according to mapping.
-    for (i = 0; i < node->num_output_pins; i++) {
+    for (int i = 0; i < node->num_output_pins; i++) {
         npin_t *pin = node->output_pins[i];
         if (!strcmp(pin->mapping, "out1"))
             add_pin_to_signal_list(signals->out1, pin);
@@ -1498,8 +1477,7 @@ void instantiate_soft_single_port_ram(nnode_t *node, short mark, netlist_t *netl
         // The output multiplexer determines which memory cell is connected to the output register.
         nnode_t *output_mux = make_2port_gate(MULTI_PORT_MUX, num_addr, num_addr, 1, node, mark);
 
-        int j;
-        for (j = 0; j < num_addr; j++) {
+        for (int j = 0; j < num_addr; j++) {
             npin_t *address_pin = decoder->pins[j];
             /* Check that the input pin is driven */
             oassert(address_pin->net->num_driver_pins || address_pin->net == netlist->zero_net || address_pin->net == netlist->one_net ||
@@ -1580,8 +1558,7 @@ void instantiate_soft_dual_port_ram(nnode_t *node, short mark, netlist_t *netlis
     nnode_t **and2_gates = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * num_addr);
     nnode_t **or_gates = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * num_addr);
 
-    int i;
-    for (i = 0; i < num_addr; i++) {
+    for (int i = 0; i < num_addr; i++) {
         npin_t *addr1_pin = decoder1->pins[i];
         npin_t *addr2_pin = decoder2->pins[i];
 
@@ -1619,7 +1596,7 @@ void instantiate_soft_dual_port_ram(nnode_t *node, short mark, netlist_t *netlis
         or_gates[i] = or_g;
     }
 
-    for (i = 0; i < data_width; i++) {
+    for (int i = 0; i < data_width; i++) {
         npin_t *data1_pin = signals->data1->pins[i];
         npin_t *data2_pin = signals->data2->pins[i];
 
@@ -1627,8 +1604,7 @@ void instantiate_soft_dual_port_ram(nnode_t *node, short mark, netlist_t *netlis
         nnode_t *output_mux1 = make_2port_gate(MULTI_PORT_MUX, num_addr, num_addr, 1, node, mark);
         nnode_t *output_mux2 = make_2port_gate(MULTI_PORT_MUX, num_addr, num_addr, 1, node, mark);
 
-        int j;
-        for (j = 0; j < num_addr; j++) {
+        for (int j = 0; j < num_addr; j++) {
             npin_t *addr1_pin = decoder1->pins[j];
             npin_t *addr2_pin = decoder2->pins[j];
 
@@ -2030,7 +2006,6 @@ void resolve_single_port_ram(nnode_t *node, uintptr_t traverse_mark_number, netl
      * DATAOUT: output port [0]
      */
 
-    int i;
     int SP_ADDR_width = node->input_port_sizes[0];
     int SP_CLK_width = node->input_port_sizes[1]; // should be 1
     int SP_DATA_width = node->input_port_sizes[2];
@@ -2048,7 +2023,7 @@ void resolve_single_port_ram(nnode_t *node, uintptr_t traverse_mark_number, netl
 
     /* INPUTS */
     /* adding the addr signals */
-    for (i = 0; i < SP_ADDR_width; i++) {
+    for (int i = 0; i < SP_ADDR_width; i++) {
         npin_t *pin = node->input_pins[offset + i];
         /* detach from the main node, since it will be connected to a new dpram */
         pin->node->input_pins[pin->pin_node_idx] = NULL;
@@ -2066,7 +2041,7 @@ void resolve_single_port_ram(nnode_t *node, uintptr_t traverse_mark_number, netl
     offset += 1;
 
     /* adding the data signals */
-    for (i = 0; i < SP_DATA_width; i++) {
+    for (int i = 0; i < SP_DATA_width; i++) {
         /* hook the data1 pin to new node */
         npin_t *pin = node->input_pins[offset + i];
         /* in case of padding, pins have not been remapped, need to detach them from the BRAM node */
@@ -2085,7 +2060,7 @@ void resolve_single_port_ram(nnode_t *node, uintptr_t traverse_mark_number, netl
     /* OUTPUT */
     /* adding the output signals */
     offset = 0;
-    for (i = 0; i < SP_OUT_width; i++) {
+    for (int i = 0; i < SP_OUT_width; i++) {
         /* hook the data1 pin to new node */
         npin_t *pin = node->output_pins[offset + i];
         /* in case of padding, pins have not been remapped, need to detach them from the BRAM node */
@@ -2136,7 +2111,6 @@ void resolve_dual_port_ram(nnode_t *node, uintptr_t traverse_mark_number, netlis
      * DATAOUT1: output port [0]
      * DATAOUT2: output port [1]
      */
-    int i;
     int DP_ADDR1_width = node->input_port_sizes[0];
     int DP_ADDR2_width = node->input_port_sizes[1];
     int DP_CLK_width = node->input_port_sizes[2]; // should be 1
@@ -2162,7 +2136,7 @@ void resolve_dual_port_ram(nnode_t *node, uintptr_t traverse_mark_number, netlis
 
     /* INPUTS */
     /* adding the addr1 signals */
-    for (i = 0; i < max_addr_width; i++) {
+    for (int i = 0; i < max_addr_width; i++) {
         /* hook the addr1 pin to new node */
         if (i < DP_ADDR1_width) {
             npin_t *pin = node->input_pins[offset + i];
@@ -2177,7 +2151,7 @@ void resolve_dual_port_ram(nnode_t *node, uintptr_t traverse_mark_number, netlis
     offset += DP_ADDR1_width;
 
     /* adding the addr2 signals */
-    for (i = 0; i < max_addr_width; i++) {
+    for (int i = 0; i < max_addr_width; i++) {
         /* hook the addr1 pin to new node */
         if (i < DP_ADDR2_width) {
             npin_t *pin = node->input_pins[offset + i];
@@ -2200,7 +2174,7 @@ void resolve_dual_port_ram(nnode_t *node, uintptr_t traverse_mark_number, netlis
     offset += 1;
 
     /* adding the data1 signals */
-    for (i = 0; i < DP_DATA1_width; i++) {
+    for (int i = 0; i < DP_DATA1_width; i++) {
         /* hook the data1 pin to new node */
         npin_t *pin = node->input_pins[offset + i];
         /* in case of padding, pins have not been remapped, need to detach them from the BRAM node */
@@ -2211,7 +2185,7 @@ void resolve_dual_port_ram(nnode_t *node, uintptr_t traverse_mark_number, netlis
     offset += DP_DATA1_width;
 
     /* adding the data2 signals */
-    for (i = 0; i < DP_DATA2_width; i++) {
+    for (int i = 0; i < DP_DATA2_width; i++) {
         /* hook the data1 pin to new node */
         npin_t *pin = node->input_pins[offset + i];
         /* in case of padding, pins have not been remapped, need to detach them from the BRAM node */
@@ -2238,7 +2212,7 @@ void resolve_dual_port_ram(nnode_t *node, uintptr_t traverse_mark_number, netlis
     /* OUTPUT */
     offset = 0;
     /* adding the output1 signals */
-    for (i = 0; i < DP_OUT1_width; i++) {
+    for (int i = 0; i < DP_OUT1_width; i++) {
         /* hook the data1 pin to new node */
         npin_t *pin = node->output_pins[offset + i];
         /* in case of padding, pins have not been remapped, need to detach them from the BRAM node */
@@ -2249,7 +2223,7 @@ void resolve_dual_port_ram(nnode_t *node, uintptr_t traverse_mark_number, netlis
     offset += DP_OUT1_width;
 
     /* adding the output1 signals */
-    for (i = 0; i < DP_OUT2_width; i++) {
+    for (int i = 0; i < DP_OUT2_width; i++) {
         /* hook the data1 pin to new node */
         npin_t *pin = node->output_pins[offset + i];
         /* in case of padding, pins have not been remapped, need to detach them from the BRAM node */
diff --git a/parmys-plugin/src/mixing_optimization.cc b/parmys-plugin/src/mixing_optimization.cc
index c4e0148b3..5ddd8adbb 100644
--- a/parmys-plugin/src/mixing_optimization.cc
+++ b/parmys-plugin/src/mixing_optimization.cc
@@ -20,11 +20,11 @@
 #include <stdint.h> // INT_MAX
 #include <vector>
 
+#include "adder.h"                 // hard_adders
 #include "hard_soft_logic_mixer.h" // HardSoftLogicMixer
-#include "adder.h"               // hard_adders
-#include "multiplier.h"          // instantiate_simple_soft_multiplier
-#include "netlist_statistic.h"    // mixing_optimization_stats
-#include "odin_error.h"           // error_message
+#include "multiplier.h"            // instantiate_simple_soft_multiplier
+#include "netlist_statistic.h"     // mixing_optimization_stats
+#include "odin_error.h"            // error_message
 
 void MixingOpt::scale_counts()
 {
diff --git a/parmys-plugin/src/multiplier.cc b/parmys-plugin/src/multiplier.cc
index 25841d676..fadbdc1b5 100644
--- a/parmys-plugin/src/multiplier.cc
+++ b/parmys-plugin/src/multiplier.cc
@@ -100,7 +100,6 @@ void instantiate_simple_soft_multiplier(nnode_t *node, short mark, netlist_t *ne
     int multiplicand_offset_index;
     int multiplier_offset_index;
     int current_index;
-    int i, j;
 
     /* need for an carry-ripple-adder for each of the bits of port B. */
     /* good question of which is better to put on the bottom of multiplier.  Larger means more smaller adds, or small is
@@ -124,7 +123,7 @@ void instantiate_simple_soft_multiplier(nnode_t *node, short mark, netlist_t *ne
     partial_products = (nnode_t ***)vtr::malloc(sizeof(nnode_t **) * multiplicand_width);
 
     /* generate the AND partial products */
-    for (i = 0; i < multiplicand_width; i++) {
+    for (int i = 0; i < multiplicand_width; i++) {
         /* create the memory for each AND gate needed for the levels of partial products */
         partial_products[i] = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * multiplier_width);
 
@@ -132,15 +131,15 @@ void instantiate_simple_soft_multiplier(nnode_t *node, short mark, netlist_t *ne
             adders_for_partial_products[i] = make_2port_gate(ADD, multiplier_width + 1, multiplier_width + 1, multiplier_width + 1, node, mark);
         }
 
-        for (j = 0; j < multiplier_width; j++) {
+        for (int j = 0; j < multiplier_width; j++) {
             /* create each one of the partial products */
             partial_products[i][j] = make_1port_logic_gate(LOGICAL_AND, 2, node, mark);
         }
     }
 
     /* generate the connections to the AND gates */
-    for (i = 0; i < multiplicand_width; i++) {
-        for (j = 0; j < multiplier_width; j++) {
+    for (int i = 0; i < multiplicand_width; i++) {
+        for (int j = 0; j < multiplier_width; j++) {
             /* hookup the input of B to each AND gate */
             if (j == 0) {
                 /* IF - this is the first time we are mapping multiplicand port then can remap */
@@ -162,9 +161,9 @@ void instantiate_simple_soft_multiplier(nnode_t *node, short mark, netlist_t *ne
     }
 
     /* hookup each of the adders */
-    for (i = 0; i < multiplicand_width - 1; i++) // -1 since the first stage is a combo of partial products while all others are part of tree
+    for (int i = 0; i < multiplicand_width - 1; i++) // -1 since the first stage is a combo of partial products while all others are part of tree
     {
-        for (j = 0; j < multiplier_width + 1; j++) // +1 since adders are one greater than multwidth to pass carry
+        for (int j = 0; j < multiplier_width + 1; j++) // +1 since adders are one greater than multwidth to pass carry
         {
             /* join to port 1 of the add one of the partial products.  */
             if (i == 0) {
@@ -194,10 +193,10 @@ void instantiate_simple_soft_multiplier(nnode_t *node, short mark, netlist_t *ne
 
     current_index = 0;
     /* hookup the outputs */
-    for (i = 0; i < width; i++) {
+    for (int i = 0; i < width; i++) {
         if (multiplicand_width == 1) {
             // this is undealt with
-            error_message(AST, node->loc, "%s", "Cannot create soft multiplier with multiplicand width of 1.\n");
+            error_message(RESOLVE, node->loc, "%s", "Cannot create soft multiplier with multiplicand width of 1.\n");
         } else if (i == 0) {
             /* IF - this is the LSbit, then we use a pass through from the partial product */
             remap_pin_to_new_node(node->output_pins[i], partial_products[0][0], 0);
@@ -221,19 +220,19 @@ void instantiate_simple_soft_multiplier(nnode_t *node, short mark, netlist_t *ne
     }
 
     /* soft map the adders if they need to be mapped */
-    for (i = 0; i < multiplicand_width - 1; i++) {
+    for (int i = 0; i < multiplicand_width - 1; i++) {
         instantiate_add_w_carry(adders_for_partial_products[i], mark, netlist);
     }
 
     /* Cleanup everything */
     if (adders_for_partial_products != NULL) {
-        for (i = 0; i < multiplicand_width - 1; i++) {
+        for (int i = 0; i < multiplicand_width - 1; i++) {
             free_nnode(adders_for_partial_products[i]);
         }
         vtr::free(adders_for_partial_products);
     }
     /* generate the AND partial products */
-    for (i = 0; i < multiplicand_width; i++) {
+    for (int i = 0; i < multiplicand_width; i++) {
         /* create the memory for each AND gate needed for the levels of partial products */
         if (partial_products[i] != NULL) {
             vtr::free(partial_products[i]);
@@ -277,7 +276,6 @@ static signal_list_t *implement_constant_multipication(nnode_t *node, mult_port_
 
     int IN1_width = node->input_port_sizes[0];
 
-    int i, j;
     int const_operand_offset = (port_status == mult_port_stat_e::MULTIPICAND_CONSTANT) ? IN1_width : 0;
     int const_operand_width = node->input_port_sizes[(port_status == mult_port_stat_e::MULTIPICAND_CONSTANT) ? 1 : 0];
 
@@ -291,12 +289,12 @@ static signal_list_t *implement_constant_multipication(nnode_t *node, mult_port_
 
     /* container for constatnt operand */
     signal_list_t *const_operand = init_signal_list();
-    for (i = 0; i < const_operand_width; i++) {
+    for (int i = 0; i < const_operand_width; i++) {
         add_pin_to_signal_list(const_operand, node->input_pins[const_operand_offset + i]);
     }
     /* container for variable operand */
     signal_list_t *variable_operand = init_signal_list();
-    for (i = 0; i < variable_operand_width; i++) {
+    for (int i = 0; i < variable_operand_width; i++) {
         add_pin_to_signal_list(variable_operand, node->input_pins[variable_operand_offset + i]);
     }
 
@@ -308,13 +306,13 @@ static signal_list_t *implement_constant_multipication(nnode_t *node, mult_port_
     /* to keep the record of internal outputs for connection purposes */
     signal_list_t **internal_outputs = (signal_list_t **)vtr::calloc(internal_outputs_size, sizeof(signal_list_t *));
     /* implementing the multipication using shift and add operation */
-    for (i = 0; i < node->num_output_pins + 1; i++) {
+    for (int i = 0; i < node->num_output_pins + 1; i++) {
         npin_t *pin;
         /* checking a couple conditions to avoid going further if there is not needed */
         if (i == node->num_output_pins || i == const_operand_width) {
             internal_outputs_size = i;
             /* initializing the return value */
-            for (j = 0; j < internal_outputs[i - 1]->count; j++) {
+            for (int j = 0; j < internal_outputs[i - 1]->count; j++) {
                 add_pin_to_signal_list(return_value, internal_outputs[i - 1]->pins[j]);
             }
             break;
@@ -326,7 +324,7 @@ static signal_list_t *implement_constant_multipication(nnode_t *node, mult_port_
 
         /* if the pin is GND we pass */
         if (!strcmp(pin->net->name, gnd_net->name)) {
-            for (j = 0; j < width; j++) {
+            for (int j = 0; j < width; j++) {
                 /* if the first bit of const_operand is zero we need to initiate the multipication by zero pins */
                 npin_t *internal_output_pin = (i == 0) ? get_zero_pin(netlist) : internal_outputs[i - 1]->pins[j];
                 add_pin_to_signal_list(internal_outputs[i], internal_output_pin);
@@ -336,7 +334,7 @@ static signal_list_t *implement_constant_multipication(nnode_t *node, mult_port_
         else if (!strcmp(pin->net->name, vcc_net->name)) {
             /* for the first round we do not need to shift */
             if (i == 0) {
-                for (j = 0; j < width; j++) {
+                for (int j = 0; j < width; j++) {
                     if (j < variable_operand_width) {
                         add_pin_to_signal_list(internal_outputs[0], copy_input_npin(variable_operand->pins[j]));
                     } else {
@@ -364,7 +362,7 @@ static signal_list_t *implement_constant_multipication(nnode_t *node, mult_port_
                 signal_list_t *shift_outputs = init_signal_list();
 
                 int pad_pin = variable_operand->count - 1;
-                for (j = 0; j < width; j++) {
+                for (int j = 0; j < width; j++) {
                     if (j < variable_operand_width) {
                         /* connecing the first input of the shift node */
                         add_input_pin_to_node(shift_node, copy_input_npin(variable_operand->pins[j]), j);
@@ -400,7 +398,7 @@ static signal_list_t *implement_constant_multipication(nnode_t *node, mult_port_
                 nnode_t *add_node = make_2port_gate(ADD, width, width, width, node, mark);
                 add_list = insert_in_vptr_list(add_list, add_node);
                 /* connecting add node input pins */
-                for (j = 0; j < width; j++) {
+                for (int j = 0; j < width; j++) {
                     /* connecting the previous stage internal outputs as the first add inputs */
                     add_input_pin_to_node(add_node, internal_outputs[i - 1]->pins[j], j);
 
@@ -435,7 +433,7 @@ static signal_list_t *implement_constant_multipication(nnode_t *node, mult_port_
     free_signal_list(const_operand);
     free_signal_list(variable_operand);
 
-    for (i = 0; i < internal_outputs_size; i++) {
+    for (int i = 0; i < internal_outputs_size; i++) {
         if (internal_outputs[i])
             free_signal_list(internal_outputs[i]);
     }
@@ -461,9 +459,8 @@ void connect_constant_mult_outputs(nnode_t *node, signal_list_t *output_signal_l
     int output_width = node->num_output_pins;
     oassert(output_width == output_signal_list->count);
 
-    int i;
     /* hook the output signals into the node output */
-    for (i = 0; i < output_signal_list->count; i++) {
+    for (int i = 0; i < output_signal_list->count; i++) {
         npin_t *pin = output_signal_list->pins[i];
         /* join nets of the output pin and the calculated pin */
         nnode_t *buf_node = make_1port_gate(BUF_NODE, 1, 1, node, node->traverse_visited);
@@ -476,7 +473,7 @@ void connect_constant_mult_outputs(nnode_t *node, signal_list_t *output_signal_l
 
     // CLEAN UP
     free_signal_list(output_signal_list);
-    for (i = 0; i < node->num_input_pins; i++) {
+    for (int i = 0; i < node->num_input_pins; i++) {
         npin_t *pin = node->input_pins[i];
 
         /* detach from input nets */
@@ -829,8 +826,6 @@ void define_mult_function_yosys(nnode_t *node, Yosys::Module *module, Yosys::Des
  *---------------------------------------------------------------------*/
 void init_split_multiplier(nnode_t *node, nnode_t *ptr, int offa, int a, int offb, int b, nnode_t *node_a, nnode_t *node_b)
 {
-    int i;
-
     /* Copy properties from original node */
     ptr->type = node->type;
     ptr->related_ast_node = node->related_ast_node;
@@ -849,14 +844,14 @@ void init_split_multiplier(nnode_t *node, nnode_t *ptr, int offa, int a, int off
     /* Set the number of pins and re-locate previous pin entries */
     ptr->num_input_pins = a + b;
     ptr->input_pins = (npin_t **)vtr::malloc(sizeof(void *) * (a + b));
-    for (i = 0; i < a; i++) {
+    for (int i = 0; i < a; i++) {
         if (node_a)
             add_input_pin_to_node(ptr, copy_input_npin(node_a->input_pins[i]), i);
         else
             remap_pin_to_new_node(node->input_pins[i + offa], ptr, i);
     }
 
-    for (i = 0; i < b; i++) {
+    for (int i = 0; i < b; i++) {
         if (node_b)
             add_input_pin_to_node(ptr, copy_input_npin(node_b->input_pins[i + node_b->input_port_sizes[0]]), i + a);
         else
@@ -866,7 +861,7 @@ void init_split_multiplier(nnode_t *node, nnode_t *ptr, int offa, int a, int off
     /* Prep output pins for connecting to cascaded multipliers */
     ptr->num_output_pins = a + b;
     ptr->output_pins = (npin_t **)vtr::malloc(sizeof(void *) * (a + b));
-    for (i = 0; i < a + b; i++)
+    for (int i = 0; i < a + b; i++)
         ptr->output_pins[i] = NULL;
 
     return;
@@ -880,16 +875,13 @@ void init_split_multiplier(nnode_t *node, nnode_t *ptr, int offa, int a, int off
  *-----------------------------------------------------------------------*/
 void init_multiplier_adder(nnode_t *node, nnode_t *parent, int a, int b)
 {
-    int i, size;
-
     node->type = ADD;
     node->related_ast_node = parent->related_ast_node;
     node->traverse_visited = parent->traverse_visited;
     node->node_data = NULL;
 
     /* Set size to be the maximum input size */
-    size = a;
-    size = (size < b) ? b : size;
+    int size = (a < b) ? b : a;
 
     /* Set new port sizes and parameters */
     node->num_input_port_sizes = 2;
@@ -903,13 +895,13 @@ void init_multiplier_adder(nnode_t *node, nnode_t *parent, int a, int b)
     /* Set the number of input pins and clear pin entries */
     node->num_input_pins = a + b;
     node->input_pins = (npin_t **)vtr::malloc(sizeof(void *) * (a + b));
-    for (i = 0; i < a + b; i++)
+    for (int i = 0; i < a + b; i++)
         node->input_pins[i] = NULL;
 
     /* Set the number of output pins and clear pin entries */
     node->num_output_pins = size;
     node->output_pins = (npin_t **)vtr::malloc(sizeof(void *) * size);
-    for (i = 0; i < size; i++)
+    for (int i = 0; i < size; i++)
         node->output_pins[i] = NULL;
 
     add_list = insert_in_vptr_list(add_list, node);
@@ -944,7 +936,7 @@ void init_multiplier_adder(nnode_t *node, nnode_t *parent, int a, int b)
 void split_multiplier(nnode_t *node, int a0, int b0, int a1, int b1, netlist_t *netlist)
 {
     nnode_t *a0b0, *a0b1, *a1b0, *a1b1, *addsmall, *addbig;
-    int i, size;
+    int size;
 
     /* Check for a legitimate split */
     oassert(node->input_port_sizes[0] == (a0 + a1));
@@ -998,33 +990,33 @@ void split_multiplier(nnode_t *node, int a0, int b0, int a1, int b1, netlist_t *
     init_multiplier_adder(addbig, addsmall, addsmall->num_output_pins, a0b0->num_output_pins - b0 + a1b1->num_output_pins);
 
     // connect inputs to port a of addsmall
-    for (i = 0; i < a1b0->num_output_pins; i++)
+    for (int i = 0; i < a1b0->num_output_pins; i++)
         connect_nodes(a1b0, i, addsmall, i);
     add_input_pin_to_node(addsmall, get_zero_pin(netlist), a1b0->num_output_pins);
     // connect inputs to port b of addsmall
-    for (i = 0; i < a0b1->num_output_pins; i++)
+    for (int i = 0; i < a0b1->num_output_pins; i++)
         connect_nodes(a0b1, i, addsmall, i + addsmall->input_port_sizes[0]);
     add_input_pin_to_node(addsmall, get_zero_pin(netlist), a0b1->num_output_pins + addsmall->input_port_sizes[0]);
 
     // connect inputs to port a of addbig
     size = addsmall->num_output_pins;
-    for (i = 0; i < size; i++)
+    for (int i = 0; i < size; i++)
         connect_nodes(addsmall, i, addbig, i);
 
     // connect inputs to port b of addbig
-    for (i = b0; i < a0b0->output_port_sizes[0]; i++)
+    for (int i = b0; i < a0b0->output_port_sizes[0]; i++)
         connect_nodes(a0b0, i, addbig, i - b0 + size);
     size = size + a0b0->output_port_sizes[0] - b0;
-    for (i = 0; i < a1b1->output_port_sizes[0]; i++)
+    for (int i = 0; i < a1b1->output_port_sizes[0]; i++)
         connect_nodes(a1b1, i, addbig, i + size);
 
     // remap the multiplier outputs coming directly from a0b0
-    for (i = 0; i < b0; i++) {
+    for (int i = 0; i < b0; i++) {
         remap_pin_to_new_node(node->output_pins[i], a0b0, i);
     }
 
     // remap the multiplier outputs coming from addbig
-    for (i = 0; i < addbig->num_output_pins; i++) {
+    for (int i = 0; i < addbig->num_output_pins; i++) {
         remap_pin_to_new_node(node->output_pins[i + b0], addbig, i);
     }
 
@@ -1054,7 +1046,6 @@ void split_multiplier(nnode_t *node, int a0, int b0, int a1, int b1, netlist_t *
 void split_multiplier_a(nnode_t *node, int a0, int a1, int b)
 {
     nnode_t *a0b, *a1b, *addsmall;
-    int i;
 
     /* Check for a legitimate split */
     oassert(node->input_port_sizes[0] == (a0 + a1));
@@ -1084,16 +1075,16 @@ void split_multiplier_a(nnode_t *node, int a0, int a1, int b)
     init_multiplier_adder(addsmall, a0b, b, a1b->num_output_pins);
 
     /* Connect pins for addsmall */
-    for (i = a0; i < a0b->num_output_pins; i++)
+    for (int i = a0; i < a0b->num_output_pins; i++)
         connect_nodes(a0b, i, addsmall, i - a0);
-    for (i = 0; i < a1b->num_output_pins; i++)
+    for (int i = 0; i < a1b->num_output_pins; i++)
         connect_nodes(a1b, i, addsmall, i + addsmall->input_port_sizes[0]);
 
     /* Move original output pins for multiply to new outputs */
-    for (i = 0; i < a0; i++)
+    for (int i = 0; i < a0; i++)
         remap_pin_to_new_node(node->output_pins[i], a0b, i);
 
-    for (i = 0; i < addsmall->num_output_pins; i++)
+    for (int i = 0; i < addsmall->num_output_pins; i++)
         remap_pin_to_new_node(node->output_pins[i + a0], addsmall, i);
 
     // CLEAN UP
@@ -1122,7 +1113,6 @@ void split_multiplier_a(nnode_t *node, int a0, int a1, int b)
 void split_multiplier_b(nnode_t *node, int a, int b1, int b0)
 {
     nnode_t *ab0, *ab1, *addsmall;
-    int i;
 
     /* Check for a legitimate split */
     oassert(node->input_port_sizes[0] == a);
@@ -1152,18 +1142,18 @@ void split_multiplier_b(nnode_t *node, int a, int b1, int b0)
     init_multiplier_adder(addsmall, ab1, ab1->num_output_pins, a + b1);
 
     /* Connect pins for addsmall */
-    for (i = b0; i < ab0->output_port_sizes[0]; i++)
+    for (int i = b0; i < ab0->output_port_sizes[0]; i++)
         connect_nodes(ab0, i, addsmall, i - b0);
-    for (i = ab0->output_port_sizes[0] - b0; i < a + b1; i++) /* Sign extend */
+    for (int i = ab0->output_port_sizes[0] - b0; i < a + b1; i++) /* Sign extend */
         connect_nodes(ab0, ab0->output_port_sizes[0] - 1, addsmall, i);
-    for (i = b1 + a; i < (2 * (a + b1)); i++)
+    for (int i = b1 + a; i < (2 * (a + b1)); i++)
         connect_nodes(ab1, i - (b1 + a), addsmall, i);
 
     /* Move original output pins for multiply to new outputs */
-    for (i = 0; i < b0; i++)
+    for (int i = 0; i < b0; i++)
         remap_pin_to_new_node(node->output_pins[i], ab0, i);
 
-    for (i = b0; i < node->num_output_pins; i++)
+    for (int i = b0; i < node->num_output_pins; i++)
         remap_pin_to_new_node(node->output_pins[i], addsmall, i - b0);
 
     // CLEAN UP
@@ -1182,7 +1172,7 @@ void split_multiplier_b(nnode_t *node, int a, int b1, int b0)
  *-----------------------------------------------------------------------*/
 void pad_multiplier(nnode_t *node, netlist_t *netlist)
 {
-    int diffa, diffb, diffout, i;
+    int diffa, diffb, diffout;
     int sizea, sizeb, sizeout;
     int ina, inb;
 
@@ -1230,12 +1220,12 @@ void pad_multiplier(nnode_t *node, netlist_t *netlist)
 
         /* Shift pins for expansion of first input pins */
         if (diffa != 0) {
-            for (i = 1; i <= sizeb; i++) {
+            for (int i = 1; i <= sizeb; i++) {
                 move_input_pin(node, sizea + sizeb - i, node->num_input_pins - diffb - i);
             }
 
             /* Connect unused first input pins to zero/pad pin */
-            for (i = 0; i < diffa; i++) {
+            for (int i = 0; i < diffa; i++) {
                 if (configuration.mult_padding == 0)
                     add_input_pin_to_node(node, get_zero_pin(netlist), i + sizea);
                 else
@@ -1247,7 +1237,7 @@ void pad_multiplier(nnode_t *node, netlist_t *netlist)
 
         if (diffb != 0) {
             /* Connect unused second input pins to zero/pad pin */
-            for (i = 1; i <= diffb; i++) {
+            for (int i = 1; i <= diffb; i++) {
                 if (configuration.mult_padding == 0)
                     add_input_pin_to_node(node, get_zero_pin(netlist), node->num_input_pins - i);
                 else
@@ -1261,7 +1251,7 @@ void pad_multiplier(nnode_t *node, netlist_t *netlist)
     /* Expand the outputs */
     if (diffout != 0) {
         allocate_more_output_pins(node, diffout);
-        for (i = 0; i < diffout; i++) {
+        for (int i = 0; i < diffout; i++) {
             // Add new pins to the higher order spots.
             npin_t *new_pin = allocate_npin();
             // Pad outputs with a unique and descriptive name to avoid collisions.
@@ -1583,7 +1573,6 @@ void split_soft_multiplier(nnode_t *node, netlist_t *netlist)
  * -------------------------------------------------------------------------*/
 mult_port_stat_e is_constant_multipication(nnode_t *node, netlist_t *netlist)
 {
-    int i;
     mult_port_stat_e is_const = mult_port_stat_e::mult_port_stat_END;
 
     /**
@@ -1598,7 +1587,7 @@ mult_port_stat_e is_constant_multipication(nnode_t *node, netlist_t *netlist)
 
     bool multiplier_const = true;
     /* going through the IN1 port */
-    for (i = 0; i < IN1_width; i++) {
+    for (int i = 0; i < IN1_width; i++) {
         /* corresponding pin of the port */
         npin_t *pin = node->input_pins[i];
         /* atleast equal to VCC or GND */
@@ -1612,7 +1601,7 @@ mult_port_stat_e is_constant_multipication(nnode_t *node, netlist_t *netlist)
 
     bool multiplicand_const = true;
     /* going through the IN1 port */
-    for (i = 0; i < IN2_width; i++) {
+    for (int i = 0; i < IN2_width; i++) {
         /* corresponding pin of the port */
         npin_t *pin = node->input_pins[IN1_width + i];
         /* atleast equal to VCC or GND */
@@ -1684,7 +1673,6 @@ static nnode_t *perform_const_mult_optimization(mult_port_stat_e mult_port_stat,
 {
     oassert(node->traverse_visited == traverse_mark_number);
 
-    int i;
     /* constatnt and variable port of the given multipication */
     signal_list_t *const_port = init_signal_list();
     signal_list_t *var_port = init_signal_list();
@@ -1694,23 +1682,23 @@ static nnode_t *perform_const_mult_optimization(mult_port_stat_e mult_port_stat,
     /* initialize const and var port signals */
     if (mult_port_stat == mult_port_stat_e::MULTIPICAND_CONSTANT) {
         /* adding var port pins to signal list */
-        for (i = 0; i < node->input_port_sizes[0]; i++) {
+        for (int i = 0; i < node->input_port_sizes[0]; i++) {
             add_pin_to_signal_list(var_port, node->input_pins[i]);
         }
         var_signedness = node->attributes->port_a_signed;
         /* adding const port pins to signal list */
-        for (i = node->input_port_sizes[0]; i < node->num_input_pins; i++) {
+        for (int i = node->input_port_sizes[0]; i < node->num_input_pins; i++) {
             add_pin_to_signal_list(const_port, node->input_pins[i]);
         }
         const_signedness = node->attributes->port_b_signed;
     } else if (mult_port_stat == mult_port_stat_e::MULTIPLIER_CONSTANT) {
         /* adding var port pins to signal list */
-        for (i = 0; i < node->input_port_sizes[0]; i++) {
+        for (int i = 0; i < node->input_port_sizes[0]; i++) {
             add_pin_to_signal_list(const_port, node->input_pins[i]);
         }
         const_signedness = node->attributes->port_a_signed;
         /* adding const port pins to signal list */
-        for (i = node->input_port_sizes[0]; i < node->num_input_pins; i++) {
+        for (int i = node->input_port_sizes[0]; i < node->num_input_pins; i++) {
             add_pin_to_signal_list(var_port, node->input_pins[i]);
         }
         var_signedness = node->attributes->port_b_signed;
@@ -1719,7 +1707,7 @@ static nnode_t *perform_const_mult_optimization(mult_port_stat_e mult_port_stat,
     int idx = -1;
     signal_list_t *new_const_port = init_signal_list();
     /* iterating over const port to determine useless ports */
-    for (i = const_port->count; i > 0; i--) {
+    for (int i = const_port->count; i > 0; i--) {
         npin_t *pin = const_port->pins[i - 1];
         /* starting from the end and prune pins connected to GND */
         if (!strcmp(pin->net->name, netlist->one_net->name)) {
@@ -1731,7 +1719,7 @@ static nnode_t *perform_const_mult_optimization(mult_port_stat_e mult_port_stat,
         }
     }
     /* initializing new const port */
-    for (i = 0; i < idx; i++) {
+    for (int i = 0; i < idx; i++) {
         npin_t *pin = const_port->pins[i];
         add_pin_to_signal_list(new_const_port, pin);
     }
@@ -1750,16 +1738,16 @@ static nnode_t *perform_const_mult_optimization(mult_port_stat_e mult_port_stat,
         new_node->attributes->port_b_signed = const_signedness;
     }
     /* adding first port */
-    for (i = 0; i < first_port->count; i++) {
+    for (int i = 0; i < first_port->count; i++) {
         remap_pin_to_new_node(first_port->pins[i], new_node, offset + i);
     }
     offset += first_port->count;
     /* adding second port */
-    for (i = 0; i < second_port->count; i++) {
+    for (int i = 0; i < second_port->count; i++) {
         remap_pin_to_new_node(second_port->pins[i], new_node, offset + i);
     }
     /* remap output ports */
-    for (i = 0; i < node->num_output_pins; i++) {
+    for (int i = 0; i < node->num_output_pins; i++) {
         remap_pin_to_new_node(node->output_pins[i], new_node, i);
     }
 
@@ -1852,14 +1840,13 @@ void clean_multipliers()
  *-----------------------------------------------------------------------*/
 static void cleanup_mult_old_node(nnode_t *nodeo, netlist_t *netlist)
 {
-    int i;
     /* Disconnecting input pins from the old node side */
-    for (i = 0; i < nodeo->num_input_pins; i++) {
+    for (int i = 0; i < nodeo->num_input_pins; i++) {
         nodeo->input_pins[i] = NULL;
     }
 
     /* connecting the extra output pins to the gnd node */
-    for (i = 0; i < nodeo->num_output_pins; i++) {
+    for (int i = 0; i < nodeo->num_output_pins; i++) {
         npin_t *output_pin = nodeo->output_pins[i];
 
         if (output_pin && output_pin->node) {
diff --git a/parmys-plugin/src/netlist_check.cc b/parmys-plugin/src/netlist_check.cc
deleted file mode 100644
index 58f21110c..000000000
--- a/parmys-plugin/src/netlist_check.cc
+++ /dev/null
@@ -1,735 +0,0 @@
-/*
- * Copyright 2022 CAS—Atlantic (University of New Brunswick, CASA)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- */
-#include "odin_globals.h"
-#include "odin_types.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "netlist_utils.h"
-#include "odin_util.h"
-// #include "ast_util.h"
-#include "netlist_check.h"
-#include "netlist_visualizer.h"
-#include "string_cache.h"
-#include "vtr_memory.h"
-
-void levelize_backwards(netlist_t *netlist);
-void levelize_backwards_clean_checking_for_liveness(netlist_t *netlist);
-void levelize_forwards(netlist_t *netlist);
-void levelize_forwards_clean_checking_for_combo_loop_and_liveness(netlist_t *netlist);
-nnode_t *find_node_at_top_of_combo_loop(nnode_t *start_node);
-void depth_first_traversal_check_if_forward_leveled(short marker_value, netlist_t *netlist);
-void depth_first_traverse_check_if_forward_leveled(nnode_t *node, uintptr_t traverse_mark_number);
-
-void sequential_levelized_dfs(short marker_value, netlist_t *netlist);
-void depth_first_traverse_until_next_ff_or_output(nnode_t *node, nnode_t *calling_node, uintptr_t traverse_mark_number, int seq_level,
-                                                  netlist_t *netlist);
-
-/*---------------------------------------------------------------------------------------------
- * (function: check_netlist)
- * Note: netlist passed in needs to be initialized by allocate_netlist() to make sure correctly initialized.
- *-------------------------------------------------------------------------------------------*/
-void check_netlist(netlist_t *netlist)
-{
-    /* create a graph output of this netlist */
-    if (configuration.output_netlist_graphs) {
-        /* Path is where we are */
-        graphVizOutputNetlist(configuration.debug_output_path, "net", 1, netlist);
-    }
-}
-
-void depth_traverse_check_combinational_loop(nnode_t *node, short start, STRING_CACHE *in_path);
-
-/*---------------------------------------------------------------------------------------------
- * (function: depth_first_traversal_check_if_forward_leveled()
- *-------------------------------------------------------------------------------------------*/
-void sequential_levelized_dfs(short marker_value, netlist_t *netlist)
-{
-    int i;
-
-    int sequential_level = 0;
-    netlist->num_sequential_levels = 1;
-    netlist->num_at_sequential_level = (int *)vtr::realloc(netlist->num_at_sequential_level, sizeof(int) * netlist->num_sequential_levels);
-    netlist->sequential_level_nodes =
-      (nnode_t ***)vtr::realloc(netlist->sequential_level_nodes, sizeof(nnode_t **) * (netlist->num_sequential_levels));
-    netlist->sequential_level_nodes[netlist->num_sequential_levels - 1] = NULL;
-    netlist->num_at_sequential_level[netlist->num_sequential_levels - 1] = 0;
-
-    /* allocate the first list.  Includes vcc and gnd */
-    netlist->sequential_level_nodes[sequential_level] =
-      (nnode_t **)vtr::realloc(netlist->sequential_level_nodes[sequential_level], sizeof(nnode_t *) * (netlist->num_top_input_nodes + 2));
-
-    /* add all the primary nodes to the first level */
-    for (i = 0; i < netlist->num_top_input_nodes; i++) {
-        if (netlist->top_input_nodes[i] != NULL) {
-            netlist->sequential_level_nodes[sequential_level][i] = netlist->top_input_nodes[i];
-            netlist->num_at_sequential_level[sequential_level]++;
-            /* record the level */
-            netlist->top_input_nodes[i]->sequential_level = sequential_level;
-        }
-    }
-
-    /* now traverse the ground and vcc pins */
-    if (netlist->gnd_node != NULL) {
-        netlist->sequential_level_nodes[sequential_level][i] = netlist->gnd_node;
-        netlist->num_at_sequential_level[sequential_level]++;
-        /* record the level */
-        netlist->gnd_node->sequential_level = sequential_level;
-    }
-    if (netlist->vcc_node != NULL) {
-        netlist->sequential_level_nodes[sequential_level][i + 1] = netlist->vcc_node;
-        netlist->num_at_sequential_level[sequential_level]++;
-        /* record the level */
-        netlist->vcc_node->sequential_level = sequential_level;
-    }
-
-    while (netlist->num_at_sequential_level[sequential_level] > 0) {
-        /* WHILE there are PIs at this level */
-
-        /* Allocate the next level of storage since this part is a forward thing of the next flip-flops at the level */
-        /* add anothersequential level.  Note, needs to be done before we depth first the current combinational level. */
-        netlist->num_sequential_levels++;
-        netlist->sequential_level_nodes =
-          (nnode_t ***)vtr::realloc(netlist->sequential_level_nodes, sizeof(nnode_t **) * (netlist->num_sequential_levels));
-        netlist->num_at_sequential_level = (int *)vtr::realloc(netlist->num_at_sequential_level, sizeof(int) * netlist->num_sequential_levels);
-        netlist->sequential_level_nodes[netlist->num_sequential_levels - 1] = NULL;
-        netlist->num_at_sequential_level[netlist->num_sequential_levels - 1] = 0;
-
-        /* deals with recording the combinational nodes that terminate this level */
-        netlist->num_sequential_level_combinational_termination_nodes++;
-        netlist->sequential_level_combinational_termination_node =
-          (nnode_t ***)vtr::realloc(netlist->sequential_level_combinational_termination_node,
-                                    sizeof(nnode_t **) * (netlist->num_sequential_level_combinational_termination_nodes));
-        netlist->num_at_sequential_level_combinational_termination_node =
-          (int *)vtr::realloc(netlist->num_at_sequential_level_combinational_termination_node,
-                              sizeof(int) * netlist->num_sequential_level_combinational_termination_nodes);
-        netlist->sequential_level_combinational_termination_node[netlist->num_sequential_level_combinational_termination_nodes - 1] = NULL;
-        netlist->num_at_sequential_level_combinational_termination_node[netlist->num_sequential_level_combinational_termination_nodes - 1] = 0;
-
-        /* go through the entire list, mark with sequential level, and build the next list */
-        for (i = 0; i < netlist->num_at_sequential_level[sequential_level]; i++) {
-            depth_first_traverse_until_next_ff_or_output(netlist->sequential_level_nodes[sequential_level][i], NULL, marker_value, sequential_level,
-                                                         netlist);
-        }
-
-        /* now potentially do next sequential level */
-        sequential_level++;
-    }
-}
-
-/*---------------------------------------------------------------------------------------------
- * (function: depth_first_traverse_until_next_ff_or_output)
- *-------------------------------------------------------------------------------------------*/
-void depth_first_traverse_until_next_ff_or_output(nnode_t *node, nnode_t *calling_node, uintptr_t traverse_mark_number, int seq_level,
-                                                  netlist_t *netlist)
-{
-    int i, j;
-    nnode_t *next_node;
-    nnet_t *next_net;
-
-    /* first, check if the clalling node should be recorderd */
-    if ((calling_node != NULL) && ((node->type == FF_NODE) || (node->type == OUTPUT_NODE))) {
-        /* IF - the this node is the end of a sequential level then the node before needs to be stored */
-        if (calling_node->sequential_terminator == false) {
-            /* IF - it hasn't been stored before */
-            netlist->num_at_sequential_level_combinational_termination_node[netlist->num_sequential_level_combinational_termination_nodes - 1]++;
-            netlist->sequential_level_combinational_termination_node[netlist->num_sequential_level_combinational_termination_nodes - 1] =
-              (nnode_t **)vtr::realloc(
-                netlist->sequential_level_combinational_termination_node[netlist->num_sequential_level_combinational_termination_nodes - 1],
-                sizeof(nnode_t *) *
-                  netlist->num_at_sequential_level_combinational_termination_node[netlist->num_sequential_level_combinational_termination_nodes - 1]);
-            netlist->sequential_level_combinational_termination_node
-              [netlist->num_sequential_level_combinational_termination_nodes - 1]
-              [netlist->num_at_sequential_level_combinational_termination_node[netlist->num_sequential_level_combinational_termination_nodes - 1] -
-               1] = calling_node;
-            /* mark the node locally */
-            calling_node->sequential_terminator = true;
-        }
-    }
-
-    if (node->traverse_visited == traverse_mark_number) {
-        /* if already visited then nothing to do */
-        return;
-    } else if (node->type == CLOCK_NODE) {
-        /* since this is a node that touches all flip flops, don't analyze for sequential level */
-        return;
-    } else if (node->type == FF_NODE) {
-        /* ELSE IF - this is a ff_node, so add it to the list for the next sequential level */
-        /* mark as traversed */
-        node->traverse_visited = traverse_mark_number;
-        node->sequential_level = seq_level + 1;
-
-        /* add to the next sequntial list */
-        netlist->num_at_sequential_level[seq_level + 1]++;
-        netlist->sequential_level_nodes[seq_level + 1] = (nnode_t **)vtr::realloc(
-          netlist->sequential_level_nodes[seq_level + 1], sizeof(nnode_t *) * netlist->num_at_sequential_level[seq_level + 1]);
-        netlist->sequential_level_nodes[seq_level + 1][netlist->num_at_sequential_level[seq_level + 1] - 1] = node;
-
-        return;
-    } else {
-        /* ELSE - this is a node so depth visit it */
-
-        node->traverse_visited = traverse_mark_number;
-        node->sequential_level = seq_level;
-
-        for (i = 0; i < node->num_output_pins; i++) {
-            if (node->output_pins[i]->net == NULL)
-                continue;
-
-            next_net = node->output_pins[i]->net;
-            for (j = 0; j < next_net->num_fanout_pins; j++) {
-                if (next_net->fanout_pins[j] == NULL)
-                    continue;
-
-                next_node = next_net->fanout_pins[j]->node;
-                if (next_node == NULL)
-                    continue;
-
-                depth_first_traverse_until_next_ff_or_output(next_node, node, traverse_mark_number, seq_level, netlist);
-            }
-        }
-    }
-}
-
-/*---------------------------------------------------------------------------------------------
- * (function: depth_first_traversal_check_if_forward_leveled()
- *-------------------------------------------------------------------------------------------*/
-void depth_first_traversal_check_if_forward_leveled(short marker_value, netlist_t *netlist)
-{
-    int i;
-
-    /* start with the primary input list */
-    for (i = 0; i < netlist->num_top_input_nodes; i++) {
-        if (netlist->top_input_nodes[i] != NULL) {
-            depth_first_traverse_check_if_forward_leveled(netlist->top_input_nodes[i], marker_value);
-        }
-    }
-    /* now traverse the ground and vcc pins */
-    if (netlist->gnd_node != NULL)
-        depth_first_traverse_check_if_forward_leveled(netlist->gnd_node, marker_value);
-    if (netlist->vcc_node != NULL)
-        depth_first_traverse_check_if_forward_leveled(netlist->vcc_node, marker_value);
-}
-
-/*---------------------------------------------------------------------------------------------
- * (function: depth_first_traverse)
- *-------------------------------------------------------------------------------------------*/
-void depth_first_traverse_check_if_forward_leveled(nnode_t *node, uintptr_t traverse_mark_number)
-{
-    int i, j;
-    nnode_t *next_node;
-    nnet_t *next_net;
-
-    if (node->traverse_visited == traverse_mark_number) {
-        return;
-    } else {
-        /* ELSE - this is a new node so depth visit it */
-
-        node->traverse_visited = traverse_mark_number;
-
-        for (i = 0; i < node->num_output_pins; i++) {
-            if (node->output_pins[i]->net == NULL)
-                continue;
-
-            next_net = node->output_pins[i]->net;
-            for (j = 0; j < next_net->num_fanout_pins; j++) {
-                if (next_net->fanout_pins[j] == NULL)
-                    continue;
-
-                next_node = next_net->fanout_pins[j]->node;
-                if (next_node == NULL)
-                    continue;
-
-                if ((next_node->forward_level == -1) && (next_node->type != FF_NODE)) {
-                    graphVizOutputCombinationalNet(configuration.debug_output_path, "combo_loop", COMBO_LOOP_ERROR,
-                                                   /*next_node);*/ find_node_at_top_of_combo_loop(next_node));
-                    oassert(false);
-                }
-
-                depth_first_traverse_check_if_forward_leveled(next_node, traverse_mark_number);
-            }
-        }
-    }
-}
-/*---------------------------------------------------------------------------------------------
- * (function: levelize_forwards)
- * Note that this levlizing is combinational delay levels where the assumption is that
- * each node has a unit delay.
- *-------------------------------------------------------------------------------------------*/
-void levelize_forwards(netlist_t *netlist)
-{
-    int i, j, k;
-    int cur_for_level;
-    short more_levels = true;
-    short all_visited = true;
-
-    /* add all the POs and FFs POs as forward level 0 */
-    cur_for_level = 0;
-    netlist->num_forward_levels = 1;
-    netlist->num_at_forward_level = (int *)vtr::realloc(netlist->num_at_forward_level, sizeof(int) * netlist->num_forward_levels);
-    netlist->forward_levels = (nnode_t ***)vtr::realloc(netlist->forward_levels, sizeof(nnode_t **) * (netlist->num_forward_levels));
-    netlist->forward_levels[netlist->num_forward_levels - 1] = NULL;
-    netlist->num_at_forward_level[netlist->num_forward_levels - 1] = 0;
-    for (i = 0; i < netlist->num_top_input_nodes + 3; i++) {
-        if ((i == netlist->num_top_input_nodes) && (netlist->vcc_node != NULL)) {
-            /* vcc */
-            netlist->forward_levels[cur_for_level] = (nnode_t **)vtr::realloc(netlist->forward_levels[cur_for_level],
-                                                                              sizeof(nnode_t *) * (netlist->num_at_forward_level[cur_for_level] + 1));
-            netlist->forward_levels[cur_for_level][netlist->num_at_forward_level[cur_for_level]] = netlist->vcc_node;
-            netlist->num_at_forward_level[cur_for_level]++;
-            netlist->vcc_node->forward_level = 0;
-        } else if ((i == netlist->num_top_input_nodes + 1) && (netlist->gnd_node != NULL)) {
-            /* gnd */
-            netlist->forward_levels[cur_for_level] = (nnode_t **)vtr::realloc(netlist->forward_levels[cur_for_level],
-                                                                              sizeof(nnode_t *) * (netlist->num_at_forward_level[cur_for_level] + 1));
-            netlist->forward_levels[cur_for_level][netlist->num_at_forward_level[cur_for_level]] = netlist->gnd_node;
-            netlist->num_at_forward_level[cur_for_level]++;
-            netlist->gnd_node->forward_level = 0;
-        } else if ((i == netlist->num_top_input_nodes + 2) && (netlist->pad_node != NULL)) {
-            /* pad */
-            netlist->forward_levels[cur_for_level] = (nnode_t **)vtr::realloc(netlist->forward_levels[cur_for_level],
-                                                                              sizeof(nnode_t *) * (netlist->num_at_forward_level[cur_for_level] + 1));
-            netlist->forward_levels[cur_for_level][netlist->num_at_forward_level[cur_for_level]] = netlist->pad_node;
-            netlist->num_at_forward_level[cur_for_level]++;
-            netlist->pad_node->forward_level = 0;
-        } else if (i >= netlist->num_top_input_nodes) {
-            continue;
-        } else if (netlist->top_input_nodes[i] != NULL) {
-            netlist->forward_levels[cur_for_level] = (nnode_t **)vtr::realloc(netlist->forward_levels[cur_for_level],
-                                                                              sizeof(nnode_t *) * (netlist->num_at_forward_level[cur_for_level] + 1));
-            netlist->forward_levels[cur_for_level][netlist->num_at_forward_level[cur_for_level]] = netlist->top_input_nodes[i];
-            netlist->num_at_forward_level[cur_for_level]++;
-            netlist->top_input_nodes[i]->forward_level = 0;
-        }
-    }
-    for (i = 0; i < netlist->num_ff_nodes; i++) {
-        if (netlist->ff_nodes[i] != NULL) {
-            netlist->forward_levels[cur_for_level] = (nnode_t **)vtr::realloc(netlist->forward_levels[cur_for_level],
-                                                                              sizeof(nnode_t *) * (netlist->num_at_forward_level[cur_for_level] + 1));
-            netlist->forward_levels[cur_for_level][netlist->num_at_forward_level[cur_for_level]] = netlist->ff_nodes[i];
-            netlist->num_at_forward_level[cur_for_level]++;
-            netlist->ff_nodes[i]->forward_level = 0;
-        }
-    }
-
-    while (more_levels) {
-        /* another level so add space */
-        netlist->num_forward_levels++;
-        netlist->num_at_forward_level = (int *)vtr::realloc(netlist->num_at_forward_level, sizeof(int) * netlist->num_forward_levels);
-        netlist->forward_levels = (nnode_t ***)vtr::realloc(netlist->forward_levels, sizeof(nnode_t **) * (netlist->num_forward_levels));
-        netlist->forward_levels[netlist->num_forward_levels - 1] = NULL;
-        netlist->num_at_forward_level[netlist->num_forward_levels - 1] = 0;
-
-        /* go through each element at this level */
-        for (i = 0; i < netlist->num_at_forward_level[cur_for_level]; i++) {
-            nnode_t *current_node = netlist->forward_levels[cur_for_level][i];
-            if (current_node == NULL)
-                continue;
-
-            /* at each node visit all the inputs */
-            for (j = 0; j < current_node->num_output_pins; j++) {
-                int *fanouts_visited;
-                if (current_node->output_pins[j] == NULL)
-                    continue;
-
-                for (k = 0; k < current_node->output_pins[j]->net->num_fanout_pins; k++) {
-                    int idx;
-                    /* visit the fanout point */
-                    if ((current_node->output_pins[j] == NULL) || (current_node->output_pins[j]->net == NULL) ||
-                        (current_node->output_pins[j]->net->fanout_pins[k] == NULL))
-                        continue;
-
-                    nnode_t *output_node = current_node->output_pins[j]->net->fanout_pins[k]->node;
-
-                    if (output_node == NULL)
-                        continue;
-
-                    if (output_node->node_data == NULL) {
-                        /* if this fanout hasn't been visited yet this will be null */
-                        fanouts_visited = (int *)vtr::malloc(sizeof(int) * (output_node->num_input_pins));
-
-                        for (idx = 0; idx < output_node->num_input_pins; idx++) {
-                            fanouts_visited[idx] = -1;
-                        }
-
-                        output_node->node_data = (void *)fanouts_visited;
-                        output_node->unique_node_data_id = LEVELIZE;
-                    } else {
-                        /* ELSE - get the list */
-                        oassert(output_node->unique_node_data_id == LEVELIZE);
-                        fanouts_visited = (int *)output_node->node_data;
-                    }
-
-                    /* mark this entry as visited */
-                    fanouts_visited[current_node->output_pins[j]->net->fanout_pins[k]->pin_node_idx] = cur_for_level;
-
-                    /* check if they've all been marked */
-                    all_visited = true;
-                    for (idx = 0; idx < output_node->num_input_pins; idx++) {
-                        if (fanouts_visited[idx] == -1) {
-                            all_visited = false;
-                            break;
-                        }
-                    }
-
-                    if ((all_visited == true) && (output_node->type != FF_NODE)) {
-                        /* This one has been visited by everyone */
-                        netlist->forward_levels[cur_for_level + 1] = (nnode_t **)vtr::realloc(
-                          netlist->forward_levels[cur_for_level + 1], sizeof(nnode_t *) * (netlist->num_at_forward_level[cur_for_level + 1] + 1));
-                        netlist->forward_levels[cur_for_level + 1][netlist->num_at_forward_level[cur_for_level + 1]] = output_node;
-                        netlist->num_at_forward_level[cur_for_level + 1]++;
-
-                        output_node->forward_level = cur_for_level + 1;
-                    }
-                }
-            }
-        }
-
-        /* check if tere are more elements to procees at the next level */
-        if (netlist->num_at_forward_level[cur_for_level + 1] > 0) {
-            /* there are elements in the next set then process */
-            cur_for_level++;
-        } else {
-            /* ELSE - we've levelized forwards */
-            more_levels = false;
-        }
-    }
-}
-/*---------------------------------------------------------------------------------------------
- * (function: levelize_forwards_clean_checking_for_combo_loop_and_liveness)
- *-------------------------------------------------------------------------------------------*/
-void levelize_forwards_clean_checking_for_combo_loop_and_liveness(netlist_t *netlist)
-{
-    int i, j, k;
-    int cur_for_level;
-    short more_levels = true;
-    short all_visited = true;
-
-    cur_for_level = 0;
-
-    while (more_levels) {
-        /* go through each element at this level */
-        for (i = 0; i < netlist->num_at_forward_level[cur_for_level]; i++) {
-            nnode_t *current_node = netlist->forward_levels[cur_for_level][i];
-            if (current_node == NULL)
-                continue;
-
-            /* at each node visit all the inputs */
-            for (j = 0; j < current_node->num_output_pins; j++) {
-                int *fanouts_visited;
-                if (current_node->output_pins[j] == NULL)
-                    continue;
-
-                for (k = 0; k < current_node->output_pins[j]->net->num_fanout_pins; k++) {
-                    if ((current_node->output_pins[j] == NULL) || (current_node->output_pins[j]->net == NULL) ||
-                        (current_node->output_pins[j]->net->fanout_pins[k] == NULL))
-                        continue;
-
-                    /* visit the fanout point */
-                    nnode_t *output_node = current_node->output_pins[j]->net->fanout_pins[k]->node;
-
-                    if (output_node == NULL)
-                        continue;
-
-                    if (output_node->node_data == NULL) {
-                        oassert(output_node->unique_node_data_id == RESET);
-                    } else {
-                        int idx;
-                        /* ELSE - get the list */
-                        oassert(output_node->unique_node_data_id == LEVELIZE);
-                        fanouts_visited = (int *)output_node->node_data;
-                        output_node->node_data = NULL;
-
-                        /* check if they've all been marked */
-                        all_visited = true;
-                        for (idx = 0; idx < output_node->num_input_pins; idx++) {
-                            if (fanouts_visited[idx] == -1) {
-                                all_visited = false;
-                                break;
-                            }
-                        }
-
-                        if (all_visited == false) {
-                            /* Combo node since one of the outputs hasn'y been visisted. */
-                            error_message(
-                              NETLIST, output_node->loc,
-                              "!!!Combinational loop on forward pass.  Node %s is missing a driven pin idx %d.  Isn't neccessarily the culprit of "
-                              "the combinational loop.  Odin only detects combinational loops, but currently doesn't pinpoint.\n",
-                              output_node->name, idx);
-                        }
-                        /* free the data and reset to be used elsewhere */
-                        vtr::free(fanouts_visited);
-                        output_node->unique_node_data_id = RESET;
-                    }
-
-                    if ((output_node->backward_level == -1) && (output_node->type != FF_NODE)) {
-                        warning_message(
-                          NETLIST, output_node->loc,
-                          "Node does not connect to a primary output or FF...DEAD NODE!!!.  Node %s is not connected to a primary output.\n",
-                          output_node->name);
-                    }
-                }
-            }
-        }
-
-        /* check if tere are more elements to procees at the next level */
-        if (netlist->num_at_forward_level[cur_for_level + 1] > 0) {
-            /* there are elements in the next set then process */
-            cur_for_level++;
-        } else {
-            /* ELSE - we've levelized forwards */
-            more_levels = false;
-        }
-    }
-}
-
-/*---------------------------------------------------------------------------------------------
- * (function: levelize_backwards)
- * Note this levelizing is a reverse combinational delay count
- *-------------------------------------------------------------------------------------------*/
-void levelize_backwards(netlist_t *netlist)
-{
-    int i, j, k;
-    int cur_back_level;
-    short more_levels = true;
-    short all_visited = true;
-
-    /* add all the POs and FFs POs as backward level 0 */
-    cur_back_level = 0;
-    netlist->num_backward_levels = 1;
-    netlist->num_at_backward_level = (int *)vtr::realloc(netlist->num_at_backward_level, sizeof(int) * netlist->num_backward_levels);
-    netlist->backward_levels = (nnode_t ***)vtr::realloc(netlist->backward_levels, sizeof(nnode_t **) * (netlist->num_backward_levels));
-    netlist->backward_levels[netlist->num_backward_levels - 1] = NULL;
-    netlist->num_at_backward_level[netlist->num_backward_levels - 1] = 0;
-    for (i = 0; i < netlist->num_top_output_nodes; i++) {
-        if (netlist->top_output_nodes[i] != NULL) {
-            netlist->backward_levels[cur_back_level] = (nnode_t **)vtr::realloc(
-              netlist->backward_levels[cur_back_level], sizeof(nnode_t *) * (netlist->num_at_backward_level[cur_back_level] + 1));
-            netlist->backward_levels[cur_back_level][netlist->num_at_backward_level[cur_back_level]] = netlist->top_output_nodes[i];
-            netlist->num_at_backward_level[cur_back_level]++;
-            netlist->top_output_nodes[i]->backward_level = 0;
-        }
-    }
-    for (i = 0; i < netlist->num_ff_nodes; i++) {
-        if (netlist->ff_nodes[i] != NULL) {
-            netlist->backward_levels[cur_back_level] = (nnode_t **)vtr::realloc(
-              netlist->backward_levels[cur_back_level], sizeof(nnode_t *) * (netlist->num_at_backward_level[cur_back_level] + 1));
-            netlist->backward_levels[cur_back_level][netlist->num_at_backward_level[cur_back_level]] = netlist->ff_nodes[i];
-            netlist->num_at_backward_level[cur_back_level]++;
-            netlist->ff_nodes[i]->backward_level = 0;
-        }
-    }
-
-    while (more_levels) {
-        /* another level so add space */
-        netlist->num_backward_levels++;
-        netlist->num_at_backward_level = (int *)vtr::realloc(netlist->num_at_backward_level, sizeof(int) * netlist->num_backward_levels);
-        netlist->backward_levels = (nnode_t ***)vtr::realloc(netlist->backward_levels, sizeof(nnode_t **) * (netlist->num_backward_levels));
-        netlist->backward_levels[netlist->num_backward_levels - 1] = NULL;
-        netlist->num_at_backward_level[netlist->num_backward_levels - 1] = 0;
-
-        /* go through each element at this level */
-        for (i = 0; i < netlist->num_at_backward_level[cur_back_level]; i++) {
-            nnode_t *current_node = netlist->backward_levels[cur_back_level][i];
-            if (current_node) {
-                /* at each node visit all the inputs */
-                for (j = 0; j < current_node->num_input_pins; j++) {
-                    int *fanouts_visited = NULL;
-                    if (current_node->input_pins[j]) {
-                        /* visit the fanout point */
-                        nnet_t *fanout_net = current_node->input_pins[j]->net;
-                        if (fanout_net) {
-                            if (fanout_net->net_data == NULL) {
-                                int idx;
-                                /* if this fanout hasn't been visited yet this will be null */
-                                fanouts_visited = (int *)vtr::malloc(sizeof(int) * (fanout_net->num_fanout_pins));
-
-                                for (idx = 0; idx < fanout_net->num_fanout_pins; idx++) {
-                                    fanouts_visited[idx] = -1;
-                                }
-
-                                fanout_net->net_data = (void *)fanouts_visited;
-                                fanout_net->unique_net_data_id = LEVELIZE;
-                            } else {
-                                /* ELSE - get the list */
-                                fanouts_visited = (int *)fanout_net->net_data;
-                                oassert(fanout_net->unique_net_data_id == LEVELIZE);
-                            }
-
-                            /* mark this entry as visited */
-                            if (fanout_net->num_driver_pins != 0) {
-                                fanouts_visited[current_node->input_pins[j]->pin_net_idx] = cur_back_level;
-                            }
-
-                            /* check if they've all been marked */
-                            all_visited = true;
-                            for (k = 0; k < fanout_net->num_fanout_pins && all_visited; k++) {
-                                all_visited = (!(fanout_net->fanout_pins[k] && fanout_net->fanout_pins[k]->node && fanouts_visited[k] == -1));
-                            }
-
-                            if (all_visited) {
-                                for (k = 0; k < fanout_net->num_driver_pins; k++) {
-                                    if (!fanout_net->driver_pins[k]->node || fanout_net->driver_pins[k]->node->type == FF_NODE)
-                                        continue;
-                                    /* This one has been visited by everyone */
-                                    if (fanout_net->driver_pins[k]->node->backward_level == -1) {
-                                        /* already added to a list...this means that we won't have the correct ordering */
-                                        netlist->backward_levels[cur_back_level + 1] =
-                                          (nnode_t **)vtr::realloc(netlist->backward_levels[cur_back_level + 1],
-                                                                   sizeof(nnode_t *) * (netlist->num_at_backward_level[cur_back_level + 1] + 1));
-                                        netlist->backward_levels[cur_back_level + 1][netlist->num_at_backward_level[cur_back_level + 1]] =
-                                          fanout_net->driver_pins[k]->node;
-                                        netlist->num_at_backward_level[cur_back_level + 1]++;
-                                    }
-
-                                    fanout_net->driver_pins[k]->node->backward_level = cur_back_level + 1;
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-
-        /* check if tere are more elements to procees at the next level */
-        if (netlist->num_at_backward_level[cur_back_level + 1] > 0) {
-            /* there are elements in the next set then process */
-            cur_back_level++;
-        } else {
-            /* ELSE - we've levelized backwards */
-            more_levels = false;
-        }
-    }
-}
-
-/*---------------------------------------------------------------------------------------------
- * (function: levelize_backwards_clean_checking_for_liveness)
- *-------------------------------------------------------------------------------------------*/
-void levelize_backwards_clean_checking_for_liveness(netlist_t *netlist)
-{
-    int i, j, k;
-    int cur_back_level;
-    short more_levels = true;
-    short all_visited = true;
-
-    cur_back_level = 0;
-
-    while (more_levels) {
-        /* go through each element at this level */
-        for (i = 0; i < netlist->num_at_backward_level[cur_back_level]; i++) {
-            nnode_t *current_node = netlist->backward_levels[cur_back_level][i];
-            if (current_node == NULL)
-                continue;
-
-            /* at each node visit all the inputs */
-            for (j = 0; j < current_node->num_input_pins; j++) {
-                int *fanouts_visited;
-                if (current_node->input_pins[j] == NULL)
-                    continue;
-
-                /* visit the fanout point */
-                nnet_t *fanout_net = current_node->input_pins[j]->net;
-
-                if (fanout_net->net_data == NULL) {
-                    /* IF - already cleaned */
-                    oassert(fanout_net->unique_net_data_id == -1);
-                } else {
-                    /* ELSE - get the list */
-                    oassert(fanout_net->unique_net_data_id == LEVELIZE);
-                    fanouts_visited = (int *)fanout_net->net_data;
-                    fanout_net->net_data = NULL;
-
-                    /* check if they've all been marked */
-                    all_visited = true;
-                    for (k = 0; k < fanout_net->num_fanout_pins; k++) {
-                        if ((fanout_net->fanout_pins[k] != NULL) && (fanout_net->fanout_pins[k]->node != NULL) && (fanouts_visited[k] == -1)) {
-                            all_visited = false;
-                            break;
-                        }
-                    }
-
-                    if (all_visited == false) {
-                        /* one of these nodes was not visited on the backward analysis */
-                        warning_message(NETLIST, current_node->loc, "Liveness check on backward pass.  Node %s is missing a driving pin idx %d\n",
-                                        current_node->name, k);
-                    }
-
-                    /* free the data and reset to be used elsewhere */
-                    vtr::free(fanouts_visited);
-                    fanout_net->unique_net_data_id = -1;
-                }
-            }
-        }
-
-        /* check if tere are more elements to procees at the next level */
-        if (netlist->num_at_backward_level[cur_back_level + 1] > 0) {
-            /* there are elements in the next set then process */
-            cur_back_level++;
-        } else {
-            /* ELSE - we've levelized backwards */
-            more_levels = false;
-        }
-    }
-}
-
-/*---------------------------------------------------------------------------------------------
- * (function: find_node_at_top_of_combo_loop)
- *-------------------------------------------------------------------------------------------*/
-nnode_t *find_node_at_top_of_combo_loop(nnode_t *start_node)
-{
-    int stack_size = 1;
-    nnode_t **stack = (nnode_t **)vtr::calloc(stack_size, sizeof(nnode_t *));
-    stack[0] = start_node;
-
-    while (true) {
-        nnode_t *next_node = stack[--stack_size];
-        oassert(next_node->unique_node_data_id == LEVELIZE);
-        int *fanouts_visited = (int *)next_node->node_data;
-        next_node->node_data = NULL;
-
-        /* check if they've all been marked */
-        bool all_visited = true;
-        int idx_missed = -1;
-        for (int i = 0; i < next_node->num_input_pins; i++) {
-            if (fanouts_visited[i] == -1) {
-                all_visited = false;
-                idx_missed = i;
-                break;
-            }
-        }
-
-        if (!all_visited) {
-            for (int i = 0; i < next_node->input_pins[idx_missed]->net->num_driver_pins; i++) {
-                if (next_node->input_pins[idx_missed]->net->driver_pins[i]->node->backward_level < next_node->backward_level) {
-                    /* IF - the next node has a lower backward level than this node suggests that it is
-                     * closer to primary outputs and not in the combo loop */
-                    vtr::free(stack);
-                    return next_node;
-                }
-
-                stack_size++;
-                stack = (nnode_t **)vtr::realloc(stack, sizeof(nnode_t *) * stack_size);
-                stack[stack_size - 1] = next_node->input_pins[idx_missed]->net->driver_pins[i]->node;
-            }
-        } else {
-            vtr::free(stack);
-            return next_node;
-        }
-    }
-}
diff --git a/parmys-plugin/src/netlist_cleanup.cc b/parmys-plugin/src/netlist_cleanup.cc
index 914807c5b..b720127a1 100644
--- a/parmys-plugin/src/netlist_cleanup.cc
+++ b/parmys-plugin/src/netlist_cleanup.cc
@@ -74,8 +74,7 @@ void traverse_backward(nnode_t *node)
     if (node->node_data == VISITED_BACKWARD)
         return;                         // Already visited
     node->node_data = VISITED_BACKWARD; // Mark as visited
-    int i;
-    for (i = 0; i < node->num_input_pins; i++) {
+    for (int i = 0; i < node->num_input_pins; i++) {
         // ensure this net has a driver (i.e. skip undriven outputs)
         for (int j = 0; j < node->input_pins[i]->net->num_driver_pins; j++) {
             if (node->input_pins[i]->net->driver_pins[j]->node)
@@ -129,10 +128,9 @@ void traverse_forward(nnode_t *node, int toplevel, int remove_me)
     }
 
     /* Iterate through every fanout node */
-    int i, j;
-    for (i = 0; i < node->num_output_pins; i++) {
+    for (int i = 0; i < node->num_output_pins; i++) {
         if (node->output_pins[i] && node->output_pins[i]->net) {
-            for (j = 0; j < node->output_pins[i]->net->num_fanout_pins; j++) {
+            for (int j = 0; j < node->output_pins[i]->net->num_fanout_pins; j++) {
                 if (node->output_pins[i]->net->fanout_pins[j]) {
                     nnode_t *child = node->output_pins[i]->net->fanout_pins[j]->node;
                     if (child) {
@@ -151,8 +149,7 @@ void traverse_forward(nnode_t *node, int toplevel, int remove_me)
  * to determine which nodes have an effect on the outputs */
 void mark_output_dependencies(netlist_t *netlist)
 {
-    int i;
-    for (i = 0; i < netlist->num_top_output_nodes; i++) {
+    for (int i = 0; i < netlist->num_top_output_nodes; i++) {
         traverse_backward(netlist->top_output_nodes[i]);
     }
 }
@@ -170,8 +167,7 @@ void identify_unused_nodes(netlist_t *netlist)
     traverse_forward(netlist->gnd_node, true, false);
     traverse_forward(netlist->vcc_node, true, false);
     traverse_forward(netlist->pad_node, true, false);
-    int i;
-    for (i = 0; i < netlist->num_top_input_nodes; i++) {
+    for (int i = 0; i < netlist->num_top_input_nodes; i++) {
         traverse_forward(netlist->top_input_nodes[i], true, false);
     }
 }
@@ -181,8 +177,7 @@ void identify_unused_nodes(netlist_t *netlist)
 void remove_unused_nodes(node_list_t *remove)
 {
     while (remove != NULL && remove->node != NULL) {
-        int i;
-        for (i = 0; i < remove->node->num_input_pins; i++) {
+        for (int i = 0; i < remove->node->num_input_pins; i++) {
             npin_t *input_pin = remove->node->input_pins[i];
             /* Remove the fanout pin from the net */
             if (input_pin)
diff --git a/parmys-plugin/src/netlist_utils.cc b/parmys-plugin/src/netlist_utils.cc
index a7b9049e5..9c3fb060b 100644
--- a/parmys-plugin/src/netlist_utils.cc
+++ b/parmys-plugin/src/netlist_utils.cc
@@ -125,15 +125,13 @@ nnode_t *free_nnode(nnode_t *to_free)
  *-----------------------------------------------------------------------*/
 void allocate_more_input_pins(nnode_t *node, int width)
 {
-    int i;
-
     if (width <= 0) {
         error_message(NETLIST, node->loc, "tried adding input pins for width %d <= 0 %s\n", width, node->name);
         return;
     }
 
     node->input_pins = (npin_t **)vtr::realloc(node->input_pins, sizeof(npin_t *) * (node->num_input_pins + width));
-    for (i = 0; i < width; i++) {
+    for (int i = 0; i < width; i++) {
         node->input_pins[node->num_input_pins + i] = NULL;
     }
     node->num_input_pins += width;
@@ -145,15 +143,13 @@ void allocate_more_input_pins(nnode_t *node, int width)
  *-----------------------------------------------------------------------*/
 void allocate_more_output_pins(nnode_t *node, int width)
 {
-    int i;
-
     if (width <= 0) {
         error_message(NETLIST, node->loc, "tried adding output pins for width %d <= 0 %s\n", width, node->name);
         return;
     }
 
     node->output_pins = (npin_t **)vtr::realloc(node->output_pins, sizeof(npin_t *) * (node->num_output_pins + width));
-    for (i = 0; i < width; i++) {
+    for (int i = 0; i < width; i++) {
         node->output_pins[node->num_output_pins + i] = NULL;
     }
     node->num_output_pins += width;
@@ -676,10 +672,9 @@ signal_list_t *init_signal_list()
  *-------------------------------------------------------------------------------------------*/
 bool is_constant_signal(signal_list_t *signal, netlist_t *netlist)
 {
-    int i;
     bool is_constant = true;
 
-    for (i = 0; i < signal->count; i++) {
+    for (int i = 0; i < signal->count; i++) {
         nnet_t *net = signal->pins[i]->net;
         /* neither connected to GND nor VCC */
         if (strcmp(net->name, netlist->zero_net->name) && strcmp(net->name, netlist->one_net->name)) {
@@ -708,8 +703,7 @@ long constant_signal_value(signal_list_t *signal, netlist_t *netlist)
 
     long return_value = 0;
 
-    int i;
-    for (i = 0; i < signal->count; i++) {
+    for (int i = 0; i < signal->count; i++) {
         nnet_t *net = signal->pins[i]->net;
         /* if the pin is connected to VCC */
         if (!strcmp(net->name, netlist->one_net->name)) {
@@ -734,7 +728,6 @@ signal_list_t *create_constant_signal(const long long value, const int desired_w
 {
     signal_list_t *list = init_signal_list();
 
-    long i;
     std::string binary_value_str = string_of_radix_to_bitstring(std::to_string(value), 10);
     long width = binary_value_str.length();
 
@@ -752,7 +745,7 @@ signal_list_t *create_constant_signal(const long long value, const int desired_w
     bool extension = false;
 
     /* create vcc/gnd signal pins */
-    for (i = start; i > end; i--) {
+    for (long i = start; i > end; i--) {
         if (binary_value_str[i - 1] == '1') {
             add_pin_to_signal_list(list, get_one_pin(netlist));
         } else {
@@ -788,16 +781,15 @@ signal_list_t *prune_signal(signal_list_t *signalsvar, long signal_width, long p
     if (prune_size >= signal_width)
         return (signalsvar);
 
-    int i, j;
     /* new signal list */
     signal_list_t *new_signals = NULL;
     signal_list_t **splitted_signals = split_signal_list(signalsvar, signal_width);
 
     /* iterating over signals to prune them */
-    for (i = 0; i < num_of_signals; i++) {
+    for (int i = 0; i < num_of_signals; i++) {
         /* init pruned signal list */
         signal_list_t *new_signal = init_signal_list();
-        for (j = 0; j < signal_width; j++) {
+        for (int j = 0; j < signal_width; j++) {
             npin_t *pin = splitted_signals[i]->pins[j];
             /* adding pin to new signal list */
             if (j < prune_size) {
@@ -842,14 +834,11 @@ void add_pin_to_signal_list(signal_list_t *list, npin_t *pin)
  *-------------------------------------------------------------------------------------------*/
 signal_list_t *combine_lists(signal_list_t **signal_lists, int num_signal_lists)
 {
-    int i;
-    for (i = 1; i < num_signal_lists; i++) {
+    for (int i = 1; i < num_signal_lists; i++) {
         if (signal_lists[i]) {
-            int j;
-            for (j = 0; j < signal_lists[i]->count; j++) {
-                int k;
+            for (int j = 0; j < signal_lists[i]->count; j++) {
                 bool pin_already_added = false;
-                for (k = 0; k < signal_lists[0]->count; k++) {
+                for (int k = 0; k < signal_lists[0]->count; k++) {
                     if (!strcmp(signal_lists[0]->pins[k]->name, signal_lists[i]->pins[j]->name))
                         pin_already_added = true;
                 }
@@ -890,15 +879,14 @@ signal_list_t **split_signal_list(signal_list_t *signalsvar, const int width)
     oassert(width != 0);
     oassert(signalsvar->count % width == 0);
 
-    int i, j;
     int offset = 0;
     int num_chunk = signalsvar->count / width;
 
     /* initialize splitted signals */
     splitted_signals = (signal_list_t **)vtr::calloc(num_chunk, sizeof(signal_list_t *));
-    for (i = 0; i < num_chunk; i++) {
+    for (int i = 0; i < num_chunk; i++) {
         splitted_signals[i] = init_signal_list();
-        for (j = 0; j < width; j++) {
+        for (int j = 0; j < width; j++) {
             npin_t *pin = signalsvar->pins[j + offset];
             /* add to splitted signals list */
             add_pin_to_signal_list(splitted_signals[i], pin);
@@ -922,8 +910,7 @@ bool sigcmp(signal_list_t *sig, signal_list_t *be_checked)
     /* validate signal sizes */
     oassert(sig->count == be_checked->count);
 
-    int i;
-    for (i = 0; i < sig->count; i++) {
+    for (int i = 0; i < sig->count; i++) {
         /* checking their net */
         if (sig->pins[i]->net != be_checked->pins[i]->net) {
             return (false);
@@ -935,8 +922,7 @@ bool sigcmp(signal_list_t *sig, signal_list_t *be_checked)
 signal_list_t *copy_input_signals(signal_list_t *signalsvar)
 {
     signal_list_t *duplicate_signals = init_signal_list();
-    int i;
-    for (i = 0; i < signalsvar->count; i++) {
+    for (int i = 0; i < signalsvar->count; i++) {
         npin_t *pin = signalsvar->pins[i];
         pin = copy_input_npin(pin);
         add_pin_to_signal_list(duplicate_signals, pin);
@@ -952,11 +938,10 @@ signal_list_t *copy_input_signals(signal_list_t *signalsvar)
 signal_list_t *make_output_pins_for_existing_node(nnode_t *node, int width)
 {
     signal_list_t *return_list = init_signal_list();
-    int i;
 
     oassert(node->num_output_pins == width);
 
-    for (i = 0; i < width; i++) {
+    for (int i = 0; i < width; i++) {
         npin_t *new_pin1;
         npin_t *new_pin2;
         nnet_t *new_net;
@@ -1017,7 +1002,6 @@ void depth_traverse_count(nnode_t *node, int *count, uintptr_t traverse_mark_num
  *-------------------------------------------------------------------------------------------*/
 void depth_traverse_count(nnode_t *node, int *count, uintptr_t traverse_mark_number)
 {
-    int i, j;
     nnode_t *next_node;
     nnet_t *next_net;
 
@@ -1029,12 +1013,12 @@ void depth_traverse_count(nnode_t *node, int *count, uintptr_t traverse_mark_num
 
         node->traverse_visited = traverse_mark_number;
 
-        for (i = 0; i < node->num_output_pins; i++) {
+        for (int i = 0; i < node->num_output_pins; i++) {
             if (node->output_pins[i]->net == NULL)
                 continue;
 
             next_net = node->output_pins[i]->net;
-            for (j = 0; j < next_net->num_fanout_pins; j++) {
+            for (int j = 0; j < next_net->num_fanout_pins; j++) {
                 if (next_net->fanout_pins[j] == NULL)
                     continue;
                 next_node = next_net->fanout_pins[j]->node;
@@ -1113,8 +1097,7 @@ void free_netlist(netlist_t *to_free)
  */
 int get_output_pin_index_from_mapping(nnode_t *node, const char *name)
 {
-    int i;
-    for (i = 0; i < node->num_output_pins; i++) {
+    for (int i = 0; i < node->num_output_pins; i++) {
         npin_t *pin = node->output_pins[i];
         if (!strcmp(pin->mapping, name))
             return i;
@@ -1129,11 +1112,9 @@ int get_output_pin_index_from_mapping(nnode_t *node, const char *name)
  */
 int get_output_port_index_from_mapping(nnode_t *node, const char *name)
 {
-    int i;
     int pin_number = 0;
-    for (i = 0; i < node->num_output_port_sizes; i++) {
-        int j;
-        for (j = 0; j < node->output_port_sizes[i]; j++, pin_number++) {
+    for (int i = 0; i < node->num_output_port_sizes; i++) {
+        for (int j = 0; j < node->output_port_sizes[i]; j++, pin_number++) {
             npin_t *pin = node->output_pins[pin_number];
             if (!strcmp(pin->mapping, name))
                 return i;
@@ -1147,8 +1128,7 @@ int get_output_port_index_from_mapping(nnode_t *node, const char *name)
  */
 int get_input_pin_index_from_mapping(nnode_t *node, const char *name)
 {
-    int i;
-    for (i = 0; i < node->num_input_pins; i++) {
+    for (int i = 0; i < node->num_input_pins; i++) {
         npin_t *pin = node->input_pins[i];
         if (!strcmp(pin->mapping, name))
             return i;
@@ -1163,11 +1143,9 @@ int get_input_pin_index_from_mapping(nnode_t *node, const char *name)
  */
 int get_input_port_index_from_mapping(nnode_t *node, const char *name)
 {
-    int i;
     int pin_number = 0;
-    for (i = 0; i < node->num_input_port_sizes; i++) {
-        int j;
-        for (j = 0; j < node->input_port_sizes[i]; j++, pin_number++) {
+    for (int i = 0; i < node->num_input_port_sizes; i++) {
+        for (int j = 0; j < node->input_port_sizes[i]; j++, pin_number++) {
             npin_t *pin = node->input_pins[pin_number];
             if (!strcmp(pin->mapping, name))
                 return i;
@@ -1239,16 +1217,15 @@ void reduce_input_ports(nnode_t *&node, netlist_t *netlist)
 {
     oassert(node->num_input_port_sizes == 1 || node->num_input_port_sizes == 2);
 
-    int i, j;
     int offset = 0;
     nnode_t *new_node;
 
     signal_list_t **input_ports = (signal_list_t **)vtr::calloc(node->num_input_port_sizes, sizeof(signal_list_t *));
     /* add pins to signals lists */
-    for (i = 0; i < node->num_input_port_sizes; i++) {
+    for (int i = 0; i < node->num_input_port_sizes; i++) {
         /* initialize signal list */
         input_ports[i] = init_signal_list();
-        for (j = 0; j < node->input_port_sizes[i]; j++) {
+        for (int j = 0; j < node->input_port_sizes[i]; j++) {
             add_pin_to_signal_list(input_ports[i], node->input_pins[j + offset]);
         }
         offset += node->input_port_sizes[i];
@@ -1278,7 +1255,7 @@ void reduce_input_ports(nnode_t *&node, netlist_t *netlist)
     copy_signedness(new_node->attributes, node->attributes);
 
     /* hook the input pins */
-    for (i = 0; i < input_ports[0]->count; i++) {
+    for (int i = 0; i < input_ports[0]->count; i++) {
         npin_t *pin = input_ports[0]->pins[i];
         if (pin->node) {
             /* remap pins to new node */
@@ -1291,7 +1268,7 @@ void reduce_input_ports(nnode_t *&node, netlist_t *netlist)
     offset = input_ports[0]->count;
 
     if (node->num_input_port_sizes == 2) {
-        for (i = 0; i < input_ports[1]->count; i++) {
+        for (int i = 0; i < input_ports[1]->count; i++) {
             npin_t *pin = input_ports[1]->pins[i];
             if (pin->node) {
                 /* remap pins to new node */
@@ -1304,12 +1281,12 @@ void reduce_input_ports(nnode_t *&node, netlist_t *netlist)
     }
 
     /* hook the output pins */
-    for (i = 0; i < node->num_output_pins; i++) {
+    for (int i = 0; i < node->num_output_pins; i++) {
         remap_pin_to_new_node(node->output_pins[i], new_node, i);
     }
 
     // CLEAN UP
-    for (i = 0; i < node->num_input_port_sizes; i++) {
+    for (int i = 0; i < node->num_input_port_sizes; i++) {
         free_signal_list(input_ports[i]);
     }
     vtr::free(input_ports);
@@ -1334,12 +1311,11 @@ signal_list_t *reduce_signal_list(signal_list_t *signalvar, operation_list signe
     /* validate signedness */
     oassert(signedness == operation_list::SIGNED || signedness == operation_list::UNSIGNED);
 
-    int i;
     signal_list_t *return_value = init_signal_list();
     /* specify the extension net */
     nnet_t *extended_net = (signedness == operation_list::SIGNED) ? netlist->one_net : netlist->zero_net;
 
-    for (i = signalvar->count - 1; i > -1; i--) {
+    for (int i = signalvar->count - 1; i > -1; i--) {
         npin_t *pin = signalvar->pins[i];
         if (pin->net == extended_net) {
             delete_npin(pin);
@@ -1351,7 +1327,7 @@ signal_list_t *reduce_signal_list(signal_list_t *signalvar, operation_list signe
     }
 
     /* adding valuable pins to new signals list */
-    for (i = 0; i < signalvar->count; i++) {
+    for (int i = 0; i < signalvar->count; i++) {
         if (signalvar->pins[i]) {
             add_pin_to_signal_list(return_value, signalvar->pins[i]);
         }
@@ -1425,14 +1401,13 @@ void equalize_ports_size(nnode_t *&node, uintptr_t traverse_mark_number, netlist
     /* copy signedness attributes */
     copy_signedness(new_node->attributes, node->attributes);
 
-    int i;
-    for (i = 0; i < node->num_input_pins; i++) {
+    for (int i = 0; i < node->num_input_pins; i++) {
         /* remapping the a pins */
         remap_pin_to_new_node(node->input_pins[i], new_node, i);
     }
 
     /* Connecting output pins */
-    for (i = 0; i < new_out_size; i++) {
+    for (int i = 0; i < new_out_size; i++) {
         if (i < port_y_size) {
             remap_pin_to_new_node(node->output_pins[i], new_node, i);
         } else {
@@ -1451,7 +1426,7 @@ void equalize_ports_size(nnode_t *&node, uintptr_t traverse_mark_number, netlist
     }
 
     if (new_out_size < port_y_size) {
-        for (i = new_out_size; i < port_y_size; i++) {
+        for (int i = new_out_size; i < port_y_size; i++) {
             /* need to drive extra output pins with PAD */
             nnode_t *buf_node = make_1port_gate(BUF_NODE, 1, 1, node, traverse_mark_number);
             /* hook a pin from PAD node into the buf node */
diff --git a/parmys-plugin/src/netlist_visualizer.cc b/parmys-plugin/src/netlist_visualizer.cc
index a4b01ce79..305cd2d5b 100644
--- a/parmys-plugin/src/netlist_visualizer.cc
+++ b/parmys-plugin/src/netlist_visualizer.cc
@@ -60,10 +60,8 @@ void graphVizOutputNetlist(std::string path, const char *name, uintptr_t marker_
  *-------------------------------------------------------------------------------------------*/
 void depth_first_traversal_graph_display(FILE *out, uintptr_t marker_value, netlist_t *netlist)
 {
-    int i;
-
     /* start with the primary input list */
-    for (i = 0; i < netlist->num_top_input_nodes; i++) {
+    for (int i = 0; i < netlist->num_top_input_nodes; i++) {
         if (netlist->top_input_nodes[i] != NULL) {
             depth_first_traverse_visualize(netlist->top_input_nodes[i], out, marker_value);
         }
@@ -80,7 +78,6 @@ void depth_first_traversal_graph_display(FILE *out, uintptr_t marker_value, netl
  *-------------------------------------------------------------------------------------------*/
 void depth_first_traverse_visualize(nnode_t *node, FILE *fp, uintptr_t traverse_mark_number)
 {
-    int i, j;
     nnode_t *next_node;
     nnet_t *next_net;
 
@@ -107,12 +104,12 @@ void depth_first_traverse_visualize(nnode_t *node, FILE *fp, uintptr_t traverse_
         }
         vtr::free(temp_string);
 
-        for (i = 0; i < node->num_output_pins; i++) {
+        for (int i = 0; i < node->num_output_pins; i++) {
             if (node->output_pins[i]->net == NULL)
                 continue;
 
             next_net = node->output_pins[i]->net;
-            for (j = 0; j < next_net->num_fanout_pins; j++) {
+            for (int j = 0; j < next_net->num_fanout_pins; j++) {
                 npin_t *pin = next_net->fanout_pins[j];
                 if (pin) {
                     next_node = pin->node;
@@ -186,7 +183,6 @@ void graphVizOutputCombinationalNet(std::string path, const char *name, uintptr_
  *-------------------------------------------------------------------------------------------*/
 void forward_traversal_net_graph_display(FILE *fp, uintptr_t marker_value, nnode_t *node)
 {
-    int j, k;
     nnode_t **stack_of_nodes;
     int index_in_stack = 0;
     int num_stack_of_nodes = 1;
@@ -220,11 +216,11 @@ void forward_traversal_net_graph_display(FILE *fp, uintptr_t marker_value, nnode
         vtr::free(temp_string);
 
         /* at each node visit all the outputs */
-        for (j = 0; j < current_node->num_output_pins; j++) {
+        for (int j = 0; j < current_node->num_output_pins; j++) {
             if (current_node->output_pins[j] == NULL)
                 continue;
 
-            for (k = 0; k < current_node->output_pins[j]->net->num_fanout_pins; k++) {
+            for (int k = 0; k < current_node->output_pins[j]->net->num_fanout_pins; k++) {
                 if ((current_node->output_pins[j] == NULL) || (current_node->output_pins[j]->net == NULL) ||
                     (current_node->output_pins[j]->net->fanout_pins[k] == NULL))
                     continue;
@@ -277,7 +273,6 @@ void forward_traversal_net_graph_display(FILE *fp, uintptr_t marker_value, nnode
  *-------------------------------------------------------------------------------------------*/
 void backward_traversal_net_graph_display(FILE *fp, uintptr_t marker_value, nnode_t *node)
 {
-    int j;
     char *temp_string;
     char *temp_string2;
     nnode_t **stack_of_nodes;
@@ -311,7 +306,7 @@ void backward_traversal_net_graph_display(FILE *fp, uintptr_t marker_value, nnod
         vtr::free(temp_string);
 
         /* at each node visit all the outputs */
-        for (j = 0; j < current_node->num_input_pins; j++) {
+        for (int j = 0; j < current_node->num_input_pins; j++) {
             if (current_node->input_pins[j] == NULL)
                 continue;
 
diff --git a/parmys-plugin/src/node_utils.cc b/parmys-plugin/src/node_utils.cc
index 8db010a35..a6326bd5f 100644
--- a/parmys-plugin/src/node_utils.cc
+++ b/parmys-plugin/src/node_utils.cc
@@ -240,7 +240,6 @@ nnode_t *make_2port_gate(operation_list type, int width_port1, int width_port2,
  *-------------------------------------------------------------------------------------------*/
 nnode_t *make_nport_gate(operation_list type, int port_sizes, int width, int width_output, nnode_t *node, short mark)
 {
-    int i;
     nnode_t *logic_node = allocate_nnode(node->loc);
     logic_node->traverse_visited = mark;
     logic_node->type = type;
@@ -248,7 +247,7 @@ nnode_t *make_nport_gate(operation_list type, int port_sizes, int width, int wid
     logic_node->related_ast_node = node->related_ast_node;
 
     /* add the input ports as needed */
-    for (i = 0; i < port_sizes; i++) {
+    for (int i = 0; i < port_sizes; i++) {
         allocate_more_input_pins(logic_node, width);
         add_input_port_information(logic_node, width);
     }
@@ -324,7 +323,6 @@ nnode_t *make_multiport_smux(signal_list_t **inputs, signal_list_t *selector, in
     int valid_num_mux_inputs = shift_left_value_with_overflow_check(0X1, selector->count, node->loc);
     oassert(valid_num_mux_inputs >= num_muxed_inputs);
 
-    int i, j;
     int offset = 0;
 
     nnode_t *mux = allocate_nnode(node->loc);
@@ -335,7 +333,7 @@ nnode_t *make_multiport_smux(signal_list_t **inputs, signal_list_t *selector, in
     /* add selector signal */
     add_input_port_information(mux, selector->count);
     allocate_more_input_pins(mux, selector->count);
-    for (i = 0; i < selector->count; i++) {
+    for (int i = 0; i < selector->count; i++) {
         npin_t *sel = selector->pins[i];
         /* hook selector into mux node as first port */
         if (sel->node)
@@ -346,18 +344,18 @@ nnode_t *make_multiport_smux(signal_list_t **inputs, signal_list_t *selector, in
     offset += selector->count;
 
     int max_width = 0;
-    for (i = 0; i < num_muxed_inputs; i++) {
+    for (int i = 0; i < num_muxed_inputs; i++) {
         /* keep the size of max input to allocate equal output */
         if (inputs[i]->count > max_width)
             max_width = inputs[i]->count;
     }
 
-    for (i = 0; i < num_muxed_inputs; i++) {
+    for (int i = 0; i < num_muxed_inputs; i++) {
         /* add input port data */
         add_input_port_information(mux, max_width);
         allocate_more_input_pins(mux, max_width);
 
-        for (j = 0; j < inputs[i]->count; j++) {
+        for (int j = 0; j < inputs[i]->count; j++) {
             npin_t *pin = inputs[i]->pins[j];
             /* hook inputs into mux node */
             if (j < max_width) {
@@ -380,7 +378,7 @@ nnode_t *make_multiport_smux(signal_list_t **inputs, signal_list_t *selector, in
 
     // specify output pin
     if (outs != NULL) {
-        for (i = 0; i < outs->count; i++) {
+        for (int i = 0; i < outs->count; i++) {
             npin_t *output_pin;
             if (i < max_width) {
                 output_pin = outs->pins[i];
diff --git a/parmys-plugin/src/partial_map.cc b/parmys-plugin/src/partial_map.cc
index f2e24d875..293a7f444 100644
--- a/parmys-plugin/src/partial_map.cc
+++ b/parmys-plugin/src/partial_map.cc
@@ -82,17 +82,15 @@ void depth_first_traversal_to_partial_map(short marker_value, netlist_t *netlist
  *-------------------------------------------------------------------------------------------*/
 void depth_first_traverse_partial_map(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist)
 {
-    int i, j;
-
     if (node->traverse_visited != traverse_mark_number) {
 
         node->traverse_visited = traverse_mark_number;
 
-        for (i = 0; i < node->num_output_pins; i++) {
+        for (int i = 0; i < node->num_output_pins; i++) {
             if (node->output_pins[i]->net) {
                 nnet_t *next_net = node->output_pins[i]->net;
                 if (next_net->fanout_pins) {
-                    for (j = 0; j < next_net->num_fanout_pins; j++) {
+                    for (int j = 0; j < next_net->num_fanout_pins; j++) {
                         if (next_net->fanout_pins[j]) {
                             if (next_net->fanout_pins[j]->node) {
                                 depth_first_traverse_partial_map(next_net->fanout_pins[j]->node, traverse_mark_number, netlist);
@@ -276,7 +274,6 @@ void instantiate_soft_logic_ram(nnode_t *node, short mark, netlist_t *netlist)
  *-------------------------------------------------------------------------------------------*/
 void instantiate_multi_port_mux(nnode_t *node, short mark, netlist_t * /*netlist*/)
 {
-    int i, j;
     int width_of_one_hot_logic;
     int num_ports;
     int port_offset;
@@ -288,12 +285,12 @@ void instantiate_multi_port_mux(nnode_t *node, short mark, netlist_t * /*netlist
     port_offset = node->input_port_sizes[1];
 
     muxes = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * (num_ports - 1));
-    for (i = 0; i < num_ports - 1; i++) {
+    for (int i = 0; i < num_ports - 1; i++) {
         muxes[i] = make_2port_gate(MUX_2, width_of_one_hot_logic, width_of_one_hot_logic, 1, node, mark);
     }
 
-    for (j = 0; j < num_ports - 1; j++) {
-        for (i = 0; i < width_of_one_hot_logic; i++) {
+    for (int j = 0; j < num_ports - 1; j++) {
+        for (int i = 0; i < width_of_one_hot_logic; i++) {
             /* map the inputs to the muxt */
             remap_pin_to_new_node(node->input_pins[i + (j + 1) * port_offset], muxes[j], width_of_one_hot_logic + i);
 
@@ -325,9 +322,8 @@ nnode_t **transform_to_single_bit_mux_nodes(nnode_t *node, uintptr_t traverse_ma
 {
     oassert(node->traverse_visited == traverse_mark_number);
 
-    int i, j;
     /* to check all mux inputs have the same width(except [0] which is selector) */
-    for (i = 2; i < node->num_input_port_sizes; i++) {
+    for (int i = 2; i < node->num_input_port_sizes; i++) {
         oassert(node->input_port_sizes[i] == node->input_port_sizes[1]);
     }
 
@@ -342,7 +338,7 @@ nnode_t **transform_to_single_bit_mux_nodes(nnode_t *node, uintptr_t traverse_ma
      * input_pin[SEL_WIDTH..n] -> MUX inputs
      * output_pin[0..n-1] -> MUX outputs
      */
-    for (i = 0; i < num_mux_nodes; i++) {
+    for (int i = 0; i < num_mux_nodes; i++) {
         mux_node[i] = allocate_nnode(node->loc);
 
         mux_node[i]->type = node->type;
@@ -362,13 +358,13 @@ nnode_t **transform_to_single_bit_mux_nodes(nnode_t *node, uintptr_t traverse_ma
              * remap the SEL pins from the mux node
              * to the last splitted mux node
              */
-            for (j = 0; j < selector_width; j++) {
+            for (int j = 0; j < selector_width; j++) {
                 remap_pin_to_new_node(node->input_pins[j], mux_node[i], j);
             }
 
         } else {
             /* add a copy of SEL pins from the mux node to the splitted mux nodes */
-            for (j = 0; j < selector_width; j++) {
+            for (int j = 0; j < selector_width; j++) {
                 add_input_pin_to_node(mux_node[i], copy_input_npin(node->input_pins[j]), j);
             }
         }
@@ -378,7 +374,7 @@ nnode_t **transform_to_single_bit_mux_nodes(nnode_t *node, uintptr_t traverse_ma
          * last splitted ff node since we do not need it in dff node anymore
          **/
         int acc_port_sizes = selector_width;
-        for (j = 1; j < num_input_ports; j++) {
+        for (int j = 1; j < num_input_ports; j++) {
             add_input_port_information(mux_node[i], 1);
             allocate_more_input_pins(mux_node[i], 1);
 
@@ -413,16 +409,13 @@ nnode_t **transform_to_single_bit_mux_nodes(nnode_t *node, uintptr_t traverse_ma
  */
 void instantiate_multi_port_n_bits_mux(nnode_t *node, short mark, netlist_t *netlist)
 {
-    int i, j;
-
     char *name = vtr::strdup(node->name);
     int num_single_muxes = node->num_output_pins;
     /* This split the multiport n bit mux node into multiport 1 bit muxes*/
     nnode_t **single_bit_muxes = transform_to_single_bit_mux_nodes(node, mark, netlist);
 
-    int cnt;
     /* iterating over single bit muxes that has multiple (>2) port to turn them into 2-mux */
-    for (cnt = 0; cnt < num_single_muxes; cnt++) {
+    for (int cnt = 0; cnt < num_single_muxes; cnt++) {
         nnode_t *single_bit_mux = single_bit_muxes[cnt];
 
         /* keeping the information of each single bit mux */
@@ -442,7 +435,7 @@ void instantiate_multi_port_n_bits_mux(nnode_t *node, short mark, netlist_t *net
             /* to keep the internal output signals for future usage */
             signal_list_t **output_signals = (signal_list_t **)vtr::calloc(selector_width, sizeof(signal_list_t *));
             /* creating multiple stages to decode single bit mux into 2-mux */
-            for (i = 0; i < selector_width; i++) {
+            for (int i = 0; i < selector_width; i++) {
                 /* num of muxes in each stage */
                 int num_of_muxes = shift_left_value_with_overflow_check(0x1, selector_width - (i + 1), single_bit_mux->loc);
                 muxes[i] = (nnode_t **)vtr::calloc(num_of_muxes, sizeof(nnode_t *));
@@ -452,7 +445,7 @@ void instantiate_multi_port_n_bits_mux(nnode_t *node, short mark, netlist_t *net
                 npin_t *selector_pin = single_bit_mux->input_pins[selector_width - i - 1];
 
                 /* iterating over each single bit 2-mux to connect inputs */
-                for (j = 0; j < num_of_muxes; j++) {
+                for (int j = 0; j < num_of_muxes; j++) {
 
                     muxes[i][j] = make_2port_gate(SMUX_2, 1, 2, 1, single_bit_mux, mark);
 
@@ -497,12 +490,12 @@ void instantiate_multi_port_n_bits_mux(nnode_t *node, short mark, netlist_t *net
             }
 
             // CLEAN UP per single mux
-            for (i = 0; i < selector_width; i++) {
+            for (int i = 0; i < selector_width; i++) {
                 vtr::free(muxes[i]);
             }
             vtr::free(muxes);
 
-            for (i = 0; i < selector_width; i++) {
+            for (int i = 0; i < selector_width; i++) {
                 free_signal_list(output_signals[i]);
             }
             vtr::free(output_signals);
@@ -524,16 +517,15 @@ void instantiate_not_logic(nnode_t *node, short mark, netlist_t * /*netlist*/)
 {
     int width = node->num_input_pins;
     nnode_t **new_not_cells;
-    int i;
 
     new_not_cells = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * width);
 
-    for (i = 0; i < width; i++) {
+    for (int i = 0; i < width; i++) {
         new_not_cells[i] = make_not_gate(node, mark);
     }
 
     /* connect inputs and outputs */
-    for (i = 0; i < width; i++) {
+    for (int i = 0; i < width; i++) {
         /* Joining the inputs to the new soft NOT GATES */
         remap_pin_to_new_node(node->input_pins[i], new_not_cells[i], 0);
         remap_pin_to_new_node(node->output_pins[i], new_not_cells[i], 0);
@@ -584,7 +576,6 @@ bool eliminate_buffer(nnode_t *node, short, netlist_t *)
  *-------------------------------------------------------------------------------------------*/
 void instantiate_logical_logic(nnode_t *node, operation_list op, short mark)
 {
-    int i;
     int port_B_offset;
     int width_a;
     int width_b;
@@ -606,11 +597,11 @@ void instantiate_logical_logic(nnode_t *node, operation_list op, short mark)
     reduction2 = make_1port_logic_gate(BITWISE_OR, width_b, node, mark);
 
     /* connect inputs.  In the case that a signal is smaller than the other then zero pad */
-    for (i = 0; i < width_a; i++) {
+    for (int i = 0; i < width_a; i++) {
         /* Joining the inputs to the input 1 of that gate */
         remap_pin_to_new_node(node->input_pins[i], reduction1, i);
     }
-    for (i = 0; i < width_b; i++) {
+    for (int i = 0; i < width_b; i++) {
         /* Joining the inputs to the input 1 of that gate */
         remap_pin_to_new_node(node->input_pins[i + port_B_offset], reduction2, i);
     }
@@ -630,7 +621,6 @@ void instantiate_logical_logic(nnode_t *node, operation_list op, short mark)
  *-------------------------------------------------------------------------------------------*/
 void instantiate_bitwise_reduction(nnode_t *node, operation_list op, short mark)
 {
-    int i;
     int width_a;
     nnode_t *new_logic_cell;
     operation_list cell_op;
@@ -675,7 +665,7 @@ void instantiate_bitwise_reduction(nnode_t *node, operation_list op, short mark)
     new_logic_cell = make_1port_logic_gate(cell_op, width_a, node, mark);
 
     /* connect inputs.  In the case that a signal is smaller than the other then zero pad */
-    for (i = 0; i < width_a; i++) {
+    for (int i = 0; i < width_a; i++) {
         /* Joining the inputs to the input 1 of that gate */
         remap_pin_to_new_node(node->input_pins[i], new_logic_cell, i);
     }
@@ -690,8 +680,6 @@ void instantiate_bitwise_reduction(nnode_t *node, operation_list op, short mark)
  *-------------------------------------------------------------------------------------------*/
 void instantiate_bitwise_logic(nnode_t *node, operation_list op, short mark, netlist_t *netlist)
 {
-    int i, j;
-
     operation_list cell_op;
     if (!node)
         return;
@@ -724,11 +712,11 @@ void instantiate_bitwise_logic(nnode_t *node, operation_list op, short mark, net
     }
 
     /* connect inputs.  In the case that a signal is smaller than the other then zero pad */
-    for (i = 0; i < node->output_port_sizes[0]; i++) {
+    for (int i = 0; i < node->output_port_sizes[0]; i++) {
         nnode_t *new_logic_cells = make_nport_gate(cell_op, node->num_input_port_sizes, 1, 1, node, mark);
         int current_port_offset = 0;
         /* Joining the inputs to the input 1 of that gate */
-        for (j = 0; j < node->num_input_port_sizes; j++) {
+        for (int j = 0; j < node->num_input_port_sizes; j++) {
             /* IF - this current input will also have a corresponding other input ports then join it to the gate */
             if (i < node->input_port_sizes[j])
                 remap_pin_to_new_node(node->input_pins[i + current_port_offset], new_logic_cells, j);
@@ -842,7 +830,6 @@ void instantiate_EQUAL(nnode_t *node, operation_list type, short mark, netlist_t
     int width_a;
     int width_b;
     int width_max;
-    int i;
     int port_B_offset;
     nnode_t *compare;
     nnode_t *combine;
@@ -866,7 +853,7 @@ void instantiate_EQUAL(nnode_t *node, operation_list type, short mark, netlist_t
     /* build an and bitwise AND */
 
     /* connect inputs.  In the case that a signal is smaller than the other then zero pad */
-    for (i = 0; i < width_max; i++) {
+    for (int i = 0; i < width_max; i++) {
         /* Joining the inputs to the input 1 of that gate */
         if (i < width_a) {
             if (i < width_b) {
@@ -917,7 +904,6 @@ void instantiate_GT(nnode_t *node, operation_list type, short mark, netlist_t *n
     int width_a;
     int width_b;
     int width_max;
-    int i;
     int port_A_offset;
     int port_B_offset;
     int port_A_index;
@@ -967,7 +953,7 @@ void instantiate_GT(nnode_t *node, operation_list type, short mark, netlist_t *n
     /* each cell checks if A > B and sends out a 1 if history has no 1s (3rd input) */
     gt_cells = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * width_max);
 
-    for (i = 0; i < width_max; i++) {
+    for (int i = 0; i < width_max; i++) {
         gt_cells[i] = make_3port_gate(GT, 1, 1, 1, 1, node, mark);
         if (i < width_max - 1) {
             or_cells[i] = make_2port_gate(LOGICAL_OR, 1, 1, 1, node, mark);
@@ -975,7 +961,7 @@ void instantiate_GT(nnode_t *node, operation_list type, short mark, netlist_t *n
     }
 
     /* connect inputs.  In the case that a signal is smaller than the other then zero pad */
-    for (i = 0; i < width_max; i++) {
+    for (int i = 0; i < width_max; i++) {
         /* Joining the inputs to the input 1 of that gate */
         if (i < width_a) {
             /* IF - this current input will also have a corresponding b_port input then join it to the gate */
@@ -1053,7 +1039,6 @@ void instantiate_GE(nnode_t *node, operation_list type, short mark, netlist_t *n
     int width_a;
     int width_b;
     int width_max;
-    int i;
     int port_B_offset;
     int port_A_offset;
     nnode_t *equal;
@@ -1083,7 +1068,7 @@ void instantiate_GE(nnode_t *node, operation_list type, short mark, netlist_t *n
     logical_or_final_gate = make_1port_logic_gate(LOGICAL_OR, 2, node, mark);
 
     /* connect inputs.  In the case that a signal is smaller than the other then zero pad */
-    for (i = 0; i < width_max; i++) {
+    for (int i = 0; i < width_max; i++) {
         /* Joining the inputs to the input 1 of that gate */
         if (i < width_a) {
             /* IF - this current input will also have a corresponding b_port input then join it to the gate */
@@ -1143,12 +1128,11 @@ void instantiate_shift(nnode_t *node, short mark, netlist_t *netlist)
     oassert(node->num_input_port_sizes == 2);
     oassert(node->input_port_sizes[0] == node->input_port_sizes[1]);
 
-    int i;
     int operand_width = node->input_port_sizes[0];
     int shift_width = node->input_port_sizes[1];
     /* shift signal */
     signal_list_t *shift_signal = init_signal_list();
-    for (i = 0; i < shift_width; i++) {
+    for (int i = 0; i < shift_width; i++) {
         add_pin_to_signal_list(shift_signal, node->input_pins[operand_width + i]);
     }
 
@@ -1184,19 +1168,18 @@ static void instantiate_constant_shift(nnode_t *node, operation_list type, short
     oassert(node->num_input_port_sizes == 2);
     oassert(node->input_port_sizes[0] == node->input_port_sizes[1]);
 
-    int i;
     int operand_width = node->input_port_sizes[0];
     int shift_width = node->input_port_sizes[1];
     int output_width = node->output_port_sizes[0];
 
     /* operand signal */
     signal_list_t *operand_signal = init_signal_list();
-    for (i = 0; i < operand_width; i++) {
+    for (int i = 0; i < operand_width; i++) {
         add_pin_to_signal_list(operand_signal, node->input_pins[i]);
     }
     /* shift signal */
     signal_list_t *shift_signal = init_signal_list();
-    for (i = 0; i < shift_width; i++) {
+    for (int i = 0; i < shift_width; i++) {
         add_pin_to_signal_list(shift_signal, node->input_pins[operand_width + i]);
     }
 
@@ -1214,7 +1197,7 @@ static void instantiate_constant_shift(nnode_t *node, operation_list type, short
     case SL:
     case ASL: {
         /* connect ZERO to outputs that don't have inputs connected */
-        for (i = 0; i < shift_size; i++) {
+        for (int i = 0; i < shift_size; i++) {
             if (i < output_width) {
                 // connect 0 to lower outputs
                 add_pin_to_signal_list(result, get_zero_pin(netlist));
@@ -1222,7 +1205,7 @@ static void instantiate_constant_shift(nnode_t *node, operation_list type, short
         }
 
         /* connect inputs to outputs */
-        for (i = 0; i < output_width - shift_size; i++) {
+        for (int i = 0; i < output_width - shift_size; i++) {
             if (i < operand_width) {
                 npin_t *pin = operand_signal->pins[i];
                 // connect higher output pin to lower input pin
@@ -1239,7 +1222,7 @@ static void instantiate_constant_shift(nnode_t *node, operation_list type, short
     }
     case SR: // fallthrough
     case ASR: {
-        for (i = shift_size; i < operand_width; i++) {
+        for (int i = shift_size; i < operand_width; i++) {
             npin_t *pin = operand_signal->pins[i];
             // connect higher output pin to lower input pin
             if (i - shift_size < output_width) {
@@ -1249,7 +1232,7 @@ static void instantiate_constant_shift(nnode_t *node, operation_list type, short
         }
 
         /* Extend pad_bit to outputs that don't have inputs connected */
-        for (i = output_width - 1; i >= operand_width - shift_size; i--) {
+        for (int i = output_width - 1; i >= operand_width - shift_size; i--) {
             npin_t *extension_pin = NULL;
             if (node->related_ast_node && node->attributes->port_a_signed == SIGNED && node->type == ASR) {
                 /* for signed values padding will be with last pin */
@@ -1268,7 +1251,7 @@ static void instantiate_constant_shift(nnode_t *node, operation_list type, short
         break;
     }
 
-    for (i = 0; i < output_width; i++) {
+    for (int i = 0; i < output_width; i++) {
         /* create a buf node to drive output pins */
         nnode_t *buf_node = make_1port_gate(BUF_NODE, 1, 1, node, mark);
         /* add result as inout pins */
@@ -1279,13 +1262,13 @@ static void instantiate_constant_shift(nnode_t *node, operation_list type, short
     }
 
     // CLEAN UP
-    for (i = 0; i < operand_signal->count; i++) {
+    for (int i = 0; i < operand_signal->count; i++) {
         /* delete unused operand pins */
         if (operand_signal->pins[i]->node == node)
             delete_npin(operand_signal->pins[i]);
     }
     free_signal_list(operand_signal);
-    for (i = 0; i < shift_signal->count; i++) {
+    for (int i = 0; i < shift_signal->count; i++) {
         /* delete shift pins */
         delete_npin(shift_signal->pins[i]);
     }
diff --git a/parmys-plugin/src/string_cache.cc b/parmys-plugin/src/string_cache.cc
index 038a4ab6f..b34dc13b7 100644
--- a/parmys-plugin/src/string_cache.cc
+++ b/parmys-plugin/src/string_cache.cc
@@ -26,19 +26,18 @@ void generate_sc_hash(STRING_CACHE *sc);
 
 unsigned long string_hash(STRING_CACHE *sc, const char *string)
 {
-    long a, i, mod, mul;
+    long a, mod, mul;
 
     a = 0;
     mod = sc->mod;
     mul = sc->mul;
-    for (i = 0; string[i]; i++)
+    for (int i = 0; string[i]; i++)
         a = (a * mul + (unsigned char)string[i]) % mod;
     return a;
 }
 
 void generate_sc_hash(STRING_CACHE *sc)
 {
-    long i;
     long hash;
 
     if (sc->string_hash != NULL)
@@ -50,7 +49,7 @@ void generate_sc_hash(STRING_CACHE *sc)
     sc->next_string = (long *)sc_do_alloc(sc->size, sizeof(long));
     memset(sc->string_hash, 0xff, sc->string_hash_size * sizeof(long));
     memset(sc->next_string, 0xff, sc->size * sizeof(long));
-    for (i = 0; i < sc->free; i++) {
+    for (long i = 0; i < sc->free; i++) {
         hash = string_hash(sc, sc->string[i]) % sc->string_hash_size;
         sc->next_string[i] = sc->string_hash[hash];
         sc->string_hash[hash] = i;
diff --git a/parmys-plugin/src/subtractor.cc b/parmys-plugin/src/subtractor.cc
index ac96c86be..dc25554bb 100644
--- a/parmys-plugin/src/subtractor.cc
+++ b/parmys-plugin/src/subtractor.cc
@@ -110,7 +110,7 @@ void declare_hard_adder_for_sub(nnode_t *node)
 void instantiate_hard_adder_subtraction(nnode_t *node, short mark, netlist_t * /*netlist*/)
 {
     char *new_name = NULL;
-    int len, sanity, i;
+    int len, sanity;
 
     declare_hard_adder_for_sub(node);
 
@@ -133,7 +133,7 @@ void instantiate_hard_adder_subtraction(nnode_t *node, short mark, netlist_t * /
         oassert(false);
 
     /* Give names to the output pins */
-    for (i = 0; i < node->num_output_pins; i++) {
+    for (int i = 0; i < node->num_output_pins; i++) {
         if (node->output_pins[i]->name == NULL) {
             len = strlen(node->name) + 20; /* 6 chars for pin idx */
             new_name = (char *)vtr::malloc(len);
@@ -153,7 +153,6 @@ void instantiate_hard_adder_subtraction(nnode_t *node, short mark, netlist_t * /
  *---------------------------------------------------------------------*/
 void init_split_adder_for_sub(nnode_t *node, nnode_t *ptr, int a, int sizea, int b, int sizeb, int cin, int cout, int index, int flag)
 {
-    int i;
     int flaga = 0;
     int current_sizea, current_sizeb;
     int aa = 0;
@@ -218,30 +217,30 @@ void init_split_adder_for_sub(nnode_t *node, nnode_t *ptr, int a, int sizea, int
     // the normal sub: if flaga or flagb = 1, the input pins should be empty.
     // the unary sub: all input pins for a should be null, input pins for b should be connected to node
     if (node->num_input_port_sizes == 1) {
-        for (i = 0; i < current_sizea; i++)
+        for (int i = 0; i < current_sizea; i++)
             ptr->input_pins[i] = NULL;
     } else if ((flaga == 1) && (node->num_input_port_sizes == 2)) {
-        for (i = 0; i < current_sizea; i++)
+        for (int i = 0; i < current_sizea; i++)
             ptr->input_pins[i] = NULL;
     } else if ((flaga == 2) && (node->num_input_port_sizes == 2)) {
         if (index == 0) {
             ptr->input_pins[0] = NULL;
             if (sizea > 1) {
-                for (i = 1; i < aa; i++) {
+                for (int i = 1; i < aa; i++) {
                     ptr->input_pins[i] = node->input_pins[i + index * sizea - 1];
                     ptr->input_pins[i]->node = ptr;
                     ptr->input_pins[i]->pin_node_idx = i;
                 }
-                for (i = 0; i < (sizea - aa); i++)
+                for (int i = 0; i < (sizea - aa); i++)
                     ptr->input_pins[i + aa] = NULL;
             }
         } else {
-            for (i = 0; i < aa; i++) {
+            for (int i = 0; i < aa; i++) {
                 ptr->input_pins[i] = node->input_pins[i + index * sizea - 1];
                 ptr->input_pins[i]->node = ptr;
                 ptr->input_pins[i]->pin_node_idx = i;
             }
-            for (i = 0; i < (sizea - aa); i++)
+            for (int i = 0; i < (sizea - aa); i++)
                 ptr->input_pins[i + aa] = NULL;
         }
     } else {
@@ -249,14 +248,14 @@ void init_split_adder_for_sub(nnode_t *node, nnode_t *ptr, int a, int sizea, int
             if (flag == 0) {
                 ptr->input_pins[0] = NULL;
                 if (sizea > 1) {
-                    for (i = 1; i < sizea; i++) {
+                    for (int i = 1; i < sizea; i++) {
                         ptr->input_pins[i] = node->input_pins[i + index * sizea - 1];
                         ptr->input_pins[i]->node = ptr;
                         ptr->input_pins[i]->pin_node_idx = i;
                     }
                 }
             } else {
-                for (i = 0; i < current_sizea; i++) {
+                for (int i = 0; i < current_sizea; i++) {
                     ptr->input_pins[i] = node->input_pins[i];
                     ptr->input_pins[i]->node = ptr;
                     ptr->input_pins[i]->pin_node_idx = i;
@@ -264,14 +263,14 @@ void init_split_adder_for_sub(nnode_t *node, nnode_t *ptr, int a, int sizea, int
             }
         } else {
             if (flag == 0) {
-                for (i = 0; i < sizea; i++) {
+                for (int i = 0; i < sizea; i++) {
                     ptr->input_pins[i] = node->input_pins[i + index * sizea - offset];
                     ptr->input_pins[i]->node = ptr;
                     ptr->input_pins[i]->pin_node_idx = i;
                 }
             } else {
                 num = node->input_port_sizes[0];
-                for (i = 0; i < current_sizea; i++) {
+                for (int i = 0; i < current_sizea; i++) {
                     ptr->input_pins[i] = node->input_pins[i + num - current_sizea];
                     ptr->input_pins[i]->node = ptr;
                     ptr->input_pins[i]->pin_node_idx = i;
@@ -280,11 +279,11 @@ void init_split_adder_for_sub(nnode_t *node, nnode_t *ptr, int a, int sizea, int
         }
     }
 
-    for (i = 0; i < current_sizeb; i++)
+    for (int i = 0; i < current_sizeb; i++)
         ptr->input_pins[i + current_sizeb] = NULL;
 
     /* Carry_in should be NULL*/
-    for (i = 0; i < cin; i++) {
+    for (int i = 0; i < cin; i++) {
         ptr->input_pins[i + current_sizea + current_sizeb] = NULL;
     }
 
@@ -297,7 +296,7 @@ void init_split_adder_for_sub(nnode_t *node, nnode_t *ptr, int a, int sizea, int
 
     ptr->num_output_pins = output;
     ptr->output_pins = (npin_t **)vtr::malloc(sizeof(void *) * output);
-    for (i = 0; i < output; i++)
+    for (int i = 0; i < output; i++)
         ptr->output_pins[i] = NULL;
 
     return;
@@ -322,7 +321,6 @@ void split_adder_for_sub(nnode_t *nodeo, int a, int b, int sizea, int sizeb, int
 {
     nnode_t **node;
     nnode_t **not_node;
-    int i, j;
     int num;
     int max_num = 0;
     int flag = 0, lefta = 0, leftb = 0;
@@ -344,7 +342,7 @@ void split_adder_for_sub(nnode_t *nodeo, int a, int b, int sizea, int sizeb, int
     node = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * (count));
     not_node = (nnode_t **)vtr::malloc(sizeof(nnode_t *) * (b));
 
-    for (i = 0; i < b; i++) {
+    for (int i = 0; i < b; i++) {
         not_node[i] = allocate_nnode(nodeo->loc);
         nnode_t *temp = not_node[i];
         if (nodeo->num_input_port_sizes == 2)
@@ -354,7 +352,7 @@ void split_adder_for_sub(nnode_t *nodeo, int a, int b, int sizea, int sizeb, int
         free_nnode(temp);
     }
 
-    for (i = 0; i < count; i++) {
+    for (int i = 0; i < count; i++) {
         node[i] = allocate_nnode(nodeo->loc);
         node[i]->name = (char *)vtr::malloc(strlen(nodeo->name) + 20);
         odin_sprintf(node[i]->name, "%s-%d", nodeo->name, i);
@@ -398,7 +396,7 @@ void split_adder_for_sub(nnode_t *nodeo, int a, int b, int sizea, int sizeb, int
     sub_chain_list = insert_in_vptr_list(sub_chain_list, adder_chain);
 
     if (flag == 1 && count == 1) {
-        for (i = 0; i < b; i++) {
+        for (int i = 0; i < b; i++) {
             /* If the input pin of not gate connects to gnd, replacing the input pin and the not gate with vcc;
              * if the input pin of not gate connects to vcc, replacing the input pin and the not gate with gnd.*/
             /* connecting untouched nets in the netlist creation to the pad node */
@@ -424,7 +422,7 @@ void split_adder_for_sub(nnode_t *nodeo, int a, int b, int sizea, int sizeb, int
                 num = b;
             else
                 num = sizeb - 1;
-            for (i = 0; i < num; i++) {
+            for (int i = 0; i < num; i++) {
                 /* If the input pin of not gate connects to gnd, replacing the input pin and the not gate with vcc;
                  * if the input pin of not gate connects to vcc, replacing the input pin and the not gate with gnd.*/
                 /* connecting untouched nets in the netlist creation to the pad node */
@@ -446,12 +444,12 @@ void split_adder_for_sub(nnode_t *nodeo, int a, int b, int sizea, int sizeb, int
             }
         }
 
-        for (i = offset; i < count; i++) {
+        for (int i = offset; i < count; i++) {
             num = (b + 1) - i * sizeb;
             if (num > sizeb)
                 num = sizeb;
 
-            for (j = 0; j < num; j++) {
+            for (int j = 0; j < num; j++) {
                 if (i == count - 1 && flag == 1) {
                     /* If the input pin of not gate connects to gnd, replacing the input pin and the not gate with vcc;
                      * if the input pin of not gate connects to vcc, replacing the input pin and the not gate with gnd.*/
@@ -516,9 +514,9 @@ void split_adder_for_sub(nnode_t *nodeo, int a, int b, int sizea, int sizeb, int
 
     // for normal subtraction: if any input pins beside intial cin is NULL, it should connect to unconn
     // for unary subtraction: the first number should has the number of a input pins connected to gnd. The others are as same as normal subtraction
-    for (i = 0; i < count; i++) {
+    for (int i = 0; i < count; i++) {
         num = node[i]->num_input_pins;
-        for (j = 0; j < num - 1; j++) {
+        for (int j = 0; j < num - 1; j++) {
             if (node[i]->input_pins[j] == NULL) {
                 if (nodeo->num_input_port_sizes != 3 && i * sizea + j < a)
                     connect_nodes(netlist->gnd_node, 0, node[i], j);
@@ -529,11 +527,11 @@ void split_adder_for_sub(nnode_t *nodeo, int a, int b, int sizea, int sizeb, int
     }
 
     // connect cout to next node's cin
-    for (i = 1; i < count; i++)
+    for (int i = 1; i < count; i++)
         connect_nodes(node[i - 1], 0, node[i], (node[i]->num_input_pins - 1));
 
     if (flag == 1 && count == 1) {
-        for (j = 0; j < node[0]->num_output_pins - 1; j++) {
+        for (int j = 0; j < node[0]->num_output_pins - 1; j++) {
             if (j < nodeo->num_output_pins)
                 remap_pin_to_new_node(nodeo->output_pins[j], node[0], j + 1);
             else {
@@ -543,7 +541,7 @@ void split_adder_for_sub(nnode_t *nodeo, int a, int b, int sizea, int sizeb, int
             }
         }
     } else {
-        for (j = 0; j < node[0]->num_output_pins - 2; j++) {
+        for (int j = 0; j < node[0]->num_output_pins - 2; j++) {
             if (j < nodeo->num_output_pins)
                 remap_pin_to_new_node(nodeo->output_pins[j], node[0], j + 2);
             else {
@@ -556,8 +554,8 @@ void split_adder_for_sub(nnode_t *nodeo, int a, int b, int sizea, int sizeb, int
 
     if (count > 1 || configuration.adder_cin_global) {
         // remap the output pins of each adder to nodeo
-        for (i = offset; i < count; i++) {
-            for (j = 0; j < node[i]->num_output_pins - 1; j++) {
+        for (int i = offset; i < count; i++) {
+            for (int j = 0; j < node[i]->num_output_pins - 1; j++) {
                 if ((i * sizea + j - offset) < nodeo->num_output_pins)
                     remap_pin_to_new_node(nodeo->output_pins[i * sizea + j - offset], node[i], j + 1);
                 else {
@@ -844,9 +842,8 @@ void clean_adders_for_sub()
  *-----------------------------------------------------------------------*/
 static void cleanup_sub_old_node(nnode_t *nodeo, netlist_t *netlist)
 {
-    int i;
     /* Disconnecting input pins from the old node side */
-    for (i = 0; i < nodeo->num_input_pins; i++) {
+    for (int i = 0; i < nodeo->num_input_pins; i++) {
         npin_t *input_pin = nodeo->input_pins[i];
         if (input_pin->node == nodeo)
             delete_npin(input_pin);
@@ -855,7 +852,7 @@ static void cleanup_sub_old_node(nnode_t *nodeo, netlist_t *netlist)
     }
 
     /* connecting the extra output pins to the gnd node */
-    for (i = 0; i < nodeo->num_output_pins; i++) {
+    for (int i = 0; i < nodeo->num_output_pins; i++) {
         npin_t *output_pin = nodeo->output_pins[i];
 
         if (output_pin && output_pin->node) {

From 608b352b9482a89fb5b9f46909008a74f7740ea4 Mon Sep 17 00:00:00 2001
From: Dani <17553473+poname@users.noreply.github.com>
Date: Mon, 26 Dec 2022 11:42:01 -0400
Subject: [PATCH 28/56] Vtr dir (#2)

* remove the pugixml submodule

* VTR is now taken from VTR_INSTALL_DIR instead of the vendored third_party/vtr sources

* clean makefile

* makefile improved

* minor improvements

* ci

* whitespace formatting

* ci typo
---
 .github/workflows/build-and-test.sh           |    2 +-
 .github/workflows/licensing.yml               |   15 +-
 .gitmodules                                   |    3 -
 Makefile                                      |    3 +-
 environment.yml                               |    1 +
 parmys-plugin/Makefile                        |  105 +-
 parmys-plugin/parmys.cc                       |    4 +-
 parmys-plugin/parmys_arch.cc                  |    4 +
 parmys-plugin/parmys_update.cc                |    2 +-
 parmys-plugin/src/adder.cc                    |   28 +-
 parmys-plugin/{ => src}/include/adder.h       |    0
 parmys-plugin/{ => src}/include/ast_util.h    |    0
 .../{ => src}/include/block_memory.h          |    0
 parmys-plugin/{ => src}/include/config_t.h    |    0
 parmys-plugin/{ => src}/include/hard_block.h  |    0
 .../{ => src}/include/hard_soft_logic_mixer.h |    0
 parmys-plugin/{ => src}/include/hash_table.h  |    0
 parmys-plugin/{ => src}/include/memory.h      |    0
 .../{ => src}/include/mixing_optimization.h   |    0
 parmys-plugin/{ => src}/include/multiplier.h  |    0
 .../{ => src}/include/netlist_cleanup.h       |    0
 .../{ => src}/include/netlist_statistic.h     |    0
 .../{ => src}/include/netlist_utils.h         |    0
 .../{ => src}/include/netlist_visualizer.h    |    0
 parmys-plugin/{ => src}/include/node_utils.h  |    0
 parmys-plugin/{ => src}/include/odin_error.h  |    0
 .../{ => src}/include/odin_globals.h          |    0
 parmys-plugin/{ => src}/include/odin_ii.h     |    0
 parmys-plugin/{ => src}/include/odin_types.h  |    0
 parmys-plugin/{ => src}/include/odin_util.h   |    0
 parmys-plugin/{ => src}/include/partial_map.h |    0
 .../{ => src}/include/read_xml_config_file.h  |    0
 .../{ => src}/include/string_cache.h          |    0
 parmys-plugin/{ => src}/include/subtractor.h  |    0
 parmys-plugin/src/memory.cc                   |   20 +-
 parmys-plugin/src/multiplier.cc               |   12 +-
 parmys-plugin/src/netlist_cleanup.cc          |    4 +-
 parmys-plugin/src/netlist_statistic.cc        |   20 +-
 parmys-plugin/src/partial_map.cc              |    4 +-
 parmys-plugin/src/read_xml_config_file.cc     |    4 +-
 parmys-plugin/src/subtractor.cc               |   22 +-
 parmys-plugin/techlibs/vtr_primitives.v       |    2 +-
 parmys-plugin/tests/Makefile                  |    4 +-
 .../tests/eltwise_layer/eltwise_layer.v       |    2 +-
 .../tests/eltwise_layer/hard_block_include.v  |    2 +-
 ...cN10LB_mem20K_complexDSP_customSB_22nm.xml |    2 +-
 .../k6_frac_N10_frac_chain_mem32K_40nm.xml    |    2 +-
 parmys-plugin/tests/raygentop/raygentop.v     |    2 +-
 third_party/pugixml                           |    1 -
 third_party/vtr/arch/LICENSE.md               |   69 -
 third_party/vtr/libs/LICENSE.md               |   69 -
 third_party/vtr/libs/archfpga/.gitignore      |    1 -
 third_party/vtr/libs/archfpga/CMakeLists.txt  |   75 -
 third_party/vtr/libs/archfpga/arch/README.txt |   11 -
 .../vtr/libs/archfpga/arch/mult_luts_arch.xml |  744 ---
 .../vtr/libs/archfpga/arch/sample_arch.xml    | 1215 ----
 .../vtr/libs/archfpga/src/arch_check.cc       |  412 --
 .../vtr/libs/archfpga/src/arch_check.h        |   80 -
 .../vtr/libs/archfpga/src/arch_error.cc       |   16 -
 .../vtr/libs/archfpga/src/arch_error.h        |   18 -
 .../vtr/libs/archfpga/src/arch_types.h        |   35 -
 .../vtr/libs/archfpga/src/arch_util.cc        | 1572 -----
 third_party/vtr/libs/archfpga/src/arch_util.h |  125 -
 third_party/vtr/libs/archfpga/src/cad_types.h |  130 -
 .../vtr/libs/archfpga/src/clock_types.h       |   63 -
 .../vtr/libs/archfpga/src/device_grid.cc      |   42 -
 .../vtr/libs/archfpga/src/device_grid.h       |   61 -
 .../vtr/libs/archfpga/src/echo_arch.cc        |  632 ---
 third_party/vtr/libs/archfpga/src/echo_arch.h |   11 -
 .../vtr/libs/archfpga/src/histogram.cc        |  119 -
 third_party/vtr/libs/archfpga/src/histogram.h |   26 -
 .../vtr/libs/archfpga/src/logic_types.h       |   57 -
 third_party/vtr/libs/archfpga/src/main.cc     |   80 -
 .../libs/archfpga/src/parse_switchblocks.cc   |  473 --
 .../libs/archfpga/src/parse_switchblocks.h    |   22 -
 .../vtr/libs/archfpga/src/physical_types.cc   |  255 -
 .../vtr/libs/archfpga/src/physical_types.h    | 1896 -------
 .../libs/archfpga/src/physical_types_util.cc  |  492 --
 .../libs/archfpga/src/physical_types_util.h   |  304 -
 .../src/read_fpga_interchange_arch.cc         | 2542 ---------
 .../archfpga/src/read_fpga_interchange_arch.h |   34 -
 .../libs/archfpga/src/read_xml_arch_file.cc   | 5037 -----------------
 .../libs/archfpga/src/read_xml_arch_file.h    |   27 -
 .../vtr/libs/archfpga/src/read_xml_util.cc    |  142 -
 .../vtr/libs/archfpga/src/read_xml_util.h     |   28 -
 third_party/vtr/libs/archfpga/test/main.cpp   |    2 -
 .../archfpga/test/test_read_xml_arch_file.cpp |  269 -
 third_party/vtr/libs/log/.gitignore           |    1 -
 third_party/vtr/libs/log/CMakeLists.txt       |   24 -
 third_party/vtr/libs/log/LICENSE.txt          |   21 -
 third_party/vtr/libs/log/Readme.txt           |   13 -
 third_party/vtr/libs/log/src/log.cc           |  117 -
 third_party/vtr/libs/log/src/log.h            |   22 -
 third_party/vtr/libs/log/src/main.cc          |   18 -
 third_party/vtr/libs/pugiutil/CMakeLists.txt  |   22 -
 .../vtr/libs/pugiutil/src/pugixml_loc.cc      |   49 -
 .../vtr/libs/pugiutil/src/pugixml_loc.hpp     |   51 -
 .../vtr/libs/pugiutil/src/pugixml_util.cc     |  298 -
 .../vtr/libs/pugiutil/src/pugixml_util.hpp    |  198 -
 third_party/vtr/libs/rtlnumber/.gitignore     |    1 -
 third_party/vtr/libs/rtlnumber/CMakeLists.txt |   33 -
 third_party/vtr/libs/rtlnumber/Makefile       |   81 -
 third_party/vtr/libs/rtlnumber/README.md      |    9 -
 third_party/vtr/libs/rtlnumber/main.cpp       |  200 -
 .../basic_regression_tests.csv                |  310 -
 .../rtlnumber/src/include/internal_bits.hpp   | 1140 ----
 .../libs/rtlnumber/src/include/rtl_int.hpp    |   95 -
 .../libs/rtlnumber/src/include/rtl_utils.hpp  |   57 -
 third_party/vtr/libs/rtlnumber/src/rtl_int.cc |  746 ---
 .../vtr/libs/rtlnumber/src/rtl_utils.cc       |  304 -
 .../vtr/libs/rtlnumber/unit_test/Makefile     |   79 -
 .../libs/rtlnumber/unit_test/verilog_bits.cpp |   27 -
 .../vtr/libs/rtlnumber/verify_librtlnumber.sh |  124 -
 .../vpr/src/draw/breakpoint_state_globals.h   |   33 -
 third_party/vtr/libs/vtrutil/CMakeLists.txt   |  119 -
 .../cmake/modules/configure_version.cmake     |   55 -
 third_party/vtr/libs/vtrutil/src/picosha2.h   |  357 --
 third_party/vtr/libs/vtrutil/src/vpr_error.cc |   89 -
 third_party/vtr/libs/vtrutil/src/vpr_error.h  |  127 -
 .../vtr/libs/vtrutil/src/vtr_array_view.h     |  273 -
 .../vtr/libs/vtrutil/src/vtr_assert.cc        |   23 -
 third_party/vtr/libs/vtrutil/src/vtr_assert.h |  151 -
 third_party/vtr/libs/vtrutil/src/vtr_bimap.h  |  167 -
 third_party/vtr/libs/vtrutil/src/vtr_cache.h  |   46 -
 .../vtr/libs/vtrutil/src/vtr_color_map.cc     |  831 ---
 .../vtr/libs/vtrutil/src/vtr_color_map.h      |   58 -
 .../vtr/libs/vtrutil/src/vtr_digest.cc        |   39 -
 third_party/vtr/libs/vtrutil/src/vtr_digest.h |   16 -
 .../vtr/libs/vtrutil/src/vtr_dynamic_bitset.h |   72 -
 third_party/vtr/libs/vtrutil/src/vtr_error.h  |   68 -
 .../vtr/libs/vtrutil/src/vtr_expr_eval.cc     |  904 ---
 .../vtr/libs/vtrutil/src/vtr_expr_eval.h      |  234 -
 .../vtr/libs/vtrutil/src/vtr_flat_map.h       |  483 --
 .../vtr/libs/vtrutil/src/vtr_geometry.h       |  312 -
 .../vtr/libs/vtrutil/src/vtr_geometry.tpp     |  347 --
 third_party/vtr/libs/vtrutil/src/vtr_hash.h   |   30 -
 .../vtr/libs/vtrutil/src/vtr_linear_map.h     |  312 -
 third_party/vtr/libs/vtrutil/src/vtr_list.cc  |   25 -
 third_party/vtr/libs/vtrutil/src/vtr_list.h   |   24 -
 third_party/vtr/libs/vtrutil/src/vtr_log.cc   |   50 -
 third_party/vtr/libs/vtrutil/src/vtr_log.h    |  167 -
 third_party/vtr/libs/vtrutil/src/vtr_logic.h  |   33 -
 .../vtr/libs/vtrutil/src/vtr_map_util.h       |   45 -
 third_party/vtr/libs/vtrutil/src/vtr_math.cc  |  106 -
 third_party/vtr/libs/vtrutil/src/vtr_math.h   |  168 -
 .../vtr/libs/vtrutil/src/vtr_memory.cc        |  178 -
 third_party/vtr/libs/vtrutil/src/vtr_memory.h |  151 -
 .../vtr/libs/vtrutil/src/vtr_ndmatrix.h       |  409 --
 .../vtr/libs/vtrutil/src/vtr_ndoffsetmatrix.h |  459 --
 .../vtr/libs/vtrutil/src/vtr_ostream_guard.h  |   40 -
 .../vtr/libs/vtrutil/src/vtr_pair_util.h      |   96 -
 third_party/vtr/libs/vtrutil/src/vtr_path.cc  |   87 -
 third_party/vtr/libs/vtrutil/src/vtr_path.h   |   33 -
 .../vtr/libs/vtrutil/src/vtr_ragged_matrix.h  |  255 -
 .../vtr/libs/vtrutil/src/vtr_random.cc        |   77 -
 third_party/vtr/libs/vtrutil/src/vtr_random.h |   45 -
 third_party/vtr/libs/vtrutil/src/vtr_range.h  |   85 -
 .../vtr/libs/vtrutil/src/vtr_rusage.cc        |   30 -
 third_party/vtr/libs/vtrutil/src/vtr_rusage.h |   11 -
 .../vtr/libs/vtrutil/src/vtr_sentinels.h      |   49 -
 .../vtr/libs/vtrutil/src/vtr_small_vector.h   |  854 ---
 .../libs/vtrutil/src/vtr_string_interning.h   |  558 --
 .../vtr/libs/vtrutil/src/vtr_string_view.h    |  192 -
 .../vtr/libs/vtrutil/src/vtr_strong_id.h      |  245 -
 .../libs/vtrutil/src/vtr_strong_id_range.h    |  185 -
 third_party/vtr/libs/vtrutil/src/vtr_time.cc  |   99 -
 third_party/vtr/libs/vtrutil/src/vtr_time.h   |   99 -
 third_party/vtr/libs/vtrutil/src/vtr_token.cc |  218 -
 third_party/vtr/libs/vtrutil/src/vtr_token.h  |   40 -
 third_party/vtr/libs/vtrutil/src/vtr_util.cc  |  504 --
 third_party/vtr/libs/vtrutil/src/vtr_util.h   |  123 -
 .../vtr/libs/vtrutil/src/vtr_vec_id_set.h     |  106 -
 third_party/vtr/libs/vtrutil/src/vtr_vector.h |  211 -
 .../vtr/libs/vtrutil/src/vtr_vector_map.h     |  172 -
 .../vtr/libs/vtrutil/src/vtr_version.cpp.in   |   20 -
 .../vtr/libs/vtrutil/src/vtr_version.h        |   20 -
 third_party/vtr/libs/vtrutil/test/main.cpp    |    2 -
 .../vtr/libs/vtrutil/test/test_array_view.cpp |  110 -
 .../vtr/libs/vtrutil/test/test_expr_eval.cpp  |   90 -
 .../vtr/libs/vtrutil/test/test_geometry.cpp   |  245 -
 .../vtr/libs/vtrutil/test/test_map_util.cpp   |   35 -
 .../vtr/libs/vtrutil/test/test_math.cpp       |   58 -
 .../libs/vtrutil/test/test_ragged_vector.cpp  |  104 -
 .../vtr/libs/vtrutil/test/test_random.cpp     |   16 -
 .../vtr/libs/vtrutil/test/test_range.cpp      |   41 -
 .../libs/vtrutil/test/test_small_vector.cpp   |  148 -
 .../vtr/libs/vtrutil/test/test_strings.cpp    |  211 -
 .../vtr/libs/vtrutil/test/test_strong_id.cpp  |  130 -
 .../vtr/libs/vtrutil/test/test_vector.cpp     |   57 -
 third_party/vtr/verilog/LICENSE.md            |   69 -
 third_party/{vtr => vtr_flow}/LICENSE.md      |    0
 third_party/vtr_flow/README.md                |    7 +
 ...cN10LB_mem20K_complexDSP_customSB_22nm.xml |    0
 .../k6_frac_N10_frac_chain_mem32K_40nm.xml    |    0
 third_party/{vtr => vtr_flow}/primitives.v    |    0
 .../{vtr => vtr_flow}/verilog/eltwise_layer.v |    0
 .../verilog/hard_block_include.v              |    0
 .../{vtr => vtr_flow}/verilog/raygentop.v     |    0
 198 files changed, 141 insertions(+), 34093 deletions(-)
 rename parmys-plugin/{ => src}/include/adder.h (100%)
 rename parmys-plugin/{ => src}/include/ast_util.h (100%)
 rename parmys-plugin/{ => src}/include/block_memory.h (100%)
 rename parmys-plugin/{ => src}/include/config_t.h (100%)
 rename parmys-plugin/{ => src}/include/hard_block.h (100%)
 rename parmys-plugin/{ => src}/include/hard_soft_logic_mixer.h (100%)
 rename parmys-plugin/{ => src}/include/hash_table.h (100%)
 rename parmys-plugin/{ => src}/include/memory.h (100%)
 rename parmys-plugin/{ => src}/include/mixing_optimization.h (100%)
 rename parmys-plugin/{ => src}/include/multiplier.h (100%)
 rename parmys-plugin/{ => src}/include/netlist_cleanup.h (100%)
 rename parmys-plugin/{ => src}/include/netlist_statistic.h (100%)
 rename parmys-plugin/{ => src}/include/netlist_utils.h (100%)
 rename parmys-plugin/{ => src}/include/netlist_visualizer.h (100%)
 rename parmys-plugin/{ => src}/include/node_utils.h (100%)
 rename parmys-plugin/{ => src}/include/odin_error.h (100%)
 rename parmys-plugin/{ => src}/include/odin_globals.h (100%)
 rename parmys-plugin/{ => src}/include/odin_ii.h (100%)
 rename parmys-plugin/{ => src}/include/odin_types.h (100%)
 rename parmys-plugin/{ => src}/include/odin_util.h (100%)
 rename parmys-plugin/{ => src}/include/partial_map.h (100%)
 rename parmys-plugin/{ => src}/include/read_xml_config_file.h (100%)
 rename parmys-plugin/{ => src}/include/string_cache.h (100%)
 rename parmys-plugin/{ => src}/include/subtractor.h (100%)
 delete mode 160000 third_party/pugixml
 delete mode 100644 third_party/vtr/arch/LICENSE.md
 delete mode 100644 third_party/vtr/libs/LICENSE.md
 delete mode 100644 third_party/vtr/libs/archfpga/.gitignore
 delete mode 100644 third_party/vtr/libs/archfpga/CMakeLists.txt
 delete mode 100644 third_party/vtr/libs/archfpga/arch/README.txt
 delete mode 100644 third_party/vtr/libs/archfpga/arch/mult_luts_arch.xml
 delete mode 100755 third_party/vtr/libs/archfpga/arch/sample_arch.xml
 delete mode 100644 third_party/vtr/libs/archfpga/src/arch_check.cc
 delete mode 100644 third_party/vtr/libs/archfpga/src/arch_check.h
 delete mode 100644 third_party/vtr/libs/archfpga/src/arch_error.cc
 delete mode 100644 third_party/vtr/libs/archfpga/src/arch_error.h
 delete mode 100644 third_party/vtr/libs/archfpga/src/arch_types.h
 delete mode 100644 third_party/vtr/libs/archfpga/src/arch_util.cc
 delete mode 100644 third_party/vtr/libs/archfpga/src/arch_util.h
 delete mode 100644 third_party/vtr/libs/archfpga/src/cad_types.h
 delete mode 100644 third_party/vtr/libs/archfpga/src/clock_types.h
 delete mode 100644 third_party/vtr/libs/archfpga/src/device_grid.cc
 delete mode 100644 third_party/vtr/libs/archfpga/src/device_grid.h
 delete mode 100644 third_party/vtr/libs/archfpga/src/echo_arch.cc
 delete mode 100644 third_party/vtr/libs/archfpga/src/echo_arch.h
 delete mode 100644 third_party/vtr/libs/archfpga/src/histogram.cc
 delete mode 100644 third_party/vtr/libs/archfpga/src/histogram.h
 delete mode 100644 third_party/vtr/libs/archfpga/src/logic_types.h
 delete mode 100644 third_party/vtr/libs/archfpga/src/main.cc
 delete mode 100644 third_party/vtr/libs/archfpga/src/parse_switchblocks.cc
 delete mode 100644 third_party/vtr/libs/archfpga/src/parse_switchblocks.h
 delete mode 100644 third_party/vtr/libs/archfpga/src/physical_types.cc
 delete mode 100644 third_party/vtr/libs/archfpga/src/physical_types.h
 delete mode 100644 third_party/vtr/libs/archfpga/src/physical_types_util.cc
 delete mode 100644 third_party/vtr/libs/archfpga/src/physical_types_util.h
 delete mode 100644 third_party/vtr/libs/archfpga/src/read_fpga_interchange_arch.cc
 delete mode 100644 third_party/vtr/libs/archfpga/src/read_fpga_interchange_arch.h
 delete mode 100644 third_party/vtr/libs/archfpga/src/read_xml_arch_file.cc
 delete mode 100644 third_party/vtr/libs/archfpga/src/read_xml_arch_file.h
 delete mode 100644 third_party/vtr/libs/archfpga/src/read_xml_util.cc
 delete mode 100644 third_party/vtr/libs/archfpga/src/read_xml_util.h
 delete mode 100644 third_party/vtr/libs/archfpga/test/main.cpp
 delete mode 100644 third_party/vtr/libs/archfpga/test/test_read_xml_arch_file.cpp
 delete mode 100644 third_party/vtr/libs/log/.gitignore
 delete mode 100644 third_party/vtr/libs/log/CMakeLists.txt
 delete mode 100644 third_party/vtr/libs/log/LICENSE.txt
 delete mode 100644 third_party/vtr/libs/log/Readme.txt
 delete mode 100644 third_party/vtr/libs/log/src/log.cc
 delete mode 100644 third_party/vtr/libs/log/src/log.h
 delete mode 100644 third_party/vtr/libs/log/src/main.cc
 delete mode 100644 third_party/vtr/libs/pugiutil/CMakeLists.txt
 delete mode 100644 third_party/vtr/libs/pugiutil/src/pugixml_loc.cc
 delete mode 100644 third_party/vtr/libs/pugiutil/src/pugixml_loc.hpp
 delete mode 100644 third_party/vtr/libs/pugiutil/src/pugixml_util.cc
 delete mode 100644 third_party/vtr/libs/pugiutil/src/pugixml_util.hpp
 delete mode 100644 third_party/vtr/libs/rtlnumber/.gitignore
 delete mode 100644 third_party/vtr/libs/rtlnumber/CMakeLists.txt
 delete mode 100644 third_party/vtr/libs/rtlnumber/Makefile
 delete mode 100644 third_party/vtr/libs/rtlnumber/README.md
 delete mode 100644 third_party/vtr/libs/rtlnumber/main.cpp
 delete mode 100644 third_party/vtr/libs/rtlnumber/regression_tests/basic_regression_tests.csv
 delete mode 100644 third_party/vtr/libs/rtlnumber/src/include/internal_bits.hpp
 delete mode 100644 third_party/vtr/libs/rtlnumber/src/include/rtl_int.hpp
 delete mode 100644 third_party/vtr/libs/rtlnumber/src/include/rtl_utils.hpp
 delete mode 100644 third_party/vtr/libs/rtlnumber/src/rtl_int.cc
 delete mode 100644 third_party/vtr/libs/rtlnumber/src/rtl_utils.cc
 delete mode 100644 third_party/vtr/libs/rtlnumber/unit_test/Makefile
 delete mode 100644 third_party/vtr/libs/rtlnumber/unit_test/verilog_bits.cpp
 delete mode 100755 third_party/vtr/libs/rtlnumber/verify_librtlnumber.sh
 delete mode 100644 third_party/vtr/libs/vpr/src/draw/breakpoint_state_globals.h
 delete mode 100644 third_party/vtr/libs/vtrutil/CMakeLists.txt
 delete mode 100644 third_party/vtr/libs/vtrutil/cmake/modules/configure_version.cmake
 delete mode 100644 third_party/vtr/libs/vtrutil/src/picosha2.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vpr_error.cc
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vpr_error.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_array_view.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_assert.cc
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_assert.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_bimap.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_cache.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_color_map.cc
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_color_map.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_digest.cc
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_digest.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_dynamic_bitset.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_error.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_expr_eval.cc
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_expr_eval.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_flat_map.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_geometry.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_geometry.tpp
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_hash.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_linear_map.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_list.cc
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_list.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_log.cc
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_log.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_logic.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_map_util.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_math.cc
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_math.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_memory.cc
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_memory.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_ndmatrix.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_ndoffsetmatrix.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_ostream_guard.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_pair_util.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_path.cc
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_path.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_ragged_matrix.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_random.cc
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_random.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_range.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_rusage.cc
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_rusage.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_sentinels.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_small_vector.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_string_interning.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_string_view.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_strong_id.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_strong_id_range.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_time.cc
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_time.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_token.cc
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_token.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_util.cc
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_util.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_vec_id_set.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_vector.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_vector_map.h
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_version.cpp.in
 delete mode 100644 third_party/vtr/libs/vtrutil/src/vtr_version.h
 delete mode 100644 third_party/vtr/libs/vtrutil/test/main.cpp
 delete mode 100644 third_party/vtr/libs/vtrutil/test/test_array_view.cpp
 delete mode 100644 third_party/vtr/libs/vtrutil/test/test_expr_eval.cpp
 delete mode 100644 third_party/vtr/libs/vtrutil/test/test_geometry.cpp
 delete mode 100644 third_party/vtr/libs/vtrutil/test/test_map_util.cpp
 delete mode 100644 third_party/vtr/libs/vtrutil/test/test_math.cpp
 delete mode 100644 third_party/vtr/libs/vtrutil/test/test_ragged_vector.cpp
 delete mode 100644 third_party/vtr/libs/vtrutil/test/test_random.cpp
 delete mode 100644 third_party/vtr/libs/vtrutil/test/test_range.cpp
 delete mode 100644 third_party/vtr/libs/vtrutil/test/test_small_vector.cpp
 delete mode 100644 third_party/vtr/libs/vtrutil/test/test_strings.cpp
 delete mode 100644 third_party/vtr/libs/vtrutil/test/test_strong_id.cpp
 delete mode 100644 third_party/vtr/libs/vtrutil/test/test_vector.cpp
 delete mode 100644 third_party/vtr/verilog/LICENSE.md
 rename third_party/{vtr => vtr_flow}/LICENSE.md (100%)
 create mode 100755 third_party/vtr_flow/README.md
 rename third_party/{vtr => vtr_flow}/arch/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml (100%)
 rename third_party/{vtr => vtr_flow}/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml (100%)
 rename third_party/{vtr => vtr_flow}/primitives.v (100%)
 rename third_party/{vtr => vtr_flow}/verilog/eltwise_layer.v (100%)
 rename third_party/{vtr => vtr_flow}/verilog/hard_block_include.v (100%)
 rename third_party/{vtr => vtr_flow}/verilog/raygentop.v (100%)

diff --git a/.github/workflows/build-and-test.sh b/.github/workflows/build-and-test.sh
index 7d0401e65..baf1f8e2f 100755
--- a/.github/workflows/build-and-test.sh
+++ b/.github/workflows/build-and-test.sh
@@ -24,7 +24,7 @@ source .github/workflows/common.sh
 start_section Building
 
 export CXXFLAGS=-Werror
-make UHDM_INSTALL_DIR=`pwd`/env/conda/envs/yosys-plugins/ plugins -j`nproc`
+make UHDM_INSTALL_DIR=`pwd`/env/conda/envs/yosys-plugins/ VTR_INSTALL_DIR=`pwd`/env/conda/envs/yosys-plugins plugins -j`nproc`
 unset CXXFLAGS
 
 end_section
diff --git a/.github/workflows/licensing.yml b/.github/workflows/licensing.yml
index 0a5c6f1e1..6d5302ef5 100644
--- a/.github/workflows/licensing.yml
+++ b/.github/workflows/licensing.yml
@@ -33,15 +33,12 @@ jobs:
           ./design_introspection-plugin/tests/selection_to_tcl_list/selection_to_tcl_list.v
           ./third_party/minilitex_ddr_arty/minilitex_ddr_arty.v
           ./third_party/VexRiscv_Lite/VexRiscv_Lite.v
-          ./third_party/vtr/verilog/eltwise_layer.v
-          ./third_party/vtr/verilog/raygentop.v
-          ./third_party/vtr/verilog/hard_block_include.v
-          ./third_party/vtr/arch/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
-          ./third_party/vtr/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml
-          ./third_party/vtr/primitives.v
+          ./third_party/vtr_flow/verilog/eltwise_layer.v
+          ./third_party/vtr_flow/verilog/raygentop.v
+          ./third_party/vtr_flow/verilog/hard_block_include.v
+          ./third_party/vtr_flow/arch/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
+          ./third_party/vtr_flow/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml
+          ./third_party/vtr_flow/primitives.v
         third_party: |
           ./third_party/googletest/
-          ./third_party/libargparse/
-          ./third_party/pugixml/
-          ./third_party/vtr/
           .third_party/mips32r1_core/
diff --git a/.gitmodules b/.gitmodules
index 4a0680aa3..9c7ba5744 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,9 +4,6 @@
 [submodule "third_party/make-env"]
 	path = third_party/make-env
 	url = https://github.com/SymbiFlow/make-env.git
-[submodule "third_party/pugixml"]
-	path = third_party/pugixml
-	url = https://github.com/zeux/pugixml.git
 [submodule "third_party/mips32r1_core"]
 	path = third_party/mips32r1_core
 	url = https://github.com/grantae/mips32r1_core.git
diff --git a/Makefile b/Makefile
index aa1d61923..8e371ba00 100644
--- a/Makefile
+++ b/Makefile
@@ -14,7 +14,8 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-PLUGIN_LIST := fasm xdc params sdc ql-iob design_introspection integrateinv ql-qlf systemverilog uhdm dsp-ff parmys
+#PLUGIN_LIST := fasm xdc params sdc ql-iob design_introspection integrateinv ql-qlf systemverilog uhdm dsp-ff parmys
+PLUGIN_LIST := parmys
 PLUGINS := $(foreach plugin,$(PLUGIN_LIST),$(plugin).so)
 PLUGINS_INSTALL := $(foreach plugin,$(PLUGIN_LIST),install_$(plugin))
 PLUGINS_CLEAN := $(foreach plugin,$(PLUGIN_LIST),clean_$(plugin))
diff --git a/environment.yml b/environment.yml
index a4cf46c24..252afc444 100644
--- a/environment.yml
+++ b/environment.yml
@@ -22,3 +22,4 @@ dependencies:
   - litex-hub::yosys=0.17_7_g990c9b8e1=20220512_085338_py37
   - litex-hub::surelog
   - litex-hub::iverilog
+  - litex-hub::vtr
diff --git a/parmys-plugin/Makefile b/parmys-plugin/Makefile
index 5ca2525a4..7d487078e 100644
--- a/parmys-plugin/Makefile
+++ b/parmys-plugin/Makefile
@@ -16,76 +16,61 @@
 
 PLUGIN_DIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST))))
 
+ODIN_II_DIR=src
+
 NAME = parmys
 SOURCES = parmys.cc \
 		  parmys_arch.cc \
 		  parmys_update.cc \
 		  parmys_utils.cc \
 		  parmys_resolve.cc \
-		  src/adder.cc \
-		  src/enum_str.cc \
-		  src/mixing_optimization.cc \
-		  src/read_xml_config_file.cc \
-		  src/odin_error.cc \
-		  src/odin_util.cc \
-		  src/netlist_statistic.cc \
-		  src/netlist_utils.cc \
-		  src/netlist_cleanup.cc \
-		  src/node_utils.cc \
-		  src/multiplier.cc \
-		  src/subtractor.cc \
-		  src/hard_soft_logic_mixer.cc \
-		  src/odin_ii.cc \
-		  src/string_cache.cc \
-		  src/partial_map.cc \
-		  src/hard_block.cc \
-		  src/block_memory.cc \
-		  src/memory.cc \
-		  src/netlist_visualizer.cc \
-		  src/hash_table.cc \
-		  src/ast_util.cc \
-		  ../third_party/vtr/libs/vtrutil/src/vtr_util.cc \
-		  ../third_party/vtr/libs/vtrutil/src/vtr_token.cc \
-		  ../third_party/vtr/libs/vtrutil/src/vtr_memory.cc \
-		  ../third_party/vtr/libs/vtrutil/src/vtr_list.cc \
-		  ../third_party/vtr/libs/vtrutil/src/vtr_log.cc \
-		  ../third_party/vtr/libs/vtrutil/src/vtr_expr_eval.cc \
-		  ../third_party/vtr/libs/vtrutil/src/vtr_digest.cc \
-		  ../third_party/vtr/libs/vtrutil/src/vtr_math.cc \
-		  ../third_party/vtr/libs/vtrutil/src/vtr_path.cc \
-		  ../third_party/vtr/libs/vtrutil/src/vtr_assert.cc \
-		  ../third_party/vtr/libs/log/src/log.cc \
-		  ../third_party/pugixml/src/pugixml.cpp \
-		  ../third_party/vtr/libs/rtlnumber/src/rtl_int.cc \
-		  ../third_party/vtr/libs/rtlnumber/src/rtl_utils.cc \
-		  ../third_party/vtr/libs/pugiutil/src/pugixml_loc.cc \
-		  ../third_party/vtr/libs/pugiutil/src/pugixml_util.cc \
-		  ../third_party/vtr/libs/archfpga/src/physical_types.cc \
-		  ../third_party/vtr/libs/archfpga/src/read_xml_util.cc \
-		  ../third_party/vtr/libs/archfpga/src/arch_error.cc \
-		  ../third_party/vtr/libs/archfpga/src/physical_types_util.cc \
-		  ../third_party/vtr/libs/archfpga/src/arch_check.cc \
-		  ../third_party/vtr/libs/archfpga/src/arch_util.cc \
-		  ../third_party/vtr/libs/archfpga/src/read_xml_arch_file.cc \
-		  ../third_party/vtr/libs/archfpga/src/parse_switchblocks.cc \
-		  ../third_party/vtr/libs/archfpga/src/echo_arch.cc
+		  ${ODIN_II_DIR}/adder.cc \
+		  ${ODIN_II_DIR}/enum_str.cc \
+		  ${ODIN_II_DIR}/mixing_optimization.cc \
+		  ${ODIN_II_DIR}/read_xml_config_file.cc \
+		  ${ODIN_II_DIR}/odin_error.cc \
+		  ${ODIN_II_DIR}/odin_util.cc \
+		  ${ODIN_II_DIR}/netlist_statistic.cc \
+		  ${ODIN_II_DIR}/netlist_utils.cc \
+		  ${ODIN_II_DIR}/netlist_cleanup.cc \
+		  ${ODIN_II_DIR}/node_utils.cc \
+		  ${ODIN_II_DIR}/multiplier.cc \
+		  ${ODIN_II_DIR}/subtractor.cc \
+		  ${ODIN_II_DIR}/hard_soft_logic_mixer.cc \
+		  ${ODIN_II_DIR}/odin_ii.cc \
+		  ${ODIN_II_DIR}/string_cache.cc \
+		  ${ODIN_II_DIR}/partial_map.cc \
+		  ${ODIN_II_DIR}/hard_block.cc \
+		  ${ODIN_II_DIR}/block_memory.cc \
+		  ${ODIN_II_DIR}/memory.cc \
+		  ${ODIN_II_DIR}/netlist_visualizer.cc \
+		  ${ODIN_II_DIR}/hash_table.cc \
+		  ${ODIN_II_DIR}/ast_util.cc
+
+VTR_INSTALL_DIR ?= /usr/local
 
 include ../Makefile_plugin.common
 
-CXXFLAGS += -I./include
-CXXFLAGS += -I../third_party/pugixml/src
-CXXFLAGS += -I../third_party/vtr/libs/archfpga/src
-CXXFLAGS += -I../third_party/vtr/libs/log/src
-CXXFLAGS += -I../third_party/vtr/libs/pugiutil/src
-CXXFLAGS += -I../third_party/vtr/libs/rtlnumber/src/include
-CXXFLAGS += -I../third_party/vtr/libs/rtlnumber/src
-CXXFLAGS += -I../third_party/vtr/libs/vtrutil/src
-CXXFLAGS += -I../third_party/vtr/libs/vpr/src/draw
+CXXFLAGS += -std=c++14 -Wall -W -Wextra \
+            -Wno-deprecated-declarations \
+            -Wno-unused-parameter \
+			-I${ODIN_II_DIR}/include \
+			-I${VTR_INSTALL_DIR}/include/libarchfpga \
+			-I${VTR_INSTALL_DIR}/include/liblog \
+			-I${VTR_INSTALL_DIR}/include/libpugiutil \
+			-I${VTR_INSTALL_DIR}/include/libpugixml \
+			-I${VTR_INSTALL_DIR}/include/librtlnumber \
+			-I${VTR_INSTALL_DIR}/include/libvtrutil
 
-CXXSTD := c++14
-CXXFLAGS += -std=$(CXXSTD) -Os
+LDFLAGS += -L${VTR_INSTALL_DIR}/lib \
+		   -L${VTR_INSTALL_DIR}/bin
 
-LDLIBS += -lpthread
+LDLIBS += -larchfpga \
+		  -lvtrutil \
+		  -llog \
+		  -lpugixml \
+		  -lpugiutil \
+		  -lpthread
 
 TECHLIBS_DIR = techlibs
 VERILOG_MODULES = adff2dff.v \
@@ -94,8 +79,6 @@ VERILOG_MODULES = adff2dff.v \
 				  aldffe2dff.v \
 				  vtr_primitives.v
 
-# install_modules: $(VERILOG_MODULES)
-# install -D $< $(YOSYS_PLUGINS_DIR)/parmys/$<
 install_modules:
 	$(foreach f, $(wildcard $(TECHLIBS_DIR)/*), install -D $(f) $(YOSYS_DATA_DIR)/parmys/$(notdir $(f));)
 
diff --git a/parmys-plugin/parmys.cc b/parmys-plugin/parmys.cc
index 05a0f0960..29fe73ae3 100644
--- a/parmys-plugin/parmys.cc
+++ b/parmys-plugin/parmys.cc
@@ -647,7 +647,7 @@ struct ParMYSPass : public Pass {
                 SigBit lhs_bit = conn.first[i];
                 SigBit rhs_bit = conn.second[i];
 
-                if (cstr_bits_seen.count(lhs_bit) == 0) // @TODO to be double checked later
+                if (cstr_bits_seen.count(lhs_bit) == 0)
                     continue;
 
                 nnode_t *buf_node = allocate_nnode(my_location);
@@ -1092,6 +1092,8 @@ struct ParMYSPass : public Pass {
         log("\nTotal Synthesis Time: ");
         log_time(synthesis_time);
         log("\n--------------------------------------------------------------------\n");
+        report(transformed);
+        log("\n--------------------------------------------------------------------\n");
 
         log("Updating the Design\n");
         Pass::call(design, "delete");
diff --git a/parmys-plugin/parmys_arch.cc b/parmys-plugin/parmys_arch.cc
index 3b005e7e3..2175ebb38 100644
--- a/parmys-plugin/parmys_arch.cc
+++ b/parmys-plugin/parmys_arch.cc
@@ -18,6 +18,7 @@
 #include "kernel/yosys.h"
 
 #include "arch_util.h"
+#include "echo_arch.h"
 #include "odin_types.h"
 #include "parmys_utils.hpp"
 #include "read_xml_arch_file.h"
@@ -111,6 +112,9 @@ struct ParmysArchPass : public Pass {
                       vtr_error.line());
         }
 
+        const char *arch_info_file = "arch.info";
+        EchoArch(arch_info_file, physical_tile_types, logical_block_types, &arch);
+
         t_model *hb = arch.models;
         while (hb) {
             if (strcmp(hb->name, SINGLE_PORT_RAM_string) && strcmp(hb->name, DUAL_PORT_RAM_string) && strcmp(hb->name, "multiply") &&
diff --git a/parmys-plugin/parmys_update.cc b/parmys-plugin/parmys_update.cc
index ff91db1c9..b643a4682 100644
--- a/parmys-plugin/parmys_update.cc
+++ b/parmys-plugin/parmys_update.cc
@@ -437,7 +437,7 @@ void define_logical_function_yosys(nnode_t *node, Module *module)
 
     for (int i = 0; i < node->num_input_pins; i++) {
         nnet_t *input_net = node->input_pins[i]->net;
-        Wire *driver_wire = wire_net_driver(module, node, input_net, 0); // 0 TODO?
+        Wire *driver_wire = wire_net_driver(module, node, input_net, 0);
 
         input_sig.append(driver_wire);
     }
diff --git a/parmys-plugin/src/adder.cc b/parmys-plugin/src/adder.cc
index db8a0e3ba..935cdb238 100644
--- a/parmys-plugin/src/adder.cc
+++ b/parmys-plugin/src/adder.cc
@@ -32,6 +32,8 @@
 
 using vtr::t_linked_vptr;
 
+USING_YOSYS_NAMESPACE
+
 t_model *hard_adders = NULL;
 t_linked_vptr *add_list = NULL;
 t_linked_vptr *processed_adder_list = NULL;
@@ -69,24 +71,24 @@ void report_add_distribution()
     if (hard_adders == NULL)
         return;
 
-    printf("\nHard adder Distribution\n");
-    printf("============================\n");
-    printf("\n");
-    printf("\nTotal # of chains = %ld\n", adder_chain_count);
+    log("\nHard adder Distribution\n");
+    log("============================\n");
+    log("\n");
+    log("\nTotal # of chains = %ld\n", adder_chain_count);
 
-    printf("\nHard adder chain Details\n");
-    printf("============================\n");
+    log("\nHard adder chain Details\n");
+    log("============================\n");
 
-    printf("\n");
-    printf("\nThe Number of Hard Block adders in the Longest Chain: %ld\n", longest_adder_chain);
+    log("\n");
+    log("\nThe Number of Hard Block adders in the Longest Chain: %ld\n", longest_adder_chain);
 
-    printf("\n");
-    printf("\nThe Total Number of Hard Block adders: %ld\n", total_adders);
+    log("\n");
+    log("\nThe Total Number of Hard Block adders: %ld\n", total_adders);
 
-    printf("\n");
-    printf("\nGeometric mean adder/subtractor chain length: %.2f\n", geomean_addsub_length);
+    log("\n");
+    log("\nGeometric mean adder/subtractor chain length: %.2f\n", geomean_addsub_length);
 
-    vtr::free(adder);
+    // vtr::free(adder);
 }
 
 /*---------------------------------------------------------------------------
diff --git a/parmys-plugin/include/adder.h b/parmys-plugin/src/include/adder.h
similarity index 100%
rename from parmys-plugin/include/adder.h
rename to parmys-plugin/src/include/adder.h
diff --git a/parmys-plugin/include/ast_util.h b/parmys-plugin/src/include/ast_util.h
similarity index 100%
rename from parmys-plugin/include/ast_util.h
rename to parmys-plugin/src/include/ast_util.h
diff --git a/parmys-plugin/include/block_memory.h b/parmys-plugin/src/include/block_memory.h
similarity index 100%
rename from parmys-plugin/include/block_memory.h
rename to parmys-plugin/src/include/block_memory.h
diff --git a/parmys-plugin/include/config_t.h b/parmys-plugin/src/include/config_t.h
similarity index 100%
rename from parmys-plugin/include/config_t.h
rename to parmys-plugin/src/include/config_t.h
diff --git a/parmys-plugin/include/hard_block.h b/parmys-plugin/src/include/hard_block.h
similarity index 100%
rename from parmys-plugin/include/hard_block.h
rename to parmys-plugin/src/include/hard_block.h
diff --git a/parmys-plugin/include/hard_soft_logic_mixer.h b/parmys-plugin/src/include/hard_soft_logic_mixer.h
similarity index 100%
rename from parmys-plugin/include/hard_soft_logic_mixer.h
rename to parmys-plugin/src/include/hard_soft_logic_mixer.h
diff --git a/parmys-plugin/include/hash_table.h b/parmys-plugin/src/include/hash_table.h
similarity index 100%
rename from parmys-plugin/include/hash_table.h
rename to parmys-plugin/src/include/hash_table.h
diff --git a/parmys-plugin/include/memory.h b/parmys-plugin/src/include/memory.h
similarity index 100%
rename from parmys-plugin/include/memory.h
rename to parmys-plugin/src/include/memory.h
diff --git a/parmys-plugin/include/mixing_optimization.h b/parmys-plugin/src/include/mixing_optimization.h
similarity index 100%
rename from parmys-plugin/include/mixing_optimization.h
rename to parmys-plugin/src/include/mixing_optimization.h
diff --git a/parmys-plugin/include/multiplier.h b/parmys-plugin/src/include/multiplier.h
similarity index 100%
rename from parmys-plugin/include/multiplier.h
rename to parmys-plugin/src/include/multiplier.h
diff --git a/parmys-plugin/include/netlist_cleanup.h b/parmys-plugin/src/include/netlist_cleanup.h
similarity index 100%
rename from parmys-plugin/include/netlist_cleanup.h
rename to parmys-plugin/src/include/netlist_cleanup.h
diff --git a/parmys-plugin/include/netlist_statistic.h b/parmys-plugin/src/include/netlist_statistic.h
similarity index 100%
rename from parmys-plugin/include/netlist_statistic.h
rename to parmys-plugin/src/include/netlist_statistic.h
diff --git a/parmys-plugin/include/netlist_utils.h b/parmys-plugin/src/include/netlist_utils.h
similarity index 100%
rename from parmys-plugin/include/netlist_utils.h
rename to parmys-plugin/src/include/netlist_utils.h
diff --git a/parmys-plugin/include/netlist_visualizer.h b/parmys-plugin/src/include/netlist_visualizer.h
similarity index 100%
rename from parmys-plugin/include/netlist_visualizer.h
rename to parmys-plugin/src/include/netlist_visualizer.h
diff --git a/parmys-plugin/include/node_utils.h b/parmys-plugin/src/include/node_utils.h
similarity index 100%
rename from parmys-plugin/include/node_utils.h
rename to parmys-plugin/src/include/node_utils.h
diff --git a/parmys-plugin/include/odin_error.h b/parmys-plugin/src/include/odin_error.h
similarity index 100%
rename from parmys-plugin/include/odin_error.h
rename to parmys-plugin/src/include/odin_error.h
diff --git a/parmys-plugin/include/odin_globals.h b/parmys-plugin/src/include/odin_globals.h
similarity index 100%
rename from parmys-plugin/include/odin_globals.h
rename to parmys-plugin/src/include/odin_globals.h
diff --git a/parmys-plugin/include/odin_ii.h b/parmys-plugin/src/include/odin_ii.h
similarity index 100%
rename from parmys-plugin/include/odin_ii.h
rename to parmys-plugin/src/include/odin_ii.h
diff --git a/parmys-plugin/include/odin_types.h b/parmys-plugin/src/include/odin_types.h
similarity index 100%
rename from parmys-plugin/include/odin_types.h
rename to parmys-plugin/src/include/odin_types.h
diff --git a/parmys-plugin/include/odin_util.h b/parmys-plugin/src/include/odin_util.h
similarity index 100%
rename from parmys-plugin/include/odin_util.h
rename to parmys-plugin/src/include/odin_util.h
diff --git a/parmys-plugin/include/partial_map.h b/parmys-plugin/src/include/partial_map.h
similarity index 100%
rename from parmys-plugin/include/partial_map.h
rename to parmys-plugin/src/include/partial_map.h
diff --git a/parmys-plugin/include/read_xml_config_file.h b/parmys-plugin/src/include/read_xml_config_file.h
similarity index 100%
rename from parmys-plugin/include/read_xml_config_file.h
rename to parmys-plugin/src/include/read_xml_config_file.h
diff --git a/parmys-plugin/include/string_cache.h b/parmys-plugin/src/include/string_cache.h
similarity index 100%
rename from parmys-plugin/include/string_cache.h
rename to parmys-plugin/src/include/string_cache.h
diff --git a/parmys-plugin/include/subtractor.h b/parmys-plugin/src/include/subtractor.h
similarity index 100%
rename from parmys-plugin/include/subtractor.h
rename to parmys-plugin/src/include/subtractor.h
diff --git a/parmys-plugin/src/memory.cc b/parmys-plugin/src/memory.cc
index 0038decc8..d9a915ce9 100644
--- a/parmys-plugin/src/memory.cc
+++ b/parmys-plugin/src/memory.cc
@@ -32,6 +32,8 @@
 
 using vtr::t_linked_vptr;
 
+USING_YOSYS_NAMESPACE
+
 t_model *single_port_rams = NULL;
 t_model *dual_port_rams = NULL;
 
@@ -231,8 +233,8 @@ void check_memories_and_report_distribution()
     if ((sp_memory_list == NULL) && (dp_memory_list == NULL))
         return;
 
-    printf("\nHard Logical Memory Distribution\n");
-    printf("============================\n");
+    log("\nHard Logical Memory Distribution\n");
+    log("============================\n");
 
     long total_memory_bits = 0;
     int total_memory_block_counter = 0;
@@ -249,7 +251,7 @@ void check_memories_and_report_distribution()
         if (depth > shift_left_value_with_overflow_check(0x1, HARD_RAM_ADDR_LIMIT, node->loc))
             error_message(NETLIST, node->loc, "Memory %s of depth %zu exceeds ODIN depth bound of 2^%d.", node->name, depth, HARD_RAM_ADDR_LIMIT);
 
-        printf("SPRAM: %zu width %zu depth\n", width, depth);
+        log("SPRAM: %zu width %zu depth\n", width, depth);
 
         total_memory_bits += width * depth;
 
@@ -274,7 +276,7 @@ void check_memories_and_report_distribution()
         if (depth > shift_left_value_with_overflow_check(0x1, HARD_RAM_ADDR_LIMIT, node->loc))
             error_message(NETLIST, node->loc, "Memory %s of depth %zu exceeds ODIN depth bound of 2^%d.", node->name, depth, HARD_RAM_ADDR_LIMIT);
 
-        printf("DPRAM: %zu width %zu depth\n", width, depth);
+        log("DPRAM: %zu width %zu depth\n", width, depth);
         total_memory_bits += width * depth;
 
         total_memory_block_counter++;
@@ -288,11 +290,11 @@ void check_memories_and_report_distribution()
         temp = temp->next;
     }
 
-    printf("\nTotal Logical Memory Blocks = %d \n", total_memory_block_counter);
-    printf("Total Logical Memory bits = %ld \n", total_memory_bits);
-    printf("Max Memory Width = %ld \n", memory_max_width);
-    printf("Max Memory Depth = %ld \n", memory_max_depth);
-    printf("\n");
+    log("\nTotal Logical Memory Blocks = %d \n", total_memory_block_counter);
+    log("Total Logical Memory bits = %ld \n", total_memory_bits);
+    log("Max Memory Width = %ld \n", memory_max_width);
+    log("Max Memory Depth = %ld \n", memory_max_depth);
+    log("\n");
 
     return;
 }
diff --git a/parmys-plugin/src/multiplier.cc b/parmys-plugin/src/multiplier.cc
index fadbdc1b5..847618a5b 100644
--- a/parmys-plugin/src/multiplier.cc
+++ b/parmys-plugin/src/multiplier.cc
@@ -524,19 +524,19 @@ void report_mult_distribution()
     if (hard_multipliers == NULL)
         return;
 
-    printf("\nHard Multiplier Distribution\n");
-    printf("============================\n");
+    log("\nHard Multiplier Distribution\n");
+    log("============================\n");
     for (long i = 0; i <= hard_multipliers->inputs->size; i++) {
         for (long j = 1; j <= hard_multipliers->inputs->next->size; j++) {
             if (mults[i * hard_multipliers->inputs->size + j] != 0) {
                 num_total += mults[i * hard_multipliers->inputs->size + j];
-                printf("%ld X %ld => %d\n", i, j, mults[i * hard_multipliers->inputs->size + j]);
+                log("%ld X %ld => %d\n", i, j, mults[i * hard_multipliers->inputs->size + j]);
             }
         }
     }
-    printf("\n");
-    printf("\nTotal # of multipliers = %ld\n", num_total);
-    vtr::free(mults);
+    log("\n");
+    log("\nTotal # of multipliers = %ld\n", num_total);
+    // vtr::free(mults);
 }
 
 /*---------------------------------------------------------------------------
diff --git a/parmys-plugin/src/netlist_cleanup.cc b/parmys-plugin/src/netlist_cleanup.cc
index b720127a1..d336b5ccd 100644
--- a/parmys-plugin/src/netlist_cleanup.cc
+++ b/parmys-plugin/src/netlist_cleanup.cc
@@ -27,6 +27,8 @@
 #include "vtr_memory.h"
 #include "vtr_util.h"
 
+USING_YOSYS_NAMESPACE
+
 bool coarsen_cleanup;
 
 /* Used in the nnode_t.node_data field to mark if the node was already visited
@@ -313,7 +315,7 @@ void report_removed_nodes(long long *node_list)
     for (int i = 0; i < operation_list_END; i++) {
         if (node_list[i] > UNUSED_NODE_TYPE) {
             std::string msg = std::string("Number of removed <") + operation_list_STR[i][ODIN_LONG_STRING] + "> node(s): ";
-            printf("%-42s%lld\n", msg.c_str(), node_list[i]);
+            log("%-42s%lld\n", msg.c_str(), node_list[i]);
         }
     }
 }
diff --git a/parmys-plugin/src/netlist_statistic.cc b/parmys-plugin/src/netlist_statistic.cc
index b65fef626..3e97652ad 100644
--- a/parmys-plugin/src/netlist_statistic.cc
+++ b/parmys-plugin/src/netlist_statistic.cc
@@ -25,6 +25,8 @@
 #include "odin_types.h"
 #include "vtr_memory.h"
 
+USING_YOSYS_NAMESPACE
+
 static void init(metric_t *m);
 static void print_stats(metric_t *m);
 static void copy(metric_t *dest, metric_t *src);
@@ -81,8 +83,8 @@ void mixing_optimization_stats(nnode_t *node, netlist_t *netlist)
 
 static void print_stats(metric_t *m)
 {
-    printf("\n\t%s:%0.4lf\n\t%s: %0.4lf\n\t%s: %0.4lf\n\t%s: %0.4lf\n", "shortest path", m->min_depth, "critical path", m->max_depth, "average path",
-           m->avg_depth, "overall fan-out", m->avg_width);
+    log("\n\t%s:%0.4lf\n\t%s: %0.4lf\n\t%s: %0.4lf\n\t%s: %0.4lf\n", "shortest path", m->min_depth, "critical path", m->max_depth, "average path",
+        m->avg_depth, "overall fan-out", m->avg_width);
 }
 _static_unused(print_stats) // quiet warning
 
@@ -370,19 +372,19 @@ void compute_statistics(netlist_t *netlist, bool display)
         get_upward_stat(&netlist->output_node_stat, netlist->top_output_nodes, netlist->num_top_output_nodes, netlist, travelsal_id + 1);
 
         if (display) {
-            printf("\n\t==== Stats ====\n");
+            log("\n\t==== Stats ====\n");
             for (long long op = 0; op < operation_list_END; op += 1) {
                 if (netlist->num_of_type[op] > UNUSED_NODE_TYPE) {
                     std::string hdr = std::string("Number of <") + operation_list_STR[op][ODIN_LONG_STRING] + "> node: ";
 
-                    printf("%-42s%lld\n", hdr.c_str(), netlist->num_of_type[op]);
+                    log("%-42s%lld\n", hdr.c_str(), netlist->num_of_type[op]);
                 }
             }
-            printf("%-42s%lld\n", "Total estimated number of lut: ", netlist->num_logic_element);
-            printf("%-42s%lld\n", "Total number of node: ", netlist->num_of_node);
-            printf("%-42s%0.0f\n", "Longest path: ", netlist->output_node_stat.max_depth);
-            printf("%-42s%0.0f\n", "Average path: ", netlist->output_node_stat.avg_depth);
-            printf("\n");
+            log("%-42s%lld\n", "Total estimated number of lut: ", netlist->num_logic_element);
+            log("%-42s%lld\n", "Total number of node: ", netlist->num_of_node);
+            log("%-42s%0.0f\n", "Longest path: ", netlist->output_node_stat.max_depth);
+            log("%-42s%0.0f\n", "Average path: ", netlist->output_node_stat.avg_depth);
+            log("\n");
         }
     }
 }
\ No newline at end of file
diff --git a/parmys-plugin/src/partial_map.cc b/parmys-plugin/src/partial_map.cc
index 293a7f444..b1f676a7c 100644
--- a/parmys-plugin/src/partial_map.cc
+++ b/parmys-plugin/src/partial_map.cc
@@ -34,6 +34,8 @@
 #include "vtr_memory.h"
 #include "vtr_util.h"
 
+USING_YOSYS_NAMESPACE
+
 void depth_first_traverse_partial_map(nnode_t *node, uintptr_t traverse_mark_number, netlist_t *netlist);
 
 void partial_map_node(nnode_t *node, short traverse_number, netlist_t *netlist);
@@ -219,7 +221,7 @@ void partial_map_node(nnode_t *node, short traverse_number, netlist_t *netlist)
             if (depth > configuration.soft_logic_memory_depth_threshold || width > configuration.soft_logic_memory_width_threshold) {
                 instantiate_hard_block(node, traverse_number, netlist);
             } else {
-                printf("\tInferring soft logic ram: %zux%zu\n", width, depth);
+                log("\tInferring soft logic ram: %zux%zu\n", width, depth);
                 instantiate_soft_logic_ram(node, traverse_number, netlist);
             }
         } else {
diff --git a/parmys-plugin/src/read_xml_config_file.cc b/parmys-plugin/src/read_xml_config_file.cc
index b3f10c8df..7bc81b1a7 100644
--- a/parmys-plugin/src/read_xml_config_file.cc
+++ b/parmys-plugin/src/read_xml_config_file.cc
@@ -28,6 +28,8 @@
 
 using namespace pugiutil;
 
+USING_YOSYS_NAMESPACE
+
 config_t configuration;
 
 void read_inputs(pugi::xml_node a_node, config_t *config, const pugiutil::loc_data &loc_data);
@@ -79,7 +81,7 @@ void read_config_file(const char *file_name)
         read_debug_switches(next, &configuration, loc_data);
 
     } catch (XmlError &e) {
-        printf("error: could not parse xml configuration file '%s': %s\n", file_name, e.what());
+        log("error: could not parse xml configuration file '%s': %s\n", file_name, e.what());
         return;
     }
 
diff --git a/parmys-plugin/src/subtractor.cc b/parmys-plugin/src/subtractor.cc
index dc25554bb..03c259f95 100644
--- a/parmys-plugin/src/subtractor.cc
+++ b/parmys-plugin/src/subtractor.cc
@@ -29,6 +29,8 @@
 
 using vtr::t_linked_vptr;
 
+USING_YOSYS_NAMESPACE
+
 t_linked_vptr *sub_list = NULL;
 t_linked_vptr *sub_chain_list = NULL;
 int subchaintotal = 0;
@@ -51,19 +53,19 @@ void report_sub_distribution()
     if (hard_adders == NULL)
         return;
 
-    printf("\nHard MINUS Distribution\n");
-    printf("============================\n");
-    printf("\n");
-    printf("\nTotal # of chains = %ld\n", subtractor_chain_count);
+    log("\nHard MINUS Distribution\n");
+    log("============================\n");
+    log("\n");
+    log("\nTotal # of chains = %ld\n", subtractor_chain_count);
 
-    printf("\nHard sub chain Details\n");
-    printf("============================\n");
+    log("\nHard sub chain Details\n");
+    log("============================\n");
 
-    printf("\n");
-    printf("\nThe Number of Hard Block subs in the Longest Chain: %ld\n", longest_subtractor_chain);
+    log("\n");
+    log("\nThe Number of Hard Block subs in the Longest Chain: %ld\n", longest_subtractor_chain);
 
-    printf("\n");
-    printf("\nThe Total Number of Hard Block subs: %ld\n", total_subtractors);
+    log("\n");
+    log("\nThe Total Number of Hard Block subs: %ld\n", total_subtractors);
 
     return;
 }
diff --git a/parmys-plugin/techlibs/vtr_primitives.v b/parmys-plugin/techlibs/vtr_primitives.v
index b6739f726..7c9507e8e 120000
--- a/parmys-plugin/techlibs/vtr_primitives.v
+++ b/parmys-plugin/techlibs/vtr_primitives.v
@@ -1 +1 @@
-../../third_party/vtr/primitives.v
\ No newline at end of file
+../../third_party/vtr_flow/primitives.v
\ No newline at end of file
diff --git a/parmys-plugin/tests/Makefile b/parmys-plugin/tests/Makefile
index 758361f7b..2555d9fce 100644
--- a/parmys-plugin/tests/Makefile
+++ b/parmys-plugin/tests/Makefile
@@ -17,7 +17,7 @@
 TESTS = raygentop \
         eltwise_layer \
 		VexRiscv_Lite \
-		mips32r1_core \
+		mips32r1_core
         
 include $(shell pwd)/../../Makefile_test.common
 
@@ -27,6 +27,6 @@ VexRiscv_Lite_verify = true
 mips32r1_core_verify = true
 
 clean_modules:
-	@find . -name "*.net.dot" -or -name "*.yosys.blif" | xargs rm -rf
+	@find . -name "*.net.dot" -or -name "*.yosys.blif" -or -name "*.info" | xargs rm -rf
 
 clean: clean_modules
diff --git a/parmys-plugin/tests/eltwise_layer/eltwise_layer.v b/parmys-plugin/tests/eltwise_layer/eltwise_layer.v
index ff069de4e..0223b89b0 120000
--- a/parmys-plugin/tests/eltwise_layer/eltwise_layer.v
+++ b/parmys-plugin/tests/eltwise_layer/eltwise_layer.v
@@ -1 +1 @@
-../../../third_party/vtr/verilog/eltwise_layer.v
\ No newline at end of file
+../../../third_party/vtr_flow/verilog/eltwise_layer.v
\ No newline at end of file
diff --git a/parmys-plugin/tests/eltwise_layer/hard_block_include.v b/parmys-plugin/tests/eltwise_layer/hard_block_include.v
index 04689e506..f3f32f627 120000
--- a/parmys-plugin/tests/eltwise_layer/hard_block_include.v
+++ b/parmys-plugin/tests/eltwise_layer/hard_block_include.v
@@ -1 +1 @@
-../../../third_party/vtr/verilog/hard_block_include.v
\ No newline at end of file
+../../../third_party/vtr_flow/verilog/hard_block_include.v
\ No newline at end of file
diff --git a/parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml b/parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
index 8456a8597..34b35f896 120000
--- a/parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
+++ b/parmys-plugin/tests/eltwise_layer/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
@@ -1 +1 @@
-../../../third_party/vtr/arch/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
\ No newline at end of file
+../../../third_party/vtr_flow/arch/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
\ No newline at end of file
diff --git a/parmys-plugin/tests/raygentop/k6_frac_N10_frac_chain_mem32K_40nm.xml b/parmys-plugin/tests/raygentop/k6_frac_N10_frac_chain_mem32K_40nm.xml
index af1bf2426..f8c1cb9d4 120000
--- a/parmys-plugin/tests/raygentop/k6_frac_N10_frac_chain_mem32K_40nm.xml
+++ b/parmys-plugin/tests/raygentop/k6_frac_N10_frac_chain_mem32K_40nm.xml
@@ -1 +1 @@
-../../../third_party/vtr/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml
\ No newline at end of file
+../../../third_party/vtr_flow/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml
\ No newline at end of file
diff --git a/parmys-plugin/tests/raygentop/raygentop.v b/parmys-plugin/tests/raygentop/raygentop.v
index 125e9fc84..750464717 120000
--- a/parmys-plugin/tests/raygentop/raygentop.v
+++ b/parmys-plugin/tests/raygentop/raygentop.v
@@ -1 +1 @@
-../../../third_party/vtr/verilog/raygentop.v
\ No newline at end of file
+../../../third_party/vtr_flow/verilog/raygentop.v
\ No newline at end of file
diff --git a/third_party/pugixml b/third_party/pugixml
deleted file mode 160000
index a0e064336..000000000
--- a/third_party/pugixml
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit a0e064336317c9347a91224112af9933598714e9
diff --git a/third_party/vtr/arch/LICENSE.md b/third_party/vtr/arch/LICENSE.md
deleted file mode 100644
index 01332da43..000000000
--- a/third_party/vtr/arch/LICENSE.md
+++ /dev/null
@@ -1,69 +0,0 @@
-# VTR License
-
-The software package "VTR" includes the software tools ODIN II, ABC, and VPR as
-well as additional benchmarks, documentation, libraries and scripts. The authors
-of the various components of VTR retain their ownership of their tools.
-
-* Unless otherwise noted (in particular ABC, the benchmark circuits and some libraries),
-all software, documents, and scripts in VTR, follows the standard MIT license described
-[here](http://www.opensource.org/licenses/mit-license.php) copied below for
-your convenience:
-
-> The MIT License (MIT)
->
-> Copyright 2012 VTR Developers
->
-> Permission is hereby granted, free of charge, to any person obtaining a copy of
-> this software and associated documentation files (the "Software"), to deal in
-> the Software without restriction, including without limitation the rights to
-> use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-> of the Software, and to permit persons to whom the Software is furnished to do
-> so, subject to the following conditions:
->
-> The above copyright notice and this permission notice shall be included in all
-> copies or substantial portions of the Software.
->
-> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-> SOFTWARE.
-
-* Terms and conditions for ABC is found
-[here](http://www.eecs.berkeley.edu/~alanmi/abc/copyright.htm) copied below
-for your convenience:
-
-> Copyright (c) The Regents of the University of California. All rights reserved.
->
-> Permission is hereby granted, without written agreement and without license or
-> royalty fees, to use, copy, modify, and distribute this software and its
-> documentation for any purpose, provided that the above copyright notice and the
-> following two paragraphs appear in all copies of this software.
->
-> IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
-> DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
-> THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
-> CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
->
-> THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
-> BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-> A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS,
-> AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
-> SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
-
-The benchmark circuits are all open source but each have their own
-individual terms and conditions which are listed in the source code of each
-benchmark.
-
-Subject to these conditions, the software is provided free of charge to all
-interested parties.
-
-If you do decide to use this tool, please reference our work as references are
-important in academia.
-
-Donations in the form of research grants to promote further research and
-development on the tools will be gladly accepted, either anonymously or with
-attribution on our future publications.
-
diff --git a/third_party/vtr/libs/LICENSE.md b/third_party/vtr/libs/LICENSE.md
deleted file mode 100644
index 01332da43..000000000
--- a/third_party/vtr/libs/LICENSE.md
+++ /dev/null
@@ -1,69 +0,0 @@
-# VTR License
-
-The software package "VTR" includes the software tools ODIN II, ABC, and VPR as
-well as additional benchmarks, documentation, libraries and scripts. The authors
-of the various components of VTR retain their ownership of their tools.
-
-* Unless otherwise noted (in particular ABC, the benchmark circuits and some libraries),
-all software, documents, and scripts in VTR, follows the standard MIT license described
-[here](http://www.opensource.org/licenses/mit-license.php) copied below for
-your convenience:
-
-> The MIT License (MIT)
->
-> Copyright 2012 VTR Developers
->
-> Permission is hereby granted, free of charge, to any person obtaining a copy of
-> this software and associated documentation files (the "Software"), to deal in
-> the Software without restriction, including without limitation the rights to
-> use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-> of the Software, and to permit persons to whom the Software is furnished to do
-> so, subject to the following conditions:
->
-> The above copyright notice and this permission notice shall be included in all
-> copies or substantial portions of the Software.
->
-> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-> SOFTWARE.
-
-* Terms and conditions for ABC is found
-[here](http://www.eecs.berkeley.edu/~alanmi/abc/copyright.htm) copied below
-for your convenience:
-
-> Copyright (c) The Regents of the University of California. All rights reserved.
->
-> Permission is hereby granted, without written agreement and without license or
-> royalty fees, to use, copy, modify, and distribute this software and its
-> documentation for any purpose, provided that the above copyright notice and the
-> following two paragraphs appear in all copies of this software.
->
-> IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
-> DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
-> THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
-> CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
->
-> THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
-> BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-> A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS,
-> AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
-> SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
-
-The benchmark circuits are all open source but each have their own
-individual terms and conditions which are listed in the source code of each
-benchmark.
-
-Subject to these conditions, the software is provided free of charge to all
-interested parties.
-
-If you do decide to use this tool, please reference our work as references are
-important in academia.
-
-Donations in the form of research grants to promote further research and
-development on the tools will be gladly accepted, either anonymously or with
-attribution on our future publications.
-
diff --git a/third_party/vtr/libs/archfpga/.gitignore b/third_party/vtr/libs/archfpga/.gitignore
deleted file mode 100644
index 72c5cad21..000000000
--- a/third_party/vtr/libs/archfpga/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-read_arch
diff --git a/third_party/vtr/libs/archfpga/CMakeLists.txt b/third_party/vtr/libs/archfpga/CMakeLists.txt
deleted file mode 100644
index e2a5ddadb..000000000
--- a/third_party/vtr/libs/archfpga/CMakeLists.txt
+++ /dev/null
@@ -1,75 +0,0 @@
-cmake_minimum_required(VERSION 3.9)
-
-project("libarchfpga")
-
-file(GLOB_RECURSE READ_ARCH_EXEC_SRC src/main.cpp)
-file(GLOB_RECURSE WRITE_ARCH_BB_EXEC_SRC src/write_arch_bb.cpp)
-file(GLOB_RECURSE LIB_SOURCES src/*.cpp)
-file(GLOB_RECURSE LIB_HEADERS src/*.h)
-files_to_dirs(LIB_HEADERS LIB_INCLUDE_DIRS)
-
-#Remove test executable from library
-list(REMOVE_ITEM LIB_SOURCES ${READ_ARCH_EXEC_SRC} ${WRITE_ARCH_BB_EXEC_SRC})
-
-#Create the library
-add_library(libarchfpga STATIC
-    ${LIB_HEADERS}
-    ${LIB_SOURCES}
-)
-
-target_include_directories(libarchfpga PUBLIC ${LIB_INCLUDE_DIRS})
-
-set_target_properties(libarchfpga PROPERTIES PREFIX "") #Avoid extra 'lib' prefix
-
-#Specify link-time dependancies
-target_link_libraries(libarchfpga
-                        libvtrutil
-                        libpugixml
-                        libpugiutil
-                        libvtrcapnproto
-)
-
-target_compile_definitions(libarchfpga PUBLIC ${INTERCHANGE_SCHEMA_HEADERS})
-
-#Create the test executable
-add_executable(read_arch ${READ_ARCH_EXEC_SRC})
-add_executable(write_arch_bb ${WRITE_ARCH_BB_EXEC_SRC})
-target_link_libraries(read_arch libarchfpga)
-target_link_libraries(write_arch_bb libarchfpga)
-
-#Supress IPO link warnings if IPO is enabled
-get_target_property(READ_ARCH_USES_IPO read_arch INTERPROCEDURAL_OPTIMIZATION)
-get_target_property(WRITE_ARCH_BB_USES_IPO write_arch_bb INTERPROCEDURAL_OPTIMIZATION)
-if (READ_ARCH_USES_IPO)
-    set_property(TARGET read_arch APPEND PROPERTY LINK_FLAGS ${IPO_LINK_WARN_SUPRESS_FLAGS})
-endif()
-if (WRITE_ARCH_BB_USES_IPO)
-    set_property(TARGET write_arch_bb APPEND PROPERTY LINK_FLAGS ${IPO_LINK_WARN_SUPRESS_FLAGS})
-endif()
-
-install(TARGETS libarchfpga read_arch write_arch_bb DESTINATION bin)
-
-#
-# install executables in the VTR source root directory
-# to utilize them for scripts running VTR flow
-#
-set(ARCHFPGA_DIR ${VTR_SOURCE_DIR}/ArchFPGA)
-# making a new custom target out of libarchfpga to automatically perform
-# the house keeping for end users by deleting the ArchFPGA dir in the
-# VTR root dir if it exists and holds expired execs
-add_custom_target(archfpga-execs ALL 
-                    DEPENDS read_arch write_arch_bb
-                    COMMAND ${CMAKE_COMMAND} -E 
-                                            remove_directory ${ARCHFPGA_DIR}                     
-                    COMMAND ${CMAKE_COMMAND} -E 
-                                            make_directory ${ARCHFPGA_DIR}
-                    COMMAND ${CMAKE_COMMAND} -E 
-                                            copy_directory ${CMAKE_CURRENT_BINARY_DIR} ${ARCHFPGA_DIR}
-                    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
-
-# 
-# Unit Tests
-#
-file(GLOB_RECURSE TEST_SOURCES test/*.cpp)
-add_executable(test_archfpga ${TEST_SOURCES})
-target_link_libraries(test_archfpga Catch2::Catch2WithMain libarchfpga)
diff --git a/third_party/vtr/libs/archfpga/arch/README.txt b/third_party/vtr/libs/archfpga/arch/README.txt
deleted file mode 100644
index 88e5886d9..000000000
--- a/third_party/vtr/libs/archfpga/arch/README.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-This directory contains sample architecture files that are used in testing
-libarchfpga. In addition, the architecture files in this directory are used by
-the regression testing facilities of Odin II.
-
-Please be sure to retain sample_arch.xml and update it with any changes that
-are made to the libvpr library.
-
-Ken Kent
-ken@unb.ca
-06.18.2009
-
diff --git a/third_party/vtr/libs/archfpga/arch/mult_luts_arch.xml b/third_party/vtr/libs/archfpga/arch/mult_luts_arch.xml
deleted file mode 100644
index 9941d3a79..000000000
--- a/third_party/vtr/libs/archfpga/arch/mult_luts_arch.xml
+++ /dev/null
@@ -1,744 +0,0 @@
-<architecture>
-  <!-- jluu and ken: ODIN II specific config -->
-  <models>
-    <model name="multiply">
-      <input_ports>
-        <port name="a" combinational_sink_ports="out"/>
-        <port name="b" combinational_sink_ports="out"/>
-      </input_ports>
-      <output_ports>
-        <port name="out"/>
-      </output_ports>
-    </model>
-    <model name="single_port_ram">
-      <input_ports>
-        <port name="we"/>
-        <!-- control -->
-        <port name="addr"/>
-        <!-- address lines -->
-        <port name="data"/>
-        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
-        <port name="clk" is_clock="1"/>
-        <!-- memories are often clocked -->
-      </input_ports>
-      <output_ports>
-        <port name="out"/>
-        <!-- output can be broken down into smaller bit widths minimum size 1 -->
-      </output_ports>
-    </model>
-    <model name="dual_port_ram">
-      <input_ports>
-        <port name="we1"/>
-        <!-- write enable -->
-        <port name="we2"/>
-        <!-- write enable -->
-        <port name="addr1"/>
-        <!-- address lines -->
-        <port name="addr2"/>
-        <!-- address lines -->
-        <port name="data1"/>
-        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
-        <port name="data2"/>
-        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
-        <port name="clk" is_clock="1"/>
-        <!-- memories are often clocked -->
-      </input_ports>
-      <output_ports>
-        <port name="out1"/>
-        <!-- output can be broken down into smaller bit widths minimum size 1 -->
-        <port name="out2"/>
-        <!-- output can be broken down into smaller bit widths minimum size 1 -->
-      </output_ports>
-    </model>
-  </models>
-  <tiles>
-    <tile name="io">
-      <sub_tile name="io" capacity="7">
-        <equivalent_sites>
-          <site pb_type="io" pin_mapping="direct"/>
-        </equivalent_sites>
-        <input name="outpad" num_pins="1" equivalent="none"/>
-        <output name="inpad" num_pins="1"/>
-        <clock name="clock" num_pins="1"/>
-        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.125"/>
-        <pinlocations pattern="custom">
-          <loc side="left">io.outpad io.inpad io.clock</loc>
-          <loc side="top">io.outpad io.inpad io.clock</loc>
-          <loc side="right">io.outpad io.inpad io.clock</loc>
-          <loc side="bottom">io.outpad io.inpad io.clock</loc>
-        </pinlocations>
-      </sub_tile>
-    </tile>
-    <tile name="clb">
-      <sub_tile name="clb">
-        <equivalent_sites>
-          <site pb_type="clb" pin_mapping="direct"/>
-        </equivalent_sites>
-        <input name="I" num_pins="56" equivalent="full"/>
-        <output name="O" num_pins="16"/>
-        <clock name="clk" num_pins="1"/>
-        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.125"/>
-        <pinlocations pattern="spread"/>
-      </sub_tile>
-    </tile>
-    <tile name="memory" height="4">
-      <sub_tile name="memory">
-        <equivalent_sites>
-          <site pb_type="memory" pin_mapping="direct"/>
-        </equivalent_sites>
-        <input name="addr1" num_pins="16"/>
-        <input name="addr2" num_pins="16"/>
-        <input name="data" num_pins="64"/>
-        <input name="we1" num_pins="1"/>
-        <input name="we2" num_pins="1"/>
-        <output name="out" num_pins="64"/>
-        <clock name="clk" num_pins="1"/>
-        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.125"/>
-        <pinlocations pattern="spread"/>
-      </sub_tile>
-    </tile>
-    <tile name="mult_36" height="3">
-      <sub_tile name="mult_36">
-        <equivalent_sites>
-          <site pb_type="mult_36" pin_mapping="direct"/>
-        </equivalent_sites>
-        <input name="a" num_pins="36"/>
-        <input name="b" num_pins="36"/>
-        <output name="out" num_pins="72"/>
-        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.125"/>
-        <pinlocations pattern="spread"/>
-      </sub_tile>
-    </tile>
-  </tiles>
-  <!-- jluu and ken: ODIN II specific config ends -->
-  <!-- jluu and ken: Physical descriptions begin -->
-  <!-- <layout width="20" height="20"/> -->
-  <layout>
-    <auto_layout aspect_ratio="1.0">
-      <!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
-      <perimeter type="io" priority="100"/>
-      <corners type="EMPTY" priority="101"/>
-      <!--Fill with 'clb'-->
-      <fill type="clb" priority="10"/>
-      <!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
-      <col type="memory" startx="2" starty="1" repeatx="5" priority="20"/>
-      <col type="EMPTY" startx="2" repeatx="5" starty="1" priority="19"/>
-      <!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
-      <col type="mult_36" startx="4" starty="1" repeatx="5" priority="20"/>
-      <col type="EMPTY" startx="4" repeatx="5" starty="1" priority="19"/>
-    </auto_layout>
-  </layout>
-  <device>
-    <sizing R_minW_nmos="5726.870117" R_minW_pmos="15491.700195"/>
-    <area grid_logic_tile_area="30000.000000"/>
-    <chan_width_distr>
-      <x distr="uniform" peak="1.000000"/>
-      <y distr="uniform" peak="1.000000"/>
-    </chan_width_distr>
-    <switch_block type="wilton" fs="3"/>
-    <connection_block input_switch_name="ipin_cblock"/>
-  </device>
-  <switchlist>
-    <switch type="mux" name="0" R="94.841003" Cin="1.537000e-14" Cout="2.194000e-13" Tdel="6.562000e-11" mux_trans_size="10.000000" buf_size="1"/>
-    <!--switch ipin_cblock resistance set to yeild for 4x minimum drive strength buffer-->
-    <switch type="mux" name="ipin_cblock" R="1431.71752925" Cout="0." Cin="1.191000e-14" Tdel="1.482000e-10" mux_trans_size="1.000000" buf_size="auto"/>
-  </switchlist>
-  <segmentlist>
-    <segment freq="1.000000" length="4" type="unidir" Rmetal="11.064550" Cmetal="4.727860e-14">
-      <mux name="0"/>
-      <sb type="pattern">1 1 1 1 1</sb>
-      <cb type="pattern">1 1 1 1</cb>
-    </segment>
-  </segmentlist>
-  <complexblocklist>
-    <pb_type name="io">
-      <input name="outpad" num_pins="1" equivalent="none"/>
-      <output name="inpad" num_pins="1"/>
-      <clock name="clock" num_pins="1"/>
-      <!-- IOs can operate as either inputs or outputs -->
-      <mode name="inpad">
-        <pb_type name="inpad" blif_model=".input" num_pb="1">
-          <output name="inpad" num_pins="1"/>
-        </pb_type>
-        <interconnect>
-          <direct name="inpad" input="inpad.inpad" output="io.inpad"/>
-        </interconnect>
-      </mode>
-      <mode name="outpad">
-        <pb_type name="outpad" blif_model=".output" num_pb="1">
-          <input name="outpad" num_pins="1"/>
-        </pb_type>
-        <interconnect>
-          <direct name="outpad" input="io.outpad" output="outpad.outpad"/>
-        </interconnect>
-      </mode>
-      <!-- IOs go on the periphery of the FPGA, for consistency, 
-          make it physically equivalent on all sides so that only one definition of I/Os is needed.
-          If I do not make a physically equivalent definition, then I need to define 4 different I/Os, one for each side of the FPGA
-        -->
-    </pb_type>
-    <pb_type name="clb">
-      <input name="I" num_pins="56" equivalent="full"/>
-      <output name="O" num_pins="16"/>
-      <clock name="clk" num_pins="1"/>
-      <pb_type name="ble" num_pb="8">
-        <input name="in" num_pins="7"/>
-        <output name="out" num_pins="2"/>
-        <clock name="clk" num_pins="1"/>
-        <pb_type name="soft_logic" num_pb="1">
-          <input name="in" num_pins="7"/>
-          <output name="out" num_pins="2"/>
-          <mode name="n2_lut5">
-            <pb_type name="lut5" blif_model=".names" num_pb="2" class="lut">
-              <input name="in" num_pins="5" port_class="lut_in"/>
-              <output name="out" num_pins="1" port_class="lut_out"/>
-            </pb_type>
-            <interconnect>
-              <direct name="direct1" input="soft_logic.in[4:0]" output="lut5[0:0].in[4:0]"/>
-              <direct name="direct2" input="lut5[0:0].out" output="soft_logic.out[0:0]"/>
-              <direct name="direct3" input="soft_logic.in[6:2]" output="lut5[1:1].in[4:0]"/>
-              <direct name="direct4" input="lut5[1:1].out" output="soft_logic.out[1:1]"/>
-            </interconnect>
-          </mode>
-          <mode name="n1_lut6">
-            <pb_type name="lut6" blif_model=".names" num_pb="1" class="lut">
-              <input name="in" num_pins="6" port_class="lut_in"/>
-              <output name="out" num_pins="1" port_class="lut_out"/>
-            </pb_type>
-            <interconnect>
-              <direct name="direct1" input="soft_logic.in[5:0]" output="lut6[0:0].in[5:0]"/>
-              <direct name="direct2" input="lut6[0:0].out" output="soft_logic.out[0:0]"/>
-            </interconnect>
-          </mode>
-        </pb_type>
-        <pb_type name="ff" blif_model=".latch" num_pb="2" class="flipflop">
-          <input name="D" num_pins="1" port_class="D"/>
-          <output name="Q" num_pins="1" port_class="Q"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-        </pb_type>
-        <interconnect>
-          <!-- Two ff, make ff available to only corresponding luts -->
-          <direct name="direct1" input="ble.in" output="soft_logic.in"/>
-          <direct name="direct2" input="soft_logic.out[0:0]" output="ff[0:0].D"/>
-          <direct name="direct3" input="soft_logic.out[1:1]" output="ff[1:1].D"/>
-          <direct name="direct4" input="ble.clk" output="ff[0:0].clk"/>
-          <direct name="direct5" input="ble.clk" output="ff[1:1].clk"/>
-          <mux name="mux1" input="ff[0:0].Q soft_logic.out[0:0]" output="ble.out[0:0]"/>
-          <mux name="mux2" input="ff[1:1].Q soft_logic.out[1:1]" output="ble.out[1:1]"/>
-        </interconnect>
-      </pb_type>
-      <interconnect>
-        <complete name="complete1" input="clb.I ble[7:0].out" output="ble[7:0].in"/>
-        <complete name="complete2" input="clb.clk" output="ble[7:0].clk"/>
-        <direct name="direct1" input="ble[7:0].out" output="clb.O"/>
-      </interconnect>
-    </pb_type>
-    <pb_type name="memory">
-      <input name="addr1" num_pins="16"/>
-      <input name="addr2" num_pins="16"/>
-      <input name="data" num_pins="64"/>
-      <input name="we1" num_pins="1"/>
-      <input name="we2" num_pins="1"/>
-      <output name="out" num_pins="64"/>
-      <clock name="clk" num_pins="1"/>
-      <mode name="mem_1024x64_sp">
-        <pb_type name="mem_1024x64_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1" area="1000">
-          <input name="addr" num_pins="10" port_class="address"/>
-          <input name="data" num_pins="64" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="64" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x64_sp.addr">
-              </direct>
-          <direct name="data1" input="memory.data[63:0]" output="mem_1024x64_sp.data">
-              </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_1024x64_sp.we">
-              </direct>
-          <direct name="dataout1" input="mem_1024x64_sp.out" output="memory.out[63:0]">
-              </direct>
-          <direct name="clk" input="memory.clk" output="mem_1024x64_sp.clk">
-              </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_2048x32_dp">
-        <pb_type name="mem_2048x32_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1" area="1000">
-          <input name="addr1" num_pins="11" port_class="address1"/>
-          <input name="addr2" num_pins="11" port_class="address2"/>
-          <input name="data1" num_pins="32" port_class="data_in1"/>
-          <input name="data2" num_pins="32" port_class="data_in2"/>
-          <input name="we1" num_pins="1" port_class="write_en1"/>
-          <input name="we2" num_pins="1" port_class="write_en2"/>
-          <output name="out1" num_pins="32" port_class="data_out1"/>
-          <output name="out2" num_pins="32" port_class="data_out2"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x32_dp.addr1">
-              </direct>
-          <direct name="address2" input="memory.addr2[10:0]" output="mem_2048x32_dp.addr2">
-              </direct>
-          <direct name="data1" input="memory.data[31:0]" output="mem_2048x32_dp.data1">
-              </direct>
-          <direct name="data2" input="memory.data[63:32]" output="mem_2048x32_dp.data2">
-              </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_2048x32_dp.we1">
-              </direct>
-          <direct name="writeen2" input="memory.we2" output="mem_2048x32_dp.we2">
-              </direct>
-          <direct name="dataout1" input="mem_2048x32_dp.out1" output="memory.out[31:0]">
-              </direct>
-          <direct name="dataout2" input="mem_2048x32_dp.out2" output="memory.out[63:32]">
-              </direct>
-          <direct name="clk" input="memory.clk" output="mem_2048x32_dp.clk">
-              </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_2048x32_sp">
-        <pb_type name="mem_2048x32_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1" area="1000">
-          <input name="addr" num_pins="11" port_class="address"/>
-          <input name="data" num_pins="32" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="32" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x32_sp.addr">
-              </direct>
-          <direct name="data1" input="memory.data[31:0]" output="mem_2048x32_sp.data">
-              </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_2048x32_sp.we">
-              </direct>
-          <direct name="dataout1" input="mem_2048x32_sp.out" output="memory.out[31:0]">
-              </direct>
-          <direct name="clk" input="memory.clk" output="mem_2048x32_sp.clk">
-              </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_4096x16_dp">
-        <pb_type name="mem_4096x16_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1" area="1000">
-          <input name="addr1" num_pins="12" port_class="address1"/>
-          <input name="addr2" num_pins="12" port_class="address2"/>
-          <input name="data1" num_pins="16" port_class="data_in1"/>
-          <input name="data2" num_pins="16" port_class="data_in2"/>
-          <input name="we1" num_pins="1" port_class="write_en1"/>
-          <input name="we2" num_pins="1" port_class="write_en2"/>
-          <output name="out1" num_pins="16" port_class="data_out1"/>
-          <output name="out2" num_pins="16" port_class="data_out2"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[11:0]" output="mem_4096x16_dp.addr1">
-              </direct>
-          <direct name="address2" input="memory.addr2[11:0]" output="mem_4096x16_dp.addr2">
-              </direct>
-          <direct name="data1" input="memory.data[15:0]" output="mem_4096x16_dp.data1">
-              </direct>
-          <direct name="data2" input="memory.data[31:16]" output="mem_4096x16_dp.data2">
-              </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_4096x16_dp.we1">
-              </direct>
-          <direct name="writeen2" input="memory.we2" output="mem_4096x16_dp.we2">
-              </direct>
-          <direct name="dataout1" input="mem_4096x16_dp.out1" output="memory.out[15:0]">
-              </direct>
-          <direct name="dataout2" input="mem_4096x16_dp.out2" output="memory.out[31:16]">
-              </direct>
-          <direct name="clk" input="memory.clk" output="mem_4096x16_dp.clk">
-              </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_4096x16_sp">
-        <pb_type name="mem_4096x16_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1" area="1000">
-          <input name="addr" num_pins="12" port_class="address"/>
-          <input name="data" num_pins="16" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="16" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[11:0]" output="mem_4096x16_sp.addr">
-              </direct>
-          <direct name="data1" input="memory.data[15:0]" output="mem_4096x16_sp.data">
-              </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_4096x16_sp.we">
-              </direct>
-          <direct name="dataout1" input="mem_4096x16_sp.out" output="memory.out[15:0]">
-              </direct>
-          <direct name="clk" input="memory.clk" output="mem_4096x16_sp.clk">
-              </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_8192x8_dp">
-        <pb_type name="mem_8192x8_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1" area="1000">
-          <input name="addr1" num_pins="13" port_class="address1"/>
-          <input name="addr2" num_pins="13" port_class="address2"/>
-          <input name="data1" num_pins="8" port_class="data_in1"/>
-          <input name="data2" num_pins="8" port_class="data_in2"/>
-          <input name="we1" num_pins="1" port_class="write_en1"/>
-          <input name="we2" num_pins="1" port_class="write_en2"/>
-          <output name="out1" num_pins="8" port_class="data_out1"/>
-          <output name="out2" num_pins="8" port_class="data_out2"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[12:0]" output="mem_8192x8_dp.addr1">
-              </direct>
-          <direct name="address2" input="memory.addr2[12:0]" output="mem_8192x8_dp.addr2">
-              </direct>
-          <direct name="data1" input="memory.data[7:0]" output="mem_8192x8_dp.data1">
-              </direct>
-          <direct name="data2" input="memory.data[15:8]" output="mem_8192x8_dp.data2">
-              </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_8192x8_dp.we1">
-              </direct>
-          <direct name="writeen2" input="memory.we2" output="mem_8192x8_dp.we2">
-              </direct>
-          <direct name="dataout1" input="mem_8192x8_dp.out1" output="memory.out[7:0]">
-              </direct>
-          <direct name="dataout2" input="mem_8192x8_dp.out2" output="memory.out[15:8]">
-              </direct>
-          <direct name="clk" input="memory.clk" output="mem_8192x8_dp.clk">
-              </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_8192x8_sp">
-        <pb_type name="mem_8192x8_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1" area="1000">
-          <input name="addr" num_pins="13" port_class="address"/>
-          <input name="data" num_pins="8" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="8" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[12:0]" output="mem_8192x8_sp.addr">
-              </direct>
-          <direct name="data1" input="memory.data[7:0]" output="mem_8192x8_sp.data">
-              </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_8192x8_sp.we">
-              </direct>
-          <direct name="dataout1" input="mem_8192x8_sp.out" output="memory.out[7:0]">
-              </direct>
-          <direct name="clk" input="memory.clk" output="mem_8192x8_sp.clk">
-              </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_16384x4_dp">
-        <pb_type name="mem_16384x4_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1" area="1000">
-          <input name="addr1" num_pins="14" port_class="address1"/>
-          <input name="addr2" num_pins="14" port_class="address2"/>
-          <input name="data1" num_pins="4" port_class="data_in1"/>
-          <input name="data2" num_pins="4" port_class="data_in2"/>
-          <input name="we1" num_pins="1" port_class="write_en1"/>
-          <input name="we2" num_pins="1" port_class="write_en2"/>
-          <output name="out1" num_pins="4" port_class="data_out1"/>
-          <output name="out2" num_pins="4" port_class="data_out2"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[13:0]" output="mem_16384x4_dp.addr1">
-              </direct>
-          <direct name="address2" input="memory.addr2[13:0]" output="mem_16384x4_dp.addr2">
-              </direct>
-          <direct name="data1" input="memory.data[3:0]" output="mem_16384x4_dp.data1">
-              </direct>
-          <direct name="data2" input="memory.data[7:4]" output="mem_16384x4_dp.data2">
-              </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_16384x4_dp.we1">
-              </direct>
-          <direct name="writeen2" input="memory.we2" output="mem_16384x4_dp.we2">
-              </direct>
-          <direct name="dataout1" input="mem_16384x4_dp.out1" output="memory.out[3:0]">
-              </direct>
-          <direct name="dataout2" input="mem_16384x4_dp.out2" output="memory.out[7:4]">
-              </direct>
-          <direct name="clk" input="memory.clk" output="mem_16384x4_dp.clk">
-              </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_16384x4_sp">
-        <pb_type name="mem_16384x4_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1" area="1000">
-          <input name="addr" num_pins="14" port_class="address"/>
-          <input name="data" num_pins="4" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="4" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[13:0]" output="mem_16384x4_sp.addr">
-              </direct>
-          <direct name="data1" input="memory.data[3:0]" output="mem_16384x4_sp.data">
-              </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_16384x4_sp.we">
-              </direct>
-          <direct name="dataout1" input="mem_16384x4_sp.out" output="memory.out[3:0]">
-              </direct>
-          <direct name="clk" input="memory.clk" output="mem_16384x4_sp.clk">
-              </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_32768x2_dp">
-        <pb_type name="mem_32768x2_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1" area="1000">
-          <input name="addr1" num_pins="15" port_class="address1"/>
-          <input name="addr2" num_pins="15" port_class="address2"/>
-          <input name="data1" num_pins="2" port_class="data_in1"/>
-          <input name="data2" num_pins="2" port_class="data_in2"/>
-          <input name="we1" num_pins="1" port_class="write_en1"/>
-          <input name="we2" num_pins="1" port_class="write_en2"/>
-          <output name="out1" num_pins="2" port_class="data_out1"/>
-          <output name="out2" num_pins="2" port_class="data_out2"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[14:0]" output="mem_32768x2_dp.addr1">
-              </direct>
-          <direct name="address2" input="memory.addr2[14:0]" output="mem_32768x2_dp.addr2">
-              </direct>
-          <direct name="data1" input="memory.data[1:0]" output="mem_32768x2_dp.data1">
-              </direct>
-          <direct name="data2" input="memory.data[3:2]" output="mem_32768x2_dp.data2">
-              </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_32768x2_dp.we1">
-              </direct>
-          <direct name="writeen2" input="memory.we2" output="mem_32768x2_dp.we2">
-              </direct>
-          <direct name="dataout1" input="mem_32768x2_dp.out1" output="memory.out[1:0]">
-              </direct>
-          <direct name="dataout2" input="mem_32768x2_dp.out2" output="memory.out[3:2]">
-              </direct>
-          <direct name="clk" input="memory.clk" output="mem_32768x2_dp.clk">
-              </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_32768x2_sp">
-        <pb_type name="mem_32768x2_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1" area="1000">
-          <input name="addr" num_pins="15" port_class="address"/>
-          <input name="data" num_pins="2" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="2" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[14:0]" output="mem_32768x2_sp.addr">
-              </direct>
-          <direct name="data1" input="memory.data[1:0]" output="mem_32768x2_sp.data">
-              </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_32768x2_sp.we">
-              </direct>
-          <direct name="dataout1" input="mem_32768x2_sp.out" output="memory.out[1:0]">
-              </direct>
-          <direct name="clk" input="memory.clk" output="mem_32768x2_sp.clk">
-              </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_65536x1_dp">
-        <pb_type name="mem_65536x1_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1" area="1000">
-          <input name="addr1" num_pins="16" port_class="address1"/>
-          <input name="addr2" num_pins="16" port_class="address2"/>
-          <input name="data1" num_pins="1" port_class="data_in1"/>
-          <input name="data2" num_pins="1" port_class="data_in2"/>
-          <input name="we1" num_pins="1" port_class="write_en1"/>
-          <input name="we2" num_pins="1" port_class="write_en2"/>
-          <output name="out1" num_pins="1" port_class="data_out1"/>
-          <output name="out2" num_pins="1" port_class="data_out2"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[15:0]" output="mem_65536x1_dp.addr1">
-              </direct>
-          <direct name="address2" input="memory.addr2[15:0]" output="mem_65536x1_dp.addr2">
-              </direct>
-          <direct name="data1" input="memory.data[0:0]" output="mem_65536x1_dp.data1">
-              </direct>
-          <direct name="data2" input="memory.data[1:1]" output="mem_65536x1_dp.data2">
-              </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_65536x1_dp.we1">
-              </direct>
-          <direct name="writeen2" input="memory.we2" output="mem_65536x1_dp.we2">
-              </direct>
-          <direct name="dataout1" input="mem_65536x1_dp.out1" output="memory.out[0:0]">
-              </direct>
-          <direct name="dataout2" input="mem_65536x1_dp.out2" output="memory.out[1:1]">
-              </direct>
-          <direct name="clk" input="memory.clk" output="mem_65536x1_dp.clk">
-              </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_65536x1_sp">
-        <pb_type name="mem_65536x1_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1" area="1000">
-          <input name="addr" num_pins="16" port_class="address"/>
-          <input name="data" num_pins="1" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="1" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[15:0]" output="mem_65536x1_sp.addr">
-              </direct>
-          <direct name="data1" input="memory.data[0:0]" output="mem_65536x1_sp.data">
-              </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_65536x1_sp.we">
-              </direct>
-          <direct name="dataout1" input="mem_65536x1_sp.out" output="memory.out[0:0]">
-              </direct>
-          <direct name="clk" input="memory.clk" output="mem_65536x1_sp.clk">
-              </direct>
-        </interconnect>
-      </mode>
-    </pb_type>
-    <!-- This is the 36*36 uniform mult -->
-    <pb_type name="mult_36">
-      <input name="a" num_pins="36"/>
-      <input name="b" num_pins="36"/>
-      <output name="out" num_pins="72"/>
-      <mode name="two_divisible_mult_18x18">
-        <pb_type name="divisible_mult_18x18" num_pb="2">
-          <input name="a" num_pins="18"/>
-          <input name="b" num_pins="18"/>
-          <output name="out" num_pins="36"/>
-          <mode name="two_mult_9x9">
-            <pb_type name="mult_9x9_slice" num_pb="2">
-              <input name="A_cfg" num_pins="9"/>
-              <input name="B_cfg" num_pins="9"/>
-              <output name="OUT_cfg" num_pins="18"/>
-              <pb_type name="mult_9x9" blif_model=".subckt multiply" num_pb="1" area="300">
-                <input name="a" num_pins="9"/>
-                <input name="b" num_pins="9"/>
-                <output name="out" num_pins="18"/>
-                <delay_constant max="2.03e-13" min="1.89e-13" in_port="a" out_port="out"/>
-                <delay_constant max="2.03e-13" min="1.89e-13" in_port="b" out_port="out"/>
-              </pb_type>
-              <interconnect>
-                <direct name="a2a" input="mult_9x9_slice.A_cfg" output="mult_9x9.a">
-                  <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_9x9_slice.A_cfg" out_port="mult_9x9.a"/>
-                  <C_constant C="1.89e-13" in_port="mult_9x9_slice.A_cfg" out_port="mult_9x9.a"/>
-                </direct>
-                <direct name="b2b" input="mult_9x9_slice.B_cfg" output="mult_9x9.b">
-                  <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_9x9_slice.B_cfg" out_port="mult_9x9.b"/>
-                  <C_constant C="1.89e-13" in_port="mult_9x9_slice.B_cfg" out_port="mult_9x9.b"/>
-                </direct>
-                <direct name="out2out" input="mult_9x9.out" output="mult_9x9_slice.OUT_cfg">
-                  <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_9x9.out" out_port="mult_9x9_slice.OUT_cfg"/>
-                  <C_constant C="1.89e-13" in_port="mult_9x9.out" out_port="mult_9x9_slice.OUT_cfg"/>
-                </direct>
-              </interconnect>
-            </pb_type>
-            <interconnect>
-              <direct name="a2a" input="divisible_mult_18x18.a" output="mult_9x9_slice[1:0].A_cfg">
-                <delay_constant max="2.03e-13" min="1.89e-13" in_port="divisible_mult_18x18.a" out_port="mult_9x9_slice[1:0].A_cfg"/>
-                <C_constant C="1.89e-13" in_port="divisible_mult_18x18.a" out_port="mult_9x9_slice[1:0].A_cfg"/>
-              </direct>
-              <direct name="b2b" input="divisible_mult_18x18.b" output="mult_9x9_slice[1:0].B_cfg">
-                <delay_constant max="2.03e-13" min="1.89e-13" in_port="divisible_mult_18x18.b" out_port="mult_9x9_slice[1:0].B_cfg"/>
-                <C_constant C="1.89e-13" in_port="divisible_mult_18x18.b" out_port="mult_9x9_slice[1:0].B_cfg"/>
-              </direct>
-              <direct name="out2out" input="mult_9x9_slice[1:0].OUT_cfg" output="divisible_mult_18x18.out">
-                <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_9x9_slice[1:0].OUT_cfg" out_port="divisible_mult_18x18.out"/>
-                <C_constant C="1.89e-13" in_port="mult_9x9_slice[1:0].OUT_cfg" out_port="divisible_mult_18x18.out"/>
-              </direct>
-            </interconnect>
-          </mode>
-          <mode name="mult_18x18">
-            <pb_type name="mult_18x18_slice" num_pb="1">
-              <input name="A_cfg" num_pins="18"/>
-              <input name="B_cfg" num_pins="18"/>
-              <output name="OUT_cfg" num_pins="36"/>
-              <pb_type name="mult_18x18" blif_model=".subckt multiply" num_pb="1" area="1000">
-                <input name="a" num_pins="18"/>
-                <input name="b" num_pins="18"/>
-                <output name="out" num_pins="36"/>
-                <delay_constant max="2.03e-13" min="1.89e-13" in_port="a" out_port="out"/>
-                <delay_constant max="2.03e-13" min="1.89e-13" in_port="b" out_port="out"/>
-              </pb_type>
-              <interconnect>
-                <direct name="a2a" input="mult_18x18_slice.A_cfg" output="mult_18x18.a">
-                  <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_18x18_slice.A_cfg" out_port="mult_18x18.a"/>
-                  <C_constant C="1.89e-13" in_port="mult_18x18_slice.A_cfg" out_port="mult_18x18.a"/>
-                </direct>
-                <direct name="b2b" input="mult_18x18_slice.B_cfg" output="mult_18x18.b">
-                  <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_18x18_slice.B_cfg" out_port="mult_18x18.b"/>
-                  <C_constant C="1.89e-13" in_port="mult_18x18_slice.B_cfg" out_port="mult_18x18.b"/>
-                </direct>
-                <direct name="out2out" input="mult_18x18.out" output="mult_18x18_slice.OUT_cfg">
-                  <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_18x18.out" out_port="mult_18x18_slice.OUT_cfg"/>
-                  <C_constant C="1.89e-13" in_port="mult_18x18.out" out_port="mult_18x18_slice.OUT_cfg"/>
-                </direct>
-              </interconnect>
-            </pb_type>
-            <interconnect>
-              <direct name="a2a" input="divisible_mult_18x18.a" output="mult_18x18_slice.A_cfg">
-                <delay_constant max="2.03e-13" min="1.89e-13" in_port="divisible_mult_18x18.a" out_port="mult_18x18_slice.A_cfg"/>
-                <C_constant C="1.89e-13" in_port="divisible_mult_18x18.a" out_port="mult_18x18_slice.A_cfg"/>
-              </direct>
-              <direct name="b2b" input="divisible_mult_18x18.b" output="mult_18x18_slice.B_cfg">
-                <delay_constant max="2.03e-13" min="1.89e-13" in_port="divisible_mult_18x18.b" out_port="mult_18x18_slice.B_cfg"/>
-                <C_constant C="1.89e-13" in_port="divisible_mult_18x18.b" out_port="mult_18x18_slice.B_cfg"/>
-              </direct>
-              <direct name="out2out" input="mult_18x18_slice.OUT_cfg" output="divisible_mult_18x18.out">
-                <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_18x18_slice.OUT_cfg" out_port="divisible_mult_18x18.out"/>
-                <C_constant C="1.89e-13" in_port="mult_18x18_slice.OUT_cfg" out_port="divisible_mult_18x18.out"/>
-              </direct>
-            </interconnect>
-          </mode>
-        </pb_type>
-        <interconnect>
-          <direct name="a2a" input="mult_36.a" output="divisible_mult_18x18[1:0].a">
-            <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_36.a" out_port="divisible_mult_18x18[1:0].a"/>
-            <C_constant C="1.89e-13" in_port="mult_36.a" out_port="divisible_mult_18x18[1:0].a"/>
-          </direct>
-          <direct name="b2b" input="mult_36.b" output="divisible_mult_18x18[1:0].a">
-            <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_36.b" out_port="divisible_mult_18x18[1:0].a"/>
-            <C_constant C="1.89e-13" in_port="mult_36.b" out_port="divisible_mult_18x18[1:0].a"/>
-          </direct>
-          <direct name="out2out" input="divisible_mult_18x18[1:0].out" output="mult_36.out">
-            <delay_constant max="2.03e-13" min="1.89e-13" in_port="divisible_mult_18x18[1:0].out" out_port="mult_36.out"/>
-            <C_constant C="1.89e-13" in_port="divisible_mult_18x18[1:0].out" out_port="mult_36.out"/>
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mult_36x36">
-        <pb_type name="mult_36x36_slice" num_pb="1">
-          <input name="A_cfg" num_pins="36"/>
-          <input name="B_cfg" num_pins="36"/>
-          <output name="OUT_cfg" num_pins="72"/>
-          <pb_type name="mult_36x36" blif_model=".subckt multiply" num_pb="1" area="4000">
-            <input name="a" num_pins="36"/>
-            <input name="b" num_pins="36"/>
-            <output name="out" num_pins="72"/>
-            <delay_constant max="2.03e-13" min="1.89e-13" in_port="a" out_port="out"/>
-            <delay_constant max="2.03e-13" min="1.89e-13" in_port="b" out_port="out"/>
-          </pb_type>
-          <interconnect>
-            <direct name="a2a" input="mult_36x36_slice.A_cfg" output="mult_36x36.a">
-              <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_36x36_slice.A_cfg" out_port="mult_36x36.a"/>
-              <C_constant C="1.89e-13" in_port="mult_36x36_slice.A_cfg" out_port="mult_36x36.a"/>
-            </direct>
-            <direct name="b2b" input="mult_36x36_slice.B_cfg" output="mult_36x36.b">
-              <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_36x36_slice.B_cfg" out_port="mult_36x36.b"/>
-              <C_constant C="1.89e-13" in_port="mult_36x36_slice.B_cfg" out_port="mult_36x36.b"/>
-            </direct>
-            <direct name="out2out" input="mult_36x36.out" output="mult_36x36_slice.OUT_cfg">
-              <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_36x36.out" out_port="mult_36x36_slice.OUT_cfg"/>
-              <C_constant C="1.89e-13" in_port="mult_36x36.out" out_port="mult_36x36_slice.OUT_cfg"/>
-            </direct>
-          </interconnect>
-        </pb_type>
-        <interconnect>
-          <direct name="a2a" input="mult_36.a" output="mult_36x36_slice.A_cfg">
-            <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_36.a" out_port="mult_36x36_slice.A_cfg"/>
-            <C_constant C="1.89e-13" in_port="mult_36.a" out_port="mult_36x36_slice.A_cfg"/>
-          </direct>
-          <direct name="b2b" input="mult_36.b" output="mult_36x36_slice.B_cfg">
-            <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_36.b" out_port="mult_36x36_slice.B_cfg"/>
-            <C_constant C="1.89e-13" in_port="mult_36.b" out_port="mult_36x36_slice.B_cfg"/>
-          </direct>
-          <direct name="out2out" input="mult_36x36_slice.OUT_cfg" output="mult_36.out">
-            <delay_constant max="2.03e-13" min="1.89e-13" in_port="mult_36x36_slice.OUT_cfg" out_port="mult_36.out"/>
-            <C_constant C="1.89e-13" in_port="mult_36x36_slice.OUT_cfg" out_port="mult_36.out"/>
-          </direct>
-        </interconnect>
-      </mode>
-    </pb_type>
-  </complexblocklist>
-</architecture>
diff --git a/third_party/vtr/libs/archfpga/arch/sample_arch.xml b/third_party/vtr/libs/archfpga/arch/sample_arch.xml
deleted file mode 100755
index f2f3e7173..000000000
--- a/third_party/vtr/libs/archfpga/arch/sample_arch.xml
+++ /dev/null
@@ -1,1215 +0,0 @@
-<!-- 
-  Flagship Heterogeneous Architecture (No Carry Chains) for VTR 7.0.
-
-  - 40 nm technology
-  - General purpose logic block: 
-    K = 6, N = 10, fracturable 6 LUTs (can operate as one 6-LUT or two 5-LUTs with all 5 inputs shared) 
-    with optionally registered outputs
-  - Memory size 32 Kbits, memory aspect ratios vary from a data width of 1 to data width of 64.  
-    Height = 6, found on every (8n+2)th column
-  - Multiplier modes: one 36x36, two 18x18, each 18x18 can also operate as two 9x9.  
-    Height = 4, found on every (8n+6)th column
-  - Routing architecture: L = 4, fc_in = 0.15, Fc_out = 0.1
-
-  Details on Modelling:
-
-  The electrical design of the architecture described here is NOT from an 
-  optimized, SPICED architecture.  Instead, we attempt to create a reasonable 
-  architecture file by using an existing commercial FPGA to approximate the area, 
-  delay, and power of the underlying components. This is combined with a reasonable 40 nm 
-  model of wiring and circuit design for low-level routing components, where available.
-  The resulting architecture has delays that roughly match a commercial 40 nm FPGA, but also 
-  has wiring electrical parameters that allow the wire lengths and switch patterns to be 
-  modified and you will still get reasonable delay results for the new architecture.
-  The following describes, in detail, how we obtained the various electrical values for this 
-  architecture.
-
-  Rmin for nmos and pmos, routing buffer sizes, and I/O pad delays are from the ifar 
-  architecture created by Ian Kuon: K06 N10 45nm fc 0.15 area-delay optimized architecture. 
-  (n10k06l04.fc15.area1delay1.cmos45nm.bptm.cmos45nm.xml)      
-  This routing architecture was optimized for 45 nm, and we have scaled it linearly to 40 nm to 
-  match the overall target (a 40 nm FPGA).
-
-  We obtain delay numbers by measuring delays of routing, soft logic blocks, 
-  memories, and multipliers from test circuits on a Stratix IV GX device 
-  (EP4SGX230DF29C2X, i.e. fastest speed grade). For routing, we took the average delay of H4 and V4 
-  wires.  Rmetal and Cmetal values for the routing wires were obtained from work done by Charles 
-  Chiasson. We use a 96 nm half-pitch (corresponding to mid-level metal stack 40 nm routing) and 
-  take the R and C data from the ITRS roadmap.  
-
- For the general purpose logic block, we assume that the area and delays of the Stratix IV 
-  crossbar is close enough to the crossbar modelled here.  We use 33 inputs and 20 feedback lines in 
-  the cluster and a full crossbar, leading to 53:1 multiplexers in front of each BLE input.
-  Stratix IV uses 52 inputs and 20 feedback lines, but only a half-populated crossbar, leading to 
-  36:1 multiplexers.  We require 60 such multiplexers, while Stratix IV requires 88 for its more
-  complex fracturable BLEs + the extra control signals. We justify this rough approximation as follows: 
-  The Stratix IV crossbar has more inputs (72 vs. 53) and 
-  outputs (88 vs. 60) than our full crossbar which should increase its area and delay, but the 
-  Stratix IV crossbar is also 50% sparse (each mux is 36:1 instead of 53:1) which should reduce its 
-  area and delay.  The total number of crossbar switch points is very similar between the two 
-  architectures (3160 for SIV and 3180 for the academic architecture below), so we can use the area 
-  & delay of the Stratix IV crossbar as a good approximation of our crossbar.
-
-  For LUTs, we include LUT 
-  delays measured from Stratix IV which is dependant on the input used (ie. some 
-  LUT inputs are faster than others).  The CAD tools at the time of VTR 7 does 
-  not consider differences in LUT input delays.
-
-  Logic block area numbers obtained by scaling overall tile area of a 65nm 
-  Stratix III device, (as given in Wong, Betz and Rose, FPGA 2011) to 40 nm, then subtracting out 
-  routing area at a channel width of 300. We use a channel width of 300 because it can route 
-  all the VTR 6.0 benchmark circuits with an approximately 20% safety margin, and is also close to the
-  total channel width of Stratix IV. Hence this channel width is close to the commercial practice of
-  choosing a width that provides high routability. The architecture can be routed at different channel
-  widths, but we estimate the tile size and hence the physical length of routing wires assuming
-  a channel width of 300.
-
-  Sanity checks employed:
-    1.  We confirmed the routing buffer delay is ~1/3rd of total routing delay at L = 4. This matches 
-        common electrical design.
-
-
-  Authors: Jason Luu, Jeff Goeders, Vaughn Betz
--->
-<architecture>
-  <!-- 
-       ODIN II specific config begins 
-       Describes the types of user-specified netlist blocks (in blif, this corresponds to 
-       ".model [type_of_block]") that this architecture supports.
-
-       Note: Basic LUTs, I/Os, and flip-flops are not included here as there are 
-       already special structures in blif (.names, .input, .output, and .latch) 
-       that describe them.
-  -->
-  <models>
-    <model name="multiply">
-      <input_ports>
-        <port name="a" combinational_sink_ports="out"/>
-        <port name="b" combinational_sink_ports="out"/>
-      </input_ports>
-      <output_ports>
-        <port name="out"/>
-      </output_ports>
-    </model>
-    <model name="single_port_ram">
-      <input_ports>
-        <port name="we" clock="clk"/>
-        <!-- control -->
-        <port name="addr" clock="clk"/>
-        <!-- address lines -->
-        <port name="data" clock="clk"/>
-        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
-        <port name="clk" is_clock="1"/>
-        <!-- memories are often clocked -->
-      </input_ports>
-      <output_ports>
-        <port name="out" clock="clk"/>
-        <!-- output can be broken down into smaller bit widths minimum size 1 -->
-      </output_ports>
-    </model>
-    <model name="dual_port_ram">
-      <input_ports>
-        <port name="we1" clock="clk"/>
-        <!-- write enable -->
-        <port name="we2" clock="clk"/>
-        <!-- write enable -->
-        <port name="addr1" clock="clk"/>
-        <!-- address lines -->
-        <port name="addr2" clock="clk"/>
-        <!-- address lines -->
-        <port name="data1" clock="clk"/>
-        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
-        <port name="data2" clock="clk"/>
-        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
-        <port name="clk" is_clock="1"/>
-        <!-- memories are often clocked -->
-      </input_ports>
-      <output_ports>
-        <port name="out1" clock="clk"/>
-        <!-- output can be broken down into smaller bit widths minimum size 1 -->
-        <port name="out2" clock="clk"/>
-        <!-- output can be broken down into smaller bit widths minimum size 1 -->
-      </output_ports>
-    </model>
-  </models>
-  <tiles>
-    <tile name="io">
-      <sub_tile name="io" capacity="8">
-        <equivalent_sites>
-          <site pb_type="io" pin_mapping="direct"/>
-        </equivalent_sites>
-        <input name="outpad" num_pins="1"/>
-        <output name="inpad" num_pins="1"/>
-        <clock name="clock" num_pins="1"/>
-        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
-        <pinlocations pattern="custom">
-          <loc side="left">io.outpad io.inpad io.clock</loc>
-          <loc side="top">io.outpad io.inpad io.clock</loc>
-          <loc side="right">io.outpad io.inpad io.clock</loc>
-          <loc side="bottom">io.outpad io.inpad io.clock</loc>
-        </pinlocations>
-      </sub_tile>
-    </tile>
-    <tile name="clb">
-      <sub_tile name="clb">
-        <equivalent_sites>
-          <site pb_type="clb" pin_mapping="direct"/>
-        </equivalent_sites>
-        <input name="I" num_pins="33" equivalent="full"/>
-        <output name="O" num_pins="20" equivalent="none"/>
-        <clock name="clk" num_pins="1"/>
-        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
-        <pinlocations pattern="spread"/>
-      </sub_tile>
-    </tile>
-    <tile name="mult_36" height="4">
-      <sub_tile name="mult_36">
-        <equivalent_sites>
-          <site pb_type="mult_36" pin_mapping="direct"/>
-        </equivalent_sites>
-        <input name="a" num_pins="36"/>
-        <input name="b" num_pins="36"/>
-        <output name="out" num_pins="72"/>
-        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
-        <pinlocations pattern="spread"/>
-      </sub_tile>
-    </tile>
-    <tile name="memory" height="6">
-      <sub_tile name="memory">
-        <equivalent_sites>
-          <site pb_type="memory" pin_mapping="direct"/>
-        </equivalent_sites>
-        <input name="addr1" num_pins="15"/>
-        <input name="addr2" num_pins="15"/>
-        <input name="data" num_pins="64"/>
-        <input name="we1" num_pins="1"/>
-        <input name="we2" num_pins="1"/>
-        <output name="out" num_pins="64"/>
-        <clock name="clk" num_pins="1"/>
-        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
-        <pinlocations pattern="spread"/>
-      </sub_tile>
-    </tile>
-  </tiles>
-  <!-- ODIN II specific config ends -->
-  <!-- Physical descriptions begin -->
-  <layout>
-    <auto_layout aspect_ratio="1.0">
-      <!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
-      <perimeter type="io" priority="100"/>
-      <corners type="EMPTY" priority="101"/>
-      <!--Fill with 'clb'-->
-      <fill type="clb" priority="10"/>
-      <!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
-      <col type="mult_36" startx="6" starty="1" repeatx="8" priority="20"/>
-      <col type="EMPTY" startx="6" repeatx="8" starty="1" priority="19"/>
-      <!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
-      <col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
-      <col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
-    </auto_layout>
-  </layout>
-  <device>
-    <!-- VB & JL: Using Ian Kuon's transistor sizing and drive strength data for routing, at 40 nm. Ian used BPTM 
-			     models. We are modifying the delay values however, to include metal C and R, which allows more architecture
-			     experimentation. We are also modifying the relative resistance of PMOS to be 1.8x that of NMOS
-			     (vs. Ian's 3x) as 1.8x lines up with Jeff G's data from a 45 nm process (and is more typical of 
-			     45 nm in general). I'm upping the Rmin_nmos from Ian's just over 6k to nearly 9k, and dropping 
-			     RminW_pmos from 18k to 16k to hit this 1.8x ratio, while keeping the delays of buffers approximately
-			     lined up with Stratix IV. 
-			     We are using Jeff G.'s capacitance data for 45 nm (in tech/ptm_45nm).
-			     Jeff's tables list C in for transistors with widths in multiples of the minimum feature size (45 nm).
-			     The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply drive strength sizes in this file
-	                     by 2.5x when looking up in Jeff's tables.
-			     The delay values are lined up with Stratix IV, which has an architecture similar to this
-			     proposed FPGA, and which is also 40 nm 
-			     C_ipin_cblock: input capacitance of a track buffer, which VPR assumes is a single-stage
-			     4x minimum drive strength buffer. -->
-    <sizing R_minW_nmos="8926" R_minW_pmos="16067"/>
-    <!-- Total Stratix IV tile area is about 8100 um^2, minimum width transistor area is 60 L^2 yields a tile area of 84375 MWTAs,
-	   Routing at W=300 is 30481 MWTAs, leaving us with a total of 53000 MWTAs for logic block area 
-	   This means that only 37% of our area is in the general routing, and 63% is inside the logic
-	   block. Note that the crossbar / local interconnect is considered part of the logic block
-	   area in this analysis. That is a lower proportion of of routing area than most academics
-	   assume, but note that the total routing area really includes the crossbar, which would push
-	   routing area up significantly, we estimate into the ~70% range.
-	   -->
-    <area grid_logic_tile_area="53894"/>
-    <chan_width_distr>
-      <x distr="uniform" peak="1.000000"/>
-      <y distr="uniform" peak="1.000000"/>
-    </chan_width_distr>
-    <switch_block type="wilton" fs="3"/>
-    <connection_block input_switch_name="ipin_cblock"/>
-  </device>
-  <switchlist>
-    <!-- VB: the mux_trans_size and buf_size data below is in minimum width transistor *areas*, assuming the purple
-	       book area formula. This means the mux transistors are about 5x minimum drive strength.
-	       We assume the first stage of the buffer is 3x min drive strength to be reasonable given the large 
-	       mux transistors, and this gives a reasonable stage ratio of a bit over 5x to the second stage. We assume
-	       the n and p transistors in the first stage are equal-sized to lower the buffer trip point, since it's fed
-	       by a pass transistor mux. We can then reverse engineer the buffer second stage to hit the specified 
-	       buf_size (really buffer area) - 16.2x minimum drive nmos and 1.8*16.2 = 29.2x minimum drive.
-	       I then took the data from Jeff G.'s PTM modeling of 45 nm to get the Cin (gate of first stage) and Cout 
-	       (diff of second stage) listed below.  Jeff's models are in tech/ptm_45nm, and are in min feature multiples.
-	       The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply the drive strength sizes above by 
-	       2.5x when looking up in Jeff's tables.
-	       Finally, we choose a switch delay (58 ps) that leads to length 4 wires having a delay equal to that of SIV of 126 ps.
-	       This also leads to the switch being 46% of the total wire delay, which is reasonable. -->
-    <switch type="mux" name="0" R="551" Cin=".77e-15" Cout="4e-15" Tdel="58e-12" mux_trans_size="2.630740" buf_size="27.645901"/>
-    <!--switch ipin_cblock resistance set to yeild for 4x minimum drive strength buffer-->
-    <switch type="mux" name="ipin_cblock" R="2231.5" Cout="0." Cin="1.47e-15" Tdel="7.247000e-11" mux_trans_size="1.222260" buf_size="auto"/>
-  </switchlist>
-  <segmentlist>
-    <!--- VB & JL: using ITRS metal stack data, 96 nm half pitch wires, which are intermediate metal width/space.  
-			     With the 96 nm half pitch, such wires would take 60 um of height, vs. a 90 nm high (approximated as square) Stratix IV tile so this seems
-			     reasonable. Using a tile length of 90 nm, corresponding to the length of a Stratix IV tile if it were square. -->
-    <segment freq="1.000000" length="4" type="unidir" Rmetal="101" Cmetal="22.5e-15">
-      <mux name="0"/>
-      <sb type="pattern">1 1 1 1 1</sb>
-      <cb type="pattern">1 1 1 1</cb>
-    </segment>
-  </segmentlist>
-  <complexblocklist>
-    <!-- Define I/O pads begin -->
-    <!-- Capacity is a unique property of I/Os, it is the maximum number of I/Os that can be placed at the same (X,Y) location on the FPGA -->
-    <pb_type name="io">
-      <input name="outpad" num_pins="1"/>
-      <output name="inpad" num_pins="1"/>
-      <clock name="clock" num_pins="1"/>
-      <!-- IOs can operate as either inputs or outputs.
-	     Delays below come from Ian Kuon. They are small, so they should be interpreted as
-	     the delays to and from registers in the I/O (and generally I/Os are registered 
-	     today and that is when you timing analyze them.
-	     -->
-      <mode name="inpad">
-        <pb_type name="inpad" blif_model=".input" num_pb="1">
-          <output name="inpad" num_pins="1"/>
-        </pb_type>
-        <interconnect>
-          <direct name="inpad" input="inpad.inpad" output="io.inpad">
-            <delay_constant max="4.243e-11" in_port="inpad.inpad" out_port="io.inpad"/>
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="outpad">
-        <pb_type name="outpad" blif_model=".output" num_pb="1">
-          <input name="outpad" num_pins="1"/>
-        </pb_type>
-        <interconnect>
-          <direct name="outpad" input="io.outpad" output="outpad.outpad">
-            <delay_constant max="1.394e-11" in_port="io.outpad" out_port="outpad.outpad"/>
-          </direct>
-        </interconnect>
-      </mode>
-      <!-- Every input pin is driven by 15% of the tracks in a channel, every output pin is driven by 10% of the tracks in a channel -->
-      <!-- IOs go on the periphery of the FPGA, for consistency, 
-          make it physically equivalent on all sides so that only one definition of I/Os is needed.
-          If I do not make a physically equivalent definition, then I need to define 4 different I/Os, one for each side of the FPGA
-        -->
-      <!-- Place I/Os on the sides of the FPGA -->
-      <power method="ignore"/>
-    </pb_type>
-    <!-- Define I/O pads ends -->
-    <!-- Define general purpose logic block (CLB) begin -->
-    <pb_type name="clb">
-      <input name="I" num_pins="33" equivalent="full"/>
-      <output name="O" num_pins="20" equivalent="none"/>
-      <clock name="clk" num_pins="1"/>
-      <!-- Describe fracturable logic element.  
-             Each fracturable logic element has a 6-LUT that can alternatively operate as two 5-LUTs with shared inputs. 
-             The outputs of the fracturable logic element can be optionally registered
-        -->
-      <pb_type name="fle" num_pb="10">
-        <input name="in" num_pins="6"/>
-        <output name="out" num_pins="2"/>
-        <clock name="clk" num_pins="1"/>
-        <!-- Dual 5-LUT mode definition begin -->
-        <mode name="n2_lut5">
-          <pb_type name="lut5inter" num_pb="1">
-            <input name="in" num_pins="5"/>
-            <output name="out" num_pins="2"/>
-            <clock name="clk" num_pins="1"/>
-            <pb_type name="ble5" num_pb="2">
-              <input name="in" num_pins="5"/>
-              <output name="out" num_pins="1"/>
-              <clock name="clk" num_pins="1"/>
-              <!-- Define the LUT -->
-              <pb_type name="lut5" blif_model=".names" num_pb="1" class="lut">
-                <input name="in" num_pins="5" port_class="lut_in"/>
-                <output name="out" num_pins="1" port_class="lut_out"/>
-                <!-- LUT timing using delay matrix -->
-                <!-- These are the physical delay inputs on a Stratix IV LUT but because VPR cannot do LUT rebalancing,
-                           we instead take the average of these numbers to get more stable results
-                      82e-12
-                      173e-12
-                      261e-12
-                      263e-12
-                      398e-12
-                      -->
-                <delay_matrix type="max" in_port="lut5.in" out_port="lut5.out">
-                  235e-12
-                  235e-12
-                  235e-12
-                  235e-12
-                  235e-12
-                </delay_matrix>
-              </pb_type>
-              <!-- Define the flip-flop -->
-              <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
-                <input name="D" num_pins="1" port_class="D"/>
-                <output name="Q" num_pins="1" port_class="Q"/>
-                <clock name="clk" num_pins="1" port_class="clock"/>
-                <T_setup value="66e-12" port="ff.D" clock="clk"/>
-                <T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
-              </pb_type>
-              <interconnect>
-                <direct name="direct1" input="ble5.in[4:0]" output="lut5[0:0].in[4:0]"/>
-                <direct name="direct2" input="lut5[0:0].out" output="ff[0:0].D">
-                  <!-- Advanced user option that tells CAD tool to find LUT+FF pairs in netlist -->
-                  <pack_pattern name="ble5" in_port="lut5[0:0].out" out_port="ff[0:0].D"/>
-                </direct>
-                <direct name="direct3" input="ble5.clk" output="ff[0:0].clk"/>
-                <mux name="mux1" input="ff[0:0].Q lut5.out[0:0]" output="ble5.out[0:0]">
-                  <!-- LUT to output is faster than FF to output on a Stratix IV -->
-                  <delay_constant max="25e-12" in_port="lut5.out[0:0]" out_port="ble5.out[0:0]"/>
-                  <delay_constant max="45e-12" in_port="ff[0:0].Q" out_port="ble5.out[0:0]"/>
-                </mux>
-              </interconnect>
-            </pb_type>
-            <interconnect>
-              <direct name="direct1" input="lut5inter.in" output="ble5[0:0].in"/>
-              <direct name="direct2" input="lut5inter.in" output="ble5[1:1].in"/>
-              <direct name="direct3" input="ble5[1:0].out" output="lut5inter.out"/>
-              <complete name="complete1" input="lut5inter.clk" output="ble5[1:0].clk"/>
-            </interconnect>
-          </pb_type>
-          <interconnect>
-            <direct name="direct1" input="fle.in[4:0]" output="lut5inter.in"/>
-            <direct name="direct2" input="lut5inter.out" output="fle.out"/>
-            <direct name="direct3" input="fle.clk" output="lut5inter.clk"/>
-          </interconnect>
-        </mode>
-        <!-- Dual 5-LUT mode definition end -->
-        <!-- 6-LUT mode definition begin -->
-        <mode name="n1_lut6">
-          <!-- Define 6-LUT mode -->
-          <pb_type name="ble6" num_pb="1">
-            <input name="in" num_pins="6"/>
-            <output name="out" num_pins="1"/>
-            <clock name="clk" num_pins="1"/>
-            <!-- Define LUT -->
-            <pb_type name="lut6" blif_model=".names" num_pb="1" class="lut">
-              <input name="in" num_pins="6" port_class="lut_in"/>
-              <output name="out" num_pins="1" port_class="lut_out"/>
-              <!-- LUT timing using delay matrix -->
-              <!-- These are the physical delay inputs on a Stratix IV LUT but because VPR cannot do LUT rebalancing,
-                       we instead take the average of these numbers to get more stable results
-                  82e-12
-                  173e-12
-                  261e-12
-                  263e-12
-                  398e-12
-                  397e-12
-                  -->
-              <delay_matrix type="max" in_port="lut6.in" out_port="lut6.out">
-                261e-12
-                261e-12
-                261e-12
-                261e-12
-                261e-12
-                261e-12
-              </delay_matrix>
-            </pb_type>
-            <!-- Define flip-flop -->
-            <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
-              <input name="D" num_pins="1" port_class="D"/>
-              <output name="Q" num_pins="1" port_class="Q"/>
-              <clock name="clk" num_pins="1" port_class="clock"/>
-              <T_setup value="66e-12" port="ff.D" clock="clk"/>
-              <T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
-            </pb_type>
-            <interconnect>
-              <direct name="direct1" input="ble6.in" output="lut6[0:0].in"/>
-              <direct name="direct2" input="lut6.out" output="ff.D">
-                <!-- Advanced user option that tells CAD tool to find LUT+FF pairs in netlist -->
-                <pack_pattern name="ble6" in_port="lut6.out" out_port="ff.D"/>
-              </direct>
-              <direct name="direct3" input="ble6.clk" output="ff.clk"/>
-              <mux name="mux1" input="ff.Q lut6.out" output="ble6.out">
-                <!-- LUT to output is faster than FF to output on a Stratix IV -->
-                <delay_constant max="25e-12" in_port="lut6.out" out_port="ble6.out"/>
-                <delay_constant max="45e-12" in_port="ff.Q" out_port="ble6.out"/>
-              </mux>
-            </interconnect>
-          </pb_type>
-          <interconnect>
-            <direct name="direct1" input="fle.in" output="ble6.in"/>
-            <direct name="direct2" input="ble6.out" output="fle.out[0:0]"/>
-            <direct name="direct3" input="fle.clk" output="ble6.clk"/>
-          </interconnect>
-        </mode>
-        <!-- 6-LUT mode definition end -->
-      </pb_type>
-      <interconnect>
-        <!-- We use a full crossbar to get logical equivalence at inputs of CLB 
-		     The delays below come from Stratix IV. the delay through a connection block
-		     input mux + the crossbar in Stratix IV is 167 ps. We already have a 72 ps 
-		     delay on the connection block input mux (modeled by Ian Kuon), so the remaining
-		     delay within the crossbar is 95 ps. 
-		     The delays of cluster feedbacks in Stratix IV is 100 ps, when driven by a LUT.
-		     Since all our outputs LUT outputs go to a BLE output, and have a delay of 
-		     25 ps to do so, we subtract 25 ps from the 100 ps delay of a feedback
-		     to get the part that should be marked on the crossbar.	 -->
-        <complete name="crossbar" input="clb.I fle[9:0].out" output="fle[9:0].in">
-          <delay_constant max="95e-12" in_port="clb.I" out_port="fle[9:0].in"/>
-          <delay_constant max="75e-12" in_port="fle[9:0].out" out_port="fle[9:0].in"/>
-        </complete>
-        <complete name="clks" input="clb.clk" output="fle[9:0].clk">
-        </complete>
-        <!-- This way of specifying direct connection to clb outputs is important because this architecture uses automatic spreading of opins.  
-               By grouping to output pins in this fashion, if a logic block is completely filled by 6-LUTs, 
-               then the outputs those 6-LUTs take get evenly distributed across all four sides of the CLB instead of clumped on two sides (which is what happens with a more
-               naive specification).
-          -->
-        <direct name="clbouts1" input="fle[9:0].out[0:0]" output="clb.O[9:0]"/>
-        <direct name="clbouts2" input="fle[9:0].out[1:1]" output="clb.O[19:10]"/>
-      </interconnect>
-      <!-- Every input pin is driven by 15% of the tracks in a channel, every output pin is driven by 10% of the tracks in a channel -->
-      <!-- Place this general purpose logic block in any unspecified column -->
-    </pb_type>
-    <!-- Define general purpose logic block (CLB) ends -->
-    <!-- Define fracturable multiplier begin -->
-    <!-- This multiplier can operate as a 36x36 multiplier that can fracture to two 18x18 multipliers each of which can further fracture to two 9x9 multipliers 
-	   For delay modelling, the 36x36 DSP multiplier in Stratix IV has a delay of 1.523 ns + 1.93 ns
-	    = 3.45 ns. The 18x18 mode doesn't need to sum four 18x18 multipliers, so it is a bit
-	   faster: 1.523 ns for the multiplier, and 1.09 ns for the multiplier output block.
-	    For the input and output interconnect delays, unlike Stratix IV, we don't
-	   have any routing/logic flexibility (crossbars) at the inputs.  There is some output muxing
-	   in Stratix IV and this architecture to select which multiplier outputs should go out (e.g.
-	   9x9 outputs, 18x18 or 36x36) so those are very close between the two architectures. 
-	   We take the conservative (slightly pessimistic)
-           approach modelling the input as the same as the Stratix IV input delay and the output delay the same as the Stratix IV DSP out delay.
-      -->
-    <pb_type name="mult_36">
-      <input name="a" num_pins="36"/>
-      <input name="b" num_pins="36"/>
-      <output name="out" num_pins="72"/>
-      <mode name="two_divisible_mult_18x18">
-        <pb_type name="divisible_mult_18x18" num_pb="2">
-          <input name="a" num_pins="18"/>
-          <input name="b" num_pins="18"/>
-          <output name="out" num_pins="36"/>
-          <!-- Model 9x9 delay and 18x18 delay as the same.  9x9 could be faster, but in Stratix IV
-	          isn't, presumably because the multiplier layout is really optimized for 18x18.
-		-->
-          <mode name="two_mult_9x9">
-            <pb_type name="mult_9x9_slice" num_pb="2">
-              <input name="A_cfg" num_pins="9"/>
-              <input name="B_cfg" num_pins="9"/>
-              <output name="OUT_cfg" num_pins="18"/>
-              <pb_type name="mult_9x9" blif_model=".subckt multiply" num_pb="1">
-                <input name="a" num_pins="9"/>
-                <input name="b" num_pins="9"/>
-                <output name="out" num_pins="18"/>
-                <delay_constant max="1.523e-9" in_port="mult_9x9.a" out_port="mult_9x9.out"/>
-                <delay_constant max="1.523e-9" in_port="mult_9x9.b" out_port="mult_9x9.out"/>
-              </pb_type>
-              <interconnect>
-                <direct name="a2a" input="mult_9x9_slice.A_cfg" output="mult_9x9.a">
-                </direct>
-                <direct name="b2b" input="mult_9x9_slice.B_cfg" output="mult_9x9.b">
-                </direct>
-                <direct name="out2out" input="mult_9x9.out" output="mult_9x9_slice.OUT_cfg">
-                </direct>
-              </interconnect>
-              <power method="pin-toggle">
-                <port name="A_cfg" energy_per_toggle="1.45e-12"/>
-                <port name="B_cfg" energy_per_toggle="1.45e-12"/>
-                <static_power power_per_instance="0.0"/>
-              </power>
-            </pb_type>
-            <interconnect>
-              <direct name="a2a" input="divisible_mult_18x18.a" output="mult_9x9_slice[1:0].A_cfg">
-              </direct>
-              <direct name="b2b" input="divisible_mult_18x18.b" output="mult_9x9_slice[1:0].B_cfg">
-              </direct>
-              <direct name="out2out" input="mult_9x9_slice[1:0].OUT_cfg" output="divisible_mult_18x18.out">
-              </direct>
-            </interconnect>
-          </mode>
-          <mode name="mult_18x18">
-            <pb_type name="mult_18x18_slice" num_pb="1">
-              <input name="A_cfg" num_pins="18"/>
-              <input name="B_cfg" num_pins="18"/>
-              <output name="OUT_cfg" num_pins="36"/>
-              <pb_type name="mult_18x18" blif_model=".subckt multiply" num_pb="1">
-                <input name="a" num_pins="18"/>
-                <input name="b" num_pins="18"/>
-                <output name="out" num_pins="36"/>
-                <delay_constant max="1.523e-9" in_port="mult_18x18.a" out_port="mult_18x18.out"/>
-                <delay_constant max="1.523e-9" in_port="mult_18x18.b" out_port="mult_18x18.out"/>
-              </pb_type>
-              <interconnect>
-                <direct name="a2a" input="mult_18x18_slice.A_cfg" output="mult_18x18.a">
-                </direct>
-                <direct name="b2b" input="mult_18x18_slice.B_cfg" output="mult_18x18.b">
-                </direct>
-                <direct name="out2out" input="mult_18x18.out" output="mult_18x18_slice.OUT_cfg">
-                </direct>
-              </interconnect>
-              <power method="pin-toggle">
-                <port name="A_cfg" energy_per_toggle="1.09e-12"/>
-                <port name="B_cfg" energy_per_toggle="1.09e-12"/>
-                <static_power power_per_instance="0.0"/>
-              </power>
-            </pb_type>
-            <interconnect>
-              <direct name="a2a" input="divisible_mult_18x18.a" output="mult_18x18_slice.A_cfg">
-              </direct>
-              <direct name="b2b" input="divisible_mult_18x18.b" output="mult_18x18_slice.B_cfg">
-              </direct>
-              <direct name="out2out" input="mult_18x18_slice.OUT_cfg" output="divisible_mult_18x18.out">
-              </direct>
-            </interconnect>
-          </mode>
-          <power method="sum-of-children"/>
-        </pb_type>
-        <interconnect>
-          <!-- Stratix IV input delay of 207ps is conservative for this architecture because this architecture does not have an input crossbar in the multiplier. 
-		   Subtract 72.5 ps delay, which is already in the connection block input mux, leading
-              -->
-          <direct name="a2a" input="mult_36.a" output="divisible_mult_18x18[1:0].a">
-            <delay_constant max="134e-12" in_port="mult_36.a" out_port="divisible_mult_18x18[1:0].a"/>
-          </direct>
-          <direct name="b2b" input="mult_36.b" output="divisible_mult_18x18[1:0].b">
-            <delay_constant max="134e-12" in_port="mult_36.b" out_port="divisible_mult_18x18[1:0].b"/>
-          </direct>
-          <direct name="out2out" input="divisible_mult_18x18[1:0].out" output="mult_36.out">
-            <delay_constant max="1.09e-9" in_port="divisible_mult_18x18[1:0].out" out_port="mult_36.out"/>
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mult_36x36">
-        <pb_type name="mult_36x36_slice" num_pb="1">
-          <input name="A_cfg" num_pins="36"/>
-          <input name="B_cfg" num_pins="36"/>
-          <output name="OUT_cfg" num_pins="72"/>
-          <pb_type name="mult_36x36" blif_model=".subckt multiply" num_pb="1">
-            <input name="a" num_pins="36"/>
-            <input name="b" num_pins="36"/>
-            <output name="out" num_pins="72"/>
-            <delay_constant max="1.523e-9" in_port="mult_36x36.a" out_port="mult_36x36.out"/>
-            <delay_constant max="1.523e-9" in_port="mult_36x36.b" out_port="mult_36x36.out"/>
-          </pb_type>
-          <interconnect>
-            <direct name="a2a" input="mult_36x36_slice.A_cfg" output="mult_36x36.a">
-            </direct>
-            <direct name="b2b" input="mult_36x36_slice.B_cfg" output="mult_36x36.b">
-            </direct>
-            <direct name="out2out" input="mult_36x36.out" output="mult_36x36_slice.OUT_cfg">
-            </direct>
-          </interconnect>
-          <power method="pin-toggle">
-            <port name="A_cfg" energy_per_toggle="2.13e-12"/>
-            <port name="B_cfg" energy_per_toggle="2.13e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <!-- Stratix IV input delay of 207ps is conservative for this architecture because this architecture does not have an input crossbar in the multiplier. 
-		   Subtract 72.5 ps delay, which is already in the connection block input mux, leading
-		   to a 134 ps delay.
-              -->
-          <direct name="a2a" input="mult_36.a" output="mult_36x36_slice.A_cfg">
-            <delay_constant max="134e-12" in_port="mult_36.a" out_port="mult_36x36_slice.A_cfg"/>
-          </direct>
-          <direct name="b2b" input="mult_36.b" output="mult_36x36_slice.B_cfg">
-            <delay_constant max="134e-12" in_port="mult_36.b" out_port="mult_36x36_slice.B_cfg"/>
-          </direct>
-          <direct name="out2out" input="mult_36x36_slice.OUT_cfg" output="mult_36.out">
-            <delay_constant max="1.93e-9" in_port="mult_36x36_slice.OUT_cfg" out_port="mult_36.out"/>
-          </direct>
-        </interconnect>
-      </mode>
-      <!-- Place this multiplier block every 8 columns from (and including) the sixth column -->
-      <power method="sum-of-children"/>
-    </pb_type>
-    <!-- Define fracturable multiplier end -->
-    <!-- Define fracturable memory begin -->
-    <!-- 32 Kb Memory that can operate from 512x64 to 32Kx1 for single-port mode and 1024x32 to 32Kx1 for dual-port mode.  
-           Area and delay based off Stratix IV 9K and 144K memories (delay from linear interpolation, Tsu(483 ps, 636 ps) Tco(1084ps, 1969ps)).  
-           Input delay = 204ps (from Stratix IV LAB line) - 72ps (this architecture does not lump connection box delay in internal delay)
-           Output delay = M4K buffer 50ps
-      -->
-    <pb_type name="memory">
-      <input name="addr1" num_pins="15"/>
-      <input name="addr2" num_pins="15"/>
-      <input name="data" num_pins="64"/>
-      <input name="we1" num_pins="1"/>
-      <input name="we2" num_pins="1"/>
-      <output name="out" num_pins="64"/>
-      <clock name="clk" num_pins="1"/>
-      <!-- Specify single port mode first -->
-      <mode name="mem_512x64_sp">
-        <pb_type name="mem_512x64_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
-          <input name="addr" num_pins="9" port_class="address"/>
-          <input name="data" num_pins="64" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="64" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_512x64_sp.addr" clock="clk"/>
-          <T_setup value="509e-12" port="mem_512x64_sp.data" clock="clk"/>
-          <T_setup value="509e-12" port="mem_512x64_sp.we" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" port="mem_512x64_sp.out" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="9.0e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[8:0]" output="mem_512x64_sp.addr">
-            <delay_constant max="132e-12" in_port="memory.addr1[8:0]" out_port="mem_512x64_sp.addr"/>
-          </direct>
-          <direct name="data1" input="memory.data[63:0]" output="mem_512x64_sp.data">
-            <delay_constant max="132e-12" in_port="memory.data[63:0]" out_port="mem_512x64_sp.data"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_512x64_sp.we">
-            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_512x64_sp.we"/>
-          </direct>
-          <direct name="dataout1" input="mem_512x64_sp.out" output="memory.out[63:0]">
-            <delay_constant max="40e-12" in_port="mem_512x64_sp.out" out_port="memory.out[63:0]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_512x64_sp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_1024x32_sp">
-        <pb_type name="mem_1024x32_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
-          <input name="addr" num_pins="10" port_class="address"/>
-          <input name="data" num_pins="32" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="32" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_1024x32_sp.addr" clock="clk"/>
-          <T_setup value="509e-12" port="mem_1024x32_sp.data" clock="clk"/>
-          <T_setup value="509e-12" port="mem_1024x32_sp.we" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" port="mem_1024x32_sp.out" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="9.0e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x32_sp.addr">
-            <delay_constant max="132e-12" in_port="memory.addr1[9:0]" out_port="mem_1024x32_sp.addr"/>
-          </direct>
-          <direct name="data1" input="memory.data[31:0]" output="mem_1024x32_sp.data">
-            <delay_constant max="132e-12" in_port="memory.data[31:0]" out_port="mem_1024x32_sp.data"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_1024x32_sp.we">
-            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_1024x32_sp.we"/>
-          </direct>
-          <direct name="dataout1" input="mem_1024x32_sp.out" output="memory.out[31:0]">
-            <delay_constant max="40e-12" in_port="mem_1024x32_sp.out" out_port="memory.out[31:0]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_1024x32_sp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_2048x16_sp">
-        <pb_type name="mem_2048x16_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
-          <input name="addr" num_pins="11" port_class="address"/>
-          <input name="data" num_pins="16" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="16" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_2048x16_sp.addr" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x16_sp.data" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x16_sp.we" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" port="mem_2048x16_sp.out" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="9.0e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x16_sp.addr">
-            <delay_constant max="132e-12" in_port="memory.addr1[10:0]" out_port="mem_2048x16_sp.addr"/>
-          </direct>
-          <direct name="data1" input="memory.data[15:0]" output="mem_2048x16_sp.data">
-            <delay_constant max="132e-12" in_port="memory.data[15:0]" out_port="mem_2048x16_sp.data"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_2048x16_sp.we">
-            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_2048x16_sp.we"/>
-          </direct>
-          <direct name="dataout1" input="mem_2048x16_sp.out" output="memory.out[15:0]">
-            <delay_constant max="40e-12" in_port="mem_2048x16_sp.out" out_port="memory.out[15:0]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_2048x16_sp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_4096x8_sp">
-        <pb_type name="mem_4096x8_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
-          <input name="addr" num_pins="12" port_class="address"/>
-          <input name="data" num_pins="8" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="8" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_4096x8_sp.addr" clock="clk"/>
-          <T_setup value="509e-12" port="mem_4096x8_sp.data" clock="clk"/>
-          <T_setup value="509e-12" port="mem_4096x8_sp.we" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" port="mem_4096x8_sp.out" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="9.0e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[11:0]" output="mem_4096x8_sp.addr">
-            <delay_constant max="132e-12" in_port="memory.addr1[11:0]" out_port="mem_4096x8_sp.addr"/>
-          </direct>
-          <direct name="data1" input="memory.data[7:0]" output="mem_4096x8_sp.data">
-            <delay_constant max="132e-12" in_port="memory.data[7:0]" out_port="mem_4096x8_sp.data"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_4096x8_sp.we">
-            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_4096x8_sp.we"/>
-          </direct>
-          <direct name="dataout1" input="mem_4096x8_sp.out" output="memory.out[7:0]">
-            <delay_constant max="40e-12" in_port="mem_4096x8_sp.out" out_port="memory.out[7:0]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_4096x8_sp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_8192x4_sp">
-        <pb_type name="mem_8192x4_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
-          <input name="addr" num_pins="13" port_class="address"/>
-          <input name="data" num_pins="4" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="4" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_8192x4_sp.addr" clock="clk"/>
-          <T_setup value="509e-12" port="mem_8192x4_sp.data" clock="clk"/>
-          <T_setup value="509e-12" port="mem_8192x4_sp.we" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" port="mem_8192x4_sp.out" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="9.0e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[12:0]" output="mem_8192x4_sp.addr">
-            <delay_constant max="132e-12" in_port="memory.addr1[12:0]" out_port="mem_8192x4_sp.addr"/>
-          </direct>
-          <direct name="data1" input="memory.data[3:0]" output="mem_8192x4_sp.data">
-            <delay_constant max="132e-12" in_port="memory.data[3:0]" out_port="mem_8192x4_sp.data"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_8192x4_sp.we">
-            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_8192x4_sp.we"/>
-          </direct>
-          <direct name="dataout1" input="mem_8192x4_sp.out" output="memory.out[3:0]">
-            <delay_constant max="40e-12" in_port="mem_8192x4_sp.out" out_port="memory.out[3:0]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_8192x4_sp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_16384x2_sp">
-        <pb_type name="mem_16384x2_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
-          <input name="addr" num_pins="14" port_class="address"/>
-          <input name="data" num_pins="2" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="2" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_16384x2_sp.addr" clock="clk"/>
-          <T_setup value="509e-12" port="mem_16384x2_sp.data" clock="clk"/>
-          <T_setup value="509e-12" port="mem_16384x2_sp.we" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" port="mem_16384x2_sp.out" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="9.0e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[13:0]" output="mem_16384x2_sp.addr">
-            <delay_constant max="132e-12" in_port="memory.addr1[13:0]" out_port="mem_16384x2_sp.addr"/>
-          </direct>
-          <direct name="data1" input="memory.data[1:0]" output="mem_16384x2_sp.data">
-            <delay_constant max="132e-12" in_port="memory.data[1:0]" out_port="mem_16384x2_sp.data"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_16384x2_sp.we">
-            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_16384x2_sp.we"/>
-          </direct>
-          <direct name="dataout1" input="mem_16384x2_sp.out" output="memory.out[1:0]">
-            <delay_constant max="40e-12" in_port="mem_16384x2_sp.out" out_port="memory.out[1:0]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_16384x2_sp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_32768x1_sp">
-        <pb_type name="mem_32768x1_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
-          <input name="addr" num_pins="15" port_class="address"/>
-          <input name="data" num_pins="1" port_class="data_in"/>
-          <input name="we" num_pins="1" port_class="write_en"/>
-          <output name="out" num_pins="1" port_class="data_out"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_32768x1_sp.addr" clock="clk"/>
-          <T_setup value="509e-12" port="mem_32768x1_sp.data" clock="clk"/>
-          <T_setup value="509e-12" port="mem_32768x1_sp.we" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" port="mem_32768x1_sp.out" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="9.0e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[14:0]" output="mem_32768x1_sp.addr">
-            <delay_constant max="132e-12" in_port="memory.addr1[14:0]" out_port="mem_32768x1_sp.addr"/>
-          </direct>
-          <direct name="data1" input="memory.data[0:0]" output="mem_32768x1_sp.data">
-            <delay_constant max="132e-12" in_port="memory.data[0:0]" out_port="mem_32768x1_sp.data"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_32768x1_sp.we">
-            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_32768x1_sp.we"/>
-          </direct>
-          <direct name="dataout1" input="mem_32768x1_sp.out" output="memory.out[0:0]">
-            <delay_constant max="40e-12" in_port="mem_32768x1_sp.out" out_port="memory.out[0:0]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_32768x1_sp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <!-- Specify true dual port mode next -->
-      <mode name="mem_1024x32_dp">
-        <pb_type name="mem_1024x32_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
-          <input name="addr1" num_pins="10" port_class="address1"/>
-          <input name="addr2" num_pins="10" port_class="address2"/>
-          <input name="data1" num_pins="32" port_class="data_in1"/>
-          <input name="data2" num_pins="32" port_class="data_in2"/>
-          <input name="we1" num_pins="1" port_class="write_en1"/>
-          <input name="we2" num_pins="1" port_class="write_en2"/>
-          <output name="out1" num_pins="32" port_class="data_out1"/>
-          <output name="out2" num_pins="32" port_class="data_out2"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_1024x32_dp.addr1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_1024x32_dp.data1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_1024x32_dp.we1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_1024x32_dp.addr2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_1024x32_dp.data2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_1024x32_dp.we2" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" port="mem_1024x32_dp.out1" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" port="mem_1024x32_dp.out2" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="17.9e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x32_dp.addr1">
-            <delay_constant max="132e-12" in_port="memory.addr1[9:0]" out_port="mem_1024x32_dp.addr1"/>
-          </direct>
-          <direct name="address2" input="memory.addr2[9:0]" output="mem_1024x32_dp.addr2">
-            <delay_constant max="132e-12" in_port="memory.addr2[9:0]" out_port="mem_1024x32_dp.addr2"/>
-          </direct>
-          <direct name="data1" input="memory.data[31:0]" output="mem_1024x32_dp.data1">
-            <delay_constant max="132e-12" in_port="memory.data[31:0]" out_port="mem_1024x32_dp.data1"/>
-          </direct>
-          <direct name="data2" input="memory.data[63:32]" output="mem_1024x32_dp.data2">
-            <delay_constant max="132e-12" in_port="memory.data[63:32]" out_port="mem_1024x32_dp.data2"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_1024x32_dp.we1">
-            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_1024x32_dp.we1"/>
-          </direct>
-          <direct name="writeen2" input="memory.we2" output="mem_1024x32_dp.we2">
-            <delay_constant max="132e-12" in_port="memory.we2" out_port="mem_1024x32_dp.we2"/>
-          </direct>
-          <direct name="dataout1" input="mem_1024x32_dp.out1" output="memory.out[31:0]">
-            <delay_constant max="40e-12" in_port="mem_1024x32_dp.out1" out_port="memory.out[31:0]"/>
-          </direct>
-          <direct name="dataout2" input="mem_1024x32_dp.out2" output="memory.out[63:32]">
-            <delay_constant max="40e-12" in_port="mem_1024x32_dp.out2" out_port="memory.out[63:32]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_1024x32_dp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_2048x16_dp">
-        <pb_type name="mem_2048x16_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
-          <input name="addr1" num_pins="11" port_class="address1"/>
-          <input name="addr2" num_pins="11" port_class="address2"/>
-          <input name="data1" num_pins="16" port_class="data_in1"/>
-          <input name="data2" num_pins="16" port_class="data_in2"/>
-          <input name="we1" num_pins="1" port_class="write_en1"/>
-          <input name="we2" num_pins="1" port_class="write_en2"/>
-          <output name="out1" num_pins="16" port_class="data_out1"/>
-          <output name="out2" num_pins="16" port_class="data_out2"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_2048x16_dp.addr1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x16_dp.data1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x16_dp.we1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x16_dp.addr2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x16_dp.data2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x16_dp.we2" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" port="mem_2048x16_dp.out1" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" port="mem_2048x16_dp.out2" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="17.9e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x16_dp.addr1">
-            <delay_constant max="132e-12" in_port="memory.addr1[10:0]" out_port="mem_2048x16_dp.addr1"/>
-          </direct>
-          <direct name="address2" input="memory.addr2[10:0]" output="mem_2048x16_dp.addr2">
-            <delay_constant max="132e-12" in_port="memory.addr2[10:0]" out_port="mem_2048x16_dp.addr2"/>
-          </direct>
-          <direct name="data1" input="memory.data[15:0]" output="mem_2048x16_dp.data1">
-            <delay_constant max="132e-12" in_port="memory.data[15:0]" out_port="mem_2048x16_dp.data1"/>
-          </direct>
-          <direct name="data2" input="memory.data[31:16]" output="mem_2048x16_dp.data2">
-            <delay_constant max="132e-12" in_port="memory.data[31:16]" out_port="mem_2048x16_dp.data2"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_2048x16_dp.we1">
-            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_2048x16_dp.we1"/>
-          </direct>
-          <direct name="writeen2" input="memory.we2" output="mem_2048x16_dp.we2">
-            <delay_constant max="132e-12" in_port="memory.we2" out_port="mem_2048x16_dp.we2"/>
-          </direct>
-          <direct name="dataout1" input="mem_2048x16_dp.out1" output="memory.out[15:0]">
-            <delay_constant max="40e-12" in_port="mem_2048x16_dp.out1" out_port="memory.out[15:0]"/>
-          </direct>
-          <direct name="dataout2" input="mem_2048x16_dp.out2" output="memory.out[31:16]">
-            <delay_constant max="40e-12" in_port="mem_2048x16_dp.out2" out_port="memory.out[31:16]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_2048x16_dp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_2048x8_dp">
-        <pb_type name="mem_2048x8_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
-          <input name="addr1" num_pins="12" port_class="address1"/>
-          <input name="addr2" num_pins="12" port_class="address2"/>
-          <input name="data1" num_pins="8" port_class="data_in1"/>
-          <input name="data2" num_pins="8" port_class="data_in2"/>
-          <input name="we1" num_pins="1" port_class="write_en1"/>
-          <input name="we2" num_pins="1" port_class="write_en2"/>
-          <output name="out1" num_pins="8" port_class="data_out1"/>
-          <output name="out2" num_pins="8" port_class="data_out2"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_2048x8_dp.addr1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x8_dp.data1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x8_dp.we1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x8_dp.addr2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x8_dp.data2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_2048x8_dp.we2" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" port="mem_2048x8_dp.out1" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" port="mem_2048x8_dp.out2" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="17.9e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[11:0]" output="mem_2048x8_dp.addr1">
-            <delay_constant max="132e-12" in_port="memory.addr1[11:0]" out_port="mem_2048x8_dp.addr1"/>
-          </direct>
-          <direct name="address2" input="memory.addr2[11:0]" output="mem_2048x8_dp.addr2">
-            <delay_constant max="132e-12" in_port="memory.addr2[11:0]" out_port="mem_2048x8_dp.addr2"/>
-          </direct>
-          <direct name="data1" input="memory.data[7:0]" output="mem_2048x8_dp.data1">
-            <delay_constant max="132e-12" in_port="memory.data[7:0]" out_port="mem_2048x8_dp.data1"/>
-          </direct>
-          <direct name="data2" input="memory.data[15:8]" output="mem_2048x8_dp.data2">
-            <delay_constant max="132e-12" in_port="memory.data[15:8]" out_port="mem_2048x8_dp.data2"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_2048x8_dp.we1">
-            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_2048x8_dp.we1"/>
-          </direct>
-          <direct name="writeen2" input="memory.we2" output="mem_2048x8_dp.we2">
-            <delay_constant max="132e-12" in_port="memory.we2" out_port="mem_2048x8_dp.we2"/>
-          </direct>
-          <direct name="dataout1" input="mem_2048x8_dp.out1" output="memory.out[7:0]">
-            <delay_constant max="40e-12" in_port="mem_2048x8_dp.out1" out_port="memory.out[7:0]"/>
-          </direct>
-          <direct name="dataout2" input="mem_2048x8_dp.out2" output="memory.out[15:8]">
-            <delay_constant max="40e-12" in_port="mem_2048x8_dp.out2" out_port="memory.out[15:8]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_2048x8_dp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_8192x4_dp">
-        <pb_type name="mem_8192x4_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
-          <input name="addr1" num_pins="13" port_class="address1"/>
-          <input name="addr2" num_pins="13" port_class="address2"/>
-          <input name="data1" num_pins="4" port_class="data_in1"/>
-          <input name="data2" num_pins="4" port_class="data_in2"/>
-          <input name="we1" num_pins="1" port_class="write_en1"/>
-          <input name="we2" num_pins="1" port_class="write_en2"/>
-          <output name="out1" num_pins="4" port_class="data_out1"/>
-          <output name="out2" num_pins="4" port_class="data_out2"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_8192x4_dp.addr1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_8192x4_dp.data1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_8192x4_dp.we1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_8192x4_dp.addr2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_8192x4_dp.data2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_8192x4_dp.we2" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" port="mem_8192x4_dp.out1" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" port="mem_8192x4_dp.out2" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="17.9e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[12:0]" output="mem_8192x4_dp.addr1">
-            <delay_constant max="132e-12" in_port="memory.addr1[12:0]" out_port="mem_8192x4_dp.addr1"/>
-          </direct>
-          <direct name="address2" input="memory.addr2[12:0]" output="mem_8192x4_dp.addr2">
-            <delay_constant max="132e-12" in_port="memory.addr2[12:0]" out_port="mem_8192x4_dp.addr2"/>
-          </direct>
-          <direct name="data1" input="memory.data[3:0]" output="mem_8192x4_dp.data1">
-            <delay_constant max="132e-12" in_port="memory.data[3:0]" out_port="mem_8192x4_dp.data1"/>
-          </direct>
-          <direct name="data2" input="memory.data[7:4]" output="mem_8192x4_dp.data2">
-            <delay_constant max="132e-12" in_port="memory.data[7:4]" out_port="mem_8192x4_dp.data2"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_8192x4_dp.we1">
-            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_8192x4_dp.we1"/>
-          </direct>
-          <direct name="writeen2" input="memory.we2" output="mem_8192x4_dp.we2">
-            <delay_constant max="132e-12" in_port="memory.we2" out_port="mem_8192x4_dp.we2"/>
-          </direct>
-          <direct name="dataout1" input="mem_8192x4_dp.out1" output="memory.out[3:0]">
-            <delay_constant max="40e-12" in_port="mem_8192x4_dp.out1" out_port="memory.out[3:0]"/>
-          </direct>
-          <direct name="dataout2" input="mem_8192x4_dp.out2" output="memory.out[7:4]">
-            <delay_constant max="40e-12" in_port="mem_8192x4_dp.out2" out_port="memory.out[7:4]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_8192x4_dp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_16384x2_dp">
-        <pb_type name="mem_16384x2_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
-          <input name="addr1" num_pins="14" port_class="address1"/>
-          <input name="addr2" num_pins="14" port_class="address2"/>
-          <input name="data1" num_pins="2" port_class="data_in1"/>
-          <input name="data2" num_pins="2" port_class="data_in2"/>
-          <input name="we1" num_pins="1" port_class="write_en1"/>
-          <input name="we2" num_pins="1" port_class="write_en2"/>
-          <output name="out1" num_pins="2" port_class="data_out1"/>
-          <output name="out2" num_pins="2" port_class="data_out2"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_16384x2_dp.addr1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_16384x2_dp.data1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_16384x2_dp.we1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_16384x2_dp.addr2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_16384x2_dp.data2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_16384x2_dp.we2" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" port="mem_16384x2_dp.out1" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" port="mem_16384x2_dp.out2" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="17.9e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[13:0]" output="mem_16384x2_dp.addr1">
-            <delay_constant max="132e-12" in_port="memory.addr1[13:0]" out_port="mem_16384x2_dp.addr1"/>
-          </direct>
-          <direct name="address2" input="memory.addr2[13:0]" output="mem_16384x2_dp.addr2">
-            <delay_constant max="132e-12" in_port="memory.addr2[13:0]" out_port="mem_16384x2_dp.addr2"/>
-          </direct>
-          <direct name="data1" input="memory.data[1:0]" output="mem_16384x2_dp.data1">
-            <delay_constant max="132e-12" in_port="memory.data[1:0]" out_port="mem_16384x2_dp.data1"/>
-          </direct>
-          <direct name="data2" input="memory.data[3:2]" output="mem_16384x2_dp.data2">
-            <delay_constant max="132e-12" in_port="memory.data[3:2]" out_port="mem_16384x2_dp.data2"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_16384x2_dp.we1">
-            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_16384x2_dp.we1"/>
-          </direct>
-          <direct name="writeen2" input="memory.we2" output="mem_16384x2_dp.we2">
-            <delay_constant max="132e-12" in_port="memory.we2" out_port="mem_16384x2_dp.we2"/>
-          </direct>
-          <direct name="dataout1" input="mem_16384x2_dp.out1" output="memory.out[1:0]">
-            <delay_constant max="40e-12" in_port="mem_16384x2_dp.out1" out_port="memory.out[1:0]"/>
-          </direct>
-          <direct name="dataout2" input="mem_16384x2_dp.out2" output="memory.out[3:2]">
-            <delay_constant max="40e-12" in_port="mem_16384x2_dp.out2" out_port="memory.out[3:2]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_16384x2_dp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <mode name="mem_32768x1_dp">
-        <pb_type name="mem_32768x1_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
-          <input name="addr1" num_pins="15" port_class="address1"/>
-          <input name="addr2" num_pins="15" port_class="address2"/>
-          <input name="data1" num_pins="1" port_class="data_in1"/>
-          <input name="data2" num_pins="1" port_class="data_in2"/>
-          <input name="we1" num_pins="1" port_class="write_en1"/>
-          <input name="we2" num_pins="1" port_class="write_en2"/>
-          <output name="out1" num_pins="1" port_class="data_out1"/>
-          <output name="out2" num_pins="1" port_class="data_out2"/>
-          <clock name="clk" num_pins="1" port_class="clock"/>
-          <T_setup value="509e-12" port="mem_32768x1_dp.addr1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_32768x1_dp.data1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_32768x1_dp.we1" clock="clk"/>
-          <T_setup value="509e-12" port="mem_32768x1_dp.addr2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_32768x1_dp.data2" clock="clk"/>
-          <T_setup value="509e-12" port="mem_32768x1_dp.we2" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" port="mem_32768x1_dp.out1" clock="clk"/>
-          <T_clock_to_Q max="1.234e-9" port="mem_32768x1_dp.out2" clock="clk"/>
-          <power method="pin-toggle">
-            <port name="clk" energy_per_toggle="17.9e-12"/>
-            <static_power power_per_instance="0.0"/>
-          </power>
-        </pb_type>
-        <interconnect>
-          <direct name="address1" input="memory.addr1[14:0]" output="mem_32768x1_dp.addr1">
-            <delay_constant max="132e-12" in_port="memory.addr1[14:0]" out_port="mem_32768x1_dp.addr1"/>
-          </direct>
-          <direct name="address2" input="memory.addr2[14:0]" output="mem_32768x1_dp.addr2">
-            <delay_constant max="132e-12" in_port="memory.addr2[14:0]" out_port="mem_32768x1_dp.addr2"/>
-          </direct>
-          <direct name="data1" input="memory.data[0:0]" output="mem_32768x1_dp.data1">
-            <delay_constant max="132e-12" in_port="memory.data[0:0]" out_port="mem_32768x1_dp.data1"/>
-          </direct>
-          <direct name="data2" input="memory.data[1:1]" output="mem_32768x1_dp.data2">
-            <delay_constant max="132e-12" in_port="memory.data[1:1]" out_port="mem_32768x1_dp.data2"/>
-          </direct>
-          <direct name="writeen1" input="memory.we1" output="mem_32768x1_dp.we1">
-            <delay_constant max="132e-12" in_port="memory.we1" out_port="mem_32768x1_dp.we1"/>
-          </direct>
-          <direct name="writeen2" input="memory.we2" output="mem_32768x1_dp.we2">
-            <delay_constant max="132e-12" in_port="memory.we2" out_port="mem_32768x1_dp.we2"/>
-          </direct>
-          <direct name="dataout1" input="mem_32768x1_dp.out1" output="memory.out[0:0]">
-            <delay_constant max="40e-12" in_port="mem_32768x1_dp.out1" out_port="memory.out[0:0]"/>
-          </direct>
-          <direct name="dataout2" input="mem_32768x1_dp.out2" output="memory.out[1:1]">
-            <delay_constant max="40e-12" in_port="mem_32768x1_dp.out2" out_port="memory.out[1:1]"/>
-          </direct>
-          <direct name="clk" input="memory.clk" output="mem_32768x1_dp.clk">
-          </direct>
-        </interconnect>
-      </mode>
-      <!-- Every input pin is driven by 15% of the tracks in a channel, every output pin is driven by 10% of the tracks in a channel -->
-      <!-- Place this memory block every 8 columns from (and including) the second column -->
-      <power method="sum-of-children"/>
-    </pb_type>
-    <!-- Define fracturable memory end -->
-  </complexblocklist>
-  <power>
-    <local_interconnect C_wire="2.5e-10"/>
-  </power>
-  <clocks>
-    <clock buffer_size="auto" C_wire="2.5e-10"/>
-  </clocks>
-</architecture>
diff --git a/third_party/vtr/libs/archfpga/src/arch_check.cc b/third_party/vtr/libs/archfpga/src/arch_check.cc
deleted file mode 100644
index 58e8ced5b..000000000
--- a/third_party/vtr/libs/archfpga/src/arch_check.cc
+++ /dev/null
@@ -1,412 +0,0 @@
-#include <set>
-
-#include "vtr_log.h"
-#include "arch_error.h"
-#include "arch_check.h"
-
-bool check_model_clocks(t_model* model, const char* file, uint32_t line) {
-    //Collect the ports identified as clocks
-    std::set<std::string> clocks;
-    for (t_model_ports* ports : {model->inputs, model->outputs}) {
-        for (t_model_ports* port = ports; port != nullptr; port = port->next) {
-            if (port->is_clock) {
-                clocks.insert(port->name);
-            }
-        }
-    }
-
-    //Check that any clock references on the ports are to identified clock ports
-    for (t_model_ports* ports : {model->inputs, model->outputs}) {
-        for (t_model_ports* port = ports; port != nullptr; port = port->next) {
-            if (!port->clock.empty() && !clocks.count(port->clock)) {
-                archfpga_throw(file, line,
-                               "No matching clock port '%s' on model '%s', required for port '%s'",
-                               port->clock.c_str(), model->name, port->name);
-            }
-        }
-    }
-    return true;
-}
-
-bool check_model_combinational_sinks(const t_model* model, const char* file, uint32_t line) {
-    //Outputs should have no combinational sinks
-    for (t_model_ports* port = model->outputs; port != nullptr; port = port->next) {
-        if (port->combinational_sink_ports.size() != 0) {
-            archfpga_throw(file, line,
-                           "Model '%s' output port '%s' can not have combinational sink ports",
-                           model->name, port->name);
-        }
-    }
-
-    //Record the output ports
-    std::map<std::string, t_model_ports*> output_ports;
-    for (t_model_ports* port = model->outputs; port != nullptr; port = port->next) {
-        output_ports.insert({port->name, port});
-    }
-
-    for (t_model_ports* port = model->inputs; port != nullptr; port = port->next) {
-        for (const std::string& sink_port_name : port->combinational_sink_ports) {
-            //Check that the input port combinational sinks are all outputs
-            if (!output_ports.count(sink_port_name)) {
-                archfpga_throw(file, line,
-                               "Model '%s' input port '%s' can not be combinationally connected to '%s' (not an output port of the model)",
-                               model->name, port->name, sink_port_name.c_str());
-            }
-
-            //Check that any output combinational sinks are not clocks
-            t_model_ports* sink_port = output_ports[sink_port_name];
-            VTR_ASSERT(sink_port);
-            if (sink_port->is_clock) {
-                archfpga_throw(file, line,
-                               "Model '%s' output port '%s' can not be both: a clock source (is_clock=\"%d\"),"
-                               " and combinationally connected to input port '%s' (acting as a clock buffer).",
-                               model->name, sink_port->name, sink_port->is_clock, port->name);
-            }
-        }
-    }
-
-    return true;
-}
-
-void warn_model_missing_timing(const t_model* model, const char* file, uint32_t line) {
-    //Check whether there are missing edges and warn the user
-    std::set<std::string> comb_connected_outputs;
-    for (t_model_ports* port = model->inputs; port != nullptr; port = port->next) {
-        if (port->clock.empty()                       //Not sequential
-            && port->combinational_sink_ports.empty() //Doesn't drive any combinational outputs
-            && !port->is_clock                        //Not an input clock
-        ) {
-            VTR_LOGF_WARN(file, line,
-                          "Model '%s' input port '%s' has no timing specification (no clock specified to create a sequential input port, not combinationally connected to any outputs, not a clock input)\n", model->name, port->name);
-        }
-
-        comb_connected_outputs.insert(port->combinational_sink_ports.begin(), port->combinational_sink_ports.end());
-    }
-
-    for (t_model_ports* port = model->outputs; port != nullptr; port = port->next) {
-        if (port->clock.empty()                          //Not sequential
-            && !comb_connected_outputs.count(port->name) //Not combinationally drivven
-            && !port->is_clock                           //Not an output clock
-        ) {
-            VTR_LOGF_WARN(file, line,
-                          "Model '%s' output port '%s' has no timing specification (no clock specified to create a sequential output port, not combinationally connected to any inputs, not a clock output)\n", model->name, port->name);
-        }
-    }
-}
-
-void check_port_direct_mappings(t_physical_tile_type_ptr physical_tile, t_sub_tile* sub_tile, t_logical_block_type_ptr logical_block) {
-    auto pb_type = logical_block->pb_type;
-
-    if (pb_type->num_pins > (sub_tile->num_phy_pins / sub_tile->capacity.total())) {
-        archfpga_throw(__FILE__, __LINE__,
-                       "Logical Block (%s) has more pins than the Sub Tile (%s).\n",
-                       logical_block->name, sub_tile->name);
-    }
-
-    auto& pin_direct_maps = physical_tile->tile_block_pin_directs_map.at(logical_block->index);
-    auto pin_direct_map = pin_direct_maps.at(sub_tile->index);
-
-    if (pb_type->num_pins != (int)pin_direct_map.size()) {
-        archfpga_throw(__FILE__, __LINE__,
-                       "Logical block (%s) and Sub tile (%s) have a different number of ports.\n",
-                       logical_block->name, physical_tile->name);
-    }
-
-    for (auto pin_map : pin_direct_map) {
-        auto block_port = get_port_by_pin(logical_block, pin_map.first.pin);
-
-        auto sub_tile_port = get_port_by_pin(sub_tile, pin_map.second.pin);
-
-        VTR_ASSERT(block_port != nullptr);
-        VTR_ASSERT(sub_tile_port != nullptr);
-
-        if (sub_tile_port->type != block_port->type
-            || sub_tile_port->num_pins != block_port->num_pins
-            || sub_tile_port->equivalent != block_port->equivalent) {
-            archfpga_throw(__FILE__, __LINE__,
-                           "Logical block (%s) and Physical tile (%s) do not have equivalent port specifications. Sub tile port %s, logical block port %s\n",
-                           logical_block->name, sub_tile->name, sub_tile_port->name, block_port->name);
-        }
-    }
-}
-
-bool check_leaf_pb_model_timing_consistency(const t_pb_type* pb_type, const t_arch& arch) {
-    //Normalize the blif model name to match the model name
-    // by removing the leading '.' (.latch, .inputs, .names etc.)
-    // by removing the leading '.subckt'
-    VTR_ASSERT(pb_type->blif_model);
-    std::string blif_model = pb_type->blif_model;
-    std::string subckt = ".subckt ";
-    auto pos = blif_model.find(subckt);
-    if (pos != std::string::npos) {
-        blif_model = blif_model.substr(pos + subckt.size());
-    }
-
-    //Find the matching model
-    const t_model* model = nullptr;
-
-    for (const t_model* models : {arch.models, arch.model_library}) {
-        for (model = models; model != nullptr; model = model->next) {
-            if (std::string(model->name) == blif_model) {
-                break;
-            }
-        }
-        if (model != nullptr) {
-            break;
-        }
-    }
-    if (model == nullptr) {
-        archfpga_throw(get_arch_file_name(), -1,
-                       "Unable to find model for blif_model '%s' found on pb_type '%s'",
-                       blif_model.c_str(), pb_type->name);
-    }
-
-    //Now that we have the model we can compare the timing annotations
-
-    //Check from the pb_type's delay annotations match the model
-    //
-    //  This ensures that the pb_types' delay annotations are consistent with the model
-    for (int i = 0; i < pb_type->num_annotations; ++i) {
-        const t_pin_to_pin_annotation* annot = &pb_type->annotations[i];
-
-        if (annot->type == E_ANNOT_PIN_TO_PIN_DELAY) {
-            //Check that any combinational delays specified match the 'combinational_sinks_ports' in the model
-
-            if (annot->clock) {
-                //Sequential annotation, check that the clock on the specified port matches the model
-
-                //Annotations always put the pin in the input_pins field
-                VTR_ASSERT(annot->input_pins);
-                for (const std::string& input_pin : vtr::split(annot->input_pins)) {
-                    InstPort annot_port(input_pin);
-                    for (const std::string& clock : vtr::split(annot->clock)) {
-                        InstPort annot_clock(clock);
-
-                        //Find the model port
-                        const t_model_ports* model_port = nullptr;
-                        for (const t_model_ports* ports : {model->inputs, model->outputs}) {
-                            for (const t_model_ports* port = ports; port != nullptr; port = port->next) {
-                                if (port->name == annot_port.port_name()) {
-                                    model_port = port;
-                                    break;
-                                }
-                            }
-                            if (model_port != nullptr) break;
-                        }
-                        if (model_port == nullptr) {
-                            archfpga_throw(get_arch_file_name(), annot->line_num,
-                                           "Failed to find port '%s' on '%s' for sequential delay annotation",
-                                           annot_port.port_name().c_str(), annot_port.instance_name().c_str());
-                        }
-
-                        //Check that the clock matches the model definition
-                        std::string model_clock = model_port->clock;
-                        if (model_clock.empty()) {
-                            archfpga_throw(get_arch_file_name(), annot->line_num,
-                                           "<pb_type> timing-annotation/<model> mismatch on port '%s' of model '%s', model specifies"
-                                           " no clock but timing annotation specifies '%s'",
-                                           annot_port.port_name().c_str(), model->name, annot_clock.port_name().c_str());
-                        }
-                        if (model_port->clock != annot_clock.port_name()) {
-                            archfpga_throw(get_arch_file_name(), annot->line_num,
-                                           "<pb_type> timing-annotation/<model> mismatch on port '%s' of model '%s', model specifies"
-                                           " clock as '%s' but timing annotation specifies '%s'",
-                                           annot_port.port_name().c_str(), model->name, model_clock.c_str(), annot_clock.port_name().c_str());
-                        }
-                    }
-                }
-
-            } else if (annot->input_pins && annot->output_pins) {
-                //Combinational annotation
-                VTR_ASSERT_MSG(!annot->clock, "Combinational annotations should have no clock");
-                for (const std::string& input_pin : vtr::split(annot->input_pins)) {
-                    InstPort annot_in(input_pin);
-                    for (const std::string& output_pin : vtr::split(annot->output_pins)) {
-                        InstPort annot_out(output_pin);
-
-                        //Find the input model port
-                        const t_model_ports* model_port = nullptr;
-                        for (const t_model_ports* port = model->inputs; port != nullptr; port = port->next) {
-                            if (port->name == annot_in.port_name()) {
-                                model_port = port;
-                                break;
-                            }
-                        }
-
-                        if (model_port == nullptr) {
-                            archfpga_throw(get_arch_file_name(), annot->line_num,
-                                           "Failed to find port '%s' on '%s' for combinational delay annotation",
-                                           annot_in.port_name().c_str(), annot_in.instance_name().c_str());
-                        }
-
-                        //Check that the output port is listed in the model's combinational sinks
-                        auto b = model_port->combinational_sink_ports.begin();
-                        auto e = model_port->combinational_sink_ports.end();
-                        auto iter = std::find(b, e, annot_out.port_name());
-                        if (iter == e) {
-                            archfpga_throw(get_arch_file_name(), annot->line_num,
-                                           "<pb_type> timing-annotation/<model> mismatch on port '%s' of model '%s', timing annotation"
-                                           " specifies combinational connection to port '%s' but the connection does not exist in the model",
-                                           model_port->name, model->name, annot_out.port_name().c_str());
-                        }
-                    }
-                }
-            } else {
-                throw ArchFpgaError("Unrecognized delay annotation");
-            }
-        }
-    }
-
-    //Build a list of combinationally connected sinks
-    std::set<std::string> comb_connected_outputs;
-    for (t_model_ports* model_ports : {model->inputs, model->outputs}) {
-        for (t_model_ports* model_port = model_ports; model_port != nullptr; model_port = model_port->next) {
-            comb_connected_outputs.insert(model_port->combinational_sink_ports.begin(), model_port->combinational_sink_ports.end());
-        }
-    }
-
-    //Check from the model to pb_type's delay annotations
-    //
-    //  This ensures that the pb_type has annotations for all delays/values
-    //  required by the model
-    for (t_model_ports* model_ports : {model->inputs, model->outputs}) {
-        for (t_model_ports* model_port = model_ports; model_port != nullptr; model_port = model_port->next) {
-            //If the model port has no timing specification don't check anything (e.g. architectures with no timing info)
-            if (model_port->clock.empty()
-                && model_port->combinational_sink_ports.empty()
-                && !comb_connected_outputs.count(model_port->name)) {
-                continue;
-            }
-
-            if (!model_port->clock.empty()) {
-                //Sequential port
-
-                if (model_port->dir == IN_PORT) {
-                    //Sequential inputs must have a T_setup or T_hold
-                    if (find_sequential_annotation(pb_type, model_port, E_ANNOT_PIN_TO_PIN_DELAY_TSETUP) == nullptr
-                        && find_sequential_annotation(pb_type, model_port, E_ANNOT_PIN_TO_PIN_DELAY_THOLD) == nullptr) {
-                        std::stringstream msg;
-                        msg << "<pb_type> '" << pb_type->name << "' timing-annotation/<model> mismatch on";
-                        msg << " port '" << model_port->name << "' of model '" << model->name << "',";
-                        msg << " port is a sequential input but has neither T_setup nor T_hold specified";
-
-                        if (is_library_model(model)) {
-                            //Only warn if timing info is missing from a library model (e.g. .names/.latch on a non-timing architecture)
-                            VTR_LOGF_WARN(get_arch_file_name(), -1, "%s\n", msg.str().c_str());
-                        } else {
-                            archfpga_throw(get_arch_file_name(), -1, msg.str().c_str());
-                        }
-                    }
-
-                    if (!model_port->combinational_sink_ports.empty()) {
-                        //Sequential input with internal combinational connectsion it must also have T_clock_to_Q
-                        if (find_sequential_annotation(pb_type, model_port, E_ANNOT_PIN_TO_PIN_DELAY_CLOCK_TO_Q_MAX) == nullptr
-                            && find_sequential_annotation(pb_type, model_port, E_ANNOT_PIN_TO_PIN_DELAY_CLOCK_TO_Q_MIN) == nullptr) {
-                            std::stringstream msg;
-                            msg << "<pb_type> '" << pb_type->name << "' timing-annotation/<model> mismatch on";
-                            msg << " port '" << model_port->name << "' of model '" << model->name << "',";
-                            msg << " port is a sequential input with internal combinational connects but has neither";
-                            msg << " min nor max T_clock_to_Q specified";
-
-                            if (is_library_model(model)) {
-                                //Only warn if timing info is missing from a library model (e.g. .names/.latch on a non-timing architecture)
-                                VTR_LOGF_WARN(get_arch_file_name(), -1, "%s\n", msg.str().c_str());
-                            } else {
-                                archfpga_throw(get_arch_file_name(), -1, msg.str().c_str());
-                            }
-                        }
-                    }
-
-                } else {
-                    VTR_ASSERT(model_port->dir == OUT_PORT);
-                    //Sequential outputs must have T_clock_to_Q
-                    if (find_sequential_annotation(pb_type, model_port, E_ANNOT_PIN_TO_PIN_DELAY_CLOCK_TO_Q_MAX) == nullptr
-                        && find_sequential_annotation(pb_type, model_port, E_ANNOT_PIN_TO_PIN_DELAY_CLOCK_TO_Q_MIN) == nullptr) {
-                        std::stringstream msg;
-                        msg << "<pb_type> '" << pb_type->name << "' timing-annotation/<model> mismatch on";
-                        msg << " port '" << model_port->name << "' of model '" << model->name << "',";
-                        msg << " port is a sequential output but has neither min nor max T_clock_to_Q specified";
-
-                        if (is_library_model(model)) {
-                            //Only warn if timing info is missing from a library model (e.g. .names/.latch on a non-timing architecture)
-                            VTR_LOGF_WARN(get_arch_file_name(), -1, "%s\n", msg.str().c_str());
-                        } else {
-                            archfpga_throw(get_arch_file_name(), -1, msg.str().c_str());
-                        }
-                    }
-
-                    if (comb_connected_outputs.count(model_port->name)) {
-                        //Sequential output with internal combinational connectison must have T_setup/T_hold
-                        if (find_sequential_annotation(pb_type, model_port, E_ANNOT_PIN_TO_PIN_DELAY_TSETUP) == nullptr
-                            && find_sequential_annotation(pb_type, model_port, E_ANNOT_PIN_TO_PIN_DELAY_THOLD) == nullptr) {
-                            std::stringstream msg;
-                            msg << "<pb_type> '" << pb_type->name << "' timing-annotation/<model> mismatch on";
-                            msg << " port '" << model_port->name << "' of model '" << model->name << "',";
-                            msg << " port is a sequential output with internal combinational connections but has";
-                            msg << " neither T_setup nor T_hold specified";
-
-                            if (is_library_model(model)) {
-                                //Only warn if timing info is missing from a library model (e.g. .names/.latch on a non-timing architecture)
-                                VTR_LOGF_WARN(get_arch_file_name(), -1, "%s\n", msg.str().c_str());
-                            } else {
-                                archfpga_throw(get_arch_file_name(), -1, msg.str().c_str());
-                            }
-                        }
-                    }
-                }
-            }
-
-            //Check that combinationally connected inputs/outputs have combinational delays between them
-            if (model_port->dir == IN_PORT) {
-                for (const auto& sink_port : model_port->combinational_sink_ports) {
-                    if (find_combinational_annotation(pb_type, model_port->name, sink_port) == nullptr) {
-                        std::stringstream msg;
-                        msg << "<pb_type> '" << pb_type->name << "' timing-annotation/<model> mismatch on";
-                        msg << " port '" << model_port->name << "' of model '" << model->name << "',";
-                        msg << " input port '" << model_port->name << "' has combinational connections to";
-                        msg << " port '" << sink_port.c_str() << "'; specified in model, but no combinational delays found on pb_type";
-
-                        if (is_library_model(model)) {
-                            //Only warn if timing info is missing from a library model (e.g. .names/.latch on a non-timing architecture)
-                            VTR_LOGF_WARN(get_arch_file_name(), -1, "%s\n", msg.str().c_str());
-                        } else {
-                            archfpga_throw(get_arch_file_name(), -1, msg.str().c_str());
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    return true;
-}
-
-void check_models(t_arch* arch) {
-    for (t_model* model = arch->models; model != nullptr; model = model->next) {
-        if (model->pb_types == nullptr) {
-            archfpga_throw(get_arch_file_name(), 0,
-                           "No pb_type found for model %s\n", model->name);
-        }
-
-        int clk_count, input_count, output_count;
-        clk_count = input_count = output_count = 0;
-        for (auto ports : {model->inputs, model->outputs}) {
-            for (auto port = ports; port != nullptr; port = port->next) {
-                int index;
-                switch (port->dir) {
-                    case IN_PORT:
-                        index = port->is_clock ? clk_count++ : input_count++;
-                        break;
-                    case OUT_PORT:
-                        index = output_count++;
-                        break;
-                    default:
-                        archfpga_throw(get_arch_file_name(), 0,
-                                       "Port %s of model %s, has an unrecognized type %s\n", port->name, model->name);
-                }
-
-                port->index = index;
-            }
-        }
-    }
-}
diff --git a/third_party/vtr/libs/archfpga/src/arch_check.h b/third_party/vtr/libs/archfpga/src/arch_check.h
deleted file mode 100644
index 20b3ad30d..000000000
--- a/third_party/vtr/libs/archfpga/src/arch_check.h
+++ /dev/null
@@ -1,80 +0,0 @@
-#ifndef ARCH_CHECK_H
-#define ARCH_CHECK_H
-
-/**
- *  This file includes all the definitions of functions which purpose is to
- *  check the correctness of the architecture's internal data structures.
- *
- *  All new functions corresponding to the architecture checking should end up here.
- */
-
-#include "arch_types.h"
-#include "arch_util.h"
-
-#include "physical_types_util.h"
-
-#include "vtr_util.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/**
- * @brief Checks whether the model has correct clock port specifications
- *
- * @param model model definition
- * @param file architecture file
- * @param line line in the architecture file that generates the failure
- */
-bool check_model_clocks(t_model* model, const char* file, uint32_t line);
-
-/**
- * @brief Checks the correctness of the combinational sinks in the model inputs to outputs connections
- *
- * @param model model definition
- * @param file architecture file
- * @param line line in the architecture file that generates the failure
- */
-bool check_model_combinational_sinks(const t_model* model, const char* file, uint32_t line);
-
-/**
- * @brief Checks whether the I/O ports can have timing specifications based on their connectivity.
- *        A port can have timing specs whether it is clocked or is combinationally connected to a
- *        corresponding I/O port.
- *        If the check fails, a warning is printed in the output log.
- *
- * @param model model definition
- * @param file architecture file
- * @param line line in the architecture file that generates the failure
- */
-void warn_model_missing_timing(const t_model* model, const char* file, uint32_t line);
-
-/**
- * @brief Checks the consistency of the mappings between a logical block and the corresponding physical tile.
- *
- * @param physical_tile physical tile type
- * @param sub_tile sub tile to check
- * @param logical_block logical block type
- */
-void check_port_direct_mappings(t_physical_tile_type_ptr physical_tile, t_sub_tile* sub_tile, t_logical_block_type_ptr logical_block);
-
-/**
- * @brief Checks the timing consistency between tha pb_type and the corresponding model.
- *
- * @param pb_type pb type to check
- * @param arch architecture data structure
- */
-bool check_leaf_pb_model_timing_consistency(const t_pb_type* pb_type, const t_arch& arch);
-
-/**
- * @brief Checks that each model has at least one corresponding pb type. This function also updates the port indices of the models
- *        based on their type: e.g. clock, input, output.
- *
- * @param arch architecture data structure
- */
-void check_models(t_arch* arch);
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/third_party/vtr/libs/archfpga/src/arch_error.cc b/third_party/vtr/libs/archfpga/src/arch_error.cc
deleted file mode 100644
index a9d502ae2..000000000
--- a/third_party/vtr/libs/archfpga/src/arch_error.cc
+++ /dev/null
@@ -1,16 +0,0 @@
-#include <cstdarg>
-
-#include "vtr_util.h"
-#include "arch_error.h"
-
-void archfpga_throw(const char* filename, int line, const char* fmt, ...) {
-    va_list va_args;
-
-    va_start(va_args, fmt);
-
-    auto msg = vtr::vstring_fmt(fmt, va_args);
-
-    va_end(va_args);
-
-    throw ArchFpgaError(msg, filename, line);
-}
diff --git a/third_party/vtr/libs/archfpga/src/arch_error.h b/third_party/vtr/libs/archfpga/src/arch_error.h
deleted file mode 100644
index 0dae1d859..000000000
--- a/third_party/vtr/libs/archfpga/src/arch_error.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef ARCH_ERROR_H
-#define ARCH_ERROR_H
-
-#include "vtr_error.h"
-#include <cstdarg>
-
-//Note that we mark this function with the C++11 attribute 'noreturn'
-//as it will throw exceptions and not return normally. This can help
-//reduce false-positive compiler warnings.
-[[noreturn]] void archfpga_throw(const char* filename, int line, const char* fmt, ...);
-
-class ArchFpgaError : public vtr::VtrError {
-  public:
-    ArchFpgaError(std::string msg = "", std::string new_filename = "", size_t new_linenumber = -1)
-        : vtr::VtrError(msg, new_filename, new_linenumber) {}
-};
-
-#endif
diff --git a/third_party/vtr/libs/archfpga/src/arch_types.h b/third_party/vtr/libs/archfpga/src/arch_types.h
deleted file mode 100644
index 9f88a6466..000000000
--- a/third_party/vtr/libs/archfpga/src/arch_types.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Data types describing the FPGA architecture.
- *
- * Date: February 19, 2009
- * Authors: Jason Luu and Kenneth Kent
- */
-
-#ifndef ARCH_TYPES_H
-#define ARCH_TYPES_H
-
-#include "logic_types.h"
-#include "physical_types.h"
-#include "cad_types.h"
-
-/* Input file parsing. */
-#define TOKENS " \t\n"
-
-/* Value for UNDEFINED data */
-constexpr int UNDEFINED = -1;
-
-/* Maximum value for mininum channel width to avoid overflows of short data type.               */
-constexpr int MAX_CHANNEL_WIDTH = 8000;
-
-/* Built-in library models */
-constexpr const char* MODEL_NAMES = ".names";
-constexpr const char* MODEL_LATCH = ".latch";
-constexpr const char* MODEL_INPUT = ".input";
-constexpr const char* MODEL_OUTPUT = ".output";
-
-enum class e_arch_format {
-    VTR,            ///<VTR-specific device XML format
-    FPGAInterchange ///<FPGA Interchange device format
-};
-
-#endif
diff --git a/third_party/vtr/libs/archfpga/src/arch_util.cc b/third_party/vtr/libs/archfpga/src/arch_util.cc
deleted file mode 100644
index a8d89c91e..000000000
--- a/third_party/vtr/libs/archfpga/src/arch_util.cc
+++ /dev/null
@@ -1,1572 +0,0 @@
-#include <cstring>
-#include <sstream>
-
-#include "vtr_assert.h"
-#include "vtr_memory.h"
-#include "vtr_util.h"
-
-#include "arch_types.h"
-#include "arch_util.h"
-#include "arch_error.h"
-
-#include "read_xml_arch_file.h"
-#include "read_xml_util.h"
-
-/******************** Subroutine declarations ********************************/
-
-static void free_all_pb_graph_nodes(std::vector<t_logical_block_type>& type_descriptors);
-static void free_pb_graph(t_pb_graph_node* pb_graph_node);
-static void free_pb_type(t_pb_type* pb_type);
-
-/******************** End Subroutine declarations ****************************/
-
-/* This gives access to the architecture file name to
- * all architecture-parser functions       */
-static const char* arch_file_name = nullptr;
-
-void set_arch_file_name(const char* arch) {
-    arch_file_name = arch;
-}
-
-/* Used by functions outside read_xml_util.c to gain access to arch filename */
-const char* get_arch_file_name() {
-    VTR_ASSERT(arch_file_name != nullptr);
-
-    return arch_file_name;
-}
-
-InstPort::InstPort(std::string str) {
-    std::vector<std::string> inst_port = vtr::split(str, ".");
-
-    if (inst_port.size() == 1) {
-        instance_ = name_index();
-        port_ = parse_name_index(inst_port[0]);
-
-    } else if (inst_port.size() == 2) {
-        instance_ = parse_name_index(inst_port[0]);
-        port_ = parse_name_index(inst_port[1]);
-    } else {
-        std::string msg = vtr::string_fmt("Failed to parse instance port specification '%s'",
-                                          str.c_str());
-        throw ArchFpgaError(msg);
-    }
-}
-
-InstPort::name_index InstPort::parse_name_index(const std::string& str) {
-    auto open_bracket_pos = str.find('[');
-    auto close_bracket_pos = str.find(']');
-    auto colon_pos = str.find(':');
-
-    //Parse checks
-    if (open_bracket_pos == std::string::npos && close_bracket_pos != std::string::npos) {
-        //Close brace only
-        std::string msg = "near '" + str + "', missing '['";
-        throw ArchFpgaError(msg);
-    }
-
-    if (open_bracket_pos != std::string::npos && close_bracket_pos == std::string::npos) {
-        //Open brace only
-        std::string msg = "near '" + str + "', missing ']'";
-        throw ArchFpgaError(msg);
-    }
-
-    if (open_bracket_pos != std::string::npos && close_bracket_pos != std::string::npos) {
-        //Have open and close braces, close must be after open
-        if (open_bracket_pos > close_bracket_pos) {
-            std::string msg = "near '" + str + "', '[' after ']'";
-            throw ArchFpgaError(msg);
-        }
-    }
-
-    if (colon_pos != std::string::npos) {
-        //Have a colon, it must be between open/close braces
-        if (colon_pos > close_bracket_pos || colon_pos < open_bracket_pos) {
-            std::string msg = "near '" + str + "', found ':' but not between '[' and ']'";
-            throw ArchFpgaError(msg);
-        }
-    }
-
-    //Extract the name and index info
-    std::string name = str.substr(0, open_bracket_pos);
-    std::string first_idx_str;
-    std::string second_idx_str;
-
-    if (colon_pos == std::string::npos && open_bracket_pos == std::string::npos && close_bracket_pos == std::string::npos) {
-    } else if (colon_pos == std::string::npos) {
-        //No colon, implies a single element
-        first_idx_str = str.substr(open_bracket_pos + 1, close_bracket_pos);
-        second_idx_str = first_idx_str;
-    } else {
-        //Colon, implies a range
-        first_idx_str = str.substr(open_bracket_pos + 1, colon_pos);
-        second_idx_str = str.substr(colon_pos + 1, close_bracket_pos);
-    }
-
-    int first_idx = UNSPECIFIED;
-    if (!first_idx_str.empty()) {
-        std::stringstream ss(first_idx_str);
-        size_t idx;
-        ss >> idx;
-        if (!ss.good()) {
-            std::string msg = "near '" + str + "', expected positive integer";
-            throw ArchFpgaError(msg);
-        }
-        first_idx = idx;
-    }
-
-    int second_idx = UNSPECIFIED;
-    if (!second_idx_str.empty()) {
-        std::stringstream ss(second_idx_str);
-        size_t idx;
-        ss >> idx;
-        if (!ss.good()) {
-            std::string msg = "near '" + str + "', expected positive integer";
-            throw ArchFpgaError(msg);
-        }
-        second_idx = idx;
-    }
-
-    name_index value;
-    value.name = name;
-    value.low_idx = std::min(first_idx, second_idx);
-    value.high_idx = std::max(first_idx, second_idx);
-    return value;
-}
-
-int InstPort::num_instances() const {
-    if (instance_high_index() == UNSPECIFIED || instance_low_index() == UNSPECIFIED) {
-        throw ArchFpgaError("Unspecified instance indicies");
-    }
-    return instance_high_index() - instance_low_index() + 1;
-}
-
-int InstPort::num_pins() const {
-    if (port_high_index() == UNSPECIFIED || port_low_index() == UNSPECIFIED) {
-        throw ArchFpgaError("Unspecified port indicies");
-    }
-    return port_high_index() - port_low_index() + 1;
-}
-
-void free_arch(t_arch* arch) {
-    if (arch == nullptr) {
-        return;
-    }
-
-    for (int i = 0; i < arch->num_switches; ++i) {
-        if (arch->Switches->name != nullptr) {
-            vtr::free(arch->Switches[i].name);
-        }
-    }
-    delete[] arch->Switches;
-    arch->Switches = nullptr;
-
-    free_arch_models(arch->models);
-
-    for (int i = 0; i < arch->num_directs; ++i) {
-        vtr::free(arch->Directs[i].name);
-        vtr::free(arch->Directs[i].from_pin);
-        vtr::free(arch->Directs[i].to_pin);
-    }
-    vtr::free(arch->Directs);
-
-    vtr::free(arch->architecture_id);
-
-    if (arch->model_library) {
-        for (int i = 0; i < 4; ++i) {
-            vtr::t_linked_vptr* vptr = arch->model_library[i].pb_types;
-            while (vptr) {
-                vtr::t_linked_vptr* vptr_prev = vptr;
-                vptr = vptr->next;
-                vtr::free(vptr_prev);
-            }
-        }
-
-        vtr::free(arch->model_library[0].name);
-        vtr::free(arch->model_library[0].outputs->name);
-        delete[] arch->model_library[0].outputs;
-        vtr::free(arch->model_library[1].inputs->name);
-        delete[] arch->model_library[1].inputs;
-        vtr::free(arch->model_library[1].name);
-        vtr::free(arch->model_library[2].name);
-        vtr::free(arch->model_library[2].inputs[0].name);
-        vtr::free(arch->model_library[2].inputs[1].name);
-        delete[] arch->model_library[2].inputs;
-        vtr::free(arch->model_library[2].outputs->name);
-        delete[] arch->model_library[2].outputs;
-        vtr::free(arch->model_library[3].name);
-        vtr::free(arch->model_library[3].inputs->name);
-        delete[] arch->model_library[3].inputs;
-        vtr::free(arch->model_library[3].outputs->name);
-        delete[] arch->model_library[3].outputs;
-        delete[] arch->model_library;
-    }
-
-    if (arch->clocks) {
-        vtr::free(arch->clocks->clock_inf);
-    }
-
-    delete (arch->noc);
-}
-
-//Frees all models in the linked list
-void free_arch_models(t_model* models) {
-    t_model* model = models;
-    while (model) {
-        model = free_arch_model(model);
-    }
-}
-
-//Frees the specified model, and returns the next model (if any) in the linked list
-t_model* free_arch_model(t_model* model) {
-    if (!model) return nullptr;
-
-    t_model* next_model = model->next;
-
-    free_arch_model_ports(model->inputs);
-    free_arch_model_ports(model->outputs);
-
-    vtr::t_linked_vptr* vptr = model->pb_types;
-    while (vptr) {
-        vtr::t_linked_vptr* vptr_prev = vptr;
-        vptr = vptr->next;
-        vtr::free(vptr_prev);
-    }
-
-    if (model->instances)
-        vtr::free(model->instances);
-    vtr::free(model->name);
-    delete model;
-
-    return next_model;
-}
-
-//Frees all the model portss in a linked list
-void free_arch_model_ports(t_model_ports* model_ports) {
-    t_model_ports* model_port = model_ports;
-    while (model_port) {
-        model_port = free_arch_model_port(model_port);
-    }
-}
-
-//Frees the specified model_port, and returns the next model_port (if any) in the linked list
-t_model_ports* free_arch_model_port(t_model_ports* model_port) {
-    if (!model_port) return nullptr;
-
-    t_model_ports* next_port = model_port->next;
-
-    vtr::free(model_port->name);
-    delete model_port;
-
-    return next_port;
-}
-
-void free_type_descriptors(std::vector<t_physical_tile_type>& type_descriptors) {
-    for (auto& type : type_descriptors) {
-        vtr::free(type.name);
-        if (type.index == EMPTY_TYPE_INDEX) {
-            continue;
-        }
-
-        for (auto& sub_tile : type.sub_tiles) {
-            vtr::free(sub_tile.name);
-
-            for (auto port : sub_tile.ports) {
-                vtr::free(port.name);
-            }
-        }
-    }
-    type_descriptors.clear();
-}
-
-void free_type_descriptors(std::vector<t_logical_block_type>& type_descriptors) {
-    free_all_pb_graph_nodes(type_descriptors);
-
-    for (auto& type : type_descriptors) {
-        vtr::free(type.name);
-        if (type.index == EMPTY_TYPE_INDEX) {
-            continue;
-        }
-
-        free_pb_type(type.pb_type);
-        delete type.pb_type;
-    }
-    type_descriptors.clear();
-}
-
-static void free_all_pb_graph_nodes(std::vector<t_logical_block_type>& type_descriptors) {
-    for (auto& type : type_descriptors) {
-        if (type.pb_type) {
-            if (type.pb_graph_head) {
-                free_pb_graph(type.pb_graph_head);
-                delete type.pb_graph_head;
-            }
-        }
-    }
-}
-
-static void free_pb_graph(t_pb_graph_node* pb_graph_node) {
-    int i, j, k;
-    const t_pb_type* pb_type;
-
-    pb_type = pb_graph_node->pb_type;
-
-    /*free all lists of connectable input pin pointer of pb_graph_node and it's children*/
-    /*free_list_of_connectable_input_pin_ptrs (pb_graph_node);*/
-
-    /* Free ports for pb graph node */
-    for (i = 0; i < pb_graph_node->num_input_ports; i++) {
-        for (j = 0; j < pb_graph_node->num_input_pins[i]; j++) {
-            if (pb_graph_node->input_pins[i][j].parent_pin_class)
-                delete[] pb_graph_node->input_pins[i][j].parent_pin_class;
-        }
-        delete[] pb_graph_node->input_pins[i];
-    }
-    for (i = 0; i < pb_graph_node->num_output_ports; i++) {
-        for (j = 0; j < pb_graph_node->num_output_pins[i]; j++) {
-            if (pb_graph_node->output_pins[i][j].parent_pin_class)
-                delete[] pb_graph_node->output_pins[i][j].parent_pin_class;
-
-            if (pb_graph_node->output_pins[i][j].list_of_connectable_input_pin_ptrs) {
-                for (k = 0; k < pb_graph_node->pb_type->depth; k++) {
-                    delete[] pb_graph_node->output_pins[i][j].list_of_connectable_input_pin_ptrs[k];
-                }
-                delete[] pb_graph_node->output_pins[i][j].list_of_connectable_input_pin_ptrs;
-            }
-
-            if (pb_graph_node->output_pins[i][j].num_connectable_primitive_input_pins)
-                delete[] pb_graph_node->output_pins[i][j].num_connectable_primitive_input_pins;
-        }
-        delete[] pb_graph_node->output_pins[i];
-    }
-    for (i = 0; i < pb_graph_node->num_clock_ports; i++) {
-        for (j = 0; j < pb_graph_node->num_clock_pins[i]; j++) {
-            if (pb_graph_node->clock_pins[i][j].parent_pin_class)
-                delete[] pb_graph_node->clock_pins[i][j].parent_pin_class;
-        }
-        delete[] pb_graph_node->clock_pins[i];
-    }
-
-    delete[] pb_graph_node->input_pins;
-    delete[] pb_graph_node->output_pins;
-    delete[] pb_graph_node->clock_pins;
-
-    delete[] pb_graph_node->num_input_pins;
-    delete[] pb_graph_node->num_output_pins;
-    delete[] pb_graph_node->num_clock_pins;
-
-    delete[] pb_graph_node->input_pin_class_size;
-    delete[] pb_graph_node->output_pin_class_size;
-
-    if (pb_graph_node->interconnect_pins) {
-        for (i = 0; i < pb_graph_node->pb_type->num_modes; i++) {
-            if (pb_graph_node->interconnect_pins[i] == nullptr) continue;
-
-            t_mode* mode = &pb_graph_node->pb_type->modes[i];
-
-            for (j = 0; j < mode->num_interconnect; ++j) {
-                //The interconnect_pins data structures are only initialized for power analysis and
-                //are bizarrely baroque...
-                t_interconnect* interconn = pb_graph_node->interconnect_pins[i][j].interconnect;
-                VTR_ASSERT(interconn == &mode->interconnect[j]);
-
-                t_interconnect_power* interconn_power = interconn->interconnect_power;
-                for (int iport = 0; iport < interconn_power->num_input_ports; ++iport) {
-                    delete[] pb_graph_node->interconnect_pins[i][j].input_pins[iport];
-                }
-                for (int iport = 0; iport < interconn_power->num_output_ports; ++iport) {
-                    delete[] pb_graph_node->interconnect_pins[i][j].output_pins[iport];
-                }
-                delete[] pb_graph_node->interconnect_pins[i][j].input_pins;
-                delete[] pb_graph_node->interconnect_pins[i][j].output_pins;
-            }
-            delete[] pb_graph_node->interconnect_pins[i];
-        }
-    }
-    delete[] pb_graph_node->interconnect_pins;
-    delete pb_graph_node->pb_node_power;
-
-    for (i = 0; i < pb_type->num_modes; i++) {
-        for (j = 0; j < pb_type->modes[i].num_pb_type_children; j++) {
-            for (k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb; k++) {
-                free_pb_graph(&pb_graph_node->child_pb_graph_nodes[i][j][k]);
-            }
-            vtr::free(pb_graph_node->child_pb_graph_nodes[i][j]);
-        }
-        vtr::free(pb_graph_node->child_pb_graph_nodes[i]);
-    }
-    vtr::free(pb_graph_node->child_pb_graph_nodes);
-}
-
-static void free_pb_type(t_pb_type* pb_type) {
-    vtr::free(pb_type->name);
-    if (pb_type->blif_model)
-        vtr::free(pb_type->blif_model);
-
-    for (int i = 0; i < pb_type->num_modes; ++i) {
-        for (int j = 0; j < pb_type->modes[i].num_pb_type_children; ++j) {
-            free_pb_type(&pb_type->modes[i].pb_type_children[j]);
-        }
-        delete[] pb_type->modes[i].pb_type_children;
-        vtr::free(pb_type->modes[i].name);
-        for (int j = 0; j < pb_type->modes[i].num_interconnect; ++j) {
-            vtr::free(pb_type->modes[i].interconnect[j].input_string);
-            vtr::free(pb_type->modes[i].interconnect[j].output_string);
-            vtr::free(pb_type->modes[i].interconnect[j].name);
-
-            for (int k = 0; k < pb_type->modes[i].interconnect[j].num_annotations; ++k) {
-                if (pb_type->modes[i].interconnect[j].annotations[k].clock)
-                    vtr::free(pb_type->modes[i].interconnect[j].annotations[k].clock);
-                if (pb_type->modes[i].interconnect[j].annotations[k].input_pins) {
-                    vtr::free(pb_type->modes[i].interconnect[j].annotations[k].input_pins);
-                }
-                if (pb_type->modes[i].interconnect[j].annotations[k].output_pins) {
-                    vtr::free(pb_type->modes[i].interconnect[j].annotations[k].output_pins);
-                }
-                for (int m = 0; m < pb_type->modes[i].interconnect[j].annotations[k].num_value_prop_pairs; ++m) {
-                    vtr::free(pb_type->modes[i].interconnect[j].annotations[k].value[m]);
-                }
-                vtr::free(pb_type->modes[i].interconnect[j].annotations[k].prop);
-                vtr::free(pb_type->modes[i].interconnect[j].annotations[k].value);
-            }
-            vtr::free(pb_type->modes[i].interconnect[j].annotations);
-            if (pb_type->modes[i].interconnect[j].interconnect_power)
-                vtr::free(pb_type->modes[i].interconnect[j].interconnect_power);
-        }
-        if (pb_type->modes[i].interconnect)
-            delete[] pb_type->modes[i].interconnect;
-        if (pb_type->modes[i].mode_power)
-            vtr::free(pb_type->modes[i].mode_power);
-    }
-    if (pb_type->modes)
-        delete[] pb_type->modes;
-
-    for (int i = 0; i < pb_type->num_annotations; ++i) {
-        for (int j = 0; j < pb_type->annotations[i].num_value_prop_pairs; ++j) {
-            vtr::free(pb_type->annotations[i].value[j]);
-        }
-        vtr::free(pb_type->annotations[i].value);
-        vtr::free(pb_type->annotations[i].prop);
-        if (pb_type->annotations[i].input_pins) {
-            vtr::free(pb_type->annotations[i].input_pins);
-        }
-        if (pb_type->annotations[i].output_pins) {
-            vtr::free(pb_type->annotations[i].output_pins);
-        }
-        if (pb_type->annotations[i].clock) {
-            vtr::free(pb_type->annotations[i].clock);
-        }
-    }
-    if (pb_type->num_annotations > 0) {
-        vtr::free(pb_type->annotations);
-    }
-
-    if (pb_type->pb_type_power) {
-        vtr::free(pb_type->pb_type_power);
-    }
-
-    for (int i = 0; i < pb_type->num_ports; ++i) {
-        vtr::free(pb_type->ports[i].name);
-        if (pb_type->ports[i].port_class) {
-            vtr::free(pb_type->ports[i].port_class);
-        }
-        if (pb_type->ports[i].port_power) {
-            vtr::free(pb_type->ports[i].port_power);
-        }
-    }
-    vtr::free(pb_type->ports);
-}
-
-t_port* findPortByName(const char* name, t_pb_type* pb_type, int* high_index, int* low_index) {
-    t_port* port;
-    int i;
-    unsigned int high;
-    unsigned int low;
-    unsigned int bracket_pos;
-    unsigned int colon_pos;
-
-    bracket_pos = strcspn(name, "[");
-
-    /* Find port by name */
-    port = nullptr;
-    for (i = 0; i < pb_type->num_ports; i++) {
-        char* compare_to = pb_type->ports[i].name;
-
-        if (strlen(compare_to) == bracket_pos
-            && strncmp(name, compare_to, bracket_pos) == 0) {
-            port = &pb_type->ports[i];
-            break;
-        }
-    }
-    if (i >= pb_type->num_ports) {
-        return nullptr;
-    }
-
-    /* Get indices */
-    if (strlen(name) > bracket_pos) {
-        high = atoi(&name[bracket_pos + 1]);
-
-        colon_pos = strcspn(name, ":");
-
-        if (colon_pos < strlen(name)) {
-            low = atoi(&name[colon_pos + 1]);
-        } else {
-            low = high;
-        }
-    } else {
-        high = port->num_pins - 1;
-        low = 0;
-    }
-
-    if (high_index && low_index) {
-        *high_index = high;
-        *low_index = low;
-    }
-
-    return port;
-}
-
-t_physical_tile_type get_empty_physical_type(const char* name) {
-    t_physical_tile_type type;
-    type.name = vtr::strdup(name);
-    type.num_pins = 0;
-    type.width = 1;
-    type.height = 1;
-    type.capacity = 0;
-    type.num_drivers = 0;
-    type.num_receivers = 0;
-    type.area = UNDEFINED;
-    type.switchblock_locations = vtr::Matrix<e_sb_type>({{size_t(type.width), size_t(type.height)}}, e_sb_type::FULL);
-    type.switchblock_switch_overrides = vtr::Matrix<int>({{size_t(type.width), size_t(type.height)}}, DEFAULT_SWITCH);
-    type.is_input_type = false;
-    type.is_output_type = false;
-
-    return type;
-}
-
-t_logical_block_type get_empty_logical_type(const char* name) {
-    t_logical_block_type type;
-    type.name = vtr::strdup(name);
-    type.pb_type = nullptr;
-
-    return type;
-}
-
-std::unordered_set<t_logical_block_type_ptr> get_equivalent_sites_set(t_physical_tile_type_ptr type) {
-    std::unordered_set<t_logical_block_type_ptr> equivalent_sites;
-
-    for (auto& sub_tile : type->sub_tiles) {
-        for (auto& logical_block : sub_tile.equivalent_sites) {
-            equivalent_sites.insert(logical_block);
-        }
-    }
-
-    return equivalent_sites;
-}
-
-void alloc_and_load_default_child_for_pb_type(t_pb_type* pb_type,
-                                              char* new_name,
-                                              t_pb_type* copy) {
-    int i, j;
-    char* dot;
-
-    VTR_ASSERT(pb_type->blif_model != nullptr);
-
-    copy->name = vtr::strdup(new_name);
-    copy->blif_model = vtr::strdup(pb_type->blif_model);
-    copy->class_type = pb_type->class_type;
-    copy->depth = pb_type->depth;
-    copy->model = pb_type->model;
-    copy->modes = nullptr;
-    copy->num_modes = 0;
-    copy->num_clock_pins = pb_type->num_clock_pins;
-    copy->num_input_pins = pb_type->num_input_pins;
-    copy->num_output_pins = pb_type->num_output_pins;
-    copy->num_pins = pb_type->num_pins;
-    copy->num_pb = 1;
-
-    /* Power */
-    copy->pb_type_power = (t_pb_type_power*)vtr::calloc(1,
-                                                        sizeof(t_pb_type_power));
-    copy->pb_type_power->estimation_method = power_method_inherited(pb_type->pb_type_power->estimation_method);
-
-    /* Ports */
-    copy->num_ports = pb_type->num_ports;
-    copy->ports = (t_port*)vtr::calloc(pb_type->num_ports, sizeof(t_port));
-    for (i = 0; i < pb_type->num_ports; i++) {
-        copy->ports[i].is_clock = pb_type->ports[i].is_clock;
-        copy->ports[i].model_port = pb_type->ports[i].model_port;
-        copy->ports[i].type = pb_type->ports[i].type;
-        copy->ports[i].num_pins = pb_type->ports[i].num_pins;
-        copy->ports[i].parent_pb_type = copy;
-        copy->ports[i].name = vtr::strdup(pb_type->ports[i].name);
-        copy->ports[i].port_class = vtr::strdup(pb_type->ports[i].port_class);
-        copy->ports[i].port_index_by_type = pb_type->ports[i].port_index_by_type;
-        copy->ports[i].index = pb_type->ports[i].index;
-        copy->ports[i].absolute_first_pin_index = pb_type->ports[i].absolute_first_pin_index;
-
-        copy->ports[i].port_power = (t_port_power*)vtr::calloc(1,
-                                                               sizeof(t_port_power));
-        //Defaults
-        if (copy->pb_type_power->estimation_method == POWER_METHOD_AUTO_SIZES) {
-            copy->ports[i].port_power->wire_type = POWER_WIRE_TYPE_AUTO;
-            copy->ports[i].port_power->buffer_type = POWER_BUFFER_TYPE_AUTO;
-        } else if (copy->pb_type_power->estimation_method
-                   == POWER_METHOD_SPECIFY_SIZES) {
-            copy->ports[i].port_power->wire_type = POWER_WIRE_TYPE_IGNORED;
-            copy->ports[i].port_power->buffer_type = POWER_BUFFER_TYPE_NONE;
-        }
-    }
-
-    copy->annotations = (t_pin_to_pin_annotation*)vtr::calloc(pb_type->num_annotations, sizeof(t_pin_to_pin_annotation));
-    copy->num_annotations = pb_type->num_annotations;
-    for (i = 0; i < copy->num_annotations; i++) {
-        copy->annotations[i].clock = vtr::strdup(pb_type->annotations[i].clock);
-        dot = strstr(pb_type->annotations[i].input_pins, ".");
-        copy->annotations[i].input_pins = (char*)vtr::malloc(sizeof(char) * (strlen(new_name) + strlen(dot) + 1));
-        copy->annotations[i].input_pins[0] = '\0';
-        strcat(copy->annotations[i].input_pins, new_name);
-        strcat(copy->annotations[i].input_pins, dot);
-        if (pb_type->annotations[i].output_pins != nullptr) {
-            dot = strstr(pb_type->annotations[i].output_pins, ".");
-            copy->annotations[i].output_pins = (char*)vtr::malloc(sizeof(char) * (strlen(new_name) + strlen(dot) + 1));
-            copy->annotations[i].output_pins[0] = '\0';
-            strcat(copy->annotations[i].output_pins, new_name);
-            strcat(copy->annotations[i].output_pins, dot);
-        } else {
-            copy->annotations[i].output_pins = nullptr;
-        }
-        copy->annotations[i].line_num = pb_type->annotations[i].line_num;
-        copy->annotations[i].format = pb_type->annotations[i].format;
-        copy->annotations[i].type = pb_type->annotations[i].type;
-        copy->annotations[i].num_value_prop_pairs = pb_type->annotations[i].num_value_prop_pairs;
-        copy->annotations[i].prop = (int*)vtr::malloc(sizeof(int) * pb_type->annotations[i].num_value_prop_pairs);
-        copy->annotations[i].value = (char**)vtr::malloc(sizeof(char*) * pb_type->annotations[i].num_value_prop_pairs);
-        for (j = 0; j < pb_type->annotations[i].num_value_prop_pairs; j++) {
-            copy->annotations[i].prop[j] = pb_type->annotations[i].prop[j];
-            copy->annotations[i].value[j] = vtr::strdup(pb_type->annotations[i].value[j]);
-        }
-    }
-}
-
-/* populate special lut class */
-void ProcessLutClass(t_pb_type* lut_pb_type) {
-    char* default_name;
-    t_port* in_port;
-    t_port* out_port;
-    int i, j;
-
-    if (strcmp(lut_pb_type->name, "lut") != 0) {
-        default_name = vtr::strdup("lut");
-    } else {
-        default_name = vtr::strdup("lut_child");
-    }
-
-    lut_pb_type->num_modes = 2;
-    lut_pb_type->pb_type_power->leakage_default_mode = 1;
-    lut_pb_type->modes = new t_mode[lut_pb_type->num_modes];
-
-    /* First mode, route_through */
-    lut_pb_type->modes[0].name = vtr::strdup("wire");
-    lut_pb_type->modes[0].parent_pb_type = lut_pb_type;
-    lut_pb_type->modes[0].index = 0;
-    lut_pb_type->modes[0].num_pb_type_children = 0;
-    lut_pb_type->modes[0].mode_power = (t_mode_power*)vtr::calloc(1,
-                                                                  sizeof(t_mode_power));
-
-    /* Process interconnect */
-    /* TODO: add timing annotations to route-through */
-    VTR_ASSERT(lut_pb_type->num_ports == 2);
-    if (strcmp(lut_pb_type->ports[0].port_class, "lut_in") == 0) {
-        VTR_ASSERT(strcmp(lut_pb_type->ports[1].port_class, "lut_out") == 0);
-        in_port = &lut_pb_type->ports[0];
-        out_port = &lut_pb_type->ports[1];
-    } else {
-        VTR_ASSERT(strcmp(lut_pb_type->ports[0].port_class, "lut_out") == 0);
-        VTR_ASSERT(strcmp(lut_pb_type->ports[1].port_class, "lut_in") == 0);
-        out_port = &lut_pb_type->ports[0];
-        in_port = &lut_pb_type->ports[1];
-    }
-    lut_pb_type->modes[0].num_interconnect = 1;
-    lut_pb_type->modes[0].interconnect = new t_interconnect[1];
-    lut_pb_type->modes[0].interconnect[0].name = (char*)vtr::calloc(strlen(lut_pb_type->name) + 10, sizeof(char));
-    sprintf(lut_pb_type->modes[0].interconnect[0].name, "complete:%s",
-            lut_pb_type->name);
-    lut_pb_type->modes[0].interconnect[0].type = COMPLETE_INTERC;
-    lut_pb_type->modes[0].interconnect[0].input_string = (char*)vtr::calloc(strlen(lut_pb_type->name) + strlen(in_port->name) + 2,
-                                                                            sizeof(char));
-    sprintf(lut_pb_type->modes[0].interconnect[0].input_string, "%s.%s",
-            lut_pb_type->name, in_port->name);
-    lut_pb_type->modes[0].interconnect[0].output_string = (char*)vtr::calloc(strlen(lut_pb_type->name) + strlen(out_port->name) + 2,
-                                                                             sizeof(char));
-    sprintf(lut_pb_type->modes[0].interconnect[0].output_string, "%s.%s",
-            lut_pb_type->name, out_port->name);
-
-    lut_pb_type->modes[0].interconnect[0].parent_mode_index = 0;
-    lut_pb_type->modes[0].interconnect[0].parent_mode = &lut_pb_type->modes[0];
-    lut_pb_type->modes[0].interconnect[0].interconnect_power = (t_interconnect_power*)vtr::calloc(1, sizeof(t_interconnect_power));
-
-    lut_pb_type->modes[0].interconnect[0].annotations = (t_pin_to_pin_annotation*)vtr::calloc(lut_pb_type->num_annotations,
-                                                                                              sizeof(t_pin_to_pin_annotation));
-    lut_pb_type->modes[0].interconnect[0].num_annotations = lut_pb_type->num_annotations;
-    for (i = 0; i < lut_pb_type->modes[0].interconnect[0].num_annotations;
-         i++) {
-        lut_pb_type->modes[0].interconnect[0].annotations[i].clock = vtr::strdup(lut_pb_type->annotations[i].clock);
-        lut_pb_type->modes[0].interconnect[0].annotations[i].input_pins = vtr::strdup(lut_pb_type->annotations[i].input_pins);
-        lut_pb_type->modes[0].interconnect[0].annotations[i].output_pins = vtr::strdup(lut_pb_type->annotations[i].output_pins);
-        lut_pb_type->modes[0].interconnect[0].annotations[i].line_num = lut_pb_type->annotations[i].line_num;
-        lut_pb_type->modes[0].interconnect[0].annotations[i].format = lut_pb_type->annotations[i].format;
-        lut_pb_type->modes[0].interconnect[0].annotations[i].type = lut_pb_type->annotations[i].type;
-        lut_pb_type->modes[0].interconnect[0].annotations[i].num_value_prop_pairs = lut_pb_type->annotations[i].num_value_prop_pairs;
-        lut_pb_type->modes[0].interconnect[0].annotations[i].prop = (int*)vtr::malloc(sizeof(int)
-                                                                                      * lut_pb_type->annotations[i].num_value_prop_pairs);
-        lut_pb_type->modes[0].interconnect[0].annotations[i].value = (char**)vtr::malloc(sizeof(char*)
-                                                                                         * lut_pb_type->annotations[i].num_value_prop_pairs);
-        for (j = 0; j < lut_pb_type->annotations[i].num_value_prop_pairs; j++) {
-            lut_pb_type->modes[0].interconnect[0].annotations[i].prop[j] = lut_pb_type->annotations[i].prop[j];
-            lut_pb_type->modes[0].interconnect[0].annotations[i].value[j] = vtr::strdup(lut_pb_type->annotations[i].value[j]);
-        }
-    }
-
-    /* Second mode, LUT */
-
-    lut_pb_type->modes[1].name = vtr::strdup(lut_pb_type->name);
-    lut_pb_type->modes[1].parent_pb_type = lut_pb_type;
-    lut_pb_type->modes[1].index = 1;
-    lut_pb_type->modes[1].num_pb_type_children = 1;
-    lut_pb_type->modes[1].mode_power = (t_mode_power*)vtr::calloc(1,
-                                                                  sizeof(t_mode_power));
-    lut_pb_type->modes[1].pb_type_children = new t_pb_type[1];
-    alloc_and_load_default_child_for_pb_type(lut_pb_type, default_name,
-                                             lut_pb_type->modes[1].pb_type_children);
-    /* moved annotations to child so delete old annotations */
-    for (i = 0; i < lut_pb_type->num_annotations; i++) {
-        for (j = 0; j < lut_pb_type->annotations[i].num_value_prop_pairs; j++) {
-            free(lut_pb_type->annotations[i].value[j]);
-        }
-        free(lut_pb_type->annotations[i].value);
-        free(lut_pb_type->annotations[i].prop);
-        if (lut_pb_type->annotations[i].input_pins) {
-            free(lut_pb_type->annotations[i].input_pins);
-        }
-        if (lut_pb_type->annotations[i].output_pins) {
-            free(lut_pb_type->annotations[i].output_pins);
-        }
-        if (lut_pb_type->annotations[i].clock) {
-            free(lut_pb_type->annotations[i].clock);
-        }
-    }
-    lut_pb_type->num_annotations = 0;
-    free(lut_pb_type->annotations);
-    lut_pb_type->annotations = nullptr;
-    lut_pb_type->modes[1].pb_type_children[0].depth = lut_pb_type->depth + 1;
-    lut_pb_type->modes[1].pb_type_children[0].parent_mode = &lut_pb_type->modes[1];
-    for (i = 0; i < lut_pb_type->modes[1].pb_type_children[0].num_ports; i++) {
-        if (lut_pb_type->modes[1].pb_type_children[0].ports[i].type == IN_PORT) {
-            lut_pb_type->modes[1].pb_type_children[0].ports[i].equivalent = PortEquivalence::FULL;
-        }
-    }
-
-    /* Process interconnect */
-    lut_pb_type->modes[1].num_interconnect = 2;
-    lut_pb_type->modes[1].interconnect = new t_interconnect[lut_pb_type->modes[1].num_interconnect];
-    lut_pb_type->modes[1].interconnect[0].name = (char*)vtr::calloc(strlen(lut_pb_type->name) + 10, sizeof(char));
-    sprintf(lut_pb_type->modes[1].interconnect[0].name, "direct:%s",
-            lut_pb_type->name);
-    lut_pb_type->modes[1].interconnect[0].type = DIRECT_INTERC;
-    lut_pb_type->modes[1].interconnect[0].input_string = (char*)vtr::calloc(strlen(lut_pb_type->name) + strlen(in_port->name) + 2,
-                                                                            sizeof(char));
-    sprintf(lut_pb_type->modes[1].interconnect[0].input_string, "%s.%s",
-            lut_pb_type->name, in_port->name);
-    lut_pb_type->modes[1].interconnect[0].output_string = (char*)vtr::calloc(strlen(default_name) + strlen(in_port->name) + 2, sizeof(char));
-    sprintf(lut_pb_type->modes[1].interconnect[0].output_string, "%s.%s",
-            default_name, in_port->name);
-    lut_pb_type->modes[1].interconnect[0].infer_annotations = true;
-
-    lut_pb_type->modes[1].interconnect[0].parent_mode_index = 1;
-    lut_pb_type->modes[1].interconnect[0].parent_mode = &lut_pb_type->modes[1];
-    lut_pb_type->modes[1].interconnect[0].interconnect_power = (t_interconnect_power*)vtr::calloc(1, sizeof(t_interconnect_power));
-
-    lut_pb_type->modes[1].interconnect[1].name = (char*)vtr::calloc(strlen(lut_pb_type->name) + 11, sizeof(char));
-    sprintf(lut_pb_type->modes[1].interconnect[1].name, "direct:%s",
-            lut_pb_type->name);
-
-    lut_pb_type->modes[1].interconnect[1].type = DIRECT_INTERC;
-    lut_pb_type->modes[1].interconnect[1].input_string = (char*)vtr::calloc(strlen(default_name) + strlen(out_port->name) + 4, sizeof(char));
-    sprintf(lut_pb_type->modes[1].interconnect[1].input_string, "%s.%s",
-            default_name, out_port->name);
-    lut_pb_type->modes[1].interconnect[1].output_string = (char*)vtr::calloc(strlen(lut_pb_type->name) + strlen(out_port->name)
-                                                                                 + strlen(in_port->name) + 2,
-                                                                             sizeof(char));
-    sprintf(lut_pb_type->modes[1].interconnect[1].output_string, "%s.%s",
-            lut_pb_type->name, out_port->name);
-    lut_pb_type->modes[1].interconnect[1].infer_annotations = true;
-
-    lut_pb_type->modes[1].interconnect[1].parent_mode_index = 1;
-    lut_pb_type->modes[1].interconnect[1].parent_mode = &lut_pb_type->modes[1];
-    lut_pb_type->modes[1].interconnect[1].interconnect_power = (t_interconnect_power*)vtr::calloc(1, sizeof(t_interconnect_power));
-
-    free(default_name);
-
-    free(lut_pb_type->blif_model);
-    lut_pb_type->blif_model = nullptr;
-    lut_pb_type->model = nullptr;
-}
-
-/* populate special memory class */
-void ProcessMemoryClass(t_pb_type* mem_pb_type) {
-    char* default_name;
-    char *input_name, *input_port_name, *output_name, *output_port_name;
-    int i, j, i_inter, num_pb;
-
-    if (strcmp(mem_pb_type->name, "memory_slice") != 0) {
-        default_name = vtr::strdup("memory_slice");
-    } else {
-        default_name = vtr::strdup("memory_slice_1bit");
-    }
-
-    mem_pb_type->modes = new t_mode[1];
-    mem_pb_type->modes[0].name = vtr::strdup(default_name);
-    mem_pb_type->modes[0].parent_pb_type = mem_pb_type;
-    mem_pb_type->modes[0].index = 0;
-    mem_pb_type->modes[0].mode_power = (t_mode_power*)vtr::calloc(1,
-                                                                  sizeof(t_mode_power));
-    num_pb = OPEN;
-    for (i = 0; i < mem_pb_type->num_ports; i++) {
-        if (mem_pb_type->ports[i].port_class != nullptr
-            && strstr(mem_pb_type->ports[i].port_class, "data")
-                   == mem_pb_type->ports[i].port_class) {
-            if (num_pb == OPEN) {
-                num_pb = mem_pb_type->ports[i].num_pins;
-            } else if (num_pb != mem_pb_type->ports[i].num_pins) {
-                archfpga_throw(get_arch_file_name(), 0,
-                               "memory %s has inconsistent number of data bits %d and %d\n",
-                               mem_pb_type->name, num_pb,
-                               mem_pb_type->ports[i].num_pins);
-            }
-        }
-    }
-
-    mem_pb_type->modes[0].num_pb_type_children = 1;
-    mem_pb_type->modes[0].pb_type_children = new t_pb_type[1];
-    alloc_and_load_default_child_for_pb_type(mem_pb_type, default_name,
-                                             &mem_pb_type->modes[0].pb_type_children[0]);
-    mem_pb_type->modes[0].pb_type_children[0].depth = mem_pb_type->depth + 1;
-    mem_pb_type->modes[0].pb_type_children[0].parent_mode = &mem_pb_type->modes[0];
-    mem_pb_type->modes[0].pb_type_children[0].num_pb = num_pb;
-
-    mem_pb_type->num_modes = 1;
-
-    free(mem_pb_type->blif_model);
-    mem_pb_type->blif_model = nullptr;
-    mem_pb_type->model = nullptr;
-
-    mem_pb_type->modes[0].num_interconnect = mem_pb_type->num_ports * num_pb;
-    mem_pb_type->modes[0].interconnect = new t_interconnect[mem_pb_type->modes[0].num_interconnect];
-
-    for (i = 0; i < mem_pb_type->modes[0].num_interconnect; i++) {
-        mem_pb_type->modes[0].interconnect[i].parent_mode_index = 0;
-        mem_pb_type->modes[0].interconnect[i].parent_mode = &mem_pb_type->modes[0];
-    }
-
-    /* Process interconnect */
-    i_inter = 0;
-    for (i = 0; i < mem_pb_type->num_ports; i++) {
-        mem_pb_type->modes[0].interconnect[i_inter].type = DIRECT_INTERC;
-        input_port_name = mem_pb_type->ports[i].name;
-        output_port_name = mem_pb_type->ports[i].name;
-
-        if (mem_pb_type->ports[i].type == IN_PORT) {
-            input_name = mem_pb_type->name;
-            output_name = default_name;
-        } else {
-            input_name = default_name;
-            output_name = mem_pb_type->name;
-        }
-
-        if (mem_pb_type->ports[i].port_class != nullptr
-            && strstr(mem_pb_type->ports[i].port_class, "data")
-                   == mem_pb_type->ports[i].port_class) {
-            mem_pb_type->modes[0].interconnect[i_inter].name = (char*)vtr::calloc(i_inter / 10 + 8, sizeof(char));
-            sprintf(mem_pb_type->modes[0].interconnect[i_inter].name,
-                    "direct%d", i_inter);
-            mem_pb_type->modes[0].interconnect[i_inter].infer_annotations = true;
-
-            if (mem_pb_type->ports[i].type == IN_PORT) {
-                /* force data pins to be one bit wide and update stats */
-                mem_pb_type->modes[0].pb_type_children[0].ports[i].num_pins = 1;
-                mem_pb_type->modes[0].pb_type_children[0].num_input_pins -= (mem_pb_type->ports[i].num_pins - 1);
-
-                mem_pb_type->modes[0].interconnect[i_inter].input_string = (char*)vtr::calloc(strlen(input_name) + strlen(input_port_name)
-                                                                                                  + 2,
-                                                                                              sizeof(char));
-                sprintf(mem_pb_type->modes[0].interconnect[i_inter].input_string,
-                        "%s.%s", input_name, input_port_name);
-                mem_pb_type->modes[0].interconnect[i_inter].output_string = (char*)vtr::calloc(strlen(output_name) + strlen(output_port_name)
-                                                                                                   + 2 * (6 + num_pb / 10),
-                                                                                               sizeof(char));
-                sprintf(mem_pb_type->modes[0].interconnect[i_inter].output_string,
-                        "%s[%d:0].%s", output_name, num_pb - 1,
-                        output_port_name);
-            } else {
-                /* force data pins to be one bit wide and update stats */
-                mem_pb_type->modes[0].pb_type_children[0].ports[i].num_pins = 1;
-                mem_pb_type->modes[0].pb_type_children[0].num_output_pins -= (mem_pb_type->ports[i].num_pins - 1);
-
-                mem_pb_type->modes[0].interconnect[i_inter].input_string = (char*)vtr::calloc(strlen(input_name) + strlen(input_port_name)
-                                                                                                  + 2 * (6 + num_pb / 10),
-                                                                                              sizeof(char));
-                sprintf(mem_pb_type->modes[0].interconnect[i_inter].input_string,
-                        "%s[%d:0].%s", input_name, num_pb - 1, input_port_name);
-                mem_pb_type->modes[0].interconnect[i_inter].output_string = (char*)vtr::calloc(strlen(output_name) + strlen(output_port_name)
-                                                                                                   + 2,
-                                                                                               sizeof(char));
-                sprintf(mem_pb_type->modes[0].interconnect[i_inter].output_string,
-                        "%s.%s", output_name, output_port_name);
-            }
-
-            /* Allocate interconnect power structures */
-            mem_pb_type->modes[0].interconnect[i_inter].interconnect_power = (t_interconnect_power*)vtr::calloc(1,
-                                                                                                                sizeof(t_interconnect_power));
-            i_inter++;
-        } else {
-            for (j = 0; j < num_pb; j++) {
-                /* Anything that is not data must be an input */
-                mem_pb_type->modes[0].interconnect[i_inter].name = (char*)vtr::calloc(i_inter / 10 + j / 10 + 10,
-                                                                                      sizeof(char));
-                sprintf(mem_pb_type->modes[0].interconnect[i_inter].name,
-                        "direct%d_%d", i_inter, j);
-                mem_pb_type->modes[0].interconnect[i_inter].infer_annotations = true;
-
-                if (mem_pb_type->ports[i].type == IN_PORT) {
-                    mem_pb_type->modes[0].interconnect[i_inter].type = DIRECT_INTERC;
-                    mem_pb_type->modes[0].interconnect[i_inter].input_string = (char*)vtr::calloc(strlen(input_name) + strlen(input_port_name)
-                                                                                                      + 2,
-                                                                                                  sizeof(char));
-                    sprintf(mem_pb_type->modes[0].interconnect[i_inter].input_string,
-                            "%s.%s", input_name, input_port_name);
-                    mem_pb_type->modes[0].interconnect[i_inter].output_string = (char*)vtr::calloc(strlen(output_name)
-                                                                                                       + strlen(output_port_name)
-                                                                                                       + 2 * (6 + num_pb / 10),
-                                                                                                   sizeof(char));
-                    sprintf(mem_pb_type->modes[0].interconnect[i_inter].output_string,
-                            "%s[%d:%d].%s", output_name, j, j,
-                            output_port_name);
-                } else {
-                    mem_pb_type->modes[0].interconnect[i_inter].type = DIRECT_INTERC;
-                    mem_pb_type->modes[0].interconnect[i_inter].input_string = (char*)vtr::calloc(strlen(input_name) + strlen(input_port_name)
-                                                                                                      + 2 * (6 + num_pb / 10),
-                                                                                                  sizeof(char));
-                    sprintf(mem_pb_type->modes[0].interconnect[i_inter].input_string,
-                            "%s[%d:%d].%s", input_name, j, j, input_port_name);
-                    mem_pb_type->modes[0].interconnect[i_inter].output_string = (char*)vtr::calloc(strlen(output_name)
-                                                                                                       + strlen(output_port_name) + 2,
-                                                                                                   sizeof(char));
-                    sprintf(mem_pb_type->modes[0].interconnect[i_inter].output_string,
-                            "%s.%s", output_name, output_port_name);
-                }
-
-                /* Allocate interconnect power structures */
-                mem_pb_type->modes[0].interconnect[i_inter].interconnect_power = (t_interconnect_power*)vtr::calloc(1,
-                                                                                                                    sizeof(t_interconnect_power));
-                i_inter++;
-            }
-        }
-    }
-
-    mem_pb_type->modes[0].num_interconnect = i_inter;
-
-    free(default_name);
-}
-
-e_power_estimation_method power_method_inherited(e_power_estimation_method parent_power_method) {
-    switch (parent_power_method) {
-        case POWER_METHOD_IGNORE:
-        case POWER_METHOD_AUTO_SIZES:
-        case POWER_METHOD_SPECIFY_SIZES:
-        case POWER_METHOD_TOGGLE_PINS:
-            return parent_power_method;
-        case POWER_METHOD_C_INTERNAL:
-        case POWER_METHOD_ABSOLUTE:
-            return POWER_METHOD_IGNORE;
-        case POWER_METHOD_UNDEFINED:
-            return POWER_METHOD_UNDEFINED;
-        case POWER_METHOD_SUM_OF_CHILDREN:
-            /* Just revert to the default */
-            return POWER_METHOD_AUTO_SIZES;
-        default:
-            VTR_ASSERT(0);
-            return POWER_METHOD_UNDEFINED; // Should never get here, but avoids a compiler warning.
-    }
-}
-
-void CreateModelLibrary(t_arch* arch) {
-    t_model* model_library;
-
-    model_library = new t_model[4];
-
-    //INPAD
-    model_library[0].name = vtr::strdup(MODEL_INPUT);
-    model_library[0].index = 0;
-    model_library[0].inputs = nullptr;
-    model_library[0].instances = nullptr;
-    model_library[0].next = &model_library[1];
-    model_library[0].outputs = new t_model_ports[1];
-    model_library[0].outputs->dir = OUT_PORT;
-    model_library[0].outputs->name = vtr::strdup("inpad");
-    model_library[0].outputs->next = nullptr;
-    model_library[0].outputs->size = 1;
-    model_library[0].outputs->min_size = 1;
-    model_library[0].outputs->index = 0;
-    model_library[0].outputs->is_clock = false;
-
-    //OUTPAD
-    model_library[1].name = vtr::strdup(MODEL_OUTPUT);
-    model_library[1].index = 1;
-    model_library[1].inputs = new t_model_ports[1];
-    model_library[1].inputs->dir = IN_PORT;
-    model_library[1].inputs->name = vtr::strdup("outpad");
-    model_library[1].inputs->next = nullptr;
-    model_library[1].inputs->size = 1;
-    model_library[1].inputs->min_size = 1;
-    model_library[1].inputs->index = 0;
-    model_library[1].inputs->is_clock = false;
-    model_library[1].instances = nullptr;
-    model_library[1].next = &model_library[2];
-    model_library[1].outputs = nullptr;
-
-    //LATCH
-    model_library[2].name = vtr::strdup(MODEL_LATCH);
-    model_library[2].index = 2;
-    model_library[2].inputs = new t_model_ports[2];
-
-    model_library[2].inputs[0].dir = IN_PORT;
-    model_library[2].inputs[0].name = vtr::strdup("D");
-    model_library[2].inputs[0].next = &model_library[2].inputs[1];
-    model_library[2].inputs[0].size = 1;
-    model_library[2].inputs[0].min_size = 1;
-    model_library[2].inputs[0].index = 0;
-    model_library[2].inputs[0].is_clock = false;
-    model_library[2].inputs[0].clock = "clk";
-
-    model_library[2].inputs[1].dir = IN_PORT;
-    model_library[2].inputs[1].name = vtr::strdup("clk");
-    model_library[2].inputs[1].next = nullptr;
-    model_library[2].inputs[1].size = 1;
-    model_library[2].inputs[1].min_size = 1;
-    model_library[2].inputs[1].index = 0;
-    model_library[2].inputs[1].is_clock = true;
-
-    model_library[2].instances = nullptr;
-    model_library[2].next = &model_library[3];
-
-    model_library[2].outputs = new t_model_ports[1];
-    model_library[2].outputs[0].dir = OUT_PORT;
-    model_library[2].outputs[0].name = vtr::strdup("Q");
-    model_library[2].outputs[0].next = nullptr;
-    model_library[2].outputs[0].size = 1;
-    model_library[2].outputs[0].min_size = 1;
-    model_library[2].outputs[0].index = 0;
-    model_library[2].outputs[0].is_clock = false;
-    model_library[2].outputs[0].clock = "clk";
-
-    //NAMES
-    model_library[3].name = vtr::strdup(MODEL_NAMES);
-    model_library[3].index = 3;
-
-    model_library[3].inputs = new t_model_ports[1];
-    model_library[3].inputs[0].dir = IN_PORT;
-    model_library[3].inputs[0].name = vtr::strdup("in");
-    model_library[3].inputs[0].next = nullptr;
-    model_library[3].inputs[0].size = 1;
-    model_library[3].inputs[0].min_size = 1;
-    model_library[3].inputs[0].index = 0;
-    model_library[3].inputs[0].is_clock = false;
-    model_library[3].inputs[0].combinational_sink_ports = {"out"};
-
-    model_library[3].instances = nullptr;
-    model_library[3].next = nullptr;
-
-    model_library[3].outputs = new t_model_ports[1];
-    model_library[3].outputs[0].dir = OUT_PORT;
-    model_library[3].outputs[0].name = vtr::strdup("out");
-    model_library[3].outputs[0].next = nullptr;
-    model_library[3].outputs[0].size = 1;
-    model_library[3].outputs[0].min_size = 1;
-    model_library[3].outputs[0].index = 0;
-    model_library[3].outputs[0].is_clock = false;
-
-    arch->model_library = model_library;
-}
-
-void SyncModelsPbTypes(t_arch* arch,
-                       const std::vector<t_logical_block_type>& Types) {
-    for (auto& Type : Types) {
-        if (Type.pb_type != nullptr) {
-            SyncModelsPbTypes_rec(arch, Type.pb_type);
-        }
-    }
-}
-
-void SyncModelsPbTypes_rec(t_arch* arch,
-                           t_pb_type* pb_type) {
-    int i, j, p;
-    t_model *model_match_prim, *cur_model;
-    t_model_ports* model_port;
-    vtr::t_linked_vptr* old;
-    char* blif_model_name = nullptr;
-
-    bool found;
-
-    if (pb_type->blif_model != nullptr) {
-        /* get actual name of subckt */
-        blif_model_name = pb_type->blif_model;
-        if (strstr(blif_model_name, ".subckt ") == blif_model_name) {
-            blif_model_name = strchr(blif_model_name, ' ');
-            ++blif_model_name; //Advance past space
-        }
-        if (!blif_model_name) {
-            archfpga_throw(get_arch_file_name(), 0,
-                           "Unknown blif model %s in pb_type %s\n",
-                           pb_type->blif_model, pb_type->name);
-        }
-
-        /* There are two sets of models to consider, the standard library of models and the user defined models */
-        if (is_library_model(blif_model_name)) {
-            cur_model = arch->model_library;
-        } else {
-            cur_model = arch->models;
-        }
-
-        /* Determine the logical model to use */
-        found = false;
-        model_match_prim = nullptr;
-        while (cur_model && !found) {
-            /* blif model always starts with .subckt so need to skip first 8 characters */
-            if (strcmp(blif_model_name, cur_model->name) == 0) {
-                found = true;
-                model_match_prim = cur_model;
-            }
-            cur_model = cur_model->next;
-        }
-        if (found != true) {
-            archfpga_throw(get_arch_file_name(), 0,
-                           "No matching model for pb_type %s\n", pb_type->blif_model);
-        }
-
-        pb_type->model = model_match_prim;
-        old = model_match_prim->pb_types;
-        model_match_prim->pb_types = (vtr::t_linked_vptr*)vtr::malloc(sizeof(vtr::t_linked_vptr));
-        model_match_prim->pb_types->next = old;
-        model_match_prim->pb_types->data_vptr = pb_type;
-
-        for (p = 0; p < pb_type->num_ports; p++) {
-            found = false;
-            /* TODO: Parse error checking - check if INPUT matches INPUT and OUTPUT matches OUTPUT (not yet done) */
-            model_port = model_match_prim->inputs;
-            while (model_port && !found) {
-                if (strcmp(model_port->name, pb_type->ports[p].name) == 0) {
-                    if (model_port->size < pb_type->ports[p].num_pins) {
-                        model_port->size = pb_type->ports[p].num_pins;
-                    }
-                    if (model_port->min_size > pb_type->ports[p].num_pins
-                        || model_port->min_size == -1) {
-                        model_port->min_size = pb_type->ports[p].num_pins;
-                    }
-                    pb_type->ports[p].model_port = model_port;
-                    if (pb_type->ports[p].type != model_port->dir) {
-                        archfpga_throw(get_arch_file_name(), 0,
-                                       "Direction for port '%s' on model does not match port direction in pb_type '%s'\n",
-                                       pb_type->ports[p].name, pb_type->name);
-                    }
-                    if (pb_type->ports[p].is_clock != model_port->is_clock) {
-                        archfpga_throw(get_arch_file_name(), 0,
-                                       "Port '%s' on model does not match is_clock in pb_type '%s'\n",
-                                       pb_type->ports[p].name, pb_type->name);
-                    }
-                    found = true;
-                }
-                model_port = model_port->next;
-            }
-            model_port = model_match_prim->outputs;
-            while (model_port && !found) {
-                if (strcmp(model_port->name, pb_type->ports[p].name) == 0) {
-                    if (model_port->size < pb_type->ports[p].num_pins) {
-                        model_port->size = pb_type->ports[p].num_pins;
-                    }
-                    if (model_port->min_size > pb_type->ports[p].num_pins
-                        || model_port->min_size == -1) {
-                        model_port->min_size = pb_type->ports[p].num_pins;
-                    }
-
-                    pb_type->ports[p].model_port = model_port;
-                    if (pb_type->ports[p].type != model_port->dir) {
-                        archfpga_throw(get_arch_file_name(), 0,
-                                       "Direction for port '%s' on model does not match port direction in pb_type '%s'\n",
-                                       pb_type->ports[p].name, pb_type->name);
-                    }
-                    found = true;
-                }
-                model_port = model_port->next;
-            }
-            if (found != true) {
-                archfpga_throw(get_arch_file_name(), 0,
-                               "No matching model port for port %s in pb_type %s\n",
-                               pb_type->ports[p].name, pb_type->name);
-            }
-        }
-    } else {
-        for (i = 0; i < pb_type->num_modes; i++) {
-            for (j = 0; j < pb_type->modes[i].num_pb_type_children; j++) {
-                SyncModelsPbTypes_rec(arch,
-                                      &(pb_type->modes[i].pb_type_children[j]));
-            }
-        }
-    }
-}
-
-/* Date:July 10th, 2013
- * Author: Daniel Chen
- * Purpose: Attempts to match a clock_name specified in an
- *			timing annotation (Tsetup, Thold, Tc_to_q) with the
- *			clock_name specified in the primitive. Applies
- *			to flipflop/memory right now.
- */
-void primitives_annotation_clock_match(t_pin_to_pin_annotation* annotation,
-                                       t_pb_type* parent_pb_type) {
-    int i_port;
-    bool clock_valid = false; //Determine if annotation's clock is same as primtive's clock
-
-    if (!parent_pb_type || !annotation) {
-        archfpga_throw(__FILE__, __LINE__,
-                       "Annotation_clock check encouters invalid annotation or primitive.\n");
-    }
-
-    for (i_port = 0; i_port < parent_pb_type->num_ports; i_port++) {
-        if (parent_pb_type->ports[i_port].is_clock) {
-            if (strcmp(parent_pb_type->ports[i_port].name, annotation->clock)
-                == 0) {
-                clock_valid = true;
-                break;
-            }
-        }
-    }
-
-    if (!clock_valid) {
-        archfpga_throw(get_arch_file_name(), annotation->line_num,
-                       "Clock '%s' does not match any clock defined in pb_type '%s'.\n",
-                       annotation->clock, parent_pb_type->name);
-    }
-}
-
-const t_segment_inf* find_segment(const t_arch* arch, std::string name) {
-    for (size_t i = 0; i < (arch->Segments).size(); ++i) {
-        const t_segment_inf* seg = &arch->Segments[i];
-        if (seg->name == name) {
-            return seg;
-        }
-    }
-
-    return nullptr;
-}
-
-bool segment_exists(const t_arch* arch, std::string name) {
-    return find_segment(arch, name) != nullptr;
-}
-
-bool is_library_model(const char* model_name) {
-    if (model_name == std::string(MODEL_NAMES)
-        || model_name == std::string(MODEL_LATCH)
-        || model_name == std::string(MODEL_INPUT)
-        || model_name == std::string(MODEL_OUTPUT)) {
-        return true;
-    }
-    return false;
-}
-
-bool is_library_model(const t_model* model) {
-    return is_library_model(model->name);
-}
-
-//Returns true if the specified block type contains the specified blif model name
-//
-// TODO: Remove block_type_contains_blif_model / pb_type_contains_blif_model
-// as part of
-// https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1193
-bool block_type_contains_blif_model(t_logical_block_type_ptr type, const std::string& blif_model_name) {
-    return pb_type_contains_blif_model(type->pb_type, blif_model_name);
-}
-
-//Returns true of a pb_type (or it's children) contain the specified blif model name
-bool pb_type_contains_blif_model(const t_pb_type* pb_type, const std::string& blif_model_name) {
-    if (!pb_type) {
-        return false;
-    }
-
-    if (pb_type->blif_model != nullptr) {
-        //Leaf pb_type
-        VTR_ASSERT(pb_type->num_modes == 0);
-        if (blif_model_name == pb_type->blif_model
-            || ".subckt " + blif_model_name == pb_type->blif_model) {
-            return true;
-        } else {
-            return false;
-        }
-    } else {
-        for (int imode = 0; imode < pb_type->num_modes; ++imode) {
-            const t_mode* mode = &pb_type->modes[imode];
-
-            for (int ichild = 0; ichild < mode->num_pb_type_children; ++ichild) {
-                const t_pb_type* pb_type_child = &mode->pb_type_children[ichild];
-                if (pb_type_contains_blif_model(pb_type_child, blif_model_name)) {
-                    return true;
-                }
-            }
-        }
-    }
-    return false;
-}
-
-const t_pin_to_pin_annotation* find_sequential_annotation(const t_pb_type* pb_type, const t_model_ports* port, enum e_pin_to_pin_delay_annotations annot_type) {
-    VTR_ASSERT(annot_type == E_ANNOT_PIN_TO_PIN_DELAY_TSETUP
-               || annot_type == E_ANNOT_PIN_TO_PIN_DELAY_THOLD
-               || annot_type == E_ANNOT_PIN_TO_PIN_DELAY_CLOCK_TO_Q_MAX
-               || annot_type == E_ANNOT_PIN_TO_PIN_DELAY_CLOCK_TO_Q_MIN);
-
-    for (int iannot = 0; iannot < pb_type->num_annotations; ++iannot) {
-        const t_pin_to_pin_annotation* annot = &pb_type->annotations[iannot];
-        InstPort annot_in(annot->input_pins);
-        if (annot_in.port_name() == port->name) {
-            for (int iprop = 0; iprop < annot->num_value_prop_pairs; ++iprop) {
-                if (annot->prop[iprop] == annot_type) {
-                    return annot;
-                }
-            }
-        }
-    }
-
-    return nullptr;
-}
-
-const t_pin_to_pin_annotation* find_combinational_annotation(const t_pb_type* pb_type, std::string in_port, std::string out_port) {
-    for (int iannot = 0; iannot < pb_type->num_annotations; ++iannot) {
-        const t_pin_to_pin_annotation* annot = &pb_type->annotations[iannot];
-        for (const auto& annot_in_str : vtr::split(annot->input_pins)) {
-            InstPort in_pins(annot_in_str);
-            for (const auto& annot_out_str : vtr::split(annot->output_pins)) {
-                InstPort out_pins(annot_out_str);
-                if (in_pins.port_name() == in_port && out_pins.port_name() == out_port) {
-                    for (int iprop = 0; iprop < annot->num_value_prop_pairs; ++iprop) {
-                        if (annot->prop[iprop] == E_ANNOT_PIN_TO_PIN_DELAY_MAX
-                            || annot->prop[iprop] == E_ANNOT_PIN_TO_PIN_DELAY_MIN) {
-                            return annot;
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    return nullptr;
-}
-
-void link_physical_logical_types(std::vector<t_physical_tile_type>& PhysicalTileTypes,
-                                 std::vector<t_logical_block_type>& LogicalBlockTypes) {
-    for (auto& physical_tile : PhysicalTileTypes) {
-        if (physical_tile.index == EMPTY_TYPE_INDEX) continue;
-
-        auto eq_sites_set = get_equivalent_sites_set(&physical_tile);
-        auto equivalent_sites = std::vector<t_logical_block_type_ptr>(eq_sites_set.begin(), eq_sites_set.end());
-
-        auto criteria = [&physical_tile](const t_logical_block_type* lhs, const t_logical_block_type* rhs) {
-            int num_pins = physical_tile.num_inst_pins;
-
-            int lhs_num_logical_pins = lhs->pb_type->num_pins;
-            int rhs_num_logical_pins = rhs->pb_type->num_pins;
-
-            int lhs_diff_num_pins = num_pins - lhs_num_logical_pins;
-            int rhs_diff_num_pins = num_pins - rhs_num_logical_pins;
-
-            return lhs_diff_num_pins < rhs_diff_num_pins;
-        };
-
-        std::sort(equivalent_sites.begin(), equivalent_sites.end(), criteria);
-
-        for (auto& logical_block : LogicalBlockTypes) {
-            for (auto site : equivalent_sites) {
-                if (0 == strcmp(logical_block.name, site->pb_type->name)) {
-                    logical_block.equivalent_tiles.push_back(&physical_tile);
-                    break;
-                }
-            }
-        }
-    }
-
-    for (auto& logical_block : LogicalBlockTypes) {
-        if (logical_block.index == EMPTY_TYPE_INDEX) continue;
-
-        auto& equivalent_tiles = logical_block.equivalent_tiles;
-
-        if ((int)equivalent_tiles.size() <= 0) {
-            archfpga_throw(__FILE__, __LINE__,
-                           "Logical Block %s does not have any equivalent tiles.\n", logical_block.name);
-        }
-
-        std::unordered_map<int, bool> ignored_pins_check_map;
-        std::unordered_map<int, bool> global_pins_check_map;
-
-        auto criteria = [&logical_block](const t_physical_tile_type* lhs, const t_physical_tile_type* rhs) {
-            int num_logical_pins = logical_block.pb_type->num_pins;
-
-            int lhs_num_pins = lhs->num_inst_pins;
-            int rhs_num_pins = rhs->num_inst_pins;
-
-            int lhs_diff_num_pins = lhs_num_pins - num_logical_pins;
-            int rhs_diff_num_pins = rhs_num_pins - num_logical_pins;
-
-            return lhs_diff_num_pins < rhs_diff_num_pins;
-        };
-
-        std::sort(equivalent_tiles.begin(), equivalent_tiles.end(), criteria);
-
-        for (int pin = 0; pin < logical_block.pb_type->num_pins; pin++) {
-            for (auto& tile : equivalent_tiles) {
-                auto direct_maps = tile->tile_block_pin_directs_map.at(logical_block.index);
-
-                for (auto& sub_tile : tile->sub_tiles) {
-                    auto equiv_sites = sub_tile.equivalent_sites;
-                    if (std::find(equiv_sites.begin(), equiv_sites.end(), &logical_block) == equiv_sites.end()) {
-                        continue;
-                    }
-
-                    auto direct_map = direct_maps.at(sub_tile.index);
-
-                    auto result = direct_map.find(t_logical_pin(pin));
-                    if (result == direct_map.end()) {
-                        archfpga_throw(__FILE__, __LINE__,
-                                       "Logical pin %d not present in pin mapping between Tile %s and Block %s.\n",
-                                       pin, tile->name, logical_block.name);
-                    }
-
-                    int sub_tile_pin_index = result->second.pin;
-                    int phy_index = sub_tile.sub_tile_to_tile_pin_indices[sub_tile_pin_index];
-
-                    bool is_ignored = tile->is_ignored_pin[phy_index];
-                    bool is_global = tile->is_pin_global[phy_index];
-
-                    auto ignored_result = ignored_pins_check_map.insert(std::pair<int, bool>(pin, is_ignored));
-                    if (!ignored_result.second && ignored_result.first->second != is_ignored) {
-                        archfpga_throw(__FILE__, __LINE__,
-                                       "Physical Tile %s has a different value for the ignored pin (physical pin: %d, logical pin: %d) "
-                                       "different from the corresponding pins of the other equivalent site %s\n.",
-                                       tile->name, phy_index, pin, logical_block.name);
-                    }
-
-                    auto global_result = global_pins_check_map.insert(std::pair<int, bool>(pin, is_global));
-                    if (!global_result.second && global_result.first->second != is_global) {
-                        archfpga_throw(__FILE__, __LINE__,
-                                       "Physical Tile %s has a different value for the global pin (physical pin: %d, logical pin: %d) "
-                                       "different from the corresponding pins of the other equivalent sites\n.",
-                                       tile->name, phy_index, pin);
-                    }
-                }
-            }
-        }
-    }
-}
-
-/* Sets up the pin classes for the type. */
-void setup_pin_classes(t_physical_tile_type* type) {
-    int i, k;
-    int pin_count;
-    int num_class;
-
-    for (i = 0; i < type->num_pins; i++) {
-        type->pin_class.push_back(OPEN);
-        type->is_ignored_pin.push_back(true);
-        type->is_pin_global.push_back(true);
-    }
-
-    pin_count = 0;
-
-    t_class_range class_range;
-
-    /* Equivalent pins share the same class, non-equivalent pins belong to different pin classes */
-    for (auto& sub_tile : type->sub_tiles) {
-        int capacity = sub_tile.capacity.total();
-        class_range.low = type->class_inf.size();
-        class_range.high = class_range.low - 1;
-        for (i = 0; i < capacity; ++i) {
-            for (const auto& port : sub_tile.ports) {
-                if (port.equivalent != PortEquivalence::NONE) {
-                    t_class class_inf;
-                    num_class = (int)type->class_inf.size();
-                    class_inf.num_pins = port.num_pins;
-                    class_inf.equivalence = port.equivalent;
-
-                    if (port.type == IN_PORT) {
-                        class_inf.type = RECEIVER;
-                    } else {
-                        VTR_ASSERT(port.type == OUT_PORT);
-                        class_inf.type = DRIVER;
-                    }
-
-                    for (k = 0; k < port.num_pins; ++k) {
-                        class_inf.pinlist.push_back(pin_count);
-                        type->pin_class[pin_count] = num_class;
-                        // clock pins and other specified global ports are initially specified
-                        // as ignored pins (i.e. connections are not created in the rr_graph and
-                        // nets connected to the port are ignored as well).
-                        type->is_ignored_pin[pin_count] = port.is_clock || port.is_non_clock_global;
-                        // clock pins and other specified global ports are flaged as global
-                        type->is_pin_global[pin_count] = port.is_clock || port.is_non_clock_global;
-
-                        if (port.is_clock) {
-                            type->clock_pin_indices.push_back(pin_count);
-                        }
-
-                        pin_count++;
-                    }
-
-                    type->class_inf.push_back(class_inf);
-                    class_range.high++;
-                } else if (port.equivalent == PortEquivalence::NONE) {
-                    for (k = 0; k < port.num_pins; ++k) {
-                        t_class class_inf;
-                        num_class = (int)type->class_inf.size();
-                        class_inf.num_pins = 1;
-                        class_inf.pinlist.push_back(pin_count);
-                        class_inf.equivalence = port.equivalent;
-
-                        if (port.type == IN_PORT) {
-                            class_inf.type = RECEIVER;
-                        } else {
-                            VTR_ASSERT(port.type == OUT_PORT);
-                            class_inf.type = DRIVER;
-                        }
-
-                        type->pin_class[pin_count] = num_class;
-                        // clock pins and other specified global ports are initially specified
-                        // as ignored pins (i.e. connections are not created in the rr_graph and
-                        // nets connected to the port are ignored as well).
-                        type->is_ignored_pin[pin_count] = port.is_clock || port.is_non_clock_global;
-                        // clock pins and other specified global ports are flaged as global
-                        type->is_pin_global[pin_count] = port.is_clock || port.is_non_clock_global;
-
-                        if (port.is_clock) {
-                            type->clock_pin_indices.push_back(pin_count);
-                        }
-
-                        pin_count++;
-
-                        type->class_inf.push_back(class_inf);
-                        class_range.high++;
-                    }
-                }
-            }
-        }
-
-        type->sub_tiles[sub_tile.index].class_range = class_range;
-    }
-
-    VTR_ASSERT(pin_count == type->num_pins);
-}
diff --git a/third_party/vtr/libs/archfpga/src/arch_util.h b/third_party/vtr/libs/archfpga/src/arch_util.h
deleted file mode 100644
index 7d882450a..000000000
--- a/third_party/vtr/libs/archfpga/src/arch_util.h
+++ /dev/null
@@ -1,125 +0,0 @@
-#ifndef ARCH_UTIL_H
-#define ARCH_UTIL_H
-
-#include <regex>
-#include <unordered_set>
-#include "physical_types.h"
-
-/**
- * @brief sets the architecture file name to be retrieved by the various parser functions
- */
-void set_arch_file_name(const char* arch);
-
-/**
- * @brief returns the architecture file name, requires that it was previously set
- */
-const char* get_arch_file_name();
-
-constexpr const char* EMPTY_BLOCK_NAME = "EMPTY";
-
-class InstPort {
-  public:
-    static constexpr int UNSPECIFIED = -1;
-
-    InstPort() = default;
-    InstPort(std::string str);
-    std::string instance_name() const { return instance_.name; }
-    std::string port_name() const { return port_.name; }
-
-    int instance_low_index() const { return instance_.low_idx; }
-    int instance_high_index() const { return instance_.high_idx; }
-    int port_low_index() const { return port_.low_idx; }
-    int port_high_index() const { return port_.high_idx; }
-
-    int num_instances() const;
-    int num_pins() const;
-
-  public:
-    void set_port_low_index(int val) { port_.low_idx = val; }
-    void set_port_high_index(int val) { port_.high_idx = val; }
-
-  private:
-    struct name_index {
-        std::string name = "";
-        int low_idx = UNSPECIFIED;
-        int high_idx = UNSPECIFIED;
-    };
-
-    name_index parse_name_index(const std::string& str);
-
-    name_index instance_;
-    name_index port_;
-};
-
-void free_arch(t_arch* arch);
-void free_arch_models(t_model* models);
-t_model* free_arch_model(t_model* model);
-void free_arch_model_ports(t_model_ports* model_ports);
-t_model_ports* free_arch_model_port(t_model_ports* model_port);
-
-void free_type_descriptors(std::vector<t_logical_block_type>& type_descriptors);
-void free_type_descriptors(std::vector<t_physical_tile_type>& type_descriptors);
-
-t_port* findPortByName(const char* name, t_pb_type* pb_type, int* high_index, int* low_index);
-
-/** @brief Returns and empty physical tile type, assigned with the given name argument.
- *         The default empty string is assigned if no name is provided
- */
-t_physical_tile_type get_empty_physical_type(const char* name = EMPTY_BLOCK_NAME);
-
-/** @brief Returns and empty logical block type, assigned with the given name argument.
- *         The default empty string is assigned if no name is provided
- */
-t_logical_block_type get_empty_logical_type(const char* name = EMPTY_BLOCK_NAME);
-
-std::unordered_set<t_logical_block_type_ptr> get_equivalent_sites_set(t_physical_tile_type_ptr type);
-
-void alloc_and_load_default_child_for_pb_type(t_pb_type* pb_type,
-                                              char* new_name,
-                                              t_pb_type* copy);
-
-void ProcessLutClass(t_pb_type* lut_pb_type);
-
-void ProcessMemoryClass(t_pb_type* mem_pb_type);
-
-e_power_estimation_method power_method_inherited(e_power_estimation_method parent_power_method);
-
-void CreateModelLibrary(t_arch* arch);
-
-void SyncModelsPbTypes(t_arch* arch,
-                       const std::vector<t_logical_block_type>& Types);
-
-void SyncModelsPbTypes_rec(t_arch* arch,
-                           t_pb_type* pb_type);
-
-void primitives_annotation_clock_match(t_pin_to_pin_annotation* annotation,
-                                       t_pb_type* parent_pb_type);
-
-bool segment_exists(const t_arch* arch, std::string name);
-const t_segment_inf* find_segment(const t_arch* arch, std::string name);
-bool is_library_model(const char* model_name);
-bool is_library_model(const t_model* model);
-
-//Returns true if the specified block type contains the specified blif model name
-bool block_type_contains_blif_model(t_logical_block_type_ptr type, const std::string& blif_model_name);
-
-//Returns true of a pb_type (or it's children) contain the specified blif model name
-bool pb_type_contains_blif_model(const t_pb_type* pb_type, const std::string& blif_model_name);
-
-const t_pin_to_pin_annotation* find_sequential_annotation(const t_pb_type* pb_type, const t_model_ports* port, enum e_pin_to_pin_delay_annotations annot_type);
-const t_pin_to_pin_annotation* find_combinational_annotation(const t_pb_type* pb_type, std::string in_port, std::string out_port);
-
-/**
- * @brief Updates the physical and logical types based on the equivalence between one and the other.
- *
- * This function is required to check and synchronize all the information to be able to use the logical block
- * equivalence, and link all the logical block pins to the physical tile ones, given that multiple logical blocks (i.e. pb_types)
- * can be placed at the same physical location if this is allowed in the architecture description.
- *
- * See https://docs.verilogtorouting.org/en/latest/tutorials/arch/equivalent_sites/ for reference
- */
-void link_physical_logical_types(std::vector<t_physical_tile_type>& PhysicalTileTypes,
-                                 std::vector<t_logical_block_type>& LogicalBlockTypes);
-
-void setup_pin_classes(t_physical_tile_type* type);
-#endif
diff --git a/third_party/vtr/libs/archfpga/src/cad_types.h b/third_party/vtr/libs/archfpga/src/cad_types.h
deleted file mode 100644
index d04845c47..000000000
--- a/third_party/vtr/libs/archfpga/src/cad_types.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Data types used to give architectural hints for the CAD algorithm
- */
-#ifndef CAD_TYPES_H
-#define CAD_TYPES_H
-
-#include "logic_types.h"
-#include "physical_types.h"
-
-struct t_pack_pattern_connections;
-
-/**
- * Data structure used to define the structure of a pack pattern that is defined in the architecture file
- *
- * For example: for a pack pattern of a 6-LUT and a FF, each of those primitives will be defined by
- * a t_pack_pattern_block and each of them will have one t_pack_pattern_connections.
- *
- * Data members:
- *      pattern_index : the id of the pattern this block is part of (matches "index" in t_pack_patterns)
- *      pb_type       : the pb_type (primitive) that this block represents (Ex. LUT, Adder, FF, etc.)
- *      connections   : linked list of connections between this t_pack_pattern_block and other
- *                      t_pack_pattern_blocks in this pack pattern as defined in the architecture
- *      block_id      : the id of this t_pack_pattern_block within its pack pattern, used to access
- *                      is_block_optional array in t_pack_patterns and also to access the atom_block_ids
- *                      vector in the t_pack_molecule data structure.
- */
-struct t_pack_pattern_block {
-    int pattern_index;
-    const t_pb_type* pb_type;
-    t_pack_pattern_connections* connections;
-    int block_id;
-};
-
-/**
- * Describes a linked list of connections of a t_pack_pattern_block
- *
- * Data members:
- *      from_block : block driving this connection
- *      from_pin   : specific pin in the from_block driving the connection
- *      to_block   : block driven by this connection
- *      to_pin     : specific pin in the to_block driven by this connection
- *      next       : next connection in the linked list
- */
-struct t_pack_pattern_connections {
-    t_pack_pattern_block* from_block;
-    t_pb_graph_pin* from_pin;
-
-    t_pack_pattern_block* to_block;
-    t_pb_graph_pin* to_pin;
-
-    t_pack_pattern_connections* next;
-};
-
-/**
- * Describes a pack pattern defined in the architecture. A pack pattern is an
- * architectural concept that defines a pattern of highly constrained and/or desirable
- * arrangement of primitives that exists within one logic cluster (Ex. CLB).
- *
- * For example: A pack pattern could be a 6-LUT and a FF. Where the architecture
- * file gives a hint for the packer to pack a 6-LUT that is followed by a FF in the
- * netlist together in one logic element. This helps the packer to achieve high
- * packing density. Another example, is a carry chain where the adders in the netlist
- * should be packed together to be able to route Cout to Cin connections using the
- * dedicated wiring in the architecture.
- *
- * Data members:
- *      name              : name given to the pack pattern in the architecture file
- *      index             : id of the pack pattern in the list_of_pack_patterns array defined in the packer code
- *      root_block        : the block defining the starting point of this pattern. For example: for
- *                          a carry chain pattern, it is the primitive driven by a cluster input pin.
- *      base_cost         : the sum of the primitive base costs of all the primitives in this pack pattern.
- *                          The primitive base cost is defined by compute_primitive_base_cost in vpr_utils.cpp 
- *      num_blocks        : total number of primitives in this pack pattern
- *      is_block_optional : [0..num_blocks-1] is true if the t_pack_pattern_block defined by block_id
- *                          is not mandatory for this pack pattern to be formed. For example, in a carry
- *                          chain pack pattern, the first adder primitive (root_block) is mandatory to
- *                          form the pattern, but every adder primitive after that is optional as the case
- *                          when forming a short adder chain.
- *      is_chain          : does this pack pattern go across clusters. For example, carry chains can normally cross
- *                          between logic blocks.
- *      chain_root_pins   : this is only non-empty for pack_patterns with is_chain set. It points to a specific
- *                          pin of the root_block primitive (Ex. cin of an adder primitive) that is directly
- *                          connected to a cluster-level block pin that can be drive from the preceding cluster.
- *                          The first dimension size is greater than one if the cluster has more than one chain
- *                          of this type. For example, an architecture with two independent adder carry chains
- *                          with different cluster level Cin and Cout pins. The second dimension size is greater
- *                          than one if cin of the cluster can reach more than one adder. This means that there is
- *                          a mux in front of the cin pin of one or more adders in the middle of this chain that
- *                          chooses between the cout of the preceding adder and the cin pin of the cluster. Which will
- *                          give more freedom to the packer when placing small adders that are driven by a constant
- *                          net (gnd/vdd)  [0...num_of_chains][0...num_of_tie_offs]
- */
-struct t_pack_patterns {
-    char* name;
-    int index;
-    float base_cost;
-
-    t_pack_pattern_block* root_block;
-
-    int num_blocks;
-    bool* is_block_optional;
-
-    bool is_chain;
-    std::vector<std::vector<t_pb_graph_pin*>> chain_root_pins;
-
-    // default constructor initializing to an invalid pack pattern
-    t_pack_patterns() {
-        name = nullptr;
-        index = -1;
-        root_block = nullptr;
-        base_cost = 0;
-        num_blocks = 0;
-        is_block_optional = nullptr;
-        is_chain = false;
-    }
-};
-
-/**
- * Keeps track of locations that a primitive can go to during packing
- * Linked list for easy insertion/deletion
- */
-struct t_cluster_placement_primitive {
-    t_pb_graph_node* pb_graph_node;
-    t_cluster_placement_primitive* next_primitive;
-    bool valid;
-    float base_cost;        /* cost independent of current status of packing */
-    float incremental_cost; /* cost dependant on current status of packing */
-};
-
-#endif
diff --git a/third_party/vtr/libs/archfpga/src/clock_types.h b/third_party/vtr/libs/archfpga/src/clock_types.h
deleted file mode 100644
index ac622d29a..000000000
--- a/third_party/vtr/libs/archfpga/src/clock_types.h
+++ /dev/null
@@ -1,63 +0,0 @@
-#ifndef CLOCK_TYPES_H
-#define CLOCK_TYPES_H
-
-#include <string>
-#include <vector>
-
-enum class e_clock_type {
-    SPINE,
-    RIB,
-    H_TREE
-};
-
-struct t_metal_layer {
-    float r_metal;
-    float c_metal;
-};
-
-struct t_wire_repeat {
-    std::string x;
-    std::string y;
-};
-
-struct t_wire {
-    std::string start;
-    std::string end;
-    std::string position;
-};
-
-struct t_clock_drive {
-    std::string name;
-    std::string offset;
-    int arch_switch_idx;
-};
-
-struct t_clock_taps {
-    std::string name;
-    std::string offset;
-    std::string increment;
-};
-
-struct t_clock_network_arch {
-    std::string name;
-    int num_inst;
-
-    e_clock_type type;
-
-    std::string metal_layer;
-    t_wire wire;
-    t_wire_repeat repeat;
-    t_clock_drive drive;
-    t_clock_taps tap;
-};
-
-struct t_clock_connection_arch {
-    std::string from;
-    std::string to;
-    int arch_switch_idx;
-    std::string locationx;
-    std::string locationy;
-    float fc;
-};
-
-#endif
diff --git a/third_party/vtr/libs/archfpga/src/device_grid.cc b/third_party/vtr/libs/archfpga/src/device_grid.cc
deleted file mode 100644
index ffe0cc50e..000000000
--- a/third_party/vtr/libs/archfpga/src/device_grid.cc
+++ /dev/null
@@ -1,42 +0,0 @@
-#include "device_grid.h"
-
-DeviceGrid::DeviceGrid(std::string grid_name, vtr::Matrix<t_grid_tile> grid)
-    : name_(grid_name)
-    , grid_(grid) {
-    count_instances();
-}
-
-DeviceGrid::DeviceGrid(std::string grid_name, vtr::Matrix<t_grid_tile> grid, std::vector<t_logical_block_type_ptr> limiting_res)
-    : DeviceGrid(grid_name, grid) {
-    limiting_resources_ = limiting_res;
-}
-
-size_t DeviceGrid::num_instances(t_physical_tile_type_ptr type) const {
-    auto iter = instance_counts_.find(type);
-    if (iter != instance_counts_.end()) {
-        //Return count
-        return iter->second;
-    }
-    return 0; //None found
-}
-
-void DeviceGrid::clear() {
-    grid_.clear();
-    instance_counts_.clear();
-}
-
-void DeviceGrid::count_instances() {
-    instance_counts_.clear();
-
-    //Count the number of blocks in the grid
-    for (size_t x = 0; x < width(); ++x) {
-        for (size_t y = 0; y < height(); ++y) {
-            auto type = grid_[x][y].type;
-
-            if (grid_[x][y].width_offset == 0 && grid_[x][y].height_offset == 0) {
-                //Add capacity only if this is the root location
-                instance_counts_[type] += type->capacity;
-            }
-        }
-    }
-}
diff --git a/third_party/vtr/libs/archfpga/src/device_grid.h b/third_party/vtr/libs/archfpga/src/device_grid.h
deleted file mode 100644
index c4bfbd087..000000000
--- a/third_party/vtr/libs/archfpga/src/device_grid.h
+++ /dev/null
@@ -1,61 +0,0 @@
-#ifndef DEVICE_GRID
-#define DEVICE_GRID
-
-#include <string>
-#include <vector>
-#include "vtr_ndmatrix.h"
-#include "physical_types.h"
-
-///@brief s_grid_tile is the minimum tile of the fpga
-struct t_grid_tile {
-    t_physical_tile_type_ptr type = nullptr; ///<Pointer to type descriptor, NULL for illegal
-    int width_offset = 0;                    ///<Number of grid tiles reserved based on width (right) of a block
-    int height_offset = 0;                   ///<Number of grid tiles reserved based on height (top) of a block
-    const t_metadata_dict* meta = nullptr;
-};
-
-class DeviceGrid {
-  public:
-    DeviceGrid() = default;
-    DeviceGrid(std::string grid_name, vtr::Matrix<t_grid_tile> grid);
-    DeviceGrid(std::string grid_name, vtr::Matrix<t_grid_tile> grid, std::vector<t_logical_block_type_ptr> limiting_res);
-
-    const std::string& name() const { return name_; }
-
-    size_t width() const { return grid_.dim_size(0); }
-    size_t height() const { return grid_.dim_size(1); }
-
-    //Note: supports 2-d indexing [0..width()-1][0..height()-1] yielding a t_grid_tile
-    auto operator[](size_t index) const { return grid_[index]; }
-    auto operator[](size_t index) { return grid_[index]; }
-
-    const vtr::Matrix<t_grid_tile>& matrix() const {
-        return grid_;
-    }
-
-    void clear();
-
-    size_t num_instances(t_physical_tile_type_ptr type) const;
-
-    /**
-     * @brief Returns the block types which limits the device size (may be empty if
-     *        resource limits were not considered when selecting the device).
-     */
-    std::vector<t_logical_block_type_ptr> limiting_resources() const { return limiting_resources_; }
-
-  private:
-    void count_instances();
-
-    std::string name_;
-
-    //Note that vtr::Matrix operator[] returns and intermediate type
-    //which can be used or indexing in the second dimension, allowing
-    //traditional 2-d indexing to be used
-    vtr::Matrix<t_grid_tile> grid_;
-
-    std::map<t_physical_tile_type_ptr, size_t> instance_counts_;
-
-    std::vector<t_logical_block_type_ptr> limiting_resources_;
-};
-
-#endif
diff --git a/third_party/vtr/libs/archfpga/src/echo_arch.cc b/third_party/vtr/libs/archfpga/src/echo_arch.cc
deleted file mode 100644
index 3943f6e6a..000000000
--- a/third_party/vtr/libs/archfpga/src/echo_arch.cc
+++ /dev/null
@@ -1,632 +0,0 @@
-#include <cstring>
-#include <cstdlib>
-#include <vector>
-#include <unordered_set>
-
-#include "echo_arch.h"
-#include "arch_types.h"
-#include "arch_util.h"
-#include "vtr_list.h"
-#include "vtr_util.h"
-#include "vtr_memory.h"
-#include "vtr_assert.h"
-
-using vtr::t_linked_vptr;
-
-void PrintArchInfo(FILE* Echo, const t_arch* arch);
-static void PrintPb_types_rec(FILE* Echo, const t_pb_type* pb_type, int level);
-static void PrintPb_types_recPower(FILE* Echo,
-                                   const t_pb_type* pb_type,
-                                   const char* tabs);
-
-/* Output the data from architecture data so user can verify it
- * was interpretted correctly. */
-void EchoArch(const char* EchoFile,
-              const std::vector<t_physical_tile_type>& PhysicalTileTypes,
-              const std::vector<t_logical_block_type>& LogicalBlockTypes,
-              const t_arch* arch) {
-    int i, j;
-    FILE* Echo;
-    t_model* cur_model;
-    t_model_ports* model_port;
-    t_linked_vptr* cur_vptr;
-
-    Echo = vtr::fopen(EchoFile, "w");
-    cur_model = nullptr;
-
-    //Print all layout device switch/segment list info first
-    PrintArchInfo(Echo, arch);
-
-    //Models
-    fprintf(Echo, "*************************************************\n");
-    for (j = 0; j < 2; j++) {
-        if (j == 0) {
-            fprintf(Echo, "Printing user models \n");
-            cur_model = arch->models;
-        } else if (j == 1) {
-            fprintf(Echo, "Printing library models \n");
-            cur_model = arch->model_library;
-        }
-        while (cur_model) {
-            fprintf(Echo, "Model: \"%s\"\n", cur_model->name);
-            model_port = cur_model->inputs;
-            while (model_port) {
-                fprintf(Echo, "\tInput Ports: \"%s\" \"%d\" min_size=\"%d\"\n",
-                        model_port->name, model_port->size,
-                        model_port->min_size);
-                model_port = model_port->next;
-            }
-            model_port = cur_model->outputs;
-            while (model_port) {
-                fprintf(Echo, "\tOutput Ports: \"%s\" \"%d\" min_size=\"%d\"\n",
-                        model_port->name, model_port->size,
-                        model_port->min_size);
-                model_port = model_port->next;
-            }
-            cur_vptr = cur_model->pb_types;
-            i = 0;
-            while (cur_vptr != nullptr) {
-                fprintf(Echo, "\tpb_type %d: \"%s\"\n", i,
-                        ((t_pb_type*)cur_vptr->data_vptr)->name);
-                cur_vptr = cur_vptr->next;
-                i++;
-            }
-
-            cur_model = cur_model->next;
-        }
-    }
-    fprintf(Echo, "*************************************************\n\n");
-    fprintf(Echo, "*************************************************\n");
-    for (auto& Type : PhysicalTileTypes) {
-        fprintf(Echo, "Type: \"%s\"\n", Type.name);
-        fprintf(Echo, "\tcapacity: %d\n", Type.capacity);
-        fprintf(Echo, "\twidth: %d\n", Type.width);
-        fprintf(Echo, "\theight: %d\n", Type.height);
-        for (const t_fc_specification& fc_spec : Type.fc_specs) {
-            fprintf(Echo, "fc_value_type: ");
-            if (fc_spec.fc_value_type == e_fc_value_type::ABSOLUTE) {
-                fprintf(Echo, "ABSOLUTE");
-            } else if (fc_spec.fc_value_type == e_fc_value_type::FRACTIONAL) {
-                fprintf(Echo, "FRACTIONAL");
-            } else {
-                VTR_ASSERT(false);
-            }
-            fprintf(Echo, " fc_value: %f", fc_spec.fc_value);
-            fprintf(Echo, " segment: %s", arch->Segments[fc_spec.seg_index].name.c_str());
-            fprintf(Echo, " pins:");
-            for (int pin : fc_spec.pins) {
-                fprintf(Echo, " %d", pin);
-            }
-            fprintf(Echo, "\n");
-        }
-        fprintf(Echo, "\tnum_drivers: %d\n", Type.num_drivers);
-        fprintf(Echo, "\tnum_receivers: %d\n", Type.num_receivers);
-
-        int index = Type.index;
-        fprintf(Echo, "\tindex: %d\n", index);
-
-        auto equivalent_sites = get_equivalent_sites_set(&Type);
-
-        for (auto LogicalBlock : equivalent_sites) {
-            fprintf(Echo, "\nEquivalent Site: %s\n", LogicalBlock->name);
-        }
-        fprintf(Echo, "\n");
-    }
-
-    fprintf(Echo, "*************************************************\n\n");
-    fprintf(Echo, "*************************************************\n");
-
-    for (auto& LogicalBlock : LogicalBlockTypes) {
-        if (LogicalBlock.pb_type) {
-            PrintPb_types_rec(Echo, LogicalBlock.pb_type, 2);
-        }
-        fprintf(Echo, "\n");
-    }
-
-    fclose(Echo);
-}
-
-//Added May 2013 Daniel Chen, help dump arch info after loading from XML
-void PrintArchInfo(FILE* Echo, const t_arch* arch) {
-    int i, j;
-
-    fprintf(Echo, "Printing architecture... \n\n");
-    //Layout
-    fprintf(Echo, "*************************************************\n");
-    for (const auto& grid_layout : arch->grid_layouts) {
-        if (grid_layout.grid_type == GridDefType::AUTO) {
-            fprintf(Echo, "Layout: '%s' Type: auto Aspect_Ratio: %f\n", grid_layout.name.c_str(), grid_layout.aspect_ratio);
-        } else {
-            VTR_ASSERT(grid_layout.grid_type == GridDefType::FIXED);
-            fprintf(Echo, "Layout: '%s' Type: fixed Width: %d Height %d\n", grid_layout.name.c_str(), grid_layout.width, grid_layout.height);
-        }
-    }
-    fprintf(Echo, "*************************************************\n\n");
-    //Device
-    fprintf(Echo, "*************************************************\n");
-    fprintf(Echo, "Device Info:\n");
-
-    fprintf(Echo,
-            "\tSizing: R_minW_nmos %e R_minW_pmos %e\n",
-            arch->R_minW_nmos, arch->R_minW_pmos);
-
-    fprintf(Echo, "\tArea: grid_logic_tile_area %e\n",
-            arch->grid_logic_tile_area);
-
-    fprintf(Echo, "\tChannel Width Distribution:\n");
-
-    switch (arch->Chans.chan_x_dist.type) {
-        case (UNIFORM):
-            fprintf(Echo, "\t\tx: type uniform peak %e\n",
-                    arch->Chans.chan_x_dist.peak);
-            break;
-        case (GAUSSIAN):
-            fprintf(Echo,
-                    "\t\tx: type gaussian peak %e \
-						  width %e Xpeak %e dc %e\n",
-                    arch->Chans.chan_x_dist.peak, arch->Chans.chan_x_dist.width,
-                    arch->Chans.chan_x_dist.xpeak, arch->Chans.chan_x_dist.dc);
-            break;
-        case (PULSE):
-            fprintf(Echo,
-                    "\t\tx: type pulse peak %e \
-						  width %e Xpeak %e dc %e\n",
-                    arch->Chans.chan_x_dist.peak, arch->Chans.chan_x_dist.width,
-                    arch->Chans.chan_x_dist.xpeak, arch->Chans.chan_x_dist.dc);
-            break;
-        case (DELTA):
-            fprintf(Echo,
-                    "\t\tx: distr dleta peak %e \
-						  Xpeak %e dc %e\n",
-                    arch->Chans.chan_x_dist.peak, arch->Chans.chan_x_dist.xpeak,
-                    arch->Chans.chan_x_dist.dc);
-            break;
-        default:
-            fprintf(Echo, "\t\tInvalid Distribution!\n");
-            break;
-    }
-
-    switch (arch->Chans.chan_y_dist.type) {
-        case (UNIFORM):
-            fprintf(Echo, "\t\ty: type uniform peak %e\n",
-                    arch->Chans.chan_y_dist.peak);
-            break;
-        case (GAUSSIAN):
-            fprintf(Echo,
-                    "\t\ty: type gaussian peak %e \
-						  width %e Xpeak %e dc %e\n",
-                    arch->Chans.chan_y_dist.peak, arch->Chans.chan_y_dist.width,
-                    arch->Chans.chan_y_dist.xpeak, arch->Chans.chan_y_dist.dc);
-            break;
-        case (PULSE):
-            fprintf(Echo,
-                    "\t\ty: type pulse peak %e \
-						  width %e Xpeak %e dc %e\n",
-                    arch->Chans.chan_y_dist.peak, arch->Chans.chan_y_dist.width,
-                    arch->Chans.chan_y_dist.xpeak, arch->Chans.chan_y_dist.dc);
-            break;
-        case (DELTA):
-            fprintf(Echo,
-                    "\t\ty: distr dleta peak %e \
-						  Xpeak %e dc %e\n",
-                    arch->Chans.chan_y_dist.peak, arch->Chans.chan_y_dist.xpeak,
-                    arch->Chans.chan_y_dist.dc);
-            break;
-        default:
-            fprintf(Echo, "\t\tInvalid Distribution!\n");
-            break;
-    }
-
-    switch (arch->SBType) {
-        case (WILTON):
-            fprintf(Echo, "\tSwitch Block: type wilton fs %d\n", arch->Fs);
-            break;
-        case (UNIVERSAL):
-            fprintf(Echo, "\tSwitch Block: type universal fs %d\n", arch->Fs);
-            break;
-        case (SUBSET):
-            fprintf(Echo, "\tSwitch Block: type subset fs %d\n", arch->Fs);
-            break;
-        default:
-            break;
-    }
-
-    fprintf(Echo, "\tInput Connect Block Switch Name: %s\n", arch->ipin_cblock_switch_name.c_str());
-
-    fprintf(Echo, "*************************************************\n\n");
-    //Switch list
-    fprintf(Echo, "*************************************************\n");
-    fprintf(Echo, "Switch List:\n");
-
-    //13 is hard coded because format of %e is always 1.123456e+12
-    //It always consists of 10 alphanumeric digits, a decimal
-    //and a sign
-    for (i = 0; i < arch->num_switches; i++) {
-        if (arch->Switches[i].type() == SwitchType::MUX) {
-            fprintf(Echo, "\tSwitch[%d]: name %s type mux\n", i + 1, arch->Switches[i].name);
-        } else if (arch->Switches[i].type() == SwitchType::TRISTATE) {
-            fprintf(Echo, "\tSwitch[%d]: name %s type tristate\n", i + 1, arch->Switches[i].name);
-        } else if (arch->Switches[i].type() == SwitchType::SHORT) {
-            fprintf(Echo, "\tSwitch[%d]: name %s type short\n", i + 1, arch->Switches[i].name);
-        } else if (arch->Switches[i].type() == SwitchType::BUFFER) {
-            fprintf(Echo, "\tSwitch[%d]: name %s type buffer\n", i + 1, arch->Switches[i].name);
-        } else {
-            VTR_ASSERT(arch->Switches[i].type() == SwitchType::PASS_GATE);
-            fprintf(Echo, "\tSwitch[%d]: name %s type pass_gate\n", i + 1, arch->Switches[i].name);
-        }
-        fprintf(Echo, "\t\t\t\tR %e Cin %e Cout %e\n", arch->Switches[i].R,
-                arch->Switches[i].Cin, arch->Switches[i].Cout);
-        fprintf(Echo, "\t\t\t\t#Tdel values %d buf_size %e mux_trans_size %e\n",
-                (int)arch->Switches[i].Tdel_map_.size(), arch->Switches[i].buf_size,
-                arch->Switches[i].mux_trans_size);
-        if (arch->Switches[i].power_buffer_type == POWER_BUFFER_TYPE_AUTO) {
-            fprintf(Echo, "\t\t\t\tpower_buffer_size auto\n");
-        } else {
-            fprintf(Echo, "\t\t\t\tpower_buffer_size %e\n",
-                    arch->Switches[i].power_buffer_size);
-        }
-    }
-
-    fprintf(Echo, "*************************************************\n\n");
-    //Segment List
-    fprintf(Echo, "*************************************************\n");
-    fprintf(Echo, "Segment List:\n");
-    for (i = 0; i < (int)(arch->Segments).size(); i++) {
-        const struct t_segment_inf& seg = arch->Segments[i];
-        fprintf(Echo,
-                "\tSegment[%d]: frequency %d length %d R_metal %e C_metal %e\n",
-                i + 1, seg.frequency, seg.length,
-                seg.Rmetal, seg.Cmetal);
-
-        if (seg.directionality == UNI_DIRECTIONAL) {
-            //wire_switch == arch_opin_switch
-            fprintf(Echo, "\t\t\t\ttype unidir mux_name %s\n",
-                    arch->Switches[seg.arch_wire_switch].name);
-        } else { //Should be bidir
-            fprintf(Echo, "\t\t\t\ttype bidir wire_switch %s arch_opin_switch %s\n",
-                    arch->Switches[seg.arch_wire_switch].name,
-                    arch->Switches[seg.arch_opin_switch].name);
-        }
-
-        fprintf(Echo, "\t\t\t\tcb ");
-        for (j = 0; j < (int)seg.cb.size(); j++) {
-            if (seg.cb[j]) {
-                fprintf(Echo, "1 ");
-            } else {
-                fprintf(Echo, "0 ");
-            }
-        }
-        fprintf(Echo, "\n");
-
-        fprintf(Echo, "\t\t\t\tsb ");
-        for (j = 0; j < (int)seg.sb.size(); j++) {
-            if (seg.sb[j]) {
-                fprintf(Echo, "1 ");
-            } else {
-                fprintf(Echo, "0 ");
-            }
-        }
-        fprintf(Echo, "\n");
-    }
-    fprintf(Echo, "*************************************************\n\n");
-    //Direct List
-    fprintf(Echo, "*************************************************\n");
-    fprintf(Echo, "Direct List:\n");
-    for (i = 0; i < arch->num_directs; i++) {
-        fprintf(Echo, "\tDirect[%d]: name %s from_pin %s to_pin %s\n", i + 1,
-                arch->Directs[i].name, arch->Directs[i].from_pin,
-                arch->Directs[i].to_pin);
-        fprintf(Echo, "\t\t\t\t x_offset %d y_offset %d z_offset %d\n",
-                arch->Directs[i].x_offset, arch->Directs[i].y_offset,
-                arch->Directs[i].sub_tile_offset);
-    }
-    fprintf(Echo, "*************************************************\n\n");
-
-    //Architecture Power
-    fprintf(Echo, "*************************************************\n");
-    fprintf(Echo, "Power:\n");
-    if (arch->power) {
-        fprintf(Echo, "\tlocal_interconnect C_wire %e factor %f\n",
-                arch->power->C_wire_local, arch->power->local_interc_factor);
-        fprintf(Echo, "\tlogical_effort_factor %f trans_per_sram_bit %f\n",
-                arch->power->logical_effort_factor,
-                arch->power->transistors_per_SRAM_bit);
-    }
-
-    fprintf(Echo, "*************************************************\n\n");
-    //Architecture Clock
-    fprintf(Echo, "*************************************************\n");
-    fprintf(Echo, "Clock:\n");
-    if (arch->clocks) {
-        for (i = 0; i < arch->clocks->num_global_clocks; i++) {
-            if (arch->clocks->clock_inf[i].autosize_buffer) {
-                fprintf(Echo, "\tClock[%d]: buffer_size auto C_wire %e", i + 1,
-                        arch->clocks->clock_inf->C_wire);
-            } else {
-                fprintf(Echo, "\tClock[%d]: buffer_size %e C_wire %e", i + 1,
-                        arch->clocks->clock_inf[i].buffer_size,
-                        arch->clocks->clock_inf[i].C_wire);
-            }
-            fprintf(Echo, "\t\t\t\tstat_prob %f switch_density %f period %e",
-                    arch->clocks->clock_inf[i].prob,
-                    arch->clocks->clock_inf[i].dens,
-                    arch->clocks->clock_inf[i].period);
-        }
-    }
-
-    fprintf(Echo, "*************************************************\n\n");
-}
-
-static void PrintPb_types_rec(FILE* Echo, const t_pb_type* pb_type, int level) {
-    int i, j, k;
-    char* tabs;
-
-    tabs = (char*)vtr::malloc((level + 1) * sizeof(char));
-    for (i = 0; i < level; i++) {
-        tabs[i] = '\t';
-    }
-    tabs[level] = '\0';
-
-    fprintf(Echo, "%spb_type name: %s\n", tabs, pb_type->name);
-    fprintf(Echo, "%s\tblif_model: %s\n", tabs, pb_type->blif_model);
-    fprintf(Echo, "%s\tclass_type: %d\n", tabs, pb_type->class_type);
-    fprintf(Echo, "%s\tnum_modes: %d\n", tabs, pb_type->num_modes);
-    fprintf(Echo, "%s\tnum_ports: %d\n", tabs, pb_type->num_ports);
-    for (i = 0; i < pb_type->num_ports; i++) {
-        fprintf(Echo, "%s\tport %s type %d num_pins %d\n", tabs,
-                pb_type->ports[i].name, pb_type->ports[i].type,
-                pb_type->ports[i].num_pins);
-    }
-
-    if (pb_type->num_modes > 0) { /*one or more modes*/
-        for (i = 0; i < pb_type->num_modes; i++) {
-            fprintf(Echo, "%s\tmode %s:\n", tabs, pb_type->modes[i].name);
-            for (j = 0; j < pb_type->modes[i].num_pb_type_children; j++) {
-                PrintPb_types_rec(Echo, &pb_type->modes[i].pb_type_children[j],
-                                  level + 2);
-            }
-            for (j = 0; j < pb_type->modes[i].num_interconnect; j++) {
-                fprintf(Echo, "%s\t\tinterconnect %d %s %s\n", tabs,
-                        pb_type->modes[i].interconnect[j].type,
-                        pb_type->modes[i].interconnect[j].input_string,
-                        pb_type->modes[i].interconnect[j].output_string);
-                for (k = 0;
-                     k < pb_type->modes[i].interconnect[j].num_annotations;
-                     k++) {
-                    fprintf(Echo, "%s\t\t\tannotation %s %s %d: %s\n", tabs,
-                            pb_type->modes[i].interconnect[j].annotations[k].input_pins,
-                            pb_type->modes[i].interconnect[j].annotations[k].output_pins,
-                            pb_type->modes[i].interconnect[j].annotations[k].format,
-                            pb_type->modes[i].interconnect[j].annotations[k].value[0]);
-                }
-                //Print power info for interconnects
-                if (pb_type->modes[i].interconnect[j].interconnect_power) {
-                    if (pb_type->modes[i].interconnect[j].interconnect_power->power_usage.dynamic
-                        || pb_type->modes[i].interconnect[j].interconnect_power->power_usage.leakage) {
-                        fprintf(Echo, "%s\t\t\tpower %e %e\n", tabs,
-                                pb_type->modes[i].interconnect[j].interconnect_power->power_usage.dynamic,
-                                pb_type->modes[i].interconnect[j].interconnect_power->power_usage.leakage);
-                    }
-                }
-            }
-        }
-    } else { /*leaf pb with unknown model*/
-        /*LUT(names) already handled, it naturally has 2 modes.
-         * I/O has no annotations to be displayed
-         * All other library or user models may have delays specificied, e.g. Tsetup and Tcq
-         * Display the additional information*/
-        if (strcmp(pb_type->model->name, MODEL_NAMES)
-            && strcmp(pb_type->model->name, MODEL_INPUT)
-            && strcmp(pb_type->model->name, MODEL_OUTPUT)) {
-            for (k = 0; k < pb_type->num_annotations; k++) {
-                fprintf(Echo, "%s\t\t\tannotation %s %s %s %d: %s\n", tabs,
-                        pb_type->annotations[k].clock,
-                        pb_type->annotations[k].input_pins,
-                        pb_type->annotations[k].output_pins,
-                        pb_type->annotations[k].format,
-                        pb_type->annotations[k].value[0]);
-            }
-        }
-    }
-
-    if (pb_type->pb_type_power) {
-        PrintPb_types_recPower(Echo, pb_type, tabs);
-    }
-    free(tabs);
-}
-
-//Added May 2013 Daniel Chen, help dump arch info after loading from XML
-static void PrintPb_types_recPower(FILE* Echo,
-                                   const t_pb_type* pb_type,
-                                   const char* tabs) {
-    int i = 0;
-    /*Print power information for each pb if available*/
-    switch (pb_type->pb_type_power->estimation_method) {
-        case POWER_METHOD_UNDEFINED:
-            fprintf(Echo, "%s\tpower method: undefined\n", tabs);
-            break;
-        case POWER_METHOD_IGNORE:
-            if (pb_type->parent_mode) {
-                /*if NOT top-level pb (all top-level pb has NULL parent_mode, check parent's power method
-                 * This is because of the inheritance property of auto-size*/
-                if (pb_type->parent_mode->parent_pb_type->pb_type_power->estimation_method
-                    == POWER_METHOD_IGNORE)
-                    break;
-            }
-            fprintf(Echo, "%s\tpower method: ignore\n", tabs);
-            break;
-        case POWER_METHOD_SUM_OF_CHILDREN:
-            fprintf(Echo, "%s\tpower method: sum-of-children\n", tabs);
-            break;
-        case POWER_METHOD_AUTO_SIZES:
-            if (pb_type->parent_mode) {
-                /*if NOT top-level pb (all top-level pb has NULL parent_mode, check parent's power method
-                 * This is because of the inheritance property of auto-size*/
-                if (pb_type->parent_mode->parent_pb_type->pb_type_power->estimation_method
-                    == POWER_METHOD_AUTO_SIZES)
-                    break;
-            }
-            fprintf(Echo, "%s\tpower method: auto-size\n", tabs);
-            break;
-        case POWER_METHOD_SPECIFY_SIZES:
-            if (pb_type->parent_mode) {
-                /*if NOT top-level pb (all top-level pb has NULL parent_mode, check parent's power method
-                 * This is because of the inheritance property of specify-size*/
-                if (pb_type->parent_mode->parent_pb_type->pb_type_power->estimation_method
-                    == POWER_METHOD_SPECIFY_SIZES)
-                    break;
-            }
-
-            fprintf(Echo, "%s\tpower method: specify-size\n", tabs);
-            for (i = 0; i < pb_type->num_ports; i++) {
-                //Print all the power information on each port, only if available,
-                //will not print if value is 0 or NULL
-                if (pb_type->ports[i].port_power->buffer_type
-                    || pb_type->ports[i].port_power->wire_type
-                    || pb_type->pb_type_power->absolute_power_per_instance.leakage
-                    || pb_type->pb_type_power->absolute_power_per_instance.dynamic) {
-                    fprintf(Echo, "%s\t\tport %s type %d num_pins %d\n", tabs,
-                            pb_type->ports[i].name, pb_type->ports[i].type,
-                            pb_type->ports[i].num_pins);
-                    //Buffer size
-                    switch (pb_type->ports[i].port_power->buffer_type) {
-                        case (POWER_BUFFER_TYPE_UNDEFINED):
-                        case (POWER_BUFFER_TYPE_NONE):
-                            break;
-                        case (POWER_BUFFER_TYPE_AUTO):
-                            fprintf(Echo, "%s\t\t\tbuffer_size %s\n", tabs, "auto");
-                            break;
-                        case (POWER_BUFFER_TYPE_ABSOLUTE_SIZE):
-                            fprintf(Echo, "%s\t\t\tbuffer_size %f\n", tabs,
-                                    pb_type->ports[i].port_power->buffer_size);
-                            break;
-                        default:
-                            break;
-                    }
-                    switch (pb_type->ports[i].port_power->wire_type) {
-                        case (POWER_WIRE_TYPE_UNDEFINED):
-                        case (POWER_WIRE_TYPE_IGNORED):
-                            break;
-                        case (POWER_WIRE_TYPE_C):
-                            fprintf(Echo, "%s\t\t\twire_cap: %e\n", tabs,
-                                    pb_type->ports[i].port_power->wire.C);
-                            break;
-                        case (POWER_WIRE_TYPE_ABSOLUTE_LENGTH):
-                            fprintf(Echo, "%s\t\t\twire_len(abs): %e\n", tabs,
-                                    pb_type->ports[i].port_power->wire.absolute_length);
-                            break;
-                        case (POWER_WIRE_TYPE_RELATIVE_LENGTH):
-                            fprintf(Echo, "%s\t\t\twire_len(rel): %f\n", tabs,
-                                    pb_type->ports[i].port_power->wire.relative_length);
-                            break;
-                        case (POWER_WIRE_TYPE_AUTO):
-                            fprintf(Echo, "%s\t\t\twire_len: %s\n", tabs, "auto");
-                            break;
-                        default:
-                            break;
-                    }
-                }
-            }
-            //Output static power even if non zero
-            if (pb_type->pb_type_power->absolute_power_per_instance.leakage)
-                fprintf(Echo, "%s\t\tstatic power_per_instance: %e \n", tabs,
-                        pb_type->pb_type_power->absolute_power_per_instance.leakage);
-
-            if (pb_type->pb_type_power->absolute_power_per_instance.dynamic)
-                fprintf(Echo, "%s\t\tdynamic power_per_instance: %e \n", tabs,
-                        pb_type->pb_type_power->absolute_power_per_instance.dynamic);
-            break;
-        case POWER_METHOD_TOGGLE_PINS:
-            if (pb_type->parent_mode) {
-                /*if NOT top-level pb (all top-level pb has NULL parent_mode, check parent's power method
-                 * This is because once energy_per_toggle is specified at one level,
-                 * all children pb's are energy_per_toggle and only want to display once*/
-                if (pb_type->parent_mode->parent_pb_type->pb_type_power->estimation_method
-                    == POWER_METHOD_TOGGLE_PINS)
-                    break;
-            }
-
-            fprintf(Echo, "%s\tpower method: pin-toggle\n", tabs);
-            for (i = 0; i < pb_type->num_ports; i++) {
-                /*Print all the power information on each port, only if available,
-                 * will not print if value is 0 or NULL*/
-                if (pb_type->ports[i].port_power->energy_per_toggle
-                    || pb_type->ports[i].port_power->scaled_by_port
-                    || pb_type->pb_type_power->absolute_power_per_instance.leakage
-                    || pb_type->pb_type_power->absolute_power_per_instance.dynamic) {
-                    fprintf(Echo, "%s\t\tport %s type %d num_pins %d\n", tabs,
-                            pb_type->ports[i].name, pb_type->ports[i].type,
-                            pb_type->ports[i].num_pins);
-                    //Toggle Energy
-                    if (pb_type->ports[i].port_power->energy_per_toggle) {
-                        fprintf(Echo, "%s\t\t\tenergy_per_toggle %e\n", tabs,
-                                pb_type->ports[i].port_power->energy_per_toggle);
-                    }
-                    //Scaled by port (could be reversed)
-                    if (pb_type->ports[i].port_power->scaled_by_port) {
-                        if (pb_type->ports[i].port_power->scaled_by_port->num_pins
-                            > 1) {
-                            fprintf(Echo,
-                                    (pb_type->ports[i].port_power->reverse_scaled ? "%s\t\t\tscaled_by_static_prob_n: %s[%d]\n" : "%s\t\t\tscaled_by_static_prob: %s[%d]\n"),
-                                    tabs,
-                                    pb_type->ports[i].port_power->scaled_by_port->name,
-                                    pb_type->ports[i].port_power->scaled_by_port_pin_idx);
-                        } else {
-                            fprintf(Echo,
-                                    (pb_type->ports[i].port_power->reverse_scaled ? "%s\t\t\tscaled_by_static_prob_n: %s\n" : "%s\t\t\tscaled_by_static_prob: %s\n"),
-                                    tabs,
-                                    pb_type->ports[i].port_power->scaled_by_port->name);
-                        }
-                    }
-                }
-            }
-            //Output static power even if non zero
-            if (pb_type->pb_type_power->absolute_power_per_instance.leakage)
-                fprintf(Echo, "%s\t\tstatic power_per_instance: %e \n", tabs,
-                        pb_type->pb_type_power->absolute_power_per_instance.leakage);
-
-            if (pb_type->pb_type_power->absolute_power_per_instance.dynamic)
-                fprintf(Echo, "%s\t\tdynamic power_per_instance: %e \n", tabs,
-                        pb_type->pb_type_power->absolute_power_per_instance.dynamic);
-
-            break;
-        case POWER_METHOD_C_INTERNAL:
-            if (pb_type->parent_mode) {
-                /*if NOT top-level pb (all top-level pb has NULL parent_mode, check parent's power method
-                 * This is because of values at this level includes all children pb's*/
-                if (pb_type->parent_mode->parent_pb_type->pb_type_power->estimation_method
-                    == POWER_METHOD_C_INTERNAL)
-                    break;
-            }
-            fprintf(Echo, "%s\tpower method: C-internal\n", tabs);
-
-            if (pb_type->pb_type_power->absolute_power_per_instance.leakage)
-                fprintf(Echo, "%s\t\tstatic power_per_instance: %e \n", tabs,
-                        pb_type->pb_type_power->absolute_power_per_instance.leakage);
-
-            if (pb_type->pb_type_power->C_internal)
-                fprintf(Echo, "%s\t\tdynamic c-internal: %e \n", tabs,
-                        pb_type->pb_type_power->C_internal);
-            break;
-        case POWER_METHOD_ABSOLUTE:
-            if (pb_type->parent_mode) {
-                /*if NOT top-level pb (all top-level pb has NULL parent_mode, check parent's power method
-                 * This is because of values at this level includes all children pb's*/
-                if (pb_type->parent_mode->parent_pb_type->pb_type_power->estimation_method
-                    == POWER_METHOD_ABSOLUTE)
-                    break;
-            }
-            fprintf(Echo, "%s\tpower method: absolute\n", tabs);
-            if (pb_type->pb_type_power->absolute_power_per_instance.leakage)
-                fprintf(Echo, "%s\t\tstatic power_per_instance: %e \n", tabs,
-                        pb_type->pb_type_power->absolute_power_per_instance.leakage);
-
-            if (pb_type->pb_type_power->absolute_power_per_instance.dynamic)
-                fprintf(Echo, "%s\t\tdynamic power_per_instance: %e \n", tabs,
-                        pb_type->pb_type_power->absolute_power_per_instance.dynamic);
-            break;
-        default:
-            fprintf(Echo, "%s\tpower method: error has occcured\n", tabs);
-            break;
-    }
-}
diff --git a/third_party/vtr/libs/archfpga/src/echo_arch.h b/third_party/vtr/libs/archfpga/src/echo_arch.h
deleted file mode 100644
index 4e9d23001..000000000
--- a/third_party/vtr/libs/archfpga/src/echo_arch.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef ECHO_ARCH_H
-#define ECHO_ARCH_H
-
-#include "arch_types.h"
-
-void EchoArch(const char* EchoFile,
-              const std::vector<t_physical_tile_type>& PhysicalTileTypes,
-              const std::vector<t_logical_block_type>& LogicalBlockTypes,
-              const t_arch* arch);
-
-#endif
diff --git a/third_party/vtr/libs/archfpga/src/histogram.cc b/third_party/vtr/libs/archfpga/src/histogram.cc
deleted file mode 100644
index aa00ec2ae..000000000
--- a/third_party/vtr/libs/archfpga/src/histogram.cc
+++ /dev/null
@@ -1,119 +0,0 @@
-#include <algorithm>
-#include <string>
-#include <sstream>
-#include <cmath>
-
-#include "vtr_log.h"
-#include "vtr_assert.h"
-#include "vtr_util.h"
-
-#include "histogram.h"
-
-std::vector<HistogramBucket> build_histogram(std::vector<float> values, size_t num_bins, float min_value, float max_value) {
-    std::vector<HistogramBucket> histogram;
-
-    if (values.empty()) return histogram;
-
-    if (std::isnan(min_value)) {
-        min_value = *std::min_element(values.begin(), values.end());
-    }
-    if (std::isnan(max_value)) {
-        max_value = *std::max_element(values.begin(), values.end());
-    }
-
-    //Determine the bin size
-    float range = max_value - min_value;
-    float bin_size = range / num_bins;
-
-    //Create the buckets
-    float bucket_min = min_value;
-    for (size_t ibucket = 0; ibucket < num_bins; ++ibucket) {
-        float bucket_max = bucket_min + bin_size;
-
-        histogram.emplace_back(bucket_min, bucket_max);
-
-        bucket_min = bucket_max;
-    }
-
-    //To avoid round-off errors we force the max value of the last bucket equal to the max value
-    histogram[histogram.size() - 1].max_value = max_value;
-
-    //Count the values into the buckets
-    auto comp = [](const HistogramBucket& bucket, float value) {
-        return bucket.max_value < value;
-    };
-    for (auto value : values) {
-        //Find the bucket who's max is less than the current slack
-
-        auto iter = std::lower_bound(histogram.begin(), histogram.end(), value, comp);
-        VTR_ASSERT(iter != histogram.end());
-
-        iter->count++;
-    }
-
-    return histogram;
-}
-
-void print_histogram(std::vector<HistogramBucket> histogram) {
-    size_t char_width = 80;
-
-    auto lines = format_histogram(histogram, char_width);
-
-    for (auto line : lines) {
-        VTR_LOG("%s\n", line.c_str());
-    }
-}
-
-float get_histogram_mode(std::vector<HistogramBucket> histogram) {
-    size_t max_count = 0;
-    float mode = 0.0;
-    for (auto bucket : histogram) {
-        if (bucket.count > max_count) {
-            mode = bucket.max_value;
-
-            max_count = bucket.count;
-        }
-    }
-
-    return mode;
-}
-
-std::vector<std::string> format_histogram(std::vector<HistogramBucket> histogram, size_t width) {
-    std::vector<std::string> lines;
-
-    //Determine the maximum and total count
-    size_t max_count = 0;
-    size_t total_count = 0;
-    for (const HistogramBucket& bucket : histogram) {
-        max_count = std::max(max_count, bucket.count);
-        total_count += bucket.count;
-    }
-
-    if (max_count == 0) return lines; //Nothing to do
-
-    int count_digits = ceil(log10(max_count));
-
-    //Determine the maximum prefix length
-    size_t bar_len = width
-                     - (18 + 3) //bucket prefix
-                     - count_digits
-                     - 7  //percentage
-                     - 2; //-2 for " |" appended after count
-
-    for (size_t ibucket = 0; ibucket < histogram.size(); ++ibucket) {
-        std::string line;
-
-        float pct = histogram[ibucket].count / float(total_count) * 100;
-
-        line += vtr::string_fmt("[% 9.2g:% 9.2g) %*zu (%5.1f%%) |", histogram[ibucket].min_value, histogram[ibucket].max_value, count_digits, histogram[ibucket].count, pct);
-
-        size_t num_chars = std::round((double(histogram[ibucket].count) / max_count) * bar_len);
-        for (size_t i = 0; i < num_chars; ++i) {
-            line += "*";
-        }
-
-        lines.push_back(line);
-    }
-
-    return lines;
-}
diff --git a/third_party/vtr/libs/archfpga/src/histogram.h b/third_party/vtr/libs/archfpga/src/histogram.h
deleted file mode 100644
index 29a0ae470..000000000
--- a/third_party/vtr/libs/archfpga/src/histogram.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef VPR_HISTOGRAM_H
-#define VPR_HISTOGRAM_H
-
-#include <limits>
-#include <vector>
-
-struct HistogramBucket {
-    HistogramBucket(float min_val, float max_val, float init_count = 0) noexcept
-        : min_value(min_val)
-        , max_value(max_val)
-        , count(init_count) {}
-
-    float min_value = std::numeric_limits<float>::quiet_NaN();
-    float max_value = std::numeric_limits<float>::quiet_NaN();
-    size_t count = 0;
-};
-
-std::vector<HistogramBucket> build_histogram(std::vector<float> values, size_t num_bins, float min_value = std::numeric_limits<float>::quiet_NaN(), float max_value = std::numeric_limits<float>::quiet_NaN());
-
-void print_histogram(std::vector<HistogramBucket> histogram);
-
-float get_histogram_mode(std::vector<HistogramBucket> histogram);
-
-std::vector<std::string> format_histogram(std::vector<HistogramBucket> histogram, size_t width = 80);
-
-#endif
diff --git a/third_party/vtr/libs/archfpga/src/logic_types.h b/third_party/vtr/libs/archfpga/src/logic_types.h
deleted file mode 100644
index 4427b8501..000000000
--- a/third_party/vtr/libs/archfpga/src/logic_types.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Data types describing the logic (technology-mapped) models that the architecture can implement.
- * Logic models include LUT (.names), flipflop (.latch), inpad, outpad, memory slice, etc.
- * Logic models are from the internal VPR library, or can be user-defined (both defined in .blif)
- *
- * Date: February 19, 2009
- * Authors: Jason Luu and Kenneth Kent
- */
-
-#ifndef LOGIC_TYPES_H
-#define LOGIC_TYPES_H
-
-#include "vtr_list.h"
-#include <vector>
-#include <string>
-
-/*
- * Logic model data types
- * A logic model is described by its I/O ports and function name
- */
-enum PORTS {
-    IN_PORT,
-    OUT_PORT,
-    INOUT_PORT,
-    ERR_PORT
-};
-
-struct t_model_ports {
-    enum PORTS dir = ERR_PORT;                         /* port direction */
-    char* name = nullptr;                              /* name of this port */
-    int size = 0;                                      /* maximum number of pins */
-    int min_size = 0;                                  /* minimum number of pins */
-    bool is_clock = false;                             /* clock? */
-    bool is_non_clock_global = false;                  /* not a clock but is a special, global, control signal (eg global asynchronous reset, etc) */
-    std::string clock;                                 /* The clock associated with this pin (if the pin is sequential) */
-    std::vector<std::string> combinational_sink_ports; /* The other ports on this model which are combinationally driven by this port */
-
-    t_model_ports* next = nullptr; /* next port */
-
-    int index = -1; /* indexing for array look-up */
-};
-
-struct t_model {
-    char* name = nullptr;             /* name of this logic model */
-    t_model_ports* inputs = nullptr;  /* linked list of input/clock ports */
-    t_model_ports* outputs = nullptr; /* linked list of output ports */
-    void* instances = nullptr;
-    int used = 0;
-    vtr::t_linked_vptr* pb_types = nullptr; /* Physical block types that implement this model */
-    t_model* next = nullptr;                /* next model (linked list) */
-
-    bool never_prune = false; /* Don't remove from the netlist even if a block of this type has no output ports used and, therefore, unconnected to the rest of the netlist */
-
-    int index = -1;
-};
-
-#endif
diff --git a/third_party/vtr/libs/archfpga/src/main.cc b/third_party/vtr/libs/archfpga/src/main.cc
deleted file mode 100644
index 6a9e3f354..000000000
--- a/third_party/vtr/libs/archfpga/src/main.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Test libarchfpga, try reading an architecture and print the results to a file
- *
- * Date: February 19, 2009
- * Author: Jason Luu
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <vector>
-
-#include "vtr_error.h"
-#include "vtr_memory.h"
-
-#include "arch_util.h"
-#include "read_xml_arch_file.h"
-#include "echo_arch.h"
-
-void print_help();
-
-int main(int argc, char** argv) {
-    try {
-        t_arch arch;
-        std::vector<t_physical_tile_type> physical_tile_types;
-        std::vector<t_logical_block_type> logical_block_types;
-
-        if (argc - 1 != 3) {
-            printf("Error: Unexpected # of arguments.  Expected 3 found %d arguments\n",
-                   argc);
-            print_help();
-            return 1;
-        }
-
-        printf("------------------------------------------------------------------------------\n");
-        printf("- Read architecture file and print library data structures into an output file\n");
-        printf("------------------------------------------------------------------------------\n\n");
-
-        printf(
-            "Inputs: \n"
-            "architecture %s \n"
-            "timing_driven %d \n"
-            "output file %s\n",
-            argv[1], atoi(argv[2]), argv[3]);
-        printf("Reading in architecture\n");
-
-        /* function declarations */
-        XmlReadArch(argv[1], atoi(argv[2]), &arch, physical_tile_types, logical_block_types);
-
-        printf("Printing Results\n");
-
-        EchoArch(argv[3], physical_tile_types, logical_block_types, &arch);
-
-        // CLEAN UP
-        free_arch(&arch);
-        free_type_descriptors(physical_tile_types);
-        free_type_descriptors(logical_block_types);
-
-    } catch (vtr::VtrError& vtr_error) {
-        printf("Failed to process architecture %s: %s\n", argv[1], vtr_error.what());
-        return 1;
-    } catch (std::exception& error) {
-        printf("Failed to process architecture %s: %s\n", argv[1], error.what());
-        return 1;
-    }
-
-    printf("Done\n");
-
-    return 0;
-}
-
-void print_help() {
-    printf("\n---------------------------------------------------------------------------------------\n");
-    printf("read_arch - Read a VPR architecture file and output internal data structures\n");
-    printf("\n");
-    printf("Usage: read_arch <arch_file.xml> <timing_driven (0|1)> <output_file>\n");
-    printf("\n");
-    printf("  ex: read_arch k4_n10.xml 1 arch_data.out\n");
-    printf("      Read timing-driven architecture k4_n10.xml and output the results to arch_data.out\n");
-    printf("\n---------------------------------------------------------------------------------------\n");
-}
diff --git a/third_party/vtr/libs/archfpga/src/parse_switchblocks.cc b/third_party/vtr/libs/archfpga/src/parse_switchblocks.cc
deleted file mode 100644
index 8587b1c56..000000000
--- a/third_party/vtr/libs/archfpga/src/parse_switchblocks.cc
+++ /dev/null
@@ -1,473 +0,0 @@
-/*
- * See vpr/SRC/route/build_switchblocks.c for a detailed description of how the new
- * switch block format works and what files are involved.
- *
- *
- * A large chunk of this file is dedicated to helping parse the initial switchblock
- * specificaiton in the XML arch file, providing error checking, etc.
- *
- * Another large chunk of this file is dedicated to parsing the actual formulas
- * specified by the switch block permutation functions into their numeric counterparts.
- */
-
-#include <string.h>
-#include <string>
-#include <sstream>
-#include <vector>
-#include <stack>
-#include <utility>
-#include <algorithm>
-
-#include "vtr_assert.h"
-#include "vtr_util.h"
-
-#include "pugixml.hpp"
-#include "pugixml_util.hpp"
-
-#include "arch_error.h"
-
-#include "read_xml_util.h"
-#include "arch_util.h"
-#include "arch_types.h"
-#include "physical_types.h"
-#include "parse_switchblocks.h"
-
-using pugiutil::ReqOpt;
-
-using vtr::FormulaParser;
-using vtr::t_formula_data;
-
-/**** Function Declarations ****/
-/*---- Functions for Parsing Switchblocks from Architecture ----*/
-
-//Load an XML wireconn specification into a t_wireconn_inf
-t_wireconn_inf parse_wireconn(pugi::xml_node node, const pugiutil::loc_data& loc_data);
-
-//Process the desired order of a wireconn
-static void parse_switchpoint_order(const char* order, SwitchPointOrder& switchpoint_order);
-
-//Process a wireconn defined in the inline style (using attributes)
-void parse_wireconn_inline(pugi::xml_node node, const pugiutil::loc_data& loc_data, t_wireconn_inf& wc);
-
-//Process a wireconn defined in the multinode style (more advanced specification)
-void parse_wireconn_multinode(pugi::xml_node node, const pugiutil::loc_data& loc_data, t_wireconn_inf& wc);
-
-//Process a <from> or <to> sub-node of a multinode wireconn
-t_wire_switchpoints parse_wireconn_from_to_node(pugi::xml_node node, const pugiutil::loc_data& loc_data);
-
-/* parses the wire types specified in the comma-separated 'ch' char array into the vector wire_points_vec.
- * Spaces are trimmed off */
-static void parse_comma_separated_wire_types(const char* ch, std::vector<t_wire_switchpoints>& wire_switchpoints);
-
-/* parses the wirepoints specified in ch into the vector wire_points_vec */
-static void parse_comma_separated_wire_points(const char* ch, std::vector<t_wire_switchpoints>& wire_switchpoints);
-
-/* Parses the number of connections type */
-static void parse_num_conns(std::string num_conns, t_wireconn_inf& wireconn);
-
-/* checks for correctness of a unidir switchblock. */
-static void check_unidir_switchblock(const t_switchblock_inf* sb);
-
-/* checks for correctness of a bidir switchblock. */
-static void check_bidir_switchblock(const t_permutation_map* permutation_map);
-
-/* checks for correctness of a wireconn segment specification. */
-static void check_wireconn(const t_arch* arch, const t_wireconn_inf& wireconn);
-
-/**** Function Definitions ****/
-
-/*---- Functions for Parsing Switchblocks from Architecture ----*/
-
-/* Reads-in the wire connections specified for the switchblock in the xml arch file */
-void read_sb_wireconns(const t_arch_switch_inf* /*switches*/, int /*num_switches*/, pugi::xml_node Node, t_switchblock_inf* sb, const pugiutil::loc_data& loc_data) {
-    /* Make sure that Node is a switchblock */
-    check_node(Node, "switchblock", loc_data);
-
-    int num_wireconns;
-    pugi::xml_node SubElem;
-
-    /* count the number of specified wire connections for this SB */
-    num_wireconns = count_children(Node, "wireconn", loc_data, ReqOpt::OPTIONAL);
-    sb->wireconns.reserve(num_wireconns);
-
-    if (num_wireconns > 0) {
-        SubElem = get_first_child(Node, "wireconn", loc_data);
-    }
-    for (int i = 0; i < num_wireconns; i++) {
-        t_wireconn_inf wc = parse_wireconn(SubElem, loc_data);
-        sb->wireconns.push_back(wc);
-        SubElem = SubElem.next_sibling(SubElem.name());
-    }
-
-    return;
-}
-
-t_wireconn_inf parse_wireconn(pugi::xml_node node, const pugiutil::loc_data& loc_data) {
-    t_wireconn_inf wc;
-
-    size_t num_children = count_children(node, "from", loc_data, ReqOpt::OPTIONAL);
-    num_children += count_children(node, "to", loc_data, ReqOpt::OPTIONAL);
-
-    if (num_children == 0) {
-        parse_wireconn_inline(node, loc_data, wc);
-    } else {
-        VTR_ASSERT(num_children > 0);
-        parse_wireconn_multinode(node, loc_data, wc);
-    }
-
-    return wc;
-}
-
-void parse_wireconn_inline(pugi::xml_node node, const pugiutil::loc_data& loc_data, t_wireconn_inf& wc) {
-    //Parse an inline wireconn definition, using attributes
-    expect_only_attributes(node, {"num_conns", "from_type", "to_type", "from_switchpoint", "to_switchpoint", "from_order", "to_order"}, loc_data);
-
-    /* get the connection style */
-    const char* char_prop = get_attribute(node, "num_conns", loc_data).value();
-    parse_num_conns(char_prop, wc);
-
-    /* get from type */
-    char_prop = get_attribute(node, "from_type", loc_data).value();
-    parse_comma_separated_wire_types(char_prop, wc.from_switchpoint_set);
-
-    /* get to type */
-    char_prop = get_attribute(node, "to_type", loc_data).value();
-    parse_comma_separated_wire_types(char_prop, wc.to_switchpoint_set);
-
-    /* get the source wire point */
-    char_prop = get_attribute(node, "from_switchpoint", loc_data).value();
-    parse_comma_separated_wire_points(char_prop, wc.from_switchpoint_set);
-
-    /* get the destination wire point */
-    char_prop = get_attribute(node, "to_switchpoint", loc_data).value();
-    parse_comma_separated_wire_points(char_prop, wc.to_switchpoint_set);
-
-    char_prop = get_attribute(node, "from_order", loc_data, ReqOpt::OPTIONAL).value();
-    parse_switchpoint_order(char_prop, wc.from_switchpoint_order);
-
-    char_prop = get_attribute(node, "to_order", loc_data, ReqOpt::OPTIONAL).value();
-    parse_switchpoint_order(char_prop, wc.to_switchpoint_order);
-}
-
-void parse_wireconn_multinode(pugi::xml_node node, const pugiutil::loc_data& loc_data, t_wireconn_inf& wc) {
-    expect_only_children(node, {"from", "to"}, loc_data);
-
-    /* get the connection style */
-    const char* char_prop = get_attribute(node, "num_conns", loc_data).value();
-    parse_num_conns(char_prop, wc);
-
-    char_prop = get_attribute(node, "from_order", loc_data, ReqOpt::OPTIONAL).value();
-    parse_switchpoint_order(char_prop, wc.from_switchpoint_order);
-
-    char_prop = get_attribute(node, "to_order", loc_data, ReqOpt::OPTIONAL).value();
-    parse_switchpoint_order(char_prop, wc.to_switchpoint_order);
-
-    size_t num_from_children = count_children(node, "from", loc_data);
-    size_t num_to_children = count_children(node, "to", loc_data);
-
-    VTR_ASSERT(num_from_children > 0);
-    VTR_ASSERT(num_to_children > 0);
-
-    for (pugi::xml_node child : node.children()) {
-        if (child.name() == std::string("from")) {
-            t_wire_switchpoints from_switchpoints = parse_wireconn_from_to_node(child, loc_data);
-            wc.from_switchpoint_set.push_back(from_switchpoints);
-        } else if (child.name() == std::string("to")) {
-            t_wire_switchpoints to_switchpoints = parse_wireconn_from_to_node(child, loc_data);
-            wc.to_switchpoint_set.push_back(to_switchpoints);
-        } else {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(node), "Unrecognized child node '%s' of parent node '%s'",
-                           node.name(), child.name());
-        }
-    }
-}
-
-t_wire_switchpoints parse_wireconn_from_to_node(pugi::xml_node node, const pugiutil::loc_data& loc_data) {
-    expect_only_attributes(node, {"type", "switchpoint"}, loc_data);
-
-    size_t attribute_count = count_attributes(node, loc_data);
-
-    if (attribute_count != 2) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(node), "Expected only 2 attributes on node '%s'",
-                       node.name());
-    }
-
-    t_wire_switchpoints wire_switchpoints;
-    wire_switchpoints.segment_name = get_attribute(node, "type", loc_data).value();
-
-    auto points_str = get_attribute(node, "switchpoint", loc_data).value();
-    for (const auto& point_str : vtr::split(points_str, ",")) {
-        int switchpoint = vtr::atoi(point_str);
-        wire_switchpoints.switchpoints.push_back(switchpoint);
-    }
-
-    if (wire_switchpoints.switchpoints.empty()) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(node), "Empty switchpoint specification",
-                       node.name());
-    }
-
-    return wire_switchpoints;
-}
-
-static void parse_switchpoint_order(const char* order, SwitchPointOrder& switchpoint_order) {
-    if (order == std::string("")) {
-        switchpoint_order = SwitchPointOrder::SHUFFLED; //Default
-    } else if (order == std::string("fixed")) {
-        switchpoint_order = SwitchPointOrder::FIXED;
-    } else if (order == std::string("shuffled")) {
-        switchpoint_order = SwitchPointOrder::SHUFFLED;
-    } else {
-        archfpga_throw(__FILE__, __LINE__, "Unrecognized switchpoint order '%s'", order);
-    }
-}
-
-/* parses the wire types specified in the comma-separated 'ch' char array into the vector wire_points_vec.
- * Spaces are trimmed off */
-static void parse_comma_separated_wire_types(const char* ch, std::vector<t_wire_switchpoints>& wire_switchpoints) {
-    auto types = vtr::split(ch, ",");
-
-    if (types.empty()) {
-        archfpga_throw(__FILE__, __LINE__, "parse_comma_separated_wire_types: found empty wireconn wire type entry\n");
-    }
-
-    for (const auto& type : types) {
-        t_wire_switchpoints wsp;
-        wsp.segment_name = type;
-
-        wire_switchpoints.push_back(wsp);
-    }
-}
-
-/* parses the wirepoints specified in the comma-separated 'ch' char array into the vector wire_points_vec */
-static void parse_comma_separated_wire_points(const char* ch, std::vector<t_wire_switchpoints>& wire_switchpoints) {
-    auto points = vtr::split(ch, ",");
-    if (points.empty()) {
-        archfpga_throw(__FILE__, __LINE__, "parse_comma_separated_wire_points: found empty wireconn wire point entry\n");
-    }
-
-    for (const auto& point_str : points) {
-        int point = vtr::atoi(point_str);
-
-        for (auto& wire_switchpoint : wire_switchpoints) {
-            wire_switchpoint.switchpoints.push_back(point);
-        }
-    }
-}
-
-static void parse_num_conns(std::string num_conns, t_wireconn_inf& wireconn) {
-    //num_conns is now interpretted as a formula and processed in build_switchblocks
-    wireconn.num_conns_formula = num_conns;
-}
-
-/* Loads permutation funcs specified under Node into t_switchblock_inf. Node should be
- * <switchfuncs> */
-void read_sb_switchfuncs(pugi::xml_node Node, t_switchblock_inf* sb, const pugiutil::loc_data& loc_data) {
-    /* Make sure the passed-in is correct */
-    check_node(Node, "switchfuncs", loc_data);
-
-    pugi::xml_node SubElem;
-
-    /* get the number of specified permutation functions */
-    int num_funcs = count_children(Node, "func", loc_data, ReqOpt::OPTIONAL);
-
-    const char* func_type;
-    const char* func_formula;
-    std::vector<std::string>* func_ptr;
-
-    /* used to index into permutation map of switchblock */
-    SB_Side_Connection conn;
-
-    /* now we iterate through all the specified permutation functions, and
-     * load them into the switchblock structure as appropriate */
-    if (num_funcs > 0) {
-        SubElem = get_first_child(Node, "func", loc_data);
-    }
-    for (int ifunc = 0; ifunc < num_funcs; ifunc++) {
-        /* get function type */
-        func_type = get_attribute(SubElem, "type", loc_data).as_string(nullptr);
-
-        /* get function formula */
-        func_formula = get_attribute(SubElem, "formula", loc_data).as_string(nullptr);
-
-        /* go through all the possible cases of func_type */
-        if (0 == strcmp(func_type, "lt")) {
-            conn.set_sides(LEFT, TOP);
-        } else if (0 == strcmp(func_type, "lr")) {
-            conn.set_sides(LEFT, RIGHT);
-        } else if (0 == strcmp(func_type, "lb")) {
-            conn.set_sides(LEFT, BOTTOM);
-        } else if (0 == strcmp(func_type, "tl")) {
-            conn.set_sides(TOP, LEFT);
-        } else if (0 == strcmp(func_type, "tb")) {
-            conn.set_sides(TOP, BOTTOM);
-        } else if (0 == strcmp(func_type, "tr")) {
-            conn.set_sides(TOP, RIGHT);
-        } else if (0 == strcmp(func_type, "rt")) {
-            conn.set_sides(RIGHT, TOP);
-        } else if (0 == strcmp(func_type, "rl")) {
-            conn.set_sides(RIGHT, LEFT);
-        } else if (0 == strcmp(func_type, "rb")) {
-            conn.set_sides(RIGHT, BOTTOM);
-        } else if (0 == strcmp(func_type, "bl")) {
-            conn.set_sides(BOTTOM, LEFT);
-        } else if (0 == strcmp(func_type, "bt")) {
-            conn.set_sides(BOTTOM, TOP);
-        } else if (0 == strcmp(func_type, "br")) {
-            conn.set_sides(BOTTOM, RIGHT);
-        } else {
-            /* unknown permutation function */
-            archfpga_throw(__FILE__, __LINE__, "Unknown permutation function specified: %s\n", func_type);
-        }
-        func_ptr = &(sb->permutation_map[conn]);
-
-        /* Here we load the specified switch function(s) */
-        func_ptr->push_back(std::string(func_formula));
-
-        func_ptr = nullptr;
-        /* get the next switchblock function */
-        SubElem = SubElem.next_sibling(SubElem.name());
-    }
-
-    return;
-}
-
-/* checks for correctness of switch block read-in from the XML architecture file */
-void check_switchblock(const t_switchblock_inf* sb, const t_arch* arch) {
-    /* get directionality */
-    enum e_directionality directionality = sb->directionality;
-
-    /* Check for errors in the switchblock descriptions */
-    if (UNI_DIRECTIONAL == directionality) {
-        check_unidir_switchblock(sb);
-    } else {
-        VTR_ASSERT(BI_DIRECTIONAL == directionality);
-        check_bidir_switchblock(&(sb->permutation_map));
-    }
-
-    /* check that specified wires exist */
-    for (const auto& wireconn : sb->wireconns) {
-        check_wireconn(arch, wireconn);
-    }
-
-    //TODO:
-    /* check that the wire segment directionality matches the specified switch block directionality */
-    /* check for duplicate names */
-    /* check that specified switches exist */
-    /* check that type of switchblock matches type of switch specified */
-}
-
-/* checks for correctness of a unidirectional switchblock. hard exit if error found (to be changed to throw later) */
-static void check_unidir_switchblock(const t_switchblock_inf* sb) {
-    /* Check that the destination wire points are always the starting points (i.e. of wire point 0) */
-    for (const t_wireconn_inf& wireconn : sb->wireconns) {
-        for (const t_wire_switchpoints& wire_to_points : wireconn.to_switchpoint_set) {
-            if (wire_to_points.switchpoints.size() > 1 || wire_to_points.switchpoints[0] != 0) {
-                archfpga_throw(__FILE__, __LINE__, "Unidirectional switch blocks are currently only allowed to drive the start points of wire segments\n");
-            }
-        }
-    }
-}
-
-/* checks for correctness of a bidirectional switchblock */
-static void check_bidir_switchblock(const t_permutation_map* permutation_map) {
-    /**** check that if side1->side2 is specified, then side2->side1 is not, as it is implicit ****/
-
-    /* variable used to index into the permutation map */
-    SB_Side_Connection conn;
-
-    /* iterate over all combinations of from_side -> to side */
-    for (e_side from_side : {TOP, RIGHT, BOTTOM, LEFT}) {
-        for (e_side to_side : {TOP, RIGHT, BOTTOM, LEFT}) {
-            /* can't connect a switchblock side to itself */
-            if (from_side == to_side) {
-                continue;
-            }
-
-            /* index into permutation map with this variable */
-            conn.set_sides(from_side, to_side);
-
-            /* check if a connection between these sides exists */
-            t_permutation_map::const_iterator it = (*permutation_map).find(conn);
-            if (it != (*permutation_map).end()) {
-                /* the two sides are connected */
-                /* check if the opposite connection has been specified */
-                conn.set_sides(to_side, from_side);
-                it = (*permutation_map).find(conn);
-                if (it != (*permutation_map).end()) {
-                    archfpga_throw(__FILE__, __LINE__, "If a bidirectional switch block specifies a connection from side1->side2, no connection should be specified from side2->side1 as it is implicit.\n");
-                }
-            }
-        }
-    }
-
-    return;
-}
-
-static void check_wireconn(const t_arch* arch, const t_wireconn_inf& wireconn) {
-    for (const t_wire_switchpoints& wire_switchpoints : wireconn.from_switchpoint_set) {
-        auto seg_name = wire_switchpoints.segment_name;
-
-        //Make sure the segment exists
-        const t_segment_inf* seg_info = find_segment(arch, seg_name);
-        if (!seg_info) {
-            archfpga_throw(__FILE__, __LINE__, "Failed to find segment '%s' for <wireconn> from type specification\n", seg_name.c_str());
-        }
-
-        //Check that the specified switch points are valid
-        for (int switchpoint : wire_switchpoints.switchpoints) {
-            if (switchpoint < 0) {
-                archfpga_throw(__FILE__, __LINE__, "Invalid <wireconn> from_switchpoint '%d' (must be >= 0)\n", switchpoint, seg_name.c_str());
-            }
-            if (switchpoint >= seg_info->length) {
-                archfpga_throw(__FILE__, __LINE__, "Invalid <wireconn> from_switchpoints '%d' (must be < %d)\n", switchpoint, seg_info->length);
-            }
-            //TODO: check that points correspond to valid sb locations
-        }
-    }
-
-    for (const t_wire_switchpoints& wire_switchpoints : wireconn.to_switchpoint_set) {
-        auto seg_name = wire_switchpoints.segment_name;
-
-        //Make sure the segment exists
-        const t_segment_inf* seg_info = find_segment(arch, seg_name);
-        if (!seg_info) {
-            archfpga_throw(__FILE__, __LINE__, "Failed to find segment '%s' for <wireconn> to type specification\n", seg_name.c_str());
-        }
-
-        //Check that the specified switch points are valid
-        for (int switchpoint : wire_switchpoints.switchpoints) {
-            if (switchpoint < 0) {
-                archfpga_throw(__FILE__, __LINE__, "Invalid <wireconn> to_switchpoint '%d' (must be >= 0)\n", switchpoint, seg_name.c_str());
-            }
-            if (switchpoint >= seg_info->length) {
-                archfpga_throw(__FILE__, __LINE__, "Invalid <wireconn> to_switchpoints '%d' (must be < %d)\n", switchpoint, seg_info->length);
-            }
-            //TODO: check that points correspond to valid sb locations
-        }
-    }
-}
-
-/*---- Functions for Parsing the Symbolic Switchblock Formulas ----*/
-
-/* returns integer result according to the specified switchblock formula and data. formula may be piece-wise */
-int get_sb_formula_raw_result(FormulaParser& formula_parser, const char* formula, const t_formula_data& mydata) {
-    /* the result of the formula will be an integer */
-    int result = -1;
-
-    /* check formula */
-    if (nullptr == formula) {
-        archfpga_throw(__FILE__, __LINE__, "in get_sb_formula_result: SB formula pointer NULL\n");
-    } else if ('\0' == formula[0]) {
-        archfpga_throw(__FILE__, __LINE__, "in get_sb_formula_result: SB formula empty\n");
-    }
-
-    /* parse based on whether formula is piece-wise or not */
-    if (formula_parser.is_piecewise_formula(formula)) {
-        //EXPERIMENTAL
-        result = formula_parser.parse_piecewise_formula(formula, mydata);
-    } else {
-        result = formula_parser.parse_formula(formula, mydata);
-    }
-
-    return result;
-}
diff --git a/third_party/vtr/libs/archfpga/src/parse_switchblocks.h b/third_party/vtr/libs/archfpga/src/parse_switchblocks.h
deleted file mode 100644
index a76860318..000000000
--- a/third_party/vtr/libs/archfpga/src/parse_switchblocks.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef PARSE_SWITCHBLOCKS_H
-#define PARSE_SWITCHBLOCKS_H
-
-#include <vector>
-#include "pugixml.hpp"
-#include "pugixml_util.hpp"
-#include "vtr_expr_eval.h"
-
-/**** Function Declarations ****/
-/* Loads permutation funcs specified under Node into t_switchblock_inf */
-void read_sb_switchfuncs(pugi::xml_node Node, t_switchblock_inf* sb, const pugiutil::loc_data& loc_data);
-
-/* Reads-in the wire connections specified for the switchblock in the xml arch file */
-void read_sb_wireconns(const t_arch_switch_inf* switches, int num_switches, pugi::xml_node Node, t_switchblock_inf* sb, const pugiutil::loc_data& loc_data);
-
-/* checks for correctness of switch block read-in from the XML architecture file */
-void check_switchblock(const t_switchblock_inf* sb, const t_arch* arch);
-
-/* returns integer result according to the specified formula and data */
-int get_sb_formula_raw_result(vtr::FormulaParser& formula_parser, const char* formula, const vtr::t_formula_data& mydata);
-
-#endif /* PARSE_SWITCHBLOCKS_H */
diff --git a/third_party/vtr/libs/archfpga/src/physical_types.cc b/third_party/vtr/libs/archfpga/src/physical_types.cc
deleted file mode 100644
index dfa110f39..000000000
--- a/third_party/vtr/libs/archfpga/src/physical_types.cc
+++ /dev/null
@@ -1,255 +0,0 @@
-#include "physical_types.h"
-#include "vtr_math.h"
-#include "vtr_util.h"
-#include "vtr_log.h"
-
-#include "arch_util.h"
-
-static bool switch_type_is_buffered(SwitchType type);
-static bool switch_type_is_configurable(SwitchType type);
-static e_directionality switch_type_directionaity(SwitchType type);
-
-//Ensure the constant has external linkage to avoid linking errors
-constexpr int t_arch_switch_inf::UNDEFINED_FANIN;
-
-/*
- * t_arch_switch_inf
- */
-
-SwitchType t_arch_switch_inf::type() const {
-    return type_;
-}
-
-bool t_arch_switch_inf::buffered() const {
-    return switch_type_is_buffered(type());
-}
-
-bool t_arch_switch_inf::configurable() const {
-    return switch_type_is_configurable(type());
-}
-
-e_directionality t_arch_switch_inf::directionality() const {
-    return switch_type_directionaity(type());
-}
-
-float t_arch_switch_inf::Tdel(int fanin) const {
-    if (fixed_Tdel()) {
-        auto itr = Tdel_map_.find(UNDEFINED_FANIN);
-        VTR_ASSERT(itr != Tdel_map_.end());
-        return itr->second;
-    } else {
-        VTR_ASSERT(fanin >= 0);
-        return vtr::linear_interpolate_or_extrapolate(&Tdel_map_, fanin);
-    }
-}
-
-bool t_arch_switch_inf::fixed_Tdel() const {
-    return Tdel_map_.size() == 1 && Tdel_map_.count(UNDEFINED_FANIN);
-}
-
-void t_arch_switch_inf::set_Tdel(int fanin, float delay) {
-    Tdel_map_[fanin] = delay;
-}
-
-void t_arch_switch_inf::set_type(SwitchType type_val) {
-    type_ = type_val;
-}
-
-/*
- * t_rr_switch_inf
- */
-
-SwitchType t_rr_switch_inf::type() const {
-    return type_;
-}
-
-bool t_rr_switch_inf::buffered() const {
-    return switch_type_is_buffered(type());
-}
-
-bool t_rr_switch_inf::configurable() const {
-    return switch_type_is_configurable(type());
-}
-
-void t_rr_switch_inf::set_type(SwitchType type_val) {
-    type_ = type_val;
-}
-
-static bool switch_type_is_buffered(SwitchType type) {
-    //Muxes and Tristates isolate thier input and output into
-    //seperate DC connected sub-circuits
-    return type == SwitchType::MUX
-           || type == SwitchType::TRISTATE
-           || type == SwitchType::BUFFER;
-}
-
-static bool switch_type_is_configurable(SwitchType type) {
-    //Shorts and buffers are non-configurable
-    return !(type == SwitchType::SHORT
-             || type == SwitchType::BUFFER);
-}
-
-static e_directionality switch_type_directionaity(SwitchType type) {
-    if (type == SwitchType::SHORT
-        || type == SwitchType::PASS_GATE) {
-        //Shorts and pass gates can conduct in either direction
-        return e_directionality::BI_DIRECTIONAL;
-    } else {
-        VTR_ASSERT_SAFE(type == SwitchType::MUX
-                        || type == SwitchType::TRISTATE
-                        || type == SwitchType::BUFFER);
-        //Buffered switches can only drive in one direction
-        return e_directionality::UNI_DIRECTIONAL;
-    }
-}
-
-/*
- * t_physical_tile_type
- */
-std::vector<int> t_physical_tile_type::get_clock_pins_indices() const {
-    for (auto pin_index : this->clock_pin_indices) {
-        VTR_ASSERT(pin_index < this->num_pins);
-    }
-
-    return this->clock_pin_indices;
-}
-
-int t_physical_tile_type::get_sub_tile_loc_from_pin(int pin_num) const {
-    VTR_ASSERT(pin_num < this->num_pins);
-
-    for (auto sub_tile : this->sub_tiles) {
-        auto max_inst_pins = sub_tile.num_phy_pins / sub_tile.capacity.total();
-
-        for (int pin = 0; pin < sub_tile.num_phy_pins; pin++) {
-            if (sub_tile.sub_tile_to_tile_pin_indices[pin] == pin_num) {
-                //If the physical tile pin matches pin_num, return the
-                //corresponding absolute capacity location of the sub_tile
-                return pin / max_inst_pins + sub_tile.capacity.low;
-            }
-        }
-    }
-
-    return OPEN;
-}
-
-bool t_physical_tile_type::is_empty() const {
-    return std::string(name) == std::string(EMPTY_BLOCK_NAME);
-}
-
-/*
- * t_logical_block_type
- */
-
-bool t_logical_block_type::is_empty() const {
-    return std::string(name) == std::string(EMPTY_BLOCK_NAME);
-}
-
-/**
- * t_pb_graph_node
- */
-
-int t_pb_graph_node::num_pins() const {
-    int npins = 0;
-
-    for (int iport = 0; iport < num_input_ports; ++iport) {
-        npins += num_input_pins[iport];
-    }
-
-    for (int iport = 0; iport < num_output_ports; ++iport) {
-        npins += num_output_pins[iport];
-    }
-
-    for (int iport = 0; iport < num_clock_ports; ++iport) {
-        npins += num_clock_pins[iport];
-    }
-
-    return npins;
-}
-
-std::string t_pb_graph_node::hierarchical_type_name() const {
-    std::vector<std::string> names;
-    std::string child_mode_name;
-
-    for (auto curr_node = this; curr_node != nullptr; curr_node = curr_node->parent_pb_graph_node) {
-        std::string type_name;
-
-        //get name and type of physical block
-        type_name = curr_node->pb_type->name;
-        type_name += "[" + std::to_string(curr_node->placement_index) + "]";
-
-        if (!curr_node->is_primitive()) {
-            // primitives have no modes
-            type_name += "[" + child_mode_name + "]";
-        }
-
-        if (!curr_node->is_root()) {
-            // get the mode of this child
-            child_mode_name = curr_node->pb_type->parent_mode->name;
-        }
-
-        names.push_back(type_name);
-    }
-
-    //We walked up from the leaf to root, so we join in reverse order
-    return vtr::join(names.rbegin(), names.rend(), "/");
-}
-
-/**
- * t_pb_graph_pin
- */
-
-std::string t_pb_graph_pin::to_string(const bool full_description) const {
-    std::string parent_name = this->parent_node->pb_type->name;
-    std::string parent_index = std::to_string(this->parent_node->placement_index);
-    std::string port_name = this->port->name;
-    std::string pin_index = std::to_string(this->pin_number);
-
-    std::string pin_string = parent_name + "[" + parent_index + "]";
-    pin_string += "." + port_name + "[" + pin_index + "]";
-
-    if (!full_description) return pin_string;
-
-    // Traverse upward through the pb_type hierarchy, constructing
-    // name that represents the whole hierarchy to reach this pin.
-    auto parent_parent_node = this->parent_node->parent_pb_graph_node;
-    for (auto pb_node = parent_parent_node; pb_node != nullptr; pb_node = pb_node->parent_pb_graph_node) {
-        std::string parent = pb_node->pb_type->name;
-        parent += "[" + std::to_string(pb_node->placement_index) + "]";
-        pin_string = parent + "/" + pin_string;
-    }
-    return pin_string;
-}
-
-/**
- * t_pb_graph_edge
- */
-
-bool t_pb_graph_edge::annotated_with_pattern(int pattern_index) const {
-    for (int ipattern = 0; ipattern < this->num_pack_patterns; ipattern++) {
-        if (this->pack_pattern_indices[ipattern] == pattern_index) {
-            return true;
-        }
-    }
-
-    return false;
-}
-
-bool t_pb_graph_edge::belongs_to_pattern(int pattern_index) const {
-    // return true if this edge is annotated with this pattern
-    if (this->annotated_with_pattern(pattern_index)) {
-        return true;
-        // if not annotated check if its pattern should be inferred
-    } else if (this->infer_pattern) {
-        // if pattern should be inferred try to infer it from all connected output edges
-        for (int ipin = 0; ipin < this->num_output_pins; ipin++) {
-            for (int iedge = 0; iedge < this->output_pins[ipin]->num_output_edges; iedge++) {
-                if (this->output_pins[ipin]->output_edges[iedge]->belongs_to_pattern(pattern_index)) {
-                    return true;
-                }
-            }
-        }
-    }
-
-    // return false otherwise
-    return false;
-}
diff --git a/third_party/vtr/libs/archfpga/src/physical_types.h b/third_party/vtr/libs/archfpga/src/physical_types.h
deleted file mode 100644
index 4b326dd1f..000000000
--- a/third_party/vtr/libs/archfpga/src/physical_types.h
+++ /dev/null
@@ -1,1896 +0,0 @@
-/*
- * Data types describing the physical components on the FPGA architecture.
- *
- * We assume an island style FPGA where complex logic blocks are arranged in a grid and each side of the logic block has access to the inter-block interconnect.  To keep the logic blocks general,
- * we allow arbitrary hierarchy, modes, primitives, and interconnect within each complex logic block.  The data structures here describe the properties of the island-style FPGA as well as the details on
- * hierarchy, modes, primitives, and interconnects within each logic block.
- *
- * Data structures that flesh out
- *
- * The data structures that store the
- *
- * Key data types:
- * t_physical_tile_type: represents the type of a tile in the device grid and describes its physical characteristics (pin locations, area, width, height, etc.)
- * t_logical_block_type: represents and describes the type of a clustered block
- * pb_type: describes the types of physical blocks within the t_logical_block_type in a hierarchy where the top block is the complex block and the leaf blocks implement one logical block
- * pb_graph_node: is a flattened version of pb_type so a pb_type with 10 instances will have 10 pb_graph_nodes representing each instance
- *
- * Additional notes:
- *
- * The interconnect specified in the architecture file gets flattened out in the pb_graph_node netlist.  Each pb_graph_node contains pb_graph_pins which allow it to connect to other pb_graph_nodes.
- * These pins are in connected to other pins through pb_graph_edges. The pin connections are based on what is specified in the <interconnect> tags of the architecture file.
- *
- * Date: February 19, 2009
- * Authors: Jason Luu and Kenneth Kent
- */
-
-#ifndef PHYSICAL_TYPES_H
-#define PHYSICAL_TYPES_H
-
-#include <functional>
-#include <vector>
-#include <unordered_map>
-#include <string>
-#include <map>
-#include <unordered_map>
-#include <limits>
-#include <numeric>
-
-#include "vtr_ndmatrix.h"
-#include "vtr_hash.h"
-#include "vtr_bimap.h"
-#include "vtr_string_interning.h"
-
-#include "logic_types.h"
-#include "clock_types.h"
-
-//Forward declarations
-struct t_clock_arch;
-struct t_clock_network;
-struct t_power_arch;
-struct t_interconnect_pins;
-struct t_power_usage;
-struct t_pb_type_power;
-struct t_mode_power;
-struct t_interconnect_power;
-struct t_port_power;
-struct t_physical_tile_port;
-struct t_equivalent_site;
-struct t_physical_tile_type;
-typedef const t_physical_tile_type* t_physical_tile_type_ptr;
-struct t_sub_tile;
-struct t_logical_block_type;
-typedef const t_logical_block_type* t_logical_block_type_ptr;
-struct t_logical_pin;
-struct t_physical_pin;
-struct t_pb_type;
-struct t_pb_graph_pin_power;
-struct t_mode;
-struct t_pb_graph_node_power;
-struct t_port;
-class t_pb_graph_node;
-struct t_pin_to_pin_annotation;
-struct t_interconnect;
-class t_pb_graph_pin;
-class t_pb_graph_edge;
-struct t_cluster_placement_primitive;
-struct t_arch;
-enum class e_sb_type;
-
-/****************************************************************************/
-/* FPGA metadata types                                                      */
-/****************************************************************************/
-/* t_metadata_value, and t_metadata_dict provide a types to store
- * metadata about the FPGA architecture and routing routing graph along side
- * the pb_type, grid, node and edge descriptions.
- *
- * The metadata is stored as a simple key/value map.  key's are string and an
- * optional coordinate. t_metadata_value provides the value storage, which is a
- * string.
- */
-
-// Metadata value storage.
-class t_metadata_value {
-  public:
-    explicit t_metadata_value(vtr::interned_string v)
-        : value_(v) {}
-    explicit t_metadata_value(const t_metadata_value& o) noexcept
-        : value_(o.value_) {}
-
-    // Return string value.
-    vtr::interned_string as_string() const { return value_; }
-
-  private:
-    vtr::interned_string value_;
-};
-
-// Metadata storage dictionary.
-struct t_metadata_dict : vtr::flat_map<
-                             vtr::interned_string,
-                             std::vector<t_metadata_value>,
-                             vtr::interned_string_less> {
-    // Is this key present in the map?
-    inline bool has(vtr::interned_string key) const {
-        return this->count(key) >= 1;
-    }
-
-    // Get all metadata values matching key.
-    //
-    // Returns nullptr if key is not found.
-    inline const std::vector<t_metadata_value>* get(vtr::interned_string key) const {
-        auto iter = this->find(key);
-        if (iter != this->end()) {
-            return &iter->second;
-        }
-        return nullptr;
-    }
-
-    // Get metadata values matching key.
-    //
-    // Returns nullptr if key is not found or if multiple values are prsent
-    // per key.
-    inline const t_metadata_value* one(vtr::interned_string key) const {
-        auto values = get(key);
-        if (values == nullptr) {
-            return nullptr;
-        }
-        if (values->size() != 1) {
-            return nullptr;
-        }
-        return &((*values)[0]);
-    }
-
-    // Adds value to key.
-    void add(vtr::interned_string key, vtr::interned_string value) {
-        // Get the iterator to the key, which may already have elements if
-        // add was called with this key in the past.
-        (*this)[key].emplace_back(t_metadata_value(value));
-    }
-};
-
-/*************************************************************************************************/
-/* FPGA basic definitions                                                                        */
-/*************************************************************************************************/
-
-/* Pins describe I/O into clustered logic block.
- * A pin may be unconnected, driving a net or in the fanout, respectively. */
-enum e_pin_type {
-    OPEN = -1,
-    DRIVER = 0,
-    RECEIVER = 1
-};
-
-/* Type of interconnect within complex block: Complete for everything connected (full crossbar), direct for one-to-one connections, and mux for many-to-one connections */
-enum e_interconnect {
-    COMPLETE_INTERC = 1,
-    DIRECT_INTERC = 2,
-    MUX_INTERC = 3
-};
-
-/* Orientations. */
-enum e_side : unsigned char {
-    TOP = 0,
-    RIGHT = 1,
-    BOTTOM = 2,
-    LEFT = 3,
-    NUM_SIDES
-};
-constexpr std::array<e_side, NUM_SIDES> SIDES = {{TOP, RIGHT, BOTTOM, LEFT}};                    //Set of all side orientations
-constexpr std::array<const char*, NUM_SIDES> SIDE_STRING = {{"TOP", "RIGHT", "BOTTOM", "LEFT"}}; //String versions of side orientations
-
-/* pin location distributions */
-enum e_pin_location_distr {
-    E_SPREAD_PIN_DISTR,
-    E_PERIMETER_PIN_DISTR,
-    E_SPREAD_INPUTS_PERIMETER_OUTPUTS_PIN_DISTR,
-    E_CUSTOM_PIN_DISTR
-};
-
-/* pb_type class */
-enum e_pb_type_class {
-    UNKNOWN_CLASS = 0,
-    LUT_CLASS = 1,
-    LATCH_CLASS = 2,
-    MEMORY_CLASS = 3,
-    NUM_CLASSES
-};
-
-// Set of all pb_type classes
-constexpr std::array<e_pb_type_class, NUM_CLASSES> PB_TYPE_CLASSES = {
-    {UNKNOWN_CLASS, LUT_CLASS, LATCH_CLASS, MEMORY_CLASS}};
-
-// String versions of pb_type class values
-constexpr std::array<const char*, NUM_CLASSES> PB_TYPE_CLASS_STRING = {
-    {"unknown", "lut", "flipflop", "memory"}};
-
-/* Annotations for pin-to-pin connections */
-enum e_pin_to_pin_annotation_type {
-    E_ANNOT_PIN_TO_PIN_DELAY = 0,
-    E_ANNOT_PIN_TO_PIN_CAPACITANCE,
-    E_ANNOT_PIN_TO_PIN_PACK_PATTERN
-};
-enum e_pin_to_pin_annotation_format {
-    E_ANNOT_PIN_TO_PIN_MATRIX = 0,
-    E_ANNOT_PIN_TO_PIN_CONSTANT
-};
-enum e_pin_to_pin_delay_annotations {
-    E_ANNOT_PIN_TO_PIN_DELAY_MIN = 0,        //pb interconnect or primitive combinational max delay
-    E_ANNOT_PIN_TO_PIN_DELAY_MAX,            //pb interconnect or primitive combinational max delay
-    E_ANNOT_PIN_TO_PIN_DELAY_TSETUP,         //primitive setup time
-    E_ANNOT_PIN_TO_PIN_DELAY_THOLD,          //primitive hold time
-    E_ANNOT_PIN_TO_PIN_DELAY_CLOCK_TO_Q_MIN, //primitive min clock-to-q delay
-    E_ANNOT_PIN_TO_PIN_DELAY_CLOCK_TO_Q_MAX, //primitive max clock-to-q delay
-};
-enum e_pin_to_pin_capacitance_annotations {
-    E_ANNOT_PIN_TO_PIN_CAPACITANCE_C = 0
-};
-enum e_pin_to_pin_pack_pattern_annotations {
-    E_ANNOT_PIN_TO_PIN_PACK_PATTERN_NAME = 0
-};
-
-/* Power Estimation type for a PB */
-enum e_power_estimation_method_ {
-    POWER_METHOD_UNDEFINED = 0,
-    POWER_METHOD_IGNORE,          /* Ignore power of this PB, and all children PB */
-    POWER_METHOD_SUM_OF_CHILDREN, /* Ignore power of this PB, but consider children */
-    POWER_METHOD_AUTO_SIZES,      /* Transistor-level, auto-sized buffers/wires */
-    POWER_METHOD_SPECIFY_SIZES,   /* Transistor-level, user-specified buffers/wires */
-    POWER_METHOD_TOGGLE_PINS,     /* Dynamic: Energy per pin toggle, Static: Absolute */
-    POWER_METHOD_C_INTERNAL,      /* Dynamic: Equiv. Internal capacitance, Static: Absolute */
-    POWER_METHOD_ABSOLUTE         /* Dynamic: Aboslute, Static: Absolute */
-};
-typedef enum e_power_estimation_method_ e_power_estimation_method;
-typedef enum e_power_estimation_method_ t_power_estimation_method;
-
-/* Specifies what part of the FPGA a custom switchblock should be built in (i.e. perimeter, core, everywhere) */
-enum e_sb_location {
-    E_PERIMETER = 0,
-    E_CORNER,
-    E_FRINGE, /* perimeter minus corners */
-    E_CORE,
-    E_EVERYWHERE
-};
-
-/*************************************************************************************************/
-/* FPGA grid layout data types                                                                   */
-/*************************************************************************************************/
-/* Grid location specification
- *  Each member is a formula evaluated in terms of 'W' (device width),
- *  and 'H' (device height). Formulas can be evaluated using parse_formula()
- *  from expr_eval.h.
- */
-struct t_grid_loc_spec {
-    t_grid_loc_spec(std::string start, std::string end, std::string repeat, std::string incr)
-        : start_expr(start)
-        , end_expr(end)
-        , repeat_expr(repeat)
-        , incr_expr(incr) {}
-
-    std::string start_expr; //Starting position (inclusive)
-    std::string end_expr;   //Ending position (inclusive)
-
-    std::string repeat_expr; //Distance between repeated
-                             // region instances
-
-    std::string incr_expr; //Distance between block instantiations
-                           // with the region
-};
-
-/* Definition of how to place physical logic block in the grid.
- *  This defines a region of the grid to be set to a specific type
- *  (provided it's priority is high enough to override other blocks).
- *
- *  The diagram below illustrates the layout specification.
- *
- *                      +----+                +----+           +----+
- *                      |    |                |    |           |    |
- *                      |    |                |    |    ...    |    |
- *                      |    |                |    |           |    |
- *                      +----+                +----+           +----+
- *
- *                        .                     .                 .
- *                        .                     .                 .
- *                        .                     .                 .
- *
- *                      +----+                +----+           +----+
- *                      |    |                |    |           |    |
- *                      |    |                |    |    ...    |    |
- *                      |    |                |    |           |    |
- *                      +----+                +----+           +----+
- *                   ^
- *                   |
- *           repeaty |
- *                   |
- *                   v        (endx,endy)
- *                      +----+                +----+           +----+
- *                      |    |                |    |           |    |
- *                      |    |                |    |    ...    |    |
- *                      |    |                |    |           |    |
- *                      +----+                +----+           +----+
- *       (startx,starty)
- *                            <-------------->
- *                                 repeatx
- *
- *  startx/endx and endx/endy define a rectangular region instances dimensions.
- *  The region instance is then repeated every repeatx/repeaty (if specified).
- *
- *  Within a particular region instance a block of block_type is laid down every
- *  incrx/incry units (if not specified defaults to block width/height):
- *
- *
- *    * = an instance of block_type within the region
- *
- *                    +------------------------------+
- *                    |*         *         *        *|
- *                    |                              |
- *                    |                              |
- *                    |                              |
- *                    |                              |
- *                    |                              |
- *                    |*         *         *        *|
- *                ^   |                              |
- *                |   |                              |
- *          incry |   |                              |
- *                |   |                              |
- *                v   |                              |
- *                    |*         *         *        *|
- *                    +------------------------------+
- *
- *                      <------->
- *                        incrx
- *
- *  In the above diagram incrx = 10, and incry = 6
- */
-struct t_grid_loc_def {
-    t_grid_loc_def(std::string block_type_val, int priority_val)
-        : block_type(block_type_val)
-        , priority(priority_val)
-        , x("0", "W-1", "max(w+1,W)", "w") //Fill in x direction, no repeat, incr by block width
-        , y("0", "H-1", "max(h+1,H)", "h") //Fill in y direction, no repeat, incr by block height
-    {}
-
-    std::string block_type; //The block type name
-
-    int priority = 0; //Priority of the specification.
-                      // In case of conflicting specifications
-                      // the largest priority wins.
-
-    t_grid_loc_spec x; //Horizontal location specification
-    t_grid_loc_spec y; //Veritcal location specification
-
-    // When 1 metadata tag is split among multiple t_grid_loc_def, one
-    // t_grid_loc_def is arbitrarily chosen to own the metadata, and the other
-    // t_grid_loc_def point to the owned version.
-    std::unique_ptr<t_metadata_dict> owned_meta;
-    t_metadata_dict* meta = nullptr; // Metadata for this location definition. This
-                                     // metadata may be shared with multiple grid_locs
-                                     // that come from a common definition.
-};
-
-enum GridDefType {
-    AUTO,
-    FIXED
-};
-
-struct t_grid_def {
-    GridDefType grid_type = GridDefType::AUTO; //The type of this grid specification
-
-    std::string name = ""; //The name of this device
-
-    int width = -1;  //Fixed device width (only valid for grid_type == FIXED)
-    int height = -1; //Fixed device height (only valid for grid_type == FIXED)
-
-    float aspect_ratio = 1.; //Aspect ratio for auto-sized devices (only valid for
-                             //grid_type == AUTO)
-
-    std::vector<t_grid_loc_def> loc_defs; //The list of grid location definitions for this grid specification
-};
-
-/************************* POWER ***********************************/
-
-/* Global clock architecture */
-struct t_clock_arch {
-    int num_global_clocks;
-    t_clock_network* clock_inf; /* Details about each clock */
-};
-
-/* Architecture information for a single clock */
-struct t_clock_network {
-    bool autosize_buffer; /* autosize clock buffers */
-    float buffer_size;    /* if not autosized, the clock buffer size */
-    float C_wire;         /* Wire capacitance (per meter) */
-
-    float prob;   /* Static probability of net assigned to this clock */
-    float dens;   /* Switching density of net assigned to this clock */
-    float period; /* Period of clock */
-};
-
-/* Power-related architecture information */
-struct t_power_arch {
-    float C_wire_local; /* Capacitance of local interconnect (per meter) */
-    //int seg_buffer_split; /* Split segment for distributed buffer (no split=1) */
-    float logical_effort_factor;
-    float local_interc_factor;
-    float transistors_per_SRAM_bit;
-    float mux_transistor_size;
-    float FF_size;
-    float LUT_transistor_size;
-};
-
-/* Power usage for an entity */
-struct t_power_usage {
-    float dynamic;
-    float leakage;
-};
-
-/*************************************************************************************************/
-/* FPGA Physical Logic Blocks data types                                                         */
-/*************************************************************************************************/
-
-enum class PortEquivalence {
-    NONE,    //The pins within the port are not equivalent and can not be swapped
-    FULL,    //The pins within the port are fully equivalent and can be freely swapped (e.g. logically equivalent or modelling a full-crossbar)
-    INSTANCE //The port is equivalent with instance swapping (more restrictive that FULL)
-};
-
-/* A class of CLB pins that share common properties
- * port_name: name of this class of pins
- * type:  DRIVER or RECEIVER (what is this pinclass?)              *
- * num_pins:  The number of logically equivalent pins forming this *
- *           class.                                                *
- * pinlist[]:  List of clb pin numbers which belong to this class. */
-struct t_class {
-    enum e_pin_type type;
-    PortEquivalence equivalence;
-    int num_pins;
-    std::vector<int> pinlist; /* [0..num_pins - 1] */
-};
-
-/* Struct to hold the class ranges for specific sub tiles */
-struct t_class_range {
-    int low = 0;
-    int high = 0;
-};
-
-enum e_power_wire_type {
-    POWER_WIRE_TYPE_UNDEFINED = 0,
-    POWER_WIRE_TYPE_IGNORED,
-    POWER_WIRE_TYPE_C,
-    POWER_WIRE_TYPE_ABSOLUTE_LENGTH,
-    POWER_WIRE_TYPE_RELATIVE_LENGTH,
-    POWER_WIRE_TYPE_AUTO
-};
-
-enum e_power_buffer_type {
-    POWER_BUFFER_TYPE_UNDEFINED = 0,
-    POWER_BUFFER_TYPE_NONE,
-    POWER_BUFFER_TYPE_AUTO,
-    POWER_BUFFER_TYPE_ABSOLUTE_SIZE
-};
-
-struct t_port_power {
-    /* Transistor-Level Power Properties */
-
-    // Wire
-    e_power_wire_type wire_type;
-    union {
-        float C;
-        float absolute_length;
-        float relative_length;
-    } wire;
-
-    // Buffer
-    e_power_buffer_type buffer_type;
-    float buffer_size;
-
-    /* Pin-Toggle Power Properties */
-    bool pin_toggle_initialized;
-    float energy_per_toggle;
-    t_port* scaled_by_port;
-    int scaled_by_port_pin_idx;
-    bool reverse_scaled; /* Scale by (1-prob) */
-};
-
-//The type of Fc specification
-enum class e_fc_type {
-    IN, //The fc specification for an input pin
-    OUT //The fc specification for an output pin
-};
-
-//The value type of the Fc specification
-enum class e_fc_value_type {
-    FRACTIONAL, //Fractional Fc specification (i.e. fraction of routing channel tracks)
-    ABSOLUTE    //Absolute Fc specification (i.e. absolute number of tracks)
-};
-
-//Describes the Fc specification for a set of pins and a segment
-struct t_fc_specification {
-    e_fc_type fc_type;             //What type of Fc
-    e_fc_value_type fc_value_type; //How to interpret the Fc value
-    float fc_value;                //The Fc value
-    int seg_index;                 //The target segment index
-    std::vector<int> pins;         //The block pins collectively effected by this Fc
-};
-
-//Defines the default Fc specification for an architecture
-struct t_default_fc_spec {
-    bool specified = false;         //Whether or not a default specification exists
-    e_fc_value_type in_value_type;  //Type of the input value (frac or abs)
-    float in_value;                 //Input Fc value
-    e_fc_value_type out_value_type; //Type of the output value (frac or abs)
-    float out_value;                //Output Fc value
-};
-
-enum class e_sb_type {
-    NONE,       //No SB at this location
-    HORIZONTAL, //Horizontal straight-through connections
-    VERTICAL,   //Vertical straight-through connections
-    TURNS,      //Turning connections only
-    STRAIGHT,   //Straight-through connections (i.e. vertical + horizontal)
-    FULL        //Full SB at this location (i.e. turns + straight)
-
-};
-
-constexpr int NO_SWITCH = -1;
-constexpr int DEFAULT_SWITCH = -2;
-
-/* Describes the type for a physical tile
- * name: unique identifier for type
- * num_pins: Number of pins for the block
- * capacity: Number of blocks of this type that can occupy one grid tile (typically used by IOs).
- * width: Width of large block in grid tiles
- * height: Height of large block in grid tiles
- *
- * pinloc: Is set to true if a given pin exists on a certain position of a
- *         block. Derived from pin_location_distribution/pin_loc_assignments
- *
- * pin_location_distribution: The pin distribution type
- * num_pin_loc_assignments: The number of strings within each pin_loc_assignments
- * pin_loc_assignments: The strings for a custom pin location distribution.
- *                      Valid only for pin_location_distribution == E_CUSTOM_PIN_DISTR
- *
- * num_class: Number of logically-equivalent pin classes
- * class_inf: Information of each logically-equivalent class
- *
- * pin_avg_width_offset: Average width offset to specified pin (exact if only a single physical pin instance)
- * pin_avg_height_offset: Average height offset to specified pin (exact if only a single physical pin instance)
- * pin_class: The class a pin belongs to
- * is_ignored_pin: Whether or not a pin is ignored durring rr_graph generation and routing.
- *                 This is usually the case for clock pins and other global pins unless the
- *                 clock_modeling option is set to route the clock through regular inter-block
- *                 wiring or through a dedicated clock network.
- * is_pin_global: Whether or not this pin is marked as global. Clock pins and other specified
- *                global pins in the architecture file are marked as global.
- *
- * fc_specs: The Fc specifications for all pins
- *
- * switchblock_locations: Switch block configuration for this block.
- *                        Each element describes the type of SB which should be
- *                        constructed at the specified location.
- *                        Note that the SB is located to the top-right of the
- *                        grid tile location. [0..width-1][0..height-1]
- *
- * area: Describes how much area this logic block takes, if undefined, use default
- * type_timing_inf: timing information unique to this type
- * num_drivers: Total number of output drivers supplied
- * num_receivers: Total number of input receivers supplied
- * index: Keep track of type in array for easy access
- * logical_tile_index: index of the corresponding logical block type
- *
- * In general, the physical tile is a placeable physical resource on the FPGA device,
- * and it is allowed to contain an heterogeneous set of logical blocks (pb_types).
- *
- * Each physical tile must specify at least one sub tile, that is a physical location
- * on the sub tiles stacks. This means that a physical tile occupies an (x, y) location on the grid,
- * and it has at least one sub tile slot that allows for a placement within the (x, y) location.
- *
- * Therefore, to identify the location of a logical block within the device grid, we need to
- * specify three different coordinates:
- *      - x         : horizontal coordinate
- *      - y         : vertical coordinate
- *      - sub tile  : location within the sub tile stack at an (x, y) physical location
- *
- * A physical tile is heterogeneous as it allows the placement of different kinds of logical blocks within,
- * that can share the same (x, y) placement location.
- *
- */
-struct t_physical_tile_type {
-    char* name = nullptr;
-    int num_pins = 0;
-    int num_inst_pins = 0;
-    int num_input_pins = 0;
-    int num_output_pins = 0;
-    int num_clock_pins = 0;
-
-    std::vector<int> clock_pin_indices;
-
-    int capacity = 0;
-
-    int width = 0;
-    int height = 0;
-
-    vtr::NdMatrix<std::vector<bool>, 3> pinloc; /* [0..width-1][0..height-1][0..3][0..num_pins-1] */
-
-    std::vector<t_class> class_inf; /* [0..num_class-1] */
-
-    std::vector<int> pin_width_offset;  // [0..num_pins-1]
-    std::vector<int> pin_height_offset; // [0..num_pins-1]
-    std::vector<int> pin_class;         // [0..num_pins-1]
-    std::vector<bool> is_ignored_pin;   // [0..num_pins-1]
-    std::vector<bool> is_pin_global;    // [0..num_pins-1]
-
-    std::vector<t_fc_specification> fc_specs;
-
-    vtr::Matrix<e_sb_type> switchblock_locations;
-    vtr::Matrix<int> switchblock_switch_overrides;
-
-    float area = 0;
-
-    /* This info can be determined from class_inf and pin_class but stored for faster access */
-    int num_drivers = 0;
-    int num_receivers = 0;
-
-    int index = -1; /* index of type descriptor in array (allows for index referencing) */
-
-    // vector of the different types of sub tiles allowed for the physical tile.
-    std::vector<t_sub_tile> sub_tiles;
-
-    /* Unordered map indexed by the logical block index.
-     * tile_block_pin_directs_map[logical block index][logical block pin] -> physical tile pin */
-    std::unordered_map<int, std::unordered_map<int, vtr::bimap<t_logical_pin, t_physical_pin>>> tile_block_pin_directs_map;
-
-    /* Returns the indices of pins that contain a clock for this physical logic block */
-    std::vector<int> get_clock_pins_indices() const;
-
-    // Returns the sub tile location of the physical tile given an input pin
-    int get_sub_tile_loc_from_pin(int pin_num) const;
-
-    // TODO: Remove is_input_type / is_output_type as part of
-    // https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1193
-
-    // Does this t_physical_tile_type contain an inpad?
-    bool is_input_type = false;
-
-    // Does this t_physical_tile_type contain an outpad?
-    bool is_output_type = false;
-
-    // Is this t_physical_tile_type an empty type?
-    bool is_empty() const;
-};
-
-/* Holds the capacity range of a certain sub_tile block within the parent physical tile type.
- * E.g. TILE_X has the following sub tiles:
- *          - SUB_TILE_A: capacity_range --> 0 to 4
- *          - SUB_TILE_B: capacity_range --> 5 to 11
- *          - SUB_TILE_C: capacity_range --> 12 to 16
- *
- * Totale TILE_X capacity is 17
- */
-struct t_capacity_range {
-    int low = 0;
-    int high = 0;
-
-    void set(int low_cap, int high_cap) {
-        low = low_cap;
-        high = high_cap;
-    }
-
-    bool is_in_range(int cap) const {
-        return cap >= low and cap <= high;
-    }
-
-    int total() const {
-        return high - low + 1;
-    }
-};
-
-/**
- * @brief Describes the possible placeable blocks within a physical tile type.
- *
- * Heterogeneous blocks:
- *
- * The sub tile allows to have heterogeneous blocks placed at the same grid location.
- * Heterogeneous blocks are blocks which do not share either the same functionality or the
- * IO interface, but do share the same (x, y) grid location.
- * For each heterogeneous block type than, there should be a corresponding sub tile to enable
- * its placement within the physical tile.
- *
- * For further information there is a tutorial on the VTR documentation page.
- *
- *
- * Equivalent sites:
- *
- * Moreover, the same sub tile enables to allow the placement of different implementations
- * of a logical block.
- * This means that two blocks that have different internal functionalities, but the IO interface of one block
- * is a subset of the other, they can be placed at the same sub tile location within the physical tile.
- * These two blocks can be identified as equivalent, hence they can belong to the same sub tile.
- */
-struct t_sub_tile {
-    char* name = nullptr;
-
-    // Mapping between the sub tile's pins and the physical pins corresponding
-    // to the physical tile type.
-    std::vector<int> sub_tile_to_tile_pin_indices;
-
-    std::vector<t_physical_tile_port> ports;
-
-    std::vector<t_logical_block_type_ptr> equivalent_sites; ///>List of netlist blocks (t_logical_block) that one could
-                                                            ///>place within this sub tile.
-
-    t_capacity_range capacity; ///>Indicates the total number of sub tile instances of this type placeable at a
-                               ///>physical location.
-                               ///>E.g.: capacity can range from 4 to 7, meaning that there are four placeable sub tiles
-                               ///>      at a physical location, and compatible netlist blocks can be placed at sub_tile
-                               ///>      indices ranging from 4 to 7.
-    t_class_range class_range;
-
-    int num_phy_pins = 0;
-
-    int index = -1;
-};
-
-/** A logical pin defines the pin index of a logical block type (i.e. a top level PB type)
- *  This structure wraps the int value of the logical pin to allow its storage in the
- *  vtr::bimap container.
- */
-struct t_logical_pin {
-    int pin = -1;
-
-    t_logical_pin(int value) {
-        pin = value;
-    }
-
-    bool operator==(const t_logical_pin o) const {
-        return pin == o.pin;
-    }
-
-    bool operator<(const t_logical_pin o) const {
-        return pin < o.pin;
-    }
-};
-
-/** A physical pin defines the pin index of a physical tile type (i.e. a grid tile type)
- *  This structure wraps the int value of the physical pin to allow its storage in the
- *  vtr::bimap container.
- */
-struct t_physical_pin {
-    int pin = -1;
-
-    t_physical_pin(int value) {
-        pin = value;
-    }
-
-    bool operator==(const t_physical_pin o) const {
-        return pin == o.pin;
-    }
-
-    bool operator<(const t_physical_pin o) const {
-        return pin < o.pin;
-    }
-};
-
-/** Describes I/O and clock ports of a physical tile type
- *
- *  It corresponds to <port/> tags in the FPGA architecture description
- *
- *  Data members:
- *      name: name of the port
- *      is_clock: whether or not this port is a clock
- *      is_non_clock_global: Applies to top level pb_type, this pin is not a clock but
- *                           is a global signal (useful for stuff like global reset signals,
- *                           perhaps useful for VCC and GND)
- *      num_pins: the number of pins this port has
- *      tile_type: pointer to the associated tile type
- *      port_class: port belongs to recognized set of ports in class library
- *      index: port index by index in array of parent pb_type
- *      absolute_first_pin_index: absolute index of the first pin in the physical tile.
- *                                All the other pin indices can be calculated with num_pins
- *      port_index_by_type index of port by type (index by input, output, or clock)
- *      equivalence: Applies to logic block I/Os and to primitive inputs only
- */
-struct t_physical_tile_port {
-    char* name;
-    enum PORTS type;
-    bool is_clock;
-    bool is_non_clock_global;
-    int num_pins;
-    PortEquivalence equivalent;
-
-    int index;
-    int absolute_first_pin_index;
-    int port_index_by_type;
-
-    t_physical_tile_port() {
-        is_clock = false;
-        is_non_clock_global = false;
-
-        num_pins = 1;
-        equivalent = PortEquivalence::NONE;
-    }
-};
-
-/* Describes the type for a logical block
- * name: unique identifier for type
- * pb_type: Internal subblocks and routing information for this physical block
- * pb_graph_head: Head of DAG of pb_types_nodes and their edges
- *
- * index: Keep track of type in array for easy access
- * physical_tile_index: index of the corresponding physical tile type
- *
- * A logical block is the implementation of a component's functionality of the FPGA device
- * and it identifies its logical behaviour and internal connections.
- *
- * The logical block type is mainly used during the packing stage of VPR and is used to generate
- * the packed netlist and all the corresponding blocks and their internal structure.
- *
- * The logical blocks than get assigned to a possible physical tile for the placement step.
- *
- * A logical block must correspond to at least one physical tile.
- */
-struct t_logical_block_type {
-    char* name = nullptr;
-
-    /* Clustering info */
-    t_pb_type* pb_type = nullptr;
-    t_pb_graph_node* pb_graph_head = nullptr;
-
-    int index = -1; /* index of type descriptor in array (allows for index referencing) */
-
-    std::vector<t_physical_tile_type_ptr> equivalent_tiles; ///>List of physical tiles at which one could
-                                                            ///>place this type of netlist block.
-
-    // Is this t_logical_block_type empty?
-    bool is_empty() const;
-};
-
-/*************************************************************************************************
- * PB Type Hierarchy                                                                             *
- *************************************************************************************************
- *
- * VPR represents the 'type' of block types corresponding to FPGA grid locations using a hierarchy
- * of t_pb_type objects.
- *
- * The root t_pb_type corresponds to a single top level block type and maps to a particular type
- * of location in the FPGA device grid (e.g. Logic, DSP, RAM etc.).
- *
- * A non-root t_pb_type represents an intermediate level of hierarchy within the root block type.
- *
- * The PB Type hierarchy corresponds to the tags specified in the FPGA architecture description:
- *
- *      struct              XML Tag
- *      ------              ------------
- *      t_pb_type           <pb_type/>
- *      t_mode              <mode/>
- *      t_interconnect      <interconnect/>
- *      t_port              <port/>
- *
- * The various structures hold pointers to each other which encode the hierarchy.
- */
-
-/** Describes the type of clustered block if a root (parent_mode == nullptr), an
- *  intermediate level of hierarchy (parent_mode != nullptr), or a leaf/primitive
- *  (num_modes == 0, model != nullptr).
- *
- *  This (along with t_mode) corresponds to the hierarchical specification of
- *  block modes that users provide in the architecture (i.e. <pb_type/> tags).
- *
- *  It is also useful to note that a single t_pb_type may represent multiple instances of that
- *  type in the architecture (see the num_pb field).
- *
- *  In VPR there is a single instance of a t_pb_type for each type, which is referenced as a
- *  flyweight by other objects (e.g. t_pb_graph_node).
- *
- *  Data members:
- *      name: name of the physical block type
- *      num_pb: maximum number of instances of this physical block type sharing one parent
- *      blif_model: the string in the blif circuit that corresponds with this pb type
- *      class_type: Special library name
- *      modes: Different modes accepted
- *      ports: I/O and clock ports
- *      num_clock_pins: A count of the total number of clock pins
- *      num_input_pins: A count of the total number of input pins
- *      num_output_pins: A count of the total number of output pins
- *      num_pins: A count of the total number of pins
- *      timing: Timing matrix of block [0..num_inputs-1][0..num_outputs-1]
- *      parent_mode: mode of the parent block
- *      t_mode_power: ???
- *      meta: Table storing extra arbitrary metadata attributes.
- */
-struct t_pb_type {
-    char* name = nullptr;
-    int num_pb = 0;
-    char* blif_model = nullptr;
-    t_model* model = nullptr;
-    enum e_pb_type_class class_type = UNKNOWN_CLASS;
-
-    t_mode* modes = nullptr; /* [0..num_modes-1] */
-    int num_modes = 0;
-    t_port* ports = nullptr; /* [0..num_ports] */
-    int num_ports = 0;
-
-    int num_clock_pins = 0;
-    int num_input_pins = 0; /* inputs not including clock pins */
-    int num_output_pins = 0;
-
-    int num_pins = 0;
-
-    t_mode* parent_mode = nullptr;
-    int depth = 0; /* depth of pb_type */
-
-    t_pin_to_pin_annotation* annotations = nullptr; /* [0..num_annotations-1] */
-    int num_annotations = 0;
-
-    /* Power related members */
-    t_pb_type_power* pb_type_power = nullptr;
-
-    t_metadata_dict meta;
-};
-
-/** Describes an operational mode of a clustered logic block
- *
- *  This forms part of the t_pb_type hierarchical description of a clustered logic block.
- *  It corresponds to <mode/> tags in the FPGA architecture description
- *
- *  Data members:
- *      name: name of the mode
- *      pb_type_children: pb_types it contains
- *      interconnect: interconnect of parent pb_type to children pb_types or children to children pb_types
- *      num_interconnect: Total number of interconnect tags specified by user
- *      parent_pb_type: Which parent contains this mode
- *      index: Index of mode in array with other modes
- *      disable_packing: Specify if the mode is disabled/enabled for VPR packer.
- *                       By default, every mode is enabled for VPR packer.
- *                       Users can disable it for VPR packer through arch XML
- *                       When flag is set true, the mode is invisible to VPR packer.
- *                       No logic will be mapped to the pb_type under the mode
- *      t_mode_power: ???
- *      meta: Table storing extra arbitrary metadata attributes.
- */
-struct t_mode {
-    char* name = nullptr;
-    t_pb_type* pb_type_children = nullptr; /* [0..num_child_pb_types] */
-    int num_pb_type_children = 0;
-    t_interconnect* interconnect = nullptr;
-    int num_interconnect = 0;
-    t_pb_type* parent_pb_type = nullptr;
-    int index = 0;
-
-    /* Packer-related switches */
-    bool disable_packing = false;
-
-    /* Power related members */
-    t_mode_power* mode_power = nullptr;
-
-    t_metadata_dict meta;
-};
-
-/** Describes an interconnect edge inside a cluster
- *
- *  This forms part of the t_pb_type hierarchical description of a clustered logic block.
- *  It corresponds to <interconnect/> tags in the FPGA architecture description
- *
- *  Data members:
- *      type: type of the interconnect
- *      name: identifier for interconnect
- *      input_string: input string verbatim to parse later
- *      output_string: input string output to parse later
- *      annotations: Annotations for delay, power, etc
- *      num_annotations: Total number of annotations
- *      infer_annotations: This interconnect is autogenerated, if true, infer pack_patterns
- *                         such as carry-chains and forced packs based on interconnect linked to it
- *      parent_mode_index: Mode of parent as int
- */
-struct t_interconnect {
-    enum e_interconnect type;
-    char* name = nullptr;
-
-    char* input_string = nullptr;
-    char* output_string = nullptr;
-
-    t_pin_to_pin_annotation* annotations = nullptr; /* [0..num_annotations-1] */
-    int num_annotations = 0;
-    bool infer_annotations = false;
-
-    int line_num = 0; /* Interconnect is processed later, need to know what line number it messed up on to give proper error message */
-
-    int parent_mode_index = 0;
-
-    /* Power related members */
-    t_mode* parent_mode = nullptr;
-
-    t_interconnect_power* interconnect_power = nullptr;
-    t_metadata_dict meta;
-};
-
-/** Describes I/O and clock ports
- *
- *  This forms part of the t_pb_type hierarchical description of a clustered logic block.
- *  It corresponds to <port/> tags in the FPGA architecture description
- *
- *  Data members:
- *      name: name of the port
- *      model_port: associated model port
- *      is_clock: whether or not this port is a clock
- *      is_non_clock_global: Applies to top level pb_type, this pin is not a clock but
- *                           is a global signal (useful for stuff like global reset signals,
- *                           perhaps useful for VCC and GND)
- *      num_pins: the number of pins this port has
- *      parent_pb_type: pointer to the parent pb_type
- *      port_class: port belongs to recognized set of ports in class library
- *      index: port index by index in array of parent pb_type
- *      port_index_by_type index of port by type (index by input, output, or clock)
- *      equivalence: Applies to logic block I/Os and to primitive inputs only
- */
-struct t_port {
-    char* name;
-    t_model_ports* model_port;
-    enum PORTS type;
-    bool is_clock;
-    bool is_non_clock_global;
-    int num_pins;
-    PortEquivalence equivalent;
-    t_pb_type* parent_pb_type;
-    char* port_class;
-
-    int index;
-    int port_index_by_type;
-    int absolute_first_pin_index;
-
-    t_port_power* port_power;
-};
-
-struct t_pb_type_power {
-    /* Type of power estimation for this pb */
-    e_power_estimation_method estimation_method;
-
-    t_power_usage absolute_power_per_instance; /* User-provided absolute power per block */
-
-    float C_internal;         /*Internal capacitance of the pb */
-    int leakage_default_mode; /* Default mode for leakage analysis, if block has no set mode */
-
-    t_power_usage power_usage;            /* Total power usage of this pb type */
-    t_power_usage power_usage_bufs_wires; /* Power dissipated in local buffers and wire switching (Subset of total power) */
-};
-
-struct t_interconnect_power {
-    t_power_usage power_usage;
-
-    /* These are not necessarily power-related; however, at the moment
-     * only power estimation uses them
-     */
-    bool port_info_initialized;
-    int num_input_ports;
-    int num_output_ports;
-    int num_pins_per_port;
-    float transistor_cnt;
-};
-
-struct t_interconnect_pins {
-    t_interconnect* interconnect;
-
-    t_pb_graph_pin*** input_pins;  // [0..num_input_ports-1][0..num_pins_per_port-1]
-    t_pb_graph_pin*** output_pins; // [0..num_output_ports-1][0..num_pins_per_port-1]
-};
-
-struct t_mode_power {
-    t_power_usage power_usage; /* Power usage of this mode */
-};
-
-/** Info placed between pins in the architecture file (e.g. delay annotations),
- *
- * This is later for additional information.
- *
- * Data Members:
- *      value: value/property pair
- *      prop: value/property pair
- *      type: type of annotation
- *      format: formatting of data
- *      input_pins: input pins as string affected by annotation
- *      output_pins: output pins as string affected by annotation
- *      clock_pin: clock as string affected by annotation
- */
-struct t_pin_to_pin_annotation {
-    char** value; /* [0..num_value_prop_pairs - 1] */
-    int* prop;    /* [0..num_value_prop_pairs - 1] */
-    int num_value_prop_pairs;
-
-    enum e_pin_to_pin_annotation_type type;
-    enum e_pin_to_pin_annotation_format format;
-
-    char* input_pins;
-    char* output_pins;
-    char* clock;
-
-    int line_num; /* used to report what line number this annotation is found in architecture file */
-};
-
-/*************************************************************************************************
- * PB Graph                                                                                      *
- *************************************************************************************************
- *
- * The PB graph represents the flattened and elaborated connectivity within a t_pb_type (i.e.
- * the routing resource graph), derived from the t_pb_type hierarchy.
- *
- * The PB graph is built of t_pb_graph_node and t_pb_graph_pin objects.
- *
- * There is a single PB graph associated with each root t_pb_type, and it is referenced in other objects (e.g.
- * t_pb) as a flyweight.
- *
- */
-
-/** Describes the internal connectivity corresponding to a t_pb_type and t_mode of a cluster.
- *
- *  There is a t_pb_graph_node for each instance of the pb_type (i.e. t_pb_type may describe
- *  num_pb instances of the type, with each instance represented as a t_pb_graph_node).
- *  The distinction between the pb_type and the pb_graph_node is necessary since the 'position'
- *  of a particular instance in the cluster is important when routing the cluster (since the routing
- *  accessible from each position may be different).
- *
- *  Data members:
- *      pb_type               : Pointer to the type of pb graph node this belongs to
- *      placement_index       : there are a certain number of pbs available, this gives the index of the node
- *      child_pb_graph_nodes  : array of children pb graph nodes organized into modes
- *      parent_pb_graph_node  : parent pb graph node
- *      total_primitive_count : Total number of this primitive type in the cluster. If there are 10 ALMs per cluster
- *                              and 2 FFs per ALM (given the mode of the parent of this primitive) then the total is 20.
- *      illegal_modes         : vector containing illegal modes that result in conflicts during routing
- */
-class t_pb_graph_node {
-  public:
-    t_pb_type* pb_type;
-
-    int placement_index;
-
-    /* Contains a collection of mode indices that cannot be used as they produce conflicts during VPR packing stage
-     *
-     * Illegal modes do arise when children of a graph_node do have inconsistent `edge_modes` with respect to
-     * the parent_pb.
-     * Example: Edges that connect LUTs A, B and C to the parent pb_graph_node refer to the correct parent's mode which is set to "LUTs",
-     *          but edges of LUT D have the mode of edge corresponding to a wrong parent's pb_graph_node mode, namely "LUTRAM".
-     *          This situation is unfeasible as the edge modes are inconsistent between siblings of the same parent pb_graph_node.
-     *          In this case, the "LUTs" mode of the parent pb_graph_node cannot be used as the LUT D is not able to have a feasible
-     *          edge mode that does relate with the other sibling's edge modes.
-     *
-     *          The "LUTs" index mode is added to the illegal_modes vector. The conflicting mode marked as illegal is the most restrictive one.
-     *          This means that LUT D is unable to be routed if using the parent's "LUTs" mode (otherwise "LUTs" mode would be selected for LUT D
-     *          as well), but LUTs A, B and C could still be routed using the parent pb_graph_node's mode "LUTRAM".
-     *          Therefore, "LUTs" is marked as illegal and all the LUTs (A, B, C and D) will have a consistent parent pb_graph_node mode, namely "LUTRAM".
-     *
-     * Usage: cluster_router uses this information to exclude the expansion of a node which has a not cosistent mode.
-     *        Everytime the mode consistency check fails, the index of the mode that causes the conflict is added to this vector.
-     * */
-    std::vector<int> illegal_modes;
-
-    t_pb_graph_pin** input_pins;  /* [0..num_input_ports-1] [0..num_port_pins-1]*/
-    t_pb_graph_pin** output_pins; /* [0..num_output_ports-1] [0..num_port_pins-1]*/
-    t_pb_graph_pin** clock_pins;  /* [0..num_clock_ports-1] [0..num_port_pins-1]*/
-
-    int num_input_ports;
-    int num_output_ports;
-    int num_clock_ports;
-
-    int* num_input_pins;  /* [0..num_input_ports - 1] */
-    int* num_output_pins; /* [0..num_output_ports - 1] */
-    int* num_clock_pins;  /* [0..num_clock_ports - 1] */
-
-    t_pb_graph_node*** child_pb_graph_nodes; /* [0..num_modes-1][0..num_pb_type_in_mode-1][0..num_pb-1] */
-    t_pb_graph_node* parent_pb_graph_node;
-
-    int total_pb_pins; /* only valid for top-level */
-
-    void* temp_scratch_pad;                                     /* temporary data, useful for keeping track of things when traversing data structure */
-    t_cluster_placement_primitive* cluster_placement_primitive; /* pointer to indexing structure useful during packing stage */
-
-    int* input_pin_class_size;  /* Stores the number of pins that belong to a particular input pin class */
-    int num_input_pin_class;    /* number of input pin classes that this pb_graph_node has */
-    int* output_pin_class_size; /* Stores the number of pins that belong to a particular output pin class */
-    int num_output_pin_class;   /* number of output pin classes that this pb_graph_node has */
-
-    int total_primitive_count; /* total number of this primitive type in the cluster */
-
-    /* Interconnect instances for this pb
-     * Only used for power
-     */
-    t_pb_graph_node_power* pb_node_power;
-    t_interconnect_pins** interconnect_pins; /* [0..num_modes-1][0..num_interconnect_in_mode] */
-
-    // Returns true if this pb_graph_node represents a primitive type (primitives have 0 modes)
-    bool is_primitive() const { return this->pb_type->num_modes == 0; }
-
-    // Returns true if this pb_graph_node represents a root graph node (ex. clb)
-    bool is_root() const { return this->parent_pb_graph_node == nullptr; }
-
-    //Returns the number of pins on this graph node
-    //  Note this is the total for all ports on this node excluding any children (i.e. sum of all num_input_pins, num_output_pins, num_clock_pins)
-    int num_pins() const;
-    // Returns a string containing the hierarchical type name of the pb_graph_node
-    // Ex: clb[0][default]/lab[0][default]/fle[3][n1_lut6]/ble6[0][default]/lut6[0]
-    std::string hierarchical_type_name() const;
-};
-
-/* Identify pb pin type for timing purposes */
-enum e_pb_graph_pin_type {
-    PB_PIN_NORMAL = 0,
-    PB_PIN_SEQUENTIAL,
-    PB_PIN_INPAD,
-    PB_PIN_OUTPAD,
-    PB_PIN_TERMINAL,
-    PB_PIN_CLOCK
-};
-
-/** Describes a pb graph pin
- *
- *  Data Members:
- *      port: pointer to the port that this pin is associated with
- *      pin_number: pin number of the port that this pin is associated with
- *      input edges: [0..num_input_edges - 1]edges incoming
- *      num_input_edges: number edges incoming
- *      output edges: [0..num_output_edges - 1]edges out_going
- *      num_output_edges: number edges out_going
- *      parent_node: parent pb_graph_node
- *      pin_count_in_cluster: Unique number for pin inside cluster
- */
-class t_pb_graph_pin {
-  public:
-    t_port* port = nullptr;
-    int pin_number = 0;
-    std::vector<t_pb_graph_edge*> input_edges; /* [0..num_input_edges] */
-    int num_input_edges = 0;
-    std::vector<t_pb_graph_edge*> output_edges; /* [0..num_output_edges] */
-    int num_output_edges = 0;
-
-    t_pb_graph_node* parent_node = nullptr;
-    int pin_count_in_cluster = 0;
-
-    int scratch_pad = 0; /* temporary data structure useful to store traversal info */
-
-    enum e_pb_graph_pin_type type = PB_PIN_NORMAL; /* The type of this pin (sequential, i/o etc.) */
-
-    /* sequential timing information */
-    float tsu = std::numeric_limits<float>::quiet_NaN();     /* For sequential logic elements the setup time */
-    float thld = std::numeric_limits<float>::quiet_NaN();    /* For sequential logic elements the hold time */
-    float tco_min = std::numeric_limits<float>::quiet_NaN(); /* For sequential logic elements the minimum clock to output time */
-    float tco_max = std::numeric_limits<float>::quiet_NaN(); /* For sequential logic elements the maximum clock to output time */
-    t_pb_graph_pin* associated_clock_pin = nullptr;          /* For sequentail elements, the associated clock */
-
-    /* combinational timing information */
-    int num_pin_timing = 0;                   /* Number of ipin to opin timing edges*/
-    std::vector<t_pb_graph_pin*> pin_timing;  /* timing edge sink pins  [0..num_pin_timing-1]*/
-    std::vector<float> pin_timing_del_max;    /* primitive ipin to opin max-delay [0..num_pin_timing-1]*/
-    std::vector<float> pin_timing_del_min;    /* primitive ipin to opin min-delay [0..num_pin_timing-1]*/
-    int num_pin_timing_del_max_annotated = 0; //The list of valid pin_timing_del_max entries runs from [0..num_pin_timing_del_max_annotated-1]
-    int num_pin_timing_del_min_annotated = 0; //The list of valid pin_timing_del_max entries runs from [0..num_pin_timing_del_min_annotated-1]
-
-    /* Applies to clusters only */
-    int pin_class = 0;
-
-    /* Applies to pins of primitive only */
-    int* parent_pin_class = nullptr; /* [0..depth-1] the grouping of pins that this particular pin belongs to */
-    /* Applies to output pins of primitives only */
-    t_pb_graph_pin*** list_of_connectable_input_pin_ptrs = nullptr; /* [0..depth-1][0..num_connectable_primitive_input_pins-1] what input pins this output can connect to without exiting cluster at given depth */
-    int* num_connectable_primitive_input_pins = nullptr;            /* [0..depth-1] number of input pins that this output pin can reach without exiting cluster at given depth */
-
-    bool is_forced_connection = false; /* This output pin connects to one and only one input pin */
-
-    t_pb_graph_pin_power* pin_power = nullptr;
-
-    // class member functions
-  public:
-    // Returns true if this pin belongs to a primitive block like
-    // a LUT or FF, instead of a cluster-level block like a CLB.
-    bool is_primitive_pin() const {
-        return this->parent_node->is_primitive();
-    }
-    // Returns true if this pin belongs to a root pb_block which is a pb_block
-    // that has no parent block. For example, pins of a CLB, IO, DSP, etc.
-    bool is_root_block_pin() const {
-        return this->parent_node->is_root();
-    }
-    // This function returns a string that contains the name of the pin
-    // and the entire sequence of pb_types in the hierarchy from the block
-    // of this pin back to the cluster-level (top-level) pb_type in the
-    // following format: clb[0]/lab[0]/fle[3]/ble6[0]/lut6[0].in[0]
-    // if full_description is set to false it will only return lut6[0].in[0]
-    std::string to_string(const bool full_description = true) const;
-};
-
-/** Describes a pb graph edge
- *
- *  Note that this is a "fat" edge which supports bused based connections
- *
- *  Data members:
- *      input_pins: array of pb_type graph input pins ptrs entering this edge
- *      num_input_pins: Number of input pins entering this edge
- *      output_pins: array of pb_type graph output pins ptrs exiting this edge
- *      num_output_pins: Number of output pins exiting this edge
- *
- *      num_pack_patterns: number of pack patterns this edge belongs to
- *      pack_pattern_names: [0..num_pack_patterns-1] name of each pack pattern
- *      pack_pattern_indices: [0..num_pack_patterns-1] id of each pack pattern
- *      infer_pattern: if true, pattern of this edge could be inferred by checking
- *                     input/output edges. This is true when the edge is a single
- *                     fanout edge and is driven or driving another edge which is
- *                     annotated with a pack pattern.
- */
-class t_pb_graph_edge {
-  public:
-    /* edge connectivity */
-    t_pb_graph_pin** input_pins;
-    int num_input_pins;
-    t_pb_graph_pin** output_pins;
-    int num_output_pins;
-
-    /* timing information */
-    float delay_max;
-    float delay_min;
-    float capacitance;
-
-    /* who drives this edge */
-    t_interconnect* interconnect;
-    int driver_set;
-    int driver_pin;
-
-    /* pack pattern info */
-    int num_pack_patterns;
-    std::vector<const char*> pack_pattern_names;
-    int* pack_pattern_indices;
-    bool infer_pattern;
-
-    // class member functions
-  public:
-    // Returns true is this edge is annotated with the given pattern_index
-    //  pattern_index : index of the packing pattern
-    bool annotated_with_pattern(int pattern_index) const;
-
-    // Returns true is this edge is annotated with pattern_index or its pattern
-    // is inferred and a connected output edge is annotated with pattern_index
-    //   pattern_index : index of the packing pattern
-    bool belongs_to_pattern(int pattern_index) const;
-};
-
-struct t_pb_graph_node_power {
-    float transistor_cnt_pb_children; /* Total transistor size of this pb */
-    float transistor_cnt_interc;      /* Total transistor size of the interconnect in this pb */
-    float transistor_cnt_buffers;
-};
-
-struct t_pb_graph_pin_power {
-    /* Transistor-level Power Properties */
-    float C_wire;
-    float buffer_size;
-
-    /* Pin-Toggle Power Properties */
-    t_pb_graph_pin* scaled_by_pin;
-};
-
-/*************************************************************************************************/
-/* FPGA Routing architecture                                                                     */
-/*************************************************************************************************/
-
-/* Description of routing channel distribution across the FPGA, only available for global routing
- * Width is standard dev. for Gaussian.  xpeak is where peak     *
- * occurs. dc is the dc offset for Gaussian and pulse waveforms. */
-enum e_stat {
-    UNIFORM,
-    GAUSSIAN,
-    PULSE,
-    DELTA
-};
-struct t_chan {
-    enum e_stat type;
-    float peak;
-    float width;
-    float xpeak;
-    float dc;
-};
-
-/* chan_x_dist: Describes the x-directed channel width distribution.         *
- * chan_y_dist: Describes the y-directed channel width distribution.         */
-struct t_chan_width_dist {
-    t_chan chan_x_dist;
-    t_chan chan_y_dist;
-};
-
-enum e_directionality {
-    UNI_DIRECTIONAL,
-    BI_DIRECTIONAL
-};
-/* X_AXIS: Data that describes an x-directed wire segment (CHANX)                     *
- * Y_AXIS: Data that describes an y-directed wire segment (CHANY)                     *     
- * BOTH_AXIS: Data that can be applied to both x-directed and y-directed wire segment */
-enum e_parallel_axis {
-    X_AXIS,
-    Y_AXIS,
-    BOTH_AXIS
-};
-enum e_switch_block_type {
-    SUBSET,
-    WILTON,
-    UNIVERSAL,
-    FULL,
-    CUSTOM
-};
-typedef enum e_switch_block_type t_switch_block_type;
-enum e_Fc_type {
-    ABSOLUTE,
-    FRACTIONAL
-};
-
-/* Lists all the important information about a certain segment type.  Only   *
- * used if the route_type is DETAILED.  [0 .. det_routing_arch.num_segment]  *
- * name: the name of this segment                                            *
- * frequency:  ratio of tracks which are of this segment type.               *
- * length:     Length (in clbs) of the segment.                              *
- * arch_wire_switch: Index of the switch type that connects other wires      *
- *                   *to* this segment. Note that this index is in relation  *
- *                   to the switches from the architecture file, not the     *
- *                   expanded list of switches that is built at the end of   *
- *                   build_rr_graph.                                         *
- * arch_opin_switch: Index of the switch type that connects output pins      *
- *                   (OPINs) *to* this segment. Note that this index is in   *
- *                   relation to the switches from the architecture file,    *
- *                   not the expanded list of switches that is built         *
- *                   at the end of build_rr_graph                            *
- * frac_cb:  The fraction of logic blocks along its length to which this     *
- *           segment can connect.  (i.e. internal population).               *
- * frac_sb:  The fraction of the length + 1 switch blocks along the segment  *
- *           to which the segment can connect.  Segments that aren't long    *
- *           lines must connect to at least two switch boxes.                *
- * parallel_axis:   Defines what axis the segment is parallel to. See        *
- *                  e_parallel_axis comments for more details on the values. *
- * Cmetal: Capacitance of a routing track, per unit logic block length.      *
- * Rmetal: Resistance of a routing track, per unit logic block length.       *
- * (UDSD by AY) drivers: How do signals driving a routing track connect to   *
- *                       the track?  
- * seg_index: The index of the segment as stored in the appropriate Segs list*
- *            Upon loading the architecture, we use this field to keep track *
- *            the segment's index in the unified segment_inf vector. This is *
- *            usefull when building the rr_graph for different Y & X channels*
- *            interms of track distribution and segment type.                *
- * meta: Table storing extra arbitrary metadata attributes.                  */
-struct t_segment_inf {
-    std::string name;
-    int frequency;
-    int length;
-    short arch_wire_switch;
-    short arch_opin_switch;
-    float frac_cb;
-    float frac_sb;
-    bool longline;
-    float Rmetal;
-    float Cmetal;
-    enum e_directionality directionality;
-    enum e_parallel_axis parallel_axis;
-    std::vector<bool> cb;
-    std::vector<bool> sb;
-    int seg_index;
-    //float Cmetal_per_m; /* Wire capacitance (per meter) */
-};
-
-inline bool operator==(const t_segment_inf& a, const t_segment_inf& b) {
-    return a.name == b.name && a.frequency == b.frequency && a.length == b.length && a.arch_wire_switch == b.arch_wire_switch && a.arch_opin_switch == b.arch_opin_switch && a.frac_cb == b.frac_cb && a.frac_sb == b.frac_sb && a.longline == b.longline && a.Rmetal == b.Rmetal && a.Cmetal == b.Cmetal && a.directionality == b.directionality && a.parallel_axis == b.parallel_axis && a.cb == b.cb && a.sb == b.sb;
-}
-
-/*provide hashing for t_segment_inf to enable the use of many std containers.
- * Only the most important/varying fields are used (not worth the extra overhead to include all fields)*/
-
-struct t_hash_segment_inf {
-    size_t operator()(const t_segment_inf& seg_inf) const noexcept {
-        size_t result;
-        result = ((((std::hash<std::string>()(seg_inf.name)
-                     ^ std::hash<int>()(seg_inf.frequency) << 10)
-                    ^ std::hash<int>()(seg_inf.length) << 20)
-                   ^ std::hash<int>()((int)seg_inf.arch_opin_switch) << 30));
-        return result;
-    }
-};
-enum class SwitchType {
-    MUX = 0,   //A configurable (buffered) mux (single-driver)
-    TRISTATE,  //A configurable tristate-able buffer (multi-driver)
-    PASS_GATE, //A configurable pass transitor switch (multi-driver)
-    SHORT,     //A non-configurable electrically shorted connection (multi-driver)
-    BUFFER,    //A non-configurable non-tristate-able buffer (uni-driver)
-    INVALID,   //Unspecified, usually an error
-    NUM_SWITCH_TYPES
-};
-constexpr std::array<const char*, size_t(SwitchType::NUM_SWITCH_TYPES)> SWITCH_TYPE_STRINGS = {{"MUX", "TRISTATE", "PASS_GATE", "SHORT", "BUFFER", "INVALID"}};
-
-/* Constant/Reserved names for switches in architecture XML
- * Delayless switch:
- *   The zero-delay switch created by VPR internally 
- *   This is a special switch just to ease CAD algorithms
- *   It is mainly used in
- *     - the edges between SOURCE and SINK nodes in routing resource graphs  
- *     - the edges in CLB-to-CLB connections (defined by <directlist> in arch XML)
- *   
- */
-constexpr const char* VPR_DELAYLESS_SWITCH_NAME = "__vpr_delayless_switch__";
-
-enum class BufferSize {
-    AUTO,
-    ABSOLUTE
-};
-
-/* Lists all the important information about a switch type read from the     *
- * architecture file.                                                        *
- * [0 .. Arch.num_switch]                                                    *
- * buffered:  Does this switch include a buffer?                             *
- * R:  Equivalent resistance of the buffer/switch.                           *
- * Cin:  Input capacitance.                                                  *
- * Cout:  Output capacitance.                                                *
- * Cinternal: Since multiplexers and tristate buffers are modeled as a       *
- *            parallel stream of pass transistors feeding into a buffer,     *
- *            we would expect an additional "internal capacitance"           *
- *            to arise when the pass transistor is enabled and the signal    *
- *            must propogate to the buffer. See diagram of one stream below: *
- *                                                                           *
- *                  Pass Transistor                                          *
- *                       |                                                   *
- *                     -----                                                 *
- *                     -----      Buffer                                     *
- *                    |     |       |\                                       *
- *              ------       -------| \--------                              *
- *                |             |   | /    |                                 *
- *              =====         ===== |/   =====                               *
- *              =====         =====      =====                               *
- *                |             |          |                                 *
- *             Input C    Internal C    Output C                             *
- *                                                                           *
- * Tdel_map: A map where the key is the number of inputs and the entry       *
- *           is the corresponding delay. If there is only one entry at key   *
- *           UNDEFINED, then delay is a constant (doesn't vary with fan-in). *
- *	         A map saves us the trouble of sorting, and has lower access     *
- *           time for interpolation/extrapolation purposes                   *
- * mux_trans_size:  The area of each transistor in the segment's driving mux *
- *                  measured in minimum width transistor units               *
- * buf_size:  The area of the buffer. If set to zero, area should be         *
- *            calculated from R                                              */
-struct t_arch_switch_inf {
-  public:
-    static constexpr int UNDEFINED_FANIN = -1;
-
-    char* name = nullptr;
-    float R = 0.;
-    float Cin = 0.;
-    float Cout = 0.;
-    float Cinternal = 0.;
-    float mux_trans_size = 1.;
-    BufferSize buf_size_type = BufferSize::AUTO;
-    float buf_size = 0.;
-    e_power_buffer_type power_buffer_type = POWER_BUFFER_TYPE_AUTO;
-    float power_buffer_size = 0.;
-
-  public:
-    //Returns the type of switch
-    SwitchType type() const;
-
-    //Returns true if this switch type isolates its input and output into
-    //separate DC-connected subcircuits
-    bool buffered() const;
-
-    //Returns true if this switch type is configurable
-    bool configurable() const;
-
-    //Returns whether the switch's directionality (e.g. BI_DIR, UNI_DIR)
-    e_directionality directionality() const;
-
-    //Returns the intrinsic delay of this switch
-    float Tdel(int fanin = UNDEFINED_FANIN) const;
-
-    //Returns true if the Tdel value is independent of fanout
-    bool fixed_Tdel() const;
-
-  public:
-    void set_Tdel(int fanin, float delay);
-    void set_type(SwitchType type_val);
-
-  private:
-    SwitchType type_ = SwitchType::INVALID;
-    std::map<int, double> Tdel_map_;
-
-    friend void PrintArchInfo(FILE*, const t_arch*);
-};
-
-/* Lists all the important information about an rr switch type.              *
- * The s_rr_switch_inf describes a switch derived from a switch described    *
- * by s_arch_switch_inf. This indirection allows us to vary properties of a  *
- * given switch, such as varying delay with switch fan-in.                   *
- * buffered:  Does this switch isolate it's input/output into separate       *
- *            DC-connected sub-circuits?                                     *
- * configurable: Is this switch is configurable (i.e. can the switch can be  *
- *               turned on or off)?. This allows modelling of non-optional   *
- *               switches (e.g. fixed buffers, or shorted connections) which *
- *               must be used (e.g. expanded by the router) if a connected   *
- *               segment is used.                                            *
- * R:  Equivalent resistance of the buffer/switch.                           *
- * Cin:  Input capacitance.                                                  *
- * Cout:  Output capacitance.                                                *
- * Cinternal: Internal capacitance, see the definition above.                *
- * Tdel:  Intrinsic delay.  The delay through an unloaded switch is          *
- *        Tdel + R * Cout.                                                   *
- * mux_trans_size:  The area of each transistor in the segment's driving mux *
- *                  measured in minimum width transistor units               *
- * buf_size:  The area of the buffer. If set to zero, area should be         *
- *            calculated from R                                              */
-struct t_rr_switch_inf {
-    float R = 0.;
-    float Cin = 0.;
-    float Cout = 0.;
-    float Cinternal = 0.;
-    float Tdel = 0.;
-    float mux_trans_size = 0.;
-    float buf_size = 0.;
-    const char* name = nullptr;
-    e_power_buffer_type power_buffer_type = POWER_BUFFER_TYPE_UNDEFINED;
-    float power_buffer_size = 0.;
-
-  public:
-    //Returns the type of switch
-    SwitchType type() const;
-
-    //Returns true if this switch type isolates its input and output into
-    //seperate DC-connected subcircuits
-    bool buffered() const;
-
-    //Returns true if this switch type is configurable
-    bool configurable() const;
-
-  public:
-    void set_type(SwitchType type_val);
-
-  private:
-    SwitchType type_ = SwitchType::INVALID;
-};
-
-/* Lists all the important information about a direct chain connection.     *
- * [0 .. det_routing_arch.num_direct]                                       *
- * name:  Name of this direct chain connection                              *
- * from_pin:  The type of the pin that drives this chain connection         *
- * In the format of <block_name>.<pin_name>                      *
- * to_pin:  The type of pin that is driven by this chain connection         *
- * In the format of <block_name>.<pin_name>                        *
- * x_offset:  The x offset from the source to the sink of this connection   *
- * y_offset:  The y offset from the source to the sink of this connection   *
- * z_offset:  The z offset from the source to the sink of this connection   *
- * switch_type: The index into the switch list for the switch used by this  *
- *              direct                                                      *
- * line: The line number in the .arch file that specifies this              *
- *       particular placement macro.                                        *
- */
-struct t_direct_inf {
-    char* name;
-    char* from_pin;
-    char* to_pin;
-    int x_offset;
-    int y_offset;
-    int sub_tile_offset;
-    int switch_type;
-    e_side from_side;
-    e_side to_side;
-    int line;
-};
-
-enum class SwitchPointOrder {
-    FIXED,   //Switchpoints are ordered as specified in architecture
-    SHUFFLED //Switchpoints are shuffled (more diversity)
-};
-
-//A collection of switchpoints associated with a segment
-struct t_wire_switchpoints {
-    std::string segment_name;      //The type of segment
-    std::vector<int> switchpoints; //The indices of wire points along the segment
-};
-
-/* Used to list information about a set of track segments that should connect through a switchblock */
-struct t_wireconn_inf {
-    std::vector<t_wire_switchpoints> from_switchpoint_set;             //The set of segment/wirepoints representing the 'from' set (union of all t_wire_switchpoints in vector)
-    std::vector<t_wire_switchpoints> to_switchpoint_set;               //The set of segment/wirepoints representing the 'to' set (union of all t_wire_switchpoints in vector)
-    SwitchPointOrder from_switchpoint_order = SwitchPointOrder::FIXED; //The desired from_switchpoint_set ordering
-    SwitchPointOrder to_switchpoint_order = SwitchPointOrder::FIXED;   //The desired to_switchpoint_set ordering
-
-    std::string num_conns_formula; /* Specifies how many connections should be made for this wireconn.
-                                    *
-                                    * '<int>': A specific number of connections
-                                    * 'from':  The number of generated connections between the 'from' and 'to' sets equals the
-                                    *          size of the 'from' set. This ensures every element in the from set is connected
-                                    *          to an element of the 'to' set.
-                                    *          Note: this it may result in 'to' elements being driven by multiple 'from'
-                                    *          elements (if 'from' is larger than 'to'), or in some elements of 'to' having
-                                    *          no driving connections (if 'to' is larger than 'from').
-                                    * 'to':    The number of generated connections is set equal to the size of the 'to' set.
-                                    *          This ensures that each element of the 'to' set has precisely one incomming connection.
-                                    *          Note: this may result in 'from' elements driving multiple 'to' elements (if 'to' is
-                                    *          larger than 'from'), or some 'from' elements driving to 'to' elements (if 'from' is
-                                    *          larger than 'to')
-                                    */
-};
-
-/* represents a connection between two sides of a switchblock */
-class SB_Side_Connection {
-  public:
-    /* specify the two SB sides that form a connection */
-    enum e_side from_side = TOP;
-    enum e_side to_side = TOP;
-
-    void set_sides(enum e_side from, enum e_side to) {
-        from_side = from;
-        to_side = to;
-    }
-
-    SB_Side_Connection() = default;
-
-    SB_Side_Connection(enum e_side from, enum e_side to)
-        : from_side(from)
-        , to_side(to) {
-    }
-
-    /* overload < operator which will be used by std::map */
-    bool operator<(const SB_Side_Connection& obj) const {
-        bool result;
-
-        if (from_side < obj.from_side) {
-            result = true;
-        } else {
-            if (from_side == obj.from_side) {
-                result = (to_side < obj.to_side) ? true : false;
-            } else {
-                result = false;
-            }
-        }
-
-        return result;
-    }
-};
-
-/* Use a map to index into the string permutation functions used to connect from one side to another */
-typedef std::map<SB_Side_Connection, std::vector<std::string>> t_permutation_map;
-
-/* Lists all information about a particular switch block specified in the architecture file */
-struct t_switchblock_inf {
-    std::string name;                /* the name of this switchblock */
-    e_sb_location location;          /* where on the FPGA this switchblock should be built (i.e. perimeter, core, everywhere) */
-    e_directionality directionality; /* the directionality of this switchblock (unidir/bidir) */
-
-    t_permutation_map permutation_map; /* map holding the permutation functions attributed to this switchblock */
-
-    std::vector<t_wireconn_inf> wireconns; /* list of wire types/groups this SB will connect */
-};
-
-/* Clock related data types used for building a dedicated clock network */
-struct t_clock_arch_spec {
-    std::vector<t_clock_network_arch> clock_networks_arch;
-    std::unordered_map<std::string, t_metal_layer> clock_metal_layers;
-    std::vector<t_clock_connection_arch> clock_connections_arch;
-};
-
-struct t_lut_cell {
-    std::string name;
-    std::string init_param;
-    std::vector<std::string> inputs;
-};
-
-struct t_lut_bel {
-    std::string name;
-
-    std::vector<std::string> input_pins;
-    std::string output_pin;
-
-    bool operator==(const t_lut_bel& other) const {
-        return name == other.name && input_pins == other.input_pins && output_pin == other.output_pin;
-    }
-};
-
-struct t_lut_element {
-    std::string site_type;
-    int width;
-    std::vector<t_lut_bel> lut_bels;
-
-    bool operator==(const t_lut_element& other) const {
-        return site_type == other.site_type && width == other.width && lut_bels == other.lut_bels;
-    }
-};
-
-/**
- * Represents a Network-on-chip(NoC) Router data type. It is used
- * to store individual router information when parsing the arch file.
- * */
-struct t_router {
-    /** A unique id provided by the user to identify a router. Must be a positive value*/
-    int id = -1;
-
-    /** A value representing the approximate horizontal position on the FPGA device where the router
-     * tile is located*/
-    double device_x_position = -1;
-    /** A value representing the approximate vertical position on the FPGA device where the router
-     * tile is located*/
-    double device_y_position = -1;
-
-    /** A list of router ids that are connected to the current router*/
-    std::vector<int> connection_list;
-};
-
-/**
- * Network-on-chip(NoC) data type used to store the network properties
- * when parsing the arh file. This is used when building the dedicated on-chip
- * network during the device creation.
- * */
-struct t_noc_inf {
-    double link_bandwidth; /*!< The maximum bandwidth supported in the NoC. This value is the same for all links. units in bps*/
-    double link_latency;   /*!< The worst case latency seen when traversing a link. This value is the same for all links. units in seconds*/
-    double router_latency; /*!< The worst case latency seen when traversing a router. This value is the same for all routers, units in seconds*/
-
-    /** A list of all routers in the NoC*/
-    std::vector<t_router> router_list;
-
-    /** Represents the name of a router tile on the FPGA device. This should match the name used in the arch file when
-     * describing a NoC router tile within the FPGA device*/
-    std::string noc_router_tile_name;
-};
-
-/*   Detailed routing architecture */
-struct t_arch {
-    mutable vtr::string_internment strings;
-    std::vector<vtr::interned_string> interned_strings;
-
-    char* architecture_id; //Secure hash digest of the architecture file to uniquely identify this architecture
-
-    t_chan_width_dist Chans;
-    enum e_switch_block_type SBType;
-    std::vector<t_switchblock_inf> switchblocks;
-    float R_minW_nmos;
-    float R_minW_pmos;
-    int Fs;
-    float grid_logic_tile_area;
-    std::vector<t_segment_inf> Segments;
-    t_arch_switch_inf* Switches = nullptr;
-    int num_switches;
-    t_direct_inf* Directs = nullptr;
-    int num_directs = 0;
-
-    t_model* models = nullptr;
-    t_model* model_library = nullptr;
-
-    t_power_arch* power = nullptr;
-    t_clock_arch* clocks = nullptr;
-
-    // Constants
-    // VCC and GND cells are special virtual cells that are
-    // used to handle the constant network of the device.
-    //
-    // Similarly, the constant nets are defined to identify
-    // the generic name for the constant network.
-    //
-    // Given that usually, the constants have a dedicated network in
-    // real FPGAs, this information becomes relevant to identify which
-    // nets from the circuit netlist are belonging to the constant network,
-    // and assigned to it accordingly.
-    //
-    // NOTE: At the moment, the constant cells and nets are primarly used
-    // for the interchange netlist format, to determine which are the constants
-    // net names and which virtual cell is responsible to generate them.
-    // The information is present in the device database.
-    std::pair<std::string, std::string> gnd_cell;
-    std::pair<std::string, std::string> vcc_cell;
-
-    std::string gnd_net = "$__gnd_net";
-    std::string vcc_net = "$__vcc_net";
-
-    // Luts
-    std::vector<t_lut_cell> lut_cells;
-    std::unordered_map<std::string, std::vector<t_lut_element>> lut_elements;
-
-    //The name of the switch used for the input connection block (i.e. to
-    //connect routing tracks to block pins).
-    //This should correspond to a switch in Switches
-    std::string ipin_cblock_switch_name;
-
-    std::vector<t_grid_def> grid_layouts; //Set of potential device layouts
-
-    t_clock_arch_spec clock_arch; // Clock related data types
-
-    // if we have an embedded NoC in the architecture, then we store it here
-    t_noc_inf* noc = nullptr;
-};
-
-#endif
diff --git a/third_party/vtr/libs/archfpga/src/physical_types_util.cc b/third_party/vtr/libs/archfpga/src/physical_types_util.cc
deleted file mode 100644
index 6971d883a..000000000
--- a/third_party/vtr/libs/archfpga/src/physical_types_util.cc
+++ /dev/null
@@ -1,492 +0,0 @@
-#include "vtr_assert.h"
-#include "vtr_memory.h"
-#include "vtr_util.h"
-
-#include "arch_types.h"
-#include "arch_util.h"
-#include "arch_error.h"
-
-#include "physical_types_util.h"
-
-/**
- * @brief Data structure that holds information about a phyisical pin
- *
- * This structure holds the following information on a pin:
- *   - sub_tile_index: index of the sub tile within the physical tile type containing this pin
- *   - capacity_instance: sub tile instance containing this physical pin.
- *                        Each sub tile has a capacity field, which determines how many of its
- *                        instances are present in the belonging physical tile.
- *                        E.g.:
- *                          - The sub tile BUFG has a capacity of 4 within its belonging physical tile CLOCK_TILE.
- *                          - The capacity instance of a pin in the CLOCK_TILE identifies which of the 4 instances
- *                            the pin belongs to.
- *   - port_index: Each sub tile has a set of ports with a variable number of pins. The port_index field identifies
- *                 which port the physical pin belongs to.
- *   - pin_index_in_port: Given that ports can have multiple pins, we need also a field to identify which one of the
- *                        multiple pins of the port corresponds to the physical pin.
- *
- */
-struct t_pin_inst_port {
-    int sub_tile_index;    // Sub Tile index
-    int capacity_instance; // within capacity
-    int port_index;        // Port index
-    int pin_index_in_port; // Pin's index within the port
-};
-
-/******************** Subroutine declarations and definition ****************************/
-
-static std::tuple<int, int, int> get_pin_index_for_inst(t_physical_tile_type_ptr type, int pin_index);
-static t_pin_inst_port block_type_pin_index_to_pin_inst(t_physical_tile_type_ptr type, int pin_index);
-
-static std::tuple<int, int, int> get_pin_index_for_inst(t_physical_tile_type_ptr type, int pin_index) {
-    VTR_ASSERT(pin_index < type->num_pins);
-
-    int total_pin_counts = 0;
-    int pin_offset = 0;
-    for (auto& sub_tile : type->sub_tiles) {
-        total_pin_counts += sub_tile.num_phy_pins;
-
-        if (pin_index < total_pin_counts) {
-            int pins_per_inst = sub_tile.num_phy_pins / sub_tile.capacity.total();
-            int inst_num = (pin_index - pin_offset) / pins_per_inst;
-            int inst_index = (pin_index - pin_offset) % pins_per_inst;
-
-            return std::make_tuple(inst_index, inst_num, sub_tile.index);
-        }
-
-        pin_offset += sub_tile.num_phy_pins;
-    }
-
-    archfpga_throw(__FILE__, __LINE__,
-                   "Could not infer the correct pin instance index for %s (pin index: %d)", type->name, pin_index);
-}
-
-static t_pin_inst_port block_type_pin_index_to_pin_inst(t_physical_tile_type_ptr type, int pin_index) {
-    int sub_tile_index, inst_num;
-    std::tie<int, int, int>(pin_index, inst_num, sub_tile_index) = get_pin_index_for_inst(type, pin_index);
-
-    t_pin_inst_port pin_inst_port;
-    pin_inst_port.sub_tile_index = sub_tile_index;
-    pin_inst_port.capacity_instance = inst_num;
-    pin_inst_port.port_index = OPEN;
-    pin_inst_port.pin_index_in_port = OPEN;
-
-    for (auto const& port : type->sub_tiles[sub_tile_index].ports) {
-        if (pin_index >= port.absolute_first_pin_index && pin_index < port.absolute_first_pin_index + port.num_pins) {
-            pin_inst_port.port_index = port.index;
-            pin_inst_port.pin_index_in_port = pin_index - port.absolute_first_pin_index;
-            break;
-        }
-    }
-    return pin_inst_port;
-}
-
-/******************** End Subroutine declarations and definition ************************/
-
-int get_sub_tile_physical_pin(int sub_tile_index,
-                              t_physical_tile_type_ptr physical_tile,
-                              t_logical_block_type_ptr logical_block,
-                              int pin) {
-    t_logical_pin logical_pin(pin);
-
-    const auto& direct_map = physical_tile->tile_block_pin_directs_map.at(logical_block->index).at(sub_tile_index);
-    auto result = direct_map.find(logical_pin);
-
-    if (result == direct_map.end()) {
-        archfpga_throw(__FILE__, __LINE__,
-                       "Couldn't find the corresponding physical tile pin of the logical block pin %d."
-                       "Physical Tile Type: %s, Logical Block Type: %s.\n",
-                       pin, physical_tile->name, logical_block->name);
-    }
-
-    return result->second.pin;
-}
-
-int get_logical_block_physical_sub_tile_index(t_physical_tile_type_ptr physical_tile,
-                                              t_logical_block_type_ptr logical_block) {
-    int sub_tile_index = OPEN;
-    for (const auto& sub_tile : physical_tile->sub_tiles) {
-        auto eq_sites = sub_tile.equivalent_sites;
-        auto it = std::find(eq_sites.begin(), eq_sites.end(), logical_block);
-        if (it != eq_sites.end()) {
-            sub_tile_index = sub_tile.index;
-        }
-    }
-
-    if (sub_tile_index == OPEN) {
-        archfpga_throw(__FILE__, __LINE__,
-                       "Found no instances of logical block type '%s' within physical tile type '%s'. ",
-                       logical_block->name, physical_tile->name);
-    }
-
-    return sub_tile_index;
-}
-
-int get_physical_pin(t_physical_tile_type_ptr physical_tile,
-                     t_logical_block_type_ptr logical_block,
-                     int pin) {
-    int sub_tile_index = get_logical_block_physical_sub_tile_index(physical_tile, logical_block);
-
-    if (sub_tile_index == OPEN) {
-        archfpga_throw(__FILE__, __LINE__,
-                       "Couldn't find the corresponding physical tile type pin of the logical block type pin %d.",
-                       pin);
-    }
-
-    int sub_tile_physical_pin = get_sub_tile_physical_pin(sub_tile_index, physical_tile, logical_block, pin);
-    return physical_tile->sub_tiles[sub_tile_index].sub_tile_to_tile_pin_indices[sub_tile_physical_pin];
-}
-
-int get_logical_block_physical_sub_tile_index(t_physical_tile_type_ptr physical_tile,
-                                              t_logical_block_type_ptr logical_block,
-                                              int sub_tile_capacity) {
-    int sub_tile_index = OPEN;
-    for (const auto& sub_tile : physical_tile->sub_tiles) {
-        auto eq_sites = sub_tile.equivalent_sites;
-        auto it = std::find(eq_sites.begin(), eq_sites.end(), logical_block);
-        if (it != eq_sites.end()
-            && (sub_tile.capacity.is_in_range(sub_tile_capacity))) {
-            sub_tile_index = sub_tile.index;
-            break;
-        }
-    }
-
-    if (sub_tile_index == OPEN) {
-        archfpga_throw(__FILE__, __LINE__,
-                       "Found no instances of logical block type '%s' within physical tile type '%s'. ",
-                       logical_block->name, physical_tile->name);
-    }
-
-    return sub_tile_index;
-}
-
-/**
- * This function returns the most common physical tile type given a logical block
- */
-t_physical_tile_type_ptr pick_physical_type(t_logical_block_type_ptr logical_block) {
-    return logical_block->equivalent_tiles[0];
-}
-
-t_logical_block_type_ptr pick_logical_type(t_physical_tile_type_ptr physical_tile) {
-    return physical_tile->sub_tiles[0].equivalent_sites[0];
-}
-
-bool is_tile_compatible(t_physical_tile_type_ptr physical_tile, t_logical_block_type_ptr logical_block) {
-    const auto& equivalent_tiles = logical_block->equivalent_tiles;
-    return std::find(equivalent_tiles.begin(), equivalent_tiles.end(), physical_tile) != equivalent_tiles.end();
-}
-
-bool is_sub_tile_compatible(t_physical_tile_type_ptr physical_tile, t_logical_block_type_ptr logical_block, int sub_tile_loc) {
-    bool capacity_compatible = false;
-    for (auto& sub_tile : physical_tile->sub_tiles) {
-        auto result = std::find(sub_tile.equivalent_sites.begin(), sub_tile.equivalent_sites.end(), logical_block);
-
-        if (sub_tile.capacity.is_in_range(sub_tile_loc) && result != sub_tile.equivalent_sites.end()) {
-            capacity_compatible = true;
-            break;
-        }
-    }
-
-    return capacity_compatible && is_tile_compatible(physical_tile, logical_block);
-}
-
-int get_physical_pin_at_sub_tile_location(t_physical_tile_type_ptr physical_tile,
-                                          t_logical_block_type_ptr logical_block,
-                                          int sub_tile_capacity,
-                                          int pin) {
-    int sub_tile_index = get_logical_block_physical_sub_tile_index(physical_tile, logical_block, sub_tile_capacity);
-
-    if (sub_tile_index == OPEN) {
-        archfpga_throw(__FILE__, __LINE__,
-                       "Couldn't find the corresponding physical tile type pin of the logical block type pin %d.",
-                       pin);
-    }
-
-    int sub_tile_physical_pin = get_sub_tile_physical_pin(sub_tile_index, physical_tile, logical_block, pin);
-
-    /* Find the relative capacity of the logical_block in this sub tile */
-    int relative_capacity = sub_tile_capacity - physical_tile->sub_tiles[sub_tile_index].capacity.low;
-
-    /* Find the number of pins per block in the equivalent site list
-     * of the sub tile. Otherwise, the current logical block may have smaller/larger number of pins
-     * than other logical blocks that can be placed in the sub-tile. This will lead to an error
-     * when computing the pin index!
-     */
-    int block_num_pins = physical_tile->sub_tiles[sub_tile_index].num_phy_pins / physical_tile->sub_tiles[sub_tile_index].capacity.total();
-
-    return relative_capacity * block_num_pins
-           + physical_tile->sub_tiles[sub_tile_index].sub_tile_to_tile_pin_indices[sub_tile_physical_pin];
-}
-
-int get_max_num_pins(t_logical_block_type_ptr logical_block) {
-    int max_num_pins = 0;
-
-    for (auto physical_tile : logical_block->equivalent_tiles) {
-        max_num_pins = std::max(max_num_pins, physical_tile->num_pins);
-    }
-
-    return max_num_pins;
-}
-
-//Returns the pin class associated with the specified pin_index_in_port within the port port_name on type
-int find_pin_class(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port, e_pin_type pin_type) {
-    int iclass = OPEN;
-
-    int ipin = find_pin(type, port_name, pin_index_in_port);
-
-    if (ipin != OPEN) {
-        iclass = type->pin_class[ipin];
-
-        if (iclass != OPEN) {
-            VTR_ASSERT(type->class_inf[iclass].type == pin_type);
-        }
-    }
-    return iclass;
-}
-
-int find_pin(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port) {
-    int ipin = OPEN;
-    int port_base_ipin = 0;
-    int num_pins = OPEN;
-    int pin_offset = 0;
-
-    bool port_found = false;
-    for (const auto& sub_tile : type->sub_tiles) {
-        for (const auto& port : sub_tile.ports) {
-            if (0 == strcmp(port.name, port_name.c_str())) {
-                port_found = true;
-                num_pins = port.num_pins;
-                break;
-            }
-
-            port_base_ipin += port.num_pins;
-        }
-
-        if (port_found) {
-            break;
-        }
-
-        port_base_ipin = 0;
-        pin_offset += sub_tile.num_phy_pins;
-    }
-
-    if (num_pins != OPEN) {
-        VTR_ASSERT(pin_index_in_port < num_pins);
-
-        ipin = port_base_ipin + pin_index_in_port + pin_offset;
-    }
-
-    return ipin;
-}
-
-std::pair<int, int> get_capacity_location_from_physical_pin(t_physical_tile_type_ptr physical_tile, int pin) {
-    int pins_to_remove = 0;
-    for (auto sub_tile : physical_tile->sub_tiles) {
-        auto capacity = sub_tile.capacity;
-        int sub_tile_num_pins = sub_tile.num_phy_pins;
-        int sub_tile_pin = pin - pins_to_remove;
-
-        if (sub_tile_pin < sub_tile_num_pins) {
-            int rel_capacity = sub_tile_pin / (sub_tile_num_pins / capacity.total());
-            int rel_pin = sub_tile_pin % (sub_tile_num_pins / capacity.total());
-
-            return std::pair<int, int>(rel_capacity + capacity.low, rel_pin);
-        }
-
-        pins_to_remove += sub_tile_num_pins;
-    }
-
-    archfpga_throw(__FILE__, __LINE__,
-                   "Couldn't find sub tile that contains the pin %d in physical tile %s.\n",
-                   pin, physical_tile->name);
-}
-
-int get_physical_pin_from_capacity_location(t_physical_tile_type_ptr physical_tile, int relative_pin, int capacity_location) {
-    int pins_to_add = 0;
-    for (auto sub_tile : physical_tile->sub_tiles) {
-        auto capacity = sub_tile.capacity;
-        int rel_capacity = capacity_location - capacity.low;
-        int num_inst_pins = sub_tile.num_phy_pins / capacity.total();
-
-        if (capacity.is_in_range(capacity_location)) {
-            return pins_to_add + num_inst_pins * rel_capacity + relative_pin;
-        }
-
-        pins_to_add += sub_tile.num_phy_pins;
-    }
-
-    archfpga_throw(__FILE__, __LINE__,
-                   "Couldn't find sub tile that contains the relative pin %d at the capacity location %d in physical tile %s.\n",
-                   relative_pin, capacity_location, physical_tile->name);
-}
-bool is_opin(int ipin, t_physical_tile_type_ptr type) {
-    /* Returns true if this clb pin is an output, false otherwise. */
-
-    if (ipin > type->num_pins) {
-        //Not a top level pin
-        return false;
-    }
-
-    int iclass = type->pin_class[ipin];
-
-    if (type->class_inf[iclass].type == DRIVER)
-        return true;
-    else
-        return false;
-}
-
-// TODO: Remove is_input_type / is_output_type / is_io_type as part of
-// https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1193
-bool is_input_type(t_physical_tile_type_ptr type) {
-    return type->is_input_type;
-}
-
-bool is_output_type(t_physical_tile_type_ptr type) {
-    return type->is_output_type;
-}
-
-bool is_io_type(t_physical_tile_type_ptr type) {
-    return is_input_type(type)
-           || is_output_type(type);
-}
-
-std::string block_type_pin_index_to_name(t_physical_tile_type_ptr type, int pin_index) {
-    VTR_ASSERT(pin_index < type->num_pins);
-
-    std::string pin_name = type->name;
-
-    int sub_tile_index, inst_num;
-    std::tie<int, int, int>(pin_index, inst_num, sub_tile_index) = get_pin_index_for_inst(type, pin_index);
-
-    if (type->sub_tiles[sub_tile_index].capacity.total() > 1) {
-        pin_name += "[" + std::to_string(inst_num) + "]";
-    }
-
-    pin_name += ".";
-
-    for (auto const& port : type->sub_tiles[sub_tile_index].ports) {
-        if (pin_index >= port.absolute_first_pin_index && pin_index < port.absolute_first_pin_index + port.num_pins) {
-            //This port contains the desired pin index
-            int index_in_port = pin_index - port.absolute_first_pin_index;
-            pin_name += port.name;
-            pin_name += "[" + std::to_string(index_in_port) + "]";
-            return pin_name;
-        }
-    }
-
-    return "<UNKOWN>";
-}
-
-std::vector<std::string> block_type_class_index_to_pin_names(t_physical_tile_type_ptr type, int class_index) {
-    VTR_ASSERT(class_index < (int)type->class_inf.size());
-
-    auto class_inf = type->class_inf[class_index];
-
-    std::vector<t_pin_inst_port> pin_info;
-    for (int ipin = 0; ipin < class_inf.num_pins; ++ipin) {
-        int pin_index = class_inf.pinlist[ipin];
-        pin_info.push_back(block_type_pin_index_to_pin_inst(type, pin_index));
-    }
-
-    auto cmp = [](const t_pin_inst_port& lhs, const t_pin_inst_port& rhs) {
-        return std::tie(lhs.capacity_instance, lhs.port_index, lhs.pin_index_in_port)
-               < std::tie(rhs.capacity_instance, rhs.port_index, rhs.pin_index_in_port);
-    };
-
-    //Ensure all the pins are in order
-    std::sort(pin_info.begin(), pin_info.end(), cmp);
-
-    //Determine ranges for each capacity instance and port pair
-    std::map<std::tuple<int, int, int>, std::pair<int, int>> pin_ranges;
-    for (const auto& pin_inf : pin_info) {
-        auto key = std::make_tuple(pin_inf.sub_tile_index, pin_inf.capacity_instance, pin_inf.port_index);
-        if (!pin_ranges.count(key)) {
-            pin_ranges[key].first = pin_inf.pin_index_in_port;
-            pin_ranges[key].second = pin_inf.pin_index_in_port;
-        } else {
-            VTR_ASSERT(pin_ranges[key].second == pin_inf.pin_index_in_port - 1);
-            pin_ranges[key].second = pin_inf.pin_index_in_port;
-        }
-    }
-
-    //Format pin ranges
-    std::vector<std::string> pin_names;
-    for (auto kv : pin_ranges) {
-        auto type_port = kv.first;
-        auto pins = kv.second;
-
-        int isub_tile, icapacity, iport;
-        std::tie<int, int, int>(isub_tile, icapacity, iport) = type_port;
-
-        int ipin_start = pins.first;
-        int ipin_end = pins.second;
-
-        auto& sub_tile = type->sub_tiles[isub_tile];
-
-        std::string pin_name;
-        if (ipin_start == ipin_end) {
-            pin_name = vtr::string_fmt("%s[%d].%s[%d]",
-                                       type->name,
-                                       icapacity,
-                                       sub_tile.ports[iport].name,
-                                       ipin_start);
-        } else {
-            pin_name = vtr::string_fmt("%s[%d].%s[%d:%d]",
-                                       type->name,
-                                       icapacity,
-                                       sub_tile.ports[iport].name,
-                                       ipin_start,
-                                       ipin_end);
-        }
-
-        pin_names.push_back(pin_name);
-    }
-
-    return pin_names;
-}
-
-const t_physical_tile_port* get_port_by_name(t_sub_tile* sub_tile, const char* port_name) {
-    for (auto port : sub_tile->ports) {
-        if (0 == strcmp(port.name, port_name)) {
-            return &sub_tile->ports[port.index];
-        }
-    }
-
-    return nullptr;
-}
-
-const t_port* get_port_by_name(t_logical_block_type_ptr type, const char* port_name) {
-    auto pb_type = type->pb_type;
-
-    for (int i = 0; i < pb_type->num_ports; i++) {
-        auto port = pb_type->ports[i];
-        if (0 == strcmp(port.name, port_name)) {
-            return &pb_type->ports[port.index];
-        }
-    }
-
-    return nullptr;
-}
-
-const t_physical_tile_port* get_port_by_pin(const t_sub_tile* sub_tile, int pin) {
-    for (auto port : sub_tile->ports) {
-        if (pin >= port.absolute_first_pin_index && pin < port.absolute_first_pin_index + port.num_pins) {
-            return &sub_tile->ports[port.index];
-        }
-    }
-
-    return nullptr;
-}
-
-const t_port* get_port_by_pin(t_logical_block_type_ptr type, int pin) {
-    auto pb_type = type->pb_type;
-
-    for (int i = 0; i < pb_type->num_ports; i++) {
-        auto port = pb_type->ports[i];
-        if (pin >= port.absolute_first_pin_index && pin < port.absolute_first_pin_index + port.num_pins) {
-            return &pb_type->ports[port.index];
-        }
-    }
-
-    return nullptr;
-}
diff --git a/third_party/vtr/libs/archfpga/src/physical_types_util.h b/third_party/vtr/libs/archfpga/src/physical_types_util.h
deleted file mode 100644
index 71c74a614..000000000
--- a/third_party/vtr/libs/archfpga/src/physical_types_util.h
+++ /dev/null
@@ -1,304 +0,0 @@
-#ifndef PHYSICAL_TYPES_UTIL_H
-#define PHYSICAL_TYPES_UTIL_H
-
-#include "physical_types.h"
-
-/********************************************************************
- *                                                                  *
- *  Physical types utility functions                                *
- *                                                                  *
- *  This source file contains several utilities that enable the     *
- *  interaction with the architecture's physical types.             *
- *  Mainly, the two classes of objects accessed by the utility      *
- *  functions in this file are the following:                       *
- *    - physical_tile_type: identifies a placeable tile within      *
- *                          the device grid.                        *
- *    - logical_block_tpye: identifies a clustered block type       *
- *                          within the clb_netlist                  *
- *                                                                  *
- *  All the following utilities are intended to ease the            *
- *  developement to access the above mentioned classes and perform  *
- *  some required operations with their data.                       *
- *                                                                  *
- *  Please classify such functions in this file                     *
- *                                                                  *
- * ******************************************************************/
-
-/*
- * Terms definition.
- *
- * This comment helps in clarifying what some of the data types correspond to
- * and what their purpose is, for a better understanding of the utility routines.
- *
- *   - logical_pin_index: corresponds to the absolute pin index of a logical block type.
- *   - physical_pin_index: corresponds to the absolute pin index of a physical tile type.
- *   - sub tile: component of a physical tile type.
- *               For further information on sub tiles refer to the documentation at:
- *               https://docs.verilogtorouting.org/en/latest/arch/reference/#tag-%3Csub_tilename
- *   - capacity: corresponds to the total number of instances of a sub tile within the belonging
- *               physical tile.
- *
- * Given that, each physical tile type can be used to place a different number/type of logical block types
- * it is necessary to have at disposal utilities to correctly synchronize and connect a logical block
- * to a compatible physical tile.
- *
- * For instance, if there are multiple physical tile types compatible with a logical type, it must be assumed
- * that the there is no 1:1 mapping between the logical block pins and the physical tiles one.
- *
- * To clarify, imagine a situation as follows:
- *
- *   - BUFG logical block type:
- *
- *        *----------------*
- *        |                |
- *    --->| CLK        OUT |--->
- *        |                |
- *        *----------------*
- *
- *     The logical pin indices are:
- *       - CLK: 0
- *       - OUT: 1
- *
- *   - CLOCK TILE physical tile type, containing a BUFGCTRL sub tile of capacity of 2:
- *
- *        *--------------------------*
- *        |                          |
- *        |    BUFGCTRL sub tile     |
- *        |    Instance 1            |
- *        |    *----------------*    |
- *        |    |                |    |
- *  CLK_1 |--->| CLK        OUT |--->| OUT_1
- *  EN_1  |--->| EN             |    |
- *        |    |                |    |
- *        |    *----------------*    |
- *        |                          |
- *        |    BUFGCTRL sub tile     |
- *        |    Instance 2            |
- *        |    *----------------*    |
- *        |    |                |    |
- *  CLK_2 |--->| CLK        OUT |--->| OUT_2
- *  EN_2  |--->| EN             |    |
- *        |    |                |    |
- *        |    *----------------*    |
- *        |                          |
- *        *--------------------------*
- *
- *     The physical pin indices are:
- *       - CLK_1: 0
- *       - EN_1 : 1
- *       - OUT_1: 2
- *       - CLK_2: 3
- *       - EN_2 : 4
- *       - OUT_2: 5
- *
- * The BUFG logical block can be placed in a BUFGCTRL sub tile of the CLOCK TILE physical tile.
- * As visible in the diagram, the CLOCK TILE contains a total of 6 physical pins, 3 for each instance,
- * and the logical block contains only 2 pins, but still, it is compatible to be placed within
- * the BUFGCTRL sub tile.
- *
- * One of the purposes of the following utility functions is to correctly identify the relation
- * of a logical pin (e.g. CLK of the BUFG logical block type) with the corresponding physical tile index
- * (e.g. CLK_2 of the CLOCK TILE physical tile, in case the logical block is placed on the second instance
- * of the BUFGCTRL sub tile).
- *
- * With the assumption that there is no 1:1 mapping between logical block and sub tile pins
- * (e.g. EN pin in the BUFGCTRL sub tile), there is some extra computation and data structures
- * needed to correctly identify the relation between the pins.
- *
- * For instance, the following information are required:
- *   - mapping between logical and sub tile pins.
- *   - mapping between sub tile pins and absoulte physical pin
- *   - capacity instance of the sub tile
- *
- * With all the above information we can calculate correctly the connection between the CLK (logical pin)
- * and CLK_2 (physical pin) from the BUFG (logical block) and CLOCK TILE (physical tile).
- */
-
-///@brief Returns true if the absolute physical pin index is an output of the given physical tile type
-bool is_opin(int ipin, t_physical_tile_type_ptr type);
-
-///@brief Returns true if the given physical tile type can implement a .input block type
-bool is_input_type(t_physical_tile_type_ptr type);
-///@brief Returns true if the given physical tile type can implement a .output block type
-bool is_output_type(t_physical_tile_type_ptr type);
-///@brief Returns true if the given physical tile type can implement either a .input or .output block type
-bool is_io_type(t_physical_tile_type_ptr type);
-
-/**
- * @brief Returns the corresponding physical pin based on the input parameters:
- *
- * - physical_tile
- * - relative_pin: this is the pin relative to a specific sub tile
- * - capacity location: absolute sub tile location
- *
- * Take the above CLOCK TILE example:
- *   - we want to get the absolute physical pin corresponding to the first pin
- *     of the second instance of the BUFGCTRL sub tile
- *
- *   int pin = get_physical_pin_from_capacity_location(clock_tile, 0, 1);
- *
- *   This function call returns the absolute pin index of the CLK_1 pin (assumed that it is the first pin of the sub tile).
- *   The value returned in this case is 3.
- *   Note: capacity and pin indices start from zero.
- */
-int get_physical_pin_from_capacity_location(t_physical_tile_type_ptr physical_tile, int relative_pin, int capacity_location);
-
-/**
- * @brief Returns a pair consisting of the absolute capacity location relative to the pin parameter
- *
- *
- * Take the above CLOCK TILE example:
- *   - given the CLOCK TILE and the index corresponding to the CLK_1 pin, we want the relative pin
- *     of one of its sub tiles at a particualr capacity location (i.e. sub tile instance).
- *
- * std::tie(absolute_capacity, relative_pin) = get_capacity_location_from_physical_pin(clock_tile, 3)
- *
- * The value returned is (1, 0), where:
- *   - 1 corresponds to the capacity location (sub tile instance) where the absoulte physical pin index (CLK_1) is connected
- *   - 0 corresponds to the relative pin index within the BUFGCTRL sub tile
- */
-std::pair<int, int> get_capacity_location_from_physical_pin(t_physical_tile_type_ptr physical_tile, int pin);
-
-///@brief Returns the name of the pin_index'th pin on the specified block type
-std::string block_type_pin_index_to_name(t_physical_tile_type_ptr type, int pin_index);
-
-///@brief Returns the name of the class_index'th pin class on the specified block type
-std::vector<std::string> block_type_class_index_to_pin_names(t_physical_tile_type_ptr type, int class_index);
-
-///@brief Returns the physical tile type matching a given physical tile type name, or nullptr (if not found)
-t_physical_tile_type_ptr find_tile_type_by_name(std::string name, const std::vector<t_physical_tile_type>& types);
-
-int find_pin_class(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port, e_pin_type pin_type);
-
-///@brief Returns the relative pin index within a sub tile that corresponds to the pin within the given port and its index in the port
-int find_pin(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port);
-
-///@brief Returns the maximum number of pins within a logical block
-int get_max_num_pins(t_logical_block_type_ptr logical_block);
-
-///@brief Verifies whether a given logical block is compatible with a given physical tile
-bool is_tile_compatible(t_physical_tile_type_ptr physical_tile, t_logical_block_type_ptr logical_block);
-
-///@brief Verifies whether a logical block and a relative placement location is compatible with a given physical tile
-bool is_sub_tile_compatible(t_physical_tile_type_ptr physical_tile, t_logical_block_type_ptr logical_block, int sub_tile_loc);
-
-/**
- * @brief Returns the first physical tile type that matches the logical block
- *
- * The order of the physical tiles suitable for the input logical block follows the order
- * with which the logical blocks appear in the architecture XML definition
- */
-t_physical_tile_type_ptr pick_physical_type(t_logical_block_type_ptr logical_block);
-
-/**
- * Returns the first logical block type that matches the physical tile
- *
- * The order of the logical blocks suitable for the input physical tile follows the order
- * with which the physical tiles appear in the architecture XML definition
- */
-t_logical_block_type_ptr pick_logical_type(t_physical_tile_type_ptr physical_tile);
-
-/**
- * @brief Returns the sub tile index (within 'physical_tile') corresponding to the
- * 'logical block'.
- *
- * This function will return the index for the first sub_tile that can accommodate
- * the logical block.
- *
- * It is typically called before/during placement,
- * when picking a sub-tile to fit a logical block
- */
-int get_logical_block_physical_sub_tile_index(t_physical_tile_type_ptr physical_tile,
-                                              t_logical_block_type_ptr logical_block);
-/**
- * @brief Returns the physical pin index (within 'physical_tile') corresponding to the
- * logical index ('pin' of the first instance of 'logical_block' within the physcial tile.
- *
- * This function is called before/during placement, when a sub tile index was not yet assigned.
- *
- * Throws an exception if the corresponding physical pin can't be found.
- */
-int get_physical_pin(t_physical_tile_type_ptr physical_tile,
-                     t_logical_block_type_ptr logical_block,
-                     int pin);
-/**
- * @brief Returns the physical pin index (within 'physical_tile') corresponding to the
- * logical index ('pin' of the first instance of 'logical_block' within the physcial tile.
- * This function considers if a given offset is in the range of sub tile capacity
- *
- *   (First pin index at current sub-tile)                                     (The wanted pin index)
- *
- *   |                                                               |<----- pin ------->|
- *   v                                                                                   v
- *
- *   |<----- capacity.low ----->|<----- capacity.low + 1 ----->| ... |<----- sub_tile_capacity ---->|
- *
- * Throws an exception if the corresponding physical pin can't be found.
- *
- * Take the above CLOCK TILE example:
- *   - we want to get the absolute physical pin corresponding to the CLK pin
- *     of the BUFG logical block placed at the second instance of the BUFGCTRL sub tile
- *
- *   int pin = get_physical_pin_at_sub_tile_location(clock_tile, bufg_block, 1, 0);
- *
- *   where the input params are:
- *     - clock_tile: CLOCK TILE
- *     - bufg_block: BUFG
- *     - 1: second absolute capacity instance in the CLOCK TILE
- *     - 0: logical pin index corresponding to CLK
- *
- *   This function call returns the absolute pin index of the CLK_1 pin (assumed that it is the first pin of the sub tile).
- *   The value returned in this case is 3.
- *   Note: capacity and pin indices start from zero.
- */
-int get_physical_pin_at_sub_tile_location(t_physical_tile_type_ptr physical_tile,
-                                          t_logical_block_type_ptr logical_block,
-                                          int sub_tile_capacity,
-                                          int pin);
-
-/**
- * @brief Returns the sub tile index (within 'physical_tile') corresponding to the
- * 'logical block' by considering if a given offset is in the range of sub tile capacity
- */
-int get_logical_block_physical_sub_tile_index(t_physical_tile_type_ptr physical_tile,
-                                              t_logical_block_type_ptr logical_block,
-                                              int sub_tile_capacity);
-/**
- * @brief Returns the physical pin index (within 'physical_tile') corresponding to the
- * logical index ('pin') of the 'logical_block' at sub-tile location 'sub_tile_index'.
- *
- * Throws an exception if the corresponding physical pin can't be found.
- */
-int get_sub_tile_physical_pin(int sub_tile_index,
-                              t_physical_tile_type_ptr physical_tile,
-                              t_logical_block_type_ptr logical_block,
-                              int pin);
-
-/**
- * @brief Returns one of the physical ports of a tile corresponding to the port_name.
- * Given that each sub_tile's port that has exactly the same name has to be equivalent
- * one to the other, it is indifferent which port is returned.
- */
-t_physical_tile_port find_tile_port_by_name(t_physical_tile_type_ptr type, const char* port_name);
-
-/**
- * @brief Returns the physical tile port given the port name and the corresponding sub tile
- */
-const t_physical_tile_port* get_port_by_name(t_sub_tile* sub_tile, const char* port_name);
-
-/**
- * @brief Returns the logical block port given the port name and the corresponding logical block type
- */
-const t_port* get_port_by_name(t_logical_block_type_ptr type, const char* port_name);
-
-/**
- * @brief Returns the physical tile port given the pin name and the corresponding sub tile
- */
-const t_physical_tile_port* get_port_by_pin(const t_sub_tile* sub_tile, int pin);
-
-/**
- * @brief Returns the logical block port given the pin name and the corresponding logical block type
- */
-const t_port* get_port_by_pin(t_logical_block_type_ptr type, int pin);
-
-#endif
diff --git a/third_party/vtr/libs/archfpga/src/read_fpga_interchange_arch.cc b/third_party/vtr/libs/archfpga/src/read_fpga_interchange_arch.cc
deleted file mode 100644
index 4d56a3f53..000000000
--- a/third_party/vtr/libs/archfpga/src/read_fpga_interchange_arch.cc
+++ /dev/null
@@ -1,2542 +0,0 @@
-#include <algorithm>
-#include <kj/std/iostream.h>
-#include <limits>
-#include <map>
-#include <regex>
-#include <set>
-#include <stdlib.h>
-#include <string>
-#include <string.h>
-#include <zlib.h>
-
-#include "vtr_assert.h"
-#include "vtr_digest.h"
-#include "vtr_log.h"
-#include "vtr_memory.h"
-#include "vtr_util.h"
-
-#include "arch_check.h"
-#include "arch_error.h"
-#include "arch_util.h"
-#include "arch_types.h"
-
-#include "read_fpga_interchange_arch.h"
-
-/*
- * FPGA Interchange Device frontend
- *
- * This file contains functions to read and parse a Cap'n'proto FPGA interchange device description
- * and populate the various VTR architecture's internal data structures.
- *
- * The Device data is, by default, GZipped, hence the requirement of the ZLIB library to allow
- * for in-memory decompression of the input file.
- */
-
-using namespace DeviceResources;
-using namespace LogicalNetlist;
-using namespace capnp;
-
-// Necessary to reduce code verbosity when getting the pin directions
-static const auto INPUT = LogicalNetlist::Netlist::Direction::INPUT;
-static const auto OUTPUT = LogicalNetlist::Netlist::Direction::OUTPUT;
-static const auto INOUT = LogicalNetlist::Netlist::Direction::INOUT;
-
-static const auto LOGIC = Device::BELCategory::LOGIC;
-static const auto ROUTING = Device::BELCategory::ROUTING;
-static const auto SITE_PORT = Device::BELCategory::SITE_PORT;
-
-// Enum for pack pattern expansion direction
-enum e_pp_dir {
-    FORWARD = 0,
-    BACKWARD = 1
-};
-
-struct t_package_pin {
-    std::string name;
-
-    std::string site_name;
-    std::string bel_name;
-};
-
-struct t_bel_cell_mapping {
-    size_t cell;
-    size_t site;
-    std::vector<std::pair<size_t, size_t>> pins;
-
-    bool operator<(const t_bel_cell_mapping& other) const {
-        return cell < other.cell || (cell == other.cell && site < other.site);
-    }
-};
-
-// Intermediate data type to store information on interconnects to be created
-struct t_ic_data {
-    std::string input;
-    std::set<std::string> outputs;
-
-    bool requires_pack_pattern;
-};
-
-/****************** Utility functions ******************/
-
-/**
- * @brief The FPGA interchange timing model includes three different corners (min, typ and max) for each of the two
- * speed_models (slow and fast).
- *
- * Timing data can be found on PIPs, nodes, site pins and bel pins.
- * This function retrieves the timing value based on the wanted speed model and the wanted corner.
- *
- * More information on the FPGA Interchange timing model can be found here:
- *   - https://github.com/chipsalliance/fpga-interchange-schema/blob/main/interchange/DeviceResources.capnp
- */
-static float get_corner_value(Device::CornerModel::Reader model, const char* speed_model, const char* value) {
-    bool slow_model = std::string(speed_model) == std::string("slow");
-    bool fast_model = std::string(speed_model) == std::string("fast");
-
-    bool min_corner = std::string(value) == std::string("min");
-    bool typ_corner = std::string(value) == std::string("typ");
-    bool max_corner = std::string(value) == std::string("max");
-
-    if (!slow_model && !fast_model) {
-        archfpga_throw("", __LINE__,
-                       "Wrong speed model `%s`. Expected `slow` or `fast`\n", speed_model);
-    }
-
-    if (!min_corner && !typ_corner && !max_corner) {
-        archfpga_throw("", __LINE__,
-                       "Wrong corner model `%s`. Expected `min`, `typ` or `max`\n", value);
-    }
-
-    bool has_fast = model.getFast().hasFast();
-    bool has_slow = model.getSlow().hasSlow();
-
-    if (slow_model && has_slow) {
-        auto half = model.getSlow().getSlow();
-        if (min_corner && half.getMin().isMin()) {
-            return half.getMin().getMin();
-        } else if (typ_corner && half.getTyp().isTyp()) {
-            return half.getTyp().getTyp();
-        } else if (max_corner && half.getMax().isMax()) {
-            return half.getMax().getMax();
-        } else {
-            if (half.getMin().isMin()) {
-                return half.getMin().getMin();
-            } else if (half.getTyp().isTyp()) {
-                return half.getTyp().getTyp();
-            } else if (half.getMax().isMax()) {
-                return half.getMax().getMax();
-            } else {
-                archfpga_throw("", __LINE__,
-                               "Invalid speed model %s. No value found!\n", speed_model);
-            }
-        }
-    } else if (fast_model && has_fast) {
-        auto half = model.getFast().getFast();
-        if (min_corner && half.getMin().isMin()) {
-            return half.getMin().getMin();
-        } else if (typ_corner && half.getTyp().isTyp()) {
-            return half.getTyp().getTyp();
-        } else if (max_corner && half.getMax().isMax()) {
-            return half.getMax().getMax();
-        } else {
-            if (half.getMin().isMin()) {
-                return half.getMin().getMin();
-            } else if (half.getTyp().isTyp()) {
-                return half.getTyp().getTyp();
-            } else if (half.getMax().isMax()) {
-                return half.getMax().getMax();
-            } else {
-                archfpga_throw("", __LINE__,
-                               "Invalid speed model %s. No value found!\n", speed_model);
-            }
-        }
-    }
-
-    return 0.;
-}
-
-/** @brief Returns the port corresponding to the given model in the architecture */
-static t_model_ports* get_model_port(t_arch* arch, std::string model, std::string port, bool fail = true) {
-    for (t_model* m : {arch->models, arch->model_library}) {
-        for (; m != nullptr; m = m->next) {
-            if (std::string(m->name) != model)
-                continue;
-
-            for (t_model_ports* p : {m->inputs, m->outputs})
-                for (; p != nullptr; p = p->next)
-                    if (std::string(p->name) == port)
-                        return p;
-        }
-    }
-
-    if (fail)
-        archfpga_throw(__FILE__, __LINE__,
-                       "Could not find model port: %s (%s)\n", port.c_str(), model.c_str());
-
-    return nullptr;
-}
-
-/** @brief Returns the specified architecture model */
-static t_model* get_model(t_arch* arch, std::string model) {
-    for (t_model* m : {arch->models, arch->model_library})
-        for (; m != nullptr; m = m->next)
-            if (std::string(m->name) == model)
-                return m;
-
-    archfpga_throw(__FILE__, __LINE__,
-                   "Could not find model: %s\n", model.c_str());
-}
-
-/** @brief Returns the physical or logical type by its name */
-template<typename T>
-static T* get_type_by_name(const char* type_name, std::vector<T>& types) {
-    for (auto& type : types) {
-        if (0 == strcmp(type.name, type_name)) {
-            return &type;
-        }
-    }
-
-    archfpga_throw(__FILE__, __LINE__,
-                   "Could not find type: %s\n", type_name);
-}
-
-/** @brief Returns a generic port instantiation for a complex block */
-static t_port get_generic_port(t_arch* arch,
-                               t_pb_type* pb_type,
-                               PORTS dir,
-                               std::string name,
-                               std::string model = "",
-                               int num_pins = 1) {
-    t_port port;
-    port.parent_pb_type = pb_type;
-    port.name = vtr::strdup(name.c_str());
-    port.num_pins = num_pins;
-    port.index = 0;
-    port.absolute_first_pin_index = 0;
-    port.port_index_by_type = 0;
-    port.equivalent = PortEquivalence::NONE;
-    port.type = dir;
-    port.is_clock = false;
-    port.is_non_clock_global = false;
-    port.model_port = nullptr;
-    port.port_class = vtr::strdup(nullptr);
-    port.port_power = (t_port_power*)vtr::calloc(1, sizeof(t_port_power));
-
-    if (!model.empty())
-        port.model_port = get_model_port(arch, model, name);
-
-    return port;
-}
-
-/** @brief Returns true if a given port name exists in the given complex block */
-static bool block_port_exists(t_pb_type* pb_type, std::string port_name) {
-    for (int iport = 0; iport < pb_type->num_ports; iport++) {
-        const t_port port = pb_type->ports[iport];
-
-        if (std::string(port.name) == port_name)
-            return true;
-    }
-
-    return false;
-}
-
-/** @brief Returns a pack pattern given it's name, input and output strings */
-static t_pin_to_pin_annotation get_pack_pattern(std::string pp_name, std::string input, std::string output) {
-    t_pin_to_pin_annotation pp;
-
-    pp.prop = (int*)vtr::calloc(1, sizeof(int));
-    pp.value = (char**)vtr::calloc(1, sizeof(char*));
-
-    pp.type = E_ANNOT_PIN_TO_PIN_PACK_PATTERN;
-    pp.format = E_ANNOT_PIN_TO_PIN_CONSTANT;
-    pp.prop[0] = (int)E_ANNOT_PIN_TO_PIN_PACK_PATTERN_NAME;
-    pp.value[0] = vtr::strdup(pp_name.c_str());
-    pp.input_pins = vtr::strdup(input.c_str());
-    pp.output_pins = vtr::strdup(output.c_str());
-    pp.num_value_prop_pairs = 1;
-    pp.clock = nullptr;
-
-    return pp;
-}
-
-/****************** End Utility functions ******************/
-
-struct ArchReader {
-  public:
-    ArchReader(t_arch* arch,
-               Device::Reader& arch_reader,
-               const char* arch_file,
-               std::vector<t_physical_tile_type>& phys_types,
-               std::vector<t_logical_block_type>& logical_types)
-        : arch_(arch)
-        , arch_file_(arch_file)
-        , ar_(arch_reader)
-        , ptypes_(phys_types)
-        , ltypes_(logical_types) {
-        set_arch_file_name(arch_file);
-
-        for (std::string str : ar_.getStrList()) {
-            auto interned_string = arch_->strings.intern_string(vtr::string_view(str.c_str()));
-            arch_->interned_strings.push_back(interned_string);
-        }
-    }
-
-    void read_arch() {
-        // Preprocess arch information
-        process_luts();
-        process_package_pins();
-        process_cell_bel_mappings();
-        process_constants();
-        process_bels_and_sites();
-
-        process_models();
-        process_constant_model();
-
-        process_device();
-
-        process_layout();
-        process_switches();
-        process_segments();
-
-        process_sites();
-        process_constant_block();
-
-        process_tiles();
-        process_constant_tile();
-
-        link_physical_logical_types(ptypes_, ltypes_);
-
-        SyncModelsPbTypes(arch_, ltypes_);
-        check_models(arch_);
-    }
-
-  private:
-    t_arch* arch_;
-    const char* arch_file_;
-    Device::Reader& ar_;
-    std::vector<t_physical_tile_type>& ptypes_;
-    std::vector<t_logical_block_type>& ltypes_;
-
-    t_default_fc_spec default_fc_;
-
-    std::string bel_dedup_suffix_ = "_bel";
-    std::string const_block_ = "constant_block";
-
-    std::unordered_set<int> take_bels_;
-    std::unordered_set<int> take_sites_;
-
-    // Package pins
-
-    // TODO: add possibility to have multiple packages
-    std::vector<t_package_pin> package_pins_;
-    std::unordered_set<std::string> pad_bels_;
-    std::string out_suffix_ = "_out";
-    std::string in_suffix_ = "_in";
-
-    // Bel Cell mappings
-    std::unordered_map<uint32_t, std::set<t_bel_cell_mapping>> bel_cell_mappings_;
-    std::unordered_map<std::string, int> segment_name_to_segment_idx;
-
-    // Utils
-
-    /** @brief Returns the string corresponding to the given index */
-    std::string str(size_t idx) {
-        return arch_->interned_strings[idx].get(&arch_->strings);
-    }
-
-    /** @brief Get the BEL count of a site depending on its category (e.g. logic or routing BELs) */
-    int get_bel_type_count(Device::SiteType::Reader& site, Device::BELCategory category, bool skip_lut = false) {
-        int count = 0;
-        for (auto bel : site.getBels()) {
-            auto bel_name = str(bel.getName());
-            bool is_logic = category == LOGIC;
-
-            if (skip_lut && is_lut(bel_name, str(site.getName())))
-                continue;
-
-            bool skip_bel = is_logic && take_bels_.count(bel.getName()) == 0;
-
-            if (bel.getCategory() == category && !skip_bel)
-                count++;
-        }
-
-        return count;
-    }
-
-    /** @brief Get the BEL reader given its name and site */
-    Device::BEL::Reader get_bel_reader(Device::SiteType::Reader& site, std::string bel_name) {
-        for (auto bel : site.getBels())
-            if (str(bel.getName()) == bel_name)
-                return bel;
-        VTR_ASSERT_MSG(0, "Could not find the BEL reader!\n");
-    }
-
-    /** @brief Get the BEL pin reader given its name, site and corresponding BEL */
-    Device::BELPin::Reader get_bel_pin_reader(Device::SiteType::Reader& site, Device::BEL::Reader& bel, std::string pin_name) {
-        auto bel_pins = site.getBelPins();
-
-        for (auto bel_pin : bel.getPins()) {
-            auto pin_reader = bel_pins[bel_pin];
-            if (str(pin_reader.getName()) == pin_name)
-                return pin_reader;
-        }
-        VTR_ASSERT_MSG(0, "Could not find the BEL pin reader!\n");
-    }
-
-    /** @brief Get the BEL name, with an optional deduplication suffix in case its name collides with the site name */
-    std::string get_bel_name(Device::SiteType::Reader& site, Device::BEL::Reader& bel) {
-        if (bel.getCategory() == SITE_PORT)
-            return str(site.getName());
-
-        auto site_name = str(site.getName());
-        auto bel_name = str(bel.getName());
-
-        return site_name == bel_name ? bel_name + bel_dedup_suffix_ : bel_name;
-    }
-
-    /** @brief Returns the name of the input argument BEL with optionally the de-duplication suffix removed */
-    std::string remove_bel_suffix(std::string bel) {
-        std::smatch regex_matches;
-        std::string regex = std::string("(.*)") + bel_dedup_suffix_;
-        const std::regex bel_regex(regex.c_str());
-        if (std::regex_match(bel, regex_matches, bel_regex))
-            return regex_matches[1].str();
-
-        return bel;
-    }
-
-    /** @brief Returns true in case the input argument corresponds to the name of a LUT */
-    bool is_lut(std::string name, const std::string site = std::string()) {
-        for (auto cell : arch_->lut_cells)
-            if (cell.name == name)
-                return true;
-
-        for (const auto& it : arch_->lut_elements) {
-            if (!site.empty() && site != it.first) {
-                continue;
-            }
-
-            for (const auto& lut_element : it.second) {
-                for (const auto& lut_bel : lut_element.lut_bels) {
-                    if (lut_bel.name == name) {
-                        return true;
-                    }
-                }
-            }
-        }
-
-        return false;
-    }
-
-    t_lut_element* get_lut_element_for_bel(const std::string& site_type, const std::string& bel_name) {
-        if (!arch_->lut_elements.count(site_type)) {
-            return nullptr;
-        }
-
-        for (auto& lut_element : arch_->lut_elements.at(site_type)) {
-            for (auto& lut_bel : lut_element.lut_bels) {
-                if (lut_bel.name == bel_name) {
-                    return &lut_element;
-                }
-            }
-        }
-
-        return nullptr;
-    }
-
-    /** @brief Returns true in case the input argument corresponds to a PAD BEL */
-    bool is_pad(std::string name) {
-        return pad_bels_.count(name) != 0;
-    }
-
-    /** @brief Utility function to fill in all the necessary information for the sub_tile
-     *
-     *  Given a physical tile type and a corresponding sub tile with additional information on the IO pin count
-     *  this function populates all the data structures corresponding to the sub tile, and modifies also the parent
-     *  physical tile type, updating the pin numberings as  well as the directs pin mapping for the equivalent sites
-     *
-     *  Affected data structures:
-     *      - pinloc
-     *      - fc_specs
-     *      - equivalent_sites
-     *      - tile_block_pin_directs_map
-     **/
-    void fill_sub_tile(t_physical_tile_type& type, t_sub_tile& sub_tile, int num_pins, int input_count, int output_count) {
-        sub_tile.num_phy_pins += num_pins;
-        type.num_pins += num_pins;
-        type.num_inst_pins += num_pins;
-
-        type.num_input_pins += input_count;
-        type.num_output_pins += output_count;
-        type.num_receivers += input_count;
-        type.num_drivers += output_count;
-
-        type.pin_width_offset.resize(type.num_pins, 0);
-        type.pin_height_offset.resize(type.num_pins, 0);
-
-        type.pinloc.resize({1, 1, 4}, std::vector<bool>(type.num_pins, false));
-        for (e_side side : {TOP, RIGHT, BOTTOM, LEFT}) {
-            for (int pin = 0; pin < type.num_pins; pin++) {
-                type.pinloc[0][0][side][pin] = true;
-                type.pin_width_offset[pin] = 0;
-                type.pin_height_offset[pin] = 0;
-            }
-        }
-
-        vtr::bimap<t_logical_pin, t_physical_pin> directs_map;
-
-        for (int npin = 0; npin < type.num_pins; npin++) {
-            t_physical_pin physical_pin(npin);
-            t_logical_pin logical_pin(npin);
-
-            directs_map.insert(logical_pin, physical_pin);
-        }
-
-        auto ltype = get_type_by_name<t_logical_block_type>(sub_tile.name, ltypes_);
-        sub_tile.equivalent_sites.push_back(ltype);
-
-        type.tile_block_pin_directs_map[ltype->index][sub_tile.index] = directs_map;
-
-        // Assign FC specs
-        int iblk_pin = 0;
-        for (const auto& port : sub_tile.ports) {
-            t_fc_specification fc_spec;
-
-            // FIXME: Use always one segment for the time being.
-            //        Can use the right segment for this IOPIN as soon
-            //        as the RR graph reading from the interchange is complete.
-            fc_spec.seg_index = 0;
-
-            //Apply type and defaults
-            if (port.type == IN_PORT) {
-                fc_spec.fc_type = e_fc_type::IN;
-                fc_spec.fc_value_type = default_fc_.in_value_type;
-                fc_spec.fc_value = default_fc_.in_value;
-            } else {
-                VTR_ASSERT(port.type == OUT_PORT);
-                fc_spec.fc_type = e_fc_type::OUT;
-                fc_spec.fc_value_type = default_fc_.out_value_type;
-                fc_spec.fc_value = default_fc_.out_value;
-            }
-
-            //Add all the pins from this port
-            for (int iport_pin = 0; iport_pin < port.num_pins; ++iport_pin) {
-                int true_physical_blk_pin = sub_tile.sub_tile_to_tile_pin_indices[iblk_pin++];
-                fc_spec.pins.push_back(true_physical_blk_pin);
-            }
-
-            type.fc_specs.push_back(fc_spec);
-        }
-    }
-
-    /** @brief Returns an intermediate map representing all the interconnects to be added in a site */
-    std::unordered_map<std::string, t_ic_data> get_interconnects(Device::SiteType::Reader& site) {
-        // dictionary:
-        //   - key: interconnect name
-        //   - value: interconnect data
-        std::unordered_map<std::string, t_ic_data> ics;
-
-        const std::string site_type = str(site.getName());
-
-        for (auto wire : site.getSiteWires()) {
-            std::string wire_name = str(wire.getName());
-
-            // pin name, bel name
-            int pin_id = OPEN;
-            bool pad_exists = false;
-            bool all_inout_pins = true;
-            std::string pad_bel_name;
-            std::string pad_bel_pin_name;
-            for (auto pin : wire.getPins()) {
-                auto bel_pin = site.getBelPins()[pin];
-                auto dir = bel_pin.getDir();
-                std::string bel_pin_name = str(bel_pin.getName());
-
-                auto bel = get_bel_reader(site, str(bel_pin.getBel()));
-                auto bel_name = get_bel_name(site, bel);
-
-                auto bel_is_pad = is_pad(bel_name);
-
-                pad_exists |= bel_is_pad;
-                all_inout_pins &= dir == INOUT;
-
-                if (bel_is_pad) {
-                    pad_bel_name = bel_name;
-                    pad_bel_pin_name = bel_pin_name;
-                }
-
-                if (dir == OUTPUT)
-                    pin_id = pin;
-            }
-
-            if (pin_id == OPEN) {
-                // If no driver pin has been found, the assumption is that
-                // there must be a PAD with inout pin connected to other inout pins
-                for (auto pin : wire.getPins()) {
-                    auto bel_pin = site.getBelPins()[pin];
-                    std::string bel_pin_name = str(bel_pin.getName());
-
-                    auto bel = get_bel_reader(site, str(bel_pin.getBel()));
-                    auto bel_name = get_bel_name(site, bel);
-
-                    if (!is_pad(bel_name))
-                        continue;
-
-                    pin_id = pin;
-                }
-            }
-
-            VTR_ASSERT(pin_id != OPEN);
-
-            auto out_pin = site.getBelPins()[pin_id];
-            auto out_pin_bel = get_bel_reader(site, str(out_pin.getBel()));
-            auto out_pin_name = str(out_pin.getName());
-
-            for (auto pin : wire.getPins()) {
-                if ((int)pin == pin_id)
-                    continue;
-
-                auto bel_pin = site.getBelPins()[pin];
-                std::string out_bel_pin_name = str(bel_pin.getName());
-
-                auto out_bel = get_bel_reader(site, str(bel_pin.getBel()));
-                auto out_bel_name = get_bel_name(site, out_bel);
-
-                auto in_bel = out_pin_bel;
-                auto in_bel_name = get_bel_name(site, in_bel);
-                auto in_bel_pin_name = out_pin_name;
-
-                bool skip_in_bel = in_bel.getCategory() == LOGIC && take_bels_.count(in_bel.getName()) == 0;
-                bool skip_out_bel = out_bel.getCategory() == LOGIC && take_bels_.count(out_bel.getName()) == 0;
-                if (skip_in_bel || skip_out_bel)
-                    continue;
-
-                // LUT bels are nested under pb_types which represent LUT
-                // elements. Check if a BEL belongs to a LUT element and
-                // adjust pb_type name in the interconnect accordingly.
-                auto get_lut_element_index = [&](const std::string& bel_name) {
-                    auto lut_element = get_lut_element_for_bel(site_type, bel_name);
-                    if (lut_element == nullptr)
-                        return -1;
-
-                    const auto& lut_elements = arch_->lut_elements.at(site_type);
-                    auto it = std::find(lut_elements.begin(), lut_elements.end(), *lut_element);
-                    VTR_ASSERT(it != lut_elements.end());
-
-                    return (int)std::distance(lut_elements.begin(), it);
-                };
-
-                // TODO: This avoids having LUTs that can be used in other ways than LUTs, e.g. as DRAMs.
-                //       Once support is added for macro expansion, all the connections currently marked as
-                //       invalid will be re-enabled.
-                auto is_lut_connection_valid = [&](const std::string& bel_name, const std::string& pin_name) {
-                    auto lut_element = get_lut_element_for_bel(site_type, bel_name);
-                    if (lut_element == nullptr)
-                        return false;
-
-                    bool pin_found = false;
-                    for (auto lut_bel : lut_element->lut_bels) {
-                        for (auto lut_bel_pin : lut_bel.input_pins)
-                            pin_found |= lut_bel_pin == pin_name;
-
-                        pin_found |= lut_bel.output_pin == pin_name;
-                    }
-
-                    if (!pin_found)
-                        return false;
-
-                    return true;
-                };
-
-                int index = get_lut_element_index(out_bel_name);
-                bool valid_lut = is_lut_connection_valid(out_bel_name, out_bel_pin_name);
-                if (index >= 0) {
-                    out_bel_name = "LUT" + std::to_string(index);
-
-                    if (!valid_lut)
-                        continue;
-                }
-
-                index = get_lut_element_index(in_bel_name);
-                valid_lut = is_lut_connection_valid(in_bel_name, in_bel_pin_name);
-                if (index >= 0) {
-                    in_bel_name = "LUT" + std::to_string(index);
-
-                    if (!valid_lut)
-                        continue;
-                }
-
-                std::string ostr = out_bel_name + "." + out_bel_pin_name;
-                std::string istr = in_bel_name + "." + in_bel_pin_name;
-
-                // TODO: If the bel pin is INOUT (e.g. PULLDOWN/PULLUP in Series7)
-                //       for now treat as input only and assign the in suffix
-                if (bel_pin.getDir() == INOUT && !all_inout_pins && !is_pad(out_bel_name))
-                    ostr += in_suffix_;
-
-                auto ic_name = wire_name + "_" + out_bel_pin_name;
-
-                bool requires_pack_pattern = pad_exists;
-
-                std::vector<std::pair<std::string, t_ic_data>> ics_data;
-                if (all_inout_pins) {
-                    std::string extra_istr = out_bel_name + "." + out_bel_pin_name + out_suffix_;
-                    std::string extra_ostr = in_bel_name + "." + in_bel_pin_name + in_suffix_;
-                    std::string extra_ic_name = ic_name + "_extra";
-
-                    std::set<std::string> extra_ostrs{extra_ostr};
-                    t_ic_data extra_ic_data = {
-                        extra_istr,           // ic input
-                        extra_ostrs,          // ic outputs
-                        requires_pack_pattern // pack pattern required
-                    };
-
-                    ics_data.push_back(std::make_pair(extra_ic_name, extra_ic_data));
-
-                    istr += out_suffix_;
-                    ostr += in_suffix_;
-                } else if (pad_exists) {
-                    if (out_bel_name == pad_bel_name)
-                        ostr += in_suffix_;
-                    else { // Create new wire to connect PAD output to the BELs input
-                        ic_name = wire_name + "_" + pad_bel_pin_name + out_suffix_;
-                        istr = pad_bel_name + "." + pad_bel_pin_name + out_suffix_;
-                    }
-                }
-
-                std::set<std::string> ostrs{ostr};
-                t_ic_data ic_data = {
-                    istr,
-                    ostrs,
-                    requires_pack_pattern};
-
-                ics_data.push_back(std::make_pair(ic_name, ic_data));
-
-                for (auto entry : ics_data) {
-                    auto name = entry.first;
-                    auto data = entry.second;
-
-                    auto res = ics.emplace(name, data);
-
-                    if (!res.second) {
-                        auto old_data = res.first->second;
-
-                        VTR_ASSERT(old_data.input == data.input);
-                        VTR_ASSERT(data.outputs.size() == 1);
-
-                        for (auto out : data.outputs)
-                            res.first->second.outputs.insert(out);
-                        res.first->second.requires_pack_pattern |= data.requires_pack_pattern;
-                    }
-                }
-            }
-        }
-
-        return ics;
-    }
-
-    /**
-     * Preprocessors:
-     *   - process_bels_and_sites: information on whether sites and bels need to be expanded in pb types
-     *   - process_luts: processes information on which cells and bels are LUTs
-     *   - process_package_pins: processes information on the device's pinout and which sites and bels
-     *                           contain IO pads
-     *   - process_cell_bel_mapping: processes mappings between a cell and the possible BELs location for that cell
-     *   - process_constants: processes constants cell and net names
-     */
-    void process_bels_and_sites() {
-        auto tiles = ar_.getTileList();
-        auto tile_types = ar_.getTileTypeList();
-        auto site_types = ar_.getSiteTypeList();
-
-        for (auto tile : tiles) {
-            auto tile_type = tile_types[tile.getType()];
-
-            for (auto site : tile.getSites()) {
-                auto site_type_in_tile = tile_type.getSiteTypes()[site.getType()];
-                auto site_type = site_types[site_type_in_tile.getPrimaryType()];
-
-                bool found = false;
-                for (auto bel : site_type.getBels()) {
-                    auto bel_name = bel.getName();
-                    bool res = bel_cell_mappings_.find(bel_name) != bel_cell_mappings_.end();
-
-                    found |= res;
-
-                    if (res || is_pad(str(bel_name)))
-                        take_bels_.insert(bel_name);
-                }
-
-                if (found)
-                    take_sites_.insert(site_type.getName());
-
-                // TODO: Enable also alternative site types handling
-            }
-        }
-    }
-
-    void process_luts() {
-        // Add LUT Cell definitions
-        // This is helpful to understand which cells are LUTs
-        auto lut_def = ar_.getLutDefinitions();
-
-        for (auto lut_cell : lut_def.getLutCells()) {
-            t_lut_cell cell;
-            cell.name = lut_cell.getCell().cStr();
-            for (auto input : lut_cell.getInputPins())
-                cell.inputs.push_back(input.cStr());
-
-            auto equation = lut_cell.getEquation();
-            if (equation.isInitParam())
-                cell.init_param = equation.getInitParam().cStr();
-
-            arch_->lut_cells.push_back(cell);
-        }
-
-        for (auto lut_elem : lut_def.getLutElements()) {
-            for (auto lut : lut_elem.getLuts()) {
-                t_lut_element element;
-                element.site_type = lut_elem.getSite().cStr();
-                element.width = lut.getWidth();
-
-                for (auto bel : lut.getBels()) {
-                    t_lut_bel lut_bel;
-                    lut_bel.name = bel.getName().cStr();
-                    std::vector<std::string> ipins;
-
-                    for (auto pin : bel.getInputPins())
-                        ipins.push_back(pin.cStr());
-
-                    lut_bel.input_pins = ipins;
-                    lut_bel.output_pin = bel.getOutputPin().cStr();
-
-                    element.lut_bels.push_back(lut_bel);
-                }
-
-                arch_->lut_elements[element.site_type].push_back(element);
-            }
-        }
-    }
-
-    void process_package_pins() {
-        for (auto package : ar_.getPackages()) {
-            for (auto pin : package.getPackagePins()) {
-                t_package_pin pckg_pin;
-                pckg_pin.name = str(pin.getPackagePin());
-
-                if (pin.getBel().isBel()) {
-                    pckg_pin.bel_name = str(pin.getBel().getBel());
-                    pad_bels_.insert(pckg_pin.bel_name);
-                }
-
-                if (pin.getSite().isSite())
-                    pckg_pin.site_name = str(pin.getSite().getSite());
-
-                package_pins_.push_back(pckg_pin);
-            }
-        }
-    }
-
-    void process_cell_bel_mappings() {
-        auto primLib = ar_.getPrimLibs();
-        auto portList = primLib.getPortList();
-
-        for (auto cell_mapping : ar_.getCellBelMap()) {
-            size_t cell_name = cell_mapping.getCell();
-
-            int found_valid_prim = false;
-            for (auto primitive : primLib.getCellDecls()) {
-                bool is_prim = str(primitive.getLib()) == std::string("primitives");
-                bool is_cell = cell_name == primitive.getName();
-
-                bool has_inout = false;
-                for (auto port_idx : primitive.getPorts()) {
-                    auto port = portList[port_idx];
-
-                    if (port.getDir() == INOUT) {
-                        has_inout = true;
-                        break;
-                    }
-                }
-
-                if (is_prim && is_cell && !has_inout) {
-                    found_valid_prim = true;
-                    break;
-                }
-            }
-
-            if (!found_valid_prim)
-                continue;
-
-            for (auto common_pins : cell_mapping.getCommonPins()) {
-                std::vector<std::pair<size_t, size_t>> pins;
-
-                for (auto pin_map : common_pins.getPins())
-                    pins.emplace_back(pin_map.getCellPin(), pin_map.getBelPin());
-
-                for (auto site_type_entry : common_pins.getSiteTypes()) {
-                    size_t site_type = site_type_entry.getSiteType();
-
-                    for (auto bel : site_type_entry.getBels()) {
-                        t_bel_cell_mapping mapping;
-
-                        mapping.cell = cell_name;
-                        mapping.site = site_type;
-                        mapping.pins = pins;
-
-                        std::set<t_bel_cell_mapping> maps{mapping};
-                        auto res = bel_cell_mappings_.emplace(bel, maps);
-                        if (!res.second) {
-                            res.first->second.insert(mapping);
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    void process_constants() {
-        auto consts = ar_.getConstants();
-
-        arch_->gnd_cell = std::make_pair(str(consts.getGndCellType()), str(consts.getGndCellPin()));
-        arch_->vcc_cell = std::make_pair(str(consts.getVccCellType()), str(consts.getVccCellPin()));
-
-        arch_->gnd_net = consts.getGndNetName().isName() ? str(consts.getGndNetName().getName()) : "$__gnd_net";
-        arch_->vcc_net = consts.getVccNetName().isName() ? str(consts.getVccNetName().getName()) : "$__vcc_net";
-    }
-
-    /* end preprocessors */
-
-    // Model processing
-    void process_models() {
-        // Populate the common library, namely .inputs, .outputs, .names, .latches
-        CreateModelLibrary(arch_);
-
-        t_model* temp = nullptr;
-        std::map<std::string, int> model_name_map;
-        std::pair<std::map<std::string, int>::iterator, bool> ret_map_name;
-
-        int model_index = NUM_MODELS_IN_LIBRARY;
-        arch_->models = nullptr;
-
-        auto primLib = ar_.getPrimLibs();
-        for (auto primitive : primLib.getCellDecls()) {
-            if (str(primitive.getLib()) == std::string("primitives")) {
-                std::string prim_name = str(primitive.getName());
-
-                if (is_lut(prim_name))
-                    continue;
-
-                // Check whether the model can be placed in at least one
-                // BEL that was marked as valid (e.g. added to the take_bels_ data structure)
-                bool has_bel = false;
-                for (auto bel_cell_map : bel_cell_mappings_) {
-                    auto bel_name = bel_cell_map.first;
-
-                    bool take_bel = take_bels_.count(bel_name) != 0;
-
-                    if (!take_bel || is_lut(str(bel_name)))
-                        continue;
-
-                    for (auto map : bel_cell_map.second)
-                        has_bel |= primitive.getName() == map.cell;
-                }
-
-                if (!has_bel)
-                    continue;
-
-                try {
-                    temp = new t_model;
-                    temp->index = model_index++;
-
-                    temp->never_prune = true;
-                    temp->name = vtr::strdup(prim_name.c_str());
-
-                    ret_map_name = model_name_map.insert(std::pair<std::string, int>(temp->name, 0));
-                    if (!ret_map_name.second) {
-                        archfpga_throw(arch_file_, __LINE__,
-                                       "Duplicate model name: '%s'.\n", temp->name);
-                    }
-
-                    if (!process_model_ports(temp, primitive)) {
-                        free_arch_model(temp);
-                        continue;
-                    }
-
-                    check_model_clocks(temp, arch_file_, __LINE__);
-                    check_model_combinational_sinks(temp, arch_file_, __LINE__);
-                    warn_model_missing_timing(temp, arch_file_, __LINE__);
-
-                } catch (ArchFpgaError& e) {
-                    free_arch_model(temp);
-                    throw;
-                }
-                temp->next = arch_->models;
-                arch_->models = temp;
-            }
-        }
-    }
-
-    bool process_model_ports(t_model* model, Netlist::CellDeclaration::Reader primitive) {
-        auto primLib = ar_.getPrimLibs();
-        auto portList = primLib.getPortList();
-
-        std::set<std::pair<std::string, enum PORTS>> port_names;
-
-        for (auto port_idx : primitive.getPorts()) {
-            auto port = portList[port_idx];
-            enum PORTS dir = ERR_PORT;
-            switch (port.getDir()) {
-                case INPUT:
-                    dir = IN_PORT;
-                    break;
-                case OUTPUT:
-                    dir = OUT_PORT;
-                    break;
-                case INOUT:
-                    return false;
-                    break;
-                default:
-                    break;
-            }
-            t_model_ports* model_port = new t_model_ports;
-            model_port->dir = dir;
-            model_port->name = vtr::strdup(str(port.getName()).c_str());
-
-            // TODO: add parsing of clock port types when the interchange schema allows for it:
-            //       https://github.com/chipsalliance/fpga-interchange-schema/issues/66
-
-            //Sanity checks
-            if (model_port->is_clock == true && model_port->is_non_clock_global == true) {
-                archfpga_throw(arch_file_, __LINE__,
-                               "Model port '%s' cannot be both a clock and a non-clock signal simultaneously", model_port->name);
-            }
-            if (model_port->name == nullptr) {
-                archfpga_throw(arch_file_, __LINE__,
-                               "Model port is missing a name");
-            }
-            if (port_names.count(std::pair<std::string, enum PORTS>(model_port->name, dir)) && dir != INOUT_PORT) {
-                archfpga_throw(arch_file_, __LINE__,
-                               "Duplicate model port named '%s'", model_port->name);
-            }
-            if (dir == OUT_PORT && !model_port->combinational_sink_ports.empty()) {
-                archfpga_throw(arch_file_, __LINE__,
-                               "Model output ports can not have combinational sink ports");
-            }
-
-            model_port->min_size = 1;
-            model_port->size = 1;
-            if (port.isBus()) {
-                int s = port.getBus().getBusStart();
-                int e = port.getBus().getBusEnd();
-                model_port->size = std::abs(e - s) + 1;
-            }
-
-            port_names.insert(std::pair<std::string, enum PORTS>(model_port->name, dir));
-            //Add the port
-            if (dir == IN_PORT) {
-                model_port->next = model->inputs;
-                model->inputs = model_port;
-
-            } else if (dir == OUT_PORT) {
-                model_port->next = model->outputs;
-                model->outputs = model_port;
-            }
-        }
-
-        return true;
-    }
-
-    // Complex Blocks
-    void process_sites() {
-        auto siteTypeList = ar_.getSiteTypeList();
-
-        int index = 0;
-        // TODO: Make this dynamic depending on data from the interchange
-        auto EMPTY = get_empty_logical_type();
-        EMPTY.index = index;
-        ltypes_.push_back(EMPTY);
-
-        for (auto site : siteTypeList) {
-            auto bels = site.getBels();
-
-            if (bels.size() == 0)
-                continue;
-
-            t_logical_block_type ltype;
-
-            std::string name = str(site.getName());
-
-            if (take_sites_.count(site.getName()) == 0)
-                continue;
-
-            // Check for duplicates
-            auto is_duplicate = [name](t_logical_block_type l) { return std::string(l.name) == name; };
-            VTR_ASSERT(std::find_if(ltypes_.begin(), ltypes_.end(), is_duplicate) == ltypes_.end());
-
-            ltype.name = vtr::strdup(name.c_str());
-            ltype.index = ++index;
-
-            auto pb_type = new t_pb_type;
-            ltype.pb_type = pb_type;
-
-            pb_type->name = vtr::strdup(name.c_str());
-            pb_type->num_pb = 1;
-            process_block_ports(pb_type, site);
-
-            // Process modes (for simplicity, only the default mode is allowed for the time being)
-            pb_type->num_modes = 1;
-            pb_type->modes = new t_mode[pb_type->num_modes];
-
-            auto mode = &pb_type->modes[0];
-            mode->parent_pb_type = pb_type;
-            mode->index = 0;
-            mode->name = vtr::strdup("default");
-            mode->disable_packing = false;
-
-            // Get LUT elements for this site
-            std::vector<t_lut_element> lut_elements;
-            if (arch_->lut_elements.count(name))
-                lut_elements = arch_->lut_elements.at(name);
-
-            // Count non-LUT BELs plus LUT elements
-            int block_count = get_bel_type_count(site, LOGIC, true) + get_bel_type_count(site, ROUTING, true) + lut_elements.size();
-
-            mode->num_pb_type_children = block_count;
-            mode->pb_type_children = new t_pb_type[mode->num_pb_type_children];
-
-            // Add regular BELs
-            int count = 0;
-            for (auto bel : bels) {
-                auto category = bel.getCategory();
-                if (bel.getCategory() == SITE_PORT)
-                    continue;
-
-                bool is_logic = category == LOGIC;
-
-                if (take_bels_.count(bel.getName()) == 0 && is_logic)
-                    continue;
-
-                if (is_lut(str(bel.getName()), name))
-                    continue;
-
-                auto bel_name = str(bel.getName());
-                std::pair<std::string, std::string> key(name, bel_name);
-
-                auto mid_pb_type = &mode->pb_type_children[count++];
-                std::string mid_pb_type_name = bel_name == name ? bel_name + bel_dedup_suffix_ : bel_name;
-
-                mid_pb_type->name = vtr::strdup(mid_pb_type_name.c_str());
-                mid_pb_type->num_pb = 1;
-                mid_pb_type->parent_mode = mode;
-                mid_pb_type->blif_model = nullptr;
-
-                if (!is_pad(bel_name))
-                    process_block_ports(mid_pb_type, site, bel.getName());
-
-                if (is_pad(bel_name))
-                    process_pad_block(mid_pb_type, bel, site);
-                else if (is_logic)
-                    process_generic_block(mid_pb_type, bel, site);
-                else {
-                    VTR_ASSERT(category == ROUTING);
-                    process_routing_block(mid_pb_type);
-                }
-            }
-
-            // Add LUT elements
-            for (size_t i = 0; i < lut_elements.size(); ++i) {
-                const auto& lut_element = lut_elements[i];
-
-                auto mid_pb_type = &mode->pb_type_children[count++];
-                std::string lut_name = "LUT" + std::to_string(i);
-                mid_pb_type->name = vtr::strdup(lut_name.c_str());
-                mid_pb_type->num_pb = 1;
-                mid_pb_type->parent_mode = mode;
-                mid_pb_type->blif_model = nullptr;
-
-                process_lut_element(mid_pb_type, lut_element);
-            }
-
-            process_interconnects(mode, site);
-            ltypes_.push_back(ltype);
-        }
-    }
-
-    /** @brief Processes a LUT element starting from the intermediate pb type */
-    void process_lut_element(t_pb_type* parent, const t_lut_element& lut_element) {
-        // Collect ports for the parent pb_type representing the whole LUT
-        // element
-        std::set<std::tuple<std::string, PORTS, int>> parent_ports;
-        for (const auto& lut_bel : lut_element.lut_bels) {
-            for (const auto& name : lut_bel.input_pins) {
-                parent_ports.emplace(name, IN_PORT, 1);
-            }
-
-            parent_ports.emplace(lut_bel.output_pin, OUT_PORT, 1);
-        }
-
-        // Create the ports
-        create_ports(parent, parent_ports);
-
-        // Make a single mode for each member LUT of the LUT element
-        parent->num_modes = (int)lut_element.lut_bels.size();
-        parent->modes = new t_mode[parent->num_modes];
-
-        for (size_t i = 0; i < lut_element.lut_bels.size(); ++i) {
-            const t_lut_bel& lut_bel = lut_element.lut_bels[i];
-            auto mode = &parent->modes[i];
-
-            mode->name = vtr::strdup(lut_bel.name.c_str());
-            mode->parent_pb_type = parent;
-            mode->index = i;
-
-            // Leaf pb_type block for the LUT
-            mode->num_pb_type_children = 1;
-            mode->pb_type_children = new t_pb_type[mode->num_pb_type_children];
-
-            auto pb_type = &mode->pb_type_children[0];
-            pb_type->name = vtr::strdup(lut_bel.name.c_str());
-            pb_type->num_pb = 1;
-            pb_type->parent_mode = mode;
-            pb_type->blif_model = nullptr;
-
-            process_lut_block(pb_type, lut_bel);
-
-            // Mode interconnect
-            mode->num_interconnect = lut_bel.input_pins.size() + 1;
-            mode->interconnect = new t_interconnect[mode->num_interconnect];
-
-            std::string istr, ostr, name;
-
-            // Inputs
-            for (size_t j = 0; j < lut_bel.input_pins.size(); ++j) {
-                auto* ic = &mode->interconnect[j];
-
-                ic->type = DIRECT_INTERC;
-                ic->parent_mode = mode;
-                ic->parent_mode_index = mode->index;
-
-                istr = std::string(parent->name) + "." + lut_bel.input_pins[j];
-                ostr = std::string(pb_type->name) + ".in[" + std::to_string(j) + "]";
-                name = istr + "_to_" + ostr;
-
-                ic->input_string = vtr::strdup(istr.c_str());
-                ic->output_string = vtr::strdup(ostr.c_str());
-                ic->name = vtr::strdup(name.c_str());
-            }
-
-            // Output
-            auto* ic = &mode->interconnect[mode->num_interconnect - 1];
-            ic->type = DIRECT_INTERC;
-            ic->parent_mode = mode;
-            ic->parent_mode_index = mode->index;
-
-            istr = std::string(pb_type->name) + ".out";
-            ostr = std::string(parent->name) + "." + lut_bel.output_pin;
-            name = istr + "_to_" + ostr;
-
-            ic->input_string = vtr::strdup(istr.c_str());
-            ic->output_string = vtr::strdup(ostr.c_str());
-            ic->name = vtr::strdup(name.c_str());
-        }
-    }
-
-    /** @brief Processes a LUT primitive starting from the intermediate pb type */
-    void process_lut_block(t_pb_type* pb_type, const t_lut_bel& lut_bel) {
-        // Create port list
-        size_t width = lut_bel.input_pins.size();
-
-        std::set<std::tuple<std::string, PORTS, int>> ports;
-        ports.emplace("in", IN_PORT, width);
-        ports.emplace("out", OUT_PORT, 1);
-
-        create_ports(pb_type, ports);
-
-        // Make two modes. One for LUT-thru and another for the actual LUT bel
-        pb_type->num_modes = 2;
-        pb_type->modes = new t_mode[pb_type->num_modes];
-
-        // ................................................
-        // LUT-thru
-        t_mode* mode = &pb_type->modes[0];
-
-        // Mode
-        mode->name = vtr::strdup("wire");
-        mode->parent_pb_type = pb_type;
-        mode->index = 0;
-        mode->num_pb_type_children = 0;
-
-        // Mode interconnect
-        mode->num_interconnect = 1;
-        mode->interconnect = new t_interconnect[mode->num_interconnect];
-        t_interconnect* ic = &mode->interconnect[0];
-
-        std::string istr, ostr, name;
-
-        istr = std::string(pb_type->name) + ".in";
-        ostr = std::string(pb_type->name) + ".out";
-        name = "passthrough";
-
-        ic->input_string = vtr::strdup(istr.c_str());
-        ic->output_string = vtr::strdup(ostr.c_str());
-        ic->name = vtr::strdup(name.c_str());
-
-        ic->type = COMPLETE_INTERC;
-        ic->parent_mode = mode;
-        ic->parent_mode_index = mode->index;
-
-        // ................................................
-        // LUT BEL
-        mode = &pb_type->modes[1];
-
-        // Mode
-        mode->name = vtr::strdup("lut");
-        mode->parent_pb_type = pb_type;
-        mode->index = 1;
-
-        // Leaf pb_type
-        mode->num_pb_type_children = 1;
-        mode->pb_type_children = new t_pb_type[mode->num_pb_type_children];
-
-        auto lut = &mode->pb_type_children[0];
-        lut->name = vtr::strdup("lut");
-        lut->num_pb = 1;
-        lut->parent_mode = mode;
-
-        lut->blif_model = vtr::strdup(MODEL_NAMES);
-        lut->model = get_model(arch_, std::string(MODEL_NAMES));
-
-        lut->num_ports = 2;
-        lut->ports = (t_port*)vtr::calloc(lut->num_ports, sizeof(t_port));
-        lut->ports[0] = get_generic_port(arch_, lut, IN_PORT, "in", MODEL_NAMES, width);
-        lut->ports[1] = get_generic_port(arch_, lut, OUT_PORT, "out", MODEL_NAMES);
-
-        lut->ports[0].equivalent = PortEquivalence::FULL;
-
-        // Set classes
-        pb_type->class_type = LUT_CLASS;
-        lut->class_type = LUT_CLASS;
-        lut->ports[0].port_class = vtr::strdup("lut_in");
-        lut->ports[1].port_class = vtr::strdup("lut_out");
-
-        // Mode interconnect
-        mode->num_interconnect = 2;
-        mode->interconnect = new t_interconnect[mode->num_interconnect];
-
-        // Input
-        ic = &mode->interconnect[0];
-        ic->type = DIRECT_INTERC;
-        ic->parent_mode = mode;
-        ic->parent_mode_index = mode->index;
-
-        istr = std::string(pb_type->name) + ".in";
-        ostr = std::string(lut->name) + ".in";
-        name = istr + "_to_" + ostr;
-
-        ic->input_string = vtr::strdup(istr.c_str());
-        ic->output_string = vtr::strdup(ostr.c_str());
-        ic->name = vtr::strdup(name.c_str());
-
-        // Output
-        ic = &mode->interconnect[1];
-        ic->type = DIRECT_INTERC;
-        ic->parent_mode = mode;
-        ic->parent_mode_index = mode->index;
-
-        istr = std::string(lut->name) + ".out";
-        ostr = std::string(pb_type->name) + ".out";
-        name = istr + "_to_" + ostr;
-
-        ic->input_string = vtr::strdup(istr.c_str());
-        ic->output_string = vtr::strdup(ostr.c_str());
-        ic->name = vtr::strdup(name.c_str());
-    }
-
-    /** @brief Generates the leaf pb types for the PAD type */
-    void process_pad_block(t_pb_type* pad, Device::BEL::Reader& bel, Device::SiteType::Reader& site) {
-        // For now, hard-code two modes for pads, so that PADs can either be IPADs or OPADs
-        pad->num_modes = 2;
-        pad->modes = new t_mode[2];
-
-        // Add PAD pb_type ports
-        VTR_ASSERT(bel.getPins().size() == 1);
-        std::string pin = str(site.getBelPins()[bel.getPins()[0]].getName());
-        std::string ipin = pin + in_suffix_;
-        std::string opin = pin + out_suffix_;
-
-        auto num_ports = 2;
-        auto ports = new t_port[num_ports];
-        pad->ports = ports;
-        pad->num_ports = pad->num_pins = num_ports;
-        pad->num_input_pins = 1;
-        pad->num_output_pins = 1;
-
-        int pin_abs = 0;
-        int pin_count = 0;
-        for (auto dir : {IN_PORT, OUT_PORT}) {
-            int pins_dir_count = 0;
-            t_port* port = &ports[pin_count];
-
-            port->parent_pb_type = pad;
-            port->index = pin_count++;
-            port->port_index_by_type = pins_dir_count++;
-            port->absolute_first_pin_index = pin_abs++;
-
-            port->equivalent = PortEquivalence::NONE;
-            port->num_pins = 1;
-            port->type = dir;
-            port->is_clock = false;
-
-            bool is_input = dir == IN_PORT;
-            port->name = is_input ? vtr::strdup(ipin.c_str()) : vtr::strdup(opin.c_str());
-            port->model_port = nullptr;
-            port->port_class = vtr::strdup(nullptr);
-            port->port_power = (t_port_power*)vtr::calloc(1, sizeof(t_port_power));
-        }
-
-        // OPAD mode
-        auto omode = &pad->modes[0];
-        omode->name = vtr::strdup("opad");
-        omode->parent_pb_type = pad;
-        omode->index = 0;
-        omode->num_pb_type_children = 1;
-        omode->pb_type_children = new t_pb_type[1];
-
-        auto opad = new t_pb_type;
-        opad->name = vtr::strdup("opad");
-        opad->num_pb = 1;
-        opad->parent_mode = omode;
-
-        num_ports = 1;
-        opad->num_ports = num_ports;
-        opad->ports = (t_port*)vtr::calloc(num_ports, sizeof(t_port));
-        opad->blif_model = vtr::strdup(MODEL_OUTPUT);
-        opad->model = get_model(arch_, std::string(MODEL_OUTPUT));
-
-        opad->ports[0] = get_generic_port(arch_, opad, IN_PORT, "outpad", MODEL_OUTPUT);
-        omode->pb_type_children[0] = *opad;
-
-        // IPAD mode
-        auto imode = &pad->modes[1];
-        imode->name = vtr::strdup("ipad");
-        imode->parent_pb_type = pad;
-        imode->index = 1;
-        imode->num_pb_type_children = 1;
-        imode->pb_type_children = new t_pb_type[1];
-
-        auto ipad = new t_pb_type;
-        ipad->name = vtr::strdup("ipad");
-        ipad->num_pb = 1;
-        ipad->parent_mode = imode;
-
-        num_ports = 1;
-        ipad->num_ports = num_ports;
-        ipad->ports = (t_port*)vtr::calloc(num_ports, sizeof(t_port));
-        ipad->blif_model = vtr::strdup(MODEL_INPUT);
-        ipad->model = get_model(arch_, std::string(MODEL_INPUT));
-
-        ipad->ports[0] = get_generic_port(arch_, ipad, OUT_PORT, "inpad", MODEL_INPUT);
-        imode->pb_type_children[0] = *ipad;
-
-        // Handle interconnects
-        int num_pins = 1;
-
-        omode->num_interconnect = num_pins;
-        omode->interconnect = new t_interconnect[num_pins];
-
-        imode->num_interconnect = num_pins;
-        imode->interconnect = new t_interconnect[num_pins];
-
-        std::string opad_istr = std::string(pad->name) + std::string(".") + ipin;
-        std::string opad_ostr = std::string(opad->name) + std::string(".outpad");
-        std::string o_ic_name = std::string(pad->name) + std::string("_") + std::string(opad->name);
-
-        std::string ipad_istr = std::string(ipad->name) + std::string(".inpad");
-        std::string ipad_ostr = std::string(pad->name) + std::string(".") + opin;
-        std::string i_ic_name = std::string(ipad->name) + std::string("_") + std::string(pad->name);
-
-        auto o_ic = new t_interconnect[num_pins];
-        auto i_ic = new t_interconnect[num_pins];
-
-        o_ic->name = vtr::strdup(o_ic_name.c_str());
-        o_ic->type = DIRECT_INTERC;
-        o_ic->parent_mode_index = 0;
-        o_ic->parent_mode = omode;
-        o_ic->input_string = vtr::strdup(opad_istr.c_str());
-        o_ic->output_string = vtr::strdup(opad_ostr.c_str());
-
-        i_ic->name = vtr::strdup(i_ic_name.c_str());
-        i_ic->type = DIRECT_INTERC;
-        i_ic->parent_mode_index = 0;
-        i_ic->parent_mode = imode;
-        i_ic->input_string = vtr::strdup(ipad_istr.c_str());
-        i_ic->output_string = vtr::strdup(ipad_ostr.c_str());
-
-        omode->interconnect[0] = *o_ic;
-        imode->interconnect[0] = *i_ic;
-    }
-
-    /** @brief Generates the leaf pb types for a generic intermediate block, with as many modes
-     *         as the number of models that can be used in this complex block.
-     */
-    void process_generic_block(t_pb_type* pb_type, Device::BEL::Reader& bel, Device::SiteType::Reader& site) {
-        std::string pb_name = std::string(pb_type->name);
-
-        std::set<t_bel_cell_mapping> maps(bel_cell_mappings_[bel.getName()]);
-
-        std::vector<t_bel_cell_mapping> map_to_erase;
-        for (auto map : maps) {
-            auto name = str(map.cell);
-            bool is_compatible = map.site == site.getName();
-
-            for (auto pin_map : map.pins) {
-                if (is_compatible == false)
-                    break;
-
-                auto cell_pin = str(pin_map.first);
-                auto bel_pin = str(pin_map.second);
-
-                if (cell_pin == arch_->vcc_cell.first || cell_pin == arch_->gnd_cell.first)
-                    continue;
-
-                // Assign suffix to bel pin as it is a inout pin which was split in out and in ports
-                auto pin_reader = get_bel_pin_reader(site, bel, bel_pin);
-                bool is_inout = pin_reader.getDir() == INOUT;
-
-                auto model_port = get_model_port(arch_, name, cell_pin, false);
-
-                if (is_inout && model_port != nullptr)
-                    bel_pin = model_port->dir == IN_PORT ? bel_pin + in_suffix_ : bel_pin + out_suffix_;
-
-                is_compatible &= block_port_exists(pb_type, bel_pin);
-            }
-
-            if (!is_compatible)
-                map_to_erase.push_back(map);
-        }
-
-        for (auto map : map_to_erase)
-            VTR_ASSERT(maps.erase(map) == 1);
-
-        int num_modes = maps.size();
-
-        VTR_ASSERT(num_modes > 0);
-
-        pb_type->num_modes = num_modes;
-        pb_type->modes = new t_mode[num_modes];
-
-        int count = 0;
-        for (auto map : maps) {
-            if (map.site != site.getName())
-                continue;
-
-            int idx = count++;
-            t_mode* mode = &pb_type->modes[idx];
-            auto name = str(map.cell);
-            mode->name = vtr::strdup(name.c_str());
-            mode->parent_pb_type = pb_type;
-            mode->index = idx;
-            mode->num_pb_type_children = 1;
-            mode->pb_type_children = new t_pb_type[1];
-
-            auto leaf = &mode->pb_type_children[0];
-            std::string leaf_name = name == std::string(pb_type->name) ? name + std::string("_leaf") : name;
-            leaf->name = vtr::strdup(leaf_name.c_str());
-            leaf->num_pb = 1;
-            leaf->parent_mode = mode;
-
-            // Pre-count pins
-            int ic_count = 0;
-            for (auto pin_map : map.pins) {
-                auto cell_pin = str(pin_map.first);
-
-                if (cell_pin == arch_->vcc_cell.first || cell_pin == arch_->gnd_cell.first)
-                    continue;
-
-                ic_count++;
-            }
-
-            int num_ports = ic_count;
-            leaf->num_ports = num_ports;
-            leaf->ports = (t_port*)vtr::calloc(num_ports, sizeof(t_port));
-            leaf->blif_model = vtr::strdup((std::string(".subckt ") + name).c_str());
-            leaf->model = get_model(arch_, name);
-
-            mode->num_interconnect = num_ports;
-            mode->interconnect = new t_interconnect[num_ports];
-            std::set<std::tuple<std::string, PORTS, int>> pins;
-            ic_count = 0;
-            for (auto pin_map : map.pins) {
-                auto cell_pin = str(pin_map.first);
-                auto bel_pin = str(pin_map.second);
-
-                if (cell_pin == arch_->vcc_cell.first || cell_pin == arch_->gnd_cell.first)
-                    continue;
-
-                std::smatch regex_matches;
-                std::string pin_suffix;
-                const std::regex port_regex("([0-9A-Za-z-]+)\\[([0-9]+)\\]");
-                if (std::regex_match(cell_pin, regex_matches, port_regex)) {
-                    cell_pin = regex_matches[1].str();
-                    pin_suffix = std::string("[") + regex_matches[2].str() + std::string("]");
-                }
-
-                auto model_port = get_model_port(arch_, name, cell_pin);
-
-                auto size = model_port->size;
-                auto dir = model_port->dir;
-
-                // Assign suffix to bel pin as it is a inout pin which was split in out and in ports
-                auto pin_reader = get_bel_pin_reader(site, bel, bel_pin);
-                bool is_inout = pin_reader.getDir() == INOUT;
-
-                pins.emplace(cell_pin, dir, size);
-
-                std::string istr, ostr, ic_name;
-                switch (dir) {
-                    case IN_PORT:
-                        bel_pin = is_inout ? bel_pin + in_suffix_ : bel_pin;
-                        istr = pb_name + std::string(".") + bel_pin;
-                        ostr = leaf_name + std::string(".") + cell_pin + pin_suffix;
-                        break;
-                    case OUT_PORT:
-                        bel_pin = is_inout ? bel_pin + out_suffix_ : bel_pin;
-                        istr = leaf_name + std::string(".") + cell_pin + pin_suffix;
-                        ostr = pb_name + std::string(".") + bel_pin;
-                        break;
-                    default:
-                        VTR_ASSERT(0);
-                }
-
-                ic_name = istr + std::string("_") + ostr;
-
-                auto ic = &mode->interconnect[ic_count++];
-                ic->name = vtr::strdup(ic_name.c_str());
-                ic->type = DIRECT_INTERC;
-                ic->parent_mode_index = idx;
-                ic->parent_mode = mode;
-                ic->input_string = vtr::strdup(istr.c_str());
-                ic->output_string = vtr::strdup(ostr.c_str());
-            }
-
-            create_ports(leaf, pins, name);
-        }
-    }
-
-    /** @brief Generates a routing block to allow for cascading routing blocks to be
-     *         placed in the same complex block type.
-     */
-    void process_routing_block(t_pb_type* pb_type) {
-        pb_type->num_modes = 1;
-        pb_type->modes = new t_mode[1];
-
-        int idx = 0;
-        auto mode = &pb_type->modes[idx];
-
-        std::string name = std::string(pb_type->name);
-        mode->name = vtr::strdup(name.c_str());
-        mode->parent_pb_type = pb_type;
-        mode->index = idx;
-        mode->num_pb_type_children = 0;
-
-        std::string istr, ostr, ic_name;
-
-        // The MUX interconnections can only have a single output
-        VTR_ASSERT(pb_type->num_output_pins == 1);
-
-        for (int iport = 0; iport < pb_type->num_ports; iport++) {
-            const t_port port = pb_type->ports[iport];
-            auto port_name = name + "." + std::string(port.name);
-            switch (port.type) {
-                case IN_PORT:
-                    istr += istr.empty() ? port_name : " " + port_name;
-                    break;
-                case OUT_PORT:
-                    ostr = port_name;
-                    break;
-                default:
-                    VTR_ASSERT(0);
-            }
-        }
-
-        ic_name = std::string(pb_type->name);
-
-        mode->num_interconnect = 1;
-        mode->interconnect = new t_interconnect[1];
-
-        e_interconnect ic_type = pb_type->num_input_pins == 1 ? DIRECT_INTERC : MUX_INTERC;
-
-        auto ic = &mode->interconnect[idx];
-        ic->name = vtr::strdup(ic_name.c_str());
-        ic->type = ic_type;
-        ic->parent_mode_index = idx;
-        ic->parent_mode = mode;
-        ic->input_string = vtr::strdup(istr.c_str());
-        ic->output_string = vtr::strdup(ostr.c_str());
-    }
-
-    /** @brief Processes all the ports of a given complex block.
-     *         If a bel name index is specified, the bel pins are processed, otherwise the site ports
-     *         are processed instead.
-     */
-    void process_block_ports(t_pb_type* pb_type, Device::SiteType::Reader& site, size_t bel_name = OPEN) {
-        // Prepare data based on pb_type level
-        std::set<std::tuple<std::string, PORTS, int>> pins;
-        if (bel_name == (size_t)OPEN) {
-            for (auto pin : site.getPins()) {
-                auto dir = pin.getDir() == INPUT ? IN_PORT : OUT_PORT;
-                pins.emplace(str(pin.getName()), dir, 1);
-            }
-        } else {
-            auto bel = get_bel_reader(site, str(bel_name));
-
-            for (auto bel_pin : bel.getPins()) {
-                auto pin = site.getBelPins()[bel_pin];
-                auto dir = pin.getDir();
-
-                switch (dir) {
-                    case INPUT:
-                        pins.emplace(str(pin.getName()), IN_PORT, 1);
-                        break;
-                    case OUTPUT:
-                        pins.emplace(str(pin.getName()), OUT_PORT, 1);
-                        break;
-                    case INOUT:
-                        pins.emplace(str(pin.getName()) + in_suffix_, IN_PORT, 1);
-                        pins.emplace(str(pin.getName()) + out_suffix_, OUT_PORT, 1);
-                        break;
-                    default:
-                        VTR_ASSERT(0);
-                }
-            }
-        }
-
-        create_ports(pb_type, pins);
-    }
-
-    /** @brief Generates all the port for a complex block, given its pointer and a map of ports (key) and their direction and width */
-    void create_ports(t_pb_type* pb_type, std::set<std::tuple<std::string, PORTS, int>>& pins, std::string model = "") {
-        std::unordered_set<std::string> names;
-
-        auto num_ports = pins.size();
-        auto ports = new t_port[num_ports];
-        pb_type->ports = ports;
-        pb_type->num_ports = pb_type->num_pins = num_ports;
-        pb_type->num_input_pins = 0;
-        pb_type->num_output_pins = 0;
-
-        int pin_abs = 0;
-        int pin_count = 0;
-        for (auto dir : {IN_PORT, OUT_PORT}) {
-            int pins_dir_count = 0;
-            for (auto pin_tuple : pins) {
-                std::string pin_name;
-                PORTS pin_dir;
-                int num_pins;
-                std::tie(pin_name, pin_dir, num_pins) = pin_tuple;
-
-                if (pin_dir != dir)
-                    continue;
-
-                bool is_input = dir == IN_PORT;
-                pb_type->num_input_pins += is_input ? 1 : 0;
-                pb_type->num_output_pins += is_input ? 0 : 1;
-
-                auto port = get_generic_port(arch_, pb_type, dir, pin_name, /*string_model=*/"", num_pins);
-                ports[pin_count] = port;
-                port.index = pin_count++;
-                port.port_index_by_type = pins_dir_count++;
-                port.absolute_first_pin_index = pin_abs++;
-
-                if (!model.empty())
-                    port.model_port = get_model_port(arch_, model, pin_name);
-            }
-        }
-    }
-
-    /** @brief Processes and creates the interconnects corresponding to a given mode */
-    void process_interconnects(t_mode* mode, Device::SiteType::Reader& site) {
-        auto ics = get_interconnects(site);
-        auto num_ic = ics.size();
-
-        mode->num_interconnect = num_ic;
-        mode->interconnect = new t_interconnect[num_ic];
-
-        int curr_ic = 0;
-        std::unordered_set<std::string> names;
-
-        // Handle site wires, namely direct interconnects
-        for (auto ic_pair : ics) {
-            auto ic_name = ic_pair.first;
-            auto ic_data = ic_pair.second;
-
-            auto input = ic_data.input;
-            auto outputs = ic_data.outputs;
-
-            auto merge_string = [](std::string ss, std::string s) {
-                return ss.empty() ? s : ss + " " + s;
-            };
-
-            std::string outputs_str = std::accumulate(outputs.begin(), outputs.end(), std::string(), merge_string);
-
-            t_interconnect* ic = &mode->interconnect[curr_ic++];
-
-            // No line num for interconnects, as line num is XML specific
-            // TODO: probably line_num should be deprecated as it is dependent
-            //       on the input architecture format.
-            ic->line_num = 0;
-            ic->type = DIRECT_INTERC;
-            ic->parent_mode_index = mode->index;
-            ic->parent_mode = mode;
-
-            VTR_ASSERT(names.insert(ic_name).second);
-            ic->name = vtr::strdup(ic_name.c_str());
-            ic->input_string = vtr::strdup(input.c_str());
-            ic->output_string = vtr::strdup(outputs_str.c_str());
-        }
-
-        // Checks and, in case, adds all the necessary pack patterns to the marked interconnects
-        for (size_t iic = 0; iic < num_ic; iic++) {
-            t_interconnect* ic = &mode->interconnect[iic];
-
-            auto ic_data = ics.at(std::string(ic->name));
-
-            if (ic_data.requires_pack_pattern) {
-                auto backward_pps_map = propagate_pack_patterns(ic, site, BACKWARD);
-                auto forward_pps_map = propagate_pack_patterns(ic, site, FORWARD);
-
-                std::unordered_map<t_interconnect*, std::set<std::string>> pps_map;
-
-                for (auto pp : backward_pps_map)
-                    pps_map.emplace(pp.first, std::set<std::string>{});
-
-                for (auto pp : forward_pps_map)
-                    pps_map.emplace(pp.first, std::set<std::string>{});
-
-                // Cross-product of all pack-patterns added both when exploring backwards and forward.
-                // E.g.:
-                //   Generated pack patterns
-                //      - backward: OBUFDS, OBUF
-                //      - forward: OPAD
-                //  Final pack patterns:
-                //      - OBUFDS_OPAD, OBUF_OPAD
-                for (auto for_pp_pair : forward_pps_map)
-                    for (auto back_pp_pair : backward_pps_map)
-                        for (auto for_pp : for_pp_pair.second)
-                            for (auto back_pp : back_pp_pair.second) {
-                                std::string pp_name = for_pp + "_" + back_pp;
-                                pps_map.at(for_pp_pair.first).insert(pp_name);
-                                pps_map.at(back_pp_pair.first).insert(pp_name);
-                            }
-
-                for (auto pair : pps_map) {
-                    t_interconnect* pp_ic = pair.first;
-
-                    auto num_pp = pair.second.size();
-                    pp_ic->num_annotations = num_pp;
-                    pp_ic->annotations = new t_pin_to_pin_annotation[num_pp];
-
-                    int idx = 0;
-                    for (auto pp_name : pair.second)
-                        pp_ic->annotations[idx++] = get_pack_pattern(pp_name, pp_ic->input_string, pp_ic->output_string);
-                }
-            }
-        }
-    }
-
-    /** @brief Propagates and generates all pack_patterns required for the given ic.
-     *         This is necessary to find all root blocks that generate the pack pattern.
-     */
-    std::unordered_map<t_interconnect*, std::set<std::string>> propagate_pack_patterns(t_interconnect* ic, Device::SiteType::Reader& site, e_pp_dir direction) {
-        auto site_pins = site.getBelPins();
-
-        std::string endpoint = direction == BACKWARD ? ic->input_string : ic->output_string;
-        auto ic_endpoints = vtr::split(endpoint, " ");
-
-        std::unordered_map<t_interconnect*, std::set<std::string>> pps_map;
-
-        bool is_backward = direction == BACKWARD;
-
-        for (auto ep : ic_endpoints) {
-            auto parts = vtr::split(ep, ".");
-            auto bel = parts[0];
-            auto pin = parts[1];
-
-            if (bel == str(site.getName()))
-                return pps_map;
-
-            // Assign mode and pb_type
-            t_mode* parent_mode = ic->parent_mode;
-            t_pb_type* pb_type = nullptr;
-
-            for (int ipb = 0; ipb < parent_mode->num_pb_type_children; ipb++)
-                if (std::string(parent_mode->pb_type_children[ipb].name) == bel)
-                    pb_type = &parent_mode->pb_type_children[ipb];
-
-            VTR_ASSERT(pb_type != nullptr);
-
-            auto bel_reader = get_bel_reader(site, remove_bel_suffix(bel));
-
-            // Passing through routing mux. Check at the muxes input pins interconnects
-            if (bel_reader.getCategory() == ROUTING) {
-                for (auto bel_pin : bel_reader.getPins()) {
-                    auto pin_reader = site_pins[bel_pin];
-                    auto pin_name = str(pin_reader.getName());
-
-                    if (pin_reader.getDir() != (is_backward ? INPUT : OUTPUT))
-                        continue;
-
-                    for (int iic = 0; iic < parent_mode->num_interconnect; iic++) {
-                        t_interconnect* other_ic = &parent_mode->interconnect[iic];
-
-                        if (std::string(ic->name) == std::string(other_ic->name))
-                            continue;
-
-                        std::string ic_to_find = bel + "." + pin_name;
-
-                        bool found = false;
-                        for (auto out : vtr::split(is_backward ? other_ic->output_string : other_ic->input_string, " "))
-                            found |= out == ic_to_find;
-
-                        if (found) {
-                            // An output interconnect to propagate was found, continue searching
-                            auto res = propagate_pack_patterns(other_ic, site, direction);
-
-                            for (auto pp_map : res)
-                                pps_map.emplace(pp_map.first, pp_map.second);
-                        }
-                    }
-                }
-            } else {
-                VTR_ASSERT(bel_reader.getCategory() == LOGIC);
-
-                for (int imode = 0; imode < pb_type->num_modes; imode++) {
-                    t_mode* mode = &pb_type->modes[imode];
-
-                    for (int iic = 0; iic < mode->num_interconnect; iic++) {
-                        t_interconnect* other_ic = &mode->interconnect[iic];
-
-                        bool found = false;
-                        for (auto other_ep : vtr::split(is_backward ? other_ic->output_string : other_ic->input_string, " ")) {
-                            found |= other_ep == ep;
-                        }
-
-                        if (found) {
-                            std::string pp_name = std::string(pb_type->name) + "." + std::string(mode->name);
-
-                            std::set<std::string> pp{pp_name};
-                            auto res = pps_map.emplace(other_ic, pp);
-
-                            if (!res.second)
-                                res.first->second.insert(pp_name);
-                        }
-                    }
-                }
-            }
-        }
-
-        return pps_map;
-    }
-
-    // Physical Tiles
-    void process_tiles() {
-        auto EMPTY = get_empty_physical_type();
-        int index = 0;
-        EMPTY.index = index;
-        ptypes_.push_back(EMPTY);
-
-        auto tileTypeList = ar_.getTileTypeList();
-        auto siteTypeList = ar_.getSiteTypeList();
-
-        for (auto tile : tileTypeList) {
-            t_physical_tile_type ptype;
-            auto name = str(tile.getName());
-
-            if (name == EMPTY.name)
-                continue;
-
-            bool has_valid_sites = false;
-
-            for (auto site_type : tile.getSiteTypes())
-                has_valid_sites |= take_sites_.count(siteTypeList[site_type.getPrimaryType()].getName()) != 0;
-
-            if (!has_valid_sites)
-                continue;
-
-            ptype.name = vtr::strdup(name.c_str());
-            ptype.index = ++index;
-            ptype.width = ptype.height = ptype.area = 1;
-            ptype.capacity = 0;
-
-            process_sub_tiles(ptype, tile);
-
-            setup_pin_classes(&ptype);
-
-            bool is_io = false;
-            for (auto site : tile.getSiteTypes()) {
-                auto site_type = ar_.getSiteTypeList()[site.getPrimaryType()];
-
-                for (auto bel : site_type.getBels())
-                    is_io |= is_pad(str(bel.getName()));
-            }
-
-            ptype.is_input_type = ptype.is_output_type = is_io;
-
-            // TODO: remove the following once the RR graph generation is fully enabled from the device database
-            ptype.switchblock_locations = vtr::Matrix<e_sb_type>({{1, 1}}, e_sb_type::FULL);
-            ptype.switchblock_switch_overrides = vtr::Matrix<int>({{1, 1}}, DEFAULT_SWITCH);
-
-            ptypes_.push_back(ptype);
-        }
-    }
-
-    void process_sub_tiles(t_physical_tile_type& type, Device::TileType::Reader& tile) {
-        // TODO: only one subtile at the moment
-        auto siteTypeList = ar_.getSiteTypeList();
-        for (auto site_in_tile : tile.getSiteTypes()) {
-            t_sub_tile sub_tile;
-
-            auto site = siteTypeList[site_in_tile.getPrimaryType()];
-
-            if (take_sites_.count(site.getName()) == 0)
-                continue;
-
-            auto pins_to_wires = site_in_tile.getPrimaryPinsToTileWires();
-
-            sub_tile.index = type.capacity;
-            sub_tile.name = vtr::strdup(str(site.getName()).c_str());
-            sub_tile.capacity.set(type.capacity, type.capacity);
-            type.capacity++;
-
-            int port_idx = 0;
-            int abs_first_pin_idx = 0;
-            int icount = 0;
-            int ocount = 0;
-
-            std::unordered_map<std::string, std::string> port_name_to_wire_name;
-            int idx = 0;
-            for (auto dir : {INPUT, OUTPUT}) {
-                int port_idx_by_type = 0;
-                for (auto pin : site.getPins()) {
-                    if (pin.getDir() != dir)
-                        continue;
-
-                    t_physical_tile_port port;
-
-                    port.name = vtr::strdup(str(pin.getName()).c_str());
-
-                    port_name_to_wire_name[std::string(port.name)] = str(pins_to_wires[idx++]);
-
-                    sub_tile.sub_tile_to_tile_pin_indices.push_back(type.num_pins + port_idx);
-                    port.index = port_idx++;
-
-                    port.absolute_first_pin_index = abs_first_pin_idx++;
-                    port.port_index_by_type = port_idx_by_type++;
-
-                    if (dir == INPUT) {
-                        port.type = IN_PORT;
-                        icount++;
-                    } else {
-                        port.type = OUT_PORT;
-                        ocount++;
-                    }
-
-                    sub_tile.ports.push_back(port);
-                }
-            }
-
-            auto pins_size = site.getPins().size();
-            fill_sub_tile(type, sub_tile, pins_size, icount, ocount);
-
-            type.sub_tiles.push_back(sub_tile);
-        }
-    }
-
-    /** @brief The constant block is a synthetic tile which is used to assign a virtual
-     *         location in the grid to the constant signals which are than driven to
-     *         all the real constant wires.
-     *
-     * The block's diagram can be seen below. The port names are specified in
-     * the interchange device database, therefore GND and VCC are mainly
-     * examples in this case.
-     *
-     * +---------------+
-     * |               |
-     * |  +-------+    |
-     * |  |       |    |
-     * |  |  GND  +----+--> RR Graph node
-     * |  |       |    |
-     * |  +-------+    |
-     * |               |
-     * |               |
-     * |  +-------+    |
-     * |  |       |    |
-     * |  |  VCC  +----+--> RR Graph node
-     * |  |       |    |
-     * |  +-------+    |
-     * |               |
-     * +---------------+
-     */
-    void process_constant_block() {
-        std::vector<std::pair<std::string, std::string>> const_cells{arch_->gnd_cell, arch_->vcc_cell};
-
-        // Create constant complex block
-        t_logical_block_type block;
-
-        block.name = vtr::strdup(const_block_.c_str());
-        block.index = ltypes_.size();
-
-        auto pb_type = new t_pb_type;
-        block.pb_type = pb_type;
-
-        pb_type->name = vtr::strdup(const_block_.c_str());
-        pb_type->num_pb = 1;
-
-        pb_type->num_modes = 1;
-        pb_type->modes = new t_mode[pb_type->num_modes];
-
-        pb_type->num_ports = 2;
-        pb_type->ports = (t_port*)vtr::calloc(pb_type->num_ports, sizeof(t_port));
-
-        pb_type->num_output_pins = 2;
-        pb_type->num_input_pins = 0;
-        pb_type->num_clock_pins = 0;
-        pb_type->num_pins = 2;
-
-        auto mode = &pb_type->modes[0];
-        mode->parent_pb_type = pb_type;
-        mode->index = 0;
-        mode->name = vtr::strdup("default");
-        mode->disable_packing = false;
-
-        mode->num_interconnect = 2;
-        mode->interconnect = new t_interconnect[mode->num_interconnect];
-
-        mode->num_pb_type_children = 2;
-        mode->pb_type_children = new t_pb_type[mode->num_pb_type_children];
-
-        int count = 0;
-        for (auto const_cell : const_cells) {
-            auto leaf_pb_type = &mode->pb_type_children[count];
-
-            std::string leaf_name = const_cell.first;
-            leaf_pb_type->name = vtr::strdup(leaf_name.c_str());
-            leaf_pb_type->num_pb = 1;
-            leaf_pb_type->parent_mode = mode;
-            leaf_pb_type->blif_model = nullptr;
-
-            leaf_pb_type->num_output_pins = 1;
-            leaf_pb_type->num_input_pins = 0;
-            leaf_pb_type->num_clock_pins = 0;
-            leaf_pb_type->num_pins = 1;
-
-            int num_ports = 1;
-            leaf_pb_type->num_ports = num_ports;
-            leaf_pb_type->ports = (t_port*)vtr::calloc(num_ports, sizeof(t_port));
-            leaf_pb_type->blif_model = vtr::strdup(const_cell.first.c_str());
-            leaf_pb_type->model = get_model(arch_, const_cell.first);
-
-            leaf_pb_type->ports[0] = get_generic_port(arch_, leaf_pb_type, OUT_PORT, const_cell.second, const_cell.first);
-            pb_type->ports[count] = get_generic_port(arch_, leaf_pb_type, OUT_PORT, const_cell.first + "_" + const_cell.second);
-
-            std::string istr = leaf_name + "." + const_cell.second;
-            std::string ostr = const_block_ + "." + const_cell.first + "_" + const_cell.second;
-            std::string ic_name = const_cell.first;
-
-            auto ic = &mode->interconnect[count];
-
-            ic->name = vtr::strdup(ic_name.c_str());
-            ic->type = DIRECT_INTERC;
-            ic->parent_mode_index = 0;
-            ic->parent_mode = mode;
-            ic->input_string = vtr::strdup(istr.c_str());
-            ic->output_string = vtr::strdup(ostr.c_str());
-
-            count++;
-        }
-
-        ltypes_.push_back(block);
-    }
-
-    /** @brief Creates the models corresponding to the constant cells that are used in a given interchange device */
-    void process_constant_model() {
-        std::vector<std::pair<std::string, std::string>> const_cells{arch_->gnd_cell, arch_->vcc_cell};
-
-        // Create constant models
-        for (auto const_cell : const_cells) {
-            t_model* model = new t_model;
-            model->index = arch_->models->index + 1;
-
-            model->never_prune = true;
-            model->name = vtr::strdup(const_cell.first.c_str());
-
-            t_model_ports* model_port = new t_model_ports;
-            model_port->dir = OUT_PORT;
-            model_port->name = vtr::strdup(const_cell.second.c_str());
-
-            model_port->min_size = 1;
-            model_port->size = 1;
-            model_port->next = model->outputs;
-            model->outputs = model_port;
-
-            model->next = arch_->models;
-            arch_->models = model;
-        }
-    }
-
-    /** @brief Creates a synthetic constant tile that will be located in the external layer of the device.
-     *
-     *  The constant tile has two output ports, one for GND and the other for VCC. The constant tile hosts
-     *  the constant pb type that is generated as well. See process_constant_model and process_constant_block.
-     */
-    void process_constant_tile() {
-        std::vector<std::pair<std::string, std::string>> const_cells{arch_->gnd_cell, arch_->vcc_cell};
-        // Create constant tile
-        t_physical_tile_type constant;
-        constant.name = vtr::strdup(const_block_.c_str());
-        constant.index = ptypes_.size();
-        constant.width = constant.height = constant.area = 1;
-        constant.capacity = 1;
-        constant.is_input_type = constant.is_output_type = false;
-
-        constant.switchblock_locations = vtr::Matrix<e_sb_type>({{1, 1}}, e_sb_type::FULL);
-        constant.switchblock_switch_overrides = vtr::Matrix<int>({{1, 1}}, DEFAULT_SWITCH);
-
-        t_sub_tile sub_tile;
-        sub_tile.index = 0;
-        sub_tile.name = vtr::strdup(const_block_.c_str());
-        int count = 0;
-        for (auto const_cell : const_cells) {
-            sub_tile.sub_tile_to_tile_pin_indices.push_back(count);
-
-            t_physical_tile_port port;
-            port.type = OUT_PORT;
-            port.num_pins = 1;
-
-            port.name = vtr::strdup((const_cell.first + "_" + const_cell.second).c_str());
-
-            port.index = port.absolute_first_pin_index = port.port_index_by_type = 0;
-
-            sub_tile.ports.push_back(port);
-
-            count++;
-        }
-
-        fill_sub_tile(constant, sub_tile, 2, 0, 2);
-        constant.sub_tiles.push_back(sub_tile);
-
-        setup_pin_classes(&constant);
-
-        ptypes_.push_back(constant);
-    }
-
-    // Layout Processing
-    void process_layout() {
-        auto tiles = ar_.getTileList();
-        auto tile_types = ar_.getTileTypeList();
-        auto site_types = ar_.getSiteTypeList();
-
-        std::vector<std::string> packages;
-        for (auto package : ar_.getPackages())
-            packages.push_back(str(package.getName()));
-
-        for (auto name : packages) {
-            t_grid_def grid_def;
-            grid_def.width = grid_def.height = 0;
-            for (auto tile : tiles) {
-                grid_def.width = std::max(grid_def.width, tile.getCol() + 1);
-                grid_def.height = std::max(grid_def.height, tile.getRow() + 1);
-            }
-
-            grid_def.width += 2;
-            grid_def.height += 2;
-
-            grid_def.grid_type = GridDefType::FIXED;
-
-            if (name == "auto") {
-                // At the moment, the interchange specifies fixed-layout only architectures,
-                // and allowing for auto-sizing could potentially be implemented later on
-                // to allow for experimentation on new architectures.
-                // For the time being the layout is restricted to be only fixed.
-                archfpga_throw(arch_file_, __LINE__,
-                               "The name auto is reserved for auto-size layouts; please choose another name");
-            }
-            grid_def.name = name;
-            for (auto tile : tiles) {
-                auto tile_type = tile_types[tile.getType()];
-
-                bool found = false;
-                for (auto site : tile.getSites()) {
-                    auto site_type_in_tile = tile_type.getSiteTypes()[site.getType()];
-                    auto site_type = site_types[site_type_in_tile.getPrimaryType()];
-
-                    found |= take_sites_.count(site_type.getName()) != 0;
-                }
-
-                if (!found)
-                    continue;
-
-                t_metadata_dict data;
-                std::string tile_prefix = str(tile.getName());
-                std::string tile_type_name = str(tile_type.getName());
-
-                size_t pos = tile_prefix.find(tile_type_name);
-                if (pos != std::string::npos && pos == 0)
-                    tile_prefix.erase(pos, tile_type_name.length() + 1);
-                t_grid_loc_def single(tile_type_name, 1);
-                single.x.start_expr = std::to_string(tile.getCol() + 1);
-                single.y.start_expr = std::to_string(tile.getRow() + 1);
-
-                single.x.end_expr = single.x.start_expr + " + w - 1";
-                single.y.end_expr = single.y.start_expr + " + h - 1";
-
-                single.owned_meta = std::make_unique<t_metadata_dict>(data);
-                single.meta = single.owned_meta.get();
-                grid_def.loc_defs.emplace_back(std::move(single));
-            }
-
-            // The constant source tile will be placed at (0, 0)
-            t_grid_loc_def constant(const_block_, 1);
-            constant.x.start_expr = std::to_string(1);
-            constant.y.start_expr = std::to_string(1);
-
-            constant.x.end_expr = constant.x.start_expr + " + w - 1";
-            constant.y.end_expr = constant.y.start_expr + " + h - 1";
-
-            grid_def.loc_defs.emplace_back(std::move(constant));
-
-            arch_->grid_layouts.emplace_back(std::move(grid_def));
-        }
-    }
-
-    void process_device() {
-        /*
-         * The generic architecture data is not currently available in the interchange format
-         * therefore, for a very initial implementation, the values are taken from the ones
-         * used primarly in the Xilinx series7 devices, generated using SymbiFlow.
-         *
-         * As the interchange format develops further, with possibly more details, this function can
-         * become dynamic, allowing for different parameters for the different architectures.
-         *
-         * FIXME: This will require to be dynamically assigned, and a suitable representation added
-         *        to the FPGA interchange device schema.
-         */
-        arch_->R_minW_nmos = 6065.520020;
-        arch_->R_minW_pmos = 18138.500000;
-        arch_->grid_logic_tile_area = 14813.392;
-        arch_->Chans.chan_x_dist.type = UNIFORM;
-        arch_->Chans.chan_x_dist.peak = 1;
-        arch_->Chans.chan_x_dist.width = 0;
-        arch_->Chans.chan_x_dist.xpeak = 0;
-        arch_->Chans.chan_x_dist.dc = 0;
-        arch_->Chans.chan_y_dist.type = UNIFORM;
-        arch_->Chans.chan_y_dist.peak = 1;
-        arch_->Chans.chan_y_dist.width = 0;
-        arch_->Chans.chan_y_dist.xpeak = 0;
-        arch_->Chans.chan_y_dist.dc = 0;
-        arch_->ipin_cblock_switch_name = std::string("generic");
-        arch_->SBType = WILTON;
-        arch_->Fs = 3;
-        default_fc_.specified = true;
-        default_fc_.in_value_type = e_fc_value_type::FRACTIONAL;
-        default_fc_.in_value = 1.0;
-        default_fc_.out_value_type = e_fc_value_type::FRACTIONAL;
-        default_fc_.out_value = 1.0;
-    }
-
-    void process_switches() {
-        std::set<std::pair<bool, uint32_t>> pip_timing_models;
-        for (auto tile_type : ar_.getTileTypeList()) {
-            for (auto pip : tile_type.getPips()) {
-                pip_timing_models.insert(std::pair<bool, uint32_t>(pip.getBuffered21(), pip.getTiming()));
-                if (!pip.getDirectional())
-                    pip_timing_models.insert(std::pair<bool, uint32_t>(pip.getBuffered20(), pip.getTiming()));
-            }
-        }
-
-        auto timing_data = ar_.getPipTimings();
-
-        std::vector<std::pair<bool, uint32_t>> pip_timing_models_list;
-        pip_timing_models_list.reserve(pip_timing_models.size());
-
-        for (auto entry : pip_timing_models) {
-            pip_timing_models_list.push_back(entry);
-        }
-
-        size_t num_switches = pip_timing_models.size() + 2;
-        std::string switch_name;
-
-        arch_->num_switches = num_switches;
-
-        if (num_switches > 0) {
-            arch_->Switches = new t_arch_switch_inf[num_switches];
-        }
-
-        float R, Cin, Cint, Cout, Tdel;
-        for (size_t i = 0; i < num_switches; ++i) {
-            t_arch_switch_inf* as = &arch_->Switches[i];
-
-            R = Cin = Cint = Cout = Tdel = 0.0;
-            SwitchType type;
-
-            if (i == 0) {
-                switch_name = "short";
-                type = SwitchType::SHORT;
-                R = 0.0;
-            } else if (i == 1) {
-                switch_name = "generic";
-                type = SwitchType::MUX;
-                R = 0.0;
-            } else {
-                auto entry = pip_timing_models_list[i - 2];
-                auto model = timing_data[entry.second];
-                std::stringstream name;
-                std::string mux_type_string = entry.first ? "mux_" : "passGate_";
-                name << mux_type_string;
-
-                // FIXME: allow to dynamically choose different speed models and corners
-                R = get_corner_value(model.getOutputResistance(), "slow", "min");
-                name << "R" << std::scientific << R;
-
-                Cin = get_corner_value(model.getInputCapacitance(), "slow", "min");
-                name << "Cin" << std::scientific << Cin;
-
-                Cout = get_corner_value(model.getOutputCapacitance(), "slow", "min");
-                name << "Cout" << std::scientific << Cout;
-
-                if (entry.first) {
-                    Cint = get_corner_value(model.getInternalCapacitance(), "slow", "min");
-                    name << "Cinternal" << std::scientific << Cint;
-                }
-
-                Tdel = get_corner_value(model.getInternalDelay(), "slow", "min");
-                name << "Tdel" << std::scientific << Tdel;
-
-                switch_name = name.str() + std::to_string(i);
-                type = entry.first ? SwitchType::MUX : SwitchType::PASS_GATE;
-            }
-
-            /* Should never happen */
-            if (switch_name == std::string(VPR_DELAYLESS_SWITCH_NAME)) {
-                archfpga_throw(arch_file_, __LINE__,
-                               "Switch name '%s' is a reserved name for VPR internal usage!", switch_name.c_str());
-            }
-
-            as->name = vtr::strdup(switch_name.c_str());
-            as->set_type(type);
-            as->mux_trans_size = as->type() == SwitchType::MUX ? 1 : 0;
-
-            as->R = R;
-            as->Cin = Cin;
-            as->Cout = Cout;
-            as->Cinternal = Cint;
-            as->set_Tdel(t_arch_switch_inf::UNDEFINED_FANIN, Tdel);
-
-            if (as->type() == SwitchType::SHORT || as->type() == SwitchType::PASS_GATE) {
-                as->buf_size_type = BufferSize::ABSOLUTE;
-                as->buf_size = 0;
-                as->power_buffer_type = POWER_BUFFER_TYPE_ABSOLUTE_SIZE;
-                as->power_buffer_size = 0.;
-            } else {
-                as->buf_size_type = BufferSize::AUTO;
-                as->buf_size = 0.;
-                as->power_buffer_type = POWER_BUFFER_TYPE_AUTO;
-            }
-        }
-    }
-
-    void process_segments() {
-        // Segment names will be taken from wires connected to pips
-        // They are good representation for nodes
-        std::set<uint32_t> wire_names;
-        for (auto tile_type : ar_.getTileTypeList()) {
-            auto wires = tile_type.getWires();
-            for (auto pip : tile_type.getPips()) {
-                wire_names.insert(wires[pip.getWire0()]);
-                wire_names.insert(wires[pip.getWire1()]);
-            }
-        }
-
-        // FIXME: have only one segment type for the time being, so that
-        //        the RR graph generation is correct.
-        //        This can be removed once the RR graph reader from the interchange
-        //        device is ready and functional.
-        size_t num_seg = 1; //wire_names.size();
-
-        arch_->Segments.resize(num_seg);
-        size_t index = 0;
-        for (auto i : wire_names) {
-            if (index >= num_seg) break;
-
-            // Use default values as we will populate rr_graph with correct values
-            // This segments are just declaration of future use
-            arch_->Segments[index].name = str(i);
-            arch_->Segments[index].length = 1;
-            arch_->Segments[index].frequency = 1;
-            arch_->Segments[index].Rmetal = 1e-12;
-            arch_->Segments[index].Cmetal = 1e-12;
-            arch_->Segments[index].parallel_axis = BOTH_AXIS;
-
-            // TODO: Only bi-directional segments are created, but it the interchange format
-            //       has directionality information on PIPs, which may be used to infer the
-            //       segments' directonality.
-            arch_->Segments[index].directionality = BI_DIRECTIONAL;
-            arch_->Segments[index].arch_wire_switch = 1;
-            arch_->Segments[index].arch_opin_switch = 1;
-            arch_->Segments[index].cb.resize(1);
-            arch_->Segments[index].cb[0] = true;
-            arch_->Segments[index].sb.resize(2);
-            arch_->Segments[index].sb[0] = true;
-            arch_->Segments[index].sb[1] = true;
-            segment_name_to_segment_idx[str(i)] = index;
-            ++index;
-        }
-    }
-};
-
-void FPGAInterchangeReadArch(const char* FPGAInterchangeDeviceFile,
-                             const bool /*timing_enabled*/,
-                             t_arch* arch,
-                             std::vector<t_physical_tile_type>& PhysicalTileTypes,
-                             std::vector<t_logical_block_type>& LogicalBlockTypes) {
-    // Decompress GZipped capnproto device file
-    gzFile file = gzopen(FPGAInterchangeDeviceFile, "r");
-    VTR_ASSERT(file != Z_NULL);
-
-    std::vector<uint8_t> output_data;
-    output_data.resize(4096);
-    std::stringstream sstream(std::ios_base::in | std::ios_base::out | std::ios_base::binary);
-    while (true) {
-        int ret = gzread(file, output_data.data(), output_data.size());
-        VTR_ASSERT(ret >= 0);
-        if (ret > 0) {
-            sstream.write((const char*)output_data.data(), ret);
-            VTR_ASSERT(sstream);
-        } else {
-            VTR_ASSERT(ret == 0);
-            int error;
-            gzerror(file, &error);
-            VTR_ASSERT(error == Z_OK);
-            break;
-        }
-    }
-
-    VTR_ASSERT(gzclose(file) == Z_OK);
-
-    sstream.seekg(0);
-    kj::std::StdInputStream istream(sstream);
-
-    // Reader options
-    capnp::ReaderOptions reader_options;
-    reader_options.nestingLimit = std::numeric_limits<int>::max();
-    reader_options.traversalLimitInWords = std::numeric_limits<uint64_t>::max();
-
-    capnp::InputStreamMessageReader message_reader(istream, reader_options);
-
-    auto device_reader = message_reader.getRoot<DeviceResources::Device>();
-
-    arch->architecture_id = vtr::strdup(vtr::secure_digest_file(FPGAInterchangeDeviceFile).c_str());
-
-    ArchReader reader(arch, device_reader, FPGAInterchangeDeviceFile, PhysicalTileTypes, LogicalBlockTypes);
-    reader.read_arch();
-}
diff --git a/third_party/vtr/libs/archfpga/src/read_fpga_interchange_arch.h b/third_party/vtr/libs/archfpga/src/read_fpga_interchange_arch.h
deleted file mode 100644
index 1e84c0cbe..000000000
--- a/third_party/vtr/libs/archfpga/src/read_fpga_interchange_arch.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifndef READ_FPGAINTERCHANGE_ARCH_FILE_H
-#define READ_FPGAINTERCHANGE_ARCH_FILE_H
-
-#include "arch_types.h"
-
-#include "DeviceResources.capnp.h"
-#include "LogicalNetlist.capnp.h"
-#include "capnp/serialize.h"
-#include "capnp/serialize-packed.h"
-#include <fcntl.h>
-#include <unistd.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* special type indexes, necessary for initialization, everything afterwards
- * should use the pointers to these type indices */
-
-#define NUM_MODELS_IN_LIBRARY 4
-#define EMPTY_TYPE_INDEX 0
-
-/* function declaration */
-void FPGAInterchangeReadArch(const char* FPGAInterchangeDeviceFile,
-                             const bool timing_enabled,
-                             t_arch* arch,
-                             std::vector<t_physical_tile_type>& PhysicalTileTypes,
-                             std::vector<t_logical_block_type>& LogicalBlockTypes);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/third_party/vtr/libs/archfpga/src/read_xml_arch_file.cc b/third_party/vtr/libs/archfpga/src/read_xml_arch_file.cc
deleted file mode 100644
index 95481f264..000000000
--- a/third_party/vtr/libs/archfpga/src/read_xml_arch_file.cc
+++ /dev/null
@@ -1,5037 +0,0 @@
-/* The XML parser processes an XML file into a tree data structure composed of
- * pugi::xml_nodes.  Each node represents an XML element.  For example
- * <a> <b/> </a> will generate two pugi::xml_nodes.  One called "a" and its
- * child "b".  Each pugi::xml_node can contain various XML data such as attribute
- * information and text content.  The XML parser provides several functions to
- * help the developer build, and traverse tree (this is also somtime referred to
- * as the Document Object Model or DOM).
- *
- * For convenience it often makes sense to use some wraper functions (provided in
- * the pugiutil namespace of libvtrutil) which simplify loading an XML file and
- * error handling.
- *
- * The function pugiutil::load_xml() reads in an xml file.
- *
- * The function pugiutil::get_single_child() returns a child xml_node for a given parent
- * xml_node if there is a child which matches the name provided by the developer.
- *
- * The function pugiutil::get_attribute() is used to extract attributes from an
- * xml_node, returning a pugi::xml_attribute. xml_attribute objects support accessors
- * such as as_float(), as_int() to retrieve semantic values. See pugixml documentation
- * for more details.
- *
- * Architecture file checks already implemented (Daniel Chen):
- *		- Duplicate pb_types, pb_type ports, models, model ports,
- *			interconnects, interconnect annotations.
- *		- Port and pin range checking (port with 4 pins can only be
- *			accessed within [0:3].
- *		- LUT delay matrix size matches # of LUT inputs
- *		- Ensures XML tags are ordered.
- *		- Clocked primitives that have timing annotations must have a clock
- *			name matching the primitive.
- *		- Enforced VPR definition of LUT and FF must have one input port (n pins)
- *			and one output port(1 pin).
- *		- Checks file extension for blif and architecture xml file, avoid crashes if
- *			the two files are swapped on command line.
- *
- */
-
-#include <string.h>
-#include <map>
-#include <set>
-#include <string>
-#include <sstream>
-#include <algorithm>
-
-#include "pugixml.hpp"
-#include "pugixml_util.hpp"
-
-#include "vtr_assert.h"
-#include "vtr_log.h"
-#include "vtr_util.h"
-#include "vtr_memory.h"
-#include "vtr_digest.h"
-#include "vtr_token.h"
-#include "vtr_bimap.h"
-
-#include "arch_check.h"
-#include "arch_error.h"
-#include "arch_util.h"
-#include "arch_types.h"
-
-#include "read_xml_arch_file.h"
-#include "read_xml_util.h"
-#include "parse_switchblocks.h"
-
-#include "physical_types_util.h"
-
-using namespace std::string_literals;
-using pugiutil::ReqOpt;
-
-struct t_fc_override {
-    std::string port_name;
-    std::string seg_name;
-    e_fc_value_type fc_value_type;
-    float fc_value;
-};
-
-struct t_pin_counts {
-    int input = 0;
-    int output = 0;
-    int clock = 0;
-
-    int total() {
-        return input + output + clock;
-    }
-};
-
-struct t_pin_locs {
-  private:
-    // Distribution must be set once for each physical tile type
-    // and must be equal for each sub tile within a physical tile.
-    bool distribution_set = false;
-
-  public:
-    enum e_pin_location_distr distribution = E_SPREAD_PIN_DISTR;
-
-    /* [0..num_sub_tiles-1][0..width-1][0..height-1][0..3][0..num_tokens-1] */
-    vtr::NdMatrix<std::vector<std::string>, 4> assignments;
-
-    bool is_distribution_set() {
-        return distribution_set;
-    }
-
-    void set_distribution() {
-        VTR_ASSERT(distribution_set == false);
-        distribution_set = true;
-    }
-};
-
-/* Function prototypes */
-/*   Populate data */
-
-static void LoadPinLoc(pugi::xml_node Locations,
-                       t_physical_tile_type* type,
-                       t_pin_locs* pin_locs,
-                       const pugiutil::loc_data& loc_data);
-template<typename T>
-static std::pair<int, int> ProcessPinString(pugi::xml_node Locations,
-                                            T type,
-                                            const char* pin_loc_string,
-                                            const pugiutil::loc_data& loc_data);
-
-/* Process XML hierarchy */
-static void ProcessTiles(pugi::xml_node Node,
-                         std::vector<t_physical_tile_type>& PhysicalTileTypes,
-                         std::vector<t_logical_block_type>& LogicalBlockTypes,
-                         const t_default_fc_spec& arch_def_fc,
-                         t_arch& arch,
-                         const pugiutil::loc_data& loc_data);
-// TODO: Remove block_type_contains_blif_model / pb_type_contains_blif_model
-// as part of
-// https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1193
-static void MarkIoTypes(std::vector<t_physical_tile_type>& PhysicalTileTypes);
-static void ProcessTileProps(pugi::xml_node Node,
-                             t_physical_tile_type* PhysicalTileType,
-                             const pugiutil::loc_data& loc_data);
-static t_pin_counts ProcessSubTilePorts(pugi::xml_node Parent,
-                                        t_sub_tile* SubTile,
-                                        std::unordered_map<std::string, t_physical_tile_port>& tile_port_names,
-                                        const pugiutil::loc_data& loc_data);
-static void ProcessTilePort(pugi::xml_node Node,
-                            t_physical_tile_port* port,
-                            const pugiutil::loc_data& loc_data);
-static void ProcessTileEquivalentSites(pugi::xml_node Parent,
-                                       t_sub_tile* SubTile,
-                                       t_physical_tile_type* PhysicalTileType,
-                                       std::vector<t_logical_block_type>& LogicalBlockTypes,
-                                       const pugiutil::loc_data& loc_data);
-static void ProcessEquivalentSiteDirectConnection(pugi::xml_node Parent,
-                                                  t_sub_tile* SubTile,
-                                                  t_physical_tile_type* PhysicalTileType,
-                                                  t_logical_block_type* LogicalBlockType,
-                                                  const pugiutil::loc_data& loc_data);
-static void ProcessEquivalentSiteCustomConnection(pugi::xml_node Parent,
-                                                  t_sub_tile* SubTile,
-                                                  t_physical_tile_type* PhysicalTileType,
-                                                  t_logical_block_type* LogicalBlockType,
-                                                  std::string site_name,
-                                                  const pugiutil::loc_data& loc_data);
-static void ProcessPinLocations(pugi::xml_node Locations,
-                                t_physical_tile_type* PhysicalTileType,
-                                t_sub_tile* SubTile,
-                                t_pin_locs* pin_locs,
-                                const pugiutil::loc_data& loc_data);
-static void ProcessSubTiles(pugi::xml_node Node,
-                            t_physical_tile_type* PhysicalTileType,
-                            std::vector<t_logical_block_type>& LogicalBlockTypes,
-                            std::vector<t_segment_inf>& segments,
-                            const t_default_fc_spec& arch_def_fc,
-                            const pugiutil::loc_data& loc_data);
-static void ProcessPb_Type(vtr::string_internment* strings,
-                           pugi::xml_node Parent,
-                           t_pb_type* pb_type,
-                           t_mode* mode,
-                           const bool timing_enabled,
-                           const t_arch& arch,
-                           const pugiutil::loc_data& loc_data);
-static void ProcessPb_TypePort(pugi::xml_node Parent,
-                               t_port* port,
-                               e_power_estimation_method power_method,
-                               const bool is_root_pb_type,
-                               const pugiutil::loc_data& loc_data);
-static void ProcessPinToPinAnnotations(pugi::xml_node parent,
-                                       t_pin_to_pin_annotation* annotation,
-                                       t_pb_type* parent_pb_type,
-                                       const pugiutil::loc_data& loc_data);
-static void ProcessInterconnect(vtr::string_internment* strings, pugi::xml_node Parent, t_mode* mode, const pugiutil::loc_data& loc_data);
-static void ProcessMode(vtr::string_internment* strings, pugi::xml_node Parent, t_mode* mode, const bool timing_enabled, const t_arch& arch, const pugiutil::loc_data& loc_data);
-static t_metadata_dict ProcessMetadata(vtr::string_internment* strings, pugi::xml_node Parent, const pugiutil::loc_data& loc_data);
-static void Process_Fc_Values(pugi::xml_node Node, t_default_fc_spec& spec, const pugiutil::loc_data& loc_data);
-static void Process_Fc(pugi::xml_node Node,
-                       t_physical_tile_type* PhysicalTileType,
-                       t_sub_tile* SubTile,
-                       t_pin_counts pin_counts,
-                       std::vector<t_segment_inf>& segments,
-                       const t_default_fc_spec& arch_def_fc,
-                       const pugiutil::loc_data& loc_data);
-static t_fc_override Process_Fc_override(pugi::xml_node node, const pugiutil::loc_data& loc_data);
-static void ProcessSwitchblockLocations(pugi::xml_node switchblock_locations,
-                                        t_physical_tile_type* type,
-                                        const t_arch& arch,
-                                        const pugiutil::loc_data& loc_data);
-static e_fc_value_type string_to_fc_value_type(const std::string& str, pugi::xml_node node, const pugiutil::loc_data& loc_data);
-static void ProcessChanWidthDistr(pugi::xml_node Node,
-                                  t_arch* arch,
-                                  const pugiutil::loc_data& loc_data);
-static void ProcessChanWidthDistrDir(pugi::xml_node Node, t_chan* chan, const pugiutil::loc_data& loc_data);
-static void ProcessModels(pugi::xml_node Node, t_arch* arch, const pugiutil::loc_data& loc_data); // XmlReadArch passes the <models> node
-static void ProcessModelPorts(pugi::xml_node port_group, t_model* model, std::set<std::string>& port_names, const pugiutil::loc_data& loc_data);
-static void ProcessLayout(pugi::xml_node Node, t_arch* arch, const pugiutil::loc_data& loc_data); // XmlReadArch passes the <layout> node
-static t_grid_def ProcessGridLayout(vtr::string_internment* strings, pugi::xml_node layout_type_tag, const pugiutil::loc_data& loc_data);
-static void ProcessDevice(pugi::xml_node Node, t_arch* arch, t_default_fc_spec& arch_def_fc, const pugiutil::loc_data& loc_data); // XmlReadArch passes the <device> node
-static void ProcessComplexBlocks(vtr::string_internment* strings, pugi::xml_node Node, std::vector<t_logical_block_type>& LogicalBlockTypes, t_arch& arch, const bool timing_enabled, const pugiutil::loc_data& loc_data); // XmlReadArch passes the <complexblocklist> node
-static void ProcessSwitches(pugi::xml_node Node,
-                            t_arch_switch_inf** Switches,
-                            int* NumSwitches,
-                            const bool timing_enabled,
-                            const pugiutil::loc_data& loc_data); // XmlReadArch passes the <switchlist> node
-static void ProcessSwitchTdel(pugi::xml_node Node, const bool timing_enabled, const int switch_index, t_arch_switch_inf* Switches, const pugiutil::loc_data& loc_data);
-static void ProcessDirects(pugi::xml_node Parent, t_direct_inf** Directs, int* NumDirects, const t_arch_switch_inf* Switches, const int NumSwitches, const pugiutil::loc_data& loc_data); // XmlReadArch passes the optional <directlist> node
-static void ProcessClockMetalLayers(pugi::xml_node parent,
-                                    std::unordered_map<std::string, t_metal_layer>& metal_layers,
-                                    pugiutil::loc_data& loc_data); // NOTE(review): loc_data is non-const here, unlike most helpers in this list - confirm this is intended
-static void ProcessClockNetworks(pugi::xml_node parent,
-                                 std::vector<t_clock_network_arch>& clock_networks,
-                                 const t_arch_switch_inf* switches,
-                                 const int num_switches,
-                                 pugiutil::loc_data& loc_data); // NOTE(review): non-const loc_data - confirm this is intended
-static void ProcessClockSwitchPoints(pugi::xml_node parent,
-                                     t_clock_network_arch& clock_network,
-                                     const t_arch_switch_inf* switches,
-                                     const int num_switches,
-                                     pugiutil::loc_data& loc_data); // NOTE(review): non-const loc_data - confirm this is intended
-static void ProcessClockRouting(pugi::xml_node parent,
-                                std::vector<t_clock_connection_arch>& clock_connections,
-                                const t_arch_switch_inf* switches,
-                                const int num_switches,
-                                pugiutil::loc_data& loc_data); // NOTE(review): non-const loc_data - confirm this is intended
-static void ProcessSegments(pugi::xml_node Parent,
-                            std::vector<t_segment_inf>& Segs,
-                            const t_arch_switch_inf* Switches,
-                            const int NumSwitches,
-                            const bool timing_enabled,
-                            const bool switchblocklist_required,
-                            const pugiutil::loc_data& loc_data); // XmlReadArch passes the <segmentlist> node
-static void ProcessSwitchblocks(pugi::xml_node Parent, t_arch* arch, const pugiutil::loc_data& loc_data); // XmlReadArch passes the <switchblocklist> node when present
-static void ProcessCB_SB(pugi::xml_node Node, std::vector<bool>& list, const pugiutil::loc_data& loc_data);
-static void ProcessPower(pugi::xml_node parent,
-                         t_power_arch* power_arch,
-                         const pugiutil::loc_data& loc_data); // XmlReadArch passes the <power> node
-
-static void ProcessClocks(pugi::xml_node Parent, t_clock_arch* clocks, const pugiutil::loc_data& loc_data); // XmlReadArch passes the <clocks> node
-
-static void ProcessNoc(pugi::xml_node noc_tag, t_arch* arch, const pugiutil::loc_data& loc_data); // XmlReadArch passes the optional <noc> node
-
-static void processTopology(pugi::xml_node topology_tag, const pugiutil::loc_data& loc_data, t_noc_inf* noc_ref);
-
-static void processMeshTopology(pugi::xml_node mesh_topology_tag, const pugiutil::loc_data& loc_data, t_noc_inf* noc_ref);
-
-static void processRouter(pugi::xml_node router_tag, const pugiutil::loc_data& loc_data, t_noc_inf* noc_ref, std::map<int, std::pair<int, int>>& routers_info_in_arch);
-
-static void ProcessPb_TypePowerEstMethod(pugi::xml_node Parent, t_pb_type* pb_type, const pugiutil::loc_data& loc_data);
-static void ProcessPb_TypePort_Power(pugi::xml_node Parent, t_port* port, e_power_estimation_method power_method, const pugiutil::loc_data& loc_data);
-
-std::string inst_port_to_port_name(std::string inst_port); // not 'static': declared with external linkage
-
-static bool attribute_to_bool(const pugi::xml_node node,
-                              const pugi::xml_attribute attr,
-                              const pugiutil::loc_data& loc_data);
-int find_switch_by_name(const t_arch& arch, std::string switch_name); // not 'static': declared with external linkage
-
-e_side string_to_side(std::string side_str); // not 'static': declared with external linkage
-
-template<typename T>
-static T* get_type_by_name(const char* type_name, std::vector<T>& types);
-
-static void generate_noc_mesh(pugi::xml_node mesh_topology_tag, const pugiutil::loc_data& loc_data, t_noc_inf* noc_ref, double mesh_region_start_x, double mesh_region_end_x, double mesh_region_start_y, double mesh_region_end_y, int mesh_size);
-
-static bool parse_noc_router_connection_list(pugi::xml_node router_tag, const pugiutil::loc_data& loc_data, int router_id, std::vector<int>& connection_list, std::string connection_list_attribute_value, std::map<int, std::pair<int, int>>& routers_in_arch_info);
-
-static void update_router_info_in_arch(int router_id, bool router_updated_as_a_connection, std::map<int, std::pair<int, int>>& routers_in_arch_info);
-
-static void verify_noc_topology(std::map<int, std::pair<int, int>>& routers_in_arch_info);
-
-/*
- *
- *
- * External Function Implementations
- *
- *
- */
-
-/* Loads the given architecture file.
- *
- * Parses ArchFile as XML and hands each child of the root <architecture> node
- * to its parser, in this order: models, layout, device, switchlist,
- * segmentlist, switchblocklist, complexblocklist, tiles, directlist,
- * clocknetworks, power, clocks, noc.
- *
- *   ArchFile          : path of the architecture file; a warning is logged when
- *                       it does not have a ".xml" extension.
- *   timing_enabled    : forwarded to the switch, segment and complex block parsers.
- *   arch              : architecture description that the parsers fill in.
- *   PhysicalTileTypes : passed to ProcessTiles(), then linked to LogicalBlockTypes.
- *   LogicalBlockTypes : passed to ProcessComplexBlocks() and the later stages.
- *
- * A pugiutil::XmlError raised while parsing is re-reported through
- * archfpga_throw() with ArchFile and the line number carried by the error.
- */
-void XmlReadArch(const char* ArchFile,
-                 const bool timing_enabled,
-                 t_arch* arch,
-                 std::vector<t_physical_tile_type>& PhysicalTileTypes,
-                 std::vector<t_logical_block_type>& LogicalBlockTypes) {
-    pugi::xml_node Next;
-
-    if (!vtr::check_file_name_extension(ArchFile, ".xml")) {
-        VTR_LOG_WARN(
-            "Architecture file '%s' may be in incorrect format. "
-            "Expecting .xml format for architecture files.\n",
-            ArchFile);
-    }
-
-    //Create a unique identifier for this architecture file based on its contents
-    arch->architecture_id = vtr::strdup(vtr::secure_digest_file(ArchFile).c_str());
-
-    /* Parse the file */
-    pugi::xml_document doc;
-    pugiutil::loc_data loc_data;
-    t_default_fc_spec arch_def_fc;
-    try {
-        loc_data = pugiutil::load_xml(doc, ArchFile);
-
-        set_arch_file_name(ArchFile);
-
-        /* Root node should be architecture */
-        auto architecture = get_single_child(doc, "architecture", loc_data);
-
-        /* TODO: do version processing properly with string delimiting on the . */
-#if 0
-        char* Prop = get_attribute(architecture, "version", loc_data, ReqOpt::OPTIONAL).as_string(NULL);
-        if (Prop != NULL) {
-            if (atof(Prop) > atof(VPR_VERSION)) {
-                VTR_LOG_WARN( "This architecture version is for VPR %f while your current VPR version is " VPR_VERSION ", compatability issues may arise\n",
-                        atof(Prop));
-            }
-        }
-#endif
-
-        /* Process models */
-        Next = get_single_child(architecture, "models", loc_data);
-        ProcessModels(Next, arch, loc_data);
-        CreateModelLibrary(arch);
-
-        /* Process layout */
-        Next = get_single_child(architecture, "layout", loc_data);
-        ProcessLayout(Next, arch, loc_data);
-
-        /* Process device */
-        Next = get_single_child(architecture, "device", loc_data);
-        ProcessDevice(Next, arch, arch_def_fc, loc_data);
-
-        /* Process switches */
-        Next = get_single_child(architecture, "switchlist", loc_data);
-        ProcessSwitches(Next, &(arch->Switches), &(arch->num_switches),
-                        timing_enabled, loc_data);
-
-        /* The <switchblocklist> section is required only if custom switchblocks are used */
-        const bool switchblocklist_required = (arch->SBType == CUSTOM);
-        const ReqOpt SWITCHBLOCKLIST_REQD = BoolToReqOpt(switchblocklist_required);
-
-        /* Process segments. This depends on switches */
-        Next = get_single_child(architecture, "segmentlist", loc_data);
-        ProcessSegments(Next, arch->Segments,
-                        arch->Switches, arch->num_switches, timing_enabled, switchblocklist_required, loc_data);
-
-        /* Process switchblocks. This depends on switches */
-        Next = get_single_child(architecture, "switchblocklist", loc_data, SWITCHBLOCKLIST_REQD);
-        if (Next) {
-            ProcessSwitchblocks(Next, arch, loc_data);
-        }
-
-        /* Process logical block types */
-        Next = get_single_child(architecture, "complexblocklist", loc_data);
-        ProcessComplexBlocks(&arch->strings, Next, LogicalBlockTypes, *arch, timing_enabled, loc_data);
-
-        /* Process physical tile types */
-        Next = get_single_child(architecture, "tiles", loc_data);
-        ProcessTiles(Next, PhysicalTileTypes, LogicalBlockTypes, arch_def_fc, *arch, loc_data);
-
-        /* Link Physical Tiles with Logical Blocks */
-        link_physical_logical_types(PhysicalTileTypes, LogicalBlockTypes);
-
-        /* Process directs */
-        Next = get_single_child(architecture, "directlist", loc_data, ReqOpt::OPTIONAL);
-        if (Next) {
-            ProcessDirects(Next, &(arch->Directs), &(arch->num_directs),
-                           arch->Switches, arch->num_switches,
-                           loc_data);
-        }
-
-        /* Process Clock Networks */
-        Next = get_single_child(architecture, "clocknetworks", loc_data, ReqOpt::OPTIONAL);
-        if (Next) {
-            std::vector<std::string> expected_children = {"metal_layers", "clock_network", "clock_routing"};
-            expect_only_children(Next, expected_children, loc_data);
-
-            ProcessClockMetalLayers(Next, arch->clock_arch.clock_metal_layers, loc_data);
-            ProcessClockNetworks(Next,
-                                 arch->clock_arch.clock_networks_arch,
-                                 arch->Switches,
-                                 arch->num_switches,
-                                 loc_data);
-            ProcessClockRouting(Next,
-                                arch->clock_arch.clock_connections_arch,
-                                arch->Switches,
-                                arch->num_switches,
-                                loc_data);
-        }
-
-        /* Process architecture power information */
-
-        /* If arch->power has been initialized, meaning the user has requested power estimation,
-         * then the power architecture information is required.
-         */
-        const ReqOpt POWER_REQD = arch->power ? ReqOpt::REQUIRED : ReqOpt::OPTIONAL;
-
-        Next = get_single_child(architecture, "power", loc_data, POWER_REQD);
-        if (Next) {
-            if (arch->power) {
-                ProcessPower(Next, arch->power, loc_data);
-            } else {
-                /* This information still needs to be read, even if it is just
-                 * thrown away.
-                 */
-                t_power_arch* power_arch_fake = (t_power_arch*)vtr::calloc(1,
-                                                                           sizeof(t_power_arch));
-                try {
-                    ProcessPower(Next, power_arch_fake, loc_data);
-                } catch (...) {
-                    /* Do not leak the scratch buffer when the section is malformed */
-                    free(power_arch_fake);
-                    throw;
-                }
-                free(power_arch_fake);
-            }
-        }
-
-        // Process Clocks (required under the same condition as the power section)
-        Next = get_single_child(architecture, "clocks", loc_data, POWER_REQD);
-        if (Next) {
-            if (arch->clocks) {
-                ProcessClocks(Next, arch->clocks, loc_data);
-            } else {
-                /* This information still needs to be read, even if it is just
-                 * thrown away.
-                 */
-                t_clock_arch* clocks_fake = (t_clock_arch*)vtr::calloc(1,
-                                                                       sizeof(t_clock_arch));
-                try {
-                    ProcessClocks(Next, clocks_fake, loc_data);
-                } catch (...) {
-                    /* clock_inf is null (calloc) until ProcessClocks assigns it,
-                     * so freeing it here is safe at any point of failure.
-                     */
-                    free(clocks_fake->clock_inf);
-                    free(clocks_fake);
-                    throw;
-                }
-                free(clocks_fake->clock_inf);
-                free(clocks_fake);
-            }
-        }
-
-        // process NoC (optional)
-        Next = get_single_child(architecture, "noc", loc_data, ReqOpt::OPTIONAL);
-
-        if (Next) {
-            ProcessNoc(Next, arch, loc_data);
-        }
-
-        SyncModelsPbTypes(arch, LogicalBlockTypes);
-        check_models(arch);
-
-        MarkIoTypes(PhysicalTileTypes);
-    } catch (pugiutil::XmlError& e) {
-        archfpga_throw(ArchFile, e.line(),
-                       "%s", e.what());
-    }
-}
-
-/*
- *
- *
- * File-scope function implementations
- *
- *
- */
-
-/* Marks, for every pin of a physical tile, the (width, height, side)
- * locations at which the pin appears, following pin_locs->distribution:
- *   - E_SPREAD_PIN_DISTR: pins are dealt out over every side of every grid
- *     location of the tile;
- *   - E_PERIMETER_PIN_DISTR: pins are dealt out round-robin over the sides
- *     that lie on the tile perimeter;
- *   - E_SPREAD_INPUTS_PERIMETER_OUTPUTS_PIN_DISTR: RECEIVER pins go on all
- *     sides, DRIVER pins on perimeter sides only;
- *   - E_CUSTOM_PIN_DISTR: locations come from pin_locs->assignments, parsed
- *     with ProcessPinString().
- *
- * While marking, the width/height offsets of each pin are summed up; they are
- * divided by the number of locations of the pin at the end, so the stored
- * offsets are the (integer) averages over all locations of that pin.
- *
- * Locations and loc_data are only used for the custom distribution, where
- * they are forwarded to ProcessPinString().
- */
-static void LoadPinLoc(pugi::xml_node Locations,
-                       t_physical_tile_type* type,
-                       t_pin_locs* pin_locs,
-                       const pugiutil::loc_data& loc_data) {
-    type->pin_width_offset.resize(type->num_pins, 0);
-    type->pin_height_offset.resize(type->num_pins, 0);
-
-    // Number of locations recorded so far for each physical pin
-    std::vector<int> locations_per_pin(type->num_pins, 0);
-
-    // Side visiting order shared by all distributions
-    const e_side side_order[] = {TOP, RIGHT, BOTTOM, LEFT};
-
-    // Records one location of 'pin' and accumulates its offsets
-    auto place_pin = [&](int x_off, int y_off, e_side side, int pin) {
-        type->pinloc[x_off][y_off][side][pin] = true;
-        type->pin_width_offset[pin] += x_off;
-        type->pin_height_offset[pin] += y_off;
-        locations_per_pin[pin] += 1;
-    };
-
-    // True when 'side' of grid location (x_off, y_off) is on the tile perimeter
-    auto on_perimeter = [&](int x_off, int y_off, e_side side) {
-        return (x_off == 0 && side == LEFT)
-               || (y_off == type->height - 1 && side == TOP)
-               || (x_off == type->width - 1 && side == RIGHT)
-               || (y_off == 0 && side == BOTTOM);
-    };
-
-    if (pin_locs->distribution == E_SPREAD_PIN_DISTR) {
-        /* evenly distribute pins starting at bottom left corner */
-
-        int num_sides = 4 * (type->width * type->height);
-        int side_index = 0;
-        int count = 0;
-        for (e_side side : side_order) {
-            for (int x_off = 0; x_off < type->width; ++x_off) {
-                for (int y_off = 0; y_off < type->height; ++y_off) {
-                    // The side with index 'side_index' receives pins
-                    // side_index, side_index + num_sides, side_index + 2 * num_sides, ...
-                    for (int pin_num = side_index; pin_num < type->num_pins; pin_num += num_sides) {
-                        place_pin(x_off, y_off, side, pin_num);
-                        count++;
-                    }
-                    side_index++;
-                }
-            }
-        }
-        VTR_ASSERT(side_index == num_sides);
-        VTR_ASSERT(count == type->num_pins);
-    } else if (pin_locs->distribution == E_PERIMETER_PIN_DISTR) {
-        //Add one pin at-a-time to perimeter sides in round-robin order
-        int next_pin = 0;
-        while (next_pin < type->num_pins) {
-            for (int x_off = 0; x_off < type->width; ++x_off) {
-                for (int y_off = 0; y_off < type->height; ++y_off) {
-                    for (e_side side : side_order) {
-                        if (next_pin >= type->num_pins || !on_perimeter(x_off, y_off, side)) {
-                            //Nothing left to allocate, or not a perimeter side
-                            continue;
-                        }
-
-                        place_pin(x_off, y_off, side, next_pin);
-                        ++next_pin;
-                    }
-                }
-            }
-        }
-        VTR_ASSERT(next_pin == type->num_pins);
-
-    } else if (pin_locs->distribution == E_SPREAD_INPUTS_PERIMETER_OUTPUTS_PIN_DISTR) {
-        //Collect the sets of block input/output pins
-        std::vector<int> input_pins;
-        std::vector<int> output_pins;
-        for (int pin_num = 0; pin_num < type->num_pins; ++pin_num) {
-            int iclass = type->pin_class[pin_num];
-
-            if (type->class_inf[iclass].type == RECEIVER) {
-                input_pins.push_back(pin_num);
-            } else {
-                VTR_ASSERT(type->class_inf[iclass].type == DRIVER);
-                output_pins.push_back(pin_num);
-            }
-        }
-
-        //Deals out 'pins' one at-a-time in round-robin order, either to all
-        //sides or to perimeter sides only; returns how many pins were placed
-        auto deal_round_robin = [&](const std::vector<int>& pins, bool perimeter_only) {
-            size_t next = 0;
-            while (next < pins.size()) {
-                for (int x_off = 0; x_off < type->width; ++x_off) {
-                    for (int y_off = 0; y_off < type->height; ++y_off) {
-                        for (e_side side : side_order) {
-                            if (next >= pins.size()) {
-                                continue;
-                            }
-                            if (perimeter_only && !on_perimeter(x_off, y_off, side)) {
-                                continue;
-                            }
-
-                            place_pin(x_off, y_off, side, pins[next]);
-                            ++next;
-                        }
-                    }
-                }
-            }
-            return next;
-        };
-
-        //Inputs go to all sides
-        size_t ipin = deal_round_robin(input_pins, false);
-        VTR_ASSERT(ipin == input_pins.size());
-
-        //Outputs go to perimeter sides only
-        ipin = deal_round_robin(output_pins, true);
-        VTR_ASSERT(ipin == output_pins.size());
-
-    } else {
-        VTR_ASSERT(pin_locs->distribution == E_CUSTOM_PIN_DISTR);
-        for (auto& sub_tile : type->sub_tiles) {
-            int sub_tile_index = sub_tile.index;
-            int sub_tile_capacity = sub_tile.capacity.total();
-
-            for (int x_off = 0; x_off < type->width; ++x_off) {
-                for (int y_off = 0; y_off < type->height; ++y_off) {
-                    for (e_side side : side_order) {
-                        for (const auto& token : pin_locs->assignments[sub_tile_index][x_off][y_off][side]) {
-                            //Half-open range of sub tile pins named by this assignment
-                            auto pin_range = ProcessPinString<t_sub_tile*>(Locations,
-                                                                           &sub_tile,
-                                                                           token.c_str(),
-                                                                           loc_data);
-
-                            for (int pin_num = pin_range.first; pin_num < pin_range.second; ++pin_num) {
-                                VTR_ASSERT(pin_num < (int)sub_tile.sub_tile_to_tile_pin_indices.size() / sub_tile_capacity);
-                                //Place the pin once for every capacity instance of the sub tile
-                                for (int capacity = 0; capacity < sub_tile_capacity; ++capacity) {
-                                    int sub_tile_pin_index = pin_num + capacity * sub_tile.num_phy_pins / sub_tile_capacity;
-                                    int physical_pin_index = sub_tile.sub_tile_to_tile_pin_indices[sub_tile_pin_index];
-                                    place_pin(x_off, y_off, side, physical_pin_index);
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    //Turn the accumulated offsets into per-pin averages
-    for (int pin = 0; pin < type->num_pins; ++pin) {
-        VTR_ASSERT(locations_per_pin[pin] >= 1);
-
-        type->pin_width_offset[pin] /= locations_per_pin[pin];
-        type->pin_height_offset[pin] /= locations_per_pin[pin];
-
-        VTR_ASSERT(type->pin_width_offset[pin] >= 0 && type->pin_width_offset[pin] < type->width);
-        VTR_ASSERT(type->pin_height_offset[pin] >= 0 && type->pin_height_offset[pin] < type->height);
-    }
-}
-
-/* Parses a pin location string and returns the pin range it designates.
- *
- * Accepted forms (as enforced by the token checks below):
- *   <type_name>.<port_name>            every pin of the port
- *   <type_name>.<port_name>[<i>]       a single pin
- *   <type_name>.<port_name>[<i>:<j>]   an inclusive pin range, in either order
- *
- *   Locations      : XML node used only for line numbers in error messages.
- *   type           : object whose 'name' must match <type_name> and whose
- *                    ports are searched with get_port_by_name().
- *   pin_loc_string : the string to parse.
- *   loc_data       : supplies the file name and line for error reporting.
- *
- * Returns the half-open range [first, last) of pin indices, offset by the
- * port's absolute_first_pin_index. Malformed strings and unknown ports are
- * reported through archfpga_throw().
- */
-template<typename T>
-static std::pair<int, int> ProcessPinString(pugi::xml_node Locations,
-                                            T type,
-                                            const char* pin_loc_string,
-                                            const pugiutil::loc_data& loc_data) {
-    int num_tokens = 0;
-    auto tokens = GetTokensFromString(pin_loc_string, &num_tokens);
-
-    // Releases the token array on every exit path, so that the tokens are not
-    // leaked when archfpga_throw() raises on a malformed string.
-    struct TokenReleaser {
-        decltype(tokens) list;
-        int count;
-        ~TokenReleaser() {
-            if (list != nullptr) {
-                freeTokens(list, count);
-            }
-        }
-    } token_releaser{tokens, num_tokens};
-
-    // True when a token exists at 'index' and has the expected type. A string
-    // that ends early (or yields no tokens at all) therefore fails the same
-    // check, with the same message, as a token of the wrong type, without
-    // reading past the tokens that were actually produced.
-    auto token_is = [&](int index, decltype(tokens[0].type) expected) {
-        return tokens != nullptr && index < num_tokens && tokens[index].type == expected;
-    };
-
-    int token_index = 0;
-
-    if (!token_is(token_index, TOKEN_STRING) || 0 != strcmp(tokens[token_index].data, type->name)) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
-                       "Wrong physical type name of the port: %s\n", pin_loc_string);
-    }
-
-    token_index++;
-
-    if (!token_is(token_index, TOKEN_DOT)) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
-                       "No dot is present to separate type name and port name: %s\n", pin_loc_string);
-    }
-
-    token_index++;
-
-    if (!token_is(token_index, TOKEN_STRING)) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
-                       "No port name is present: %s\n", pin_loc_string);
-    }
-
-    const auto& port_token = tokens[token_index];
-    auto port = get_port_by_name(type, port_token.data);
-    if (port == nullptr) {
-        // Port name first, then the type it was looked up in
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
-                       "Port %s for %s could not be found: %s\n",
-                       port_token.data, type->name,
-                       pin_loc_string);
-    }
-    int abs_first_pin_idx = port->absolute_first_pin_index;
-
-    token_index++;
-
-    // All the pins of the port are taken or the port has a single pin
-    if (token_index == num_tokens) {
-        return std::make_pair(abs_first_pin_idx, abs_first_pin_idx + port->num_pins);
-    }
-
-    if (!token_is(token_index, TOKEN_OPEN_SQUARE_BRACKET)) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
-                       "No open square bracket present: %s\n", pin_loc_string);
-    }
-
-    token_index++;
-
-    if (!token_is(token_index, TOKEN_INT)) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
-                       "No integer to indicate least significant pin index: %s\n", pin_loc_string);
-    }
-
-    int first_pin = vtr::atoi(tokens[token_index].data);
-
-    token_index++;
-
-    // Single pin is specified
-    if (!token_is(token_index, TOKEN_COLON)) {
-        if (!token_is(token_index, TOKEN_CLOSE_SQUARE_BRACKET)) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
-                           "No closing bracket: %s\n", pin_loc_string);
-        }
-
-        token_index++;
-
-        if (token_index != num_tokens) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
-                           "pin location should be completed, but more tokens are present: %s\n", pin_loc_string);
-        }
-
-        return std::make_pair(abs_first_pin_idx + first_pin, abs_first_pin_idx + first_pin + 1);
-    }
-
-    token_index++;
-
-    if (!token_is(token_index, TOKEN_INT)) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
-                       "No integer to indicate most significant pin index: %s\n", pin_loc_string);
-    }
-
-    int last_pin = vtr::atoi(tokens[token_index].data);
-
-    token_index++;
-
-    if (!token_is(token_index, TOKEN_CLOSE_SQUARE_BRACKET)) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
-                       "No closed square bracket: %s\n", pin_loc_string);
-    }
-
-    token_index++;
-
-    if (token_index != num_tokens) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
-                       "pin location should be completed, but more tokens are present: %s\n", pin_loc_string);
-    }
-
-    // The range may be written in either order, e.g. [3:0] or [0:3]
-    if (first_pin > last_pin) {
-        std::swap(first_pin, last_pin);
-    }
-
-    return std::make_pair(abs_first_pin_idx + first_pin, abs_first_pin_idx + last_pin + 1);
-}
-
-static void ProcessPinToPinAnnotations(pugi::xml_node Parent,
-                                       t_pin_to_pin_annotation* annotation,
-                                       t_pb_type* parent_pb_type,
-                                       const pugiutil::loc_data& loc_data) {
-    int i = 0;
-    const char* Prop;
-
-    if (get_attribute(Parent, "max", loc_data, ReqOpt::OPTIONAL).as_string(nullptr)) {
-        i++;
-    }
-    if (get_attribute(Parent, "min", loc_data, ReqOpt::OPTIONAL).as_string(nullptr)) {
-        i++;
-    }
-    if (get_attribute(Parent, "type", loc_data, ReqOpt::OPTIONAL).as_string(nullptr)) {
-        i++;
-    }
-    if (get_attribute(Parent, "value", loc_data, ReqOpt::OPTIONAL).as_string(nullptr)) {
-        i++;
-    }
-    if (0 == strcmp(Parent.name(), "C_constant")
-        || 0 == strcmp(Parent.name(), "C_matrix")
-        || 0 == strcmp(Parent.name(), "pack_pattern")) {
-        i = 1;
-    }
-
-    annotation->num_value_prop_pairs = i;
-    annotation->prop = (int*)vtr::calloc(i, sizeof(int));
-    annotation->value = (char**)vtr::calloc(i, sizeof(char*));
-    annotation->line_num = loc_data.line(Parent);
-    /* Todo: This is slow, I should use a case lookup */
-    i = 0;
-    if (0 == strcmp(Parent.name(), "delay_constant")) {
-        annotation->type = E_ANNOT_PIN_TO_PIN_DELAY;
-        annotation->format = E_ANNOT_PIN_TO_PIN_CONSTANT;
-        Prop = get_attribute(Parent, "max", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-        if (Prop) {
-            annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_DELAY_MAX;
-            annotation->value[i] = vtr::strdup(Prop);
-            i++;
-        }
-        Prop = get_attribute(Parent, "min", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-        if (Prop) {
-            annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_DELAY_MIN;
-            annotation->value[i] = vtr::strdup(Prop);
-            i++;
-        }
-        Prop = get_attribute(Parent, "in_port", loc_data).value();
-        annotation->input_pins = vtr::strdup(Prop);
-
-        Prop = get_attribute(Parent, "out_port", loc_data).value();
-        annotation->output_pins = vtr::strdup(Prop);
-
-    } else if (0 == strcmp(Parent.name(), "delay_matrix")) {
-        annotation->type = E_ANNOT_PIN_TO_PIN_DELAY;
-        annotation->format = E_ANNOT_PIN_TO_PIN_MATRIX;
-        Prop = get_attribute(Parent, "type", loc_data).value();
-        annotation->value[i] = vtr::strdup(Parent.child_value());
-
-        if (0 == strcmp(Prop, "max")) {
-            annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_DELAY_MAX;
-        } else {
-            VTR_ASSERT(0 == strcmp(Prop, "min"));
-            annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_DELAY_MIN;
-        }
-
-        i++;
-        Prop = get_attribute(Parent, "in_port", loc_data).value();
-        annotation->input_pins = vtr::strdup(Prop);
-
-        Prop = get_attribute(Parent, "out_port", loc_data).value();
-        annotation->output_pins = vtr::strdup(Prop);
-
-    } else if (0 == strcmp(Parent.name(), "C_constant")) {
-        annotation->type = E_ANNOT_PIN_TO_PIN_CAPACITANCE;
-        annotation->format = E_ANNOT_PIN_TO_PIN_CONSTANT;
-        Prop = get_attribute(Parent, "C", loc_data).value();
-        annotation->value[i] = vtr::strdup(Prop);
-        annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_CAPACITANCE_C;
-        i++;
-
-        Prop = get_attribute(Parent, "in_port", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-        annotation->input_pins = vtr::strdup(Prop);
-
-        Prop = get_attribute(Parent, "out_port", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-        annotation->output_pins = vtr::strdup(Prop);
-        VTR_ASSERT(annotation->output_pins != nullptr || annotation->input_pins != nullptr);
-
-    } else if (0 == strcmp(Parent.name(), "C_matrix")) {
-        annotation->type = E_ANNOT_PIN_TO_PIN_CAPACITANCE;
-        annotation->format = E_ANNOT_PIN_TO_PIN_MATRIX;
-        annotation->value[i] = vtr::strdup(Parent.child_value());
-        annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_CAPACITANCE_C;
-        i++;
-
-        Prop = get_attribute(Parent, "in_port", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-        annotation->input_pins = vtr::strdup(Prop);
-
-        Prop = get_attribute(Parent, "out_port", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-        annotation->output_pins = vtr::strdup(Prop);
-        VTR_ASSERT(annotation->output_pins != nullptr || annotation->input_pins != nullptr);
-
-    } else if (0 == strcmp(Parent.name(), "T_setup")) {
-        annotation->type = E_ANNOT_PIN_TO_PIN_DELAY;
-        annotation->format = E_ANNOT_PIN_TO_PIN_CONSTANT;
-        Prop = get_attribute(Parent, "value", loc_data).value();
-        annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_DELAY_TSETUP;
-        annotation->value[i] = vtr::strdup(Prop);
-
-        i++;
-        Prop = get_attribute(Parent, "port", loc_data).value();
-        annotation->input_pins = vtr::strdup(Prop);
-
-        Prop = get_attribute(Parent, "clock", loc_data).value();
-        annotation->clock = vtr::strdup(Prop);
-
-        primitives_annotation_clock_match(annotation, parent_pb_type);
-
-    } else if (0 == strcmp(Parent.name(), "T_clock_to_Q")) {
-        annotation->type = E_ANNOT_PIN_TO_PIN_DELAY;
-        annotation->format = E_ANNOT_PIN_TO_PIN_CONSTANT;
-        Prop = get_attribute(Parent, "max", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-
-        bool found_min_max_attrib = false;
-        if (Prop) {
-            annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_DELAY_CLOCK_TO_Q_MAX;
-            annotation->value[i] = vtr::strdup(Prop);
-            i++;
-            found_min_max_attrib = true;
-        }
-        Prop = get_attribute(Parent, "min", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-        if (Prop) {
-            annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_DELAY_CLOCK_TO_Q_MIN;
-            annotation->value[i] = vtr::strdup(Prop);
-            i++;
-            found_min_max_attrib = true;
-        }
-
-        if (!found_min_max_attrib) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                           "Failed to find either 'max' or 'min' attribute required for <%s> in <%s>",
-                           Parent.name(), Parent.parent().name());
-        }
-
-        Prop = get_attribute(Parent, "port", loc_data).value();
-        annotation->input_pins = vtr::strdup(Prop);
-
-        Prop = get_attribute(Parent, "clock", loc_data).value();
-        annotation->clock = vtr::strdup(Prop);
-
-        primitives_annotation_clock_match(annotation, parent_pb_type);
-
-    } else if (0 == strcmp(Parent.name(), "T_hold")) {
-        annotation->type = E_ANNOT_PIN_TO_PIN_DELAY;
-        annotation->format = E_ANNOT_PIN_TO_PIN_CONSTANT;
-        Prop = get_attribute(Parent, "value", loc_data).value();
-        annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_DELAY_THOLD;
-        annotation->value[i] = vtr::strdup(Prop);
-        i++;
-
-        Prop = get_attribute(Parent, "port", loc_data).value();
-        annotation->input_pins = vtr::strdup(Prop);
-
-        Prop = get_attribute(Parent, "clock", loc_data).value();
-        annotation->clock = vtr::strdup(Prop);
-
-        primitives_annotation_clock_match(annotation, parent_pb_type);
-
-    } else if (0 == strcmp(Parent.name(), "pack_pattern")) {
-        annotation->type = E_ANNOT_PIN_TO_PIN_PACK_PATTERN;
-        annotation->format = E_ANNOT_PIN_TO_PIN_CONSTANT;
-        Prop = get_attribute(Parent, "name", loc_data).value();
-        annotation->prop[i] = (int)E_ANNOT_PIN_TO_PIN_PACK_PATTERN_NAME;
-        annotation->value[i] = vtr::strdup(Prop);
-        i++;
-
-        Prop = get_attribute(Parent, "in_port", loc_data).value();
-        annotation->input_pins = vtr::strdup(Prop);
-
-        Prop = get_attribute(Parent, "out_port", loc_data).value();
-        annotation->output_pins = vtr::strdup(Prop);
-
-    } else {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                       "Unknown port type %s in %s in %s", Parent.name(),
-                       Parent.parent().name(), Parent.parent().parent().name());
-    }
-    VTR_ASSERT(i == annotation->num_value_prop_pairs);
-}
-
-static void ProcessPb_TypePowerPinToggle(pugi::xml_node parent, t_pb_type* pb_type, const pugiutil::loc_data& loc_data) {
-    pugi::xml_node cur;
-    const char* prop;
-    t_port* port;
-    int high, low;
-
-    cur = get_first_child(parent, "port", loc_data, ReqOpt::OPTIONAL);
-    while (cur) {
-        prop = get_attribute(cur, "name", loc_data).value();
-
-        port = findPortByName(prop, pb_type, &high, &low);
-        if (!port) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(cur),
-                           "Could not find port '%s' needed for energy per toggle.",
-                           prop);
-        }
-        if (high != port->num_pins - 1 || low != 0) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(cur),
-                           "Pin-toggle does not support pin indices (%s)", prop);
-        }
-
-        if (port->port_power->pin_toggle_initialized) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(cur),
-                           "Duplicate pin-toggle energy for port '%s'", port->name);
-        }
-        port->port_power->pin_toggle_initialized = true;
-
-        /* Get energy per toggle */
-        port->port_power->energy_per_toggle = get_attribute(cur,
-                                                            "energy_per_toggle", loc_data)
-                                                  .as_float(0.);
-
-        /* Get scaled by factor */
-        bool reverse_scaled = false;
-        prop = get_attribute(cur, "scaled_by_static_prob", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-        if (!prop) {
-            prop = get_attribute(cur, "scaled_by_static_prob_n", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-            if (prop) {
-                reverse_scaled = true;
-            }
-        }
-
-        if (prop) {
-            port->port_power->scaled_by_port = findPortByName(prop, pb_type,
-                                                              &high, &low);
-            if (high != low) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(cur),
-                               "Pin-toggle 'scaled_by_static_prob' must be a single pin (%s)",
-                               prop);
-            }
-            port->port_power->scaled_by_port_pin_idx = high;
-            port->port_power->reverse_scaled = reverse_scaled;
-        }
-
-        cur = cur.next_sibling(cur.name());
-    }
-}
-
-static void ProcessPb_TypePower(pugi::xml_node Parent, t_pb_type* pb_type, const pugiutil::loc_data& loc_data) {
-    pugi::xml_node cur, child;
-    bool require_dynamic_absolute = false;
-    bool require_static_absolute = false;
-    bool require_dynamic_C_internal = false;
-
-    cur = get_first_child(Parent, "power", loc_data, ReqOpt::OPTIONAL);
-    if (!cur) {
-        return;
-    }
-
-    switch (pb_type->pb_type_power->estimation_method) {
-        case POWER_METHOD_TOGGLE_PINS:
-            ProcessPb_TypePowerPinToggle(cur, pb_type, loc_data);
-            require_static_absolute = true;
-            break;
-        case POWER_METHOD_C_INTERNAL:
-            require_dynamic_C_internal = true;
-            require_static_absolute = true;
-            break;
-        case POWER_METHOD_ABSOLUTE:
-            require_dynamic_absolute = true;
-            require_static_absolute = true;
-            break;
-        default:
-            break;
-    }
-
-    if (require_static_absolute) {
-        child = get_single_child(cur, "static_power", loc_data);
-        pb_type->pb_type_power->absolute_power_per_instance.leakage = get_attribute(child, "power_per_instance", loc_data).as_float(0.);
-    }
-
-    if (require_dynamic_absolute) {
-        child = get_single_child(cur, "dynamic_power", loc_data);
-        pb_type->pb_type_power->absolute_power_per_instance.dynamic = get_attribute(child, "power_per_instance", loc_data).as_float(0.);
-    }
-
-    if (require_dynamic_C_internal) {
-        child = get_single_child(cur, "dynamic_power", loc_data);
-        pb_type->pb_type_power->C_internal = get_attribute(child,
-                                                           "C_internal", loc_data)
-                                                 .as_float(0.);
-    }
-}
-
-static void ProcessPb_TypePowerEstMethod(pugi::xml_node Parent, t_pb_type* pb_type, const pugiutil::loc_data& loc_data) {
-    pugi::xml_node cur;
-    const char* prop;
-
-    e_power_estimation_method parent_power_method;
-
-    prop = nullptr;
-
-    cur = get_first_child(Parent, "power", loc_data, ReqOpt::OPTIONAL);
-    if (cur) {
-        prop = get_attribute(cur, "method", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-    }
-
-    if (pb_type->parent_mode && pb_type->parent_mode->parent_pb_type) {
-        parent_power_method = pb_type->parent_mode->parent_pb_type->pb_type_power->estimation_method;
-    } else {
-        parent_power_method = POWER_METHOD_AUTO_SIZES;
-    }
-
-    if (!prop) {
-        /* default method is auto-size */
-        pb_type->pb_type_power->estimation_method = power_method_inherited(parent_power_method);
-    } else if (strcmp(prop, "auto-size") == 0) {
-        pb_type->pb_type_power->estimation_method = POWER_METHOD_AUTO_SIZES;
-    } else if (strcmp(prop, "specify-size") == 0) {
-        pb_type->pb_type_power->estimation_method = POWER_METHOD_SPECIFY_SIZES;
-    } else if (strcmp(prop, "pin-toggle") == 0) {
-        pb_type->pb_type_power->estimation_method = POWER_METHOD_TOGGLE_PINS;
-    } else if (strcmp(prop, "c-internal") == 0) {
-        pb_type->pb_type_power->estimation_method = POWER_METHOD_C_INTERNAL;
-    } else if (strcmp(prop, "absolute") == 0) {
-        pb_type->pb_type_power->estimation_method = POWER_METHOD_ABSOLUTE;
-    } else if (strcmp(prop, "ignore") == 0) {
-        pb_type->pb_type_power->estimation_method = POWER_METHOD_IGNORE;
-    } else if (strcmp(prop, "sum-of-children") == 0) {
-        pb_type->pb_type_power->estimation_method = POWER_METHOD_SUM_OF_CHILDREN;
-    } else {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(cur),
-                       "Invalid power estimation method for pb_type '%s'",
-                       pb_type->name);
-    }
-}
-
-/* Takes in a pb_type, allocates and loads data for it and recurses downwards */
-static void ProcessPb_Type(vtr::string_internment* strings, pugi::xml_node Parent, t_pb_type* pb_type, t_mode* mode, const bool timing_enabled, const t_arch& arch, const pugiutil::loc_data& loc_data) {
-    int num_ports, i, j, k, num_annotations;
-    const char* Prop;
-    pugi::xml_node Cur;
-
-    bool is_root_pb_type = !(mode != nullptr && mode->parent_pb_type != nullptr);
-    bool is_leaf_pb_type = bool(get_attribute(Parent, "blif_model", loc_data, ReqOpt::OPTIONAL));
-
-    std::vector<std::string> children_to_expect = {"input", "output", "clock", "mode", "power", "metadata"};
-    if (!is_leaf_pb_type) {
-        //Non-leafs may have a model/pb_type children
-        children_to_expect.push_back("model");
-        children_to_expect.push_back("pb_type");
-        children_to_expect.push_back("interconnect");
-
-        if (is_root_pb_type) {
-            VTR_ASSERT(!is_leaf_pb_type);
-            //Top level pb_type's may also have the following tag types
-            children_to_expect.push_back("fc");
-            children_to_expect.push_back("pinlocations");
-            children_to_expect.push_back("switchblock_locations");
-        }
-    } else {
-        VTR_ASSERT(is_leaf_pb_type);
-        VTR_ASSERT(!is_root_pb_type);
-
-        //Leaf pb_type's may also have the following tag types
-        children_to_expect.push_back("T_setup");
-        children_to_expect.push_back("T_hold");
-        children_to_expect.push_back("T_clock_to_Q");
-        children_to_expect.push_back("delay_constant");
-        children_to_expect.push_back("delay_matrix");
-    }
-
-    //Sanity check contained tags
-    expect_only_children(Parent, children_to_expect, loc_data);
-
-    char* class_name;
-    /* STL maps for checking various duplicate names */
-    std::map<std::string, int> pb_port_names;
-    std::map<std::string, int> mode_names;
-    std::pair<std::map<std::string, int>::iterator, bool> ret_pb_ports;
-    std::pair<std::map<std::string, int>::iterator, bool> ret_mode_names;
-    int num_in_ports, num_out_ports, num_clock_ports;
-    int num_delay_constant, num_delay_matrix, num_C_constant, num_C_matrix,
-        num_T_setup, num_T_cq, num_T_hold;
-
-    pb_type->parent_mode = mode;
-    if (mode != nullptr && mode->parent_pb_type != nullptr) {
-        pb_type->depth = mode->parent_pb_type->depth + 1;
-        Prop = get_attribute(Parent, "name", loc_data).value();
-        pb_type->name = vtr::strdup(Prop);
-    } else {
-        pb_type->depth = 0;
-        /* same name as type */
-    }
-
-    Prop = get_attribute(Parent, "blif_model", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-    pb_type->blif_model = vtr::strdup(Prop);
-
-    pb_type->class_type = UNKNOWN_CLASS;
-    Prop = get_attribute(Parent, "class", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-    class_name = vtr::strdup(Prop);
-
-    if (class_name) {
-        if (0 == strcmp(class_name, PB_TYPE_CLASS_STRING[LUT_CLASS])) {
-            pb_type->class_type = LUT_CLASS;
-        } else if (0 == strcmp(class_name, PB_TYPE_CLASS_STRING[LATCH_CLASS])) {
-            pb_type->class_type = LATCH_CLASS;
-        } else if (0 == strcmp(class_name, PB_TYPE_CLASS_STRING[MEMORY_CLASS])) {
-            pb_type->class_type = MEMORY_CLASS;
-        } else {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                           "Unknown class '%s' in pb_type '%s'\n", class_name,
-                           pb_type->name);
-        }
-        free(class_name);
-    }
-
-    if (mode == nullptr) {
-        pb_type->num_pb = 1;
-    } else {
-        pb_type->num_pb = get_attribute(Parent, "num_pb", loc_data).as_int(0);
-    }
-
-    VTR_ASSERT(pb_type->num_pb > 0);
-    num_ports = num_in_ports = num_out_ports = num_clock_ports = 0;
-    num_in_ports = count_children(Parent, "input", loc_data, ReqOpt::OPTIONAL);
-    num_out_ports = count_children(Parent, "output", loc_data, ReqOpt::OPTIONAL);
-    num_clock_ports = count_children(Parent, "clock", loc_data, ReqOpt::OPTIONAL);
-    num_ports = num_in_ports + num_out_ports + num_clock_ports;
-    pb_type->ports = (t_port*)vtr::calloc(num_ports, sizeof(t_port));
-    pb_type->num_ports = num_ports;
-
-    /* Enforce VPR's definition of LUT/FF by checking number of ports */
-    if (pb_type->class_type == LUT_CLASS
-        || pb_type->class_type == LATCH_CLASS) {
-        if (num_in_ports != 1 || num_out_ports != 1) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                           "%s primitives must contain exactly one input port and one output port."
-                           "Found '%d' input port(s) and '%d' output port(s) for '%s'",
-                           (pb_type->class_type == LUT_CLASS) ? "LUT" : "Latch",
-                           num_in_ports, num_out_ports, pb_type->name);
-        }
-    }
-
-    /* Initialize Power Structure */
-    pb_type->pb_type_power = (t_pb_type_power*)vtr::calloc(1,
-                                                           sizeof(t_pb_type_power));
-    ProcessPb_TypePowerEstMethod(Parent, pb_type, loc_data);
-
-    /* process ports */
-    j = 0;
-    int absolute_port_first_pin_index = 0;
-
-    for (i = 0; i < 3; i++) {
-        if (i == 0) {
-            k = 0;
-            Cur = get_first_child(Parent, "input", loc_data, ReqOpt::OPTIONAL);
-        } else if (i == 1) {
-            k = 0;
-            Cur = get_first_child(Parent, "output", loc_data, ReqOpt::OPTIONAL);
-        } else {
-            k = 0;
-            Cur = get_first_child(Parent, "clock", loc_data, ReqOpt::OPTIONAL);
-        }
-        while (Cur) {
-            pb_type->ports[j].parent_pb_type = pb_type;
-            pb_type->ports[j].index = j;
-            pb_type->ports[j].port_index_by_type = k;
-            ProcessPb_TypePort(Cur, &pb_type->ports[j],
-                               pb_type->pb_type_power->estimation_method, is_root_pb_type, loc_data);
-
-            pb_type->ports[j].absolute_first_pin_index = absolute_port_first_pin_index;
-            absolute_port_first_pin_index += pb_type->ports[j].num_pins;
-
-            //Check port name duplicates
-            ret_pb_ports = pb_port_names.insert(std::pair<std::string, int>(pb_type->ports[j].name, 0));
-            if (!ret_pb_ports.second) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Cur),
-                               "Duplicate port names in pb_type '%s': port '%s'\n",
-                               pb_type->name, pb_type->ports[j].name);
-            }
-
-            /* get next iteration */
-            j++;
-            k++;
-            Cur = Cur.next_sibling(Cur.name());
-        }
-    }
-
-    VTR_ASSERT(j == num_ports);
-
-    /* Count stats on the number of each type of pin */
-    pb_type->num_clock_pins = pb_type->num_input_pins = pb_type->num_output_pins = 0;
-    for (i = 0; i < pb_type->num_ports; i++) {
-        if (pb_type->ports[i].type == IN_PORT
-            && pb_type->ports[i].is_clock == false) {
-            pb_type->num_input_pins += pb_type->ports[i].num_pins;
-        } else if (pb_type->ports[i].type == OUT_PORT) {
-            pb_type->num_output_pins += pb_type->ports[i].num_pins;
-        } else {
-            VTR_ASSERT(pb_type->ports[i].is_clock
-                       && pb_type->ports[i].type == IN_PORT);
-            pb_type->num_clock_pins += pb_type->ports[i].num_pins;
-        }
-    }
-
-    pb_type->num_pins = pb_type->num_input_pins + pb_type->num_output_pins + pb_type->num_clock_pins;
-
-    //Warn that max_internal_delay is no longer supported
-    //TODO: eventually remove
-    try {
-        expect_child_node_count(Parent, "max_internal_delay", 0, loc_data);
-    } catch (pugiutil::XmlError& e) {
-        std::string msg = e.what();
-        msg += ". <max_internal_delay> has been replaced with <delay_constant>/<delay_matrix> between sequential primitive ports.";
-        msg += " Please upgrade your architecture file.";
-        archfpga_throw(e.filename().c_str(), e.line(), msg.c_str());
-    }
-
-    pb_type->annotations = nullptr;
-    pb_type->num_annotations = 0;
-    i = 0;
-    /* Determine if this is a leaf or container pb_type */
-    if (pb_type->blif_model != nullptr) {
-        /* Process delay and capacitance annotations */
-        num_annotations = 0;
-        num_delay_constant = count_children(Parent, "delay_constant", loc_data, ReqOpt::OPTIONAL);
-        num_delay_matrix = count_children(Parent, "delay_matrix", loc_data, ReqOpt::OPTIONAL);
-        num_C_constant = count_children(Parent, "C_constant", loc_data, ReqOpt::OPTIONAL);
-        num_C_matrix = count_children(Parent, "C_matrix", loc_data, ReqOpt::OPTIONAL);
-        num_T_setup = count_children(Parent, "T_setup", loc_data, ReqOpt::OPTIONAL);
-        num_T_cq = count_children(Parent, "T_clock_to_Q", loc_data, ReqOpt::OPTIONAL);
-        num_T_hold = count_children(Parent, "T_hold", loc_data, ReqOpt::OPTIONAL);
-        num_annotations = num_delay_constant + num_delay_matrix + num_C_constant
-                          + num_C_matrix + num_T_setup + num_T_cq + num_T_hold;
-
-        pb_type->annotations = (t_pin_to_pin_annotation*)vtr::calloc(num_annotations, sizeof(t_pin_to_pin_annotation));
-        pb_type->num_annotations = num_annotations;
-
-        j = 0;
-        for (i = 0; i < 7; i++) {
-            if (i == 0) {
-                Cur = get_first_child(Parent, "delay_constant", loc_data, ReqOpt::OPTIONAL);
-            } else if (i == 1) {
-                Cur = get_first_child(Parent, "delay_matrix", loc_data, ReqOpt::OPTIONAL);
-            } else if (i == 2) {
-                Cur = get_first_child(Parent, "C_constant", loc_data, ReqOpt::OPTIONAL);
-            } else if (i == 3) {
-                Cur = get_first_child(Parent, "C_matrix", loc_data, ReqOpt::OPTIONAL);
-            } else if (i == 4) {
-                Cur = get_first_child(Parent, "T_setup", loc_data, ReqOpt::OPTIONAL);
-            } else if (i == 5) {
-                Cur = get_first_child(Parent, "T_clock_to_Q", loc_data, ReqOpt::OPTIONAL);
-            } else if (i == 6) {
-                Cur = get_first_child(Parent, "T_hold", loc_data, ReqOpt::OPTIONAL);
-            }
-            while (Cur) {
-                ProcessPinToPinAnnotations(Cur, &pb_type->annotations[j],
-                                           pb_type, loc_data);
-
-                /* get next iteration */
-                j++;
-                Cur = Cur.next_sibling(Cur.name());
-            }
-        }
-        VTR_ASSERT(j == num_annotations);
-
-        if (timing_enabled) {
-            check_leaf_pb_model_timing_consistency(pb_type, arch);
-        }
-
-        /* leaf pb_type, if special known class, then read class lib otherwise treat as primitive */
-        if (pb_type->class_type == LUT_CLASS) {
-            ProcessLutClass(pb_type);
-        } else if (pb_type->class_type == MEMORY_CLASS) {
-            ProcessMemoryClass(pb_type);
-        } else {
-            /* other leaf pb_type do not have modes */
-            pb_type->num_modes = 0;
-            VTR_ASSERT(count_children(Parent, "mode", loc_data, ReqOpt::OPTIONAL) == 0);
-        }
-    } else {
-        /* container pb_type, process modes */
-        VTR_ASSERT(pb_type->class_type == UNKNOWN_CLASS);
-        pb_type->num_modes = count_children(Parent, "mode", loc_data, ReqOpt::OPTIONAL);
-        pb_type->pb_type_power->leakage_default_mode = 0;
-
-        if (pb_type->num_modes == 0) {
-            /* The pb_type operates in an implied one mode */
-            pb_type->num_modes = 1;
-            pb_type->modes = new t_mode[pb_type->num_modes];
-            pb_type->modes[i].parent_pb_type = pb_type;
-            pb_type->modes[i].index = i;
-            ProcessMode(strings, Parent, &pb_type->modes[i], timing_enabled, arch, loc_data);
-            i++;
-        } else {
-            pb_type->modes = new t_mode[pb_type->num_modes];
-
-            Cur = get_first_child(Parent, "mode", loc_data);
-            while (Cur != nullptr) {
-                if (0 == strcmp(Cur.name(), "mode")) {
-                    pb_type->modes[i].parent_pb_type = pb_type;
-                    pb_type->modes[i].index = i;
-                    ProcessMode(strings, Cur, &pb_type->modes[i], timing_enabled, arch, loc_data);
-
-                    ret_mode_names = mode_names.insert(std::pair<std::string, int>(pb_type->modes[i].name, 0));
-                    if (!ret_mode_names.second) {
-                        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Cur),
-                                       "Duplicate mode name: '%s' in pb_type '%s'.\n",
-                                       pb_type->modes[i].name, pb_type->name);
-                    }
-
-                    /* get next iteration */
-                    i++;
-                    Cur = Cur.next_sibling(Cur.name());
-                }
-            }
-        }
-        VTR_ASSERT(i == pb_type->num_modes);
-    }
-
-    pb_port_names.clear();
-    mode_names.clear();
-
-    pb_type->meta = ProcessMetadata(strings, Parent, loc_data);
-    ProcessPb_TypePower(Parent, pb_type, loc_data);
-}
-
-static void ProcessPb_TypePort_Power(pugi::xml_node Parent, t_port* port, e_power_estimation_method power_method, const pugiutil::loc_data& loc_data) {
-    pugi::xml_node cur;
-    const char* prop;
-    bool wire_defined = false;
-
-    port->port_power = (t_port_power*)vtr::calloc(1, sizeof(t_port_power));
-
-    //Defaults
-    if (power_method == POWER_METHOD_AUTO_SIZES) {
-        port->port_power->wire_type = POWER_WIRE_TYPE_AUTO;
-        port->port_power->buffer_type = POWER_BUFFER_TYPE_AUTO;
-    } else if (power_method == POWER_METHOD_SPECIFY_SIZES) {
-        port->port_power->wire_type = POWER_WIRE_TYPE_IGNORED;
-        port->port_power->buffer_type = POWER_BUFFER_TYPE_NONE;
-    }
-
-    cur = get_single_child(Parent, "power", loc_data, ReqOpt::OPTIONAL);
-
-    if (cur) {
-        /* Wire capacitance */
-
-        /* Absolute C provided */
-        prop = get_attribute(cur, "wire_capacitance", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-        if (prop) {
-            if (!(power_method == POWER_METHOD_AUTO_SIZES
-                  || power_method == POWER_METHOD_SPECIFY_SIZES)) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(cur),
-                               "Wire capacitance defined for port '%s'.  This is an invalid option for the parent pb_type '%s' power estimation method.",
-                               port->name, port->parent_pb_type->name);
-            } else {
-                wire_defined = true;
-                port->port_power->wire_type = POWER_WIRE_TYPE_C;
-                port->port_power->wire.C = (float)atof(prop);
-            }
-        }
-
-        /* Wire absolute length provided */
-        prop = get_attribute(cur, "wire_length", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-        if (prop) {
-            if (!(power_method == POWER_METHOD_AUTO_SIZES
-                  || power_method == POWER_METHOD_SPECIFY_SIZES)) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(cur),
-                               "Wire length defined for port '%s'.  This is an invalid option for the parent pb_type '%s' power estimation method.",
-                               port->name, port->parent_pb_type->name);
-            } else if (wire_defined) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(cur),
-                               "Multiple wire properties defined for port '%s', pb_type '%s'.",
-                               port->name, port->parent_pb_type->name);
-            } else if (strcmp(prop, "auto") == 0) {
-                wire_defined = true;
-                port->port_power->wire_type = POWER_WIRE_TYPE_AUTO;
-            } else {
-                wire_defined = true;
-                port->port_power->wire_type = POWER_WIRE_TYPE_ABSOLUTE_LENGTH;
-                port->port_power->wire.absolute_length = (float)atof(prop);
-            }
-        }
-
-        /* Wire relative length provided */
-        prop = get_attribute(cur, "wire_relative_length", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-        if (prop) {
-            if (!(power_method == POWER_METHOD_AUTO_SIZES
-                  || power_method == POWER_METHOD_SPECIFY_SIZES)) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(cur),
-                               "Wire relative length defined for port '%s'.  This is an invalid option for the parent pb_type '%s' power estimation method.",
-                               port->name, port->parent_pb_type->name);
-            } else if (wire_defined) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(cur),
-                               "Multiple wire properties defined for port '%s', pb_type '%s'.",
-                               port->name, port->parent_pb_type->name);
-            } else {
-                wire_defined = true;
-                port->port_power->wire_type = POWER_WIRE_TYPE_RELATIVE_LENGTH;
-                port->port_power->wire.relative_length = (float)atof(prop);
-            }
-        }
-
-        /* Buffer Size */
-        prop = get_attribute(cur, "buffer_size", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-        if (prop) {
-            if (!(power_method == POWER_METHOD_AUTO_SIZES
-                  || power_method == POWER_METHOD_SPECIFY_SIZES)) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(cur),
-                               "Buffer size defined for port '%s'.  This is an invalid option for the parent pb_type '%s' power estimation method.",
-                               port->name, port->parent_pb_type->name);
-            } else if (strcmp(prop, "auto") == 0) {
-                port->port_power->buffer_type = POWER_BUFFER_TYPE_AUTO;
-            } else {
-                port->port_power->buffer_type = POWER_BUFFER_TYPE_ABSOLUTE_SIZE;
-                port->port_power->buffer_size = (float)atof(prop);
-            }
-        }
-    }
-}
-
-static void ProcessPb_TypePort(pugi::xml_node Parent, t_port* port, e_power_estimation_method power_method, const bool is_root_pb_type, const pugiutil::loc_data& loc_data) {
-    std::vector<std::string> expected_attributes = {"name", "num_pins", "port_class"};
-    if (is_root_pb_type) {
-        expected_attributes.push_back("equivalent");
-
-        if (Parent.name() == "input"s || Parent.name() == "clock"s) {
-            expected_attributes.push_back("is_non_clock_global");
-        }
-    }
-
-    expect_only_attributes(Parent, expected_attributes, loc_data);
-
-    const char* Prop;
-    Prop = get_attribute(Parent, "name", loc_data).value();
-    port->name = vtr::strdup(Prop);
-
-    Prop = get_attribute(Parent, "port_class", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-    port->port_class = vtr::strdup(Prop);
-
-    Prop = get_attribute(Parent, "equivalent", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-    if (Prop) {
-        if (Prop == "none"s) {
-            port->equivalent = PortEquivalence::NONE;
-        } else if (Prop == "full"s) {
-            port->equivalent = PortEquivalence::FULL;
-        } else if (Prop == "instance"s) {
-            if (Parent.name() == "output"s) {
-                port->equivalent = PortEquivalence::INSTANCE;
-            } else {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                               "Invalid pin equivalence '%s' for %s port.", Prop, Parent.name());
-            }
-        } else {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                           "Invalid pin equivalence '%s'.", Prop);
-        }
-    }
-    port->num_pins = get_attribute(Parent, "num_pins", loc_data).as_int(0);
-    port->is_non_clock_global = get_attribute(Parent,
-                                              "is_non_clock_global", loc_data, ReqOpt::OPTIONAL)
-                                    .as_bool(false);
-
-    if (port->num_pins <= 0) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                       "Invalid number of pins %d for %s port.", port->num_pins, Parent.name());
-    }
-
-    if (0 == strcmp(Parent.name(), "input")) {
-        port->type = IN_PORT;
-        port->is_clock = false;
-
-        /* Check if LUT/FF port class is lut_in/D */
-        if (port->parent_pb_type->class_type == LUT_CLASS) {
-            if ((!port->port_class) || strcmp("lut_in", port->port_class)) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                               "Inputs to LUT primitives must have a port class named "
-                               "as \"lut_in\".");
-            }
-        } else if (port->parent_pb_type->class_type == LATCH_CLASS) {
-            if ((!port->port_class) || strcmp("D", port->port_class)) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                               "Input to flipflop primitives must have a port class named "
-                               "as \"D\".");
-            }
-            /* Only allow one input pin for FF's */
-            if (port->num_pins != 1) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                               "Input port of flipflop primitives must have exactly one pin. "
-                               "Found %d.",
-                               port->num_pins);
-            }
-        }
-
-    } else if (0 == strcmp(Parent.name(), "output")) {
-        port->type = OUT_PORT;
-        port->is_clock = false;
-
-        /* Check if LUT/FF port class is lut_out/Q */
-        if (port->parent_pb_type->class_type == LUT_CLASS) {
-            if ((!port->port_class) || strcmp("lut_out", port->port_class)) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                               "Output to LUT primitives must have a port class named "
-                               "as \"lut_in\".");
-            }
-            /* Only allow one output pin for LUT's */
-            if (port->num_pins != 1) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                               "Output port of LUT primitives must have exactly one pin. "
-                               "Found %d.",
-                               port->num_pins);
-            }
-        } else if (port->parent_pb_type->class_type == LATCH_CLASS) {
-            if ((!port->port_class) || strcmp("Q", port->port_class)) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                               "Output to flipflop primitives must have a port class named "
-                               "as \"D\".");
-            }
-            /* Only allow one output pin for FF's */
-            if (port->num_pins != 1) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                               "Output port of flipflop primitives must have exactly one pin. "
-                               "Found %d.",
-                               port->num_pins);
-            }
-        }
-    } else if (0 == strcmp(Parent.name(), "clock")) {
-        port->type = IN_PORT;
-        port->is_clock = true;
-        if (port->is_non_clock_global == true) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                           "Port %s cannot be both a clock and a non-clock simultaneously\n",
-                           Parent.name());
-        }
-
-        if (port->parent_pb_type->class_type == LATCH_CLASS) {
-            if ((!port->port_class) || strcmp("clock", port->port_class)) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                               "Clock to flipflop primitives must have a port class named "
-                               "as \"clock\".");
-            }
-            /* Only allow one output pin for FF's */
-            if (port->num_pins != 1) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                               "Clock port of flipflop primitives must have exactly one pin. "
-                               "Found %d.",
-                               port->num_pins);
-            }
-        }
-    } else {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                       "Unknown port type %s", Parent.name());
-    }
-
-    ProcessPb_TypePort_Power(Parent, port, power_method, loc_data);
-}
-
-/* Loads every <complete>, <direct> and <mux> child of an <interconnect> node
- * into mode->interconnect (grouped in that tag order), together with each
- * entry's metadata, its delay/capacitance/pack-pattern annotations and a
- * zero-initialised power record. Raises an architecture error through
- * archfpga_throw when two interconnects of the same mode share a name. */
-static void ProcessInterconnect(vtr::string_internment* strings, pugi::xml_node Parent, t_mode* mode, const pugiutil::loc_data& loc_data) {
-    /* Tags are walked group by group, so the output array is ordered by kind */
-    static const char* const interc_tags[] = {"complete", "direct", "mux"};
-    static const char* const annotation_tags[] = {"delay_constant", "delay_matrix", "C_constant", "C_matrix", "pack_pattern"};
-
-    int total_interc = 0;
-    for (const char* tag : interc_tags) {
-        total_interc += count_children(Parent, tag, loc_data, ReqOpt::OPTIONAL);
-    }
-
-    mode->num_interconnect = total_interc;
-    mode->interconnect = new t_interconnect[total_interc];
-
-    /* Names seen so far in this mode; only used to reject duplicates */
-    std::map<std::string, int> seen_names;
-
-    int idx = 0;
-    for (const char* tag : interc_tags) {
-        for (pugi::xml_node interc_node = get_first_child(Parent, tag, loc_data, ReqOpt::OPTIONAL);
-             interc_node != nullptr;
-             interc_node = interc_node.next_sibling(interc_node.name())) {
-            t_interconnect& interc = mode->interconnect[idx];
-
-            /* Kind of interconnect, derived from the tag name */
-            const char* node_name = interc_node.name();
-            if (0 == strcmp(node_name, "complete")) {
-                interc.type = COMPLETE_INTERC;
-            } else if (0 == strcmp(node_name, "direct")) {
-                interc.type = DIRECT_INTERC;
-            } else {
-                VTR_ASSERT(0 == strcmp(node_name, "mux"));
-                interc.type = MUX_INTERC;
-            }
-
-            interc.line_num = loc_data.line(interc_node);
-            interc.parent_mode_index = mode->index;
-            interc.parent_mode = mode;
-
-            /* Mandatory attributes, read in the order input -> output -> name */
-            interc.input_string = vtr::strdup(get_attribute(interc_node, "input", loc_data).value());
-            interc.output_string = vtr::strdup(get_attribute(interc_node, "output", loc_data).value());
-            interc.name = vtr::strdup(get_attribute(interc_node, "name", loc_data).value());
-            interc.meta = ProcessMetadata(strings, interc_node, loc_data);
-
-            const bool is_new_name = seen_names.insert(std::pair<std::string, int>(interc.name, 0)).second;
-            if (!is_new_name) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(interc_node),
-                               "Duplicate interconnect name: '%s' in mode: '%s'.\n",
-                               interc.name, mode->name);
-            }
-
-            /* Count the delay/capacitance/pack-pattern annotations, then load them */
-            int num_annot = 0;
-            for (const char* annot_tag : annotation_tags) {
-                num_annot += count_children(interc_node, annot_tag, loc_data, ReqOpt::OPTIONAL);
-            }
-
-            interc.annotations = (t_pin_to_pin_annotation*)vtr::calloc(num_annot,
-                                                                       sizeof(t_pin_to_pin_annotation));
-            interc.num_annotations = num_annot;
-
-            int annot_idx = 0;
-            for (const char* annot_tag : annotation_tags) {
-                for (pugi::xml_node annot_node = get_first_child(interc_node, annot_tag, loc_data, ReqOpt::OPTIONAL);
-                     annot_node != nullptr;
-                     annot_node = annot_node.next_sibling(annot_node.name())) {
-                    ProcessPinToPinAnnotations(annot_node, &(interc.annotations[annot_idx]), nullptr, loc_data);
-                    ++annot_idx;
-                }
-            }
-            VTR_ASSERT(annot_idx == num_annot);
-
-            /* Power record starts zeroed, with its port info marked uninitialised */
-            interc.interconnect_power = (t_interconnect_power*)vtr::calloc(1,
-                                                                           sizeof(t_interconnect_power));
-            interc.interconnect_power->port_info_initialized = false;
-
-            ++idx;
-        }
-    }
-
-    VTR_ASSERT(idx == total_interc);
-}
-
-/* Populates a t_mode from its XML description.
- *
- * Parent is either a <mode> tag or, for an implied single mode, the enclosing
- * <pb_type> tag itself; in the latter case the mode is named "default" and no
- * metadata is attached to the t_mode. mode->parent_pb_type is dereferenced
- * here, so the caller must have set it beforehand. Child <pb_type> tags are
- * loaded through ProcessPb_Type, duplicate child names are rejected with
- * archfpga_throw, and the mandatory <interconnect> child is processed last. */
-static void ProcessMode(vtr::string_internment* strings, pugi::xml_node Parent, t_mode* mode, const bool timing_enabled, const t_arch& arch, const pugiutil::loc_data& loc_data) {
-    const bool implied_mode = 0 == strcmp(Parent.name(), "pb_type");
-    if (implied_mode) {
-        mode->name = vtr::strdup("default");
-    } else {
-        const char* Prop = get_attribute(Parent, "name", loc_data).value();
-        mode->name = vtr::strdup(Prop);
-    }
-
-    /* By default every mode is visible to the packer ... */
-    mode->disable_packing = false;
-
-    /* ... unless the parent mode is disabled for packing, in which case
-     * this mode inherits that setting */
-    if (nullptr != mode->parent_pb_type->parent_mode) {
-        mode->disable_packing = mode->parent_pb_type->parent_mode->disable_packing;
-    }
-
-    /* An explicit attribute overrides the default/inherited value */
-    mode->disable_packing = get_attribute(Parent, "disable_packing", loc_data, ReqOpt::OPTIONAL).as_bool(mode->disable_packing);
-    if (mode->disable_packing) {
-        VTR_LOG("mode '%s[%s]' is defined by user to be disabled in packing\n",
-                mode->parent_pb_type->name,
-                mode->name);
-    }
-
-    mode->num_pb_type_children = count_children(Parent, "pb_type", loc_data, ReqOpt::OPTIONAL);
-    if (mode->num_pb_type_children > 0) {
-        mode->pb_type_children = new t_pb_type[mode->num_pb_type_children];
-
-        /* Child names seen so far; only used to reject duplicates */
-        std::map<std::string, int> pb_type_names;
-
-        int i = 0;
-        /* The cursor is advanced unconditionally by the loop header, so the
-         * traversal always terminates (the previous form only advanced inside
-         * a name check and would have spun forever on any other node). */
-        for (pugi::xml_node Cur = get_first_child(Parent, "pb_type", loc_data);
-             Cur;
-             Cur = Cur.next_sibling("pb_type")) {
-            ProcessPb_Type(strings, Cur, &mode->pb_type_children[i], mode, timing_enabled, arch, loc_data);
-
-            const bool is_new_name = pb_type_names.insert(
-                                                      std::pair<std::string, int>(mode->pb_type_children[i].name, 0))
-                                         .second;
-            if (!is_new_name) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Cur),
-                               "Duplicate pb_type name: '%s' in mode: '%s'.\n",
-                               mode->pb_type_children[i].name, mode->name);
-            }
-
-            i++;
-        }
-        VTR_ASSERT(i == mode->num_pb_type_children);
-    } else {
-        mode->pb_type_children = nullptr;
-    }
-
-    /* Allocate power structure */
-    mode->mode_power = (t_mode_power*)vtr::calloc(1, sizeof(t_mode_power));
-
-    if (!implied_mode) {
-        // Implied mode metadata is attached to the pb_type, rather than
-        // the t_mode object.
-        mode->meta = ProcessMetadata(strings, Parent, loc_data);
-    }
-
-    pugi::xml_node interconnect_node = get_single_child(Parent, "interconnect", loc_data);
-    ProcessInterconnect(strings, interconnect_node, mode, loc_data);
-}
-
-/* Collects the optional <metadata> child of Parent into a dictionary.
- * Every <meta> entry contributes its "name" attribute as the key and its
- * text content as the value, both interned through 'strings':
- *
- *   <metadata>
- *     <meta name="KEY">VALUE</meta>
- *   </metadata>
- *
- * Returns an empty dictionary when Parent has no <metadata> child. */
-static t_metadata_dict ProcessMetadata(vtr::string_internment* strings, pugi::xml_node Parent, const pugiutil::loc_data& loc_data) {
-    t_metadata_dict dict;
-
-    pugi::xml_node metadata_node = get_single_child(Parent, "metadata", loc_data, ReqOpt::OPTIONAL);
-    if (!metadata_node) {
-        return dict;
-    }
-
-    for (pugi::xml_node entry = get_first_child(metadata_node, "meta", loc_data);
-         entry;
-         entry = entry.next_sibling(entry.name())) {
-        auto key = get_attribute(entry, "name", loc_data).as_string();
-        auto value = entry.child_value();
-
-        dict.add(strings->intern_string(vtr::string_view(key)),
-                 strings->intern_string(vtr::string_view(value)));
-    }
-
-    return dict;
-}
-
-/* Reads the four mandatory Fc attributes of Node (in_type, in_val, out_type,
- * out_val, in that order) into spec and marks spec as specified. */
-static void Process_Fc_Values(pugi::xml_node Node, t_default_fc_spec& spec, const pugiutil::loc_data& loc_data) {
-    spec.specified = true;
-
-    /* Input side: value type first, then the value itself */
-    spec.in_value_type = string_to_fc_value_type(get_attribute(Node, "in_type", loc_data).value(), Node, loc_data);
-    spec.in_value = vtr::atof(get_attribute(Node, "in_val", loc_data).value());
-
-    /* Output side: value type first, then the value itself */
-    spec.out_value_type = string_to_fc_value_type(get_attribute(Node, "out_type", loc_data).value(), Node, loc_data);
-    spec.out_value = vtr::atof(get_attribute(Node, "out_val", loc_data).value());
-}
-
-/* Takes in the node ptr for the 'fc' elements and initializes
- * the appropriate fields of type.
- *
- * When Node is present its attributes give the default Fc values and its
- * children are parsed as <fc_override/> tags; otherwise arch_def_fc is used,
- * and an error is raised if that was never specified. One t_fc_specification
- * is appended to PhysicalTileType->fc_specs for every
- * (segment, sub-tile instance, port) combination. */
-static void Process_Fc(pugi::xml_node Node,
-                       t_physical_tile_type* PhysicalTileType,
-                       t_sub_tile* SubTile,
-                       t_pin_counts pin_counts,
-                       std::vector<t_segment_inf>& segments,
-                       const t_default_fc_spec& arch_def_fc,
-                       const pugiutil::loc_data& loc_data) {
-    std::vector<t_fc_override> fc_overrides;
-    t_default_fc_spec def_fc_spec;
-    if (Node) {
-        /* Load the default Fc values from the node */
-        Process_Fc_Values(Node, def_fc_spec, loc_data);
-        /* Load any <fc_override/> tags */
-        for (auto child_node : Node.children()) {
-            t_fc_override fc_override = Process_Fc_override(child_node, loc_data);
-            fc_overrides.push_back(fc_override);
-        }
-    } else {
-        /* Use the default value, if available */
-        if (!arch_def_fc.specified) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                           "<sub_tile> is missing child <fc>, and no <default_fc> specified in architecture\n");
-        }
-        def_fc_spec = arch_def_fc;
-    }
-
-    /* Go through all the port/segment combinations and create the (potentially
-     * overridden) pin/seg Fc specifications */
-    for (size_t iseg = 0; iseg < segments.size(); ++iseg) {
-        for (int icapacity = 0; icapacity < SubTile->capacity.total(); ++icapacity) {
-            //For every instance after the first, we need to offset the block pin index by the
-            //number of pins per instance; this ensures that all pins have an Fc specification
-            int iblk_pin = icapacity * pin_counts.total();
-
-            for (const auto& port : SubTile->ports) {
-                t_fc_specification fc_spec;
-
-                fc_spec.seg_index = iseg;
-
-                //Apply type and defaults
-                if (port.type == IN_PORT) {
-                    fc_spec.fc_type = e_fc_type::IN;
-                    fc_spec.fc_value_type = def_fc_spec.in_value_type;
-                    fc_spec.fc_value = def_fc_spec.in_value;
-                } else {
-                    //Any port that is not an input must be an output
-                    VTR_ASSERT(port.type == OUT_PORT);
-                    fc_spec.fc_type = e_fc_type::OUT;
-                    fc_spec.fc_value_type = def_fc_spec.out_value_type;
-                    fc_spec.fc_value = def_fc_spec.out_value;
-                }
-
-                //Apply any matching overrides
-                bool default_overriden = false;
-                for (const auto& fc_override : fc_overrides) {
-                    bool apply_override = false;
-                    if (!fc_override.port_name.empty() && !fc_override.seg_name.empty()) {
-                        //Both port and seg names are specified require exact match on both
-                        if (fc_override.port_name == port.name && fc_override.seg_name == segments[iseg].name) {
-                            apply_override = true;
-                        }
-
-                    } else if (!fc_override.port_name.empty()) {
-                        VTR_ASSERT(fc_override.seg_name.empty());
-                        //Only the port name specified, require it to match
-                        if (fc_override.port_name == port.name) {
-                            apply_override = true;
-                        }
-                    } else {
-                        VTR_ASSERT(!fc_override.seg_name.empty());
-                        VTR_ASSERT(fc_override.port_name.empty());
-                        //Only the seg name specified, require it to match
-                        if (fc_override.seg_name == segments[iseg].name) {
-                            apply_override = true;
-                        }
-                    }
-
-                    if (apply_override) {
-                        //Exact match, or partial match to either port or seg name
-                        // Note that we continue searching, this ensures that the last matching override (in file order)
-                        // is applied last
-
-                        if (default_overriden) {
-                            //Warn if multiple overrides match
-                            VTR_LOGF_WARN(loc_data.filename_c_str(), loc_data.line(Node), "Multiple matching Fc overrides found; the last will be applied\n");
-                        }
-
-                        fc_spec.fc_value_type = fc_override.fc_value_type;
-                        fc_spec.fc_value = fc_override.fc_value;
-
-                        default_overriden = true;
-                    }
-                }
-
-                //Add all the pins from this port
-                for (int iport_pin = 0; iport_pin < port.num_pins; ++iport_pin) {
-                    //XXX: this assumes that iterating through the tile ports
-                    //     in order yields the block pin order
-                    //Translate the sub-tile pin index into the tile-level pin index
-                    int true_physical_blk_pin = SubTile->sub_tile_to_tile_pin_indices[iblk_pin];
-                    fc_spec.pins.push_back(true_physical_blk_pin);
-                    ++iblk_pin;
-                }
-
-                PhysicalTileType->fc_specs.push_back(fc_spec);
-            }
-        }
-    }
-}
-
-/* Parses a single <fc_override/> tag.
- *
- * The tag must have no children. Accepted attributes are 'port_name',
- * 'segment_name', 'fc_type' and 'fc_val'; any other attribute is an error.
- * 'fc_type' and 'fc_val' are mandatory, and at least one of 'port_name' /
- * 'segment_name' must be present. All violations are reported through
- * archfpga_throw with the location of the offending node. */
-static t_fc_override Process_Fc_override(pugi::xml_node node, const pugiutil::loc_data& loc_data) {
-    if (node.name() != std::string("fc_override")) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(node),
-                       "Unexpected node of type '%s' (expected optional 'fc_override')",
-                       node.name());
-    }
-
-    t_fc_override fc_override;
-
-    expect_child_node_count(node, 0, loc_data);
-
-    bool seen_fc_type = false;
-    bool seen_fc_value = false;
-    bool seen_port_or_seg = false;
-    for (auto attrib : node.attributes()) {
-        const std::string attrib_name = attrib.name();
-
-        if (attrib_name == "port_name") {
-            fc_override.port_name = attrib.value();
-            seen_port_or_seg = true;
-        } else if (attrib_name == "segment_name") {
-            fc_override.seg_name = attrib.value();
-            seen_port_or_seg = true;
-        } else if (attrib_name == "fc_type") {
-            fc_override.fc_value_type = string_to_fc_value_type(attrib.value(), node, loc_data);
-            seen_fc_type = true;
-        } else if (attrib_name == "fc_val") {
-            fc_override.fc_value = vtr::atof(attrib.value());
-            seen_fc_value = true;
-        } else {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(node),
-                           "Unexpected attribute '%s'", attrib.name());
-        }
-    }
-
-    if (!seen_fc_type) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(node),
-                       "Missing expected attribute 'fc_type'");
-    }
-
-    if (!seen_fc_value) {
-        /* The attribute parsed above is spelled 'fc_val'; report that exact name */
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(node),
-                       "Missing expected attribute 'fc_val'");
-    }
-
-    if (!seen_port_or_seg) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(node),
-                       "Missing expected attribute(s) 'port_name' and/or 'segment_name'");
-    }
-
-    return fc_override;
-}
-
-/* Maps the textual Fc type ("frac" or "abs") onto e_fc_value_type.
- * Any other string is reported through archfpga_throw at the location of node. */
-static e_fc_value_type string_to_fc_value_type(const std::string& str, pugi::xml_node node, const pugiutil::loc_data& loc_data) {
-    if (str == "abs") {
-        return e_fc_value_type::ABSOLUTE;
-    }
-
-    if (str != "frac") {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(node),
-                       "Invalid fc_type '%s'. Must be 'abs' or 'frac'.\n",
-                       str.c_str());
-    }
-
-    return e_fc_value_type::FRACTIONAL;
-}
-
-//Process any custom switchblock locations
-//
-// Fills type->switchblock_locations and type->switchblock_switch_overrides,
-// both sized [type->width][type->height], from a <switchblock_locations> tag.
-//
-//  * pattern="custom": only the locations named by <sb_loc> children get a
-//    switch block; every other location stays e_sb_type::NONE. Offsets outside
-//    the tile, unknown types, unknown switches and duplicate locations are
-//    reported through archfpga_throw.
-//  * any other pattern ("all", "external", "internal",
-//    "external_full_internal_straight", "none"): all locations receive the
-//    internal type/switch, then the top row and right column are overwritten
-//    with the external type/switch.
-//
-// When the 'pattern' attribute is absent it defaults to
-// "external_full_internal_straight".
-static void ProcessSwitchblockLocations(pugi::xml_node switchblock_locations,
-                                        t_physical_tile_type* type,
-                                        const t_arch& arch,
-                                        const pugiutil::loc_data& loc_data) {
-    VTR_ASSERT(type);
-
-    expect_only_attributes(switchblock_locations, {"pattern", "internal_switch"}, loc_data);
-
-    std::string pattern = get_attribute(switchblock_locations, "pattern", loc_data, ReqOpt::OPTIONAL).as_string("external_full_internal_straight");
-
-    //Initialize the location specs (no switch blocks, default switch everywhere)
-    size_t width = type->width;
-    size_t height = type->height;
-    type->switchblock_locations = vtr::Matrix<e_sb_type>({{width, height}}, e_sb_type::NONE);
-    type->switchblock_switch_overrides = vtr::Matrix<int>({{width, height}}, DEFAULT_SWITCH);
-
-    if (pattern == "custom") {
-        //'internal_switch' is not accepted together with a custom pattern
-        expect_only_attributes(switchblock_locations, {"pattern"}, loc_data);
-
-        //Load a custom pattern specified with <sb_loc> tags
-        expect_only_children(switchblock_locations, {"sb_loc"}, loc_data); //Only sb_loc child tags
-
-        //The matrix was just created filled with NONE, so unspecified
-        //locations already default to having no switch block
-
-        //Track which locations have been assigned to detect overlaps
-        auto assigned_locs = vtr::Matrix<bool>({{width, height}}, false);
-
-        for (pugi::xml_node sb_loc : switchblock_locations.children("sb_loc")) {
-            expect_only_attributes(sb_loc, {"type", "xoffset", "yoffset", "switch_override"}, loc_data);
-
-            //Determine the type
-            std::string sb_type_str = get_attribute(sb_loc, "type", loc_data, ReqOpt::OPTIONAL).as_string("full");
-            e_sb_type sb_type = e_sb_type::FULL;
-            if (sb_type_str == "none") {
-                sb_type = e_sb_type::NONE;
-            } else if (sb_type_str == "horizontal") {
-                sb_type = e_sb_type::HORIZONTAL;
-            } else if (sb_type_str == "vertical") {
-                sb_type = e_sb_type::VERTICAL;
-            } else if (sb_type_str == "turns") {
-                sb_type = e_sb_type::TURNS;
-            } else if (sb_type_str == "straight") {
-                sb_type = e_sb_type::STRAIGHT;
-            } else if (sb_type_str == "full") {
-                sb_type = e_sb_type::FULL;
-            } else {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(sb_loc),
-                               "Invalid <sb_loc> 'type' attribute '%s'\n",
-                               sb_type_str.c_str());
-            }
-
-            //Determine the switch type
-            int sb_switch_override = DEFAULT_SWITCH;
-
-            auto sb_switch_override_attr = get_attribute(sb_loc, "switch_override", loc_data, ReqOpt::OPTIONAL);
-            if (sb_switch_override_attr) {
-                std::string sb_switch_override_str = sb_switch_override_attr.as_string();
-                //Use the specified switch
-                sb_switch_override = find_switch_by_name(arch, sb_switch_override_str);
-
-                if (sb_switch_override == OPEN) {
-                    //Report the line of the offending <sb_loc>, like the other per-location errors
-                    archfpga_throw(loc_data.filename_c_str(), loc_data.line(sb_loc),
-                                   "Invalid <sb_loc> 'switch_override' attribute '%s' (no matching switch named '%s' found)\n",
-                                   sb_switch_override_str.c_str(), sb_switch_override_str.c_str());
-                }
-            }
-
-            //Get the horizontal offset
-            // (compared as 'offset >= dim' so that a zero dimension cannot wrap around,
-            //  and printed with %zu because the upper bound is a size_t)
-            size_t xoffset = get_attribute(sb_loc, "xoffset", loc_data, ReqOpt::OPTIONAL).as_uint(0);
-            if (xoffset >= width) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(sb_loc),
-                               "Invalid <sb_loc> 'xoffset' attribute '%zu' (must be in range [%d,%zu])\n",
-                               xoffset, 0, width - 1);
-            }
-
-            //Get the vertical offset
-            size_t yoffset = get_attribute(sb_loc, "yoffset", loc_data, ReqOpt::OPTIONAL).as_uint(0);
-            if (yoffset >= height) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(sb_loc),
-                               "Invalid <sb_loc> 'yoffset' attribute '%zu' (must be in range [%d,%zu])\n",
-                               yoffset, 0, height - 1);
-            }
-
-            //Check if this location has already been set
-            if (assigned_locs[xoffset][yoffset]) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(sb_loc),
-                               "Duplicate <sb_loc> specifications at xoffset=%zu yoffset=%zu\n",
-                               xoffset, yoffset);
-            }
-
-            //Set the custom sb location and type
-            type->switchblock_locations[xoffset][yoffset] = sb_type;
-            type->switchblock_switch_overrides[xoffset][yoffset] = sb_switch_override;
-            assigned_locs[xoffset][yoffset] = true; //Mark the location as set for error detection
-        }
-    } else { //Non-custom patterns
-        //Initialize defaults
-        int internal_switch = DEFAULT_SWITCH;
-        int external_switch = DEFAULT_SWITCH;
-        e_sb_type internal_type = e_sb_type::FULL;
-        e_sb_type external_type = e_sb_type::FULL;
-
-        //Determine any internal switch override
-        auto internal_switch_attr = get_attribute(switchblock_locations, "internal_switch", loc_data, ReqOpt::OPTIONAL);
-        if (internal_switch_attr) {
-            std::string internal_switch_name = internal_switch_attr.as_string();
-            //Use the specified switch
-            internal_switch = find_switch_by_name(arch, internal_switch_name);
-
-            if (internal_switch == OPEN) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(switchblock_locations),
-                               "Invalid <switchblock_locations> 'internal_switch' attribute '%s' (no matching switch named '%s' found)\n",
-                               internal_switch_name.c_str(), internal_switch_name.c_str());
-            }
-        }
-
-        //Identify switch block types
-        if (pattern == "all") {
-            internal_type = e_sb_type::FULL;
-            external_type = e_sb_type::FULL;
-
-        } else if (pattern == "external") {
-            internal_type = e_sb_type::NONE;
-            external_type = e_sb_type::FULL;
-
-        } else if (pattern == "internal") {
-            internal_type = e_sb_type::FULL;
-            external_type = e_sb_type::NONE;
-
-        } else if (pattern == "external_full_internal_straight") {
-            internal_type = e_sb_type::STRAIGHT;
-            external_type = e_sb_type::FULL;
-
-        } else if (pattern == "none") {
-            internal_type = e_sb_type::NONE;
-            external_type = e_sb_type::NONE;
-
-        } else {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(switchblock_locations),
-                           "Invalid <switchblock_locations> 'pattern' attribute '%s'\n",
-                           pattern.c_str());
-        }
-
-        //Fill in all locations (sets internal)
-        type->switchblock_locations.fill(internal_type);
-        type->switchblock_switch_overrides.fill(internal_switch);
-
-        //The edges only exist for a non-empty tile; skipping them otherwise
-        //avoids indexing with a wrapped-around 'dim - 1'
-        if (width > 0 && height > 0) {
-            //Fill in top edge external
-            const size_t top_row = height - 1;
-            for (size_t xoffset = 0; xoffset < width; ++xoffset) {
-                type->switchblock_locations[xoffset][top_row] = external_type;
-                type->switchblock_switch_overrides[xoffset][top_row] = external_switch;
-            }
-
-            //Fill in right edge external
-            const size_t right_col = width - 1;
-            for (size_t yoffset = 0; yoffset < height; ++yoffset) {
-                type->switchblock_locations[right_col][yoffset] = external_type;
-                type->switchblock_switch_overrides[right_col][yoffset] = external_switch;
-            }
-        }
-    }
-}
-
-/* Takes in node pointing to <models> and loads all the
- * child type objects.
- *
- * Every child must be a <model> tag carrying a 'name' attribute and,
- * optionally, 'never_prune' ("true" or "false"). Its <input_ports> and
- * <output_ports> children are loaded through ProcessModelPorts and the
- * finished model is sanity-checked. Models are numbered from
- * NUM_MODELS_IN_LIBRARY upwards in file order and each one is pushed onto the
- * front of the arch->models linked list. If an ArchFpgaError is raised while a
- * model is being built, that model is released with free_arch_model before
- * the error is re-thrown. */
-static void ProcessModels(pugi::xml_node Node, t_arch* arch, const pugiutil::loc_data& loc_data) {
-    t_model* temp = nullptr;
-    int L_index = NUM_MODELS_IN_LIBRARY;
-
-    /* Model names seen so far; only used to reject duplicates */
-    std::map<std::string, int> model_name_map;
-
-    arch->models = nullptr;
-    for (pugi::xml_node model : Node.children()) {
-        //Process each model
-        if (model.name() != std::string("model")) {
-            bad_tag(model, loc_data, Node, {"model"});
-        }
-
-        try {
-            temp = new t_model;
-            temp->index = L_index;
-            L_index++;
-
-            //Process the <model> tag attributes
-            for (pugi::xml_attribute attr : model.attributes()) {
-                if (attr.name() == std::string("never_prune")) {
-                    //Compare the attribute text in place; duplicating it first
-                    //would leak the copy, since nothing ever frees it
-                    const char* never_prune_str = attr.value();
-
-                    if (std::strcmp(never_prune_str, "true") == 0) {
-                        temp->never_prune = true;
-                    } else if (std::strcmp(never_prune_str, "false") == 0) {
-                        temp->never_prune = false;
-                    } else {
-                        archfpga_throw(loc_data.filename_c_str(), loc_data.line(model),
-                                       "Unsupported never prune attribute value.");
-                    }
-                } else if (attr.name() == std::string("name")) {
-                    //NOTE(review): relies on t_model starting with a null name - confirm
-                    if (!temp->name) {
-                        //First name attr. seen
-                        temp->name = vtr::strdup(attr.value());
-                    } else {
-                        //Duplicate name
-                        archfpga_throw(loc_data.filename_c_str(), loc_data.line(model),
-                                       "Duplicate 'name' attribute on <model> tag.");
-                    }
-                } else {
-                    bad_attribute(attr, model, loc_data);
-                }
-            }
-
-            //A model without a name would otherwise reach the std::string
-            //construction below with a null pointer
-            if (!temp->name) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(model),
-                               "Missing required 'name' attribute on <model> tag.");
-            }
-
-            /* Try insert new model, check if already exist at the same time */
-            const bool is_new_name = model_name_map.insert(std::pair<std::string, int>(temp->name, 0)).second;
-            if (!is_new_name) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(model),
-                               "Duplicate model name: '%s'.\n", temp->name);
-            }
-
-            //Process the ports
-            std::set<std::string> port_names;
-            for (pugi::xml_node port_group : model.children()) {
-                if (port_group.name() == std::string("input_ports")) {
-                    ProcessModelPorts(port_group, temp, port_names, loc_data);
-                } else if (port_group.name() == std::string("output_ports")) {
-                    ProcessModelPorts(port_group, temp, port_names, loc_data);
-                } else {
-                    bad_tag(port_group, loc_data, model, {"input_ports", "output_ports"});
-                }
-            }
-
-            //Sanity check the model
-            check_model_clocks(temp, loc_data.filename_c_str(), loc_data.line(model));
-            check_model_combinational_sinks(temp, loc_data.filename_c_str(), loc_data.line(model));
-            warn_model_missing_timing(temp, loc_data.filename_c_str(), loc_data.line(model));
-        } catch (ArchFpgaError&) {
-            //Release the partially built model before propagating the error
-            free_arch_model(temp);
-            throw;
-        }
-
-        //Add the model
-        temp->next = arch->models;
-        arch->models = temp;
-    }
-}
-
-/* Parses the <port> children of an <input_ports> or <output_ports> group and
- * prepends each one to model->inputs or model->outputs respectively.
- *
- *   port_group: the <input_ports>/<output_ports> node (no attributes allowed)
- *   model:      the model that receives the parsed ports
- *   port_names: port names used for the duplicate-name check
- *   loc_data:   source location information for error reporting
- *
- * Invalid input is reported through archfpga_throw(), bad_tag() and
- * bad_attribute(). */
-static void ProcessModelPorts(pugi::xml_node port_group, t_model* model, std::set<std::string>& port_names, const pugiutil::loc_data& loc_data) {
-    for (pugi::xml_attribute attr : port_group.attributes()) {
-        bad_attribute(attr, port_group, loc_data);
-    }
-
-    //The direction of every port in the group follows from the group's tag
-    enum PORTS dir = ERR_PORT;
-    if (port_group.name() == std::string("input_ports")) {
-        dir = IN_PORT;
-    } else {
-        VTR_ASSERT(port_group.name() == std::string("output_ports"));
-        dir = OUT_PORT;
-    }
-
-    //Process each port
-    for (pugi::xml_node port : port_group.children()) {
-        //Should only be ports
-        if (port.name() != std::string("port")) {
-            bad_tag(port, loc_data, port_group, {"port"});
-        }
-
-        //Ports should have no children
-        for (pugi::xml_node port_child : port.children()) {
-            bad_tag(port_child, loc_data, port);
-        }
-
-        t_model_ports* model_port = new t_model_ports;
-
-        model_port->dir = dir;
-
-        //Process the attributes of each port
-        for (pugi::xml_attribute attr : port.attributes()) {
-            if (attr.name() == std::string("name")) {
-                model_port->name = vtr::strdup(attr.value());
-
-            } else if (attr.name() == std::string("is_clock")) {
-                model_port->is_clock = attribute_to_bool(port, attr, loc_data);
-
-            } else if (attr.name() == std::string("is_non_clock_global")) {
-                model_port->is_non_clock_global = attribute_to_bool(port, attr, loc_data);
-
-            } else if (attr.name() == std::string("clock")) {
-                model_port->clock = std::string(attr.value());
-
-            } else if (attr.name() == std::string("combinational_sink_ports")) {
-                model_port->combinational_sink_ports = vtr::split(attr.value());
-
-            } else {
-                bad_attribute(attr, port, loc_data);
-            }
-        }
-
-        //Sanity checks
-        //The name is validated first because the remaining checks print it
-        if (model_port->name == nullptr) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(port),
-                           "Model port is missing a name");
-        }
-
-        if (model_port->is_clock == true && model_port->is_non_clock_global == true) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(port),
-                           "Model port '%s' cannot be both a clock and a non-clock signal simultaneously", model_port->name);
-        }
-
-        if (port_names.count(model_port->name)) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(port),
-                           "Duplicate model port named '%s'", model_port->name);
-        }
-
-        if (dir == OUT_PORT && !model_port->combinational_sink_ports.empty()) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(port),
-                           "Model output ports can not have combinational sink ports");
-        }
-
-        //Add the port (prepend to the list matching its direction)
-        if (dir == IN_PORT) {
-            model_port->next = model->inputs;
-            model->inputs = model_port;
-
-        } else {
-            VTR_ASSERT(dir == OUT_PORT);
-
-            model_port->next = model->outputs;
-            model->outputs = model_port;
-        }
-    }
-}
-
-/* Loads the <layout> section: validates its children and appends one grid
- * definition to arch->grid_layouts for every <auto_layout>/<fixed_layout>
- * child, in document order. */
-static void ProcessLayout(pugi::xml_node layout_tag, t_arch* arch, const pugiutil::loc_data& loc_data) {
-    VTR_ASSERT(layout_tag.name() == std::string("layout"));
-
-    //Expect no attributes on <layout>
-    expect_only_attributes(layout_tag, {}, loc_data);
-
-    //First pass: tally the layout kinds and reject anything else
-    size_t num_auto = 0;
-    size_t num_fixed = 0;
-    for (auto child : layout_tag.children()) {
-        const std::string child_name = child.name();
-
-        if (child_name == "auto_layout") {
-            num_auto += 1;
-            continue;
-        }
-        if (child_name == "fixed_layout") {
-            num_fixed += 1;
-            continue;
-        }
-
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(child),
-                       "Unexpected tag type '<%s>', expected '<auto_layout>' or '<fixed_layout>'", child.name());
-    }
-
-    const bool no_layout_given = (num_auto + num_fixed == 0);
-    if (no_layout_given) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(layout_tag),
-                       "Expected either an <auto_layout> or <fixed_layout> tag");
-    }
-    if (num_auto > 1) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(layout_tag),
-                       "Expected at most one <auto_layout> tag");
-    }
-    VTR_ASSERT_MSG(num_auto == 0 || num_auto == 1, "<auto_layout> may appear at most once");
-
-    //Second pass: build a grid definition from each layout tag
-    for (auto child : layout_tag.children()) {
-        arch->grid_layouts.emplace_back(ProcessGridLayout(&arch->strings, child, loc_data));
-    }
-}
-
-/* Builds a t_grid_def from a single <auto_layout> or <fixed_layout> tag.
- *
- *   strings:         string internment table forwarded to ProcessMetadata()
- *   layout_type_tag: the <auto_layout> or <fixed_layout> node
- *   loc_data:        source location information for error reporting
- *
- * The tag's own attributes select the grid type, name and (for fixed
- * layouts) dimensions. Every child tag is a block location specification
- * (perimeter, corners, fill, single, col, row or region) and contributes one
- * or more t_grid_loc_def entries to grid_def.loc_defs. Each child must carry
- * the 'type' and 'priority' attributes.
- *
- * Invalid input is reported through archfpga_throw(). */
-static t_grid_def ProcessGridLayout(vtr::string_internment* strings, pugi::xml_node layout_type_tag, const pugiutil::loc_data& loc_data) {
-    t_grid_def grid_def;
-
-    //Determine the grid specification type
-    if (layout_type_tag.name() == std::string("auto_layout")) {
-        expect_only_attributes(layout_type_tag, {"aspect_ratio"}, loc_data);
-
-        grid_def.grid_type = GridDefType::AUTO;
-
-        grid_def.aspect_ratio = get_attribute(layout_type_tag, "aspect_ratio", loc_data, ReqOpt::OPTIONAL).as_float(1.);
-        grid_def.name = "auto";
-
-    } else if (layout_type_tag.name() == std::string("fixed_layout")) {
-        expect_only_attributes(layout_type_tag, {"width", "height", "name"}, loc_data);
-
-        grid_def.grid_type = GridDefType::FIXED;
-        grid_def.width = get_attribute(layout_type_tag, "width", loc_data).as_int();
-        grid_def.height = get_attribute(layout_type_tag, "height", loc_data).as_int();
-        std::string name = get_attribute(layout_type_tag, "name", loc_data).value();
-
-        if (name == "auto") {
-            //We name <auto_layout> as 'auto', so don't allow a user to specify it
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(layout_type_tag),
-                           "The name '%s' is reserved for auto-sized layouts; please choose another name",
-                           name.c_str());
-        }
-        grid_def.name = name;
-
-    } else {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(layout_type_tag),
-                       "Unexpected tag '<%s>'. Expected '<auto_layout>' or '<fixed_layout>'.",
-                       layout_type_tag.name());
-    }
-
-    //Process all the block location specifications
-    for (auto loc_spec_tag : layout_type_tag.children()) {
-        const std::string loc_type = loc_spec_tag.name();
-        auto type_name = get_attribute(loc_spec_tag, "type", loc_data).value();
-        int priority = get_attribute(loc_spec_tag, "priority", loc_data).as_int();
-        t_metadata_dict meta = ProcessMetadata(strings, loc_spec_tag, loc_data);
-
-        //Creates a location definition covering the given x/y expression ranges
-        auto make_loc_def = [&](const char* x_start, const char* x_end,
-                                const char* y_start, const char* y_end) {
-            t_grid_loc_def def(type_name, priority);
-            def.x.start_expr = x_start;
-            def.x.end_expr = x_end;
-            def.y.start_expr = y_start;
-            def.y.end_expr = y_end;
-            return def;
-        };
-
-        //Gives 'def' its own copy of this tag's metadata and points def.meta at it
-        auto own_meta = [&meta](t_grid_loc_def& def) {
-            def.owned_meta = std::make_unique<t_metadata_dict>(meta);
-            def.meta = def.owned_meta.get();
-        };
-
-        //Copies an optional attribute into 'dest', leaving 'dest' untouched
-        //when the attribute is absent
-        auto read_optional = [&](const char* attr_name, auto& dest) {
-            auto attr = get_attribute(loc_spec_tag, attr_name, loc_data, ReqOpt::OPTIONAL);
-            if (attr) {
-                dest = attr.value();
-            }
-        };
-
-        if (loc_type == "perimeter") {
-            expect_only_attributes(loc_spec_tag, {"type", "priority"}, loc_data);
-
-            //The edges
-            t_grid_loc_def left_edge = make_loc_def("0", "0", "0", "H - 1");           //Including corners
-            t_grid_loc_def right_edge = make_loc_def("W - 1", "W - 1", "0", "H - 1");  //Including corners
-            t_grid_loc_def bottom_edge = make_loc_def("1", "W - 2", "0", "0");         //Excluding corners
-            t_grid_loc_def top_edge = make_loc_def("1", "W - 2", "H - 1", "H - 1");    //Excluding corners
-
-            //The left edge owns the metadata; the other edges only refer to it.
-            //Moving left_edge below transfers ownership without relocating the
-            //dictionary, so the shared pointers remain valid.
-            own_meta(left_edge);
-            right_edge.meta = left_edge.meta;
-            top_edge.meta = left_edge.meta;
-            bottom_edge.meta = left_edge.meta;
-
-            grid_def.loc_defs.emplace_back(std::move(left_edge));
-            grid_def.loc_defs.emplace_back(std::move(right_edge));
-            grid_def.loc_defs.emplace_back(std::move(top_edge));
-            grid_def.loc_defs.emplace_back(std::move(bottom_edge));
-
-        } else if (loc_type == "corners") {
-            expect_only_attributes(loc_spec_tag, {"type", "priority"}, loc_data);
-
-            //The corners
-            t_grid_loc_def bottom_left = make_loc_def("0", "0", "0", "0");
-            t_grid_loc_def top_left = make_loc_def("0", "0", "H-1", "H-1");
-            t_grid_loc_def bottom_right = make_loc_def("W-1", "W-1", "0", "0");
-            t_grid_loc_def top_right = make_loc_def("W-1", "W-1", "H-1", "H-1");
-
-            //The bottom-left corner owns the metadata shared by all four
-            own_meta(bottom_left);
-            top_left.meta = bottom_left.meta;
-            bottom_right.meta = bottom_left.meta;
-            top_right.meta = bottom_left.meta;
-
-            grid_def.loc_defs.emplace_back(std::move(bottom_left));
-            grid_def.loc_defs.emplace_back(std::move(top_left));
-            grid_def.loc_defs.emplace_back(std::move(bottom_right));
-            grid_def.loc_defs.emplace_back(std::move(top_right));
-
-        } else if (loc_type == "fill") {
-            expect_only_attributes(loc_spec_tag, {"type", "priority"}, loc_data);
-
-            t_grid_loc_def fill = make_loc_def("0", "W - 1", "0", "H - 1");
-            own_meta(fill);
-
-            grid_def.loc_defs.emplace_back(std::move(fill));
-
-        } else if (loc_type == "single") {
-            expect_only_attributes(loc_spec_tag, {"type", "priority", "x", "y"}, loc_data);
-
-            t_grid_loc_def single(type_name, priority);
-            single.x.start_expr = get_attribute(loc_spec_tag, "x", loc_data).value();
-            single.y.start_expr = get_attribute(loc_spec_tag, "y", loc_data).value();
-            //end is inclusive so need to include block width/height
-            single.x.end_expr = single.x.start_expr + " + w - 1";
-            single.y.end_expr = single.y.start_expr + " + h - 1";
-
-            own_meta(single);
-
-            grid_def.loc_defs.emplace_back(std::move(single));
-
-        } else if (loc_type == "col") {
-            expect_only_attributes(loc_spec_tag, {"type", "priority", "startx", "repeatx", "starty", "incry"}, loc_data);
-
-            t_grid_loc_def col(type_name, priority);
-
-            auto startx_attr = get_attribute(loc_spec_tag, "startx", loc_data);
-
-            col.x.start_expr = startx_attr.value();
-            col.x.end_expr = startx_attr.value() + std::string(" + w - 1"); //end is inclusive so need to include block width
-
-            read_optional("repeatx", col.x.repeat_expr);
-            read_optional("starty", col.y.start_expr);
-            read_optional("incry", col.y.incr_expr);
-
-            own_meta(col);
-
-            grid_def.loc_defs.emplace_back(std::move(col));
-
-        } else if (loc_type == "row") {
-            expect_only_attributes(loc_spec_tag, {"type", "priority", "starty", "repeaty", "startx", "incrx"}, loc_data);
-
-            t_grid_loc_def row(type_name, priority);
-
-            auto starty_attr = get_attribute(loc_spec_tag, "starty", loc_data);
-
-            row.y.start_expr = starty_attr.value();
-            row.y.end_expr = starty_attr.value() + std::string(" + h - 1"); //end is inclusive so need to include block height
-
-            read_optional("repeaty", row.y.repeat_expr);
-            read_optional("startx", row.x.start_expr);
-            read_optional("incrx", row.x.incr_expr);
-
-            own_meta(row);
-
-            grid_def.loc_defs.emplace_back(std::move(row));
-        } else if (loc_type == "region") {
-            expect_only_attributes(loc_spec_tag,
-                                   {"type", "priority",
-                                    "startx", "endx", "repeatx", "incrx",
-                                    "starty", "endy", "repeaty", "incry"},
-                                   loc_data);
-            t_grid_loc_def region(type_name, priority);
-
-            //Every range attribute is optional for a region
-            read_optional("startx", region.x.start_expr);
-            read_optional("endx", region.x.end_expr);
-            read_optional("starty", region.y.start_expr);
-            read_optional("endy", region.y.end_expr);
-            read_optional("repeatx", region.x.repeat_expr);
-            read_optional("repeaty", region.y.repeat_expr);
-            read_optional("incrx", region.x.incr_expr);
-            read_optional("incry", region.y.incr_expr);
-
-            own_meta(region);
-
-            grid_def.loc_defs.emplace_back(std::move(region));
-        } else {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(loc_spec_tag),
-                           "Unrecognized grid location specification type '%s'\n", loc_type.c_str());
-        }
-    }
-
-    //TODO: warn if any type has no grid location specified (not implemented)
-
-    return grid_def;
-}
-
-/* Takes in node pointing to <device> and loads all the
- * child type objects.
- *
- * Reads, in this order: <sizing>, <area>, the optional <chan_width_distr>,
- * <connection_block>, <switch_block> and the optional <default_fc>, storing
- * the results in 'arch' and 'arch_def_fc'. A legacy <timing> child is
- * rejected with an upgrade hint. */
-static void ProcessDevice(pugi::xml_node Node, t_arch* arch, t_default_fc_spec& arch_def_fc, const pugiutil::loc_data& loc_data) {
-    const char* Prop;
-    pugi::xml_node Cur;
-    bool custom_switch_block = false;
-
-    //Warn that <timing> is no longer supported
-    //TODO: eventually remove
-    try {
-        expect_child_node_count(Node, "timing", 0, loc_data);
-    } catch (pugiutil::XmlError& e) {
-        //Re-report the error with advice on how to migrate the file
-        std::string msg = e.what();
-        msg += ". <timing> has been replaced with the <switch_block> tag.";
-        msg += " Please upgrade your architecture file.";
-        archfpga_throw(e.filename().c_str(), e.line(), msg.c_str());
-    }
-
-    expect_only_children(Node, {"sizing", "area", "chan_width_distr", "switch_block", "connection_block", "default_fc"}, loc_data);
-
-    //<sizing> tag
-    Cur = get_single_child(Node, "sizing", loc_data);
-    expect_only_attributes(Cur, {"R_minW_nmos", "R_minW_pmos"}, loc_data);
-    arch->R_minW_nmos = get_attribute(Cur, "R_minW_nmos", loc_data).as_float();
-    arch->R_minW_pmos = get_attribute(Cur, "R_minW_pmos", loc_data).as_float();
-
-    //<area> tag
-    Cur = get_single_child(Node, "area", loc_data);
-    expect_only_attributes(Cur, {"grid_logic_tile_area"}, loc_data);
-    arch->grid_logic_tile_area = get_attribute(Cur, "grid_logic_tile_area",
-                                               loc_data, ReqOpt::OPTIONAL)
-                                     .as_float(0);
-
-    //<chan_width_distr> tag (optional): only validate it when it is present
-    Cur = get_single_child(Node, "chan_width_distr", loc_data, ReqOpt::OPTIONAL);
-    if (Cur) {
-        expect_only_attributes(Cur, {}, loc_data);
-        ProcessChanWidthDistr(Cur, arch, loc_data);
-    }
-
-    //<connection_block> tag
-    Cur = get_single_child(Node, "connection_block", loc_data);
-    expect_only_attributes(Cur, {"input_switch_name"}, loc_data);
-    arch->ipin_cblock_switch_name = get_attribute(Cur, "input_switch_name", loc_data).as_string();
-
-    //<switch_block> tag
-    Cur = get_single_child(Node, "switch_block", loc_data);
-    expect_only_attributes(Cur, {"type", "fs"}, loc_data);
-    Prop = get_attribute(Cur, "type", loc_data).value();
-    if (strcmp(Prop, "wilton") == 0) {
-        arch->SBType = WILTON;
-    } else if (strcmp(Prop, "universal") == 0) {
-        arch->SBType = UNIVERSAL;
-    } else if (strcmp(Prop, "subset") == 0) {
-        arch->SBType = SUBSET;
-    } else if (strcmp(Prop, "custom") == 0) {
-        arch->SBType = CUSTOM;
-        custom_switch_block = true;
-    } else {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Cur),
-                       "Unknown property %s for switch block type x\n", Prop);
-    }
-
-    //NOTE(review): presumably 'fs' becomes optional for custom switch blocks,
-    //depending on how BoolToReqOpt() maps its argument -- confirm. When the
-    //attribute is absent the value falls back to 3.
-    ReqOpt CUSTOM_SWITCHBLOCK_REQD = BoolToReqOpt(!custom_switch_block);
-    arch->Fs = get_attribute(Cur, "fs", loc_data, CUSTOM_SWITCHBLOCK_REQD).as_int(3);
-
-    //<default_fc> tag (optional)
-    Cur = get_single_child(Node, "default_fc", loc_data, ReqOpt::OPTIONAL);
-    if (Cur) {
-        arch_def_fc.specified = true;
-        expect_only_attributes(Cur, {"in_type", "in_val", "out_type", "out_val"}, loc_data);
-        Process_Fc_Values(Cur, arch_def_fc, loc_data);
-    } else {
-        arch_def_fc.specified = false;
-    }
-}
-
-/* Loads <chan_width_distr>: its <x> child describes arch->Chans.chan_x_dist
- * and its <y> child describes arch->Chans.chan_y_dist. No other children are
- * accepted. */
-static void ProcessChanWidthDistr(pugi::xml_node Node,
-                                  t_arch* arch,
-                                  const pugiutil::loc_data& loc_data) {
-    expect_only_children(Node, {"x", "y"}, loc_data);
-
-    //Horizontal channels
-    pugi::xml_node x_node = get_single_child(Node, "x", loc_data);
-    ProcessChanWidthDistrDir(x_node, &arch->Chans.chan_x_dist, loc_data);
-
-    //Vertical channels
-    pugi::xml_node y_node = get_single_child(Node, "y", loc_data);
-    ProcessChanWidthDistrDir(y_node, &arch->Chans.chan_y_dist, loc_data);
-}
-
-/* Loads one direction (<x> or <y>) of <chan_width_distr> into 'chan'.
- *
- * The 'distr' attribute selects the distribution type and decides which of
- * the shape attributes must be present:
- *   uniform  - none of width/xpeak/dc is required
- *   gaussian - width, xpeak and dc are required
- *   pulse    - width, xpeak and dc are required
- *   delta    - xpeak and dc are required
- * 'peak' is always read with get_attribute()'s default requirement. */
-static void ProcessChanWidthDistrDir(pugi::xml_node Node, t_chan* chan, const pugiutil::loc_data& loc_data) {
-    ReqOpt width_req = ReqOpt::OPTIONAL;
-    ReqOpt xpeak_req = ReqOpt::OPTIONAL;
-    ReqOpt dc_req = ReqOpt::OPTIONAL;
-
-    const char* distr = get_attribute(Node, "distr", loc_data).value();
-    const std::string distr_name(distr);
-
-    if (distr_name == "uniform") {
-        chan->type = UNIFORM;
-    } else if (distr_name == "gaussian") {
-        chan->type = GAUSSIAN;
-        width_req = ReqOpt::REQUIRED;
-        xpeak_req = ReqOpt::REQUIRED;
-        dc_req = ReqOpt::REQUIRED;
-    } else if (distr_name == "pulse") {
-        chan->type = PULSE;
-        width_req = ReqOpt::REQUIRED;
-        xpeak_req = ReqOpt::REQUIRED;
-        dc_req = ReqOpt::REQUIRED;
-    } else if (distr_name == "delta") {
-        chan->type = DELTA;
-        xpeak_req = ReqOpt::REQUIRED;
-        dc_req = ReqOpt::REQUIRED;
-    } else {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                       "Unknown property %s for chan_width_distr x\n", distr);
-    }
-
-    //Shape parameters; absent optional ones fall back to the given defaults
-    chan->peak = get_attribute(Node, "peak", loc_data).as_float(UNDEFINED);
-    chan->width = get_attribute(Node, "width", loc_data, width_req).as_float(0);
-    chan->xpeak = get_attribute(Node, "xpeak", loc_data, xpeak_req).as_float(0);
-    chan->dc = get_attribute(Node, "dc", loc_data, dc_req).as_float(0);
-}
-
-/* Loads the <tile> children of Node into PhysicalTileTypes.
- *
- * An 'empty' pseudo-type with index 0 is appended first; the parsed tiles
- * follow with indices counting up from 1. Tile names must be unique, and the
- * legacy <gridlocations> child is rejected with an upgrade hint. */
-static void ProcessTiles(pugi::xml_node Node,
-                         std::vector<t_physical_tile_type>& PhysicalTileTypes,
-                         std::vector<t_logical_block_type>& LogicalBlockTypes,
-                         const t_default_fc_spec& arch_def_fc,
-                         t_arch& arch,
-                         const pugiutil::loc_data& loc_data) {
-    /* Names of the tiles parsed so far, for duplicate detection */
-    std::map<std::string, int> seen_tile_names;
-
-    /* The type list needs one additional entry: the empty pseudo-type */
-    t_physical_tile_type empty_type = get_empty_physical_type();
-    empty_type.index = 0;
-    PhysicalTileTypes.push_back(empty_type);
-
-    /* Real tiles are numbered after the 'empty' type */
-    int next_index = 1;
-
-    for (pugi::xml_node tile_node = Node.first_child(); tile_node; tile_node = tile_node.next_sibling(tile_node.name())) {
-        check_node(tile_node, "tile", loc_data);
-
-        t_physical_tile_type tile;
-        tile.index = next_index;
-
-        /* Parses the properties fields of the type */
-        ProcessTileProps(tile_node, &tile, loc_data);
-
-        const bool is_new_name = seen_tile_names.insert(std::make_pair(std::string(tile.name), 0)).second;
-        if (!is_new_name) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(tile_node),
-                           "Duplicate tile descriptor name: '%s'.\n", tile.name);
-        }
-
-        //Warn that gridlocations is no longer supported
-        //TODO: eventually remove
-        try {
-            expect_child_node_count(tile_node, "gridlocations", 0, loc_data);
-        } catch (pugiutil::XmlError& e) {
-            std::string msg = e.what();
-            msg += ". <gridlocations> has been replaced by the <auto_layout> and <device_layout> tags in the <layout> section.";
-            msg += " Please upgrade your architecture file.";
-            archfpga_throw(e.filename().c_str(), e.line(), msg.c_str());
-        }
-
-        //Load switchblock type and location overrides
-        pugi::xml_node sb_locations = get_single_child(tile_node, "switchblock_locations", loc_data, ReqOpt::OPTIONAL);
-        ProcessSwitchblockLocations(sb_locations, &tile, arch, loc_data);
-
-        ProcessSubTiles(tile_node, &tile, LogicalBlockTypes, arch.Segments, arch_def_fc, loc_data);
-
-        /* Type fully read: store it and move on to the next index */
-        ++next_index;
-        PhysicalTileTypes.push_back(tile);
-    }
-}
-
-/* Sets is_input_type / is_output_type on every physical tile type: a flag is
- * true when at least one of the tile's equivalent sites contains the
- * MODEL_INPUT / MODEL_OUTPUT blif model respectively. */
-static void MarkIoTypes(std::vector<t_physical_tile_type>& PhysicalTileTypes) {
-    for (auto& tile : PhysicalTileTypes) {
-        tile.is_input_type = false;
-        tile.is_output_type = false;
-
-        auto sites = get_equivalent_sites_set(&tile);
-
-        //Stops at the first site that contains the requested model
-        auto any_site_contains = [&sites](const auto& blif_model) {
-            for (const auto& site : sites) {
-                if (block_type_contains_blif_model(site, blif_model)) {
-                    return true;
-                }
-            }
-            return false;
-        };
-
-        if (any_site_contains(MODEL_INPUT)) {
-            tile.is_input_type = true;
-        }
-
-        if (any_site_contains(MODEL_OUTPUT)) {
-            tile.is_output_type = true;
-        }
-    }
-}
-
-/* Reads the attributes of a <tile> tag into PhysicalTileType.
- *
- *   name   - required; stored as a vtr::strdup() copy
- *   width  - optional, defaults to 1
- *   height - optional, defaults to 1
- *   area   - optional, defaults to UNDEFINED; must be non-negative when given
- *
- * Any other attribute is rejected by expect_only_attributes(). */
-static void ProcessTileProps(pugi::xml_node Node,
-                             t_physical_tile_type* PhysicalTileType,
-                             const pugiutil::loc_data& loc_data) {
-    expect_only_attributes(Node, {"name", "width", "height", "area"}, loc_data);
-
-    /* Load type name */
-    auto Prop = get_attribute(Node, "name", loc_data).value();
-    PhysicalTileType->name = vtr::strdup(Prop);
-
-    /* Load properties */
-    PhysicalTileType->width = get_attribute(Node, "width", loc_data, ReqOpt::OPTIONAL).as_uint(1);
-    PhysicalTileType->height = get_attribute(Node, "height", loc_data, ReqOpt::OPTIONAL).as_uint(1);
-
-    auto area_attr = get_attribute(Node, "area", loc_data, ReqOpt::OPTIONAL);
-    PhysicalTileType->area = area_attr.as_float(UNDEFINED);
-
-    /* Validate the area itself (not the tile name), and only when it was
-     * given explicitly so that the UNDEFINED default is never rejected */
-    if (area_attr && PhysicalTileType->area < 0) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                       "Area for type %s must be non-negative\n", PhysicalTileType->name);
-    }
-}
-
-/* Parses the <input>, <output> and <clock> children of Parent (visited in
- * that order) into SubTile->ports and returns how many pins of each kind the
- * sub tile's ports hold.
- *
- * Port names must be unique within the sub tile. A name that is already
- * present in tile_port_names is only accepted when the recorded port has the
- * same num_pins and equivalence. */
-static t_pin_counts ProcessSubTilePorts(pugi::xml_node Parent,
-                                        t_sub_tile* SubTile,
-                                        std::unordered_map<std::string, t_physical_tile_port>& tile_port_names,
-                                        const pugiutil::loc_data& loc_data) {
-    /* Port names seen in this sub tile */
-    std::map<std::string, int> seen_names;
-
-    /* Expected total, used to cross-check the parsing loop below */
-    const int expected_inputs = count_children(Parent, "input", loc_data, ReqOpt::OPTIONAL);
-    const int expected_outputs = count_children(Parent, "output", loc_data, ReqOpt::OPTIONAL);
-    const int expected_clocks = count_children(Parent, "clock", loc_data, ReqOpt::OPTIONAL);
-    const int expected_ports = expected_inputs + expected_outputs + expected_clocks;
-
-    int ports_parsed = 0;   /* running index over all ports */
-    int first_pin = 0;      /* index of the next port's first pin */
-
-    const char* const port_kinds[] = {"input", "output", "clock"};
-    for (const char* kind : port_kinds) {
-        int index_in_kind = 0;
-
-        for (pugi::xml_node port_node = get_first_child(Parent, kind, loc_data, ReqOpt::OPTIONAL);
-             port_node;
-             port_node = port_node.next_sibling(port_node.name())) {
-            t_physical_tile_port port;
-
-            port.index = ports_parsed;
-            port.absolute_first_pin_index = first_pin;
-            port.port_index_by_type = index_in_kind;
-            ProcessTilePort(port_node, &port, loc_data);
-
-            //Check port name duplicates within this sub tile
-            const bool unique_in_sub_tile = seen_names.insert(std::make_pair(std::string(port.name), 0)).second;
-            if (!unique_in_sub_tile) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(port_node),
-                               "Duplicate port names in tile '%s': port '%s'\n",
-                               SubTile->name, port.name);
-            }
-
-            //Check against same-named ports recorded for the whole tile
-            auto tile_entry = tile_port_names.insert(std::make_pair(std::string(port.name), port));
-            if (!tile_entry.second) {
-                const t_physical_tile_port& known = tile_entry.first->second;
-                const bool same_settings = !(known.num_pins != port.num_pins || known.equivalent != port.equivalent);
-                if (!same_settings) {
-                    archfpga_throw(loc_data.filename_c_str(), loc_data.line(port_node),
-                                   "Another port found with the same name in other sub tiles "
-                                   "that did not match the current port settings. '%s': port '%s'\n",
-                                   SubTile->name, port.name);
-                }
-            }
-
-            //Push port
-            SubTile->ports.push_back(port);
-
-            /* Advance the running indices */
-            ++ports_parsed;
-            ++index_in_kind;
-            first_pin += port.num_pins;
-        }
-    }
-
-    VTR_ASSERT(ports_parsed == expected_ports);
-
-    /* Count stats on the number of each type of pin */
-    t_pin_counts pin_counts;
-    for (const auto& port : SubTile->ports) {
-        if (port.type == OUT_PORT) {
-            pin_counts.output += port.num_pins;
-        } else if (port.type == IN_PORT && !port.is_clock) {
-            pin_counts.input += port.num_pins;
-        } else {
-            VTR_ASSERT(port.is_clock && port.type == IN_PORT);
-            pin_counts.clock += port.num_pins;
-        }
-    }
-
-    return pin_counts;
-}
-
-/**
- * Populates a physical tile port from a single <input>, <output> or <clock> tag.
- *
- * Reads the `name` and `num_pins` attributes, the optional `equivalent`
- * attribute and (for input/clock tags only) the optional
- * `is_non_clock_global` attribute, then derives the port direction and the
- * clock flag from the tag name.
- *
- * @param Node      XML node of the port tag being parsed.
- * @param port      Port description to fill in; `name` receives a vtr::strdup copy.
- * @param loc_data  Source-location data used when reporting errors.
- *
- * Errors are reported through archfpga_throw() for an unknown or disallowed
- * `equivalent` value, a non-positive `num_pins`, a clock port that is also
- * flagged `is_non_clock_global`, or an unrecognised tag name.
- */
-static void ProcessTilePort(pugi::xml_node Node,
-                            t_physical_tile_port* port,
-                            const pugiutil::loc_data& loc_data) {
-    std::vector<std::string> expected_attributes = {"name", "num_pins", "equivalent"};
-
-    // Only input and clock tags may carry the is_non_clock_global attribute.
-    if (Node.name() == "input"s || Node.name() == "clock"s) {
-        expected_attributes.push_back("is_non_clock_global");
-    }
-
-    expect_only_attributes(Node, expected_attributes, loc_data);
-
-    const char* Prop;
-    Prop = get_attribute(Node, "name", loc_data).value();
-    port->name = vtr::strdup(Prop);
-
-    // `equivalent` is optional; when it is absent port->equivalent is left untouched.
-    Prop = get_attribute(Node, "equivalent", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-    if (Prop) {
-        if (Prop == "none"s) {
-            port->equivalent = PortEquivalence::NONE;
-        } else if (Prop == "full"s) {
-            port->equivalent = PortEquivalence::FULL;
-        } else if (Prop == "instance"s) {
-            // Instance equivalence is only accepted on output ports.
-            if (Node.name() == "output"s) {
-                port->equivalent = PortEquivalence::INSTANCE;
-            } else {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                               "Invalid pin equivalence '%s' for %s port.", Prop, Node.name());
-            }
-        } else {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                           "Invalid pin equivalence '%s'.", Prop);
-        }
-    }
-    port->num_pins = get_attribute(Node, "num_pins", loc_data).as_int(0);
-    port->is_non_clock_global = get_attribute(Node,
-                                              "is_non_clock_global", loc_data, ReqOpt::OPTIONAL)
-                                    .as_bool(false);
-
-    if (port->num_pins <= 0) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                       "Invalid number of pins %d for %s port.", port->num_pins, Node.name());
-    }
-
-    // The tag name decides the direction and whether the port is a clock.
-    if (0 == strcmp(Node.name(), "input")) {
-        port->type = IN_PORT;
-        port->is_clock = false;
-
-    } else if (0 == strcmp(Node.name(), "output")) {
-        port->type = OUT_PORT;
-        port->is_clock = false;
-
-    } else if (0 == strcmp(Node.name(), "clock")) {
-        port->type = IN_PORT;
-        port->is_clock = true;
-
-        if (port->is_non_clock_global == true) {
-            // Identify the offending port by its own name: Node.name() is
-            // always "clock" in this branch and would not tell ports apart.
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                           "Port %s cannot be both a clock and a non-clock simultaneously\n",
-                           port->name);
-        }
-
-    } else {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                       "Unknown port type %s", Node.name());
-    }
-}
-
-/**
- * Parses the <equivalent_sites> tag of a sub tile.
- *
- * Every <site> child names a logical block (`pb_type` attribute) that can be
- * placed in the sub tile, plus an optional `pin_mapping` mode ("direct" by
- * default, or "custom"). The matching pin map is built by the direct/custom
- * helper, and the logical block is appended to SubTile->equivalent_sites.
- *
- * @param Parent             The <equivalent_sites> node.
- * @param SubTile            Sub tile receiving the equivalent sites.
- * @param PhysicalTileType   Physical tile that owns the sub tile.
- * @param LogicalBlockTypes  Logical block types searched by name.
- * @param loc_data           Source-location data used when reporting errors.
- *
- * Errors are reported through archfpga_throw() when no <site> is present or
- * when `pin_mapping` is neither "direct" nor "custom".
- */
-static void ProcessTileEquivalentSites(pugi::xml_node Parent,
-                                       t_sub_tile* SubTile,
-                                       t_physical_tile_type* PhysicalTileType,
-                                       std::vector<t_logical_block_type>& LogicalBlockTypes,
-                                       const pugiutil::loc_data& loc_data) {
-    pugi::xml_node CurSite;
-
-    expect_only_children(Parent, {"site"}, loc_data);
-
-    if (count_children(Parent, "site", loc_data) < 1) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                       "There are no sites corresponding to this tile: %s.\n", SubTile->name);
-    }
-
-    CurSite = Parent.first_child();
-    while (CurSite) {
-        check_node(CurSite, "site", loc_data);
-
-        expect_only_attributes(CurSite, {"pb_type", "pin_mapping"}, loc_data);
-        /* Load equivalent site name */
-        auto Prop = std::string(get_attribute(CurSite, "pb_type", loc_data).value());
-
-        auto LogicalBlockType = get_type_by_name<t_logical_block_type>(Prop.c_str(), LogicalBlockTypes);
-
-        auto pin_mapping = get_attribute(CurSite, "pin_mapping", loc_data, ReqOpt::OPTIONAL).as_string("direct");
-
-        if (0 == strcmp(pin_mapping, "custom")) {
-            // Pin mapping between Tile and Pb Type is user-defined
-            ProcessEquivalentSiteCustomConnection(CurSite, SubTile, PhysicalTileType, LogicalBlockType, Prop, loc_data);
-        } else if (0 == strcmp(pin_mapping, "direct")) {
-            // Pin mapping is the identity between tile pins and pb_type pins
-            ProcessEquivalentSiteDirectConnection(CurSite, SubTile, PhysicalTileType, LogicalBlockType, loc_data);
-        } else {
-            // Any other value would leave this site without a pin map, so
-            // reject it here instead of accepting it silently.
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurSite),
-                           "Invalid pin_mapping '%s' for site '%s' (expected 'direct' or 'custom').\n",
-                           pin_mapping, Prop.c_str());
-        }
-
-        // Register the site only when the resolved block's pb_type carries the requested name.
-        if (0 == strcmp(LogicalBlockType->pb_type->name, Prop.c_str())) {
-            SubTile->equivalent_sites.push_back(LogicalBlockType);
-
-            check_port_direct_mappings(PhysicalTileType, SubTile, LogicalBlockType);
-        }
-
-        CurSite = CurSite.next_sibling(CurSite.name());
-    }
-}
-
-/**
- * Builds the identity pin mapping used by a "direct" equivalent site.
- *
- * The number of pins of one sub tile instance (the sub tile's pin index count
- * divided by its total capacity) must equal the pin count of the logical
- * block's pb_type; otherwise an error is raised asking for a user-defined
- * mapping. Logical pin N is then mapped to physical pin N and the resulting
- * map is stored in the physical tile, keyed by logical block index and sub
- * tile index.
- *
- * @param Parent            The <site> node (used for error locations only).
- * @param SubTile           Sub tile whose pins are being mapped.
- * @param PhysicalTileType  Physical tile that stores the resulting map.
- * @param LogicalBlockType  Logical block being mapped onto the sub tile.
- * @param loc_data          Source-location data used when reporting errors.
- */
-static void ProcessEquivalentSiteDirectConnection(pugi::xml_node Parent,
-                                                  t_sub_tile* SubTile,
-                                                  t_physical_tile_type* PhysicalTileType,
-                                                  t_logical_block_type* LogicalBlockType,
-                                                  const pugiutil::loc_data& loc_data) {
-    // Pins belonging to a single instance of the sub tile
-    const int sub_tile_pin_total = (int)SubTile->sub_tile_to_tile_pin_indices.size();
-    const int pins_per_instance = sub_tile_pin_total / SubTile->capacity.total();
-
-    if (pins_per_instance != LogicalBlockType->pb_type->num_pins) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                       "Pin definition differ between site %s and tile %s. User-defined pin mapping is required.\n", LogicalBlockType->pb_type->name, SubTile->name);
-    }
-
-    // One-to-one mapping: logical pin k <-> physical pin k
-    vtr::bimap<t_logical_pin, t_physical_pin> identity_map;
-    for (int pin_idx = 0; pin_idx < pins_per_instance; ++pin_idx) {
-        t_physical_pin phys(pin_idx);
-        t_logical_pin logic(pin_idx);
-
-        identity_map.insert(logic, phys);
-    }
-
-    auto& map_slot = PhysicalTileType->tile_block_pin_directs_map[LogicalBlockType->index][SubTile->index];
-    map_slot = identity_map;
-}
-
-/**
- * Builds a user-defined pin mapping for a "custom" equivalent site.
- *
- * Each <direct> child gives a `from` pin string (physical tile side) and a
- * `to` pin string (logical block side). Both are resolved to pin ranges with
- * ProcessPinString, the ranges must have the same size, and pins are paired
- * in order. The finished map is stored in the physical tile, keyed by logical
- * block index and sub tile index.
- *
- * @param Parent            The <site> node holding the <direct> children.
- * @param SubTile           Sub tile whose pins appear in `from`.
- * @param PhysicalTileType  Physical tile that stores the resulting map.
- * @param LogicalBlockType  Logical block whose pins appear in `to`.
- * @param site_name         Name of the site, used in error messages.
- * @param loc_data          Source-location data used when reporting errors.
- *
- * Errors are reported through archfpga_throw() when there is no <direct>
- * child, when the two ranges differ in size, or when an insertion into the
- * map is rejected as a duplicate.
- */
-static void ProcessEquivalentSiteCustomConnection(pugi::xml_node Parent,
-                                                  t_sub_tile* SubTile,
-                                                  t_physical_tile_type* PhysicalTileType,
-                                                  t_logical_block_type* LogicalBlockType,
-                                                  std::string site_name,
-                                                  const pugiutil::loc_data& loc_data) {
-    expect_only_children(Parent, {"direct"}, loc_data);
-
-    if (count_children(Parent, "direct", loc_data) < 1) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                       "There are no direct pin mappings between site %s and tile %s.\n", site_name.c_str(), SubTile->name);
-    }
-
-    vtr::bimap<t_logical_pin, t_physical_pin> custom_map;
-
-    for (pugi::xml_node direct = Parent.first_child(); direct; direct = direct.next_sibling(direct.name())) {
-        check_node(direct, "direct", loc_data);
-
-        expect_only_attributes(direct, {"from", "to"}, loc_data);
-
-        // `from` is expressed in physical tile pins, `to` in logical block pins
-        const std::string tile_pin_spec(get_attribute(direct, "from", loc_data).value());
-        const std::string block_pin_spec(get_attribute(direct, "to", loc_data).value());
-
-        auto tile_range = ProcessPinString<t_sub_tile*>(direct, SubTile, tile_pin_spec.c_str(), loc_data);
-        auto block_range = ProcessPinString<t_logical_block_type_ptr>(direct, LogicalBlockType, block_pin_spec.c_str(), loc_data);
-
-        // Both sides must describe exactly the same number of pins
-        const auto tile_span = tile_range.second - tile_range.first;
-        const auto block_span = block_range.second - block_range.first;
-        if (tile_span != block_span) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                           "The number of pins specified in the direct pin mapping is "
-                           "not equivalent for Physical Tile %s and Logical Block %s.\n",
-                           SubTile->name, LogicalBlockType->name);
-        }
-
-        // Pair the pins of the two ranges in order
-        int pin_count = tile_span;
-        for (int offset = 0; offset < pin_count; offset++) {
-            t_physical_pin phys(tile_range.first + offset);
-            t_logical_pin logic(block_range.first + offset);
-
-            auto inserted = custom_map.insert(logic, phys);
-            if (!inserted.second) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                               "Duplicate logical pin (%d) to physical pin (%d) mappings found for "
-                               "Physical Tile %s and Logical Block %s.\n",
-                               logic.pin, phys.pin, SubTile->name, LogicalBlockType->name);
-            }
-        }
-    }
-
-    PhysicalTileType->tile_block_pin_directs_map[LogicalBlockType->index][SubTile->index] = custom_map;
-}
-
-/**
- * Parses the optional <pinlocations> tag of a sub tile.
- *
- * Determines the pin distribution pattern ("spread" when the tag is absent),
- * checks that it agrees with the pattern already recorded in `pin_locs` by
- * sibling sub tiles, and, for the "custom" pattern, loads every <loc> child
- * into pin_locs->assignments and verifies that each pin of each port of the
- * sub tile has been given a location.
- *
- * @param Locations         The <pinlocations> node; may be an empty handle.
- * @param PhysicalTileType  Tile providing the width/height bounds for offsets.
- * @param SubTile           Sub tile whose pins are being located.
- * @param pin_locs          Pin location record shared by the tile's sub tiles.
- * @param loc_data          Source-location data used when reporting errors.
- *
- * Errors are reported through archfpga_throw().
- */
-static void ProcessPinLocations(pugi::xml_node Locations,
-                                t_physical_tile_type* PhysicalTileType,
-                                t_sub_tile* SubTile,
-                                t_pin_locs* pin_locs,
-                                const pugiutil::loc_data& loc_data) {
-    pugi::xml_node Cur;
-    const char* Prop;
-    enum e_pin_location_distr distribution;
-
-    /* Translate the `pattern` attribute into a distribution; default to spread when the tag is missing */
-    if (Locations) {
-        expect_only_attributes(Locations, {"pattern"}, loc_data);
-
-        Prop = get_attribute(Locations, "pattern", loc_data).value();
-        if (strcmp(Prop, "spread") == 0) {
-            distribution = E_SPREAD_PIN_DISTR;
-        } else if (strcmp(Prop, "perimeter") == 0) {
-            distribution = E_PERIMETER_PIN_DISTR;
-        } else if (strcmp(Prop, "spread_inputs_perimeter_outputs") == 0) {
-            distribution = E_SPREAD_INPUTS_PERIMETER_OUTPUTS_PIN_DISTR;
-        } else if (strcmp(Prop, "custom") == 0) {
-            distribution = E_CUSTOM_PIN_DISTR;
-        } else {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
-                           "%s is an invalid pin location pattern.\n", Prop);
-        }
-    } else {
-        distribution = E_SPREAD_PIN_DISTR;
-        Prop = "spread";
-    }
-
-    /* The first sub tile processed fixes the distribution; later ones must match it */
-    if (pin_locs->is_distribution_set()) {
-        if (pin_locs->distribution != distribution) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
-                           "Sub Tile %s has a different pin location pattern (%s) with respect "
-                           "to the sibling sub tiles",
-                           SubTile->name, Prop);
-        }
-    } else {
-        pin_locs->distribution = distribution;
-        pin_locs->set_distribution();
-    }
-
-    int sub_tile_index = SubTile->index;
-
-    /* Load the pin locations */
-    if (distribution == E_CUSTOM_PIN_DISTR) {
-        expect_only_children(Locations, {"loc"}, loc_data);
-        Cur = Locations.first_child();
-        /* (side, xoffset, yoffset) combinations already seen, used to reject duplicate <loc> tags */
-        std::set<std::tuple<e_side, int, int>> seen_sides;
-        while (Cur) {
-            check_node(Cur, "loc", loc_data);
-
-            expect_only_attributes(Cur, {"side", "xoffset", "yoffset"}, loc_data);
-
-            /* Get offsets within the tile (both optional, default 0) */
-            int x_offset = get_attribute(Cur, "xoffset", loc_data, ReqOpt::OPTIONAL).as_int(0);
-            int y_offset = get_attribute(Cur, "yoffset", loc_data, ReqOpt::OPTIONAL).as_int(0);
-
-            /* Get side */
-            e_side side = TOP;
-            Prop = get_attribute(Cur, "side", loc_data).value();
-            if (0 == strcmp(Prop, "left")) {
-                side = LEFT;
-            } else if (0 == strcmp(Prop, "top")) {
-                side = TOP;
-            } else if (0 == strcmp(Prop, "right")) {
-                side = RIGHT;
-            } else if (0 == strcmp(Prop, "bottom")) {
-                side = BOTTOM;
-            } else {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Cur),
-                               "'%s' is not a valid side.\n", Prop);
-            }
-
-            /* Offsets must fall inside the tile's width and height */
-            if ((x_offset < 0) || (x_offset >= PhysicalTileType->width)) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Cur),
-                               "'%d' is an invalid horizontal offset for type '%s' (must be within [0, %d]).\n",
-                               x_offset, PhysicalTileType->name, PhysicalTileType->width - 1);
-            }
-            if ((y_offset < 0) || (y_offset >= PhysicalTileType->height)) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Cur),
-                               "'%d' is an invalid vertical offset for type '%s' (must be within [0, %d]).\n",
-                               y_offset, PhysicalTileType->name, PhysicalTileType->height - 1);
-            }
-
-            //Check for duplicate side specifications, since the code below silently overwrites if there are duplicates
-            auto side_offset = std::make_tuple(side, x_offset, y_offset);
-            if (seen_sides.count(side_offset)) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Cur),
-                               "Duplicate pin location side/offset specification."
-                               " Only a single <loc> per side/xoffset/yoffset is permitted.\n");
-            }
-            seen_sides.insert(side_offset);
-
-            /* Go through the list of pins given as the text content of <loc> */
-            const std::vector<std::string> Tokens = vtr::split(Cur.child_value());
-            int Count = (int)Tokens.size();
-            if (Count > 0) {
-                for (int pin = 0; pin < Count; ++pin) {
-                    /* Store location assignment */
-                    pin_locs->assignments[sub_tile_index][x_offset][y_offset][side].push_back(std::string(Tokens[pin].c_str()));
-
-                    /* Advance through list of pins in this location */
-                }
-            }
-            Cur = Cur.next_sibling(Cur.name());
-        }
-
-        //Verify that all top-level pins have had their locations specified
-
-        //Record all the specified pins, as port name -> set of pin indices
-        std::map<std::string, std::set<int>> port_pins_with_specified_locations;
-        for (int w = 0; w < PhysicalTileType->width; ++w) {
-            for (int h = 0; h < PhysicalTileType->height; ++h) {
-                for (e_side side : {TOP, RIGHT, BOTTOM, LEFT}) {
-                    for (auto token : pin_locs->assignments[sub_tile_index][w][h][side]) {
-                        InstPort inst_port(token.c_str());
-
-                        //A pin specification should contain only the block name, and not any instance count information
-                        if (inst_port.instance_low_index() != InstPort::UNSPECIFIED || inst_port.instance_high_index() != InstPort::UNSPECIFIED) {
-                            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
-                                           "Pin location specification '%s' should not contain an instance range (should only be the block name)",
-                                           token.c_str());
-                        }
-
-                        //Check that the block name matches
-                        if (inst_port.instance_name() != SubTile->name) {
-                            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
-                                           "Mismatched sub tile name in pin location specification (expected '%s' was '%s')",
-                                           SubTile->name, inst_port.instance_name().c_str());
-                        }
-
-                        int pin_low_idx = inst_port.port_low_index();
-                        int pin_high_idx = inst_port.port_high_index();
-
-                        if (pin_low_idx == InstPort::UNSPECIFIED && pin_high_idx == InstPort::UNSPECIFIED) {
-                            //Empty range, so full port
-
-                            //Find the matching sub tile port to get the total number of pins
-                            const t_physical_tile_port* port = nullptr;
-                            for (const auto& tmp_port : SubTile->ports) {
-                                if (tmp_port.name == inst_port.port_name()) {
-                                    port = &tmp_port;
-                                    break;
-                                }
-                            }
-
-                            if (port) {
-                                pin_low_idx = 0;
-                                pin_high_idx = port->num_pins - 1;
-                            } else {
-                                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
-                                               "Failed to find port named '%s' on block '%s'",
-                                               inst_port.port_name().c_str(), SubTile->name);
-                            }
-                        }
-                        VTR_ASSERT(pin_low_idx >= 0);
-                        VTR_ASSERT(pin_high_idx >= 0);
-
-                        for (int ipin = pin_low_idx; ipin <= pin_high_idx; ++ipin) {
-                            //Record that the pin has its location specified
-                            port_pins_with_specified_locations[inst_port.port_name()].insert(ipin);
-                        }
-                    }
-                }
-            }
-        }
-
-        //Check for any pins missing location specs
-        for (const auto& port : SubTile->ports) {
-            for (int ipin = 0; ipin < port.num_pins; ++ipin) {
-                if (!port_pins_with_specified_locations[port.name].count(ipin)) {
-                    //Missing
-                    archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations),
-                                   "Pin '%s.%s[%d]' has no pin location specificed (a location is required for pattern=\"custom\")",
-                                   SubTile->name, port.name, ipin);
-                }
-            }
-        }
-    } else if (Locations) {
-        //Non-custom pin locations. There should be no child tags
-        expect_child_node_count(Locations, 0, loc_data);
-    }
-}
-
-/**
- * Parses every <sub_tile> child of a physical tile.
- *
- * For each sub tile this loads its name and capacity, its ports, its pin
- * locations, its Fc specification and its equivalent sites, accumulates the
- * pin counts into the physical tile, and appends the sub tile to
- * PhysicalTileType->sub_tiles. Once all sub tiles are read, the tile's pinloc
- * structure is sized and the pin classes and pin locations are set up.
- *
- * @param Node               The tile node holding the <sub_tile> children.
- * @param PhysicalTileType   Physical tile being populated.
- * @param LogicalBlockTypes  Logical block types referenced by equivalent sites.
- * @param segments           Routing segments, forwarded to Process_Fc.
- * @param arch_def_fc        Architecture default Fc, forwarded to Process_Fc.
- * @param loc_data           Source-location data used when reporting errors.
- *
- * Errors are reported through archfpga_throw() when the tile has no sub tile,
- * when two sub tiles share a name, or when a sub tile capacity is below 1.
- */
-static void ProcessSubTiles(pugi::xml_node Node,
-                            t_physical_tile_type* PhysicalTileType,
-                            std::vector<t_logical_block_type>& LogicalBlockTypes,
-                            std::vector<t_segment_inf>& segments,
-                            const t_default_fc_spec& arch_def_fc,
-                            const pugiutil::loc_data& loc_data) {
-    pugi::xml_node CurSubTile;
-    pugi::xml_node Cur;
-    int index = 0;
-
-    unsigned long int num_sub_tiles = count_children(Node, "sub_tile", loc_data);
-    unsigned long int width = PhysicalTileType->width;
-    unsigned long int height = PhysicalTileType->height;
-    unsigned long int num_sides = 4;
-
-    std::map<std::string, int> sub_tile_names;
-
-    /* Pin location record shared by all sub tiles of this tile */
-    t_pin_locs pin_locs;
-    pin_locs.assignments.resize({num_sub_tiles, width, height, num_sides});
-
-    if (num_sub_tiles == 0) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                       "No sub tile found for the Physical Tile %s.\n"
-                       "At least one sub tile is needed to correctly describe the Physical Tile.\n",
-                       PhysicalTileType->name);
-    }
-
-    CurSubTile = get_first_child(Node, "sub_tile", loc_data);
-
-    while (CurSubTile) {
-        t_sub_tile SubTile;
-
-        SubTile.index = index;
-
-        expect_only_attributes(CurSubTile, {"name", "capacity"}, loc_data);
-
-        /* Load type name */
-        auto name = vtr::strdup(get_attribute(CurSubTile, "name", loc_data).value());
-
-        //Check Sub Tile name duplicates
-        auto result = sub_tile_names.insert(std::pair<std::string, int>(std::string(name), 0));
-        if (!result.second) {
-            // Report the <sub_tile> node itself: `Cur` is still empty on the
-            // first iteration and refers to a previous sub tile's child later on.
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurSubTile),
-                           "Duplicate Sub Tile names in tile '%s': Sub Tile'%s'\n",
-                           PhysicalTileType->name, name);
-        }
-
-        SubTile.name = name;
-
-        /* Load properties */
-        int capacity = get_attribute(CurSubTile, "capacity", loc_data, ReqOpt::OPTIONAL).as_int(1);
-        if (capacity < 1) {
-            // A capacity below 1 would make the range set below empty or inverted.
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurSubTile),
-                           "Invalid capacity %d for Sub Tile '%s' in tile '%s' (must be at least 1).\n",
-                           capacity, name, PhysicalTileType->name);
-        }
-
-        /* This sub tile takes the next `capacity` slots after those already used by the tile */
-        SubTile.capacity.set(PhysicalTileType->capacity, PhysicalTileType->capacity + capacity - 1);
-        PhysicalTileType->capacity += capacity;
-
-        /* Process sub tile port definitions */
-        std::unordered_map<std::string, t_physical_tile_port> tile_port_names;
-        auto pin_counts = ProcessSubTilePorts(CurSubTile, &SubTile, tile_port_names, loc_data);
-
-        /* Map Sub Tile physical pins with the Physical Tile Type physical pins.
-         * This takes into account the capacity of each sub tiles to add the correct offset.
-         */
-        for (int ipin = 0; ipin < capacity * pin_counts.total(); ipin++) {
-            SubTile.sub_tile_to_tile_pin_indices.push_back(PhysicalTileType->num_pins + ipin);
-        }
-
-        SubTile.num_phy_pins = pin_counts.total() * capacity;
-
-        /* Assign pin counts to the Physical Tile Type */
-        PhysicalTileType->num_input_pins += capacity * pin_counts.input;
-        PhysicalTileType->num_output_pins += capacity * pin_counts.output;
-        PhysicalTileType->num_clock_pins += capacity * pin_counts.clock;
-        PhysicalTileType->num_pins += capacity * pin_counts.total();
-        PhysicalTileType->num_inst_pins += pin_counts.total();
-
-        /* Assign drivers and receivers count to Physical Tile Type */
-        PhysicalTileType->num_receivers += capacity * pin_counts.input;
-        PhysicalTileType->num_drivers += capacity * pin_counts.output;
-
-        /* Load pin locations */
-        Cur = get_single_child(CurSubTile, "pinlocations", loc_data, ReqOpt::OPTIONAL);
-        ProcessPinLocations(Cur, PhysicalTileType, &SubTile, &pin_locs, loc_data);
-
-        /* Load Fc */
-        Cur = get_single_child(CurSubTile, "fc", loc_data, ReqOpt::OPTIONAL);
-        Process_Fc(Cur, PhysicalTileType, &SubTile, pin_counts, segments, arch_def_fc, loc_data);
-
-        //Load equivalent sites information
-        Cur = get_single_child(CurSubTile, "equivalent_sites", loc_data, ReqOpt::REQUIRED);
-        ProcessTileEquivalentSites(Cur, &SubTile, PhysicalTileType, LogicalBlockTypes, loc_data);
-
-        PhysicalTileType->sub_tiles.push_back(SubTile);
-
-        index++;
-
-        CurSubTile = CurSubTile.next_sibling(CurSubTile.name());
-    }
-
-    // Initialize pinloc data structure.
-    int num_pins = PhysicalTileType->num_pins;
-    PhysicalTileType->pinloc.resize({width, height, num_sides}, std::vector<bool>(num_pins, false));
-
-    setup_pin_classes(PhysicalTileType);
-    // `Cur` is passed on unchanged here: it is the last sub tile's <equivalent_sites> node.
-    LoadPinLoc(Cur, PhysicalTileType, &pin_locs, loc_data);
-}
-
-/* Loads every <pb_type> child of the given complex block list node into
- * LogicalBlockTypes.
- *
- * Slot 0 of the list is always the empty pseudo-type; the parsed types follow
- * with indices 1, 2, ... in document order. A name that appears twice is
- * reported through archfpga_throw(). */
-static void ProcessComplexBlocks(vtr::string_internment* strings, pugi::xml_node Node, std::vector<t_logical_block_type>& LogicalBlockTypes, t_arch& arch, const bool timing_enabled, const pugiutil::loc_data& loc_data) {
-    /* Names seen so far, used to detect duplicated descriptors */
-    std::set<std::string> seen_names;
-
-    /* The list starts with one extra entry: the empty pseudo-type at index 0 */
-    t_logical_block_type empty_type = get_empty_logical_type();
-    empty_type.index = 0;
-    LogicalBlockTypes.push_back(empty_type);
-
-    /* Real types are numbered from 1, right after the empty type */
-    int next_index = 1;
-
-    for (pugi::xml_node block_node = Node.first_child(); block_node; block_node = block_node.next_sibling(block_node.name())) {
-        check_node(block_node, "pb_type", loc_data);
-
-        t_logical_block_type block_type;
-
-        expect_only_attributes(block_node, {"name"}, loc_data);
-
-        /* Load type name */
-        const char* type_name = get_attribute(block_node, "name", loc_data).value();
-        block_type.name = vtr::strdup(type_name);
-
-        const bool is_new_name = seen_names.insert(std::string(block_type.name)).second;
-        if (!is_new_name) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(block_node),
-                           "Duplicate pb_type descriptor name: '%s'.\n", block_type.name);
-        }
-
-        /* Create the top-level pb_type of this logical block and parse its contents */
-        block_type.pb_type = new t_pb_type;
-        block_type.pb_type->name = vtr::strdup(block_type.name);
-        ProcessPb_Type(strings, block_node, block_type.pb_type, nullptr, timing_enabled, arch, loc_data);
-
-        /* Type fully read: number it and store it */
-        block_type.index = next_index;
-        next_index += 1;
-        LogicalBlockTypes.push_back(block_type);
-    }
-}
-
-/**
- * Parses every <segment> child of the segment list into `Segs`.
- *
- * For each segment this reads the name (a default "unnamed_segment_<index>"
- * is used when it is absent and not required), length or "longline",
- * frequency, Rmetal/Cmetal, parallel axis, directionality, the switches it
- * refers to (resolved by name against `Switches`), and the optional <cb>/<sb>
- * population patterns, which default to fully populated.
- *
- * @param Parent                    The node holding the <segment> children.
- * @param Segs                      Output list, resized to the segment count.
- * @param Switches                  Architecture switches searched by name.
- * @param NumSwitches               Number of entries in `Switches`.
- * @param timing_enabled            Makes Rmetal/Cmetal required when true.
- * @param switchblocklist_required  Makes the segment name mandatory when true.
- * @param loc_data                  Source-location data used when reporting errors.
- *
- * Errors are reported through archfpga_throw() for a missing required name,
- * an unknown axis or type, an unknown switch name, or when the x or y
- * direction is not covered by any segment.
- */
-static void ProcessSegments(pugi::xml_node Parent,
-                            std::vector<t_segment_inf>& Segs,
-                            const t_arch_switch_inf* Switches,
-                            const int NumSwitches,
-                            const bool timing_enabled,
-                            const bool switchblocklist_required,
-                            const pugiutil::loc_data& loc_data) {
-    int i, j, length;
-    const char* tmp;
-
-    pugi::xml_node SubElem;
-    pugi::xml_node Node;
-
-    /* Resolves the `name` attribute of a switch reference tag to its index in
-     * Switches. `tag` names the kind of reference for the error message. */
-    auto find_switch_index = [&](pugi::xml_node switch_node, const char* tag) -> int {
-        const char* switch_name = get_attribute(switch_node, "name", loc_data).value();
-        for (int isw = 0; isw < NumSwitches; ++isw) {
-            if (0 == strcmp(switch_name, Switches[isw].name)) {
-                return isw;
-            }
-        }
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(switch_node),
-                       "'%s' is not a valid %s name.\n", switch_name, tag);
-        return -1; /* archfpga_throw is expected not to return */
-    };
-
-    /* Count the number of segs and check they are in fact
-     * of segment elements. */
-    int NumSegs = count_children(Parent, "segment", loc_data);
-
-    /* Alloc segment list */
-    if (NumSegs > 0) {
-        Segs.resize(NumSegs);
-    }
-
-    /* Load the segments. */
-    Node = get_first_child(Parent, "segment", loc_data);
-
-    bool x_axis_seg_found = false; /*Flags to see if we have any x-directed segment type specified*/
-    bool y_axis_seg_found = false; /*Flags to see if we have any y-directed segment type specified*/
-
-    for (i = 0; i < NumSegs; ++i) {
-        /* Get segment name */
-        tmp = get_attribute(Node, "name", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-        if (tmp) {
-            Segs[i].name = std::string(tmp);
-        } else {
-            /* if switch block is "custom", then you have to provide a name for segment */
-            if (switchblocklist_required) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                               "No name specified for the segment #%d.\n", i);
-            }
-            /* set name to default: "unnamed_segment_<segment_index>" */
-            Segs[i].name = "unnamed_segment_" + std::to_string(i);
-        }
-
-        /* Get segment length */
-        length = 1; /* DEFAULT */
-        tmp = get_attribute(Node, "length", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-        if (tmp) {
-            if (strcmp(tmp, "longline") == 0) {
-                Segs[i].longline = true;
-            } else {
-                length = vtr::atoi(tmp);
-            }
-        }
-        Segs[i].length = length;
-
-        /* Get the frequency */
-        Segs[i].frequency = 1; /* DEFAULT */
-        tmp = get_attribute(Node, "freq", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-        if (tmp) {
-            Segs[i].frequency = (int)(atof(tmp) * MAX_CHANNEL_WIDTH);
-        }
-
-        /* Get timing info */
-        ReqOpt TIMING_ENABLE_REQD = BoolToReqOpt(timing_enabled);
-        Segs[i].Rmetal = get_attribute(Node, "Rmetal", loc_data, TIMING_ENABLE_REQD).as_float(0);
-        Segs[i].Cmetal = get_attribute(Node, "Cmetal", loc_data, TIMING_ENABLE_REQD).as_float(0);
-
-        /*Get parallel axis*/
-
-        Segs[i].parallel_axis = BOTH_AXIS; /*DEFAULT value if no axis is specified*/
-        tmp = get_attribute(Node, "axis", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-
-        if (tmp) {
-            if (strcmp(tmp, "x") == 0) {
-                Segs[i].parallel_axis = X_AXIS;
-                x_axis_seg_found = true;
-            } else if (strcmp(tmp, "y") == 0) {
-                Segs[i].parallel_axis = Y_AXIS;
-                y_axis_seg_found = true;
-            } else {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node), "Unsopported parralel axis type: %s\n", tmp);
-            }
-        } else {
-            /* A segment without an axis serves both directions */
-            x_axis_seg_found = true;
-            y_axis_seg_found = true;
-        }
-
-        //Set of expected subtags (exact subtags are dependent on parameters)
-        std::vector<std::string> expected_subtags;
-
-        if (!Segs[i].longline) {
-            //Long line doesn't accept <sb> or <cb> since it assumes full population
-            expected_subtags.push_back("sb");
-            expected_subtags.push_back("cb");
-        }
-
-        /* Get the type */
-        tmp = get_attribute(Node, "type", loc_data).value();
-        if (0 == strcmp(tmp, "bidir")) {
-            Segs[i].directionality = BI_DIRECTIONAL;
-
-            //Bidir requires the following tags
-            expected_subtags.push_back("wire_switch");
-            expected_subtags.push_back("opin_switch");
-        }
-
-        else if (0 == strcmp(tmp, "unidir")) {
-            Segs[i].directionality = UNI_DIRECTIONAL;
-
-            //Unidir requires the following tags
-            expected_subtags.push_back("mux");
-        }
-
-        else {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                           "Invalid switch type '%s'.\n", tmp);
-        }
-
-        //Verify only expected sub-tags are found
-        expect_only_children(Node, expected_subtags, loc_data);
-
-        /* Get the wire and opin switches, or mux switch if unidir */
-        if (UNI_DIRECTIONAL == Segs[i].directionality) {
-            SubElem = get_single_child(Node, "mux", loc_data);
-            j = find_switch_index(SubElem, "mux");
-
-            /* Unidir muxes must have the same switch
-             * for wire and opin fanin since there is
-             * really only the mux in unidir. */
-            Segs[i].arch_wire_switch = j;
-            Segs[i].arch_opin_switch = j;
-        }
-
-        else {
-            VTR_ASSERT(BI_DIRECTIONAL == Segs[i].directionality);
-            SubElem = get_single_child(Node, "wire_switch", loc_data);
-            Segs[i].arch_wire_switch = find_switch_index(SubElem, "wire_switch");
-
-            SubElem = get_single_child(Node, "opin_switch", loc_data);
-            Segs[i].arch_opin_switch = find_switch_index(SubElem, "opin_switch");
-        }
-
-        /* Setup the CB list if they give one, otherwise use full */
-        Segs[i].cb.resize(length);
-        for (j = 0; j < length; ++j) {
-            Segs[i].cb[j] = true;
-        }
-        SubElem = get_single_child(Node, "cb", loc_data, ReqOpt::OPTIONAL);
-        if (SubElem) {
-            ProcessCB_SB(SubElem, Segs[i].cb, loc_data);
-        }
-
-        /* Setup the SB list if they give one, otherwise use full */
-        Segs[i].sb.resize(length + 1);
-        for (j = 0; j < (length + 1); ++j) {
-            Segs[i].sb[j] = true;
-        }
-        SubElem = get_single_child(Node, "sb", loc_data, ReqOpt::OPTIONAL);
-        if (SubElem) {
-            ProcessCB_SB(SubElem, Segs[i].sb, loc_data);
-        }
-
-        /*Store the index of this segment in Segs vector*/
-        Segs[i].seg_index = i;
-        /* Get next Node */
-        Node = Node.next_sibling(Node.name());
-    }
-    /*We need at least one type of segment that applies to each of x- and y-directed wiring.*/
-
-    if (!x_axis_seg_found || !y_axis_seg_found) {
-        /* Report the segment list itself: Node has been advanced past the last segment by now */
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent),
-                       "Atleast one segment per-axis needs to get specified if no segments with non-specified (default) axis attribute exist.");
-    }
-}
-/* Processes the switchblocklist section from the xml architecture file.
- * See vpr/SRC/route/build_switchblocks.c for a detailed description of this
- * switch block format.
- *
- * Every "switchblock" child of Parent is read into a temporary t_switchblock_inf
- * (name, directionality, location, permutation functions and wire connections),
- * checked with check_switchblock() and then appended to arch->switchblocks.
- *
- * An unrecognized switchblock type or location is reported through archfpga_throw. */
-static void ProcessSwitchblocks(pugi::xml_node Parent, t_arch* arch, const pugiutil::loc_data& loc_data) {
-    /* get the number of switchblocks */
-    const int num_switchblocks = count_children(Parent, "switchblock", loc_data);
-    arch->switchblocks.reserve(num_switchblocks);
-
-    /* read-in all switchblock data */
-    pugi::xml_node Node = get_first_child(Parent, "switchblock", loc_data);
-    for (int i_sb = 0; i_sb < num_switchblocks; i_sb++) {
-        /* use a temp variable which will be assigned to switchblocks later */
-        t_switchblock_inf sb;
-
-        /* get name */
-        const char* name = get_attribute(Node, "name", loc_data).as_string(nullptr);
-        if (name) {
-            sb.name = name;
-        }
-
-        /* get type */
-        const char* type = get_attribute(Node, "type", loc_data).as_string(nullptr);
-        if (type) {
-            if (0 == strcmp(type, "bidir")) {
-                sb.directionality = BI_DIRECTIONAL;
-            } else if (0 == strcmp(type, "unidir")) {
-                sb.directionality = UNI_DIRECTIONAL;
-            } else {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node), "Unsupported switchblock type: %s\n", type);
-            }
-        }
-
-        /* get the switchblock location */
-        pugi::xml_node SubElem = get_single_child(Node, "switchblock_location", loc_data);
-        const char* location = get_attribute(SubElem, "type", loc_data).as_string(nullptr);
-        if (location) {
-            if (strcmp(location, "EVERYWHERE") == 0) {
-                sb.location = E_EVERYWHERE;
-            } else if (strcmp(location, "PERIMETER") == 0) {
-                sb.location = E_PERIMETER;
-            } else if (strcmp(location, "CORE") == 0) {
-                sb.location = E_CORE;
-            } else if (strcmp(location, "CORNER") == 0) {
-                sb.location = E_CORNER;
-            } else if (strcmp(location, "FRINGE") == 0) {
-                sb.location = E_FRINGE;
-            } else {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(SubElem), "unrecognized switchblock location: %s\n", location);
-            }
-        }
-
-        /* get switchblock permutation functions */
-        SubElem = get_first_child(Node, "switchfuncs", loc_data);
-        read_sb_switchfuncs(SubElem, &sb, loc_data);
-
-        /* get the wire connections, resolving switch names against the arch switch list */
-        read_sb_wireconns(arch->Switches, arch->num_switches, Node, &sb, loc_data);
-
-        /* run error checks on switch blocks */
-        check_switchblock(&sb, arch);
-
-        /* assign the sb to the switchblocks vector */
-        arch->switchblocks.push_back(sb);
-
-        /* advance to the next sibling with the same tag name */
-        Node = Node.next_sibling(Node.name());
-    }
-}
-
-/* Reads a connection-block (CB) or switch-block (SB) depopulation pattern into 'list'.
- *
- *  Node:     the "cb" or "sb" element; its "type" attribute must be "pattern" and its
- *            text content holds the pattern
- *  list:     pre-sized by the caller; list.size() is the number of symbols expected.
- *            Entry k is set to true for the k-th 'T'/'1' symbol and false for 'F'/'0'
- *  loc_data: file/line information used for error reporting
- *
- * Spaces, tabs and newlines in the pattern are ignored. Any other character, a
- * pattern with more or fewer symbols than list.size(), or a type other than
- * "pattern" is reported through archfpga_throw.
- *
- * We only support 'pattern' for now. Should add frac back eventually. */
-static void ProcessCB_SB(pugi::xml_node Node, std::vector<bool>& list, const pugiutil::loc_data& loc_data) {
-    const int len = list.size();
-
-    const char* type = get_attribute(Node, "type", loc_data).value();
-    if (0 != strcmp(type, "pattern")) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                       "'%s' is not a valid type for specifying cb and sb depopulation.\n",
-                       type);
-    }
-
-    /* Walk the content string one character at a time */
-    int i = 0;
-    for (const char* c = Node.child_value(); *c; ++c) {
-        bool populated = false;
-        switch (*c) {
-            case ' ':
-            case '\t':
-            case '\n':
-                continue; /* whitespace between symbols carries no meaning */
-            case 'T':
-            case '1':
-                populated = true;
-                break;
-            case 'F':
-            case '0':
-                populated = false;
-                break;
-            default:
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                               "Invalid character %c in CB or SB depopulation list.\n",
-                               *c);
-        }
-
-        /* The expected length is whatever the caller sized the list to, so report
-         * that value directly instead of guessing whether this is a CB or an SB list. */
-        if (i >= len) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                           "CB or SB depopulation is too long. It should be %d symbols.\n",
-                           len);
-        }
-        list[i] = populated;
-        ++i;
-    }
-
-    if (i < len) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                       "CB or SB depopulation is too short (%d). It should be %d symbols.\n",
-                       i, len);
-    }
-}
-
-/* Reads every "switch" child of Parent into a newly allocated switch array.
- *
- *  Parent:         node whose "switch" children are read
- *  Switches:       out; set to a new[]-allocated array of *NumSwitches entries,
- *                  or to nullptr when there are no switches
- *  NumSwitches:    out; number of "switch" children found
- *  timing_enabled: when true, R/Cin/Cout are required attributes (Cin/Cout stay
- *                  optional for 'short' switches); otherwise they are optional
- *  loc_data:       file/line information used for error reporting
- *
- * Reserved or duplicate switch names and unknown switch types are reported
- * through archfpga_throw. */
-static void ProcessSwitches(pugi::xml_node Parent,
-                            t_arch_switch_inf** Switches,
-                            int* NumSwitches,
-                            const bool timing_enabled,
-                            const pugiutil::loc_data& loc_data) {
-    int i, j;
-    const char* type_name;
-    const char* switch_name;
-    ReqOpt TIMING_ENABLE_REQD = BoolToReqOpt(timing_enabled);
-
-    pugi::xml_node Node;
-
-    /* Count the children and check they are switches */
-    *NumSwitches = count_children(Parent, "switch", loc_data);
-
-    /* Alloc switch list */
-    *Switches = nullptr;
-    if (*NumSwitches > 0) {
-        (*Switches) = new t_arch_switch_inf[(*NumSwitches)];
-    }
-
-    /* Load the switches. */
-    Node = get_first_child(Parent, "switch", loc_data);
-    for (i = 0; i < *NumSwitches; ++i) {
-        t_arch_switch_inf& arch_switch = (*Switches)[i];
-
-        switch_name = get_attribute(Node, "name", loc_data).value();
-
-        /* Check if the switch has conflicts with any reserved names */
-        if (0 == strcmp(switch_name, VPR_DELAYLESS_SWITCH_NAME)) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                           "Switch name '%s' is a reserved name for VPR internal usage! Please use another  name.\n",
-                           switch_name);
-        }
-
-        type_name = get_attribute(Node, "type", loc_data).value();
-
-        /* Check for switch name collisions against the switches loaded so far */
-        for (j = 0; j < i; ++j) {
-            if (0 == strcmp((*Switches)[j].name, switch_name)) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                               "Two switches with the same name '%s' were found.\n",
-                               switch_name);
-            }
-        }
-        arch_switch.name = vtr::strdup(switch_name);
-
-        /* Figure out the type of switch */
-        /* Due to their configuration of pass transistors feeding into a buffer,
-         * only multiplexers and tristate buffers have an internal capacitance element,
-         * so only those two types accept the Cinternal attribute. */
-
-        /* Context suffix handed to expect_only_attributes; built once so that every
-         * switch type quotes the type name the same way. */
-        const std::string type_context = " with type '"s + type_name + "'"s;
-
-        SwitchType type = SwitchType::MUX;
-        if (0 == strcmp(type_name, "mux")) {
-            type = SwitchType::MUX;
-            expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Cinternal", "Tdel", "buf_size", "power_buf_size", "mux_trans_size"}, type_context, loc_data);
-
-        } else if (0 == strcmp(type_name, "tristate")) {
-            type = SwitchType::TRISTATE;
-            expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Cinternal", "Tdel", "buf_size", "power_buf_size"}, type_context, loc_data);
-
-        } else if (0 == strcmp(type_name, "buffer")) {
-            type = SwitchType::BUFFER;
-            expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel", "buf_size", "power_buf_size"}, type_context, loc_data);
-
-        } else if (0 == strcmp(type_name, "pass_gate")) {
-            type = SwitchType::PASS_GATE;
-            expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel"}, type_context, loc_data);
-
-        } else if (0 == strcmp(type_name, "short")) {
-            type = SwitchType::SHORT;
-            expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel"}, type_context, loc_data);
-        } else {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                           "Invalid switch type '%s'.\n", type_name);
-        }
-        arch_switch.set_type(type);
-
-        arch_switch.R = get_attribute(Node, "R", loc_data, TIMING_ENABLE_REQD).as_float(0);
-
-        ReqOpt COUT_REQD = TIMING_ENABLE_REQD;
-        ReqOpt CIN_REQD = TIMING_ENABLE_REQD;
-        // We have defined the Cinternal parameter as optional, so that the user may specify an
-        // architecture without Cinternal without breaking the program flow.
-        ReqOpt CINTERNAL_REQD = ReqOpt::OPTIONAL;
-
-        if (arch_switch.type() == SwitchType::SHORT) {
-            //Cin/Cout are optional on shorts, since they really only have one capacitance
-            CIN_REQD = ReqOpt::OPTIONAL;
-            COUT_REQD = ReqOpt::OPTIONAL;
-        }
-        arch_switch.Cin = get_attribute(Node, "Cin", loc_data, CIN_REQD).as_float(0);
-        arch_switch.Cout = get_attribute(Node, "Cout", loc_data, COUT_REQD).as_float(0);
-        arch_switch.Cinternal = get_attribute(Node, "Cinternal", loc_data, CINTERNAL_REQD).as_float(0);
-
-        if (arch_switch.type() == SwitchType::MUX) {
-            //Only muxes have mux transistors
-            arch_switch.mux_trans_size = get_attribute(Node, "mux_trans_size", loc_data, ReqOpt::OPTIONAL).as_float(1);
-        } else {
-            arch_switch.mux_trans_size = 0.;
-        }
-
-        if (arch_switch.type() == SwitchType::SHORT
-            || arch_switch.type() == SwitchType::PASS_GATE) {
-            //No buffers
-            arch_switch.buf_size_type = BufferSize::ABSOLUTE;
-            arch_switch.buf_size = 0.;
-            arch_switch.power_buffer_type = POWER_BUFFER_TYPE_ABSOLUTE_SIZE;
-            arch_switch.power_buffer_size = 0.;
-        } else {
-            //A missing buf_size attribute is treated the same as buf_size="auto"
-            auto buf_size_attrib = get_attribute(Node, "buf_size", loc_data, ReqOpt::OPTIONAL);
-            if (!buf_size_attrib || buf_size_attrib.as_string() == std::string("auto")) {
-                arch_switch.buf_size_type = BufferSize::AUTO;
-                arch_switch.buf_size = 0.;
-            } else {
-                arch_switch.buf_size_type = BufferSize::ABSOLUTE;
-                arch_switch.buf_size = buf_size_attrib.as_float();
-            }
-
-            //Likewise a missing power_buf_size attribute means "auto"
-            auto power_buf_size = get_attribute(Node, "power_buf_size", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-            if (power_buf_size == nullptr) {
-                arch_switch.power_buffer_type = POWER_BUFFER_TYPE_AUTO;
-            } else if (strcmp(power_buf_size, "auto") == 0) {
-                arch_switch.power_buffer_type = POWER_BUFFER_TYPE_AUTO;
-            } else {
-                arch_switch.power_buffer_type = POWER_BUFFER_TYPE_ABSOLUTE_SIZE;
-                arch_switch.power_buffer_size = (float)vtr::atof(power_buf_size);
-            }
-        }
-
-        //Load the Tdel (which may be specified with sub-tags)
-        ProcessSwitchTdel(Node, timing_enabled, i, (*Switches), loc_data);
-
-        /* Get next switch element */
-        Node = Node.next_sibling(Node.name());
-    }
-}
-
-/* Loads the delay information of one switch.
- *
- * A switch may give its delay in exactly one of two forms:
- *  - a constant, through the "Tdel" attribute of the switch node itself, or
- *  - a function of the switch fan-in, through child nodes of the form
- *
- *        <Tdel num_inputs="1" delay="3e-11"/>
- *
- * Giving both forms at once is an error, as is repeating a num_inputs value
- * among the children. When neither form is present the delay defaults to 0,
- * unless timing is enabled, in which case that is an error too.
- *
- * The result is stored with set_Tdel() on Switches[switch_index]. */
-static void ProcessSwitchTdel(pugi::xml_node Node, const bool timing_enabled, const int switch_index, t_arch_switch_inf* Switches, const pugiutil::loc_data& loc_data) {
-    /* Constant form: UNDEFINED coming back means the attribute was not given */
-    const float constant_delay = get_attribute(Node, "Tdel", loc_data, ReqOpt::OPTIONAL).as_float(UNDEFINED);
-    const bool given_as_attribute = (constant_delay != UNDEFINED);
-
-    /* Fan-in dependent form: one child per fan-in value */
-    const int child_count = count_children(Node, "Tdel", loc_data, ReqOpt::OPTIONAL);
-    const bool given_as_children = (child_count != 0);
-
-    /* The two forms are mutually exclusive */
-    if (given_as_attribute && given_as_children) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                       "Switch delay should be specified as EITHER a Tdel property OR as a child of the switch node, not both");
-    }
-
-    if (given_as_attribute) {
-        /* One delay, independent of fan-in */
-        Switches[switch_index].set_Tdel(t_arch_switch_inf::UNDEFINED_FANIN, constant_delay);
-        return;
-    }
-
-    if (!given_as_children) {
-        /* Nothing was specified at all */
-        if (timing_enabled) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                           "Switch should contain intrinsic delay information if timing is enabled");
-        } else {
-            /* fall back to a zero delay */
-            Switches[switch_index].set_Tdel(t_arch_switch_inf::UNDEFINED_FANIN, 0.);
-        }
-        return;
-    }
-
-    /* Record one (num_inputs -> delay) entry per child, rejecting repeated fan-ins */
-    std::set<int> fanins_so_far;
-    pugi::xml_node entry = get_first_child(Node, "Tdel", loc_data);
-    for (int n = 0; n < child_count; n++, entry = entry.next_sibling(entry.name())) {
-        const int fanin = get_attribute(entry, "num_inputs", loc_data).as_int(0);
-        const float delay = get_attribute(entry, "delay", loc_data).as_float(0.);
-
-        if (fanins_so_far.find(fanin) != fanins_so_far.end()) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(entry),
-                           "Tdel node specified num_inputs (%d) that has already been specified by another Tdel node", fanin);
-        } else {
-            Switches[switch_index].set_Tdel(fanin, delay);
-            fanins_so_far.insert(fanin);
-        }
-    }
-}
-
-/* Loads every "direct" child of Parent into a freshly allocated array.
- *
- *  Directs / NumDirects: out; the zero-filled array obtained from vtr::malloc
- *                        (nullptr when there are no directs) and its length
- *  Switches / NumSwitches: switch list used to resolve the optional switch_name
- *
- * Duplicate direct names, identical from/to pins, an unknown switch name and an
- * all-zero offset are reported through archfpga_throw. */
-static void ProcessDirects(pugi::xml_node Parent, t_direct_inf** Directs, int* NumDirects, const t_arch_switch_inf* Switches, const int NumSwitches, const pugiutil::loc_data& loc_data) {
-    /* Only direct connections may appear here; count them */
-    expect_only_children(Parent, {"direct"}, loc_data);
-    *NumDirects = count_children(Parent, "direct", loc_data);
-
-    /* Allocate and zero the list */
-    *Directs = nullptr;
-    if (*NumDirects > 0) {
-        *Directs = (t_direct_inf*)vtr::malloc(*NumDirects * sizeof(t_direct_inf));
-        memset(*Directs, 0, (*NumDirects * sizeof(t_direct_inf)));
-    }
-
-    /* Fill in one entry per direct element */
-    pugi::xml_node Node = get_first_child(Parent, "direct", loc_data);
-    for (int i = 0; i < *NumDirects; ++i, Node = Node.next_sibling(Node.name())) {
-        expect_only_attributes(Node, {"name", "from_pin", "to_pin", "x_offset", "y_offset", "z_offset", "switch_name", "from_side", "to_side"}, loc_data);
-
-        t_direct_inf& direct = (*Directs)[i];
-
-        /* The name must differ from all directs read before this one */
-        const char* direct_name = get_attribute(Node, "name", loc_data).value();
-        for (int prev = 0; prev < i; ++prev) {
-            if (0 == strcmp((*Directs)[prev].name, direct_name)) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                               "Two directs with the same name '%s' were found.\n",
-                               direct_name);
-            }
-        }
-        direct.name = vtr::strdup(direct_name);
-
-        /* Source and sink pin names; they may not be the same pin */
-        const char* from_pin_name = get_attribute(Node, "from_pin", loc_data).value();
-        const char* to_pin_name = get_attribute(Node, "to_pin", loc_data).value();
-        if (0 == strcmp(to_pin_name, from_pin_name)) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                           "The source pin and sink pin are the same: %s.\n",
-                           to_pin_name);
-        }
-        direct.from_pin = vtr::strdup(from_pin_name);
-        direct.to_pin = vtr::strdup(to_pin_name);
-
-        /* Offsets between the two ends of the connection */
-        direct.x_offset = get_attribute(Node, "x_offset", loc_data).as_int(0);
-        direct.y_offset = get_attribute(Node, "y_offset", loc_data).as_int(0);
-        direct.sub_tile_offset = get_attribute(Node, "z_offset", loc_data).as_int(0);
-
-        /* Optional sides */
-        std::string from_side_str = get_attribute(Node, "from_side", loc_data, ReqOpt::OPTIONAL).value();
-        direct.from_side = string_to_side(from_side_str);
-        std::string to_side_str = get_attribute(Node, "to_side", loc_data, ReqOpt::OPTIONAL).value();
-        direct.to_side = string_to_side(to_side_str);
-
-        //Optional switch type.
-        //If not defined, use the delayless switch by default, marked here as -1.
-        //TODO: find a better way of indicating this.  Ideally, we would
-        //specify the delayless switch index here, but it does not appear
-        //to be defined at this point.
-        int switch_idx = -1;
-        const char* switch_name = get_attribute(Node, "switch_name", loc_data, ReqOpt::OPTIONAL).as_string(nullptr);
-        if (switch_name != nullptr) {
-            //Look-up the user defined switch
-            switch_idx = 0;
-            while (switch_idx < NumSwitches && 0 != strcmp(switch_name, Switches[switch_idx].name)) {
-                switch_idx++;
-            }
-            if (switch_idx >= NumSwitches) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                               "Could not find switch named '%s' in switch list.\n", switch_name);
-            }
-        }
-        direct.switch_type = switch_idx;
-
-        /* A direct that moves nowhere in x, y and z is meaningless */
-        const bool zero_length = (direct.x_offset == 0) && (direct.y_offset == 0) && (direct.sub_tile_offset == 0);
-        if (zero_length) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                           "The x_offset, y_offset, z_offset are all zero, this is a length 0 direct chain connection.\n");
-        }
-
-        direct.line = loc_data.line(Node);
-        /* Should I check that the direct chain offset is not greater than the chip? How? */
-    }
-}
-
-/* Reads the "metal_layer" entries found under parent's "metal_layers" child and
- * stores them in metal_layers, keyed by layer name. Each entry carries the
- * Rmetal and Cmetal attribute values. A repeated layer name is reported through
- * archfpga_throw. */
-static void ProcessClockMetalLayers(pugi::xml_node parent,
-                                    std::unordered_map<std::string, t_metal_layer>& metal_layers,
-                                    pugiutil::loc_data& loc_data) {
-    std::vector<std::string> layer_attribute_names = {"name", "Rmetal", "Cmetal"};
-    std::vector<std::string> layer_list_child_names = {"metal_layer"};
-
-    pugi::xml_node layer_list = get_single_child(parent, "metal_layers", loc_data);
-    const int layer_count = count_children(layer_list, "metal_layer", loc_data);
-
-    pugi::xml_node layer_node = get_first_child(layer_list, "metal_layer", loc_data);
-    for (int n = 0; n < layer_count; n++, layer_node = layer_node.next_sibling(layer_node.name())) {
-        expect_only_children(layer_list, layer_list_child_names, loc_data);
-        expect_only_attributes(layer_node, layer_attribute_names, loc_data);
-
-        // Read the layer's name, resistance and capacitance
-        std::string layer_name(get_attribute(layer_node, "name", loc_data).value());
-        t_metal_layer layer;
-        layer.r_metal = get_attribute(layer_node, "Rmetal", loc_data).as_float(0.);
-        layer.c_metal = get_attribute(layer_node, "Cmetal", loc_data).as_float(0.);
-
-        // Layer names must be unique
-        if (metal_layers.count(layer_name) != 0) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(layer_node),
-                           "Two metal layers with the same name '%s' were found.\n",
-                           layer_name.c_str());
-        }
-        metal_layers.emplace(layer_name, layer);
-    }
-}
-
-/* Reads every "clock_network" child of parent and appends one t_clock_network_arch
- * per network to clock_networks.
- *
- * A network is described by an optional "spine" child and/or an optional "rib"
- * child; at least one of them must be present. When both are present the rib
- * values are the ones that remain in the stored network, because the rib is
- * parsed last. The repeatx / repeaty attributes default to "W" and "H".
- *
- *  switches / num_switches: switch list handed to ProcessClockSwitchPoints to
- *                           resolve the switch names used by the switch points
- *
- * A network with neither a spine nor a rib is reported through archfpga_throw. */
-static void ProcessClockNetworks(pugi::xml_node parent,
-                                 std::vector<t_clock_network_arch>& clock_networks,
-                                 const t_arch_switch_inf* switches,
-                                 const int num_switches,
-                                 pugiutil::loc_data& loc_data) {
-    std::vector<std::string> expected_spine_attributes = {"name", "num_inst", "metal_layer", "starty", "endy", "x", "repeatx", "repeaty"};
-    std::vector<std::string> expected_rib_attributes = {"name", "num_inst", "metal_layer", "startx", "endx", "y", "repeatx", "repeaty"};
-    std::vector<std::string> expected_children = {"rib", "spine"};
-
-    int num_clock_networks = count_children(parent, "clock_network", loc_data);
-    pugi::xml_node curr_network = get_first_child(parent, "clock_network", loc_data);
-    for (int i = 0; i < num_clock_networks; i++) {
-        expect_only_children(curr_network, expected_children, loc_data);
-
-        t_clock_network_arch clock_network;
-
-        std::string name(get_attribute(curr_network, "name", loc_data).value());
-        clock_network.name = name;
-        clock_network.num_inst = get_attribute(curr_network, "num_inst", loc_data).as_int(0);
-        bool is_supported_clock_type = false;
-        pugi::xml_node curr_type;
-
-        // Parse spine
-        curr_type = get_single_child(curr_network, "spine", loc_data, ReqOpt::OPTIONAL);
-        if (curr_type) {
-            // NOTE(review): the attribute check is applied to the clock_network node,
-            // not to the spine node itself — confirm whether that is intended.
-            expect_only_attributes(curr_network, expected_spine_attributes, loc_data);
-
-            is_supported_clock_type = true;
-            clock_network.type = e_clock_type::SPINE;
-
-            std::string metal_layer(get_attribute(curr_type, "metal_layer", loc_data).value());
-            std::string starty(get_attribute(curr_type, "starty", loc_data).value());
-            std::string endy(get_attribute(curr_type, "endy", loc_data).value());
-            std::string x(get_attribute(curr_type, "x", loc_data).value());
-
-            // repeatx / repeaty fall back to "W" / "H" when not given
-            auto repeatx_attr = get_attribute(curr_type, "repeatx", loc_data, ReqOpt::OPTIONAL);
-            std::string repeatx = repeatx_attr ? repeatx_attr.value() : "W";
-            auto repeaty_attr = get_attribute(curr_type, "repeaty", loc_data, ReqOpt::OPTIONAL);
-            std::string repeaty = repeaty_attr ? repeaty_attr.value() : "H";
-
-            clock_network.metal_layer = metal_layer;
-            clock_network.wire.start = starty;
-            clock_network.wire.end = endy;
-            clock_network.wire.position = x;
-            clock_network.repeat.x = repeatx;
-            clock_network.repeat.y = repeaty;
-
-            ProcessClockSwitchPoints(curr_type, clock_network, switches, num_switches, loc_data);
-        }
-
-        // Parse rib
-        curr_type = get_single_child(curr_network, "rib", loc_data, ReqOpt::OPTIONAL);
-        if (curr_type) {
-            // Validate against the rib attribute list (the spine list was used here before).
-            // NOTE(review): as for the spine, the check targets the clock_network node.
-            expect_only_attributes(curr_network, expected_rib_attributes, loc_data);
-
-            is_supported_clock_type = true;
-            clock_network.type = e_clock_type::RIB;
-
-            std::string metal_layer(get_attribute(curr_type, "metal_layer", loc_data).value());
-            std::string startx(get_attribute(curr_type, "startx", loc_data).value());
-            std::string endx(get_attribute(curr_type, "endx", loc_data).value());
-            std::string y(get_attribute(curr_type, "y", loc_data).value());
-
-            // repeatx / repeaty fall back to "W" / "H" when not given
-            auto repeatx_attr = get_attribute(curr_type, "repeatx", loc_data, ReqOpt::OPTIONAL);
-            std::string repeatx = repeatx_attr ? repeatx_attr.value() : "W";
-            auto repeaty_attr = get_attribute(curr_type, "repeaty", loc_data, ReqOpt::OPTIONAL);
-            std::string repeaty = repeaty_attr ? repeaty_attr.value() : "H";
-
-            clock_network.metal_layer = metal_layer;
-            clock_network.wire.start = startx;
-            clock_network.wire.end = endx;
-            clock_network.wire.position = y;
-            clock_network.repeat.x = repeatx;
-            clock_network.repeat.y = repeaty;
-
-            ProcessClockSwitchPoints(curr_type, clock_network, switches, num_switches, loc_data);
-        }
-
-        // Currently there is only support for ribs and spines
-        if (!is_supported_clock_type) {
-            // curr_type is an empty node here (no rib was found), so the error
-            // location is taken from the clock_network node instead.
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(curr_network),
-                           "Found no supported clock network type for '%s' clock network.\n"
-                           "Currently there is only support for rib and spine networks.\n",
-                           name.c_str());
-        }
-
-        clock_networks.push_back(clock_network);
-        curr_network = curr_network.next_sibling(curr_network.name());
-    }
-}
-
-/* Reads the "switch_point" children of a spine or rib node (parent) into
- * clock_network.drive and clock_network.tap.
- *
- * clock_network.type must already be set: it selects whether the y-based
- * (spine) or x-based (rib) offset/increment attributes are read.
- *
- *  switches / num_switches: switch list used to resolve a drive's switch_name
- *
- * An unknown switch name or a switch point type other than "drive" or "tap"
- * is reported through archfpga_throw. */
-static void ProcessClockSwitchPoints(pugi::xml_node parent,
-                                     t_clock_network_arch& clock_network,
-                                     const t_arch_switch_inf* switches,
-                                     const int num_switches,
-                                     pugiutil::loc_data& loc_data) {
-    std::vector<std::string> expected_spine_drive_attributes = {"name", "type", "yoffset", "switch_name"};
-    std::vector<std::string> expected_rib_drive_attributes = {"name", "type", "xoffset", "switch_name"};
-    std::vector<std::string> expected_spine_tap_attributes = {"name", "type", "yoffset", "yincr"};
-    std::vector<std::string> expected_rib_tap_attributes = {"name", "type", "xoffset", "xincr"};
-    std::vector<std::string> expected_children = {"switch_point"};
-
-    int num_clock_switches = count_children(parent, "switch_point", loc_data);
-    pugi::xml_node curr_switch = get_first_child(parent, "switch_point", loc_data);
-
-    //TODO: currently only supporting one drive and one tap. Should change to support
-    //      multiple taps
-    //The limitation concerns the switch points of this spine/rib, so the check is on
-    //their count (the number of architecture switches was tested here before).
-    VTR_ASSERT(num_clock_switches == 2);
-
-    //TODO: ensure switch name is unique for every switch of this clock network
-    for (int i = 0; i < num_clock_switches; i++) {
-        // NOTE(review): this validates the children of the switch_point itself against
-        // {"switch_point"}; checking parent's children may have been intended — confirm.
-        expect_only_children(curr_switch, expected_children, loc_data);
-
-        std::string switch_type(get_attribute(curr_switch, "type", loc_data).value());
-        if (switch_type == "drive") {
-            t_clock_drive drive;
-
-            std::string name(get_attribute(curr_switch, "name", loc_data).value());
-            const char* offset;
-            if (clock_network.type == e_clock_type::SPINE) {
-                expect_only_attributes(curr_switch, expected_spine_drive_attributes, loc_data);
-                offset = get_attribute(curr_switch, "yoffset", loc_data).value();
-            } else {
-                VTR_ASSERT(clock_network.type == e_clock_type::RIB);
-                expect_only_attributes(curr_switch, expected_rib_drive_attributes, loc_data);
-                offset = get_attribute(curr_switch, "xoffset", loc_data).value();
-            }
-
-            // get switch index: linear search of the switch list by name
-            const char* switch_name = get_attribute(curr_switch, "switch_name", loc_data).value();
-            int switch_idx;
-            for (switch_idx = 0; switch_idx < num_switches; switch_idx++) {
-                if (0 == strcmp(switch_name, switches[switch_idx].name)) {
-                    break; // switch_idx has been found
-                }
-            }
-            if (switch_idx >= num_switches) {
-                archfpga_throw(loc_data.filename_c_str(), loc_data.line(curr_switch),
-                               "'%s' is not a valid switch name.\n", switch_name);
-            }
-
-            drive.name = name;
-            drive.offset = offset;
-            drive.arch_switch_idx = switch_idx;
-            clock_network.drive = drive;
-
-        } else if (switch_type == "tap") {
-            t_clock_taps tap;
-
-            std::string name(get_attribute(curr_switch, "name", loc_data).value());
-            const char* offset;
-            const char* increment;
-            if (clock_network.type == e_clock_type::SPINE) {
-                expect_only_attributes(curr_switch, expected_spine_tap_attributes, loc_data);
-                offset = get_attribute(curr_switch, "yoffset", loc_data).value();
-                increment = get_attribute(curr_switch, "yincr", loc_data).value();
-            } else {
-                VTR_ASSERT(clock_network.type == e_clock_type::RIB);
-                expect_only_attributes(curr_switch, expected_rib_tap_attributes, loc_data);
-                offset = get_attribute(curr_switch, "xoffset", loc_data).value();
-                increment = get_attribute(curr_switch, "xincr", loc_data).value();
-            }
-
-            tap.name = name;
-            tap.offset = offset;
-            tap.increment = increment;
-            clock_network.tap = tap;
-
-        } else {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(curr_switch),
-                           "Found unsupported switch type for '%s' clock network.\n"
-                           "Currently there is only support for drive and tap switch types.\n",
-                           clock_network.name.c_str());
-        }
-
-        curr_switch = curr_switch.next_sibling(curr_switch.name());
-    }
-}
-
-static void ProcessClockRouting(pugi::xml_node parent,
-                                std::vector<t_clock_connection_arch>& clock_connections,
-                                const t_arch_switch_inf* switches,
-                                const int num_switches,
-                                pugiutil::loc_data& loc_data) {
-    std::vector<std::string> expected_attributes = {"from", "to", "switch", "fc_val", "locationx", "locationy"};
-
-    pugi::xml_node clock_routing_parent = get_single_child(parent, "clock_routing", loc_data);
-    int num_routing_connections = count_children(clock_routing_parent, "tap", loc_data);
-
-    pugi::xml_node curr_connection = get_first_child(clock_routing_parent, "tap", loc_data);
-    for (int i = 0; i < num_routing_connections; i++) {
-        expect_only_attributes(curr_connection, expected_attributes, loc_data);
-
-        t_clock_connection_arch clock_connection;
-
-        const char* from = get_attribute(curr_connection, "from", loc_data).value();
-        const char* to = get_attribute(curr_connection, "to", loc_data).value();
-        const char* switch_name = get_attribute(curr_connection, "switch", loc_data).value();
-        const char* locationx = get_attribute(curr_connection, "locationx", loc_data, ReqOpt::OPTIONAL).value();
-        const char* locationy = get_attribute(curr_connection, "locationy", loc_data, ReqOpt::OPTIONAL).value();
-        float fc = get_attribute(curr_connection, "fc_val", loc_data).as_float(0.);
-
-        int switch_idx;
-        for (switch_idx = 0; switch_idx < num_switches; switch_idx++) {
-            if (0 == strcmp(switch_name, switches[switch_idx].name)) {
-                break; // switch_idx has been found
-            }
-        }
-        if (switch_idx >= num_switches) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(curr_connection),
-                           "'%s' is not a valid switch name.\n", switch_name);
-        }
-
-        clock_connection.from = from;
-        clock_connection.to = to;
-        clock_connection.arch_switch_idx = switch_idx;
-        clock_connection.locationx = locationx;
-        clock_connection.locationy = locationy;
-        clock_connection.fc = fc;
-
-        clock_connections.push_back(clock_connection);
-
-        curr_connection = curr_connection.next_sibling(curr_connection.name());
-    }
-}
-
-static void ProcessPower(pugi::xml_node parent,
-                         t_power_arch* power_arch,
-                         const pugiutil::loc_data& loc_data) {
-    pugi::xml_node Cur;
-
-    /* Get the local interconnect capacitances */
-    power_arch->local_interc_factor = 0.5;
-    Cur = get_single_child(parent, "local_interconnect", loc_data, ReqOpt::OPTIONAL);
-    if (Cur) {
-        power_arch->C_wire_local = get_attribute(Cur, "C_wire", loc_data, ReqOpt::OPTIONAL).as_float(0.);
-        power_arch->local_interc_factor = get_attribute(Cur, "factor", loc_data, ReqOpt::OPTIONAL).as_float(0.5);
-    }
-
-    /* Get logical effort factor */
-    power_arch->logical_effort_factor = 4.0;
-    Cur = get_single_child(parent, "buffers", loc_data, ReqOpt::OPTIONAL);
-    if (Cur) {
-        power_arch->logical_effort_factor = get_attribute(Cur,
-                                                          "logical_effort_factor", loc_data)
-                                                .as_float(0);
-        ;
-    }
-
-    /* Get SRAM Size */
-    power_arch->transistors_per_SRAM_bit = 6.0;
-    Cur = get_single_child(parent, "sram", loc_data, ReqOpt::OPTIONAL);
-    if (Cur) {
-        power_arch->transistors_per_SRAM_bit = get_attribute(Cur,
-                                                             "transistors_per_bit", loc_data)
-                                                   .as_float(0);
-    }
-
-    /* Get Mux transistor size */
-    power_arch->mux_transistor_size = 1.0;
-    Cur = get_single_child(parent, "mux_transistor_size", loc_data, ReqOpt::OPTIONAL);
-    if (Cur) {
-        power_arch->mux_transistor_size = get_attribute(Cur,
-                                                        "mux_transistor_size", loc_data)
-                                              .as_float(0);
-    }
-
-    /* Get FF size */
-    power_arch->FF_size = 1.0;
-    Cur = get_single_child(parent, "FF_size", loc_data, ReqOpt::OPTIONAL);
-    if (Cur) {
-        power_arch->FF_size = get_attribute(Cur, "FF_size", loc_data).as_float(0);
-    }
-
-    /* Get LUT transistor size */
-    power_arch->LUT_transistor_size = 1.0;
-    Cur = get_single_child(parent, "LUT_transistor_size", loc_data, ReqOpt::OPTIONAL);
-    if (Cur) {
-        power_arch->LUT_transistor_size = get_attribute(Cur,
-                                                        "LUT_transistor_size", loc_data)
-                                              .as_float(0);
-    }
-}
-
-/* Get the clock architcture */
-static void ProcessClocks(pugi::xml_node Parent, t_clock_arch* clocks, const pugiutil::loc_data& loc_data) {
-    pugi::xml_node Node;
-    int i;
-    const char* tmp;
-
-    clocks->num_global_clocks = count_children(Parent, "clock", loc_data, ReqOpt::OPTIONAL);
-
-    /* Alloc the clockdetails */
-    clocks->clock_inf = nullptr;
-    if (clocks->num_global_clocks > 0) {
-        clocks->clock_inf = (t_clock_network*)vtr::malloc(clocks->num_global_clocks * sizeof(t_clock_network));
-        memset(clocks->clock_inf, 0,
-               clocks->num_global_clocks * sizeof(t_clock_network));
-    }
-
-    /* Load the clock info. */
-    Node = get_first_child(Parent, "clock", loc_data);
-    for (i = 0; i < clocks->num_global_clocks; ++i) {
-        tmp = get_attribute(Node, "buffer_size", loc_data).value();
-        if (strcmp(tmp, "auto") == 0) {
-            clocks->clock_inf[i].autosize_buffer = true;
-        } else {
-            clocks->clock_inf[i].autosize_buffer = false;
-            clocks->clock_inf[i].buffer_size = (float)atof(tmp);
-        }
-
-        clocks->clock_inf[i].C_wire = get_attribute(Node, "C_wire", loc_data).as_float(0);
-
-        /* get the next clock item */
-        Node = Node.next_sibling(Node.name());
-    }
-}
-/*
- * Get the NoC design 
- */
-static void ProcessNoc(pugi::xml_node noc_tag, t_arch* arch, const pugiutil::loc_data& loc_data) {
-    // a vector representing all the possible attributes within the noc tag
-    std::vector<std::string> expected_noc_attributes = {"link_bandwidth", "link_latency", "router_latency", "noc_router_tile_name"};
-
-    std::vector<std::string> expected_noc_children_tags = {"mesh", "topology"};
-
-    pugi::xml_node noc_topology;
-    pugi::xml_node noc_mesh_topology;
-
-    // identifier that lets us know when we could not properly convert an attribute value to a integer
-    int attribute_conversion_failure = -1;
-
-    // identifier that lets us know when we could not properly convert a string conversion value
-    std::string attribute_conversion_failure_string = "";
-
-    // if we are here, then the user has a NoC in their architecture, so need to add it
-    arch->noc = new t_noc_inf;
-    t_noc_inf* noc_ref = arch->noc;
-
-    /* process the noc attributes first */
-
-    // quick error check to make sure that we dont have unexpected attributes
-    pugiutil::expect_only_attributes(noc_tag, expected_noc_attributes, loc_data);
-
-    // now go through and parse the required attributes for noc tag
-    noc_ref->link_bandwidth = pugiutil::get_attribute(noc_tag, "link_bandwidth", loc_data, pugiutil::REQUIRED).as_double(attribute_conversion_failure);
-
-    noc_ref->link_latency = pugiutil::get_attribute(noc_tag, "link_latency", loc_data, pugiutil::REQUIRED).as_double(attribute_conversion_failure);
-
-    noc_ref->router_latency = pugiutil::get_attribute(noc_tag, "router_latency", loc_data, pugiutil::REQUIRED).as_double(attribute_conversion_failure);
-
-    noc_ref->noc_router_tile_name = pugiutil::get_attribute(noc_tag, "noc_router_tile_name", loc_data, pugiutil::REQUIRED).as_string();
-
-    // the noc parameters can only be non-zero positive values
-    if ((noc_ref->link_bandwidth < 0) || (noc_ref->link_latency < 0) || (noc_ref->router_latency < 0)) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(noc_tag),
-                       "The link bandwidth, link latency and router latency for the NoC must be a positive non-zero value.");
-    }
-
-    // check that the router tile name was supplied properly
-    if (!(noc_ref->noc_router_tile_name.compare(attribute_conversion_failure_string))) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(noc_tag),
-                       "The noc router tile name must be a string.");
-    }
-
-    /* We processed the NoC node, so now process the topology*/
-
-    // make sure that only the topology tag is found under NoC
-    pugiutil::expect_only_children(noc_tag, expected_noc_children_tags, loc_data);
-
-    noc_mesh_topology = pugiutil::get_single_child(noc_tag, "mesh", loc_data, pugiutil::OPTIONAL);
-
-    // we cannot check for errors related to number of routers and as well as whether a router is out of bounds (this will be done later)
-    // the chip still needs to be sized
-
-    if (noc_mesh_topology) {
-        processMeshTopology(noc_mesh_topology, loc_data, noc_ref);
-
-        for (auto i = noc_ref->router_list.begin(); i != noc_ref->router_list.end(); i++) {
-            std::cout << "router " << i->id << ": ";
-
-            for (auto j = i->connection_list.begin(); j != i->connection_list.end(); j++) {
-                std::cout << *j << ",";
-            }
-
-            std::cout << "\n";
-        }
-    } else {
-        noc_topology = pugiutil::get_single_child(noc_tag, "topology", loc_data, pugiutil::REQUIRED);
-
-        processTopology(noc_topology, loc_data, noc_ref);
-    }
-
-    return;
-}
-
-/*
- * A NoC mesh is created based on the user supplied size and region location.
- */
-static void processMeshTopology(pugi::xml_node mesh_topology_tag, const pugiutil::loc_data& loc_data, t_noc_inf* noc_ref) {
-    // noc mesh topology properties
-    double mesh_region_start_x = 0;
-    double mesh_region_end_x = 0;
-    double mesh_region_start_y = 0;
-    double mesh_region_end_y = 0;
-    int mesh_size = 0;
-
-    // identifier that lets us know when we could not properly convert an attribute value to a integer
-    int attribute_conversion_failure = -1;
-
-    // a list of attrbutes that should be found for the mesh tag
-    std::vector<std::string> expected_router_attributes = {"startx", "endx", "starty", "endy", "size"};
-
-    // verify that only the acceptable attributes were supplied
-    pugiutil::expect_only_attributes(mesh_topology_tag, expected_router_attributes, loc_data);
-
-    // go through the attributes and store their values
-    mesh_region_start_x = pugiutil::get_attribute(mesh_topology_tag, "startx", loc_data, pugiutil::REQUIRED).as_double(attribute_conversion_failure);
-
-    mesh_region_end_x = pugiutil::get_attribute(mesh_topology_tag, "endx", loc_data, pugiutil::REQUIRED).as_double(attribute_conversion_failure);
-
-    mesh_region_start_y = pugiutil::get_attribute(mesh_topology_tag, "starty", loc_data, pugiutil::REQUIRED).as_double(attribute_conversion_failure);
-
-    mesh_region_end_y = pugiutil::get_attribute(mesh_topology_tag, "endy", loc_data, pugiutil::REQUIRED).as_double(attribute_conversion_failure);
-
-    mesh_size = pugiutil::get_attribute(mesh_topology_tag, "size", loc_data, pugiutil::REQUIRED).as_int(attribute_conversion_failure);
-
-    // verify that the attrbiutes provided were legal
-    if ((mesh_region_start_x < 0) || (mesh_region_end_x < 0) || (mesh_region_start_y < 0) || (mesh_region_end_y < 0) || (mesh_size < 0)) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(mesh_topology_tag),
-                       "The parameters for the mesh topology have to be positive values.");
-    }
-
-    // now create the mesh topology for the noc
-    // create routers, make connections and detertmine positions
-    generate_noc_mesh(mesh_topology_tag, loc_data, noc_ref, mesh_region_start_x, mesh_region_end_x, mesh_region_start_y, mesh_region_end_y, mesh_size);
-
-    return;
-}
-
-/*
- * Go through each router in the NoC and store the list of routers that connect to it.
- */
-static void processTopology(pugi::xml_node topology_tag, const pugiutil::loc_data& loc_data, t_noc_inf* noc_ref) {
-    // The topology tag should have no attributes, check that
-    pugiutil::expect_only_attributes(topology_tag, {}, loc_data);
-
-    /**
-     * Stores router information that includes the number of connections a router has within a given topology and also the number of times a router was declared in the arch file using the <router> tag.
-     * In the datastructure below, the router id is the key and the stored data is a pair, where the first element describes the number of router declarations and the second element describes the number of router connections.
-     * This is used only for error checking.
-     */
-    std::map<int, std::pair<int, int>> routers_in_arch_info;
-
-    /* Now go through the children tags of topology, which is basically
-     * each router found within the NoC 
-     */
-    for (pugi::xml_node router : topology_tag.children()) {
-        // we can only have router tags within the topology
-        if (router.name() != std::string("router")) {
-            bad_tag(router, loc_data, topology_tag, {"router"});
-        } else {
-            // curent tag is a valid router, so process it
-            processRouter(router, loc_data, noc_ref, routers_in_arch_info);
-        }
-    }
-
-    // check whether any routers were supplied
-    if (noc_ref->router_list.size() == 0) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(topology_tag),
-                       "No routers were supplied for the NoC.");
-    }
-
-    // check that the topology of the noc was correctly described in the arch file
-    verify_noc_topology(routers_in_arch_info);
-
-    return;
-}
-
-/*
- * Store the properties of a single router and then store the list of routers that connect to it.
- */
-static void processRouter(pugi::xml_node router_tag, const pugiutil::loc_data& loc_data, t_noc_inf* noc_ref, std::map<int, std::pair<int, int>>& routers_in_arch_info) {
-    // identifier that lets us know when we could not properly convert an attribute value to a integer
-    int attribute_conversion_failure = -1;
-
-    // an accepted list of attributes for the router tag
-    std::vector<std::string> expected_router_attributes = {"id", "positionx", "positiony", "connections"};
-
-    // variable to store current router info
-    t_router router_info;
-
-    // router connection list attribute information
-    std::string router_connection_list_attribute_value;
-
-    // lets us know if there was an error processing the router connection list
-    bool router_connection_list_result = true;
-
-    // check that only the accepted router attributes are found in the tag
-    pugiutil::expect_only_attributes(router_tag, expected_router_attributes, loc_data);
-
-    // store the router information from the attributes
-    router_info.id = pugiutil::get_attribute(router_tag, "id", loc_data, pugiutil::REQUIRED).as_int(attribute_conversion_failure);
-
-    router_info.device_x_position = pugiutil::get_attribute(router_tag, "positionx", loc_data, pugiutil::REQUIRED).as_double(attribute_conversion_failure);
-
-    router_info.device_y_position = pugiutil::get_attribute(router_tag, "positiony", loc_data, pugiutil::REQUIRED).as_double(attribute_conversion_failure);
-
-    // verify whether the attribute information was legal
-    if ((router_info.id < 0) || (router_info.device_x_position < 0) || (router_info.device_y_position < 0)) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(router_tag),
-                       "The router id, and position (x & y) for the router must be a positive number.");
-    }
-
-    // get the current router connection list
-    router_connection_list_attribute_value.assign(pugiutil::get_attribute(router_tag, "connections", loc_data, pugiutil::REQUIRED).as_string());
-
-    // if the connections attrbiute was not provided or it was empty, then we don't process it and throw a warning
-
-    if (router_connection_list_attribute_value.compare("") != 0) {
-        // process the router connection list
-        router_connection_list_result = parse_noc_router_connection_list(router_tag, loc_data, router_info.id, router_info.connection_list, router_connection_list_attribute_value, routers_in_arch_info);
-
-        // check if the user provided a legal router connection list
-        if (!router_connection_list_result) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(router_tag),
-                           "The 'connections' attribute for the router must be a list of integers seperated by spaces, where each integer represents a router id that the current router is connected to.");
-        }
-
-    } else {
-        VTR_LOGF_WARN(loc_data.filename_c_str(), loc_data.line(router_tag),
-                      "The router with id:%d either has an empty 'connections' attrtibute or does not have any associated connections to other routers in the NoC.\n", router_info.id);
-    }
-
-    // at this point the current router information was completely legal, so we store the newly created router within the noc
-    noc_ref->router_list.push_back(router_info);
-
-    // update the number of declarations info for the current router (since we just finished processing one <router> tag)
-    update_router_info_in_arch(router_info.id, false, routers_in_arch_info);
-
-    return;
-}
-
-std::string inst_port_to_port_name(std::string inst_port) {
-    auto pos = inst_port.find('.');
-    if (pos != std::string::npos) {
-        return inst_port.substr(pos + 1);
-    }
-    return inst_port;
-}
-
-static bool attribute_to_bool(const pugi::xml_node node,
-                              const pugi::xml_attribute attr,
-                              const pugiutil::loc_data& loc_data) {
-    if (attr.value() == std::string("1")) {
-        return true;
-    } else if (attr.value() == std::string("0")) {
-        return false;
-    } else {
-        bad_attribute_value(attr, node, loc_data, {"0", "1"});
-    }
-
-    return false;
-}
-
-int find_switch_by_name(const t_arch& arch, std::string switch_name) {
-    for (int iswitch = 0; iswitch < arch.num_switches; ++iswitch) {
-        const t_arch_switch_inf& arch_switch = arch.Switches[iswitch];
-        if (arch_switch.name == switch_name) {
-            return iswitch;
-        }
-    }
-
-    return OPEN;
-}
-
-e_side string_to_side(std::string side_str) {
-    e_side side = NUM_SIDES;
-    if (side_str.empty()) {
-        side = NUM_SIDES;
-    } else if (side_str == "left") {
-        side = LEFT;
-    } else if (side_str == "right") {
-        side = RIGHT;
-    } else if (side_str == "top") {
-        side = TOP;
-    } else if (side_str == "bottom") {
-        side = BOTTOM;
-    } else {
-        archfpga_throw(__FILE__, __LINE__,
-                       "Invalid side specification");
-    }
-    return side;
-}
-
-template<typename T>
-static T* get_type_by_name(const char* type_name, std::vector<T>& types) {
-    for (auto& type : types) {
-        if (0 == strcmp(type.name, type_name)) {
-            return &type;
-        }
-    }
-
-    archfpga_throw(__FILE__, __LINE__,
-                   "Could not find type: %s\n", type_name);
-}
-
-/*
- * Create routers and set their properties so that a mesh grid of routers is created. Then connect the routers together so that a mesh topology is created.
- */
-static void generate_noc_mesh(pugi::xml_node mesh_topology_tag, const pugiutil::loc_data& loc_data, t_noc_inf* noc_ref, double mesh_region_start_x, double mesh_region_end_x, double mesh_region_start_y, double mesh_region_end_y, int mesh_size) {
-    // check that the mesh size of the router is not 0
-    if (mesh_size == 0) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(mesh_topology_tag),
-                       "The NoC mesh size cannot be 0.");
-    }
-
-    // calculating the vertical horizontal distances between routers in the supplied region
-    // we decrease the mesh size by 1 when calculating the spacing so that the first and last routers of each row or column are positioned on the mesh boundary
-    /*
-     * For example:
-     * - If we had a mesh size of 3, then using 3 would result in a spacing that would result in one router positions being placed in either the start of the reigion or end of the region. This is because the distance calculation resulted in having 3 spaces between the ends of the region 
-     *
-     * start              end
-     ***   ***   ***   ***
-     *
-     * - if we instead used 2 in the distance calculation, the the resulting positions would result in having 2 routers positioned on the start and end of the region. This is beacuse we now specified 2 spaces between the region and this allows us to place 2 routers on the regions edges and one router in the center.
-     *
-     * start        end
-     ***   ***   ***
-     *
-     * THe reasoning for this is to reduce the number of calculated router positions.
-     */
-    double vertical_router_separation = (mesh_region_end_y - mesh_region_start_y) / (mesh_size - 1);
-    double horizontal_router_separation = (mesh_region_end_x - mesh_region_start_x) / (mesh_size - 1);
-
-    t_router temp_router;
-
-    // improper region check
-    if ((vertical_router_separation <= 0) || (horizontal_router_separation <= 0)) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(mesh_topology_tag),
-                       "The NoC region is invalid.");
-    }
-
-    // create routers and their connections
-    // start with router id 0 (bottom left of the chip) to the maximum router id (top right of the chip)
-    for (int j = 0; j < mesh_size; j++) {
-        for (int i = 0; i < mesh_size; i++) {
-            // assign router id
-            temp_router.id = (mesh_size * j) + i;
-
-            // calculate router position
-            /* The first and last router of each column or row will be located on the mesh region boundary, the remaining routers will be placed within the region and seperated from other routers using the distance calculated previously.
-             */
-            temp_router.device_x_position = (i * horizontal_router_separation) + mesh_region_start_x;
-            temp_router.device_y_position = (j * vertical_router_separation) + mesh_region_start_y;
-
-            // assign connections
-            // check if there is a router to the left
-            if ((i - 1) >= 0) {
-                // add the left router as a connection
-                temp_router.connection_list.push_back((mesh_size * j) + i - 1);
-            }
-
-            // check if there is a router to the top
-            if ((j + 1) <= (mesh_size - 1)) {
-                // add the top router as a connection
-                temp_router.connection_list.push_back((mesh_size * (j + 1)) + i);
-            }
-
-            // check if there is a router to the right
-            if ((i + 1) <= (mesh_size - 1)) {
-                // add the router located to the right
-                temp_router.connection_list.push_back((mesh_size * j) + i + 1);
-            }
-
-            // check of there is a router below
-            if ((j - 1) >= (0)) {
-                // add the bottom router as a connection
-                temp_router.connection_list.push_back((mesh_size * (j - 1)) + i);
-            }
-
-            // add the router to the list
-            noc_ref->router_list.push_back(temp_router);
-
-            // clear the current router information for the next router
-            temp_router.connection_list.clear();
-        }
-    }
-
-    return;
-}
-
-/*
- * THe user provides the list of routers any given router is connected to by the router ids seperated by spaces. For example:
- *
- * connections= 1 2 3 4 5
- *
- * Go through the connections here and store them. Also make sure the list is legal.
- */
-static bool parse_noc_router_connection_list(pugi::xml_node router_tag, const pugiutil::loc_data& loc_data, int router_id, std::vector<int>& connection_list, std::string connection_list_attribute_value, std::map<int, std::pair<int, int>>& routers_in_arch_info) {
-    // we wil be modifying the string so store it in a temporary variable
-    // additinally, we peocess substrings seperated by spaces, so we add a space at the end of the string to be able to process the last sub-string
-    std::string modified_attribute_value = connection_list_attribute_value + " ";
-    std::string delimiter = " ";
-    std::stringstream single_connection;
-    int converted_connection;
-
-    size_t position = 0;
-
-    bool result = true;
-
-    // find the position of the first space in the connection list string
-    while ((position = modified_attribute_value.find(delimiter)) != std::string::npos) {
-        // the string upto the space represent a single connection, so grab the substring
-        single_connection << modified_attribute_value.substr(0, position);
-
-        // convert the connection to an integer
-        single_connection >> converted_connection;
-
-        /* we expect the connection list to be a string of integers seperated by spaces, where each integer represents a router id that the current router is connected to. So we make sure that the router id was an integer.
-         */
-        if (single_connection.fail()) {
-            // if we are here, then an integer was not supplied
-            result = false;
-            break;
-        }
-
-        // check the case where a duplicate connection was provided
-        if (std::find(connection_list.begin(), connection_list.end(), converted_connection) != connection_list.end()) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(router_tag),
-                           "The router with id:'%d' was included multiple times in the connection list for another router.", converted_connection);
-        }
-
-        // make sure that the current router isn't connected to itself
-        if (router_id == converted_connection) {
-            archfpga_throw(loc_data.filename_c_str(), loc_data.line(router_tag),
-                           "The router with id:%d was added to its own connection list. A router cannot connect to itself.", router_id);
-        }
-
-        // if we are here then a legal router id was supplied, so store it
-        connection_list.push_back(converted_connection);
-        // update the connection information for the current router in the connection list
-        update_router_info_in_arch(converted_connection, true, routers_in_arch_info);
-
-        // before we process the next router connection, we need to delete the substring (current router connection)
-        modified_attribute_value.erase(0, position + delimiter.length());
-        // clear the buffer that stores the router connection in a string format for the next iteration
-        single_connection.clear();
-    }
-
-    return result;
-}
-
-/* Each router needs a sperate <router> tag in the architecture description
- * to declare it. The number of declarations for each router in the 
- * architecture file is updated here.
- *
- * Additionally, for any given topology, a router can connect to other routers.
- * THe number of connections for each router is also updated here. 
- *
- */
-static void update_router_info_in_arch(int router_id, bool router_updated_as_a_connection, std::map<int, std::pair<int, int>>& routers_in_arch_info) {
-    // get the corresponding router info for the given router id
-    std::map<int, std::pair<int, int>>::iterator curr_router_info = routers_in_arch_info.find(router_id);
-
-    // check if the router previously existed in the router indo database
-    if (curr_router_info == routers_in_arch_info.end()) {
-        // case where the router did not exist previosuly, so we add it here and also get a reference to it
-        // initially a router has no declarations or connections
-        curr_router_info = routers_in_arch_info.insert(std::pair<int, std::pair<int, int>>(router_id, std::pair<int, int>(0, 0))).first;
-    }
-
-    // case where the current router was provided while parsing the connections of another router
-    if (router_updated_as_a_connection) {
-        // since we are within the case where the current router is being processed as a connection to another router we just increment its number of connections
-        (curr_router_info->second.second)++;
-
-    } else {
-        // since we are within the case where the current router is processed from a <router> tag, we just increment its number of declarations
-        (curr_router_info->second.first)++;
-    }
-
-    return;
-}
-
-/*
- * Verify each router in the noc by checking whether they satisfy the following conditions:
- * - The router has only one declaration in the arch file
- * - The router has atleast one connection to another router
- * If any of the conditions above are not met, then an error is thrown. 
- */
-static void verify_noc_topology(std::map<int, std::pair<int, int>>& routers_in_arch_info) {
-    for (auto router_info = routers_in_arch_info.begin(); router_info != routers_in_arch_info.end(); router_info++) {
-        // case where the router was included in the architecture and had no connections to other routers
-        if ((router_info->second.first == 1) && (router_info->second.second == 0)) {
-            archfpga_throw("", -1,
-                           "The router with id:'%d' is not connected to any other router in the NoC.", router_info->first);
-
-        } // case where a router was found to be connected to another router but not declared using the <router> tag in the arch file (ie. missing)
-        else if ((router_info->second.first == 0) && (router_info->second.second > 0)) {
-            archfpga_throw("", -1,
-                           "The router with id:'%d' was found to be connected to another router but missing in the architecture file. Add the router using the <router> tag.", router_info->first);
-
-        } // case where the router was delcared multiple times in the architecture file (multiple <router> tags for the same router)
-        else if (router_info->second.first > 1) {
-            archfpga_throw("", -1,
-                           "The router with id:'%d' was included more than once in the architecture file. Routers should only be declared once.", router_info->first);
-        }
-    }
-
-    return;
-}
diff --git a/third_party/vtr/libs/archfpga/src/read_xml_arch_file.h b/third_party/vtr/libs/archfpga/src/read_xml_arch_file.h
deleted file mode 100644
index 5021d0317..000000000
--- a/third_party/vtr/libs/archfpga/src/read_xml_arch_file.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#ifndef READ_XML_ARCH_FILE_H
-#define READ_XML_ARCH_FILE_H
-
-#include "arch_types.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* special type indexes, necessary for initialization, everything afterwards
- * should use the pointers to these type indices*/
-
-#define NUM_MODELS_IN_LIBRARY 4
-#define EMPTY_TYPE_INDEX 0
-
-/* function declarations */
-void XmlReadArch(const char* ArchFile,
-                 const bool timing_enabled,
-                 t_arch* arch,
-                 std::vector<t_physical_tile_type>& PhysicalTileTypes,
-                 std::vector<t_logical_block_type>& LogicalBlockTypes);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/third_party/vtr/libs/archfpga/src/read_xml_util.cc b/third_party/vtr/libs/archfpga/src/read_xml_util.cc
deleted file mode 100644
index 784d08a9b..000000000
--- a/third_party/vtr/libs/archfpga/src/read_xml_util.cc
+++ /dev/null
@@ -1,142 +0,0 @@
-#include "read_xml_util.h"
-
-#include "vtr_util.h"
-#include "arch_error.h"
-
-using pugiutil::ReqOpt;
-
-/* Convert bool to ReqOpt enum */
-extern ReqOpt BoolToReqOpt(bool b) {
-    if (b) {
-        return ReqOpt::REQUIRED;
-    }
-    return ReqOpt::OPTIONAL;
-}
-
-InstPort make_inst_port(std::string str, pugi::xml_node node, const pugiutil::loc_data& loc_data) {
-    InstPort inst_port;
-    try {
-        inst_port = InstPort(str);
-    } catch (const ArchFpgaError& e) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(node),
-                       "Failed to parse instance port specification '%s' for"
-                       " on <%s> tag, %s",
-                       str.c_str(), node.name(), e.what());
-    }
-
-    return inst_port;
-}
-
-InstPort make_inst_port(pugi::xml_attribute attr, pugi::xml_node node, const pugiutil::loc_data& loc_data) {
-    InstPort inst_port;
-    try {
-        inst_port = InstPort(attr.value());
-    } catch (const ArchFpgaError& e) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(node),
-                       "Failed to parse instance port specification '%s' for"
-                       " attribute '%s' on <%s> tag, %s",
-                       attr.value(), attr.name(), node.name(), e.what());
-    }
-    return inst_port;
-}
-
-void bad_tag(const pugi::xml_node node,
-             const pugiutil::loc_data& loc_data,
-             const pugi::xml_node parent_node,
-             const std::vector<std::string> expected_tags) {
-    std::string msg = "Unexpected tag ";
-    msg += "<";
-    msg += node.name();
-    msg += ">";
-
-    if (parent_node) {
-        msg += " in section <";
-        msg += parent_node.name();
-        msg += ">";
-    }
-
-    if (!expected_tags.empty()) {
-        msg += ", expected ";
-        for (auto iter = expected_tags.begin(); iter != expected_tags.end(); ++iter) {
-            msg += "<";
-            msg += *iter;
-            msg += ">";
-
-            if (iter < expected_tags.end() - 2) {
-                msg += ", ";
-            } else if (iter == expected_tags.end() - 2) {
-                msg += " or ";
-            }
-        }
-    }
-
-    throw ArchFpgaError(msg, loc_data.filename(), loc_data.line(node));
-}
-
-void bad_attribute(const pugi::xml_attribute attr,
-                   const pugi::xml_node node,
-                   const pugiutil::loc_data& loc_data,
-                   const std::vector<std::string> expected_attributes) {
-    std::string msg = "Unexpected attribute ";
-    msg += "'";
-    msg += attr.name();
-    msg += "'";
-
-    if (node) {
-        msg += " on <";
-        msg += node.name();
-        msg += "> tag";
-    }
-
-    if (!expected_attributes.empty()) {
-        msg += ", expected ";
-        for (auto iter = expected_attributes.begin(); iter != expected_attributes.end(); ++iter) {
-            msg += "'";
-            msg += *iter;
-            msg += "'";
-
-            if (iter < expected_attributes.end() - 2) {
-                msg += ", ";
-            } else if (iter == expected_attributes.end() - 2) {
-                msg += " or ";
-            }
-        }
-    }
-
-    throw ArchFpgaError(msg, loc_data.filename(), loc_data.line(node));
-}
-
-void bad_attribute_value(const pugi::xml_attribute attr,
-                         const pugi::xml_node node,
-                         const pugiutil::loc_data& loc_data,
-                         const std::vector<std::string> expected_values) {
-    std::string msg = "Invalid value '";
-    msg += attr.value();
-    msg += "'";
-    msg += " for attribute '";
-    msg += attr.name();
-    msg += "'";
-
-    if (node) {
-        msg += " on <";
-        msg += node.name();
-        msg += "> tag";
-    }
-
-    if (!expected_values.empty()) {
-        msg += ", expected value ";
-        for (auto iter = expected_values.begin(); iter != expected_values.end(); ++iter) {
-            msg += "'";
-            msg += *iter;
-            msg += "'";
-
-            if (iter < expected_values.end() - 2) {
-                msg += ", ";
-            } else if (iter == expected_values.end() - 2) {
-                msg += " or ";
-            }
-        }
-    }
-
-    throw ArchFpgaError(msg, loc_data.filename(), loc_data.line(node));
-}
diff --git a/third_party/vtr/libs/archfpga/src/read_xml_util.h b/third_party/vtr/libs/archfpga/src/read_xml_util.h
deleted file mode 100644
index 05a77ab03..000000000
--- a/third_party/vtr/libs/archfpga/src/read_xml_util.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef READ_XML_UTIL_H
-#define READ_XML_UTIL_H
-
-#include "pugixml.hpp"
-#include "pugixml_loc.hpp"
-#include "pugixml_util.hpp"
-#include "arch_util.h"
-
-pugiutil::ReqOpt BoolToReqOpt(bool b);
-
-void bad_tag(const pugi::xml_node node,
-             const pugiutil::loc_data& loc_data,
-             const pugi::xml_node parent_node = pugi::xml_node(),
-             const std::vector<std::string> expected_tags = std::vector<std::string>());
-
-void bad_attribute(const pugi::xml_attribute attr,
-                   const pugi::xml_node node,
-                   const pugiutil::loc_data& loc_data,
-                   const std::vector<std::string> expected_attributes = std::vector<std::string>());
-void bad_attribute_value(const pugi::xml_attribute attr,
-                         const pugi::xml_node node,
-                         const pugiutil::loc_data& loc_data,
-                         const std::vector<std::string> expected_attributes = std::vector<std::string>());
-
-InstPort make_inst_port(std::string str, pugi::xml_node node, const pugiutil::loc_data& loc_data);
-InstPort make_inst_port(pugi::xml_attribute attr, pugi::xml_node node, const pugiutil::loc_data& loc_data);
-
-#endif
diff --git a/third_party/vtr/libs/archfpga/test/main.cpp b/third_party/vtr/libs/archfpga/test/main.cpp
deleted file mode 100644
index 2a2e12d62..000000000
--- a/third_party/vtr/libs/archfpga/test/main.cpp
+++ /dev/null
@@ -1,2 +0,0 @@
-#define CATCH_CONFIG_MAIN
-#include "catch2/catch_test_macros.hpp"
\ No newline at end of file
diff --git a/third_party/vtr/libs/archfpga/test/test_read_xml_arch_file.cpp b/third_party/vtr/libs/archfpga/test/test_read_xml_arch_file.cpp
deleted file mode 100644
index adc9eab42..000000000
--- a/third_party/vtr/libs/archfpga/test/test_read_xml_arch_file.cpp
+++ /dev/null
@@ -1,269 +0,0 @@
-// test framework
-#include "catch2/catch_test_macros.hpp"
-#include "catch2/matchers/catch_matchers_all.hpp"
-
-// testting statuc functions so include whole source file it is in
-#include "read_xml_arch_file.cpp"
-
-// for comparing floats
-#include "vtr_math.h"
-
-TEST_CASE("Updating router info in arch", "[NoC Arch Tests]") {
-    std::map<int, std::pair<int, int>> test_router_list;
-
-    std::map<int, std::pair<int, int>>::iterator it;
-
-    // initial conditions
-    int router_id = 1;
-    bool router_is_from_connection_list = false;
-
-    // we initially need the map to be empty
-    REQUIRE(test_router_list.size() == 0);
-
-    SECTION("Update the number of declarations for a router for the first time ") {
-        update_router_info_in_arch(router_id, router_is_from_connection_list, test_router_list);
-
-        it = test_router_list.find(router_id);
-
-        // check first that the router was newly added to the router databse
-        REQUIRE(it != test_router_list.end());
-
-        // no verify the components of the router parameter
-        REQUIRE(it->second.first == 1);
-        REQUIRE(it->second.second == 0);
-    }
-    SECTION("Update the number of connections for a router for the first time") {
-        router_is_from_connection_list = true;
-
-        update_router_info_in_arch(router_id, router_is_from_connection_list, test_router_list);
-
-        it = test_router_list.find(router_id);
-
-        // check first that the router was newly added to the router databse
-        REQUIRE(it != test_router_list.end());
-
-        // no verify the components of the router parameter
-        REQUIRE(it->second.first == 0);
-        REQUIRE(it->second.second == 1);
-    }
-    SECTION("Update the number of declarations for a router when it already exists") {
-        update_router_info_in_arch(router_id, router_is_from_connection_list, test_router_list);
-
-        // verify that a router was added
-        REQUIRE(test_router_list.size() != 0);
-
-        update_router_info_in_arch(router_id, router_is_from_connection_list, test_router_list);
-
-        it = test_router_list.find(router_id);
-
-        // check first that the router was newly added to the router databse
-        REQUIRE(it != test_router_list.end());
-
-        // no verify the components of the router parameter
-        REQUIRE(it->second.first == 2);
-        REQUIRE(it->second.second == 0);
-    }
-    SECTION("Update the number of connections for a router when it already exists") {
-        router_is_from_connection_list = true;
-
-        update_router_info_in_arch(router_id, router_is_from_connection_list, test_router_list);
-
-        // verify that a router was added
-        REQUIRE(test_router_list.size() != 0);
-
-        update_router_info_in_arch(router_id, router_is_from_connection_list, test_router_list);
-
-        it = test_router_list.find(router_id);
-
-        // check first that the router was newly added to the router databse
-        REQUIRE(it != test_router_list.end());
-
-        // no verify the components of the router parameter
-        REQUIRE(it->second.first == 0);
-        REQUIRE(it->second.second == 2);
-    }
-}
-
-TEST_CASE("Verifying a parsed NoC topology", "[NoC Arch Tests]") {
-    std::map<int, std::pair<int, int>> test_router_list;
-
-    REQUIRE(test_router_list.size() == 0);
-
-    SECTION("Check the error where a router in the NoC is not connected to other routers.") {
-        // error router
-        test_router_list.insert(std::pair<int, std::pair<int, int>>(1, std::pair<int, int>(1, 0)));
-
-        // sonme normal routers
-        test_router_list.insert(std::pair<int, std::pair<int, int>>(2, std::pair<int, int>(1, 5)));
-
-        test_router_list.insert(std::pair<int, std::pair<int, int>>(3, std::pair<int, int>(1, 6)));
-
-        REQUIRE(test_router_list.size() == 3);
-
-        REQUIRE_THROWS_WITH(verify_noc_topology(test_router_list), "The router with id:'1' is not connected to any other router in the NoC.");
-    }
-    SECTION("Check the error where a router in the NoC is connected to other routers but missing a declaration in the arch file.") {
-        // normal routers
-        test_router_list.insert(std::pair<int, std::pair<int, int>>(1, std::pair<int, int>(1, 5)));
-
-        test_router_list.insert(std::pair<int, std::pair<int, int>>(2, std::pair<int, int>(1, 3)));
-
-        // error router
-        test_router_list.insert(std::pair<int, std::pair<int, int>>(3, std::pair<int, int>(0, 5)));
-
-        test_router_list.insert(std::pair<int, std::pair<int, int>>(4, std::pair<int, int>(1, 10)));
-
-        REQUIRE(test_router_list.size() == 4);
-
-        REQUIRE_THROWS_WITH(verify_noc_topology(test_router_list), "The router with id:'3' was found to be connected to another router but missing in the architecture file. Add the router using the <router> tag.");
-    }
-    SECTION("Check the error where the router is included more than once in the architecture file.") {
-        // normal routers
-        test_router_list.insert(std::pair<int, std::pair<int, int>>(1, std::pair<int, int>(1, 5)));
-
-        test_router_list.insert(std::pair<int, std::pair<int, int>>(2, std::pair<int, int>(1, 3)));
-
-        test_router_list.insert(std::pair<int, std::pair<int, int>>(3, std::pair<int, int>(1, 10)));
-
-        // error routers
-        test_router_list.insert(std::pair<int, std::pair<int, int>>(4, std::pair<int, int>(2, 10)));
-
-        // normal routers
-        test_router_list.insert(std::pair<int, std::pair<int, int>>(5, std::pair<int, int>(1, 3)));
-
-        test_router_list.insert(std::pair<int, std::pair<int, int>>(6, std::pair<int, int>(1, 10)));
-
-        REQUIRE(test_router_list.size() == 6);
-
-        REQUIRE_THROWS_WITH(verify_noc_topology(test_router_list), "The router with id:'4' was included more than once in the architecture file. Routers should only be declared once.");
-    }
-}
-
-TEST_CASE("Verifying mesh topology creation", "[NoC Arch Tests]") {
-    // data for the xml parsing
-    pugi::xml_node test;
-    pugiutil::loc_data test_location;
-
-    // the noc storage
-    t_noc_inf test_noc;
-
-    // mesh parameters
-    double mesh_start_x = 10;
-    double mesh_start_y = 10;
-    double mesh_end_x = 5;
-    double mesh_end_y = 56;
-    double mesh_size = 0;
-
-    SECTION("Check the error where a mesh size was illegal.") {
-        REQUIRE_THROWS_WITH(generate_noc_mesh(test, test_location, &test_noc, mesh_start_x, mesh_end_x, mesh_start_y, mesh_end_y, mesh_size), "The NoC mesh size cannot be 0.");
-    }
-    SECTION("Check the error where a mesh region size was invalid.") {
-        mesh_size = 3;
-
-        REQUIRE_THROWS_WITH(generate_noc_mesh(test, test_location, &test_noc, mesh_start_x, mesh_end_x, mesh_start_y, mesh_end_y, mesh_size), "The NoC region is invalid.");
-    }
-    SECTION("Check the mesh creation for integer precision coordinates.") {
-        // define test parameters
-        mesh_size = 3;
-
-        mesh_start_x = 0;
-        mesh_start_y = 0;
-
-        mesh_end_x = 4;
-        mesh_end_y = 4;
-
-        // create the golden golden results
-        double golden_results_x[9];
-        double golden_results_y[9];
-
-        // first row of the mesh
-        golden_results_x[0] = 0;
-        golden_results_y[0] = 0;
-        golden_results_x[1] = 2;
-        golden_results_y[1] = 0;
-        golden_results_x[2] = 4;
-        golden_results_y[2] = 0;
-
-        // second row of the mesh
-        golden_results_x[3] = 0;
-        golden_results_y[3] = 2;
-        golden_results_x[4] = 2;
-        golden_results_y[4] = 2;
-        golden_results_x[5] = 4;
-        golden_results_y[5] = 2;
-
-        // third row of the mesh
-        golden_results_x[6] = 0;
-        golden_results_y[6] = 4;
-        golden_results_x[7] = 2;
-        golden_results_y[7] = 4;
-        golden_results_x[8] = 4;
-        golden_results_y[8] = 4;
-
-        generate_noc_mesh(test, test_location, &test_noc, mesh_start_x, mesh_end_x, mesh_start_y, mesh_end_y, mesh_size);
-
-        // go through all the expected routers
-        for (int expected_router_id = 0; expected_router_id < (mesh_size * mesh_size); expected_router_id++) {
-            // make sure the router ids match
-            REQUIRE(test_noc.router_list[expected_router_id].id == expected_router_id);
-
-            // make sure the position of the routers are correct
-            // x position
-            REQUIRE(golden_results_x[expected_router_id] == test_noc.router_list[expected_router_id].device_x_position);
-            // y position
-            REQUIRE(golden_results_y[expected_router_id] == test_noc.router_list[expected_router_id].device_y_position);
-        }
-    }
-    SECTION("Check the mesh creation for double precision coordinates.") {
-        // define test parameters
-        mesh_size = 3;
-
-        mesh_start_x = 3.5;
-        mesh_start_y = 5.7;
-
-        mesh_end_x = 10.8;
-        mesh_end_y = 6.4;
-
-        // create the golden golden results
-        double golden_results_x[9];
-        double golden_results_y[9];
-
-        // first row of the mesh
-        golden_results_x[0] = 3.5;
-        golden_results_y[0] = 5.7;
-        golden_results_x[1] = 7.15;
-        golden_results_y[1] = 5.7;
-        golden_results_x[2] = 10.8;
-        golden_results_y[2] = 5.7;
-
-        // second row of the mesh
-        golden_results_x[3] = 3.5;
-        golden_results_y[3] = 6.05;
-        golden_results_x[4] = 7.15;
-        golden_results_y[4] = 6.05;
-        golden_results_x[5] = 10.8;
-        golden_results_y[5] = 6.05;
-
-        // third row of the mesh
-        golden_results_x[6] = 3.5;
-        golden_results_y[6] = 6.4;
-        golden_results_x[7] = 7.15;
-        golden_results_y[7] = 6.4;
-        golden_results_x[8] = 10.8;
-        golden_results_y[8] = 6.4;
-
-        generate_noc_mesh(test, test_location, &test_noc, mesh_start_x, mesh_end_x, mesh_start_y, mesh_end_y, mesh_size);
-
-        // go through all the expected routers
-        for (int expected_router_id = 0; expected_router_id < (mesh_size * mesh_size); expected_router_id++) {
-            // make sure the router ids match
-            REQUIRE(test_noc.router_list[expected_router_id].id == expected_router_id);
-
-            // make sure the position of the routers are correct
-            // x position
-            REQUIRE(vtr::isclose(golden_results_x[expected_router_id], test_noc.router_list[expected_router_id].device_x_position));
-            // y position
-            REQUIRE(vtr::isclose(golden_results_y[expected_router_id], test_noc.router_list[expected_router_id].device_y_position));
-        }
-    }
-}
\ No newline at end of file
diff --git a/third_party/vtr/libs/log/.gitignore b/third_party/vtr/libs/log/.gitignore
deleted file mode 100644
index da50203d3..000000000
--- a/third_party/vtr/libs/log/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-test_log
diff --git a/third_party/vtr/libs/log/CMakeLists.txt b/third_party/vtr/libs/log/CMakeLists.txt
deleted file mode 100644
index 7445ef341..000000000
--- a/third_party/vtr/libs/log/CMakeLists.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-cmake_minimum_required(VERSION 3.9)
-
-project("liblog")
-
-file(GLOB_RECURSE EXEC_SOURCES src/main.cpp)
-file(GLOB_RECURSE LIB_SOURCES src/*.cpp)
-file(GLOB_RECURSE LIB_HEADERS src/*.h)
-files_to_dirs(LIB_HEADERS LIB_INCLUDE_DIRS)
-
-#Remove test executable from library
-list(REMOVE_ITEM LIB_SOURCES ${EXEC_SOURCES})
-
-#Create the library
-add_library(liblog STATIC
-             ${LIB_HEADERS}
-             ${LIB_SOURCES})
-target_include_directories(liblog PUBLIC ${LIB_INCLUDE_DIRS})
-set_target_properties(liblog PROPERTIES PREFIX "") #Avoid extra 'lib' prefix
-
-#Create the test executable
-add_executable(test_log ${EXEC_SOURCES})
-target_link_libraries(test_log liblog)
-
-install(TARGETS liblog DESTINATION bin)
diff --git a/third_party/vtr/libs/log/LICENSE.txt b/third_party/vtr/libs/log/LICENSE.txt
deleted file mode 100644
index 41116e903..000000000
--- a/third_party/vtr/libs/log/LICENSE.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-The MIT License (MIT)
-
-Copyright (c) 2014 Jason Luu
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/third_party/vtr/libs/log/Readme.txt b/third_party/vtr/libs/log/Readme.txt
deleted file mode 100644
index 8f89aafdb..000000000
--- a/third_party/vtr/libs/log/Readme.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-Simple Logger Library
-
-Author: Jason Luu
-Date: Sept 5, 2014
-
-This library provides simple logging operations.
-
-- Output messages to both file and terminal
-- Tag messages based on feedback type (eg. info, warning, or error)
-- Track number of warnings and errors
-
-Specialized programming knowledge:
-- Requires usage of variable arguments in <stdarg.h> standard C library to properly wrap printf and fprintf
diff --git a/third_party/vtr/libs/log/src/log.cc b/third_party/vtr/libs/log/src/log.cc
deleted file mode 100644
index cbc2e79fa..000000000
--- a/third_party/vtr/libs/log/src/log.cc
+++ /dev/null
@@ -1,117 +0,0 @@
-/**
- * Lightweight logging tool.  Automatically prepend messages with prefixes and store in log file.
- *
- * Author: Jason Luu
- * Date: Sept 5, 2014
- */
-
-#include <stdio.h>
-#include <stdarg.h> /* Allows for variable arguments, necessary for wrapping printf */
-#include "log.h"
-
-#define LOG_DEFAULT_FILE_NAME "output.log"
-
-static int log_warning = 0;
-static int log_error = 0;
-FILE* log_stream = nullptr;
-
-static void check_init();
-
-/* Set the output file of logger.
- * If different than current log file, close current log file and reopen to new log file
- */
-void log_set_output_file(const char* filename) {
-    if (log_stream != nullptr) {
-        fclose(log_stream);
-    }
-
-    if (filename == nullptr) {
-        log_stream = nullptr;
-    } else {
-        log_stream = fopen(filename, "w");
-        if (log_stream == nullptr) {
-            printf("Error writing to file %s\n\n", filename);
-        }
-    }
-}
-
-void log_print_direct(const char* message, ...) {
-    va_list args;
-    va_start(args, message);
-    vprintf(message, args);
-    va_end(args);
-}
-
-void log_print_info(const char* message, ...) {
-    check_init(); /* Check if output log file setup, if not, then this function also sets it up */
-
-    va_list args;
-    va_start(args, message);
-    vprintf(message, args);
-    va_end(args);
-
-    if (log_stream) {
-        va_start(args, message); /* Must reset variable arguments so that they can be read again */
-        vfprintf(log_stream, message, args);
-        va_end(args);
-
-        fflush(log_stream);
-    }
-}
-
-void log_print_warning(const char* /*filename*/, unsigned int /*line_num*/, const char* message, ...) {
-    check_init(); /* Check if output log file setup, if not, then this function also sets it up */
-
-    va_list args;
-    va_start(args, message);
-    log_warning++;
-
-    printf("Warning %d: ", log_warning);
-    vprintf(message, args);
-    va_end(args);
-
-    if (log_stream) {
-        va_start(args, message); /* Must reset variable arguments so that they can be read again */
-        fprintf(log_stream, "Warning %d: ", log_warning);
-        vfprintf(log_stream, message, args);
-
-        va_end(args);
-        fflush(log_stream);
-    }
-}
-
-void log_print_error(const char* /*filename*/, unsigned int /*line_num*/, const char* message, ...) {
-    check_init(); /* Check if output log file setup, if not, then this function also sets it up */
-
-    va_list args;
-    va_start(args, message);
-    log_error++;
-
-    check_init();
-    fprintf(stderr, "Error %d: ", log_error);
-    vfprintf(stderr, message, args);
-    va_end(args);
-
-    if (log_stream) {
-        va_start(args, message); /* Must reset variable arguments so that they can be read again */
-        fprintf(log_stream, "Error %d: ", log_error);
-        vfprintf(log_stream, message, args);
-
-        va_end(args);
-
-        fflush(log_stream);
-    }
-}
-
-/**
- * Check if output log file setup, if not, then this function also sets it up
- */
-static void check_init() {
-    //We now allow a nullptr log_stream (i.e. no log file) so nothing to do here
-}
-
-void log_close() {
-    if (log_stream) {
-        fclose(log_stream);
-    }
-}
diff --git a/third_party/vtr/libs/log/src/log.h b/third_party/vtr/libs/log/src/log.h
deleted file mode 100644
index a350a64c5..000000000
--- a/third_party/vtr/libs/log/src/log.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/**
- * Lightweight logging tool.  Automatically prepend messages with prefixes and store in log file.
- *
- * Init/Change name of log file using log_set_output_file, when done, call log_close
- *
- * Author: Jason Luu
- * Date: Sept 5, 2014
- */
-
-#ifndef LOG_H
-#define LOG_H
-
-void log_set_output_file(const char* filename);
-
-void log_print_direct(const char* message, ...);
-void log_print_info(const char* message, ...);
-void log_print_warning(const char* filename, unsigned int line_num, const char* message, ...);
-void log_print_error(const char* filename, unsigned int line_num, const char* message, ...);
-
-void log_close();
-
-#endif
diff --git a/third_party/vtr/libs/log/src/main.cc b/third_party/vtr/libs/log/src/main.cc
deleted file mode 100644
index 653882560..000000000
--- a/third_party/vtr/libs/log/src/main.cc
+++ /dev/null
@@ -1,18 +0,0 @@
-/** Jason Luu
- * Test program for logger
- */
-
-#include "log.h"
-
-int main() {
-    int x = 10, y = 20;
-    float a = 1.5f, b = -2.01f;
-    log_print_info("Testing logger\n\n");
-    log_print_info("Output separate strings: %s %s\n", "pass", "[PASS]");
-    log_print_info("Output two integers: x = %d y = %d\n", x, y);
-    log_print_warning(__FILE__, __LINE__, "Test warning on floating point arguments %g %g\n", a, b);
-    log_print_error(__FILE__, __LINE__, "Test error on two variables %g %g \n\n", a - x, b + y);
-
-    log_print_info("Test complete\n");
-    return 0;
-}
\ No newline at end of file
diff --git a/third_party/vtr/libs/pugiutil/CMakeLists.txt b/third_party/vtr/libs/pugiutil/CMakeLists.txt
deleted file mode 100644
index edbd4c988..000000000
--- a/third_party/vtr/libs/pugiutil/CMakeLists.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-cmake_minimum_required(VERSION 3.9)
-
-project("libpugiutil")
-
-#
-# Source files and library
-#
-file(GLOB_RECURSE LIB_SOURCES src/*.cpp)
-file(GLOB_RECURSE LIB_HEADERS src/*.hpp src/*.h)
-files_to_dirs(LIB_HEADERS LIB_INCLUDE_DIRS)
-
-#Create the library
-add_library(libpugiutil STATIC
-             ${LIB_HEADERS}
-             ${LIB_SOURCES})
-target_include_directories(libpugiutil PUBLIC ${LIB_INCLUDE_DIRS})
-set_target_properties(libpugiutil PROPERTIES PREFIX "") #Avoid extra 'lib' prefix
-
-target_link_libraries(libpugiutil
-                        libpugixml)
-
-install(TARGETS libpugiutil DESTINATION bin)
diff --git a/third_party/vtr/libs/pugiutil/src/pugixml_loc.cc b/third_party/vtr/libs/pugiutil/src/pugixml_loc.cc
deleted file mode 100644
index b773b410b..000000000
--- a/third_party/vtr/libs/pugiutil/src/pugixml_loc.cc
+++ /dev/null
@@ -1,49 +0,0 @@
-#include <cstdio>
-#include <algorithm>
-#include "pugixml_util.hpp"
-#include "pugixml_loc.hpp"
-
-namespace pugiutil {
-
-//Return the line number from the given offset
-std::size_t loc_data::line(std::ptrdiff_t offset) const {
-    auto it = std::lower_bound(offsets_.begin(), offsets_.end(), offset);
-    std::size_t index = it - offsets_.begin();
-
-    return 1 + index;
-}
-
-//Return the column number from the given offset
-std::size_t loc_data::col(std::ptrdiff_t offset) const {
-    auto it = std::lower_bound(offsets_.begin(), offsets_.end(), offset);
-    std::size_t index = it - offsets_.begin();
-
-    return index == 0 ? offset + 1 : offset - offsets_[index - 1];
-}
-
-void loc_data::build_loc_data() {
-    FILE* f = fopen(filename_.c_str(), "rb");
-
-    if (f == nullptr) {
-        throw XmlError("Failed to open file", filename_);
-    }
-
-    std::ptrdiff_t offset = 0;
-
-    char buffer[1024];
-    std::size_t size;
-
-    while ((size = fread(buffer, 1, sizeof(buffer), f)) > 0) {
-        for (std::size_t i = 0; i < size; ++i) {
-            if (buffer[i] == '\n') {
-                offsets_.push_back(offset + i);
-            }
-        }
-
-        offset += size;
-    }
-
-    fclose(f);
-}
-
-} // namespace pugiutil
diff --git a/third_party/vtr/libs/pugiutil/src/pugixml_loc.hpp b/third_party/vtr/libs/pugiutil/src/pugixml_loc.hpp
deleted file mode 100644
index 0f597a593..000000000
--- a/third_party/vtr/libs/pugiutil/src/pugixml_loc.hpp
+++ /dev/null
@@ -1,51 +0,0 @@
-#ifndef PUGIXML_LOC_H
-#define PUGIXML_LOC_H
-/*
- * This file contains utilities for the  PUGI XML parser,
- * hanlding the retrieval of line numbers (useful for error messages)
- */
-
-#include <vector>
-#include "pugixml.hpp"
-
-namespace pugiutil {
-
-//pugi offset to line/col data based on: https://stackoverflow.com/questions/21003471/convert-pugixmls-result-offset-to-column-line
-class loc_data {
-  public:
-    loc_data() = default;
-
-    loc_data(std::string filename_val)
-        : filename_(filename_val) {
-        build_loc_data();
-    }
-
-    //The filename this location data is for
-    const std::string& filename() const { return filename_; }
-    const char* filename_c_str() const { return filename_.c_str(); }
-
-    //Convenience wrapper which takes xml_nodes
-    std::size_t line(pugi::xml_node node) const {
-        return line(node.offset_debug());
-    }
-
-    //Convenience wrapper which takes xml_nodes
-    std::size_t col(pugi::xml_node node) const {
-        return col(node.offset_debug());
-    }
-
-    //Return the line number from the given offset
-    std::size_t line(std::ptrdiff_t offset) const;
-
-    //Return the column number from the given offset
-    std::size_t col(std::ptrdiff_t offset) const;
-
-  private:
-    void build_loc_data();
-
-    std::string filename_;
-    std::vector<std::ptrdiff_t> offsets_;
-};
-} // namespace pugiutil
-
-#endif
diff --git a/third_party/vtr/libs/pugiutil/src/pugixml_util.cc b/third_party/vtr/libs/pugiutil/src/pugixml_util.cc
deleted file mode 100644
index d4d2a3982..000000000
--- a/third_party/vtr/libs/pugiutil/src/pugixml_util.cc
+++ /dev/null
@@ -1,298 +0,0 @@
-#include "pugixml_util.hpp"
-#include <algorithm>
-
-namespace pugiutil {
-
-//Loads the XML file specified by filename into the passed pugi::xml_document
-//
-//Returns loc_data look-up for xml node line numbers
-loc_data load_xml(pugi::xml_document& doc,      //Document object to be loaded with file contents
-                  const std::string filename) { //Filename to load from
-    auto location_data = loc_data(filename);
-
-    auto load_result = doc.load_file(filename.c_str());
-    if (!load_result) {
-        std::string msg = load_result.description();
-        auto line = location_data.line(load_result.offset);
-        auto col = location_data.col(load_result.offset);
-        throw XmlError("Unable to load XML file '" + filename + "', " + msg
-                           + " (line: " + std::to_string(line) + " col: " + std::to_string(col) + ")",
-                       filename.c_str(), line);
-    }
-
-    return location_data;
-}
-
-//Gets the first child element of the given name and returns it.
-//
-//  node - The parent xml node
-//  child_name - The child tag name
-//  loc_data - XML file location data
-//  req_opt - Whether the child tag is required (will error if required and not found) or optional. Defaults to REQUIRED
-pugi::xml_node get_first_child(const pugi::xml_node node,
-                               const std::string& child_name,
-                               const loc_data& loc_data,
-                               const ReqOpt req_opt) {
-    pugi::xml_node child = node.child(child_name.c_str());
-    if (!child && req_opt == REQUIRED) {
-        throw XmlError("Missing required child node '" + child_name + "' in parent node '" + node.name() + "'",
-                       loc_data.filename(), loc_data.line(node));
-    }
-    return child;
-}
-
-//Gets the child element of the given name and returns it.
-//Errors if more than one matching child is found.
-//
-//  node - The parent xml node
-//  child_name - The child tag name
-//  loc_data - XML file location data
-//  req_opt - Whether the child tag is required (will error if required and not found) or optional. Defaults to REQUIRED
-pugi::xml_node get_single_child(const pugi::xml_node node,
-                                const std::string& child_name,
-                                const loc_data& loc_data,
-                                const ReqOpt req_opt) {
-    pugi::xml_node child = get_first_child(node, child_name, loc_data, req_opt);
-
-    if (child && child.next_sibling(child_name.c_str())) {
-        throw XmlError("Multiple child '" + child_name + "' nodes found in parent node '" + node.name() + "' (only one expected)",
-                       loc_data.filename(), loc_data.line(node));
-    }
-
-    return child;
-}
-
-//Counts the number of child nodes of type 'child_name'
-//
-//  node - The parent xml node
-//  child_name - The child tag name
-//  loc_data - XML file location data
-//  req_opt - Whether the child tag is required (will error if required and not found) or optional. Defaults to REQUIRED
-size_t count_children(const pugi::xml_node node,
-                      const std::string& child_name,
-                      const loc_data& loc_data,
-                      const ReqOpt req_opt) {
-    size_t count = 0;
-
-    pugi::xml_node child = get_first_child(node, child_name, loc_data, req_opt);
-
-    while (child) {
-        ++count;
-        child = child.next_sibling(child_name.c_str());
-    }
-
-    //Note that we don't do any error checking here since get_first_child does the existance check
-
-    return count;
-}
-
-//Counts the number of child nodes (any type)
-//
-//  node - The parent xml node
-//  loc_data - XML file location data
-//  req_opt - Whether the child tag is required (will error if required and not found) or optional. Defaults to REQUIRED
-size_t count_children(const pugi::xml_node node,
-                      const loc_data& loc_data,
-                      const ReqOpt req_opt) {
-    size_t count = std::distance(node.begin(), node.end());
-
-    if (count == 0 && req_opt == REQUIRED) {
-        throw XmlError("Expected child node(s) in node '" + std::string(node.name()) + "'",
-                       loc_data.filename(), loc_data.line(node));
-    }
-
-    return count;
-}
-
-//Throws a well formatted error if the actual count of child nodes name 'child_name' does not equal the 'expected_count'
-//
-//  node - The parent xml node
-//  loc_data - XML file location data
-//  expected_count - The expected number of child nodes
-void expect_child_node_count(const pugi::xml_node node,
-                             std::string child_name,
-                             size_t expected_count,
-                             const loc_data& loc_data) {
-    size_t actual_count = count_children(node, child_name, loc_data, OPTIONAL);
-
-    if (actual_count != expected_count) {
-        throw XmlError("Found " + std::to_string(actual_count)
-                           + " '" + child_name + "' child node(s) of "
-                           + "'" + std::string(node.name()) + "'"
-                           + " (expected " + std::to_string(expected_count) + ")",
-                       loc_data.filename(), loc_data.line(node));
-    }
-}
-
-//Throws a well formatted error if the actual child count does not equal the 'expected_count'
-//
-//  node - The parent xml node
-//  loc_data - XML file location data
-//  expected_count - The expected number of child nodes
-void expect_child_node_count(const pugi::xml_node node,
-                             size_t expected_count,
-                             const loc_data& loc_data) {
-    size_t actual_count = count_children(node, loc_data, OPTIONAL);
-
-    if (actual_count != expected_count) {
-        throw XmlError("Found " + std::to_string(actual_count)
-                           + " child node(s) of "
-                           + "'" + std::string(node.name()) + "'"
-                           + " (expected " + std::to_string(expected_count) + ")",
-                       loc_data.filename(), loc_data.line(node));
-    }
-}
-
-//Throws a well formatted error if any of node's children are not part of child_names.
-//Note this does not check whether the nodes in 'attribute_names' actually exist.
-//
-//  node - The parent xml node
-//  child_names - expected attribute names
-//  loc_data - XML file location data
-void expect_only_children(const pugi::xml_node node,
-                          std::vector<std::string> child_names,
-                          const loc_data& loc_data) {
-    for (auto child : node.children()) {
-        std::string child_name = child.name();
-        auto iter = std::find(child_names.begin(),
-                              child_names.end(),
-                              child_name);
-        if (iter == child_names.end()) {
-            std::string msg = "Unexpected child '" + child_name + "'"
-                              + " of node '" + node.name() + "'.";
-
-            if (child_names.size() > 0) {
-                msg += " Expected (possibly) one of: ";
-                for (size_t i = 0; i < child_names.size(); i++) {
-                    if (i != 0) {
-                        msg += ", ";
-                    }
-                    if (i > 0 && i == child_names.size() - 1) {
-                        msg += "or ";
-                    }
-                    msg += "'" + child_names[i] + "'";
-                }
-                msg += ".";
-            }
-
-            throw XmlError(msg, loc_data.filename(), loc_data.line(child));
-        }
-    }
-}
-
-//Throws a well formatted error if any attribute other than those named in 'attribute_names' are found on 'node' with an additional explanation.
-//Note this does not check whether the attribues in 'attribute_names' actually exist.
-//
-//  node - The parent xml node
-//  attribute_names - expected attribute names
-//  loc_data - XML file location data
-void expect_only_attributes(const pugi::xml_node node,
-                            std::vector<std::string> attribute_names,
-                            std::string explanation,
-                            const loc_data& loc_data) {
-    for (auto attrib : node.attributes()) {
-        std::string attrib_name = attrib.name();
-        auto iter = std::find(attribute_names.begin(),
-                              attribute_names.end(),
-                              attrib_name);
-        if (iter == attribute_names.end()) {
-            std::string msg = "Unexpected attribute '" + attrib_name + "'"
-                              + " found on node '" + node.name() + "'";
-
-            if (!explanation.empty()) {
-                msg += explanation;
-            }
-
-            msg += ".";
-
-            if (attribute_names.size() > 0) {
-                msg += " Expected (possibly) one of: ";
-                for (size_t i = 0; i < attribute_names.size(); i++) {
-                    if (i != 0) {
-                        msg += ", ";
-                    }
-                    if (i > 0 && i == attribute_names.size() - 1) {
-                        msg += "or ";
-                    }
-                    msg += "'" + attribute_names[i] + "'";
-                }
-                msg += ".";
-            }
-
-            throw XmlError(msg, loc_data.filename(), loc_data.line(node));
-        }
-    }
-}
-
-//Throws a well formatted error if any attribute other than those named in 'attribute_names' are found on 'node'.
-//Note this does not check whether the attribues in 'attribute_names' actually exist; for that use get_attribute().
-//
-//  node - The parent xml node
-//  attribute_names - expected attribute names
-//  loc_data - XML file location data
-void expect_only_attributes(const pugi::xml_node node,
-                            std::vector<std::string> attribute_names,
-                            const loc_data& loc_data) {
-    expect_only_attributes(node, attribute_names, "", loc_data);
-}
-
-//Counts the number of attributes on the specified node
-//
-//  node - The xml node
-//  loc_data - XML file location data
-//  req_opt - Whether any attributes are required (will error if required and none are found) or optional. Defaults to REQUIRED
-size_t count_attributes(const pugi::xml_node node,
-                        const loc_data& loc_data,
-                        const ReqOpt req_opt) {
-    size_t count = std::distance(node.attributes_begin(), node.attributes_end());
-
-    if (count == 0 && req_opt == REQUIRED) {
-        throw XmlError("Expected attributes on node'" + std::string(node.name()) + "'",
-                       loc_data.filename(), loc_data.line(node));
-    }
-
-    return count;
-}
-
-//Gets a named property on an node and returns it.
-//
-//  node - The xml node
-//  attr_name - The attribute name
-//  loc_data - XML file location data
-//  req_opt - Whether the peropry is required (will error if required and not found) or optional. Defaults to REQUIRED
-pugi::xml_attribute get_attribute(const pugi::xml_node node,
-                                  const std::string& attr_name,
-                                  const loc_data& loc_data,
-                                  const ReqOpt req_opt) {
-    pugi::xml_attribute attr = node.attribute(attr_name.c_str());
-
-    if (!attr && req_opt == REQUIRED) {
-        throw XmlError("Expected '" + attr_name + "' attribute on node '" + node.name() + "'",
-                       loc_data.filename(), loc_data.line(node));
-    }
-
-    return attr;
-}
-
-//Checks that the given node matches the given tag name.
-//
-//  node - The xml node
-//  tag_name - The expected tag name
-//  loc_data - XML file location data
-//  req_opt - Whether the tag name is required (will error if required and not found) or optional. Defaults to REQUIRED
-bool check_node(const pugi::xml_node node,
-                const std::string& tag_name,
-                const loc_data& loc_data,
-                const ReqOpt req_opt) {
-    if (node.name() == tag_name) {
-        return true;
-    } else {
-        if (req_opt == REQUIRED) {
-            throw XmlError(std::string("Unexpected node type '") + node.name() + "' expected '" + tag_name + "'",
-                           loc_data.filename(), loc_data.line(node));
-        }
-        return false;
-    }
-}
-
-} // namespace pugiutil
diff --git a/third_party/vtr/libs/pugiutil/src/pugixml_util.hpp b/third_party/vtr/libs/pugiutil/src/pugixml_util.hpp
deleted file mode 100644
index 8e55f232b..000000000
--- a/third_party/vtr/libs/pugiutil/src/pugixml_util.hpp
+++ /dev/null
@@ -1,198 +0,0 @@
-#ifndef PUGIXML_UTIL_H
-#define PUGIXML_UTIL_H
-/*
- * This file contains utilities for the  PUGI XML parser.
- *
- * They primarily relate to:
- *   - Checking for node/attribute exitance and reporting errors if not
- *   - Misc. utilities like counting tags
- *
- * Using these utilities simplifies error handling while manipulating XML
- * since the user doesn't need to explicitly check for node/attribute existance
- * (by default most of these functions will raise exceptions with useful error
- * messages if the requested item does not exists).
- */
-
-#include <vector>
-#include <stdexcept>
-#include <cstdio>
-
-#include "pugixml.hpp"
-
-#include "pugixml_loc.hpp"
-
-namespace pugiutil {
-
-//An error produced while getting an XML node/attribute
-class XmlError : public std::runtime_error {
-  public:
-    XmlError(std::string msg = "", std::string new_filename = "", size_t new_linenumber = -1)
-        : std::runtime_error(msg)
-        , filename_(new_filename)
-        , linenumber_(new_linenumber) {}
-
-    //Returns the filename associated with this error
-    //returns an empty string if none is specified
-    std::string filename() const { return filename_; }
-    const char* filename_c_str() const { return filename_.c_str(); }
-
-    //Returns the line number associated with this error
-    //returns zero if none is specified
-    size_t line() const { return linenumber_; }
-
-  private:
-    std::string filename_;
-    size_t linenumber_;
-};
-
-//Loads the XML file specified by filename into the passed pugi::xml_docment
-//
-//Returns loc_data look-up for xml node line numbers
-loc_data load_xml(pugi::xml_document& doc,     //Document object to be loaded with file contents
-                  const std::string filename); //Filename to load from
-
-//Defines whether something (e.g. a node/attribute) is optional or required.
-//  We use this to improve clarity at the function call site (compared to just
-//  using boolean values).
-//
-//  For example:
-//
-//      auto node = get_first_child(node, "port", loc_data, true);
-//
-//  is ambiguous without looking up what the 4th argument represents, where as:
-//
-//      auto node = get_first_child(node, "port", loc_data, REQUIRED);
-//
-//  is much more explicit.
-enum ReqOpt {
-    REQUIRED,
-    OPTIONAL
-};
-
-//Gets the first child element of the given name and returns it.
-//
-//  node - The parent xml node
-//  child_name - The child tag name
-//  loc_data - XML file location data
-//  req_opt - Whether the child tag is required (will error if required and not found) or optional. Defaults to REQUIRED
-pugi::xml_node get_first_child(const pugi::xml_node node,
-                               const std::string& child_name,
-                               const loc_data& loc_data,
-                               const ReqOpt req_opt = REQUIRED);
-
-//Gets the child element of the given name and returns it.
-//Errors if more than one matching child is found.
-//
-//  node - The parent xml node
-//  child_name - The child tag name
-//  loc_data - XML file location data
-//  req_opt - Whether the child tag is required (will error if required and not found) or optional. Defaults to REQUIRED
-pugi::xml_node get_single_child(const pugi::xml_node node,
-                                const std::string& child_name,
-                                const loc_data& loc_data,
-                                const ReqOpt req_opt = REQUIRED);
-
-//Counts the number of child nodes of type 'child_name'
-//
-//  node - The parent xml node
-//  child_name - The child tag name
-//  loc_data - XML file location data
-//  req_opt - Whether the child tag is required (will error if required and not found) or optional. Defaults to REQUIRED
-size_t count_children(const pugi::xml_node node,
-                      const std::string& child_name,
-                      const loc_data& loc_data,
-                      const ReqOpt req_opt = REQUIRED);
-
-//Counts the number of child nodes (any type)
-//
-//  node - The parent xml node
-//  loc_data - XML file location data
-//  req_opt - Whether the child tag is required (will error if required and not found) or optional. Defaults to REQUIRED
-size_t count_children(const pugi::xml_node node,
-                      const loc_data& loc_data,
-                      const ReqOpt req_opt);
-
-//Throws a well formatted error if the actual count of child nodes named 'child_name' does not equal the 'expected_count'
-//
-//  node - The parent xml node
-//  loc_data - XML file location data
-//  expected_count - The expected number of child nodes
-void expect_child_node_count(const pugi::xml_node node,
-                             std::string child_name,
-                             size_t expected_count,
-                             const loc_data& loc_data);
-
-//Throws a well formatted error if the actual child count does not equal the 'expected_count'
-//
-//  node - The parent xml node
-//  loc_data - XML file location data
-//  expected_count - The expected number of child nodes
-void expect_child_node_count(const pugi::xml_node node,
-                             size_t expected_count,
-                             const loc_data& loc_data);
-
-//Throws a well formatted error if any of node's children are not part of child_names.
-//Note this does not check whether the nodes in 'child_names' actually exist.
-//
-//  node - The parent xml node
-//  child_names - expected attribute names
-//  loc_data - XML file location data
-void expect_only_children(const pugi::xml_node node,
-                          std::vector<std::string> child_names,
-                          const loc_data& loc_data);
-
-//Throws a well formatted error if any attribute other than those named in 'attribute_names' are found on 'node'.
-//Note this does not check whether the attribues in 'attribute_names' actually exist.
-//
-//  node - The parent xml node
-//  attribute_names - expected attribute names
-//  loc_data - XML file location data
-void expect_only_attributes(const pugi::xml_node node,
-                            std::vector<std::string> attribute_names,
-                            const loc_data& loc_data);
-
-//Throws a well formatted error if any attribute other than those named in 'attribute_names' are found on 'node' with an additional explanation.
-//Note this does not check whether the attribues in 'attribute_names' actually exist.
-//
-//  node - The parent xml node
-//  attribute_names - expected attribute names
-//  loc_data - XML file location data
-void expect_only_attributes(const pugi::xml_node node,
-                            std::vector<std::string> attribute_names,
-                            std::string explanation,
-                            const loc_data& loc_data);
-
-//Counts the number of attributes on the specified node
-//
-//  node - The xml node
-//  loc_data - XML file location data
-//  req_opt - Whether any attributes are required (will error if required and none are found) or optional. Defaults to REQUIRED
-size_t count_attributes(const pugi::xml_node node,
-                        const loc_data& loc_data,
-                        const ReqOpt req_opt = REQUIRED);
-
-//Gets a named property on an node and returns it.
-//
-//  node - The xml node
-//  attr_name - The attribute name
-//  loc_data - XML file location data
-//  req_opt - Whether the attribute is required (will error if required and not found) or optional. Defaults to REQUIRED
-pugi::xml_attribute get_attribute(const pugi::xml_node node,
-                                  const std::string& attr_name,
-                                  const loc_data& loc_data,
-                                  const ReqOpt req_opt = REQUIRED);
-
-//Checks that the given node matches the given tag name.
-//
-//  node - The xml node
-//  tag_name - The expected tag name
-//  loc_data - XML file location data
-//  req_opt - Whether the tag name is required (will error if required and not found) or optional. Defaults to REQUIRED
-bool check_node(const pugi::xml_node node,
-                const std::string& tag_name,
-                const loc_data& loc_data,
-                const ReqOpt req_opt = REQUIRED);
-
-} // namespace pugiutil
-
-#endif
diff --git a/third_party/vtr/libs/rtlnumber/.gitignore b/third_party/vtr/libs/rtlnumber/.gitignore
deleted file mode 100644
index 620109a7a..000000000
--- a/third_party/vtr/libs/rtlnumber/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-rtl_number
diff --git a/third_party/vtr/libs/rtlnumber/CMakeLists.txt b/third_party/vtr/libs/rtlnumber/CMakeLists.txt
deleted file mode 100644
index 33c84b3b9..000000000
--- a/third_party/vtr/libs/rtlnumber/CMakeLists.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-cmake_minimum_required(VERSION 3.9)
-
-project("librtlnumber")
-
-option(RTL_ALLOW_UNKNOWN_COMPARE "Skips initial check for unknowns in comparison and compares MSB-to-LSB" OFF)
-
-if(RTL_ALLOW_UNKNOWN_COMPARE)
-    add_definitions(-DRTL_ALLOW_UNKNOWN_COMPARE)
-endif()
-
-file(GLOB_RECURSE EXEC_SOURCES main.cpp)
-file(GLOB_RECURSE LIB_SOURCES src/*.cpp)
-file(GLOB_RECURSE LIB_HEADERS src/include/*.hpp)
-files_to_dirs(LIB_HEADERS LIB_INCLUDE_DIRS)
-
-#Create the library
-add_library(librtlnumber STATIC
-             ${LIB_HEADERS}
-             ${LIB_SOURCES})
-target_include_directories(librtlnumber PUBLIC ${LIB_INCLUDE_DIRS})
-set_target_properties(librtlnumber PROPERTIES PREFIX "") #Avoid extra 'lib' prefix
-
-#Create the test executable
-target_link_libraries(librtlnumber)
-
-#Create the executable
-add_executable(rtl_number ${EXEC_SOURCES})
-
-
-target_link_libraries(rtl_number
-                        librtlnumber)
-
-install(TARGETS rtl_number librtlnumber DESTINATION bin)
diff --git a/third_party/vtr/libs/rtlnumber/Makefile b/third_party/vtr/libs/rtlnumber/Makefile
deleted file mode 100644
index 0e1fd16d5..000000000
--- a/third_party/vtr/libs/rtlnumber/Makefile
+++ /dev/null
@@ -1,81 +0,0 @@
-#Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
-#         Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com) and
-#          Dr. Kenneth B. Kent (ken@unb.ca)
-#          for the Reconfigurable Computing Research Lab at the
-#           Univerity of New Brunswick in Fredericton, New Brunswick, Canada
-
-# If the first argument is "run"...
-ifeq (build,$(firstword $(MAKECMDGOALS)))
-  # use the rest as arguments for "make"
-  RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
-  # ...and turn them into do-nothing targets
-  $(eval $(RUN_ARGS):;@:)
-endif
-ifeq (run,$(firstword $(MAKECMDGOALS)))	
-  # use the rest as arguments for "make"
-  RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
-  # ...and turn them into do-nothing targets
-  $(eval $(RUN_ARGS):;@:)
-endif
-ifeq (gdb,$(firstword $(MAKECMDGOALS)))	
-  # use the rest as arguments for "make"
-  RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
-  # ...and turn them into do-nothing targets
-  $(eval $(RUN_ARGS):;@:)
-endif
-ifeq (valgrind,$(firstword $(MAKECMDGOALS)))	
-  # use the rest as arguments for "make"
-  RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
-  # ...and turn them into do-nothing targets
-  $(eval $(RUN_ARGS):;@:)
-endif
-ifeq (debug,$(firstword $(MAKECMDGOALS)))	
-  # use the rest as arguments for "make"
-  RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
-  # ...and turn them into do-nothing targets
-  $(eval $(RUN_ARGS):;@:)
-endif
-
-INCLUDE =-Isrc/include
-SRC =src/*.cpp
-
-BIN = rtl_number
-
-C = clang++ -std=c++14 -lpthread #-DENABLE_DEBUG_MESSAGES
-
-cleanup_flags=\
--ferror-limit=1000 \
--Werror \
--Wpedantic \
--Weverything \
--Wall \
--ggdb -O0 -g \
--Wno-c++98-compat \
--Wno-padded
-#  \
-# -fsanitize=address -fno-omit-frame-pointer -fno-optimize-sibling-calls
-
-PHONY: error
-
-error: 
-	echo "can only use 'clean', 'debug <testname>.cpp', 'build <testname>.cpp' or 'run <arguments>'"
-
-debug: clean
-	mkdir -p bin
-	$(C) $(cleanup_flags) $(INCLUDE) $(SRC) main.cpp -o $(BIN)
-
-build: clean
-	$(C) $(INCLUDE) $(SRC) main.cpp -o $(BIN)
-
-run:
-	./$(BIN) $(RUN_ARGS) 
-
-valgrind: build
-	valgrind --tool=helgrind $(BIN) $(RUN_ARGS) 
-
-gdb:
-	gdb --args $(BIN) $(RUN_ARGS)
-
-clean:
-	$(RM) -Rf bin
-
diff --git a/third_party/vtr/libs/rtlnumber/README.md b/third_party/vtr/libs/rtlnumber/README.md
deleted file mode 100644
index 28054545c..000000000
--- a/third_party/vtr/libs/rtlnumber/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-librtlnumber - Register Transfer Level (RTL) Verilog Number Library
-
-Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
-         Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com)
-		  and Dr. Kenneth B. Kent (ken@unb.ca)
-           for the Reconfigurable Computing Research Lab at the
-            Univerity of New Brunswick in Fredericton, New Brunswick, Canada
-
-Arbitrary Length Verilog Number Library that can Handle `X` and `Z` inputs.
diff --git a/third_party/vtr/libs/rtlnumber/main.cpp b/third_party/vtr/libs/rtlnumber/main.cpp
deleted file mode 100644
index 9a59be4d8..000000000
--- a/third_party/vtr/libs/rtlnumber/main.cpp
+++ /dev/null
@@ -1,200 +0,0 @@
-/* Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
- *           Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com) and
- *            Dr. Kenneth B. Kent (ken@unb.ca)
- *            for the Reconfigurable Computing Research Lab at the
- *             Univerity of New Brunswick in Fredericton, New Brunswick, Canada
- */
-
-#include <iostream>
-#include <vector>
-#include <string>
-#include <algorithm>
-
-#include "rtl_int.hpp"
-#include "rtl_utils.hpp"
-
-#define bad_ops(test) _bad_ops(test, __func__, __LINE__)
-inline static std::string _bad_ops(std::string test, const char* FUNCT, int LINE) {
-    std::cerr << "INVALID INPUT OPS: (" << test << ")@" << FUNCT << "::" << std::to_string(LINE) << std::endl;
-    std::abort();
-}
-
-/***
- *     __   __       ___  __   __           ___       __       
- *    /  ` /  \ |\ |  |  |__) /  \ |       |__  |    /  \ |  | 
- *    \__, \__/ | \|  |  |  \ \__/ |___    |    |___ \__/ |/\| 
- *                                                             
- * 	This is used for testing purposes only, unused in ODIN as the input is already preprocessed
- */
-static std::string arithmetic(std::string op, std::string a_in) {
-    VNumber a(a_in);
-    VNumber result;
-
-    if (op == "is_true") {
-        result = VNumber(V_TRUE(a));
-    } else if (op == "is_false") {
-        result = VNumber(V_FALSE(a));
-    } else if (op == "is_unk") {
-        result = VNumber(V_UNK(a));
-    } else if (op == "is_x") {
-        result = VNumber(V_IS_X(a));
-    } else if (op == "is_z") {
-        result = VNumber(V_IS_Z(a));
-    } else if (op == "is_unsigned") {
-        result = VNumber(V_IS_UNSIGNED(a));
-    } else if (op == "is_signed") {
-        result = VNumber(V_IS_SIGNED(a));
-    } else if (op == "to_unsigned") {
-        result = V_UNSIGNED(a);
-    } else if (op == "to_signed") {
-        result = V_SIGNED(a);
-    } else if (op == "~") {
-        result = V_BITWISE_NOT(a);
-    } else if (op == "-") {
-        result = V_MINUS(a);
-    } else if (op == "+") {
-        result = V_ADD(a);
-    } else if (op == "&") {
-        result = V_BITWISE_AND(a);
-    } else if (op == "|") {
-        result = V_BITWISE_OR(a);
-    } else if (op == "^") {
-        result = V_BITWISE_XOR(a);
-    } else if (op == "~&") {
-        result = V_BITWISE_NAND(a);
-    } else if (op == "~|") {
-        result = V_BITWISE_NOR(a);
-    } else if (op == "~^" || op == "^~") {
-        result = V_BITWISE_XNOR(a);
-    } else if (op == "!") {
-        result = V_LOGICAL_NOT(a);
-    } else {
-        bad_ops(op);
-    }
-
-    return result.to_verilog_bitstring();
-}
-
-static std::string arithmetic(std::string a_in, std::string op, std::string b_in) {
-    VNumber a(a_in);
-    VNumber b(b_in);
-    VNumber result;
-
-    if (op == "&") {
-        result = V_BITWISE_AND(a, b);
-    } else if (op == "|") {
-        result = V_BITWISE_OR(a, b);
-    } else if (op == "^") {
-        result = V_BITWISE_XOR(a, b);
-    } else if (op == "~&") {
-        result = V_BITWISE_NAND(a, b);
-    } else if (op == "~|") {
-        result = V_BITWISE_NOR(a, b);
-    } else if (op == "~^" || op == "^~") {
-        result = V_BITWISE_XNOR(a, b);
-    } else if (op == "===") {
-        result = V_CASE_EQUAL(a, b);
-    } else if (op == "!==") {
-        result = V_CASE_NOT_EQUAL(a, b);
-    } else if (op == "<<") {
-        result = V_SHIFT_LEFT(a, b);
-    } else if (op == "<<<") {
-        result = V_SIGNED_SHIFT_LEFT(a, b);
-    } else if (op == ">>") {
-        result = V_SHIFT_RIGHT(a, b);
-    } else if (op == ">>>") {
-        result = V_SIGNED_SHIFT_RIGHT(a, b);
-    } else if (op == "&&") {
-        result = V_LOGICAL_AND(a, b);
-    } else if (op == "||") {
-        result = V_LOGICAL_OR(a, b);
-    } else if (op == "<") {
-        result = V_LT(a, b);
-    } else if (op == ">") {
-        result = V_GT(a, b);
-    } else if (op == "<=") {
-        result = V_LE(a, b);
-    } else if (op == ">=") {
-        result = V_GE(a, b);
-    } else if (op == "==") {
-        result = V_EQUAL(a, b);
-    } else if (op == "!=") {
-        result = V_NOT_EQUAL(a, b);
-    } else if (op == "+") {
-        result = V_ADD(a, b);
-    } else if (op == "-") {
-        result = V_MINUS(a, b);
-    } else if (op == "*") {
-        result = V_MULTIPLY(a, b);
-    } else if (op == "**") {
-        result = V_POWER(a, b);
-    } else if (op == "/") {
-        result = V_DIV(a, b);
-    } else if (op == "%") {
-        result = V_MOD(a, b);
-    } else {
-        bad_ops(op);
-    }
-
-    return result.to_verilog_bitstring();
-}
-
-int main(int argc, char** argv) {
-    std::vector<std::string> input;
-    for (int i = 0; i < argc; i++)
-        input.push_back(argv[i]);
-
-    std::string result = "";
-
-    if (argc < 3) {
-        ERR_MSG("Not Enough Arguments: " << std::to_string(argc - 1));
-
-        return -1;
-    } else if (argc == 3) {
-        result = arithmetic(input[1], input[2]);
-    } else if (argc == 4 && input[1] == "display") {
-        VNumber a(input[3]);
-        result = V_STRING(a, input[2][0]);
-    } else if (argc == 4 && input[1] == "bufif0") {
-        VNumber bus(input[2]);
-        VNumber trigger(input[3]);
-        result = V_BITWISE_BUFIF0(bus, trigger).to_verilog_bitstring();
-    } else if (argc == 4 && input[1] == "bufif1") {
-        VNumber bus(input[2]);
-        VNumber trigger(input[3]);
-        result = V_BITWISE_BUFIF1(bus, trigger).to_verilog_bitstring();
-    } else if (argc == 4 && input[1] == "notif0") {
-        VNumber bus(input[2]);
-        VNumber trigger(input[3]);
-        result = V_BITWISE_NOTIF0(bus, trigger).to_verilog_bitstring();
-    } else if (argc == 4 && input[1] == "notif1") {
-        VNumber bus(input[2]);
-        VNumber trigger(input[3]);
-        result = V_BITWISE_NOTIF1(bus, trigger).to_verilog_bitstring();
-    } else if (argc == 4) {
-        result = arithmetic(input[1], input[2], input[3]);
-    } else if (argc == 6 && (input[2] == "?" && input[4] == ":")) {
-        VNumber a(input[1]);
-        VNumber b(input[3]);
-        VNumber c(input[5]);
-
-        result = V_TERNARY(a, b, c).to_verilog_bitstring();
-    } else if (argc == 6 && (input[1] == "{" && input[3] == "," && input[5] == "}")) {
-        VNumber a(input[2]);
-        VNumber b(input[4]);
-
-        result = V_CONCAT({a, b}).to_verilog_bitstring();
-    } else if (argc == 7 && (input[1] == "{" && input[3] == "{" && input[5] == "}" && input[6] == "}")) {
-        VNumber n_times(input[2]);
-        VNumber replicant(input[4]);
-
-        result = V_REPLICATE(replicant, n_times).to_verilog_bitstring();
-    } else {
-        ERR_MSG("invalid Arguments: " << std::to_string(argc - 1));
-        return -1;
-    }
-
-    std::cout << result << std::endl;
-
-    return 0;
-}
diff --git a/third_party/vtr/libs/rtlnumber/regression_tests/basic_regression_tests.csv b/third_party/vtr/libs/rtlnumber/regression_tests/basic_regression_tests.csv
deleted file mode 100644
index 444bf1199..000000000
--- a/third_party/vtr/libs/rtlnumber/regression_tests/basic_regression_tests.csv
+++ /dev/null
@@ -1,310 +0,0 @@
-#####################
-# Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
-#  Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com),
-#   Alexandrea Demmings (alexandrea.demmings@unb.ca, lxdemmings@gmail.com) and
-#    Dr. Kenneth B. Kent (ken@unb.ca)
-#    for the Reconfigurable Computing Research Lab at the
-#     Univerity of New Brunswick in Fredericton, New Brunswick, Canada
-#####################
-
-# truth test
-simple_true,        is_true, 1'b1, 1'b1
-simple_fail,        is_true, 1'b0, 1'b0
-decimal_true,       is_true, 1, 1'b1
-decimal_fail,       is_true, 0, 1'b0
-complex_true,       is_true, 3'b1xz, 1'b1
-complex_fail,       is_true, 3'b0xz, 1'b0
-unknown_x,          is_true, 1'bx, 1'b0
-unknown_z,          is_true, 1'bz, 1'b0
-large_number_pass,  is_true, 128'h8000_0000_0000_0000, 1'b1
-
-# type test
-simple_x_pass,      is_x,   1'bx,    1'b1
-simple_x_fail,      is_x,   1'b1,    1'b0
-complex_x_pass,     is_x,   4'bxxxx, 1'b1
-complex_x_fail,     is_x,   4'bzxxx, 1'b0
-simple_z_pass,      is_z,   1'bz,    1'b1
-simple_z_fail,      is_z,   1'b1,    1'b0
-complex_z_pass,     is_z,   4'bzzzz, 1'b1
-complex_z_fail,     is_z,   4'bxzzz, 1'b0
-simple_unk_x_pass,  is_unk, 2'bx1,   1'b1
-simple_unk_z_pass,  is_unk, 2'bz1,   1'b1
-simple_unk_fail,    is_unk, 2'b10,   1'b0
-
-# sign test
-simple_is_unsigned_pass,     is_unsigned,    2'b11,  1'b1
-simple_is_unsigned_fail,     is_unsigned,    2'sb11, 1'b0
-simple_is_signed_pass,       is_signed,      2'b11,  1'b0
-simple_is_signed_fail,       is_signed,      2'sb11, 1'b1
-
-# type conversion test
-simple_is_unsigned_1,        to_unsigned,    2'b11,  2'b11
-simple_is_unsigned_2,        to_unsigned,    2'sb11, 2'b11
-simple_is_signed_1,          to_signed,      2'b11,  2'sb11
-simple_is_signed_2,          to_signed,      2'sb11, 2'sb11
-
-# string display
-simple_string_b,          display, b, 64'd18446744073709551615, 1111111111111111111111111111111111111111111111111111111111111111
-simple_string_B,          display, B, 65'd18446744073709551616, 10000000000000000000000000000000000000000000000000000000000000000
-simple_string_u,          display, u, 1'bz, 1'b0        # todo, figure out the real result for this
-simple_string_U,          display, U, 1'bx, 1'b0        # todo, figure out the real result for this
-simple_string_z,          display, z, 1'bz, z
-simple_string_Z,          display, Z, 1'bx, X
-simple_string_s,          display, s, "hello world", hello world
-simple_string_S,          display, S, "hello world", hello world
-simple_string_o,          display, o, 64'd18446744073709551615, 1777777777777777777777
-simple_string_O,          display, O, 65'd18446744073709551616, 2000000000000000000000
-simple_string_h,          display, h, 64'd18446744073709551615, ffffffffffffffff
-simple_string_H,          display, H, 64'd18446744073709551614, FFFFFFFFFFFFFFFE
-simple_string_d,          display, d, 265663, 265663
-simple_string_D,          display, D, 265663, 265663
-simple_string_c,          display, c, "hello world", h
-simple_string_C,          display, C, "world", w
-
-# string test
-simple_string,          display,        s,   "hello world", hello world
-string_compare_pass,    "hello world",  ==,  "hello world", 1'b1
-string_compare_fail,    "hello world",  ==,  "hello", 1'b0
-string_ne_pass,         "hello world",  !=,  "hello", 1'b1
-string_ne_fail,         "hello world",  !=,  "hello world", 1'b0
-string_add,             "a",            +,    1,      "b"
-string_add_empty,       "b",             +,    "",    "b"
-
-# replicate test
-simple_replicate,       {, 3, {, 2'b10, }, }, 6'b101010
-
-# concat test
-simple_concat,          {, 2'b01, \, , 2'b10, }, 4'b0110
-
-# base conversions
-Decimal-To-Binary,      10,         ==, 4'b1010,    1'b1
-Binary-To-Decimal,      4'b1100,    ==, 12,         1'b1
-Decimal-To-Hex,         10,         ==, 'hA,        1'b1
-Hex-To-Decimal,         'hBF,       ==, 191,        1'b1
-Decimal-To-Octal,       10,         ==, 'o12,       1'b1
-Octal-To-Decimal,       'o37,       ==, 31,         1'b1
-
-# Sign modifier
-Sign_minus,		        -,  10'b1010101010,	'b0101010110
-Sign_plus,			    +,  4'b1010,	    'b1010
-
-# Simple Base 10 conversion
-simple_decimal_conversion,			+,  6,	6
-simple_decimal_minus,			    6, -, 1, 5
-simple_decimal_plus,			    6, +, 1, 7
-simple_decimal_shift_right,			6,  >>>, 1, 3
-simple_decimal_shift_left,			6,  <<<, 1, 12
-
-#################
-# tristate
-
-# single bit trigger
-# ======
-
-# bufif0
-bufif0_on,                 bufif0, 4'b10xz, 1'b0, 4'b10xx
-bufif0_off,                bufif0, 4'b10xz, 1'b1, 4'bzzzz
-bufif0_dc,                 bufif0, 4'b10xz, 1'bx, 4'bxxxx
-bufif0_hihz,               bufif0, 4'b10xz, 1'bz, 4'bxxxx
-
-# bufif1
-bufif1_on,                 bufif1, 4'b10xz, 1'b1, 4'b10xx
-bufif1_off,                bufif1, 4'b10xz, 1'b0, 4'bzzzz
-bufif1_dc,                 bufif1, 4'b10xz, 1'bx, 4'bxxxx
-bufif1_hihz,               bufif1, 4'b10xz, 1'bz, 4'bxxxx
-
-# notif0
-notif0_on,                 notif0, 4'b10xz, 1'b0, 4'b01xx
-notif0_off,                notif0, 4'b10xz, 1'b1, 4'bzzzz
-notif0_dc,                 notif0, 4'b10xz, 1'bx, 4'bxxxx
-notif0_hihz,               notif0, 4'b10xz, 1'bz, 4'bxxxx
-
-# notif1
-notif1_on,                 notif1, 4'b10xz, 1'b1, 4'b01xx
-notif1_off,                notif1, 4'b10xz, 1'b0, 4'bzzzz
-notif1_dc,                 notif1, 4'b10xz, 1'bx, 4'bxxxx
-notif1_hihz,               notif1, 4'b10xz, 1'bz, 4'bxxxx
-
-# wide trigger
-# ======
-
-# bufif0
-bufif0_upper,              bufif0, 4'b10xz, 4'b1100, 4'bzzxx
-bufif0_lower,              bufif0, 4'b10xz, 4'b0011, 4'b10zz
-
-# bufif1
-bufif1_upper,              bufif1, 4'b10xz, 4'b1100, 4'b10zz
-bufif1_lower,              bufif1, 4'b10xz, 4'b0011, 4'bzzxx
-
-# notif0
-notif0_upper,              notif0, 4'b10xz, 4'b1100, 4'bzzxx
-notif0_lower,              notif0, 4'b10xz, 4'b0011, 4'b01zz
-
-# notif1
-notif1_upper,              notif1, 4'b10xz, 4'b1100, 4'b01zz
-notif1_lower,              notif1, 4'b10xz, 4'b0011, 4'bzzxx
-
-
-# Reduction
-Reduction-and,			&,  4'b1010,	1'b0
-Reduction-or,			|,  4'b1010,	1'b1
-Reduction-xor,			^,  4'b1010,	1'b0
-Reduction-nand,			~&,	4'b1010,	1'b1
-Reduction-nor,			~|,	4'b1010,	1'b0
-Reduction-xnor,			~^,	4'b1010,	1'b1
-
-# Reduction unknowns
-Reduction-and-XZ,		&,  4'b10xz,	1'b0
-Reduction-or-XZ,		|,  4'b10xz,	1'b1
-Reduction-xor-XZ,		^,  4'b10xz,	1'bx
-Reduction-nand-XZ,		~&,	4'b10xz,	1'b1
-Reduction-nor-XZ,		~|,	4'b10xz,	1'b0
-Reduction-xnor-XZ,		~^,	4'b10xz,	1'bx
-
-# bitwise
-Bitwise-Not,	            ~,  4'b1010,	'b0101
-Bitwise-And,	4'b1010,	&,  4'b1000,	'b1000
-Bitwise-Or,		5'b1010,	|,  5'b1000,	'b1010
-Bitwise-Nor,	5'b1010,	~|, 5'b1000,	5'b10101
-Bitwise-Nand,   5'b1010,	~&, 5'b1000,	5'b10111
-Bitwise-Xnor,   5'b1010,	~^, 5'b1000,	5'b11101
-Bitwise-Xor,	5'b1010,	^,  5'b1000,	2'b10
-
-# bitwise unknowns
-Bitwise-Not-XZ,	                    ~,  4'b10xz,        4'b01xx
-Bitwise-And-XZ,	    8'bxxxxzzzz,    &,  8'b10xz10xz,    8'bx0xxx0xx
-Bitwise-Or-XZ,	    8'bxxxxzzzz,    |,  8'b10xz10xz,    8'b1xxx1xxx
-Bitwise-Nor-XZ,	    8'bxxxxzzzz,    ~|, 8'b10xz10xz,    8'b0xxx0xxx
-Bitwise-Nand-XZ,    8'bxxxxzzzz,    ~&, 8'b10xz10xz,    8'bx1xxx1xx
-Bitwise-Xnor-XZ,    8'bxxxxzzzz,    ~^, 8'b10xz10xz,    8'bxxxxxxxx
-Bitwise-Xor-XZ,	    8'bxxxxzzzz,    ^,  8'b10xz10xz,    8'bxxxxxxxx
-
-# case equivalence
-Case-eq,		5'b1xz10,	===,  5'b1xz10,	1'b1
-Case-ne,		5'b1xz11,	!==,  5'b1xz10,	1'b1
-Case-ne-XZ,		5'b1xz10,	!==,  5'b1zx10,	1'b1
-Case-ne,		5'b1xz10,	!==,  5'b1xz10,	1'b0
-Case-eq,		5'b1xz11,	===,  5'b1xz10,	1'b0
-Case-eq-XZ,		5'b1xz10,	===,  5'b1zx10,	1'b0
-
-# logical operation
-Logical-Not,	                    !,  4'b1010,	1'b0
-Logical-Not-XZ,	                    !,  4'b1x1z,	1'bx
-Logical-And,			4'b1010,	&&, 4'b1000,	1'b1
-Logical-And-XZ,			4'b1x1z,	&&, 4'b1000,	1'bx
-Logical-Or,				4'b1010,	||, 4'b1000,	1'b1
-Logical-Or,				4'b0000,	||, 4'b1000,	1'b1
-Logical-Or,				4'b0000,	||, 4'b0000,	1'b0
-Logical-Or,				4'b0zx0,	||, 4'b0000,	1'bx
-Logical-less,           4'b0000,    <,  4'b0000,    1'b0	
-Logical-less-1,			4'b0000,	<,	4'b0001,	1'b1
-Logical-less-2,         4'b0001,    <,  4'b0000,    1'b0
-Logical-less-3,         4'bxxxx,    <,  4'b0001,    1'bx
-Logical-less-4,         4'b0001,    <,  4'bxxxx,    1'bx
-Logical-less-5,         4'b0xxx,    <,  4'b1001,    1'bx
-Logical-less-6,         4'b1001,    <,  4'b0xxx,    1'bx
-Logical-less-7,         4'sb1001,   <,  4'sb0xxx,   1'bx
-Logical-less-8,         4'sb0xxx,   <,  4'sb1001,   1'bx
-Logical-less-9,         4'b0zzz,    <,  4'b1001,    1'bx
-Logical-less-10,         4'sb0zzz,   <,  4'sb1001,   1'bx
-Logical-greater,        4'b0000,    >,  4'b0000,    1'b0
-Logical-greater-1,		4'b0000,	>,	4'b0001,	1'b0
-Logical-greater-2,      4'b0001,    >,  4'b0000,    1'b1
-Logical-greater-3,      4'bxxxx,    >,  4'b0001,    1'bx
-Logical-greater-4,      4'b0001,    >,  4'bxxxx,    1'bx
-Logical-greater-5,      4'b0xxx,    >,  4'b1001,    1'bx
-Logical-greater-6,      4'b1001,    >,  4'b0xxx,    1'bx
-Logical-greater-7,      4'sb1001,   >,  4'sb0xxx,   1'bx
-Logical-greater-8,      4'sb0xxx,   >,  4'sb1001,   1'bx
-Logical-gr-equal,	    4'b0000,	>=, 4'b0000,	1'b1
-Logical-gr-equal-1,		4'b0000,	>=,	4'b0001,	1'b0
-Logical-gr-equal-2,     4'b0001,    >=, 4'b0000,    1'b1
-Logical-gr-equal-3,     4'bxxxx,    >=, 4'b0001,    1'bx
-Logical-gr-equal-4,     4'b0001,    >=, 4'bxxxx,    1'bx
-Logical-gr-equal-5,     4'b0001,    >=, 4'bzzzz,    1'bx
-Logical-less-equal,		4'b0000,	<=, 4'b0000,	1'b1
-Logical-less-equal-1,	4'b0000,	<=,	4'b0001,	1'b1
-Logical-less-equal-2,   4'b0001,    <=, 4'b0000,    1'b0
-Logical-less-equal-3,   4'bxxxx,    <=, 4'b0001,    1'bx
-Logical-less-equal-4,   4'b0001,    <=, 4'bxxxx,    1'bx	
-Logical-less-equal-5,   4'b0001,    <=, 4'bzzzz,    1'bx	
-Logical-equal,			4'b0000,	==, 4'b0000,	1'b1
-Logical-equal-1,		4'b0000,	==,	4'b0001,	1'b0
-Logical-equal-2,        4'b0001,    ==, 4'b0000,    1'b0
-Logical-equal-3,        4'bxxxx,    ==, 4'b0001,    1'bx
-Logical-equal-4,        4'b0001,    ==, 4'bxxxx,    1'bx
-Logical-equal-5,        4'b0001,    ==, 4'bzzzz,    1'bx
-Logical-bits,			'b1110,	    ==, 4'b1110,	1'b1	
-Logical-not-equal,		4'b0000,	!=,	4'b0000,	1'b0
-Logical-not-equal-1,	4'b0000,	!=,	4'b0001,	1'b1
-Logical-not-equal-2,    4'b0001,    !=, 4'b0000,    1'b1
-Logical-not-equal-3,    4'bxxxx,    !=, 4'b0001,    1'bx
-Logical-not-equal-4,    4'b0001,    !=, 4'bxxxx,    1'bx
-Logical-not-equal-5,    4'b0001,    !=, 4'bzzzz,    1'bx
-
-# Tests for correct sign extension
-Sign-Extend,                    4'b0001,    ===,    1'b1,   1'b1
-Sign-Extend-Unknown,            4'b00xz,    ===,    2'bxz,  1'b1
-Sign-Extend-Sign,               4'sb1111,   ===,    1'sb1,  1'b1
-Sign-Extend-Sign-Unknown,       4'sbxxxz,   ===,    2'sbxz,  1'b1
-
-# shift operation
-Shift-left,				        6'b0001zx,	<<,	    2'b10,	6'b01zx00
-Shift-right, 			        6'b0zx100,	>>,		2'b10,	3'bzx1
-Signed-shift-left,		        5'b001zx,	<<<,	2'b10,	5'b1zx00
-Unsigned-Signed-shift-right,	6'b1z1x00,	>>>,	2'b10,	6'b001z1x
-Unsigned-arithmetic-shift-right,6'b1z1x00,	>>>,	2'b10,	6'b001z1x
-Signed-arithmetic-shift-right,  6'sb1z1x00,	>>>,	2'b10,	6'sb111z1x
-Shift-left-X,                   6'b1010,	<<,	1'bx,	'bx
-Shift-left-Z,                   6'b1010,	<<,	1'bz,	'bx
-Shift-right-X,                  6'b1010,	>>,	1'bx,	'bx
-Shift-right-Z,                  6'b1010,	>>,	1'bz,	'bx
-
-# arithmetic
-Addition,			        4'b0110,    +,  4'b0011,    'b1001
-Addition-Base-10,			2,          +,  2,          4
-Subtraction,		        4'b0100,    -,  4'b0010,    'b10
-Subtraction-Base-10,        4,          -,  2,          2
-Subtraction-Smaller-Larger, 4'b0010,    -,  4'b0100,    4'b1110
-Subtraction-Small-Base-10,  2,          -,  4,          3'sb110
-Division,			        4'b1010,    /,  4'b0010,    'b101
-Division-Base-10,           10,         /,  5,          2
-Division-Neg,               4'sb1010,   /,  3'sb010,    3'sb101
-Multiplication,	            4'b0010,    *,  4'b0010,    'b100
-Multiplication-Base-10,     2,          *,  3,          6
-Multiplication-Neg,         4,          *,  4'sb1100,   7'sb1110000
-Modulo,			            4'b1011,    %,  4'b0010,    'b1
-Modulo-Base-10,             13,         %,  5,          3
-Modulo-Neg,                 4'sb1011,   %,  4'sb0010,   2'sb11
-Power,                      2'b10,      **, 2'b10,      4'b0100
-Power-Base-10,              2,          **, 3,          8
-Power-Zero,                 4'b0010,    **, 4'b0000,    'b1
-Power-Zero-Base-10,         3,          **, 0,          1
-Power-Unknown,              4'b0000,    **, 4'sb1110,   'bx
-Power-Neg-Even,             4'sb1111,   **, 2'b10,      'b1
-Power-Neg-Odd,              4'sb1111,   **, 3'b011,     2'sb11
-
-# Ternary operations
-Ternary,			1'b1,   ?,  2'b01,  :,  2'b10,  2'b01
-
-# Test Cases As-Per Verilog 2005 Specification: 
-# Page 46 "Table 5-8 Examples of modulus and power operators"
-V2005-1,                    10,         %,  3,       1          # 10/3 yields a remainder of 1.
-V2005-2,                    11,         %,  3,       2          # 11/3 yields a remainder of 2.
-V2005-3,                    12,         %,  3,       0          # 12/3 yields no remainder.
-V2005-4,                    5'sb10110,  %,  3'sb011, 2'sb11     # The result takes the sign of the first operand.
-V2005-5,                    11,         %,  3'sb101, 2          # The result takes the sign of the first operand
-#V2005-6,                    5'sb10100, %,  3,       1          # -4'd12 is seen as a large positive number that leaves a remainder of 1 when divided by 3.
-V2005-7,                    3,          **, 2,       9          # 3 * 3
-V2005-8,                    2,          **, 3,       8          # 2 * 2 * 2
-V2005-9,                    2,          **, 0,       1          # Anything to the zero exponent is 1.
-V2005-10,                   0,          **, 0,       1          # Zero to the zero exponent is also 1.
-V2005-11,                   2,          **, 3'sb111, 0          # 2 ** -1 = 1/2. Integer division truncates to zero.
-V2005-12,                   0,          **, 2'sb11,  'bx     # 0 ** -1 = 1/0. Integer division by zero is 'bx.
-
-#2.0 ** -3'sb1 0.5 2.0 is real, giving real reciprocal.
-#9 ** 0.5 3.0 Real square root.
-#9.0 ** (1/2) 1.0 Integer division truncates exponent to zero.
-#-3.0 ** 2.0 9.0 Defined because real 2.0 is still integral value
-
-
diff --git a/third_party/vtr/libs/rtlnumber/src/include/internal_bits.hpp b/third_party/vtr/libs/rtlnumber/src/include/internal_bits.hpp
deleted file mode 100644
index c3a8092d1..000000000
--- a/third_party/vtr/libs/rtlnumber/src/include/internal_bits.hpp
+++ /dev/null
@@ -1,1140 +0,0 @@
-/* Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
- *           Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com),
- *            Alexandrea Demmings (alexandrea.demmings@unb.ca, lxdemmings@gmail.com) and
- *             Dr. Kenneth B. Kent (ken@unb.ca)
- *             for the Reconfigurable Computing Research Lab at the
- *              Univerity of New Brunswick in Fredericton, New Brunswick, Canada
- */
-
-#ifndef INTERNAL_BITS_HPP
-#define INTERNAL_BITS_HPP
-
-#include <cstdint>
-#include <string>
-#include <algorithm>
-#include <vector>
-#include <bitset>
-#include "rtl_utils.hpp"
-
-typedef uint16_t veri_internal_bits_t;
-
-using integer_t = int64_t;
-constexpr short integer_t_size = (sizeof(integer_t) * 8);
-
-#define _static_unused(x)    \
-    namespace {              \
-    constexpr auto _##x = x; \
-    }
-
-#define unroll_1d(lut) \
-    { lut[_0], lut[_1], lut[_x], lut[_z] }
-#define unroll_2d(lut) \
-    { unroll_1d(lut[_0]), unroll_1d(lut[_1]), unroll_1d(lut[_x]), unroll_1d(lut[_z]) }
-
-#define unroll_1d_invert(lut) \
-    { l_not[lut[_0]], l_not[lut[_1]], l_not[lut[_x]], l_not[lut[_z]] }
-#define unroll_2d_invert(lut) \
-    { unroll_1d_invert(lut[_0]), unroll_1d_invert(lut[_1]), unroll_1d_invert(lut[_x]), unroll_1d_invert(lut[_z]) }
-
-namespace BitSpace {
-typedef uint8_t bit_value_t;
-
-constexpr veri_internal_bits_t _All_0 = static_cast<veri_internal_bits_t>(0x0000000000000000UL);
-constexpr veri_internal_bits_t _All_1 = static_cast<veri_internal_bits_t>(0x5555555555555555UL);
-constexpr veri_internal_bits_t _All_x = static_cast<veri_internal_bits_t>(0xAAAAAAAAAAAAAAAAUL);
-constexpr veri_internal_bits_t _All_z = static_cast<veri_internal_bits_t>(0xFFFFFFFFFFFFFFFFUL);
-
-constexpr bit_value_t _0 = 0x0;
-constexpr bit_value_t _1 = 0x1;
-constexpr bit_value_t _x = 0x2;
-constexpr bit_value_t _z = 0x3;
-
-/***                                                              
- * these are taken from the raw verilog truth tables so that the evaluation are correct.
- * only use this to evaluate any expression for the number_t binary digits.
- * reference: http://staff.ustc.edu.cn/~songch/download/IEEE.1364-2005.pdf
- * 
- *******************************************************/
-
-constexpr bit_value_t l_buf[4] = {
-    /*	 0   1   x   z  <- a*/
-    _0, _1, _x, _x};
-_static_unused(l_buf)
-
-    constexpr bit_value_t l_not[4]
-    = {
-        /*   0   1   x   z 	<- a */
-        _1, _0, _x, _x};
-_static_unused(l_not)
-
-    constexpr bit_value_t is_unk[4]
-    = {
-        /*	 0   1   x   z  <- a*/
-        _0, _0, _1, _1};
-_static_unused(is_unk)
-
-    constexpr bit_value_t is_x_bit[4]
-    = {
-        /*	 0   1   x   z  <- a*/
-        _0, _0, _1, _0};
-_static_unused(is_x_bit)
-
-    constexpr bit_value_t is_z_bit[4]
-    = {
-        /*	 0   1   x   z  <- a*/
-        _0, _0, _0, _1};
-_static_unused(is_z_bit)
-
-    constexpr bit_value_t is_one_bit[4]
-    = {
-        /*	 0   1   x   z  <- a*/
-        _0, _1, _0, _0};
-_static_unused(is_one_bit)
-
-    constexpr bit_value_t is_zero_bit[4]
-    = {
-        /*	 0   1   x   z  <- a*/
-        _1, _0, _0, _0};
-_static_unused(is_zero_bit)
-
-    constexpr bit_value_t l_and[4][4]
-    = {
-        /* a  /	 0   1   x   z 	<-b */
-        /* 0 */ {_0, _0, _0, _0},
-        /* 1 */ {_0, _1, _x, _x},
-        /* x */ {_0, _x, _x, _x},
-        /* z */ {_0, _x, _x, _x}};
-_static_unused(l_and)
-
-    constexpr bit_value_t l_nand[4][4]
-    = unroll_2d_invert(l_and);
-_static_unused(l_nand)
-
-    constexpr bit_value_t l_or[4][4]
-    = {
-        /* a  /	 0   1   x   z 	<-b */
-        /* 0 */ {_0, _1, _x, _x},
-        /* 1 */ {_1, _1, _1, _1},
-        /* x */ {_x, _1, _x, _x},
-        /* z */ {_x, _1, _x, _x}};
-_static_unused(l_or)
-
-    constexpr bit_value_t l_nor[4][4]
-    = unroll_2d_invert(l_or);
-_static_unused(l_nor)
-
-    constexpr bit_value_t l_xor[4][4]
-    = {
-        /* a  /	 0   1   x   z 	<-b */
-        /* 0 */ {_0, _1, _x, _x},
-        /* 1 */ {_1, _0, _x, _x},
-        /* x */ {_x, _x, _x, _x},
-        /* z */ {_x, _x, _x, _x}};
-_static_unused(l_xor)
-
-    constexpr bit_value_t l_xnor[4][4]
-    = unroll_2d_invert(l_xor);
-_static_unused(l_xnor)
-
-    constexpr bit_value_t l_notif1[4][4]
-    = {
-        /* in /	 0   1   x   z 	<-control */
-        /* 0 */ {_z, _1, _x, _x},
-        /* 1 */ {_z, _0, _x, _x},
-        /* x */ {_z, _x, _x, _x},
-        /* z */ {_z, _x, _x, _x}};
-_static_unused(l_notif1)
-
-    constexpr bit_value_t l_notif0[4][4]
-    = {
-        /* in /	 0   1   x   z 	<-control */
-        /* 0 */ {_1, _z, _x, _x},
-        /* 1 */ {_0, _z, _x, _x},
-        /* x */ {_x, _z, _x, _x},
-        /* z */ {_x, _z, _x, _x}};
-_static_unused(l_notif0)
-
-    constexpr bit_value_t l_bufif1[4][4]
-    = {
-        /* in /	 0   1   x   z 	<-control */
-        /* 0 */ {_z, _0, _x, _x},
-        /* 1 */ {_z, _1, _x, _x},
-        /* x */ {_z, _x, _x, _x},
-        /* z */ {_z, _x, _x, _x}};
-_static_unused(l_bufif1)
-
-    constexpr bit_value_t l_bufif0[4][4]
-    = {
-        /* in /	 0   1   x   z 	<-control */
-        /* 0 */ {_0, _z, _x, _x},
-        /* 1 */ {_1, _z, _x, _x},
-        /* x */ {_x, _z, _x, _x},
-        /* z */ {_x, _z, _x, _x}};
-_static_unused(l_bufif0)
-
-    /*****************************************************
-     *  Tran NO SUPPORT FOR THESE YET 
-     */
-    /* cmos gates */
-    constexpr bit_value_t l_rpmos[4][4]
-    = {
-        /* in /	 0   1   x   z 	<-control */
-        /* 0 */ {_0, _z, _x, _x},
-        /* 1 */ {_1, _z, _x, _x},
-        /* x */ {_x, _z, _x, _x},
-        /* z */ {_z, _z, _z, _z}};
-_static_unused(l_rpmos)
-
-    constexpr bit_value_t l_rnmos[4][4]
-    = {
-        /* in /	 0   1   x   z 	<-control */
-        /* 0 */ {_z, _0, _x, _x},
-        /* 1 */ {_z, _1, _x, _x},
-        /* x */ {_z, _x, _x, _x},
-        /* z */ {_z, _z, _z, _z}};
-_static_unused(l_rnmos)
-
-    constexpr bit_value_t l_nmos[4][4]
-    = {
-        /* in /	 0   1   x   z 	<-control */
-        /* 0 */ {_z, _0, _x, _x},
-        /* 1 */ {_z, _1, _x, _x},
-        /* x */ {_z, _x, _x, _x},
-        /* z */ {_z, _z, _z, _z}};
-_static_unused(l_nmos)
-
-    // see table 5-21 p:54 IEEE 1364-2005
-    constexpr bit_value_t l_ternary[4][4]
-    = {
-        /* in /	 0   1   x   z 	<-control */
-        /* 0 */ {_0, _x, _x, _x},
-        /* 1 */ {_x, _1, _x, _x},
-        /* x */ {_x, _x, _x, _x},
-        /* z */ {_x, _x, _x, _x}};
-_static_unused(l_ternary)
-
-    /*****
-     * these extend the library and simplify the process
-     */
-    /* helper */
-    constexpr bit_value_t l_unk[4][4]
-    = {
-        /* in /	 0   1   x   z 	<-control */
-        /* 0 */ {_x, _x, _x, _x},
-        /* 1 */ {_x, _x, _x, _x},
-        /* x */ {_x, _x, _x, _x},
-        /* z */ {_x, _x, _x, _x}};
-_static_unused(l_unk)
-
-    constexpr bit_value_t l_case_eq[4][4]
-    = {
-        /* a  /	 0   1   x   z 	<-b */
-        /* 0 */ {_1, _0, _0, _0},
-        /* 1 */ {_0, _1, _0, _0},
-        /* x */ {_0, _0, _1, _0},
-        /* z */ {_0, _0, _0, _1}};
-_static_unused(l_case_eq)
-
-    constexpr bit_value_t l_lt[4][4]
-    = {
-        /* a  /	 0   1   x   z 	<-b */
-        /* 0 */ {_0, _1, _x, _x},
-        /* 1 */ {_0, _0, _x, _x},
-        /* x */ {_x, _x, _x, _x},
-        /* z */ {_x, _x, _x, _x}};
-_static_unused(l_lt)
-
-    constexpr bit_value_t l_gt[4][4]
-    = {
-        /* a  /	 0   1   x   z 	<-b */
-        /* 0 */ {_0, _0, _x, _x},
-        /* 1 */ {_1, _0, _x, _x},
-        /* x */ {_x, _x, _x, _x},
-        /* z */ {_x, _x, _x, _x}};
-_static_unused(l_gt)
-
-    constexpr bit_value_t l_eq[4][4]
-    = unroll_2d(l_xnor);
-_static_unused(l_eq)
-
-    constexpr bit_value_t l_sum[4][4][4]
-    = {
-        /* c_in */
-        /* 0 */ unroll_2d(l_xor),
-        /* 1 */ unroll_2d(l_xnor),
-        /* x */ unroll_2d(l_unk),
-        /* z */ unroll_2d(l_unk)};
-_static_unused(l_sum)
-
-    constexpr bit_value_t l_carry[4][4][4]
-    = {
-        /* c_in */
-        /* 0 */ unroll_2d(l_and),
-        /* 1 */ unroll_2d(l_or),
-        /* x */ unroll_2d(l_ternary),
-        /* z */ unroll_2d(l_ternary)};
-_static_unused(l_carry)
-
-    constexpr bit_value_t l_half_carry[4][4]
-    = unroll_2d(l_carry[_0]);
-_static_unused(l_half_carry)
-
-    constexpr bit_value_t l_half_sum[4][4]
-    = unroll_2d(l_sum[_0]);
-_static_unused(l_half_sum)
-
-    static char bit_to_c(bit_value_t bit, bool uppercase) {
-    switch (bit) {
-        case _0:
-            return '0';
-        case _1:
-            return '1';
-        case _z:
-            return (uppercase) ? 'Z' : 'z';
-        default:
-            return (uppercase) ? 'X' : 'x';
-    }
-}
-
-static char bit_to_u(bit_value_t bit) {
-    switch (bit) {
-        case _1:
-            return '1';
-        default:
-            return '0';
-    }
-}
-
-static char bits_to_hex_c(short digit, bool uppercase) {
-    switch (digit) {
-        case 0:
-            return '0';
-        case 1:
-            return '1';
-        case 2:
-            return '2';
-        case 3:
-            return '3';
-        case 4:
-            return '4';
-        case 5:
-            return '5';
-        case 6:
-            return '6';
-        case 7:
-            return '7';
-        case 8:
-            return '8';
-        case 9:
-            return '9';
-        case 10:
-            return (uppercase) ? 'A' : 'a';
-        case 11:
-            return (uppercase) ? 'B' : 'b';
-        case 12:
-            return (uppercase) ? 'C' : 'c';
-        case 13:
-            return (uppercase) ? 'D' : 'd';
-        case 14:
-            return (uppercase) ? 'E' : 'e';
-        case 15:
-            return (uppercase) ? 'F' : 'f';
-        default:
-            assert_Werr(0,
-                        "Invalid bits input" + std::to_string(digit));
-
-            break;
-    }
-
-    std::abort();
-}
-
-static bit_value_t c_to_bit(char c) {
-    switch (tolower(c)) {
-        case '0':
-            return _0;
-        case '1':
-            return _1;
-        case 'z':
-            return _z;
-        case 'x':
-            return _x;
-        default:
-            break;
-    }
-    assert_Werr(0,
-                "Invalid bits input " + std::string(1, c));
-    return 0;
-}
-
-template<typename T>
-class BitFields {
-  private:
-    T bits = static_cast<T>(_All_x);
-
-    template<typename Addr_t>
-    size_t get_bit_location(Addr_t address) {
-        size_t current_address = static_cast<size_t>(address);
-        current_address %= this->size();
-        current_address <<= 1;
-        return current_address;
-    }
-
-  public:
-    BitFields(bit_value_t init_v) {
-        this->bits = static_cast<T>(
-            (_0 == init_v) ? _All_0 : (_1 == init_v) ? _All_1 : (_z == init_v) ? _All_z : _All_x);
-    }
-
-    template<typename Addr_t>
-    bit_value_t get_bit(Addr_t address) {
-        auto result = this->bits >> this->get_bit_location(address);
-        result &= 0x3;
-
-        return static_cast<bit_value_t>(result);
-    }
-
-    template<typename Addr_t>
-    void set_bit(Addr_t address, bit_value_t value) {
-        size_t real_address = this->get_bit_location(address);
-
-        T set_value = static_cast<T>(value);
-        set_value = static_cast<T>(set_value << real_address);
-
-        T mask = static_cast<T>(0x3);
-        mask = static_cast<T>(mask << real_address);
-        mask = static_cast<T>(~(mask));
-
-        this->bits = static_cast<T>(this->bits & mask);
-        this->bits = static_cast<T>(this->bits | set_value);
-    }
-
-    /**
-     * get 16 real bit (8 verilog bits) as 8 bit (char)
-     */
-    template<typename Addr_t>
-    char get_as_char(Addr_t address) {
-        char value = 0;
-        for (size_t i = 0; i < 8; i++) {
-            value += (((this->get_bit((address * 8) + i)) ? 1 : 0) << i);
-        }
-
-        return value;
-    }
-
-    static size_t size() {
-        return (sizeof(T) << 2); // 8 bit in a byte, 2 bits for a verilog bits = 4 bits in a byte, << 2 = sizeof x 4
-    }
-};
-
-// #define DEBUG_V_BITS
-
-/*****
- * we use large array since we process the bits in chunks
- */
-class VerilogBits {
-  private:
-    std::vector<BitFields<veri_internal_bits_t>> bits;
-    size_t bit_size = 0;
-
-    size_t to_index(size_t address) {
-        return (address / BitFields<veri_internal_bits_t>::size());
-    }
-
-    size_t list_size() {
-        return this->bits.size();
-    }
-
-  public:
-    VerilogBits() {
-        this->bit_size = 0;
-        this->bits = std::vector<BitSpace::BitFields<veri_internal_bits_t>>();
-    }
-
-    VerilogBits(size_t data_size, bit_value_t value_in) {
-        this->bit_size = data_size;
-        this->bits = std::vector<BitSpace::BitFields<veri_internal_bits_t>>();
-
-        size_t bitfield_count = (this->bit_size / BitFields<veri_internal_bits_t>::size()) + 1;
-
-        for (size_t i = 0; i < bitfield_count; i++) {
-            this->bits.push_back(BitSpace::BitFields<veri_internal_bits_t>(value_in));
-        }
-    }
-
-    VerilogBits(VerilogBits* other) {
-        this->bit_size = other->size();
-        this->bits = other->get_internal_bitvector();
-    }
-
-    size_t size() {
-        return this->bit_size;
-    }
-
-    std::vector<BitFields<veri_internal_bits_t>> get_internal_bitvector() {
-        return this->bits;
-    }
-
-    BitFields<veri_internal_bits_t>* get_bitfield(size_t index) {
-#ifdef DEBUG_V_BITS
-        if (index >= this->bits.size()) {
-            std::cerr << "Bit array indexing out of bounds " << index << " but size is " << this->bit_size << std::endl;
-            std::abort();
-        }
-#endif
-
-        return (&this->bits[index]);
-    }
-
-    bit_value_t get_bit(size_t address) {
-#ifdef DEBUG_V_BITS
-        if (address >= this->bit_size) {
-            std::cerr << "Bit index array out of bounds " << address << " but size is " << this->bit_size << std::endl;
-            std::abort();
-        }
-#endif
-
-        return (this->get_bitfield(to_index(address))->get_bit(address));
-    }
-
-    void set_bit(size_t address, bit_value_t value) {
-#ifdef DEBUG_V_BITS
-        if (address >= this->bit_size) {
-            std::cerr << "Bit index array out of bounds " << address << " but size is " << this->bit_size << std::endl;
-            std::abort();
-        }
-#endif
-        (this->get_bitfield(to_index(address))->set_bit(address, value));
-    }
-
-    std::string to_printable() {
-        std::string to_return = "";
-
-        for (size_t i = 0; i < this->size(); i += 8) {
-            to_return.insert(0, 1, this->get_bitfield(to_index(i))->get_as_char(i));
-        }
-
-        return to_return;
-    }
-
-    char getc() {
-        size_t last_index = this->size() - 1;
-        return this->get_bitfield(to_index(last_index))->get_as_char(last_index);
-    }
-
-    bool has_unknown() {
-        for (size_t address = 0x0; address < this->size(); address++) {
-            if (is_unk[this->get_bit(address)])
-                return true;
-        }
-
-        return false;
-    }
-
-    bool is_only_z() {
-        for (size_t address = 0x0; address < this->size(); address++) {
-            if (!is_z_bit[this->get_bit(address)])
-                return false;
-        }
-
-        return true;
-    }
-
-    bool is_only_x() {
-        for (size_t address = 0x0; address < this->size(); address++) {
-            if (!is_x_bit[this->get_bit(address)])
-                return false;
-        }
-
-        return true;
-    }
-
-    bool is_true() {
-        for (size_t address = 0x0; address < this->size(); address++) {
-            if (is_one_bit[this->get_bit(address)])
-                return true;
-        }
-
-        return false;
-    }
-
-    bool is_false() {
-        for (size_t address = 0x0; address < this->size(); address++) {
-            if (!is_zero_bit[this->get_bit(address)])
-                return false;
-        }
-
-        return true;
-    }
-
-    /**
-     * Unary Reduction operations
-     * This is Msb to Lsb on purpose, as per specs
-     */
-    VerilogBits bitwise_reduce(const bit_value_t lut[4][4]) {
-        bit_value_t result = this->get_bit(this->size() - 1);
-        for (size_t i = this->size() - 2; i < this->size(); i--) {
-            result = lut[result][this->get_bit(i)];
-        }
-
-        return VerilogBits(1, result);
-    }
-
-    /**
-     * Unary Bitwise operations
-     */
-    VerilogBits bitwise(const bit_value_t lut[4]) {
-        VerilogBits other(this->bit_size, _0);
-
-        for (size_t i = 0; i < this->size(); i++)
-            other.set_bit(i, lut[this->get_bit(i)]);
-
-        return other;
-    }
-
-    VerilogBits twos_complement(BitSpace::bit_value_t previous_carry) {
-        VerilogBits other(this->bit_size, _0);
-
-        for (size_t i = 0; i < this->size(); i++) {
-            BitSpace::bit_value_t not_bit_i = BitSpace::l_not[this->get_bit(i)];
-
-            other.set_bit(i, BitSpace::l_half_sum[previous_carry][not_bit_i]);
-            previous_carry = BitSpace::l_half_carry[previous_carry][not_bit_i];
-        }
-
-        return other;
-    }
-
-    VerilogBits twos_complement() {
-        return this->twos_complement(BitSpace::_1);
-    }
-
-    /**
-     * size of zero compact to the least amount of bits
-     */
-    VerilogBits resize(BitSpace::bit_value_t pad, size_t new_size) {
-        /**
-         * find the new size
-         */
-        if (new_size == 0) {
-            size_t last_bit_id = this->size() - 1;
-            size_t next_bit_id = last_bit_id - 1;
-
-            while (next_bit_id < this->size() - 1) {
-                BitSpace::bit_value_t current = this->get_bit(last_bit_id);
-                BitSpace::bit_value_t next = this->get_bit(next_bit_id);
-
-                if (current == next && current == pad) {
-                    last_bit_id--;
-                    next_bit_id--;
-                } else {
-                    break; /* it down. oh. oh! */
-                }
-            }
-
-            new_size = last_bit_id + 1;
-        }
-
-        VerilogBits other(new_size, BitSpace::_0);
-
-        size_t i = 0;
-
-        while (i < this->size() && i < new_size) {
-            other.set_bit(i, this->get_bit(i));
-            i++;
-        }
-
-        while (i < new_size) {
-            other.set_bit(i, pad); /* <- ask Eve about it */
-            i++;
-        }
-
-        return other;
-    }
-
-    /**
-     * replicates the bitset n times
-     */
-    VerilogBits replicate(size_t n_times) {
-        size_t old_size = this->size();
-        size_t new_size = old_size * n_times;
-
-        VerilogBits other(new_size, BitSpace::_0);
-
-        for (size_t i = 0; i < new_size; i += 1) {
-            other.set_bit(i, this->get_bit(i % old_size));
-        }
-
-        return other;
-    }
-};
-} // namespace BitSpace
-
-//template<size_t bit_size>
-class VNumber {
-  private:
-    bool sign = false;
-    bool defined_size = false;
-    BitSpace::VerilogBits bitstring = BitSpace::VerilogBits(1, BitSpace::_x);
-
-    VNumber(BitSpace::VerilogBits other_bitstring, bool other_defined_size, bool other_sign) {
-        bitstring = BitSpace::VerilogBits(other_bitstring);
-        sign = other_sign;
-        defined_size = other_defined_size;
-    }
-
-    VNumber insert(VNumber& other, size_t index_to_insert_at, size_t insertion_size) {
-        assert_Werr(other.is_defined_size() && this->is_defined_size(), "Size must be defined on both operand for insertion");
-
-        VNumber new_bitstring(this->size() + insertion_size, BitSpace::_0, this->is_signed() && other.is_signed(), true);
-
-        size_t index = 0;
-
-        for (size_t i = 0; i < this->size() && i < index_to_insert_at; i += 1, index += 1)
-            new_bitstring.set_bit_from_lsb(index, this->get_bit_from_lsb(i));
-
-        for (size_t i = 0; i < insertion_size; i += 1, index += 1)
-            new_bitstring.set_bit_from_lsb(index, other.get_bit_from_lsb(i));
-
-        for (size_t i = index_to_insert_at; i < this->size(); i += 1, index += 1)
-            new_bitstring.set_bit_from_lsb(index, this->get_bit_from_lsb(i));
-
-        return new_bitstring;
-    }
-
-  public:
-    VNumber() {
-        this->sign = false;
-        this->bitstring = BitSpace::VerilogBits(1, BitSpace::_x);
-        this->defined_size = false;
-    }
-
-    VNumber(VNumber&&) = default;
-    VNumber& operator=(VNumber&&) = default;
-    VNumber& operator=(const VNumber& other) = default;
-
-    VNumber(const VNumber& other) {
-        this->sign = other.sign;
-        this->bitstring = other.bitstring;
-        this->defined_size = other.defined_size;
-    }
-
-    VNumber(VNumber* other) {
-        this->sign = other->sign;
-        this->bitstring = other->bitstring;
-        this->defined_size = other->defined_size;
-    }
-
-    VNumber(VNumber other, size_t length) {
-        this->sign = other.sign;
-        this->bitstring = other.bitstring.resize(other.get_padding_bit(), length);
-        this->defined_size = other.defined_size;
-    }
-
-    VNumber(const std::string& verilog_string) {
-        set_value(verilog_string);
-    }
-
-    VNumber(int64_t numeric_value) {
-        set_value(numeric_value);
-    }
-
-    VNumber(size_t len, BitSpace::bit_value_t initial_bits, bool input_sign, bool this_defined_size) {
-        this->bitstring = BitSpace::VerilogBits(len, initial_bits);
-        this->sign = input_sign;
-        this->defined_size = this_defined_size;
-    }
-
-    /***
-     * getters to 64 bit int
-     */
-    int64_t get_value() {
-        size_t bit_size = 8 * sizeof(integer_t);
-
-        assert_Werr((!this->bitstring.has_unknown()),
-                    "Invalid Number contains dont care values. number: " + this->to_verilog_bitstring());
-
-        size_t end = this->size();
-        if (end > integer_t_size) {
-            printf(" === Warning: Returning a 64 bit integer from a larger bitstring (%zu). The bitstring will be truncated\n", bit_size);
-            end = bit_size;
-        }
-
-        integer_t result = 0;
-        BitSpace::bit_value_t pad = this->get_padding_bit();
-
-        for (size_t bit_index = 0; bit_index < end; bit_index++) {
-            integer_t current_bit = static_cast<integer_t>(pad);
-            if (bit_index < this->size())
-                current_bit = this->bitstring.get_bit(bit_index);
-
-            result |= (current_bit << bit_index);
-        }
-
-        return result;
-    }
-
-    std::string to_string(bool big_endian, bool uppercase) {
-        // make a big endian string
-        std::string to_return = "";
-        for (size_t address = 0x0; address < this->size(); address++) {
-            char value = BitSpace::bit_to_c(this->get_bit_from_lsb(address), uppercase);
-            if (big_endian) {
-                to_return.push_back(value);
-            } else {
-                to_return.insert(0, 1, value);
-            }
-        }
-
-        return to_return;
-    }
-
-    std::string to_Ustring(bool big_endian) {
-        // make a big endian string
-        std::string to_return = "";
-        for (size_t address = 0x0; address < this->size(); address++) {
-            char value = BitSpace::bit_to_u(this->get_bit_from_lsb(address));
-            if (big_endian) {
-                to_return.push_back(value);
-            } else {
-                to_return.insert(0, 1, value);
-            }
-        }
-
-        return to_return;
-    }
-
-    std::string to_log2radix(short bit_count, bool big_endian, bool uppercase) {
-        std::string to_return = "";
-        int temp = 0;
-        int i = 0;
-        for (size_t address = 0x0; address < this->size(); address++) {
-            temp |= (this->get_bit_from_lsb(address) << i);
-            i += 1;
-
-            // 3 bit for octal value
-            if (i >= bit_count || address == this->size() - 1) {
-                // share the same digits so we use hex
-                char value = BitSpace::bits_to_hex_c(temp, uppercase);
-                if (big_endian) {
-                    to_return.push_back(value);
-                } else {
-                    to_return.insert(0, 1, value);
-                }
-                temp = 0;
-                i = 0;
-            }
-        }
-
-        return to_return;
-    }
-
-    std::string to_base10(bool big_endian, bool uppercase) {
-        VNumber temp(this);
-        std::string to_return = "";
-        while (!temp.is_false()) {
-            int carry = 0;
-            for (size_t address = 0x0; address < temp.size(); address++) {
-                // we read from msb to lsb
-                int temp_value = temp.get_bit_from_msb(address);
-                temp_value += carry << 1;
-                carry = temp_value % 10;
-                temp_value = temp_value / 10;
-                temp.set_bit_from_msb(address, temp_value);
-            }
-            char value = BitSpace::bits_to_hex_c(carry, uppercase);
-
-            if (big_endian) {
-                to_return.push_back(value);
-            } else {
-                to_return.insert(0, 1, value);
-            }
-        }
-        return to_return;
-    }
-
-    // convert lsb_msb bitstring to verilog
-    std::string to_verilog_bitstring() {
-        std::string out = this->to_vstring('b');
-        size_t len = this->bitstring.size();
-
-        return std::to_string(len) + ((this->is_signed()) ? "\'sb" : "\'b") + out;
-    }
-
-    std::string to_vstring(char input_base) {
-        std::string out = "";
-        char base = tolower(input_base);
-        bool upercase = (base != input_base);
-        if (this->has_unknown() && (base == 'o' || base == 'h' || base == 'd')) {
-            // hot swap to binary since that is all we can print
-            base = 'b';
-        }
-
-        switch (base) {
-            case 'b':
-                return this->to_string(false, upercase);
-            case 'z':
-                return this->to_string(false, upercase);
-            case 'u':
-                return this->to_Ustring(false);
-            case 'o':
-                return this->to_log2radix(3, false, upercase);
-            case 'd':
-                return this->to_base10(false, upercase);
-            case 'h':
-                return this->to_log2radix(4, false, upercase);
-            case 's':
-                return this->bitstring.to_printable();
-            case 'c':
-                // forcefully truncate to a char
-                return std::string(1, this->bitstring.getc());
-            default:
-                assert_Werr(0,
-                            "Invalid base for conversion");
-                break;
-        }
-        std::abort();
-    }
-
-    /***
-     * setters
-     */
-    void set_value(const std::string& input) {
-        if (!input.size()) {
-            return;
-        }
-
-        std::string verilog_string(input);
-
-        /**
-         * set defaults
-         */
-        size_t bitsize = 32;        // 32 bit is the fall back
-        this->defined_size = false; // the size is undefined unless otherwise specified
-        size_t radix = 0;           // the radix is unknown to start with
-        this->sign = false;         // we treat everything as unsigned unless specified
-
-        // if this is a string
-        if (verilog_string[0] == '\"') {
-            assert_Werr(verilog_string.size() >= 2,
-                        "Malformed input String for VNumber, only open quote" + verilog_string);
-
-            assert_Werr(verilog_string.back() == '\"',
-                        "Malformed input String for VNumber, expected closing quotes" + verilog_string);
-
-            verilog_string.erase(0, 1);
-            verilog_string.pop_back();
-
-            size_t string_size = verilog_string.size();
-            if (string_size == 0)
-                string_size = 1;
-
-            bitsize = string_size * 8;
-            this->defined_size = true;
-            radix = 256;
-        } else {
-            size_t loc = verilog_string.find("\'");
-            if (loc == std::string::npos) {
-                verilog_string.insert(0, "\'sd");
-                loc = 0;
-            }
-
-            if (loc != 0) {
-                std::string bit_length_char = verilog_string.substr(0, loc);
-                bitsize = strtoul(bit_length_char.c_str(), nullptr, 10);
-                this->defined_size = true;
-            }
-
-            if (std::tolower(verilog_string[loc + 1]) == 's') {
-                this->sign = true;
-            }
-
-            char base = static_cast<char>(std::tolower(verilog_string[loc + 1 + sign]));
-            switch (base) {
-                case 'b':
-                    radix = 2;
-                    break; // binary
-                case 'o':
-                    radix = 8;
-                    break; // octal
-                case 'd':
-                    radix = 10;
-                    break; // decimal
-                case 'h':
-                    radix = 16;
-                    break; // hexadecimal
-                default:
-                    assert_Werr(false,
-                                "Invalid radix base for number: " + std::string(1, base));
-                    break;
-            }
-
-            //remove underscores
-            verilog_string = verilog_string.substr(loc + 2 + sign);
-            verilog_string.erase(std::remove(verilog_string.begin(), verilog_string.end(), '_'), verilog_string.end());
-
-            //little endian bitstring string
-        }
-
-        std::string temp_bitstring = string_of_radix_to_bitstring(verilog_string, radix);
-
-        char pad = temp_bitstring[0];
-        if (!this->sign && pad == '1') {
-            pad = '0';
-        }
-
-        // convert the bits to the internal data struct (bit at index 0 in string is msb since string go from msb to lsb)
-        BitSpace::VerilogBits new_bitstring(temp_bitstring.size(), BitSpace::_0);
-        size_t counter = temp_bitstring.size() - 1;
-        for (char in : temp_bitstring) {
-            new_bitstring.set_bit(counter--, BitSpace::c_to_bit(in));
-        }
-
-        this->bitstring = new_bitstring.resize(BitSpace::c_to_bit(pad), bitsize);
-    }
-
-    void set_value(int64_t in) {
-        this->set_value(std::to_string(in));
-    }
-
-    size_t msb_index() {
-        return this->bitstring.size() - 1;
-    }
-
-    /****
-     * bit twiddling functions
-     */
-    BitSpace::bit_value_t get_bit_from_msb(size_t index) {
-        assert_Werr(index <= msb_index(), "Index out of range");
-        return this->bitstring.get_bit(msb_index() - index);
-    }
-
-    BitSpace::bit_value_t get_bit_from_lsb(size_t index) {
-        if (index < this->size())
-            return this->bitstring.get_bit(index);
-        else
-            return this->get_padding_bit();
-    }
-
-    void set_bit_from_msb(size_t index, BitSpace::bit_value_t val) {
-        this->bitstring.set_bit(msb_index() - index, val);
-    }
-
-    void set_bit_from_lsb(size_t index, BitSpace::bit_value_t val) {
-        this->bitstring.set_bit(index, val);
-    }
-
-    /***
-     *  other
-     */
-    size_t size() {
-        return this->bitstring.size();
-    }
-
-    BitSpace::bit_value_t get_padding_bit() {
-        return (this->is_signed()) ? get_bit_from_msb(0) : BitSpace::_0;
-    }
-
-    bool is_signed() const {
-        return this->sign;
-    }
-
-    bool is_defined_size() {
-        return this->defined_size;
-    }
-
-    bool is_negative() {
-        return (this->get_bit_from_msb(0) == BitSpace::_1 && this->sign);
-    }
-
-    bool has_unknown() {
-        return this->bitstring.has_unknown();
-    }
-
-    bool is_z() {
-        return this->bitstring.is_only_z();
-    }
-
-    bool is_x() {
-        return this->bitstring.is_only_x();
-    }
-
-    bool is_true() {
-        return this->bitstring.is_true();
-    }
-
-    bool is_false() {
-        return this->bitstring.is_false();
-    }
-
-    VNumber twos_complement(BitSpace::bit_value_t carry) {
-        return VNumber(this->bitstring.twos_complement(carry), this->defined_size, this->sign);
-    }
-
-    VNumber twos_complement() {
-        return VNumber(this->bitstring.twos_complement(), this->defined_size, this->sign);
-    }
-
-    VNumber to_signed() {
-        return VNumber(this->bitstring, this->defined_size, true);
-    }
-
-    VNumber to_unsigned() {
-        return VNumber(this->bitstring, this->defined_size, false);
-    }
-
-    VNumber bitwise_reduce(const BitSpace::bit_value_t lut[4][4]) {
-        return VNumber(this->bitstring.bitwise_reduce(lut), this->defined_size, false);
-    }
-
-    /**
-     * Unary operations
-     */
-    VNumber bitwise(const BitSpace::bit_value_t lut[4]) {
-        return VNumber(this->bitstring.bitwise(lut), this->defined_size, false);
-    }
-
-    /**
-     * Binary operations
-     */
-    VNumber bitwise(VNumber& b, const BitSpace::bit_value_t lut[4][4]) {
-        size_t std_length = std::max(this->size(), b.size());
-        const BitSpace::bit_value_t pad_a = this->get_padding_bit();
-        const BitSpace::bit_value_t pad_b = b.get_padding_bit();
-
-        VNumber result(std_length, BitSpace::_x, false, this->is_defined_size() && b.is_defined_size());
-
-        for (size_t i = 0; i < result.size(); i++) {
-            BitSpace::bit_value_t bit_a = pad_a;
-            if (i < this->size())
-                bit_a = this->get_bit_from_lsb(i);
-
-            BitSpace::bit_value_t bit_b = pad_b;
-            if (i < b.size())
-                bit_b = b.get_bit_from_lsb(i);
-
-            result.set_bit_from_lsb(i, lut[bit_a][bit_b]);
-        }
-
-        return result;
-    }
-
-    VNumber replicate(int64_t n_times_replicate) {
-        assert_Werr(n_times_replicate > 0,
-                    "Cannot replicate bitstring less than 1 times");
-
-        size_t n_times_unsigned = static_cast<size_t>(n_times_replicate);
-
-        return VNumber(this->bitstring.replicate(n_times_unsigned), true, this->sign);
-    }
-
-    VNumber insert_at_lsb(VNumber& other) {
-        return this->insert(other, 0, other.size());
-    }
-
-    VNumber insert_at_msb(VNumber& other) {
-        return this->insert(other, this->size(), other.size());
-    }
-};
-
-#endif
diff --git a/third_party/vtr/libs/rtlnumber/src/include/rtl_int.hpp b/third_party/vtr/libs/rtlnumber/src/include/rtl_int.hpp
deleted file mode 100644
index 41bead2a7..000000000
--- a/third_party/vtr/libs/rtlnumber/src/include/rtl_int.hpp
+++ /dev/null
@@ -1,95 +0,0 @@
-/* Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
- *           Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com),
- *            Alexandrea Demmings (alexandrea.demmings@unb.ca, lxdemmings@gmail.com) and
- *             Dr. Kenneth B. Kent (ken@unb.ca)
- *             for the Reconfigurable Computing Research Lab at the
- *              Univerity of New Brunswick in Fredericton, New Brunswick, Canada
- */
-
-#ifndef RTL_INT_H
-#define RTL_INT_H
-
-#include <string>
-#include "internal_bits.hpp"
-
-/**
- * Unary Operator
- */
-
-bool V_TRUE(VNumber& a);
-bool V_FALSE(VNumber& a);
-bool V_UNK(VNumber& a);
-bool V_IS_X(VNumber& a);
-bool V_IS_Z(VNumber& a);
-bool V_IS_SIGNED(VNumber& a);
-bool V_IS_UNSIGNED(VNumber& a);
-
-std::string V_STRING(VNumber& a, const char base);
-
-VNumber V_UNSIGNED(VNumber& a);
-VNumber V_SIGNED(VNumber& a);
-VNumber V_ADD(VNumber& a);
-VNumber V_MINUS(VNumber& a);
-VNumber V_MINUS(VNumber& a, BitSpace::bit_value_t carry);
-
-VNumber V_BITWISE_BUF(VNumber& a);
-VNumber V_BITWISE_NOT(VNumber& a);
-
-VNumber V_BITWISE_AND(VNumber& a);
-VNumber V_BITWISE_OR(VNumber& a);
-VNumber V_BITWISE_XOR(VNumber& a);
-VNumber V_BITWISE_NAND(VNumber& a);
-VNumber V_BITWISE_NOR(VNumber& a);
-VNumber V_BITWISE_XNOR(VNumber& a);
-VNumber V_LOGICAL_NOT(VNumber& a);
-
-/**
- * Binary Operator
- */
-VNumber V_REPLICATE(VNumber& a, VNumber& n_times);
-VNumber V_CONCAT(std::vector<VNumber> concat_list);
-
-VNumber V_BITWISE_BUFIF0(VNumber& input, VNumber& trigger);
-VNumber V_BITWISE_BUFIF1(VNumber& input, VNumber& trigger);
-VNumber V_BITWISE_NOTIF0(VNumber& input, VNumber& trigger);
-VNumber V_BITWISE_NOTIF1(VNumber& input, VNumber& trigger);
-
-VNumber V_BITWISE_AND(VNumber& a, VNumber& b);
-VNumber V_BITWISE_OR(VNumber& a, VNumber& b);
-VNumber V_BITWISE_XOR(VNumber& a, VNumber& b);
-VNumber V_BITWISE_NAND(VNumber& a, VNumber& b);
-VNumber V_BITWISE_NOR(VNumber& a, VNumber& b);
-VNumber V_BITWISE_XNOR(VNumber& a, VNumber& b);
-
-VNumber V_SIGNED_SHIFT_LEFT(VNumber& a, VNumber& b);
-VNumber V_SIGNED_SHIFT_RIGHT(VNumber& a, VNumber& b);
-VNumber V_SHIFT_LEFT(VNumber& a, VNumber& b);
-VNumber V_SHIFT_RIGHT(VNumber& a, VNumber& b);
-
-VNumber V_LOGICAL_AND(VNumber& a, VNumber& b);
-VNumber V_LOGICAL_OR(VNumber& a, VNumber& b);
-
-VNumber V_LT(VNumber& a, VNumber& b);
-VNumber V_GT(VNumber& a, VNumber& b);
-VNumber V_LE(VNumber& a, VNumber& b);
-VNumber V_GE(VNumber& a, VNumber& b);
-VNumber V_EQUAL(VNumber& a, VNumber& b);
-VNumber V_NOT_EQUAL(VNumber& a, VNumber& b);
-VNumber V_CASE_EQUAL(VNumber& a, VNumber& b);
-VNumber V_CASE_NOT_EQUAL(VNumber& a, VNumber& b);
-
-VNumber V_ADD(VNumber& a, VNumber& b);
-VNumber V_MINUS(VNumber& a, VNumber& b);
-VNumber V_ADD(VNumber& a, VNumber& b, BitSpace::bit_value_t carry_in);
-VNumber V_MINUS(VNumber& a, VNumber& b, BitSpace::bit_value_t carry_in);
-VNumber V_MULTIPLY(VNumber& a, VNumber& b);
-VNumber V_POWER(VNumber& a, VNumber& b);
-VNumber V_DIV(VNumber& a, VNumber& b);
-VNumber V_MOD(VNumber& a, VNumber& b);
-
-/**
- * Ternary Operator
- */
-VNumber V_TERNARY(VNumber& a, VNumber& b, VNumber& c);
-
-#endif //RTL_INT_H
diff --git a/third_party/vtr/libs/rtlnumber/src/include/rtl_utils.hpp b/third_party/vtr/libs/rtlnumber/src/include/rtl_utils.hpp
deleted file mode 100644
index d4e8139e7..000000000
--- a/third_party/vtr/libs/rtlnumber/src/include/rtl_utils.hpp
+++ /dev/null
@@ -1,57 +0,0 @@
-/* Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
- *           Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com),
- *            Alexandrea Demmings (alexandrea.demmings@unb.ca, lxdemmings@gmail.com) and
- *             Dr. Kenneth B. Kent (ken@unb.ca)
- *             for the Reconfigurable Computing Research Lab at the
- *              Univerity of New Brunswick in Fredericton, New Brunswick, Canada
- */
-
-#ifndef RTL_UTILS_H
-#define RTL_UTILS_H
-
-#include <string>
-#include <iostream>
-
-#include <string.h>
-
-#ifndef FILE_NAME
-#    define FILE_NAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
-#endif
-
-/* Enable Debug Messages for libRTLNumber: Un-Comment to Enable Debug Messages:
- *                                          Comment-out to Disable Debug Messages: */
-// #define ENABLE_DEBUG_MESSAGES
-
-#ifdef ENABLE_DEBUG_MESSAGES
-#    define DEBUG_MSG(debugMsg) std::cerr << "DEBUG: " << FILE_NAME << ":" << __LINE__ << " " << __func__ << "()" \
-                                          << ": " << debugMsg << std::endl
-#else
-#    define DEBUG_MSG(debugMsg) /* No-Op */
-#endif
-
-#ifndef WARN_MSG
-#    define WARN_MSG(warnMSG) std::cerr << "WARNING: " << FILE_NAME << ":" << __LINE__ << " " << __func__ << "()" \
-                                        << ": " << warnMSG << "!" << std::endl
-#endif
-
-#ifndef ERR_MSG
-#    define ERR_MSG(errMsg) std::cerr << std::endl                                                            \
-                                      << "ERROR: " << FILE_NAME << ":" << __LINE__ << " " << __func__ << "()" \
-                                      << ": " << errMsg << "!" << std::endl                                   \
-                                      << std::endl
-#endif
-
-std::string string_of_radix_to_bitstring(std::string orig_string, size_t radix);
-std::string convert_between_bases(std::string str, uint8_t base_from, uint8_t base_to, bool uppercase, bool big_endian);
-
-inline void _assert_Werr(bool cond, const char* FUNCT, int LINE, std::string error_string) {
-    if (!cond) {
-        std::cerr << std::endl
-                  << "ERROR: " << FUNCT << "::" << std::to_string(LINE) << " Assert 'assert_Werr' Failed:\t" << error_string << "!" << std::endl
-                  << std::endl;
-        std::abort();
-    }
-}
-#define assert_Werr(cond, error_string) _assert_Werr((cond), __func__, __LINE__, std::string(error_string))
-
-#endif
diff --git a/third_party/vtr/libs/rtlnumber/src/rtl_int.cc b/third_party/vtr/libs/rtlnumber/src/rtl_int.cc
deleted file mode 100644
index 28c4c9d10..000000000
--- a/third_party/vtr/libs/rtlnumber/src/rtl_int.cc
+++ /dev/null
@@ -1,746 +0,0 @@
-/* Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
- *           Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com),
- *            Alexandrea Demmings (alexandrea.demmings@unb.ca, lxdemmings@gmail.com) and
- *             Dr. Kenneth B. Kent (ken@unb.ca)
- *             for the Reconfigurable Computing Research Lab at the
- *              Univerity of New Brunswick in Fredericton, New Brunswick, Canada
- */
-
-#include <string>
-
-#include "internal_bits.hpp"
-#include "rtl_int.hpp"
-#include "rtl_utils.hpp"
-
-#define AMBIGUOUS_VALUE VNumber("'bx")
-
-using namespace BitSpace;
-
-class compare_bit {
-  private:
-    uint8_t result = 0x0;
-
-  public:
-    compare_bit(uint8_t set_to) { result = set_to; }
-
-    bool is_unk() { return (!result); }
-
-    bool is_gt() { return (result & (0x1)); }
-    bool is_eq() { return (result & (0x2)); }
-    bool is_lt() { return (result & (0x4)); }
-
-    bool is_ne() { return (!is_eq()); }
-    bool is_ge() { return (result & (0x3)); }
-    bool is_le() { return (result & (0x6)); }
-};
-
-#define UNK_EVAL compare_bit(0x0)
-#define GT_EVAL compare_bit(0x1)
-#define EQ_EVAL compare_bit(0x2)
-#define LT_EVAL compare_bit(0x4)
-
-static compare_bit eval_op(VNumber& a_in, VNumber& b_in) {
-    assert_Werr(a_in.size(),
-                "empty 1st bit string");
-
-    assert_Werr(b_in.size(),
-                "empty 2nd bit string");
-
-#ifndef RTL_ALLOW_UNKNOWN_COMPARE
-    if (a_in.has_unknown() || b_in.has_unknown())
-        return UNK_EVAL;
-#endif
-
-    bool neg_a = (a_in.is_negative());
-    bool neg_b = (b_in.is_negative());
-
-    if (neg_a && !neg_b) {
-        return LT_EVAL;
-    } else if (!neg_a && neg_b) {
-        return GT_EVAL;
-    }
-
-    VNumber a;
-    VNumber b;
-    bool invert_result = (neg_a && neg_b);
-
-    if (invert_result) {
-        a = a_in.twos_complement();
-        b = b_in.twos_complement();
-    } else {
-        a = a_in;
-        b = b_in;
-    }
-
-    size_t std_length = std::max(a.size(), b.size());
-    bit_value_t pad_a = a.get_padding_bit();
-    bit_value_t pad_b = b.get_padding_bit();
-
-    for (size_t i = std_length - 1; i < std_length; i--) {
-        bit_value_t bit_a = pad_a;
-        if (i < a.size()) {
-            bit_a = a.get_bit_from_lsb(i);
-        }
-
-        bit_value_t bit_b = pad_b;
-        if (i < b.size()) {
-            bit_b = b.get_bit_from_lsb(i);
-        }
-
-        if (BitSpace::l_lt[bit_a][bit_b] == BitSpace::_1) {
-            return (!invert_result) ? LT_EVAL : GT_EVAL;
-        } else if (BitSpace::l_gt[bit_a][bit_b] == BitSpace::_1) {
-            return (!invert_result) ? GT_EVAL : LT_EVAL;
-        } else if (BitSpace::l_eq[bit_a][bit_b] == BitSpace::_1) {
-            continue;
-        } else {
-            return UNK_EVAL;
-        }
-    }
-
-    return EQ_EVAL;
-}
-
-static compare_bit eval_op(VNumber a, int64_t b) {
-    VNumber bits_value = VNumber(std::to_string(std::abs(b)));
-    if (b < 0)
-        bits_value = bits_value.twos_complement();
-
-    return eval_op(a, bits_value);
-}
-
-/**
- * Check if the Operation Should be Signed by Checking if Both Operands Are Signed:
- */
-static bool is_signed_operation(VNumber& a, VNumber& b) {
-    bool is_signed_operation = false;
-
-    if ((true == a.is_signed()) && (true == b.is_signed())) {
-        is_signed_operation = true;
-    }
-
-    return is_signed_operation;
-}
-
-/**
- * Addition operations
- */
-static VNumber sum_op(VNumber& a, VNumber& b, const bit_value_t& initial_carry, bool is_twos_complement_subtraction) {
-    assert_Werr(a.size(),
-                "empty 1st bit string");
-
-    assert_Werr(b.size(),
-                "empty 2nd bit string");
-
-    size_t std_length = std::max(a.size(), b.size());
-    size_t new_length = ((true == is_twos_complement_subtraction) ? (std_length) : (std_length + 1));
-    const bit_value_t pad_a = a.get_padding_bit();
-    const bit_value_t pad_b = b.get_padding_bit();
-    bool is_addition_signed_operation = is_signed_operation(a, b);
-
-    //("pad_b: '" << (unsigned(pad_b)) << "'");
-
-    bit_value_t previous_carry = initial_carry;
-    VNumber result(new_length, _0, is_addition_signed_operation, a.is_defined_size() && b.is_defined_size());
-
-    for (size_t i = 0; i < new_length; i++) {
-        bit_value_t bit_a = pad_a;
-        if (i < a.size()) {
-            bit_a = a.get_bit_from_lsb(i);
-        }
-
-        bit_value_t bit_b = pad_b;
-        if (i < b.size()) {
-            bit_b = b.get_bit_from_lsb(i);
-        }
-
-        result.set_bit_from_lsb(i, l_sum[previous_carry][bit_a][bit_b]);
-        previous_carry = l_carry[previous_carry][bit_a][bit_b];
-    }
-
-    return result;
-}
-
-static VNumber shift_op(VNumber& a, int64_t b, bool sign_shift) {
-    VNumber to_return;
-
-    if (b == 0) {
-        to_return = a;
-    }
-    //if b is negative then shift right
-    else if (b < 0) {
-        size_t u_b = static_cast<size_t>(-b);
-        bit_value_t pad = (sign_shift) ? a.get_padding_bit() : BitSpace::_0;
-        to_return = VNumber(a.size(), pad, sign_shift, a.is_defined_size());
-        for (size_t i = 0; i < (a.size() - u_b); i++) {
-            to_return.set_bit_from_lsb(i, a.get_bit_from_lsb(i + u_b));
-        }
-    } else {
-        size_t u_b = static_cast<size_t>(b);
-        bit_value_t pad = BitSpace::_0;
-        to_return = VNumber((a.size() + u_b), pad, sign_shift, a.is_defined_size());
-        for (size_t i = 0; i < a.size(); i++) {
-            to_return.set_bit_from_lsb(i + u_b, a.get_bit_from_lsb(i));
-        }
-    }
-    return to_return;
-}
-
-bool V_TRUE(VNumber& a) {
-    return a.is_true();
-}
-
-bool V_FALSE(VNumber& a) {
-    return a.is_false();
-}
-
-bool V_UNK(VNumber& a) {
-    return a.has_unknown();
-}
-
-bool V_IS_X(VNumber& a) {
-    return a.is_x();
-}
-
-bool V_IS_Z(VNumber& a) {
-    return a.is_z();
-}
-
-bool V_IS_SIGNED(VNumber& a) {
-    return a.is_signed();
-}
-
-bool V_IS_UNSIGNED(VNumber& a) {
-    return !a.is_signed();
-}
-
-std::string V_STRING(VNumber& a, const char base) {
-    return a.to_vstring(base);
-}
-
-/***
- *                    __          __   __   ___  __       ___    __       
- *    |  | |\ |  /\  |__) \ /    /  \ |__) |__  |__)  /\   |  | /  \ |\ | 
- *    \__/ | \| /~~\ |  \  |     \__/ |    |___ |  \ /~~\  |  | \__/ | \| 
- *                                                                        
- */
-
-VNumber V_BITWISE_NOT(VNumber& a) {
-    return a.bitwise(l_not);
-}
-
-VNumber V_LOGICAL_NOT(VNumber& a) {
-    if (a.has_unknown())
-        return AMBIGUOUS_VALUE;
-
-    VNumber ored = a.bitwise_reduce(l_or);
-    VNumber noted = ored.bitwise(l_not);
-    return noted;
-}
-
-VNumber V_ADD(VNumber& a) {
-    VNumber result(a);
-    return result;
-}
-
-VNumber V_MINUS(VNumber& a) {
-    return a.twos_complement();
-}
-
-VNumber V_MINUS(VNumber& a, BitSpace::bit_value_t carry) {
-    return a.twos_complement(carry);
-}
-
-VNumber V_UNSIGNED(VNumber& a) {
-    return a.to_unsigned();
-}
-
-VNumber V_SIGNED(VNumber& a) {
-    return a.to_signed();
-}
-
-VNumber V_BITWISE_AND(VNumber& a) {
-    VNumber to_return = a.bitwise_reduce(l_and);
-    return to_return;
-}
-
-VNumber V_BITWISE_OR(VNumber& a) {
-    VNumber to_return = a.bitwise_reduce(l_or);
-    return to_return;
-}
-
-VNumber V_BITWISE_XOR(VNumber& a) {
-    VNumber to_return = a.bitwise_reduce(l_xor);
-    return to_return;
-}
-
-VNumber V_BITWISE_NAND(VNumber& a) {
-    VNumber to_return = a.bitwise_reduce(l_and).bitwise(l_not);
-    return to_return;
-}
-
-VNumber V_BITWISE_NOR(VNumber& a) {
-    VNumber to_return = a.bitwise_reduce(l_or).bitwise(l_not);
-    return to_return;
-}
-
-VNumber V_BITWISE_XNOR(VNumber& a) {
-    VNumber to_return = a.bitwise_reduce(l_xor).bitwise(l_not);
-    return to_return;
-}
-
-/***
- *     __               __          __   __   ___  __       ___    __       
- *    |__) | |\ |  /\  |__) \ /    /  \ |__) |__  |__)  /\   |  | /  \ |\ | 
- *    |__) | | \| /~~\ |  \  |     \__/ |    |___ |  \ /~~\  |  | \__/ | \| 
- *                                                                          
- */
-
-VNumber V_REPLICATE(VNumber& a, VNumber& n_times) {
-    assert_Werr(!n_times.has_unknown(),
-                "Cannot use undefined number for the replication count");
-
-    return a.replicate(n_times.get_value());
-}
-
-VNumber V_CONCAT(std::vector<VNumber> concat_list) {
-    assert_Werr(!concat_list.empty(),
-                "Concat List cannot be empty");
-
-    VNumber init = concat_list[0];
-    for (size_t i = 1; i < concat_list.size(); i++) {
-        init = init.insert_at_lsb(concat_list[i]);
-    }
-    return init;
-}
-
-VNumber V_BITWISE_BUF(VNumber& a) {
-    return a.bitwise(l_buf);
-}
-
-VNumber V_BITWISE_BUFIF0(VNumber& input, VNumber& trigger) {
-    if (trigger.size() == 1 && input.size() > 1) {
-        trigger = trigger.replicate(input.size());
-    }
-    assert_Werr(input.size() == trigger.size(),
-                "tristate must either have a single trigger or contains as many as the input width");
-    return input.bitwise(trigger, l_bufif0);
-}
-
-VNumber V_BITWISE_BUFIF1(VNumber& input, VNumber& trigger) {
-    if (trigger.size() == 1 && input.size() > 1) {
-        trigger = trigger.replicate(input.size());
-    }
-    assert_Werr(input.size() == trigger.size(),
-                "tristate must either have a single trigger or contains as many as the input width");
-    return input.bitwise(trigger, l_bufif1);
-}
-
-VNumber V_BITWISE_NOTIF0(VNumber& input, VNumber& trigger) {
-    if (trigger.size() == 1 && input.size() > 1) {
-        trigger = trigger.replicate(input.size());
-    }
-    assert_Werr(input.size() == trigger.size(),
-                "tristate must either have a single trigger or contains as many as the input width");
-    return input.bitwise(trigger, l_notif0);
-}
-
-VNumber V_BITWISE_NOTIF1(VNumber& input, VNumber& trigger) {
-    if (trigger.size() == 1 && input.size() > 1) {
-        trigger = trigger.replicate(input.size());
-    }
-    assert_Werr(input.size() == trigger.size(),
-                "tristate must either have a single trigger or contains as many as the input width");
-    return input.bitwise(trigger, l_notif1);
-}
-
-VNumber V_BITWISE_AND(VNumber& a, VNumber& b) {
-    return a.bitwise(b, l_and);
-}
-
-VNumber V_BITWISE_OR(VNumber& a, VNumber& b) {
-    return a.bitwise(b, l_or);
-}
-
-VNumber V_BITWISE_XOR(VNumber& a, VNumber& b) {
-    return a.bitwise(b, l_xor);
-}
-
-VNumber V_BITWISE_NAND(VNumber& a, VNumber& b) {
-    return a.bitwise(b, l_nand);
-}
-
-VNumber V_BITWISE_NOR(VNumber& a, VNumber& b) {
-    return a.bitwise(b, l_nor);
-}
-
-VNumber V_BITWISE_XNOR(VNumber& a, VNumber& b) {
-    return a.bitwise(b, l_xnor);
-}
-
-/**
- * Logical Operations
- */
-
-VNumber V_CASE_EQUAL(VNumber& a, VNumber& b) {
-    VNumber longEval = a.bitwise(b, l_case_eq);
-    VNumber eq = V_BITWISE_AND(longEval);
-    return eq;
-}
-
-VNumber V_CASE_NOT_EQUAL(VNumber& a, VNumber& b) {
-    VNumber eq = V_CASE_EQUAL(a, b);
-    VNumber neq = V_LOGICAL_NOT(eq);
-    return neq;
-}
-
-VNumber V_LOGICAL_AND(VNumber& a, VNumber& b) {
-    if (a.has_unknown() || b.has_unknown())
-        return AMBIGUOUS_VALUE;
-    VNumber reduxA = a.bitwise_reduce(l_or);
-    VNumber reduxB = b.bitwise_reduce(l_or);
-
-    VNumber to_return = reduxA.bitwise(reduxB, l_and);
-
-    return to_return;
-}
-
-VNumber V_LOGICAL_OR(VNumber& a, VNumber& b) {
-    if (a.has_unknown() || b.has_unknown())
-        return AMBIGUOUS_VALUE;
-    VNumber reduxA = a.bitwise_reduce(l_or);
-    VNumber reduxB = b.bitwise_reduce(l_or);
-
-    VNumber to_return = reduxA.bitwise(reduxB, l_or);
-
-    return to_return;
-}
-
-VNumber V_LT(VNumber& a, VNumber& b) {
-    compare_bit cmp = eval_op(a, b);
-    BitSpace::bit_value_t result = cmp.is_unk() ? BitSpace::_x : cmp.is_lt() ? BitSpace::_1 : BitSpace::_0;
-    VNumber to_return(1, result, false, true);
-    return to_return;
-}
-
-VNumber V_GT(VNumber& a, VNumber& b) {
-    compare_bit cmp = eval_op(a, b);
-    BitSpace::bit_value_t result = cmp.is_unk() ? BitSpace::_x : cmp.is_gt() ? BitSpace::_1 : BitSpace::_0;
-    VNumber to_return(1, result, false, true);
-    return to_return;
-}
-
-VNumber V_EQUAL(VNumber& a, VNumber& b) {
-    compare_bit cmp = eval_op(a, b);
-    BitSpace::bit_value_t result = cmp.is_unk() ? BitSpace::_x : cmp.is_eq() ? BitSpace::_1 : BitSpace::_0;
-    VNumber to_return(1, result, false, true);
-    return to_return;
-}
-
-VNumber V_GE(VNumber& a, VNumber& b) {
-    compare_bit cmp = eval_op(a, b);
-    BitSpace::bit_value_t result = cmp.is_unk() ? BitSpace::_x : cmp.is_ge() ? BitSpace::_1 : BitSpace::_0;
-    VNumber to_return(1, result, false, true);
-    return to_return;
-}
-
-VNumber V_LE(VNumber& a, VNumber& b) {
-    compare_bit cmp = eval_op(a, b);
-    BitSpace::bit_value_t result = cmp.is_unk() ? BitSpace::_x : cmp.is_le() ? BitSpace::_1 : BitSpace::_0;
-    VNumber to_return(1, result, false, true);
-    return to_return;
-}
-
-VNumber V_NOT_EQUAL(VNumber& a, VNumber& b) {
-    compare_bit cmp = eval_op(a, b);
-    BitSpace::bit_value_t result = cmp.is_unk() ? BitSpace::_x : cmp.is_ne() ? BitSpace::_1 : BitSpace::_0;
-    VNumber to_return(1, result, false, true);
-    return to_return;
-}
-
-VNumber V_SIGNED_SHIFT_LEFT(VNumber& a, VNumber& b) {
-    if (b.has_unknown())
-        return AMBIGUOUS_VALUE;
-
-    return shift_op(a, b.get_value(), a.is_signed());
-}
-
-VNumber V_SHIFT_LEFT(VNumber& a, VNumber& b) {
-    if (b.has_unknown())
-        return AMBIGUOUS_VALUE;
-
-    return shift_op(a, b.get_value(), false);
-}
-
-VNumber V_SIGNED_SHIFT_RIGHT(VNumber& a, VNumber& b) {
-    if (b.has_unknown())
-        return AMBIGUOUS_VALUE;
-
-    return shift_op(a, -1 * b.get_value(), a.is_signed());
-}
-
-VNumber V_SHIFT_RIGHT(VNumber& a, VNumber& b) {
-    if (b.has_unknown())
-        return AMBIGUOUS_VALUE;
-
-    return shift_op(a, -1 * b.get_value(), false);
-}
-
-VNumber V_ADD(VNumber& a, VNumber& b, BitSpace::bit_value_t carry_in) {
-    return sum_op(a, b, carry_in, /* is_twos_complement_subtraction */ false);
-}
-
-VNumber V_MINUS(VNumber& a, VNumber& b, BitSpace::bit_value_t carry_in) {
-    size_t std_length = std::max(a.size(), b.size());
-    VNumber padded_a(a, std_length);
-    VNumber padded_b(b, std_length);
-
-    VNumber complement = V_MINUS(padded_b);
-    if (padded_b.is_negative() && complement.is_negative()) {
-        /* special case: 2's comp is identical to original, must pad */
-        complement = VNumber(padded_b, padded_b.size() + 1);
-        complement = V_MINUS(complement);
-    }
-
-    return sum_op(padded_a, complement, carry_in, /* is_twos_complement_subtraction */ true);
-}
-
-VNumber V_ADD(VNumber& a, VNumber& b) {
-    return V_ADD(a, b, _0);
-}
-
-VNumber V_MINUS(VNumber& a, VNumber& b) {
-    return V_MINUS(a, b, _0);
-}
-
-VNumber V_MULTIPLY(VNumber& a_in, VNumber& b_in) {
-    if (a_in.has_unknown() || b_in.has_unknown()) {
-        return AMBIGUOUS_VALUE;
-    }
-
-    VNumber a;
-    VNumber b;
-
-    bool is_multiply_signed_operation = is_signed_operation(a_in, b_in);
-    bool neg_a = a_in.is_negative();
-    bool neg_b = b_in.is_negative();
-
-    if (neg_a) {
-        a = V_MINUS(a_in);
-
-        if (a.is_negative()) {
-            /* special case: 2's comp is identical to original, must pad */
-            a = VNumber(a_in, a_in.size() + 1);
-            a = V_MINUS(a);
-        }
-    } else {
-        a = a_in;
-    }
-
-    if (neg_b) {
-        b = V_MINUS(b_in);
-
-        if (b.is_negative()) {
-            /* special case: 2's comp is identical to original, must pad */
-            b = VNumber(b_in, b_in.size() + 1);
-            b = V_MINUS(b);
-        }
-    } else {
-        b = b_in;
-    }
-
-    bool invert_result = ((!neg_a && neg_b) || (neg_a && !neg_b));
-
-    VNumber result("0");
-    VNumber b_copy = b;
-
-    for (size_t i = 0; i < a.size(); i++) {
-        bit_value_t bit_a = a.get_bit_from_lsb(i);
-
-        if (bit_a == _1) {
-            result = V_ADD(result, b_copy);
-        }
-
-        b_copy = shift_op(b_copy, 1, is_multiply_signed_operation);
-    }
-
-    if (invert_result) {
-        result = V_MINUS(result);
-    }
-
-    return result;
-}
-
-/*
- * From Table 5-6 "Power operator rules" of IEEE Standard 1364-2005:
- *  "Verilog Hardware Description Language"; on Page 46 (PDF Page 76):
- *
- * Table 5-6 — Power operator rules:
- *
- * |-----------------------------------------------------------------------------|
- * | |  \ op1 is -> |               |                  |      |   |              |
- * | \/   \         | negative < –1 | –1               | zero | 1 | positive > 1 |
- * | op2 is \       |               |                  |      |   |              |
- * |-----------------------------------------------------------------------------|
- * |                |               |                  |      |   |              |
- * | Positive       | op1 ** op2    | op2 is odd -> –1 | 0    | 1 | op1 ** op2   |
- * |                |               | op2 is even -> 1 |      |   |              |
- * |                |               |                  |      |   |              |
- * |-----------------------------------------------------------------------------|
- * |                |               |                  |      |   |              |
- * | Zero           | 1             | 1                | 1    | 1 | 1            |
- * |                |               |                  |      |   |              |
- * |-----------------------------------------------------------------------------|
- * |                |               |                  |      |   |              |
- * | Negative       | 0             | op2 is odd -> –1 | 'bx  | 1 | 0            |
- * |                |               | op2 is even -> 1 |      |   |              |
- * |                |               |                  |      |   |              |
- * |-----------------------------------------------------------------------------|
- */
-VNumber V_POWER(VNumber& a, VNumber& b) {
-    if (a.has_unknown() || b.has_unknown()) {
-        return AMBIGUOUS_VALUE;
-    }
-
-    compare_bit res_a = eval_op(a, 0);
-    short val_a = (res_a.is_eq()) ? 0 : (res_a.is_lt()) ? (eval_op(a, -1).is_lt()) ? -2 : -1 :
-                                                        /* GREATHER_THAN */ (eval_op(a, 1).is_gt()) ? 2 : 1;
-
-    compare_bit res_b = eval_op(b, 0);
-    short val_b = (res_b.is_eq()) ? 0 : (res_b.is_lt()) ? -1 :
-                                                        /* GREATHER_THAN */ 1;
-
-    // Compute: Case Where 'val_a <= -2' or 'val_a >= 2'; As-Per the Spec:
-    if (val_b > 0 && (val_a < -1 || val_a > 1)) {
-        VNumber result("2'sb01");
-        VNumber one = VNumber("2'sb01");
-        VNumber tmp_b = b;
-
-        while (eval_op(tmp_b, 0).is_gt()) {
-            VNumber tmp_b_comp = V_MINUS(tmp_b, one);
-            if (tmp_b_comp.is_negative() && tmp_b.is_negative()) {
-                /* special case: 2's comp is identical to original, must pad */
-                tmp_b_comp = VNumber(tmp_b, tmp_b.size() + 1);
-                tmp_b_comp = V_MINUS(tmp_b_comp);
-            }
-            tmp_b = tmp_b_comp;
-
-            result = V_MULTIPLY(result, a);
-        }
-
-        return result;
-    } else if (val_b == 0 || val_a == 1) {
-        return VNumber("2'sb01");
-    } else if (val_b == -1 && val_a == 0) {
-        return AMBIGUOUS_VALUE;
-    } else if (val_a == -1) {
-        // Even:
-        if (BitSpace::_0 == b.get_bit_from_lsb(0)) {
-            return VNumber("2'sb01");
-        }
-        // Odd:
-        else {
-            return VNumber("2'sb11");
-        }
-    } else {
-        return VNumber("2'sb00");
-    }
-}
-
-/////////////////////////////
-VNumber V_DIV(VNumber& a_in, VNumber& b_in) {
-    if (a_in.has_unknown() || b_in.has_unknown() || eval_op(b_in, 0).is_eq())
-        return AMBIGUOUS_VALUE;
-
-    VNumber result("0");
-
-    bool is_division_signed_operation = is_signed_operation(a_in, b_in);
-
-    bool neg_a = a_in.is_negative();
-    bool neg_b = b_in.is_negative();
-
-    VNumber a = neg_a ? V_MINUS(a_in) : a_in;
-    VNumber b = neg_b ? V_MINUS(b_in) : b_in;
-
-    if (neg_a && a.is_negative()) {
-        /* special case: 2's comp is identical to original, must pad */
-        a = VNumber(a_in, a_in.size() + 1);
-        a = V_MINUS(a);
-    }
-
-    if (neg_b && b.is_negative()) {
-        /* special case: 2's comp is identical to original, must pad */
-        b = VNumber(b_in, b_in.size() + 1);
-        b = V_MINUS(b);
-    }
-
-    while (eval_op(a, b).is_ge()) {
-        VNumber count("1");
-        VNumber tmp = b;
-
-        // initialize our variables
-        VNumber sub_with = tmp;
-        VNumber count_sub_with = count;
-        while (eval_op(tmp, a).is_le()) {
-            sub_with = tmp;
-            count_sub_with = count;
-            count = shift_op(count, 1, is_division_signed_operation);
-            tmp = shift_op(tmp, 1, is_division_signed_operation);
-        }
-        a = V_MINUS(a, sub_with);
-        result = V_ADD(result, count_sub_with);
-    }
-
-    return (neg_a != neg_b) ? V_MINUS(result) : result;
-}
-
-VNumber V_MOD(VNumber& a_in, VNumber& b_in) {
-    if (a_in.has_unknown() || b_in.has_unknown() || eval_op(b_in, 0).is_eq())
-        return AMBIGUOUS_VALUE;
-
-    bool neg_a = a_in.is_negative();
-    bool neg_b = b_in.is_negative();
-
-    VNumber a = neg_a ? V_MINUS(a_in) : a_in;
-    VNumber b = neg_b ? V_MINUS(b_in) : b_in;
-
-    if (neg_a && a.is_negative()) {
-        /* special case: 2's comp is identical to original, must pad */
-        a = VNumber(a_in, a_in.size() + 1);
-        a = V_MINUS(a);
-    }
-
-    if (neg_b && b.is_negative()) {
-        /* special case: 2's comp is identical to original, must pad */
-        b = VNumber(b_in, b_in.size() + 1);
-        b = V_MINUS(b);
-    }
-
-    bool is_modulo_signed_operation = is_signed_operation(a, b);
-
-    while (eval_op(a, b).is_ge()) {
-        VNumber tmp = b;
-        VNumber sub_with = tmp;
-
-        while (eval_op(tmp, a).is_le()) {
-            sub_with = tmp;
-            tmp = shift_op(tmp, 1, is_modulo_signed_operation);
-        }
-        a = V_MINUS(a, sub_with);
-    }
-
-    return (neg_a) ? V_MINUS(a) : a;
-}
-
-/***
- *    ___  ___  __             __          __   __   ___  __       ___    __       
- *     |  |__  |__) |\ |  /\  |__) \ /    /  \ |__) |__  |__)  /\   |  | /  \ |\ | 
- *     |  |___ |  \ | \| /~~\ |  \  |     \__/ |    |___ |  \ /~~\  |  | \__/ | \| 
- *                                                                                 
- */
-VNumber V_TERNARY(VNumber& a_in, VNumber& b_in, VNumber& c_in) {
-    /*	if a evaluates properly	*/
-    compare_bit eval = eval_op(V_LOGICAL_NOT(a_in), 0);
-
-    return (eval.is_unk()) ? b_in.bitwise(c_in, l_ternary) : (eval.is_eq()) ? VNumber(b_in) : VNumber(c_in);
-}
diff --git a/third_party/vtr/libs/rtlnumber/src/rtl_utils.cc b/third_party/vtr/libs/rtlnumber/src/rtl_utils.cc
deleted file mode 100644
index 8989456aa..000000000
--- a/third_party/vtr/libs/rtlnumber/src/rtl_utils.cc
+++ /dev/null
@@ -1,304 +0,0 @@
-/* Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
- *           Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com),
- *            Alexandrea Demmings (alexandrea.demmings@unb.ca, lxdemmings@gmail.com) and
- *             Dr. Kenneth B. Kent (ken@unb.ca)
- *             for the Reconfigurable Computing Research Lab at the
- *              Univerity of New Brunswick in Fredericton, New Brunswick, Canada
- */
-
-#include "rtl_utils.hpp"
-#include <algorithm>
-#include <iostream>
-
-static const char* base_10_digits = "0123456789";
-
-static int to_nb(char val, short base) {
-    if (base == 256) {
-        return (int)val;
-    } else if (val >= '0' && val <= '9') {
-        return val - '0';
-    } else {
-        return tolower(val) - 'a' + 10;
-    }
-}
-
-static char to_chr(int val, short base, bool uppercase) {
-    if (base == 256) {
-        return (char)val;
-    } else if (val >= 0 and val <= 9) {
-        return val + '0';
-    } else if (!uppercase) {
-        return (val - 10) + 'a';
-    } else {
-        return (val - 10) + 'A';
-    }
-}
-
-std::string convert_between_bases(std::string str, uint8_t base_from, uint8_t base_to, bool uppercase, bool big_endian) {
-    std::string digits = "";
-    while (str != "0") {
-        int carry = 0;
-
-        size_t start = (big_endian) ? str.size() - 1 : 0;
-        size_t end = (big_endian) ? 0 : str.size() - 1;
-        size_t increment = (big_endian) ? -1 : 1;
-
-        for (size_t i = start; (big_endian) ? (i >= end && i <= start) : (i >= start && i <= end); i += increment) {
-            int temp = to_nb(str[i], base_from);
-            temp += base_from * carry;
-            carry = temp % base_to;
-            temp = temp / base_to;
-            str[i] = to_chr(temp, base_from, uppercase);
-        }
-
-        if (big_endian) {
-            digits.push_back(to_chr(carry, base_to, uppercase));
-            while (str.size() > 1 && str.back() == '0') {
-                str.pop_back();
-            }
-        } else {
-            digits.insert(0, 1, to_chr(carry, base_to, uppercase));
-            while (str.size() > 1 && str[0] == '0') {
-                str.erase(0, 1);
-            }
-        }
-    }
-    return digits;
-}
-
-static uint8_t _to_decimal(char digit, const char* FUNCT, int LINE) {
-    switch (std::tolower(digit)) {
-        case '0':
-            return 0;
-        case '1':
-            return 1;
-        case '2':
-            return 2;
-        case '3':
-            return 3;
-        case '4':
-            return 4;
-        case '5':
-            return 5;
-        case '6':
-            return 6;
-        case '7':
-            return 7;
-        case '8':
-            return 8;
-        case '9':
-            return 9;
-        default:
-            _assert_Werr(false, FUNCT, LINE,
-                         "INVALID BIT INPUT: " + std::string(1, digit));
-            break;
-    }
-    return 10;
-}
-
-#define to_decimal(num) _to_decimal(num, __func__, __LINE__)
-
-static std::string _radix_digit_to_bits_str(const char digit, size_t radix, const char* FUNCT, int LINE) {
-    switch (radix) {
-        case 2: {
-            switch (std::tolower(digit)) {
-                case '0':
-                    return "0";
-                case '1':
-                    return "1";
-                case 'x':
-                    return "x";
-                case 'z':
-                    return "z";
-                default:
-                    _assert_Werr(false, FUNCT, LINE,
-                                 "INVALID BIT INPUT: " + std::string(1, digit));
-                    break;
-            }
-            break;
-        }
-        case 8: {
-            switch (std::tolower(digit)) {
-                case '0':
-                    return "000";
-                case '1':
-                    return "001";
-                case '2':
-                    return "010";
-                case '3':
-                    return "011";
-                case '4':
-                    return "100";
-                case '5':
-                    return "101";
-                case '6':
-                    return "110";
-                case '7':
-                    return "111";
-                case 'x':
-                    return "xxx";
-                case 'z':
-                    return "zzz";
-                default:
-                    _assert_Werr(false, FUNCT, LINE,
-                                 "INVALID BIT INPUT: " + std::string(1, digit));
-                    break;
-            }
-            break;
-        }
-        case 16: {
-            switch (std::tolower(digit)) {
-                case '0':
-                    return "0000";
-                case '1':
-                    return "0001";
-                case '2':
-                    return "0010";
-                case '3':
-                    return "0011";
-                case '4':
-                    return "0100";
-                case '5':
-                    return "0101";
-                case '6':
-                    return "0110";
-                case '7':
-                    return "0111";
-                case '8':
-                    return "1000";
-                case '9':
-                    return "1001";
-                case 'a':
-                    return "1010";
-                case 'b':
-                    return "1011";
-                case 'c':
-                    return "1100";
-                case 'd':
-                    return "1101";
-                case 'e':
-                    return "1110";
-                case 'f':
-                    return "1111";
-                case 'x':
-                    return "xxxx";
-                case 'z':
-                    return "zzzz";
-                default:
-                    _assert_Werr(false, FUNCT, LINE,
-                                 "INVALID BIT INPUT: " + std::string(1, digit));
-                    break;
-            }
-            break;
-        }
-        case 256: {
-            std::string bitstring = "";
-            char temp = digit;
-            // 8 bit per char
-            for (int i = 0; i < 8; i++) {
-                char value = temp % 2;
-                temp = temp / 2;
-
-                bitstring.insert(bitstring.begin(), (value) ? '1' : '0');
-            }
-            return bitstring;
-        }
-        default: {
-            _assert_Werr(false, FUNCT, LINE,
-                         "Invalid base " + std::to_string(radix));
-            break;
-        }
-    }
-    std::abort();
-}
-
-#define radix_digit_to_bits(num, radix) _radix_digit_to_bits(num, radix, __func__, __LINE__)
-static std::string _radix_digit_to_bits(const char digit, size_t radix, const char* FUNCT, int LINE) {
-    std::string result = _radix_digit_to_bits_str(digit, radix, FUNCT, LINE);
-    return result;
-}
-
-/**********************
- * convert from different radix to bitstring
- */
-std::string string_of_radix_to_bitstring(std::string orig_string, size_t radix) {
-    std::string result = "";
-
-    switch (radix) {
-        case 2:
-            assert_Werr(!orig_string.empty(), "INVALID BIT INPUT: empty string");
-
-            assert_Werr(std::string::npos == orig_string.find_first_not_of("xXzZ01"),
-                        "INVALID BIT INPUT: " + orig_string + "for radix 2");
-            break;
-
-        case 8:
-            assert_Werr(!orig_string.empty(), "INVALID BIT INPUT: empty string");
-
-            assert_Werr(std::string::npos == orig_string.find_first_not_of("xXzZ01234567"),
-                        "INVALID BIT INPUT: " + orig_string + "for radix 8");
-            break;
-
-        case 10:
-            assert_Werr(!orig_string.empty(), "INVALID BIT INPUT: empty string");
-
-            assert_Werr(std::string::npos == orig_string.find_first_not_of("0123456789"),
-                        "INVALID BIT INPUT: " + orig_string + "for radix 10");
-            break;
-
-        case 16:
-            assert_Werr(!orig_string.empty(), "INVALID BIT INPUT: empty string");
-
-            assert_Werr(std::string::npos == orig_string.find_first_not_of("xZzZ0123456789aAbBcCdDeEfF"),
-                        "INVALID BIT INPUT: " + orig_string + "for radix 16");
-            break;
-
-        case 256:
-            // allow all chars
-            break;
-
-        default:
-            assert_Werr(false,
-                        "invalid radix: " + std::to_string(radix));
-            break;
-    }
-
-    while (!orig_string.empty()) {
-        switch (radix) {
-            case 10: {
-                std::string new_number = "";
-
-                uint8_t rem_digit = 0;
-                for (char current_digit : orig_string) {
-                    uint8_t new_pair = (rem_digit * 10) + to_decimal(current_digit);
-
-                    new_number.push_back(base_10_digits[(new_pair / 2)]);
-                    rem_digit = new_pair % 2;
-                }
-
-                result.insert(result.begin(), base_10_digits[rem_digit]);
-                while (new_number.size() > 1
-                       && new_number[0] == '0') {
-                    new_number.erase(0, 1);
-                }
-
-                if (new_number == "0") {
-                    orig_string = "";
-                } else {
-                    orig_string = new_number;
-                }
-
-                break;
-            }
-            default: {
-                result = radix_digit_to_bits(orig_string.back(), radix) + result;
-                orig_string.pop_back();
-                break;
-            }
-        }
-    }
-
-    result.insert(result.begin(), '0');
-
-    return result;
-}
diff --git a/third_party/vtr/libs/rtlnumber/unit_test/Makefile b/third_party/vtr/libs/rtlnumber/unit_test/Makefile
deleted file mode 100644
index a872a5922..000000000
--- a/third_party/vtr/libs/rtlnumber/unit_test/Makefile
+++ /dev/null
@@ -1,79 +0,0 @@
-#Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
-#         Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com) and
-#          Dr. Kenneth B. Kent (ken@unb.ca)
-#          for the Reconfigurable Computing Research Lab at the
-#           Univerity of New Brunswick in Fredericton, New Brunswick, Canada
-
-# If the first argument is "run"...
-ifeq (build,$(firstword $(MAKECMDGOALS)))
-  # use the rest as arguments for "make"
-  RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
-  # ...and turn them into do-nothing targets
-  $(eval $(RUN_ARGS):;@:)
-endif
-ifeq (run,$(firstword $(MAKECMDGOALS)))	
-  # use the rest as arguments for "make"
-  RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
-  # ...and turn them into do-nothing targets
-  $(eval $(RUN_ARGS):;@:)
-endif
-ifeq (gdb,$(firstword $(MAKECMDGOALS)))	
-  # use the rest as arguments for "make"
-  RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
-  # ...and turn them into do-nothing targets
-  $(eval $(RUN_ARGS):;@:)
-endif
-ifeq (valgrind,$(firstword $(MAKECMDGOALS)))	
-  # use the rest as arguments for "make"
-  RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
-  # ...and turn them into do-nothing targets
-  $(eval $(RUN_ARGS):;@:)
-endif
-ifeq (debug,$(firstword $(MAKECMDGOALS)))	
-  # use the rest as arguments for "make"
-  RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
-  # ...and turn them into do-nothing targets
-  $(eval $(RUN_ARGS):;@:)
-endif
-
-INCLUDE =-I../src/include
-
-BIN = bin/exec.out
-
-C = clang++ -std=c++14 -lpthread
-
-cleanup_flags=\
--ferror-limit=1000 \
--Werror \
--Wpedantic \
--Weverything \
--Wall \
--Wno-c++98-compat \
--Wno-unused-parameter \
--g -O0 -fsanitize=address -fno-omit-frame-pointer -fno-optimize-sibling-calls
-
-PHONY: error
-
-error: 
-	echo "can only use 'clean', 'debug <testname>.cpp', 'build <testname>.cpp' or 'run <arguments>'"
-
-debug:
-	mkdir -p bin
-	$(C) -ggdb $(cleanup_flags) $(INCLUDE) $(RUN_ARGS) -o $(BIN)
-
-build:
-	mkdir -p bin
-	$(C) $(INCLUDE) $(RUN_ARGS) -o $(BIN)
-
-run:
-	$(BIN) $(RUN_ARGS) 
-
-valgrind: build
-	valgrind --tool=helgrind $(BIN) $(RUN_ARGS) 
-
-gdb:
-	gdb --args $(BIN) $(RUN_ARGS)
-
-clean:
-	$(RM) -Rf bin
-
diff --git a/third_party/vtr/libs/rtlnumber/unit_test/verilog_bits.cpp b/third_party/vtr/libs/rtlnumber/unit_test/verilog_bits.cpp
deleted file mode 100644
index d9a239997..000000000
--- a/third_party/vtr/libs/rtlnumber/unit_test/verilog_bits.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/* Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
- *           Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com) and
- *            Dr. Kenneth B. Kent (ken@unb.ca)
- *            for the Reconfigurable Computing Research Lab at the
- *             Univerity of New Brunswick in Fredericton, New Brunswick, Canada
- */
-
-#include "internal_bits.hpp"
-
-using namespace BitSpace;
-int main(int argc, char** argv) {
-    size_t size = 0;
-    size = strtoul(argv[1], nullptr, 10);
-    VerilogBits my_bits(size, 'x');
-    printf("array_size(%zu) \n\n================\n", my_bits.size());
-
-    std::cout << my_bits.to_string(false) << std::endl;
-
-    for (size_t value = 0; value < 8; value++) {
-        for (size_t i = 0; i < size; i++) {
-            BitSpace::bit_value_t val = static_cast<BitSpace::bit_value_t>(value);
-            printf("(%hhu)[%zu] : ", val, i);
-            my_bits.set_bit(i, val);
-            std::cout << my_bits.to_string(false) << std::endl;
-        }
-    }
-}
diff --git a/third_party/vtr/libs/rtlnumber/verify_librtlnumber.sh b/third_party/vtr/libs/rtlnumber/verify_librtlnumber.sh
deleted file mode 100755
index 08b7ef09d..000000000
--- a/third_party/vtr/libs/rtlnumber/verify_librtlnumber.sh
+++ /dev/null
@@ -1,124 +0,0 @@
-#!/usr/bin/env bash
-#Authors: Aaron Graham (aaron.graham@unb.ca, aarongraham9@gmail.com),
-#         Jean-Philippe Legault (jlegault@unb.ca, jeanphilippe.legault@gmail.com) and
-#          Dr. Kenneth B. Kent (ken@unb.ca)
-#          for the Reconfigurable Computing Research Lab at the
-#           Univerity of New Brunswick in Fredericton, New Brunswick, Canada
-
-# trap ctrl-c and call ctrl_c()
-trap ctrl_c INT
-
-TOTAL_TEST_RAN=0
-FAILURE_COUNT=0
-DEBUG=0
-
-function ctrl_c() {
-    FAILURE_COUNT=$((FAILURE_COUNT+1))
-	exit_code ${FAILURE_COUNT} "\n\n** EXITED FORCEFULLY **\n\n"
-}
-
-function exit_code() {
-	#print passed in value
-	echo -e $2
-	my_failed_count=$1
-	echo -e "$TOTAL_TEST_RAN Tests Ran; $my_failed_count Test Failures.\n"
-	[ "$my_failed_count" -gt "127" ] && echo "WARNING: Return Code may be unreliable: More than 127 Failures!"
-	echo "End."
-	exit ${my_failed_count}
-}
-
-# # Check if Library 'file' "${0%/*}/librtlnumber.a" exists
-if [ ! -f ./librtlnumber.a ] && [ ! -f ./rtl_number ]; 
-then
-		exit_code 99 "${0%/*}rtl number is nowhere to be found :o !\n" 
-fi
-
-# Dynamically load in inputs and results from
-#  file(s) on disk.
-for INPUT in ${0%/*}/regression_tests/*.csv; do
-	[ ! -f $INPUT ] && exit_code 99 "$INPUT regression test file not found!\n"
-
-	echo -e "\nRunning Test File: $INPUT:"
-
-	LINE=0
-
-	while IFS= read -r input_line; do
-
-		LINE=$((LINE + 1))
-
-		#glob whitespace from line and remove everything after comment
-		input_line=$(echo ${input_line} | tr -d '[:space:]' | cut -d '#' -f1)
-
-		#flip escaped commas to 'ESCAPED_COMMA' to safeguard agains having them as csv separator
-		input_line=$(echo ${input_line} | sed 's/\\\,/ESCAPED_COMMA/g')
-
-		#skip empty lines
-		[  "_" ==  "_${input_line}" ] && continue
-
-		#split csv
-		IFS="," read -ra arr <<< ${input_line}
-		len=${#arr[@]}
-
-		if 	[ ${len} != "4" ] &&		# unary
-			[ ${len} != "5" ] &&		# binary
-			[ ${len} != "7" ] &&		# ternary
-			[ ${len} != "8" ]; then		# replicate
-				[ ! -z ${DEBUG} ] && echo -e "\nWARNING: Malformed Line in CSV File ($INPUT:$LINE) Input Line: ${input_line}! Skipping...\n"
-				continue
-		fi
-
-		
-
-		TOTAL_TEST_RAN=$((TOTAL_TEST_RAN + 1))
-
-		#deal with multiplication
-		set -f
-
-		# everything between is the operation to pipe in so we slice the array and concatenate with space
-		TEST_LABEL=${arr[0]}
-		EXPECTED_RESULT=${arr[$(( len -1 ))]}
-		
-		# build the command and get back our escaped commas 
-		RTL_CMD_IN=$(printf "%s " "${arr[@]:1:$(( len -2 ))}")
-		RTL_CMD_IN=$( echo ${RTL_CMD_IN} | sed 's/ESCAPED_COMMA/,/g' )
-
-		# Check for Anything on standard out and any non-'0' exit codes:
-		OUTPUT_AND_RESULT=$(${0%/*}/rtl_number ${RTL_CMD_IN})
-		EXIT_CODE=$?
-
-		if [[ 0 -ne $EXIT_CODE ]]
-		then
-			FAILURE_COUNT=$((FAILURE_COUNT+1))
-
-			echo -e "\nERROR: Non-Zero Exit Code from ${0%/*}/rtl_number (on $INPUT:$LINE)\n"
-
-			echo -e "-X- FAILED == $TEST_LABEL\t  ./rtl_number ${RTL_CMD_IN}\t Output:<$OUTPUT_AND_RESULT> != Expected:<$EXPECTED_RESULT>"
-
-		elif [ "${OUTPUT_AND_RESULT}" == "${EXPECTED_RESULT}" ]
-		then
-			echo "--- PASSED == $TEST_LABEL ( ${OUTPUT_AND_RESULT} ) "
-
-		elif [ "1'b1" == "$(${0%/*}/rtl_number ${OUTPUT_AND_RESULT} === ${EXPECTED_RESULT})" ]
-		then
-			echo "--- PASSED == $TEST_LABEL ( ${OUTPUT_AND_RESULT} )"
-
-		else
-			FAILURE_COUNT=$((FAILURE_COUNT+1))
-
-			# echo -e "${0##*/}@${HOSTNAME}: DEBUG: FAILURE_COUNT: $FAILURE_COUNT\n"
-
-			echo -e "\nERROR: Expected Result Didn't match what we got back from ${0%/*}/rtl_number (on $INPUT:$LINE)\n"
-
-			echo -e "-X- FAILED == $TEST_LABEL\t  ./rtl_number ${RTL_CMD_IN}\t Output:<$OUTPUT_AND_RESULT> != Expected:<$EXPECTED_RESULT>"
-
-		fi
-
-		#unset the multiplication token override
-		unset -f
-
-	done < "$INPUT"
-	#  Re-Enable Bash Wildcard Expanstion '*' 
-	set +f
-done
-
-exit_code ${FAILURE_COUNT} "Completed Tests\n"
diff --git a/third_party/vtr/libs/vpr/src/draw/breakpoint_state_globals.h b/third_party/vtr/libs/vpr/src/draw/breakpoint_state_globals.h
deleted file mode 100644
index d1bb22383..000000000
--- a/third_party/vtr/libs/vpr/src/draw/breakpoint_state_globals.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef BREAKPOINT_STATE_GLOBALS
-#define BREAKPOINT_STATE_GLOBALS
-
-#include <string>
-#include <vector>
-
-//the BreakpointState struct holds all values that could possibly trigger a breakpoint
-//some variables such as move_num, from_block, temp_count, blocks_affected are related to the placer and router_iter and net_id are related to the router
-//there is also a string that holds the breakpoint description that are displayed in the UI and printed to the terminal
-//these values are updated in place.cpp and route.cpp and expr_eval.cpp and breakpoint.cpp use these values to look for breakpoints
-struct BreakpointState {
-    int move_num = 0;                         //current number of completed placer moves
-    int from_block = -1;                      //first block moved in the current placement swap
-    int temp_count = 0;                       //number of temperature changes thus far
-    int block_affected = -1;                  //the block_id that was requested to be stopped at if in blocks_affected
-    std::vector<int> blocks_affected_by_move; //vector giving the clb netlist block ids of all blocks moving in the current perturbation
-    int route_net_id = -1;                    //clb netlist id of net that was just routed
-    int router_iter = 0;                      //current rip-up and re-route iteration count of router
-    std::string bp_description;               //the breakpoint description to appear in the breakpoint list in the GUI
-};
-
-class BreakpointStateGlobals {
-    //holds one global BreakpointState variable to be accessed and modified by the placer and router
-    BreakpointState glob_breakpoint_state;
-
-  public:
-    //accessor for glob_breakpoint_state
-    BreakpointState* get_glob_breakpoint_state() {
-        return &glob_breakpoint_state;
-    }
-};
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/CMakeLists.txt b/third_party/vtr/libs/vtrutil/CMakeLists.txt
deleted file mode 100644
index 6e69ae530..000000000
--- a/third_party/vtr/libs/vtrutil/CMakeLists.txt
+++ /dev/null
@@ -1,119 +0,0 @@
-cmake_minimum_required(VERSION 3.9)
-
-project("libvtrutil")
-
-#Version info
-set(VTR_VERSION_FILE_IN ${CMAKE_CURRENT_SOURCE_DIR}/src/vtr_version.cpp.in)
-set(VTR_VERSION_FILE_OUT ${CMAKE_CURRENT_BINARY_DIR}/vtr_version.cpp)
-
-#Compiler info
-set(VTR_COMPILER_INFO "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION} on ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR}")
-
-#Set default version numbers in case not specified
-if(NOT DEFINED VTR_VERSION_MAJOR)
-    set(VTR_VERSION_MAJOR 0)
-endif()
-
-if(NOT DEFINED VTR_VERSION_MINOR)
-    set(VTR_VERSION_MINOR 0)
-endif()
-
-if(NOT DEFINED VTR_VERSION_PATCH)
-    set(VTR_VERSION_PATCH 0)
-endif()
-
-set(VTR_BUILD_INFO "${CMAKE_BUILD_TYPE}")
-if (CMAKE_INTERPROCEDURAL_OPTIMIZATION)
-    set(VTR_BUILD_INFO "${VTR_BUILD_INFO} IPO")
-endif()
-
-if (VPR_PGO_CONFIG STREQUAL "prof_use")
-    set(VTR_BUILD_INFO "${VTR_BUILD_INFO} PGO")
-elseif (VPR_PGO_CONFIG STREQUAL "prof_gen")
-    set(VTR_BUILD_INFO "${VTR_BUILD_INFO} PGOgen")
-endif()
-
-set(VTR_BUILD_INFO "${VTR_BUILD_INFO} VTR_ASSERT_LEVEL=${VTR_ASSERT_LEVEL}")
-
-if (VTR_ENABLE_SANITIZE)
-    set(VTR_BUILD_INFO "${VTR_BUILD_INFO} sanitizers")
-endif()
-if (VTR_ENABLE_PROFILING)
-    set(VTR_BUILD_INFO "${VTR_BUILD_INFO} gprof")
-endif()
-
-if (VTR_ENABLE_COVERAGE)
-    set(VTR_BUILD_INFO "${VTR_BUILD_INFO} gcov")
-endif()
-
-if (VTR_ENABLE_DEBUG_LOGGING)
-    set(VTR_BUILD_INFO "${VTR_BUILD_INFO} debug_logging")
-endif()
-
-# We always update the vtr_version.cpp file every time the project is built, 
-# to ensure the git revision and dirty status are up to date.
-#
-# We need to do this in two stages:
-#
-# 1) We a custom target 'version' (which is always out of date) so it will always be run.
-#    It touches the unprocessed version input file so it too will always be out of date.
-#
-# 2) The custom command depends on the touched version input file and generates the processed 
-#    version file, with updated values. The custom command uses the configure_version.cmake 
-#    script to generate the up-to-date vtr_version.cpp
-add_custom_target(version ALL
-    COMMAND ${CMAKE_COMMAND} -E touch ${VTR_VERSION_FILE_IN})
-
-add_custom_command(OUTPUT ${VTR_VERSION_FILE_OUT}
-    COMMAND ${CMAKE_COMMAND} 
-                    -D IN_FILE=${VTR_VERSION_FILE_IN}
-                    -D OUT_FILE=${VTR_VERSION_FILE_OUT}
-                    -D VTR_VERSION_MAJOR=${VTR_VERSION_MAJOR}
-                    -D VTR_VERSION_MINOR=${VTR_VERSION_MINOR}
-                    -D VTR_VERSION_PATCH=${VTR_VERSION_PATCH}
-                    -D VTR_VERSION_PRERELEASE=${VTR_VERSION_PRERELEASE}
-                    -D VTR_COMPILER_INFO=${VTR_COMPILER_INFO}
-                    -D VTR_BUILD_INFO=${VTR_BUILD_INFO}
-                    -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/configure_version.cmake
-    MAIN_DEPENDENCY ${VTR_VERSION_FILE_IN}
-    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
-	VERBATIM)
-
-
-#
-# Source files and library
-#
-file(GLOB_RECURSE LIB_SOURCES src/*.cpp)
-file(GLOB_RECURSE LIB_HEADERS src/*.hpp src/*.h)
-files_to_dirs(LIB_HEADERS LIB_INCLUDE_DIRS)
-
-#Add the version file to the sources
-list(APPEND LIB_SOURCES ${VTR_VERSION_FILE_OUT})
-
-#Create the library
-add_library(libvtrutil STATIC
-             ${LIB_HEADERS}
-             ${LIB_SOURCES})
-target_include_directories(libvtrutil PUBLIC ${LIB_INCLUDE_DIRS})
-set_target_properties(libvtrutil PROPERTIES PREFIX "") #Avoid extra 'lib' prefix
-
-#Ensure version is always up to date by requiring version to be run first
-add_dependencies(libvtrutil version)
-
-#Specify link-time dependancies
-target_link_libraries(libvtrutil
-                        liblog)
-
-install(TARGETS libvtrutil DESTINATION bin)
-
-#
-# Unit Tests
-#
-file(GLOB_RECURSE TEST_SOURCES test/*.cpp)
-add_executable(test_vtrutil ${TEST_SOURCES})
-target_link_libraries(test_vtrutil 
-                        libvtrutil
-                        Catch2::Catch2WithMain)
-
-add_test(NAME test_vtrutil COMMAND test_vtrutil --use-colour=yes)
-            
diff --git a/third_party/vtr/libs/vtrutil/cmake/modules/configure_version.cmake b/third_party/vtr/libs/vtrutil/cmake/modules/configure_version.cmake
deleted file mode 100644
index 8c7fbf17e..000000000
--- a/third_party/vtr/libs/vtrutil/cmake/modules/configure_version.cmake
+++ /dev/null
@@ -1,55 +0,0 @@
-#
-# Versioning information
-#
-#Figure out the git revision
-find_package(Git QUIET)
-if(GIT_FOUND)
-    exec_program(${GIT_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}
-                 ARGS describe --always --long --dirty
-                 OUTPUT_VARIABLE VTR_VCS_REVISION
-                 RETURN_VALUE GIT_DESCRIBE_RETURN_VALUE)
-
-    if(NOT GIT_DESCRIBE_RETURN_VALUE EQUAL 0)
-        #Git describe failed, usually this means we
-        #aren't in a git repo -- so don't set a VCS 
-        #revision
-        set(VTR_VCS_REVISION "unkown")
-    endif()
-
-    #Call again with exclude to get the revision excluding any tags
-    #(i.e. just the commit ID and dirty flag)
-    exec_program(${GIT_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}
-                 ARGS describe --always --long --dirty --exclude '*'
-                 OUTPUT_VARIABLE VTR_VCS_REVISION_SHORT
-                 RETURN_VALUE GIT_DESCRIBE_RETURN_VALUE)
-    if(NOT GIT_DESCRIBE_RETURN_VALUE EQUAL 0)
-        #Git describe failed, usually this means we
-        #aren't in a git repo -- so don't set a VCS 
-        #revision
-        set(VTR_VCS_REVISION_SHORT "unkown")
-    endif()
-else()
-    #Couldn't find git, so can't look-up VCS revision
-    set(VTR_VCS_REVISION "unkown")
-    set(VTR_VCS_REVISION_SHORT "unkown")
-endif()
-
-
-#Set the version according to semver.org
-set(VTR_VERSION "${VTR_VERSION_MAJOR}.${VTR_VERSION_MINOR}.${VTR_VERSION_PATCH}")
-if(VTR_VERSION_PRERELEASE)
-    set(VTR_VERSION "${VTR_VERSION}-${VTR_VERSION_PRERELEASE}")
-endif()
-set(VTR_VERSION_SHORT ${VTR_VERSION})
-if(VTR_VCS_REVISION)
-    set(VTR_VERSION "${VTR_VERSION}+${VTR_VCS_REVISION_SHORT}")
-endif()
-
-#Other build meta-data
-string(TIMESTAMP VTR_BUILD_TIMESTAMP)
-set(VTR_BUILD_TIMESTAMP "${VTR_BUILD_TIMESTAMP}")
-set(VTR_BUILD_INFO "${VTR_BUILD_INFO}")
-
-message(STATUS "VTR Version: ${VTR_VERSION}")
-
-configure_file(${IN_FILE} ${OUT_FILE})
diff --git a/third_party/vtr/libs/vtrutil/src/picosha2.h b/third_party/vtr/libs/vtrutil/src/picosha2.h
deleted file mode 100644
index 67794f920..000000000
--- a/third_party/vtr/libs/vtrutil/src/picosha2.h
+++ /dev/null
@@ -1,357 +0,0 @@
-/*
- * The MIT License (MIT)
- *
- * Copyright (C) 2014 okdshin
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef PICOSHA2_H
-#define PICOSHA2_H
-//picosha2:20140213
-#include <iostream>
-#include <vector>
-#include <iterator>
-#include <cassert>
-#include <sstream>
-#include <algorithm>
-
-namespace picosha2 {
-typedef unsigned long word_t;
-typedef unsigned char byte_t;
-
-namespace detail {
-inline byte_t mask_8bit(byte_t x) {
-    return x & 0xff;
-}
-
-inline word_t mask_32bit(word_t x) {
-    return x & 0xffffffff;
-}
-
-const word_t add_constant[64] = {
-    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
-    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
-    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
-    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
-    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
-    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
-    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
-    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
-    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
-    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
-    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
-    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
-    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
-    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
-    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
-    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2};
-
-const word_t initial_message_digest[8] = {
-    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
-    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
-
-inline word_t ch(word_t x, word_t y, word_t z) {
-    return (x & y) ^ ((~x) & z);
-}
-
-inline word_t maj(word_t x, word_t y, word_t z) {
-    return (x & y) ^ (x & z) ^ (y & z);
-}
-
-inline word_t rotr(word_t x, std::size_t n) {
-    assert(n < 32);
-    return mask_32bit((x >> n) | (x << (32 - n)));
-}
-
-inline word_t bsig0(word_t x) {
-    return rotr(x, 2) ^ rotr(x, 13) ^ rotr(x, 22);
-}
-
-inline word_t bsig1(word_t x) {
-    return rotr(x, 6) ^ rotr(x, 11) ^ rotr(x, 25);
-}
-
-inline word_t shr(word_t x, std::size_t n) {
-    assert(n < 32);
-    return x >> n;
-}
-
-inline word_t ssig0(word_t x) {
-    return rotr(x, 7) ^ rotr(x, 18) ^ shr(x, 3);
-}
-
-inline word_t ssig1(word_t x) {
-    return rotr(x, 17) ^ rotr(x, 19) ^ shr(x, 10);
-}
-
-template<typename RaIter1, typename RaIter2>
-void hash256_block(RaIter1 message_digest, RaIter2 first, RaIter2 /*last*/) {
-    word_t w[64];
-    std::fill(w, w + 64, 0);
-    for (std::size_t i = 0; i < 16; ++i) {
-        w[i] = (static_cast<word_t>(mask_8bit(*(first + i * 4))) << 24)
-               | (static_cast<word_t>(mask_8bit(*(first + i * 4 + 1))) << 16)
-               | (static_cast<word_t>(mask_8bit(*(first + i * 4 + 2))) << 8)
-               | (static_cast<word_t>(mask_8bit(*(first + i * 4 + 3))));
-    }
-    for (std::size_t i = 16; i < 64; ++i) {
-        w[i] = mask_32bit(ssig1(w[i - 2]) + w[i - 7] + ssig0(w[i - 15]) + w[i - 16]);
-    }
-
-    word_t a = *message_digest;
-    word_t b = *(message_digest + 1);
-    word_t c = *(message_digest + 2);
-    word_t d = *(message_digest + 3);
-    word_t e = *(message_digest + 4);
-    word_t f = *(message_digest + 5);
-    word_t g = *(message_digest + 6);
-    word_t h = *(message_digest + 7);
-
-    for (std::size_t i = 0; i < 64; ++i) {
-        word_t temp1 = h + bsig1(e) + ch(e, f, g) + add_constant[i] + w[i];
-        word_t temp2 = bsig0(a) + maj(a, b, c);
-        h = g;
-        g = f;
-        f = e;
-        e = mask_32bit(d + temp1);
-        d = c;
-        c = b;
-        b = a;
-        a = mask_32bit(temp1 + temp2);
-    }
-    *message_digest += a;
-    *(message_digest + 1) += b;
-    *(message_digest + 2) += c;
-    *(message_digest + 3) += d;
-    *(message_digest + 4) += e;
-    *(message_digest + 5) += f;
-    *(message_digest + 6) += g;
-    *(message_digest + 7) += h;
-    for (std::size_t i = 0; i < 8; ++i) {
-        *(message_digest + i) = mask_32bit(*(message_digest + i));
-    }
-}
-
-} //namespace detail
-
-template<typename InIter>
-void output_hex(InIter first, InIter last, std::ostream& os) {
-    std::ios::fmtflags orig_flags = os.flags();
-    std::streamsize orig_width = os.width();
-    char orig_fill = os.fill();
-
-    os.setf(std::ios::hex, std::ios::basefield);
-    while (first != last) {
-        os.width(2);
-        os.fill('0');
-        os << static_cast<unsigned int>(*first);
-        ++first;
-    }
-    os.flags(orig_flags);
-    os.fill(orig_fill);
-    os.width(orig_width);
-}
-
-template<typename InIter>
-void bytes_to_hex_string(InIter first, InIter last, std::string& hex_str) {
-    std::ostringstream oss;
-    output_hex(first, last, oss);
-    hex_str.assign(oss.str());
-}
-
-template<typename InContainer>
-void bytes_to_hex_string(const InContainer& bytes, std::string& hex_str) {
-    bytes_to_hex_string(bytes.begin(), bytes.end(), hex_str);
-}
-
-template<typename InIter>
-std::string bytes_to_hex_string(InIter first, InIter last) {
-    std::string hex_str;
-    bytes_to_hex_string(first, last, hex_str);
-    return hex_str;
-}
-
-template<typename InContainer>
-std::string bytes_to_hex_string(const InContainer& bytes) {
-    std::string hex_str;
-    bytes_to_hex_string(bytes, hex_str);
-    return hex_str;
-}
-
-class hash256_one_by_one {
-  public:
-    hash256_one_by_one() {
-        init();
-    }
-
-    void init() {
-        buffer_.clear();
-        std::fill(data_length_digits_, data_length_digits_ + 4, 0);
-        std::copy(detail::initial_message_digest, detail::initial_message_digest + 8, h_);
-    }
-
-    template<typename RaIter>
-    void process(RaIter first, RaIter last) {
-        add_to_data_length(std::distance(first, last));
-        std::copy(first, last, std::back_inserter(buffer_));
-        std::size_t i = 0;
-        for (; i + 64 <= buffer_.size(); i += 64) {
-            detail::hash256_block(h_, buffer_.begin() + i, buffer_.begin() + i + 64);
-        }
-        buffer_.erase(buffer_.begin(), buffer_.begin() + i);
-    }
-
-    void finish() {
-        byte_t temp[64];
-        std::fill(temp, temp + 64, 0);
-        std::size_t remains = buffer_.size();
-        std::copy(buffer_.begin(), buffer_.end(), temp);
-        temp[remains] = 0x80;
-
-        if (remains > 55) {
-            std::fill(temp + remains + 1, temp + 64, 0);
-            detail::hash256_block(h_, temp, temp + 64);
-            std::fill(temp, temp + 64 - 4, 0);
-        } else {
-            std::fill(temp + remains + 1, temp + 64 - 4, 0);
-        }
-
-        write_data_bit_length(&(temp[56]));
-        detail::hash256_block(h_, temp, temp + 64);
-    }
-
-    template<typename OutIter>
-    void get_hash_bytes(OutIter first, OutIter last) const {
-        for (const word_t* iter = h_; iter != h_ + 8; ++iter) {
-            for (std::size_t i = 0; i < 4 && first != last; ++i) {
-                *(first++) = detail::mask_8bit(static_cast<byte_t>((*iter >> (24 - 8 * i))));
-            }
-        }
-    }
-
-  private:
-    void add_to_data_length(word_t n) {
-        word_t carry = 0;
-        data_length_digits_[0] += n;
-        for (std::size_t i = 0; i < 4; ++i) {
-            data_length_digits_[i] += carry;
-            if (data_length_digits_[i] >= 65536u) {
-                carry = data_length_digits_[i] >> 16;
-                data_length_digits_[i] &= 65535u;
-            } else {
-                break;
-            }
-        }
-    }
-    void write_data_bit_length(byte_t* begin) {
-        word_t data_bit_length_digits[4];
-        std::copy(
-            data_length_digits_, data_length_digits_ + 4,
-            data_bit_length_digits);
-
-        // convert byte length to bit length (multiply 8 or shift 3 times left)
-        word_t carry = 0;
-        for (std::size_t i = 0; i < 4; ++i) {
-            word_t before_val = data_bit_length_digits[i];
-            data_bit_length_digits[i] <<= 3;
-            data_bit_length_digits[i] |= carry;
-            data_bit_length_digits[i] &= 65535u;
-            carry = (before_val >> (16 - 3)) & 65535u;
-        }
-
-        // write data_bit_length
-        for (int i = 3; i >= 0; --i) {
-            (*begin++) = static_cast<byte_t>(data_bit_length_digits[i] >> 8);
-            (*begin++) = static_cast<byte_t>(data_bit_length_digits[i]);
-        }
-    }
-    std::vector<byte_t> buffer_;
-    word_t data_length_digits_[4]; //as 64bit integer (16bit x 4 integer)
-    word_t h_[8];
-};
-
-inline void get_hash_hex_string(const hash256_one_by_one& hasher, std::string& hex_str) {
-    byte_t hash[32];
-    hasher.get_hash_bytes(hash, hash + 32);
-    return bytes_to_hex_string(hash, hash + 32, hex_str);
-}
-
-inline std::string get_hash_hex_string(const hash256_one_by_one& hasher) {
-    std::string hex_str;
-    get_hash_hex_string(hasher, hex_str);
-    return hex_str;
-}
-
-template<typename RaIter, typename OutIter>
-void hash256(RaIter first, RaIter last, OutIter first2, OutIter last2) {
-    hash256_one_by_one hasher;
-    //hasher.init();
-    hasher.process(first, last);
-    hasher.finish();
-    hasher.get_hash_bytes(first2, last2);
-}
-
-template<typename RaIter, typename OutContainer>
-void hash256(RaIter first, RaIter last, OutContainer& dst) {
-    hash256(first, last, dst.begin(), dst.end());
-}
-
-template<typename RaContainer, typename OutIter>
-void hash256(const RaContainer& src, OutIter first, OutIter last) {
-    hash256(src.begin(), src.end(), first, last);
-}
-
-template<typename RaContainer, typename OutContainer>
-void hash256(const RaContainer& src, OutContainer& dst) {
-    hash256(src.begin(), src.end(), dst.begin(), dst.end());
-}
-
-template<typename RaIter>
-void hash256_hex_string(RaIter first, RaIter last, std::string& hex_str) {
-    byte_t hashed[32];
-    hash256(first, last, hashed, hashed + 32);
-    std::ostringstream oss;
-    output_hex(hashed, hashed + 32, oss);
-    hex_str.assign(oss.str());
-}
-
-template<typename RaIter>
-std::string hash256_hex_string(RaIter first, RaIter last) {
-    std::string hex_str;
-    hash256_hex_string(first, last, hex_str);
-    return hex_str;
-}
-
-inline void hash256_hex_string(const std::string& src, std::string& hex_str) {
-    hash256_hex_string(src.begin(), src.end(), hex_str);
-}
-
-template<typename RaContainer>
-void hash256_hex_string(const RaContainer& src, std::string& hex_str) {
-    hash256_hex_string(src.begin(), src.end(), hex_str);
-}
-
-template<typename RaContainer>
-std::string hash256_hex_string(const RaContainer& src) {
-    return hash256_hex_string(src.begin(), src.end());
-}
-
-} //namespace picosha2
-
-#endif //PICOSHA2_H
diff --git a/third_party/vtr/libs/vtrutil/src/vpr_error.cc b/third_party/vtr/libs/vtrutil/src/vpr_error.cc
deleted file mode 100644
index 14eb464b5..000000000
--- a/third_party/vtr/libs/vtrutil/src/vpr_error.cc
+++ /dev/null
@@ -1,89 +0,0 @@
-#include <cstdarg>
-#include <string>
-
-#include "vtr_util.h"
-#include "vtr_log.h"
-#include "vpr_error.h"
-
-// Set of function names for which the VPR_THROW errors are treated
-// as VTR_LOG_WARN
-static std::unordered_set<std::string> functions_to_demote;
-
-/* Date:June 15th, 2013
- * Author: Daniel Chen
- * Purpose: Used to throw any internal VPR error or architecture
- *			file error and output the appropriate file name,
- *			line number, and the error message. Does not return
- *			anything but throw an exception which will be caught
- *			main.c.
- */
-void map_error_activation_status(std::string function_name) {
-    functions_to_demote.insert(function_name);
-}
-
-void vpr_throw(enum e_vpr_error type,
-               const char* psz_file_name,
-               unsigned int line_num,
-               const char* psz_message,
-               ...) {
-    // Make a variable argument list
-    va_list va_args;
-
-    // Initialize variable argument list
-    va_start(va_args, psz_message);
-
-    //Format the message
-    std::string msg = vtr::vstring_fmt(psz_message, va_args);
-
-    // Reset variable argument list
-    va_end(va_args);
-
-    vpr_throw_msg(type, psz_file_name, line_num, msg);
-}
-
-void vvpr_throw(enum e_vpr_error type,
-                const char* psz_file_name,
-                unsigned int line_num,
-                const char* psz_message,
-                va_list va_args) {
-    //Format the message
-    std::string msg = vtr::vstring_fmt(psz_message, va_args);
-
-    vpr_throw_msg(type, psz_file_name, line_num, msg);
-}
-
-void vpr_throw_msg(enum e_vpr_error type,
-                   const char* psz_file_name,
-                   unsigned int line_num,
-                   std::string msg) {
-    throw VprError(type, msg, psz_file_name, line_num);
-}
-
-void vpr_throw_opt(enum e_vpr_error type,
-                   const char* psz_func_pretty_name,
-                   const char* psz_func_name,
-                   const char* psz_file_name,
-                   unsigned int line_num,
-                   const char* psz_message,
-                   ...) {
-    std::string func_name(psz_func_name);
-
-    // Make a variable argument list
-    va_list va_args;
-
-    // Initialize variable argument list
-    va_start(va_args, psz_message);
-
-    //Format the message
-    std::string msg = vtr::vstring_fmt(psz_message, va_args);
-
-    // Reset variable argument list
-    va_end(va_args);
-
-    auto result = functions_to_demote.find(func_name);
-    if (result != functions_to_demote.end()) {
-        VTR_LOGFF_WARN(psz_file_name, line_num, psz_func_pretty_name, msg.data());
-    } else {
-        vpr_throw_msg(type, psz_file_name, line_num, msg);
-    }
-}
diff --git a/third_party/vtr/libs/vtrutil/src/vpr_error.h b/third_party/vtr/libs/vtrutil/src/vpr_error.h
deleted file mode 100644
index 0c3d5a1cb..000000000
--- a/third_party/vtr/libs/vtrutil/src/vpr_error.h
+++ /dev/null
@@ -1,127 +0,0 @@
-#ifndef VPR_ERROR_H
-#define VPR_ERROR_H
-
-#include <cstdarg>
-#include <string>
-#include <unordered_set>
-
-#include "vtr_error.h"
-
-enum e_vpr_error {
-    VPR_ERROR_UNKNOWN = 0,
-
-    // Flow errors
-    VPR_ERROR_ARCH,
-    VPR_ERROR_PACK,
-    VPR_ERROR_PLACE,
-    VPR_ERROR_ROUTE,
-    VPR_ERROR_TIMING,
-    VPR_ERROR_POWER,
-    VPR_ERROR_SDC,
-
-    // File parsing errors
-    VPR_ERROR_NET_F,        // Error while parsing the packed netlist file
-    VPR_ERROR_PLACE_F,      // Error while parsning the placement file
-    VPR_ERROR_BLIF_F,       // Error while parsing the blif file
-    VPR_ERROR_IC_NETLIST_F, // Error while parsing the interchange netlist file
-
-    VPR_ERROR_IMPL_NETLIST_WRITER,
-    VPR_ERROR_NETLIST,
-    VPR_ERROR_ATOM_NETLIST,
-    VPR_ERROR_CLB_NETLIST,
-    VPR_ERROR_ANALYSIS,
-    VPR_ERROR_INTERRUPTED,
-    VPR_ERROR_DRAW,
-    VPR_ERROR_OTHER
-};
-typedef enum e_vpr_error t_vpr_error_type;
-
-/* This structure is thrown back to highest level of VPR flow if an *
- * internal VPR or user input error occurs. */
-
-class VprError : public vtr::VtrError {
-  public:
-    VprError(t_vpr_error_type err_type,
-             std::string msg = "",
-             std::string file = "",
-             size_t linenum = -1)
-        : VtrError(msg, file, linenum)
-        , type_(err_type) {}
-
-    t_vpr_error_type type() const { return type_; }
-
-  private:
-    t_vpr_error_type type_;
-};
-
-// This function is used to save into the functions_to_demote set
-// all the function names which contain VPR_THROW errors that are
-// going to be demoted to be VTR_LOG_WARN
-void map_error_activation_status(std::string function_name);
-
-//VPR error reporting routines
-//
-//Note that we mark these functions with the C++11 attribute 'noreturn'
-//as they will throw exceptions and not return normally. This can help
-//reduce false-positive compiler warnings
-[[noreturn]] void vpr_throw(enum e_vpr_error type, const char* psz_file_name, unsigned int line_num, const char* psz_message, ...);
-[[noreturn]] void vvpr_throw(enum e_vpr_error type, const char* psz_file_name, unsigned int line_num, const char* psz_message, va_list args);
-[[noreturn]] void vpr_throw_msg(enum e_vpr_error type, const char* psz_file_name, unsigned int line_num, std::string msg);
-
-void vpr_throw_opt(enum e_vpr_error type, const char* psz_func_pretty_name, const char* psz_func_name, const char* psz_file_name, unsigned int line_num, const char* psz_message, ...);
-
-//Figure out what macro to use to get the name of the current function
-// We default to __func__ which is defined in C99
-//
-// g++ > 2.6 define __PRETTY_FUNC__ which includes class/namespace/overload
-// information, so we prefer to use it if possible
-#define VPR_THROW_FUNCTION __func__
-#ifdef __GNUC__
-#    ifdef __GNUC_MINOR__
-#        if __GNUC__ >= 2 && __GNUC_MINOR__ > 6
-#            undef VPR_THROW_FUNCTION
-#            define VPR_THROW_FUNCTION __PRETTY_FUNCTION__
-#        endif
-#    endif
-#endif
-
-/*
- * Unconditionally throws a VprError condition with automatically specified
- * file and line number of the call site.
- *
- * It is preferred to use either VPR_FATAL_ERROR(), or VPR_ERROR() to capture
- * the intention behind the throw.
- *
- * This macro is a wrapper around vpr_throw().
- */
-#define VPR_THROW(type, ...)                              \
-    do {                                                  \
-        vpr_throw(type, __FILE__, __LINE__, __VA_ARGS__); \
-    } while (false)
-
-/*
- * VPR_FATAL_ERROR() is used to signal an *unconditional* fatal error which should
- * stop the program.
- *
- * This macro is a wrapper around VPR_THOW()
- */
-#define VPR_FATAL_ERROR(...)    \
-    do {                        \
-        VPR_THROW(__VA_ARGS__); \
-    } while (false)
-
-/*
- * VPR_ERROR() is used to signal an error (potentially non-fatal) which by
- * default stops the program, but may be suppressed (i.e. converted to a
- * warning).
- *
- * This macro is a wrapper around vpr_throw_opt() which automatically
- * specifies file and line number of call site.
- *
- */
-#define VPR_ERROR(type, ...)                                                                \
-    do {                                                                                    \
-        vpr_throw_opt(type, VPR_THROW_FUNCTION, __func__, __FILE__, __LINE__, __VA_ARGS__); \
-    } while (false)
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_array_view.h b/third_party/vtr/libs/vtrutil/src/vtr_array_view.h
deleted file mode 100644
index 3383263e0..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_array_view.h
+++ /dev/null
@@ -1,273 +0,0 @@
-#ifndef _VTR_ARRAY_VIEW_H
-#define _VTR_ARRAY_VIEW_H
-
-#include <cstddef>
-#include <stdexcept>
-#include <iterator>
-#include "vtr_range.h"
-
-namespace vtr {
-
-/**
- * @brief An array view class to avoid copying data
- */
-template<typename T>
-class array_view {
-  public:
-    ///@brief default constructor
-    explicit constexpr array_view()
-        : data_(nullptr)
-        , size_(0) {}
-
-    ///@brief A constructor with data initialization
-    explicit constexpr array_view(T* str, size_t size)
-        : data_(str)
-        , size_(size) {}
-
-    constexpr array_view(const array_view& other) noexcept = default;
-    constexpr array_view& operator=(const array_view& view) noexcept {
-        data_ = view.data_;
-        size_ = view.size_;
-        return *this;
-    }
-
-    ///@brief [] operator
-    constexpr T& operator[](size_t pos) {
-        return data_[pos];
-    }
-
-    ///@brief constant [] operator
-    constexpr const T& operator[](size_t pos) const {
-        return data_[pos];
-    }
-
-    ///@brief at() operator
-    T& at(size_t pos) {
-        if (pos >= size()) {
-            throw std::out_of_range("Pos is out of range.");
-        }
-
-        return data_[pos];
-    }
-
-    ///@brief const at() operator
-    const T& at(size_t pos) const {
-        if (pos >= size()) {
-            throw std::out_of_range("Pos is out of range.");
-        }
-
-        return data_[pos];
-    }
-
-    ///@brief get the first element of the array
-    constexpr T& front() {
-        return data_[0];
-    }
-
-    ///@brief get the first element of the array (can't update it)
-    constexpr const T& front() const {
-        return data_[0];
-    }
-
-    ///@brief get the last element of the array
-    constexpr T& back() {
-        return data_[size() - 1];
-    }
-
-    ///@brief get the last element of the array (can't update it)
-    constexpr const T& back() const {
-        return data_[size() - 1];
-    }
-
-    ///@brief return the underlying pointer
-    constexpr T* data() {
-        return data_;
-    }
-
-    ///@brief return the underlying pointer (constant pointer)
-    constexpr const T* data() const {
-        return data_;
-    }
-
-    ///@brief return thr array size
-    constexpr size_t size() const noexcept {
-        return size_;
-    }
-
-    ///@brief return the array size
-    constexpr size_t length() const noexcept {
-        return size_;
-    }
-
-    ///@brief check if the array is empty
-    constexpr bool empty() const noexcept {
-        return size_ != 0;
-    }
-
-    ///@brief return a pointer to the first element of the array
-    constexpr T* begin() noexcept {
-        return data_;
-    }
-
-    ///@brief return a constant pointer to the first element of the array
-    constexpr const T* begin() const noexcept {
-        return data_;
-    }
-
-    ///@brief return a constant pointer to the first element of the array
-    constexpr const T* cbegin() const noexcept {
-        return data_;
-    }
-
-    ///@brief return a pointer to the last element of the array
-    constexpr T* end() noexcept {
-        return data_ + size_;
-    }
-
-    ///@brief return a constant pointer to the last element of the array
-    constexpr const T* end() const noexcept {
-        return data_ + size_;
-    }
-
-    ///@brief return a constant pointer to the last element of the array
-    constexpr const T* cend() const noexcept {
-        return data_ + size_;
-    }
-
-  private:
-    T* data_;
-    size_t size_;
-};
-
-/**
- * @brief Implements a fixed length view to an array which is indexed by vtr::StrongId
- *
- * The main use of this container is to behave like a std::span which is
- * indexed by a vtr::StrongId instead of size_t. It assumes that K is explicitly 
- * convertable to size_t 
- * (i.e. via operator size_t()), and can be explicitly constructed from a size_t.
- */
-template<typename K, typename V>
-class array_view_id : private array_view<V> {
-    using storage = array_view<V>;
-
-  public:
-    explicit constexpr array_view_id(V* str, size_t a_size)
-        : array_view<V>(str, a_size) {}
-
-    typedef K key_type;
-
-    class key_iterator;
-    typedef vtr::Range<key_iterator> key_range;
-
-    // Don't include operator[] and at() from std::vector, since we redine them to take key_type instead of size_t
-    ///@brief [] operator
-    V& operator[](const key_type id) {
-        auto i = size_t(id);
-        return storage::operator[](i);
-    }
-    ///@brief constant [] operator
-    const V& operator[](const key_type id) const {
-        auto i = size_t(id);
-        return storage::operator[](i);
-    }
-    ///@brief at() operator
-    V& at(const key_type id) {
-        auto i = size_t(id);
-        return storage::at(i);
-    }
-    ///@brief constant at() operator
-    const V& at(const key_type id) const {
-        auto i = size_t(id);
-        return storage::at(i);
-    }
-
-    ///@brief Returns a range containing the keys
-    key_range keys() const {
-        return vtr::make_range(key_begin(), key_end());
-    }
-
-    using storage::begin;
-    using storage::cbegin;
-    using storage::cend;
-    using storage::end;
-
-    using storage::empty;
-    using storage::size;
-
-    using storage::back;
-    using storage::data;
-    using storage::front;
-
-  public:
-    /**
-     * @brief Iterator class which is convertable to the key_type
-     *
-     * This allows end-users to call the parent class's keys() member
-     * to iterate through the keys with a range-based for loop
-     *
-     */
-    class key_iterator : public std::iterator<std::bidirectional_iterator_tag, key_type> {
-      public:
-        /**
-         * @brief Intermediate type my_iter
-         *
-         * We use the intermediate type my_iter to avoid a potential ambiguity for which
-         * clang generates errors and warnings
-         */
-        using my_iter = typename std::iterator<std::bidirectional_iterator_tag, K>;
-        using typename my_iter::iterator;
-        using typename my_iter::pointer;
-        using typename my_iter::reference;
-        using typename my_iter::value_type;
-
-        key_iterator(key_iterator::value_type init)
-            : value_(init) {}
-
-        /**
-         * @brief Note
-         *
-         * vtr::vector assumes that the key time is convertable to size_t and
-         * that all the underlying IDs are zero-based and contiguous. That means
-         * we can just increment the underlying Id to build the next key.
-         */
-
-        ///@brief increment the iterator
-        key_iterator operator++() {
-            value_ = value_type(size_t(value_) + 1);
-            return *this;
-        }
-
-        ///@brief decrement the iterator
-        key_iterator operator--() {
-            value_ = value_type(size_t(value_) - 1);
-            return *this;
-        }
-
-        ///@brief dereference operator (*)
-        reference operator*() { return value_; }
-
-        ///@brief -> operator
-        pointer operator->() { return &value_; }
-
-        friend bool operator==(const key_iterator lhs, const key_iterator rhs) { return lhs.value_ == rhs.value_; }
-        friend bool operator!=(const key_iterator lhs, const key_iterator rhs) { return !(lhs == rhs); }
-
-      private:
-        value_type value_;
-    };
-
-  private:
-    key_iterator key_begin() const { return key_iterator(key_type(0)); }
-    key_iterator key_end() const { return key_iterator(key_type(size())); }
-};
-
-template<typename Container>
-array_view_id<typename Container::key_type, const typename Container::value_type> make_const_array_view_id(Container& container) {
-    return array_view_id<typename Container::key_type, const typename Container::value_type>(
-        container.data(), container.size());
-}
-
-} // namespace vtr
-
-#endif /* _VTR_ARRAY_VIEW_H */
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_assert.cc b/third_party/vtr/libs/vtrutil/src/vtr_assert.cc
deleted file mode 100644
index b77bec6e4..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_assert.cc
+++ /dev/null
@@ -1,23 +0,0 @@
-#include "vtr_assert.h"
-
-#include <cstdio>  //fprintf, stderr
-#include <cstdlib> //abort
-
-namespace vtr {
-namespace assert {
-
-void handle_assert(const char* expr, const char* file, unsigned int line, const char* function, const char* msg) {
-    fprintf(stderr, "%s:%d", file, line);
-    if (function) {
-        fprintf(stderr, " %s:", function);
-    }
-    fprintf(stderr, " Assertion '%s' failed", expr);
-    if (msg) {
-        fprintf(stderr, " (%s)", msg);
-    }
-    fprintf(stderr, ".\n");
-    std::abort();
-}
-
-} // namespace assert
-} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_assert.h b/third_party/vtr/libs/vtrutil/src/vtr_assert.h
deleted file mode 100644
index ba63a4bb5..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_assert.h
+++ /dev/null
@@ -1,151 +0,0 @@
-#ifndef VTR_ASSERT_H
-#define VTR_ASSERT_H
-
-/**
- * @file
- * @brief The header vtr_assert.h defines useful assertion macros for VTR projects.
- *
- * Four types of assertions are defined:
- *
- *      VTR_ASSERT_OPT   - low overhead assertions that should always be enabled
- *      VTR_ASSERT       - medium overhead assertions that are usually be enabled
- *      VTR_ASSERT_SAFE  - high overhead assertions typically enabled only for debugging
- *      VTR_ASSERT_DEBUG - very high overhead assertions typically enabled only for extreme debugging
- * Each of the above assertions also have a *_MSG variants (e.g. VTR_ASSERT_MSG(expr, msg))
- * which takes an additional argument specifying additional message text to be shown.
- * By convention the message should state the condition *being checked* (and not the failure condition),
- * since that the condition failed is obvious from the assertion failure itself.
- *
- * The macro VTR_ASSERT_LEVEL specifies the level of assertion checking desired and is updated in CMAKE compilation:
- *
- *      VTR_ASSERT_LEVEL == 4: VTR_ASSERT_OPT, VTR_ASSERT, VTR_ASSERT_SAFE, VTR_ASSERT_DEBUG enabled
- *      VTR_ASSERT_LEVEL == 3: VTR_ASSERT_OPT, VTR_ASSERT, VTR_ASSERT_SAFE enabled
- *      VTR_ASSERT_LEVEL == 2: VTR_ASSERT_OPT, VTR_ASSERT enabled
- *      VTR_ASSERT_LEVEL == 1: VTR_ASSERT_OPT enabled
- *      VTR_ASSERT_LEVEL == 0: No assertion checking enabled
- *
- * @Note that an assertion levels beyond 4 are currently treated the same as level 4 and the default assertion level is 2
- */
-
-// Set a default assertion level if none is specified
-#ifndef VTR_ASSERT_LEVEL
-#    define VTR_ASSERT_LEVEL 2
-#endif
-
-// Enable the assertions based on the specified level
-#if VTR_ASSERT_LEVEL >= 4
-#    define VTR_ASSERT_DEBUG_ENABLED
-#endif
-
-#if VTR_ASSERT_LEVEL >= 3
-#    define VTR_ASSERT_SAFE_ENABLED
-#endif
-
-#if VTR_ASSERT_LEVEL >= 2
-#    define VTR_ASSERT_ENABLED
-#endif
-
-#if VTR_ASSERT_LEVEL >= 1
-#    define VTR_ASSERT_OPT_ENABLED
-#endif
-
-// Define the user assertion macros
-#ifdef VTR_ASSERT_DEBUG_ENABLED
-#    define VTR_ASSERT_DEBUG(expr) VTR_ASSERT_IMPL(expr, nullptr)
-#    define VTR_ASSERT_DEBUG_MSG(expr, msg) VTR_ASSERT_IMPL(expr, msg)
-#else
-#    define VTR_ASSERT_DEBUG(expr) VTR_ASSERT_IMPL_NOP(expr, nullptr)
-#    define VTR_ASSERT_DEBUG_MSG(expr, msg) VTR_ASSERT_IMPL_NOP(expr, msg)
-#endif
-
-#ifdef VTR_ASSERT_SAFE_ENABLED
-#    define VTR_ASSERT_SAFE(expr) VTR_ASSERT_IMPL(expr, nullptr)
-#    define VTR_ASSERT_SAFE_MSG(expr, msg) VTR_ASSERT_IMPL(expr, msg)
-#else
-#    define VTR_ASSERT_SAFE(expr) VTR_ASSERT_IMPL_NOP(expr, nullptr)
-#    define VTR_ASSERT_SAFE_MSG(expr, msg) VTR_ASSERT_IMPL_NOP(expr, msg)
-#endif
-
-#ifdef VTR_ASSERT_ENABLED
-#    define VTR_ASSERT(expr) VTR_ASSERT_IMPL(expr, nullptr)
-#    define VTR_ASSERT_MSG(expr, msg) VTR_ASSERT_IMPL(expr, msg)
-#else
-#    define VTR_ASSERT(expr) VTR_ASSERT_IMPL_NOP(expr, nullptr)
-#    define VTR_ASSERT_MSG(expr, msg) VTR_ASSERT_IMPL_NOP(expr, msg)
-#endif
-
-#ifdef VTR_ASSERT_OPT_ENABLED
-#    define VTR_ASSERT_OPT(expr) VTR_ASSERT_IMPL(expr, nullptr)
-#    define VTR_ASSERT_OPT_MSG(expr, msg) VTR_ASSERT_IMPL(expr, msg)
-#else
-#    define VTR_ASSERT_OPT(expr) VTR_ASSERT_IMPL_NOP(expr, nullptr)
-#    define VTR_ASSERT_OPT_MSG(expr, msg) VTR_ASSERT_IMPL_NOP(expr, msg)
-#endif
-
-/**
- * @brief Define the assertion implementation macro
- *
- * We wrap the check in a do {} while() to ensure the function-like
- * macro can be always be followed by a ';'
- */
-#define VTR_ASSERT_IMPL(expr, msg)                                                           \
-    do {                                                                                     \
-        if (!(expr)) {                                                                       \
-            vtr::assert::handle_assert(#expr, __FILE__, __LINE__, VTR_ASSERT_FUNCTION, msg); \
-        }                                                                                    \
-    } while (false)
-
-/**
- * @brief Define the no-op assertion implementation macro
- *
- * We wrap the check in a do {} while() to ensure the function-like
- * macro can be always be followed by a ';'
- *
- * Note that to avoid 'unused' variable warnings when assertions are
- * disabled, we pass the expr and msg to sizeof(). We use sizeof specifically
- * since it accepts expressions, and the C++ standard gaurentees sizeof's arguments
- * are never evaluated (ensuring any expensive expressions are not evaluated when
- * assertions are disabled). To avoid warnings about the unused result of sizeof()
- * we cast it to void.
- */
-#define VTR_ASSERT_IMPL_NOP(expr, msg)   \
-    do {                                 \
-        static_cast<void>(sizeof(expr)); \
-        static_cast<void>(sizeof(msg));  \
-    } while (false)
-
-/**
- * @brief Figure out what macro to use to get the name of the current function
- * 
- * We default to __func__ which is defined in C99
- * 
- * g++ > 2.6 define __PRETTY_FUNC__ which includes class/namespace/overload
- * information, so we prefer to use it if possible
- */
-#define VTR_ASSERT_FUNCTION __func__
-#ifdef __GNUC__
-#    ifdef __GNUC_MINOR__
-#        if __GNUC__ >= 2 && __GNUC_MINOR__ > 6
-#            undef VTR_ASSERT_FUNCTION
-#            define VTR_ASSERT_FUNCTION __PRETTY_FUNCTION__
-#        endif
-#    endif
-#endif
-
-namespace vtr {
-namespace assert {
-/**
- * @brief Assertion handling routine
- * 
- * Note that we mark the routine with the standard C++11
- * attribute 'noreturn' which tells the compiler this
- * function will never return. This should ensure the
- * compiler won't warn about detected conditions such as
- * dead-code or potential null pointer dereferences
- * which are gaurded against by assertions.
- */
-[[noreturn]] void handle_assert(const char* expr, const char* file, unsigned int line, const char* function, const char* msg);
-} // namespace assert
-} // namespace vtr
-
-#endif //VTR_ASSERT_H
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_bimap.h b/third_party/vtr/libs/vtrutil/src/vtr_bimap.h
deleted file mode 100644
index 59bc19b42..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_bimap.h
+++ /dev/null
@@ -1,167 +0,0 @@
-#ifndef VTR_BIMAP
-#define VTR_BIMAP
-
-/**
- * @file
- * @brief The vtr_bimap.h header provides a bi-directonal mapping between key and value which means that it can be addressed by either the key or the value
- *
- * It provides this bi-directional feature for all the map-like containers defined in vtr:
- *      - unordered map
- *      - flat map
- *      - linear map
- * 
- * One example where this container might be so useful is the mapping between the atom and clustered net Id. See atom_lookup.h
- */
-
-#include <map>
-#include <unordered_map>
-#include "vtr_flat_map.h"
-#include "vtr_linear_map.h"
-
-#include "vtr_error.h"
-
-namespace vtr {
-
-/**
- * @brief A map-like class which provides a bi-directonal mapping between key and value
- *
- * Keys and values can be looked up directly by passing either the key or value.
- * the indexing operator will throw if the key/value does not exist.
- */
-template<class K, class V, template<typename...> class Map = std::map, template<typename...> class InvMap = std::map>
-class bimap {
-  public: //Public types
-    typedef typename Map<K, V>::const_iterator iterator;
-    typedef typename InvMap<V, K>::const_iterator inverse_iterator;
-
-  public: //Accessors
-    //Iterators
-
-    ///@brief Return an iterator to the begin of the map
-    iterator begin() const { return map_.begin(); }
-
-    ///@brief Return an iterator to the end of the map
-    iterator end() const { return map_.end(); }
-
-    ///@brief Return an iterator to the begin of the inverse map
-    inverse_iterator inverse_begin() const { return inverse_map_.begin(); }
-
-    ///@brief Return an iterator to the end of the inverse map
-    inverse_iterator inverse_end() const { return inverse_map_.end(); }
-
-    ///@brief Return an iterator to the key-value pair matching key, or end() if not found
-    iterator find(const K key) const {
-        return map_.find(key);
-    }
-
-    ///@brief Return an iterator to the value-key pair matching value, or inverse_end() if not found
-    inverse_iterator find(const V value) const {
-        return inverse_map_.find(value);
-    }
-
-    ///@brief Return an immutable reference to the value matching key (throw an exception if key is not found)
-    const V& operator[](const K key) const {
-        auto iter = find(key);
-        if (iter == end()) {
-            throw VtrError("Invalid bimap key during look-up", __FILE__, __LINE__);
-        }
-        return iter->second;
-    }
-
-    ///@brief Return an immutable reference to the key matching value (throw an exception if value is not found)
-    const K& operator[](const V value) const {
-        auto iter = find(value);
-        if (iter == inverse_end()) {
-            throw VtrError("Invalid bimap value during inverse look-up", __FILE__, __LINE__);
-        }
-        return iter->second;
-    }
-
-    ///@brief Return the number of key-value pairs stored
-    std::size_t size() const {
-        VTR_ASSERT(map_.size() == inverse_map_.size());
-        return map_.size();
-    }
-
-    ///@brief Return true if there are no key-value pairs stored
-    bool empty() const { return (size() == 0); }
-
-    ///@brief Return true if the specified key exists
-    bool contains(const K key) const { return find(key) != end(); }
-
-    ///@brief Return true if the specified value exists
-    bool contains(const V value) const { return find(value) != inverse_end(); }
-
-  public: //Mutators
-    ///@brief Drop all stored key-values
-    void clear() {
-        map_.clear();
-        inverse_map_.clear();
-    }
-
-    ///@brief Insert a key-value pair, if not already in map
-    std::pair<iterator, bool> insert(const K key, const V value) {
-        auto ret1 = map_.insert({key, value});
-        auto ret2 = inverse_map_.insert({value, key});
-
-        VTR_ASSERT(ret1.second == ret2.second);
-
-        // Return true if inserted
-        return ret1;
-    }
-
-    ///@brief Update a key-value pair, will insert if not already in map
-    void update(const K key, const V value) {
-        map_[key] = value;
-        inverse_map_[value] = key;
-    }
-
-    ///@brief Remove the specified key (and it's associated value)
-    void erase(const K key) {
-        auto iter = map_.find(key);
-        if (iter != map_.end()) {
-            V val = iter->second;
-            map_.erase(iter);
-
-            auto inv_iter = inverse_map_.find(val);
-            VTR_ASSERT(inv_iter != inverse_map_.end());
-            inverse_map_.erase(inv_iter);
-        }
-    }
-
-    ///@brief Remove the specified value (and it's associated key)
-    void erase(const V val) {
-        auto inv_iter = inverse_map_.find(val);
-        if (inv_iter != inverse_map_.end()) {
-            K key = inv_iter->second;
-            inverse_map_.erase(inv_iter);
-
-            auto iter = map_.find(key);
-            VTR_ASSERT(iter != map_.end());
-            map_.erase(iter);
-        }
-    }
-
-    ///@brief Swap (this enables std::swap via ADL)
-    friend void swap(bimap<K, V, Map, InvMap>& x, bimap<K, V, Map, InvMap>& y) {
-        std::swap(x.map_, y.map_);
-        std::swap(x.inverse_map_, y.inverse_map_);
-    }
-
-  private:
-    Map<K, V> map_;
-    InvMap<V, K> inverse_map_;
-};
-
-template<class K, class V>
-using unordered_bimap = bimap<K, V, std::unordered_map, std::unordered_map>;
-
-template<class K, class V>
-using flat_bimap = bimap<K, V, vtr::flat_map, vtr::flat_map>;
-
-template<class K, class V>
-using linear_bimap = bimap<K, V, vtr::linear_map, vtr::linear_map>;
-
-} // namespace vtr
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_cache.h b/third_party/vtr/libs/vtrutil/src/vtr_cache.h
deleted file mode 100644
index 30871cd44..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_cache.h
+++ /dev/null
@@ -1,46 +0,0 @@
-#ifndef VTR_CACHE_H_
-#define VTR_CACHE_H_
-
-#include <memory>
-
-namespace vtr {
-
-///@brief An implementation of a simple cache
-template<typename CacheKey, typename CacheValue>
-class Cache {
-  public:
-    ///@brief Clear cache.
-    void clear() {
-        key_ = CacheKey();
-        value_.reset();
-    }
-    /**
-     * @brief Check if the cache is valid.
-     * 
-     * Returns the cached value if present and valid.
-     * Returns nullptr if the cache is invalid.
-     */
-    const CacheValue* get(const CacheKey& key) const {
-        if (key == key_ && value_) {
-            return value_.get();
-        } else {
-            return nullptr;
-        }
-    }
-
-    ///@brief Update the cache.
-    const CacheValue* set(const CacheKey& key, std::unique_ptr<CacheValue> value) {
-        key_ = key;
-        value_ = std::move(value);
-
-        return value_.get();
-    }
-
-  private:
-    CacheKey key_;
-    std::unique_ptr<CacheValue> value_;
-};
-
-} // namespace vtr
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_color_map.cc b/third_party/vtr/libs/vtrutil/src/vtr_color_map.cc
deleted file mode 100644
index 7a100e25e..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_color_map.cc
+++ /dev/null
@@ -1,831 +0,0 @@
-#include <cmath>
-#include <cstddef>
-#include "vtr_color_map.h"
-#include "vtr_assert.h"
-
-namespace vtr {
-
-//Inferno data from MatPlotLib
-static std::vector<Color<float>> inferno_data = {
-    {0.001462f, 0.000466f, 0.013866f},
-    {0.002267f, 0.001270f, 0.018570f},
-    {0.003299f, 0.002249f, 0.024239f},
-    {0.004547f, 0.003392f, 0.030909f},
-    {0.006006f, 0.004692f, 0.038558f},
-    {0.007676f, 0.006136f, 0.046836f},
-    {0.009561f, 0.007713f, 0.055143f},
-    {0.011663f, 0.009417f, 0.063460f},
-    {0.013995f, 0.011225f, 0.071862f},
-    {0.016561f, 0.013136f, 0.080282f},
-    {0.019373f, 0.015133f, 0.088767f},
-    {0.022447f, 0.017199f, 0.097327f},
-    {0.025793f, 0.019331f, 0.105930f},
-    {0.029432f, 0.021503f, 0.114621f},
-    {0.033385f, 0.023702f, 0.123397f},
-    {0.037668f, 0.025921f, 0.132232f},
-    {0.042253f, 0.028139f, 0.141141f},
-    {0.046915f, 0.030324f, 0.150164f},
-    {0.051644f, 0.032474f, 0.159254f},
-    {0.056449f, 0.034569f, 0.168414f},
-    {0.061340f, 0.036590f, 0.177642f},
-    {0.066331f, 0.038504f, 0.186962f},
-    {0.071429f, 0.040294f, 0.196354f},
-    {0.076637f, 0.041905f, 0.205799f},
-    {0.081962f, 0.043328f, 0.215289f},
-    {0.087411f, 0.044556f, 0.224813f},
-    {0.092990f, 0.045583f, 0.234358f},
-    {0.098702f, 0.046402f, 0.243904f},
-    {0.104551f, 0.047008f, 0.253430f},
-    {0.110536f, 0.047399f, 0.262912f},
-    {0.116656f, 0.047574f, 0.272321f},
-    {0.122908f, 0.047536f, 0.281624f},
-    {0.129285f, 0.047293f, 0.290788f},
-    {0.135778f, 0.046856f, 0.299776f},
-    {0.142378f, 0.046242f, 0.308553f},
-    {0.149073f, 0.045468f, 0.317085f},
-    {0.155850f, 0.044559f, 0.325338f},
-    {0.162689f, 0.043554f, 0.333277f},
-    {0.169575f, 0.042489f, 0.340874f},
-    {0.176493f, 0.041402f, 0.348111f},
-    {0.183429f, 0.040329f, 0.354971f},
-    {0.190367f, 0.039309f, 0.361447f},
-    {0.197297f, 0.038400f, 0.367535f},
-    {0.204209f, 0.037632f, 0.373238f},
-    {0.211095f, 0.037030f, 0.378563f},
-    {0.217949f, 0.036615f, 0.383522f},
-    {0.224763f, 0.036405f, 0.388129f},
-    {0.231538f, 0.036405f, 0.392400f},
-    {0.238273f, 0.036621f, 0.396353f},
-    {0.244967f, 0.037055f, 0.400007f},
-    {0.251620f, 0.037705f, 0.403378f},
-    {0.258234f, 0.038571f, 0.406485f},
-    {0.264810f, 0.039647f, 0.409345f},
-    {0.271347f, 0.040922f, 0.411976f},
-    {0.277850f, 0.042353f, 0.414392f},
-    {0.284321f, 0.043933f, 0.416608f},
-    {0.290763f, 0.045644f, 0.418637f},
-    {0.297178f, 0.047470f, 0.420491f},
-    {0.303568f, 0.049396f, 0.422182f},
-    {0.309935f, 0.051407f, 0.423721f},
-    {0.316282f, 0.053490f, 0.425116f},
-    {0.322610f, 0.055634f, 0.426377f},
-    {0.328921f, 0.057827f, 0.427511f},
-    {0.335217f, 0.060060f, 0.428524f},
-    {0.341500f, 0.062325f, 0.429425f},
-    {0.347771f, 0.064616f, 0.430217f},
-    {0.354032f, 0.066925f, 0.430906f},
-    {0.360284f, 0.069247f, 0.431497f},
-    {0.366529f, 0.071579f, 0.431994f},
-    {0.372768f, 0.073915f, 0.432400f},
-    {0.379001f, 0.076253f, 0.432719f},
-    {0.385228f, 0.078591f, 0.432955f},
-    {0.391453f, 0.080927f, 0.433109f},
-    {0.397674f, 0.083257f, 0.433183f},
-    {0.403894f, 0.085580f, 0.433179f},
-    {0.410113f, 0.087896f, 0.433098f},
-    {0.416331f, 0.090203f, 0.432943f},
-    {0.422549f, 0.092501f, 0.432714f},
-    {0.428768f, 0.094790f, 0.432412f},
-    {0.434987f, 0.097069f, 0.432039f},
-    {0.441207f, 0.099338f, 0.431594f},
-    {0.447428f, 0.101597f, 0.431080f},
-    {0.453651f, 0.103848f, 0.430498f},
-    {0.459875f, 0.106089f, 0.429846f},
-    {0.466100f, 0.108322f, 0.429125f},
-    {0.472328f, 0.110547f, 0.428334f},
-    {0.478558f, 0.112764f, 0.427475f},
-    {0.484789f, 0.114974f, 0.426548f},
-    {0.491022f, 0.117179f, 0.425552f},
-    {0.497257f, 0.119379f, 0.424488f},
-    {0.503493f, 0.121575f, 0.423356f},
-    {0.509730f, 0.123769f, 0.422156f},
-    {0.515967f, 0.125960f, 0.420887f},
-    {0.522206f, 0.128150f, 0.419549f},
-    {0.528444f, 0.130341f, 0.418142f},
-    {0.534683f, 0.132534f, 0.416667f},
-    {0.540920f, 0.134729f, 0.415123f},
-    {0.547157f, 0.136929f, 0.413511f},
-    {0.553392f, 0.139134f, 0.411829f},
-    {0.559624f, 0.141346f, 0.410078f},
-    {0.565854f, 0.143567f, 0.408258f},
-    {0.572081f, 0.145797f, 0.406369f},
-    {0.578304f, 0.148039f, 0.404411f},
-    {0.584521f, 0.150294f, 0.402385f},
-    {0.590734f, 0.152563f, 0.400290f},
-    {0.596940f, 0.154848f, 0.398125f},
-    {0.603139f, 0.157151f, 0.395891f},
-    {0.609330f, 0.159474f, 0.393589f},
-    {0.615513f, 0.161817f, 0.391219f},
-    {0.621685f, 0.164184f, 0.388781f},
-    {0.627847f, 0.166575f, 0.386276f},
-    {0.633998f, 0.168992f, 0.383704f},
-    {0.640135f, 0.171438f, 0.381065f},
-    {0.646260f, 0.173914f, 0.378359f},
-    {0.652369f, 0.176421f, 0.375586f},
-    {0.658463f, 0.178962f, 0.372748f},
-    {0.664540f, 0.181539f, 0.369846f},
-    {0.670599f, 0.184153f, 0.366879f},
-    {0.676638f, 0.186807f, 0.363849f},
-    {0.682656f, 0.189501f, 0.360757f},
-    {0.688653f, 0.192239f, 0.357603f},
-    {0.694627f, 0.195021f, 0.354388f},
-    {0.700576f, 0.197851f, 0.351113f},
-    {0.706500f, 0.200728f, 0.347777f},
-    {0.712396f, 0.203656f, 0.344383f},
-    {0.718264f, 0.206636f, 0.340931f},
-    {0.724103f, 0.209670f, 0.337424f},
-    {0.729909f, 0.212759f, 0.333861f},
-    {0.735683f, 0.215906f, 0.330245f},
-    {0.741423f, 0.219112f, 0.326576f},
-    {0.747127f, 0.222378f, 0.322856f},
-    {0.752794f, 0.225706f, 0.319085f},
-    {0.758422f, 0.229097f, 0.315266f},
-    {0.764010f, 0.232554f, 0.311399f},
-    {0.769556f, 0.236077f, 0.307485f},
-    {0.775059f, 0.239667f, 0.303526f},
-    {0.780517f, 0.243327f, 0.299523f},
-    {0.785929f, 0.247056f, 0.295477f},
-    {0.791293f, 0.250856f, 0.291390f},
-    {0.796607f, 0.254728f, 0.287264f},
-    {0.801871f, 0.258674f, 0.283099f},
-    {0.807082f, 0.262692f, 0.278898f},
-    {0.812239f, 0.266786f, 0.274661f},
-    {0.817341f, 0.270954f, 0.270390f},
-    {0.822386f, 0.275197f, 0.266085f},
-    {0.827372f, 0.279517f, 0.261750f},
-    {0.832299f, 0.283913f, 0.257383f},
-    {0.837165f, 0.288385f, 0.252988f},
-    {0.841969f, 0.292933f, 0.248564f},
-    {0.846709f, 0.297559f, 0.244113f},
-    {0.851384f, 0.302260f, 0.239636f},
-    {0.855992f, 0.307038f, 0.235133f},
-    {0.860533f, 0.311892f, 0.230606f},
-    {0.865006f, 0.316822f, 0.226055f},
-    {0.869409f, 0.321827f, 0.221482f},
-    {0.873741f, 0.326906f, 0.216886f},
-    {0.878001f, 0.332060f, 0.212268f},
-    {0.882188f, 0.337287f, 0.207628f},
-    {0.886302f, 0.342586f, 0.202968f},
-    {0.890341f, 0.347957f, 0.198286f},
-    {0.894305f, 0.353399f, 0.193584f},
-    {0.898192f, 0.358911f, 0.188860f},
-    {0.902003f, 0.364492f, 0.184116f},
-    {0.905735f, 0.370140f, 0.179350f},
-    {0.909390f, 0.375856f, 0.174563f},
-    {0.912966f, 0.381636f, 0.169755f},
-    {0.916462f, 0.387481f, 0.164924f},
-    {0.919879f, 0.393389f, 0.160070f},
-    {0.923215f, 0.399359f, 0.155193f},
-    {0.926470f, 0.405389f, 0.150292f},
-    {0.929644f, 0.411479f, 0.145367f},
-    {0.932737f, 0.417627f, 0.140417f},
-    {0.935747f, 0.423831f, 0.135440f},
-    {0.938675f, 0.430091f, 0.130438f},
-    {0.941521f, 0.436405f, 0.125409f},
-    {0.944285f, 0.442772f, 0.120354f},
-    {0.946965f, 0.449191f, 0.115272f},
-    {0.949562f, 0.455660f, 0.110164f},
-    {0.952075f, 0.462178f, 0.105031f},
-    {0.954506f, 0.468744f, 0.099874f},
-    {0.956852f, 0.475356f, 0.094695f},
-    {0.959114f, 0.482014f, 0.089499f},
-    {0.961293f, 0.488716f, 0.084289f},
-    {0.963387f, 0.495462f, 0.079073f},
-    {0.965397f, 0.502249f, 0.073859f},
-    {0.967322f, 0.509078f, 0.068659f},
-    {0.969163f, 0.515946f, 0.063488f},
-    {0.970919f, 0.522853f, 0.058367f},
-    {0.972590f, 0.529798f, 0.053324f},
-    {0.974176f, 0.536780f, 0.048392f},
-    {0.975677f, 0.543798f, 0.043618f},
-    {0.977092f, 0.550850f, 0.039050f},
-    {0.978422f, 0.557937f, 0.034931f},
-    {0.979666f, 0.565057f, 0.031409f},
-    {0.980824f, 0.572209f, 0.028508f},
-    {0.981895f, 0.579392f, 0.026250f},
-    {0.982881f, 0.586606f, 0.024661f},
-    {0.983779f, 0.593849f, 0.023770f},
-    {0.984591f, 0.601122f, 0.023606f},
-    {0.985315f, 0.608422f, 0.024202f},
-    {0.985952f, 0.615750f, 0.025592f},
-    {0.986502f, 0.623105f, 0.027814f},
-    {0.986964f, 0.630485f, 0.030908f},
-    {0.987337f, 0.637890f, 0.034916f},
-    {0.987622f, 0.645320f, 0.039886f},
-    {0.987819f, 0.652773f, 0.045581f},
-    {0.987926f, 0.660250f, 0.051750f},
-    {0.987945f, 0.667748f, 0.058329f},
-    {0.987874f, 0.675267f, 0.065257f},
-    {0.987714f, 0.682807f, 0.072489f},
-    {0.987464f, 0.690366f, 0.079990f},
-    {0.987124f, 0.697944f, 0.087731f},
-    {0.986694f, 0.705540f, 0.095694f},
-    {0.986175f, 0.713153f, 0.103863f},
-    {0.985566f, 0.720782f, 0.112229f},
-    {0.984865f, 0.728427f, 0.120785f},
-    {0.984075f, 0.736087f, 0.129527f},
-    {0.983196f, 0.743758f, 0.138453f},
-    {0.982228f, 0.751442f, 0.147565f},
-    {0.981173f, 0.759135f, 0.156863f},
-    {0.980032f, 0.766837f, 0.166353f},
-    {0.978806f, 0.774545f, 0.176037f},
-    {0.977497f, 0.782258f, 0.185923f},
-    {0.976108f, 0.789974f, 0.196018f},
-    {0.974638f, 0.797692f, 0.206332f},
-    {0.973088f, 0.805409f, 0.216877f},
-    {0.971468f, 0.813122f, 0.227658f},
-    {0.969783f, 0.820825f, 0.238686f},
-    {0.968041f, 0.828515f, 0.249972f},
-    {0.966243f, 0.836191f, 0.261534f},
-    {0.964394f, 0.843848f, 0.273391f},
-    {0.962517f, 0.851476f, 0.285546f},
-    {0.960626f, 0.859069f, 0.298010f},
-    {0.958720f, 0.866624f, 0.310820f},
-    {0.956834f, 0.874129f, 0.323974f},
-    {0.954997f, 0.881569f, 0.337475f},
-    {0.953215f, 0.888942f, 0.351369f},
-    {0.951546f, 0.896226f, 0.365627f},
-    {0.950018f, 0.903409f, 0.380271f},
-    {0.948683f, 0.910473f, 0.395289f},
-    {0.947594f, 0.917399f, 0.410665f},
-    {0.946809f, 0.924168f, 0.426373f},
-    {0.946392f, 0.930761f, 0.442367f},
-    {0.946403f, 0.937159f, 0.458592f},
-    {0.946903f, 0.943348f, 0.474970f},
-    {0.947937f, 0.949318f, 0.491426f},
-    {0.949545f, 0.955063f, 0.507860f},
-    {0.951740f, 0.960587f, 0.524203f},
-    {0.954529f, 0.965896f, 0.540361f},
-    {0.957896f, 0.971003f, 0.556275f},
-    {0.961812f, 0.975924f, 0.571925f},
-    {0.966249f, 0.980678f, 0.587206f},
-    {0.971162f, 0.985282f, 0.602154f},
-    {0.976511f, 0.989753f, 0.616760f},
-    {0.982257f, 0.994109f, 0.631017f},
-    {0.988362f, 0.998364f, 0.644924f}};
-
-//Plasma data from MatPlotLib
-static std::vector<Color<float>> plasma_data = {
-    {5.03832136e-02f, 2.98028976e-02f, 5.27974883e-01f},
-    {6.35363639e-02f, 2.84259729e-02f, 5.33123681e-01f},
-    {7.53531234e-02f, 2.72063728e-02f, 5.38007001e-01f},
-    {8.62217979e-02f, 2.61253206e-02f, 5.42657691e-01f},
-    {9.63786097e-02f, 2.51650976e-02f, 5.47103487e-01f},
-    {1.05979704e-01f, 2.43092436e-02f, 5.51367851e-01f},
-    {1.15123641e-01f, 2.35562500e-02f, 5.55467728e-01f},
-    {1.23902903e-01f, 2.28781011e-02f, 5.59423480e-01f},
-    {1.32380720e-01f, 2.22583774e-02f, 5.63250116e-01f},
-    {1.40603076e-01f, 2.16866674e-02f, 5.66959485e-01f},
-    {1.48606527e-01f, 2.11535876e-02f, 5.70561711e-01f},
-    {1.56420649e-01f, 2.06507174e-02f, 5.74065446e-01f},
-    {1.64069722e-01f, 2.01705326e-02f, 5.77478074e-01f},
-    {1.71573925e-01f, 1.97063415e-02f, 5.80805890e-01f},
-    {1.78950212e-01f, 1.92522243e-02f, 5.84054243e-01f},
-    {1.86212958e-01f, 1.88029767e-02f, 5.87227661e-01f},
-    {1.93374449e-01f, 1.83540593e-02f, 5.90329954e-01f},
-    {2.00445260e-01f, 1.79015512e-02f, 5.93364304e-01f},
-    {2.07434551e-01f, 1.74421086e-02f, 5.96333341e-01f},
-    {2.14350298e-01f, 1.69729276e-02f, 5.99239207e-01f},
-    {2.21196750e-01f, 1.64970484e-02f, 6.02083323e-01f},
-    {2.27982971e-01f, 1.60071509e-02f, 6.04867403e-01f},
-    {2.34714537e-01f, 1.55015065e-02f, 6.07592438e-01f},
-    {2.41396253e-01f, 1.49791041e-02f, 6.10259089e-01f},
-    {2.48032377e-01f, 1.44393586e-02f, 6.12867743e-01f},
-    {2.54626690e-01f, 1.38820918e-02f, 6.15418537e-01f},
-    {2.61182562e-01f, 1.33075156e-02f, 6.17911385e-01f},
-    {2.67702993e-01f, 1.27162163e-02f, 6.20345997e-01f},
-    {2.74190665e-01f, 1.21091423e-02f, 6.22721903e-01f},
-    {2.80647969e-01f, 1.14875915e-02f, 6.25038468e-01f},
-    {2.87076059e-01f, 1.08554862e-02f, 6.27294975e-01f},
-    {2.93477695e-01f, 1.02128849e-02f, 6.29490490e-01f},
-    {2.99855122e-01f, 9.56079551e-03f, 6.31623923e-01f},
-    {3.06209825e-01f, 8.90185346e-03f, 6.33694102e-01f},
-    {3.12543124e-01f, 8.23900704e-03f, 6.35699759e-01f},
-    {3.18856183e-01f, 7.57551051e-03f, 6.37639537e-01f},
-    {3.25150025e-01f, 6.91491734e-03f, 6.39512001e-01f},
-    {3.31425547e-01f, 6.26107379e-03f, 6.41315649e-01f},
-    {3.37683446e-01f, 5.61830889e-03f, 6.43048936e-01f},
-    {3.43924591e-01f, 4.99053080e-03f, 6.44710195e-01f},
-    {3.50149699e-01f, 4.38202557e-03f, 6.46297711e-01f},
-    {3.56359209e-01f, 3.79781761e-03f, 6.47809772e-01f},
-    {3.62553473e-01f, 3.24319591e-03f, 6.49244641e-01f},
-    {3.68732762e-01f, 2.72370721e-03f, 6.50600561e-01f},
-    {3.74897270e-01f, 2.24514897e-03f, 6.51875762e-01f},
-    {3.81047116e-01f, 1.81356205e-03f, 6.53068467e-01f},
-    {3.87182639e-01f, 1.43446923e-03f, 6.54176761e-01f},
-    {3.93304010e-01f, 1.11388259e-03f, 6.55198755e-01f},
-    {3.99410821e-01f, 8.59420809e-04f, 6.56132835e-01f},
-    {4.05502914e-01f, 6.78091517e-04f, 6.56977276e-01f},
-    {4.11580082e-01f, 5.77101735e-04f, 6.57730380e-01f},
-    {4.17642063e-01f, 5.63847476e-04f, 6.58390492e-01f},
-    {4.23688549e-01f, 6.45902780e-04f, 6.58956004e-01f},
-    {4.29719186e-01f, 8.31008207e-04f, 6.59425363e-01f},
-    {4.35733575e-01f, 1.12705875e-03f, 6.59797077e-01f},
-    {4.41732123e-01f, 1.53984779e-03f, 6.60069009e-01f},
-    {4.47713600e-01f, 2.07954744e-03f, 6.60240367e-01f},
-    {4.53677394e-01f, 2.75470302e-03f, 6.60309966e-01f},
-    {4.59622938e-01f, 3.57374415e-03f, 6.60276655e-01f},
-    {4.65549631e-01f, 4.54518084e-03f, 6.60139383e-01f},
-    {4.71456847e-01f, 5.67758762e-03f, 6.59897210e-01f},
-    {4.77343929e-01f, 6.97958743e-03f, 6.59549311e-01f},
-    {4.83210198e-01f, 8.45983494e-03f, 6.59094989e-01f},
-    {4.89054951e-01f, 1.01269996e-02f, 6.58533677e-01f},
-    {4.94877466e-01f, 1.19897486e-02f, 6.57864946e-01f},
-    {5.00677687e-01f, 1.40550640e-02f, 6.57087561e-01f},
-    {5.06454143e-01f, 1.63333443e-02f, 6.56202294e-01f},
-    {5.12206035e-01f, 1.88332232e-02f, 6.55209222e-01f},
-    {5.17932580e-01f, 2.15631918e-02f, 6.54108545e-01f},
-    {5.23632990e-01f, 2.45316468e-02f, 6.52900629e-01f},
-    {5.29306474e-01f, 2.77468735e-02f, 6.51586010e-01f},
-    {5.34952244e-01f, 3.12170300e-02f, 6.50165396e-01f},
-    {5.40569510e-01f, 3.49501310e-02f, 6.48639668e-01f},
-    {5.46157494e-01f, 3.89540334e-02f, 6.47009884e-01f},
-    {5.51715423e-01f, 4.31364795e-02f, 6.45277275e-01f},
-    {5.57242538e-01f, 4.73307585e-02f, 6.43443250e-01f},
-    {5.62738096e-01f, 5.15448092e-02f, 6.41509389e-01f},
-    {5.68201372e-01f, 5.57776706e-02f, 6.39477440e-01f},
-    {5.73631859e-01f, 6.00281369e-02f, 6.37348841e-01f},
-    {5.79028682e-01f, 6.42955547e-02f, 6.35126108e-01f},
-    {5.84391137e-01f, 6.85790261e-02f, 6.32811608e-01f},
-    {5.89718606e-01f, 7.28775875e-02f, 6.30407727e-01f},
-    {5.95010505e-01f, 7.71902878e-02f, 6.27916992e-01f},
-    {6.00266283e-01f, 8.15161895e-02f, 6.25342058e-01f},
-    {6.05485428e-01f, 8.58543713e-02f, 6.22685703e-01f},
-    {6.10667469e-01f, 9.02039303e-02f, 6.19950811e-01f},
-    {6.15811974e-01f, 9.45639838e-02f, 6.17140367e-01f},
-    {6.20918555e-01f, 9.89336721e-02f, 6.14257440e-01f},
-    {6.25986869e-01f, 1.03312160e-01f, 6.11305174e-01f},
-    {6.31016615e-01f, 1.07698641e-01f, 6.08286774e-01f},
-    {6.36007543e-01f, 1.12092335e-01f, 6.05205491e-01f},
-    {6.40959444e-01f, 1.16492495e-01f, 6.02064611e-01f},
-    {6.45872158e-01f, 1.20898405e-01f, 5.98867442e-01f},
-    {6.50745571e-01f, 1.25309384e-01f, 5.95617300e-01f},
-    {6.55579615e-01f, 1.29724785e-01f, 5.92317494e-01f},
-    {6.60374266e-01f, 1.34143997e-01f, 5.88971318e-01f},
-    {6.65129493e-01f, 1.38566428e-01f, 5.85582301e-01f},
-    {6.69845385e-01f, 1.42991540e-01f, 5.82153572e-01f},
-    {6.74522060e-01f, 1.47418835e-01f, 5.78688247e-01f},
-    {6.79159664e-01f, 1.51847851e-01f, 5.75189431e-01f},
-    {6.83758384e-01f, 1.56278163e-01f, 5.71660158e-01f},
-    {6.88318440e-01f, 1.60709387e-01f, 5.68103380e-01f},
-    {6.92840088e-01f, 1.65141174e-01f, 5.64521958e-01f},
-    {6.97323615e-01f, 1.69573215e-01f, 5.60918659e-01f},
-    {7.01769334e-01f, 1.74005236e-01f, 5.57296144e-01f},
-    {7.06177590e-01f, 1.78437000e-01f, 5.53656970e-01f},
-    {7.10548747e-01f, 1.82868306e-01f, 5.50003579e-01f},
-    {7.14883195e-01f, 1.87298986e-01f, 5.46338299e-01f},
-    {7.19181339e-01f, 1.91728906e-01f, 5.42663338e-01f},
-    {7.23443604e-01f, 1.96157962e-01f, 5.38980786e-01f},
-    {7.27670428e-01f, 2.00586086e-01f, 5.35292612e-01f},
-    {7.31862231e-01f, 2.05013174e-01f, 5.31600995e-01f},
-    {7.36019424e-01f, 2.09439071e-01f, 5.27908434e-01f},
-    {7.40142557e-01f, 2.13863965e-01f, 5.24215533e-01f},
-    {7.44232102e-01f, 2.18287899e-01f, 5.20523766e-01f},
-    {7.48288533e-01f, 2.22710942e-01f, 5.16834495e-01f},
-    {7.52312321e-01f, 2.27133187e-01f, 5.13148963e-01f},
-    {7.56303937e-01f, 2.31554749e-01f, 5.09468305e-01f},
-    {7.60263849e-01f, 2.35975765e-01f, 5.05793543e-01f},
-    {7.64192516e-01f, 2.40396394e-01f, 5.02125599e-01f},
-    {7.68090391e-01f, 2.44816813e-01f, 4.98465290e-01f},
-    {7.71957916e-01f, 2.49237220e-01f, 4.94813338e-01f},
-    {7.75795522e-01f, 2.53657797e-01f, 4.91170517e-01f},
-    {7.79603614e-01f, 2.58078397e-01f, 4.87539124e-01f},
-    {7.83382636e-01f, 2.62499662e-01f, 4.83917732e-01f},
-    {7.87132978e-01f, 2.66921859e-01f, 4.80306702e-01f},
-    {7.90855015e-01f, 2.71345267e-01f, 4.76706319e-01f},
-    {7.94549101e-01f, 2.75770179e-01f, 4.73116798e-01f},
-    {7.98215577e-01f, 2.80196901e-01f, 4.69538286e-01f},
-    {8.01854758e-01f, 2.84625750e-01f, 4.65970871e-01f},
-    {8.05466945e-01f, 2.89057057e-01f, 4.62414580e-01f},
-    {8.09052419e-01f, 2.93491117e-01f, 4.58869577e-01f},
-    {8.12611506e-01f, 2.97927865e-01f, 4.55337565e-01f},
-    {8.16144382e-01f, 3.02368130e-01f, 4.51816385e-01f},
-    {8.19651255e-01f, 3.06812282e-01f, 4.48305861e-01f},
-    {8.23132309e-01f, 3.11260703e-01f, 4.44805781e-01f},
-    {8.26587706e-01f, 3.15713782e-01f, 4.41315901e-01f},
-    {8.30017584e-01f, 3.20171913e-01f, 4.37835947e-01f},
-    {8.33422053e-01f, 3.24635499e-01f, 4.34365616e-01f},
-    {8.36801237e-01f, 3.29104836e-01f, 4.30905052e-01f},
-    {8.40155276e-01f, 3.33580106e-01f, 4.27454836e-01f},
-    {8.43484103e-01f, 3.38062109e-01f, 4.24013059e-01f},
-    {8.46787726e-01f, 3.42551272e-01f, 4.20579333e-01f},
-    {8.50066132e-01f, 3.47048028e-01f, 4.17153264e-01f},
-    {8.53319279e-01f, 3.51552815e-01f, 4.13734445e-01f},
-    {8.56547103e-01f, 3.56066072e-01f, 4.10322469e-01f},
-    {8.59749520e-01f, 3.60588229e-01f, 4.06916975e-01f},
-    {8.62926559e-01f, 3.65119408e-01f, 4.03518809e-01f},
-    {8.66077920e-01f, 3.69660446e-01f, 4.00126027e-01f},
-    {8.69203436e-01f, 3.74211795e-01f, 3.96738211e-01f},
-    {8.72302917e-01f, 3.78773910e-01f, 3.93354947e-01f},
-    {8.75376149e-01f, 3.83347243e-01f, 3.89975832e-01f},
-    {8.78422895e-01f, 3.87932249e-01f, 3.86600468e-01f},
-    {8.81442916e-01f, 3.92529339e-01f, 3.83228622e-01f},
-    {8.84435982e-01f, 3.97138877e-01f, 3.79860246e-01f},
-    {8.87401682e-01f, 4.01761511e-01f, 3.76494232e-01f},
-    {8.90339687e-01f, 4.06397694e-01f, 3.73130228e-01f},
-    {8.93249647e-01f, 4.11047871e-01f, 3.69767893e-01f},
-    {8.96131191e-01f, 4.15712489e-01f, 3.66406907e-01f},
-    {8.98983931e-01f, 4.20391986e-01f, 3.63046965e-01f},
-    {9.01807455e-01f, 4.25086807e-01f, 3.59687758e-01f},
-    {9.04601295e-01f, 4.29797442e-01f, 3.56328796e-01f},
-    {9.07364995e-01f, 4.34524335e-01f, 3.52969777e-01f},
-    {9.10098088e-01f, 4.39267908e-01f, 3.49610469e-01f},
-    {9.12800095e-01f, 4.44028574e-01f, 3.46250656e-01f},
-    {9.15470518e-01f, 4.48806744e-01f, 3.42890148e-01f},
-    {9.18108848e-01f, 4.53602818e-01f, 3.39528771e-01f},
-    {9.20714383e-01f, 4.58417420e-01f, 3.36165582e-01f},
-    {9.23286660e-01f, 4.63250828e-01f, 3.32800827e-01f},
-    {9.25825146e-01f, 4.68103387e-01f, 3.29434512e-01f},
-    {9.28329275e-01f, 4.72975465e-01f, 3.26066550e-01f},
-    {9.30798469e-01f, 4.77867420e-01f, 3.22696876e-01f},
-    {9.33232140e-01f, 4.82779603e-01f, 3.19325444e-01f},
-    {9.35629684e-01f, 4.87712357e-01f, 3.15952211e-01f},
-    {9.37990034e-01f, 4.92666544e-01f, 3.12575440e-01f},
-    {9.40312939e-01f, 4.97642038e-01f, 3.09196628e-01f},
-    {9.42597771e-01f, 5.02639147e-01f, 3.05815824e-01f},
-    {9.44843893e-01f, 5.07658169e-01f, 3.02433101e-01f},
-    {9.47050662e-01f, 5.12699390e-01f, 2.99048555e-01f},
-    {9.49217427e-01f, 5.17763087e-01f, 2.95662308e-01f},
-    {9.51343530e-01f, 5.22849522e-01f, 2.92274506e-01f},
-    {9.53427725e-01f, 5.27959550e-01f, 2.88883445e-01f},
-    {9.55469640e-01f, 5.33093083e-01f, 2.85490391e-01f},
-    {9.57468770e-01f, 5.38250172e-01f, 2.82096149e-01f},
-    {9.59424430e-01f, 5.43431038e-01f, 2.78700990e-01f},
-    {9.61335930e-01f, 5.48635890e-01f, 2.75305214e-01f},
-    {9.63202573e-01f, 5.53864931e-01f, 2.71909159e-01f},
-    {9.65023656e-01f, 5.59118349e-01f, 2.68513200e-01f},
-    {9.66798470e-01f, 5.64396327e-01f, 2.65117752e-01f},
-    {9.68525639e-01f, 5.69699633e-01f, 2.61721488e-01f},
-    {9.70204593e-01f, 5.75028270e-01f, 2.58325424e-01f},
-    {9.71835007e-01f, 5.80382015e-01f, 2.54931256e-01f},
-    {9.73416145e-01f, 5.85761012e-01f, 2.51539615e-01f},
-    {9.74947262e-01f, 5.91165394e-01f, 2.48151200e-01f},
-    {9.76427606e-01f, 5.96595287e-01f, 2.44766775e-01f},
-    {9.77856416e-01f, 6.02050811e-01f, 2.41387186e-01f},
-    {9.79232922e-01f, 6.07532077e-01f, 2.38013359e-01f},
-    {9.80556344e-01f, 6.13039190e-01f, 2.34646316e-01f},
-    {9.81825890e-01f, 6.18572250e-01f, 2.31287178e-01f},
-    {9.83040742e-01f, 6.24131362e-01f, 2.27937141e-01f},
-    {9.84198924e-01f, 6.29717516e-01f, 2.24595006e-01f},
-    {9.85300760e-01f, 6.35329876e-01f, 2.21264889e-01f},
-    {9.86345421e-01f, 6.40968508e-01f, 2.17948456e-01f},
-    {9.87332067e-01f, 6.46633475e-01f, 2.14647532e-01f},
-    {9.88259846e-01f, 6.52324832e-01f, 2.11364122e-01f},
-    {9.89127893e-01f, 6.58042630e-01f, 2.08100426e-01f},
-    {9.89935328e-01f, 6.63786914e-01f, 2.04858855e-01f},
-    {9.90681261e-01f, 6.69557720e-01f, 2.01642049e-01f},
-    {9.91364787e-01f, 6.75355082e-01f, 1.98452900e-01f},
-    {9.91984990e-01f, 6.81179025e-01f, 1.95294567e-01f},
-    {9.92540939e-01f, 6.87029567e-01f, 1.92170500e-01f},
-    {9.93031693e-01f, 6.92906719e-01f, 1.89084459e-01f},
-    {9.93456302e-01f, 6.98810484e-01f, 1.86040537e-01f},
-    {9.93813802e-01f, 7.04740854e-01f, 1.83043180e-01f},
-    {9.94103226e-01f, 7.10697814e-01f, 1.80097207e-01f},
-    {9.94323596e-01f, 7.16681336e-01f, 1.77207826e-01f},
-    {9.94473934e-01f, 7.22691379e-01f, 1.74380656e-01f},
-    {9.94553260e-01f, 7.28727890e-01f, 1.71621733e-01f},
-    {9.94560594e-01f, 7.34790799e-01f, 1.68937522e-01f},
-    {9.94494964e-01f, 7.40880020e-01f, 1.66334918e-01f},
-    {9.94355411e-01f, 7.46995448e-01f, 1.63821243e-01f},
-    {9.94140989e-01f, 7.53136955e-01f, 1.61404226e-01f},
-    {9.93850778e-01f, 7.59304390e-01f, 1.59091984e-01f},
-    {9.93482190e-01f, 7.65498551e-01f, 1.56890625e-01f},
-    {9.93033251e-01f, 7.71719833e-01f, 1.54807583e-01f},
-    {9.92505214e-01f, 7.77966775e-01f, 1.52854862e-01f},
-    {9.91897270e-01f, 7.84239120e-01f, 1.51041581e-01f},
-    {9.91208680e-01f, 7.90536569e-01f, 1.49376885e-01f},
-    {9.90438793e-01f, 7.96858775e-01f, 1.47869810e-01f},
-    {9.89587065e-01f, 8.03205337e-01f, 1.46529128e-01f},
-    {9.88647741e-01f, 8.09578605e-01f, 1.45357284e-01f},
-    {9.87620557e-01f, 8.15977942e-01f, 1.44362644e-01f},
-    {9.86509366e-01f, 8.22400620e-01f, 1.43556679e-01f},
-    {9.85314198e-01f, 8.28845980e-01f, 1.42945116e-01f},
-    {9.84031139e-01f, 8.35315360e-01f, 1.42528388e-01f},
-    {9.82652820e-01f, 8.41811730e-01f, 1.42302653e-01f},
-    {9.81190389e-01f, 8.48328902e-01f, 1.42278607e-01f},
-    {9.79643637e-01f, 8.54866468e-01f, 1.42453425e-01f},
-    {9.77994918e-01f, 8.61432314e-01f, 1.42808191e-01f},
-    {9.76264977e-01f, 8.68015998e-01f, 1.43350944e-01f},
-    {9.74443038e-01f, 8.74622194e-01f, 1.44061156e-01f},
-    {9.72530009e-01f, 8.81250063e-01f, 1.44922913e-01f},
-    {9.70532932e-01f, 8.87896125e-01f, 1.45918663e-01f},
-    {9.68443477e-01f, 8.94563989e-01f, 1.47014438e-01f},
-    {9.66271225e-01f, 9.01249365e-01f, 1.48179639e-01f},
-    {9.64021057e-01f, 9.07950379e-01f, 1.49370428e-01f},
-    {9.61681481e-01f, 9.14672479e-01f, 1.50520343e-01f},
-    {9.59275646e-01f, 9.21406537e-01f, 1.51566019e-01f},
-    {9.56808068e-01f, 9.28152065e-01f, 1.52409489e-01f},
-    {9.54286813e-01f, 9.34907730e-01f, 1.52921158e-01f},
-    {9.51726083e-01f, 9.41670605e-01f, 1.52925363e-01f},
-    {9.49150533e-01f, 9.48434900e-01f, 1.52177604e-01f},
-    {9.46602270e-01f, 9.55189860e-01f, 1.50327944e-01f},
-    {9.44151742e-01f, 9.61916487e-01f, 1.46860789e-01f},
-    {9.41896120e-01f, 9.68589814e-01f, 1.40955606e-01f},
-    {9.40015097e-01f, 9.75158357e-01f, 1.31325517e-01f}};
-
-//Viridis data from MatPlotLib
-static std::vector<Color<float>> viridis_data = {
-    {0.26700401f, 0.00487433f, 0.32941519f},
-    {0.26851048f, 0.00960483f, 0.33542652f},
-    {0.26994384f, 0.01462494f, 0.34137895f},
-    {0.27130489f, 0.01994186f, 0.34726862f},
-    {0.27259384f, 0.02556309f, 0.35309303f},
-    {0.27380934f, 0.03149748f, 0.35885256f},
-    {0.27495242f, 0.03775181f, 0.36454323f},
-    {0.27602238f, 0.04416723f, 0.37016418f},
-    {0.2770184f, 0.05034437f, 0.37571452f},
-    {0.27794143f, 0.05632444f, 0.38119074f},
-    {0.27879067f, 0.06214536f, 0.38659204f},
-    {0.2795655f, 0.06783587f, 0.39191723f},
-    {0.28026658f, 0.07341724f, 0.39716349f},
-    {0.28089358f, 0.07890703f, 0.40232944f},
-    {0.28144581f, 0.0843197f, 0.40741404f},
-    {0.28192358f, 0.08966622f, 0.41241521f},
-    {0.28232739f, 0.09495545f, 0.41733086f},
-    {0.28265633f, 0.10019576f, 0.42216032f},
-    {0.28291049f, 0.10539345f, 0.42690202f},
-    {0.28309095f, 0.11055307f, 0.43155375f},
-    {0.28319704f, 0.11567966f, 0.43611482f},
-    {0.28322882f, 0.12077701f, 0.44058404f},
-    {0.28318684f, 0.12584799f, 0.44496f},
-    {0.283072f, 0.13089477f, 0.44924127f},
-    {0.28288389f, 0.13592005f, 0.45342734f},
-    {0.28262297f, 0.14092556f, 0.45751726f},
-    {0.28229037f, 0.14591233f, 0.46150995f},
-    {0.28188676f, 0.15088147f, 0.46540474f},
-    {0.28141228f, 0.15583425f, 0.46920128f},
-    {0.28086773f, 0.16077132f, 0.47289909f},
-    {0.28025468f, 0.16569272f, 0.47649762f},
-    {0.27957399f, 0.17059884f, 0.47999675f},
-    {0.27882618f, 0.1754902f, 0.48339654f},
-    {0.27801236f, 0.18036684f, 0.48669702f},
-    {0.27713437f, 0.18522836f, 0.48989831f},
-    {0.27619376f, 0.19007447f, 0.49300074f},
-    {0.27519116f, 0.1949054f, 0.49600488f},
-    {0.27412802f, 0.19972086f, 0.49891131f},
-    {0.27300596f, 0.20452049f, 0.50172076f},
-    {0.27182812f, 0.20930306f, 0.50443413f},
-    {0.27059473f, 0.21406899f, 0.50705243f},
-    {0.26930756f, 0.21881782f, 0.50957678f},
-    {0.26796846f, 0.22354911f, 0.5120084f},
-    {0.26657984f, 0.2282621f, 0.5143487f},
-    {0.2651445f, 0.23295593f, 0.5165993f},
-    {0.2636632f, 0.23763078f, 0.51876163f},
-    {0.26213801f, 0.24228619f, 0.52083736f},
-    {0.26057103f, 0.2469217f, 0.52282822f},
-    {0.25896451f, 0.25153685f, 0.52473609f},
-    {0.25732244f, 0.2561304f, 0.52656332f},
-    {0.25564519f, 0.26070284f, 0.52831152f},
-    {0.25393498f, 0.26525384f, 0.52998273f},
-    {0.25219404f, 0.26978306f, 0.53157905f},
-    {0.25042462f, 0.27429024f, 0.53310261f},
-    {0.24862899f, 0.27877509f, 0.53455561f},
-    {0.2468114f, 0.28323662f, 0.53594093f},
-    {0.24497208f, 0.28767547f, 0.53726018f},
-    {0.24311324f, 0.29209154f, 0.53851561f},
-    {0.24123708f, 0.29648471f, 0.53970946f},
-    {0.23934575f, 0.30085494f, 0.54084398f},
-    {0.23744138f, 0.30520222f, 0.5419214f},
-    {0.23552606f, 0.30952657f, 0.54294396f},
-    {0.23360277f, 0.31382773f, 0.54391424f},
-    {0.2316735f, 0.3181058f, 0.54483444f},
-    {0.22973926f, 0.32236127f, 0.54570633f},
-    {0.22780192f, 0.32659432f, 0.546532f},
-    {0.2258633f, 0.33080515f, 0.54731353f},
-    {0.22392515f, 0.334994f, 0.54805291f},
-    {0.22198915f, 0.33916114f, 0.54875211f},
-    {0.22005691f, 0.34330688f, 0.54941304f},
-    {0.21812995f, 0.34743154f, 0.55003755f},
-    {0.21620971f, 0.35153548f, 0.55062743f},
-    {0.21429757f, 0.35561907f, 0.5511844f},
-    {0.21239477f, 0.35968273f, 0.55171011f},
-    {0.2105031f, 0.36372671f, 0.55220646f},
-    {0.20862342f, 0.36775151f, 0.55267486f},
-    {0.20675628f, 0.37175775f, 0.55311653f},
-    {0.20490257f, 0.37574589f, 0.55353282f},
-    {0.20306309f, 0.37971644f, 0.55392505f},
-    {0.20123854f, 0.38366989f, 0.55429441f},
-    {0.1994295f, 0.38760678f, 0.55464205f},
-    {0.1976365f, 0.39152762f, 0.55496905f},
-    {0.19585993f, 0.39543297f, 0.55527637f},
-    {0.19410009f, 0.39932336f, 0.55556494f},
-    {0.19235719f, 0.40319934f, 0.55583559f},
-    {0.19063135f, 0.40706148f, 0.55608907f},
-    {0.18892259f, 0.41091033f, 0.55632606f},
-    {0.18723083f, 0.41474645f, 0.55654717f},
-    {0.18555593f, 0.4185704f, 0.55675292f},
-    {0.18389763f, 0.42238275f, 0.55694377f},
-    {0.18225561f, 0.42618405f, 0.5571201f},
-    {0.18062949f, 0.42997486f, 0.55728221f},
-    {0.17901879f, 0.43375572f, 0.55743035f},
-    {0.17742298f, 0.4375272f, 0.55756466f},
-    {0.17584148f, 0.44128981f, 0.55768526f},
-    {0.17427363f, 0.4450441f, 0.55779216f},
-    {0.17271876f, 0.4487906f, 0.55788532f},
-    {0.17117615f, 0.4525298f, 0.55796464f},
-    {0.16964573f, 0.45626209f, 0.55803034f},
-    {0.16812641f, 0.45998802f, 0.55808199f},
-    {0.1666171f, 0.46370813f, 0.55811913f},
-    {0.16511703f, 0.4674229f, 0.55814141f},
-    {0.16362543f, 0.47113278f, 0.55814842f},
-    {0.16214155f, 0.47483821f, 0.55813967f},
-    {0.16066467f, 0.47853961f, 0.55811466f},
-    {0.15919413f, 0.4822374f, 0.5580728f},
-    {0.15772933f, 0.48593197f, 0.55801347f},
-    {0.15626973f, 0.4896237f, 0.557936f},
-    {0.15481488f, 0.49331293f, 0.55783967f},
-    {0.15336445f, 0.49700003f, 0.55772371f},
-    {0.1519182f, 0.50068529f, 0.55758733f},
-    {0.15047605f, 0.50436904f, 0.55742968f},
-    {0.14903918f, 0.50805136f, 0.5572505f},
-    {0.14760731f, 0.51173263f, 0.55704861f},
-    {0.14618026f, 0.51541316f, 0.55682271f},
-    {0.14475863f, 0.51909319f, 0.55657181f},
-    {0.14334327f, 0.52277292f, 0.55629491f},
-    {0.14193527f, 0.52645254f, 0.55599097f},
-    {0.14053599f, 0.53013219f, 0.55565893f},
-    {0.13914708f, 0.53381201f, 0.55529773f},
-    {0.13777048f, 0.53749213f, 0.55490625f},
-    {0.1364085f, 0.54117264f, 0.55448339f},
-    {0.13506561f, 0.54485335f, 0.55402906f},
-    {0.13374299f, 0.54853458f, 0.55354108f},
-    {0.13244401f, 0.55221637f, 0.55301828f},
-    {0.13117249f, 0.55589872f, 0.55245948f},
-    {0.1299327f, 0.55958162f, 0.55186354f},
-    {0.12872938f, 0.56326503f, 0.55122927f},
-    {0.12756771f, 0.56694891f, 0.55055551f},
-    {0.12645338f, 0.57063316f, 0.5498411f},
-    {0.12539383f, 0.57431754f, 0.54908564f},
-    {0.12439474f, 0.57800205f, 0.5482874f},
-    {0.12346281f, 0.58168661f, 0.54744498f},
-    {0.12260562f, 0.58537105f, 0.54655722f},
-    {0.12183122f, 0.58905521f, 0.54562298f},
-    {0.12114807f, 0.59273889f, 0.54464114f},
-    {0.12056501f, 0.59642187f, 0.54361058f},
-    {0.12009154f, 0.60010387f, 0.54253043f},
-    {0.11973756f, 0.60378459f, 0.54139999f},
-    {0.11951163f, 0.60746388f, 0.54021751f},
-    {0.11942341f, 0.61114146f, 0.53898192f},
-    {0.11948255f, 0.61481702f, 0.53769219f},
-    {0.11969858f, 0.61849025f, 0.53634733f},
-    {0.12008079f, 0.62216081f, 0.53494633f},
-    {0.12063824f, 0.62582833f, 0.53348834f},
-    {0.12137972f, 0.62949242f, 0.53197275f},
-    {0.12231244f, 0.63315277f, 0.53039808f},
-    {0.12344358f, 0.63680899f, 0.52876343f},
-    {0.12477953f, 0.64046069f, 0.52706792f},
-    {0.12632581f, 0.64410744f, 0.52531069f},
-    {0.12808703f, 0.64774881f, 0.52349092f},
-    {0.13006688f, 0.65138436f, 0.52160791f},
-    {0.13226797f, 0.65501363f, 0.51966086f},
-    {0.13469183f, 0.65863619f, 0.5176488f},
-    {0.13733921f, 0.66225157f, 0.51557101f},
-    {0.14020991f, 0.66585927f, 0.5134268f},
-    {0.14330291f, 0.66945881f, 0.51121549f},
-    {0.1466164f, 0.67304968f, 0.50893644f},
-    {0.15014782f, 0.67663139f, 0.5065889f},
-    {0.15389405f, 0.68020343f, 0.50417217f},
-    {0.15785146f, 0.68376525f, 0.50168574f},
-    {0.16201598f, 0.68731632f, 0.49912906f},
-    {0.1663832f, 0.69085611f, 0.49650163f},
-    {0.1709484f, 0.69438405f, 0.49380294f},
-    {0.17570671f, 0.6978996f, 0.49103252f},
-    {0.18065314f, 0.70140222f, 0.48818938f},
-    {0.18578266f, 0.70489133f, 0.48527326f},
-    {0.19109018f, 0.70836635f, 0.48228395f},
-    {0.19657063f, 0.71182668f, 0.47922108f},
-    {0.20221902f, 0.71527175f, 0.47608431f},
-    {0.20803045f, 0.71870095f, 0.4728733f},
-    {0.21400015f, 0.72211371f, 0.46958774f},
-    {0.22012381f, 0.72550945f, 0.46622638f},
-    {0.2263969f, 0.72888753f, 0.46278934f},
-    {0.23281498f, 0.73224735f, 0.45927675f},
-    {0.2393739f, 0.73558828f, 0.45568838f},
-    {0.24606968f, 0.73890972f, 0.45202405f},
-    {0.25289851f, 0.74221104f, 0.44828355f},
-    {0.25985676f, 0.74549162f, 0.44446673f},
-    {0.26694127f, 0.74875084f, 0.44057284f},
-    {0.27414922f, 0.75198807f, 0.4366009f},
-    {0.28147681f, 0.75520266f, 0.43255207f},
-    {0.28892102f, 0.75839399f, 0.42842626f},
-    {0.29647899f, 0.76156142f, 0.42422341f},
-    {0.30414796f, 0.76470433f, 0.41994346f},
-    {0.31192534f, 0.76782207f, 0.41558638f},
-    {0.3198086f, 0.77091403f, 0.41115215f},
-    {0.3277958f, 0.77397953f, 0.40664011f},
-    {0.33588539f, 0.7770179f, 0.40204917f},
-    {0.34407411f, 0.78002855f, 0.39738103f},
-    {0.35235985f, 0.78301086f, 0.39263579f},
-    {0.36074053f, 0.78596419f, 0.38781353f},
-    {0.3692142f, 0.78888793f, 0.38291438f},
-    {0.37777892f, 0.79178146f, 0.3779385f},
-    {0.38643282f, 0.79464415f, 0.37288606f},
-    {0.39517408f, 0.79747541f, 0.36775726f},
-    {0.40400101f, 0.80027461f, 0.36255223f},
-    {0.4129135f, 0.80304099f, 0.35726893f},
-    {0.42190813f, 0.80577412f, 0.35191009f},
-    {0.43098317f, 0.80847343f, 0.34647607f},
-    {0.44013691f, 0.81113836f, 0.3409673f},
-    {0.44936763f, 0.81376835f, 0.33538426f},
-    {0.45867362f, 0.81636288f, 0.32972749f},
-    {0.46805314f, 0.81892143f, 0.32399761f},
-    {0.47750446f, 0.82144351f, 0.31819529f},
-    {0.4870258f, 0.82392862f, 0.31232133f},
-    {0.49661536f, 0.82637633f, 0.30637661f},
-    {0.5062713f, 0.82878621f, 0.30036211f},
-    {0.51599182f, 0.83115784f, 0.29427888f},
-    {0.52577622f, 0.83349064f, 0.2881265f},
-    {0.5356211f, 0.83578452f, 0.28190832f},
-    {0.5455244f, 0.83803918f, 0.27562602f},
-    {0.55548397f, 0.84025437f, 0.26928147f},
-    {0.5654976f, 0.8424299f, 0.26287683f},
-    {0.57556297f, 0.84456561f, 0.25641457f},
-    {0.58567772f, 0.84666139f, 0.24989748f},
-    {0.59583934f, 0.84871722f, 0.24332878f},
-    {0.60604528f, 0.8507331f, 0.23671214f},
-    {0.61629283f, 0.85270912f, 0.23005179f},
-    {0.62657923f, 0.85464543f, 0.22335258f},
-    {0.63690157f, 0.85654226f, 0.21662012f},
-    {0.64725685f, 0.85839991f, 0.20986086f},
-    {0.65764197f, 0.86021878f, 0.20308229f},
-    {0.66805369f, 0.86199932f, 0.19629307f},
-    {0.67848868f, 0.86374211f, 0.18950326f},
-    {0.68894351f, 0.86544779f, 0.18272455f},
-    {0.69941463f, 0.86711711f, 0.17597055f},
-    {0.70989842f, 0.86875092f, 0.16925712f},
-    {0.72039115f, 0.87035015f, 0.16260273f},
-    {0.73088902f, 0.87191584f, 0.15602894f},
-    {0.74138803f, 0.87344918f, 0.14956101f},
-    {0.75188414f, 0.87495143f, 0.14322828f},
-    {0.76237342f, 0.87642392f, 0.13706449f},
-    {0.77285183f, 0.87786808f, 0.13110864f},
-    {0.78331535f, 0.87928545f, 0.12540538f},
-    {0.79375994f, 0.88067763f, 0.12000532f},
-    {0.80418159f, 0.88204632f, 0.11496505f},
-    {0.81457634f, 0.88339329f, 0.11034678f},
-    {0.82494028f, 0.88472036f, 0.10621724f},
-    {0.83526959f, 0.88602943f, 0.1026459f},
-    {0.84556056f, 0.88732243f, 0.09970219f},
-    {0.8558096f, 0.88860134f, 0.09745186f},
-    {0.86601325f, 0.88986815f, 0.09595277f},
-    {0.87616824f, 0.89112487f, 0.09525046f},
-    {0.88627146f, 0.89237353f, 0.09537439f},
-    {0.89632002f, 0.89361614f, 0.09633538f},
-    {0.90631121f, 0.89485467f, 0.09812496f},
-    {0.91624212f, 0.89609127f, 0.1007168f},
-    {0.92610579f, 0.89732977f, 0.10407067f},
-    {0.93590444f, 0.8985704f, 0.10813094f},
-    {0.94563626f, 0.899815f, 0.11283773f},
-    {0.95529972f, 0.90106534f, 0.11812832f},
-    {0.96489353f, 0.90232311f, 0.12394051f},
-    {0.97441665f, 0.90358991f, 0.13021494f},
-    {0.98386829f, 0.90486726f, 0.13689671f},
-    {0.99324789f, 0.90615657f, 0.1439362f}};
-
-ColorMap::ColorMap(float min_val, float max_val, const std::vector<Color<float>>& color_data)
-    : min_(min_val)
-    , max_(max_val)
-    , color_data_(color_data) {
-    VTR_ASSERT(max_ >= min_);
-}
-
-Color<float> ColorMap::color(float value) const {
-    VTR_ASSERT(value >= min_);
-    VTR_ASSERT(value <= max_);
-
-    float norm_value;
-    if (range() == 0) {
-        norm_value = 0;
-    } else {
-        norm_value = (value - min_) / range();
-    }
-
-    size_t color_idx = std::round(norm_value * (color_data_.size() - 1));
-
-    VTR_ASSERT(color_idx < color_data_.size());
-
-    return color_data_[color_idx];
-}
-
-float ColorMap::min() const {
-    return min_;
-}
-
-float ColorMap::max() const {
-    return max_;
-}
-
-float ColorMap::range() const {
-    return max() - min();
-}
-
-InfernoColorMap::InfernoColorMap(float min_val, float max_val)
-    : ColorMap(min_val, max_val, inferno_data) {}
-
-PlasmaColorMap::PlasmaColorMap(float min_val, float max_val)
-    : ColorMap(min_val, max_val, plasma_data) {}
-
-ViridisColorMap::ViridisColorMap(float min_val, float max_val)
-    : ColorMap(min_val, max_val, viridis_data) {}
-
-} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_color_map.h b/third_party/vtr/libs/vtrutil/src/vtr_color_map.h
deleted file mode 100644
index f313999ca..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_color_map.h
+++ /dev/null
@@ -1,58 +0,0 @@
-#ifndef VTR_CMAP_H
-#define VTR_CMAP_H
-#include <vector>
-
-namespace vtr {
-
-///@brief A container to save the rgb components of a color
-template<class T>
-struct Color {
-    T r;
-    T g;
-    T b;
-};
-
-///@brief A class that holds a complete color map
-class ColorMap {
-  public:
-    ///@brief color map constructor
-    ColorMap(float min, float max, const std::vector<Color<float>>& color_data);
-
-    ///@brief color map destructor
-    virtual ~ColorMap() = default;
-
-    ///@brief Returns the full color corresponding to the input value
-    Color<float> color(float value) const;
-
-    ///@brief Return the min Color of this color map
-    float min() const;
-
-    ///@brief Return the max color of this color map
-    float max() const;
-
-    ///@brief Return the range of the color map
-    float range() const;
-
-  private:
-    float min_;
-    float max_;
-    std::vector<Color<float>> color_data_;
-};
-
-class InfernoColorMap : public ColorMap {
-  public:
-    InfernoColorMap(float min, float max);
-};
-
-class PlasmaColorMap : public ColorMap {
-  public:
-    PlasmaColorMap(float min, float max);
-};
-
-class ViridisColorMap : public ColorMap {
-  public:
-    ViridisColorMap(float min, float max);
-};
-
-} // namespace vtr
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_digest.cc b/third_party/vtr/libs/vtrutil/src/vtr_digest.cc
deleted file mode 100644
index aedcd613e..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_digest.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-#include "vtr_digest.h"
-#include "vtr_error.h"
-
-#include <iostream>
-#include <fstream>
-#include <array>
-
-#include "picosha2.h"
-
-namespace vtr {
-
-std::string secure_digest_file(const std::string& filepath) {
-    std::ifstream is(filepath);
-    if (!is) {
-        throw VtrError("Failed to open file", filepath);
-    }
-    return secure_digest_stream(is);
-}
-
-std::string secure_digest_stream(std::istream& is) {
-    //Read the stream in chunks and calculate the SHA256 digest
-    picosha2::hash256_one_by_one hasher;
-
-    std::array<char, 1024> buf;
-    while (!is.eof()) {
-        //Process a chunk
-        is.read(buf.data(), buf.size());
-        hasher.process(buf.begin(), buf.begin() + is.gcount());
-    }
-    hasher.finish();
-
-    //Return the digest as a hex string, prefixed with the hash type
-    //
-    //Prefixing with the hash type should allow us to differentiate if the
-    //hash type is ever changed in the future
-    return "SHA256:" + picosha2::get_hash_hex_string(hasher);
-}
-
-} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_digest.h b/third_party/vtr/libs/vtrutil/src/vtr_digest.h
deleted file mode 100644
index 4d67f8f6a..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_digest.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef VTR_DIGEST_H
-#define VTR_DIGEST_H
-#include <iosfwd>
-#include <string>
-
-namespace vtr {
-
-///@brief Generate a secure hash of the file at filepath
-std::string secure_digest_file(const std::string& filepath);
-
-///@brief Generate a secure hash of a stream
-std::string secure_digest_stream(std::istream& is);
-
-} // namespace vtr
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_dynamic_bitset.h b/third_party/vtr/libs/vtrutil/src/vtr_dynamic_bitset.h
deleted file mode 100644
index 291ddd239..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_dynamic_bitset.h
+++ /dev/null
@@ -1,72 +0,0 @@
-#ifndef VTR_DYNAMIC_BITSET
-#define VTR_DYNAMIC_BITSET
-
-#include <limits>
-#include <vector>
-
-namespace vtr {
-/**
- * @brief A container to represent a set of flags either they are set or reset 
- *
- * It allocates any required length of bit at runtime. It is very useful in bit manipulation
- */
-template<typename Index = size_t, typename Storage = unsigned int>
-class dynamic_bitset {
-  public:
-    ///@brief Bits in underlying storage.
-    static constexpr size_t kWidth = std::numeric_limits<Storage>::digits;
-    static_assert(!std::numeric_limits<Storage>::is_signed,
-                  "dynamic_bitset storage must be unsigned!");
-    static_assert(std::numeric_limits<Storage>::is_integer,
-                  "dynamic_bitset storage must be integer!");
-
-    ///@brief Reize to the determined size
-    void resize(size_t size) {
-        array_.resize((size + kWidth - 1) / kWidth);
-    }
-
-    ///@brief Clear all the bits
-    void clear() {
-        array_.clear();
-        array_.shrink_to_fit();
-    }
-
-    ///@brief Return the size of the bitset (total number of bits)
-    size_t size() const {
-        return array_.size() * kWidth;
-    }
-
-    ///@brief Fill the whole bitset with a specific value (0 or 1)
-    void fill(bool set) {
-        if (set) {
-            std::fill(array_.begin(), array_.end(), std::numeric_limits<Storage>::max());
-        } else {
-            std::fill(array_.begin(), array_.end(), 0);
-        }
-    }
-
-    ///@brief Set a specific bit in the bit set to a specific value (0 or 1)
-    void set(Index index, bool val) {
-        size_t index_value(index);
-        VTR_ASSERT_SAFE(index_value < size());
-        if (val) {
-            array_[index_value / kWidth] |= (1 << (index_value % kWidth));
-        } else {
-            array_[index_value / kWidth] &= ~(1u << (index_value % kWidth));
-        }
-    }
-
-    ///@brief Return the value of a specific bit in the bitset
-    bool get(Index index) const {
-        size_t index_value(index);
-        VTR_ASSERT_SAFE(index_value < size());
-        return (array_[index_value / kWidth] & (1u << (index_value % kWidth))) != 0;
-    }
-
-  private:
-    std::vector<Storage> array_;
-};
-
-} // namespace vtr
-
-#endif /* VTR_DYNAMIC_BITSET */
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_error.h b/third_party/vtr/libs/vtrutil/src/vtr_error.h
deleted file mode 100644
index d710c6630..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_error.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifndef VTR_ERROR_H
-#define VTR_ERROR_H
-
-#include <stdexcept>
-#include <string>
-
-/**
- * @file
- * @brief A utility container that can be used to identify VTR execution errors.
- * 
- * The recommended usage is to store information in this container about the error during an error event and and then throwing an exception with the container. If the exception is not handled (exception is not caught), this will result in the termination of the program.
- * 
- * Error information can be displayed using the information stored within this container.
- * 
- */
-
-namespace vtr {
-
-/**
- * @brief Container that holds information related to an error
- *
- * It holds different info related to a VTR error:
- *      - error message
- *      - file name associated with the error
- *      - line number associated with the error
- * 
- * Example Usage:
- * 
- *      // creating and throwing an exception with a VtrError container that has an error occuring in file "error_file.txt" at line number 1
- *       
- *      throw vtr::VtrError("This is a program terminating error!", "error_file.txt", 1);
- * 
- */
-class VtrError : public std::runtime_error {
-  public:
-    ///@brief VtrError constructor
-    VtrError(std::string msg = "", std::string new_filename = "", size_t new_linenumber = -1)
-        : std::runtime_error(msg)
-        , filename_(new_filename)
-        , linenumber_(new_linenumber) {}
-
-    /**
-     * @brief gets the filename 
-     *
-     * Returns the filename associated with this error.
-     * Returns an empty string if none is specified.
-     */
-    std::string filename() const { return filename_; }
-
-    ///@brief same as filename() but returns in c style string
-    const char* filename_c_str() const { return filename_.c_str(); }
-
-    /**
-     * @brief get the line number
-     *
-     * Returns the line number associated with this error.
-     * Returns zero if none is specified.
-     */
-    size_t line() const { return linenumber_; }
-
-  private:
-    std::string filename_;
-    size_t linenumber_;
-};
-
-} // namespace vtr
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_expr_eval.cc b/third_party/vtr/libs/vtrutil/src/vtr_expr_eval.cc
deleted file mode 100644
index 165b9caa5..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_expr_eval.cc
+++ /dev/null
@@ -1,904 +0,0 @@
-#include "vtr_expr_eval.h"
-#include "vtr_error.h"
-#include "vtr_util.h"
-#include "vtr_math.h"
-
-#include <string>
-#include <sstream>
-#include <iostream>
-
-/** global variables **/
-
-/** bp_state_globals is a variable that holds a member of type BreakpointState. This member is altered by the breakpoint class, the placer, and router and holds the most updated values for variables that can trigger breakpoints (e.g move_num, temp_num etc.) **/
-BreakpointStateGlobals bp_state_globals;
-
-namespace vtr {
-
-using std::stack;
-using std::string;
-using std::stringstream;
-using std::vector;
-
-/**this variables is used for the += operator and holds the initial value of the variable that is to be added to. after every addition, the related function compares with initial value to ensure correct incrementation **/
-static int before_addition = 0;
-
-/*---- Functions for Parsing the Symbolic Formulas ----*/
-
-/* converts specified formula to a vector in reverse-polish notation */
-static void formula_to_rpn(const char* formula, const t_formula_data& mydata, vector<Formula_Object>& rpn_output, stack<Formula_Object>& op_stack, bool is_breakpoint);
-
-static void get_formula_object(const char* ch, int& ichar, const t_formula_data& mydata, Formula_Object* fobj, bool is_breakpoint);
-
-/* returns integer specifying precedence of passed-in operator. higher integer
- * means higher precedence */
-static int get_fobj_precedence(const Formula_Object& fobj);
-
-/* Returns associativity of the specified operator */
-static bool op_associativity_is_left(const t_operator& op);
-
-/* used by the shunting-yard formula parser to deal with operators such as add and subtract */
-static void handle_operator(const Formula_Object& fobj, vector<Formula_Object>& rpn_output, stack<Formula_Object>& op_stack);
-
-/* used by the shunting-yard formula parser to deal with brackets, ie '(' and ')' */
-static void handle_bracket(const Formula_Object& fobj, vector<Formula_Object>& rpn_output, stack<Formula_Object>& op_stack);
-
-/* used by the shunting-yard formula parser to deal with commas, ie ','. These occur in function calls*/
-static void handle_comma(const Formula_Object& fobj, vector<Formula_Object>& rpn_output, stack<Formula_Object>& op_stack);
-
-/* parses revere-polish notation vector to return formula result */
-static int parse_rpn_vector(vector<Formula_Object>& rpn_vec);
-
-/* applies operation specified by 'op' to the given arguments. arg1 comes before arg2 */
-static int apply_rpn_op(const Formula_Object& arg1, const Formula_Object& arg2, const Formula_Object& op);
-
-/* checks if specified character represents an ASCII number */
-static bool is_char_number(const char ch);
-
-// returns true if ch is an operator (e.g +,-, *, etc.)
-static bool is_operator(const char ch);
-
-// returns true if the specified name is a known function operator
-static bool is_function(std::string name);
-
-// returns true if the specified name is a known compound operator
-t_compound_operator is_compound_op(const char* ch);
-
-// returns true if the specified name is a known variable
-static bool is_variable(std::string var);
-
-// returns the length of any identifier (e.g. name, function) starting at the beginning of str
-static int identifier_length(const char* str);
-
-/* increments str_ind until it reaches specified char is formula. returns true if character was found, false otherwise */
-static bool goto_next_char(int* str_ind, const string& pw_formula, char ch);
-
-//compares two strings while ignoring white space and case
-bool same_string(std::string str1, std::string str2);
-
-//checks if the block indicated by the user was one of the moved blocks in the last perturbation
-int in_blocks_affected(std::string expression_left);
-
-//the function of += operator
-bool additional_assignment_op(int arg1, int arg2);
-
-/**** Function Implementations ****/
-/* returns integer result according to specified non-piece-wise formula and data */
-int FormulaParser::parse_formula(std::string formula, const t_formula_data& mydata, bool is_breakpoint) {
-    int result = -1;
-
-    /* output in reverse-polish notation */
-    auto& rpn_output = rpn_output_;
-    rpn_output.clear();
-
-    /* now we have to run the shunting-yard algorithm to convert formula to reverse polish notation */
-    formula_to_rpn(formula.c_str(), mydata, rpn_output, op_stack_, is_breakpoint);
-
-    /* then we run an RPN parser to get the final result */
-    result = parse_rpn_vector(rpn_output);
-
-    return result;
-}
-
-/* EXPERIMENTAL:
- *
- * returns integer result according to specified piece-wise formula and data. the piecewise
- * notation specifies different formulas that should be evaluated based on the index of
- * the incoming wire in 'mydata'. for example the formula
- *
- * {0:(W/2)} t-1; {(W/2):W} t+1;
- *
- * indicates that the function "t-1" should be evaluated if the incoming wire index falls
- * within the range [0,W/2) and that "t+1" should be evaluated if it falls within the
- * [W/2,W) range. The piece-wise format is:
- *
- * {start_0:end_0} formula_0; ... {start_i;end_i} formula_i; ...
- */
-int FormulaParser::parse_piecewise_formula(const char* formula, const t_formula_data& mydata) {
-    int result = -1;
-    int str_ind = 0;
-    int str_size = 0;
-
-    int t = mydata.get_var_value("t");
-    int tmp_ind_start = -1;
-    int tmp_ind_count = -1;
-    string substr;
-
-    /* convert formula to string format */
-    string pw_formula(formula);
-    str_size = pw_formula.size();
-
-    if (pw_formula[str_ind] != '{') {
-        throw vtr::VtrError(vtr::string_fmt("parse_piecewise_formula: the first character in piece-wise formula should always be '{'\n"), __FILE__, __LINE__);
-    }
-
-    /* find the range to which t corresponds */
-    /* the first character must be '{' as verified above */
-    while (str_ind != str_size - 1) {
-        /* set to true when range to which wire number corresponds has been found */
-        bool found_range = false;
-        bool char_found = false;
-        int range_start = -1;
-        int range_end = -1;
-        tmp_ind_start = -1;
-        tmp_ind_count = -1;
-
-        /* get the start of the range */
-        tmp_ind_start = str_ind + 1;
-        char_found = goto_next_char(&str_ind, pw_formula, ':');
-        if (!char_found) {
-            throw vtr::VtrError(vtr::string_fmt("parse_piecewise_formula: could not find char %c\n", ':'), __FILE__, __LINE__);
-        }
-        tmp_ind_count = str_ind - tmp_ind_start; /* range start is between { and : */
-        substr = pw_formula.substr(tmp_ind_start, tmp_ind_count);
-        range_start = parse_formula(substr.c_str(), mydata);
-
-        /* get the end of the range */
-        tmp_ind_start = str_ind + 1;
-        char_found = goto_next_char(&str_ind, pw_formula, '}');
-        if (!char_found) {
-            throw vtr::VtrError(vtr::string_fmt("parse_piecewise_formula: could not find char %c\n", '}'), __FILE__, __LINE__);
-        }
-        tmp_ind_count = str_ind - tmp_ind_start; /* range end is between : and } */
-        substr = pw_formula.substr(tmp_ind_start, tmp_ind_count);
-        range_end = parse_formula(substr.c_str(), mydata);
-
-        if (range_start > range_end) {
-            throw vtr::VtrError(vtr::string_fmt("parse_piecewise_formula: range_start, %d, is bigger than range end, %d\n", range_start, range_end), __FILE__, __LINE__);
-        }
-
-        /* is the incoming wire within this range? (inclusive) */
-        if (range_start <= t && range_end >= t) {
-            found_range = true;
-        } else {
-            found_range = false;
-        }
-
-        /* we're done if found correct range */
-        if (found_range) {
-            break;
-        }
-        char_found = goto_next_char(&str_ind, pw_formula, '{');
-        if (!char_found) {
-            throw vtr::VtrError(vtr::string_fmt("parse_piecewise_formula: could not find char %c\n", '{'), __FILE__, __LINE__);
-        }
-    }
-    /* the string index should never actually get to the end of the string because we should have found the range to which the
-     * current wire number corresponds */
-    if (str_ind == str_size - 1) {
-        throw vtr::VtrError(vtr::string_fmt("parse_piecewise_formula: could not find a closing '}'?\n"), __FILE__, __LINE__);
-    }
-
-    /* at this point str_ind should point to '}' right before the formula we're interested in starts */
-    /* get the value corresponding to this formula */
-    tmp_ind_start = str_ind + 1;
-    goto_next_char(&str_ind, pw_formula, ';');
-    tmp_ind_count = str_ind - tmp_ind_start; /* formula is between } and ; */
-    substr = pw_formula.substr(tmp_ind_start, tmp_ind_count);
-
-    /* now parse the formula corresponding to the appropriate piece-wise range */
-    result = parse_formula(substr.c_str(), mydata);
-
-    return result;
-}
-
-/* increments str_ind until it reaches specified char in formula. returns true if character was found, false otherwise */
-static bool goto_next_char(int* str_ind, const string& pw_formula, char ch) {
-    bool result = true;
-    int str_size = pw_formula.size();
-    if ((*str_ind) == str_size - 1) {
-        throw vtr::VtrError(vtr::string_fmt("goto_next_char: passed-in str_ind is already at the end of string\n"), __FILE__, __LINE__);
-    }
-
-    do {
-        (*str_ind)++;
-        if (pw_formula[*str_ind] == ch) {
-            /* found the next requested character */
-            break;
-        }
-
-    } while ((*str_ind) != str_size - 1);
-    if ((*str_ind) == str_size - 1 && pw_formula[*str_ind] != ch) {
-        result = false;
-    }
-    return result;
-}
-
-/* Parses the specified formula using a shunting yard algorithm (see wikipedia). The function's result
- * is stored in the rpn_output vector in reverse-polish notation */
-static void formula_to_rpn(const char* formula, const t_formula_data& mydata, vector<Formula_Object>& rpn_output, stack<Formula_Object>& op_stack, bool is_breakpoint) {
-    // Empty op_stack.
-    while (!op_stack.empty()) {
-        op_stack.pop();
-    }
-
-    Formula_Object fobj; /* for parsing formula objects */
-
-    int ichar = 0;
-    const char* ch = nullptr;
-    /* go through formula and build rpn_output along with op_stack until \0 character is hit */
-    while (true) {
-        ch = &formula[ichar];
-
-        if ('\0' == (*ch)) {
-            /* we're done */
-            break;
-        } else if (' ' == (*ch)) {
-            /* skip space */
-        } else {
-            /* parse the character */
-            get_formula_object(ch, ichar, mydata, &fobj, is_breakpoint);
-            switch (fobj.type) {
-                case E_FML_NUMBER:
-                    /* add to output vector */
-                    rpn_output.push_back(fobj);
-                    break;
-                case E_FML_OPERATOR:
-                    /* operators may be pushed to op_stack or rpn_output */
-                    handle_operator(fobj, rpn_output, op_stack);
-                    break;
-                case E_FML_BRACKET:
-                    /* brackets are only ever pushed to op_stack, not rpn_output */
-                    handle_bracket(fobj, rpn_output, op_stack);
-                    break;
-                case E_FML_COMMA:
-                    handle_comma(fobj, rpn_output, op_stack);
-                    break;
-                case E_FML_VARIABLE:
-                    /* add to output vector */
-                    rpn_output.push_back(fobj);
-                    break;
-                default:
-                    throw vtr::VtrError(vtr::string_fmt("in formula_to_rpn: unknown formula object type: %d\n", fobj.type), __FILE__, __LINE__);
-                    break;
-            }
-        }
-        ichar++;
-    }
-
-    /* pop all remaining operators off of stack */
-    Formula_Object fobj_dummy;
-    while (!op_stack.empty()) {
-        fobj_dummy = op_stack.top();
-
-        if (E_FML_BRACKET == fobj_dummy.type) {
-            throw vtr::VtrError(vtr::string_fmt("in formula_to_rpn: Mismatched brackets in user-provided formula\n"), __FILE__, __LINE__);
-        }
-
-        rpn_output.push_back(fobj_dummy);
-        op_stack.pop();
-    }
-
-    return;
-}
-
-/* Fills the formula object fobj according to specified character and mydata,
- * which help determine which numeric value, if any, gets assigned to fobj
- * ichar is incremented by the corresponding count if the need to step through the
- * character array arises */
-static void get_formula_object(const char* ch, int& ichar, const t_formula_data& mydata, Formula_Object* fobj, bool is_breakpoint) {
-    /* the character can either be part of a number, or it can be an object like W, t, (, +, etc
-     * here we have to account for both possibilities */
-
-    int id_len = identifier_length(ch);
-    //We have a variable or function name
-    std::string var_name(ch, id_len);
-    if (id_len != 0) {
-        if (is_function(var_name)) {
-            fobj->type = E_FML_OPERATOR;
-            if (var_name == "min")
-                fobj->data.op = E_OP_MIN;
-            else if (var_name == "max")
-                fobj->data.op = E_OP_MAX;
-            else if (var_name == "gcd")
-                fobj->data.op = E_OP_GCD;
-            else if (var_name == "lcm")
-                fobj->data.op = E_OP_LCM;
-            else {
-                throw vtr::VtrError(vtr::string_fmt("in get_formula_object: recognized function: %s\n", var_name.c_str()), __FILE__, __LINE__);
-            }
-
-        } else if (!is_breakpoint) {
-            //A number
-            fobj->type = E_FML_NUMBER;
-            fobj->data.num = mydata.get_var_value(
-                vtr::string_view(
-                    var_name.data(),
-                    var_name.size()));
-        } else if (is_variable(var_name)) {
-            fobj->type = E_FML_VARIABLE;
-            if (same_string(var_name, "temp_count"))
-                fobj->data.num = bp_state_globals.get_glob_breakpoint_state()->temp_count;
-            else if (same_string(var_name, "from_block"))
-                fobj->data.num = bp_state_globals.get_glob_breakpoint_state()->from_block;
-            else if (same_string(var_name, "move_num"))
-                fobj->data.num = bp_state_globals.get_glob_breakpoint_state()->move_num;
-            else if (same_string(var_name, "route_net_id"))
-                fobj->data.num = bp_state_globals.get_glob_breakpoint_state()->route_net_id;
-            else if (same_string(var_name, "in_blocks_affected"))
-                fobj->data.num = in_blocks_affected(std::string(ch));
-            else if (same_string(var_name, "router_iter"))
-                fobj->data.num = bp_state_globals.get_glob_breakpoint_state()->router_iter;
-        }
-
-        ichar += (id_len - 1); //-1 since ichar is incremented at end of loop in formula_to_rpn()
-
-    } else if (is_char_number(*ch)) {
-        /* we have a number -- use atoi to convert */
-        stringstream ss;
-        while (is_char_number(*ch)) {
-            ss << (*ch);
-            ichar++;
-            ch++;
-        }
-        ichar--;
-        fobj->type = E_FML_NUMBER;
-        fobj->data.num = vtr::atoi(ss.str().c_str());
-    } else if (is_compound_op(ch) != E_COM_OP_UNDEFINED) {
-        fobj->type = E_FML_OPERATOR;
-        t_compound_operator comp_op_code = is_compound_op(ch);
-        if (comp_op_code == E_COM_OP_EQ)
-            fobj->data.op = E_OP_EQ;
-        else if (comp_op_code == E_COM_OP_GTE)
-            fobj->data.op = E_OP_GTE;
-        else if (comp_op_code == E_COM_OP_LTE)
-            fobj->data.op = E_OP_LTE;
-        else if (comp_op_code == E_COM_OP_AND)
-            fobj->data.op = E_OP_AND;
-        else if (comp_op_code == E_COM_OP_OR)
-            fobj->data.op = E_OP_OR;
-        else if (comp_op_code == E_COM_OP_AA)
-            fobj->data.op = E_OP_AA;
-        ichar++;
-    } else {
-        switch ((*ch)) {
-            case '+':
-                fobj->type = E_FML_OPERATOR;
-                fobj->data.op = E_OP_ADD;
-                break;
-            case '-':
-                fobj->type = E_FML_OPERATOR;
-                fobj->data.op = E_OP_SUB;
-                break;
-            case '*':
-                fobj->type = E_FML_OPERATOR;
-                fobj->data.op = E_OP_MULT;
-                break;
-            case '/':
-                fobj->type = E_FML_OPERATOR;
-                fobj->data.op = E_OP_DIV;
-                break;
-            case '(':
-                fobj->type = E_FML_BRACKET;
-                fobj->data.left_bracket = true;
-                break;
-            case ')':
-                fobj->type = E_FML_BRACKET;
-                fobj->data.left_bracket = false;
-                break;
-            case ',':
-                fobj->type = E_FML_COMMA;
-                break;
-            case '>':
-                fobj->type = E_FML_OPERATOR;
-                fobj->data.op = E_OP_GT;
-                break;
-            case '<':
-                fobj->type = E_FML_OPERATOR;
-                fobj->data.op = E_OP_LT;
-                break;
-            case '%':
-                fobj->type = E_FML_OPERATOR;
-                fobj->data.op = E_OP_MOD;
-                break;
-            default:
-                throw vtr::VtrError(vtr::string_fmt("in get_formula_object: unsupported character: %c\n", *ch), __FILE__, __LINE__);
-                break;
-        }
-    }
-
-    return;
-}
-
-/* returns integer specifying precedence of passed-in operator. higher integer
- * means higher precedence */
-static int get_fobj_precedence(const Formula_Object& fobj) {
-    int precedence = 0;
-
-    if (E_FML_BRACKET == fobj.type || E_FML_COMMA == fobj.type) {
-        precedence = 0;
-    } else if (E_FML_OPERATOR == fobj.type) {
-        t_operator op = fobj.data.op;
-        switch (op) {
-            case E_OP_AND: //fallthrough
-            case E_OP_OR:  //fallthrough
-                precedence = 1;
-                break;
-            case E_OP_ADD: //fallthrough
-            case E_OP_SUB: //fallthrough
-            case E_OP_GT:  //fallthrough
-            case E_OP_LT:  //fallthrough
-            case E_OP_EQ:  //fallthrough
-            case E_OP_GTE: //fallthrough
-            case E_OP_LTE: //fallthrough
-            case E_OP_AA:  //falthrough
-                precedence = 2;
-                break;
-            case E_OP_MULT: //fallthrough
-            case E_OP_DIV:  //fallthrough
-            case E_OP_MOD:
-                precedence = 3;
-                break;
-            case E_OP_MIN: //fallthrough
-            case E_OP_MAX: //fallthrough
-            case E_OP_LCM: //fallthrough
-            case E_OP_GCD:
-                precedence = 4;
-                break;
-            default:
-                throw vtr::VtrError(vtr::string_fmt("in get_fobj_precedence: unrecognized operator: %d\n", op), __FILE__, __LINE__);
-                break;
-        }
-    } else {
-        throw vtr::VtrError(vtr::string_fmt("in get_fobj_precedence: no precedence possible for formula object type %d\n", fobj.type), __FILE__, __LINE__);
-    }
-
-    return precedence;
-}
-
-/* Returns associativity of the specified operator */
-static bool op_associativity_is_left(const t_operator& /*op*/) {
-    bool is_left = true;
-
-    /* associativity is 'left' for all but the power operator, which is not yet implemented */
-    //TODO:
-    //if op is 'power' set associativity is_left=false and return
-
-    return is_left;
-}
-
-/* used by the shunting-yard formula parser to deal with operators such as add and subtract */
-static void handle_operator(const Formula_Object& fobj, vector<Formula_Object>& rpn_output, stack<Formula_Object>& op_stack) {
-    if (E_FML_OPERATOR != fobj.type) {
-        throw vtr::VtrError(vtr::string_fmt("in handle_operator: passed in formula object not of type operator\n"), __FILE__, __LINE__);
-    }
-    int op_pr = get_fobj_precedence(fobj);
-    bool op_assoc_is_left = op_associativity_is_left(fobj.data.op);
-
-    Formula_Object fobj_dummy;
-    bool keep_going = false;
-    do {
-        /* here we keep popping operators off the stack onto back of rpn_output while
-         * associativity of operator is 'left' and precedence op_pr = top_pr, or while
-         * precedence op_pr < top_pr */
-
-        /* determine whether we should keep popping operators off the op stack */
-        if (op_stack.empty()) {
-            keep_going = false;
-        } else {
-            /* get precedence of top operator */
-            int top_pr = get_fobj_precedence(op_stack.top());
-
-            keep_going = ((op_assoc_is_left && op_pr == top_pr)
-                          || op_pr < top_pr);
-
-            if (keep_going) {
-                /* pop top operator off stack onto the back of rpn_output */
-                fobj_dummy = op_stack.top();
-                rpn_output.push_back(fobj_dummy);
-                op_stack.pop();
-            }
-        }
-
-    } while (keep_going);
-
-    /* place new operator object on top of stack */
-    op_stack.push(fobj);
-
-    return;
-}
-
-/* used by the shunting-yard formula parser to deal with brackets, ie '(' and ')' */
-static void handle_bracket(const Formula_Object& fobj, vector<Formula_Object>& rpn_output, stack<Formula_Object>& op_stack) {
-    if (E_FML_BRACKET != fobj.type) {
-        throw vtr::VtrError(vtr::string_fmt("in handle_bracket: passed-in formula object not of type bracket\n"), __FILE__, __LINE__);
-    }
-
-    /* check if left or right bracket */
-    if (fobj.data.left_bracket) {
-        /* left bracket, so simply push it onto operator stack */
-        op_stack.push(fobj);
-    } else {
-        bool keep_going = false;
-        do {
-            /* here we keep popping operators off op_stack onto back of rpn_output until a
-             * left bracket is encountered */
-
-            if (op_stack.empty()) {
-                /* didn't find an opening bracket - mismatched brackets */
-                keep_going = false;
-                throw vtr::VtrError(vtr::string_fmt("Ran out of stack while parsing brackets -- bracket mismatch in user-specified formula\n"), __FILE__, __LINE__);
-            }
-
-            Formula_Object next_fobj = op_stack.top();
-            if (E_FML_BRACKET == next_fobj.type) {
-                if (next_fobj.data.left_bracket) {
-                    /* matching bracket found -- pop off stack and finish */
-                    op_stack.pop();
-                    keep_going = false;
-                } else {
-                    /* should not find two right brackets without a left bracket in-between */
-                    keep_going = false;
-                    throw vtr::VtrError(vtr::string_fmt("Mismatched brackets encountered in user-specified formula\n"), __FILE__, __LINE__);
-                }
-            } else if (E_FML_OPERATOR == next_fobj.type) {
-                /* pop operator off stack onto the back of rpn_output */
-                Formula_Object fobj_dummy = op_stack.top();
-                rpn_output.push_back(fobj_dummy);
-                op_stack.pop();
-                keep_going = true;
-            } else {
-                keep_going = false;
-                throw vtr::VtrError(vtr::string_fmt("Found unexpected formula object on operator stack: %d\n", next_fobj.type), __FILE__, __LINE__);
-            }
-        } while (keep_going);
-    }
-    return;
-}
-
-/* used by the shunting-yard formula parser to deal with commas, ie ','. These occur in function calls*/
-static void handle_comma(const Formula_Object& fobj, vector<Formula_Object>& rpn_output, stack<Formula_Object>& op_stack) {
-    if (E_FML_COMMA != fobj.type) {
-        throw vtr::VtrError(vtr::string_fmt("in handle_comm: passed-in formula object not of type comma\n"), __FILE__, __LINE__);
-    }
-
-    //Commas are treated as right (closing) bracket since it completes a
-    //sub-expression, except that we do not cause the left (opening) brack to
-    //be popped
-
-    bool keep_going = true;
-    do {
-        /* here we keep popping operators off op_stack onto back of rpn_output until a
-         * left bracket is encountered */
-
-        if (op_stack.empty()) {
-            /* didn't find an opening bracket - mismatched brackets */
-            keep_going = false;
-            throw vtr::VtrError(vtr::string_fmt("Ran out of stack while parsing comma -- bracket mismatch in user-specified formula\n"), __FILE__, __LINE__);
-            keep_going = false;
-        }
-
-        Formula_Object next_fobj = op_stack.top();
-        if (E_FML_BRACKET == next_fobj.type) {
-            if (next_fobj.data.left_bracket) {
-                /* matching bracket found */
-                keep_going = false;
-            } else {
-                /* should not find two right brackets without a left bracket in-between */
-                throw vtr::VtrError(vtr::string_fmt("Mismatched brackets encountered in user-specified formula\n"), __FILE__, __LINE__);
-                keep_going = false;
-            }
-        } else if (E_FML_OPERATOR == next_fobj.type) {
-            /* pop operator off stack onto the back of rpn_output */
-            Formula_Object fobj_dummy = op_stack.top();
-            rpn_output.push_back(fobj_dummy);
-            op_stack.pop();
-            keep_going = true;
-        } else {
-            throw vtr::VtrError(vtr::string_fmt("Found unexpected formula object on operator stack: %d\n", next_fobj.type), __FILE__, __LINE__);
-            keep_going = false;
-        }
-
-    } while (keep_going);
-}
-
-/* parses a reverse-polish notation vector corresponding to a switchblock formula
- * and returns the integer result */
-static int parse_rpn_vector(vector<Formula_Object>& rpn_vec) {
-    int result = -1;
-
-    /* first entry should always be a number or variable name*/
-    if (E_FML_NUMBER != rpn_vec[0].type && E_FML_VARIABLE != rpn_vec[0].type) {
-        throw vtr::VtrError(vtr::string_fmt("parse_rpn_vector: first entry is not a number or variable(was %s)\n", rpn_vec[0].to_string().c_str()), __FILE__, __LINE__);
-    }
-
-    if (rpn_vec.size() == 1 && rpn_vec[0].type == E_FML_NUMBER) {
-        /* if the vector size is 1 then we just have a number (which was verified above) */
-        result = rpn_vec[0].data.num;
-    } else {
-        /* have numbers and operators */
-        Formula_Object fobj;
-        int ivec = 0;
-        /* keep going until we have gone through the whole vector */
-        while (!rpn_vec.empty()) {
-            /* keep going until we have hit an operator */
-            do {
-                ivec++; /* first item should never be operator anyway */
-                if (ivec == (int)rpn_vec.size()) {
-                    throw vtr::VtrError(vtr::string_fmt("parse_rpn_vector(): found multiple numbers in formula, but no operator\n"), __FILE__, __LINE__);
-                }
-            } while (E_FML_OPERATOR != rpn_vec[ivec].type);
-
-            /* now we apply the selected operation to the two previous entries */
-            /* the result is stored in the object that used to be the operation */
-            rpn_vec[ivec].data.num = apply_rpn_op(rpn_vec[ivec - 2], rpn_vec[ivec - 1], rpn_vec[ivec]);
-            rpn_vec[ivec].type = E_FML_NUMBER;
-
-            /* remove the previous two entries from the vector */
-            rpn_vec.erase(rpn_vec.begin() + ivec - 2, rpn_vec.begin() + ivec - 0);
-            ivec -= 2;
-
-            /* if we're down to one element, we are done */
-            if (1 == rpn_vec.size()) {
-                result = rpn_vec[ivec].data.num;
-                rpn_vec.erase(rpn_vec.begin() + ivec);
-            }
-        }
-    }
-    return result;
-}
-
-/* applies operation specified by 'op' to the given arguments. arg1 comes before arg2 */
-static int apply_rpn_op(const Formula_Object& arg1, const Formula_Object& arg2, const Formula_Object& op) {
-    int result = -1;
-
-    /* arguments must be numbers or variables */
-    if (E_FML_NUMBER != arg1.type || E_FML_NUMBER != arg2.type) {
-        if (E_FML_VARIABLE != arg1.type && E_FML_VARIABLE != arg2.type) {
-            throw vtr::VtrError(vtr::string_fmt("in apply_rpn_op: one of the arguments is not a number or variable(was '%s %s %s')\n", arg1.to_string().c_str(), op.to_string().c_str(), arg2.to_string().c_str()), __FILE__, __LINE__);
-        }
-    }
-
-    /* check that op is actually an operation */
-    if (E_FML_OPERATOR != op.type) {
-        throw vtr::VtrError(vtr::string_fmt("in apply_rpn_op: the object specified as the operation is not of operation type\n"), __FILE__, __LINE__);
-    }
-
-    /* apply operation to arguments */
-    switch (op.data.op) {
-        case E_OP_ADD:
-            result = arg1.data.num + arg2.data.num;
-            break;
-        case E_OP_SUB:
-            result = arg1.data.num - arg2.data.num;
-            break;
-        case E_OP_MULT:
-            result = arg1.data.num * arg2.data.num;
-            break;
-        case E_OP_DIV:
-            result = arg1.data.num / arg2.data.num;
-            break;
-        case E_OP_MAX:
-            result = std::max(arg1.data.num, arg2.data.num);
-            break;
-        case E_OP_MIN:
-            result = std::min(arg1.data.num, arg2.data.num);
-            break;
-        case E_OP_GCD:
-            result = vtr::gcd(arg1.data.num, arg2.data.num);
-            break;
-        case E_OP_LCM:
-            result = vtr::lcm(arg1.data.num, arg2.data.num);
-            break;
-        case E_OP_AND:
-            result = arg1.data.num && arg2.data.num;
-            break;
-        case E_OP_OR:
-            result = (arg1.data.num || arg2.data.num);
-            break;
-        case E_OP_GT:
-            result = arg1.data.num > arg2.data.num;
-            break;
-        case E_OP_LT:
-            result = arg1.data.num < arg2.data.num;
-            break;
-        case E_OP_GTE:
-            result = (arg1.data.num >= arg2.data.num);
-            break;
-        case E_OP_LTE:
-            result = (arg1.data.num <= arg2.data.num);
-            break;
-        case E_OP_EQ:
-            result = arg1.data.num == arg2.data.num;
-            break;
-        case E_OP_MOD:
-            result = arg1.data.num % arg2.data.num;
-            break;
-        case E_OP_AA:
-            result = additional_assignment_op(arg1.data.num, arg2.data.num);
-            break;
-        default:
-            throw vtr::VtrError(vtr::string_fmt("in apply_rpn_op: invalid operation: %d\n", op.data.op), __FILE__, __LINE__);
-            break;
-    }
-
-    return result;
-}
-
-/* checks if specified character represents an ASCII number */
-static bool is_char_number(const char ch) {
-    bool result = false;
-
-    if (ch >= '0' && ch <= '9') {
-        result = true;
-    } else {
-        result = false;
-    }
-
-    return result;
-}
-
-//checks if entered char is a known operator (e.g +,-,<,>,...)
-static bool is_operator(const char ch) {
-    switch (ch) {
-        case '+': //fallthrough
-        case '-': //fallthrough
-        case '*': //fallthrough
-        case '/': //fallthrough
-        case ')': //fallthrough
-        case '(': //fallthrough
-        case ',': //fallthrough
-        case '&': //fallthrough
-        case '|': //fallthrough
-        case '>': //fallthrough
-        case '<': //fallthrough
-        case '=': //fallthrough
-        case '%': //fallthrough
-            return true;
-        default:
-            return false;
-    }
-}
-
-//returns true if string signifies a function e.g max, min
-static bool is_function(std::string name) {
-    if (name == "min"
-        || name == "max"
-        || name == "gcd"
-        || name == "lcm") {
-        return true;
-    }
-    return false;
-}
-
-//returns enumerated code depending on the compound operator
-//compound operators are operators with more than one character e.g &&, >=
-t_compound_operator is_compound_op(const char* ch) {
-    if (ch[1] != '\0') {
-        if (ch[0] == '=' && ch[1] == '=')
-            return E_COM_OP_EQ;
-        else if (ch[0] == '>' && ch[1] == '=')
-            return E_COM_OP_GTE;
-        else if (ch[0] == '<' && ch[1] == '=')
-            return E_COM_OP_LTE;
-        else if (ch[0] == '&' && ch[1] == '&')
-            return E_COM_OP_AND;
-        else if (ch[0] == '|' && ch[1] == '|')
-            return E_COM_OP_OR;
-        else if (ch[0] == '+' && ch[1] == '=')
-            return E_COM_OP_AA;
-    }
-    return E_COM_OP_UNDEFINED;
-}
-
-//checks if the entered string is a known variable name
-static bool is_variable(std::string var_name) {
-    if (same_string(var_name, "from_block") || same_string(var_name, "temp_count") || same_string(var_name, "move_num") || same_string(var_name, "route_net_id") || same_string(var_name, "in_blocks_affected") || same_string(var_name, "router_iter")) {
-        return true;
-    }
-    return false;
-}
-
-//returns the length of the substring consisting of valid vairable characters from
-//the start of the string
-static int identifier_length(const char* str) {
-    int ichar = 0;
-
-    if (!str) return 0;
-
-    while (str[ichar] != '\0') {
-        //No whitespace
-        if (str[ichar] == ' ') break;
-
-        //Not an operator
-        if (is_operator(str[ichar])) break;
-
-        //First char must not be a number
-        if (ichar == 0 && is_char_number(str[ichar])) break;
-
-        ++ichar; //Next character
-    }
-
-    return ichar;
-}
-
-/* checks if the specified formula is piece-wise defined */
-bool FormulaParser::is_piecewise_formula(const char* formula) {
-    bool result = false;
-    /* if formula is piecewise, we expect '{' to be the very first character */
-    if ('{' == formula[0]) {
-        result = true;
-    } else {
-        result = false;
-    }
-    return result;
-}
-
-//compares two string while ignoring case and white space. returns true if strings are the same
-bool same_string(std::string str1, std::string str2) {
-    //earse any white space in both strings
-    str1.erase(remove(str1.begin(), str1.end(), ' '), str1.end());
-    str2.erase(remove(str2.begin(), str2.end(), ' '), str2.end());
-
-    //converting both strings to lower case to eliminate case sensivity
-    std::transform(str1.begin(), str1.end(), str1.begin(), ::tolower);
-    std::transform(str2.begin(), str2.end(), str2.begin(), ::tolower);
-
-    return (str1.compare(str2) == 0);
-}
-
-//the += operator
-bool additional_assignment_op(int arg1, int arg2) {
-    int result = 0;
-    if (before_addition == 0)
-        before_addition = arg1;
-    result = (arg1 == (before_addition + arg2));
-    if (result)
-        before_addition = 0;
-    return result;
-}
-
-//recognizes the block_id to look for (entered by the user)
-//then looks for that block_id in all the blocks moved in the last perturbation.
-//returns the block id if found, else just -1
-int in_blocks_affected(std::string expression_left) {
-    int wanted_block = -1;
-    int found_block;
-    std::stringstream ss;
-    ss << expression_left;
-    std::string s;
-
-    //finds block_id to look for
-    while (!ss.eof()) {
-        ss >> s;
-        if (std::stringstream(s) >> found_block) {
-            s = "";
-            break;
-        }
-    }
-
-    //goes through blocks_affected
-    for (size_t i = 0; i < bp_state_globals.get_glob_breakpoint_state()->blocks_affected_by_move.size(); i++) {
-        if (bp_state_globals.get_glob_breakpoint_state()->blocks_affected_by_move[i] == found_block) {
-            bp_state_globals.get_glob_breakpoint_state()->block_affected = found_block;
-            return found_block;
-        }
-    }
-    return wanted_block;
-}
-
-} //namespace vtr
-
-//returns the global variable that holds all values that can trigger a breakpoint and are updated by the router and placer
-BreakpointStateGlobals* get_bp_state_globals() {
-    return &bp_state_globals;
-}
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_expr_eval.h b/third_party/vtr/libs/vtrutil/src/vtr_expr_eval.h
deleted file mode 100644
index 43aac411d..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_expr_eval.h
+++ /dev/null
@@ -1,234 +0,0 @@
-#ifndef EXPR_EVAL_H
-#define EXPR_EVAL_H
-#include <map>
-#include <string>
-#include <vector>
-#include <stack>
-#include <cstring>
-#include <iostream>
-
-#include "vtr_util.h"
-#include "vtr_error.h"
-#include "vtr_string_view.h"
-#include "vtr_flat_map.h"
-#include "breakpoint_state_globals.h"
-
-/**
- * @file
- * @brief   This file implements an expressopn evaluator
- *
- * The expression evaluator is capable of performing many operations on given variables, 
- * after parsing the expression. The parser goes character by character and identifies 
- * the type of char or chars. (e.g bracket, comma, number, operator, variable). 
- * The supported operations include addition, subtraction, multiplication, division, 
- * finding max, min, gcd, lcm, as well as boolean operators such as &&, ||, ==, >=, <= etc. 
- * The result is returned as an int value and operation precedance is taken into account. 
- * (e.g given 3-2*4, the result will be -5). This class is also used to parse expressions 
- * indicating breakpoints. The breakpoint expressions consist of variable names such as 
- * move_num, temp_num, from_block etc, and boolean operators (e.g move_num == 3). 
- * Multiple breakpoints can be expressed in one expression
- */
-
-//function declarations
-///@brief returns the global variable that holds all values that can trigger a breakpoint and are updated by the router and placer
-BreakpointStateGlobals* get_bp_state_globals();
-
-namespace vtr {
-
-/**** Structs ****/
-
-///@brief a class to hold the formula data
-class t_formula_data {
-  public:
-    ///@brief clears all the formula data
-    void clear() {
-        vars_.clear();
-    }
-
-    ///@brief set the value of a specific part of the formula
-    void set_var_value(vtr::string_view var, int value) { vars_[var] = value; }
-
-    ///@brief set the value of a specific part of the formula (the var can be c-style string)
-    void set_var_value(const char* var, int value) { vars_[vtr::string_view(var)] = value; }
-
-    ///@brief get the value of a specific part of the formula
-    int get_var_value(const std::string& var) const {
-        return get_var_value(vtr::string_view(var.data(), var.size()));
-    }
-
-    ///@brief get the value of a specific part of the formula (the var can be c-style string)
-    int get_var_value(vtr::string_view var) const {
-        auto iter = vars_.find(var);
-        if (iter == vars_.end()) {
-            std::string copy(var.data(), var.size());
-            throw vtr::VtrError(vtr::string_fmt("No value found for variable '%s' from expression\n", copy.c_str()), __FILE__, __LINE__);
-        }
-
-        return iter->second;
-    }
-
-  private:
-    vtr::flat_map<vtr::string_view, int> vars_;
-};
-
-/**** Enums ****/
-///@brief Used to identify the type of symbolic formula object
-typedef enum e_formula_obj {
-    E_FML_UNDEFINED = 0,
-    E_FML_NUMBER,
-    E_FML_BRACKET,
-    E_FML_COMMA,
-    E_FML_OPERATOR,
-    E_FML_VARIABLE,
-    E_FML_NUM_FORMULA_OBJS
-} t_formula_obj;
-
-///@brief Used to identify an operator in a formula
-typedef enum e_operator {
-    E_OP_UNDEFINED = 0,
-    E_OP_ADD,
-    E_OP_SUB,
-    E_OP_MULT,
-    E_OP_DIV,
-    E_OP_MIN,
-    E_OP_MAX,
-    E_OP_GCD,
-    E_OP_LCM,
-    E_OP_AND,
-    E_OP_OR,
-    E_OP_GT,
-    E_OP_LT,
-    E_OP_GTE,
-    E_OP_LTE,
-    E_OP_EQ,
-    E_OP_MOD,
-    E_OP_AA,
-    E_OP_NUM_OPS
-} t_operator;
-
-///@brief Used to identify operators with more than one character
-typedef enum e_compound_operator {
-    E_COM_OP_UNDEFINED = 0,
-    E_COM_OP_AND,
-    E_COM_OP_OR,
-    E_COM_OP_EQ,
-    E_COM_OP_AA,
-    E_COM_OP_GTE,
-    E_COM_OP_LTE
-
-} t_compound_operator;
-
-/**** Class Definitions ****/
-/** 
- * @brief A class represents an object in a formula
- *
- * This object can be any of the following:
- *      - a number
- *      - a bracket
- *      - an operator
- *      - a variable
- */
-class Formula_Object {
-  public:
-    ///@brief indicates the type of formula object this is
-    t_formula_obj type;
-
-    /**
-     * @brief object data, accessed based on what kind of object this is 
-     */
-    union u_Data {
-        int num;           ///< for number objects
-        t_operator op;     ///< for operator objects
-        bool left_bracket; ///< for bracket objects -- specifies if this is a left bracket
-        //std::string variable;
-
-        u_Data() { memset(this, 0, sizeof(u_Data)); }
-    } data;
-
-    ///@brief constructor
-    Formula_Object() {
-        this->type = E_FML_UNDEFINED;
-    }
-
-    ///@brief convert enum to string
-    std::string to_string() const {
-        if (type == E_FML_NUMBER || type == E_FML_VARIABLE) {
-            return std::to_string(data.num);
-        } else if (type == E_FML_BRACKET) {
-            if (data.left_bracket) {
-                return "(";
-            } else {
-                return ")";
-            }
-        } else if (type == E_FML_COMMA) {
-            return ",";
-        } else if (type == E_FML_OPERATOR) {
-            if (data.op == E_OP_ADD) {
-                return "+";
-            } else if (data.op == E_OP_SUB) {
-                return "-";
-            } else if (data.op == E_OP_MULT) {
-                return "*";
-            } else if (data.op == E_OP_DIV) {
-                return "/";
-            } else if (data.op == E_OP_MOD) {
-                return "%";
-            } else if (data.op == E_OP_AND) {
-                return "&&";
-            } else if (data.op == E_OP_OR) {
-                return "||";
-            } else if (data.op == E_OP_GT) {
-                return ">";
-            } else if (data.op == E_OP_LT) {
-                return "<";
-            } else if (data.op == E_OP_GTE) {
-                return ">=";
-            } else if (data.op == E_OP_LTE) {
-                return "<=";
-            } else if (data.op == E_OP_EQ) {
-                return "==";
-            } else if (data.op == E_OP_MIN) {
-                return "min";
-            } else if (data.op == E_OP_MAX) {
-                return "max";
-            } else if (data.op == E_OP_GCD) {
-                return "gcd";
-            } else if (data.op == E_OP_LCM) {
-                return "lcm";
-            } else if (data.op == E_OP_AA) {
-                return "+=";
-            } else {
-                return "???"; //Unkown
-            }
-        } else {
-            return "???"; //Unkown
-        }
-    }
-};
-
-///@brief A class to parse formula
-class FormulaParser {
-  public:
-    FormulaParser() = default;
-    FormulaParser(const FormulaParser&) = delete;
-    FormulaParser& operator=(const FormulaParser&) = delete;
-
-    ///@brief returns integer result according to specified formula and data
-    int parse_formula(std::string formula, const t_formula_data& mydata, bool is_breakpoint = false);
-
-    ///@brief returns integer result according to specified piece-wise formula and data
-    int parse_piecewise_formula(const char* formula, const t_formula_data& mydata);
-
-    ///@brief checks if the specified formula is piece-wise defined
-    static bool is_piecewise_formula(const char* formula);
-
-  private:
-    std::vector<Formula_Object> rpn_output_;
-
-    // stack for handling operators and brackets in formula
-    std::stack<Formula_Object> op_stack_;
-};
-
-} // namespace vtr
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_flat_map.h b/third_party/vtr/libs/vtrutil/src/vtr_flat_map.h
deleted file mode 100644
index 2c0b34f85..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_flat_map.h
+++ /dev/null
@@ -1,483 +0,0 @@
-#ifndef VTR_FLAT_MAP
-#define VTR_FLAT_MAP
-#include <functional>
-#include <iterator>
-#include <vector>
-#include <algorithm>
-#include <stdexcept>
-
-#include "vtr_assert.h"
-
-namespace vtr {
-
-//Forward declaration
-template<class K, class V, class Compare = std::less<K>, class Storage = std::vector<std::pair<K, V>>>
-class flat_map;
-
-template<class K, class V, class Compare = std::less<K>, class Storage = std::vector<std::pair<K, V>>>
-class flat_map2;
-
-/**
- * @brief A function to create a flat map
- *
- * Helper function to create a flat map from a vector of pairs
- * without haveing to explicity specify the key and value types
- */
-template<class K, class V>
-flat_map<K, V> make_flat_map(std::vector<std::pair<K, V>>&& vec) {
-    return flat_map<K, V>(std::move(vec));
-}
-
-///@brief Same as make_flat_map but for flat_map2
-template<class K, class V>
-flat_map2<K, V> make_flat_map2(std::vector<std::pair<K, V>>&& vec) {
-    return flat_map2<K, V>(std::move(vec));
-}
-
-/**
- * @brief flat_map is a (nearly) std::map compatible container 
- * 
- * It uses a vector as it's underlying storage. Internally the stored elements 
- * are kept sorted allowing efficient look-up in O(logN) time via binary search.
- *
- *
- * This container is typically useful in the following scenarios:
- *    - Reduced memory usage if key/value are small (std::map needs to store pointers to
- *      other BST nodes which can add substantial overhead for small keys/values)
- *    - Faster search/iteration by exploiting data locality (all elments are in continguous
- *      memory enabling better spatial locality)
- *
- * The container deviates from the behaviour of std::map in the following important ways:
- *    - Insertion/erase takes O(N) instead of O(logN) time
- *    - Iterators may be invalidated on insertion/erase (i.e. if the vector is reallocated)
- *
- * The slow insertion/erase performance makes this container poorly suited to maps that
- * frequently add/remove new keys. If this is required you likely want std::map or
- * std::unordered_map. However if the map is constructed once and then repeatedly quieried,
- * consider using the range or vector-based constructors which initializes the flat_map in
- * O(NlogN) time.
- */
-template<class K, class T, class Compare, class Storage>
-class flat_map {
-  public:
-    typedef K key_type;
-    typedef T mapped_type;
-    typedef std::pair<K, T> value_type;
-    typedef Compare key_compare;
-    typedef value_type& reference;
-    typedef const value_type& const_reference;
-    typedef typename Storage::iterator iterator;
-    typedef typename Storage::const_iterator const_iterator;
-    typedef typename Storage::reverse_iterator reverse_iterator;
-    typedef typename Storage::const_reverse_iterator const_reverse_iterator;
-    typedef typename Storage::difference_type difference_type;
-    typedef typename Storage::size_type size_type;
-
-    class value_compare;
-
-  public:
-    ///@brief Standard constructors
-    flat_map() = default;
-    flat_map(const flat_map&) = default;
-    flat_map(flat_map&&) = default;
-    flat_map& operator=(const flat_map&) = default;
-    flat_map& operator=(flat_map&&) = default;
-
-    ///@brief range constructor
-    template<class InputIterator>
-    flat_map(InputIterator first, InputIterator last) {
-        // Copy the values
-        std::copy(first, last, std::back_inserter(vec_));
-
-        sort();
-        uniquify();
-    }
-
-    ///@brief direct vector constructor
-    explicit flat_map(Storage&& values) {
-        assign(std::move(values));
-    }
-
-    /**
-     * @brief Move the values
-     * 
-     * Should be more efficient than the range constructor which 
-     * must copy each element
-     */
-    void assign(Storage&& values) {
-        vec_ = std::move(values);
-
-        sort();
-        uniquify();
-    }
-
-    ///@brief By moving the values this should be more efficient than the range constructor which must copy each element
-    void assign_sorted(Storage&& values) {
-        vec_ = std::move(values);
-        if (vec_.size() > 1) {
-            for (size_t i = 0; i < vec_.size() - 1; ++i) {
-                VTR_ASSERT_SAFE(vec_[i].first < vec_[i + 1].first);
-            }
-        }
-    }
-
-    ///@brief Return an iterator pointing to the first element in the sequence:
-    iterator begin() { return vec_.begin(); }
-
-    ///@brief Return a constant iterator pointing to the first element in the sequence:
-    const_iterator begin() const { return vec_.begin(); }
-
-    ///@brief Returns an iterator referring to the past-the-end element in the vector container.
-    iterator end() { return vec_.end(); }
-
-    ///@brief Returns a constant iterator referring to the past-the-end element in the vector container.
-    const_iterator end() const { return vec_.end(); }
-
-    ///@brief Returns a reverse iterator which points to the last element of the map.
-    reverse_iterator rbegin() { return vec_.rbegin(); }
-
-    ///@brief Returns a constant reverse iterator which points to the last element of the map.
-    const_reverse_iterator rbegin() const { return vec_.rbegin(); }
-
-    ///@brief Returns a reverse iterator pointing to the theoretical element preceding the first element in the vector (which is considered its reverse end).
-    reverse_iterator rend() { return vec_.rend(); }
-
-    ///@brief Returns a constant reverse iterator pointing to the theoretical element preceding the first element in the vector (which is considered its reverse end).
-    const_reverse_iterator rend() const { return vec_.rend(); }
-
-    ///@brief Returns a constant_iterator to the first element in the underlying vector
-    const_iterator cbegin() const { return vec_.begin(); }
-
-    ///@brief Returns a const_iterator pointing to the past-the-end element in the container.
-    const_iterator cend() const { return vec_.end(); }
-
-    ///@brief Returns a const_reverse_iterator pointing to the last element in the container (i.e., its reverse beginning).
-    const_reverse_iterator crbegin() const { return vec_.rbegin(); }
-
-    ///@brief Returns a const_reverse_iterator pointing to the theoretical element preceding the first element in the container (which is considered its reverse end).
-    const_reverse_iterator crend() const { return vec_.rend(); }
-
-    ///@brief Return true if the underlying vector is empty
-    bool empty() const { return vec_.empty(); }
-
-    ///@brief Return the container size
-    size_type size() const { return vec_.size(); }
-
-    ///@brief Return the underlying vector's max size
-    size_type max_size() const { return vec_.max_size(); }
-
-    ///@brief The constant version of operator []
-    const mapped_type& operator[](const key_type& key) const {
-        auto iter = find(key);
-        if (iter == end()) {
-            //Not found
-            throw std::out_of_range("Invalid key");
-        }
-
-        return iter->second;
-    }
-
-    ///@brief operator []
-    mapped_type& operator[](const key_type& key) {
-        auto iter = std::lower_bound(begin(), end(), key, value_comp());
-        if (iter == end()) {
-            // The new element should be placed at the end, so do so.
-            vec_.emplace_back(std::make_pair(key, mapped_type()));
-            return vec_.back().second;
-        } else {
-            if (iter->first == key) {
-                // The element already exists, return it.
-                return iter->second;
-            } else {
-                // The element does not exist, insert such that vector remains
-                // sorted.
-                iter = vec_.emplace(iter, std::make_pair(key, mapped_type()));
-                return iter->second;
-            }
-        }
-    }
-
-    ///@brief operator at()
-    mapped_type& at(const key_type& key) {
-        return const_cast<mapped_type&>(const_cast<const flat_map*>(this)->at(key));
-    }
-
-    ///@brief The constant version of at() operator
-    const mapped_type& at(const key_type& key) const {
-        auto iter = find(key);
-        if (iter == end()) {
-            throw std::out_of_range("Invalid key");
-        }
-        return iter->second;
-    }
-
-    ///@brief Insert value
-    std::pair<iterator, bool> insert(const value_type& value) {
-        auto iter = lower_bound(value.first);
-        if (iter != end() && keys_equivalent(iter->first, value.first)) {
-            //Found existing
-            return std::make_pair(iter, false);
-        } else {
-            //Insert
-            iter = insert(iter, value);
-
-            return std::make_pair(iter, true);
-        }
-    }
-
-    ///@brief Emplace function
-    std::pair<iterator, bool> emplace(const value_type&& value) {
-        auto iter = lower_bound(value.first);
-        if (iter != end() && keys_equivalent(iter->first, value.first)) {
-            //Found existing
-            return std::make_pair(iter, false);
-        } else {
-            //Emplace
-            iter = emplace(iter, value);
-
-            return std::make_pair(iter, true);
-        }
-    }
-
-    ///@brief Insert value with position hint
-    iterator insert(const_iterator position, const value_type& value) {
-        //In a legal position
-        VTR_ASSERT(position == begin() || value_comp()(*(position - 1), value));
-        VTR_ASSERT((size() > 0 && (position + 1) == end()) || position == end() || !value_comp()(*(position + 1), value));
-
-        iterator iter = vec_.insert(position, value);
-
-        return iter;
-    }
-
-    ///@brief Emplace value with position hint
-    iterator emplace(const_iterator position, const value_type& value) {
-        //In a legal position
-        VTR_ASSERT(position == begin() || value_comp()(*(position - 1), value));
-        VTR_ASSERT((size() > 0 && (position + 1) == end()) || position == end() || !value_comp()(*(position + 1), value));
-
-        iterator iter = vec_.emplace(position, value);
-
-        return iter;
-    }
-
-    ///@brief Insert range
-    template<class InputIterator>
-    void insert(InputIterator first, InputIterator last) {
-        vec_.insert(vec_.end(), first, last);
-
-        //TODO: could be more efficient
-        sort();
-        uniquify();
-    }
-
-    ///@brief Erase by key
-    void erase(const key_type& key) {
-        auto iter = find(key);
-        if (iter != end()) {
-            vec_.erase(iter);
-        }
-    }
-
-    ///@brief Erase at iterator
-    void erase(const_iterator position) {
-        vec_.erase(position);
-    }
-
-    ///@brief Erase range
-    void erase(const_iterator first, const_iterator last) {
-        vec_.erase(first, last);
-    }
-
-    ///@brief swap two flat maps
-    void swap(flat_map& other) { std::swap(*this, other); }
-
-    ///@brief clear the flat map
-    void clear() { vec_.clear(); }
-
-    ///@brief templated emplace function
-    template<class... Args>
-    iterator emplace(const key_type& key, Args&&... args) {
-        auto iter = lower_bound(key);
-        if (iter != end() && keys_equivalent(iter->first, key)) {
-            //Found
-            return std::make_pair(iter, false);
-        } else {
-            //Emplace
-            iter = emplace_hint(iter, key, std::forward<Args>(args)...);
-            return std::make_pair(iter, true);
-        }
-    }
-
-    ///@brief templated emplace_hint function
-    template<class... Args>
-    iterator emplace_hint(const_iterator position, Args&&... args) {
-        return vec_.emplace(position, std::forward<Args>(args)...);
-    }
-
-    ///@brief Reserve a minimum capacity for the underlying vector
-    void reserve(size_type n) { vec_.reserve(n); }
-
-    ///@brief Reduce the capacity of the underlying vector to fit its size
-    void shrink_to_fit() { vec_.shrink_to_fit(); }
-
-    ///@brief
-    key_compare key_comp() const { return key_compare(); }
-
-    ///@brief
-    value_compare value_comp() const { return value_compare(key_comp()); }
-
-    ///@brief Find a key and return an iterator to the found key
-    iterator find(const key_type& key) {
-        const_iterator const_iter = const_cast<const flat_map*>(this)->find(key);
-        return convert_to_iterator(const_iter);
-    }
-
-    ///@brief Find a key and return a constant iterator to the found key
-    const_iterator find(const key_type& key) const {
-        auto iter = lower_bound(key);
-        if (iter != end() && keys_equivalent(iter->first, key)) {
-            //Found
-            return iter;
-        }
-        return end();
-    }
-
-    ///@brief Return the count of occurances of a key
-    size_type count(const key_type& key) const {
-        return (find(key) == end()) ? 0 : 1;
-    }
-
-    ///@brief lower bound function
-    iterator lower_bound(const key_type& key) {
-        const_iterator const_iter = const_cast<const flat_map*>(this)->lower_bound(key);
-        return convert_to_iterator(const_iter);
-    }
-
-    ///@brief Return a constant iterator to the lower bound
-    const_iterator lower_bound(const key_type& key) const {
-        return std::lower_bound(begin(), end(), key, value_comp());
-    }
-
-    ///@brief upper bound function
-    iterator upper_bound(const key_type& key) {
-        const_iterator const_iter = const_cast<const flat_map*>(this)->upper_bound(key);
-        return convert_to_iterator(const_iter);
-    }
-
-    ///@brief Return a constant iterator to the upper bound
-    const_iterator upper_bound(const key_type& key) const {
-        return std::upper_bound(begin(), end(), key, value_comp());
-    }
-
-    ///@brief Returns a range containing all elements equivalent to "key"
-    std::pair<iterator, iterator> equal_range(const key_type& key) {
-        auto const_iter_pair = const_cast<const flat_map*>(this)->equal_range(key);
-        return std::pair<iterator, iterator>(iterator(const_iter_pair.first), iterator(const_iter_pair.second));
-    }
-
-    ///@brief Returns a constant range containing all elements equivalent to "key"
-    std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const {
-        return std::equal_range(begin(), end(), key);
-    }
-
-  public:
-    ///@brief Swaps 2 flat maps
-    friend void swap(flat_map& lhs, flat_map& rhs) { std::swap(lhs.vec_, rhs.vec_); }
-
-  private:
-    bool keys_equivalent(const key_type& lhs, const key_type& rhs) const {
-        return !key_comp()(lhs, rhs) && !key_comp()(rhs, lhs);
-    }
-
-    void sort() {
-        std::sort(vec_.begin(), vec_.end(), value_comp());
-    }
-
-    void uniquify() {
-        //Uniquify
-        auto key_equal_pred = [this](const value_type& lhs, const value_type& rhs) {
-            return !value_comp()(lhs, rhs) && !value_comp()(rhs, lhs);
-        };
-        vec_.erase(std::unique(vec_.begin(), vec_.end(), key_equal_pred), vec_.end());
-    }
-
-    iterator convert_to_iterator(const_iterator const_iter) {
-        /*
-         * A work around as there is no conversion betweena const_iterator and iterator.
-         *
-         * We intiailize i to the start of the container and then advance it by
-         * the distance to const_iter. The resulting i points to the same element
-         * as const_iter
-         * 
-         * Note that to be able to call std::distance with an iterator and
-         * const_iterator we need to specify the type as const_iterator (relying
-         * on the implicit conversion from iterator to const_iterator for i)
-         *
-         * Since the iterators are really vector (i.e. random-access) iterators
-         * this takes constant time
-         */
-        iterator i = begin();
-        std::advance(i, std::distance<const_iterator>(i, const_iter));
-        return i;
-    }
-
-  private:
-    Storage vec_;
-};
-
-/**
- * @brief Another flat_map container
- *
- * Like flat_map, but operator[] never inserts and directly returns the mapped value
- */
-template<class K, class T, class Compare, class Storage>
-class flat_map2 : public flat_map<K, T, Compare, Storage> {
-  public:
-    ///@brief Constructor
-    flat_map2() {}
-    explicit flat_map2(std::vector<typename flat_map2<K, T, Compare, Storage>::value_type>&& values)
-        : flat_map<K, T, Compare>(std::move(values)) {}
-
-    ///@brief const [] operator
-    const T& operator[](const K& key) const {
-        auto itr = this->find(key);
-        if (itr == this->end()) {
-            throw std::logic_error("Key not found");
-        }
-        return itr->second;
-    }
-
-    ///@brief [] operator
-    T& operator[](const K& key) {
-        return const_cast<T&>(const_cast<const flat_map2*>(this)->operator[](key));
-    }
-};
-
-///@brief A class to perform the comparison operation for the flat map
-template<class K, class T, class Compare, class Storage>
-class flat_map<K, T, Compare, Storage>::value_compare {
-    friend class flat_map;
-
-  public:
-    bool operator()(const value_type& x, const value_type& y) const {
-        return comp(x.first, y.first);
-    }
-
-    //For std::lower_bound, std::upper_bound
-    bool operator()(const value_type& x, const key_type& y) const {
-        return comp(x.first, y);
-    }
-    bool operator()(const key_type& x, const value_type& y) const {
-        return comp(x, y.first);
-    }
-
-  private:
-    value_compare(Compare c)
-        : comp(c) {}
-
-    Compare comp;
-};
-
-} // namespace vtr
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_geometry.h b/third_party/vtr/libs/vtrutil/src/vtr_geometry.h
deleted file mode 100644
index 3685c3086..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_geometry.h
+++ /dev/null
@@ -1,312 +0,0 @@
-#ifndef VTR_GEOMETRY_H
-#define VTR_GEOMETRY_H
-#include "vtr_range.h"
-#include "vtr_assert.h"
-
-#include <cstdio> // vtr_geometry.tpp uses printf()
-
-#include <vector>
-#include <tuple>
-#include <limits>
-#include <type_traits>
-
-/**
- * @file
- * @brief   This file include differents different geometry classes
- */
-
-namespace vtr {
-
-/*
- * Forward declarations
- */
-template<class T>
-class Point;
-
-template<class T>
-class Rect;
-
-template<class T>
-class Line;
-
-template<class T>
-class RectUnion;
-
-template<class T>
-bool operator==(Point<T> lhs, Point<T> rhs);
-template<class T>
-bool operator!=(Point<T> lhs, Point<T> rhs);
-template<class T>
-bool operator<(Point<T> lhs, Point<T> rhs);
-
-template<class T>
-bool operator==(const Rect<T>& lhs, const Rect<T>& rhs);
-template<class T>
-bool operator!=(const Rect<T>& lhs, const Rect<T>& rhs);
-
-template<class T>
-bool operator==(const RectUnion<T>& lhs, const RectUnion<T>& rhs);
-template<class T>
-bool operator!=(const RectUnion<T>& lhs, const RectUnion<T>& rhs);
-/*
- * Class Definitions
- */
-
-/**
- * @brief A point in 2D space
- *
- * This class represents a point in 2D space. Hence, it holds both
- * x and y components of the point. 
- */
-template<class T>
-class Point {
-  public: //Constructors
-    Point(T x_val, T y_val) noexcept;
-
-  public: //Accessors
-    ///@brief x coordinate
-    T x() const;
-
-    ///@brief y coordinate
-    T y() const;
-
-    ///@brief == operator
-    friend bool operator== <>(Point<T> lhs, Point<T> rhs);
-
-    ///@brief != operator
-    friend bool operator!= <>(Point<T> lhs, Point<T> rhs);
-
-    ///@brief < operator
-    friend bool operator< <>(Point<T> lhs, Point<T> rhs);
-
-  public: //Mutators
-    ///@brief Set x and y values
-    void set(T x_val, T y_val);
-
-    ///@brief set x value
-    void set_x(T x_val);
-
-    ///@brief set y value
-    void set_y(T y_val);
-
-    ///@brief Swap x and y values
-    void swap();
-
-  private:
-    T x_;
-    T y_;
-};
-
-/**
- * @brief A 2D rectangle
- *
- * This class represents a 2D rectangle. It can be created with 
- * its 4 points or using the bottom left and the top rights ones only
- */
-template<class T>
-class Rect {
-  public: //Constructors
-    ///@brief default constructor
-    Rect();
-
-    ///@brief construct using 4 vertex
-    Rect(T left_val, T bottom_val, T right_val, T top_val);
-
-    ///@brief construct using the bottom left and the top right vertex
-    Rect(Point<T> bottom_left_val, Point<T> top_right_val);
-
-    /**
-     * @brief Constructs a rectangle that only contains the given point
-     *
-     * Rect(p1).contains(p2) => p1 == p2
-     * It is only enabled for integral types, because making this work for floating point types would be difficult and brittle.
-     * The following line only enables the constructor if std::is_integral<T>::value == true
-     */
-    template<typename U = T, typename std::enable_if<std::is_integral<U>::value>::type...>
-    Rect(Point<U> point);
-
-  public: //Accessors
-    ///@brief xmin coordinate
-    T xmin() const;
-
-    ///@brief xmax coordinate
-    T xmax() const;
-
-    ///@brief ymin coodrinate
-    T ymin() const;
-
-    ///@brief ymax coordinate
-    T ymax() const;
-
-    ///@brief Return the bottom left point
-    Point<T> bottom_left() const;
-
-    ///@brief Return the top right point
-    Point<T> top_right() const;
-
-    ///@brief Return the rectangle width
-    T width() const;
-
-    ///@brief Return the rectangle height
-    T height() const;
-
-    ///@brief Returns true if the point is fully contained within the rectangle (excluding the top-right edges)
-    bool contains(Point<T> point) const;
-
-    ///@brief Returns true if the point is strictly contained within the region (excluding all edges)
-    bool strictly_contains(Point<T> point) const;
-
-    ///@brief Returns true if the point is coincident with the rectangle (including the top-right edges)
-    bool coincident(Point<T> point) const;
-
-    ///@brief Returns true if other is contained within the rectangle (including all edges)
-    bool contains(const Rect<T>& other) const;
-
-    /**
-     * @brief Checks whether the rectangle is empty
-     *
-     * Returns true if no points are contained in the rectangle
-     * rect.empty() => not exists p. rect.contains(p)
-     * This also implies either the width or height is 0.
-     */
-    bool empty() const;
-
-    ///@brief == operator
-    friend bool operator== <>(const Rect<T>& lhs, const Rect<T>& rhs);
-
-    ///@brief != operator
-    friend bool operator!= <>(const Rect<T>& lhs, const Rect<T>& rhs);
-
-  public: //Mutators
-    ///@brief set xmin to a point
-    void set_xmin(T xmin_val);
-
-    ///@brief set ymin to a point
-    void set_ymin(T ymin_val);
-
-    ///@brief set xmax to a point
-    void set_xmax(T xmax_val);
-
-    ///@brief set ymax to a point
-    void set_ymax(T ymax_val);
-
-    ///@brief Equivalent to `*this = bounding_box(*this, other)`
-    Rect<T>& expand_bounding_box(const Rect<T>& other);
-
-  private:
-    Point<T> bottom_left_;
-    Point<T> top_right_;
-};
-
-/**
- * @brief Return the smallest rectangle containing both given rectangles
- *
- * Note that this isn't a union and the resulting rectangle may include points not in either given rectangle
- */
-template<class T>
-Rect<T> bounding_box(const Rect<T>& lhs, const Rect<T>& rhs);
-
-///@brief Return the intersection of two given rectangles
-template<class T>
-Rect<T> intersection(const Rect<T>& lhs, const Rect<T>& rhs);
-
-//Prints a rectangle
-template<class T>
-static void print_rect(FILE* fp, const Rect<T> rect);
-
-//Sample on a uniformly spaced grid within a rectangle
-//  sample(vtr::Rect(l, h), 0, 0, M) == l
-//  sample(vtr::Rect(l, h), M, M, M) == h
-//To avoid the edges, use `sample(r, x+1, y+1, N+1) for x, y, in 0..N-1
-//Only defined for integral types
-
-/**
- * @brief Sample on a uniformly spaced grid within a rectangle
- *
- * sample(vtr::Rect(l, h), 0, 0, M) == l
- * sample(vtr::Rect(l, h), M, M, M) == h
- * To avoid the edges, use `sample(r, x+1, y+1, N+1) for x, y, in 0..N-1
- * Only defined for integral types
- */
-
-template<typename T, typename std::enable_if<std::is_integral<T>::value>::type...>
-Point<T> sample(const vtr::Rect<T>& r, T x, T y, T d);
-
-///@brief clamps v to be between low (lo) and high (hi), inclusive.
-template<class T>
-static constexpr const T& clamp(const T& v, const T& lo, const T& hi) {
-    return std::min(std::max(v, lo), hi);
-}
-
-/**
- * @brief A 2D line
- *
- * It is constructed using a vector of the line points
- */
-template<class T>
-class Line {
-  public: //Types
-    typedef typename std::vector<Point<T>>::const_iterator point_iter;
-    typedef vtr::Range<point_iter> point_range;
-
-  public: //Constructors
-    ///@brief contructor
-    Line(std::vector<Point<T>> line_points);
-
-  public: //Accessors
-    ///@brief Returns the bounding box
-    Rect<T> bounding_box() const;
-
-    ///@brief Returns a range of constituent points
-    point_range points() const;
-
-  private:
-    std::vector<Point<T>> points_;
-};
-
-///@brief A union of 2d rectangles
-template<class T>
-class RectUnion {
-  public: //Types
-    typedef typename std::vector<Rect<T>>::const_iterator rect_iter;
-    typedef vtr::Range<rect_iter> rect_range;
-
-  public: //Constructors
-    ///@brief Construct from a set of rectangles
-    RectUnion(std::vector<Rect<T>> rects);
-
-  public: //Accessors
-    ///@brief Returns the bounding box of all rectangles in the union
-    Rect<T> bounding_box() const;
-
-    ///@brief Returns true if the point is fully contained within the region (excluding top-right edges)
-    bool contains(Point<T> point) const;
-
-    ///@brief Returns true if the point is strictly contained within the region (excluding all edges)
-    bool strictly_contains(Point<T> point) const;
-
-    ///@brief Returns true if the point is coincident with the region (including the top-right edges)
-    bool coincident(Point<T> point) const;
-
-    ///@brief Returns a range of all constituent rectangles
-    rect_range rects() const;
-
-    /**
-     * @brief Checks whether two RectUnions have identical representations
-     *
-     * Note: does not check whether the representations they are equivalent
-     */
-    friend bool operator== <>(const RectUnion<T>& lhs, const RectUnion<T>& rhs);
-
-    ///@brief != operator
-    friend bool operator!= <>(const RectUnion<T>& lhs, const RectUnion<T>& rhs);
-
-  private:
-    // Note that a union of rectanges may have holes and may not be contiguous
-    std::vector<Rect<T>> rects_;
-};
-
-} // namespace vtr
-
-#include "vtr_geometry.tpp"
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_geometry.tpp b/third_party/vtr/libs/vtrutil/src/vtr_geometry.tpp
deleted file mode 100644
index 2010700fc..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_geometry.tpp
+++ /dev/null
@@ -1,347 +0,0 @@
-namespace vtr {
-/*
- * Point
- */
-
-template<class T>
-Point<T>::Point(T x_val, T y_val) noexcept
-    : x_(x_val)
-    , y_(y_val) {
-    //pass
-}
-
-template<class T>
-T Point<T>::x() const {
-    return x_;
-}
-
-template<class T>
-T Point<T>::y() const {
-    return y_;
-}
-
-template<class T>
-bool operator==(Point<T> lhs, Point<T> rhs) {
-    return lhs.x() == rhs.x()
-           && lhs.y() == rhs.y();
-}
-
-template<class T>
-bool operator!=(Point<T> lhs, Point<T> rhs) {
-    return !(lhs == rhs);
-}
-
-template<class T>
-bool operator<(Point<T> lhs, Point<T> rhs) {
-    return std::make_tuple(lhs.x(), lhs.y()) < std::make_tuple(rhs.x(), rhs.y());
-}
-
-//Mutators
-template<class T>
-void Point<T>::set(T x_val, T y_val) {
-    x_ = x_val;
-    y_ = y_val;
-}
-
-template<class T>
-void Point<T>::set_x(T x_val) {
-    x_ = x_val;
-}
-
-template<class T>
-void Point<T>::set_y(T y_val) {
-    y_ = y_val;
-}
-
-template<class T>
-void Point<T>::swap() {
-    std::swap(x_, y_);
-}
-
-/*
- * Rect
- */
-template<class T>
-Rect<T>::Rect()
-    : Rect<T>(Point<T>(0, 0), Point<T>(0, 0)) {
-    //pass
-}
-
-template<class T>
-Rect<T>::Rect(T left_val, T bottom_val, T right_val, T top_val)
-    : Rect<T>(Point<T>(left_val, bottom_val), Point<T>(right_val, top_val)) {
-    //pass
-}
-
-template<class T>
-Rect<T>::Rect(Point<T> bottom_left_val, Point<T> top_right_val)
-    : bottom_left_(bottom_left_val)
-    , top_right_(top_right_val) {
-    //pass
-}
-
-//Only defined for integral types
-template<class T>
-template<typename U, typename std::enable_if<std::is_integral<U>::value>::type...>
-Rect<T>::Rect(Point<U> point)
-    : bottom_left_(point)
-    , top_right_(point.x() + 1,
-                 point.y() + 1) {
-    //pass
-}
-
-template<class T>
-T Rect<T>::xmin() const {
-    return bottom_left_.x();
-}
-
-template<class T>
-T Rect<T>::xmax() const {
-    return top_right_.x();
-}
-
-template<class T>
-T Rect<T>::ymin() const {
-    return bottom_left_.y();
-}
-
-template<class T>
-T Rect<T>::ymax() const {
-    return top_right_.y();
-}
-
-template<class T>
-Point<T> Rect<T>::bottom_left() const {
-    return bottom_left_;
-}
-
-template<class T>
-Point<T> Rect<T>::top_right() const {
-    return top_right_;
-}
-
-template<class T>
-T Rect<T>::width() const {
-    return xmax() - xmin();
-}
-
-template<class T>
-T Rect<T>::height() const {
-    return ymax() - ymin();
-}
-
-template<class T>
-bool Rect<T>::contains(Point<T> point) const {
-    //Up-to but not including right or top edges
-    return point.x() >= xmin() && point.x() < xmax()
-           && point.y() >= ymin() && point.y() < ymax();
-}
-
-template<class T>
-bool Rect<T>::strictly_contains(Point<T> point) const {
-    //Excluding edges
-    return point.x() > xmin() && point.x() < xmax()
-           && point.y() > ymin() && point.y() < ymax();
-}
-
-template<class T>
-bool Rect<T>::coincident(Point<T> point) const {
-    //Including right or top edges
-    return point.x() >= xmin() && point.x() <= xmax()
-           && point.y() >= ymin() && point.y() <= ymax();
-}
-
-template<class T>
-bool Rect<T>::contains(const Rect<T>& other) const {
-    //Including all edges
-    return other.xmin() >= xmin() && other.xmax() <= xmax()
-           && other.ymin() >= ymin() && other.ymax() <= ymax();
-}
-
-template<class T>
-bool Rect<T>::empty() const {
-    return xmax() <= xmin() || ymax() <= ymin();
-}
-
-template<class T>
-bool operator==(const Rect<T>& lhs, const Rect<T>& rhs) {
-    return lhs.bottom_left() == rhs.bottom_left()
-           && lhs.top_right() == rhs.top_right();
-}
-
-template<class T>
-bool operator!=(const Rect<T>& lhs, const Rect<T>& rhs) {
-    return !(lhs == rhs);
-}
-
-template<class T>
-Rect<T> bounding_box(const Rect<T>& lhs, const Rect<T>& rhs) {
-    return Rect<T>(std::min(lhs.xmin(), rhs.xmin()),
-                   std::min(lhs.ymin(), rhs.ymin()),
-                   std::max(lhs.xmax(), rhs.xmax()),
-                   std::max(lhs.ymax(), rhs.ymax()));
-}
-
-template<class T>
-Rect<T> intersection(const Rect<T>& lhs, const Rect<T>& rhs) {
-    return Rect<T>(std::max(lhs.xmin(), rhs.xmin()),
-                   std::max(lhs.ymin(), rhs.ymin()),
-                   std::min(lhs.xmax(), rhs.xmax()),
-                   std::min(lhs.ymax(), rhs.ymax()));
-}
-template<class T>
-static void print_rect(FILE* fp, const Rect<T> rect) {
-    fprintf(fp, "\txmin: %d\n", rect.xmin());
-    fprintf(fp, "\tymin: %d\n", rect.ymin());
-    fprintf(fp, "\txmax: %d\n", rect.xmax());
-    fprintf(fp, "\tymax: %d\n", rect.ymax());
-}
-//Only defined for integral types
-template<typename T, typename std::enable_if<std::is_integral<T>::value>::type...>
-Point<T> sample(const vtr::Rect<T>& r, T x, T y, T d) {
-    VTR_ASSERT(d > 0 && x <= d && y <= d && !r.empty());
-    return Point<T>((r.xmin() * (d - x) + r.xmax() * x + d / 2) / d,
-                    (r.ymin() * (d - y) + r.ymax() * y + d / 2) / d);
-}
-
-template<class T>
-void Rect<T>::set_xmin(T xmin_val) {
-    bottom_left_.set_x(xmin_val);
-}
-
-template<class T>
-void Rect<T>::set_ymin(T ymin_val) {
-    bottom_left_.set_y(ymin_val);
-}
-
-template<class T>
-void Rect<T>::set_xmax(T xmax_val) {
-    top_right_.set_x(xmax_val);
-}
-
-template<class T>
-void Rect<T>::set_ymax(T ymax_val) {
-    top_right_.set_y(ymax_val);
-}
-
-template<class T>
-Rect<T>& Rect<T>::expand_bounding_box(const Rect<T>& other) {
-    *this = bounding_box(*this, other);
-    return *this;
-}
-
-/*
- * Line
- */
-template<class T>
-Line<T>::Line(std::vector<Point<T>> line_points)
-    : points_(line_points) {
-    //pass
-}
-
-template<class T>
-Rect<T> Line<T>::bounding_box() const {
-    T xmin = std::numeric_limits<T>::max();
-    T ymin = std::numeric_limits<T>::max();
-    T xmax = std::numeric_limits<T>::min();
-    T ymax = std::numeric_limits<T>::min();
-
-    for (const auto& point : points()) {
-        xmin = std::min(xmin, point.x());
-        ymin = std::min(ymin, point.y());
-        xmax = std::max(xmax, point.x());
-        ymax = std::max(ymax, point.y());
-    }
-
-    return Rect<T>(xmin, ymin, xmax, ymax);
-}
-
-template<class T>
-typename Line<T>::point_range Line<T>::points() const {
-    return vtr::make_range(points_.begin(), points_.end());
-}
-
-/*
- * RectUnion
- */
-template<class T>
-RectUnion<T>::RectUnion(std::vector<Rect<T>> rectangles)
-    : rects_(rectangles) {
-    //pass
-}
-
-template<class T>
-Rect<T> RectUnion<T>::bounding_box() const {
-    T xmin = std::numeric_limits<T>::max();
-    T ymin = std::numeric_limits<T>::max();
-    T xmax = std::numeric_limits<T>::min();
-    T ymax = std::numeric_limits<T>::min();
-
-    for (const auto& rect : rects_) {
-        xmin = std::min(xmin, rect.xmin());
-        ymin = std::min(ymin, rect.ymin());
-        xmax = std::max(xmax, rect.xmax());
-        ymax = std::max(ymax, rect.ymax());
-    }
-
-    return Rect<T>(xmin, ymin, xmax, ymax);
-}
-
-template<class T>
-bool RectUnion<T>::contains(Point<T> point) const {
-    for (const auto& rect : rects()) {
-        if (rect.contains(point)) {
-            return true;
-        }
-    }
-    return false;
-}
-
-template<class T>
-bool RectUnion<T>::strictly_contains(Point<T> point) const {
-    for (const auto& rect : rects()) {
-        if (rect.strictly_contains(point)) {
-            return true;
-        }
-    }
-    return false;
-}
-
-template<class T>
-bool RectUnion<T>::coincident(Point<T> point) const {
-    for (const auto& rect : rects()) {
-        if (rect.coincident(point)) {
-            return true;
-        }
-    }
-    return false;
-}
-
-template<class T>
-typename RectUnion<T>::rect_range RectUnion<T>::rects() const {
-    return vtr::make_range(rects_.begin(), rects_.end());
-}
-
-template<class T>
-bool operator==(const RectUnion<T>& lhs, const RectUnion<T>& rhs) {
-    //Currently checks for an identical *representation* (not whether the
-    //representations are equivalent)
-
-    if (lhs.rects_.size() != rhs.rects_.size()) {
-        return false;
-    }
-
-    for (size_t i = 0; i < lhs.rects_.size(); ++i) {
-        if (lhs.rects_[i] != rhs.rects_[i]) {
-            return false;
-        }
-    }
-
-    return true;
-}
-
-template<class T>
-bool operator!=(const RectUnion<T>& lhs, const RectUnion<T>& rhs) {
-    return !(lhs == rhs);
-}
-} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_hash.h b/third_party/vtr/libs/vtrutil/src/vtr_hash.h
deleted file mode 100644
index 7e8e6fa42..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_hash.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef VTR_HASH_H
-#define VTR_HASH_H
-#include <functional>
-
-namespace vtr {
-
-/**
- * @brief Hashes v and combines it with seed (as in boost)
- *
- * This is typically used to implement std::hash for composite types.
- */
-template<class T>
-inline void hash_combine(std::size_t& seed, const T& v) {
-    std::hash<T> hasher;
-    seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
-}
-
-struct hash_pair {
-    template<class T1, class T2>
-    std::size_t operator()(const std::pair<T1, T2>& pair) const noexcept {
-        auto hash1 = std::hash<T1>{}(pair.first);
-        auto hash2 = std::hash<T2>{}(pair.second);
-
-        return hash1 ^ hash2;
-    }
-};
-
-} // namespace vtr
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_linear_map.h b/third_party/vtr/libs/vtrutil/src/vtr_linear_map.h
deleted file mode 100644
index c0ef38cfc..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_linear_map.h
+++ /dev/null
@@ -1,312 +0,0 @@
-#ifndef VTR_LINEAR_MAP_H
-#define VTR_LINEAR_MAP_H
-#include <vector>
-#include <stdexcept>
-
-#include "vtr_sentinels.h"
-
-namespace vtr {
-/**
- * @brief A std::map-like container which is indexed by K
- *
- * The main use of this container is to behave like a std::map which is optimized to hold
- * mappings between a dense linear range of keys (e.g. vtr::StrongId).
- *
- * Requires that K be convertable to size_t with the size_t operator (i.e. size_t()), and
- * that the conversion results in a linearly increasing index into the underlying vector.
- * Also requires that K() return the sentinel value used to mark invalid entries.
- *
- * If you only need to access the value associated with the key consider using vtr::vector_map
- * instead, which provides a similar but more std::vector-like interface.
- * 
- * Note that it is possible to use linear_map with sparse/non-contiguous keys, but this is typically
- * memory inefficient as the underlying vector will allocate space for [0..size_t(max_key)-1],
- * where max_key is the largest key that has been inserted.
- *
- * As with a std::vector, it is the caller's responsibility to ensure there is sufficient space
- * when a given index/key before it is accessed. The exception to this are the find() and insert()
- * methods which handle non-existing keys gracefully.
- */
-template<class K, class T, class Sentinel = DefaultSentinel<K>>
-class linear_map {
-  public:
-    typedef K key_type;
-    typedef T mapped_type;
-    typedef std::pair<K, T> value_type;
-    typedef value_type& reference;
-    typedef const value_type& const_reference;
-    typedef typename std::vector<value_type>::iterator iterator;
-    typedef typename std::vector<value_type>::const_iterator const_iterator;
-    typedef typename std::vector<value_type>::reverse_iterator reverse_iterator;
-    typedef typename std::vector<value_type>::const_reverse_iterator const_reverse_iterator;
-    typedef typename std::vector<value_type>::difference_type difference_type;
-    typedef typename std::vector<value_type>::size_type size_type;
-
-  public:
-    ///@brief Standard big 5 constructors
-    linear_map() = default;
-    linear_map(const linear_map&) = default;
-    linear_map(linear_map&&) = default;
-    linear_map& operator=(const linear_map&) = default;
-    linear_map& operator=(linear_map&&) = default;
-
-    linear_map(size_t num_keys)
-        : vec_(num_keys, std::make_pair(sentinel(), T())) //Initialize all with sentinel values
-    {}
-
-    ///@brief Return an iterator to the first element
-    iterator begin() { return vec_.begin(); }
-
-    ///@brief Return a constant iterator to the first element
-    const_iterator begin() const { return vec_.begin(); }
-
-    ///@brief Return an iterator to the last element
-    iterator end() { return vec_.end(); }
-
-    ///@brief Return a constant iterator to the last element
-    const_iterator end() const { return vec_.end(); }
-
-    ///@brief Return a reverse iterator to the last element
-    reverse_iterator rbegin() { return vec_.rbegin(); }
-
-    ///@brief Return a constant reverse iterator to the last element
-    const_reverse_iterator rbegin() const { return vec_.rbegin(); }
-
-    ///@brief Return a reverse iterator pointing to the theoretical element preceding the first element
-    reverse_iterator rend() { return vec_.rend(); }
-
-    ///@brief Return a constant reverse iterator pointing to the theoretical element preceding the first element
-    const_reverse_iterator rend() const { return vec_.rend(); }
-
-    ///@brief Return a const iterator to the first element
-    const_iterator cbegin() const { return vec_.begin(); }
-
-    ///@brief Return a const_iterator pointing to the past-the-end element in the container
-    const_iterator cend() const { return vec_.end(); }
-
-    ///@brief Return a const_reverse_iterator pointing to the last element in the container (i.e., its reverse beginning).
-    const_reverse_iterator crbegin() const { return vec_.rbegin(); }
-
-    ///@brief Return a const_reverse_iterator pointing to the theoretical element preceding the first element in the container (which is considered its reverse end).
-    const_reverse_iterator crend() const { return vec_.rend(); }
-
-    ///@brief Return true if the container is empty
-    bool empty() const { return vec_.empty(); }
-
-    ///@brief Return the size of the container
-    size_type size() const { return vec_.size(); }
-
-    ///@brief Return the maximum size of the container
-    size_type max_size() const { return vec_.max_size(); }
-
-    ///@brief [] operator
-    mapped_type& operator[](const key_type& key) {
-        auto iter = find(key);
-        if (iter == end()) {
-            //Not found, create it
-            iter = insert(std::make_pair(key, mapped_type())).first;
-        }
-
-        return iter->second;
-    }
-
-    ///@brief at() operator
-    mapped_type& at(const key_type& key) {
-        return const_cast<mapped_type&>(const_cast<const linear_map*>(this)->at(key));
-    }
-
-    ///@brief constant at() operator
-    const mapped_type& at(const key_type& key) const {
-        auto iter = find(key);
-        if (iter == end()) {
-            throw std::out_of_range("Invalid key");
-        }
-        return iter->second;
-    }
-
-    ///@brief Insert value
-    std::pair<iterator, bool> insert(const value_type& value) {
-        auto iter = find(value.first);
-        if (iter != end()) {
-            //Found existing
-            return std::make_pair(iter, false);
-        } else {
-            //Insert
-            size_t index = size_t(value.first);
-
-            if (index >= vec_.size()) {
-                //Make space, initialize empty slots with sentinel values
-                vec_.resize(index + 1, std::make_pair(sentinel(), T()));
-            }
-
-            vec_[index] = value;
-
-            return std::make_pair(vec_.begin() + index, true);
-        }
-    }
-
-    ///@brief Insert range
-    template<class InputIterator>
-    void insert(InputIterator first, InputIterator last) {
-        for (InputIterator iter = first; iter != last; ++iter) {
-            insert(*iter);
-        }
-    }
-
-    ///@brief Erase by key
-    void erase(const key_type& key) {
-        auto iter = find(key);
-        if (iter != end()) {
-            erase(iter);
-        }
-    }
-
-    ///@brief Erase at iterator
-    void erase(const_iterator position) {
-        iterator pos = convert_to_iterator(position);
-        pos->first = sentinel(); //Mark invalid
-    }
-
-    ///@brief Erase range
-    void erase(const_iterator first, const_iterator last) {
-        for (auto iter = first; iter != last; ++iter) {
-            erase(iter);
-        }
-    }
-
-    ///@brief Swap two linear maps
-    void swap(linear_map& other) { std::swap(vec_, other.vec_); }
-
-    ///@brief Clear the container
-    void clear() { vec_.clear(); }
-
-    ///@brief Emplace
-    template<class... Args>
-    std::pair<iterator, bool> emplace(const key_type& key, Args&&... args) {
-        auto iter = find(key);
-        if (iter != end()) {
-            //Found
-            return std::make_pair(iter, false);
-        } else {
-            //Emplace
-            size_t index = size_t(key);
-
-            if (index >= vec_.size()) {
-                //Make space, initialize empty slots with sentinel values
-                vec_.resize(index + 1, value_type(sentinel(), T()));
-            }
-
-            vec_[index] = value_type(key, std::forward<Args>(args)...);
-
-            return std::make_pair(vec_.begin() + index, true);
-        }
-    }
-
-    ///@brief Requests that the underlying vector capacity be at least enough to contain n elements.
-    void reserve(size_type n) { vec_.reserve(n); }
-
-    ///@brief Reduces the capacity of the container to fit its size and destroys all elements beyond the capacity.
-    void shrink_to_fit() { vec_.shrink_to_fit(); }
-
-    ///@brief Returns an iterator to the first element in the range [first,last) that compares equal to val. If no such element is found, the function returns last.
-    iterator find(const key_type& key) {
-        const_iterator const_iter = const_cast<const linear_map*>(this)->find(key);
-        return convert_to_iterator(const_iter);
-    }
-
-    ///@brief Returns a constant iterator to the first element in the range [first,last) that compares equal to val. If no such element is found, the function returns last.
-    const_iterator find(const key_type& key) const {
-        size_t index = size_t(key);
-
-        if (index < vec_.size() && vec_[index].first != sentinel()) {
-            return vec_.begin() + index;
-        }
-        return end();
-    }
-
-    ///@brief Returns the number of elements in the range [first,last) that compare equal to val.
-    size_type count(const key_type& key) const {
-        return (find(key) == end()) ? 0 : 1;
-    }
-
-    ///@brief Returns an iterator pointing to the first element in the range [first,last) which does not compare less than val.
-    iterator lower_bound(const key_type& key) {
-        const_iterator const_iter = const_cast<const linear_map*>(this)->lower_bound(key);
-        return convert_to_iterator(const_iter);
-    }
-
-    ///@brief Returns a constant iterator pointing to the first element in the range [first,last) which does not compare less than val.
-    const_iterator lower_bound(const key_type& key) const {
-        return find(key);
-    }
-
-    ///@brief Returns an iterator pointing to the first element in the range [first,last) which compares greater than val.
-    iterator upper_bound(const key_type& key) {
-        const_iterator const_iter = const_cast<const linear_map*>(this)->upper_bound(key);
-        return convert_to_iterator(const_iter);
-    }
-
-    ///@brief Returns a constant iterator pointing to the first element in the range [first,last) which compares greater than val.
-    const_iterator upper_bound(const key_type& key) const {
-        auto iter = find(key);
-        return (iter != end()) ? iter + 1 : end();
-    }
-
-    ///@brief Returns the bounds of the subrange that includes all the elements of the range [first,last) with values equivalent to val.
-    std::pair<iterator, iterator> equal_range(const key_type& key) {
-        auto const_iter_pair = const_cast<const linear_map*>(this)->equal_range(key);
-        return std::pair<iterator, iterator>(iterator(const_iter_pair.first), iterator(const_iter_pair.second));
-    }
-
-    ///@brief Returns constant bounds of the subrange that includes all the elements of the range [first,last) with values equivalent to val.
-    std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const {
-        auto lb_iter = lower_bound(key);
-        auto ub_iter = upper_bound(key);
-        return (lb_iter != end()) ? std::make_pair(lb_iter, ub_iter) : std::make_pair(ub_iter, ub_iter);
-    }
-
-    ///@brief Return the size of valid elements
-    size_type valid_size() const {
-        size_t valid_cnt = 0;
-        for (const auto& kv : vec_) {
-            if (kv.first != sentinel()) {
-                ++valid_cnt;
-            }
-        }
-        return valid_cnt;
-    }
-
-  public:
-    friend void swap(linear_map& lhs, linear_map& rhs) {
-        std::swap(lhs.vec_, rhs.vec_);
-    }
-
-  private:
-    iterator convert_to_iterator(const_iterator const_iter) {
-        /*
-         * This is a work around for the fact that there is no conversion between a const_iterator and iterator.
-         * 
-         * We intiailize i to the start of the container and then advance it by
-         * the distance to const_iter. The resulting i points to the same element
-         * as const_iter
-         *
-         * Note that to be able to call std::distance with an iterator and
-         * const_iterator we need to specify the type as const_iterator (relying
-         * on the implicit conversion from iterator to const_iterator for i)
-         *
-         * Since the iterators are really vector (i.e. random-access) iterators
-         * both distance and advance take constant time
-         */
-        iterator i = begin();
-        std::advance(i, std::distance<const_iterator>(i, const_iter));
-        return i;
-    }
-
-    constexpr K sentinel() const { return Sentinel::INVALID(); }
-
-  private:
-    std::vector<value_type> vec_;
-};
-
-} // namespace vtr
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_list.cc b/third_party/vtr/libs/vtrutil/src/vtr_list.cc
deleted file mode 100644
index ce354dfe3..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_list.cc
+++ /dev/null
@@ -1,25 +0,0 @@
-#include <cstdlib>
-
-#include "vtr_list.h"
-#include "vtr_memory.h"
-
-namespace vtr {
-
-t_linked_vptr* insert_in_vptr_list(t_linked_vptr* head, void* vptr_to_add) {
-    /* Inserts a new element at the head of a linked list of void pointers. *
-     * Returns the new head of the list.                                    */
-
-    return new t_linked_vptr{vptr_to_add, head}; /* New head of the list */
-}
-
-/* Deletes the element at the head of a linked list of void pointers. *
- * Returns the new head of the list.                                    */
-t_linked_vptr* delete_in_vptr_list(t_linked_vptr* head) {
-    if (head == nullptr)
-        return nullptr;
-    t_linked_vptr* const linked_vptr = head->next;
-    delete head;
-    return linked_vptr; /* New head of the list */
-}
-
-} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_list.h b/third_party/vtr/libs/vtrutil/src/vtr_list.h
deleted file mode 100644
index 8403742c7..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_list.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef VTR_LIST_H
-#define VTR_LIST_H
-
-/**
- * @file
- * @brief Linked lists of void pointers and integers, respectively.
- */
-
-namespace vtr {
-
-///@brief Linked list node struct
-struct t_linked_vptr {
-    void* data_vptr;
-    struct t_linked_vptr* next;
-};
-
-///@brief Inserts a node to a list
-t_linked_vptr* insert_in_vptr_list(t_linked_vptr* head,
-                                   void* vptr_to_add);
-
-///@brief Delete a list
-t_linked_vptr* delete_in_vptr_list(t_linked_vptr* head);
-} // namespace vtr
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_log.cc b/third_party/vtr/libs/vtrutil/src/vtr_log.cc
deleted file mode 100644
index 0615e71ab..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_log.cc
+++ /dev/null
@@ -1,50 +0,0 @@
-#include <string>
-#include <fstream>
-#include <cstdarg>
-
-#include "vtr_util.h"
-#include "vtr_log.h"
-#include "log.h"
-
-namespace vtr {
-PrintHandlerInfo printf = log_print_info;
-PrintHandlerInfo printf_info = log_print_info;
-PrintHandlerWarning printf_warning = log_print_warning;
-PrintHandlerError printf_error = log_print_error;
-PrintHandlerDirect printf_direct = log_print_direct;
-
-void set_log_file(const char* filename) {
-    log_set_output_file(filename);
-}
-
-} // namespace vtr
-
-void add_warnings_to_suppress(std::string function_name) {
-    warnings_to_suppress.insert(function_name);
-}
-
-void set_noisy_warn_log_file(std::string log_file_name) {
-    std::ofstream log;
-    log.open(log_file_name, std::ifstream::out | std::ifstream::trunc);
-    log.close();
-    noisy_warn_log_file = log_file_name;
-}
-
-void print_or_suppress_warning(const char* pszFileName, unsigned int lineNum, const char* pszFuncName, const char* pszMessage, ...) {
-    std::string function_name(pszFuncName);
-
-    va_list va_args;
-    va_start(va_args, pszMessage);
-    std::string msg = vtr::vstring_fmt(pszMessage, va_args);
-    va_end(va_args);
-
-    auto result = warnings_to_suppress.find(function_name);
-    if (result == warnings_to_suppress.end()) {
-        vtr::printf_warning(pszFileName, lineNum, msg.data());
-    } else if (!noisy_warn_log_file.empty()) {
-        std::ofstream log;
-        log.open(noisy_warn_log_file.data(), std::ios_base::app);
-        log << "Warning:\n\tfile: " << pszFileName << "\n\tline: " << lineNum << "\n\tmessage: " << msg << std::endl;
-        log.close();
-    }
-}
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_log.h b/third_party/vtr/libs/vtrutil/src/vtr_log.h
deleted file mode 100644
index 3c52e249d..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_log.h
+++ /dev/null
@@ -1,167 +0,0 @@
-#ifndef VTR_LOG_H
-#define VTR_LOG_H
-#include <tuple>
-#include <unordered_set>
-#include <string>
-
-/**
- * @file
- * @brief This header defines useful logging macros for VTR projects.
- *
- * Message Type
- * ============
- *
- * Three types of log message types are defined:
- *     - VTR_LOG         : The standard 'info' type log message
- *     - VTR_LOG_WARN    : A warning log message. This represents unusual condition that may indicate an issue but executiom continues
- *     - VTR_LOG_ERROR   : An error log message. This represents a clear issue that should result in stopping the program execution. Please note that using this log message will not actually terminate the program. So a VtrError should be thrown after all the neccessary VTR_LOG_ERROR messages are printed. 
- * 
- * For example:
- *
- *      VTR_LOG("This produces a regular '%s' message\n", "info");
- *      VTR_LOG_WARN("This produces a '%s' message\n", "warning");
- *      VTR_LOG_ERROR("This produces an '%s' message\n", "error");
- *
- * Conditional Logging
- * ===================
- *
- * Each of the three message types also have a VTR_LOGV_* variant,
- * which will cause the message to be logged if a user-defined condition
- * is satisifed.
- *
- * For example:
- *
- *      VTR_LOGV(verbosity > 5, "This message will be logged only if verbosity is greater than %d\n", 5);
- *      VTR_LOGV_WARN(verbose, "This warning message will be logged if verbose is true\n");
- *      VTR_LOGV_ERROR(false, "This error message will never be logged\n");
- *
- * Custom Location Logging
- * =======================
- *
- * Each of the three message types also have a VTR_LOGF_* variant,
- * which will cause the message to be logged for a custom file and
- *
- * For example:
- *
- *      VTR_LOGF("my_file.txt", "This message will be logged from file 'my_file.txt' line %d\n", 42);
- *  
- * Debug Logging
- * =============
- *
- * For debug purposes it may be useful to have additional logging.
- * This is supported by VTR_LOG_DEBUG() and VTR_LOGV_DEBUG().
- *
- * To avoid run-time overhead, these are only enabled if VTR_ENABLE_DEBUG_LOGGING 
- * is defined (disabled by default).
- */
-
-// Unconditional logging macros
-#define VTR_LOG(...) VTR_LOGV(true, __VA_ARGS__)
-#define VTR_LOG_WARN(...) VTR_LOGV_WARN(true, __VA_ARGS__)
-#define VTR_LOG_ERROR(...) VTR_LOGV_ERROR(true, __VA_ARGS__)
-#define VTR_LOG_NOP(...) VTR_LOGV_NOP(true, __VA_ARGS__)
-
-// Conditional logging macros
-#define VTR_LOGV(expr, ...) VTR_LOGVF(expr, __FILE__, __LINE__, __VA_ARGS__)
-#define VTR_LOGV_WARN(expr, ...) VTR_LOGVF_WARN(expr, __FILE__, __LINE__, __VA_ARGS__)
-#define VTR_LOGV_ERROR(expr, ...) VTR_LOGVF_ERROR(expr, __FILE__, __LINE__, __VA_ARGS__)
-#define VTR_LOGV_NOP(expr, ...) VTR_LOGVF_NOP(expr, __FILE__, __LINE__, __VA_ARGS__)
-
-// Custom file-line location logging macros
-#define VTR_LOGF(file, line, ...) VTR_LOGVF(true, file, line, __VA_ARGS__)
-#define VTR_LOGF_WARN(file, line, ...) VTR_LOGVF_WARN(true, file, line, __VA_ARGS__)
-#define VTR_LOGF_ERROR(file, line, ...) VTR_LOGVF_ERROR(true, file, line, __VA_ARGS__)
-#define VTR_LOGF_NOP(file, line, ...) VTR_LOGVF_NOP(true, file, line, __VA_ARGS__)
-
-// Custom file-line-func location logging macros
-#define VTR_LOGFF_WARN(file, line, func, ...) VTR_LOGVFF_WARN(true, file, line, func, __VA_ARGS__)
-
-// Conditional logging and custom file-line location macros
-#define VTR_LOGVF(expr, file, line, ...)    \
-    do {                                    \
-        if (expr) vtr::printf(__VA_ARGS__); \
-    } while (false)
-
-#define VTR_LOGVF_WARN(expr, file, line, ...)                                   \
-    do {                                                                        \
-        if (expr) print_or_suppress_warning(file, line, __func__, __VA_ARGS__); \
-    } while (false)
-
-#define VTR_LOGVF_ERROR(expr, file, line, ...)                \
-    do {                                                      \
-        if (expr) vtr::printf_error(file, line, __VA_ARGS__); \
-    } while (false)
-
-// Conditional logging and custom file-line-func location macros
-#define VTR_LOGVFF_WARN(expr, file, line, func, ...)                        \
-    do {                                                                    \
-        if (expr) print_or_suppress_warning(file, line, func, __VA_ARGS__); \
-    } while (false)
-
-/*
- * No-op version of logging macro which avoids unused parameter warnings.
- *
- * Note that to avoid unused parameter warnings we call sizeof() and cast
- * the result to void. sizeof is evaluated at compile time so there is no
- * run-time overhead.
- *
- * Also note the use of std::make_tuple to ensure all arguments in VA_ARGS
- * are used.
- */
-#define VTR_LOGVF_NOP(expr, file, line, ...)                     \
-    do {                                                         \
-        static_cast<void>(sizeof(expr));                         \
-        static_cast<void>(sizeof(file));                         \
-        static_cast<void>(sizeof(line));                         \
-        static_cast<void>(sizeof(std::make_tuple(__VA_ARGS__))); \
-    } while (false)
-
-// Debug logging macros
-#ifdef VTR_ENABLE_DEBUG_LOGGING //Enable
-#    define VTR_LOG_DEBUG(...) VTR_LOG(__VA_ARGS__)
-#    define VTR_LOGV_DEBUG(expr, ...) VTR_LOGV(expr, __VA_ARGS__)
-#else //Disable
-#    define VTR_LOG_DEBUG(...) VTR_LOG_NOP(__VA_ARGS__)
-#    define VTR_LOGV_DEBUG(expr, ...) VTR_LOGV_NOP(expr, __VA_ARGS__)
-#endif
-
-namespace vtr {
-
-typedef void (*PrintHandlerInfo)(const char* pszMessage, ...);
-typedef void (*PrintHandlerWarning)(const char* pszFileName, unsigned int lineNum, const char* pszMessage, ...);
-typedef void (*PrintHandlerError)(const char* pszFileName, unsigned int lineNum, const char* pszMessage, ...);
-typedef void (*PrintHandlerDirect)(const char* pszMessage, ...);
-
-extern PrintHandlerInfo printf; //Same as printf_info
-extern PrintHandlerInfo printf_info;
-extern PrintHandlerWarning printf_warning;
-extern PrintHandlerError printf_error;
-extern PrintHandlerDirect printf_direct;
-
-void set_log_file(const char* filename);
-
-} // namespace vtr
-
-static std::unordered_set<std::string> warnings_to_suppress;
-static std::string noisy_warn_log_file;
-
-/**
- * @brief The following data structure and functions allow to suppress noisy warnings and direct them into an external file, if specified.
- */
-void add_warnings_to_suppress(std::string function_name);
-
-/**
- * @brief This function creates a new log file to hold the suppressed warnings. If the file already exists, it is cleared out first.
- */
-void set_noisy_warn_log_file(std::string log_file_name);
-
-/** 
- * @brief This function checks whether to print or to suppress warning
- *
- * This function checks whether the function from which the warning has been called
- *  is in the set of warnings_to_suppress. If so, the warning is printed on the
- * noisy_warn_log_file, otherwise it is printed on stdout (or the regular log file)
- */
-void print_or_suppress_warning(const char* pszFileName, unsigned int lineNum, const char* pszFuncName, const char* pszMessage, ...);
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_logic.h b/third_party/vtr/libs/vtrutil/src/vtr_logic.h
deleted file mode 100644
index 30d44c4a6..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_logic.h
+++ /dev/null
@@ -1,33 +0,0 @@
-// Put this above guard so that TRUE/FALSE are undef'ed
-// even if this file was already included earlier.
-#ifndef VTR_LOGIC_H
-#define VTR_LOGIC_H
-
-#ifdef FALSE
-#    undef FALSE
-#endif
-#define FALSE FALSE
-
-#ifdef TRUE
-#    undef TRUE
-#endif
-#define TRUE TRUE
-
-constexpr int FALSE = 0;
-constexpr int TRUE = 1;
-
-namespace vtr {
-
-/**
- * @brief This class represents the different supported logic values
- */
-enum class LogicValue {
-    FALSE = 0,
-    TRUE = 1,
-    DONT_CARE = 2,
-    UNKOWN = 3
-};
-
-} // namespace vtr
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_map_util.h b/third_party/vtr/libs/vtrutil/src/vtr_map_util.h
deleted file mode 100644
index fd1cdd4f8..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_map_util.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef VTR_MAP_UTIL_H
-#define VTR_MAP_UTIL_H
-
-#include "vtr_pair_util.h"
-#include "vtr_range.h"
-
-namespace vtr {
-
-///@brief An iterator who wraps a std::map iterator to return it's key
-template<typename Iter>
-using map_key_iter = pair_first_iter<Iter>;
-
-///@brief An iterator who wraps a std::map iterator to return it's value
-template<typename Iter>
-using map_value_iter = pair_second_iter<Iter>;
-
-///@brief Returns a range iterating over a std::map's keys
-template<typename T>
-auto make_key_range(T b, T e) {
-    using key_iter = map_key_iter<T>;
-    return vtr::make_range(key_iter(b), key_iter(e));
-}
-
-///@brief Returns a range iterating over a std::map's keys
-template<typename Container>
-auto make_key_range(const Container& c) {
-    return make_key_range(std::begin(c), std::end(c));
-}
-
-///@brief Returns a range iterating over a std::map's values
-template<typename T>
-auto make_value_range(T b, T e) {
-    using value_iter = map_value_iter<T>;
-    return vtr::make_range(value_iter(b), value_iter(e));
-}
-
-///@brief Returns a range iterating over a std::map's values
-template<typename Container>
-auto make_value_range(const Container& c) {
-    return make_value_range(std::begin(c), std::end(c));
-}
-
-} // namespace vtr
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_math.cc b/third_party/vtr/libs/vtrutil/src/vtr_math.cc
deleted file mode 100644
index 32594753c..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_math.cc
+++ /dev/null
@@ -1,106 +0,0 @@
-#include <map>
-#include <algorithm>
-
-#include "vtr_assert.h"
-#include "vtr_error.h"
-#include "vtr_math.h"
-
-namespace vtr {
-
-///@brief Calculates the value pow(base, exp)
-int ipow(int base, int exp) {
-    int result = 1;
-
-    VTR_ASSERT(exp >= 0);
-
-    while (exp) {
-        if (exp & 1)
-            result *= base;
-        exp >>= 1;
-        base *= base;
-    }
-    return result;
-}
-
-float median(std::vector<float> vector) {
-    VTR_ASSERT(vector.size() > 0);
-
-    std::sort(vector.begin(), vector.end());
-
-    auto size = vector.size();
-    if (size % 2 == 0) {
-        return (float)(vector[size / 2 - 1] + vector[size / 2]) / 2;
-    }
-
-    return (float)vector[size / 2];
-}
-
-/**
- * @brief Linear interpolation/Extrapolation 
- *
- * Performs linear interpolation or extrapolation on the set of (x,y) values specified by the xy_map.
- * A requested x value is passed in, and we return the interpolated/extrapolated y value at this requested value of x.
- * Meant for maps where both key and element are numbers.
- * This is specifically enforced by the explicit instantiations below this function. i.e. only templates
- * using those types listed in the explicit instantiations below are allowed 
- */
-template<typename X, typename Y>
-Y linear_interpolate_or_extrapolate(const std::map<X, Y>* xy_map, X requested_x) {
-    Y result;
-
-    /* the intention of this function is to interpolate/extrapolate. we can't do so with less than 2 values in the xy_map */
-    if (xy_map->size() < 2) {
-        throw VtrError("linear_interpolate_or_extrapolate: cannot interpolate/extrapolate based on less than 2 (x,y) pairs", __FILE__, __LINE__);
-    }
-
-    auto itr = xy_map->find(requested_x);
-    if (itr != xy_map->end()) {
-        /* requested x already exists in the x,y map */
-        result = itr->second;
-    } else {
-        /* requested x does not exist in the x,y map. need to interpolate/extrapolate */
-
-        typename std::map<X, Y>::const_iterator it;
-        double x_low, x_high, y_low, y_high;
-        double slope, reference_y, delta_x;
-
-        /* get first x greater than the one requested */
-        it = xy_map->upper_bound(requested_x);
-
-        if (it == xy_map->end()) {
-            /* need to extrapolate to higher x. based on the y values at the two largest x values */
-            it--;
-            x_high = (double)it->first;
-            y_high = (double)it->second;
-            it--;
-            x_low = (double)it->first;
-            y_low = (double)it->second;
-        } else if (it == xy_map->begin()) {
-            /* need to extrapolate to lower x. based on the y values at the two smallest x */
-            x_low = (double)it->first;
-            y_low = (double)it->second;
-            it++;
-            x_high = (double)it->first;
-            y_high = (double)it->second;
-        } else {
-            /* need to interpolate. based on y values at x just above/below
-             * the one we want */
-            x_high = (double)it->first;
-            y_high = (double)it->second;
-            it--;
-            x_low = (double)it->first;
-            y_low = (double)it->second;
-        }
-
-        slope = (y_high - y_low) / (x_high - x_low);
-        reference_y = y_low;
-        delta_x = (double)requested_x - x_low;
-        result = (Y)(reference_y + (slope * delta_x));
-    }
-
-    return result;
-}
-template double linear_interpolate_or_extrapolate(const std::map<int, double>* xy_map, int requested_x);       /* (int,double) */
-template double linear_interpolate_or_extrapolate(const std::map<double, double>* xy_map, double requested_x); /* (double,double) */
-
-} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_math.h b/third_party/vtr/libs/vtrutil/src/vtr_math.h
deleted file mode 100644
index 74b4ccebf..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_math.h
+++ /dev/null
@@ -1,168 +0,0 @@
-#ifndef VTR_MATH_H
-#define VTR_MATH_H
-
-#include <map>
-#include <vector>
-#include <cmath>
-
-#include "vtr_assert.h"
-
-/**
- * @file
- *
- * @brief This file defines some math operations
- */
-
-namespace vtr {
-/*********************** Math operations *************************************/
-
-///@brief Calculates the value pow(base, exp)
-int ipow(int base, int exp);
-
-///@brief Returns the median of an input vector.
-float median(std::vector<float> vector);
-
-///@brief Linear interpolation/Extrapolation
-template<typename X, typename Y>
-Y linear_interpolate_or_extrapolate(const std::map<X, Y>* xy_map, X requested_x);
-
-///@brief Integer rounding conversion for floats
-constexpr int nint(float val) { return static_cast<int>(val + 0.5); }
-
-///@brief Returns a 'safe' ratio which evaluates to zero if the denominator is zero
-template<typename T>
-T safe_ratio(T numerator, T denominator) {
-    if (denominator == T(0)) {
-        return 0;
-    }
-    return numerator / denominator;
-}
-
-///@brief Returns the median of the elements in range [first, last]
-template<typename InputIterator>
-double median(InputIterator first, InputIterator last) {
-    auto len = std::distance(first, last);
-    auto iter = first + len / 2;
-
-    if (len % 2 == 0) {
-        return (*iter + *(iter + 1)) / 2;
-    } else {
-        return *iter;
-    }
-}
-
-///@brief Returns the median of a whole container
-template<typename Container>
-double median(Container c) {
-    return median(std::begin(c), std::end(c));
-}
-
-/**
- * @brief Returns the geometric mean of the elments in range [first, last)
- *
- * To avoid potential round-off issues we transform the standard formula:
- *
- *      geomean = ( v_1 * v_2 * ... * v_n) ^ (1/n)
- *
- * by taking the log:
- *
- *      geomean = exp( (1 / n) * (log(v_1) + log(v_2) + ... + log(v_n)))
- */
-template<typename InputIterator>
-double geomean(InputIterator first, InputIterator last, double init = 1.) {
-    double log_sum = std::log(init);
-    size_t n = 0;
-    for (auto iter = first; iter != last; ++iter) {
-        log_sum += std::log(*iter);
-        n += 1;
-    }
-
-    if (n == 0) {
-        return init;
-    } else {
-        return std::exp((1. / n) * log_sum);
-    }
-}
-
-///@brief Returns the geometric mean of a whole container
-template<typename Container>
-double geomean(Container c) {
-    return geomean(std::begin(c), std::end(c));
-}
-
-///@brief Returns the arithmatic mean of the elements in range [first, last]
-template<typename InputIterator>
-double arithmean(InputIterator first, InputIterator last, double init = 0.) {
-    double sum = init;
-    size_t n = 0;
-    for (auto iter = first; iter != last; ++iter) {
-        sum += *iter;
-        n += 1;
-    }
-
-    if (n == 0) {
-        return init;
-    } else {
-        return sum / n;
-    }
-}
-
-///@brief Returns the aritmatic mean of a whole container
-template<typename Container>
-double arithmean(Container c) {
-    return arithmean(std::begin(c), std::end(c));
-}
-
-/**
- * @brief Returns the greatest common divisor of x and y
- *
- * Note that T should be an integral type
- */
-template<typename T>
-static T gcd(T x, T y) {
-    static_assert(std::is_integral<T>::value, "T must be integral");
-    // Euclidean algorithm
-    if (y == 0) {
-        return x;
-    }
-    return gcd(y, x % y);
-}
-
-/**
- * @brief Return the least common multiple of x and y
- *
- * Note that T should be an integral type
- */
-template<typename T>
-T lcm(T x, T y) {
-    static_assert(std::is_integral<T>::value, "T must be integral");
-
-    if (x == 0 && y == 0) {
-        return 0;
-    } else {
-        return (x / gcd(x, y)) * y;
-    }
-}
-
-constexpr double DEFAULT_REL_TOL = 1e-9;
-constexpr double DEFAULT_ABS_TOL = 0;
-
-///@brief Return true if a and b values are close to each other
-template<class T>
-bool isclose(T a, T b, T rel_tol, T abs_tol) {
-    if (std::isinf(a) && std::isinf(b)) return (std::signbit(a) == std::signbit(b));
-    if (std::isnan(a) && std::isnan(b)) return false;
-
-    T abs_largest = std::max(std::abs(a), std::abs(b));
-    return std::abs(a - b) <= std::max(rel_tol * abs_largest, abs_tol);
-}
-
-///@brief Return true if a and b values are close to each other (using the default tolerances)
-template<class T>
-bool isclose(T a, T b) {
-    return isclose<T>(a, b, DEFAULT_REL_TOL, DEFAULT_ABS_TOL);
-}
-
-} // namespace vtr
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_memory.cc b/third_party/vtr/libs/vtrutil/src/vtr_memory.cc
deleted file mode 100644
index 39d6b244b..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_memory.cc
+++ /dev/null
@@ -1,178 +0,0 @@
-#include <cstddef>
-#include <cstdlib>
-#include <math.h>
-
-#include "vtr_assert.h"
-#include "vtr_list.h"
-#include "vtr_memory.h"
-#include "vtr_error.h"
-#include "vtr_util.h"
-#include "vtr_log.h"
-
-#ifndef __GLIBC__
-#    include <stdlib.h>
-#else
-#    include <malloc.h>
-#endif
-
-namespace vtr {
-
-#ifndef __GLIBC__
-int malloc_trim(size_t /*pad*/) {
-    return 0;
-}
-#else
-int malloc_trim(size_t pad) {
-    return ::malloc_trim(pad);
-}
-#endif
-
-void* free(void* some) {
-    if (some) {
-        std::free(some);
-        some = nullptr;
-    }
-    return nullptr;
-}
-
-void* calloc(size_t nelem, size_t size) {
-    void* ret;
-    if (nelem == 0) {
-        return nullptr;
-    }
-
-    if ((ret = std::calloc(nelem, size)) == nullptr) {
-        throw VtrError("Unable to calloc memory.", __FILE__, __LINE__);
-    }
-    return ret;
-}
-
-void* malloc(size_t size) {
-    void* ret;
-    if (size == 0) {
-        return nullptr;
-    }
-
-    if ((ret = std::malloc(size)) == nullptr && size != 0) {
-        throw VtrError("Unable to malloc memory.", __FILE__, __LINE__);
-    }
-    return ret;
-}
-
-void* realloc(void* ptr, size_t size) {
-    void* ret;
-
-    ret = std::realloc(ptr, size);
-    if (nullptr == ret && size != 0) {
-        throw VtrError(string_fmt("Unable to realloc memory (ptr=%p, size=%d).", ptr, size),
-                       __FILE__, __LINE__);
-    }
-    return ret;
-}
-
-void* chunk_malloc(size_t size, t_chunk* chunk_info) {
-    /* This routine should be used for allocating fairly small data             *
-     * structures where memory-efficiency is crucial.  This routine allocates   *
-     * large "chunks" of data, and parcels them out as requested.  Whenever     *
-     * it mallocs a new chunk it adds it to the linked list pointed to by       *
-     * chunk_info->chunk_ptr_head.  This list can be used to free the	    *
-     * chunked memory.							    *
-     * Information about the currently open "chunk" must be stored by the       *
-     * user program.  chunk_info->mem_avail_ptr points to an int storing	    *
-     * how many bytes are left in the current chunk, while			    *
-     * chunk_info->next_mem_loc_ptr is the address of a pointer to the	    *
-     * next free bytes in the chunk.  To start a new chunk, simply set	    *
-     * chunk_info->mem_avail_ptr = 0.  Each independent set of data		    *
-     * structures should use a new chunk.                                       */
-
-    /* To make sure the memory passed back is properly aligned, I must *
-     * only send back chunks in multiples of the worst-case alignment  *
-     * restriction of the machine.  On most machines this should be    *
-     * a long, but on 64-bit machines it might be a long long or a     *
-     * double.  Change the typedef below if this is the case.          */
-
-    typedef size_t Align;
-
-    constexpr int CHUNK_SIZE = 32768;
-    constexpr int FRAGMENT_THRESHOLD = 100;
-
-    char* tmp_ptr;
-    int aligned_size;
-
-    VTR_ASSERT(chunk_info->mem_avail >= 0);
-
-    if ((size_t)(chunk_info->mem_avail) < size) { /* Need to malloc more memory. */
-        if (size > CHUNK_SIZE) {                  /* Too big, use standard routine. */
-                                                  /* Want to allocate a block of memory the size of size.
-                                                   * i.e. malloc(size) */
-            tmp_ptr = new char[size];
-
-            /* When debugging, uncomment the code below to see if memory allocation size */
-            /* makes sense */
-            //#ifdef DEBUG
-            // vtr_printf("NB: my_chunk_malloc got a request for %d bytes.\n", size);
-            // vtr_printf("You should consider using vtr::malloc for such big requests.\n");
-            // #endif
-
-            VTR_ASSERT(chunk_info != nullptr);
-            chunk_info->chunk_ptr_head = insert_in_vptr_list(chunk_info->chunk_ptr_head, tmp_ptr);
-            return tmp_ptr;
-        }
-
-        if (chunk_info->mem_avail < FRAGMENT_THRESHOLD) { /* Only a small scrap left. */
-            chunk_info->next_mem_loc_ptr = new char[CHUNK_SIZE];
-            chunk_info->mem_avail = CHUNK_SIZE;
-            VTR_ASSERT(chunk_info != nullptr);
-            chunk_info->chunk_ptr_head = insert_in_vptr_list(chunk_info->chunk_ptr_head, chunk_info->next_mem_loc_ptr);
-        }
-
-        /* Execute else clause only when the chunk we want is pretty big,  *
-         * and would leave too big an unused fragment.  Then we use malloc *
-         * to allocate normally.                                           */
-
-        else {
-            tmp_ptr = new char[size];
-            VTR_ASSERT(chunk_info != nullptr);
-            chunk_info->chunk_ptr_head = insert_in_vptr_list(chunk_info->chunk_ptr_head, tmp_ptr);
-
-            return tmp_ptr;
-        }
-    }
-
-    /* Find the smallest distance to advance the memory pointer and keep *
-     * everything aligned.                                               */
-
-    if (size % sizeof(Align) == 0) {
-        aligned_size = size;
-    } else {
-        aligned_size = size + sizeof(Align) - size % sizeof(Align);
-    }
-
-    tmp_ptr = chunk_info->next_mem_loc_ptr;
-    chunk_info->next_mem_loc_ptr += aligned_size;
-    chunk_info->mem_avail -= aligned_size;
-    return tmp_ptr;
-}
-
-void free_chunk_memory(t_chunk* chunk_info) {
-    /* Frees the memory allocated by a sequence of calls to my_chunk_malloc. */
-
-    t_linked_vptr *curr_ptr, *prev_ptr;
-
-    curr_ptr = chunk_info->chunk_ptr_head;
-
-    while (curr_ptr != nullptr) {
-        /* Must cast pointers to type char*, since the're of type void*, which delete can't
-         * be called on.*/
-        delete[]((char*)curr_ptr->data_vptr); /* Free memory "chunk". */
-        prev_ptr = curr_ptr;
-        curr_ptr = curr_ptr->next;
-        delete (t_linked_vptr*)prev_ptr; /* Free memory used to track "chunk". */
-    }
-
-    chunk_info->chunk_ptr_head = nullptr;
-    chunk_info->mem_avail = 0;
-    chunk_info->next_mem_loc_ptr = nullptr;
-}
-
-} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_memory.h b/third_party/vtr/libs/vtrutil/src/vtr_memory.h
deleted file mode 100644
index 46a486569..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_memory.h
+++ /dev/null
@@ -1,151 +0,0 @@
-#ifndef VTR_MEMORY_H
-#define VTR_MEMORY_H
-#include <cstddef>
-#include <cstdlib>
-#include <new>
-
-#ifdef _WIN32
-#    include <cerrno>
-#    include <malloc.h>
-#endif
-
-namespace vtr {
-
-/**
- * @brief This function will force the container to be cleared
- *
- * It release it's held memory.
- * For efficiency, STL containers usually don't
- * release their actual heap-allocated memory until
- * destruction (even if Container::clear() is called).
- */
-template<typename Container>
-void release_memory(Container& container) {
-    ///@brief Force a re-allocation to happen by swapping in a new (empty) container.
-    Container().swap(container);
-}
-
-struct t_linked_vptr; //Forward declaration
-
-/**
- * This structure keeps track to chenks of memory
- *
- * This structure is to keep track of chunks of memory that is being	
- * allocated to save overhead when allocating very small memory pieces. 
- * For a complete description, please see the comment in chunk_malloc
- */
-struct t_chunk {
-    t_linked_vptr* chunk_ptr_head = nullptr;
-
-    //chunk_ptr_head->data_vptr: head of the entire linked
-    //list of allocated "chunk" memory;
-    //chunk_ptr_head->next: pointer to the next chunk on the linked list
-    int mem_avail = 0;                ///< number of bytes left in the current chunk
-    char* next_mem_loc_ptr = nullptr; ///< pointer to the first available (free) byte in the current chunk
-};
-
-void* free(void* some);
-void* calloc(size_t nelem, size_t size);
-void* malloc(size_t size);
-void* realloc(void* ptr, size_t size);
-
-void* chunk_malloc(size_t size, t_chunk* chunk_info);
-void free_chunk_memory(t_chunk* chunk_info);
-
-///@brief Like chunk_malloc, but with proper C++ object initialization
-template<typename T>
-T* chunk_new(t_chunk* chunk_info) {
-    void* block = chunk_malloc(sizeof(T), chunk_info);
-
-    return new (block) T(); //Placement new
-}
-
-///@brief Call the destructor of an obj which must have been allocated in the specified chunk
-template<typename T>
-void chunk_delete(T* obj, t_chunk* /*chunk_info*/) {
-    if (obj) {
-        obj->~T(); // Manually call destructor
-        // Currently we don't mark the unused memory as free
-    }
-}
-
-/**
- * @brief Cross platform wrapper around GNU's malloc_trim()
- *
- * TODO: This is only used in one place within VPR, consider removing it
- */
-int malloc_trim(size_t pad);
-
-inline int memalign(void** ptr_out, size_t align, size_t size) {
-#ifdef _WIN32
-    void* temp_ptr = _aligned_malloc(size, align);
-    if (temp_ptr != NULL) {
-        *ptr_out = temp_ptr;
-        return 0;
-    } else {
-        return errno;
-    }
-#else
-    return posix_memalign(ptr_out, align, size);
-#endif
-}
-
-/**
- * @brief A macro generates a prefetch instruction on all architectures that include it.
- * 
- * This is all modern x86 and ARM64 platforms.
- *
- * This is a macro because it has to be.  rw and locality must be constants,
- * not just constexpr.
- */
-#define VTR_PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality)
-
-/**
- * @brief aligned_allocator is a STL allocator that allocates memory in an aligned fashion
- *
- * works if supported by the platform
- * 
- * It is worth noting the C++20 std::allocator does aligned allocations, but
- * C++20 has poor support.
- */
-template<class T>
-struct aligned_allocator {
-    using value_type = T;
-    using pointer = T*;
-    using const_pointer = const T*;
-    using reference = T&;
-    using const_reference = const T&;
-    using size_type = std::size_t;
-    using difference_type = std::ptrdiff_t;
-
-    pointer allocate(size_type n, const void* /*hint*/ = 0) {
-        void* data;
-        int ret = vtr::memalign(&data, alignof(T), sizeof(T) * n);
-        if (ret != 0) {
-            throw std::bad_alloc();
-        }
-        return static_cast<pointer>(data);
-    }
-
-    void deallocate(T* p, size_type /*n*/) {
-#ifdef _WIN32
-        _aligned_free(p);
-#else
-        vtr::free(p);
-#endif
-    }
-};
-
-/**
- * @brief compare two aligned_allocators.
- *
- * Since the allocator doesn't have any internal state, all allocators for a given type are the same.
- */
-template<typename T>
-bool operator==(const aligned_allocator<T>&, const aligned_allocator<T>&) {
-    return true;
-}
-
-} // namespace vtr
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_ndmatrix.h b/third_party/vtr/libs/vtrutil/src/vtr_ndmatrix.h
deleted file mode 100644
index c3a4692ed..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_ndmatrix.h
+++ /dev/null
@@ -1,409 +0,0 @@
-#ifndef VTR_ND_MATRIX_H
-#define VTR_ND_MATRIX_H
-#include <algorithm>
-#include <array>
-#include <memory>
-
-#include "vtr_assert.h"
-
-namespace vtr {
-
-/**
- * @brief Proxy class for a sub-matrix of a NdMatrix class.
- * 
- * This is used to allow chaining of array indexing [] operators in a natural way.
- *
- * Each instance of this class peels off one-dimension and returns a NdMatrixProxy representing
- * the resulting sub-matrix. This is repeated recursively until we hit the 1-dimensional base-case.
- *
- * Since this expansion happens at compiler time all the proxy classes get optimized away,
- * yielding both high performance and generality.
- * 
- * Recursive case: N-dimensional array
- */
-template<typename T, size_t N>
-class NdMatrixProxy {
-  public:
-    static_assert(N > 0, "Must have at least one dimension");
-
-    /**
-     * @brief Construct a matrix proxy object
-     *
-     *    @param dim_sizes: Array of dimension sizes
-     *    @param idim: The dimension associated with this proxy
-     *    @param dim_stride: The stride of this dimension (i.e. how many element in memory between indicies of this dimension)
-     *    @param  start: Pointer to the start of the sub-matrix this proxy represents
-     */
-    NdMatrixProxy<T, N>(const size_t* dim_sizes, const size_t* dim_strides, T* start)
-        : dim_sizes_(dim_sizes)
-        , dim_strides_(dim_strides)
-        , start_(start) {}
-
-    NdMatrixProxy<T, N>& operator=(const NdMatrixProxy<T, N>& other) = delete;
-
-    ///@brief const [] operator
-    const NdMatrixProxy<T, N - 1> operator[](size_t index) const {
-        VTR_ASSERT_SAFE_MSG(index < dim_sizes_[0], "Index out of range (above dimension maximum)");
-        VTR_ASSERT_SAFE_MSG(dim_sizes_[1] > 0, "Can not index into zero-sized dimension");
-
-        // Strip off one dimension
-        return NdMatrixProxy<T, N - 1>(
-            dim_sizes_ + 1,                    // Pass the dimension information
-            dim_strides_ + 1,                  // Pass the stride for the next dimension
-            start_ + dim_strides_[0] * index); // Advance to index in this dimension
-    }
-
-    ///@brief [] operator
-    NdMatrixProxy<T, N - 1> operator[](size_t index) {
-        // Call the const version and cast-away constness
-        return const_cast<const NdMatrixProxy<T, N>*>(this)->operator[](index);
-    }
-
-  private:
-    const size_t* dim_sizes_;
-    const size_t* dim_strides_;
-    T* start_;
-};
-
-///@brief Base case: 1-dimensional array
-template<typename T>
-class NdMatrixProxy<T, 1> {
-  public:
-    /**
-     * @brief Construct a 1-d matrix proxy object
-     *
-     *    @param dim_sizes: Array of dimension sizes
-     *    @param dim_stride: The stride of this dimension (i.e. how many element in memory between indicies of this dimension)
-     *    @param  start: Pointer to the start of the sub-matrix this proxy represents
-     */
-    NdMatrixProxy<T, 1>(const size_t* dim_sizes, const size_t* dim_stride, T* start)
-        : dim_sizes_(dim_sizes)
-        , dim_strides_(dim_stride)
-        , start_(start) {}
-
-    NdMatrixProxy<T, 1>& operator=(const NdMatrixProxy<T, 1>& other) = delete;
-
-    ///@brief const [] operator
-    const T& operator[](size_t index) const {
-        VTR_ASSERT_SAFE_MSG(dim_strides_[0] == 1, "Final dimension must have stride 1");
-        VTR_ASSERT_SAFE_MSG(index < dim_sizes_[0], "Index out of range (above dimension maximum)");
-
-        //Base case
-        return start_[index];
-    }
-
-    ///@brief [] operator
-    T& operator[](size_t index) {
-        // Call the const version and cast-away constness
-        return const_cast<T&>(const_cast<const NdMatrixProxy<T, 1>*>(this)->operator[](index));
-    }
-
-    /**
-     * @brief  Backward compitability
-     *
-     * For legacy compatibility (i.e. code expecting a pointer) we allow this base dimension
-     * case to retrieve a raw pointer to the last dimension elements.
-     *
-     * Note that it is the caller's responsibility to use this correctly; care must be taken
-     * not to clobber elements in other dimensions
-     */
-    const T* data() const {
-        return start_;
-    }
-
-    ///@brief same as above but allow update the value
-    T* data() {
-        // Call the const version and cast-away constness
-        return const_cast<T*>(const_cast<const NdMatrixProxy<T, 1>*>(this)->data());
-    }
-
-  private:
-    const size_t* dim_sizes_;
-    const size_t* dim_strides_;
-    T* start_;
-};
-
-/**
- * @brief Base class for an N-dimensional matrix
- *
- * Base class for an N-dimensional matrix supporting arbitrary index ranges per dimension.
- * This class implements all of the matrix handling (lifetime etc.) except for indexing
- * (which is implemented in the NdMatrix class). Indexing is split out to allows specialization
- * (of indexing for N = 1.
- *
- * Implementation:
- * 
- * This class uses a single linear array to store the matrix in c-style (row major)
- * order. That is, the right-most index is laid out contiguous memory.
- *
- * This should improve memory usage (no extra pointers to store for each dimension),
- * and cache locality (less indirection via pointers, predictable strides).
- *
- * The indicies are calculated based on the dimensions to access the appropriate elements.
- * Since the indexing calculations are visible to the compiler at compile time they can be
- * optimized to be efficient.
- */
-template<typename T, size_t N>
-class NdMatrixBase {
-  public:
-    static_assert(N >= 1, "Minimum dimension 1");
-
-    ///@brief An empty matrix (all dimensions size zero)
-    NdMatrixBase() {
-        clear();
-    }
-
-    /**
-     * @brief Specified dimension sizes:
-     *
-     *      [0..dim_sizes[0])
-     *      [0..dim_sizes[1])
-     *      ...
-     *      with optional fill value
-     */
-    NdMatrixBase(std::array<size_t, N> dim_sizes, T value = T()) {
-        resize(dim_sizes, value);
-    }
-
-  public: //Accessors
-    ///@brief Returns the size of the matrix (number of elements)
-    size_t size() const {
-        VTR_ASSERT_DEBUG_MSG(calc_size() == size_, "Calculated and current matrix size must be consistent");
-        return size_;
-    }
-
-    ///@brief Returns true if there are no elements in the matrix
-    bool empty() const {
-        return size() == 0;
-    }
-
-    ///@brief Returns the number of dimensions (i.e. N)
-    size_t ndims() const {
-        return dim_sizes_.size();
-    }
-
-    ///@brief Returns the size of the ith dimension
-    size_t dim_size(size_t i) const {
-        VTR_ASSERT_SAFE(i < ndims());
-
-        return dim_sizes_[i];
-    }
-
-    ///@brief Returns the starting index of ith dimension
-    size_t begin_index(size_t i) const {
-        VTR_ASSERT_SAFE(i < ndims());
-
-        return 0;
-    }
-
-    ///@brief Returns the one-past-the-end index of the ith dimension
-    size_t end_index(size_t i) const {
-        VTR_ASSERT_SAFE(i < ndims());
-
-        return dim_sizes_[i];
-    }
-
-    ///@brief const Flat accessors of NdMatrix
-    const T& get(size_t i) const {
-        VTR_ASSERT_SAFE(i < size_);
-        return data_[i];
-    }
-
-    ///@brief Flat accessors of NdMatrix
-    T& get(size_t i) {
-        VTR_ASSERT_SAFE(i < size_);
-        return data_[i];
-    }
-
-  public: //Mutators
-    ///@brief Set all elements to 'value'
-    void fill(T value) {
-        std::fill(data_.get(), data_.get() + size(), value);
-    }
-
-    /**
-     * @brief Resize the matrix to the specified dimension ranges
-     *
-     * If 'value' is specified all elements will be initialized to it,
-     * otherwise they will be default constructed.
-     */
-    void resize(std::array<size_t, N> dim_sizes, T value = T()) {
-        dim_sizes_ = dim_sizes;
-        size_ = calc_size();
-        alloc();
-        fill(value);
-        if (size_ > 0) {
-            dim_strides_[0] = size_ / dim_sizes_[0];
-            for (size_t dim = 1; dim < N; ++dim) {
-                dim_strides_[dim] = dim_strides_[dim - 1] / dim_sizes_[dim];
-            }
-        } else {
-            dim_strides_.fill(0);
-        }
-    }
-
-    ///@brief Reset the matrix to size zero
-    void clear() {
-        data_.reset(nullptr);
-        dim_sizes_.fill(0);
-        dim_strides_.fill(0);
-        size_ = 0;
-    }
-
-  public: //Lifetime management
-    ///@brief Copy constructor
-    NdMatrixBase(const NdMatrixBase& other)
-        : NdMatrixBase(other.dim_sizes_) {
-        std::copy(other.data_.get(), other.data_.get() + other.size(), data_.get());
-    }
-
-    ///@brief Move constructor
-    NdMatrixBase(NdMatrixBase&& other)
-        : NdMatrixBase() {
-        swap(*this, other);
-    }
-
-    /**
-     * @brief Copy/move assignment
-     *
-     * Note that rhs is taken by value (copy-swap idiom)
-     */
-    NdMatrixBase& operator=(NdMatrixBase rhs) {
-        swap(*this, rhs);
-        return *this;
-    }
-
-    ///@brief Swap two NdMatrixBase objects
-    friend void swap(NdMatrixBase<T, N>& m1, NdMatrixBase<T, N>& m2) {
-        using std::swap;
-        swap(m1.size_, m2.size_);
-        swap(m1.dim_sizes_, m2.dim_sizes_);
-        swap(m1.dim_strides_, m2.dim_strides_);
-        swap(m1.data_, m2.data_);
-    }
-
-  private:
-    ///@brief Allocate space for all the elements
-    void alloc() {
-        data_ = std::make_unique<T[]>(size());
-    }
-
-    ///@brief Returns the size of the matrix (number of elements) calucated from the current dimensions
-    size_t calc_size() const {
-        ///@brief Size is the product of all dimension sizes
-        size_t cnt = dim_size(0);
-        for (size_t idim = 1; idim < ndims(); ++idim) {
-            cnt *= dim_size(idim);
-        }
-        return cnt;
-    }
-
-  protected:
-    size_t size_ = 0;
-    std::array<size_t, N> dim_sizes_;
-    std::array<size_t, N> dim_strides_;
-    std::unique_ptr<T[]> data_ = nullptr;
-};
-
-/**
- * @brief An N-dimensional matrix supporting arbitrary (continuous) index ranges per dimension.
- * 
- * Examples:
- * 
- *       //A 2-dimensional matrix with indicies [0..4][0..9]
- *       NdMatrix<int,2> m1({5,10});
- * 
- *       //Accessing an element
- *       int i = m1[3][5];
- * 
- *       //Setting an element
- *       m1[2][8] = 0;
- * 
- *       //A 3-dimensional matrix with indicies [0..4][0..9][0..19]
- *       NdMatrix<int,3> m2({5,10,20});
- * 
- *       //A 2-dimensional matrix with indicies [0..4][0..9], with all entries
- *       //initialized to 42
- *       NdMatrix<int,2> m3({5,10}, 42);
- * 
- *       //Filling all entries with value 101
- *       m3.fill(101);
- * 
- *       //Resizing an existing matrix (all values reset to default constucted value)
- *       m3.resize({5,5})
- * 
- *       //Resizing an existing matrix (all elements set to value 88)
- *       m3.resize({15,55}, 88)
- */
-template<typename T, size_t N>
-class NdMatrix : public NdMatrixBase<T, N> {
-    //General case
-    static_assert(N >= 2, "Minimum dimension 2");
-
-  public:
-    ///@brief Use the base constructors
-    using NdMatrixBase<T, N>::NdMatrixBase;
-
-  public:
-    /**
-     * @brief Access an element
-     *
-     * Returns a proxy-object to allow chained array-style indexing  (N >= 2 case)
-     */
-    const NdMatrixProxy<T, N - 1> operator[](size_t index) const {
-        VTR_ASSERT_SAFE_MSG(this->dim_size(0) > 0, "Can not index into size zero dimension");
-        VTR_ASSERT_SAFE_MSG(this->dim_size(1) > 0, "Can not index into size zero dimension");
-        VTR_ASSERT_SAFE_MSG(index < this->dim_sizes_[0], "Index out of range (above dimension maximum)");
-
-        // Peel off the first dimension
-        return NdMatrixProxy<T, N - 1>(
-            this->dim_sizes_.data() + 1,                        //Pass the dimension information
-            this->dim_strides_.data() + 1,                      //Pass the stride for the next dimension
-            this->data_.get() + this->dim_strides_[0] * index); //Advance to index in this dimension
-    }
-
-    /**
-     * @brief Access an element
-     *
-     * Returns a proxy-object to allow chained array-style indexing
-     */
-    NdMatrixProxy<T, N - 1> operator[](size_t index) {
-        //Call the const version, since returned by value don't need to worry about const
-        return const_cast<const NdMatrix<T, N>*>(this)->operator[](index);
-    }
-};
-
-/**
- * @brief A 1-dimensional matrix supporting arbitrary (continuous) index ranges per dimension.
- *
- * This is considered a specialization for N=1
- */
-template<typename T>
-class NdMatrix<T, 1> : public NdMatrixBase<T, 1> {
-  public:
-    ///@brief Use the base constructors
-    using NdMatrixBase<T, 1>::NdMatrixBase;
-
-  public:
-    ///@brief Access an element (immutable)
-    const T& operator[](size_t index) const {
-        VTR_ASSERT_SAFE_MSG(this->dim_size(0) > 0, "Can not index into size zero dimension");
-        VTR_ASSERT_SAFE_MSG(index >= 0, "Index out of range (below dimension minimum)");
-        VTR_ASSERT_SAFE_MSG(index < this->dim_sizes_[0], "Index out of range (above dimension maximum)");
-
-        return this->data_[index];
-    }
-
-    ///@brief Access an element (mutable)
-    T& operator[](size_t index) {
-        //Call the const version, and cast away const-ness
-        return const_cast<T&>(const_cast<const NdMatrix<T, 1>*>(this)->operator[](index));
-    }
-};
-
-///@brief Convenient short forms for common NdMatricies
-template<typename T>
-using Matrix = NdMatrix<T, 2>;
-
-} // namespace vtr
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_ndoffsetmatrix.h b/third_party/vtr/libs/vtrutil/src/vtr_ndoffsetmatrix.h
deleted file mode 100644
index c42553038..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_ndoffsetmatrix.h
+++ /dev/null
@@ -1,459 +0,0 @@
-#ifndef VTR_ND_OFFSET_MATRIX_H
-#define VTR_ND_OFFSET_MATRIX_H
-#include <array>
-#include <memory>
-
-#include "vtr_assert.h"
-
-namespace vtr {
-
-/**
- * @brief  A half-open range specification for a matrix dimension [begin_index, last_index)
- * 
- * It comes with valid indicies from [begin_index() ... end_index()-1], provided size() > 0.
- */
-class DimRange {
-  public:
-    ///@brief default constructor
-    DimRange() = default;
-
-    ///@brief a constructor with begin_index, end_index
-    DimRange(size_t begin, size_t end)
-        : begin_index_(begin)
-        , end_index_(end) {}
-
-    ///@brief Return the begin index
-    size_t begin_index() const { return begin_index_; }
-
-    ///@brief Return the end index
-    size_t end_index() const { return end_index_; }
-
-    ///@brief Return the size
-    size_t size() const { return end_index_ - begin_index_; }
-
-  private:
-    size_t begin_index_ = 0;
-    size_t end_index_ = 0;
-};
-
-/**
- * @brief Proxy class for a sub-matrix of a NdOffsetMatrix class.
- *
- * This is used to allow chaining of array indexing [] operators in a natural way.
- * 
- * Each instance of this class peels off one-dimension and returns a NdOffsetMatrixProxy representing
- * the resulting sub-matrix. This is repeated recursively until we hit the 1-dimensional base-case.
- * 
- * Since this expansion happens at compiler time all the proxy classes get optimized away,
- * yielding both high performance and generality.
- * 
- * Recursive case: N-dimensional array
- */
-template<typename T, size_t N>
-class NdOffsetMatrixProxy {
-  public:
-    static_assert(N > 0, "Must have at least one dimension");
-
-    /**
-     * @brief Construct a matrix proxy object
-     *
-     *   dim_ranges: Array of DimRange objects
-     * idim: The dimension associated with this proxy
-     *  dim_stride: The stride of this dimension (i.e. how many element in memory between indicies of this dimension)
-     *  start: Pointer to the start of the sub-matrix this proxy represents
-     */
-    NdOffsetMatrixProxy<T, N>(const DimRange* dim_ranges, size_t idim, size_t dim_stride, T* start)
-        : dim_ranges_(dim_ranges)
-        , idim_(idim)
-        , dim_stride_(dim_stride)
-        , start_(start) {}
-
-    ///@brief const [] operator
-    const NdOffsetMatrixProxy<T, N - 1> operator[](size_t index) const {
-        VTR_ASSERT_SAFE_MSG(index >= dim_ranges_[idim_].begin_index(), "Index out of range (below dimension minimum)");
-        VTR_ASSERT_SAFE_MSG(index < dim_ranges_[idim_].end_index(), "Index out of range (above dimension maximum)");
-
-        /*
-         * Calculate the effective index
-         *
-         * The elements are stored in zero-indexed form, so we need to adjust
-         * for any non-zero minimum index
-         */
-        size_t effective_index = index - dim_ranges_[idim_].begin_index();
-
-        //Determine the stride of the next dimension
-        size_t next_dim_stride = dim_stride_ / dim_ranges_[idim_ + 1].size();
-
-        //Strip off one dimension
-        return NdOffsetMatrixProxy<T, N - 1>(dim_ranges_,                             //Pass the dimension information
-                                             idim_ + 1,                               //Pass the next dimension
-                                             next_dim_stride,                         //Pass the stride for the next dimension
-                                             start_ + dim_stride_ * effective_index); //Advance to index in this dimension
-    }
-
-    ///@brief [] operator
-    NdOffsetMatrixProxy<T, N - 1> operator[](size_t index) {
-        //Call the const version and cast-away constness
-        return const_cast<const NdOffsetMatrixProxy<T, N>*>(this)->operator[](index);
-    }
-
-  private:
-    const DimRange* dim_ranges_;
-    const size_t idim_;
-    const size_t dim_stride_;
-    T* start_;
-};
-
-///@brief Base case: 1-dimensional array
-template<typename T>
-class NdOffsetMatrixProxy<T, 1> {
-  public:
-    /**
-     * @brief Construct a matrix proxy object
-     *
-     *     - dim_ranges: Array of DimRange objects
-     *     - dim_stride: The stride of this dimension (i.e. how many element in memory between indicies of this dimension)
-     *     - start: Pointer to the start of the sub-matrix this proxy represents
-     */
-    NdOffsetMatrixProxy<T, 1>(const DimRange* dim_ranges, size_t idim, size_t dim_stride, T* start)
-        : dim_ranges_(dim_ranges)
-        , idim_(idim)
-        , dim_stride_(dim_stride)
-        , start_(start) {}
-
-    ///@brief const [] operator
-    const T& operator[](size_t index) const {
-        VTR_ASSERT_SAFE_MSG(dim_stride_ == 1, "Final dimension must have stride 1");
-        VTR_ASSERT_SAFE_MSG(index >= dim_ranges_[idim_].begin_index(), "Index out of range (below dimension minimum)");
-        VTR_ASSERT_SAFE_MSG(index < dim_ranges_[idim_].end_index(), "Index out of range (above dimension maximum)");
-
-        //The elements are stored in zero-indexed form, so we need to adjust
-        //for any non-zero minimum index
-        size_t effective_index = index - dim_ranges_[idim_].begin_index();
-
-        //Base case
-        return start_[effective_index];
-    }
-
-    ///@brief [] operator
-    T& operator[](size_t index) {
-        //Call the const version and cast-away constness
-        return const_cast<T&>(const_cast<const NdOffsetMatrixProxy<T, 1>*>(this)->operator[](index));
-    }
-
-  private:
-    const DimRange* dim_ranges_;
-    const size_t idim_;
-    const size_t dim_stride_;
-    T* start_;
-};
-
-/**
- * @brief Base class for an N-dimensional matrix supporting arbitrary index ranges per dimension.
- *
- * This class implements all of the matrix handling (lifetime etc.) except for indexing
- * (which is implemented in the NdOffsetMatrix class). Indexing is split out to allows specialization
- * of indexing for N = 1.
- * 
- * Implementation:
- * 
- * This class uses a single linear array to store the matrix in c-style (row major)
- * order. That is, the right-most index is laid out contiguous memory.
- * 
- * This should improve memory usage (no extra pointers to store for each dimension),
- * and cache locality (less indirection via pointers, predictable strides).
- * 
- * The indicies are calculated based on the dimensions to access the appropriate elements.
- * Since the indexing calculations are visible to the compiler at compile time they can be
- * optimized to be efficient.
- */
-template<typename T, size_t N>
-class NdOffsetMatrixBase {
-  public:
-    static_assert(N >= 1, "Minimum dimension 1");
-
-    ///@brief An empty matrix (all dimensions size zero)
-    NdOffsetMatrixBase() {
-        clear();
-    }
-
-    /** 
-     * @brief Specified dimension sizes:
-     *
-     *      [0..dim_sizes[0])
-     *      [0..dim_sizes[1])
-     *      ...
-     * with optional fill value
-     */
-    NdOffsetMatrixBase(std::array<size_t, N> dim_sizes, T value = T()) {
-        resize(dim_sizes, value);
-    }
-
-    /**
-     * @brief Specified dimension index ranges:
-     *
-     *      [dim_ranges[0].begin_index() ... dim_ranges[1].end_index())
-     *      [dim_ranges[1].begin_index() ... dim_ranges[1].end_index())
-     *      ...
-     * with optional fill value
-     */
-    NdOffsetMatrixBase(std::array<DimRange, N> dim_ranges, T value = T()) {
-        resize(dim_ranges, value);
-    }
-
-  public: //Accessors
-    ///@brief Returns the size of the matrix (number of elements)
-    size_t size() const {
-        ///@brief Size is the product of all dimension sizes
-        size_t cnt = dim_size(0);
-        for (size_t idim = 1; idim < ndims(); ++idim) {
-            cnt *= dim_size(idim);
-        }
-        return cnt;
-    }
-
-    ///@brief Returns true if there are no elements in the matrix
-    bool empty() const {
-        return size() == 0;
-    }
-
-    ///@brief Returns the number of dimensions (i.e. N)
-    size_t ndims() const {
-        return dim_ranges_.size();
-    }
-
-    ///@brief Returns the size of the ith dimension
-    size_t dim_size(size_t i) const {
-        VTR_ASSERT_SAFE(i < ndims());
-
-        return dim_ranges_[i].size();
-    }
-
-    ///@brief Returns the starting index of ith dimension
-    size_t begin_index(size_t i) const {
-        VTR_ASSERT_SAFE(i < ndims());
-
-        return dim_ranges_[i].begin_index();
-    }
-
-    ///@brief Returns the one-past-the-end index of the ith dimension
-    size_t end_index(size_t i) const {
-        VTR_ASSERT_SAFE(i < ndims());
-
-        return dim_ranges_[i].end_index();
-    }
-
-  public: //Mutators
-    ///@brief Set all elements to 'value'
-    void fill(T value) {
-        std::fill(data_.get(), data_.get() + size(), value);
-    }
-
-    /**
-     * @brief Resize the matrix to the specified dimensions
-     *
-     * If 'value' is specified all elements will be initialized to it,
-     * otherwise they will be default constructed.
-     */
-    void resize(std::array<size_t, N> dim_sizes, T value = T()) {
-        //Convert dimension to range [0..dim)
-        for (size_t i = 0; i < dim_sizes.size(); ++i) {
-            dim_ranges_[i] = {0, dim_sizes[i]};
-        }
-        alloc();
-        fill(value);
-    }
-
-    /**
-     * @brief Resize the matrix to the specified dimension ranges
-     *
-     * If 'value' is specified all elements will be initialized to it,
-     * otherwise they will be default constructed.
-     */
-    void resize(std::array<DimRange, N> dim_ranges, T value = T()) {
-        dim_ranges_ = dim_ranges;
-        alloc();
-        fill(value);
-    }
-
-    ///@brief Reset the matrix to size zero
-    void clear() {
-        data_.reset(nullptr);
-        for (size_t i = 0; i < dim_ranges_.size(); ++i) {
-            dim_ranges_[i] = {0, 0};
-        }
-    }
-
-  public: //Lifetime management
-    ///@brief Copy constructor
-    NdOffsetMatrixBase(const NdOffsetMatrixBase& other)
-        : NdOffsetMatrixBase(other.dim_ranges_) {
-        std::copy(other.data_.get(), other.data_.get() + other.size(), data_.get());
-    }
-
-    ///@brief Move constructor
-    NdOffsetMatrixBase(NdOffsetMatrixBase&& other)
-        : NdOffsetMatrixBase() {
-        swap(*this, other);
-    }
-
-    /**
-     * @brief Copy/move assignment
-     *
-     * Note that rhs is taken by value (copy-swap idiom)
-     */
-    NdOffsetMatrixBase& operator=(NdOffsetMatrixBase rhs) {
-        swap(*this, rhs);
-        return *this;
-    }
-
-    ///@brief Swap two NdOffsetMatrixBase objects
-    friend void swap(NdOffsetMatrixBase<T, N>& m1, NdOffsetMatrixBase<T, N>& m2) {
-        using std::swap;
-        swap(m1.dim_ranges_, m2.dim_ranges_);
-        swap(m1.data_, m2.data_);
-    }
-
-  private:
-    // Allocate space for all the elements
-    void alloc() {
-        data_ = std::make_unique<T[]>(size());
-    }
-
-  protected:
-    std::array<DimRange, N> dim_ranges_;
-    std::unique_ptr<T[]> data_ = nullptr;
-};
-
-/**
- * @brief An N-dimensional matrix supporting arbitrary (continuous) index ranges per dimension.
- * 
- * If no second template parameter is provided defaults to a 2-dimensional
- * matrix
- * 
- * Examples:
- * 
- *       //A 2-dimensional matrix with indicies [0..4][0..9]
- *       NdOffsetMatrix<int,2> m1({5,10});
- * 
- *       //Accessing an element
- *       int i = m4[3][5];
- * 
- *       //Setting an element
- *       m4[6][20] = 0;
- * 
- *       //A 2-dimensional matrix with indicies [2..6][5..9]
- *       // Note that C++ requires one more set of curly brace than you would expect
- *       NdOffsetMatrix<int,2> m2({{{2,7},{5,10}}});
- * 
- *       //A 3-dimensional matrix with indicies [0..4][0..9][0..19]
- *       NdOffsetMatrix<int,3> m3({5,10,20});
- * 
- *       //A 3-dimensional matrix with indicies [2..6][1..19][50..89]
- *       NdOffsetMatrix<int,3> m4({{{2,7}, {1,20}, {50,90}}});
- * 
- *       //A 2-dimensional matrix with indicies [2..6][1..20], with all entries
- *       //intialized to 42
- *       NdOffsetMatrix<int,2> m4({{{2,7}, {1,21}}}, 42);
- * 
- *       //A 2-dimensional matrix with indicies [0..4][0..9], with all entries
- *       //initialized to 42
- *       NdOffsetMatrix<int,2> m1({5,10}, 42);
- * 
- *       //Filling all entries with value 101
- *       m1.fill(101);
- * 
- *       //Resizing an existing matrix (all values reset to default constucted value)
- *       m1.resize({5,5})
- * 
- *       //Resizing an existing matrix (all elements set to value 88)
- *       m1.resize({15,55}, 88)
- */
-template<typename T, size_t N>
-class NdOffsetMatrix : public NdOffsetMatrixBase<T, N> {
-    //General case
-    static_assert(N >= 2, "Minimum dimension 2");
-
-  public:
-    ///@brief Use the base constructors
-    using NdOffsetMatrixBase<T, N>::NdOffsetMatrixBase;
-
-  public:
-    /**
-     * @brief Access an element
-     *
-     * Returns a proxy-object to allow chained array-style indexing  (N >= 2 case)
-     * template<typename = typename std::enable_if<N >= 2>::type, typename T1=T>
-     */
-    const NdOffsetMatrixProxy<T, N - 1> operator[](size_t index) const {
-        VTR_ASSERT_SAFE_MSG(this->dim_size(0) > 0, "Can not index into size zero dimension");
-        VTR_ASSERT_SAFE_MSG(this->dim_size(1) > 0, "Can not index into size zero dimension");
-        VTR_ASSERT_SAFE_MSG(index >= this->dim_ranges_[0].begin_index(), "Index out of range (below dimension minimum)");
-        VTR_ASSERT_SAFE_MSG(index < this->dim_ranges_[0].end_index(), "Index out of range (above dimension maximum)");
-
-        /*
-         * Clacluate the effective index
-         * 
-         * The elements are stored in zero-indexed form, so adjust for any
-         * non-zero minimum index in this dimension
-         */
-        size_t effective_index = index - this->dim_ranges_[0].begin_index();
-
-        //Calculate the stride for the current dimension
-        size_t dim_stride = this->size() / this->dim_size(0);
-
-        //Calculate the stride for the next dimension
-        size_t next_dim_stride = dim_stride / this->dim_size(1);
-
-        //Peel off the first dimension
-        return NdOffsetMatrixProxy<T, N - 1>(this->dim_ranges_.data(),                          //Pass the dimension information
-                                             1,                                                 //Pass the next dimension
-                                             next_dim_stride,                                   //Pass the stride for the next dimension
-                                             this->data_.get() + dim_stride * effective_index); //Advance to index in this dimension
-    }
-
-    /**
-     * @brief Access an element
-     *
-     * Returns a proxy-object to allow chained array-style indexing
-     */
-    NdOffsetMatrixProxy<T, N - 1> operator[](size_t index) {
-        //Call the const version, since returned by value don't need to worry about const
-        return const_cast<const NdOffsetMatrix<T, N>*>(this)->operator[](index);
-    }
-};
-
-/**
- * @brief A 1-dimensional matrix supporting arbitrary (continuous) index ranges per dimension.
- *
- * This is considered a specialization for N=1
- */
-template<typename T>
-class NdOffsetMatrix<T, 1> : public NdOffsetMatrixBase<T, 1> {
-  public:
-    ///@brief Use the base constructors
-    using NdOffsetMatrixBase<T, 1>::NdOffsetMatrixBase;
-
-  public:
-    ///@brief Access an element (immutable)
-    const T& operator[](size_t index) const {
-        VTR_ASSERT_SAFE_MSG(this->dim_size(0) > 0, "Can not index into size zero dimension");
-        VTR_ASSERT_SAFE_MSG(index >= this->dim_ranges_[0].begin_index(), "Index out of range (below dimension minimum)");
-        VTR_ASSERT_SAFE_MSG(index < this->dim_ranges_[0].end_index(), "Index out of range (above dimension maximum)");
-
-        return this->data_[index];
-    }
-
-    ///@brief Access an element (mutable)
-    T& operator[](size_t index) {
-        //Call the const version, and cast away const-ness
-        return const_cast<T&>(const_cast<const NdOffsetMatrix<T, 1>*>(this)->operator[](index));
-    }
-};
-
-///@brief Convenient short forms for common NdMatricies
-template<typename T>
-using OffsetMatrix = NdOffsetMatrix<T, 2>;
-
-} // namespace vtr
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_ostream_guard.h b/third_party/vtr/libs/vtrutil/src/vtr_ostream_guard.h
deleted file mode 100644
index 199c5cb4c..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_ostream_guard.h
+++ /dev/null
@@ -1,40 +0,0 @@
-#ifndef VTR_OSTREAM_GUARD_H
-#define VTR_OSTREAM_GUARD_H
-
-namespace vtr {
-
-///@brief A RAII guard class to ensure restoration of output stream format
-class OsFormatGuard {
-  public:
-    ///@brief constructor
-    explicit OsFormatGuard(std::ostream& os)
-        : os_(os)
-        , flags_(os_.flags()) //Save formatting flag state
-        , width_(os_.width())
-        , precision_(os.precision())
-        , fill_(os.fill()) {}
-
-    ///@brief destructor
-    ~OsFormatGuard() {
-        os_.flags(flags_); //Restore
-        os_.width(width_);
-        os_.precision(precision_);
-        os_.fill(fill_);
-    }
-
-    OsFormatGuard(const OsFormatGuard&) = delete;
-    OsFormatGuard& operator=(const OsFormatGuard&) = delete;
-    OsFormatGuard(const OsFormatGuard&&) = delete;
-    OsFormatGuard& operator=(const OsFormatGuard&&) = delete;
-
-  private:
-    std::ostream& os_;
-    std::ios::fmtflags flags_;
-    std::streamsize width_;
-    std::streamsize precision_;
-    char fill_;
-};
-
-} // namespace vtr
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_pair_util.h b/third_party/vtr/libs/vtrutil/src/vtr_pair_util.h
deleted file mode 100644
index feabbd1ac..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_pair_util.h
+++ /dev/null
@@ -1,96 +0,0 @@
-#ifndef VTR_PAIR_UTIL_H
-#define VTR_PAIR_UTIL_H
-
-#include "vtr_range.h"
-
-namespace vtr {
-/**
- * @brief Iterator which derefernces the 'first' element of a std::pair iterator
- */
-template<typename PairIter>
-class pair_first_iter {
-  public:
-    using iterator_category = std::bidirectional_iterator_tag;
-    using value_type = typename PairIter::value_type::first_type;
-    using difference_type = void;
-    using pointer = value_type*;
-    using reference = value_type&;
-
-    ///@brief constructor
-    pair_first_iter(PairIter init)
-        : iter_(init) {}
-
-    ///@brief increment operator (++)
-    auto operator++() {
-        iter_++;
-        return *this;
-    }
-
-    ///@brief decrement operator (\-\-)
-    auto operator--() {
-        iter_--;
-        return *this;
-    }
-
-    ///@brief dereference * operator
-    auto operator*() { return iter_->first; }
-
-    ///@brief -> operator
-    auto operator-> () { return &iter_->first; }
-
-    ///@brief == operator
-    friend bool operator==(const pair_first_iter lhs, const pair_first_iter rhs) { return lhs.iter_ == rhs.iter_; }
-
-    ///@brief != operator
-    friend bool operator!=(const pair_first_iter lhs, const pair_first_iter rhs) { return !(lhs == rhs); }
-
-  private:
-    PairIter iter_;
-};
-
-/**
- *Iterator which derefernces the 'second' element of a std::pair iterator
- */
-template<typename PairIter>
-class pair_second_iter {
-  public:
-    using iterator_category = std::bidirectional_iterator_tag;
-    using value_type = typename PairIter::value_type::second_type;
-    using difference_type = void;
-    using pointer = value_type*;
-    using reference = value_type&;
-
-    ///@brief constructor
-    pair_second_iter(PairIter init)
-        : iter_(init) {}
-
-    ///@brief increment operator (++)
-    auto operator++() {
-        iter_++;
-        return *this;
-    }
-
-    ///@brief decrement operator (--)
-    auto operator--() {
-        iter_--;
-        return *this;
-    }
-
-    ///@brief dereference * operator
-    auto operator*() { return iter_->second; }
-
-    ///@brief -> operator
-    auto operator-> () { return &iter_->second; }
-
-    ///@brief == operator
-    friend bool operator==(const pair_second_iter lhs, const pair_second_iter rhs) { return lhs.iter_ == rhs.iter_; }
-
-    ///@brief != operator
-    friend bool operator!=(const pair_second_iter lhs, const pair_second_iter rhs) { return !(lhs == rhs); }
-
-  private:
-    PairIter iter_;
-};
-
-} // namespace vtr
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_path.cc b/third_party/vtr/libs/vtrutil/src/vtr_path.cc
deleted file mode 100644
index e6bf293d7..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_path.cc
+++ /dev/null
@@ -1,87 +0,0 @@
-#include "vtr_path.h"
-
-#include "vtr_util.h"
-
-//TODO: currently this file assumes unix-like
-//      in the future support windows
-#include <unistd.h>
-
-#include <sstream>
-
-namespace vtr {
-
-const std::string PATH_DELIM = "/";
-
-//Splits off the name and extension (including ".") of the specified filename
-std::array<std::string, 2> split_ext(const std::string& filename) {
-    std::array<std::string, 2> name_ext;
-    auto pos = filename.find_last_of('.');
-
-    if (pos == std::string::npos) {
-        //No extension
-        pos = filename.size();
-    }
-
-    name_ext[0] = std::string(filename, 0, pos);
-    name_ext[1] = std::string(filename, pos, filename.size() - pos);
-
-    return name_ext;
-}
-
-std::string basename(const std::string& path) {
-    auto elements = split(path, PATH_DELIM);
-
-    std::string str;
-    if (elements.size() > 0) {
-        //Return the last path element
-        str = elements[elements.size() - 1];
-    }
-
-    return str;
-}
-
-std::string dirname(const std::string& path) {
-    auto elements = split(path, PATH_DELIM);
-
-    std::string str;
-    if (elements.size() > 0) {
-        //We need to start the dirname with a PATH_DELIM if path started with one
-        if (starts_with(path, PATH_DELIM)) {
-            str += PATH_DELIM;
-        }
-
-        //Join all except the last path element
-        str += join(elements.begin(), elements.end() - 1, PATH_DELIM);
-
-        //We append a final PATH_DELIM to allow clients to just append directly to the
-        //returned value
-        str += PATH_DELIM;
-    }
-
-    return str;
-}
-
-std::string getcwd() {
-    constexpr size_t BUF_SIZE = 500;
-    char buf[BUF_SIZE];
-
-    if (::getcwd(buf, BUF_SIZE)) {
-        return std::string(buf);
-    }
-
-    //Check the global errno
-    int error = errno;
-
-    switch (error) {
-        case EACCES:
-            throw std::runtime_error("Access denied");
-
-        default: {
-            std::stringstream str;
-            str << "Unrecognised error" << error;
-            throw std::runtime_error(str.str());
-        }
-    }
-}
-
-} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_path.h b/third_party/vtr/libs/vtrutil/src/vtr_path.h
deleted file mode 100644
index a48d2bdb5..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_path.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef VTR_PATH_H
-#define VTR_PATH_H
-#include <string>
-#include <array>
-
-/**
- * @file 
- * @brief This file defines some useful utilities to handle paths
- */
-namespace vtr {
-
-///@brief Splits off the name and extension (including ".") of the specified filename
-std::array<std::string, 2> split_ext(const std::string& filename);
-
-/**
- * @brief Returns the basename of path (i.e. the last filename component)
- *
- *  For example, the path "/home/user/my_files/test.blif" -> "test.blif"
- */
-std::string basename(const std::string& path);
-
-/**
- * Returns the dirname of path (i.e. everything except the last filename component)
- *
- *  For example, the path "/home/user/my_files/test.blif" -> "/home/user/my_files/"
- */
-std::string dirname(const std::string& path);
-
-///@brief Returns the current working directory
-std::string getcwd();
-
-} // namespace vtr
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_ragged_matrix.h b/third_party/vtr/libs/vtrutil/src/vtr_ragged_matrix.h
deleted file mode 100644
index bbe7fea78..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_ragged_matrix.h
+++ /dev/null
@@ -1,255 +0,0 @@
-#ifndef VTR_RAGGED_MATRIX_H
-#define VTR_RAGGED_MATRIX_H
-#include <vector>
-#include <iterator>
-
-#include "vtr_assert.h"
-#include "vtr_array_view.h"
-
-namespace vtr {
-
-/**
- * @brief A 2 dimensional 'ragged' matrix with rows indexed by Index0, and each row of variable length (indexed by Index1)
- * 
- * Example:
- * 
- *       std::vector<int> row_sizes = {1, 5, 3, 10};
- *       FlatRaggedMatrix<float> matrix(row_sizes);
- * 
- *       //Fill in all entries with ascending values
- *       float value = 1.;
- *       for (size_t irow = 0; irow < row_sizes.size(); ++irow) {
- *           for (size_t icol = 0; icol < row_sizes[irow]; ++icoll) {
- *               matrix[irow][icol] = value;
- *               value += 1.;
- *           }
- *       }
- * 
- * 
- * For efficiency, this class uses a flat memory layout,
- * where all elements are laid out contiguiously (one row
- * after another).
- * 
- * Expects Index0 and Index1 to be convertable to size_t.
- */
-template<typename T, typename Index0 = size_t, typename Index1 = size_t>
-class FlatRaggedMatrix {
-  public:
-    ///@brief default constructor
-    FlatRaggedMatrix() = default;
-
-    /**
-     * @brief Constructs matrix with 'nrows' rows. 
-     *
-     * The row length is determined by calling 
-     * 'row_length_callback' with the associated row index.
-     */
-    template<class Callback>
-    FlatRaggedMatrix(size_t nrows, Callback& row_length_callback, T default_value = T())
-        : FlatRaggedMatrix(RowLengthIterator<Callback>(0, row_length_callback),
-                           RowLengthIterator<Callback>(nrows, row_length_callback),
-                           default_value) {}
-
-    ///@brief Constructs matrix from a container of row lengths
-    template<class Container>
-    FlatRaggedMatrix(Container container, T default_value = T())
-        : FlatRaggedMatrix(std::begin(container), std::end(container), default_value) {}
-
-    /**
-     * @brief Constructs matrix from an iterator range. 
-     *
-     * The length of the range is the number of rows, and iterator values are the row lengths. 
-     */
-    template<class Iter>
-    FlatRaggedMatrix(Iter row_size_first, Iter row_size_last, T default_value = T()) {
-        size_t nrows = std::distance(row_size_first, row_size_last);
-        first_elem_.resize(nrows + 1, -1); //+1 for sentinel
-
-        size_t nelem = 0;
-        size_t irow = 0;
-        for (Iter iter = row_size_first; iter != row_size_last; ++iter) {
-            first_elem_[irow] = nelem;
-
-            nelem += *iter;
-            ++irow;
-        }
-
-        //Sentinel
-        first_elem_[irow] = nelem;
-
-        data_.resize(nelem + 1, default_value); //+1 for sentinel
-    }
-
-  public: //Accessors
-    ///@brief Iterators to *all* elements
-    auto begin() {
-        return data_.begin();
-    }
-
-    ///@brief Iterator to the last element of the matrix
-    auto end() {
-        if (empty()) {
-            return data_.end();
-        }
-        return data_.end() - 1;
-    }
-
-    ///@brief Iterator to the first element of the matrix (immutable)
-    auto begin() const {
-        return data_.begin();
-    }
-
-    ///@brief Iterator to the last element of the matrix (immutable)
-    auto end() const {
-        if (empty()) {
-            return data_.end();
-        }
-        return data_.end() - 1;
-    }
-
-    ///@brief Return the size of the matrix
-    size_t size() const {
-        if (data_.empty()) {
-            return 0;
-        }
-        return data_.size() - 1; //-1 for sentinel
-    }
-
-    ///@brief Return true if empty
-    bool empty() const {
-        return size() == 0;
-    }
-
-    ///@brief Indexing operators for the first dimension
-    vtr::array_view<T> operator[](Index0 i) {
-        int idx = size_t(i);
-        T* first = &data_[first_elem_[idx]];
-        T* last = &data_[first_elem_[idx + 1]];
-        return vtr::array_view<T>(first,
-                                  last - first);
-    }
-
-    ///@brief Indexing operators for the first dimension (immutable)
-    vtr::array_view<const T> operator[](Index0 i) const {
-        int idx = size_t(i);
-        const T* first = &data_[first_elem_[idx]];
-        const T* last = &data_[first_elem_[idx + 1]];
-        return vtr::array_view<const T>(first,
-                                        last - first);
-    }
-
-    ///@brief Clears the matrix
-    void clear() {
-        data_.clear();
-        first_elem_.clear();
-    }
-
-    ///@brief Swaps two matrices
-    void swap(FlatRaggedMatrix<T, Index0, Index1>& other) {
-        std::swap(data_, other.data_);
-        std::swap(first_elem_, other.first_elem_);
-    }
-
-    ///@brief Swaps two matrices
-    friend void swap(FlatRaggedMatrix<T, Index0, Index1>& lhs, FlatRaggedMatrix<T, Index0, Index1>& rhs) {
-        lhs.swap(rhs);
-    }
-
-  public: //Types
-    ///@brief Proxy class used to represent a 'row' in the matrix
-    template<typename U>
-    class ProxyRow {
-      public:
-        ///@brief constructor
-        ProxyRow(U* first, U* last)
-            : first_(first)
-            , last_(last) {}
-
-        ///@brief Return iterator to the first element
-        U* begin() { return first_; }
-        ///@brief Return iterator to the last element
-        U* end() { return last_; }
-
-        ///@brief Return iterator to the first element (immutable)
-        const U* begin() const { return first_; }
-        ///@brief Return iterator to the last element (immutable)
-        const U* end() const { return last_; }
-
-        ///@brief Return the size of the row
-        size_t size() const { return last_ - first_; }
-
-        ///@brief indexing [] operator
-        U& operator[](Index1 j) {
-            VTR_ASSERT_SAFE(size_t(j) < size());
-            return first_[size_t(j)];
-        }
-
-        ///@brief indexing [] operator (immutable)
-        const U& operator[](Index1 j) const {
-            VTR_ASSERT_SAFE(size_t(j) < size());
-            return first_[size_t(j)];
-        }
-
-        ///@brief Return iterator to the first element
-        U* data() {
-            return first_;
-        }
-
-        ///@brief Return iterator to the first element (immutable)
-        U* data() const {
-            return first_;
-        }
-
-      private:
-        U* first_;
-        U* last_;
-    };
-
-  private:
-    /*
-     * Iterator for constructing FlatRaggedMatrix.
-     *
-     * uses a callback to determine row lengths.
-     */
-    template<class Callback>
-    class RowLengthIterator : public std::iterator<std::random_access_iterator_tag, size_t> {
-      public:
-        RowLengthIterator(size_t irow, Callback& callback)
-            : irow_(irow)
-            , callback_(callback) {}
-
-        RowLengthIterator& operator++() {
-            ++irow_;
-            return *this;
-        }
-
-        bool operator==(const RowLengthIterator& other) {
-            return irow_ == other.irow_;
-        }
-
-        bool operator!=(const RowLengthIterator& other) {
-            return !(*this == other);
-        }
-
-        int operator-(const RowLengthIterator& other) {
-            return irow_ - other.irow_;
-        }
-
-        size_t operator*() {
-            //Call the callback to get the row length
-            return callback_(Index0(irow_));
-        }
-
-      private:
-        size_t irow_;
-        Callback& callback_;
-    };
-
-  private:
-    std::vector<T> data_;
-    std::vector<int> first_elem_;
-};
-
-} // namespace vtr
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_random.cc b/third_party/vtr/libs/vtrutil/src/vtr_random.cc
deleted file mode 100644
index 3427e5fc2..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_random.cc
+++ /dev/null
@@ -1,77 +0,0 @@
-#include <cstddef>
-
-#include "vtr_random.h"
-#include "vtr_util.h"
-#include "vtr_error.h"
-
-#define CHECK_RAND
-
-namespace vtr {
-/* Portable random number generator defined below.  Taken from ANSI C by  *
- * K & R.  Not a great generator, but fast, and good enough for my needs. */
-
-constexpr size_t IA = 1103515245u;
-constexpr size_t IC = 12345u;
-constexpr size_t IM = 2147483648u;
-
-static RandState random_state = 0;
-
-/**
- * @brief The pseudo-random number generator is initialized using the argument passed as seed.
- */
-void srandom(int seed) {
-    random_state = (unsigned int)seed;
-}
-
-/* returns the random_state value */
-RandState get_random_state() {
-    return random_state;
-}
-
-int irand(int imax, RandState& state) {
-    /* Creates a random integer between 0 and imax, inclusive.  i.e. [0..imax] */
-    int ival;
-
-    /* state = (state * IA + IC) % IM; */
-    state = state * IA + IC; /* Use overflow to wrap */
-    ival = state & (IM - 1); /* Modulus */
-    ival = (int)((float)ival * (float)(imax + 0.999) / (float)IM);
-
-#ifdef CHECK_RAND
-    if ((ival < 0) || (ival > imax)) {
-        if (ival == imax + 1) {
-            /* Due to random floating point rounding, sometimes above calculation gives number greater than ival by 1 */
-            ival = imax;
-        } else {
-            throw VtrError(string_fmt("Bad value in my_irand, imax = %d  ival = %d", imax, ival), __FILE__, __LINE__);
-        }
-    }
-#endif
-
-    return ival;
-}
-
-int irand(int imax) {
-    return irand(imax, random_state);
-}
-
-float frand() {
-    /* Creates a random float between 0 and 1.  i.e. [0..1).        */
-
-    float fval;
-    int ival;
-
-    random_state = random_state * IA + IC; /* Use overflow to wrap */
-    ival = random_state & (IM - 1);        /* Modulus */
-    fval = (float)ival / (float)IM;
-
-#ifdef CHECK_RAND
-    if ((fval < 0) || (fval > 1.)) {
-        throw VtrError(string_fmt("Bad value in my_frand, fval = %g", fval), __FILE__, __LINE__);
-    }
-#endif
-
-    return (fval);
-}
-
-} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_random.h b/third_party/vtr/libs/vtrutil/src/vtr_random.h
deleted file mode 100644
index c5a3ce663..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_random.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef VTR_RANDOM_H
-#define VTR_RANDOM_H
-#include <algorithm> //For std::swap
-
-namespace vtr {
-/*********************** Portable random number generators *******************/
-typedef unsigned RandState;
-
-/**
- * @brief The pseudo-random number generator is initialized using the argument passed as seed.
- */
-void srandom(int seed);
-
-///@brief Return The random number generator state
-RandState get_random_state();
-
-///@brief Return a randomly generated integer less than or equal imax
-int irand(int imax);
-
-///@brief Return a randomly generated integer less than or equal imax using the generator (rand_state)
-int irand(int imax, RandState& rand_state);
-
-///@brief Return a randomly generated float number between [0,1]
-float frand();
-
-/**
- * @brief Portable/invariant version of std::shuffle
- *
- * Note that std::shuffle relies on std::uniform_int_distribution
- * which can produce different sequences accross different
- * compilers/compiler versions.
- * 
- * This version should be deterministic/invariant. However,  since
- * it uses vtr::irand(), may not be as well distributed as std::shuffle.
- */
-template<typename Iter>
-void shuffle(Iter first, Iter last, RandState& rand_state) {
-    for (auto i = (last - first) - 1; i > 0; --i) {
-        using std::swap;
-        swap(first[i], first[irand(i, rand_state)]);
-    }
-}
-
-} // namespace vtr
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_range.h b/third_party/vtr/libs/vtrutil/src/vtr_range.h
deleted file mode 100644
index 493a379fb..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_range.h
+++ /dev/null
@@ -1,85 +0,0 @@
-#ifndef VTR_RANGE_H
-#define VTR_RANGE_H
-#include <iterator>
-
-namespace vtr {
-/**
- * @brief The vtr::Range template models a range defined by two iterators of type T.
- *
- * It allows conveniently returning a range from a single function call
- * without having to explicity expose the underlying container, or make two
- * explicit calls to retrieve the associated begin and end iterators.
- * It also enables the easy use of range-based-for loops.
- *
- * For example:
- *
- *      class My Data {
- *          public:
- *              typdef std::vector<int>::const_iterator my_iter;
- *              vtr::Range<my_iter> data();
- *          ...
- *          private:
- *              std::vector<int> data_;
- *      };
- *
- *      ...
- *
- *      MyDat my_data;
- *
- *      //fill my_data
- *
- *      for(int val : my_data.data()) {
- *          //work with values stored in my_data
- *      }
- *
- * The empty() and size() methods are convenience wrappers around the relevant
- * iterator comparisons.
- *
- * Note that size() is only constant time if T is a random-access iterator!
- */
-template<typename T>
-class Range {
-  public:
-    ///@brief constructor
-    Range(T b, T e)
-        : begin_(b)
-        , end_(e) {}
-    ///@brief Return an iterator to the start of the range
-    T begin() { return begin_; }
-    ///@brief Return an iterator to the end of the range
-    T end() { return end_; }
-    ///@brief Return an iterator to the start of the range (immutable)
-    const T begin() const { return begin_; }
-    ///@brief Return an iterator to the end of the range (immutable)
-    const T end() const { return end_; }
-    ///@brief Return true if empty
-    bool empty() { return begin_ == end_; }
-    ///@brief Return the range size
-    size_t size() { return std::distance(begin_, end_); }
-
-  private:
-    T begin_;
-    T end_;
-};
-
-/**
- * @brief Creates a vtr::Range from a pair of iterators.
- *
- *  Unlike using the vtr::Range() constructor (which requires specifying
- *  the template type T, using vtr::make_range() infers T from the arguments.
- *
- * Example usage:
- *  auto my_range = vtr::make_range(my_vec.begin(), my_vec.end());
- */
-template<typename T>
-auto make_range(T b, T e) { return Range<T>(b, e); }
-
-/**
- * @brief Creates a vtr::Range from a container
- */
-template<typename Container>
-auto make_range(const Container& c) { return make_range(std::begin(c), std::end(c)); }
-
-} // namespace vtr
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_rusage.cc b/third_party/vtr/libs/vtrutil/src/vtr_rusage.cc
deleted file mode 100644
index a3b74c04c..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_rusage.cc
+++ /dev/null
@@ -1,30 +0,0 @@
-#include "vtr_rusage.h"
-
-#ifdef __unix__
-#    include <sys/time.h>
-#    include <sys/resource.h>
-#endif
-
-namespace vtr {
-
-///@brief Returns the maximum resident set size in bytes, or zero if unable to determine.
-size_t get_max_rss() {
-    size_t max_rss = 0;
-
-#ifdef __unix__
-    rusage usage;
-    int result = getrusage(RUSAGE_SELF, &usage);
-
-    if (result == 0) { //Success
-        //ru_maxrss is in kilobytes, convert to bytes
-        max_rss = usage.ru_maxrss * 1024;
-    }
-#else
-    //Do nothing, other platform specific code could be added here
-    //with appropriate defines
-#endif
-
-    return max_rss;
-}
-
-} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_rusage.h b/third_party/vtr/libs/vtrutil/src/vtr_rusage.h
deleted file mode 100644
index b69dc438a..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_rusage.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef VTR_RUSAGE_H
-#define VTR_RUSAGE_H
-#include <cstddef>
-
-namespace vtr {
-
-///@brief Returns the maximum resident set size in bytes, or zero if unable to determine.
-size_t get_max_rss();
-} // namespace vtr
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_sentinels.h b/third_party/vtr/libs/vtrutil/src/vtr_sentinels.h
deleted file mode 100644
index 036fd593b..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_sentinels.h
+++ /dev/null
@@ -1,49 +0,0 @@
-#ifndef VTR_SENTINELS_H
-#define VTR_SENTINELS_H
-
-/**
- * @file
- * @brief This header defines different sentinal value classes
- */
-namespace vtr {
-
-/**
- * @brief The Default sentinal value class
- *
- * Some specialized containers like vtr::linear_map and
- * vtr::vector_map require sentinel values to mark invalid/uninitialized
- * values. By convention, such containers query the sentinel objects static
- * INVALID() member function to retrieve the sentinel value.
- * 
- * These classes allows users to specify a custom sentinel value.
- * 
- * Usually the containers default to DefaultSentinel
- * 
- * The sentinel value is the default constructed value of the type
- */
-template<class T>
-class DefaultSentinel {
-  public:
-    constexpr static T INVALID() { return T(); }
-};
-
-///@brief Specialization for pointer types
-template<class T>
-class DefaultSentinel<T*> {
-  public:
-    constexpr static T* INVALID() { return nullptr; }
-};
-
-///@brief The sentile value is a specified value of the type
-template<class T, T val>
-class CustomSentinel {
-  public:
-    constexpr static T INVALID() { return T(val); }
-};
-
-///@brief The common case where -1 is used as the sentinel value
-template<class T>
-using MinusOneSentinel = CustomSentinel<T, -1>;
-
-} // namespace vtr
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_small_vector.h b/third_party/vtr/libs/vtrutil/src/vtr_small_vector.h
deleted file mode 100644
index 5fe755201..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_small_vector.h
+++ /dev/null
@@ -1,854 +0,0 @@
-#ifndef VTR_SMALL_VECTOR
-#define VTR_SMALL_VECTOR
-#include <memory>
-#include <algorithm>
-#include <limits>
-#include <cstdint>
-#include <array>
-#include "vtr_assert.h"
-
-namespace vtr {
-
-namespace small_vector_impl {
-
-/**
- * @brief The long format view of the vector.
- *
- * It consists of a dynamically allocated array, capacity and size.
- */
-template<class T, class S>
-struct long_format {
-    T* data_ = nullptr;
-    S capacity_ = 0;
-    S size_ = 0;
-};
-
-/**
- * @brief The short format view of the vector. 
- *
- * It consists of an in-place (potentially empty)
- * array of objects, a pad, and a size.
- */
-template<class T, class S, size_t CAPACITY, size_t PAD>
-struct short_format {
-    std::array<T, CAPACITY> data_;
-    std::array<uint8_t, PAD> pad_; ///< Padding to keep size_ aligned in both long_format and short_format
-    S size_ = 0;
-};
-
-/**
- * @brief A specialized version of short_format for padding of size zero.
- *
- * Since a std::array with zero array size may still have non-zero sizeof()
- */
-template<class T, class S, size_t CAPACITY>
-struct short_format<T, S, CAPACITY, 0> {
-    std::array<T, CAPACITY> data_;
-    S size_ = 0;
-};
-
-} // namespace small_vector_impl
-
-/**
- * @brief vtr::small_vector is a std::vector like container which:
- *
- *   - consumes less memory: sizeof(vtr::small_vector) < sizeof(std::vector)
- *   - possibly stores elements in-place (i.e. within the object)
- * 
- * On a typical LP64 system a vtr::small_vector consumes 16 bytes by default and supports
- * vectors up to ~2^32 elements long, while a std::vector consumes 24 bytes and supports up
- * to ~2^64 elements. The type used to store the size and capacity is configurable,
- * and set by the second template parameter argument. Setting it to size_t will replicate
- * std::vector's characteristics.
- * 
- * For short vectors vtr::small_vector will try to store elements in-place (i.e. within the
- * vtr::small_vector object) instead of dynamically allocating an array (by re-using the
- * internal storage for the pointer, size and capacity). Whether this is possible depends on
- * the size and alignment requirements of the value type, as compared to
- * vtr::small_vector. If in-place storage is not possible (e.g. due to a large value
- * type, or a large number of elements) a dynamic buffer is allocated (similar to
- * std::vector).
- * 
- * This is a highly specialized container. Unless you have specifically measured it's
- * usefulness you should use std::vector.
- */
-template<class T, class S = uint32_t>
-class small_vector {
-  public: //Types
-    typedef T value_type;
-    //typedef allocator_type //Allocator, unimplemented
-    typedef value_type& reference;
-    typedef const value_type& const_reference;
-    typedef value_type* pointer;
-    typedef const value_type* const_pointer;
-
-    typedef T* iterator;
-    typedef const T* const_iterator;
-    typedef std::reverse_iterator<iterator> reverse_iterator;
-    typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
-
-    typedef ptrdiff_t difference_type;
-    typedef S size_type;
-
-  public: //Constructors
-    ///@brief constructor
-    small_vector() {
-        if (SHORT_CAPACITY == 0) {
-            long_.data_ = nullptr;
-            long_.capacity_ = 0;
-        }
-        set_size(0);
-    }
-    ///@brief constructor
-    small_vector(size_type nelem)
-        : small_vector() {
-        reserve(nelem);
-        for (size_type i = 0; i < nelem; i++) {
-            emplace_back();
-        }
-        set_size(0);
-    }
-
-  public: //Accessors
-    ///@brief Return a const_iterator to the first element
-    const_iterator begin() const {
-        return cbegin();
-    }
-
-    ///@brief Return a const_iterator pointing to the past-the-end element in the container.
-    const_iterator end() const {
-        return cend();
-    }
-
-    ///@brief Return a const_reverse_iterator pointing to the last element in the container (i.e., its reverse beginning).
-    const_reverse_iterator rbegin() const {
-        return crbegin();
-    }
-
-    ///@brief Return a const_reverse_iterator pointing to the theoretical element preceding the first element in the container (which is considered its reverse end).
-    const_reverse_iterator rend() const {
-        return crend();
-    }
-
-    ///@brief Return a const_iterator pointing to the first element in the container.
-    const_iterator cbegin() const {
-        if (is_short()) {
-            return short_.data_.data();
-        }
-        return long_.data_;
-    }
-
-    ///@brief a const_iterator pointing to the past-the-end element in the container.
-    const_iterator cend() const {
-        if (is_short()) {
-            return short_.data_.data() + size();
-        }
-        return long_.data_ + size();
-    }
-
-    ///@brief Return a const_reverse_iterator pointing to the last element in the container (i.e., its reverse beginning).
-    const_reverse_iterator crbegin() const {
-        return const_reverse_iterator(cend());
-    }
-
-    ///@brief Return a const_reverse_iterator pointing to the theoretical element preceding the first element in the container (which is considered its reverse end).
-    const_reverse_iterator crend() const {
-        return const_reverse_iterator(cbegin());
-    }
-
-    ///@brief return the vector size (Padding ensures long/short format sizes are always aligned)
-    size_type size() const {
-        return long_.size_;
-    }
-
-    ///@brief Return the maximum size
-    size_t max_size() const {
-        return std::numeric_limits<S>::max();
-    }
-
-    ///@brief Return the vector capacity
-    size_type capacity() const {
-        if (is_short()) {
-            return SHORT_CAPACITY; //Fixed capacity
-        }
-        return long_.capacity_;
-    }
-
-    ///@brief Return true if empty
-    bool empty() const { return size() == 0; }
-
-    ///@brief Immutable indexing operator []
-    const_reference operator[](size_t i) const {
-        if (is_short()) {
-            return short_.data_[i];
-        }
-        return long_.data_[i];
-    }
-
-    ///@brief Immutable at() operator
-    const_reference at(size_t i) const {
-        if (i > size()) {
-            throw std::out_of_range("Index out of bounds");
-        }
-        return operator[](i);
-    }
-
-    ///@brief Return a constant reference to the first element
-    const_reference front() const {
-        return *begin();
-    }
-
-    ///@brief Return a constant reference to the last element
-    const_reference back() const {
-        return *(end() - 1);
-    }
-
-    ///@brief Return a constant pointer to the vector data
-    const_pointer data() const {
-        if (is_short()) {
-            short_.data_;
-        }
-        return long_.data_;
-    }
-
-  public: //Mutators
-    ///@brief Return an iterator pointing to the first element in the sequence
-    iterator begin() {
-        //Call const method and cast-away constness
-        return const_cast<iterator>(const_cast<const small_vector<T, S>*>(this)->begin());
-    }
-
-    ///@brief Return an iterator referring to the past-the-end element in the vector container.
-    iterator end() {
-        return const_cast<iterator>(const_cast<const small_vector<T, S>*>(this)->end());
-    }
-
-    ///@brief Return a reverse iterator pointing to the last element in the vector (i.e., its reverse beginning).
-    reverse_iterator rbegin() {
-        //Call const method and cast-away constness
-        return reverse_iterator(const_cast<small_vector<T, S>*>(this)->end());
-    }
-
-    ///@brief Return  a reverse iterator pointing to the theoretical element preceding the first element in the vector (which is considered its reverse end).
-    reverse_iterator rend() {
-        return reverse_iterator(const_cast<small_vector<T, S>*>(this)->begin());
-    }
-
-    ///@brief Resizes the container so that it contains n elements
-    void resize(size_type n) {
-        resize(n, value_type());
-    }
-
-    ///@brief Change the element count to n; any added elements are copies of val
-    void resize(size_type n, value_type val) {
-        const size_type current = size();
-        if (n > current) {
-            //Growing: append the missing elements
-            insert(end(), n - current, val);
-        } else if (n < current) {
-            //Shrinking: drop everything from index n onwards
-            erase(begin() + n, end());
-        }
-    }
-
-    /**
-     * @brief Reserve memory for a specific number of elements
-     *
-     * Capacity is only changed when the vector already uses the dynamically
-     * allocated (long) format and the requested number of elements is both:
-     *   - More than the short capacity (no need to reserve up to short capacity)
-     *   - Greater than the current size (capacity can never be below size)
-     *
-     * While in the short format the request is ignored: the format in use is
-     * derived from size() alone (see is_short()), so switching to a heap buffer
-     * before the size exceeds SHORT_CAPACITY would overwrite the in-place
-     * elements (the two formats share storage) and leak the new buffer.
-     */
-    void reserve(size_type num_elems) {
-        if (is_short()) {
-            return; //In-place storage can not be pre-grown
-        }
-        if (num_elems > SHORT_CAPACITY && num_elems > size()) {
-            change_capacity(num_elems);
-        }
-    }
-
-    ///@brief Ask the container to drop any capacity beyond its current size
-    void shrink_to_fit() {
-        if (is_short()) {
-            return; //In-place storage has a fixed capacity
-        }
-        change_capacity(size());
-    }
-
-    ///@brief Mutable element access by index (forwards to the const operator[])
-    reference operator[](size_t i) {
-        const small_vector<T, S>& self = *this;
-        return const_cast<reference>(self[i]);
-    }
-
-    ///@brief Mutable element access by index (forwards to the const at())
-    reference at(size_t i) {
-        const small_vector<T, S>& self = *this;
-        return const_cast<reference>(self.at(i));
-    }
-
-    ///@brief Mutable access to the first element (forwards to the const front())
-    reference front() {
-        const small_vector<T, S>& self = *this;
-        return const_cast<reference>(self.front());
-    }
-
-    ///@brief Mutable access to the last element (forwards to the const back())
-    reference back() {
-        const small_vector<T, S>& self = *this;
-        return const_cast<reference>(self.back());
-    }
-
-    ///@brief Mutable pointer to the element storage (forwards to the const data())
-    pointer data() {
-        const small_vector<T, S>& self = *this;
-        return const_cast<pointer>(self.data());
-    }
-
-    /**
-     * @brief Assigns new contents to the vector, replacing its current contents, and modifying its size accordingly.
-     *
-     * Input iterators to the initial and final positions in a sequence. The range used is [first,last),
-     * which includes all the elements between first and last, including the element pointed by first
-     * but not the element pointed by last.
-     *
-     * The range must not refer to elements of this vector, since the existing
-     * elements are destroyed before the new ones are copied in.
-     */
-    template<class InputIterator>
-    void assign(InputIterator first, InputIterator last) {
-        //Drop the old contents first, so they are really replaced
-        clear();
-
-        //Append one element at a time; this needs only single-pass iteration
-        //and lets emplace_back() take care of growing the storage
-        for (; first != last; ++first) {
-            emplace_back(*first);
-        }
-    }
-
-    /**
-     * @brief Assigns new contents to the vector, replacing its current contents, and modifying its size accordingly.
-     *
-     * After the call the vector holds exactly n copies of val
-     */
-    void assign(size_type n, const value_type& val) {
-        //val may refer to one of our own elements, so copy it before they are destroyed
-        const value_type fill(val);
-
-        //Drop the old contents first, so they are really replaced
-        clear();
-        insert(begin(), n, fill);
-    }
-
-    /**
-     * @brief Replaces the contents of the vector with the elements of an initializer list.
-     *
-     * Forwards the list's [begin, end) range to the iterator-range overload of assign().
-     */
-    void assign(std::initializer_list<value_type> il) {
-        auto il_first = il.begin();
-        auto il_last = il.end();
-        assign(il_first, il_last);
-    }
-
-    /**
-     * @brief Appends value to the end of the vector, growing the storage if needed.
-     *
-     * The argument is taken by value, so it can be moved straight into the new
-     * slot; the element type does not need to be default constructible.
-     */
-    void push_back(value_type value) {
-        //next_back() hands out raw (unconstructed) storage, so move-construct in place
-        new (next_back()) T(std::move(value));
-    }
-
-    ///@brief Drops the final element, shrinking the vector by one; does nothing when the vector is empty.
-    void pop_back() {
-        if (size() == 0) {
-            return; //Nothing to remove
-        }
-        erase(end() - 1);
-    }
-
-    ///@brief Inserts a single copy of val before position; forwards to the counted overload with a count of one.
-    iterator insert(const_iterator position, const value_type& val) {
-        const int single = 1;
-        return insert(position, single, val);
-    }
-
-    /**
-     * @brief Insert n copies of val before position
-     *
-     * Returns an iterator to the first inserted element (or to position's
-     * element when n is zero).
-     *
-     * The new elements are first appended with emplace_back(), which grows the
-     * storage (including the short to long format switch) as needed, and are
-     * then rotated into place. Every element touched is therefore a fully
-     * constructed object.
-     */
-    iterator insert(const_iterator position, size_type n, const value_type& val) {
-        //Location of position as an index, which will be
-        //unchanged if the underlying storage is reallocated
-        const size_type i = std::distance(cbegin(), position);
-        const size_type old_size = size();
-
-        //val may refer to an element of this vector, which would be
-        //relocated while growing, so take a copy up front
-        const value_type fill(val);
-
-        //Append the new elements at the end
-        for (size_type k = 0; k < n; ++k) {
-            emplace_back(fill);
-        }
-
-        //Move the appended block [old_size, end) in front of the old tail [i, old_size)
-        std::rotate(begin() + i, begin() + old_size, end());
-
-        return begin() + i;
-    }
-
-    /**
-     * @brief Insert n elements at position position and fill them with value val
-     *
-     * Forwards to the const-reference overload. The argument is passed on as a
-     * const lvalue: a temporary copy would bind to this rvalue overload again
-     * and recurse without end.
-     */
-    iterator insert(const_iterator position, size_type n, value_type&& val) {
-        const value_type& fill = val;
-        return insert(position, n, fill); //TODO: optimize for moved val
-    }
-
-    //Range insert
-    //template<class InputIterator>
-    //iterator insert(const_iterator position, InputIterator first, InputIterator last) {
-    ////Location of position as an index, which will be
-    ////unchanged if the underlying storage is reallocated
-    //size_type i = std::distance(cbegin(), position);
-    //size_type n = std::distance(first, last);
-
-    ////If needed, grow capacity
-    ////
-    ////Note that change_capacity will automatically convert from short to long
-    ////format if required.
-    //size_type new_size = size() + n;
-    //if (capacity() < new_size) {
-    //change_capacity(new_size);
-    //}
-
-    //reverse_swap_elements(begin() + i, end(), end() + n - 1);
-
-    ////Insert new values at end
-    //std::uninitialized_copy(first, last, begin() + i);
-
-    //set_size(new_size);
-
-    //return begin() + i;
-    //}
-
-    ///@brief Erases the one element at position; forwards to the range overload with [position, position + 1).
-    iterator erase(const_iterator position) {
-        const_iterator next = position + 1;
-        return erase(position, next);
-    }
-
-    /**
-     * @brief Removes the elements in the range [first, last) from the vector.
-     *
-     * Returns an iterator to the element that now occupies the position of the
-     * first erased element (end() if the tail was erased).
-     *
-     * If the remaining elements fit into the in-place storage, the vector is
-     * converted back from the long to the short format and the dynamically
-     * allocated buffer is released.
-     */
-    iterator erase(const_iterator first, const_iterator last) {
-        //Number of elements to erase
-        size_type n = std::distance(first, last);
-
-        //Location of position as an index, which will be
-        //unchanged if the underlying storage is changed
-        size_type i_first = std::distance(cbegin(), first);
-
-        size_type new_size = size() - n;
-
-        //Only convert when the result will really be treated as short format
-        //(is_short() is never true when there is no in-place capacity)
-        if (!is_short() && SHORT_CAPACITY > 0u && new_size <= SHORT_CAPACITY) {
-            //Convert from long format to short/in-place format
-
-            //Keep handle on buffer and original size
-            auto buff_ptr = long_.data_;
-            size_type orig_size = size();
-
-            //Copy into in-place the valid (not-to-be-erased) values in
-            //[begin, first) and [last, end)
-            //
-            //Note that we can construct over the in-place storage since the long
-            //format has only basic data types, which have no destructors to call
-            auto buff_begin = buff_ptr;
-            auto buff_end = buff_begin + orig_size;
-            auto erase_begin = buff_ptr + i_first;
-            auto erase_end = erase_begin + n;
-
-            //Copy from beginning until start of erase
-            auto inplace_ptr = short_.data_.data();
-            for (auto buff_itr = buff_begin; buff_itr != erase_begin; ++buff_itr) {
-                new (inplace_ptr++) T(*buff_itr);
-            }
-            //Copy from end of erase until end of buf
-            for (auto buff_itr = erase_end; buff_itr != buff_end; ++buff_itr) {
-                new (inplace_ptr++) T(*buff_itr);
-            }
-
-            VTR_ASSERT_SAFE(std::distance(short_.data_.data(), inplace_ptr) == new_size);
-
-            //Clean-up elements in buffer and free it
-            destruct_elements(buff_begin, buff_end);
-            dealloc(buff_ptr);
-        } else {
-            //Remove elements in either long or short formats
-
-            iterator first2 = begin() + i_first;
-            iterator last2 = first2 + n;
-
-            //Swap all elements in [first, last) to the end.
-            //That is with those within [last, end())
-            if (last2 < end()) {
-                swap_elements(last2, end(), first2);
-            }
-
-            //Finally destruct the elements in [end() - n, end()); that is the
-            //elements which were originally to be erased
-            destruct_elements(end() - n, end());
-        }
-
-        //Shrink size
-        set_size(new_size);
-
-        //Re-derive the result from the index only now: the size (and with it
-        //the storage format reported by begin()) is consistent again
-        return begin() + i_first;
-    }
-
-    /**
-     * @brief Exchanges the content of the container by the content of other, which is another vector object of the same type. Sizes may differ.
-     *
-     * Forwards to the two-argument friend swap().
-     */
-    void swap(small_vector<T, S>& other) {
-        //A plain unqualified call would find this member function and stop there;
-        //the block-scope using-declaration re-enables argument-dependent lookup,
-        //which is the only way the friend swap() can be found
-        using std::swap;
-        swap(*this, other);
-    }
-
-    /**
-     * @brief swaps two vectors
-     *
-     * Vectors in the same format exchange their raw representation. For a
-     * mixed pair the in-place elements are copied across and the heap buffer
-     * changes owner, so the long elements are never copied.
-     */
-    friend void swap(small_vector<T, S>& lhs, small_vector<T, S>& rhs) {
-        using std::swap;
-
-        if (lhs.is_short() && rhs.is_short()) {
-            //Both short
-            std::swap(lhs.short_, rhs.short_);
-        } else if (!lhs.is_short() && !rhs.is_short()) {
-            //Both long
-            std::swap(lhs.long_, rhs.long_);
-        } else {
-            //Mixed long/short
-            VTR_ASSERT_SAFE(lhs.is_short() != rhs.is_short());
-
-            auto& long_vec = ((lhs.is_short()) ? rhs : lhs);
-            auto& short_vec = ((lhs.is_short()) ? lhs : rhs);
-
-            /*
-             * If the two vectors are in different formats we can't just swap them,
-             * since the short format has real values (potentially with destructors),
-             * while the long format has only basic data types.
-             *
-             * Instead we copy the short_vec values into long_vec's in-place storage,
-             * destruct the original short_vec values and then set short_vec to point
-             * to long_vec's original buffer (avoids extra copy of long elements).
-             */
-
-            //Save long data, since the in-place copy below overwrites it
-            pointer long_buf = long_vec.long_.data_;
-            size_type long_size = long_vec.size();
-            size_type long_capacity = long_vec.long_.capacity_;
-
-            size_type short_size = short_vec.size();
-
-            //Copy short data into long_vec's in-place storage
-            //
-            //Note that the long format contains only basic data types with no
-            //destructors to call, so we can use uninitialized copy
-            std::uninitialized_copy(short_vec.begin(), short_vec.end(), long_vec.short_.data_.data());
-
-            //Destroy original elements in short (while its size is still valid)
-            short_vec.destruct_elements();
-
-            //long_vec is now in short format
-            long_vec.set_size(short_size);
-
-            //Hand the saved buffer over; short_vec is now in long format
-            short_vec.long_.data_ = long_buf;
-            short_vec.long_.capacity_ = long_capacity;
-            short_vec.set_size(long_size);
-        }
-    }
-
-    /**
-     * @brief Removes all elements from the vector (which are destroyed), leaving the container with a size of 0.
-     *
-     * With a size of zero the vector is treated as being in the short format
-     * again (whenever in-place storage exists), so a dynamically allocated
-     * buffer has to be released here: nothing would refer to it afterwards.
-     */
-    void clear() {
-        //Record the format before the size is reset
-        const bool heap_backed = !is_short();
-
-        destruct_elements();
-
-        //Without in-place storage the vector stays in the long format and keeps its buffer
-        if (heap_backed && SHORT_CAPACITY > 0u) {
-            dealloc(long_.data_);
-        }
-
-        set_size(0);
-    }
-
-    ///@brief Appends a new element built directly in the vector's storage, forwarding args to the element's constructor.
-    template<typename... Args>
-    void emplace_back(Args&&... args) {
-        //Obtain the raw slot, then construct the element in it
-        T* slot = next_back();
-        new (slot) T(std::forward<Args>(args)...);
-    }
-
-    //Unsupported: Emplace at position
-    //template<typename... Args>
-    //void emplace(const_iterator position, Args&&... args) {
-    //throw std::logic_error("unimplemented");
-    //}
-
-  public: //Comparisons
-    ///@brief == operator: equal sizes and element-wise equal contents
-    friend bool operator==(const small_vector<T, S>& lhs, const small_vector<T, S>& rhs) {
-        if (lhs.size() == rhs.size()) {
-            return std::equal(lhs.begin(), lhs.end(), rhs.begin());
-        }
-        return false;
-    }
-
-    ///@brief < operator: lexicographical ordering of the elements
-    friend bool operator<(const small_vector<T, S>& lhs, const small_vector<T, S>& rhs) {
-        const bool lhs_first = std::lexicographical_compare(lhs.begin(), lhs.end(),
-                                                            rhs.begin(), rhs.end());
-        return lhs_first;
-    }
-
-    ///@brief != operator: negation of ==
-    friend bool operator!=(const small_vector<T, S>& lhs, const small_vector<T, S>& rhs) {
-        const bool same = (lhs == rhs);
-        return !same;
-    }
-
-    ///@brief > operator: expressed through < with the operands exchanged
-    friend bool operator>(const small_vector<T, S>& lhs, const small_vector<T, S>& rhs) {
-        const bool rhs_first = (rhs < lhs);
-        return rhs_first;
-    }
-
-    ///@brief <= operator: true unless rhs orders strictly before lhs
-    friend bool operator<=(const small_vector<T, S>& lhs, const small_vector<T, S>& rhs) {
-        const bool rhs_first = (rhs < lhs);
-        return !rhs_first;
-    }
-
-    ///@brief >= operator: true unless lhs orders strictly before rhs
-    friend bool operator>=(const small_vector<T, S>& lhs, const small_vector<T, S>& rhs) {
-        const bool lhs_first = (lhs < rhs);
-        return !lhs_first;
-    }
-
-  public: //Lifetime management
-    ///@brief destructor: destroys every element and frees the heap buffer when the long format is in use
-    ~small_vector() {
-        destruct_elements();
-        if (is_short()) {
-            return; //In-place storage is part of the object, nothing to free
-        }
-        dealloc(long_.data_);
-    }
-
-    /**
-     * @brief copy constructor
-     *
-     * Creates an element-wise copy of other, in the same format (short or
-     * long) that other uses. A long copy gets a buffer of exactly other.size()
-     * elements.
-     *
-     * This object has no elements and no buffer yet, so nothing is destroyed
-     * or released here and none of its own state is read before being written.
-     */
-    small_vector(const small_vector& other) {
-        const size_type count = other.size();
-
-        T* dest = nullptr;
-        if (other.is_short()) {
-            //Copy in place
-            dest = short_.data_.data();
-        } else {
-            //Create new buffer of exact size
-            dest = alloc(count);
-            long_.data_ = dest;
-            long_.capacity_ = count;
-        }
-
-        //Copy elements into the raw storage
-        std::uninitialized_copy(other.begin(), other.end(), dest);
-
-        set_size(count);
-    }
-
-    /**
-     * @brief move constructor
-     *
-     * Starts out as a default-constructed vector and then exchanges contents
-     * with other, which is left holding the default-constructed state.
-     */
-    small_vector(small_vector&& other)
-        : small_vector() {
-        //The using-declaration keeps the member swap() from hiding the
-        //two-argument friend swap(), which is only found by argument-dependent lookup
-        using std::swap;
-        swap(*this, other);
-    }
-
-    /**
-     * @brief assignment operator (copy-and-swap)
-     *
-     * other is taken by value, so it already is a copy (or moved-from
-     * temporary) of the source; exchanging contents with it leaves the old
-     * contents of this vector to be cleaned up by other's destructor.
-     */
-    small_vector& operator=(small_vector other) {
-        //The using-declaration keeps the member swap() from hiding the
-        //two-argument friend swap(), which is only found by argument-dependent lookup
-        using std::swap;
-        swap(*this, other); //Copy-swap
-        return *this;
-    }
-
-  private: //Internal types
-    ///@brief Size and alignment of the long (dynamically allocated) format
-    static constexpr size_t LONG_FMT_SIZE = sizeof(small_vector_impl::long_format<value_type, size_type>);
-    static constexpr size_t LONG_FMT_ALIGN = alignof(small_vector_impl::long_format<value_type, size_type>);
-
-    ///@brief The number of value types which can be stored in-place in the object (may be zero)
-    //This is what remains of the long format's footprint once room for the size field is set aside
-    static constexpr size_t SHORT_CAPACITY = (LONG_FMT_SIZE - sizeof(size_type)) / sizeof(value_type);
-
-    /**
-     * @brief required padding
-     *
-     * The amount of padding required to ensure the size_ attributes of long_format and short_format
-     * are aligned.
-     */
-    static constexpr size_t SHORT_PAD = LONG_FMT_SIZE - (sizeof(value_type) * SHORT_CAPACITY + sizeof(size_type));
-
-    ///@brief Size and alignment of the short (in-place) format
-    static constexpr size_t SHORT_FMT_SIZE = sizeof(small_vector_impl::short_format<value_type, size_type, SHORT_CAPACITY, SHORT_PAD>);
-    static constexpr size_t SHORT_FMT_ALIGN = alignof(small_vector_impl::short_format<value_type, size_type, SHORT_CAPACITY, SHORT_PAD>);
-
-    //Both formats share one union, so their layouts must be interchangeable
-    static_assert(LONG_FMT_SIZE == SHORT_FMT_SIZE, "Short and long data formats must have same size");
-    static_assert(LONG_FMT_ALIGN % SHORT_FMT_ALIGN == 0, "Short and long data formats must have compatible alignment");
-
-  public:
-    ///@brief Publicly visible alias of SHORT_CAPACITY
-    static constexpr size_t INPLACE_CAPACITY = SHORT_CAPACITY;
-
-  private: //Internal methods
-    /**
-     * @brief Returns a pointer to the (uninitialized) location for the next element to be added.
-     *
-     * Automatically grows the storage if needed.
-     *
-     * The size is incremented before returning, so the caller must construct
-     * an object at the returned location.
-     */
-    T* next_back() {
-        T* next = nullptr;
-        if (size() < SHORT_CAPACITY) { //Space in-place
-            next = short_.data_.data() + size();
-        } else { //Dynamically allocated
-            if (size() == capacity()) {
-                //Out of space
-                grow();
-            }
-            //Index the heap buffer directly: when size() == SHORT_CAPACITY,
-            //is_short() still reports the short format at this point
-            next = long_.data_ + size();
-        }
-        //Written through long_; set_size() writes short_.size_ and its comment
-        //says the two size_ members are aligned
-        ++long_.size_;
-        VTR_ASSERT_SAFE(size() <= capacity());
-        return next;
-    }
-
-    /**
-     * @brief Increases the capacity by GROWTH_FACTOR
-     *
-     * Note that this automatically handles the case of growing beyond SHORT_CAPACITY and
-     * switching to long_format
-     */
-    void grow() {
-        //How much to scale the size of the storage when out of space
-        constexpr size_type GROWTH_FACTOR = 2;
-
-        VTR_ASSERT_SAFE_MSG(size() >= SHORT_CAPACITY, "Should only grow capacity when at or beyond SHORT_CAPACITY");
-        VTR_ASSERT_SAFE_MSG(capacity() <= (max_size() / GROWTH_FACTOR), "No capacity overflow");
-        //The lower bound of 1 covers a current capacity of zero, which scaling alone would never leave
-        size_type new_capacity = std::max<size_type>(1, capacity() * GROWTH_FACTOR);
-        //TODO: Consider ensuring new_capacity is always a power of 2, may be easier on the memory allocator...
-
-        VTR_ASSERT_SAFE_MSG(new_capacity > capacity(), "Grown capacity should be greater than previous capacity");
-
-        change_capacity(new_capacity);
-    }
-
-    /**
-     * @brief Changes capacity to new_capacity
-     *
-     * It is assumed that new_capacity is > SHORT_CAPACITY.
-     *
-     * If currently in short format, automatically converts to long format
-     *
-     * NOTE(review): the size is not changed here, and is_short() is derived from
-     * the size alone. After converting from the short format, the vector keeps
-     * reporting the short format until the size exceeds SHORT_CAPACITY, while the
-     * in-place storage has already been overwritten by the buffer pointer.
-     * Callers presumably have to raise the size right away - confirm for every caller.
-     */
-    void change_capacity(size_type new_capacity) {
-        VTR_ASSERT_SAFE_MSG(new_capacity >= size(), "New capacity should be at least size");
-
-        if (new_capacity == capacity()) {
-            return; //Already at correct capacity
-        }
-
-        //Get new raw memory
-        T* tmp_data = alloc(new_capacity);
-
-        //Copy values
-        std::uninitialized_copy(begin(), end(), tmp_data);
-
-        //Clean-up the old values
-        //We do this before updating the array pointer, since if we are updating
-        //from short to long the assignment would corrupt the old values
-        destruct_elements();
-
-        //Update
-        //After the swap tmp_data holds whatever was stored in long_.data_ before:
-        //the old buffer when coming from the long format
-        std::swap(long_.data_, tmp_data);
-        long_.capacity_ = new_capacity;
-
-        //Free memory if we aren't using the inplace buffer
-        if (!is_short()) {
-            dealloc(tmp_data);
-        }
-    }
-
-    ///@brief Tells whether the elements currently live in the short/in-place format
-    bool is_short() const {
-        if (!(SHORT_CAPACITY > 0u)) {
-            return false; //No in-place buffer available at all
-        }
-        //In-place as long as the elements fit, otherwise the dynamic buffer is used
-        return size() <= SHORT_CAPACITY;
-    }
-
-    /**
-     * @brief Stores a new element count
-     *
-     * The short and long formats are padded so that
-     * their size_ members are always aligned, which
-     * allows the size to be written through one of them
-     * no matter which format is in use.
-     */
-    void set_size(size_type new_size) {
-        short_.size_ = new_size;
-    }
-
-    ///@brief Obtains raw (uninitialized) memory large enough for nelem objects of type T
-    static T* alloc(size_type nelem) {
-        void* raw = ::operator new(sizeof(T) * nelem);
-        return static_cast<T*>(raw);
-    }
-
-    /**
-     * @brief Releases a block of raw memory
-     *
-     * No destructors are run here: the caller must already have destroyed
-     * every object that lived in the block.
-     */
-    static void dealloc(T* data) {
-        void* raw = data;
-        ::operator delete(raw);
-    }
-
-    /**
-     * @brief Exchanges each element of [src_first, src_last) with the element at the matching offset from dst_first
-     *
-     * Walks both ranges forwards. Returns src_first.
-     */
-    iterator swap_elements(iterator src_first, iterator src_last, iterator dst_first) {
-        VTR_ASSERT_SAFE_MSG(src_first < src_last, "First swap range first must start before last");
-
-        iterator src = src_first;
-        iterator dst = dst_first;
-        while (src != src_last) {
-            std::swap(*src, *dst);
-            ++src;
-            ++dst;
-        }
-
-        return src_first;
-    }
-
-    /**
-     * @brief Exchanges the elements of [src_first, src_last), last one first, with the elements at dst_first, dst_first - 1, ...
-     *
-     * Walks both ranges backwards. Returns src_first.
-     */
-    iterator reverse_swap_elements(iterator src_first, iterator src_last, iterator dst_first) {
-        VTR_ASSERT_SAFE_MSG(src_first < src_last, "First swap range first must start before last");
-
-        iterator src = src_last;
-        iterator dst = dst_first;
-        while (src != src_first) {
-            --src; //Step onto the next source element, starting with the last one
-            std::swap(*src, *dst);
-            --dst;
-        }
-
-        return src_first;
-    }
-
-    ///@brief Runs the destructor of every element the vector currently holds
-    void destruct_elements() {
-        iterator first = begin();
-        iterator last = end();
-        destruct_elements(first, last);
-    }
-
-    ///@brief Runs the destructor of each element in the half-open range [first, last)
-    void destruct_elements(iterator first, iterator last) {
-        iterator cur = first;
-        while (cur != last) {
-            cur->~T();
-            ++cur;
-        }
-    }
-
-    ///@brief Runs the destructor of the single element at position
-    void destruct_element(iterator position) {
-        iterator next = position + 1;
-        destruct_elements(position, next);
-    }
-
-  private: //Data
-    /*
-     * The object data storage is re-used between the long and short formats.
-     *
-     * If the capacity is small (less than or equal to SHORT_CAPACITY) the
-     * short format (which stores element in-place) is used. Otherwise the
-     * long format is used and the elements are stored in a dynamically
-     * allocated buffer
-     */
-    union {
-        small_vector_impl::long_format<value_type, size_type> long_;
-        small_vector_impl::short_format<value_type, size_type, SHORT_CAPACITY, SHORT_PAD> short_;
-    };
-};
-
-} // namespace vtr
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_string_interning.h b/third_party/vtr/libs/vtrutil/src/vtr_string_interning.h
deleted file mode 100644
index 3af949701..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_string_interning.h
+++ /dev/null
@@ -1,558 +0,0 @@
-#ifndef VTR_STRING_INTERNING_H_
-#define VTR_STRING_INTERNING_H_
-
-/**
- * @file
- * @brief  Provides basic string interning, along with pattern splitting suitable for use with FASM.
- * 
- *  For reference, string interning refers to keeping a unique copy of a string
- *  in storage, and then handing out an id to that storage location, rather than
- *  keeping the string around.  This deduplicates memory overhead for strings.
- * 
- *  This string internment has an additional feature that is splitting the
- *  input string into "parts" based on '.', which happens to be the feature
- *  separator for FASM.  This means the string "TILE.CLB.A" and "TILE.CLB.B"
- *  would be made up of the intern ids for {"TILE", "CLB", "A"} and
- *  {"TILE", "CLB", "B"} respectively, allowing some internal deduplication.
- * 
- *  Strings can contain up to kMaxParts, before they will be interned as their
- *  whole string.
- * 
- *  Interned strings (interned_string) that come from the same internment
- *  object (string_internment) can safely be checked for equality and hashed
- *  without touching the underlying string.  Lexicographical comparisons (e.g. <)
- *  requires reconstructing the string.
- * 
- *  Basic usage:
- *  -# Create a string_internment
- *  -# Invoke string_internment::intern_string, which returns the
- *     interned_string object that is the interned string's unique identifier.
- *     This identifier can be checked for equality or hashed. If
- *     string_internment::intern_string is called with the same string, a value
- *     equivalent interned_string object will be returned.
- *  -# If the original string is required, interned_string::get can be invoked
- *     to copy the string into a std::string.
- *     interned_string also provides iteration via begin/end, however the begin
- *     method requires a pointer to original string_internment object.  This is
- *     not suitable for range iteration, so the method interned_string::bind
- *     can be used to create a bound_interned_string that can be used in a
- *     range iteration context.
- * 
- *     For reference, the reason that interned_string's does not have a
- *     reference back to the string_internment object is to keep their memory
- *     footprint lower.
- */
-#include <cstring>
-#include <string>
-#include <vector>
-#include <unordered_map>
-#include <stdexcept>
-#include <climits>
-#include <algorithm>
-#include <array>
-
-#include "vtr_strong_id.h"
-#include "vtr_string_view.h"
-#include "vtr_vector.h"
-
-namespace vtr {
-
-// Forward declare classes for pointers.
-class string_internment;
-class interned_string;
-class interned_string_less;
-
-// StrongId for identifying unique string pieces.
-struct interned_string_tag;
-typedef StrongId<interned_string_tag> StringId;
-
-/**
- * @brief Values that control the size of the used storage
- *
- * To keep interned_string memory footprint lower and flexible, these values
- * control the size of the storage used.
- *
- * Number of bytes to represent the StringId.  This implies a maximum number of unique strings available equal to (1 << (kBytesPerId*CHAR_BIT)).
- */
-constexpr size_t kBytesPerId = 3;
-///@brief Maximum number of splits to accommodate before just interning the entire string.
-constexpr size_t kMaxParts = 3;
-///@brief Number of bytes to represent the number of splits present in an interned string.
-constexpr size_t kSizeSize = 1;
-///@brief Which character to split the input string by.
-constexpr char kSplitChar = '.';
-
-static_assert((1 << (CHAR_BIT * kSizeSize)) > kMaxParts, "Size of size data is too small");
-
-/**
- * @brief Iterator over interned string.
- *
- * This object is much heavier memory wise than interned_string, so do not
- * store these.
- * 
- * This iterator only accommodates the forward_iterator concept.
- * 
- * Do not construct this iterator directly.  Use either
- * bound_interned_string::begin/end or interned_string::begin/end.
- */
-class interned_string_iterator {
-  public:
-    ///@brief Construct an iterator over the first n ids of intern_ids (defined out of line)
-    interned_string_iterator(const string_internment* internment, std::array<StringId, kMaxParts> intern_ids, size_t n);
-
-    ///@brief Construct an iterator in the cleared state; interned_string::end() returns such an iterator
-    interned_string_iterator() {
-        clear();
-    }
-
-    using value_type = char;
-    using difference_type = void;
-    using pointer = const char*;
-    using reference = const char&;
-    using iterator_category = std::forward_iterator_tag;
-
-    /**
-     * @brief Return the current character (by value)
-     *
-     * Throws std::out_of_range if the iterator is in the cleared state.
-     * Once the index has reached the end of the current view, kSplitChar
-     * is returned instead of a character of the view.
-     */
-    char operator*() const {
-        if (num_parts_ == size_t(-1)) {
-            throw std::out_of_range("Invalid iterator");
-        }
-
-        if (str_idx_ >= view_.size()) {
-            return kSplitChar;
-        } else {
-            return view_.at(str_idx_);
-        }
-    }
-
-    ///@brief Pre- and post-increment (defined out of line)
-    interned_string_iterator& operator++();
-    interned_string_iterator operator++(int);
-
-    friend bool operator==(const interned_string_iterator& lhs, const interned_string_iterator& rhs);
-
-  private:
-    ///@brief Reset every member to its cleared value; size_t(-1) marks the counters as invalid
-    void clear() {
-        internment_ = nullptr;
-        num_parts_ = size_t(-1);
-        std::fill(parts_.begin(), parts_.end(), StringId());
-        part_idx_ = size_t(-1);
-        str_idx_ = size_t(-1);
-        view_ = vtr::string_view();
-    }
-
-    const string_internment* internment_;
-    size_t num_parts_;                      //size_t(-1) in the cleared state
-    std::array<StringId, kMaxParts> parts_;
-    size_t part_idx_;
-    size_t str_idx_;                        //Index into view_ (see operator*)
-    vtr::string_view view_;
-};
-
-///@brief == operator: two iterators are equal when every member compares equal
-inline bool operator==(const interned_string_iterator& lhs, const interned_string_iterator& rhs) {
-    if (!(lhs.internment_ == rhs.internment_)) return false;
-    if (!(lhs.num_parts_ == rhs.num_parts_)) return false;
-    if (!(lhs.parts_ == rhs.parts_)) return false;
-    if (!(lhs.part_idx_ == rhs.part_idx_)) return false;
-    if (!(lhs.str_idx_ == rhs.str_idx_)) return false;
-    return lhs.view_ == rhs.view_;
-}
-
-///@brief != operator: negation of ==
-inline bool operator!=(const interned_string_iterator& lhs, const interned_string_iterator& rhs) {
-    const bool same = (lhs == rhs);
-    return !same;
-}
-
-/**
- * @brief An interned_string bound to its string_internment object.
- *
- * This object is heavier than just an interned_string.
- * This object holds a pointer to interned_string, so its lifetime must be
- * shorter than the parent interned_string.
- */
-class bound_interned_string {
-  public:
-    ///@brief constructor; only stores the two pointers, nothing is copied
-    bound_interned_string(const string_internment* internment, const interned_string* str)
-        : internment_(internment)
-        , str_(str) {}
-
-    ///@brief return an iterator to the beginning of the interned_string (defined out of line)
-    interned_string_iterator begin() const;
-    ///@brief return the iterator marking the end of the interned_string (defined out of line)
-    interned_string_iterator end() const;
-
-  private:
-    const string_internment* internment_;
-    const interned_string* str_;
-};
-
-/**
- * @brief Interned string value returned from a string_internment object.
- *
- * This is a value object without allocation.  It can be checked for equality
- * and hashed safely against other interned_string's generated from the same
- * string_internment.
- *
- * The part count and the part ids are packed into a byte array: kSizeSize
- * bytes for the count, followed by kBytesPerId bytes for each of up to
- * kMaxParts ids, all stored least significant byte first.
- */
-class interned_string {
-  public:
-    ///@brief constructor; stores the first n ids of intern_ids and zeroes the unused bytes
-    interned_string(std::array<StringId, kMaxParts> intern_ids, size_t n) {
-        std::fill(storage_.begin(), storage_.end(), 0);
-        set_num_parts(n);
-        for (size_t i = 0; i < n; ++i) {
-            set_id(i, intern_ids[i]);
-        }
-    }
-
-    /**
-     * @brief Copy the underlying string into output.
-     *
-     * internment must be the object that generated this interned_string.
-     */
-    void get(const string_internment* internment, std::string* output) const;
-
-    /**
-     * @brief Returns the underlying string as a std::string.
-     *
-     * This method will allocate memory.
-     */
-    std::string get(const string_internment* internment) const {
-        std::string result;
-        get(internment, &result);
-        return result;
-    }
-
-    /**
-     * @brief Bind the parent string_internment and return a bound_interned_string object.
-     * 
-     * That bound_interned_string lifetime must be shorter than this
-     * interned_string object lifetime, as bound_interned_string contains
-     * a reference to this object, along with a reference to the internment
-     * object.
-     */
-    bound_interned_string bind(const string_internment* internment) const {
-        return bound_interned_string(internment, this);
-    }
-
-    ///@brief begin() function; unpacks the stored ids and hands them to a new iterator
-    interned_string_iterator begin(const string_internment* internment) const {
-        size_t n = num_parts();
-        std::array<StringId, kMaxParts> intern_ids;
-
-        //Only the first n entries are assigned here
-        for (size_t i = 0; i < n; ++i) {
-            intern_ids[i] = id(i);
-        }
-
-        return interned_string_iterator(internment, intern_ids, n);
-    }
-
-    ///@brief end() function; returns a default-constructed iterator
-    interned_string_iterator end() const {
-        return interned_string_iterator();
-    }
-
-    ///@brief == operator
-    friend bool operator==(interned_string lhs,
-                           interned_string rhs) noexcept;
-    ///@brief != operator
-    friend bool operator!=(interned_string lhs,
-                           interned_string rhs) noexcept;
-    ///@brief hash function
-    friend std::hash<interned_string>;
-    friend interned_string_less;
-
-  private:
-    ///@brief Store the part count in the first kSizeSize bytes; throws if n does not survive the round trip
-    void set_num_parts(size_t n) {
-        for (size_t i = 0; i < kSizeSize; ++i) {
-            storage_[i] = (n >> (i * CHAR_BIT)) & UCHAR_MAX;
-        }
-
-        //Reading the value back detects a count too large for kSizeSize bytes
-        if (num_parts() != n) {
-            throw std::runtime_error("Storage size exceeded.");
-        }
-    }
-
-    ///@brief Read the part count back from the first kSizeSize bytes
-    size_t num_parts() const {
-        size_t n = 0;
-        for (size_t i = 0; i < kSizeSize; ++i) {
-            n |= storage_[i] << (i * CHAR_BIT);
-        }
-
-        return n;
-    }
-
-    ///@brief Store id in slot idx; throws if idx is out of range or id does not survive the round trip
-    void set_id(size_t idx, StringId id) {
-        if (idx >= kMaxParts) {
-            throw std::runtime_error("Storage size exceeded.");
-        }
-
-        size_t val = (size_t)id;
-        for (size_t i = 0; i < kBytesPerId; ++i) {
-            storage_[kSizeSize + i + idx * kBytesPerId] = (val >> (i * CHAR_BIT)) & UCHAR_MAX;
-        }
-
-        //Reading the value back detects an id too large for kBytesPerId bytes
-        if (this->id(idx) != id) {
-            throw std::runtime_error("Storage size exceeded.");
-        }
-    }
-
-    ///@brief Read the id stored in slot idx (idx is not range checked here)
-    StringId id(size_t idx) const {
-        size_t val = 0;
-        for (size_t i = 0; i < kBytesPerId; ++i) {
-            val |= storage_[kSizeSize + i + idx * kBytesPerId] << (i * CHAR_BIT);
-        }
-
-        return StringId(val);
-    }
-
-    //Packed part count followed by the packed ids
-    std::array<uint8_t, kSizeSize + kMaxParts * kBytesPerId> storage_;
-};
-
-///@brief == operator
-inline bool operator==(interned_string lhs,
-                       interned_string rhs) noexcept {
-    return lhs.storage_ == rhs.storage_;
-}
-
-///@brief != operator
-inline bool operator!=(interned_string lhs,
-                       interned_string rhs) noexcept {
-    return lhs.storage_ != rhs.storage_;
-}
-
-///@brief < operator
-inline bool operator<(bound_interned_string lhs,
-                      bound_interned_string rhs) noexcept {
-    return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
-}
-
-///@brief >= operator
-inline bool operator>=(bound_interned_string lhs,
-                       bound_interned_string rhs) noexcept {
-    return !std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
-}
-
-///@brief > operator
-inline bool operator>(bound_interned_string lhs,
-                      bound_interned_string rhs) noexcept {
-    return rhs < lhs;
-}
-
-///@brief <= operator
-inline bool operator<=(bound_interned_string lhs,
-                       bound_interned_string rhs) noexcept {
-    return rhs >= lhs;
-}
-
-/**
- * @brief  Storage of interned string, and object capable of generating new interned_string objects.
- */
-class string_internment {
-  public:
-    /**
-     * @brief Intern a string, and return a unique identifier to that string.
-     *
-     * If interned_string is ever called with two strings of the same value,
-     * the interned_string will be equal.
-     */
-    interned_string intern_string(vtr::string_view view) {
-        size_t num_parts = 1;
-        for (const auto& c : view) {
-            if (c == kSplitChar) {
-                num_parts += 1;
-            }
-        }
-
-        std::array<StringId, kMaxParts> parts;
-        if (num_parts == 1 || num_parts > kMaxParts) {
-            // Intern entire string.
-            parts[0] = intern_one_string(view);
-            return interned_string(parts, 1);
-        } else {
-            // Implements parts = [intern_one_string(s) for s in view.split(kSplitChar)]
-            size_t idx = 0;
-            size_t start = 0;
-
-            for (size_t i = 0; i < view.size(); ++i) {
-                if (view[i] == kSplitChar) {
-                    parts[idx++] = intern_one_string(view.substr(start, i - start));
-                    start = i + 1;
-                    if (idx == num_parts - 1) {
-                        break;
-                    }
-                }
-            }
-
-            parts[idx++] = intern_one_string(view.substr(start));
-            return interned_string(parts, num_parts);
-        }
-    }
-
-    /**
-     * @brief Retrieve a string part based on id.
-     *
-     * This method should not generally be called directly.
-     */
-    vtr::string_view get_string(StringId id) const {
-        auto& str = strings_[id];
-        return vtr::string_view(str.data(), str.size());
-    }
-
-    ///@brief Number of unique string parts stored.
-    size_t unique_strings() const {
-        return strings_.size();
-    }
-
-  private:
-    StringId intern_one_string(vtr::string_view view) {
-        temporary_.assign(view.begin(), view.end());
-        StringId next_id(strings_.size());
-        auto result = string_to_id_.insert(std::make_pair(temporary_, next_id));
-        if (result.second) {
-            strings_.push_back(std::move(temporary_));
-        }
-
-        return result.first->second;
-    }
-
-    // FIXME: This storage scheme does store 2x memory for the strings storage,
-    // however it does avoid having to be concerned with what happens when
-    // strings_ resizes, so for a simplier initial implementation, this is the
-    // approach taken.
-    vtr::vector<StringId, std::string> strings_;
-    std::string temporary_;
-    std::unordered_map<std::string, StringId> string_to_id_;
-};
-
-/**
- * @brief Copy the underlying string into output.
- *
- * internment must the object that generated this interned_string.
- */
-inline void interned_string::get(const string_internment* internment, std::string* output) const {
-    // Implements
-    // kSplitChar.join(interned_string->get_string(id(idx)) for idx in range(num_parts())));
-    size_t parts = num_parts();
-    size_t storage_needed = parts - 1;
-    std::array<StringId, kMaxParts> intern_ids;
-    for (size_t i = 0; i < parts; ++i) {
-        intern_ids[i] = id(i);
-        storage_needed += internment->get_string(intern_ids[i]).size();
-    }
-
-    output->clear();
-    output->reserve(storage_needed);
-
-    for (size_t i = 0; i < parts; ++i) {
-        auto view = internment->get_string(intern_ids[i]);
-        std::copy(view.begin(), view.end(), std::back_inserter(*output));
-        if (i + 1 < parts) {
-            output->push_back(kSplitChar);
-        }
-    }
-}
-
-/**
- * @brief constructor for interned string iterator.
- *
- * Do no construct this iterator directly.  Use either
- * bound_interned_string::begin/end or interned_string;:begin/end.
- */
-inline interned_string_iterator::interned_string_iterator(const string_internment* internment, std::array<StringId, kMaxParts> intern_ids, size_t n)
-    : internment_(internment)
-    , num_parts_(n)
-    , parts_(intern_ids)
-    , part_idx_(0)
-    , str_idx_(0) {
-    if (num_parts_ == 0) {
-        clear();
-    } else {
-        view_ = internment_->get_string(parts_[0]);
-    }
-}
-
-///@brief Increment operator for interned_string_iterator
-inline interned_string_iterator& interned_string_iterator::operator++() {
-    if (num_parts_ == size_t(-1)) {
-        throw std::out_of_range("Invalid iterator");
-    }
-
-    if (str_idx_ < view_.size()) {
-        // Current string has characters left, advance.
-        str_idx_ += 1;
-        // Normally when str_idx_ the iterator will next emit a kSplitChar,
-        // but this is omitted on the last part of the string.
-        if (str_idx_ == view_.size() && part_idx_ + 1 == num_parts_) {
-            clear();
-        }
-    } else {
-        // Current part of the string is out of characters, and the
-        // kSplitChar has been emitted, advance to the next part.
-        str_idx_ = 0;
-        part_idx_ += 1;
-        if (part_idx_ == num_parts_) {
-            // No more parts.
-            clear();
-        } else {
-            view_ = internment_->get_string(parts_[part_idx_]);
-            if (view_.size() == 0 && part_idx_ + 1 == num_parts_) {
-                // The last string part is empty, and because this is the last
-                // part we don't want to emit another kSplitChar.
-                clear();
-            }
-        }
-    }
-
-    return *this;
-}
-
-///@brief Increment operator for interned_string_iterator
-inline interned_string_iterator interned_string_iterator::operator++(int) {
-    interned_string_iterator prev = *this;
-    ++*this;
-
-    return prev;
-}
-
-///@brief return an iterator to the first part of the interned_string
-inline interned_string_iterator bound_interned_string::begin() const {
-    return str_->begin(internment_);
-}
-
-///@brief return an iterator to the last part of the interned_string
-inline interned_string_iterator bound_interned_string::end() const {
-    return interned_string_iterator();
-}
-
-inline std::ostream& operator<<(std::ostream& os, bound_interned_string const& value) {
-    for (const auto& c : value) {
-        os << c;
-    }
-    return os;
-}
-
-/**
- * @brief A friend class to interned_string that compares 2 interned_strings
- */
-class interned_string_less {
-  public:
-    ///@brief Return true if the first interned string is less than the second one
-    bool operator()(const vtr::interned_string& lhs, const vtr::interned_string& rhs) const {
-        return lhs.storage_ < rhs.storage_;
-    }
-};
-
-} // namespace vtr
-
-namespace std {
-/**
- * @brief Hash function for the interned_string 
- *
- * It is defined as a friend function to interned_string class.
- * It returns a unique hash for every interned_string.
- */
-template<>
-struct hash<vtr::interned_string> {
-    std::size_t operator()(vtr::interned_string const& s) const noexcept {
-        std::size_t h = 0;
-        for (const auto& data : s.storage_) {
-            vtr::hash_combine(h, std::hash<char>()(data));
-        }
-        return h;
-    }
-};
-} // namespace std
-
-#endif /* VTR_STRING_INTERNING_H_ */
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_string_view.h b/third_party/vtr/libs/vtrutil/src/vtr_string_view.h
deleted file mode 100644
index 12a7a7a44..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_string_view.h
+++ /dev/null
@@ -1,192 +0,0 @@
-#ifndef VTR_STRING_VIEW_H_
-#define VTR_STRING_VIEW_H_
-
-#include <cstring>
-#include <ostream>
-#include <string>
-#include <stdexcept>
-
-#include "vtr_hash.h"
-
-namespace vtr {
-
-/**
- * @brief Implements a view to a fixed length string (similar to std::basic_string_view).
- *
- * The underlying string does not need to be NULL terminated.
- */
-class string_view {
-  public:
-    static constexpr size_t npos = size_t(-1);
-
-    ///@brief constructor
-    explicit constexpr string_view()
-        : data_(nullptr)
-        , size_(0) {}
-
-    ///@brief constructor
-    explicit string_view(const char* str)
-        : data_(str)
-        , size_(strlen(str)) {}
-    ///@brief constructor
-    explicit constexpr string_view(const char* str, size_t size)
-        : data_(str)
-        , size_(size) {}
-
-    constexpr string_view(const string_view& other) noexcept = default;
-    ///@brief copy constructor
-    constexpr string_view& operator=(const string_view& view) noexcept {
-        data_ = view.data_;
-        size_ = view.size_;
-        return *this;
-    }
-
-    ///@brief indexing [] operator (immutable)
-    constexpr char operator[](size_t pos) const {
-        return data_[pos];
-    }
-
-    ///@brief aT() operator (immutable)
-    const char& at(size_t pos) const {
-        if (pos >= size()) {
-            throw std::out_of_range("Pos is out of range.");
-        }
-
-        return data_[pos];
-    }
-
-    ///@brief Returns the first character of the string
-    constexpr const char& front() const {
-        return data_[0];
-    }
-
-    ///@brief Returns the last character of the string
-    constexpr const char& back() const {
-        return data_[size() - 1];
-    }
-
-    ///@brief Returns a pointer to the string data
-    constexpr const char* data() const {
-        return data_;
-    }
-
-    ///@brief Returns the string size
-    constexpr size_t size() const noexcept {
-        return size_;
-    }
-
-    ///@brief Returns the string size
-    constexpr size_t length() const noexcept {
-        return size_;
-    }
-
-    ///@brief Returns true if empty
-    constexpr bool empty() const noexcept {
-        return size_ == 0;
-    }
-
-    ///@brief Returns a pointer to the begin of the string
-    constexpr const char* begin() const noexcept {
-        return data_;
-    }
-
-    ///@brief Same as begin()
-    constexpr const char* cbegin() const noexcept {
-        return data_;
-    }
-
-    ///@brief Returns a pointer to the end of the string
-    constexpr const char* end() const noexcept {
-        return data_ + size_;
-    }
-
-    ///@brief Same as end()
-    constexpr const char* cend() const noexcept {
-        return data_ + size_;
-    }
-
-    ///@brief Swaps two string views
-    void swap(string_view& v) noexcept {
-        std::swap(data_, v.data_);
-        std::swap(size_, v.size_);
-    }
-
-    ///@brief Returns a newly constructed string object with its value initialized to a copy of a substring of this object.
-    string_view substr(size_t pos = 0, size_t count = npos) {
-        if (pos > size()) {
-            throw std::out_of_range("Pos is out of range.");
-        }
-
-        size_t rcount = size_ - pos;
-        if (count != npos && (pos + count) < size_) {
-            rcount = count;
-        }
-
-        return string_view(data_ + pos, rcount);
-    }
-
-  private:
-    const char* data_;
-    size_t size_;
-};
-
-///@brief == operator
-inline bool operator==(string_view lhs,
-                       string_view rhs) noexcept {
-    return lhs.size() == rhs.size() && (lhs.empty() || rhs.empty() || (strncmp(lhs.data(), rhs.data(), std::min(lhs.size(), rhs.size())) == 0));
-}
-
-///@brief != operator
-inline bool operator!=(string_view lhs,
-                       string_view rhs) noexcept {
-    return lhs.size() != rhs.size() || strncmp(lhs.data(), rhs.data(), std::min(lhs.size(), rhs.size())) != 0;
-}
-
-///@brief < operator
-inline bool operator<(string_view lhs,
-                      string_view rhs) noexcept {
-    return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
-}
-
-///brief >= operator
-inline bool operator>=(string_view lhs,
-                       string_view rhs) noexcept {
-    return !std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
-}
-
-///@brief > operator
-inline bool operator>(string_view lhs,
-                      string_view rhs) noexcept {
-    return rhs < lhs;
-}
-
-///@brief <= operator
-inline bool operator<=(string_view lhs,
-                       string_view rhs) noexcept {
-    return rhs >= lhs;
-}
-
-///@brief << operator for ostream
-inline std::ostream& operator<<(std::ostream& os, string_view const& value) {
-    for (const auto& c : value) {
-        os << c;
-    }
-    return os;
-}
-
-} // namespace vtr
-
-namespace std {
-template<>
-struct hash<vtr::string_view> {
-    std::size_t operator()(vtr::string_view const& s) const noexcept {
-        std::size_t h = 0;
-        for (const auto& data : s) {
-            vtr::hash_combine(h, std::hash<char>()(data));
-        }
-        return h;
-    }
-};
-} // namespace std
-
-#endif /* VTR_STRING_VIEW_H_ */
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_strong_id.h b/third_party/vtr/libs/vtrutil/src/vtr_strong_id.h
deleted file mode 100644
index 1ce922ab5..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_strong_id.h
+++ /dev/null
@@ -1,245 +0,0 @@
-#ifndef VTR_STRONG_ID_H
-#define VTR_STRONG_ID_H
-/**
- * @file
- * @brief This header provides the StrongId class.
- *
- * It is template which can be used to create strong Id's 
- * which avoid accidental type conversions (generating compiler errors when they occur).
- *
- * Motivation
- * ==========
- * It is common to use an Id (typically an integer) to identify and represent a component.
- * A basic example (poor style):
- *
- *      size_t count_net_terminals(int net_id);
- *
- * Where a plain int is used to represent the net identifier.
- * Using a plain basic type is poor style since it makes it unclear that the parameter is
- * an Id.
- *
- * A better example is to use a typedef:
- *
- *      typedef int NetId;
- *
- *      size_t count_net_teriminals(NetId net_id);
- *
- * It is now clear that the parameter is expecting an Id.
- *
- * However this approach has some limitations. In particular, typedef's only create type
- * aliases, and still allow conversions. This is problematic if there are multiple types
- * of Ids. For example:
- *
- *      typedef int NetId;
- *      typedef int BlkId;
- *
- *      size_t count_net_teriminals(NetId net_id);
- *
- *      BlkId blk_id = 10;
- *      NetId net_id = 42;
- *
- *      count_net_teriminals(net_id); //OK
- *      count_net_teriminals(blk_id); //Bug: passed a BlkId as a NetId
- *
- * Since typdefs are aliases the compiler issues no errors or warnings, and silently passes
- * the BlkId where a NetId is expected. This results in hard to diagnose bugs.
- *
- * We can avoid this issue by using a StrongId:
- *
- *      struct net_id_tag; //Phantom tag for NetId
- *      struct blk_id_tag; //Phantom tag for BlkId
- *
- *      typedef StrongId<net_id_tag> NetId;
- *      typedef StrongId<blk_id_tag> BlkId;
- *
- *      size_t count_net_teriminals(NetId net_id);
- *
- *      BlkId blk_id = 10;
- *      NetId net_id = 42;
- *
- *      count_net_teriminals(net_id); //OK
- *      count_net_teriminals(blk_id); //Compiler Error: NetId expected!
- *
- * StrongId is a template which implements the basic features of an Id, but disallows silent conversions
- * between different types of Ids. It uses another 'tag' type (passed as the first template parameter)
- * to uniquely identify the type of the Id (preventing conversions between different types of Ids).
- *
- * Usage
- * =====
- *
- * The StrongId template class takes one required and three optional template parameters:
- *
- *    1. Tag        - the unique type used to identify this type of Ids [Required]
- *    2. T          - the underlying integral id type (default: int) [Optional]
- *    3. T sentinel - a value representing an invalid Id (default: -1) [Optional]
- *
- * If no value is supllied during construction the StrongId is initialized to the invalid/sentinel value.
- *
- * Example 1: default definition
- *
- *      struct net_id_tag;
- *      typedef StrongId<net_id_tag> NetId; //Internally stores an integer Id, -1 represents invalid
- *
- * Example 2: definition with custom underlying type
- *
- *      struct blk_id_tag;
- *      typedef StrongId<net_id_tag,size_t> BlkId; //Internally stores a size_t Id, -1 represents invalid
- *
- * Example 3: definition with custom underlying type and custom sentinel value
- *
- *      struct pin_id_tag;
- *      typedef StrongId<net_id_tag,size_t,0> PinId; //Internally stores a size_t Id, 0 represents invalid
- *
- * Example 4: Creating Ids
- *
- *      struct net_id_tag;
- *      typedef StrongId<net_id_tag> MyId; //Internally stores an integer Id, -1 represents invalid
- *
- *      MyId my_id;           //Defaults to the sentinel value (-1 by default)
- *      MyId my_other_id = 5; //Explicit construction
- *      MyId my_thrid_id(25); //Explicit construction
- *
- * Example 5: Comparing Ids
- *
- *      struct net_id_tag;
- *      typedef StrongId<net_id_tag> MyId; //Internally stores an integer Id, -1 represents invalid
- *
- *      MyId my_id;           //Defaults to the sentinel value (-1 by default)
- *      MyId my_id_one = 1;
- *      MyId my_id_two = 2;
- *      MyId my_id_also_one = 1;
- *
- *      my_id_one == my_id_also_one; //True
- *      my_id_one == my_id; //False
- *      my_id_one == my_id_two; //False
- *      my_id_one != my_id_two; //True
- *
- * Example 5: Checking for invalid Ids
- *
- *      struct net_id_tag;
- *      typedef StrongId<net_id_tag> MyId; //Internally stores an integer Id, -1 represents invalid
- *
- *      MyId my_id;           //Defaults to the sentinel value
- *      MyId my_id_one = 1;
- *
- *      //Comparison against a constructed invalid id
- *      my_id == MyId::INVALID(); //True
- *      my_id_one == MyId::INVALID(); //False
- *      my_id_one != MyId::INVALID(); //True
- *
- *      //The Id can also be evaluated in a boolean context against the sentinel value
- *      if(my_id) //False, my_id is invalid
- *      if(!my_id) //True my_id is valid
- *      if(my_id_one) //True my_id_one is valid
- *
- * Example 6: Indexing data structures
- *
- *      struct my_id_tag;
- *      typedef StrongId<net_id_tag> MyId; //Internally stores an integer Id, -1 represents invalid
- *
- *      std::vector<int> my_vec = {0, 1, 2, 3, 4, 5};
- *
- *      MyId my_id = 2;
- *
- *      my_vec[size_t(my_id)]; //Access the third element via explicit conversion
- */
-#include <type_traits> //for std::is_integral
-#include <cstddef>     //for std::size_t
-#include <functional>  //for std::hash
-
-namespace vtr {
-
-// Forward declare the class (needed for operator declarations)
-template<typename tag, typename T, T sentinel>
-class StrongId;
-
-/*
- * Forward declare the equality/inequality operators
- *
- * We need to do this before the class definition so the class can
- * friend them
- */
-template<typename tag, typename T, T sentinel>
-bool operator==(const StrongId<tag, T, sentinel>& lhs, const StrongId<tag, T, sentinel>& rhs);
-
-template<typename tag, typename T, T sentinel>
-bool operator!=(const StrongId<tag, T, sentinel>& lhs, const StrongId<tag, T, sentinel>& rhs);
-
-template<typename tag, typename T, T sentinel>
-bool operator<(const StrongId<tag, T, sentinel>& lhs, const StrongId<tag, T, sentinel>& rhs);
-
-///@brief Class template definition with default template parameters
-template<typename tag, typename T = int, T sentinel = T(-1)>
-class StrongId {
-    static_assert(std::is_integral<T>::value, "T must be integral");
-
-  public:
-    ///@brief Gets the invalid Id
-    static constexpr StrongId INVALID() { return StrongId(); }
-
-    ///@brief Default to the sentinel value
-    constexpr StrongId()
-        : id_(sentinel) {}
-
-    ///@brief Only allow explict constructions from a raw Id (no automatic conversions)
-    explicit constexpr StrongId(T id)
-        : id_(id) {}
-
-    // Allow some explicit conversion to useful types:
-
-    ///@brief Allow explicit conversion to bool (e.g. if(id))
-    explicit operator bool() const { return *this != INVALID(); }
-
-    ///@brief Allow explicit conversion to size_t (e.g. my_vector[size_t(strong_id)])
-    explicit operator std::size_t() const { return static_cast<std::size_t>(id_); }
-
-    ///@brief To enable hasing Ids
-    friend std::hash<StrongId<tag, T, sentinel>>;
-
-    /**
-     * @brief To enable comparisions between Ids
-     *
-     * Note that since these are templated functions we provide an empty set of template parameters
-     * after the function name (i.e. <>)
-     */
-    friend bool operator== <>(const StrongId<tag, T, sentinel>& lhs, const StrongId<tag, T, sentinel>& rhs);
-    ///@brief != operator
-    friend bool operator!= <>(const StrongId<tag, T, sentinel>& lhs, const StrongId<tag, T, sentinel>& rhs);
-    ///@brief < operator
-    friend bool operator< <>(const StrongId<tag, T, sentinel>& lhs, const StrongId<tag, T, sentinel>& rhs);
-
-  private:
-    T id_;
-};
-
-///@brief == operator
-template<typename tag, typename T, T sentinel>
-bool operator==(const StrongId<tag, T, sentinel>& lhs, const StrongId<tag, T, sentinel>& rhs) {
-    return lhs.id_ == rhs.id_;
-}
-
-///@brief != operator
-template<typename tag, typename T, T sentinel>
-bool operator!=(const StrongId<tag, T, sentinel>& lhs, const StrongId<tag, T, sentinel>& rhs) {
-    return !(lhs == rhs);
-}
-
-///@brief operator < Needed for std::map-like containers
-template<typename tag, typename T, T sentinel>
-bool operator<(const StrongId<tag, T, sentinel>& lhs, const StrongId<tag, T, sentinel>& rhs) {
-    return lhs.id_ < rhs.id_;
-}
-
-} //namespace vtr
-
-///@brief Specialize std::hash for StrongId's (needed for std::unordered_map-like containers)
-namespace std {
-template<typename tag, typename T, T sentinel>
-struct hash<vtr::StrongId<tag, T, sentinel>> {
-    std::size_t operator()(const vtr::StrongId<tag, T, sentinel> k) const noexcept {
-        return std::hash<T>()(k.id_); //Hash with the underlying type
-    }
-};
-} //namespace std
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_strong_id_range.h b/third_party/vtr/libs/vtrutil/src/vtr_strong_id_range.h
deleted file mode 100644
index e9fd938f3..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_strong_id_range.h
+++ /dev/null
@@ -1,185 +0,0 @@
-#ifndef _VTR_STRONG_ID_RANGE_H
-#define _VTR_STRONG_ID_RANGE_H
-
-#include <algorithm>
-#include "vtr_assert.h"
-
-namespace vtr {
-
-/**
- * @file
- * @brief This header defines a utility class for StrongId's.
- *
- * StrongId's are described in vtr_strong_id.h.  In some cases, StrongId's be considered
- * like random access iterators, but not all StrongId's have this property.
- * In addition, there is utility in refering to a range of id's, and being able
- * to iterator over that range.
- */
-
-/**
- * @brief StrongIdIterator class
- *
- * StrongIdIterator allows a StrongId to be treated like a random access
- * iterator.  Whether this is a correct use of the abstraction is up to the
- * called.
- *
- */
-template<typename StrongId>
-class StrongIdIterator {
-  public:
-    ///@brief constructor
-    StrongIdIterator() = default;
-    ///@brief copy constructor
-    StrongIdIterator& operator=(const StrongIdIterator& other) = default;
-    ///@brief copy constructor
-    StrongIdIterator(const StrongIdIterator& other) = default;
-    ///@brief constructor
-    explicit StrongIdIterator(StrongId id)
-        : id_(id) {
-        VTR_ASSERT(bool(id));
-    }
-
-    using iterator_category = std::random_access_iterator_tag;
-    using value_type = StrongId;
-    using reference = StrongId&;
-    using pointer = StrongId*;
-    using difference_type = ssize_t;
-
-    ///@brief Dereference operator (*)
-    StrongId& operator*() {
-        VTR_ASSERT_SAFE(bool(id_));
-        return this->id_;
-    }
-
-    ///@brief += operator
-    StrongIdIterator& operator+=(ssize_t n) {
-        VTR_ASSERT_SAFE(bool(id_));
-        id_ = StrongId(size_t(id_) + n);
-        VTR_ASSERT_SAFE(bool(id_));
-        return *this;
-    }
-
-    ///@brief -= operator
-    StrongIdIterator& operator-=(ssize_t n) {
-        VTR_ASSERT_SAFE(bool(id_));
-        id_ = StrongId(size_t(id_) - n);
-        VTR_ASSERT_SAFE(bool(id_));
-        return *this;
-    }
-
-    ///@brief ++ operator
-    StrongIdIterator& operator++() {
-        VTR_ASSERT_SAFE(bool(id_));
-        *this += 1;
-        VTR_ASSERT_SAFE(bool(id_));
-        return *this;
-    }
-
-    ///@brief Decremment operator
-    StrongIdIterator& operator--() {
-        VTR_ASSERT_SAFE(bool(id_));
-        *this -= 1;
-        VTR_ASSERT_SAFE(bool(id_));
-        return *this;
-    }
-
-    ///@brief Indexing operator []
-    StrongId operator[](ssize_t offset) const {
-        return StrongId(size_t(id_) + offset);
-    }
-
-    ///@brief + operator
-    template<typename IdType>
-    friend StrongIdIterator<IdType> operator+(
-        const StrongIdIterator<IdType>& lhs,
-        ssize_t n) {
-        StrongIdIterator ret = lhs;
-        ret += n;
-        return ret;
-    }
-
-    ///@brief - operator
-    template<typename IdType>
-    friend StrongIdIterator<IdType> operator-(
-        const StrongIdIterator<IdType>& lhs,
-        ssize_t n) {
-        StrongIdIterator ret = lhs;
-        ret -= n;
-        return ret;
-    }
-
-    ///@brief ~ operator
-    template<typename IdType>
-    friend ssize_t operator-(
-        const StrongIdIterator<IdType>& lhs,
-        const StrongIdIterator<IdType>& rhs) {
-        VTR_ASSERT_SAFE(bool(lhs.id_));
-        VTR_ASSERT_SAFE(bool(rhs.id_));
-
-        ssize_t ret = size_t(lhs.id_);
-        ret -= size_t(rhs.id_);
-        return ret;
-    }
-
-    ///@brief == operator
-    template<typename IdType>
-    friend bool operator==(const StrongIdIterator<IdType>& lhs, const StrongIdIterator<IdType>& rhs) {
-        return lhs.id_ == rhs.id_;
-    }
-
-    ///@brief != operator
-    template<typename IdType>
-    friend bool operator!=(const StrongIdIterator<IdType>& lhs, const StrongIdIterator<IdType>& rhs) {
-        return lhs.id_ != rhs.id_;
-    }
-
-    ///@brief < operator
-    template<typename IdType>
-    friend bool operator<(const StrongIdIterator<IdType>& lhs, const StrongIdIterator<IdType>& rhs) {
-        return lhs.id_ < rhs.id_;
-    }
-
-  private:
-    StrongId id_;
-};
-
-/**
- * @brief StrongIdRange class
- *
- * StrongIdRange allows a pair of StrongId's to defines a continguous range of
- * ids.  The "end" StrongId is excluded from this range.
- */
-template<typename StrongId>
-class StrongIdRange {
-  public:
-    ///@brief constructor
-    StrongIdRange(StrongId b, StrongId e)
-        : begin_(b)
-        , end_(e) {
-        VTR_ASSERT(begin_ < end_ || begin_ == end_);
-    }
-
-    ///@brief Returns a StrongIdIterator to the first strongId in the range
-    StrongIdIterator<StrongId> begin() const {
-        return StrongIdIterator<StrongId>(begin_);
-    }
-    ///@brief Returns a StrongIdIterator referring to the past-the-end element in the vector container.
-    StrongIdIterator<StrongId> end() const {
-        return StrongIdIterator<StrongId>(end_);
-    }
-
-    ///@brief Returns true if the range is empty
-    bool empty() { return begin_ == end_; }
-    ///@brief Reurns the size of the range
-    size_t size() {
-        return std::distance(begin(), end());
-    }
-
-  private:
-    StrongId begin_;
-    StrongId end_;
-};
-
-} //namespace vtr
-
-#endif /* _VTR_STRONG_ID_RANGE_H */
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_time.cc b/third_party/vtr/libs/vtrutil/src/vtr_time.cc
deleted file mode 100644
index a557f1867..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_time.cc
+++ /dev/null
@@ -1,99 +0,0 @@
-#include "vtr_time.h"
-
-#include "vtr_log.h"
-#include "vtr_rusage.h"
-
-namespace vtr {
-
-int f_timer_depth = 0;
-
-///@brief Constructor
-Timer::Timer()
-    : start_(clock::now())
-    , initial_max_rss_(get_max_rss()) {
-}
-
-///@brief Returns the elapsed seconds since construction
-float Timer::elapsed_sec() const {
-    return std::chrono::duration<float>(clock::now() - start_).count();
-}
-
-///@brief Returns the maximum resident size (rss) in bytes
-float Timer::max_rss_mib() const {
-    return get_max_rss() / BYTE_TO_MIB;
-}
-
-///@brief Returns the change in maximum resident size in bytes
-float Timer::delta_max_rss_mib() const {
-    return (get_max_rss() - initial_max_rss_) / BYTE_TO_MIB;
-}
-
-///@brief Constructor
-ScopedActionTimer::ScopedActionTimer(std::string action_str)
-    : action_(action_str)
-    , depth_(f_timer_depth++) {
-}
-
-///@brief Destructor
-ScopedActionTimer::~ScopedActionTimer() {
-    --f_timer_depth;
-}
-
-///@brief Sets quiet value (when true, prints the timing info)
-void ScopedActionTimer::quiet(bool value) {
-    quiet_ = value;
-}
-
-///@brief Returns the quiet value
-bool ScopedActionTimer::quiet() const {
-    return quiet_;
-}
-
-///@brief Returns the action string
-std::string ScopedActionTimer::action() const {
-    return action_;
-}
-
-///@brief Pads the output string with # if it is not empty
-std::string ScopedActionTimer::pad() const {
-    if (depth() == 0) {
-        return "";
-    }
-    return std::string(depth(), '#') + " ";
-}
-
-///@brief Returns the depth
-int ScopedActionTimer::depth() const {
-    return depth_;
-}
-
-///@brief Constructor
-ScopedFinishTimer::ScopedFinishTimer(std::string action_str)
-    : ScopedActionTimer(action_str) {
-}
-
-///@brief Destructor
-ScopedFinishTimer::~ScopedFinishTimer() {
-    if (!quiet()) {
-        vtr::printf_info("%s%s took %.2f seconds (max_rss %.1f MiB)\n",
-                         pad().c_str(), action().c_str(), elapsed_sec(),
-                         max_rss_mib());
-    }
-}
-
-///@brief Constructor
-ScopedStartFinishTimer::ScopedStartFinishTimer(std::string action_str)
-    : ScopedActionTimer(action_str) {
-    vtr::printf_info("%s%s\n", pad().c_str(), action().c_str());
-}
-
-///@brief Destructor
-ScopedStartFinishTimer::~ScopedStartFinishTimer() {
-    if (!quiet()) {
-        vtr::printf_info("%s%s took %.2f seconds (max_rss %.1f MiB, delta_rss %+.1f MiB)\n",
-                         pad().c_str(), action().c_str(), elapsed_sec(),
-                         max_rss_mib(), delta_max_rss_mib());
-    }
-}
-
-} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_time.h b/third_party/vtr/libs/vtrutil/src/vtr_time.h
deleted file mode 100644
index 2a4d4ec8a..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_time.h
+++ /dev/null
@@ -1,99 +0,0 @@
-#ifndef VTR_TIME_H
-#define VTR_TIME_H
-#include <chrono>
-#include <string>
-
-namespace vtr {
-
-///@brief Class for tracking time elapsed since construction
-class Timer {
-  public:
-    Timer();
-    virtual ~Timer() = default;
-
-    ///@brief No copy
-    Timer(Timer&) = delete;
-    Timer& operator=(Timer&) = delete;
-
-    ///@brief No move
-    Timer(Timer&&) = delete;
-    Timer& operator=(Timer&&) = delete;
-
-    ///@brief Return elapsed time in seconds
-    float elapsed_sec() const;
-
-    ///@brief Return peak memory resident set size (in MiB)
-    float max_rss_mib() const;
-
-    ///@brief Return change in peak memory resident set size (in MiB)
-    float delta_max_rss_mib() const;
-
-  private:
-    using clock = std::chrono::steady_clock;
-    std::chrono::time_point<clock> start_;
-
-    size_t initial_max_rss_; //Maximum resident set size In bytes
-    constexpr static float BYTE_TO_MIB = 1024 * 1024;
-};
-
-///@brief Scoped time class which prints the time elapsed for the specifid action
-class ScopedActionTimer : public Timer {
-  public:
-    ScopedActionTimer(const std::string action);
-    ~ScopedActionTimer();
-
-    void quiet(bool value);
-    bool quiet() const;
-    std::string action() const;
-
-  protected:
-    int depth() const;
-    std::string pad() const;
-
-  private:
-    const std::string action_;
-    bool quiet_ = false;
-    int depth_;
-};
-
-/**
- * @brief Scoped elapsed time class which prints the time elapsed for the specified action when it is destructed.
- *
- * For example:
- * 
- *       {
- *           vtr::ScopedFinishTimer timer("my_action");
- * 
- *           //Do other work
- * 
- *           //Will print: 'my_action took X.XX seconds' when out-of-scope
- *       }
- */
-class ScopedFinishTimer : public ScopedActionTimer {
-  public:
-    ScopedFinishTimer(const std::string action);
-    ~ScopedFinishTimer();
-};
-
-/**
- * @brief Scoped elapsed time class which prints out the action when initialized and again both the action and elapsed time
- *
- * when destructed.
- * For example:
- * 
- *       {
- *           vtr::ScopedStartFinishTimer timer("my_action") //Will print: 'my_action'
- * 
- *           //Do other work
- * 
- *           //Will print 'my_action took X.XX seconds' when out of scope
- *       }
- */
-class ScopedStartFinishTimer : public ScopedActionTimer {
-  public:
-    ScopedStartFinishTimer(const std::string action);
-    ~ScopedStartFinishTimer();
-};
-} // namespace vtr
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_token.cc b/third_party/vtr/libs/vtrutil/src/vtr_token.cc
deleted file mode 100644
index 1715e9f23..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_token.cc
+++ /dev/null
@@ -1,218 +0,0 @@
-/**
- * Jason Luu
- * July 22, 2009
- * Tokenizer
- */
-
-#include <cstring>
-
-#include "vtr_assert.h"
-#include "vtr_log.h"
-#include "vtr_util.h"
-#include "vtr_memory.h"
-#include "vtr_token.h"
-
-enum e_token_type GetTokenTypeFromChar(const enum e_token_type cur_token_type,
-                                       const char cur);
-
-bool IsWhitespace(char c);
-
-///@brief Returns true if character is whatspace between tokens
-bool IsWhitespace(char c) {
-    switch (c) {
-        case ' ':
-        case '\t':
-        case '\r':
-        case '\n':
-            return true;
-        default:
-            return false;
-    }
-}
-
-///@brief Returns a token list of the text for a given string.
-t_token* GetTokensFromString(const char* inString, int* num_tokens) {
-    const char* cur;
-    t_token* tokens;
-    int i, in_string_index, prev_in_string_index;
-    bool has_null;
-    enum e_token_type cur_token_type, new_token_type;
-
-    *num_tokens = i = 0;
-    cur_token_type = TOKEN_NULL;
-
-    if (inString == nullptr) {
-        return nullptr;
-    };
-
-    cur = inString;
-
-    /* Count number of tokens */
-    while (*cur) {
-        new_token_type = GetTokenTypeFromChar(cur_token_type, *cur);
-        if (new_token_type != cur_token_type) {
-            cur_token_type = new_token_type;
-            if (new_token_type != TOKEN_NULL) {
-                i++;
-            }
-        }
-        ++cur;
-    }
-    *num_tokens = i;
-
-    if (*num_tokens > 0) {
-        tokens = (t_token*)vtr::calloc(*num_tokens + 1, sizeof(t_token));
-    } else {
-        return nullptr;
-    }
-
-    /* populate tokens */
-    i = 0;
-    in_string_index = 0;
-    has_null = true;
-    prev_in_string_index = 0;
-    cur_token_type = TOKEN_NULL;
-
-    cur = inString;
-
-    while (*cur) {
-        new_token_type = GetTokenTypeFromChar(cur_token_type, *cur);
-        if (new_token_type != cur_token_type) {
-            if (!has_null) {
-                tokens[i - 1].data[in_string_index - prev_in_string_index] = '\0'; /* NULL the end of the data string */
-                has_null = true;
-            }
-            if (new_token_type != TOKEN_NULL) {
-                tokens[i].type = new_token_type;
-                tokens[i].data = vtr::strdup(inString + in_string_index);
-                prev_in_string_index = in_string_index;
-                has_null = false;
-                i++;
-            }
-            cur_token_type = new_token_type;
-        }
-        ++cur;
-        in_string_index++;
-    }
-
-    VTR_ASSERT(i == *num_tokens);
-
-    tokens[*num_tokens].type = TOKEN_NULL;
-    tokens[*num_tokens].data = nullptr;
-
-    /* Return the list */
-    return tokens;
-}
-
-///@brief Free (tokens)
-void freeTokens(t_token* tokens, const int num_tokens) {
-    int i;
-    for (i = 0; i < num_tokens; i++) {
-        free(tokens[i].data);
-    }
-    free(tokens);
-}
-
-///@brief Returns a token type of the given char
-enum e_token_type GetTokenTypeFromChar(const enum e_token_type cur_token_type,
-                                       const char cur) {
-    if (IsWhitespace(cur)) {
-        return TOKEN_NULL;
-    } else {
-        if (cur == '[') {
-            return TOKEN_OPEN_SQUARE_BRACKET;
-        } else if (cur == ']') {
-            return TOKEN_CLOSE_SQUARE_BRACKET;
-        } else if (cur == '{') {
-            return TOKEN_OPEN_SQUIG_BRACKET;
-        } else if (cur == '}') {
-            return TOKEN_CLOSE_SQUIG_BRACKET;
-        } else if (cur == ':') {
-            return TOKEN_COLON;
-        } else if (cur == '.') {
-            return TOKEN_DOT;
-        } else if (cur >= '0' && cur <= '9' && cur_token_type != TOKEN_STRING) {
-            return TOKEN_INT;
-        } else {
-            return TOKEN_STRING;
-        }
-    }
-}
-
-///@brief Returns true if the token's type equals to token_type
-bool checkTokenType(const t_token token, enum e_token_type token_type) {
-    if (token.type != token_type) {
-        return false;
-    }
-    return true;
-}
-
-///@brief Returns a 2D array representing the atof result of all the input string entries seperated by whitespace
-void my_atof_2D(float** matrix, const int max_i, const int max_j, const char* instring) {
-    int i, j;
-    char *cur, *cur2, *copy, *final;
-
-    copy = vtr::strdup(instring);
-    final = copy;
-    while (*final != '\0') {
-        final++;
-    }
-
-    cur = copy;
-    i = j = 0;
-    while (cur != final) {
-        while (IsWhitespace(*cur) && cur != final) {
-            if (j == max_j) {
-                i++;
-                j = 0;
-            }
-            cur++;
-        }
-        if (cur == final) {
-            break;
-        }
-        cur2 = cur;
-        while (!IsWhitespace(*cur2) && cur2 != final) {
-            cur2++;
-        }
-        *cur2 = '\0';
-        VTR_ASSERT(i < max_i && j < max_j);
-        matrix[i][j] = vtr::atof(cur);
-        j++;
-        cur = cur2;
-        *cur = ' ';
-    }
-
-    VTR_ASSERT((i == max_i && j == 0) || (i == max_i - 1 && j == max_j));
-
-    free(copy);
-}
-
-/* Date:July 2nd, 2013													*
- * Author: Daniel Chen													*/
-/** 
- * @brief Checks if the number of entries (separated by whitespace)	matches the the expected number (max_i * max_j)
- *
- * can be used before calling my_atof_2D						
- */
-bool check_my_atof_2D(const int max_i, const int max_j, const char* instring, int* num_entries) {
-    /* Check if max_i * max_j matches number of entries in instring */
-    const char* cur = instring;
-    bool in_str = false;
-    int entry_count = 0;
-
-    /* First count number of entries in instring */
-    while (*cur != '\0') {
-        if (!IsWhitespace(*cur) && !in_str) {
-            in_str = true;
-            entry_count++;
-        } else if (IsWhitespace(*cur)) {
-            in_str = false;
-        }
-        cur++;
-    }
-    *num_entries = entry_count;
-
-    if (max_i * max_j != entry_count) return false;
-    return true;
-}
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_token.h b/third_party/vtr/libs/vtrutil/src/vtr_token.h
deleted file mode 100644
index 9556d6614..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_token.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/**
- * @file
- * @author Jason Luu
- * @Date July 22, 2009
- * @brief Tokenizer
- */
-
-#ifndef TOKEN_H
-#define TOKEN_H
-
-///@brief Token types
-enum e_token_type {
-    TOKEN_NULL,
-    TOKEN_STRING,
-    TOKEN_INT,
-    TOKEN_OPEN_SQUARE_BRACKET,
-    TOKEN_CLOSE_SQUARE_BRACKET,
-    TOKEN_OPEN_SQUIG_BRACKET,
-    TOKEN_CLOSE_SQUIG_BRACKET,
-    TOKEN_COLON,
-    TOKEN_DOT
-};
-
-///@brief Token structure
-struct t_token {
-    enum e_token_type type;
-    char* data;
-};
-
-t_token* GetTokensFromString(const char* inString, int* num_tokens);
-
-void freeTokens(t_token* tokens, const int num_tokens);
-
-bool checkTokenType(const t_token token, enum e_token_type token_type);
-
-void my_atof_2D(float** matrix, const int max_i, const int max_j, const char* instring);
-
-bool check_my_atof_2D(const int max_i, const int max_j, const char* instring, int* num_entries);
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_util.cc b/third_party/vtr/libs/vtrutil/src/vtr_util.cc
deleted file mode 100644
index 45ee30358..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_util.cc
+++ /dev/null
@@ -1,504 +0,0 @@
-#include <cstdarg>
-#include <cstdlib>
-#include <cerrno> //For errno
-#include <cstring>
-#include <memory>
-#include <sstream>
-
-#include "vtr_util.h"
-#include "vtr_assert.h"
-#include "vtr_memory.h"
-#include "vtr_error.h"
-
-#if defined(__unix__)
-#    include <unistd.h> //For getpid()
-#endif
-
-namespace vtr {
-
-std::string out_file_prefix;     /* used by fopen */
-static int file_line_number = 0; /* file in line number being parsed (used by fgets) */
-static int cont;                 /* line continued? (used by strtok)*/
-
-/**
- * @brief Splits the c-style string 'text' along the specified delimiter characters in 'delims'
- *
- * The split strings (excluding the delimiters) are returned
- */
-std::vector<std::string> split(const char* text, const std::string delims) {
-    if (text) {
-        std::string text_str(text);
-        return split(text_str, delims);
-    }
-    return std::vector<std::string>();
-}
-
-/**
- * @brief Splits the string 'text' along the specified delimiter characters in 'delims'
- *
- * The split strings (excluding the delimiters) are returned
- */
-std::vector<std::string> split(const std::string& text, const std::string delims) {
-    std::vector<std::string> tokens;
-
-    std::string curr_tok;
-    for (char c : text) {
-        if (delims.find(c) != std::string::npos) {
-            //Delimeter character
-            if (!curr_tok.empty()) {
-                //At the end of the token
-
-                //Save it
-                tokens.push_back(curr_tok);
-
-                //Reset token
-                curr_tok.clear();
-            } else {
-                //Pass
-            }
-        } else {
-            //Non-delimeter append to token
-            curr_tok += c;
-        }
-    }
-
-    //Add last token
-    if (!curr_tok.empty()) {
-        //Save it
-        tokens.push_back(curr_tok);
-    }
-    return tokens;
-}
-
-///@brief Returns 'input' with the first instance of 'search' replaced with 'replace'
-std::string replace_first(const std::string& input, const std::string& search, const std::string& replace) {
-    auto pos = input.find(search);
-
-    std::string output(input, 0, pos);
-    output += replace;
-    output += std::string(input, pos + search.size());
-
-    return output;
-}
-
-///@brief Returns 'input' with all instances of 'search' replaced with 'replace'
-std::string replace_all(const std::string& input, const std::string& search, const std::string& replace) {
-    std::string output;
-
-    size_t last = 0;
-    size_t pos = input.find(search, last); //Find the first instance of 'search' starting at or after 'last'
-    while (pos != std::string::npos) {
-        output += input.substr(last, pos - last); //Append anything in the input string between last and current match
-        output += replace;                        //Add the replacement
-
-        last = pos + search.size(); //Advance past the current match
-
-        pos = input.find(search, last); //Look for the next match
-    }
-    output += input.substr(last, pos - last); //Append anything in 'input' after the last match
-
-    return output;
-}
-
-///@brief Retruns true if str starts with prefix
-bool starts_with(std::string str, std::string prefix) {
-    return str.find(prefix) == 0;
-}
-
-///@brief Returns a std::string formatted using a printf-style format string
-std::string string_fmt(const char* fmt, ...) {
-    // Make a variable argument list
-    va_list va_args;
-
-    // Initialize variable argument list
-    va_start(va_args, fmt);
-
-    //Format string
-    std::string str = vstring_fmt(fmt, va_args);
-
-    // Reset variable argument list
-    va_end(va_args);
-
-    return str;
-}
-
-///@brief Returns a std::string formatted using a printf-style format string taking an explicit va_list
-std::string vstring_fmt(const char* fmt, va_list args) {
-    // We need to copy the args so we don't change them before the true formating
-    va_list va_args_copy;
-    va_copy(va_args_copy, args);
-
-    //Determine the formatted length using a copy of the args
-    int len = std::vsnprintf(nullptr, 0, fmt, va_args_copy);
-
-    va_end(va_args_copy); //Clean-up
-
-    //Negative if there is a problem with the format string
-    VTR_ASSERT_MSG(len >= 0, "Problem decoding format string");
-
-    size_t buf_size = len + 1; //For terminator
-
-    //Allocate a buffer
-    //  unique_ptr will free buffer automatically
-    std::unique_ptr<char[]> buf(new char[buf_size]);
-
-    //Format into the buffer using the original args
-    len = std::vsnprintf(buf.get(), buf_size, fmt, args);
-
-    VTR_ASSERT_MSG(len >= 0, "Problem decoding format string");
-    VTR_ASSERT(static_cast<size_t>(len) == buf_size - 1);
-
-    //Build the string from the buffer
-    return std::string(buf.get(), len);
-}
-
-///@brief An alternate for strncpy since strncpy doesn't work as most people would expect. This ensures null termination
-char* strncpy(char* dest, const char* src, size_t size) {
-    /* Find string's length */
-    size_t len = std::strlen(src);
-
-    /* Cap length at (num - 1) to leave room for \0 */
-    if (size <= len)
-        len = (size - 1);
-
-    /* Copy as much of string as we can fit */
-    std::memcpy(dest, src, len);
-
-    /* explicit null termination */
-    dest[len] = '\0';
-
-    return dest;
-}
-
-/**
- * @brief Legacy c-style function replacements.
- *
- * Typically these add extra error checking
- * and/or correct 'unexpected' behaviour of the standard c-functions
- */
-char* strdup(const char* str) {
-    if (str == nullptr) {
-        return nullptr;
-    }
-
-    size_t Len = std::strlen(str);
-    //use calloc to already make the last char '\0'
-    return (char*)std::memcpy(vtr::calloc(Len + 1, sizeof(char)), str, Len);
-    ;
-}
-
-/**
- * @brief Legacy c-style function replacements.
- *
- * Typically these add extra error checking
- * and/or correct 'unexpected' behaviour of the standard c-functions
- */
-template<class T>
-T atoT(const std::string& value, const std::string& type_name) {
-    //The c version of atof doesn't catch errors.
-    //
-    //This version uses stringstream to detect conversion errors
-    std::istringstream ss(value);
-
-    T val;
-    ss >> val;
-
-    if (ss.fail() || !ss.eof()) {
-        //Failed to convert, or did not consume all input
-        std::stringstream msg;
-        msg << "Failed to convert string '" << value << "' to " << type_name;
-        throw VtrError(msg.str(), __FILE__, __LINE__);
-    }
-
-    return val;
-}
-
-/**
- * @brief Legacy c-style function replacements.
- *
- * Typically these add extra error checking
- * and/or correct 'unexpected' behaviour of the standard c-functions
- */
-int atoi(const std::string& value) {
-    return atoT<int>(value, "int");
-}
-
-/**
- * @brief Legacy c-style function replacements.
- *
- * Typically these add extra error checking
- * and/or correct 'unexpected' behaviour of the standard c-functions
- */
-double atod(const std::string& value) {
-    return atoT<double>(value, "double");
-}
-
-/**
- * @brief Legacy c-style function replacements.
- *
- * Typically these add extra error checking
- * and/or correct 'unexpected' behaviour of the standard c-functions
- */
-float atof(const std::string& value) {
-    return atoT<float>(value, "float");
-}
-
-/**
- * @brief Legacy c-style function replacements.
- *
- * Typically these add extra error checking
- * and/or correct 'unexpected' behaviour of the standard c-functions
- */
-unsigned atou(const std::string& value) {
-    return atoT<unsigned>(value, "unsigned int");
-}
-
-/**
- * @brief Get next token, and wrap to next line if \ at end of line.    
- *
- * There is a bit of a "gotcha" in strtok.  It does not make a   *
- * copy of the character array which you pass by pointer on the  
- * first call.  Thus, you must make sure this array exists for   
- * as long as you are using strtok to parse that line.  Don't    
- * use local buffers in a bunch of subroutines calling each      
- * other; the local buffer may be overwritten when the stack is  
- * restored after return from the subroutine.                    
- */
-char* strtok(char* ptr, const char* tokens, FILE* fp, char* buf) {
-    char* val;
-
-    val = std::strtok(ptr, tokens);
-    for (;;) {
-        if (val != nullptr || cont == 0)
-            return (val);
-
-        /* return unless we have a null value and a continuation line */
-        if (vtr::fgets(buf, bufsize, fp) == nullptr)
-            return (nullptr);
-
-        val = std::strtok(buf, tokens);
-    }
-}
-
-///@brief The legacy fopen function with extra error checking
-FILE* fopen(const char* fname, const char* flag) {
-    FILE* fp;
-    size_t Len;
-    char* new_fname = nullptr;
-    file_line_number = 0;
-
-    /* Appends a prefix string for output files */
-    if (!out_file_prefix.empty()) {
-        if (std::strchr(flag, 'w')) {
-            Len = 1; /* NULL char */
-            Len += std::strlen(out_file_prefix.c_str());
-            Len += std::strlen(fname);
-            new_fname = (char*)vtr::malloc(Len * sizeof(char));
-            strcpy(new_fname, out_file_prefix.c_str());
-            strcat(new_fname, fname);
-            fname = new_fname;
-        }
-    }
-
-    if (nullptr == (fp = std::fopen(fname, flag))) {
-        throw VtrError(string_fmt("Error opening file %s for %s access: %s.\n", fname, flag, strerror(errno)), __FILE__, __LINE__);
-    }
-
-    if (new_fname)
-        std::free(new_fname);
-
-    return (fp);
-}
-
-///@brief The legacy fclose function
-int fclose(FILE* f) {
-    return std::fclose(f);
-}
-
-/**
- * @brief Get an input line, update the line number and cut off any comment part.
- *
- * A \ at the end of a line with no comment part (#) means continue. 
- * vtr::fgets should give
- * identical results for Windows (\r\n) and Linux (\n) 
- * newlines, since it replaces each carriage return \r
- * by a newline character \n.  Returns NULL after EOF.
- */
-char* fgets(char* buf, int max_size, FILE* fp) {
-    int ch;
-    int i;
-
-    cont = 0;           /* line continued? */
-    file_line_number++; /* global variable */
-
-    for (i = 0; i < max_size - 1; i++) { /* Keep going until the line finishes or the buffer is full */
-
-        ch = std::fgetc(fp);
-
-        if (std::feof(fp)) { /* end of file */
-            if (i == 0) {
-                return nullptr; /* required so we can write while (vtr::fgets(...) != NULL) */
-            } else {            /* no newline before end of file - last line must be returned */
-                buf[i] = '\0';
-                return buf;
-            }
-        }
-
-        if (ch == '#') { /* comment */
-            buf[i] = '\0';
-            while ((ch = std::fgetc(fp)) != '\n' && !std::feof(fp))
-                ; /* skip the rest of the line */
-            return buf;
-        }
-
-        if (ch == '\r' || ch == '\n') {         /* newline (cross-platform) */
-            if (i != 0 && buf[i - 1] == '\\') { /* if \ at end of line, line continued */
-                cont = 1;
-                buf[i - 1] = '\n'; /* May need this for tokens */
-                buf[i] = '\0';
-            } else {
-                buf[i] = '\n';
-                buf[i + 1] = '\0';
-            }
-            return buf;
-        }
-
-        buf[i] = ch; /* copy character into the buffer */
-    }
-
-    /* Buffer is full but line has not terminated, so error */
-    throw VtrError(string_fmt("Error on line %d -- line is too long for input buffer.\n"
-                              "All lines must be at most %d characters long.\n",
-                              bufsize - 2),
-                   __FILE__, __LINE__);
-    return nullptr;
-}
-
-/**
- * @brief to get an arbitrary long input line and cut off any
- * comment part 
- * 
- * the getline function is exaly like the __get_delim function 
- * in GNU with '\n' delimiter. As a result, to make the function 
- * behaviour identical for Windows (\r\n) and Linux (\n) compiler 
- * macros for checking operating systems have been used.
- * 
- * @note user need to take care of the given pointer,
- * which will be dynamically allocated by getdelim
- */
-char* getline(char*& _lineptr, FILE* _stream) {
-    int i;
-    int ch;
-    size_t _n = 0;
-    ssize_t nread;
-
-#if defined(__unix__)
-    nread = getdelim(&_lineptr, &_n, '\n', _stream);
-#elif defined(_WIN32)
-#    define __WIN_NLTK "\r\n"
-    nread = getdelim(&_lineptr, &_n, __WIN_NLTK, _stream);
-#endif
-
-    if (nread == -1) {
-        int errsv = errno;
-        std::string error_msg;
-
-        if (errsv == EINVAL)
-            error_msg = string_fmt("[%s] Bad arguments (_lineptr is NULL, or _stream is not valid).", strerror(errsv));
-        else if (errsv == ENOMEM)
-            error_msg = string_fmt("[%s] Allocation or reallocation of the line buffer failed.", strerror(errsv));
-        else
-            /* end of file so it will return null */
-            return nullptr;
-
-        /* getline was unsuccessful, so error */
-        throw VtrError(string_fmt("Error -- %s\n",
-                                  error_msg.c_str()),
-                       __FILE__, __LINE__);
-        return nullptr;
-    }
-
-    cont = 0;           /* line continued? */
-    file_line_number++; /* global variable */
-
-    for (i = 0; i < nread; i++) { /* Keep going until the line finishes */
-
-        ch = _lineptr[i];
-
-        if (ch == '#') { /* comment */
-            _lineptr[i] = '\0';
-            /* skip the rest of the line */
-            break;
-        }
-    }
-
-    return (_lineptr);
-}
-
-///@brief Returns line number of last opened and read file
-int get_file_line_number_of_last_opened_file() {
-    return file_line_number;
-}
-
-bool file_exists(const char* filename) {
-    FILE* file;
-
-    if (filename == nullptr) {
-        return false;
-    }
-
-    file = std::fopen(filename, "r");
-    if (file) {
-        std::fclose(file);
-        return true;
-    }
-    return false;
-}
-
-/* Date:July 17th, 2013
- * Author: Daniel Chen */
-/**
- * @brief Checks the file extension of an file to ensure correct file format. 
- *
- * Returns true if format is correct, and false otherwise.
- * @note This is probably a fragile check, but at least should 
- * prevent common problems such as swapping architecture file 
- * and blif file on the VPR command line.
- */
-bool check_file_name_extension(const char* file_name,
-                               const char* file_extension) {
-    const char* str;
-    int len_extension;
-
-    len_extension = std::strlen(file_extension);
-    str = std::strstr(file_name, file_extension);
-    if (str == nullptr || (*(str + len_extension) != '\0')) {
-        return false;
-    }
-
-    return true;
-}
-
-/**
- * @brief Legacy ReadLine Tokening
- */
-std::vector<std::string> ReadLineTokens(FILE* InFile, int* LineNum) {
-    std::unique_ptr<char[]> buf(new char[vtr::bufsize]);
-
-    const char* line = vtr::fgets(buf.get(), vtr::bufsize, InFile);
-
-    ++(*LineNum);
-
-    return vtr::split(line);
-}
-
-///@brief Returns pid if os is unix, -1 otherwise.
-int get_pid() {
-#if defined(__unix__)
-    return getpid();
-#else
-    return -1;
-#endif
-}
-
-} // namespace vtr
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_util.h b/third_party/vtr/libs/vtrutil/src/vtr_util.h
deleted file mode 100644
index 08562d3d0..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_util.h
+++ /dev/null
@@ -1,123 +0,0 @@
-#ifndef VTR_UTIL_H
-#define VTR_UTIL_H
-
-#include <algorithm>
-#include <vector>
-#include <string>
-#include <cstdarg>
-#include <array>
-
-namespace vtr {
-
-/**
- * @brief Splits the string 'text' along the specified delimiter characters in 'delims'
- *
- * The split strings (excluding the delimiters) are returned
- */
-std::vector<std::string> split(const char* text, const std::string delims = " \t\n");
-std::vector<std::string> split(const std::string& text, const std::string delims = " \t\n");
-
-///@brief Returns 'input' with the first instance of 'search' replaced with 'replace'
-std::string replace_first(const std::string& input, const std::string& search, const std::string& replace);
-
-///@brief Returns 'input' with all instances of 'search' replaced with 'replace'
-std::string replace_all(const std::string& input, const std::string& search, const std::string& replace);
-
-///@brief Retruns true if str starts with prefix
-bool starts_with(std::string str, std::string prefix);
-
-///@brief Returns a std::string formatted using a printf-style format string
-std::string string_fmt(const char* fmt, ...);
-
-///@brief Returns a std::string formatted using a printf-style format string taking an explicit va_list
-std::string vstring_fmt(const char* fmt, va_list args);
-
-/**
- * @brief Joins a sequence by a specified delimeter
- *
- *  For example the sequence {"home", "user", "my_files", "test.blif"} with delim="/"
- *  would return "home/user/my_files/test.blif"
- */
-template<typename Iter>
-std::string join(Iter begin, Iter end, std::string delim);
-
-template<typename Container>
-std::string join(Container container, std::string delim);
-
-template<typename T>
-std::string join(std::initializer_list<T> list, std::string delim);
-
-template<typename Container>
-void uniquify(Container container);
-
-constexpr size_t bufsize = 32768; /* Maximum line length for various parsing proc. */
-char* strncpy(char* dest, const char* src, size_t size);
-char* strdup(const char* str);
-char* strtok(char* ptr, const char* tokens, FILE* fp, char* buf);
-FILE* fopen(const char* fname, const char* flag);
-int fclose(FILE* f);
-char* fgets(char* buf, int max_size, FILE* fp);
-char* getline(char*& _lineptr, FILE* _stream);
-
-int atoi(const std::string& value);
-unsigned atou(const std::string& value);
-float atof(const std::string& value);
-double atod(const std::string& value);
-
-/**
- * @brief File utilities
- */
-int get_file_line_number_of_last_opened_file();
-bool file_exists(const char* filename);
-bool check_file_name_extension(const char* file_name,
-                               const char* file_extension);
-
-extern std::string out_file_prefix;
-
-/**
- * @brief Legacy ReadLine Tokening
- */
-std::vector<std::string> ReadLineTokens(FILE* InFile, int* LineNum);
-
-/**
- * @brief Template join function implementation
- */
-template<typename Iter>
-std::string join(Iter begin, Iter end, std::string delim) {
-    std::string joined_str;
-    for (auto iter = begin; iter != end; ++iter) {
-        joined_str += *iter;
-        if (iter != end - 1) {
-            joined_str += delim;
-        }
-    }
-    return joined_str;
-}
-
-template<typename Container>
-std::string join(Container container, std::string delim) {
-    return join(std::begin(container), std::end(container), delim);
-}
-
-template<typename T>
-std::string join(std::initializer_list<T> list, std::string delim) {
-    return join(list.begin(), list.end(), delim);
-}
-
-/**
- * @brief Template uniquify function implementation
- *
- * Removes repeated elements in the container
- */
-template<typename Container>
-void uniquify(Container container) {
-    std::sort(container.begin(), container.end());
-    container.erase(std::unique(container.begin(), container.end()),
-                    container.end());
-}
-
-int get_pid();
-
-} // namespace vtr
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_vec_id_set.h b/third_party/vtr/libs/vtrutil/src/vtr_vec_id_set.h
deleted file mode 100644
index 720722593..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_vec_id_set.h
+++ /dev/null
@@ -1,106 +0,0 @@
-#ifndef VTR_SET_H
-#define VTR_SET_H
-
-#include <vector>
-
-namespace vtr {
-
-/**
- * @brief Implements a set-like interface which supports multiple operations
- *
- * The supported operations are:
- *  - insertion
- *  - iteration
- *  - membership test
- * all in constant time.
- *
- * It assumes the element type (T) is convertable to size_t.
- * Usually, elements are vtr::StrongIds.
- *
- * Iteration through the elements is not strictly ordered, usually
- * insertion order, unless sort() has been previously called.
- *
- * The underlying implementation uses a vector for element
- * storage (for iteration), and a bit-set for membership tests.
- */
-template<typename T>
-class vec_id_set {
-  public:
-    typedef typename std::vector<T>::const_iterator const_iterator;
-    typedef const_iterator iterator;
-
-    ///@brief Returns an iterator to the first element in the sequence
-    auto begin() const { return vec_.begin(); }
-    ///@brief Returns an iterator referring to the past-the-end element in the vector container
-    auto end() const { return vec_.end(); }
-
-    ///@brief Returns a constant iterator to the first element in the sequence
-    auto cbegin() const { return vec_.cbegin(); }
-    ///@brief Returns a constant iterator referring to the past-the-end element in the vector container
-    auto cend() const { return vec_.cend(); }
-
-    ///@brief Insert val in the set
-    bool insert(T val) {
-        if (count(val)) { //Already inserted
-            return false;
-        }
-
-        vec_.push_back(val);
-
-        //Mark this value as being contained
-        if (size_t(val) >= contained_.size()) {
-            //We dynamically grow contained_ based on the maximum
-            //value contained. This allows us to avoid expensive
-            contained_.resize(size_t(val) + 1, false);
-        }
-        contained_[size_t(val)] = true;
-
-        return true;
-    }
-
-    ///@brief Iterators specifying a range of elements. Copies of the elements in the range [first,last) are inserted in the container.
-    template<typename Iter>
-    void insert(Iter first, Iter last) {
-        size_t nelem = std::distance(first, last);
-        vec_.reserve(size() + nelem);
-        contained_.reserve(size() + nelem);
-
-        for (Iter itr = first; itr != last; ++itr) {
-            insert(*itr);
-        }
-    }
-
-    ///@brief Count elements with a specific value
-    size_t count(T val) const {
-        if (size_t(val) < contained_.size()) {
-            //Value is with-in range of previously inserted
-            //elements, so look-up its membership
-            return contained_[size_t(val)];
-        }
-        return 0;
-    }
-
-    ///@brief Returns the size of the container
-    size_t size() const {
-        return vec_.size();
-    }
-
-    ///@brief Sort elements in the container
-    void sort() {
-        std::sort(vec_.begin(), vec_.end());
-    }
-
-    ///@bried Clears the container
-    void clear() {
-        vec_.clear();
-        contained_.clear();
-    }
-
-  private:
-    std::vector<T> vec_;          //Elements contained
-    std::vector<bool> contained_; //Bit-set for constant-time membership test
-};
-
-} // namespace vtr
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_vector.h b/third_party/vtr/libs/vtrutil/src/vtr_vector.h
deleted file mode 100644
index dc8b689af..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_vector.h
+++ /dev/null
@@ -1,211 +0,0 @@
-#ifndef VTR_VECTOR
-#define VTR_VECTOR
-#include <vector>
-#include <cstddef>
-#include <iterator>
-#include "vtr_range.h"
-
-namespace vtr {
-
-/**
- * @brief A std::vector container which is indexed by K (instead of size_t).
- *
- * The main use of this container is to behave like a std::vector which is
- * indexed by a vtr::StrongId. It assumes that K is explicitly convertable to size_t
- * (i.e. via operator size_t()), and can be explicitly constructed from a size_t.
- *
- * It includes all the following std::vector functions:
- *      - begin
- *      - cbegin
- *      - cend
- *      - crbegin
- *      - crend
- *      - end
- *      - rbegin
- *      - rend
- *      - capacity
- *      - empty
- *      - max_size
- *      - reserve
- *      - resize
- *      - shrink_to_fit
- *      - size
- *      - back
- *      - front
- *      - assign
- *      - clear
- *      - emplace
- *      - emplace_back
- *      - erase
- *      - get_allocator
- *      - insert
- *      - pop_back
- *      - push_back
- *
- * If you need more std::map-like (instead of std::vector-like) behaviour see
- * vtr::vector_map.
- */
-template<typename K, typename V, typename Allocator = std::allocator<V>>
-class vector : private std::vector<V, Allocator> {
-    using storage = std::vector<V, Allocator>;
-
-  public:
-    typedef K key_type;
-
-    class key_iterator;
-    typedef vtr::Range<key_iterator> key_range;
-
-  public:
-    //Pass through std::vector's types
-    using typename storage::allocator_type;
-    using typename storage::const_iterator;
-    using typename storage::const_pointer;
-    using typename storage::const_reference;
-    using typename storage::const_reverse_iterator;
-    using typename storage::difference_type;
-    using typename storage::iterator;
-    using typename storage::pointer;
-    using typename storage::reference;
-    using typename storage::reverse_iterator;
-    using typename storage::size_type;
-    using typename storage::value_type;
-
-    //Pass through storagemethods
-    using std::vector<V, Allocator>::vector;
-
-    using storage::begin;
-    using storage::cbegin;
-    using storage::cend;
-    using storage::crbegin;
-    using storage::crend;
-    using storage::end;
-    using storage::rbegin;
-    using storage::rend;
-
-    using storage::capacity;
-    using storage::empty;
-    using storage::max_size;
-    using storage::reserve;
-    using storage::resize;
-    using storage::shrink_to_fit;
-    using storage::size;
-
-    using storage::back;
-    using storage::front;
-
-    using storage::assign;
-    using storage::clear;
-    using storage::emplace;
-    using storage::emplace_back;
-    using storage::erase;
-    using storage::get_allocator;
-    using storage::insert;
-    using storage::pop_back;
-    using storage::push_back;
-
-    /*
-     * We can't using-forward storage::data, as it might not exist
-     * in the particular specialization (typically: vector<bool>)
-     * causing compiler complains.
-     * Instead, implement it as inline forwarding method whose
-     * compilation is deferred to when it is actually requested.
-     */
-    ///@brief Returns a pointer to the vector's data
-    inline V* data() { return storage::data(); }
-    ///@brief Returns a pointer to the vector's data (immutable)
-    inline const V* data() const { return storage::data(); }
-
-    /*
-     * Don't include operator[] and at() from std::vector,
-     *
-     * since we redine them to take key_type instead of size_t
-     */
-    ///@brief [] operator
-    reference operator[](const key_type id) {
-        auto i = size_t(id);
-        return storage::operator[](i);
-    }
-    ///@brief [] operator immutable
-    const_reference operator[](const key_type id) const {
-        auto i = size_t(id);
-        return storage::operator[](i);
-    }
-    ///@brief at() operator
-    reference at(const key_type id) {
-        auto i = size_t(id);
-        return storage::at(i);
-    }
-    ///@brief at() operator immutable
-    const_reference at(const key_type id) const {
-        auto i = size_t(id);
-        return storage::at(i);
-    }
-
-    // We must re-define swap to avoid inaccessible base class errors
-    ///@brief swap function
-    void swap(vector<K, V, Allocator>& other) {
-        std::swap(*this, other);
-    }
-
-    ///@brief Returns a range containing the keys
-    key_range keys() const {
-        return vtr::make_range(key_begin(), key_end());
-    }
-
-  public:
-    /**
-     * @brief Iterator class which is convertable to the key_type
-     *
-     * This allows end-users to call the parent class's keys() member
-     * to iterate through the keys with a range-based for loop
-     */
-    class key_iterator : public std::iterator<std::bidirectional_iterator_tag, key_type> {
-      public:
-        ///@brief We use the intermediate type my_iter to avoid a potential ambiguity for which clang generates errors and warnings
-        using my_iter = typename std::iterator<std::bidirectional_iterator_tag, K>;
-        using typename my_iter::iterator;
-        using typename my_iter::pointer;
-        using typename my_iter::reference;
-        using typename my_iter::value_type;
-
-        ///@brief constructor
-        key_iterator(key_iterator::value_type init)
-            : value_(init) {}
-
-        /*
-         * vtr::vector assumes that the key time is convertable to size_t.
-         *
-         * It also assumes all the underlying IDs are zero-based and contiguous. That means
-         * we can just increment the underlying Id to build the next key.
-         */
-        ///@brief ++ operator
-        key_iterator operator++() {
-            value_ = value_type(size_t(value_) + 1);
-            return *this;
-        }
-        ///@brief decrement operator
-        key_iterator operator--() {
-            value_ = value_type(size_t(value_) - 1);
-            return *this;
-        }
-        ///@brief dereference oeprator
-        reference operator*() { return value_; }
-        ///@brief -> operator
-        pointer operator->() { return &value_; }
-
-        ///@brief == operator
-        friend bool operator==(const key_iterator lhs, const key_iterator rhs) { return lhs.value_ == rhs.value_; }
-        ///@brief != operator
-        friend bool operator!=(const key_iterator lhs, const key_iterator rhs) { return !(lhs == rhs); }
-
-      private:
-        value_type value_;
-    };
-
-  private:
-    key_iterator key_begin() const { return key_iterator(key_type(0)); }
-    key_iterator key_end() const { return key_iterator(key_type(size())); }
-};
-
-} // namespace vtr
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_vector_map.h b/third_party/vtr/libs/vtrutil/src/vtr_vector_map.h
deleted file mode 100644
index 50309e86e..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_vector_map.h
+++ /dev/null
@@ -1,172 +0,0 @@
-#ifndef VTR_VECTOR_MAP
-#define VTR_VECTOR_MAP
-#include <vector>
-
-#include "vtr_assert.h"
-#include "vtr_sentinels.h"
-
-namespace vtr {
-
-/**
- * @brief A vector-like container which is indexed by K (instead of size_t as in std::vector).
- * 
- * The main use of this container is to behave like a std::vector which is indexed by
- * vtr::StrongId.
- * 
- * Requires that K be convertable to size_t with the size_t operator (i.e. size_t()), and
- * that the conversion results in a linearly increasing index into the underlying vector.
- * 
- * This results in a container that is somewhat similar to a std::map (i.e. converts from one
- * type to another), but requires contiguously ascending (i.e. linear) keys. Unlike std::map
- * only the values are stored (at the specified index/key), reducing memory usage and improving
- * cache locality. Furthermore, operator[] and find() return the value or iterator directly
- * associated with the value (like std::vector) rather than a std::pair (like std::map).
- * insert() takes both the key and value as separate arguments and has no return value.
- * 
- * Additionally, vector_map will silently create values for 'gaps' in the index range (i.e.
- * those elements are initialized with Sentinel::INVALID()).
- * 
- * If you need a fully featured std::map like container without the above differences see
- * vtr::linear_map.
- * 
- * If you do not need std::map-like features see vtr::vector. Note that vtr::vector_map is very similar 
- * to vtr::vector. Unless there is a specific reason that vtr::vector_map is needed, it is better to use vtr::vector.
- * 
- * Note that it is possible to use vector_map with sparse/non-contiguous keys, but this is typically
- * memory inefficient as the underlying vector will allocate space for [0..size_t(max_key)-1],
- * where max_key is the largest key that has been inserted.
- * 
- * As with a std::vector, it is the caller's responsibility to ensure there is sufficient space
- * when a given index/key before it is accessed. The exception to this are the find(), insert() and
- * update() methods which handle non-existing keys gracefully.
- */
-
-template<typename K, typename V, typename Sentinel = DefaultSentinel<V>>
-class vector_map {
-  public: //Public types
-    typedef typename std::vector<V>::const_reference const_reference;
-    typedef typename std::vector<V>::reference reference;
-
-    typedef typename std::vector<V>::iterator iterator;
-    typedef typename std::vector<V>::const_iterator const_iterator;
-    typedef typename std::vector<V>::const_reverse_iterator const_reverse_iterator;
-
-  public:
-    ///@brief Constructor
-    template<typename... Args>
-    vector_map(Args&&... args)
-        : vec_(std::forward<Args>(args)...) {}
-
-  public: //Accessors
-    ///@brief Returns an iterator referring to the first element in the map container.
-    const_iterator begin() const { return vec_.begin(); }
-    ///@brief Returns an iterator referring to the past-the-end element in the map container.
-    const_iterator end() const { return vec_.end(); }
-    ///@begin Returns a reverse iterator pointing to the last element in the container (i.e., its reverse beginning).
-    const_reverse_iterator rbegin() const { return vec_.rbegin(); }
-    ///@brief Returns a reverse iterator pointing to the theoretical element right before the first element in the map container (which is considered its reverse end).
-    const_reverse_iterator rend() const { return vec_.rend(); }
-
-    //Indexing
-    ///@brief [] operator immutable
-    const_reference operator[](const K n) const {
-        size_t index = size_t(n);
-
-        /**
-         * Shouldn't check for index >= 0, since size_t is unsigned thus won't be negative
-         *
-         * A negative input to n would result in an absurdly large number close the maximum size of size_t, and be caught by index < vec_.size()
-         * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2013/n3690.pdf chapter 4.7 para 2
-         */
-
-        VTR_ASSERT_SAFE_MSG(index < vec_.size(), "Out-of-range index");
-        return vec_[index];
-    }
-
-    ///@brief Searches the container for an element with a key equivalent to k and returns an iterator to it if found, otherwise it returns an iterator to vector_map::end.
-    const_iterator find(const K key) const {
-        if (size_t(key) < vec_.size()) {
-            return vec_.begin() + size_t(key);
-        } else {
-            return vec_.end();
-        }
-    }
-
-    ///@brief Returns the number of elements in the container.
-    std::size_t size() const { return vec_.size(); }
-
-    ///@brief Returns true if the container is empty
-    bool empty() const { return vec_.empty(); }
-
-    ///@brief Returns true if the container contains key
-    bool contains(const K key) const { return size_t(key) < vec_.size(); }
-    ///@brief Returns 1 if the container contains key, 0 otherwise
-    size_t count(const K key) const { return contains(key) ? 1 : 0; }
-
-  public: //Mutators
-    // Delegate potentially overloaded functions to the underlying vector with perfect forwarding
-    ///@brief push_back function
-    template<typename... Args>
-    void push_back(Args&&... args) { vec_.push_back(std::forward<Args>(args)...); }
-
-    ///@brief emplace_back function
-    template<typename... Args>
-    void emplace_back(Args&&... args) { vec_.emplace_back(std::forward<Args>(args)...); }
-
-    ///@brief resize function
-    template<typename... Args>
-    void resize(Args&&... args) { vec_.resize(std::forward<Args>(args)...); }
-
-    ///@brief clears the container
-    void clear() { vec_.clear(); }
-
-    ///@brief Returns the capacity of the container
-    size_t capacity() const { return vec_.capacity(); }
-    ///@brief Requests the container to reduce its capacity to fit its size.
-    void shrink_to_fit() { vec_.shrink_to_fit(); }
-
-    ///@brief Returns an iterator referring to the first element in the map container.
-    iterator begin() { return vec_.begin(); }
-    ///@brief Returns an iterator referring to the past-the-end element in the map container.
-    iterator end() { return vec_.end(); }
-
-    ///@brief Indexing
-    reference operator[](const K n) {
-        VTR_ASSERT_SAFE_MSG(size_t(n) < vec_.size(), "Out-of-range index");
-        return vec_[size_t(n)];
-    }
-
-    ///@brief Returns an iterator to the first element in the container that compares equal to val. If no such element is found, the function returns end().
-    iterator find(const K key) {
-        if (size_t(key) < vec_.size()) {
-            return vec_.begin() + size_t(key);
-        } else {
-            return vec_.end();
-        }
-    }
-
-    ///@brief Extends the container by inserting new elements, effectively increasing the container size by the number of elements inserted.
-    void insert(const K key, const V value) {
-        if (size_t(key) >= vec_.size()) {
-            //Resize so key is in range
-            vec_.resize(size_t(key) + 1, Sentinel::INVALID());
-        }
-
-        //Insert the value
-        operator[](key) = value;
-    }
-
-    ///@brief Inserts the new key value pair in the container
-    void update(const K key, const V value) { insert(key, value); }
-
-    ///@brief Swap (this enables std::swap via ADL)
-    friend void swap(vector_map<K, V>& x, vector_map<K, V>& y) {
-        std::swap(x.vec_, y.vec_);
-    }
-
-  private:
-    std::vector<V> vec_;
-};
-
-} // namespace vtr
-#endif
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_version.cpp.in b/third_party/vtr/libs/vtrutil/src/vtr_version.cpp.in
deleted file mode 100644
index 4755874ba..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_version.cpp.in
+++ /dev/null
@@ -1,20 +0,0 @@
-#include "vtr_version.h"
-
-//This file is automatically processed by CMAKE and replaces
-//the values between ampersand's with the releveant CMAKE variable
-//before being compiled.
-namespace vtr {
-    const char* VERSION = "@VTR_VERSION@";
-    const char* VERSION_SHORT = "@VTR_VERSION_SHORT@";
-
-    const size_t VERSION_MAJOR = @VTR_VERSION_MAJOR@;
-    const size_t VERSION_MINOR = @VTR_VERSION_MINOR@;
-    const size_t VERSION_PATCH = @VTR_VERSION_PATCH@;
-    const char* VERSION_PRERELEASE = "@VTR_VERSION_PRERELEASE@";
-
-    const char* VCS_REVISION = "@VTR_VCS_REVISION@";
-    const char* VCS_REVISION_SHORT = "@VTR_VCS_REVISION_SHORT@";
-    const char* COMPILER = "@VTR_COMPILER_INFO@";
-    const char* BUILD_TIMESTAMP = "@VTR_BUILD_TIMESTAMP@";
-    const char* BUILD_INFO = "@VTR_BUILD_INFO@";
-}
diff --git a/third_party/vtr/libs/vtrutil/src/vtr_version.h b/third_party/vtr/libs/vtrutil/src/vtr_version.h
deleted file mode 100644
index f9bfaac14..000000000
--- a/third_party/vtr/libs/vtrutil/src/vtr_version.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef VTR_VERSION_H
-#define VTR_VERSION_H
-#include <cstddef>
-
-namespace vtr {
-extern const char* VERSION;
-extern const char* VERSION_SHORT;
-
-extern const size_t VERSION_MAJOR;
-extern const size_t VERSION_MINOR;
-extern const size_t VERSION_PATCH;
-extern const char* VERSION_PRERELEASE;
-
-extern const char* VCS_REVISION;
-extern const char* COMPILER;
-extern const char* BUILD_TIMESTAMP;
-extern const char* BUILD_INFO;
-} // namespace vtr
-
-#endif
diff --git a/third_party/vtr/libs/vtrutil/test/main.cpp b/third_party/vtr/libs/vtrutil/test/main.cpp
deleted file mode 100644
index f5c7e84bc..000000000
--- a/third_party/vtr/libs/vtrutil/test/main.cpp
+++ /dev/null
@@ -1,2 +0,0 @@
-#define CATCH_CONFIG_MAIN
-#include "catch2/catch_test_macros.hpp"
diff --git a/third_party/vtr/libs/vtrutil/test/test_array_view.cpp b/third_party/vtr/libs/vtrutil/test/test_array_view.cpp
deleted file mode 100644
index 69f92c40d..000000000
--- a/third_party/vtr/libs/vtrutil/test/test_array_view.cpp
+++ /dev/null
@@ -1,110 +0,0 @@
-#include "catch2/catch_test_macros.hpp"
-
-#include "vtr_array_view.h"
-#include "vtr_strong_id.h"
-#include <array>
-
-struct test_tag;
-using TestStrongId = vtr::StrongId<test_tag>;
-
-TEST_CASE("Array view", "[array_view/array_view]") {
-    std::array<uint16_t, 10> arr;
-    vtr::array_view<uint16_t> arr_view(arr.data(), arr.size());
-
-    const vtr::array_view<uint16_t>& carr_view = arr_view;
-    const vtr::array_view<uint16_t> carr_view2 = arr_view;
-    const vtr::array_view<uint16_t> carr_view3(arr_view);
-
-    REQUIRE(arr.size() == arr_view.size());
-    REQUIRE(arr.data() == arr_view.data());
-    REQUIRE(arr.data() == carr_view.data());
-    REQUIRE(arr.data() == carr_view2.data());
-    REQUIRE(arr.data() == carr_view3.data());
-
-    for (size_t i = 0; i < arr.size(); ++i) {
-        arr[i] = i;
-    }
-
-    for (size_t i = 0; i < arr_view.size(); ++i) {
-        REQUIRE(arr_view[i] == i);
-        REQUIRE(carr_view[i] == i);
-        REQUIRE(carr_view2[i] == i);
-        REQUIRE(carr_view3[i] == i);
-    }
-
-    for (size_t i = 0; i < arr.size(); ++i) {
-        REQUIRE(&arr[i] == &arr_view[i]);
-        REQUIRE(&arr.at(i) == &arr_view.at(i));
-        REQUIRE(&arr[i] == &carr_view[i]);
-        REQUIRE(&arr.at(i) == &carr_view.at(i));
-        REQUIRE(&arr[i] == &carr_view2[i]);
-        REQUIRE(&arr.at(i) == &carr_view2.at(i));
-        REQUIRE(&arr[i] == &carr_view3[i]);
-        REQUIRE(&arr.at(i) == &carr_view3.at(i));
-    }
-
-    for (size_t i = 0; i < arr_view.size(); ++i) {
-        arr_view[i] = arr_view.size() - i;
-    }
-
-    for (size_t i = 0; i < arr.size(); ++i) {
-        REQUIRE(arr[i] == (arr_view.size() - i));
-        REQUIRE(carr_view[i] == (arr_view.size() - i));
-        REQUIRE(carr_view2[i] == (arr_view.size() - i));
-        REQUIRE(carr_view3[i] == (arr_view.size() - i));
-    }
-}
-
-TEST_CASE("Array view id", "[array_view/array_view_id]") {
-    std::array<uint16_t, 10> arr;
-    vtr::array_view_id<TestStrongId, uint16_t> arr_view(arr.data(), arr.size());
-
-    const vtr::array_view_id<TestStrongId, uint16_t>& carr_view = arr_view;
-    const vtr::array_view_id<TestStrongId, uint16_t> carr_view2 = arr_view;
-    const vtr::array_view_id<TestStrongId, uint16_t> carr_view3(arr_view);
-
-    REQUIRE(arr.size() == arr_view.size());
-    REQUIRE(arr.data() == arr_view.data());
-    REQUIRE(arr.data() == carr_view.data());
-    REQUIRE(arr.data() == carr_view2.data());
-    REQUIRE(arr.data() == carr_view3.data());
-
-    for (size_t i = 0; i < arr.size(); ++i) {
-        arr[i] = i;
-    }
-
-    for (size_t i = 0; i < arr_view.size(); ++i) {
-        TestStrongId id(i);
-
-        REQUIRE(arr_view[id] == i);
-        REQUIRE(carr_view[id] == i);
-        REQUIRE(carr_view2[id] == i);
-        REQUIRE(carr_view3[id] == i);
-    }
-
-    for (size_t i = 0; i < arr.size(); ++i) {
-        TestStrongId id(i);
-
-        REQUIRE(&arr[i] == &arr_view[id]);
-        REQUIRE(&arr.at(i) == &arr_view.at(id));
-        REQUIRE(&arr[i] == &carr_view[id]);
-        REQUIRE(&arr.at(i) == &carr_view.at(id));
-        REQUIRE(&arr[i] == &carr_view2[id]);
-        REQUIRE(&arr.at(i) == &carr_view2.at(id));
-        REQUIRE(&arr[i] == &carr_view3[id]);
-        REQUIRE(&arr.at(i) == &carr_view3.at(id));
-    }
-
-    for (size_t i = 0; i < arr_view.size(); ++i) {
-        TestStrongId id(i);
-        arr_view[id] = arr_view.size() - i;
-    }
-
-    for (size_t i = 0; i < arr.size(); ++i) {
-        TestStrongId id(i);
-        REQUIRE(arr[i] == (arr_view.size() - i));
-        REQUIRE(carr_view[id] == (arr_view.size() - i));
-        REQUIRE(carr_view2[id] == (arr_view.size() - i));
-        REQUIRE(carr_view3[id] == (arr_view.size() - i));
-    }
-}
diff --git a/third_party/vtr/libs/vtrutil/test/test_expr_eval.cpp b/third_party/vtr/libs/vtrutil/test/test_expr_eval.cpp
deleted file mode 100644
index 5070ee717..000000000
--- a/third_party/vtr/libs/vtrutil/test/test_expr_eval.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-#include <limits>
-
-#include "catch2/catch_test_macros.hpp"
-
-#include "vtr_expr_eval.h"
-
-TEST_CASE("Simple Expressions", "[vtr_expr_eval]") {
-    vtr::FormulaParser parser;
-    vtr::t_formula_data vars;
-
-    REQUIRE(parser.parse_formula("0", vars) == 0);
-    REQUIRE(parser.parse_formula("42", vars) == 42);
-
-    REQUIRE(parser.parse_formula("5 + 2", vars) == 7);
-    REQUIRE(parser.parse_formula("5 + 10", vars) == 15);
-    REQUIRE(parser.parse_formula("5 - 2", vars) == 3);
-    REQUIRE(parser.parse_formula("5 - 10", vars) == -5);
-
-    REQUIRE(parser.parse_formula("5 * 5", vars) == 25);
-    REQUIRE(parser.parse_formula("5 / 5", vars) == 1);
-
-    //Floor arithmetic
-    REQUIRE(parser.parse_formula("5 / 10", vars) == 0);
-    REQUIRE(parser.parse_formula("10 / 9", vars) == 1);
-
-    REQUIRE(parser.parse_formula("5 % 10", vars) == 5);
-    REQUIRE(parser.parse_formula("10 % 9", vars) == 1);
-
-    REQUIRE(parser.parse_formula("5 < 10", vars) == 1);
-    REQUIRE(parser.parse_formula("20 < 10", vars) == 0);
-
-    REQUIRE(parser.parse_formula("5 > 10", vars) == 0);
-    REQUIRE(parser.parse_formula("20 > 10", vars) == 1);
-}
-
-TEST_CASE("Negative Literals", "[vtr_expr_eval]") {
-    //TODO: Currently unsupported, should support in the future...
-    //REQUIRE(parser.parse_formula("-5 + 10", vars) == 5);
-    //REQUIRE(parser.parse_formula("-10 + 5", vars) == -5);
-    //REQUIRE(parser.parse_formula("-1", vars) == -1);
-}
-
-TEST_CASE("Bracket Expressions", "[vtr_expr_eval]") {
-    vtr::FormulaParser parser;
-    vtr::t_formula_data vars;
-
-    REQUIRE(parser.parse_formula("20 / (4 + 1)", vars) == 4);
-    REQUIRE(parser.parse_formula("(20 / 5) + 1", vars) == 5);
-    REQUIRE(parser.parse_formula("20 / 5 + 1", vars) == 5);
-}
-
-TEST_CASE("Variable Expressions", "[vtr_expr_eval]") {
-    vtr::FormulaParser parser;
-    vtr::t_formula_data vars;
-    vars.set_var_value("x", 5);
-    vars.set_var_value("y", 10);
-
-    REQUIRE(parser.parse_formula("x", vars) == 5);
-    REQUIRE(parser.parse_formula("y", vars) == 10);
-
-    REQUIRE(parser.parse_formula("x + y", vars) == 15);
-    REQUIRE(parser.parse_formula("y + x", vars) == 15);
-
-    REQUIRE(parser.parse_formula("x - y", vars) == -5);
-    REQUIRE(parser.parse_formula("y - x", vars) == 5);
-
-    REQUIRE(parser.parse_formula("x * y", vars) == 50);
-    REQUIRE(parser.parse_formula("y * x", vars) == 50);
-
-    REQUIRE(parser.parse_formula("x / y", vars) == 0);
-    REQUIRE(parser.parse_formula("y / x", vars) == 2);
-}
-
-TEST_CASE("Function Expressions", "[vtr_expr_eval]") {
-    vtr::FormulaParser parser;
-    vtr::t_formula_data vars;
-
-    REQUIRE(parser.parse_formula("min(5, 2)", vars) == 2);
-    REQUIRE(parser.parse_formula("min(2, 5)", vars) == 2);
-    //REQUIRE(parser.parse_formula("min(-5, 2)", vars) == -5); //Negative literals currently unsupported
-    //REQUIRE(parser.parse_formula("min(-2, 5)", vars) == -2);
-
-    REQUIRE(parser.parse_formula("max(5, 2)", vars) == 5);
-    REQUIRE(parser.parse_formula("max(2, 5)", vars) == 5);
-    //REQUIRE(parser.parse_formula("max(-5, 2)", vars) == 2); //Negative literals currently unsupported
-    //REQUIRE(parser.parse_formula("max(-2, 5)", vars) == 5);
-
-    REQUIRE(parser.parse_formula("gcd(20, 25)", vars) == 5);
-    REQUIRE(parser.parse_formula("lcm(20, 25)", vars) == 100);
-}
diff --git a/third_party/vtr/libs/vtrutil/test/test_geometry.cpp b/third_party/vtr/libs/vtrutil/test/test_geometry.cpp
deleted file mode 100644
index 3d44d5977..000000000
--- a/third_party/vtr/libs/vtrutil/test/test_geometry.cpp
+++ /dev/null
@@ -1,245 +0,0 @@
-#include "catch2/catch_test_macros.hpp"
-#include "catch2/catch_approx.hpp"
-
-#include "vtr_geometry.h"
-
-using namespace Catch;
-
-TEST_CASE("Point", "[vtr_geometry/Point]") {
-    vtr::Point<int> p1(5, 3);
-    vtr::Point<float> p2(5.3, 3.9);
-    SECTION("location") {
-        REQUIRE(p1.x() == 5);
-        REQUIRE(p1.y() == 3);
-
-        REQUIRE(p2.x() == Approx(5.3));
-        REQUIRE(p2.y() == Approx(3.9));
-    }
-
-    SECTION("equality") {
-        REQUIRE(p1 == p1);
-        REQUIRE(p2 == p2);
-    }
-}
-
-TEST_CASE("Rect", "[vtr_geometry/Rect]") {
-    // int tests
-    {
-        vtr::Point<int> pi_1(5, 3);
-        vtr::Point<int> pi_2(10, 11);
-        vtr::Point<int> pi_3(7, 9);
-
-        vtr::Rect<int> r1(pi_1.x(), pi_1.y(), pi_2.x(), pi_2.y());
-        vtr::Rect<int> r2(pi_1, pi_2);
-        vtr::Rect<int> r3(pi_1, pi_3);
-        vtr::Rect<int> r4(pi_3, pi_2);
-
-        SECTION("equality") {
-            REQUIRE(r1 == r2);
-        }
-
-        SECTION("location") {
-            REQUIRE(r1.xmin() == pi_1.x());
-            REQUIRE(r1.xmax() == pi_2.x());
-            REQUIRE(r1.ymin() == pi_1.y());
-            REQUIRE(r1.ymax() == pi_2.y());
-        }
-
-        SECTION("point_accessors") {
-            REQUIRE(r1.bottom_left() == pi_1);
-            REQUIRE(r1.top_right() == pi_2);
-            REQUIRE(r2.bottom_left() == pi_1);
-            REQUIRE(r2.top_right() == pi_2);
-        }
-
-        SECTION("dimensions") {
-            REQUIRE(r1.width() == 5);
-            REQUIRE(r1.height() == 8);
-            REQUIRE(r2.width() == 5);
-            REQUIRE(r2.height() == 8);
-        }
-
-        SECTION("contains_int") {
-            REQUIRE(r2.contains(pi_1));
-            REQUIRE(r2.contains({6, 4}));
-            REQUIRE_FALSE(r2.contains({100, 4}));
-            REQUIRE_FALSE(r2.contains(pi_2));
-            REQUIRE(vtr::Rect<int>(pi_1).contains(pi_1));
-        }
-
-        SECTION("strictly_contains_int") {
-            REQUIRE_FALSE(r2.strictly_contains(pi_1));
-            REQUIRE(r2.strictly_contains({6, 4}));
-            REQUIRE_FALSE(r2.strictly_contains({100, 4}));
-            REQUIRE_FALSE(r2.strictly_contains(pi_2));
-        }
-
-        SECTION("coincident_int") {
-            REQUIRE(r2.coincident(pi_1));
-            REQUIRE(r2.coincident({6, 4}));
-            REQUIRE_FALSE(r2.coincident({100, 4}));
-            REQUIRE(r2.coincident(pi_2));
-        }
-
-        SECTION("bounds_int") {
-            REQUIRE(r1 == bounding_box(r3, r4));
-        }
-
-        SECTION("empty_int") {
-            REQUIRE(vtr::Rect<int>().empty());
-        }
-
-        SECTION("sample_int") {
-            auto r = vtr::Rect<int>(pi_1, pi_2);
-            REQUIRE(sample(r, 0, 0, 17) == pi_1);
-            REQUIRE(sample(r, 17, 17, 17) == pi_2);
-            auto inside = sample(r, 3, 11, 17);
-            REQUIRE(r.contains(inside));
-        }
-    }
-
-    // float tests
-    {
-        vtr::Point<float> pf_1(5.3, 3.9);
-        vtr::Point<float> pf_2(10.5, 11.1);
-        vtr::Point<float> pf_3(7.2, 9.4);
-
-        vtr::Rect<float> r3(pf_1.x(), pf_1.y(), pf_2.x(), pf_2.y());
-        vtr::Rect<float> r4(pf_1, pf_2);
-        vtr::Rect<float> r5(pf_1, pf_3);
-        vtr::Rect<float> r6(pf_3, pf_2);
-        // vtr::Rect<float> r7(pf_1); // <-- will fail to compile
-
-        SECTION("equality_float") {
-            REQUIRE(r3 == r4);
-        }
-
-        SECTION("location_float") {
-            REQUIRE(r3.xmin() == pf_1.x());
-            REQUIRE(r3.xmax() == pf_2.x());
-            REQUIRE(r3.ymin() == pf_1.y());
-            REQUIRE(r3.ymax() == pf_2.y());
-        }
-
-        SECTION("point_accessors_float") {
-            REQUIRE(r3.bottom_left() == pf_1);
-            REQUIRE(r3.top_right() == pf_2);
-            REQUIRE(r4.bottom_left() == pf_1);
-            REQUIRE(r4.top_right() == pf_2);
-        }
-
-        SECTION("dimensions") {
-            REQUIRE(r3.width() == Approx(5.2));
-            REQUIRE(r3.height() == Approx(7.2));
-            REQUIRE(r4.width() == Approx(5.2));
-            REQUIRE(r4.height() == Approx(7.2));
-        }
-
-        SECTION("contains_float") {
-            REQUIRE(r4.contains(pf_1));
-            REQUIRE(r4.contains({6, 4}));
-            REQUIRE_FALSE(r4.contains({100, 4}));
-            REQUIRE_FALSE(r4.contains(pf_2));
-        }
-
-        SECTION("strictly_contains_float") {
-            REQUIRE_FALSE(r4.strictly_contains(pf_1));
-            REQUIRE(r4.strictly_contains({6, 4}));
-            REQUIRE_FALSE(r4.strictly_contains({100, 4}));
-            REQUIRE_FALSE(r4.strictly_contains(pf_2));
-        }
-
-        SECTION("coincident_float") {
-            REQUIRE(r4.coincident(pf_1));
-            REQUIRE(r4.coincident({6, 4}));
-            REQUIRE_FALSE(r4.coincident({100, 4}));
-            REQUIRE(r4.coincident(pf_2));
-        }
-
-        SECTION("bounds_float") {
-            REQUIRE(r3 == bounding_box(r5, r6));
-        }
-
-        SECTION("empty_float") {
-            REQUIRE(vtr::Rect<float>().empty());
-        }
-    }
-}
-
-TEST_CASE("Line", "[vtr_geometry/Line]") {
-    std::vector<vtr::Point<int>> points = {{0, 0},
-                                           {0, 2},
-                                           {1, 0},
-                                           {1, -2}};
-
-    vtr::Line<int> line(points);
-
-    SECTION("points") {
-        auto line_points = line.points();
-
-        REQUIRE(line_points.size() == points.size());
-
-        int i = 0;
-        for (auto point : line_points) {
-            REQUIRE(points[i] == point);
-            ++i;
-        }
-    }
-
-    SECTION("bounding_box") {
-        auto bb = line.bounding_box();
-
-        REQUIRE(bb.xmin() == 0);
-        REQUIRE(bb.xmax() == 1);
-        REQUIRE(bb.ymin() == -2);
-        REQUIRE(bb.ymax() == 2);
-    }
-}
-
-TEST_CASE("RectUnion", "[vtr_geometry/RectUnion]") {
-    std::vector<vtr::Rect<int>> rects = {{0, 0, 2, 2},
-                                         {1, 1, 3, 3}};
-
-    vtr::RectUnion<int> rect_union(rects);
-
-    SECTION("rects") {
-        auto union_rects = rect_union.rects();
-
-        REQUIRE(union_rects.size() == rects.size());
-
-        int i = 0;
-        for (auto rect : union_rects) {
-            REQUIRE(rects[i] == rect);
-            ++i;
-        }
-    }
-    SECTION("bounding_box") {
-        auto bb = rect_union.bounding_box();
-
-        REQUIRE(bb.xmin() == 0);
-        REQUIRE(bb.xmax() == 3);
-        REQUIRE(bb.ymin() == 0);
-        REQUIRE(bb.ymax() == 3);
-    }
-
-    SECTION("contains") {
-        REQUIRE(rect_union.contains({0, 0}));
-        REQUIRE(rect_union.contains({1, 1}));
-        REQUIRE(rect_union.contains({2, 2}));
-        REQUIRE_FALSE(rect_union.contains({3, 3}));
-    }
-
-    SECTION("strictly_contains") {
-        REQUIRE_FALSE(rect_union.strictly_contains({0, 0}));
-        REQUIRE(rect_union.strictly_contains({1, 1}));
-        REQUIRE(rect_union.strictly_contains({2, 2}));
-        REQUIRE_FALSE(rect_union.strictly_contains({3, 3}));
-    }
-
-    SECTION("coincident") {
-        REQUIRE(rect_union.coincident({0, 0}));
-        REQUIRE(rect_union.coincident({1, 1}));
-        REQUIRE(rect_union.coincident({2, 2}));
-        REQUIRE(rect_union.coincident({3, 3}));
-    }
-}
diff --git a/third_party/vtr/libs/vtrutil/test/test_map_util.cpp b/third_party/vtr/libs/vtrutil/test/test_map_util.cpp
deleted file mode 100644
index 0021151d0..000000000
--- a/third_party/vtr/libs/vtrutil/test/test_map_util.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-#include "catch2/catch_test_macros.hpp"
-
-#include "vtr_map_util.h"
-#include "vtr_range.h"
-
-#include <map>
-
-TEST_CASE("Iterate Map Keys Values", "[vtr_map_util]") {
-    std::vector<int> keys = {0, 1, 2, 3};
-    std::vector<char> values = {'a', 'b', 'c', 'd'};
-
-    //Initialize map
-    std::map<int, char> map;
-    for (size_t i = 0; i < keys.size(); ++i) {
-        map[keys[i]] = values[i];
-    }
-
-    //Check key iteration
-    auto key_range = vtr::make_key_range(map);
-
-    std::vector<int> seen_keys;
-    for (int key : key_range) {
-        seen_keys.push_back(key);
-    }
-    REQUIRE(seen_keys == keys);
-
-    //Check value iteration
-    auto value_range = vtr::make_value_range(map);
-
-    std::vector<char> seen_values;
-    for (char value : value_range) {
-        seen_values.push_back(value);
-    }
-    REQUIRE(seen_values == values);
-}
diff --git a/third_party/vtr/libs/vtrutil/test/test_math.cpp b/third_party/vtr/libs/vtrutil/test/test_math.cpp
deleted file mode 100644
index c8e011a8d..000000000
--- a/third_party/vtr/libs/vtrutil/test/test_math.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-#include <limits>
-
-#include "catch2/catch_test_macros.hpp"
-#include "catch2/catch_approx.hpp"
-
-#include "vtr_math.h"
-
-using namespace Catch;
-
-TEST_CASE("Nearest Integer", "[vtr_math]") {
-    REQUIRE(vtr::nint(0.) == 0);
-    REQUIRE(vtr::nint(0.1) == 0);
-    REQUIRE(vtr::nint(0.5) == 1);
-    REQUIRE(vtr::nint(0.9) == 1);
-
-    REQUIRE(vtr::nint(1.) == 1);
-    REQUIRE(vtr::nint(1.1) == 1);
-    REQUIRE(vtr::nint(1.5) == 2);
-    REQUIRE(vtr::nint(1.9) == 2);
-
-    REQUIRE(vtr::nint(42.) == 42);
-    REQUIRE(vtr::nint(42.1) == 42);
-    REQUIRE(vtr::nint(42.5) == 43);
-    REQUIRE(vtr::nint(42.9) == 43);
-}
-
-TEST_CASE("Safe Ratio", "[vtr_math]") {
-    REQUIRE(vtr::safe_ratio(1., 1.) == Approx(1.));
-    REQUIRE(vtr::safe_ratio(1., 2.) == Approx(0.5));
-    REQUIRE(vtr::safe_ratio(50., 0.) == Approx(0.));
-}
-
-TEST_CASE("Is Close", "[vtr_math]") {
-    //double NAN = std::numeric_limits<double>::quiet_NaN();
-    double INF = std::numeric_limits<double>::infinity();
-
-    double num = 32.4;
-
-    double num_close = num - vtr::DEFAULT_REL_TOL * num / 2;
-    double num_not_quite_close = num - 2 * vtr::DEFAULT_REL_TOL * num;
-    double num_far = 2 * num;
-
-    REQUIRE(vtr::isclose(-1., -1.));
-    REQUIRE(vtr::isclose(1., 1.));
-    REQUIRE(vtr::isclose(0., 0.));
-    REQUIRE(vtr::isclose(num, num));
-    REQUIRE(vtr::isclose(num, num_close));
-    REQUIRE(!vtr::isclose(num, num_not_quite_close));
-    REQUIRE(!vtr::isclose(num, num_far));
-
-    REQUIRE(vtr::isclose(INF, INF));
-    REQUIRE(!vtr::isclose(-INF, INF));
-    REQUIRE(!vtr::isclose(NAN, NAN));
-
-    //Absolute tolerance tests
-    REQUIRE(vtr::isclose(32.2, 32.4, 1e-9, 0.2));
-    REQUIRE(!vtr::isclose(32.2, 32.4, 1e-9, 0.1));
-}
diff --git a/third_party/vtr/libs/vtrutil/test/test_ragged_vector.cpp b/third_party/vtr/libs/vtrutil/test/test_ragged_vector.cpp
deleted file mode 100644
index bc5911917..000000000
--- a/third_party/vtr/libs/vtrutil/test/test_ragged_vector.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-#include "catch2/catch_test_macros.hpp"
-
-#include "vtr_ragged_matrix.h"
-
-#include <numeric>
-
-TEST_CASE("Construction", "[vtr_ragged_matrix]") {
-    vtr::FlatRaggedMatrix<float> empty;
-    REQUIRE(empty.size() == 0);
-    REQUIRE(empty.empty());
-
-    std::vector<size_t> row_sizes = {1, 5, 3, 10};
-    size_t nelem = std::accumulate(row_sizes.begin(), row_sizes.end(), 0u);
-
-    //Construct from container of row sizes
-    vtr::FlatRaggedMatrix<float> ones_container(row_sizes, 1.0);
-    REQUIRE(ones_container.size() == nelem);
-    REQUIRE(!ones_container.empty());
-
-    //Construct from row size callback
-    auto row_size_callback = [&](size_t irow) {
-        return row_sizes[irow];
-    };
-    vtr::FlatRaggedMatrix<float> ones_callback(row_sizes.size(), row_size_callback, 1.0);
-    REQUIRE(ones_callback.size() == nelem);
-    REQUIRE(!ones_callback.empty());
-
-    //Construct from row size iterators
-    vtr::FlatRaggedMatrix<float> ones_iterator(row_sizes.begin(), row_sizes.end(), 1.0);
-    REQUIRE(ones_iterator.size() == nelem);
-    REQUIRE(!ones_iterator.empty());
-
-    //Clear
-    ones_container.clear();
-    REQUIRE(ones_container.size() == 0);
-
-    ones_callback.clear();
-    REQUIRE(ones_callback.size() == 0);
-
-    ones_iterator.clear();
-    REQUIRE(ones_iterator.size() == 0);
-}
-
-TEST_CASE("Iteration", "[vtr_ragged_matrix]") {
-    std::vector<size_t> row_sizes = {1, 5, 3, 10};
-    vtr::FlatRaggedMatrix<float> ones(row_sizes, 1.0);
-
-    float expected_sum = std::accumulate(row_sizes.begin(), row_sizes.end(), 0.f);
-
-    //Iteration by indices
-    float index_iteration_sum = 0.;
-    for (size_t irow = 0; irow < row_sizes.size(); ++irow) {
-        for (size_t icol = 0; icol < row_sizes[irow]; ++icol) {
-            index_iteration_sum += ones[irow][icol];
-        }
-    }
-    REQUIRE(index_iteration_sum == expected_sum);
-
-    //Iteration by first index + proxy
-    float row_for_iteration_sum = 0.;
-    for (size_t irow = 0; irow < row_sizes.size(); ++irow) {
-        REQUIRE(ones[irow].size() == row_sizes[irow]);
-
-        for (float val : ones[irow]) {
-            row_for_iteration_sum += val;
-        }
-    }
-    REQUIRE(row_for_iteration_sum == expected_sum);
-
-    //Iteration by range
-    float for_iteration_sum = 0.;
-    for (float val : ones) {
-        for_iteration_sum += val;
-    }
-    REQUIRE(for_iteration_sum == expected_sum);
-}
-
-TEST_CASE("Modification", "[vtr_ragged_matrix]") {
-    std::vector<size_t> row_sizes = {1, 5, 3, 10};
-    vtr::FlatRaggedMatrix<float> ones(row_sizes, 1.0);
-
-    float base_sum = std::accumulate(row_sizes.begin(), row_sizes.end(), 0.f);
-
-    //Index based modification
-    size_t irow = 3;
-    for (size_t icol = 0; icol < row_sizes[irow]; ++icol) {
-        ones[irow][icol] = 2.;
-    }
-    base_sum += row_sizes[irow];
-    REQUIRE(std::accumulate(ones.begin(), ones.end(), 0.f) == base_sum);
-
-    //Range for row modification
-    irow = 2;
-    for (float& val : ones[irow]) {
-        val = 2.;
-    }
-    base_sum += row_sizes[irow];
-    REQUIRE(std::accumulate(ones.begin(), ones.end(), 0.f) == base_sum);
-
-    //Single element modification
-    ones[0][0] = 3.;
-    base_sum += 2.;
-    REQUIRE(std::accumulate(ones.begin(), ones.end(), 0.f) == base_sum);
-}
diff --git a/third_party/vtr/libs/vtrutil/test/test_random.cpp b/third_party/vtr/libs/vtrutil/test/test_random.cpp
deleted file mode 100644
index c2287749f..000000000
--- a/third_party/vtr/libs/vtrutil/test/test_random.cpp
+++ /dev/null
@@ -1,16 +0,0 @@
-#include "catch2/catch_test_macros.hpp"
-
-#include "vtr_random.h"
-
-#include <vector>
-#include <iostream>
-
-TEST_CASE("shuffle", "[vtr_random/shuffle]") {
-    std::vector<int> numbers = {1, 2, 3, 4, 5};
-
-    vtr::RandState rand_state = 1;
-    vtr::shuffle(numbers.begin(), numbers.end(), rand_state);
-
-    std::vector<int> numbers_shuffled_1 = {5, 2, 4, 1, 3};
-    REQUIRE(numbers == numbers_shuffled_1);
-}
diff --git a/third_party/vtr/libs/vtrutil/test/test_range.cpp b/third_party/vtr/libs/vtrutil/test/test_range.cpp
deleted file mode 100644
index da56318ce..000000000
--- a/third_party/vtr/libs/vtrutil/test/test_range.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-#include "catch2/catch_test_macros.hpp"
-
-#include "vtr_range.h"
-#include <vector>
-
-TEST_CASE("Range Ops", "[vtr_range]") {
-    std::vector<int> vec = {1, 2, 3};
-
-    {
-        //From iterator pair
-        auto range = vtr::make_range(vec.begin(), vec.end());
-        REQUIRE(range.size() == vec.size());
-
-        size_t i = 0;
-        for (auto elem : range) {
-            REQUIRE(elem == vec[i]);
-            i++;
-        }
-        REQUIRE(i == vec.size());
-    }
-
-    {
-        //From container
-        auto range = vtr::make_range(vec);
-        REQUIRE(range.size() == vec.size());
-
-        size_t i = 0;
-        for (auto elem : range) {
-            REQUIRE(elem == vec[i]);
-            i++;
-        }
-        REQUIRE(i == vec.size());
-    }
-
-    {
-        //Empty
-        auto range = vtr::make_range(vec.begin(), vec.begin());
-        REQUIRE(range.size() == 0);
-        REQUIRE(range.empty());
-    }
-}
diff --git a/third_party/vtr/libs/vtrutil/test/test_small_vector.cpp b/third_party/vtr/libs/vtrutil/test/test_small_vector.cpp
deleted file mode 100644
index c0c47ab24..000000000
--- a/third_party/vtr/libs/vtrutil/test/test_small_vector.cpp
+++ /dev/null
@@ -1,148 +0,0 @@
-#include "catch2/catch_test_macros.hpp"
-
-#include "vtr_small_vector.h"
-#include <vector>
-
-namespace vtr {
-
-//Must be delcared in namespace for argument dependent lookup to work with clang
-template<class T>
-bool operator==(const std::vector<T>& lhs, const vtr::small_vector<T>& rhs) {
-    if (lhs.size() != rhs.size()) return false;
-
-    for (size_t i = 0; i < lhs.size(); ++i) {
-        if (lhs[i] != rhs[i]) return false;
-    }
-    return true;
-}
-
-} // namespace vtr
-
-TEST_CASE("Basic", "[vtr_small_vector]") {
-    std::vector<int> ref;
-    vtr::small_vector<int> vec;
-
-    //Create the vectors the same way
-    int i;
-    for (i = 0; i < 100; i++) {
-        ref.push_back(i);
-        vec.push_back(i);
-
-        REQUIRE(ref == vec);
-    }
-
-    //Check forward iteration
-    auto vec_itr = vec.begin();
-    for (auto ref_itr = ref.begin(); ref_itr != ref.end(); ++ref_itr, ++vec_itr) {
-        REQUIRE(*ref_itr == *vec_itr);
-
-        int dist = std::distance(ref.begin(), ref_itr);
-        REQUIRE(ref[dist] == vec[dist]);
-    }
-
-    //Check backward iteration
-    auto vec_rev_itr = vec.rbegin();
-    for (auto ref_itr = ref.rbegin(); ref_itr != ref.rend(); ++ref_itr, ++vec_rev_itr) {
-        REQUIRE(*ref_itr == *vec_rev_itr);
-    }
-
-    //Check front/back
-    REQUIRE(ref.front() == vec.front());
-    REQUIRE(ref.back() == vec.back());
-
-    //Push/Emplace/Pop back
-    ref.push_back(i);
-    vec.push_back(i);
-    REQUIRE(ref == vec);
-    ++i;
-
-    ref.emplace_back(i);
-    vec.emplace_back(i);
-    REQUIRE(ref == vec);
-    ++i;
-
-    ref.pop_back();
-    vec.pop_back();
-    REQUIRE(ref == vec);
-
-    //Test the short (internal storage) transition
-    size_t inplace_cap = vtr::small_vector<int>::INPLACE_CAPACITY;
-    REQUIRE(inplace_cap > 1);
-
-    //From long to short
-    ref.resize(inplace_cap + 1);
-    vec.resize(inplace_cap + 1);
-    REQUIRE(ref == vec);
-    REQUIRE(vec.size() > inplace_cap);
-
-    ref.pop_back();
-    vec.pop_back();
-    REQUIRE(ref == vec);
-    REQUIRE(vec.size() == inplace_cap);
-
-    ref.pop_back();
-    vec.pop_back();
-    REQUIRE(ref == vec);
-    REQUIRE(vec.size() < inplace_cap);
-
-    //From short to long
-    ref.push_back(i);
-    vec.push_back(i);
-    REQUIRE(ref == vec);
-    REQUIRE(vec.size() == inplace_cap);
-    ++i;
-
-    ref.push_back(i);
-    vec.push_back(i);
-    REQUIRE(ref == vec);
-    REQUIRE(vec.size() > inplace_cap);
-    ++i;
-
-#if 0
-    //Emplace at position
-    auto ref_itr = ref.begin() + ref.size() / 2;
-    ref.emplace(ref_itr, i);
-    vec_itr = vec.begin() + vec.size() / 2;
-    vec.emplace(vec_itr, i);
-    i++;
-    REQUIRE(ref == vec);
-#endif
-
-    //Insert single at position
-    auto ref_itr = ref.begin() + ref.size() / 2;
-    ref.insert(ref_itr, i);
-    vec_itr = vec.begin() + vec.size() / 2;
-    vec.insert(vec_itr, i);
-    i++;
-    REQUIRE(ref == vec);
-
-    //Insert K at position
-    int k = 5;
-    ref_itr = ref.begin() + ref.size() / 2;
-    ref.insert(ref_itr, k, i);
-    vec_itr = vec.begin() + vec.size() / 2;
-    vec.insert(vec_itr, k, i);
-    i++;
-    REQUIRE(ref == vec);
-
-    //Range insert
-    std::vector<int> range_values = {5, 4, 3, 2, 1};
-    ref_itr = ref.begin() + ref.size() / 2;
-    ref.insert(ref_itr, range_values.begin(), range_values.end());
-#if 0
-    vec_itr = vec.begin() + vec.size() / 2;
-    vec.insert(vec_itr, range_values.begin(), range_values.end());
-    REQUIRE(ref == vec);
-#endif
-
-    //Clear
-    ref.clear();
-    vec.clear();
-    REQUIRE(ref == vec);
-
-    //Add after clear
-    ref.push_back(i);
-    vec.push_back(i);
-    REQUIRE(ref == vec);
-    ++i;
-}
diff --git a/third_party/vtr/libs/vtrutil/test/test_strings.cpp b/third_party/vtr/libs/vtrutil/test/test_strings.cpp
deleted file mode 100644
index b7fa4ea82..000000000
--- a/third_party/vtr/libs/vtrutil/test/test_strings.cpp
+++ /dev/null
@@ -1,211 +0,0 @@
-#include "catch2/catch_test_macros.hpp"
-
-#include "vtr_string_view.h"
-#include "vtr_string_interning.h"
-
-TEST_CASE("String view", "[vtr_string_view/string_view]") {
-    vtr::string_view a("test");
-    vtr::string_view b("test");
-    vtr::string_view c("tes");
-    vtr::string_view d("est");
-    vtr::string_view e("es");
-
-    REQUIRE(a.size() == 4);
-    REQUIRE(b.size() == 4);
-    REQUIRE(c.size() == 3);
-    REQUIRE(d.size() == 3);
-    REQUIRE(e.size() == 2);
-
-    REQUIRE(a[0] == 't');
-    REQUIRE(a[1] == 'e');
-    REQUIRE(a[2] == 's');
-    REQUIRE(a[3] == 't');
-
-    auto itr = a.begin();
-    REQUIRE(*itr++ == 't');
-    REQUIRE(*itr++ == 'e');
-    REQUIRE(*itr++ == 's');
-    REQUIRE(*itr++ == 't');
-    REQUIRE(itr == a.end());
-
-    REQUIRE(a.front() == 't');
-    REQUIRE(c.front() == 't');
-    REQUIRE(c.back() == 's');
-
-    REQUIRE(a == b);
-    REQUIRE(a <= b);
-    REQUIRE(a >= b);
-    REQUIRE(a != c);
-
-    REQUIRE(c < a);
-    REQUIRE(a >= c);
-
-    REQUIRE(a > c);
-    REQUIRE(c <= a);
-
-    REQUIRE(c != d);
-    REQUIRE(a.substr(0, 3) == c);
-    REQUIRE(a.substr(1, 3) == d);
-    REQUIRE(a.substr(1) == d);
-    REQUIRE(a.substr(1, 2) == e);
-    REQUIRE(std::hash<vtr::string_view>()(a) == std::hash<vtr::string_view>()(b));
-    REQUIRE(std::hash<vtr::string_view>()(a) != std::hash<vtr::string_view>()(c));
-
-    vtr::string_view f = a;
-    REQUIRE(b == f);
-
-    f = e;
-    REQUIRE(b != f);
-    REQUIRE(e == f);
-
-    std::swap(a, f);
-    REQUIRE(a == e);
-    REQUIRE(f == b);
-}
-
-TEST_CASE("Basic string internment", "[vtr_string_interning/string_internment") {
-    vtr::string_internment internment;
-
-    vtr::interned_string a = internment.intern_string(vtr::string_view("test"));
-    vtr::interned_string b = internment.intern_string(vtr::string_view("test"));
-    vtr::interned_string c = internment.intern_string(vtr::string_view("tes"));
-    vtr::interned_string d = internment.intern_string(vtr::string_view("est"));
-    vtr::interned_string e = internment.intern_string(vtr::string_view("es"));
-
-    auto itr = a.begin(&internment);
-    REQUIRE(*itr++ == 't');
-    REQUIRE(*itr++ == 'e');
-    REQUIRE(*itr++ == 's');
-    REQUIRE(*itr++ == 't');
-    REQUIRE(itr == a.end());
-
-    itr = a.begin(&internment);
-    REQUIRE(*itr == 't');
-    ++itr;
-    REQUIRE(*itr == 'e');
-    ++itr;
-    REQUIRE(*itr == 's');
-    ++itr;
-    REQUIRE(*itr == 't');
-    ++itr;
-    REQUIRE(itr == a.end());
-
-    REQUIRE(a == b);
-    REQUIRE(a.bind(&internment) <= b.bind(&internment));
-    REQUIRE(a.bind(&internment) >= b.bind(&internment));
-    REQUIRE(a != c);
-
-    REQUIRE(c.bind(&internment) < a.bind(&internment));
-    REQUIRE(a.bind(&internment) >= c.bind(&internment));
-
-    REQUIRE(a.bind(&internment) > c.bind(&internment));
-    REQUIRE(c.bind(&internment) <= a.bind(&internment));
-
-    REQUIRE(c != d);
-    REQUIRE(std::hash<vtr::interned_string>()(a) == std::hash<vtr::interned_string>()(b));
-    REQUIRE(std::hash<vtr::interned_string>()(a) != std::hash<vtr::interned_string>()(c));
-
-    std::string g;
-    a.get(&internment, &g);
-    REQUIRE(g == "test");
-    c.get(&internment, &g);
-    REQUIRE(g == "tes");
-    d.get(&internment, &g);
-    REQUIRE(g == "est");
-
-    vtr::interned_string f = a;
-    REQUIRE(b == f);
-
-    f = e;
-    REQUIRE(b != f);
-    REQUIRE(e == f);
-
-    std::swap(a, f);
-    REQUIRE(a == e);
-    REQUIRE(f == b);
-}
-
-static void test_internment_retreval(const vtr::string_internment* internment, vtr::interned_string str, const char* expect) {
-    std::string copy;
-    str.get(internment, &copy);
-    REQUIRE(copy == expect);
-    copy.clear();
-    std::copy(str.begin(internment), str.end(), std::back_inserter(copy));
-    REQUIRE(copy == expect);
-}
-
-TEST_CASE("Split string internment", "[vtr_string_interning/string_internment") {
-    vtr::string_internment internment;
-
-    size_t unique_strings = 0;
-
-    REQUIRE(internment.unique_strings() == unique_strings);
-    vtr::interned_string a = internment.intern_string(vtr::string_view("test"));
-    unique_strings += 1;
-    REQUIRE(internment.unique_strings() == unique_strings);
-    vtr::interned_string b = internment.intern_string(vtr::string_view("test.test"));
-    REQUIRE(internment.unique_strings() == unique_strings);
-    vtr::interned_string c = internment.intern_string(vtr::string_view("test.test.test"));
-    REQUIRE(internment.unique_strings() == unique_strings);
-    vtr::interned_string d = internment.intern_string(vtr::string_view("test.test.test.test"));
-    unique_strings += 1;
-    REQUIRE(internment.unique_strings() == unique_strings);
-
-    test_internment_retreval(&internment, a, "test");
-    test_internment_retreval(&internment, b, "test.test");
-    test_internment_retreval(&internment, c, "test.test.test");
-    test_internment_retreval(&internment, d, "test.test.test.test");
-
-    vtr::interned_string f = internment.intern_string(vtr::string_view("a"));
-    unique_strings += 1;
-    REQUIRE(internment.unique_strings() == unique_strings);
-    vtr::interned_string g = internment.intern_string(vtr::string_view("b.c"));
-    unique_strings += 2;
-    REQUIRE(internment.unique_strings() == unique_strings);
-    vtr::interned_string h = internment.intern_string(vtr::string_view("d.e.f"));
-    unique_strings += 3;
-    REQUIRE(internment.unique_strings() == unique_strings);
-    vtr::interned_string i = internment.intern_string(vtr::string_view("g.h.i.j"));
-    unique_strings += 1;
-    REQUIRE(internment.unique_strings() == unique_strings);
-
-    test_internment_retreval(&internment, f, "a");
-    test_internment_retreval(&internment, g, "b.c");
-    test_internment_retreval(&internment, h, "d.e.f");
-    test_internment_retreval(&internment, i, "g.h.i.j");
-
-    vtr::interned_string j = internment.intern_string(vtr::string_view("."));
-    unique_strings += 1;
-    REQUIRE(internment.unique_strings() == unique_strings);
-    vtr::interned_string k = internment.intern_string(vtr::string_view(".."));
-    REQUIRE(internment.unique_strings() == unique_strings);
-    vtr::interned_string l = internment.intern_string(vtr::string_view("..."));
-    unique_strings += 1;
-    REQUIRE(internment.unique_strings() == unique_strings);
-    vtr::interned_string m = internment.intern_string(vtr::string_view("...."));
-    unique_strings += 1;
-    REQUIRE(internment.unique_strings() == unique_strings);
-
-    test_internment_retreval(&internment, j, ".");
-    test_internment_retreval(&internment, k, "..");
-    test_internment_retreval(&internment, l, "...");
-    test_internment_retreval(&internment, m, "....");
-
-    vtr::interned_string n = internment.intern_string(vtr::string_view(".q"));
-    unique_strings += 1;
-    REQUIRE(internment.unique_strings() == unique_strings);
-    vtr::interned_string o = internment.intern_string(vtr::string_view(".a."));
-    REQUIRE(internment.unique_strings() == unique_strings);
-    vtr::interned_string p = internment.intern_string(vtr::string_view("b.c.d"));
-    REQUIRE(internment.unique_strings() == unique_strings);
-    vtr::interned_string q = internment.intern_string(vtr::string_view("e..f"));
-    REQUIRE(internment.unique_strings() == unique_strings);
-    vtr::interned_string r = internment.intern_string(vtr::string_view("e."));
-    REQUIRE(internment.unique_strings() == unique_strings);
-
-    test_internment_retreval(&internment, n, ".q");
-    test_internment_retreval(&internment, o, ".a.");
-    test_internment_retreval(&internment, p, "b.c.d");
-    test_internment_retreval(&internment, q, "e..f");
-    test_internment_retreval(&internment, r, "e.");
-}
diff --git a/third_party/vtr/libs/vtrutil/test/test_strong_id.cpp b/third_party/vtr/libs/vtrutil/test/test_strong_id.cpp
deleted file mode 100644
index d9b766a17..000000000
--- a/third_party/vtr/libs/vtrutil/test/test_strong_id.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-#include "catch2/catch_test_macros.hpp"
-
-#include "vtr_strong_id.h"
-#include "vtr_strong_id_range.h"
-
-struct t_test_tag;
-using TestStrongId = vtr::StrongId<t_test_tag>;
-
-TEST_CASE("StrongId", "[StrongId/StrongId]") {
-    TestStrongId a;
-    TestStrongId b;
-    TestStrongId c(0);
-    TestStrongId d(0);
-    TestStrongId e(1);
-    TestStrongId f(2);
-
-    REQUIRE(!bool(a));
-    REQUIRE(!bool(b));
-    REQUIRE(bool(c));
-    REQUIRE(bool(d));
-    REQUIRE(bool(e));
-    REQUIRE(bool(f));
-
-    REQUIRE(a == b);
-    REQUIRE(a == TestStrongId::INVALID());
-
-    REQUIRE(c == d);
-    REQUIRE(c != a);
-    REQUIRE(c != TestStrongId::INVALID());
-    REQUIRE(d != TestStrongId::INVALID());
-
-    REQUIRE(c != e);
-    REQUIRE(c != f);
-    REQUIRE(e != f);
-
-    REQUIRE(c < e);
-    REQUIRE(c < f);
-    REQUIRE(e < f);
-    REQUIRE(!(e < c));
-    REQUIRE(!(f < c));
-    REQUIRE(!(f < e));
-}
-
-TEST_CASE("StrongIdIterator", "[StrongId/StrongIdIterator]") {
-    TestStrongId a(0);
-    TestStrongId b(1);
-    TestStrongId c(5);
-    TestStrongId d(5);
-
-    vtr::StrongIdIterator<TestStrongId> a_iter(a);
-    vtr::StrongIdIterator<TestStrongId> b_iter(b);
-    vtr::StrongIdIterator<TestStrongId> c_iter(c);
-    vtr::StrongIdIterator<TestStrongId> d_iter(d);
-
-    REQUIRE(*a_iter == a);
-    REQUIRE(*b_iter == b);
-    REQUIRE(*c_iter == c);
-    REQUIRE(*c_iter == d);
-    REQUIRE(*d_iter == c);
-    REQUIRE(*d_iter == d);
-
-    REQUIRE(a_iter != b_iter);
-    REQUIRE(a_iter != c_iter);
-    REQUIRE(a_iter != d_iter);
-
-    REQUIRE(c_iter == d_iter);
-    REQUIRE(c_iter != a_iter);
-    REQUIRE(c_iter != b_iter);
-
-    REQUIRE(std::distance(a_iter, b_iter) == 1);
-    REQUIRE(std::distance(c_iter, d_iter) == 0);
-    REQUIRE(std::distance(d_iter, a_iter) == -5);
-
-    REQUIRE(a_iter < b_iter);
-    REQUIRE(b_iter < c_iter);
-    REQUIRE(!(c_iter < b_iter));
-
-    REQUIRE(a_iter[0] == a);
-    REQUIRE(a_iter[1] == b);
-    REQUIRE(a_iter[5] == c);
-    REQUIRE(c_iter[0] == c);
-    REQUIRE(c_iter[-4] == b);
-    REQUIRE(c_iter[-5] == a);
-
-    REQUIRE((a_iter + 5) == c_iter);
-    REQUIRE(a_iter == (c_iter - 5));
-    a_iter += 5;
-    REQUIRE(a_iter == c_iter);
-    a_iter -= 4;
-    REQUIRE(a_iter == b_iter);
-}
-
-TEST_CASE("StrongIdRange", "[StrongId/StrongIdRange]") {
-    TestStrongId a(0);
-    TestStrongId b(0);
-    TestStrongId c(5);
-    TestStrongId d(1);
-
-    vtr::StrongIdRange<TestStrongId> r1(a, b);
-    REQUIRE(r1.size() == 0);
-    REQUIRE(r1.empty());
-
-    vtr::StrongIdRange<TestStrongId> r2(a, c);
-    REQUIRE(r2.size() == 5);
-    REQUIRE(!r2.empty());
-
-    vtr::StrongIdRange<TestStrongId> r3(d, c);
-    REQUIRE(r3.size() == 4);
-    REQUIRE(!r3.empty());
-
-    int count = 0;
-    for (TestStrongId id : r1) {
-        (void)id;
-        count += 1;
-    }
-    REQUIRE(count == 0);
-
-    for (TestStrongId id : r2) {
-        REQUIRE(TestStrongId(count) == id);
-        count += 1;
-    }
-    REQUIRE(count == 5);
-
-    count = 0;
-    for (TestStrongId id : r3) {
-        REQUIRE(TestStrongId(count + 1) == id);
-        count += 1;
-    }
-    REQUIRE(count == 4);
-}
diff --git a/third_party/vtr/libs/vtrutil/test/test_vector.cpp b/third_party/vtr/libs/vtrutil/test/test_vector.cpp
deleted file mode 100644
index 247a3df57..000000000
--- a/third_party/vtr/libs/vtrutil/test/test_vector.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-#include "catch2/catch_test_macros.hpp"
-
-#include "vtr_vector.h"
-#include "vtr_strong_id.h"
-
-#include <ostream>
-
-struct test_tag;
-typedef vtr::StrongId<test_tag> TestId;
-
-std::ostream& operator<<(std::ostream& os, const TestId id);
-
-std::ostream& operator<<(std::ostream& os, const TestId id) {
-    os << "TestId(" << size_t(id) << ")";
-    return os;
-}
-
-TEST_CASE("Basic Ops", "[vtr_vector]") {
-    vtr::vector<TestId, int> vec;
-
-    vec.push_back(1);
-    vec.push_back(2);
-    vec.push_back(3);
-
-    REQUIRE(vec.size() == 3);
-    REQUIRE(vec[TestId(0)] == 1);
-    REQUIRE(vec[TestId(1)] == 2);
-    REQUIRE(vec[TestId(2)] == 3);
-
-    vec.emplace_back(4);
-
-    REQUIRE(vec.size() == 4);
-    REQUIRE(vec[TestId(3)] == 4);
-
-    REQUIRE(vec.front() == 1);
-    REQUIRE(vec.back() == 4);
-}
-
-TEST_CASE("Key Access", "[vtr_vector]") {
-    vtr::vector<TestId, int> vec;
-
-    vec.push_back(1);
-    vec.push_back(2);
-    vec.push_back(3);
-    vec.push_back(4);
-
-    std::vector<TestId> expected_keys = {TestId(0), TestId(1), TestId(2), TestId(3)};
-
-    auto keys = vec.keys();
-    REQUIRE(keys.size() == vec.size());
-
-    size_t i = 0;
-    for (TestId key : keys) {
-        REQUIRE(key == expected_keys[i]);
-        ++i;
-    }
-}
diff --git a/third_party/vtr/verilog/LICENSE.md b/third_party/vtr/verilog/LICENSE.md
deleted file mode 100644
index 01332da43..000000000
--- a/third_party/vtr/verilog/LICENSE.md
+++ /dev/null
@@ -1,69 +0,0 @@
-# VTR License
-
-The software package "VTR" includes the software tools ODIN II, ABC, and VPR as
-well as additional benchmarks, documentation, libraries and scripts. The authors
-of the various components of VTR retain their ownership of their tools.
-
-* Unless otherwise noted (in particular ABC, the benchmark circuits and some libraries),
-all software, documents, and scripts in VTR, follows the standard MIT license described
-[here](http://www.opensource.org/licenses/mit-license.php) copied below for
-your convenience:
-
-> The MIT License (MIT)
->
-> Copyright 2012 VTR Developers
->
-> Permission is hereby granted, free of charge, to any person obtaining a copy of
-> this software and associated documentation files (the "Software"), to deal in
-> the Software without restriction, including without limitation the rights to
-> use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-> of the Software, and to permit persons to whom the Software is furnished to do
-> so, subject to the following conditions:
->
-> The above copyright notice and this permission notice shall be included in all
-> copies or substantial portions of the Software.
->
-> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-> SOFTWARE.
-
-* Terms and conditions for ABC is found
-[here](http://www.eecs.berkeley.edu/~alanmi/abc/copyright.htm) copied below
-for your convenience:
-
-> Copyright (c) The Regents of the University of California. All rights reserved.
->
-> Permission is hereby granted, without written agreement and without license or
-> royalty fees, to use, copy, modify, and distribute this software and its
-> documentation for any purpose, provided that the above copyright notice and the
-> following two paragraphs appear in all copies of this software.
->
-> IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
-> DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
-> THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
-> CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
->
-> THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
-> BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-> A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS,
-> AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
-> SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
-
-The benchmark circuits are all open source but each have their own
-individual terms and conditions which are listed in the source code of each
-benchmark.
-
-Subject to these conditions, the software is provided free of charge to all
-interested parties.
-
-If you do decide to use this tool, please reference our work as references are
-important in academia.
-
-Donations in the form of research grants to promote further research and
-development on the tools will be gladly accepted, either anonymously or with
-attribution on our future publications.
-
diff --git a/third_party/vtr/LICENSE.md b/third_party/vtr_flow/LICENSE.md
similarity index 100%
rename from third_party/vtr/LICENSE.md
rename to third_party/vtr_flow/LICENSE.md
diff --git a/third_party/vtr_flow/README.md b/third_party/vtr_flow/README.md
new file mode 100755
index 000000000..53558ddb8
--- /dev/null
+++ b/third_party/vtr_flow/README.md
@@ -0,0 +1,7 @@
+# VTR Flow
+
+This folder contains architecture files, benchmark circuits and scripts for running the VTR flow.
+
+For a description see the ['Running the VTR Flow' documentation at https://docs.verilogtorouting.org](https://docs.verilogtorouting.org/en/latest/vtr/running_vtr/),
+or the [`run_vtr_flow.rst` file](../doc/src/vtr/run_vtr_flow.rst) in the [`doc` folder](../doc)
+in the [root of the VTR source tree](https://github.com/SymbiFlow/vtr-verilog-to-routing).
diff --git a/third_party/vtr/arch/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml b/third_party/vtr_flow/arch/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
similarity index 100%
rename from third_party/vtr/arch/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
rename to third_party/vtr_flow/arch/k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml
diff --git a/third_party/vtr/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml b/third_party/vtr_flow/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml
similarity index 100%
rename from third_party/vtr/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml
rename to third_party/vtr_flow/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml
diff --git a/third_party/vtr/primitives.v b/third_party/vtr_flow/primitives.v
similarity index 100%
rename from third_party/vtr/primitives.v
rename to third_party/vtr_flow/primitives.v
diff --git a/third_party/vtr/verilog/eltwise_layer.v b/third_party/vtr_flow/verilog/eltwise_layer.v
similarity index 100%
rename from third_party/vtr/verilog/eltwise_layer.v
rename to third_party/vtr_flow/verilog/eltwise_layer.v
diff --git a/third_party/vtr/verilog/hard_block_include.v b/third_party/vtr_flow/verilog/hard_block_include.v
similarity index 100%
rename from third_party/vtr/verilog/hard_block_include.v
rename to third_party/vtr_flow/verilog/hard_block_include.v
diff --git a/third_party/vtr/verilog/raygentop.v b/third_party/vtr_flow/verilog/raygentop.v
similarity index 100%
rename from third_party/vtr/verilog/raygentop.v
rename to third_party/vtr_flow/verilog/raygentop.v

From cfd6c83527e3558a58928b7b57b31fea11ebf9cf Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Thu, 29 Dec 2022 18:17:02 -0400
Subject: [PATCH 29/56] g++-11

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4ab181419..ad90b3803 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -34,7 +34,7 @@ jobs:
     - name: Install
       run: |
         sudo apt-get update
-        sudo apt-get install git g++-9 build-essential bison flex \
+        sudo apt-get install git g++-11 build-essential bison flex \
           libreadline-dev gawk tcl-dev libffi-dev git graphviz xdot \
           pkg-config libboost-system-dev libboost-python-dev \
           libboost-filesystem-dev zlib1g-dev clang-format-8 cmake

From 714e58300ec6c1c7131118e3539941c64e22fa82 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Thu, 29 Dec 2022 18:21:42 -0400
Subject: [PATCH 30/56] g++ v

---
 .github/workflows/build-and-test.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/build-and-test.sh b/.github/workflows/build-and-test.sh
index baf1f8e2f..51128c02e 100755
--- a/.github/workflows/build-and-test.sh
+++ b/.github/workflows/build-and-test.sh
@@ -24,6 +24,7 @@ source .github/workflows/common.sh
 start_section Building
 
 export CXXFLAGS=-Werror
+g++ -v
 make UHDM_INSTALL_DIR=`pwd`/env/conda/envs/yosys-plugins/ VTR_INSTALL_DIR=`pwd`/env/conda/envs/yosys-plugins plugins -j`nproc`
 unset CXXFLAGS
 

From 2b4bfa1705732a4ddf3ee688d6516605cf074cb1 Mon Sep 17 00:00:00 2001
From: Dani <17553473+poname@users.noreply.github.com>
Date: Mon, 2 Jan 2023 16:56:15 -0400
Subject: [PATCH 31/56] try vtr-optimized

---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index 1f8db2eac..029689030 100644
--- a/environment.yml
+++ b/environment.yml
@@ -22,4 +22,4 @@ dependencies:
   - litex-hub::yosys=0.17_7_g990c9b8e1=20220512_085338_py37
   - litex-hub::surelog=0.0_5519_g900fb2499=20221223_060448_py37
   - litex-hub::iverilog
-  - litex-hub::vtr
+  - litex-hub::vtr-optimized

From c3c937203d92ae1fb145fcdac9ed31be154fd68e Mon Sep 17 00:00:00 2001
From: Dani <17553473+poname@users.noreply.github.com>
Date: Mon, 2 Jan 2023 17:01:10 -0400
Subject: [PATCH 32/56] vtr

---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index 029689030..1f8db2eac 100644
--- a/environment.yml
+++ b/environment.yml
@@ -22,4 +22,4 @@ dependencies:
   - litex-hub::yosys=0.17_7_g990c9b8e1=20220512_085338_py37
   - litex-hub::surelog=0.0_5519_g900fb2499=20221223_060448_py37
   - litex-hub::iverilog
-  - litex-hub::vtr-optimized
+  - litex-hub::vtr

From 6310dfedc887854fe2dc84cf70fe3797cc78e0bc Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Tue, 3 Jan 2023 10:31:09 -0400
Subject: [PATCH 33/56] vtr-libs test

---
 environment.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/environment.yml b/environment.yml
index 1f8db2eac..7ca5f3d98 100644
--- a/environment.yml
+++ b/environment.yml
@@ -18,8 +18,10 @@ name: yosys-plugins
 channels:
   - defaults
   - litex-hub
+  - poname
 dependencies:
   - litex-hub::yosys=0.17_7_g990c9b8e1=20220512_085338_py37
   - litex-hub::surelog=0.0_5519_g900fb2499=20221223_060448_py37
   - litex-hub::iverilog
   - litex-hub::vtr
+  - poname:vtr-libs

From 4197e32ce3c3c447fee821ecf6a6f63939922de8 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Tue, 3 Jan 2023 10:35:06 -0400
Subject: [PATCH 34/56] typo

---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index 7ca5f3d98..a3b9e0a85 100644
--- a/environment.yml
+++ b/environment.yml
@@ -24,4 +24,4 @@ dependencies:
   - litex-hub::surelog=0.0_5519_g900fb2499=20221223_060448_py37
   - litex-hub::iverilog
   - litex-hub::vtr
-  - poname:vtr-libs
+  - poname::vtr-libs

From 5beb99959f8163ba0a682967c492244419bf37a7 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Tue, 3 Jan 2023 10:46:32 -0400
Subject: [PATCH 35/56] ci

---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index a3b9e0a85..9437ab404 100644
--- a/environment.yml
+++ b/environment.yml
@@ -24,4 +24,4 @@ dependencies:
   - litex-hub::surelog=0.0_5519_g900fb2499=20221223_060448_py37
   - litex-hub::iverilog
   - litex-hub::vtr
-  - poname::vtr-libs
+  - poname::vtr-libs
\ No newline at end of file

From a237d28f3b29c573e7f62146874f76cfec5daa3f Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Tue, 3 Jan 2023 11:15:06 -0400
Subject: [PATCH 36/56] ci 2

---
 environment.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index 9437ab404..24a208b5d 100644
--- a/environment.yml
+++ b/environment.yml
@@ -18,7 +18,6 @@ name: yosys-plugins
 channels:
   - defaults
   - litex-hub
-  - poname
 dependencies:
   - litex-hub::yosys=0.17_7_g990c9b8e1=20220512_085338_py37
   - litex-hub::surelog=0.0_5519_g900fb2499=20221223_060448_py37

From 3226dce4e707661ae016b3071f0c99bbfeb85e1b Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Tue, 3 Jan 2023 11:17:22 -0400
Subject: [PATCH 37/56] no vtr

---
 environment.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index 24a208b5d..300a86754 100644
--- a/environment.yml
+++ b/environment.yml
@@ -22,5 +22,4 @@ dependencies:
   - litex-hub::yosys=0.17_7_g990c9b8e1=20220512_085338_py37
   - litex-hub::surelog=0.0_5519_g900fb2499=20221223_060448_py37
   - litex-hub::iverilog
-  - litex-hub::vtr
   - poname::vtr-libs
\ No newline at end of file

From 2a908e4750619d9923b1726cff1a3489d06e8cdd Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Tue, 3 Jan 2023 11:21:02 -0400
Subject: [PATCH 38/56] vtr-gui

---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index 300a86754..8cd474f66 100644
--- a/environment.yml
+++ b/environment.yml
@@ -22,4 +22,4 @@ dependencies:
   - litex-hub::yosys=0.17_7_g990c9b8e1=20220512_085338_py37
   - litex-hub::surelog=0.0_5519_g900fb2499=20221223_060448_py37
   - litex-hub::iverilog
-  - poname::vtr-libs
\ No newline at end of file
+  - litex-hub::vtr-gui
\ No newline at end of file

From 7b36c74a8c52f79960f65c685500983dfd84f6b1 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Tue, 3 Jan 2023 11:25:41 -0400
Subject: [PATCH 39/56] vtr

---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index 8cd474f66..223f7a41b 100644
--- a/environment.yml
+++ b/environment.yml
@@ -22,4 +22,4 @@ dependencies:
   - litex-hub::yosys=0.17_7_g990c9b8e1=20220512_085338_py37
   - litex-hub::surelog=0.0_5519_g900fb2499=20221223_060448_py37
   - litex-hub::iverilog
-  - litex-hub::vtr-gui
\ No newline at end of file
+  - litex-hub::vtr
\ No newline at end of file

From 54f5f0f1f21f66113c0d30b492f870804aa209d0 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Tue, 3 Jan 2023 12:10:13 -0400
Subject: [PATCH 40/56] yosys

---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index 223f7a41b..490064fac 100644
--- a/environment.yml
+++ b/environment.yml
@@ -19,7 +19,7 @@ channels:
   - defaults
   - litex-hub
 dependencies:
-  - litex-hub::yosys=0.17_7_g990c9b8e1=20220512_085338_py37
+  - litex-hub::yosys
   - litex-hub::surelog=0.0_5519_g900fb2499=20221223_060448_py37
   - litex-hub::iverilog
   - litex-hub::vtr
\ No newline at end of file

From ef6aaa609bb11ea01445cffe05a2031e8f150b55 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 4 Jan 2023 09:25:04 -0400
Subject: [PATCH 41/56] retry

---
 environment.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index 490064fac..f564fd533 100644
--- a/environment.yml
+++ b/environment.yml
@@ -18,8 +18,9 @@ name: yosys-plugins
 channels:
   - defaults
   - litex-hub
+  - poname
 dependencies:
   - litex-hub::yosys
   - litex-hub::surelog=0.0_5519_g900fb2499=20221223_060448_py37
   - litex-hub::iverilog
-  - litex-hub::vtr
\ No newline at end of file
+  - poname::vtr-libs
\ No newline at end of file

From 16927704d3ff25cff33a8ef0ca826e8860d048a8 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 4 Jan 2023 10:00:42 -0400
Subject: [PATCH 42/56] #2

---
 environment.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index f564fd533..d4f2ba4ff 100644
--- a/environment.yml
+++ b/environment.yml
@@ -18,7 +18,6 @@ name: yosys-plugins
 channels:
   - defaults
   - litex-hub
-  - poname
 dependencies:
   - litex-hub::yosys
   - litex-hub::surelog=0.0_5519_g900fb2499=20221223_060448_py37

From 5ef45cdaea66bb136722ab8b9ca6713e873ee522 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 4 Jan 2023 10:18:00 -0400
Subject: [PATCH 43/56] dash

---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index d4f2ba4ff..dcb14abcf 100644
--- a/environment.yml
+++ b/environment.yml
@@ -22,4 +22,4 @@ dependencies:
   - litex-hub::yosys
   - litex-hub::surelog=0.0_5519_g900fb2499=20221223_060448_py37
   - litex-hub::iverilog
-  - poname::vtr-libs
\ No newline at end of file
+  - poname::vtrlibs
\ No newline at end of file

From ab0c5027781e669c06a4258970c1ae1c4cdf1ae3 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 4 Jan 2023 10:27:03 -0400
Subject: [PATCH 44/56] check#3

---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index dcb14abcf..fae695be9 100644
--- a/environment.yml
+++ b/environment.yml
@@ -22,4 +22,4 @@ dependencies:
   - litex-hub::yosys
   - litex-hub::surelog=0.0_5519_g900fb2499=20221223_060448_py37
   - litex-hub::iverilog
-  - poname::vtrlibs
\ No newline at end of file
+  - casatlantic::vtrlibs
\ No newline at end of file

From 55bacdcd0ae2e811471fffe3f674c7c6935b4782 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 4 Jan 2023 10:41:30 -0400
Subject: [PATCH 45/56] -lrtlnumber

---
 parmys-plugin/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/parmys-plugin/Makefile b/parmys-plugin/Makefile
index 7d487078e..d945c89de 100644
--- a/parmys-plugin/Makefile
+++ b/parmys-plugin/Makefile
@@ -70,6 +70,7 @@ LDLIBS += -larchfpga \
 		  -llog \
 		  -lpugixml \
 		  -lpugiutil \
+		  -lrtlnumber \
 		  -lpthread
 
 TECHLIBS_DIR = techlibs

From 0a3130aea8fa10813cc32f0b37921c82bea0254c Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 4 Jan 2023 10:49:18 -0400
Subject: [PATCH 46/56] symlink fixed

---
 .../tests/VexRiscv_Lite/k6_frac_N10_frac_chain_mem32K_40nm.xml  | 2 +-
 .../tests/mips32r1_core/k6_frac_N10_frac_chain_mem32K_40nm.xml  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/parmys-plugin/tests/VexRiscv_Lite/k6_frac_N10_frac_chain_mem32K_40nm.xml b/parmys-plugin/tests/VexRiscv_Lite/k6_frac_N10_frac_chain_mem32K_40nm.xml
index af1bf2426..f8c1cb9d4 120000
--- a/parmys-plugin/tests/VexRiscv_Lite/k6_frac_N10_frac_chain_mem32K_40nm.xml
+++ b/parmys-plugin/tests/VexRiscv_Lite/k6_frac_N10_frac_chain_mem32K_40nm.xml
@@ -1 +1 @@
-../../../third_party/vtr/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml
\ No newline at end of file
+../../../third_party/vtr_flow/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml
\ No newline at end of file
diff --git a/parmys-plugin/tests/mips32r1_core/k6_frac_N10_frac_chain_mem32K_40nm.xml b/parmys-plugin/tests/mips32r1_core/k6_frac_N10_frac_chain_mem32K_40nm.xml
index af1bf2426..f8c1cb9d4 120000
--- a/parmys-plugin/tests/mips32r1_core/k6_frac_N10_frac_chain_mem32K_40nm.xml
+++ b/parmys-plugin/tests/mips32r1_core/k6_frac_N10_frac_chain_mem32K_40nm.xml
@@ -1 +1 @@
-../../../third_party/vtr/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml
\ No newline at end of file
+../../../third_party/vtr_flow/arch/k6_frac_N10_frac_chain_mem32K_40nm.xml
\ No newline at end of file

From 7da8d01ad852e1358605f26a1dd79dd2461bca1b Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 4 Jan 2023 10:51:57 -0400
Subject: [PATCH 47/56] smoke test

---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index fae695be9..490064fac 100644
--- a/environment.yml
+++ b/environment.yml
@@ -22,4 +22,4 @@ dependencies:
   - litex-hub::yosys
   - litex-hub::surelog=0.0_5519_g900fb2499=20221223_060448_py37
   - litex-hub::iverilog
-  - casatlantic::vtrlibs
\ No newline at end of file
+  - litex-hub::vtr
\ No newline at end of file

From b9739dfd0a9bc646d08aec8fded63ed88e93538a Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 4 Jan 2023 10:55:12 -0400
Subject: [PATCH 48/56] no smoke

---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index 490064fac..fae695be9 100644
--- a/environment.yml
+++ b/environment.yml
@@ -22,4 +22,4 @@ dependencies:
   - litex-hub::yosys
   - litex-hub::surelog=0.0_5519_g900fb2499=20221223_060448_py37
   - litex-hub::iverilog
-  - litex-hub::vtr
\ No newline at end of file
+  - casatlantic::vtrlibs
\ No newline at end of file

From 7dd1c1a305750ba5418ec25aa0430c8a6ed10fcd Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 4 Jan 2023 12:09:03 -0400
Subject: [PATCH 49/56] cas-atlantic::vtr-libs

---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index fae695be9..7f1f30aba 100644
--- a/environment.yml
+++ b/environment.yml
@@ -22,4 +22,4 @@ dependencies:
   - litex-hub::yosys
   - litex-hub::surelog=0.0_5519_g900fb2499=20221223_060448_py37
   - litex-hub::iverilog
-  - casatlantic::vtrlibs
\ No newline at end of file
+  - cas-atlantic::vtr-libs
\ No newline at end of file

From 6d94ece00b39e99da06ad5423168c25a805613a3 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 4 Jan 2023 12:19:13 -0400
Subject: [PATCH 50/56] cleaned

---
 .github/workflows/build-and-test.sh | 1 -
 .github/workflows/ci.yml            | 2 +-
 environment.yml                     | 2 +-
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/build-and-test.sh b/.github/workflows/build-and-test.sh
index 51128c02e..baf1f8e2f 100755
--- a/.github/workflows/build-and-test.sh
+++ b/.github/workflows/build-and-test.sh
@@ -24,7 +24,6 @@ source .github/workflows/common.sh
 start_section Building
 
 export CXXFLAGS=-Werror
-g++ -v
 make UHDM_INSTALL_DIR=`pwd`/env/conda/envs/yosys-plugins/ VTR_INSTALL_DIR=`pwd`/env/conda/envs/yosys-plugins plugins -j`nproc`
 unset CXXFLAGS
 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ad90b3803..4ab181419 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -34,7 +34,7 @@ jobs:
     - name: Install
       run: |
         sudo apt-get update
-        sudo apt-get install git g++-11 build-essential bison flex \
+        sudo apt-get install git g++-9 build-essential bison flex \
           libreadline-dev gawk tcl-dev libffi-dev git graphviz xdot \
           pkg-config libboost-system-dev libboost-python-dev \
           libboost-filesystem-dev zlib1g-dev clang-format-8 cmake
diff --git a/environment.yml b/environment.yml
index 7f1f30aba..faa830714 100644
--- a/environment.yml
+++ b/environment.yml
@@ -19,7 +19,7 @@ channels:
   - defaults
   - litex-hub
 dependencies:
-  - litex-hub::yosys
+  - litex-hub::yosys=0.17_7_g990c9b8e1=20220512_085338_py37
   - litex-hub::surelog=0.0_5519_g900fb2499=20221223_060448_py37
   - litex-hub::iverilog
   - cas-atlantic::vtr-libs
\ No newline at end of file

From 4ea5bdee7fd7b20542870d28e6dd829556b8963e Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Wed, 4 Jan 2023 12:29:39 -0400
Subject: [PATCH 51/56] all together

---
 Makefile | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 61f046c4a..dd92ed437 100644
--- a/Makefile
+++ b/Makefile
@@ -14,8 +14,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-#PLUGIN_LIST := fasm xdc params sdc ql-iob design_introspection integrateinv ql-qlf systemverilog uhdm dsp-ff parmys
-PLUGIN_LIST := parmys
+PLUGIN_LIST := fasm xdc params sdc ql-iob design_introspection integrateinv ql-qlf systemverilog uhdm dsp-ff parmys
 PLUGINS := $(foreach plugin,$(PLUGIN_LIST),$(plugin).so)
 PLUGINS_INSTALL := $(foreach plugin,$(PLUGIN_LIST),install_$(plugin))
 PLUGINS_CLEAN := $(foreach plugin,$(PLUGIN_LIST),clean_$(plugin))

From c9f1f6c45a4d84c1ab429f5518cb49a544aa9c4e Mon Sep 17 00:00:00 2001
From: Dani <17553473+poname@users.noreply.github.com>
Date: Tue, 24 Jan 2023 10:33:52 -0400
Subject: [PATCH 52/56] parmys added to ci

---
 .github/workflows/ci.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1ab44d145..a4193728e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -38,6 +38,7 @@ jobs:
           - systemverilog
           - uhdm
           - dsp-ff
+          - parmys
 
     steps:
 

From f2d60ba1ed2b492ee7358485c61900ca816b417b Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Fri, 10 Feb 2023 11:57:56 -0400
Subject: [PATCH 53/56] code re-order

---
 Makefile                                      |  2 +-
 parmys-plugin/Makefile                        | 50 ++++++++++---------
 parmys-plugin/{src => core}/adder.cc          |  2 +-
 parmys-plugin/{src/include => core}/adder.h   |  0
 parmys-plugin/{src => core}/block_memory.cc   |  0
 .../{src/include => core}/block_memory.h      |  0
 parmys-plugin/{src => core}/hard_block.cc     |  2 +-
 .../{src/include => core}/hard_block.h        |  0
 parmys-plugin/{src => core}/memory.cc         |  0
 parmys-plugin/{src/include => core}/memory.h  |  0
 parmys-plugin/{src => core}/multiplier.cc     |  2 +-
 .../{src/include => core}/multiplier.h        |  0
 parmys-plugin/{src => core}/subtractor.cc     |  0
 .../{src/include => core}/subtractor.h        |  0
 .../{src => mapping}/hard_soft_logic_mixer.cc |  0
 .../hard_soft_logic_mixer.h                   |  2 +-
 .../{src => mapping}/mixing_optimization.cc   |  0
 .../include => mapping}/mixing_optimization.h |  0
 parmys-plugin/{src => mapping}/odin_ii.cc     |  0
 .../{src/include => mapping}/odin_ii.h        |  0
 parmys-plugin/{src => mapping}/partial_map.cc |  0
 .../{src/include => mapping}/partial_map.h    |  0
 .../{src => netlist}/netlist_cleanup.cc       |  0
 .../include => netlist}/netlist_cleanup.h     |  0
 .../{src => netlist}/netlist_statistic.cc     |  0
 .../include => netlist}/netlist_statistic.h   |  0
 .../{src => netlist}/netlist_utils.cc         |  0
 .../{src/include => netlist}/netlist_utils.h  |  0
 .../{src => netlist}/netlist_visualizer.cc    |  0
 .../include => netlist}/netlist_visualizer.h  |  0
 parmys-plugin/{src => netlist}/node_utils.cc  |  0
 .../{src/include => netlist}/node_utils.h     |  0
 parmys-plugin/{src => utils}/ast_util.cc      |  0
 .../{src/include => utils}/ast_util.h         |  0
 .../{src/include => utils}/config_t.h         |  0
 parmys-plugin/{src => utils}/enum_str.cc      |  0
 parmys-plugin/{src => utils}/hash_table.cc    |  0
 .../{src/include => utils}/hash_table.h       |  0
 parmys-plugin/{src => utils}/odin_error.cc    |  0
 .../{src/include => utils}/odin_error.h       |  0
 .../{src/include => utils}/odin_globals.h     |  2 +-
 .../{src/include => utils}/odin_types.h       |  0
 parmys-plugin/{src => utils}/odin_util.cc     |  0
 .../{src/include => utils}/odin_util.h        |  0
 .../{src => utils}/read_xml_config_file.cc    |  0
 .../include => utils}/read_xml_config_file.h  |  0
 parmys-plugin/{src => utils}/string_cache.cc  |  0
 .../{src/include => utils}/string_cache.h     |  0
 48 files changed, 33 insertions(+), 29 deletions(-)
 rename parmys-plugin/{src => core}/adder.cc (99%)
 rename parmys-plugin/{src/include => core}/adder.h (100%)
 rename parmys-plugin/{src => core}/block_memory.cc (100%)
 rename parmys-plugin/{src/include => core}/block_memory.h (100%)
 rename parmys-plugin/{src => core}/hard_block.cc (99%)
 rename parmys-plugin/{src/include => core}/hard_block.h (100%)
 rename parmys-plugin/{src => core}/memory.cc (100%)
 rename parmys-plugin/{src/include => core}/memory.h (100%)
 rename parmys-plugin/{src => core}/multiplier.cc (99%)
 rename parmys-plugin/{src/include => core}/multiplier.h (100%)
 rename parmys-plugin/{src => core}/subtractor.cc (100%)
 rename parmys-plugin/{src/include => core}/subtractor.h (100%)
 rename parmys-plugin/{src => mapping}/hard_soft_logic_mixer.cc (100%)
 rename parmys-plugin/{src/include => mapping}/hard_soft_logic_mixer.h (98%)
 rename parmys-plugin/{src => mapping}/mixing_optimization.cc (100%)
 rename parmys-plugin/{src/include => mapping}/mixing_optimization.h (100%)
 rename parmys-plugin/{src => mapping}/odin_ii.cc (100%)
 rename parmys-plugin/{src/include => mapping}/odin_ii.h (100%)
 rename parmys-plugin/{src => mapping}/partial_map.cc (100%)
 rename parmys-plugin/{src/include => mapping}/partial_map.h (100%)
 rename parmys-plugin/{src => netlist}/netlist_cleanup.cc (100%)
 rename parmys-plugin/{src/include => netlist}/netlist_cleanup.h (100%)
 rename parmys-plugin/{src => netlist}/netlist_statistic.cc (100%)
 rename parmys-plugin/{src/include => netlist}/netlist_statistic.h (100%)
 rename parmys-plugin/{src => netlist}/netlist_utils.cc (100%)
 rename parmys-plugin/{src/include => netlist}/netlist_utils.h (100%)
 rename parmys-plugin/{src => netlist}/netlist_visualizer.cc (100%)
 rename parmys-plugin/{src/include => netlist}/netlist_visualizer.h (100%)
 rename parmys-plugin/{src => netlist}/node_utils.cc (100%)
 rename parmys-plugin/{src/include => netlist}/node_utils.h (100%)
 rename parmys-plugin/{src => utils}/ast_util.cc (100%)
 rename parmys-plugin/{src/include => utils}/ast_util.h (100%)
 rename parmys-plugin/{src/include => utils}/config_t.h (100%)
 rename parmys-plugin/{src => utils}/enum_str.cc (100%)
 rename parmys-plugin/{src => utils}/hash_table.cc (100%)
 rename parmys-plugin/{src/include => utils}/hash_table.h (100%)
 rename parmys-plugin/{src => utils}/odin_error.cc (100%)
 rename parmys-plugin/{src/include => utils}/odin_error.h (100%)
 rename parmys-plugin/{src/include => utils}/odin_globals.h (100%)
 rename parmys-plugin/{src/include => utils}/odin_types.h (100%)
 rename parmys-plugin/{src => utils}/odin_util.cc (100%)
 rename parmys-plugin/{src/include => utils}/odin_util.h (100%)
 rename parmys-plugin/{src => utils}/read_xml_config_file.cc (100%)
 rename parmys-plugin/{src/include => utils}/read_xml_config_file.h (100%)
 rename parmys-plugin/{src => utils}/string_cache.cc (100%)
 rename parmys-plugin/{src/include => utils}/string_cache.h (100%)

diff --git a/Makefile b/Makefile
index dd92ed437..f8ad48099 100644
--- a/Makefile
+++ b/Makefile
@@ -14,7 +14,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-PLUGIN_LIST := fasm xdc params sdc ql-iob design_introspection integrateinv ql-qlf systemverilog uhdm dsp-ff parmys
+PLUGIN_LIST := parmys
 PLUGINS := $(foreach plugin,$(PLUGIN_LIST),$(plugin).so)
 PLUGINS_INSTALL := $(foreach plugin,$(PLUGIN_LIST),install_$(plugin))
 PLUGINS_CLEAN := $(foreach plugin,$(PLUGIN_LIST),clean_$(plugin))
diff --git a/parmys-plugin/Makefile b/parmys-plugin/Makefile
index d945c89de..1471d9c60 100644
--- a/parmys-plugin/Makefile
+++ b/parmys-plugin/Makefile
@@ -24,28 +24,28 @@ SOURCES = parmys.cc \
 		  parmys_update.cc \
 		  parmys_utils.cc \
 		  parmys_resolve.cc \
-		  ${ODIN_II_DIR}/adder.cc \
-		  ${ODIN_II_DIR}/enum_str.cc \
-		  ${ODIN_II_DIR}/mixing_optimization.cc \
-		  ${ODIN_II_DIR}/read_xml_config_file.cc \
-		  ${ODIN_II_DIR}/odin_error.cc \
-		  ${ODIN_II_DIR}/odin_util.cc \
-		  ${ODIN_II_DIR}/netlist_statistic.cc \
-		  ${ODIN_II_DIR}/netlist_utils.cc \
-		  ${ODIN_II_DIR}/netlist_cleanup.cc \
-		  ${ODIN_II_DIR}/node_utils.cc \
-		  ${ODIN_II_DIR}/multiplier.cc \
-		  ${ODIN_II_DIR}/subtractor.cc \
-		  ${ODIN_II_DIR}/hard_soft_logic_mixer.cc \
-		  ${ODIN_II_DIR}/odin_ii.cc \
-		  ${ODIN_II_DIR}/string_cache.cc \
-		  ${ODIN_II_DIR}/partial_map.cc \
-		  ${ODIN_II_DIR}/hard_block.cc \
-		  ${ODIN_II_DIR}/block_memory.cc \
-		  ${ODIN_II_DIR}/memory.cc \
-		  ${ODIN_II_DIR}/netlist_visualizer.cc \
-		  ${ODIN_II_DIR}/hash_table.cc \
-		  ${ODIN_II_DIR}/ast_util.cc
+		  core/adder.cc \
+		  utils/enum_str.cc \
+		  mapping/mixing_optimization.cc \
+		  utils/read_xml_config_file.cc \
+		  utils/odin_error.cc \
+		  utils/odin_util.cc \
+		  netlist/netlist_statistic.cc \
+		  netlist/netlist_utils.cc \
+		  netlist/netlist_cleanup.cc \
+		  netlist/netlist_visualizer.cc \
+		  netlist/node_utils.cc \
+		  core/multiplier.cc \
+		  core/subtractor.cc \
+		  mapping/hard_soft_logic_mixer.cc \
+		  mapping/odin_ii.cc \
+		  utils/string_cache.cc \
+		  mapping/partial_map.cc \
+		  core/hard_block.cc \
+		  core/block_memory.cc \
+		  core/memory.cc \
+		  utils/hash_table.cc \
+		  utils/ast_util.cc
 
 VTR_INSTALL_DIR ?= /usr/local
 
@@ -54,7 +54,11 @@ include ../Makefile_plugin.common
 CXXFLAGS += -std=c++14 -Wall -W -Wextra \
             -Wno-deprecated-declarations \
             -Wno-unused-parameter \
-			-I${ODIN_II_DIR}/include \
+            -I. \
+			-Icore \
+			-Imapping \
+			-Inetlist \
+			-Iutils \
 			-I${VTR_INSTALL_DIR}/include/libarchfpga \
 			-I${VTR_INSTALL_DIR}/include/liblog \
 			-I${VTR_INSTALL_DIR}/include/libpugiutil \
diff --git a/parmys-plugin/src/adder.cc b/parmys-plugin/core/adder.cc
similarity index 99%
rename from parmys-plugin/src/adder.cc
rename to parmys-plugin/core/adder.cc
index 935cdb238..719bbc97d 100644
--- a/parmys-plugin/src/adder.cc
+++ b/parmys-plugin/core/adder.cc
@@ -28,7 +28,7 @@
 #include "vtr_memory.h"
 #include "vtr_util.h"
 
-#include "../parmys_utils.hpp"
+#include "parmys_utils.hpp"
 
 using vtr::t_linked_vptr;
 
diff --git a/parmys-plugin/src/include/adder.h b/parmys-plugin/core/adder.h
similarity index 100%
rename from parmys-plugin/src/include/adder.h
rename to parmys-plugin/core/adder.h
diff --git a/parmys-plugin/src/block_memory.cc b/parmys-plugin/core/block_memory.cc
similarity index 100%
rename from parmys-plugin/src/block_memory.cc
rename to parmys-plugin/core/block_memory.cc
diff --git a/parmys-plugin/src/include/block_memory.h b/parmys-plugin/core/block_memory.h
similarity index 100%
rename from parmys-plugin/src/include/block_memory.h
rename to parmys-plugin/core/block_memory.h
diff --git a/parmys-plugin/src/hard_block.cc b/parmys-plugin/core/hard_block.cc
similarity index 99%
rename from parmys-plugin/src/hard_block.cc
rename to parmys-plugin/core/hard_block.cc
index f2735a3fd..17615e4e2 100644
--- a/parmys-plugin/src/hard_block.cc
+++ b/parmys-plugin/core/hard_block.cc
@@ -26,7 +26,7 @@
 
 #include "kernel/yosys.h"
 
-#include "../parmys_utils.hpp"
+#include "parmys_utils.hpp"
 
 STRING_CACHE *hard_block_names = NULL;
 
diff --git a/parmys-plugin/src/include/hard_block.h b/parmys-plugin/core/hard_block.h
similarity index 100%
rename from parmys-plugin/src/include/hard_block.h
rename to parmys-plugin/core/hard_block.h
diff --git a/parmys-plugin/src/memory.cc b/parmys-plugin/core/memory.cc
similarity index 100%
rename from parmys-plugin/src/memory.cc
rename to parmys-plugin/core/memory.cc
diff --git a/parmys-plugin/src/include/memory.h b/parmys-plugin/core/memory.h
similarity index 100%
rename from parmys-plugin/src/include/memory.h
rename to parmys-plugin/core/memory.h
diff --git a/parmys-plugin/src/multiplier.cc b/parmys-plugin/core/multiplier.cc
similarity index 99%
rename from parmys-plugin/src/multiplier.cc
rename to parmys-plugin/core/multiplier.cc
index 847618a5b..68a65377d 100644
--- a/parmys-plugin/src/multiplier.cc
+++ b/parmys-plugin/core/multiplier.cc
@@ -36,7 +36,7 @@
 #include "vtr_memory.h"
 #include "vtr_util.h"
 
-#include "../parmys_utils.hpp"
+#include "parmys_utils.hpp"
 
 using vtr::insert_in_vptr_list;
 using vtr::t_linked_vptr;
diff --git a/parmys-plugin/src/include/multiplier.h b/parmys-plugin/core/multiplier.h
similarity index 100%
rename from parmys-plugin/src/include/multiplier.h
rename to parmys-plugin/core/multiplier.h
diff --git a/parmys-plugin/src/subtractor.cc b/parmys-plugin/core/subtractor.cc
similarity index 100%
rename from parmys-plugin/src/subtractor.cc
rename to parmys-plugin/core/subtractor.cc
diff --git a/parmys-plugin/src/include/subtractor.h b/parmys-plugin/core/subtractor.h
similarity index 100%
rename from parmys-plugin/src/include/subtractor.h
rename to parmys-plugin/core/subtractor.h
diff --git a/parmys-plugin/src/hard_soft_logic_mixer.cc b/parmys-plugin/mapping/hard_soft_logic_mixer.cc
similarity index 100%
rename from parmys-plugin/src/hard_soft_logic_mixer.cc
rename to parmys-plugin/mapping/hard_soft_logic_mixer.cc
diff --git a/parmys-plugin/src/include/hard_soft_logic_mixer.h b/parmys-plugin/mapping/hard_soft_logic_mixer.h
similarity index 98%
rename from parmys-plugin/src/include/hard_soft_logic_mixer.h
rename to parmys-plugin/mapping/hard_soft_logic_mixer.h
index 5e0283d00..1db034e08 100644
--- a/parmys-plugin/src/include/hard_soft_logic_mixer.h
+++ b/parmys-plugin/mapping/hard_soft_logic_mixer.h
@@ -18,8 +18,8 @@
 #ifndef _HARD_SOFT_LOGIC_MIXER_HPP_
 #define _HARD_SOFT_LOGIC_MIXER_HPP_
 
+#include "odin_types.h"
 #include "mixing_optimization.h"
-#include "odin_types.h" // netlist_t, config_t
 
 class HardSoftLogicMixer
 {
diff --git a/parmys-plugin/src/mixing_optimization.cc b/parmys-plugin/mapping/mixing_optimization.cc
similarity index 100%
rename from parmys-plugin/src/mixing_optimization.cc
rename to parmys-plugin/mapping/mixing_optimization.cc
diff --git a/parmys-plugin/src/include/mixing_optimization.h b/parmys-plugin/mapping/mixing_optimization.h
similarity index 100%
rename from parmys-plugin/src/include/mixing_optimization.h
rename to parmys-plugin/mapping/mixing_optimization.h
diff --git a/parmys-plugin/src/odin_ii.cc b/parmys-plugin/mapping/odin_ii.cc
similarity index 100%
rename from parmys-plugin/src/odin_ii.cc
rename to parmys-plugin/mapping/odin_ii.cc
diff --git a/parmys-plugin/src/include/odin_ii.h b/parmys-plugin/mapping/odin_ii.h
similarity index 100%
rename from parmys-plugin/src/include/odin_ii.h
rename to parmys-plugin/mapping/odin_ii.h
diff --git a/parmys-plugin/src/partial_map.cc b/parmys-plugin/mapping/partial_map.cc
similarity index 100%
rename from parmys-plugin/src/partial_map.cc
rename to parmys-plugin/mapping/partial_map.cc
diff --git a/parmys-plugin/src/include/partial_map.h b/parmys-plugin/mapping/partial_map.h
similarity index 100%
rename from parmys-plugin/src/include/partial_map.h
rename to parmys-plugin/mapping/partial_map.h
diff --git a/parmys-plugin/src/netlist_cleanup.cc b/parmys-plugin/netlist/netlist_cleanup.cc
similarity index 100%
rename from parmys-plugin/src/netlist_cleanup.cc
rename to parmys-plugin/netlist/netlist_cleanup.cc
diff --git a/parmys-plugin/src/include/netlist_cleanup.h b/parmys-plugin/netlist/netlist_cleanup.h
similarity index 100%
rename from parmys-plugin/src/include/netlist_cleanup.h
rename to parmys-plugin/netlist/netlist_cleanup.h
diff --git a/parmys-plugin/src/netlist_statistic.cc b/parmys-plugin/netlist/netlist_statistic.cc
similarity index 100%
rename from parmys-plugin/src/netlist_statistic.cc
rename to parmys-plugin/netlist/netlist_statistic.cc
diff --git a/parmys-plugin/src/include/netlist_statistic.h b/parmys-plugin/netlist/netlist_statistic.h
similarity index 100%
rename from parmys-plugin/src/include/netlist_statistic.h
rename to parmys-plugin/netlist/netlist_statistic.h
diff --git a/parmys-plugin/src/netlist_utils.cc b/parmys-plugin/netlist/netlist_utils.cc
similarity index 100%
rename from parmys-plugin/src/netlist_utils.cc
rename to parmys-plugin/netlist/netlist_utils.cc
diff --git a/parmys-plugin/src/include/netlist_utils.h b/parmys-plugin/netlist/netlist_utils.h
similarity index 100%
rename from parmys-plugin/src/include/netlist_utils.h
rename to parmys-plugin/netlist/netlist_utils.h
diff --git a/parmys-plugin/src/netlist_visualizer.cc b/parmys-plugin/netlist/netlist_visualizer.cc
similarity index 100%
rename from parmys-plugin/src/netlist_visualizer.cc
rename to parmys-plugin/netlist/netlist_visualizer.cc
diff --git a/parmys-plugin/src/include/netlist_visualizer.h b/parmys-plugin/netlist/netlist_visualizer.h
similarity index 100%
rename from parmys-plugin/src/include/netlist_visualizer.h
rename to parmys-plugin/netlist/netlist_visualizer.h
diff --git a/parmys-plugin/src/node_utils.cc b/parmys-plugin/netlist/node_utils.cc
similarity index 100%
rename from parmys-plugin/src/node_utils.cc
rename to parmys-plugin/netlist/node_utils.cc
diff --git a/parmys-plugin/src/include/node_utils.h b/parmys-plugin/netlist/node_utils.h
similarity index 100%
rename from parmys-plugin/src/include/node_utils.h
rename to parmys-plugin/netlist/node_utils.h
diff --git a/parmys-plugin/src/ast_util.cc b/parmys-plugin/utils/ast_util.cc
similarity index 100%
rename from parmys-plugin/src/ast_util.cc
rename to parmys-plugin/utils/ast_util.cc
diff --git a/parmys-plugin/src/include/ast_util.h b/parmys-plugin/utils/ast_util.h
similarity index 100%
rename from parmys-plugin/src/include/ast_util.h
rename to parmys-plugin/utils/ast_util.h
diff --git a/parmys-plugin/src/include/config_t.h b/parmys-plugin/utils/config_t.h
similarity index 100%
rename from parmys-plugin/src/include/config_t.h
rename to parmys-plugin/utils/config_t.h
diff --git a/parmys-plugin/src/enum_str.cc b/parmys-plugin/utils/enum_str.cc
similarity index 100%
rename from parmys-plugin/src/enum_str.cc
rename to parmys-plugin/utils/enum_str.cc
diff --git a/parmys-plugin/src/hash_table.cc b/parmys-plugin/utils/hash_table.cc
similarity index 100%
rename from parmys-plugin/src/hash_table.cc
rename to parmys-plugin/utils/hash_table.cc
diff --git a/parmys-plugin/src/include/hash_table.h b/parmys-plugin/utils/hash_table.h
similarity index 100%
rename from parmys-plugin/src/include/hash_table.h
rename to parmys-plugin/utils/hash_table.h
diff --git a/parmys-plugin/src/odin_error.cc b/parmys-plugin/utils/odin_error.cc
similarity index 100%
rename from parmys-plugin/src/odin_error.cc
rename to parmys-plugin/utils/odin_error.cc
diff --git a/parmys-plugin/src/include/odin_error.h b/parmys-plugin/utils/odin_error.h
similarity index 100%
rename from parmys-plugin/src/include/odin_error.h
rename to parmys-plugin/utils/odin_error.h
diff --git a/parmys-plugin/src/include/odin_globals.h b/parmys-plugin/utils/odin_globals.h
similarity index 100%
rename from parmys-plugin/src/include/odin_globals.h
rename to parmys-plugin/utils/odin_globals.h
index fa00ecb05..8a1a255a1 100644
--- a/parmys-plugin/src/include/odin_globals.h
+++ b/parmys-plugin/utils/odin_globals.h
@@ -18,8 +18,8 @@
 #ifndef _ODIN_GLOBALS_H_
 #define _ODIN_GLOBALS_H_
 
-#include "config_t.h"
 #include "hard_soft_logic_mixer.h"
+#include "config_t.h"
 #include "hash_table.h"
 #include "odin_types.h"
 #include "read_xml_arch_file.h"
diff --git a/parmys-plugin/src/include/odin_types.h b/parmys-plugin/utils/odin_types.h
similarity index 100%
rename from parmys-plugin/src/include/odin_types.h
rename to parmys-plugin/utils/odin_types.h
diff --git a/parmys-plugin/src/odin_util.cc b/parmys-plugin/utils/odin_util.cc
similarity index 100%
rename from parmys-plugin/src/odin_util.cc
rename to parmys-plugin/utils/odin_util.cc
diff --git a/parmys-plugin/src/include/odin_util.h b/parmys-plugin/utils/odin_util.h
similarity index 100%
rename from parmys-plugin/src/include/odin_util.h
rename to parmys-plugin/utils/odin_util.h
diff --git a/parmys-plugin/src/read_xml_config_file.cc b/parmys-plugin/utils/read_xml_config_file.cc
similarity index 100%
rename from parmys-plugin/src/read_xml_config_file.cc
rename to parmys-plugin/utils/read_xml_config_file.cc
diff --git a/parmys-plugin/src/include/read_xml_config_file.h b/parmys-plugin/utils/read_xml_config_file.h
similarity index 100%
rename from parmys-plugin/src/include/read_xml_config_file.h
rename to parmys-plugin/utils/read_xml_config_file.h
diff --git a/parmys-plugin/src/string_cache.cc b/parmys-plugin/utils/string_cache.cc
similarity index 100%
rename from parmys-plugin/src/string_cache.cc
rename to parmys-plugin/utils/string_cache.cc
diff --git a/parmys-plugin/src/include/string_cache.h b/parmys-plugin/utils/string_cache.h
similarity index 100%
rename from parmys-plugin/src/include/string_cache.h
rename to parmys-plugin/utils/string_cache.h

From c697777d7d2349203b2d48e92f97557dc3782e26 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Fri, 10 Feb 2023 12:03:20 -0400
Subject: [PATCH 54/56] parmys: rename parmys_*.hpp headers to .h, drop ODIN_II_DIR, build all plugins

---
 Makefile                                               | 2 +-
 parmys-plugin/Makefile                                 | 2 --
 parmys-plugin/core/adder.cc                            | 2 +-
 parmys-plugin/core/hard_block.cc                       | 2 +-
 parmys-plugin/core/multiplier.cc                       | 2 +-
 parmys-plugin/parmys.cc                                | 6 +++---
 parmys-plugin/parmys_arch.cc                           | 2 +-
 parmys-plugin/parmys_resolve.cc                        | 2 +-
 parmys-plugin/{parmys_resolve.hpp => parmys_resolve.h} | 0
 parmys-plugin/parmys_update.cc                         | 4 ++--
 parmys-plugin/{parmys_update.hpp => parmys_update.h}   | 0
 parmys-plugin/parmys_utils.cc                          | 2 +-
 parmys-plugin/{parmys_utils.hpp => parmys_utils.h}     | 0
 13 files changed, 12 insertions(+), 14 deletions(-)
 rename parmys-plugin/{parmys_resolve.hpp => parmys_resolve.h} (100%)
 rename parmys-plugin/{parmys_update.hpp => parmys_update.h} (100%)
 rename parmys-plugin/{parmys_utils.hpp => parmys_utils.h} (100%)

diff --git a/Makefile b/Makefile
index f8ad48099..dd92ed437 100644
--- a/Makefile
+++ b/Makefile
@@ -14,7 +14,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-PLUGIN_LIST := parmys
+PLUGIN_LIST := fasm xdc params sdc ql-iob design_introspection integrateinv ql-qlf systemverilog uhdm dsp-ff parmys
 PLUGINS := $(foreach plugin,$(PLUGIN_LIST),$(plugin).so)
 PLUGINS_INSTALL := $(foreach plugin,$(PLUGIN_LIST),install_$(plugin))
 PLUGINS_CLEAN := $(foreach plugin,$(PLUGIN_LIST),clean_$(plugin))
diff --git a/parmys-plugin/Makefile b/parmys-plugin/Makefile
index 1471d9c60..dbb3eb11e 100644
--- a/parmys-plugin/Makefile
+++ b/parmys-plugin/Makefile
@@ -16,8 +16,6 @@
 
 PLUGIN_DIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST))))
 
-ODIN_II_DIR=src
-
 NAME = parmys
 SOURCES = parmys.cc \
 		  parmys_arch.cc \
diff --git a/parmys-plugin/core/adder.cc b/parmys-plugin/core/adder.cc
index 719bbc97d..ff920398a 100644
--- a/parmys-plugin/core/adder.cc
+++ b/parmys-plugin/core/adder.cc
@@ -28,7 +28,7 @@
 #include "vtr_memory.h"
 #include "vtr_util.h"
 
-#include "parmys_utils.hpp"
+#include "parmys_utils.h"
 
 using vtr::t_linked_vptr;
 
diff --git a/parmys-plugin/core/hard_block.cc b/parmys-plugin/core/hard_block.cc
index 17615e4e2..a7437518f 100644
--- a/parmys-plugin/core/hard_block.cc
+++ b/parmys-plugin/core/hard_block.cc
@@ -26,7 +26,7 @@
 
 #include "kernel/yosys.h"
 
-#include "parmys_utils.hpp"
+#include "parmys_utils.h"
 
 STRING_CACHE *hard_block_names = NULL;
 
diff --git a/parmys-plugin/core/multiplier.cc b/parmys-plugin/core/multiplier.cc
index 68a65377d..befb0b337 100644
--- a/parmys-plugin/core/multiplier.cc
+++ b/parmys-plugin/core/multiplier.cc
@@ -36,7 +36,7 @@
 #include "vtr_memory.h"
 #include "vtr_util.h"
 
-#include "parmys_utils.hpp"
+#include "parmys_utils.h"
 
 using vtr::insert_in_vptr_list;
 using vtr::t_linked_vptr;
diff --git a/parmys-plugin/parmys.cc b/parmys-plugin/parmys.cc
index 29fe73ae3..2084d8190 100644
--- a/parmys-plugin/parmys.cc
+++ b/parmys-plugin/parmys.cc
@@ -33,7 +33,7 @@
 
 #include "netlist_visualizer.h"
 
-#include "parmys_resolve.hpp"
+#include "parmys_resolve.h"
 
 #include "adder.h"
 #include "arch_util.h"
@@ -48,8 +48,8 @@
 #include "subtractor.h"
 
 #include "ast_util.h"
-#include "parmys_update.hpp"
-#include "parmys_utils.hpp"
+#include "parmys_update.h"
+#include "parmys_utils.h"
 
 USING_YOSYS_NAMESPACE
 PRIVATE_NAMESPACE_BEGIN
diff --git a/parmys-plugin/parmys_arch.cc b/parmys-plugin/parmys_arch.cc
index 2175ebb38..d76c39e5c 100644
--- a/parmys-plugin/parmys_arch.cc
+++ b/parmys-plugin/parmys_arch.cc
@@ -20,7 +20,7 @@
 #include "arch_util.h"
 #include "echo_arch.h"
 #include "odin_types.h"
-#include "parmys_utils.hpp"
+#include "parmys_utils.h"
 #include "read_xml_arch_file.h"
 
 USING_YOSYS_NAMESPACE
diff --git a/parmys-plugin/parmys_resolve.cc b/parmys-plugin/parmys_resolve.cc
index 85af152ac..51cb665cb 100644
--- a/parmys-plugin/parmys_resolve.cc
+++ b/parmys-plugin/parmys_resolve.cc
@@ -26,7 +26,7 @@
 #include "block_memory.h"
 #include "memory.h"
 #include "multiplier.h"
-#include "parmys_resolve.hpp"
+#include "parmys_resolve.h"
 #include "subtractor.h"
 
 #include "vtr_util.h"
diff --git a/parmys-plugin/parmys_resolve.hpp b/parmys-plugin/parmys_resolve.h
similarity index 100%
rename from parmys-plugin/parmys_resolve.hpp
rename to parmys-plugin/parmys_resolve.h
diff --git a/parmys-plugin/parmys_update.cc b/parmys-plugin/parmys_update.cc
index b643a4682..ef55213c5 100644
--- a/parmys-plugin/parmys_update.cc
+++ b/parmys-plugin/parmys_update.cc
@@ -30,8 +30,8 @@
 #include "multiplier.h"
 
 #include "kernel/rtlil.h"
-#include "parmys_update.hpp"
-#include "parmys_utils.hpp"
+#include "parmys_update.h"
+#include "parmys_utils.h"
 
 USING_YOSYS_NAMESPACE
 
diff --git a/parmys-plugin/parmys_update.hpp b/parmys-plugin/parmys_update.h
similarity index 100%
rename from parmys-plugin/parmys_update.hpp
rename to parmys-plugin/parmys_update.h
diff --git a/parmys-plugin/parmys_utils.cc b/parmys-plugin/parmys_utils.cc
index 74f133fbe..a23a2848c 100644
--- a/parmys-plugin/parmys_utils.cc
+++ b/parmys-plugin/parmys_utils.cc
@@ -15,7 +15,7 @@
  *
  * SPDX-License-Identifier: Apache-2.0
  */
-#include "parmys_utils.hpp"
+#include "parmys_utils.h"
 
 USING_YOSYS_NAMESPACE
 
diff --git a/parmys-plugin/parmys_utils.hpp b/parmys-plugin/parmys_utils.h
similarity index 100%
rename from parmys-plugin/parmys_utils.hpp
rename to parmys-plugin/parmys_utils.h

From 8352c5d604b0bc5d7467f975fb5abe6c8323f210 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Fri, 10 Feb 2023 12:15:36 -0400
Subject: [PATCH 55/56] parmys: fix include order for format check; build only parmys

---
 Makefile                                      | 2 +-
 parmys-plugin/mapping/hard_soft_logic_mixer.h | 2 +-
 parmys-plugin/mapping/mixing_optimization.h   | 3 ++-
 parmys-plugin/mapping/odin_ii.h               | 1 +
 parmys-plugin/utils/odin_globals.h            | 2 +-
 5 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index dd92ed437..f8ad48099 100644
--- a/Makefile
+++ b/Makefile
@@ -14,7 +14,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-PLUGIN_LIST := fasm xdc params sdc ql-iob design_introspection integrateinv ql-qlf systemverilog uhdm dsp-ff parmys
+PLUGIN_LIST := parmys
 PLUGINS := $(foreach plugin,$(PLUGIN_LIST),$(plugin).so)
 PLUGINS_INSTALL := $(foreach plugin,$(PLUGIN_LIST),install_$(plugin))
 PLUGINS_CLEAN := $(foreach plugin,$(PLUGIN_LIST),clean_$(plugin))
diff --git a/parmys-plugin/mapping/hard_soft_logic_mixer.h b/parmys-plugin/mapping/hard_soft_logic_mixer.h
index 1db034e08..86c58da0e 100644
--- a/parmys-plugin/mapping/hard_soft_logic_mixer.h
+++ b/parmys-plugin/mapping/hard_soft_logic_mixer.h
@@ -18,8 +18,8 @@
 #ifndef _HARD_SOFT_LOGIC_MIXER_HPP_
 #define _HARD_SOFT_LOGIC_MIXER_HPP_
 
-#include "odin_types.h"
 #include "mixing_optimization.h"
+#include "odin_types.h"
 
 class HardSoftLogicMixer
 {
diff --git a/parmys-plugin/mapping/mixing_optimization.h b/parmys-plugin/mapping/mixing_optimization.h
index 65cc11b57..3aa71fb7b 100644
--- a/parmys-plugin/mapping/mixing_optimization.h
+++ b/parmys-plugin/mapping/mixing_optimization.h
@@ -17,7 +17,8 @@
  */
 #ifndef _MIXING_OPTIMIZATION_H_
 #define _MIXING_OPTIMIZATION_H_
-#include "odin_types.h" // netlist_t, config_t
+
+#include "odin_types.h"
 
 class HardSoftLogicMixer;
 /**
diff --git a/parmys-plugin/mapping/odin_ii.h b/parmys-plugin/mapping/odin_ii.h
index 0e563b9e9..dec1624dc 100644
--- a/parmys-plugin/mapping/odin_ii.h
+++ b/parmys-plugin/mapping/odin_ii.h
@@ -19,6 +19,7 @@
 #define _ODIN_II_H_
 
 #include "odin_types.h"
+
 /* Odin-II exit status enumerator */
 enum ODIN_ERROR_CODE { ERROR_INITIALIZATION, ERROR_PARSE_CONFIG, ERROR_PARSE_ARCH, ERROR_ELABORATION, ERROR_OPTIMIZATION, ERROR_TECHMAP };
 
diff --git a/parmys-plugin/utils/odin_globals.h b/parmys-plugin/utils/odin_globals.h
index 8a1a255a1..fa00ecb05 100644
--- a/parmys-plugin/utils/odin_globals.h
+++ b/parmys-plugin/utils/odin_globals.h
@@ -18,8 +18,8 @@
 #ifndef _ODIN_GLOBALS_H_
 #define _ODIN_GLOBALS_H_
 
-#include "hard_soft_logic_mixer.h"
 #include "config_t.h"
+#include "hard_soft_logic_mixer.h"
 #include "hash_table.h"
 #include "odin_types.h"
 #include "read_xml_arch_file.h"

From 504ef2e369c985249b6033ef125fc9636a601ff7 Mon Sep 17 00:00:00 2001
From: dani <17553473+poname@users.noreply.github.com>
Date: Fri, 10 Feb 2023 12:18:06 -0400
Subject: [PATCH 56/56] Makefile: restore full PLUGIN_LIST (build all plugins)

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index f8ad48099..dd92ed437 100644
--- a/Makefile
+++ b/Makefile
@@ -14,7 +14,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-PLUGIN_LIST := parmys
+PLUGIN_LIST := fasm xdc params sdc ql-iob design_introspection integrateinv ql-qlf systemverilog uhdm dsp-ff parmys
 PLUGINS := $(foreach plugin,$(PLUGIN_LIST),$(plugin).so)
 PLUGINS_INSTALL := $(foreach plugin,$(PLUGIN_LIST),install_$(plugin))
 PLUGINS_CLEAN := $(foreach plugin,$(PLUGIN_LIST),clean_$(plugin))