diff --git a/.gitignore b/.gitignore index 485cccfcf..29234d44d 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,6 @@ # Clangd cache .cache + +# Clangd configurations +.clangd diff --git a/.gitmodules b/.gitmodules index 00d892bd3..77bef44d1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,7 @@ path = thirdparty/mimalloc url = https://github.com/microsoft/mimalloc.git shallow = true +[submodule "thirdparty/riscv-gnu-toolchain"] + path = thirdparty/riscv-gnu-toolchain + url = https://github.com/riscv-collab/riscv-gnu-toolchain.git + shallow = true diff --git a/CMakeLists.txt b/CMakeLists.txt index 841444541..486346744 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,6 +22,7 @@ project(buddy-mlir LANGUAGES CXX C) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED YES) +include(ExternalProject) #------------------------------------------------------------------------------- # Options and settings #------------------------------------------------------------------------------- @@ -41,13 +42,15 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR BUDDY_MLIR_OUT_OF_TREE_ message(STATUS "Using MLIRConfig.cmake in: ${MLIR_DIR}") message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") - set(LLVM_MLIR_BINARY_DIR ${MLIR_DIR}/../../../bin) - set(LLVM_MLIR_LIBRARY_DIR ${MLIR_DIR}/../../../lib) - set(LLVM_PROJECT_BUILD_DIR ${MLIR_DIR}/../../../) - if(NOT DEFINED LLVM_PROJECT_SOURCE_DIR) - get_filename_component(LLVM_PROJECT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/llvm/ ABSOLUTE) + # LLVM_MAIN_SRC_DIR is a private variable of the LLVM in-tree build. + # To keep the unified one-step and two-step builds compatible, + # we set LLVM_MAIN_SRC_DIR ourselves here. + # This benefits users who want to specify a custom LLVM source directory, + # without interfering with users who simply build against the LLVM sources bundled with buddy-mlir. + if(NOT DEFINED LLVM_MAIN_SRC_DIR) + get_filename_component(LLVM_MAIN_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/llvm/llvm ABSOLUTE) endif() - set(LLVM_MLIR_SOURCE_DIR ${LLVM_PROJECT_SOURCE_DIR}/mlir) + set(MLIR_MAIN_SRC_DIR ${LLVM_MAIN_SRC_DIR}/../mlir) list(APPEND CMAKE_MODULE_PATH "${MLIR_CMAKE_DIR}") list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}") @@ -65,16 +68,9 @@ else() #------------------------------------------------------------------------------- # MLIR/LLVM Configuration #------------------------------------------------------------------------------- - - # Allow using out-of-tree llvm directory - set(LLVM_PROJECT_SOURCE_DIR ${LLVM_MAIN_SRC_DIR}/..) 
- message(STATUS "Using LLVM Project ${LLVM_PROJECT_SOURCE_DIR}") - set(MLIR_MAIN_SRC_DIR ${LLVM_MAIN_SRC_DIR}/../mlir) set(MLIR_INCLUDE_DIR ${MLIR_MAIN_SRC_DIR}/include) set(MLIR_GENERATED_INCLUDE_DIR ${LLVM_BINARY_DIR}/tools/mlir/include) - set(LLVM_MLIR_BINARY_DIR ${CMAKE_BINARY_DIR}/bin) - set(MLIR_INCLUDE_DIRS "${MLIR_INCLUDE_DIR};${MLIR_GENERATED_INCLUDE_DIR}") endif() #------------------------------------------------------------------------------- @@ -188,6 +184,24 @@ if(BUDDY_MLIR_USE_MIMALLOC) find_package(mimalloc REQUIRED) endif() +#------------------------------------------------------------------------------- +# The RISC-V toolchain +#------------------------------------------------------------------------------- + +if(BUDDY_MLIR_ENABLE_RISCV_GNU_TOOLCHAIN) + set(RISCV_GNU_TOOLCHAIN_DIR "${BUDDY_SOURCE_DIR}/thirdparty/riscv-gnu-toolchain") + set(RISCV_GNU_TOOLCHAIN_INSTALL_DIR "${CMAKE_BINARY_DIR}/thirdparty/riscv-gnu-toolchain") + ExternalProject_Add( + riscv-gnu-toolchain + SOURCE_DIR ${RISCV_GNU_TOOLCHAIN_DIR} + PREFIX ${RISCV_GNU_TOOLCHAIN_INSTALL_DIR} + CONFIGURE_COMMAND ${RISCV_GNU_TOOLCHAIN_DIR}/configure --prefix=${RISCV_GNU_TOOLCHAIN_INSTALL_DIR} + BUILD_COMMAND make clean && make linux build-qemu -j + BUILD_IN_SOURCE TRUE + INSTALL_COMMAND "" + ) +endif() + #------------------------------------------------------------------------------- # Initialize Python packages #------------------------------------------------------------------------------- @@ -201,6 +215,8 @@ if(BUDDY_MLIR_ENABLE_PYTHON_PACKAGES) # Create empty __init__.py files to make these directories Python packages file(WRITE ${BUDDY_MLIR_PYTHON_PACKAGES_DIR}/buddy/__init__.py "") file(WRITE ${BUDDY_MLIR_PYTHON_PACKAGES_DIR}/buddy/compiler/__init__.py "") + + install(DIRECTORY ${BUDDY_MLIR_PYTHON_PACKAGES_DIR}/buddy DESTINATION python_packages) endif() #------------------------------------------------------------------------------- diff --git a/README.md b/README.md index cb9a5f1c2..be650591b 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,17 @@ If you want to add domain-specific framework support, please add the following c | -------------- | ------------- | ------------- | | OpenCV | `-DBUDDY_ENABLE_OPENCV=ON` | Add `-DOpenCV_DIR=` or install OpenCV release version on your local device. | +To build buddy-mlir with custom LLVM sources: + +``` +$ cmake -G Ninja .. \ + -DMLIR_DIR=PATH/TO/LLVM/lib/cmake/mlir \ + -DLLVM_DIR=PATH/TO/LLVM/lib/cmake/llvm \ + -DLLVM_ENABLE_ASSERTIONS=ON \ + -DCMAKE_BUILD_TYPE=RELEASE \ + -DLLVM_MAIN_SRC_DIR=PATH/TO/LLVM_SOURCE +``` +
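+`LLVM_MAIN_SRC_DIR` should point to the `llvm` subdirectory of the LLVM source tree (e.g. `llvm-project/llvm`); the build then derives the MLIR sources from `${LLVM_MAIN_SRC_DIR}/../mlir`.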

One-step building strategy

If you only want to use our tools and integrate them more easily into your projects, you can choose to use the one-step build strategy. @@ -134,7 +145,7 @@ This repository have nix flake support. You can follow the [nix installation ins nix develop . ``` -This will setup a bash shell with `clang`, `clangd`, `cmake`, `ninja`, and other necessary dependencies to build buddy-mlir from source. +This will set up a bash shell with `clang`, `ccls`, `cmake`, `ninja`, and other necessary dependencies to build buddy-mlir from source. - If you want to use the buddy-mlir bintools diff --git a/backend/include/llvm/IR/CMakeLists.txt b/backend/include/llvm/IR/CMakeLists.txt index b3447eae6..2de6b999b 100644 --- a/backend/include/llvm/IR/CMakeLists.txt +++ b/backend/include/llvm/IR/CMakeLists.txt @@ -1,4 +1,4 @@ -include_directories(${LLVM_PROJECT_SOURCE_DIR}/llvm/include/llvm/IR/) +include_directories(${LLVM_MAIN_SRC_DIR}/include/llvm/IR/) set(LLVM_TARGET_DEFINITIONS IntrinsicsBuddyExt.td) tablegen(LLVM IntrinsicImpl.inc -gen-intrinsic-impl) diff --git a/backend/llvm/lib/Analysis/CMakeLists.txt b/backend/llvm/lib/Analysis/CMakeLists.txt index 2a3a65971..117f75d89 100644 --- a/backend/llvm/lib/Analysis/CMakeLists.txt +++ b/backend/llvm/lib/Analysis/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_Analysis_DIR ${LLVM_PROJECT_SOURCE_DIR}/llvm/lib/Analysis) +set(LLVM_Analysis_DIR ${LLVM_MAIN_SRC_DIR}/lib/Analysis) add_llvm_component_library(LLVMBuddyAnalysis diff --git a/backend/llvm/lib/AsmParser/CMakeLists.txt b/backend/llvm/lib/AsmParser/CMakeLists.txt index b5411d100..d687d1d3b 100644 --- a/backend/llvm/lib/AsmParser/CMakeLists.txt +++ b/backend/llvm/lib/AsmParser/CMakeLists.txt @@ -1,6 +1,6 @@ # AsmParser -set(LLVM_AsmParser_DIR ${LLVM_PROJECT_SOURCE_DIR}/llvm/lib/AsmParser) +set(LLVM_AsmParser_DIR ${LLVM_MAIN_SRC_DIR}/lib/AsmParser) add_llvm_component_library(LLVMBuddyAsmParser ${LLVM_AsmParser_DIR}/LLLexer.cpp diff --git a/backend/llvm/lib/Bitcode/Reader/CMakeLists.txt b/backend/llvm/lib/Bitcode/Reader/CMakeLists.txt index cf92a543f..7ea904801 100644 --- a/backend/llvm/lib/Bitcode/Reader/CMakeLists.txt +++ b/backend/llvm/lib/Bitcode/Reader/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_Reader_DIR ${LLVM_PROJECT_SOURCE_DIR}/llvm/lib/Bitcode/Reader) +set(LLVM_Reader_DIR ${LLVM_MAIN_SRC_DIR}/lib/Bitcode/Reader) add_llvm_component_library(LLVMBuddyBitReader ${LLVM_Reader_DIR}/BitcodeAnalyzer.cpp diff --git a/backend/llvm/lib/Bitcode/Writer/CMakeLists.txt b/backend/llvm/lib/Bitcode/Writer/CMakeLists.txt index f19595cea..a8b7f0c27 100644 --- a/backend/llvm/lib/Bitcode/Writer/CMakeLists.txt +++ b/backend/llvm/lib/Bitcode/Writer/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_Writer_DIR ${LLVM_PROJECT_SOURCE_DIR}/llvm/lib/Bitcode/Writer) +set(LLVM_Writer_DIR ${LLVM_MAIN_SRC_DIR}/lib/Bitcode/Writer) add_llvm_component_library(LLVMBuddyBitWriter diff --git a/backend/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt b/backend/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt index fe3273dd5..b942f4f73 100644 --- a/backend/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/backend/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_AsmPrinter_DIR ${LLVM_PROJECT_SOURCE_DIR}/llvm/lib/CodeGen/AsmPrinter) +set(LLVM_AsmPrinter_DIR ${LLVM_MAIN_SRC_DIR}/lib/CodeGen/AsmPrinter) add_llvm_component_library(LLVMBuddyAsmPrinter ${LLVM_AsmPrinter_DIR}/AccelTable.cpp diff --git a/backend/llvm/lib/CodeGen/CMakeLists.txt b/backend/llvm/lib/CodeGen/CMakeLists.txt index 1794b38fa..7eb38876d 100644 --- a/backend/llvm/lib/CodeGen/CMakeLists.txt +++ 
b/backend/llvm/lib/CodeGen/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_CodeGen_DIR ${LLVM_PROJECT_SOURCE_DIR}/llvm/lib/CodeGen) +set(LLVM_CodeGen_DIR ${LLVM_MAIN_SRC_DIR}/lib/CodeGen) add_llvm_component_library(LLVMBuddyCodeGen ${LLVM_CodeGen_DIR}/AggressiveAntiDepBreaker.cpp diff --git a/backend/llvm/lib/CodeGen/MIRParser/CMakeLists.txt b/backend/llvm/lib/CodeGen/MIRParser/CMakeLists.txt index 6275b1ece..1ab94ee93 100644 --- a/backend/llvm/lib/CodeGen/MIRParser/CMakeLists.txt +++ b/backend/llvm/lib/CodeGen/MIRParser/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_MIRParser_DIR ${LLVM_PROJECT_SOURCE_DIR}/llvm/lib/CodeGen/MIRParser) +set(LLVM_MIRParser_DIR ${LLVM_MAIN_SRC_DIR}/lib/CodeGen/MIRParser) add_llvm_component_library(LLVMBuddyMIRParser ${LLVM_MIRParser_DIR}/MILexer.cpp diff --git a/backend/llvm/lib/CodeGen/SelectionDAG/CMakeLists.txt b/backend/llvm/lib/CodeGen/SelectionDAG/CMakeLists.txt index 4bb3cde98..3b467a4ed 100644 --- a/backend/llvm/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/backend/llvm/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_SelectionDAG_DIR ${LLVM_PROJECT_SOURCE_DIR}/llvm/lib/CodeGen/SelectionDAG) +set(LLVM_SelectionDAG_DIR ${LLVM_MAIN_SRC_DIR}/lib/CodeGen/SelectionDAG) add_llvm_component_library(LLVMBuddySelectionDAG ${LLVM_SelectionDAG_DIR}/DAGCombiner.cpp diff --git a/backend/llvm/lib/IR/CMakeLists.txt b/backend/llvm/lib/IR/CMakeLists.txt index e6895a1f8..0d5618473 100644 --- a/backend/llvm/lib/IR/CMakeLists.txt +++ b/backend/llvm/lib/IR/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_IR_DIR ${LLVM_PROJECT_SOURCE_DIR}/llvm/lib/IR) +set(LLVM_IR_DIR ${LLVM_MAIN_SRC_DIR}/lib/IR) add_llvm_component_library(LLVMBuddyCore ${LLVM_IR_DIR}/AbstractCallSite.cpp diff --git a/backend/llvm/lib/IRReader/CMakeLists.txt b/backend/llvm/lib/IRReader/CMakeLists.txt index 9b315dec3..72e95722a 100644 --- a/backend/llvm/lib/IRReader/CMakeLists.txt +++ b/backend/llvm/lib/IRReader/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_IRReader_DIR ${LLVM_PROJECT_SOURCE_DIR}/llvm/lib/IRReader) +set(LLVM_IRReader_DIR ${LLVM_MAIN_SRC_DIR}/lib/IRReader) add_llvm_component_library(LLVMBuddyIRReader ${LLVM_IRReader_DIR}/IRReader.cpp diff --git a/backend/llvm/lib/Object/CMakeLists.txt b/backend/llvm/lib/Object/CMakeLists.txt index 8695d55ba..a8425e97c 100644 --- a/backend/llvm/lib/Object/CMakeLists.txt +++ b/backend/llvm/lib/Object/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_Object_DIR ${LLVM_PROJECT_SOURCE_DIR}/llvm/lib/Object) +set(LLVM_Object_DIR ${LLVM_MAIN_SRC_DIR}/lib/Object) add_llvm_component_library(LLVMBuddyObject ${LLVM_Object_DIR}/Archive.cpp diff --git a/backend/llvm/lib/ProfileData/CMakeLists.txt b/backend/llvm/lib/ProfileData/CMakeLists.txt index 9ae05a36f..742ecf662 100644 --- a/backend/llvm/lib/ProfileData/CMakeLists.txt +++ b/backend/llvm/lib/ProfileData/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_ProfileData_DIR ${LLVM_PROJECT_SOURCE_DIR}/llvm/lib/ProfileData) +set(LLVM_ProfileData_DIR ${LLVM_MAIN_SRC_DIR}/lib/ProfileData) add_llvm_component_library(LLVMBuddyProfileData ${LLVM_ProfileData_DIR}/GCOV.cpp diff --git a/backend/llvm/lib/Remarks/CMakeLists.txt b/backend/llvm/lib/Remarks/CMakeLists.txt index 4ed877577..5c1c81b7d 100644 --- a/backend/llvm/lib/Remarks/CMakeLists.txt +++ b/backend/llvm/lib/Remarks/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_Remarks_DIR ${LLVM_PROJECT_SOURCE_DIR}/llvm/lib/Remarks) +set(LLVM_Remarks_DIR ${LLVM_MAIN_SRC_DIR}/lib/Remarks) add_llvm_component_library(LLVMBuddyRemarks ${LLVM_Remarks_DIR}/BitstreamRemarkParser.cpp diff --git 
a/backend/llvm/lib/Target/CMakeLists.txt b/backend/llvm/lib/Target/CMakeLists.txt index c6298c383..1dd5cd34f 100644 --- a/backend/llvm/lib/Target/CMakeLists.txt +++ b/backend/llvm/lib/Target/CMakeLists.txt @@ -2,7 +2,7 @@ list(APPEND LLVM_COMMON_DEPENDS buddy_intrinsics_gen) list(APPEND LLVM_TABLEGEN_FLAGS -I ${LLVM_MAIN_SRC_DIR}/lib/Target) -set(LLVM_Target_DIR ${LLVM_PROJECT_SOURCE_DIR}/llvm/lib/Target) +set(LLVM_Target_DIR ${LLVM_MAIN_SRC_DIR}/lib/Target) add_llvm_component_library(LLVMBuddyTarget ${LLVM_Target_DIR}/Target.cpp diff --git a/backend/llvm/lib/Target/RISCV/CMakeLists.txt b/backend/llvm/lib/Target/RISCV/CMakeLists.txt index 4a66f6529..6bfee7c2f 100644 --- a/backend/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/backend/llvm/lib/Target/RISCV/CMakeLists.txt @@ -21,7 +21,7 @@ macro(buddy_add_llvm_target target_name) set( CURRENT_LLVM_TARGET LLVM${target_name} ) endmacro(buddy_add_llvm_target) -set(LLVM_TARGET_RISCV_DIR ${LLVM_PROJECT_SOURCE_DIR}/llvm/lib/Target/RISCV) +set(LLVM_TARGET_RISCV_DIR ${LLVM_MAIN_SRC_DIR}/lib/Target/RISCV) # ------------------------------------------------------------------------------ # Configure RISC-V Buddy Extension. diff --git a/backend/llvm/lib/Transforms/IPO/CMakeLists.txt b/backend/llvm/lib/Transforms/IPO/CMakeLists.txt index 74ff79863..08392abf8 100644 --- a/backend/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/backend/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_IPO_DIR ${LLVM_PROJECT_SOURCE_DIR}/llvm/lib/Transforms/IPO) +set(LLVM_IPO_DIR ${LLVM_MAIN_SRC_DIR}/lib/Transforms/IPO) add_llvm_component_library(LLVMBuddyIPO ${LLVM_IPO_DIR}/AlwaysInliner.cpp diff --git a/backend/llvm/lib/Transforms/Scalar/CMakeLists.txt b/backend/llvm/lib/Transforms/Scalar/CMakeLists.txt index c3c412b9a..6bbcf432e 100644 --- a/backend/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ b/backend/llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_Scalar_DIR ${LLVM_PROJECT_SOURCE_DIR}/llvm/lib/Transforms/Scalar) +set(LLVM_Scalar_DIR ${LLVM_MAIN_SRC_DIR}/lib/Transforms/Scalar) add_llvm_component_library(LLVMBuddyScalarOpts ${LLVM_Scalar_DIR}/ADCE.cpp diff --git a/backend/llvm/lib/Transforms/Utils/CMakeLists.txt b/backend/llvm/lib/Transforms/Utils/CMakeLists.txt index 989a672ed..e3313e07b 100644 --- a/backend/llvm/lib/Transforms/Utils/CMakeLists.txt +++ b/backend/llvm/lib/Transforms/Utils/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_Utils_DIR ${LLVM_PROJECT_SOURCE_DIR}/llvm/lib/Transforms/Utils) +set(LLVM_Utils_DIR ${LLVM_MAIN_SRC_DIR}/lib/Transforms/Utils) add_llvm_component_library(LLVMBuddyTransformUtils diff --git a/backend/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/backend/llvm/lib/Transforms/Vectorize/CMakeLists.txt index e9cece2c4..669aae585 100644 --- a/backend/llvm/lib/Transforms/Vectorize/CMakeLists.txt +++ b/backend/llvm/lib/Transforms/Vectorize/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_Vectorize_DIR ${LLVM_PROJECT_SOURCE_DIR}/llvm/lib/Transforms/Vectorize) +set(LLVM_Vectorize_DIR ${LLVM_MAIN_SRC_DIR}/lib/Transforms/Vectorize) add_llvm_component_library(LLVMBuddyVectorize ${LLVM_Vectorize_DIR}/LoadStoreVectorizer.cpp diff --git a/docs/PythonEnvironment.md b/docs/PythonEnvironment.md new file mode 100644 index 000000000..77f431e85 --- /dev/null +++ b/docs/PythonEnvironment.md @@ -0,0 +1,10 @@ +# Python Virtual Environment Setup Guide for Buddy-mlir + +We recommend using Anaconda3 to create a Python virtual environment. Install the Python packages listed in buddy-mlir/requirements.txt.
+
+```bash
+$ conda create -n <env-name> python=3.11
+$ conda activate <env-name>
+$ cd buddy-mlir
+$ pip install -r requirements.txt
+```
\ No newline at end of file
diff --git a/docs/RVVEnvironment.md b/docs/RVVEnvironment.md new file mode 100644 index 000000000..ddca0ab8f --- /dev/null +++ b/docs/RVVEnvironment.md @@ -0,0 +1,153 @@
+# Environment Setup Guide for MLIR and RVV Testing and Experiments
+
+This guide provides instructions on setting up an environment to test the RISC-V Vector Extension using the buddy-mlir project.
+The target platform for emulation is QEMU.
+
+## Requirements
+
+Before proceeding any further, make sure that you have installed the dependencies below:
+
+* [LLVM dependencies](https://llvm.org/docs/GettingStarted.html#requirements)
+* [GNU Toolchain dependencies](https://github.com/riscv-collab/riscv-gnu-toolchain#prerequisites)
+* [QEMU dependencies](https://wiki.qemu.org/Hosts/Linux)
+
+## Build Steps
+
+> **_NOTE:_** The build process includes several heavy stages. It may take significant time to clone and build all components.
+
+0. Prepare `buddy-mlir` and Submodules
+
+```
+$ git clone https://github.com/buddy-compiler/buddy-mlir.git
+$ cd buddy-mlir
+$ git submodule update --init
+```
+
+1. Build Local LLVM/MLIR
+
+```
+$ cd buddy-mlir
+$ mkdir llvm/build
+$ cd llvm/build
+$ cmake -G Ninja ../llvm \
+    -DLLVM_ENABLE_PROJECTS="mlir;clang;openmp" \
+    -DLLVM_TARGETS_TO_BUILD="host;RISCV" \
+    -DLLVM_ENABLE_ASSERTIONS=ON \
+    -DOPENMP_ENABLE_LIBOMPTARGET=OFF \
+    -DCMAKE_BUILD_TYPE=RELEASE \
+    -DMLIR_ENABLE_BINDINGS_PYTHON=ON \
+    -DPython3_EXECUTABLE=$(which python3)
+$ ninja check-clang check-mlir omp
+$ export BUILD_LOCAL_LLVM_DIR=$PWD
+```
+
+2. Build Local `buddy-mlir`
+
+```
+$ cd buddy-mlir
+$ mkdir build
+$ cd build
+$ cmake -G Ninja .. \
+    -DMLIR_DIR=$PWD/../llvm/build/lib/cmake/mlir \
+    -DLLVM_DIR=$PWD/../llvm/build/lib/cmake/llvm \
+    -DLLVM_ENABLE_ASSERTIONS=ON \
+    -DCMAKE_BUILD_TYPE=RELEASE \
+    -DBUDDY_MLIR_ENABLE_RISCV_GNU_TOOLCHAIN=ON \
+    -DBUDDY_MLIR_ENABLE_PYTHON_PACKAGES=ON \
+    -DPython3_EXECUTABLE=$(which python3)
+$ ninja
+$ ninja check-buddy
+$ export BUILD_RISCV_GNU_TOOLCHAIN_DIR=$PWD/thirdparty/riscv-gnu-toolchain/
+$ export RISCV_GNU_TOOLCHAIN_SYSROOT_DIR=${BUILD_RISCV_GNU_TOOLCHAIN_DIR}/sysroot/
+```
+
+3. Build Cross-Compiled Clang
+
+```
+$ cd buddy-mlir
+$ mkdir llvm/build-cross-clang-rv
+$ cd llvm/build-cross-clang-rv
+$ cmake -G Ninja ../llvm \
+    -DLLVM_ENABLE_PROJECTS="clang" \
+    -DLLVM_TARGETS_TO_BUILD="RISCV" \
+    -DCMAKE_SYSTEM_NAME=Linux \
+    -DCMAKE_C_COMPILER=${BUILD_LOCAL_LLVM_DIR}/bin/clang \
+    -DCMAKE_CXX_COMPILER=${BUILD_LOCAL_LLVM_DIR}/bin/clang++ \
+    -DCMAKE_C_FLAGS="--target=riscv64-unknown-linux-gnu --sysroot=${RISCV_GNU_TOOLCHAIN_SYSROOT_DIR} --gcc-toolchain=${BUILD_RISCV_GNU_TOOLCHAIN_DIR}" \
+    -DCMAKE_CXX_FLAGS="--target=riscv64-unknown-linux-gnu --sysroot=${RISCV_GNU_TOOLCHAIN_SYSROOT_DIR} --gcc-toolchain=${BUILD_RISCV_GNU_TOOLCHAIN_DIR}" \
+    -DLLVM_TABLEGEN=${BUILD_LOCAL_LLVM_DIR}/bin/llvm-tblgen \
+    -DCLANG_TABLEGEN=${BUILD_LOCAL_LLVM_DIR}/bin/clang-tblgen \
+    -DLLVM_DEFAULT_TARGET_TRIPLE=riscv64-unknown-linux-gnu \
+    -DLLVM_TARGET_ARCH=RISCV64 \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DLLVM_ENABLE_ZSTD=Off
+$ ninja clang lli
+```
+
+4. 
Build Cross-Compiled MLIR + +``` +$ cd buddy-mlir +$ mkdir llvm/build-cross-mlir-rv +$ cd llvm/build-cross-mlir-rv +$ cmake -G Ninja ../../llvm/llvm \ + -DLLVM_ENABLE_PROJECTS="mlir" \ + -DLLVM_BUILD_EXAMPLES=OFF \ + -DCMAKE_CROSSCOMPILING=True \ + -DLLVM_TARGET_ARCH=RISCV64 \ + -DLLVM_TARGETS_TO_BUILD=RISCV \ + -DCMAKE_BUILD_TYPE=Release \ + -DLLVM_ENABLE_ASSERTIONS=ON \ + -DLLVM_NATIVE_ARCH=RISCV \ + -DLLVM_HOST_TRIPLE=riscv64-unknown-linux-gnu \ + -DLLVM_DEFAULT_TARGET_TRIPLE=riscv64-unknown-linux-gnu \ + -DCMAKE_C_COMPILER=${BUILD_LOCAL_LLVM_DIR}/bin/clang \ + -DCMAKE_CXX_COMPILER=${BUILD_LOCAL_LLVM_DIR}/bin/clang++ \ + -DCMAKE_C_FLAGS="--target=riscv64-unknown-linux-gnu --sysroot=${RISCV_GNU_TOOLCHAIN_SYSROOT_DIR} --gcc-toolchain=${BUILD_RISCV_GNU_TOOLCHAIN_DIR}" \ + -DCMAKE_CXX_FLAGS="--target=riscv64-unknown-linux-gnu --sysroot=${RISCV_GNU_TOOLCHAIN_SYSROOT_DIR} --gcc-toolchain=${BUILD_RISCV_GNU_TOOLCHAIN_DIR}" \ + -DMLIR_TABLEGEN=${BUILD_LOCAL_LLVM_DIR}/bin/mlir-tblgen \ + -DLLVM_TABLEGEN=${BUILD_LOCAL_LLVM_DIR}/bin/llvm-tblgen \ + -DMLIR_LINALG_ODS_YAML_GEN=${BUILD_LOCAL_LLVM_DIR}/bin/mlir-linalg-ods-yaml-gen \ + -DMLIR_PDLL_TABLEGEN=${BUILD_LOCAL_LLVM_DIR}/bin/mlir-pdll \ + -DLLVM_ENABLE_ZSTD=Off +$ ninja +$ export BUILD_CROSS_MLIR_DIR=$PWD +``` + +5. Build Cross-Compiled `buddy-mlir` + +``` +$ cd buddy-mlir +$ mkdir build-cross-rv +$ cd build-cross-rv +$ cmake -G Ninja .. \ + -DCMAKE_SYSTEM_NAME=Linux \ + -DMLIR_DIR=${BUILD_CROSS_MLIR_DIR}/lib/cmake/mlir \ + -DLLVM_DIR=${BUILD_CROSS_MLIR_DIR}/lib/cmake/llvm \ + -DCMAKE_CROSSCOMPILING=True \ + -DLLVM_TARGETS_TO_BUILD=RISCV \ + -DCMAKE_BUILD_TYPE=Release \ + -DLLVM_ENABLE_ASSERTIONS=ON \ + -DLLVM_NATIVE_ARCH=RISCV \ + -DLLVM_HOST_TRIPLE=riscv64-unknown-linux-gnu \ + -DCMAKE_C_COMPILER=${BUILD_LOCAL_LLVM_DIR}/bin/clang \ + -DCMAKE_CXX_COMPILER=${BUILD_LOCAL_LLVM_DIR}/bin/clang++ \ + -DCMAKE_C_FLAGS="--target=riscv64-unknown-linux-gnu --sysroot=${RISCV_GNU_TOOLCHAIN_SYSROOT_DIR} --gcc-toolchain=${BUILD_RISCV_GNU_TOOLCHAIN_DIR}" \ + -DCMAKE_CXX_FLAGS="--target=riscv64-unknown-linux-gnu --sysroot=${RISCV_GNU_TOOLCHAIN_SYSROOT_DIR} --gcc-toolchain=${BUILD_RISCV_GNU_TOOLCHAIN_DIR}" \ + -DLLVM_ENABLE_ZSTD=Off +$ ninja StaticMLIRCRunnerUtils StaticMLIRRunnerUtils +``` + +## Testing RVV Environment + +``` +$ cd buddy-mlir +$ cd examples/RVVDialect/ +$ make rvv-mul-add-run + +// Expected Output: +Unranked Memref base@ = 0x55555729aaa0 rank = 1 offset = 0 sizes = [20] strides = [1] data = +[0, 12, 26, 42, 60, 80, 102, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] +``` + +Congratulations! Your RVV environment is now fully set up. Enjoy exploring and testing! diff --git a/docs/rvv-enviroment.md b/docs/rvv-enviroment.md deleted file mode 100644 index f48a8262d..000000000 --- a/docs/rvv-enviroment.md +++ /dev/null @@ -1,35 +0,0 @@ -# Setting up environment for testing MLIR RVV dialect - -This guide will help to set up environment for testing RISC-V Vector Extension using buddy-mlir project and -corresponding RVV Dialect. As a target platform QEMU emulator is used. - -## Requirements - -Before proceed any further make sure that you installed dependencies below - -* [LLVM dependecies](https://llvm.org/docs/GettingStarted.html#requirements) -* [GNU Toolchain dependecies](https://github.com/riscv-collab/riscv-gnu-toolchain#prerequisites) -* [QEMU dependecies](https://wiki.qemu.org/Hosts/Linux) - -## Build steps - -1. 
Clone buddy-mlir project -``` bash -git clone git@github.com:buddy-compiler/buddy-mlir.git -cd buddy-mlir -git submodule update --init -``` -> **_NOTE:_** `buddly-mlir` contains `llvm-project` as a submodule. `llvm-project` is large, so cloning will take a while - -2. Run a script building environment -``` -cd buddy-mlir/thirdparty -./build-rvv-env.sh -``` -> **_NOTE:_** The scripts consist of multiple heavy stages, so be patient - it will take a while to clone and build -everything. -Detailed description of the steps can be found in [the page](https://gist.github.com/zhanghb97/ad44407e169de298911b8a4235e68497) - -> **_NOTE:_** By default, the script allows `make` to use all available threads for compilation. It may lead -to consuming a lot of memory and crashing the compiler. If you face with the issue, try to limit the number of threads -by passing a corresponding argument to the script. For example, `./build-rvv-env.sh 4` diff --git a/examples/BuddyBert/CMakeLists.txt b/examples/BuddyBert/CMakeLists.txt index 93dc7c2da..95c98dfa9 100644 --- a/examples/BuddyBert/CMakeLists.txt +++ b/examples/BuddyBert/CMakeLists.txt @@ -7,13 +7,13 @@ add_custom_command( add_custom_command( OUTPUT forward.o - COMMAND ${LLVM_MLIR_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyBert/forward.mlir + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyBert/forward.mlir -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith), empty-tensor-to-alloc-tensor, convert-elementwise-to-linalg, arith-bufferize, func.func(linalg-bufferize, tensor-bufferize), func-bufferize)" | - ${LLVM_MLIR_BINARY_DIR}/mlir-opt + ${LLVM_TOOLS_BINARY_DIR}/mlir-opt -pass-pipeline "builtin.module(func.func(buffer-deallocation-simplification, convert-linalg-to-loops), eliminate-empty-tensors, func.func(llvm-request-c-wrappers),convert-math-to-llvm, convert-math-to-libm, convert-scf-to-cf, convert-arith-to-llvm, expand-strided-metadata, finalize-memref-to-llvm, convert-func-to-llvm, reconcile-unrealized-casts)" | - ${LLVM_MLIR_BINARY_DIR}/mlir-translate -mlir-to-llvmir | - ${LLVM_MLIR_BINARY_DIR}/llvm-as | - ${LLVM_MLIR_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O0 -o ${BUDDY_BINARY_DIR}/../examples/BuddyBert/forward.o + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O0 -o ${BUDDY_BINARY_DIR}/../examples/BuddyBert/forward.o DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyBert/forward.mlir COMMENT "Building forward.o" VERBATIM) @@ -22,11 +22,11 @@ add_custom_command( OUTPUT subgraph0.o COMMAND ${BUDDY_BINARY_DIR}/buddy-opt ${BUDDY_EXAMPLES_DIR}/BuddyBert/subgraph0.mlir -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith), empty-tensor-to-alloc-tensor, convert-elementwise-to-linalg, func-bufferize-dynamic-offset, arith-bufferize, func.func(linalg-bufferize, tensor-bufferize))" | - ${LLVM_MLIR_BINARY_DIR}/mlir-opt + ${LLVM_TOOLS_BINARY_DIR}/mlir-opt -pass-pipeline "builtin.module(func.func(buffer-deallocation-simplification, convert-linalg-to-loops), eliminate-empty-tensors, func.func(llvm-request-c-wrappers),convert-math-to-llvm, convert-math-to-libm, convert-scf-to-cf, convert-arith-to-llvm, expand-strided-metadata, finalize-memref-to-llvm, convert-func-to-llvm, reconcile-unrealized-casts)" | - ${LLVM_MLIR_BINARY_DIR}/mlir-translate -mlir-to-llvmir | - ${LLVM_MLIR_BINARY_DIR}/llvm-as | - 
${LLVM_MLIR_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O0 -o ${BUDDY_BINARY_DIR}/../examples/BuddyBert/subgraph0.o + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O0 -o ${BUDDY_BINARY_DIR}/../examples/BuddyBert/subgraph0.o DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyBert/subgraph0.mlir COMMENT "Building subgraph0.o" VERBATIM) @@ -36,7 +36,7 @@ add_library(BERT STATIC forward.o subgraph0.o) SET_TARGET_PROPERTIES(BERT PROPERTIES LINKER_LANGUAGE C) add_executable(buddy-bert-run bert-main.cpp) -target_link_directories(buddy-bert-run PRIVATE ${LLVM_MLIR_LIBRARY_DIR}) +target_link_directories(buddy-bert-run PRIVATE ${LLVM_LIBRARY_DIR}) set(BUDDY_BERT_LIBS BERT mlir_c_runner_utils) target_link_libraries(buddy-bert-run ${BUDDY_BERT_LIBS}) diff --git a/examples/BuddyConvolution/.gitignore b/examples/BuddyConvolution/.gitignore new file mode 100644 index 000000000..df9389428 --- /dev/null +++ b/examples/BuddyConvolution/.gitignore @@ -0,0 +1,4 @@ +log.mlir +log.ll +log.s +a.out diff --git a/examples/BuddyConvolution/conv2d-nhwc-fhwc-opt.mlir b/examples/BuddyConvolution/conv2d-nhwc-fhwc-opt.mlir new file mode 100644 index 000000000..76d5e4d93 --- /dev/null +++ b/examples/BuddyConvolution/conv2d-nhwc-fhwc-opt.mlir @@ -0,0 +1,137 @@
+// RUN: buddy-opt %s \
+// RUN:     -convert-vector-to-scf \
+// RUN:     -lower-affine \
+// RUN:     -arith-bufferize \
+// RUN:     -convert-scf-to-cf \
+// RUN:     -convert-vector-to-llvm \
+// RUN:     -convert-arith-to-llvm \
+// RUN:     -finalize-memref-to-llvm \
+// RUN:     -convert-func-to-llvm \
+// RUN:     -reconcile-unrealized-casts \
+// RUN: | mlir-cpu-runner -O3 -e main -entry-point-result=void \
+// RUN:     -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \
+// RUN:     -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+// Using `8` as the vector size.
+#map = affine_map<(d0) -> (d0 floordiv 8)>
+#map0 = affine_map<(d0, d1, d2, d3) -> (d2)>
+#map1 = affine_map<(d0, d1) -> (d0 + d1)>
+#map2 = affine_map<(d0, d1) -> (d0 + d1 * 8)>
+#map3 = affine_map<(d0) -> (d0 * 8)>
+
+module {
+  func.func private @printMemrefF32(memref<*xf32>)
+  func.func private @rtclock() -> f64
+
+  func.func @conv_2d_nhwc_fhwc(%arg0: memref<?x?x?x?xf32>, %arg1: memref<?x?x?x?xf32>, %arg2: memref<?x?x?x?xf32>) {
+    %f0 = arith.constant 0. : f32
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %c2 = arith.constant 2 : index
+    %c3 = arith.constant 3 : index
+    %n = memref.dim %arg0, %c0 : memref<?x?x?x?xf32>
+    %h_i = memref.dim %arg0, %c1 : memref<?x?x?x?xf32>
+    %w_i = memref.dim %arg0, %c2 : memref<?x?x?x?xf32>
+    %c = memref.dim %arg0, %c3 : memref<?x?x?x?xf32>
+    %f = memref.dim %arg1, %c0 : memref<?x?x?x?xf32>
+    %h_k = memref.dim %arg1, %c1 : memref<?x?x?x?xf32>
+    %w_k = memref.dim %arg1, %c2 : memref<?x?x?x?xf32>
+    %h_o = memref.dim %arg2, %c1 : memref<?x?x?x?xf32>
+    %w_o = memref.dim %arg2, %c2 : memref<?x?x?x?xf32>
+
+    // Output is NHoWoF
+    affine.for %idx_n = %c0 to %n {
+      affine.for %idx_f = %c0 to %f {
+        affine.for %idx_c = %c0 to %c {
+          affine.for %idx_h_o = %c0 to %h_o {
+            affine.for %idx_h_k = %c0 to %h_k {
+              affine.for %idx_w_k = %c0 to %w_k {
+                affine.for %idx_w_o = %c0 to #map(%w_o) {
+                  %kernel_ele = memref.load %arg1[%idx_f, %idx_h_k, %idx_w_k, %idx_c] : memref<?x?x?x?xf32>
+                  %kernel_vec = vector.broadcast %kernel_ele : f32 to vector<8xf32>
+                  %in_iter_h = affine.apply #map1 (%idx_h_k, %idx_h_o)
+                  %in_iter_w = affine.apply #map2 (%idx_w_k, %idx_w_o)
+                  %out_iter_w = affine.apply #map3 (%idx_w_o)
+                  %input_vec = vector.transfer_read %arg0[%idx_n, %in_iter_h, %in_iter_w, %idx_c], %f0
+                    { permutation_map = #map0 } : memref<?x?x?x?xf32>, vector<8xf32>
+                  %output_vec = vector.transfer_read %arg2[%idx_n, %idx_h_o, %out_iter_w, %idx_f], %f0
+                    { permutation_map = #map0 } : memref<?x?x?x?xf32>, vector<8xf32>
+                  %res_vec = vector.fma %kernel_vec, %input_vec, %output_vec : vector<8xf32>
+                  vector.transfer_write %res_vec, %arg2[%idx_n, %idx_h_o, %out_iter_w, %idx_f]
+                    { permutation_map = #map0 } : vector<8xf32>, memref<?x?x?x?xf32>
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+
+    return
+  }
+
+  func.func @alloc_f32(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: f32) -> memref<?x?x?x?xf32> {
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %0 = memref.alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32>
+    scf.for %idx0 = %c0 to %arg0 step %c1 {
+      scf.for %idx1 = %c0 to %arg1 step %c1 {
+        scf.for %idx2 = %c0 to %arg2 step %c1 {
+          scf.for %idx3 = %c0 to %arg3 step %c1 {
+            memref.store %arg4, %0[%idx0, %idx1, %idx2, %idx3] : memref<?x?x?x?xf32>
+          }
+        }
+      }
+    }
+    return %0 : memref<?x?x?x?xf32>
+  }
+
+  func.func @main() {
+    %f0 = arith.constant 0.000000e+00 : f32
+    %f2 = arith.constant 2.000000e+00 : f32
+    %f3 = arith.constant 3.000000e+00 : f32
+
+    %c1 = arith.constant 1 : index
+    %c2 = arith.constant 2 : index
+    %c3 = arith.constant 3 : index
+    %c5 = arith.constant 5 : index
+    %c6 = arith.constant 6 : index
+    %c8 = arith.constant 8 : index
+    %c12 = arith.constant 12 : index
+    %c16 = arith.constant 16 : index
+    %c24 = arith.constant 24 : index
+    %c28 = arith.constant 28 : index
+
+    // %v0 = call @alloc_f32(%c1, %c12, %c12, %c6, %f2) : (index, index, index, index, f32) -> memref<?x?x?x?xf32>
+    // %v1 = call @alloc_f32(%c16, %c5, %c5, %c6, %f3) : (index, index, index, index, f32) -> memref<?x?x?x?xf32>
+    // %v2 = call @alloc_f32(%c1, %c8, %c8, %c16, %f0) : (index, index, index, index, f32) -> memref<?x?x?x?xf32>
+
+    %v0 = call @alloc_f32(%c1, %c28, %c28, %c1, %f2) : (index, index, index, index, f32) -> memref<?x?x?x?xf32>
+    %v1 = call @alloc_f32(%c6, %c5, %c5, %c1, %f3) : (index, index, index, index, f32) -> memref<?x?x?x?xf32>
+    %v2 = call @alloc_f32(%c1, %c24, %c24, %c6, %f0) : (index, index, index, index, f32) -> memref<?x?x?x?xf32>
+
+    %t_start = call @rtclock() : () -> f64
+    call @conv_2d_nhwc_fhwc(%v0, %v1, %v2) : (memref<?x?x?x?xf32>, memref<?x?x?x?xf32>, memref<?x?x?x?xf32>) -> ()
+    %t_end = call @rtclock() : () -> f64
+
+    // All the elements of the MemRef are the same,
+    // only check the first line to verify the correctness.
+    // CHECK: Unranked Memref
+    // CHECK: [
+    // CHECK: [
+    // CHECK: [
+    // CHECK: [150{{(, 150)*}}],
+    %print_v2 = memref.cast %v2 : memref<?x?x?x?xf32> to memref<*xf32>
+    call @printMemrefF32(%print_v2) : (memref<*xf32>) -> ()
+
+    %time = arith.subf %t_end, %t_start : f64
+    vector.print %time : f64
+
+    memref.dealloc %v0 : memref<?x?x?x?xf32>
+    memref.dealloc %v1 : memref<?x?x?x?xf32>
+    memref.dealloc %v2 : memref<?x?x?x?xf32>
+
+    return
+  }
+} diff --git a/examples/BuddyConvolution/conv2d-nhwc-fhwc.mlir b/examples/BuddyConvolution/conv2d-nhwc-fhwc.mlir new file mode 100644 index 000000000..90759355e --- /dev/null +++ b/examples/BuddyConvolution/conv2d-nhwc-fhwc.mlir @@ -0,0 +1,88 @@
+// RUN: buddy-opt %s \
+// RUN:     -convert-linalg-to-loops \
+// RUN:     -lower-affine \
+// RUN:     -arith-bufferize \
+// RUN:     -convert-scf-to-cf \
+// RUN:     -convert-vector-to-llvm \
+// RUN:     -convert-arith-to-llvm \
+// RUN:     -finalize-memref-to-llvm \
+// RUN:     -convert-func-to-llvm \
+// RUN:     -reconcile-unrealized-casts \
+// RUN: | mlir-cpu-runner -e main -entry-point-result=void \
+// RUN:     -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \
+// RUN:     -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+module {
+  func.func private @printMemrefF32(memref<*xf32>)
+  func.func private @rtclock() -> f64
+
+  func.func @conv_2d_nhwc_fhwc(%arg0: memref<?x?x?x?xf32>, %arg1: memref<?x?x?x?xf32>, %arg2: memref<?x?x?x?xf32>) {
+    linalg.conv_2d_nhwc_fhwc ins (%arg0, %arg1: memref<?x?x?x?xf32>, memref<?x?x?x?xf32>)
+      outs (%arg2: memref<?x?x?x?xf32>)
+    return
+  }
+
+  func.func @alloc_f32(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: f32) -> memref<?x?x?x?xf32> {
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %0 = memref.alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32>
+    scf.for %idx0 = %c0 to %arg0 step %c1 {
+      scf.for %idx1 = %c0 to %arg1 step %c1 {
+        scf.for %idx2 = %c0 to %arg2 step %c1 {
+          scf.for %idx3 = %c0 to %arg3 step %c1 {
+            memref.store %arg4, %0[%idx0, %idx1, %idx2, %idx3] : memref<?x?x?x?xf32>
+          }
+        }
+      }
+    }
+    return %0 : memref<?x?x?x?xf32>
+  }
+
+  func.func @main() {
+    %f0 = arith.constant 0.000000e+00 : f32
+    %f2 = arith.constant 2.000000e+00 : f32
+    %f3 = arith.constant 3.000000e+00 : f32
+
+    %c1 = arith.constant 1 : index
+    %c2 = arith.constant 2 : index
+    %c3 = arith.constant 3 : index
+    %c5 = arith.constant 5 : index
+    %c6 = arith.constant 6 : index
+    %c8 = arith.constant 8 : index
+    %c12 = arith.constant 12 : index
+    %c16 = arith.constant 16 : index
+    %c24 = arith.constant 24 : index
+    %c28 = arith.constant 28 : index
+
+    // %v0 = call @alloc_f32(%c1, %c12, %c12, %c6, %f2) : (index, index, index, index, f32) -> memref<?x?x?x?xf32>
+    // %v1 = call @alloc_f32(%c16, %c5, %c5, %c6, %f3) : (index, index, index, index, f32) -> memref<?x?x?x?xf32>
+    // %v2 = call @alloc_f32(%c1, %c8, %c8, %c16, %f0) : (index, index, index, index, f32) -> memref<?x?x?x?xf32>
+
+    %v0 = call @alloc_f32(%c1, %c28, %c28, %c1, %f2) : (index, index, index, index, f32) -> memref<?x?x?x?xf32>
+    %v1 = call @alloc_f32(%c6, %c5, %c5, %c1, %f3) : (index, index, index, index, f32) -> memref<?x?x?x?xf32>
+    %v2 = call @alloc_f32(%c1, %c24, %c24, %c6, %f0) : (index, index, index, index, f32) -> memref<?x?x?x?xf32>
+
+    %t_start = call @rtclock() : () -> f64
+    call @conv_2d_nhwc_fhwc(%v0, %v1, %v2) : (memref<?x?x?x?xf32>, memref<?x?x?x?xf32>, memref<?x?x?x?xf32>) -> ()
+    %t_end = call @rtclock() : () -> f64
+
+    // All the elements of the MemRef are the same,
+    // only check the first line to verify the correctness.
+    // CHECK: Unranked Memref
+    // CHECK: [
+    // CHECK: [
+    // CHECK: [
+    // CHECK: [150{{(, 150)*}}],
+    %print_v2 = memref.cast %v2 : memref<?x?x?x?xf32> to memref<*xf32>
+    call @printMemrefF32(%print_v2) : (memref<*xf32>) -> ()
+
+    %time = arith.subf %t_end, %t_start : f64
+    vector.print %time : f64
+
+    memref.dealloc %v0 : memref<?x?x?x?xf32>
+    memref.dealloc %v1 : memref<?x?x?x?xf32>
+    memref.dealloc %v2 : memref<?x?x?x?xf32>
+    return
+  }
+} diff --git a/examples/BuddyConvolution/conv2d.mlir b/examples/BuddyConvolution/conv2d.mlir new file mode 100644 index 000000000..c4f1ac2ef --- /dev/null +++ b/examples/BuddyConvolution/conv2d.mlir @@ -0,0 +1,71 @@
+// RUN: buddy-opt %s \
+// RUN:     -conv-vectorization \
+// RUN:     -convert-linalg-to-loops \
+// RUN:     -lower-affine \
+// RUN:     -arith-bufferize \
+// RUN:     -convert-scf-to-cf \
+// RUN:     -convert-vector-to-llvm \
+// RUN:     -convert-arith-to-llvm \
+// RUN:     -finalize-memref-to-llvm \
+// RUN:     -llvm-request-c-wrappers \
+// RUN:     -convert-func-to-llvm \
+// RUN:     -reconcile-unrealized-casts \
+// RUN: | mlir-cpu-runner -e main -entry-point-result=void \
+// RUN:     -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \
+// RUN:     -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+#map0 = affine_map<(d0, d1) -> (d0 + d1 - 1)>
+
+module {
+  func.func private @printMemrefF32(memref<*xf32>)
+
+  func.func @conv_2d(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
+    linalg.conv_2d ins (%arg0, %arg1: memref<?x?xf32>, memref<?x?xf32>)
+      outs (%arg2: memref<?x?xf32>)
+    return
+  }
+
+  func.func @alloc_f32(%arg0: index, %arg1: index, %arg2: f32) -> memref<?x?xf32> {
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %0 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
+    scf.for %arg3 = %c0 to %arg0 step %c1 {
+      scf.for %arg4 = %c0 to %arg1 step %c1 {
+        memref.store %arg2, %0[%arg3, %arg4] : memref<?x?xf32>
+      }
+    }
+    return %0 : memref<?x?xf32>
+  }
+
+  func.func @main() {
+    %c0 = arith.constant 0.000000e+00 : f32
+    %c1 = arith.constant 1.000000e+00 : f32
+    %c2 = arith.constant 2 : index
+    %c3 = arith.constant 3 : index
+
+    %current_v1 = arith.constant 3 : index
+    %current_v2 = arith.constant 8 : index
+    %current_v0 = affine.apply #map0(%current_v2, %current_v1)
+
+    %v0 = call @alloc_f32(%current_v0, %current_v0, %c1) : (index, index, f32) -> memref<?x?xf32>
+    %v1 = call @alloc_f32(%current_v1, %current_v1, %c1) : (index, index, f32) -> memref<?x?xf32>
+    %v2 = call @alloc_f32(%current_v2, %current_v2, %c0) : (index, index, f32) -> memref<?x?xf32>
+
+    call @conv_2d(%v0, %v1, %v2) : (memref<?x?xf32>, memref<?x?xf32>, memref<?x?xf32>) -> ()
+
+    %print_v2 = memref.cast %v2 : memref<?x?xf32> to memref<*xf32>
+
+    // All the elements of the MemRef are the same,
+    // only check the first line to verify the correctness.
+    // CHECK: Unranked Memref base@ = {{.*}} rank = 2 offset = 0 sizes = [8, 8] strides = [8, 1] data =
+    // CHECK-NEXT: [
+    // CHECK-SAME: [9{{(, 9)*}}],
+    call @printMemrefF32(%print_v2) : (memref<*xf32>) -> ()
+
+    memref.dealloc %v0 : memref<?x?xf32>
+    memref.dealloc %v1 : memref<?x?xf32>
+    memref.dealloc %v2 : memref<?x?xf32>
+    return
+  }
+} diff --git a/examples/BuddyConvolution/makefile b/examples/BuddyConvolution/makefile new file mode 100644 index 000000000..196264376 --- /dev/null +++ b/examples/BuddyConvolution/makefile @@ -0,0 +1,127 @@
+#!/bin/bash
+BUDDY_OPT := ../../build/bin/buddy-opt
+MLIR_OPT := ../../llvm/build/bin/mlir-opt
+CLANG := ../../llvm/build/bin/clang
+MLIR_TRANSLATE := ../../llvm/build/bin/mlir-translate
+MLIR_CPU_RUNNER := ../../llvm/build/bin/mlir-cpu-runner
+LLC := ../../llvm/build/bin/llc
+OPT_FLAG := -O3
+MLIR_LIB := ../../llvm/build/lib/
+
+ifeq ($(shell uname),Linux)
+MLIR_RUNNER_UTILS := ../../llvm/build/lib/libmlir_runner_utils.so
+MLIR_C_RUNNER_UTILS := ../../llvm/build/lib/libmlir_c_runner_utils.so
+MTRIPLE := x86_64-unknown-linux-gnu
+else ifeq ($(shell uname),Darwin)
+MLIR_RUNNER_UTILS := ../../llvm/build/lib/libmlir_runner_utils.dylib
+MLIR_C_RUNNER_UTILS := ../../llvm/build/lib/libmlir_c_runner_utils.dylib
+MTRIPLE := x86_64-apple-darwin
+endif
+
+conv2d-lower:
+	@${BUDDY_OPT} ./conv2d.mlir \
+		-conv-vectorization \
+		-convert-linalg-to-loops \
+		-lower-affine \
+		-arith-bufferize \
+		-convert-scf-to-cf \
+		-convert-vector-to-llvm \
+		-convert-arith-to-llvm \
+		-finalize-memref-to-llvm \
+		-llvm-request-c-wrappers \
+		-convert-func-to-llvm \
+		-reconcile-unrealized-casts \
+		-o ./log.mlir
+
+conv2d-translate:
+	@${BUDDY_OPT} ./conv2d.mlir \
+		-conv-vectorization \
+		-convert-linalg-to-loops \
+		-lower-affine \
+		-arith-bufferize \
+		-convert-scf-to-cf \
+		-convert-vector-to-llvm \
+		-convert-arith-to-llvm \
+		-finalize-memref-to-llvm \
+		-llvm-request-c-wrappers \
+		-convert-func-to-llvm \
+		-reconcile-unrealized-casts | \
+	${MLIR_TRANSLATE} --mlir-to-llvmir -o log.ll
+
+conv2d-run:
+	@${BUDDY_OPT} ./conv2d.mlir \
+		-conv-vectorization \
+		-convert-linalg-to-loops \
+		-lower-affine \
+		-arith-bufferize \
+		-convert-scf-to-cf \
+		-convert-vector-to-llvm \
+		-convert-arith-to-llvm \
+		-finalize-memref-to-llvm \
+		-llvm-request-c-wrappers \
+		-convert-func-to-llvm \
+		-reconcile-unrealized-casts | \
+	${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \
+		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}
+
+conv2d-nhwc-fhwc-run:
+	@${BUDDY_OPT} ./conv2d-nhwc-fhwc.mlir \
+		-convert-linalg-to-loops \
+		-lower-affine \
+		-arith-bufferize \
+		-convert-scf-to-cf \
+		-convert-vector-to-llvm \
+		-convert-arith-to-llvm \
+		-finalize-memref-to-llvm \
+		-convert-func-to-llvm \
+		-reconcile-unrealized-casts | \
+	${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \
+		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}
+
+conv2d-nhwc-fhwc-aot:
+	@${BUDDY_OPT} ./conv2d-nhwc-fhwc.mlir \
+		-convert-linalg-to-loops \
+		-lower-affine \
+		-arith-bufferize \
+		-convert-scf-to-cf \
+		-convert-vector-to-llvm \
+		-convert-arith-to-llvm \
+		-finalize-memref-to-llvm \
+		-convert-func-to-llvm \
+		-reconcile-unrealized-casts | \
+	${MLIR_TRANSLATE} -mlir-to-llvmir -o log.ll
+	${CLANG} log.ll ${OPT_FLAG} \
+		-L${MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \
+		-o a.out
+	@LD_LIBRARY_PATH=${MLIR_LIB} ./a.out
+
+conv2d-nhwc-fhwc-opt-run:
+	@${BUDDY_OPT} ./conv2d-nhwc-fhwc-opt.mlir \
+		-convert-vector-to-scf \
+		-lower-affine \
+		
-arith-bufferize \ + -convert-scf-to-cf \ + -convert-vector-to-llvm \ + -convert-arith-to-llvm \ + -finalize-memref-to-llvm \ + -convert-func-to-llvm \ + -reconcile-unrealized-casts | \ + ${MLIR_CPU_RUNNER} -O3 -e main -entry-point-result=void \ + -shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS} + +conv2d-nhwc-fhwc-opt-aot: + @${BUDDY_OPT} ./conv2d-nhwc-fhwc-opt.mlir \ + -convert-vector-to-scf \ + -lower-affine \ + -arith-bufferize \ + -convert-scf-to-cf \ + -convert-vector-to-llvm \ + -convert-arith-to-llvm \ + -finalize-memref-to-llvm \ + -convert-func-to-llvm \ + -reconcile-unrealized-casts | \ + ${MLIR_TRANSLATE} -mlir-to-llvmir -o log.ll + ${CLANG} log.ll -O3 \ + -L${MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ + -o a.out + @LD_LIBRARY_PATH=${MLIR_LIB} ./a.out diff --git a/examples/BuddyGPU/.gitignore b/examples/BuddyGPU/.gitignore new file mode 100644 index 000000000..0194ea7a6 --- /dev/null +++ b/examples/BuddyGPU/.gitignore @@ -0,0 +1,3 @@ +log.mlir +log.ll +log.s diff --git a/examples/BuddyGPU/makefile b/examples/BuddyGPU/makefile new file mode 100644 index 000000000..677396d1d --- /dev/null +++ b/examples/BuddyGPU/makefile @@ -0,0 +1,8 @@ +#!/bin/bash +BUDDY_OPT := ../../build/bin/buddy-opt + +buddy-gpu-matmul-lower: + @${BUDDY_OPT} matmul.mlir \ + -transform-preload-library="transform-library-paths=transform.mlir" \ + -transform-interpreter="entry-point=codegen" \ + -o log.mlir diff --git a/examples/BuddyGPU/matmul.mlir b/examples/BuddyGPU/matmul.mlir new file mode 100644 index 000000000..2f0fa226c --- /dev/null +++ b/examples/BuddyGPU/matmul.mlir @@ -0,0 +1,12 @@ +!unit = f32 +!lhs = tensor<5376x2048x!unit> +!rhs = tensor<2048x5376x!unit> +!res = tensor<5376x5376x!unit> + +func.func @matmul(%arg0: !lhs, %arg1: !rhs) -> !res { + %cst = arith.constant 0.000000e+00 : !unit + %0 = tensor.empty() : !res + %1 = linalg.fill ins(%cst : !unit) outs(%0 : !res) -> !res + %2 = linalg.matmul ins(%arg0, %arg1: !lhs, !rhs) outs(%1: !res) -> !res + func.return %2 : !res +} diff --git a/examples/BuddyGPU/transform.mlir b/examples/BuddyGPU/transform.mlir new file mode 100644 index 000000000..ef2645199 --- /dev/null +++ b/examples/BuddyGPU/transform.mlir @@ -0,0 +1,23 @@ +module attributes { transform.with_named_sequence } { + transform.named_sequence @codegen(%arg0: !transform.any_op) { + // Match the target operations and assign them to SSA values. + %matmul = transform.structured.match ops{["linalg.matmul"]} in %arg0 + : (!transform.any_op) -> !transform.any_op + %fill = transform.structured.match ops{["linalg.fill"]} in %arg0 + : (!transform.any_op) -> !transform.any_op + + // Perform tiling for the grid. + // For the matrix multiplication of 5376x2048 and 2048x5376, the compilation + // strategy sets the tile size for grid-based partitioning to 128x256. + // This means that each 128x256 matmul tile is computed within a GPU block, + // while multiple such blocks are computed in parallel across the grid. + // `tile_sizes` specify the dimensions of the tiled matmul result. + // `%tiled_op` is the tiled matmul operation within the `scf.forall` loop. + // `%forall_op` is the `scf.forall` loop that maintains tile information. 
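+    // For example, with 128x256 tiles on the 5376x5376 result, the grid
+    // contains 42x21 tiles (5376 / 128 = 42, 5376 / 256 = 21).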
+    %tiled_op, %forall_op = transform.structured.tile_using_forall %matmul
+      tile_sizes [128, 256] (mapping = [#gpu.block<y>, #gpu.block<x>])
+      : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+
+    transform.yield
+  }
+} // module diff --git a/examples/BuddyGen/.gitignore b/examples/BuddyGen/.gitignore new file mode 100644 index 000000000..df9389428 --- /dev/null +++ b/examples/BuddyGen/.gitignore @@ -0,0 +1,4 @@ +log.mlir +log.ll +log.s +a.out diff --git a/examples/BuddyGen/GenMemRef.cpp b/examples/BuddyGen/GenMemRef.cpp new file mode 100644 index 000000000..8ca2526b7 --- /dev/null +++ b/examples/BuddyGen/GenMemRef.cpp @@ -0,0 +1,43 @@
+//===- GenMemRef.cpp ------------------------------------------------------===//
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+
+// $ export LLVM_DIR=$PWD/../../llvm/
+// $ export LLVM_BUILD_DIR=$LLVM_DIR/build
+// $ c++ GenMemRef.cpp \
+    -I $LLVM_DIR/llvm/include/ -I $LLVM_BUILD_DIR/include/ \
+    -I $LLVM_DIR/mlir/include/ -I $LLVM_BUILD_DIR/tools/mlir/include/ \
+    -L$LLVM_BUILD_DIR/lib -lMLIRIR -lMLIRParser -lMLIRSupport -lLLVMCore \
+    -lLLVMSupport -lncurses -ltinfo -lstdc++ -lLLVMDemangle \
+    -o a.out
+// $ ./a.out
+
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/MLIRContext.h"
+
+int main() {
+  mlir::MLIRContext context;
+  mlir::OpBuilder builder(&context);
+  mlir::Type eleType = builder.getF64Type();
+  // Target memref type:
+  // `memref<?xf64, strided<[1], offset: ?>>`
+  mlir::MemRefType memrefType = mlir::MemRefType::get(
+      {mlir::ShapedType::kDynamic}, eleType,
+      mlir::StridedLayoutAttr::get(
+          &context, /*offset=*/mlir::ShapedType::kDynamic, /*strides=*/{1}));
+  memrefType.dump();
+  return 0;
+} diff --git a/examples/BuddyLeNet/CMakeLists.txt b/examples/BuddyLeNet/CMakeLists.txt index 9698f617b..928f1f88c 100644 --- a/examples/BuddyLeNet/CMakeLists.txt +++ b/examples/BuddyLeNet/CMakeLists.txt @@ -6,25 +6,26 @@ add_custom_command( add_custom_command( OUTPUT forward.o - COMMAND ${LLVM_MLIR_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/forward.mlir + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/forward.mlir -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith), empty-tensor-to-alloc-tensor, convert-elementwise-to-linalg, arith-bufferize, func.func(linalg-bufferize, tensor-bufferize), func-bufferize)" | - ${LLVM_MLIR_BINARY_DIR}/mlir-opt + ${LLVM_TOOLS_BINARY_DIR}/mlir-opt -pass-pipeline "builtin.module(func.func(buffer-deallocation-simplification, convert-linalg-to-loops), eliminate-empty-tensors, func.func(llvm-request-c-wrappers),convert-math-to-llvm, convert-math-to-libm, convert-scf-to-cf, convert-arith-to-llvm, expand-strided-metadata, finalize-memref-to-llvm, convert-func-to-llvm, reconcile-unrealized-casts)" | - ${LLVM_MLIR_BINARY_DIR}/mlir-translate -mlir-to-llvmir | - ${LLVM_MLIR_BINARY_DIR}/llvm-as | - ${LLVM_MLIR_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O0 -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/forward.o
+ ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O0 -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/forward.o DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/forward.mlir COMMENT "Building forward.o" VERBATIM) add_custom_command( OUTPUT subgraph0.o - COMMAND ${LLVM_MLIR_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith))" | ${BUDDY_BINARY_DIR}/buddy-opt -eliminate-empty-tensors - -convert-tensor-to-linalg + -convert-tensor-to-linalg -linalg-bufferize + -batchmatmul-optimize -convert-linalg-to-affine-loops -lower-affine -func-bufferize-dynamic-offset @@ -42,9 +43,9 @@ add_custom_command( -convert-arith-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | - ${LLVM_MLIR_BINARY_DIR}/mlir-translate -mlir-to-llvmir | - ${LLVM_MLIR_BINARY_DIR}/llvm-as | - ${LLVM_MLIR_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O0 -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph0.o + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O0 -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph0.o DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir COMMENT "Building subgraph0.o" VERBATIM) @@ -54,7 +55,7 @@ add_library(LENET STATIC subgraph0.o forward.o) SET_TARGET_PROPERTIES(LENET PROPERTIES LINKER_LANGUAGE C) add_executable(buddy-lenet-run buddy-lenet-main.cpp) -target_link_directories(buddy-lenet-run PRIVATE ${LLVM_MLIR_LIBRARY_DIR}) +target_link_directories(buddy-lenet-run PRIVATE ${LLVM_LIBRARY_DIR}) set(BUDDY_LENET_LIBS LENET mlir_c_runner_utils ${OpenCV_LIBS}) target_link_libraries(buddy-lenet-run ${BUDDY_LENET_LIBS}) diff --git a/examples/BuddyLeNet/README.md b/examples/BuddyLeNet/README.md index 5988edbe7..23ac086cf 100644 --- a/examples/BuddyLeNet/README.md +++ b/examples/BuddyLeNet/README.md @@ -24,9 +24,7 @@ $ cmake -G Ninja .. \ -DLLVM_ENABLE_ASSERTIONS=ON \ -DCMAKE_BUILD_TYPE=RELEASE \ -DBUDDY_MLIR_ENABLE_PYTHON_PACKAGES=ON \ - -DPython3_EXECUTABLE=$(which python3) \ - -DBUDDY_ENABLE_OPENCV=ON \ - -DOpenCV_DIR= + -DPython3_EXECUTABLE=$(which python3) $ ninja $ ninja check-buddy ``` diff --git a/examples/BuddyLeNet/buddy-lenet-main.cpp b/examples/BuddyLeNet/buddy-lenet-main.cpp index 4e2dc2efe..ca12820ba 100644 --- a/examples/BuddyLeNet/buddy-lenet-main.cpp +++ b/examples/BuddyLeNet/buddy-lenet-main.cpp @@ -15,41 +15,24 @@ //===----------------------------------------------------------------------===// #include -#include +#include #include +#include #include #include #include #include -#include #include #include #include constexpr size_t ParamsSize = 44426; -const std::string ImgName = "3.png"; +const std::string ImgName = "8.bmp"; /// Declare LeNet forward function. extern "C" void _mlir_ciface_forward(MemRef<float, 2> *output, MemRef<float, 1> *arg0, - Img<float, 4> *input); - -/// Function for preprocessing the image to match model input requirements. -const cv::Mat imagePreprocessing() { - // Get the directory of the LeNet example and construct the image path.
- std::string lenetDir = getenv("LENET_EXAMPLE_PATH"); - std::string imgPath = lenetDir + "/images/" + ImgName; - // Read the image in grayscale mode. - cv::Mat inputImage = cv::imread(imgPath, cv::IMREAD_GRAYSCALE); - assert(!inputImage.empty() && "Could not read the image."); - cv::Mat resizedImage; - int imageWidth = 28; - int imageHeight = 28; - // Resize the image to 28x28 pixels. - cv::resize(inputImage, resizedImage, cv::Size(imageWidth, imageHeight), - cv::INTER_LINEAR); - return resizedImage; -} + dip::Image<float, 4> *input); /// Print [Log] label in bold blue format. void printLogLabel() { std::cout << "\033[34;1m[Log] \033[0m"; } @@ -112,19 +95,16 @@ int main() { const std::string title = "LeNet Inference Powered by Buddy Compiler"; std::cout << "\033[33;1m" << title << "\033[0m" << std::endl; - // Preprocess the image to match the input requirements of the model. - cv::Mat image = imagePreprocessing(); - - // Define the sizes of the input and output tensors. - intptr_t sizesInput[4] = {1, 1, 28, 28}; + // Define the sizes of the output tensors. intptr_t sizesOutput[2] = {1, 10}; // Create input and output containers for the image and model output. - Img<float, 4> input(image, sizesInput, true); + std::string lenetDir = getenv("LENET_EXAMPLE_PATH"); + std::string imgPath = lenetDir + "/images/" + ImgName; + dip::Image<float, 4> input(imgPath, dip::DIP_GRAYSCALE, true /* norm */); MemRef<float, 2> output(sizesOutput); // Load model parameters from the specified file. - std::string lenetDir = getenv("LENET_EXAMPLE_PATH"); std::string paramsDir = lenetDir + "/arg0.data"; MemRef<float, 1> paramsContainer({ParamsSize}); loadParameters(paramsDir, paramsContainer); diff --git a/examples/BuddyLeNet/fake-lenet.mlir b/examples/BuddyLeNet/fake-lenet.mlir index 48d91a7fd..d7d80a533 100644 --- a/examples/BuddyLeNet/fake-lenet.mlir +++ b/examples/BuddyLeNet/fake-lenet.mlir @@ -1,5 +1,6 @@ module { func.func private @printMemrefF32(%ptr : tensor<*xf32>) + func.func private @rtclock() -> f64 func.func @forward(%arg0: tensor<44426xf32>, %arg1: tensor<1x1x28x28xf32>) -> tensor<1x10xf32> { %extracted_slice = tensor.extract_slice %arg0[0] [150] [1] : tensor<44426xf32> to tensor<150xf32> @@ -81,10 +82,16 @@ %fake_params = arith.constant dense<1.0> : tensor<44426xf32> %fake_input = arith.constant dense<2.0> : tensor<1x1x28x28xf32> + %t_start = call @rtclock() : () -> f64 %fake_output = call @forward(%fake_params, %fake_input) : (tensor<44426xf32>, tensor<1x1x28x28xf32>) -> tensor<1x10xf32> + %t_end = call @rtclock() : () -> f64 %tensor_unranked = tensor.cast %fake_output : tensor<1x10xf32> to tensor<*xf32> call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> () + + %time = arith.subf %t_end, %t_start : f64 + vector.print %time : f64 + return } } diff --git a/examples/BuddyLeNet/images/8.bmp b/examples/BuddyLeNet/images/8.bmp new file mode 100644 index 000000000..7a9e02a29 Binary files /dev/null and b/examples/BuddyLeNet/images/8.bmp differ diff --git a/examples/BuddyLeNet/makefile b/examples/BuddyLeNet/makefile index 6f0664272..fe87b6da1 100644 --- a/examples/BuddyLeNet/makefile +++ b/examples/BuddyLeNet/makefile @@ -1,30 +1,33 @@ #!/bin/bash -BUDDY_OPT := ../../build/bin/buddy-opt -MLIR_OPT := ../../llvm/build/bin/mlir-opt -MLIR_TRANSLATE := ../../llvm/build/bin/mlir-translate -MLIR_CPU_RUNNER := ../../llvm/build/bin/mlir-cpu-runner -LLC := ../../llvm/build/bin/llc -OPT_FLAG := -O0 +BUDDY_BUILD_DIR := ../../build/ +LLVM_BUILD_DIR := ../../llvm/build/ +BUDDY_OPT := ${BUDDY_BUILD_DIR}/bin/buddy-opt +MLIR_OPT := 
${LLVM_BUILD_DIR}/bin/mlir-opt +MLIR_TRANSLATE := ${LLVM_BUILD_DIR}/bin/mlir-translate +MLIR_CPU_RUNNER := ${LLVM_BUILD_DIR}/bin/mlir-cpu-runner +LLC := ${LLVM_BUILD_DIR}/bin/llc +OPT_FLAG := -O3 ifeq ($(shell uname),Linux) -MLIR_RUNNER_UTILS := ../../llvm/build/lib/libmlir_runner_utils.so -MLIR_C_RUNNER_UTILS := ../../llvm/build/lib/libmlir_c_runner_utils.so -MLIR_ASYNC_RUNTIME := ../../llvm/build/lib/libmlir_async_runtime.so +MLIR_RUNNER_UTILS := ${LLVM_BUILD_DIR}/lib/libmlir_runner_utils.so +MLIR_C_RUNNER_UTILS := ${LLVM_BUILD_DIR}/lib/libmlir_c_runner_utils.so +MLIR_ASYNC_RUNTIME := ${LLVM_BUILD_DIR}/lib/libmlir_async_runtime.so MTRIPLE := x86_64-unknown-linux-gnu else ifeq ($(shell uname),Darwin) -MLIR_RUNNER_UTILS := ../../llvm/build/lib/libmlir_runner_utils.dylib -MLIR_C_RUNNER_UTILS := ../../llvm/build/lib/libmlir_c_runner_utils.dylib -MLIR_ASYNC_RUNTIME := ./../llvm/build/lib/libmlir_async_runtime.dylib +MLIR_RUNNER_UTILS := ${LLVM_BUILD_DIR}/lib/libmlir_runner_utils.dylib +MLIR_C_RUNNER_UTILS := ${LLVM_BUILD_DIR}/lib/libmlir_c_runner_utils.dylib +MLIR_ASYNC_RUNTIME := ${LLVM_BUILD_DIR}/lib/libmlir_async_runtime.dylib MTRIPLE := x86_64-apple-darwin endif buddy-lenet-lower: - @${MLIR_OPT} ./fake-lenet.mlir \ + @${BUDDY_OPT} ./fake-lenet.mlir \ -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith))" | \ - ${MLIR_OPT} \ + ${BUDDY_OPT} \ -eliminate-empty-tensors \ -convert-tensor-to-linalg \ -linalg-bufferize \ + -batchmatmul-optimize \ -convert-linalg-to-affine-loops \ -lower-affine \ -func-bufferize \ @@ -38,16 +41,15 @@ buddy-lenet-lower: -convert-arith-to-llvm \ -finalize-memref-to-llvm \ -convert-scf-to-cf \ - -llvm-request-c-wrappers \ -convert-arith-to-llvm \ -convert-func-to-llvm \ -reconcile-unrealized-casts \ -o ./log.mlir buddy-lenet-translate: - @${MLIR_OPT} ./fake-lenet.mlir \ + @${BUDDY_OPT} ./fake-lenet.mlir \ -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith))" | \ - ${MLIR_OPT} \ + ${BUDDY_OPT} \ -eliminate-empty-tensors \ -convert-tensor-to-linalg \ -linalg-bufferize \ @@ -64,7 +66,6 @@ buddy-lenet-translate: -convert-arith-to-llvm \ -finalize-memref-to-llvm \ -convert-scf-to-cf \ - -llvm-request-c-wrappers \ -convert-arith-to-llvm \ -convert-func-to-llvm \ -reconcile-unrealized-casts | \ @@ -72,9 +73,9 @@ buddy-lenet-translate: buddy-lenet-run: - @${MLIR_OPT} ./fake-lenet.mlir \ + @${BUDDY_OPT} ./fake-lenet.mlir \ -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith))" | \ - ${MLIR_OPT} \ + ${BUDDY_OPT} \ -eliminate-empty-tensors \ -convert-tensor-to-linalg \ -linalg-bufferize \ @@ -91,7 +92,33 @@ buddy-lenet-run: -convert-arith-to-llvm \ -finalize-memref-to-llvm \ -convert-scf-to-cf \ - -llvm-request-c-wrappers \ + -convert-arith-to-llvm \ + -convert-func-to-llvm \ + -reconcile-unrealized-casts | \ + ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \ + -shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS} + +buddy-lenet-opt-run: + @${BUDDY_OPT} ./fake-lenet.mlir \ + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith))" | \ + ${BUDDY_OPT} \ + -eliminate-empty-tensors \ + -convert-tensor-to-linalg \ + -linalg-bufferize \ + -batchmatmul-optimize \ + -convert-linalg-to-affine-loops \ + -lower-affine \ + -func-bufferize \ + -arith-bufferize \ + -tensor-bufferize \ + -buffer-deallocation \ + -finalizing-bufferize \ 
+ -convert-vector-to-scf \ + -expand-strided-metadata \ + -convert-vector-to-llvm \ + -convert-arith-to-llvm \ + -finalize-memref-to-llvm \ + -convert-scf-to-cf \ -convert-arith-to-llvm \ -convert-func-to-llvm \ -reconcile-unrealized-casts | \ diff --git a/examples/BuddyLlama/CMakeLists.txt b/examples/BuddyLlama/CMakeLists.txt index 97aa736cb..a6bfc2f74 100644 --- a/examples/BuddyLlama/CMakeLists.txt +++ b/examples/BuddyLlama/CMakeLists.txt @@ -6,14 +6,14 @@ add_custom_command( add_custom_command( OUTPUT forward.o - COMMAND ${LLVM_MLIR_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyLlama/forward.mlir + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyLlama/forward.mlir -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | ${BUDDY_BINARY_DIR}/buddy-opt -arith-expand -eliminate-empty-tensors -empty-tensor-to-alloc-tensor -one-shot-bufferize - -matmul-paralell-vectorization-optimize + -matmul-parallel-vectorization-optimize -batchmatmul-optimize -convert-linalg-to-affine-loops -affine-loop-fusion @@ -40,9 +40,9 @@ add_custom_command( -convert-math-to-libm -convert-func-to-llvm -reconcile-unrealized-casts | - ${LLVM_MLIR_BINARY_DIR}/mlir-translate -mlir-to-llvmir | - ${LLVM_MLIR_BINARY_DIR}/llvm-as | - ${LLVM_MLIR_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 -o ${BUDDY_BINARY_DIR}/../examples/BuddyLlama/forward.o DEPENDS buddy-opt ${BUDDY_EXAMPLES_DIR}/BuddyLlama/forward.mlir COMMENT "Building forward.o " @@ -50,14 +50,14 @@ add_custom_command( add_custom_command( OUTPUT subgraph.o - COMMAND ${LLVM_MLIR_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyLlama/subgraph0.mlir + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyLlama/subgraph0.mlir -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | ${BUDDY_BINARY_DIR}/buddy-opt -arith-expand -eliminate-empty-tensors -empty-tensor-to-alloc-tensor -one-shot-bufferize - -matmul-paralell-vectorization-optimize + -matmul-parallel-vectorization-optimize -batchmatmul-optimize -convert-linalg-to-affine-loops -affine-loop-fusion @@ -85,9 +85,9 @@ add_custom_command( -convert-math-to-libm -convert-func-to-llvm -reconcile-unrealized-casts | - ${LLVM_MLIR_BINARY_DIR}/mlir-translate -mlir-to-llvmir | - ${LLVM_MLIR_BINARY_DIR}/llvm-as | - ${LLVM_MLIR_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 -o ${BUDDY_BINARY_DIR}/../examples/BuddyLlama/subgraph.o DEPENDS buddy-opt ${BUDDY_EXAMPLES_DIR}/BuddyLlama/subgraph0.mlir COMMENT "Building subgraph.o " @@ -107,7 +107,7 @@ SET_TARGET_PROPERTIES( LINKER_LANGUAGE C) add_executable(buddy-llama-run llama-main.cpp) -target_link_directories(buddy-llama-run PRIVATE ${LLVM_MLIR_LIBRARY_DIR}) +target_link_directories(buddy-llama-run PRIVATE ${LLVM_LIBRARY_DIR}) set(BUDDY_LLAMA_LIBS LLAMA diff --git a/examples/BuddyLlama/import-llama2.py b/examples/BuddyLlama/import-llama2.py index fbd12e5bf..2903d6bd8 100644 --- a/examples/BuddyLlama/import-llama2.py +++ b/examples/BuddyLlama/import-llama2.py @@ -1,11 +1,3 @@ -import os -import torch -import torch._dynamo as 
dynamo -from transformers import LlamaForCausalLM, LlamaTokenizer -from torch._inductor.decomposition import decompositions as inductor_decomp -import numpy - -from buddy.compiler.frontend import DynamoCompiler # ===- import-llama2.py -------------------------------------------------------- # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -25,6 +17,15 @@ # This is the test of llama2 model. # # ===--------------------------------------------------------------------------- + +import os +import torch +import torch._dynamo as dynamo +from transformers import LlamaForCausalLM, LlamaTokenizer +from torch._inductor.decomposition import decompositions as inductor_decomp +import numpy + +from buddy.compiler.frontend import DynamoCompiler from buddy.compiler.ops import tosa from buddy.compiler.graph import GraphDriver from buddy.compiler.graph.transform import simply_fuse diff --git a/examples/BuddyLlama/llama_annotation.mlir b/examples/BuddyLlama/llama_annotation.mlir new file mode 100644 index 000000000..acb735d12 --- /dev/null +++ b/examples/BuddyLlama/llama_annotation.mlir @@ -0,0 +1,6012 @@ +#map = affine_map<(d0, d1, d2) -> (d1)> +#map1 = affine_map<(d0, d1, d2) -> (d0, d2)> +#map2 = affine_map<(d0, d1, d2) -> (d0, d1)> +#map3 = affine_map<(d0, d1) -> (d0, d1)> +#map4 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +#map5 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +#map6 = affine_map<(d0, d1, d2) -> (d0, 0, d1, d2)> +#map7 = affine_map<(d0, d1) -> (0, d0, d1)> +module { + func.func @subgraph0(%arg0: tensor<32000x4096xf32>, %arg1: tensor<1x40xi64>, %arg2: tensor<4096xf32>, %arg3: tensor<4096x4096xf32>, %arg4: tensor<4096x4096xf32>, %arg5: tensor<4096x4096xf32>, %arg6: tensor<1x1x2048x128xf32>, %arg7: tensor<1x1x2048x128xf32>, %arg8: tensor<4096x4096xf32>, %arg9: tensor<4096xf32>, %arg10: tensor<11008x4096xf32>, %arg11: tensor<11008x4096xf32>, %arg12: tensor<4096x11008xf32>, %arg13: tensor<4096xf32>, %arg14: tensor<4096x4096xf32>, %arg15: tensor<4096x4096xf32>, %arg16: tensor<4096x4096xf32>, %arg17: tensor<1x1x2048x128xf32>, %arg18: tensor<1x1x2048x128xf32>, %arg19: tensor<4096x4096xf32>, %arg20: tensor<4096xf32>, %arg21: tensor<11008x4096xf32>, %arg22: tensor<11008x4096xf32>, %arg23: tensor<4096x11008xf32>, %arg24: tensor<4096xf32>, %arg25: tensor<4096x4096xf32>, %arg26: tensor<4096x4096xf32>, %arg27: tensor<4096x4096xf32>, %arg28: tensor<1x1x2048x128xf32>, %arg29: tensor<1x1x2048x128xf32>, %arg30: tensor<4096x4096xf32>, %arg31: tensor<4096xf32>, %arg32: tensor<11008x4096xf32>, %arg33: tensor<11008x4096xf32>, %arg34: tensor<4096x11008xf32>, %arg35: tensor<4096xf32>, %arg36: tensor<4096x4096xf32>, %arg37: tensor<4096x4096xf32>, %arg38: tensor<4096x4096xf32>, %arg39: tensor<1x1x2048x128xf32>, %arg40: tensor<1x1x2048x128xf32>, %arg41: tensor<4096x4096xf32>, %arg42: tensor<4096xf32>, %arg43: tensor<11008x4096xf32>, %arg44: tensor<11008x4096xf32>, %arg45: tensor<4096x11008xf32>, %arg46: tensor<4096xf32>, %arg47: tensor<4096x4096xf32>, %arg48: tensor<4096x4096xf32>, %arg49: tensor<4096x4096xf32>, %arg50: tensor<1x1x2048x128xf32>, %arg51: tensor<1x1x2048x128xf32>, %arg52: tensor<4096x4096xf32>, %arg53: tensor<4096xf32>, %arg54: tensor<11008x4096xf32>, %arg55: tensor<11008x4096xf32>, %arg56: tensor<4096x11008xf32>, %arg57: tensor<4096xf32>, %arg58: tensor<4096x4096xf32>, %arg59: tensor<4096x4096xf32>, %arg60: tensor<4096x4096xf32>, %arg61: tensor<1x1x2048x128xf32>, %arg62: tensor<1x1x2048x128xf32>, %arg63: tensor<4096x4096xf32>, %arg64: tensor<4096xf32>, %arg65: 
tensor<11008x4096xf32>, %arg66: tensor<11008x4096xf32>, %arg67: tensor<4096x11008xf32>, %arg68: tensor<4096xf32>, %arg69: tensor<4096x4096xf32>, %arg70: tensor<4096x4096xf32>, %arg71: tensor<4096x4096xf32>, %arg72: tensor<1x1x2048x128xf32>, %arg73: tensor<1x1x2048x128xf32>, %arg74: tensor<4096x4096xf32>, %arg75: tensor<4096xf32>, %arg76: tensor<11008x4096xf32>, %arg77: tensor<11008x4096xf32>, %arg78: tensor<4096x11008xf32>, %arg79: tensor<4096xf32>, %arg80: tensor<4096x4096xf32>, %arg81: tensor<4096x4096xf32>, %arg82: tensor<4096x4096xf32>, %arg83: tensor<1x1x2048x128xf32>, %arg84: tensor<1x1x2048x128xf32>, %arg85: tensor<4096x4096xf32>, %arg86: tensor<4096xf32>, %arg87: tensor<11008x4096xf32>, %arg88: tensor<11008x4096xf32>, %arg89: tensor<4096x11008xf32>, %arg90: tensor<4096xf32>, %arg91: tensor<4096x4096xf32>, %arg92: tensor<4096x4096xf32>, %arg93: tensor<4096x4096xf32>, %arg94: tensor<1x1x2048x128xf32>, %arg95: tensor<1x1x2048x128xf32>, %arg96: tensor<4096x4096xf32>, %arg97: tensor<4096xf32>, %arg98: tensor<11008x4096xf32>, %arg99: tensor<11008x4096xf32>, %arg100: tensor<4096x11008xf32>, %arg101: tensor<4096xf32>, %arg102: tensor<4096x4096xf32>, %arg103: tensor<4096x4096xf32>, %arg104: tensor<4096x4096xf32>, %arg105: tensor<1x1x2048x128xf32>, %arg106: tensor<1x1x2048x128xf32>, %arg107: tensor<4096x4096xf32>, %arg108: tensor<4096xf32>, %arg109: tensor<11008x4096xf32>, %arg110: tensor<11008x4096xf32>, %arg111: tensor<4096x11008xf32>, %arg112: tensor<4096xf32>, %arg113: tensor<4096x4096xf32>, %arg114: tensor<4096x4096xf32>, %arg115: tensor<4096x4096xf32>, %arg116: tensor<1x1x2048x128xf32>, %arg117: tensor<1x1x2048x128xf32>, %arg118: tensor<4096x4096xf32>, %arg119: tensor<4096xf32>, %arg120: tensor<11008x4096xf32>, %arg121: tensor<11008x4096xf32>, %arg122: tensor<4096x11008xf32>, %arg123: tensor<4096xf32>, %arg124: tensor<4096x4096xf32>, %arg125: tensor<4096x4096xf32>, %arg126: tensor<4096x4096xf32>, %arg127: tensor<1x1x2048x128xf32>, %arg128: tensor<1x1x2048x128xf32>, %arg129: tensor<4096x4096xf32>, %arg130: tensor<4096xf32>, %arg131: tensor<11008x4096xf32>, %arg132: tensor<11008x4096xf32>, %arg133: tensor<4096x11008xf32>, %arg134: tensor<4096xf32>, %arg135: tensor<4096x4096xf32>, %arg136: tensor<4096x4096xf32>, %arg137: tensor<4096x4096xf32>, %arg138: tensor<1x1x2048x128xf32>, %arg139: tensor<1x1x2048x128xf32>, %arg140: tensor<4096x4096xf32>, %arg141: tensor<4096xf32>, %arg142: tensor<11008x4096xf32>, %arg143: tensor<11008x4096xf32>, %arg144: tensor<4096x11008xf32>, %arg145: tensor<4096xf32>, %arg146: tensor<4096x4096xf32>, %arg147: tensor<4096x4096xf32>, %arg148: tensor<4096x4096xf32>, %arg149: tensor<1x1x2048x128xf32>, %arg150: tensor<1x1x2048x128xf32>, %arg151: tensor<4096x4096xf32>, %arg152: tensor<4096xf32>, %arg153: tensor<11008x4096xf32>, %arg154: tensor<11008x4096xf32>, %arg155: tensor<4096x11008xf32>, %arg156: tensor<4096xf32>, %arg157: tensor<4096x4096xf32>, %arg158: tensor<4096x4096xf32>, %arg159: tensor<4096x4096xf32>, %arg160: tensor<1x1x2048x128xf32>, %arg161: tensor<1x1x2048x128xf32>, %arg162: tensor<4096x4096xf32>, %arg163: tensor<4096xf32>, %arg164: tensor<11008x4096xf32>, %arg165: tensor<11008x4096xf32>, %arg166: tensor<4096x11008xf32>, %arg167: tensor<4096xf32>, %arg168: tensor<4096x4096xf32>, %arg169: tensor<4096x4096xf32>, %arg170: tensor<4096x4096xf32>, %arg171: tensor<1x1x2048x128xf32>, %arg172: tensor<1x1x2048x128xf32>, %arg173: tensor<4096x4096xf32>, %arg174: tensor<4096xf32>, %arg175: tensor<11008x4096xf32>, %arg176: tensor<11008x4096xf32>, %arg177: 
tensor<4096x11008xf32>, %arg178: tensor<4096xf32>, %arg179: tensor<4096x4096xf32>, %arg180: tensor<4096x4096xf32>, %arg181: tensor<4096x4096xf32>, %arg182: tensor<1x1x2048x128xf32>, %arg183: tensor<1x1x2048x128xf32>, %arg184: tensor<4096x4096xf32>, %arg185: tensor<4096xf32>, %arg186: tensor<11008x4096xf32>, %arg187: tensor<11008x4096xf32>, %arg188: tensor<4096x11008xf32>, %arg189: tensor<4096xf32>, %arg190: tensor<4096x4096xf32>, %arg191: tensor<4096x4096xf32>, %arg192: tensor<4096x4096xf32>, %arg193: tensor<1x1x2048x128xf32>, %arg194: tensor<1x1x2048x128xf32>, %arg195: tensor<4096x4096xf32>, %arg196: tensor<4096xf32>, %arg197: tensor<11008x4096xf32>, %arg198: tensor<11008x4096xf32>, %arg199: tensor<4096x11008xf32>, %arg200: tensor<4096xf32>, %arg201: tensor<4096x4096xf32>, %arg202: tensor<4096x4096xf32>, %arg203: tensor<4096x4096xf32>, %arg204: tensor<1x1x2048x128xf32>, %arg205: tensor<1x1x2048x128xf32>, %arg206: tensor<4096x4096xf32>, %arg207: tensor<4096xf32>, %arg208: tensor<11008x4096xf32>, %arg209: tensor<11008x4096xf32>, %arg210: tensor<4096x11008xf32>, %arg211: tensor<4096xf32>, %arg212: tensor<4096x4096xf32>, %arg213: tensor<4096x4096xf32>, %arg214: tensor<4096x4096xf32>, %arg215: tensor<1x1x2048x128xf32>, %arg216: tensor<1x1x2048x128xf32>, %arg217: tensor<4096x4096xf32>, %arg218: tensor<4096xf32>, %arg219: tensor<11008x4096xf32>, %arg220: tensor<11008x4096xf32>, %arg221: tensor<4096x11008xf32>, %arg222: tensor<4096xf32>, %arg223: tensor<4096x4096xf32>, %arg224: tensor<4096x4096xf32>, %arg225: tensor<4096x4096xf32>, %arg226: tensor<1x1x2048x128xf32>, %arg227: tensor<1x1x2048x128xf32>, %arg228: tensor<4096x4096xf32>, %arg229: tensor<4096xf32>, %arg230: tensor<11008x4096xf32>, %arg231: tensor<11008x4096xf32>, %arg232: tensor<4096x11008xf32>, %arg233: tensor<4096xf32>, %arg234: tensor<4096x4096xf32>, %arg235: tensor<4096x4096xf32>, %arg236: tensor<4096x4096xf32>, %arg237: tensor<1x1x2048x128xf32>, %arg238: tensor<1x1x2048x128xf32>, %arg239: tensor<4096x4096xf32>, %arg240: tensor<4096xf32>, %arg241: tensor<11008x4096xf32>, %arg242: tensor<11008x4096xf32>, %arg243: tensor<4096x11008xf32>, %arg244: tensor<4096xf32>, %arg245: tensor<4096x4096xf32>, %arg246: tensor<4096x4096xf32>, %arg247: tensor<4096x4096xf32>, %arg248: tensor<1x1x2048x128xf32>, %arg249: tensor<1x1x2048x128xf32>, %arg250: tensor<4096x4096xf32>, %arg251: tensor<4096xf32>, %arg252: tensor<11008x4096xf32>, %arg253: tensor<11008x4096xf32>, %arg254: tensor<4096x11008xf32>, %arg255: tensor<4096xf32>, %arg256: tensor<4096x4096xf32>, %arg257: tensor<4096x4096xf32>, %arg258: tensor<4096x4096xf32>, %arg259: tensor<1x1x2048x128xf32>, %arg260: tensor<1x1x2048x128xf32>, %arg261: tensor<4096x4096xf32>, %arg262: tensor<4096xf32>, %arg263: tensor<11008x4096xf32>, %arg264: tensor<11008x4096xf32>, %arg265: tensor<4096x11008xf32>, %arg266: tensor<4096xf32>, %arg267: tensor<4096x4096xf32>, %arg268: tensor<4096x4096xf32>, %arg269: tensor<4096x4096xf32>, %arg270: tensor<1x1x2048x128xf32>, %arg271: tensor<1x1x2048x128xf32>, %arg272: tensor<4096x4096xf32>, %arg273: tensor<4096xf32>, %arg274: tensor<11008x4096xf32>, %arg275: tensor<11008x4096xf32>, %arg276: tensor<4096x11008xf32>, %arg277: tensor<4096xf32>, %arg278: tensor<4096x4096xf32>, %arg279: tensor<4096x4096xf32>, %arg280: tensor<4096x4096xf32>, %arg281: tensor<1x1x2048x128xf32>, %arg282: tensor<1x1x2048x128xf32>, %arg283: tensor<4096x4096xf32>, %arg284: tensor<4096xf32>, %arg285: tensor<11008x4096xf32>, %arg286: tensor<11008x4096xf32>, %arg287: tensor<4096x11008xf32>, %arg288: 
tensor<4096xf32>, %arg289: tensor<4096x4096xf32>, %arg290: tensor<4096x4096xf32>, %arg291: tensor<4096x4096xf32>, %arg292: tensor<1x1x2048x128xf32>, %arg293: tensor<1x1x2048x128xf32>, %arg294: tensor<4096x4096xf32>, %arg295: tensor<4096xf32>, %arg296: tensor<11008x4096xf32>, %arg297: tensor<11008x4096xf32>, %arg298: tensor<4096x11008xf32>, %arg299: tensor<4096xf32>, %arg300: tensor<4096x4096xf32>, %arg301: tensor<4096x4096xf32>, %arg302: tensor<4096x4096xf32>, %arg303: tensor<1x1x2048x128xf32>, %arg304: tensor<1x1x2048x128xf32>, %arg305: tensor<4096x4096xf32>, %arg306: tensor<4096xf32>, %arg307: tensor<11008x4096xf32>, %arg308: tensor<11008x4096xf32>, %arg309: tensor<4096x11008xf32>, %arg310: tensor<4096xf32>, %arg311: tensor<4096x4096xf32>, %arg312: tensor<4096x4096xf32>, %arg313: tensor<4096x4096xf32>, %arg314: tensor<1x1x2048x128xf32>, %arg315: tensor<1x1x2048x128xf32>, %arg316: tensor<4096x4096xf32>, %arg317: tensor<4096xf32>, %arg318: tensor<11008x4096xf32>, %arg319: tensor<11008x4096xf32>, %arg320: tensor<4096x11008xf32>, %arg321: tensor<4096xf32>, %arg322: tensor<4096x4096xf32>, %arg323: tensor<4096x4096xf32>, %arg324: tensor<4096x4096xf32>, %arg325: tensor<1x1x2048x128xf32>, %arg326: tensor<1x1x2048x128xf32>, %arg327: tensor<4096x4096xf32>, %arg328: tensor<4096xf32>, %arg329: tensor<11008x4096xf32>, %arg330: tensor<11008x4096xf32>, %arg331: tensor<4096x11008xf32>, %arg332: tensor<4096xf32>, %arg333: tensor<4096x4096xf32>, %arg334: tensor<4096x4096xf32>, %arg335: tensor<4096x4096xf32>, %arg336: tensor<1x1x2048x128xf32>, %arg337: tensor<1x1x2048x128xf32>, %arg338: tensor<4096x4096xf32>, %arg339: tensor<4096xf32>, %arg340: tensor<11008x4096xf32>, %arg341: tensor<11008x4096xf32>, %arg342: tensor<4096x11008xf32>, %arg343: tensor<4096xf32>, %arg344: tensor<4096x4096xf32>, %arg345: tensor<4096x4096xf32>, %arg346: tensor<4096x4096xf32>, %arg347: tensor<1x1x2048x128xf32>, %arg348: tensor<1x1x2048x128xf32>, %arg349: tensor<4096x4096xf32>, %arg350: tensor<4096xf32>, %arg351: tensor<11008x4096xf32>, %arg352: tensor<11008x4096xf32>, %arg353: tensor<4096x11008xf32>, %arg354: tensor<4096xf32>, %arg355: tensor<32000x4096xf32>) -> (tensor<1x40x4096xf32>, tensor<1x40x32000xf32>) { + %0 = "tosa.const"() <{value = dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]> : tensor<40xi64>}> : () -> tensor<40xi64> + %1 = tosa.reshape %0 {new_shape = array} : (tensor<40xi64>) -> tensor<1x40xi64> + %2 = tosa.reshape %1 {new_shape = array} : (tensor<1x40xi64>) -> tensor<1x40xi64> + %3 = tosa.cast %arg1 : (tensor<1x40xi64>) -> tensor<1x40xi32> + %4 = tosa.reshape %arg0 {new_shape = array} : (tensor<32000x4096xf32>) -> tensor<1x32000x4096xf32> + %5 = tosa.gather %4, %3 : (tensor<1x32000x4096xf32>, tensor<1x40xi32>) -> tensor<1x40x4096xf32> + %6 = tosa.reshape %5 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %cst = arith.constant dense : tensor<1x40xi1> + %cst_0 = arith.constant dense<-3.40282347E+38> : tensor<40x40xf32> + %7 = "tosa.const"() <{value = dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]> : tensor<40xi64>}> : () -> tensor<40xi64> + %8 = "tosa.const"() <{value = dense<1> : tensor<40xi64>}> : () -> tensor<40xi64> + %9 = tosa.add %7, %8 : (tensor<40xi64>, tensor<40xi64>) -> tensor<40xi64> + %10 = tosa.reshape %9 {new_shape = array} : (tensor<40xi64>) -> 
tensor<40x1xi64> + %11 = tensor.empty() : tensor<40x40xi1> + %12 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]} ins(%7, %10 : tensor<40xi64>, tensor<40x1xi64>) outs(%11 : tensor<40x40xi1>) { + ^bb0(%in: i64, %in_742: i64, %out: i1): + %4175 = arith.cmpi slt, %in, %in_742 : i64 + linalg.yield %4175 : i1 + } -> tensor<40x40xi1> + %cst_1 = arith.constant 0.000000e+00 : f32 + %13 = tensor.empty() : tensor<40x40xf32> + %14 = linalg.generic {indexing_maps = [#map3, #map3, #map3], iterator_types = ["parallel", "parallel"]} ins(%12, %cst_0 : tensor<40x40xi1>, tensor<40x40xf32>) outs(%13 : tensor<40x40xf32>) { + ^bb0(%in: i1, %in_742: f32, %out: f32): + %4175 = arith.select %in, %cst_1, %in_742 : f32 + linalg.yield %4175 : f32 + } -> tensor<40x40xf32> + %extracted_slice = tensor.extract_slice %cst[0, 0] [1, 40] [1, 1] : tensor<1x40xi1> to tensor<1x40xi1> + %15 = tosa.reshape %extracted_slice {new_shape = array} : (tensor<1x40xi1>) -> tensor<1x1x40xi1> + %16 = tosa.reshape %15 {new_shape = array} : (tensor<1x1x40xi1>) -> tensor<1x1x1x40xi1> + %extracted_slice_2 = tensor.extract_slice %16[0, 0, 0, 0] [1, 1, 1, 40] [1, 1, 1, 1] : tensor<1x1x1x40xi1> to tensor<1x1x1x40xi1> + %17 = "tosa.const"() <{value = dense : tensor<1x1x40x40xi1>}> : () -> tensor<1x1x40x40xi1> + %18 = tosa.add %extracted_slice_2, %17 : (tensor<1x1x1x40xi1>, tensor<1x1x40x40xi1>) -> tensor<1x1x40x40xi1> + %19 = tosa.cast %18 : (tensor<1x1x40x40xi1>) -> tensor<1x1x40x40xf32> + %20 = "tosa.const"() <{value = dense<1.000000e+00> : tensor<1x1x40x40xf32>}> : () -> tensor<1x1x40x40xf32> + %21 = tosa.sub %20, %19 : (tensor<1x1x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x1x40x40xf32> + %22 = tosa.cast %21 : (tensor<1x1x40x40xf32>) -> tensor<1x1x40x40xi1> + %cst_3 = arith.constant -3.40282347E+38 : f32 + %23 = tensor.empty() : tensor<1x1x40x40xf32> + %24 = linalg.generic {indexing_maps = [#map4, #map4, #map4], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%22, %21 : tensor<1x1x40x40xi1>, tensor<1x1x40x40xf32>) outs(%23 : tensor<1x1x40x40xf32>) { + ^bb0(%in: i1, %in_742: f32, %out: f32): + %4175 = arith.select %in, %cst_3, %in_742 : f32 + linalg.yield %4175 : f32 + } -> tensor<1x1x40x40xf32> + %25 = tosa.reshape %14 {new_shape = array} : (tensor<40x40xf32>) -> tensor<1x40x40xf32> + %26 = tosa.reshape %25 {new_shape = array} : (tensor<1x40x40xf32>) -> tensor<1x1x40x40xf32> + %extracted_slice_4 = tensor.extract_slice %26[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x40xf32> to tensor<1x1x40x40xf32> + %extracted_slice_5 = tensor.extract_slice %extracted_slice_4[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x40xf32> to tensor<1x1x40x40xf32> + %27 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x40xf32>}> : () -> tensor<1x1x40x40xf32> + %28 = tosa.add %extracted_slice_5, %27 : (tensor<1x1x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x1x40x40xf32> + %29 = tosa.add %24, %28 : (tensor<1x1x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x1x40x40xf32> + // RMSNorm begins + %30 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32 = arith.constant 2 : i32 + %31 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%6 : tensor<1x40x4096xf32>) outs(%30 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %32 = tosa.reduce_sum %31 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> 
tensor<1x40x1xf32> + %33 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %34 = tosa.reciprocal %33 : (tensor<1xf32>) -> tensor<1xf32> + %35 = tosa.mul %34, %32 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %36 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %37 = tosa.add %35, %36 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %38 = tosa.rsqrt %37 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %39 = tosa.mul %6, %38 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %40 = tosa.reshape %arg2 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + // %41 is the input matrix X after embedding; + // the three consecutive, similar code blocks that follow compute Q, K, V (%46, %51, %56): + %41 = tosa.mul %40, %39 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + + %42 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %43 = tosa.transpose %arg3, %42 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %44 = tosa.reshape %41 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_6 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %45 = linalg.matmul {cast = #linalg.type_fn} ins(%44, %43 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_6 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %46 = tosa.reshape %45 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + + %47 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %48 = tosa.transpose %arg4, %47 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %49 = tosa.reshape %41 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_7 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %50 = linalg.matmul {cast = #linalg.type_fn} ins(%49, %48 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_7 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %51 = tosa.reshape %50 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + + %52 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %53 = tosa.transpose %arg5, %52 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %54 = tosa.reshape %41 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_8 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %55 = linalg.matmul {cast = #linalg.type_fn} ins(%54, %53 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_8 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %56 = tosa.reshape %55 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + // completed the calculation of Q, K, V; the dimensions are (batch, seq_len, num_heads, head_dim) + // transpose the Q, K, V dimensions for RoPE and the dot product + + // begin of RoPE
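+ // Annotation note (editor's sketch, not part of the generated IR): with the half-split layout used below, RoPE computes q_embed = q * cos + rotate_half(q) * sin, + // where rotate_half([x1, x2]) = [-x2, x1] swaps the two halves of the head dimension and negates the second half; + // the gathered tables %76 and %79 (built from %arg6 and %arg7) play the roles of cos and sin here.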
+ %57 = tosa.reshape %46 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %58 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %59 = tosa.transpose %57, %58 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + + %60 = tosa.reshape %51 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %61 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %62 = tosa.transpose %60, %61 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + + %63 = tosa.reshape %56 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %64 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %65 = tosa.transpose %63, %64 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + + %extracted_slice_9 = tensor.extract_slice %arg6[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_10 = tensor.extract_slice %extracted_slice_9[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_11 = tensor.extract_slice %extracted_slice_10[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %extracted_slice_12 = tensor.extract_slice %arg7[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_13 = tensor.extract_slice %extracted_slice_12[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_14 = tensor.extract_slice %extracted_slice_13[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %66 = tensor.empty() : tensor<1x40x128xf32> + %67 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_11 : tensor<1x1x40x128xf32>) outs(%66 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %68 = tensor.empty() : tensor<40x128xf32> + %69 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%67 : tensor<1x40x128xf32>) outs(%68 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %70 = tensor.empty() : tensor<1x40x128xf32> + %71 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_14 : tensor<1x1x40x128xf32>) outs(%70 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %72 = tensor.empty() : tensor<40x128xf32> + %73 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%71 : tensor<1x40x128xf32>) outs(%72 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + // The precompute_theta_pos_frequencies function is used to calculate the special values of RoPE; see: https://hyper.ai/wiki/29220 + %74 = tensor.empty() : tensor<1x40x128xf32> + %75 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%74 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %69[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %76 = tosa.reshape %75 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %77 = tensor.empty() : tensor<1x40x128xf32> + %78 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%77 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + 
%extracted = tensor.extract %73[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %79 = tosa.reshape %78 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %80 = tosa.mul %59, %76 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_15 = tensor.extract_slice %59[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_16 = tensor.extract_slice %59[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %81 = tosa.negate %extracted_slice_16 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %82 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice = tensor.insert_slice %81 into %82[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_17 = tensor.insert_slice %extracted_slice_15 into %inserted_slice[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %83 = tosa.mul %inserted_slice_17, %79 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %84 = tosa.add %80, %83 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %85 = tosa.mul %62, %76 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_18 = tensor.extract_slice %62[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_19 = tensor.extract_slice %62[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %86 = tosa.negate %extracted_slice_19 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %87 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_20 = tensor.insert_slice %86 into %87[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_21 = tensor.insert_slice %extracted_slice_18 into %inserted_slice_20[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + // end of RoPE, begin of Softmax(QK/sqrt(d_k)): + %88 = tosa.mul %inserted_slice_21, %79 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %89 = tosa.add %85, %88 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %90 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %91 = tosa.transpose %89, %90 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %92 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %93 = tosa.add %84, %92 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %94 = tosa.reshape %93 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %95 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32> + %96 = tosa.add %91, %95 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32> + %97 = tosa.reshape %96 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %98 = tosa.matmul %94, %97 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %99 = tosa.reshape %98 {new_shape = array} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32> + %100 = 
"tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %101 = tosa.reciprocal %100 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %102 = tosa.mul %99, %101 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %103 = tosa.add %102, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32> + %104 = tosa.reduce_max %103 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %105 = tosa.sub %103, %104 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %106 = tosa.exp %105 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %107 = tosa.reduce_sum %106 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %108 = tosa.reciprocal %107 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + %109 = tosa.mul %106, %108 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + // end of Softmax(QK/sqrt(d_k)), begin of matmul with V + %110 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %111 = tosa.add %109, %110 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %112 = tosa.reshape %111 {new_shape = array} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32> + %113 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %114 = tosa.add %65, %113 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %115 = tosa.reshape %114 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + // + %116 = tosa.matmul %112, %115 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + // complete one head Softmax(QK/sqrt(d_k)), collect all heads. + %117 = tosa.reshape %116 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %118 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %119 = tosa.transpose %117, %118 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %120 = tosa.identity %119 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32> + %121 = tosa.reshape %120 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %122 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %123 = tosa.transpose %arg8, %122 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %124 = tosa.reshape %121 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_22 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %125 = linalg.matmul {cast = #linalg.type_fn} ins(%124, %123 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_22 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %126 = tosa.reshape %125 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %127 = tosa.add %6, %126 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + // end of GQA(Group Query Attention) block, begin of FFN block(RMSNorm --> SwiGLU). 
+ %128 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_23 = arith.constant 2 : i32 + %129 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%127 : tensor<1x40x4096xf32>) outs(%128 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_23 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %130 = tosa.reduce_sum %129 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %131 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %132 = tosa.reciprocal %131 : (tensor<1xf32>) -> tensor<1xf32> + %133 = tosa.mul %132, %130 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %134 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %135 = tosa.add %133, %134 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %136 = tosa.rsqrt %135 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %137 = tosa.mul %127, %136 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %138 = tosa.reshape %arg9 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %139 = tosa.mul %138, %137 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %140 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %141 = tosa.transpose %arg10, %140 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %142 = tosa.reshape %139 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_24 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %143 = linalg.matmul {cast = #linalg.type_fn} ins(%142, %141 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_24 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %144 = tosa.reshape %143 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %145 = tosa.sigmoid %144 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %146 = tosa.mul %144, %145 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %147 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %148 = tosa.transpose %arg11, %147 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %149 = tosa.reshape %139 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_25 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %150 = linalg.matmul {cast = #linalg.type_fn} ins(%149, %148 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_25 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %151 = tosa.reshape %150 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %152 = tosa.mul %146, %151 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %153 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %154 = tosa.transpose %arg12, %153 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %155 = tosa.reshape %152 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_26 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %156 = linalg.matmul {cast = #linalg.type_fn} ins(%155, %154 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_26 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %157 = tosa.reshape %156 {new_shape = array} : (tensor<40x4096xf32>) -> 
tensor<1x40x4096xf32> + %158 = tosa.add %127, %157 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + // end of the last decoder block, begin of a new decoder block. + %159 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_27 = arith.constant 2 : i32 + %160 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%158 : tensor<1x40x4096xf32>) outs(%159 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_27 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %161 = tosa.reduce_sum %160 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %162 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %163 = tosa.reciprocal %162 : (tensor<1xf32>) -> tensor<1xf32> + %164 = tosa.mul %163, %161 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %165 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %166 = tosa.add %164, %165 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %167 = tosa.rsqrt %166 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %168 = tosa.mul %158, %167 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %169 = tosa.reshape %arg13 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + // %170 is the input matrix X after embedding; + // the three consecutive, similar code blocks that follow compute Q, K, V (%175, %180, %185): + %170 = tosa.mul %169, %168 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + + %171 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %172 = tosa.transpose %arg14, %171 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %173 = tosa.reshape %170 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_28 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %174 = linalg.matmul {cast = #linalg.type_fn} ins(%173, %172 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_28 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %175 = tosa.reshape %174 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + + %176 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %177 = tosa.transpose %arg15, %176 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %178 = tosa.reshape %170 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_29 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %179 = linalg.matmul {cast = #linalg.type_fn} ins(%178, %177 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_29 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %180 = tosa.reshape %179 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + + %181 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %182 = tosa.transpose %arg16, %181 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %183 = tosa.reshape %170 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_30 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %184 = linalg.matmul {cast = #linalg.type_fn} ins(%183, %182 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_30 : tensor<40x4096xf32>) 
-> tensor<1x40x4096xf32> + // completed the calculation of Q, K, V above. + %186 = tosa.reshape %175 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %187 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %188 = tosa.transpose %186, %187 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + + %189 = tosa.reshape %180 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %190 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %191 = tosa.transpose %189, %190 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + + %192 = tosa.reshape %185 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %193 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %194 = tosa.transpose %192, %193 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + + %extracted_slice_31 = tensor.extract_slice %arg17[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_32 = tensor.extract_slice %extracted_slice_31[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_33 = tensor.extract_slice %extracted_slice_32[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %extracted_slice_34 = tensor.extract_slice %arg18[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_35 = tensor.extract_slice %extracted_slice_34[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_36 = tensor.extract_slice %extracted_slice_35[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %195 = tensor.empty() : tensor<1x40x128xf32> + %196 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_33 : tensor<1x1x40x128xf32>) outs(%195 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %197 = tensor.empty() : tensor<40x128xf32> + // #map2 = affine_map<(d0, d1, d2) -> (d0, d1)> + // #map3 = affine_map<(d0, d1) -> (d0, d1)> + // #map4 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> + // #map5 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> + // #map6 = affine_map<(d0, d1, d2) -> (d0, 0, d1, d2)> + // #map7 = affine_map<(d0, d1) -> (0, d0, d1)> + %198 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%196 : tensor<1x40x128xf32>) outs(%197 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %199 = tensor.empty() : tensor<1x40x128xf32> + %200 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_36 : tensor<1x1x40x128xf32>) outs(%199 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %201 = tensor.empty() : tensor<40x128xf32> + %202 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%200 : tensor<1x40x128xf32>) outs(%201 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %203 = tensor.empty() : tensor<1x40x128xf32> + %204 = linalg.generic 
{indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%203 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %198[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %205 = tosa.reshape %204 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %206 = tensor.empty() : tensor<1x40x128xf32> + %207 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%206 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %202[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %208 = tosa.reshape %207 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %209 = tosa.mul %188, %205 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_37 = tensor.extract_slice %188[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_38 = tensor.extract_slice %188[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %210 = tosa.negate %extracted_slice_38 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %211 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_39 = tensor.insert_slice %210 into %211[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_40 = tensor.insert_slice %extracted_slice_37 into %inserted_slice_39[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %212 = tosa.mul %inserted_slice_40, %208 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %213 = tosa.add %209, %212 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + + %214 = tosa.mul %191, %205 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_41 = tensor.extract_slice %191[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_42 = tensor.extract_slice %191[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %215 = tosa.negate %extracted_slice_42 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %216 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_43 = tensor.insert_slice %215 into %216[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_44 = tensor.insert_slice %extracted_slice_41 into %inserted_slice_43[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %217 = tosa.mul %inserted_slice_44, %208 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %218 = tosa.add %214, %217 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + + %219 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %220 = tosa.transpose %218, %219 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %221 = "tosa.const"() 
<{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %222 = tosa.add %213, %221 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %223 = tosa.reshape %222 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %224 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32> + %225 = tosa.add %220, %224 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32> + %226 = tosa.reshape %225 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %227 = tosa.matmul %223, %226 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %228 = tosa.reshape %227 {new_shape = array} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32> + %229 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %230 = tosa.reciprocal %229 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %231 = tosa.mul %228, %230 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %232 = tosa.add %231, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32> + %233 = tosa.reduce_max %232 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %234 = tosa.sub %232, %233 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %235 = tosa.exp %234 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %236 = tosa.reduce_sum %235 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %237 = tosa.reciprocal %236 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + %238 = tosa.mul %235, %237 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %239 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %240 = tosa.add %238, %239 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %241 = tosa.reshape %240 {new_shape = array} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32> + %242 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %243 = tosa.add %194, %242 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %244 = tosa.reshape %243 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %245 = tosa.matmul %241, %244 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %246 = tosa.reshape %245 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %247 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %248 = tosa.transpose %246, %247 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %249 = tosa.identity %248 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32> + %250 = tosa.reshape %249 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %251 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %252 = tosa.transpose %arg19, %251 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %253 = tosa.reshape %250 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_45 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %254 = linalg.matmul {cast = #linalg.type_fn} ins(%253, %252 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_45 : tensor<40x4096xf32>) 
-> tensor<40x4096xf32> + %255 = tosa.reshape %254 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %256 = tosa.add %158, %255 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %257 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_46 = arith.constant 2 : i32 + %258 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%256 : tensor<1x40x4096xf32>) outs(%257 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_46 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %259 = tosa.reduce_sum %258 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %260 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %261 = tosa.reciprocal %260 : (tensor<1xf32>) -> tensor<1xf32> + %262 = tosa.mul %261, %259 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %263 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %264 = tosa.add %262, %263 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %265 = tosa.rsqrt %264 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %266 = tosa.mul %256, %265 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %267 = tosa.reshape %arg20 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %268 = tosa.mul %267, %266 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %269 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %270 = tosa.transpose %arg21, %269 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %271 = tosa.reshape %268 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_47 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %272 = linalg.matmul {cast = #linalg.type_fn} ins(%271, %270 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_47 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %273 = tosa.reshape %272 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %274 = tosa.sigmoid %273 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %275 = tosa.mul %273, %274 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %276 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %277 = tosa.transpose %arg22, %276 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %278 = tosa.reshape %268 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_48 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %279 = linalg.matmul {cast = #linalg.type_fn} ins(%278, %277 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_48 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %280 = tosa.reshape %279 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %281 = tosa.mul %275, %280 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %282 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %283 = tosa.transpose %arg23, %282 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %284 = tosa.reshape %281 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_49 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %285 = linalg.matmul {cast 
= #linalg.type_fn} ins(%284, %283 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_49 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %286 = tosa.reshape %285 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %287 = tosa.add %256, %286 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %288 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_50 = arith.constant 2 : i32 + %289 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%287 : tensor<1x40x4096xf32>) outs(%288 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_50 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %290 = tosa.reduce_sum %289 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %291 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %292 = tosa.reciprocal %291 : (tensor<1xf32>) -> tensor<1xf32> + %293 = tosa.mul %292, %290 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %294 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %295 = tosa.add %293, %294 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %296 = tosa.rsqrt %295 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %297 = tosa.mul %287, %296 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %298 = tosa.reshape %arg24 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %299 = tosa.mul %298, %297 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %300 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %301 = tosa.transpose %arg25, %300 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %302 = tosa.reshape %299 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_51 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %303 = linalg.matmul {cast = #linalg.type_fn} ins(%302, %301 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_51 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %304 = tosa.reshape %303 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %305 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %306 = tosa.transpose %arg26, %305 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %307 = tosa.reshape %299 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_52 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %308 = linalg.matmul {cast = #linalg.type_fn} ins(%307, %306 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_52 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %309 = tosa.reshape %308 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %310 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %311 = tosa.transpose %arg27, %310 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %312 = tosa.reshape %299 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_53 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %313 = linalg.matmul {cast = #linalg.type_fn} ins(%312, %311 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_53 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %314 = tosa.reshape %313 {new_shape = array} : (tensor<40x4096xf32>) -> 
+    %314 = tosa.reshape %313 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %315 = tosa.reshape %304 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %316 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %317 = tosa.transpose %315, %316 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %318 = tosa.reshape %309 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %319 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %320 = tosa.transpose %318, %319 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %321 = tosa.reshape %314 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %322 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %323 = tosa.transpose %321, %322 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_54 = tensor.extract_slice %arg28[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_55 = tensor.extract_slice %extracted_slice_54[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_56 = tensor.extract_slice %extracted_slice_55[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+    %extracted_slice_57 = tensor.extract_slice %arg29[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_58 = tensor.extract_slice %extracted_slice_57[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_59 = tensor.extract_slice %extracted_slice_58[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+    %324 = tensor.empty() : tensor<1x40x128xf32>
+    %325 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_56 : tensor<1x1x40x128xf32>) outs(%324 : tensor<1x40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<1x40x128xf32>
+    %326 = tensor.empty() : tensor<40x128xf32>
+    %327 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%325 : tensor<1x40x128xf32>) outs(%326 : tensor<40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<40x128xf32>
+    %328 = tensor.empty() : tensor<1x40x128xf32>
+    %329 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_59 : tensor<1x1x40x128xf32>) outs(%328 : tensor<1x40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<1x40x128xf32>
+    %330 = tensor.empty() : tensor<40x128xf32>
+    %331 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%329 : tensor<1x40x128xf32>) outs(%330 : tensor<40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<40x128xf32>
+    %332 = tensor.empty() : tensor<1x40x128xf32>
+    %333 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%332 : tensor<1x40x128xf32>) {
+    ^bb0(%in: i64, %out: f32):
+      %4175 = arith.index_cast %in : i64 to index
+      %4176 = linalg.index 2 : index
+      %extracted = tensor.extract %327[%4175, %4176] : tensor<40x128xf32>
+      linalg.yield %extracted : f32
+    } -> tensor<1x40x128xf32>
+    %334 = tosa.reshape %333 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+    %335 = tensor.empty() : tensor<1x40x128xf32>
+    %336 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%335 : tensor<1x40x128xf32>) {
+    ^bb0(%in: i64, %out: f32):
+      %4175 = arith.index_cast %in : i64 to index
+      %4176 = linalg.index 2 : index
+      %extracted = tensor.extract %331[%4175, %4176] : tensor<40x128xf32>
+      linalg.yield %extracted : f32
+    } -> tensor<1x40x128xf32>
+    %337 = tosa.reshape %336 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+    %338 = tosa.mul %317, %334 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_60 = tensor.extract_slice %317[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_61 = tensor.extract_slice %317[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %339 = tosa.negate %extracted_slice_61 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %340 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_62 = tensor.insert_slice %339 into %340[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_63 = tensor.insert_slice %extracted_slice_60 into %inserted_slice_62[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %341 = tosa.mul %inserted_slice_63, %337 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %342 = tosa.add %338, %341 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %343 = tosa.mul %320, %334 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_64 = tensor.extract_slice %320[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_65 = tensor.extract_slice %320[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %344 = tosa.negate %extracted_slice_65 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %345 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_66 = tensor.insert_slice %344 into %345[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_67 = tensor.insert_slice %extracted_slice_64 into %inserted_slice_66[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %346 = tosa.mul %inserted_slice_67, %337 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %347 = tosa.add %343, %346 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %348 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %349 = tosa.transpose %347, %348 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32>
+    %350 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %351 = tosa.add %342, %350 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %352 = tosa.reshape %351 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %353 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32>
+    %354 = tosa.add %349, %353 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32>
+    %355 = tosa.reshape %354 {new_shape = array<i64: 32, 128, 40>} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32>
+    %356 = tosa.matmul %352, %355 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32>
+    %357 = tosa.reshape %356 {new_shape = array<i64: 1, 32, 40, 40>} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %358 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %359 = tosa.reciprocal %358 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %360 = tosa.mul %357, %359 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %361 = tosa.add %360, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %362 = tosa.reduce_max %361 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %363 = tosa.sub %361, %362 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %364 = tosa.exp %363 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %365 = tosa.reduce_sum %364 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %366 = tosa.reciprocal %365 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32>
+    %367 = tosa.mul %364, %366 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %368 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %369 = tosa.add %367, %368 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %370 = tosa.reshape %369 {new_shape = array<i64: 32, 40, 40>} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32>
+    %371 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %372 = tosa.add %323, %371 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %373 = tosa.reshape %372 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %374 = tosa.matmul %370, %373 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32>
+    %375 = tosa.reshape %374 {new_shape = array<i64: 1, 32, 40, 128>} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %376 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %377 = tosa.transpose %375, %376 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32>
+    %378 = tosa.identity %377 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32>
+    %379 = tosa.reshape %378 {new_shape = array<i64: 1, 40, 4096>} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32>
+    %380 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %381 = tosa.transpose %arg30, %380 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %382 = tosa.reshape %379 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_68 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %383 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%382, %381 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_68 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %384 = tosa.reshape %383 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %385 = tosa.add %287, %384 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %386 = tensor.empty() : tensor<1x40x4096xf32>
+    %c2_i32_69 = arith.constant 2 : i32
+    %387 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%385 : tensor<1x40x4096xf32>) outs(%386 : tensor<1x40x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %4175 = math.fpowi %in, %c2_i32_69 : f32, i32
+      linalg.yield %4175 : f32
+    } -> tensor<1x40x4096xf32>
+    %388 = tosa.reduce_sum %387 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
+    %389 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32>
+    %390 = tosa.reciprocal %389 : (tensor<1xf32>) -> tensor<1xf32>
+    %391 = tosa.mul %390, %388 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %392 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32>
+    %393 = tosa.add %391, %392 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %394 = tosa.rsqrt %393 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %395 = tosa.mul %385, %394 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32>
+    %396 = tosa.reshape %arg31 {new_shape = array<i64: 1, 1, 4096>} : (tensor<4096xf32>) -> tensor<1x1x4096xf32>
+    %397 = tosa.mul %396, %395 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %398 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %399 = tosa.transpose %arg32, %398 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32>
+    %400 = tosa.reshape %397 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_70 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32>
+    %401 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%400, %399 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_70 : tensor<40x11008xf32>) -> tensor<40x11008xf32>
+    %402 = tosa.reshape %401 {new_shape = array<i64: 1, 40, 11008>} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32>
+    %403 = tosa.sigmoid %402 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %404 = tosa.mul %402, %403 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %405 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %406 = tosa.transpose %arg33, %405 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32>
+    %407 = tosa.reshape %397 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_71 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32>
+    %408 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%407, %406 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_71 : tensor<40x11008xf32>) -> tensor<40x11008xf32>
+    %409 = tosa.reshape %408 {new_shape = array<i64: 1, 40, 11008>} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32>
+    %410 = tosa.mul %404, %409 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %411 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %412 = tosa.transpose %arg34, %411 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32>
+    %413 = tosa.reshape %410 {new_shape = array<i64: 40, 11008>} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32>
+    %cst_72 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %414 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%413, %412 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_72 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %415 = tosa.reshape %414 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %416 = tosa.add %385, %415 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %417 = tensor.empty() : tensor<1x40x4096xf32>
+    %c2_i32_73 = arith.constant 2 : i32
+    %418 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%416 : tensor<1x40x4096xf32>) outs(%417 : tensor<1x40x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %4175 = math.fpowi %in, %c2_i32_73 : f32, i32
+      linalg.yield %4175 : f32
+    } -> tensor<1x40x4096xf32>
+    %419 = tosa.reduce_sum %418 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
+    %420 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32>
+    %421 = tosa.reciprocal %420 : (tensor<1xf32>) -> tensor<1xf32>
+    %422 = tosa.mul %421, %419 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %423 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32>
+    %424 = tosa.add %422, %423 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %425 = tosa.rsqrt %424 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %426 = tosa.mul %416, %425 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32>
+    %427 = tosa.reshape %arg35 {new_shape = array<i64: 1, 1, 4096>} : (tensor<4096xf32>) -> tensor<1x1x4096xf32>
+    %428 = tosa.mul %427, %426 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %429 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %430 = tosa.transpose %arg36, %429 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %431 = tosa.reshape %428 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_74 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %432 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%431, %430 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_74 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %433 = tosa.reshape %432 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %434 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %435 = tosa.transpose %arg37, %434 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %436 = tosa.reshape %428 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_75 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %437 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%436, %435 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_75 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %438 = tosa.reshape %437 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %439 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %440 = tosa.transpose %arg38, %439 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %441 = tosa.reshape %428 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_76 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %442 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%441, %440 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_76 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %443 = tosa.reshape %442 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %444 = tosa.reshape %433 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %445 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %446 = tosa.transpose %444, %445 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %447 = tosa.reshape %438 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %448 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %449 = tosa.transpose %447, %448 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %450 = tosa.reshape %443 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %451 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %452 = tosa.transpose %450, %451 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_77 = tensor.extract_slice %arg39[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_78 = tensor.extract_slice %extracted_slice_77[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_79 = tensor.extract_slice %extracted_slice_78[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+    %extracted_slice_80 = tensor.extract_slice %arg40[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_81 = tensor.extract_slice %extracted_slice_80[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_82 = tensor.extract_slice %extracted_slice_81[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+    %453 = tensor.empty() : tensor<1x40x128xf32>
+    %454 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_79 : tensor<1x1x40x128xf32>) outs(%453 : tensor<1x40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<1x40x128xf32>
+    %455 = tensor.empty() : tensor<40x128xf32>
+    %456 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%454 : tensor<1x40x128xf32>) outs(%455 : tensor<40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<40x128xf32>
+    %457 = tensor.empty() : tensor<1x40x128xf32>
+    %458 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_82 : tensor<1x1x40x128xf32>) outs(%457 : tensor<1x40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<1x40x128xf32>
+    %459 = tensor.empty() : tensor<40x128xf32>
+    %460 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%458 : tensor<1x40x128xf32>) outs(%459 : tensor<40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<40x128xf32>
+    %461 = tensor.empty() : tensor<1x40x128xf32>
+    %462 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%461 : tensor<1x40x128xf32>) {
+    ^bb0(%in: i64, %out: f32):
+      %4175 = arith.index_cast %in : i64 to index
+      %4176 = linalg.index 2 : index
+      %extracted = tensor.extract %456[%4175, %4176] : tensor<40x128xf32>
+      linalg.yield %extracted : f32
+    } -> tensor<1x40x128xf32>
+    %463 = tosa.reshape %462 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+    %464 = tensor.empty() : tensor<1x40x128xf32>
+    %465 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%464 : tensor<1x40x128xf32>) {
+    ^bb0(%in: i64, %out: f32):
+      %4175 = arith.index_cast %in : i64 to index
+      %4176 = linalg.index 2 : index
+      %extracted = tensor.extract %460[%4175, %4176] : tensor<40x128xf32>
+      linalg.yield %extracted : f32
+    } -> tensor<1x40x128xf32>
+    %466 = tosa.reshape %465 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+    %467 = tosa.mul %446, %463 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_83 = tensor.extract_slice %446[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_84 = tensor.extract_slice %446[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %468 = tosa.negate %extracted_slice_84 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %469 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_85 = tensor.insert_slice %468 into %469[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_86 = tensor.insert_slice %extracted_slice_83 into %inserted_slice_85[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %470 = tosa.mul %inserted_slice_86, %466 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %471 = tosa.add %467, %470 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %472 = tosa.mul %449, %463 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_87 = tensor.extract_slice %449[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_88 = tensor.extract_slice %449[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %473 = tosa.negate %extracted_slice_88 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %474 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_89 = tensor.insert_slice %473 into %474[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_90 = tensor.insert_slice %extracted_slice_87 into %inserted_slice_89[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %475 = tosa.mul %inserted_slice_90, %466 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %476 = tosa.add %472, %475 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %477 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %478 = tosa.transpose %476, %477 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32>
+    %479 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %480 = tosa.add %471, %479 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %481 = tosa.reshape %480 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %482 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32>
+    %483 = tosa.add %478, %482 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32>
+    %484 = tosa.reshape %483 {new_shape = array<i64: 32, 128, 40>} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32>
+    %485 = tosa.matmul %481, %484 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32>
+    %486 = tosa.reshape %485 {new_shape = array<i64: 1, 32, 40, 40>} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %487 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %488 = tosa.reciprocal %487 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %489 = tosa.mul %486, %488 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %490 = tosa.add %489, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %491 = tosa.reduce_max %490 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %492 = tosa.sub %490, %491 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %493 = tosa.exp %492 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %494 = tosa.reduce_sum %493 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %495 = tosa.reciprocal %494 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32>
+    %496 = tosa.mul %493, %495 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %497 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %498 = tosa.add %496, %497 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %499 = tosa.reshape %498 {new_shape = array<i64: 32, 40, 40>} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32>
+    %500 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %501 = tosa.add %452, %500 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %502 = tosa.reshape %501 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %503 = tosa.matmul %499, %502 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32>
+    %504 = tosa.reshape %503 {new_shape = array<i64: 1, 32, 40, 128>} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %505 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %506 = tosa.transpose %504, %505 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32>
+    %507 = tosa.identity %506 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32>
+    %508 = tosa.reshape %507 {new_shape = array<i64: 1, 40, 4096>} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32>
+    %509 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %510 = tosa.transpose %arg41, %509 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %511 = tosa.reshape %508 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_91 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %512 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%511, %510 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_91 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %513 = tosa.reshape %512 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %514 = tosa.add %416, %513 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %515 = tensor.empty() : tensor<1x40x4096xf32>
+    %c2_i32_92 = arith.constant 2 : i32
+    %516 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%514 : tensor<1x40x4096xf32>) outs(%515 : tensor<1x40x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %4175 = math.fpowi %in, %c2_i32_92 : f32, i32
+      linalg.yield %4175 : f32
+    } -> tensor<1x40x4096xf32>
+    %517 = tosa.reduce_sum %516 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
"tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %519 = tosa.reciprocal %518 : (tensor<1xf32>) -> tensor<1xf32> + %520 = tosa.mul %519, %517 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %521 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %522 = tosa.add %520, %521 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %523 = tosa.rsqrt %522 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %524 = tosa.mul %514, %523 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %525 = tosa.reshape %arg42 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %526 = tosa.mul %525, %524 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %527 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %528 = tosa.transpose %arg43, %527 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %529 = tosa.reshape %526 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_93 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %530 = linalg.matmul {cast = #linalg.type_fn} ins(%529, %528 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_93 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %531 = tosa.reshape %530 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %532 = tosa.sigmoid %531 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %533 = tosa.mul %531, %532 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %534 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %535 = tosa.transpose %arg44, %534 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %536 = tosa.reshape %526 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_94 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %537 = linalg.matmul {cast = #linalg.type_fn} ins(%536, %535 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_94 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %538 = tosa.reshape %537 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %539 = tosa.mul %533, %538 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %540 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %541 = tosa.transpose %arg45, %540 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %542 = tosa.reshape %539 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_95 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %543 = linalg.matmul {cast = #linalg.type_fn} ins(%542, %541 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_95 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %544 = tosa.reshape %543 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %545 = tosa.add %514, %544 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %546 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_96 = arith.constant 2 : i32 + %547 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%545 : tensor<1x40x4096xf32>) outs(%546 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_96 : f32, i32 + linalg.yield %4175 : f32 + } -> 
+    } -> tensor<1x40x4096xf32>
+    %548 = tosa.reduce_sum %547 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
+    %549 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32>
+    %550 = tosa.reciprocal %549 : (tensor<1xf32>) -> tensor<1xf32>
+    %551 = tosa.mul %550, %548 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %552 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32>
+    %553 = tosa.add %551, %552 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %554 = tosa.rsqrt %553 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %555 = tosa.mul %545, %554 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32>
+    %556 = tosa.reshape %arg46 {new_shape = array<i64: 1, 1, 4096>} : (tensor<4096xf32>) -> tensor<1x1x4096xf32>
+    %557 = tosa.mul %556, %555 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %558 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %559 = tosa.transpose %arg47, %558 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %560 = tosa.reshape %557 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_97 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %561 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%560, %559 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_97 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %562 = tosa.reshape %561 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %563 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %564 = tosa.transpose %arg48, %563 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %565 = tosa.reshape %557 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_98 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %566 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%565, %564 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_98 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %567 = tosa.reshape %566 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %568 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %569 = tosa.transpose %arg49, %568 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %570 = tosa.reshape %557 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_99 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %571 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%570, %569 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_99 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %572 = tosa.reshape %571 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %573 = tosa.reshape %562 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %574 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %575 = tosa.transpose %573, %574 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %576 = tosa.reshape %567 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %577 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %578 = tosa.transpose %576, %577 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %579 = tosa.reshape %572 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
"tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %581 = tosa.transpose %579, %580 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %extracted_slice_100 = tensor.extract_slice %arg50[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_101 = tensor.extract_slice %extracted_slice_100[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_102 = tensor.extract_slice %extracted_slice_101[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %extracted_slice_103 = tensor.extract_slice %arg51[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_104 = tensor.extract_slice %extracted_slice_103[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_105 = tensor.extract_slice %extracted_slice_104[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %582 = tensor.empty() : tensor<1x40x128xf32> + %583 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_102 : tensor<1x1x40x128xf32>) outs(%582 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %584 = tensor.empty() : tensor<40x128xf32> + %585 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%583 : tensor<1x40x128xf32>) outs(%584 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %586 = tensor.empty() : tensor<1x40x128xf32> + %587 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_105 : tensor<1x1x40x128xf32>) outs(%586 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %588 = tensor.empty() : tensor<40x128xf32> + %589 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%587 : tensor<1x40x128xf32>) outs(%588 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %590 = tensor.empty() : tensor<1x40x128xf32> + %591 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%590 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %585[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %592 = tosa.reshape %591 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %593 = tensor.empty() : tensor<1x40x128xf32> + %594 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%593 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %589[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %595 = tosa.reshape %594 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %596 = tosa.mul %575, %592 {shift = 0 : i8} : 
+    %596 = tosa.mul %575, %592 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_106 = tensor.extract_slice %575[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_107 = tensor.extract_slice %575[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %597 = tosa.negate %extracted_slice_107 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %598 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_108 = tensor.insert_slice %597 into %598[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_109 = tensor.insert_slice %extracted_slice_106 into %inserted_slice_108[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %599 = tosa.mul %inserted_slice_109, %595 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %600 = tosa.add %596, %599 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %601 = tosa.mul %578, %592 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_110 = tensor.extract_slice %578[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_111 = tensor.extract_slice %578[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %602 = tosa.negate %extracted_slice_111 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %603 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_112 = tensor.insert_slice %602 into %603[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_113 = tensor.insert_slice %extracted_slice_110 into %inserted_slice_112[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %604 = tosa.mul %inserted_slice_113, %595 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %605 = tosa.add %601, %604 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %606 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %607 = tosa.transpose %605, %606 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32>
+    %608 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %609 = tosa.add %600, %608 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %610 = tosa.reshape %609 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %611 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32>
+    %612 = tosa.add %607, %611 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32>
+    %613 = tosa.reshape %612 {new_shape = array<i64: 32, 128, 40>} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32>
+    %614 = tosa.matmul %610, %613 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32>
+    %615 = tosa.reshape %614 {new_shape = array<i64: 1, 32, 40, 40>} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %616 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %617 = tosa.reciprocal %616 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %618 = tosa.mul %615, %617 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %619 = tosa.add %618, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %620 = tosa.reduce_max %619 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %621 = tosa.sub %619, %620 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %622 = tosa.exp %621 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %623 = tosa.reduce_sum %622 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %624 = tosa.reciprocal %623 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32>
+    %625 = tosa.mul %622, %624 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %626 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %627 = tosa.add %625, %626 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %628 = tosa.reshape %627 {new_shape = array<i64: 32, 40, 40>} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32>
+    %629 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %630 = tosa.add %581, %629 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %631 = tosa.reshape %630 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %632 = tosa.matmul %628, %631 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32>
+    %633 = tosa.reshape %632 {new_shape = array<i64: 1, 32, 40, 128>} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %634 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %635 = tosa.transpose %633, %634 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32>
+    %636 = tosa.identity %635 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32>
+    %637 = tosa.reshape %636 {new_shape = array<i64: 1, 40, 4096>} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32>
+    %638 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %639 = tosa.transpose %arg52, %638 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %640 = tosa.reshape %637 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_114 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %641 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%640, %639 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_114 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %642 = tosa.reshape %641 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %643 = tosa.add %545, %642 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %644 = tensor.empty() : tensor<1x40x4096xf32>
+    %c2_i32_115 = arith.constant 2 : i32
+    %645 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%643 : tensor<1x40x4096xf32>) outs(%644 : tensor<1x40x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %4175 = math.fpowi %in, %c2_i32_115 : f32, i32
+      linalg.yield %4175 : f32
+    } -> tensor<1x40x4096xf32>
+    %646 = tosa.reduce_sum %645 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
+    %647 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32>
+    %648 = tosa.reciprocal %647 : (tensor<1xf32>) -> tensor<1xf32>
+    %649 = tosa.mul %648, %646 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %650 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32>
+    %651 = tosa.add %649, %650 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %652 = tosa.rsqrt %651 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %653 = tosa.mul %643, %652 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32>
+    %654 = tosa.reshape %arg53 {new_shape = array<i64: 1, 1, 4096>} : (tensor<4096xf32>) -> tensor<1x1x4096xf32>
+    %655 = tosa.mul %654, %653 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %656 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %657 = tosa.transpose %arg54, %656 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32>
+    %658 = tosa.reshape %655 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_116 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32>
+    %659 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%658, %657 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_116 : tensor<40x11008xf32>) -> tensor<40x11008xf32>
+    %660 = tosa.reshape %659 {new_shape = array<i64: 1, 40, 11008>} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32>
+    %661 = tosa.sigmoid %660 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %662 = tosa.mul %660, %661 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %663 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %664 = tosa.transpose %arg55, %663 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32>
+    %665 = tosa.reshape %655 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_117 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32>
+    %666 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%665, %664 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_117 : tensor<40x11008xf32>) -> tensor<40x11008xf32>
+    %667 = tosa.reshape %666 {new_shape = array<i64: 1, 40, 11008>} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32>
+    %668 = tosa.mul %662, %667 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %669 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %670 = tosa.transpose %arg56, %669 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32>
+    %671 = tosa.reshape %668 {new_shape = array<i64: 40, 11008>} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32>
+    %cst_118 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %672 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%671, %670 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_118 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %673 = tosa.reshape %672 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %674 = tosa.add %643, %673 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %675 = tensor.empty() : tensor<1x40x4096xf32>
+    %c2_i32_119 = arith.constant 2 : i32
+    %676 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%674 : tensor<1x40x4096xf32>) outs(%675 : tensor<1x40x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %4175 = math.fpowi %in, %c2_i32_119 : f32, i32
+      linalg.yield %4175 : f32
+    } -> tensor<1x40x4096xf32>
+    %677 = tosa.reduce_sum %676 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
+    %678 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32>
+    %679 = tosa.reciprocal %678 : (tensor<1xf32>) -> tensor<1xf32>
+    %680 = tosa.mul %679, %677 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %681 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32>
+    %682 = tosa.add %680, %681 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %683 = tosa.rsqrt %682 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %684 = tosa.mul %674, %683 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32>
+    %685 = tosa.reshape %arg57 {new_shape = array<i64: 1, 1, 4096>} : (tensor<4096xf32>) -> tensor<1x1x4096xf32>
+    %686 = tosa.mul %685, %684 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %687 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %688 = tosa.transpose %arg58, %687 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %689 = tosa.reshape %686 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_120 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %690 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%689, %688 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_120 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %691 = tosa.reshape %690 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %692 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %693 = tosa.transpose %arg59, %692 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %694 = tosa.reshape %686 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_121 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %695 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%694, %693 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_121 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %696 = tosa.reshape %695 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %697 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %698 = tosa.transpose %arg60, %697 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %699 = tosa.reshape %686 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_122 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %700 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%699, %698 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_122 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %701 = tosa.reshape %700 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %702 = tosa.reshape %691 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %703 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %704 = tosa.transpose %702, %703 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %705 = tosa.reshape %696 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %706 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %707 = tosa.transpose %705, %706 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %708 = tosa.reshape %701 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %709 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %710 = tosa.transpose %708, %709 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_123 = tensor.extract_slice %arg61[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_124 = tensor.extract_slice %extracted_slice_123[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_125 = tensor.extract_slice %extracted_slice_124[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+    %extracted_slice_126 = tensor.extract_slice %arg62[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_127 = tensor.extract_slice %extracted_slice_126[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_128 = tensor.extract_slice %extracted_slice_127[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+    %711 = tensor.empty() : tensor<1x40x128xf32>
+    %712 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_125 : tensor<1x1x40x128xf32>) outs(%711 : tensor<1x40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<1x40x128xf32>
+    %713 = tensor.empty() : tensor<40x128xf32>
+    %714 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%712 : tensor<1x40x128xf32>) outs(%713 : tensor<40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<40x128xf32>
+    %715 = tensor.empty() : tensor<1x40x128xf32>
+    %716 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_128 : tensor<1x1x40x128xf32>) outs(%715 : tensor<1x40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<1x40x128xf32>
+    %717 = tensor.empty() : tensor<40x128xf32>
+    %718 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%716 : tensor<1x40x128xf32>) outs(%717 : tensor<40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<40x128xf32>
+    %719 = tensor.empty() : tensor<1x40x128xf32>
+    %720 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%719 : tensor<1x40x128xf32>) {
+    ^bb0(%in: i64, %out: f32):
+      %4175 = arith.index_cast %in : i64 to index
+      %4176 = linalg.index 2 : index
+      %extracted = tensor.extract %714[%4175, %4176] : tensor<40x128xf32>
+      linalg.yield %extracted : f32
+    } -> tensor<1x40x128xf32>
+    %721 = tosa.reshape %720 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+    %722 = tensor.empty() : tensor<1x40x128xf32>
+    %723 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%722 : tensor<1x40x128xf32>) {
+    ^bb0(%in: i64, %out: f32):
+      %4175 = arith.index_cast %in : i64 to index
+      %4176 = linalg.index 2 : index
+      %extracted = tensor.extract %718[%4175, %4176] : tensor<40x128xf32>
+      linalg.yield %extracted : f32
+    } -> tensor<1x40x128xf32>
+    %724 = tosa.reshape %723 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+    %725 = tosa.mul %704, %721 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_129 = tensor.extract_slice %704[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_130 = tensor.extract_slice %704[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %726 = tosa.negate %extracted_slice_130 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %727 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_131 = tensor.insert_slice %726 into %727[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_132 = tensor.insert_slice %extracted_slice_129 into %inserted_slice_131[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %728 = tosa.mul %inserted_slice_132, %724 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %729 = tosa.add %725, %728 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %730 = tosa.mul %707, %721 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_133 = tensor.extract_slice %707[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_134 = tensor.extract_slice %707[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %731 = tosa.negate %extracted_slice_134 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %732 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_135 = tensor.insert_slice %731 into %732[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_136 = tensor.insert_slice %extracted_slice_133 into %inserted_slice_135[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %733 = tosa.mul %inserted_slice_136, %724 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %734 = tosa.add %730, %733 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %735 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %736 = tosa.transpose %734, %735 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32>
+    %737 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %738 = tosa.add %729, %737 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %739 = tosa.reshape %738 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %740 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32>
+    %741 = tosa.add %736, %740 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32>
+    %742 = tosa.reshape %741 {new_shape = array<i64: 32, 128, 40>} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32>
+    %743 = tosa.matmul %739, %742 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32>
+    %744 = tosa.reshape %743 {new_shape = array<i64: 1, 32, 40, 40>} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %745 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %746 = tosa.reciprocal %745 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %747 = tosa.mul %744, %746 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %748 = tosa.add %747, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %749 = tosa.reduce_max %748 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %751 = tosa.exp %750 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %752 = tosa.reduce_sum %751 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %753 = tosa.reciprocal %752 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + %754 = tosa.mul %751, %753 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %755 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %756 = tosa.add %754, %755 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %757 = tosa.reshape %756 {new_shape = array} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32> + %758 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %759 = tosa.add %710, %758 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %760 = tosa.reshape %759 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %761 = tosa.matmul %757, %760 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %762 = tosa.reshape %761 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %763 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %764 = tosa.transpose %762, %763 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %765 = tosa.identity %764 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32> + %766 = tosa.reshape %765 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %767 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %768 = tosa.transpose %arg63, %767 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %769 = tosa.reshape %766 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_137 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %770 = linalg.matmul {cast = #linalg.type_fn} ins(%769, %768 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_137 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %771 = tosa.reshape %770 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %772 = tosa.add %674, %771 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %773 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_138 = arith.constant 2 : i32 + %774 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%772 : tensor<1x40x4096xf32>) outs(%773 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_138 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %775 = tosa.reduce_sum %774 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %776 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %777 = tosa.reciprocal %776 : (tensor<1xf32>) -> tensor<1xf32> + %778 = tosa.mul %777, %775 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %779 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %780 = tosa.add %778, %779 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %781 = tosa.rsqrt %780 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %782 = tosa.mul %772, %781 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %783 = tosa.reshape 
%arg64 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %784 = tosa.mul %783, %782 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %785 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %786 = tosa.transpose %arg65, %785 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %787 = tosa.reshape %784 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_139 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %788 = linalg.matmul {cast = #linalg.type_fn} ins(%787, %786 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_139 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %789 = tosa.reshape %788 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %790 = tosa.sigmoid %789 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %791 = tosa.mul %789, %790 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %792 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %793 = tosa.transpose %arg66, %792 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %794 = tosa.reshape %784 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_140 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %795 = linalg.matmul {cast = #linalg.type_fn} ins(%794, %793 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_140 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %796 = tosa.reshape %795 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %797 = tosa.mul %791, %796 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %798 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %799 = tosa.transpose %arg67, %798 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %800 = tosa.reshape %797 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_141 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %801 = linalg.matmul {cast = #linalg.type_fn} ins(%800, %799 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_141 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %802 = tosa.reshape %801 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %803 = tosa.add %772, %802 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %804 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_142 = arith.constant 2 : i32 + %805 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%803 : tensor<1x40x4096xf32>) outs(%804 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_142 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %806 = tosa.reduce_sum %805 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %807 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %808 = tosa.reciprocal %807 : (tensor<1xf32>) -> tensor<1xf32> + %809 = tosa.mul %808, %806 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %810 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %811 = tosa.add %809, %810 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %812 = tosa.rsqrt %811 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %813 = 
tosa.mul %803, %812 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %814 = tosa.reshape %arg68 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %815 = tosa.mul %814, %813 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %816 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %817 = tosa.transpose %arg69, %816 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %818 = tosa.reshape %815 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_143 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %819 = linalg.matmul {cast = #linalg.type_fn} ins(%818, %817 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_143 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %820 = tosa.reshape %819 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %821 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %822 = tosa.transpose %arg70, %821 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %823 = tosa.reshape %815 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_144 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %824 = linalg.matmul {cast = #linalg.type_fn} ins(%823, %822 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_144 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %825 = tosa.reshape %824 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %826 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %827 = tosa.transpose %arg71, %826 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %828 = tosa.reshape %815 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_145 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %829 = linalg.matmul {cast = #linalg.type_fn} ins(%828, %827 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_145 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %830 = tosa.reshape %829 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %831 = tosa.reshape %820 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %832 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %833 = tosa.transpose %831, %832 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %834 = tosa.reshape %825 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %835 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %836 = tosa.transpose %834, %835 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %837 = tosa.reshape %830 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %838 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %839 = tosa.transpose %837, %838 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %extracted_slice_146 = tensor.extract_slice %arg72[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_147 = tensor.extract_slice %extracted_slice_146[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_148 = tensor.extract_slice %extracted_slice_147[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to 
tensor<1x1x40x128xf32> + %extracted_slice_149 = tensor.extract_slice %arg73[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_150 = tensor.extract_slice %extracted_slice_149[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_151 = tensor.extract_slice %extracted_slice_150[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %840 = tensor.empty() : tensor<1x40x128xf32> + %841 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_148 : tensor<1x1x40x128xf32>) outs(%840 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %842 = tensor.empty() : tensor<40x128xf32> + %843 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%841 : tensor<1x40x128xf32>) outs(%842 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %844 = tensor.empty() : tensor<1x40x128xf32> + %845 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_151 : tensor<1x1x40x128xf32>) outs(%844 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %846 = tensor.empty() : tensor<40x128xf32> + %847 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%845 : tensor<1x40x128xf32>) outs(%846 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %848 = tensor.empty() : tensor<1x40x128xf32> + %849 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%848 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %843[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %850 = tosa.reshape %849 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %851 = tensor.empty() : tensor<1x40x128xf32> + %852 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%851 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %847[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %853 = tosa.reshape %852 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %854 = tosa.mul %833, %850 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_152 = tensor.extract_slice %833[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_153 = tensor.extract_slice %833[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %855 = tosa.negate %extracted_slice_153 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %856 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_154 = tensor.insert_slice %855 into %856[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into 
tensor<1x32x40x128xf32> + %inserted_slice_155 = tensor.insert_slice %extracted_slice_152 into %inserted_slice_154[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %857 = tosa.mul %inserted_slice_155, %853 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %858 = tosa.add %854, %857 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %859 = tosa.mul %836, %850 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_156 = tensor.extract_slice %836[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_157 = tensor.extract_slice %836[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %860 = tosa.negate %extracted_slice_157 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %861 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_158 = tensor.insert_slice %860 into %861[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_159 = tensor.insert_slice %extracted_slice_156 into %inserted_slice_158[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %862 = tosa.mul %inserted_slice_159, %853 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %863 = tosa.add %859, %862 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %864 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %865 = tosa.transpose %863, %864 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %866 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %867 = tosa.add %858, %866 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %868 = tosa.reshape %867 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %869 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32> + %870 = tosa.add %865, %869 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32> + %871 = tosa.reshape %870 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %872 = tosa.matmul %868, %871 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %873 = tosa.reshape %872 {new_shape = array} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32> + %874 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %875 = tosa.reciprocal %874 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %876 = tosa.mul %873, %875 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %877 = tosa.add %876, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32> + %878 = tosa.reduce_max %877 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %879 = tosa.sub %877, %878 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %880 = tosa.exp %879 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %881 = tosa.reduce_sum %880 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %882 = tosa.reciprocal %881 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + %883 = tosa.mul 
%880, %882 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %884 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %885 = tosa.add %883, %884 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %886 = tosa.reshape %885 {new_shape = array} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32> + %887 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %888 = tosa.add %839, %887 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %889 = tosa.reshape %888 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %890 = tosa.matmul %886, %889 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %891 = tosa.reshape %890 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %892 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %893 = tosa.transpose %891, %892 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %894 = tosa.identity %893 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32> + %895 = tosa.reshape %894 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %896 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %897 = tosa.transpose %arg74, %896 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %898 = tosa.reshape %895 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_160 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %899 = linalg.matmul {cast = #linalg.type_fn} ins(%898, %897 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_160 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %900 = tosa.reshape %899 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %901 = tosa.add %803, %900 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %902 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_161 = arith.constant 2 : i32 + %903 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%901 : tensor<1x40x4096xf32>) outs(%902 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_161 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %904 = tosa.reduce_sum %903 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %905 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %906 = tosa.reciprocal %905 : (tensor<1xf32>) -> tensor<1xf32> + %907 = tosa.mul %906, %904 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %908 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %909 = tosa.add %907, %908 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %910 = tosa.rsqrt %909 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %911 = tosa.mul %901, %910 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %912 = tosa.reshape %arg75 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %913 = tosa.mul %912, %911 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %914 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %915 = tosa.transpose %arg76, %914 : 
(tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %916 = tosa.reshape %913 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_162 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %917 = linalg.matmul {cast = #linalg.type_fn} ins(%916, %915 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_162 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %918 = tosa.reshape %917 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %919 = tosa.sigmoid %918 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %920 = tosa.mul %918, %919 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %921 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %922 = tosa.transpose %arg77, %921 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %923 = tosa.reshape %913 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_163 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %924 = linalg.matmul {cast = #linalg.type_fn} ins(%923, %922 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_163 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %925 = tosa.reshape %924 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %926 = tosa.mul %920, %925 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %927 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %928 = tosa.transpose %arg78, %927 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %929 = tosa.reshape %926 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_164 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %930 = linalg.matmul {cast = #linalg.type_fn} ins(%929, %928 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_164 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %931 = tosa.reshape %930 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %932 = tosa.add %901, %931 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %933 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_165 = arith.constant 2 : i32 + %934 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%932 : tensor<1x40x4096xf32>) outs(%933 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_165 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %935 = tosa.reduce_sum %934 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %936 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %937 = tosa.reciprocal %936 : (tensor<1xf32>) -> tensor<1xf32> + %938 = tosa.mul %937, %935 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %939 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %940 = tosa.add %938, %939 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %941 = tosa.rsqrt %940 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %942 = tosa.mul %932, %941 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %943 = tosa.reshape %arg79 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %944 = tosa.mul %943, %942 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + 
%945 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %946 = tosa.transpose %arg80, %945 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %947 = tosa.reshape %944 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_166 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %948 = linalg.matmul {cast = #linalg.type_fn} ins(%947, %946 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_166 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %949 = tosa.reshape %948 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %950 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %951 = tosa.transpose %arg81, %950 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %952 = tosa.reshape %944 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_167 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %953 = linalg.matmul {cast = #linalg.type_fn} ins(%952, %951 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_167 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %954 = tosa.reshape %953 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %955 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %956 = tosa.transpose %arg82, %955 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %957 = tosa.reshape %944 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_168 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %958 = linalg.matmul {cast = #linalg.type_fn} ins(%957, %956 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_168 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %959 = tosa.reshape %958 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %960 = tosa.reshape %949 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %961 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %962 = tosa.transpose %960, %961 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %963 = tosa.reshape %954 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %964 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %965 = tosa.transpose %963, %964 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %966 = tosa.reshape %959 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %967 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %968 = tosa.transpose %966, %967 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %extracted_slice_169 = tensor.extract_slice %arg83[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_170 = tensor.extract_slice %extracted_slice_169[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_171 = tensor.extract_slice %extracted_slice_170[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %extracted_slice_172 = tensor.extract_slice %arg84[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_173 = tensor.extract_slice %extracted_slice_172[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> 
+ %extracted_slice_174 = tensor.extract_slice %extracted_slice_173[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %969 = tensor.empty() : tensor<1x40x128xf32> + %970 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_171 : tensor<1x1x40x128xf32>) outs(%969 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %971 = tensor.empty() : tensor<40x128xf32> + %972 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%970 : tensor<1x40x128xf32>) outs(%971 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %973 = tensor.empty() : tensor<1x40x128xf32> + %974 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_174 : tensor<1x1x40x128xf32>) outs(%973 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %975 = tensor.empty() : tensor<40x128xf32> + %976 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%974 : tensor<1x40x128xf32>) outs(%975 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %977 = tensor.empty() : tensor<1x40x128xf32> + %978 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%977 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %972[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %979 = tosa.reshape %978 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %980 = tensor.empty() : tensor<1x40x128xf32> + %981 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%980 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %976[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %982 = tosa.reshape %981 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %983 = tosa.mul %962, %979 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_175 = tensor.extract_slice %962[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_176 = tensor.extract_slice %962[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %984 = tosa.negate %extracted_slice_176 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %985 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_177 = tensor.insert_slice %984 into %985[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_178 = tensor.insert_slice %extracted_slice_175 into %inserted_slice_177[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %986 = tosa.mul %inserted_slice_178, %982 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> 
tensor<1x32x40x128xf32> + %987 = tosa.add %983, %986 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %988 = tosa.mul %965, %979 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_179 = tensor.extract_slice %965[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_180 = tensor.extract_slice %965[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %989 = tosa.negate %extracted_slice_180 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %990 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_181 = tensor.insert_slice %989 into %990[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_182 = tensor.insert_slice %extracted_slice_179 into %inserted_slice_181[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %991 = tosa.mul %inserted_slice_182, %982 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %992 = tosa.add %988, %991 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %993 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %994 = tosa.transpose %992, %993 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %995 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %996 = tosa.add %987, %995 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %997 = tosa.reshape %996 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %998 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32> + %999 = tosa.add %994, %998 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32> + %1000 = tosa.reshape %999 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %1001 = tosa.matmul %997, %1000 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %1002 = tosa.reshape %1001 {new_shape = array} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1003 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %1004 = tosa.reciprocal %1003 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1005 = tosa.mul %1002, %1004 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1006 = tosa.add %1005, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32> + %1007 = tosa.reduce_max %1006 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %1008 = tosa.sub %1006, %1007 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %1009 = tosa.exp %1008 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1010 = tosa.reduce_sum %1009 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %1011 = tosa.reciprocal %1010 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + %1012 = tosa.mul %1009, %1011 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %1013 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %1014 = tosa.add %1012, %1013 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) 
-> tensor<1x32x40x40xf32> + %1015 = tosa.reshape %1014 {new_shape = array} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32> + %1016 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %1017 = tosa.add %968, %1016 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1018 = tosa.reshape %1017 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1019 = tosa.matmul %1015, %1018 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %1020 = tosa.reshape %1019 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1021 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1022 = tosa.transpose %1020, %1021 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %1023 = tosa.identity %1022 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32> + %1024 = tosa.reshape %1023 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %1025 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1026 = tosa.transpose %arg85, %1025 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1027 = tosa.reshape %1024 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_183 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1028 = linalg.matmul {cast = #linalg.type_fn} ins(%1027, %1026 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_183 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1029 = tosa.reshape %1028 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1030 = tosa.add %932, %1029 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1031 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_184 = arith.constant 2 : i32 + %1032 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1030 : tensor<1x40x4096xf32>) outs(%1031 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_184 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %1033 = tosa.reduce_sum %1032 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1034 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1035 = tosa.reciprocal %1034 : (tensor<1xf32>) -> tensor<1xf32> + %1036 = tosa.mul %1035, %1033 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1037 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1038 = tosa.add %1036, %1037 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1039 = tosa.rsqrt %1038 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1040 = tosa.mul %1030, %1039 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1041 = tosa.reshape %arg86 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1042 = tosa.mul %1041, %1040 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1043 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1044 = tosa.transpose %arg87, %1043 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1045 = tosa.reshape %1042 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_185 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1046 = 
linalg.matmul {cast = #linalg.type_fn} ins(%1045, %1044 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_185 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1047 = tosa.reshape %1046 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1048 = tosa.sigmoid %1047 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1049 = tosa.mul %1047, %1048 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1050 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1051 = tosa.transpose %arg88, %1050 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1052 = tosa.reshape %1042 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_186 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1053 = linalg.matmul {cast = #linalg.type_fn} ins(%1052, %1051 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_186 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1054 = tosa.reshape %1053 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1055 = tosa.mul %1049, %1054 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1056 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1057 = tosa.transpose %arg89, %1056 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %1058 = tosa.reshape %1055 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_187 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1059 = linalg.matmul {cast = #linalg.type_fn} ins(%1058, %1057 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_187 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1060 = tosa.reshape %1059 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1061 = tosa.add %1030, %1060 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1062 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_188 = arith.constant 2 : i32 + %1063 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1061 : tensor<1x40x4096xf32>) outs(%1062 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_188 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %1064 = tosa.reduce_sum %1063 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1065 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1066 = tosa.reciprocal %1065 : (tensor<1xf32>) -> tensor<1xf32> + %1067 = tosa.mul %1066, %1064 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1068 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1069 = tosa.add %1067, %1068 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1070 = tosa.rsqrt %1069 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1071 = tosa.mul %1061, %1070 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1072 = tosa.reshape %arg90 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1073 = tosa.mul %1072, %1071 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1074 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1075 = tosa.transpose %arg91, %1074 : (tensor<4096x4096xf32>, tensor<2xi32>) -> 
tensor<4096x4096xf32> + %1076 = tosa.reshape %1073 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_189 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1077 = linalg.matmul {cast = #linalg.type_fn} ins(%1076, %1075 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_189 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1078 = tosa.reshape %1077 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1079 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1080 = tosa.transpose %arg92, %1079 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1081 = tosa.reshape %1073 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_190 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1082 = linalg.matmul {cast = #linalg.type_fn} ins(%1081, %1080 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_190 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1083 = tosa.reshape %1082 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1084 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1085 = tosa.transpose %arg93, %1084 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1086 = tosa.reshape %1073 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_191 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1087 = linalg.matmul {cast = #linalg.type_fn} ins(%1086, %1085 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_191 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1088 = tosa.reshape %1087 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1089 = tosa.reshape %1078 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1090 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1091 = tosa.transpose %1089, %1090 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1092 = tosa.reshape %1083 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1093 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1094 = tosa.transpose %1092, %1093 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1095 = tosa.reshape %1088 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1096 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1097 = tosa.transpose %1095, %1096 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %extracted_slice_192 = tensor.extract_slice %arg94[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_193 = tensor.extract_slice %extracted_slice_192[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_194 = tensor.extract_slice %extracted_slice_193[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %extracted_slice_195 = tensor.extract_slice %arg95[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_196 = tensor.extract_slice %extracted_slice_195[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_197 = tensor.extract_slice %extracted_slice_196[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : 
tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %1098 = tensor.empty() : tensor<1x40x128xf32> + %1099 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_194 : tensor<1x1x40x128xf32>) outs(%1098 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %1100 = tensor.empty() : tensor<40x128xf32> + %1101 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%1099 : tensor<1x40x128xf32>) outs(%1100 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %1102 = tensor.empty() : tensor<1x40x128xf32> + %1103 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_197 : tensor<1x1x40x128xf32>) outs(%1102 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %1104 = tensor.empty() : tensor<40x128xf32> + %1105 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%1103 : tensor<1x40x128xf32>) outs(%1104 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %1106 = tensor.empty() : tensor<1x40x128xf32> + %1107 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%1106 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %1101[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %1108 = tosa.reshape %1107 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1109 = tensor.empty() : tensor<1x40x128xf32> + %1110 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%1109 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %1105[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %1111 = tosa.reshape %1110 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1112 = tosa.mul %1091, %1108 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_198 = tensor.extract_slice %1091[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_199 = tensor.extract_slice %1091[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1113 = tosa.negate %extracted_slice_199 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1114 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_200 = tensor.insert_slice %1113 into %1114[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_201 = tensor.insert_slice %extracted_slice_198 into %inserted_slice_200[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1115 = tosa.mul %inserted_slice_201, %1111 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1116 = tosa.add %1112, %1115 : (tensor<1x32x40x128xf32>, 
tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1117 = tosa.mul %1094, %1108 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_202 = tensor.extract_slice %1094[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_203 = tensor.extract_slice %1094[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1118 = tosa.negate %extracted_slice_203 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1119 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_204 = tensor.insert_slice %1118 into %1119[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_205 = tensor.insert_slice %extracted_slice_202 into %inserted_slice_204[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1120 = tosa.mul %inserted_slice_205, %1111 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1121 = tosa.add %1117, %1120 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1122 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1123 = tosa.transpose %1121, %1122 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %1124 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %1125 = tosa.add %1116, %1124 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1126 = tosa.reshape %1125 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1127 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32> + %1128 = tosa.add %1123, %1127 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32> + %1129 = tosa.reshape %1128 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %1130 = tosa.matmul %1126, %1129 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %1131 = tosa.reshape %1130 {new_shape = array} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1132 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %1133 = tosa.reciprocal %1132 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1134 = tosa.mul %1131, %1133 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1135 = tosa.add %1134, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32> + %1136 = tosa.reduce_max %1135 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %1137 = tosa.sub %1135, %1136 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %1138 = tosa.exp %1137 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1139 = tosa.reduce_sum %1138 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %1140 = tosa.reciprocal %1139 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + %1141 = tosa.mul %1138, %1140 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %1142 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %1143 = tosa.add %1141, %1142 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1144 = tosa.reshape 
%1143 {new_shape = array} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32> + %1145 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %1146 = tosa.add %1097, %1145 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1147 = tosa.reshape %1146 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1148 = tosa.matmul %1144, %1147 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %1149 = tosa.reshape %1148 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1150 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1151 = tosa.transpose %1149, %1150 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %1152 = tosa.identity %1151 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32> + %1153 = tosa.reshape %1152 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %1154 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1155 = tosa.transpose %arg96, %1154 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1156 = tosa.reshape %1153 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_206 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1157 = linalg.matmul {cast = #linalg.type_fn} ins(%1156, %1155 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_206 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1158 = tosa.reshape %1157 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1159 = tosa.add %1061, %1158 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1160 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_207 = arith.constant 2 : i32 + %1161 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1159 : tensor<1x40x4096xf32>) outs(%1160 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_207 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %1162 = tosa.reduce_sum %1161 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1163 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1164 = tosa.reciprocal %1163 : (tensor<1xf32>) -> tensor<1xf32> + %1165 = tosa.mul %1164, %1162 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1166 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1167 = tosa.add %1165, %1166 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1168 = tosa.rsqrt %1167 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1169 = tosa.mul %1159, %1168 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1170 = tosa.reshape %arg97 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1171 = tosa.mul %1170, %1169 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1172 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1173 = tosa.transpose %arg98, %1172 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1174 = tosa.reshape %1171 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_208 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1175 = linalg.matmul {cast = #linalg.type_fn} 
ins(%1174, %1173 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_208 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1176 = tosa.reshape %1175 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1177 = tosa.sigmoid %1176 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1178 = tosa.mul %1176, %1177 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1179 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1180 = tosa.transpose %arg99, %1179 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1181 = tosa.reshape %1171 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_209 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1182 = linalg.matmul {cast = #linalg.type_fn} ins(%1181, %1180 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_209 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1183 = tosa.reshape %1182 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1184 = tosa.mul %1178, %1183 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1185 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1186 = tosa.transpose %arg100, %1185 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %1187 = tosa.reshape %1184 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_210 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1188 = linalg.matmul {cast = #linalg.type_fn} ins(%1187, %1186 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_210 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1189 = tosa.reshape %1188 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1190 = tosa.add %1159, %1189 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1191 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_211 = arith.constant 2 : i32 + %1192 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1190 : tensor<1x40x4096xf32>) outs(%1191 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_211 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %1193 = tosa.reduce_sum %1192 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1194 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1195 = tosa.reciprocal %1194 : (tensor<1xf32>) -> tensor<1xf32> + %1196 = tosa.mul %1195, %1193 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1197 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1198 = tosa.add %1196, %1197 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1199 = tosa.rsqrt %1198 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1200 = tosa.mul %1190, %1199 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1201 = tosa.reshape %arg101 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1202 = tosa.mul %1201, %1200 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1203 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1204 = tosa.transpose %arg102, %1203 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1205 = tosa.reshape 
%1202 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_212 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1206 = linalg.matmul {cast = #linalg.type_fn} ins(%1205, %1204 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_212 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1207 = tosa.reshape %1206 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1208 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1209 = tosa.transpose %arg103, %1208 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1210 = tosa.reshape %1202 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_213 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1211 = linalg.matmul {cast = #linalg.type_fn} ins(%1210, %1209 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_213 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1212 = tosa.reshape %1211 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1213 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1214 = tosa.transpose %arg104, %1213 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1215 = tosa.reshape %1202 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_214 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1216 = linalg.matmul {cast = #linalg.type_fn} ins(%1215, %1214 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_214 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1217 = tosa.reshape %1216 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1218 = tosa.reshape %1207 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1219 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1220 = tosa.transpose %1218, %1219 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1221 = tosa.reshape %1212 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1222 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1223 = tosa.transpose %1221, %1222 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1224 = tosa.reshape %1217 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1225 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1226 = tosa.transpose %1224, %1225 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %extracted_slice_215 = tensor.extract_slice %arg105[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_216 = tensor.extract_slice %extracted_slice_215[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_217 = tensor.extract_slice %extracted_slice_216[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %extracted_slice_218 = tensor.extract_slice %arg106[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_219 = tensor.extract_slice %extracted_slice_218[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_220 = tensor.extract_slice %extracted_slice_219[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + 
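// Rotary position embedding (RoPE) for this layer's attention heads.
+ // The linalg.generic copies below collapse the 40x128 windows sliced above
+ // from %arg105/%arg106 (by the shapes and the surrounding pattern, the
+ // precomputed cos/sin tables) down to rank 2; the two gathers then pick one
+ // 128-wide row per token using the position ids in %2, and the
+ // extract_slice / negate / insert_slice sequence is rotate-half, so the
+ // block computes q' = q * cos + rotate_half(q) * sin.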
%1227 = tensor.empty() : tensor<1x40x128xf32>
+ %1228 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_217 : tensor<1x1x40x128xf32>) outs(%1227 : tensor<1x40x128xf32>) {
+ ^bb0(%in: f32, %out: f32):
+   linalg.yield %in : f32
+ } -> tensor<1x40x128xf32>
+ %1229 = tensor.empty() : tensor<40x128xf32>
+ %1230 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%1228 : tensor<1x40x128xf32>) outs(%1229 : tensor<40x128xf32>) {
+ ^bb0(%in: f32, %out: f32):
+   linalg.yield %in : f32
+ } -> tensor<40x128xf32>
+ %1231 = tensor.empty() : tensor<1x40x128xf32>
+ %1232 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_220 : tensor<1x1x40x128xf32>) outs(%1231 : tensor<1x40x128xf32>) {
+ ^bb0(%in: f32, %out: f32):
+   linalg.yield %in : f32
+ } -> tensor<1x40x128xf32>
+ %1233 = tensor.empty() : tensor<40x128xf32>
+ %1234 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%1232 : tensor<1x40x128xf32>) outs(%1233 : tensor<40x128xf32>) {
+ ^bb0(%in: f32, %out: f32):
+   linalg.yield %in : f32
+ } -> tensor<40x128xf32>
+ %1235 = tensor.empty() : tensor<1x40x128xf32>
+ %1236 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%1235 : tensor<1x40x128xf32>) {
+ ^bb0(%in: i64, %out: f32):
+   %4175 = arith.index_cast %in : i64 to index
+   %4176 = linalg.index 2 : index
+   %extracted = tensor.extract %1230[%4175, %4176] : tensor<40x128xf32>
+   linalg.yield %extracted : f32
+ } -> tensor<1x40x128xf32>
+ %1237 = tosa.reshape %1236 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+ %1238 = tensor.empty() : tensor<1x40x128xf32>
+ %1239 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%1238 : tensor<1x40x128xf32>) {
+ ^bb0(%in: i64, %out: f32):
+   %4175 = arith.index_cast %in : i64 to index
+   %4176 = linalg.index 2 : index
+   %extracted = tensor.extract %1234[%4175, %4176] : tensor<40x128xf32>
+   linalg.yield %extracted : f32
+ } -> tensor<1x40x128xf32>
+ %1240 = tosa.reshape %1239 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+ %1241 = tosa.mul %1220, %1237 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+ %extracted_slice_221 = tensor.extract_slice %1220[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+ %extracted_slice_222 = tensor.extract_slice %1220[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+ %1242 = tosa.negate %extracted_slice_222 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+ %1243 = tensor.empty() : tensor<1x32x40x128xf32>
+ %inserted_slice_223 = tensor.insert_slice %1242 into %1243[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+ %inserted_slice_224 = tensor.insert_slice %extracted_slice_221 into %inserted_slice_223[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+ %1244 = tosa.mul %inserted_slice_224, %1240 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+ %1245 = tosa.add %1241, %1244 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+ %1246 =
tosa.mul %1223, %1237 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_225 = tensor.extract_slice %1223[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_226 = tensor.extract_slice %1223[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1247 = tosa.negate %extracted_slice_226 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1248 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_227 = tensor.insert_slice %1247 into %1248[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_228 = tensor.insert_slice %extracted_slice_225 into %inserted_slice_227[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1249 = tosa.mul %inserted_slice_228, %1240 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1250 = tosa.add %1246, %1249 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1251 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1252 = tosa.transpose %1250, %1251 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %1253 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %1254 = tosa.add %1245, %1253 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1255 = tosa.reshape %1254 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1256 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32> + %1257 = tosa.add %1252, %1256 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32> + %1258 = tosa.reshape %1257 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %1259 = tosa.matmul %1255, %1258 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %1260 = tosa.reshape %1259 {new_shape = array} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1261 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %1262 = tosa.reciprocal %1261 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1263 = tosa.mul %1260, %1262 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1264 = tosa.add %1263, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32> + %1265 = tosa.reduce_max %1264 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %1266 = tosa.sub %1264, %1265 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %1267 = tosa.exp %1266 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1268 = tosa.reduce_sum %1267 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %1269 = tosa.reciprocal %1268 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + %1270 = tosa.mul %1267, %1269 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %1271 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %1272 = tosa.add %1270, %1271 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1273 = tosa.reshape %1272 {new_shape = array} : (tensor<1x32x40x40xf32>) -> 
tensor<32x40x40xf32> + %1274 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %1275 = tosa.add %1226, %1274 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1276 = tosa.reshape %1275 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1277 = tosa.matmul %1273, %1276 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %1278 = tosa.reshape %1277 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1279 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1280 = tosa.transpose %1278, %1279 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %1281 = tosa.identity %1280 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32> + %1282 = tosa.reshape %1281 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %1283 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1284 = tosa.transpose %arg107, %1283 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1285 = tosa.reshape %1282 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_229 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1286 = linalg.matmul {cast = #linalg.type_fn} ins(%1285, %1284 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_229 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1287 = tosa.reshape %1286 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1288 = tosa.add %1190, %1287 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1289 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_230 = arith.constant 2 : i32 + %1290 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1288 : tensor<1x40x4096xf32>) outs(%1289 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_230 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %1291 = tosa.reduce_sum %1290 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1292 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1293 = tosa.reciprocal %1292 : (tensor<1xf32>) -> tensor<1xf32> + %1294 = tosa.mul %1293, %1291 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1295 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1296 = tosa.add %1294, %1295 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1297 = tosa.rsqrt %1296 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1298 = tosa.mul %1288, %1297 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1299 = tosa.reshape %arg108 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1300 = tosa.mul %1299, %1298 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1301 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1302 = tosa.transpose %arg109, %1301 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1303 = tosa.reshape %1300 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_231 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1304 = linalg.matmul {cast = #linalg.type_fn} ins(%1303, %1302 : tensor<40x4096xf32>, 
tensor<4096x11008xf32>) outs(%cst_231 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1305 = tosa.reshape %1304 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1306 = tosa.sigmoid %1305 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1307 = tosa.mul %1305, %1306 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1308 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1309 = tosa.transpose %arg110, %1308 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1310 = tosa.reshape %1300 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_232 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1311 = linalg.matmul {cast = #linalg.type_fn} ins(%1310, %1309 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_232 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1312 = tosa.reshape %1311 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1313 = tosa.mul %1307, %1312 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1314 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1315 = tosa.transpose %arg111, %1314 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %1316 = tosa.reshape %1313 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_233 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1317 = linalg.matmul {cast = #linalg.type_fn} ins(%1316, %1315 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_233 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1318 = tosa.reshape %1317 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1319 = tosa.add %1288, %1318 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1320 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_234 = arith.constant 2 : i32 + %1321 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1319 : tensor<1x40x4096xf32>) outs(%1320 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_234 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %1322 = tosa.reduce_sum %1321 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1323 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1324 = tosa.reciprocal %1323 : (tensor<1xf32>) -> tensor<1xf32> + %1325 = tosa.mul %1324, %1322 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1326 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1327 = tosa.add %1325, %1326 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1328 = tosa.rsqrt %1327 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1329 = tosa.mul %1319, %1328 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1330 = tosa.reshape %arg112 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1331 = tosa.mul %1330, %1329 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1332 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1333 = tosa.transpose %arg113, %1332 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1334 = tosa.reshape %1331 {new_shape = array} : 
(tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_235 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1335 = linalg.matmul {cast = #linalg.type_fn} ins(%1334, %1333 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_235 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1336 = tosa.reshape %1335 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1337 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1338 = tosa.transpose %arg114, %1337 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1339 = tosa.reshape %1331 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_236 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1340 = linalg.matmul {cast = #linalg.type_fn} ins(%1339, %1338 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_236 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1341 = tosa.reshape %1340 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1342 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1343 = tosa.transpose %arg115, %1342 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1344 = tosa.reshape %1331 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_237 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1345 = linalg.matmul {cast = #linalg.type_fn} ins(%1344, %1343 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_237 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1346 = tosa.reshape %1345 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1347 = tosa.reshape %1336 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1348 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1349 = tosa.transpose %1347, %1348 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1350 = tosa.reshape %1341 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1351 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1352 = tosa.transpose %1350, %1351 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1353 = tosa.reshape %1346 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1354 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1355 = tosa.transpose %1353, %1354 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %extracted_slice_238 = tensor.extract_slice %arg116[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_239 = tensor.extract_slice %extracted_slice_238[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_240 = tensor.extract_slice %extracted_slice_239[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %extracted_slice_241 = tensor.extract_slice %arg117[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_242 = tensor.extract_slice %extracted_slice_241[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_243 = tensor.extract_slice %extracted_slice_242[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %1356 = tensor.empty() : 
tensor<1x40x128xf32> + %1357 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_240 : tensor<1x1x40x128xf32>) outs(%1356 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %1358 = tensor.empty() : tensor<40x128xf32> + %1359 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%1357 : tensor<1x40x128xf32>) outs(%1358 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %1360 = tensor.empty() : tensor<1x40x128xf32> + %1361 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_243 : tensor<1x1x40x128xf32>) outs(%1360 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %1362 = tensor.empty() : tensor<40x128xf32> + %1363 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%1361 : tensor<1x40x128xf32>) outs(%1362 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %1364 = tensor.empty() : tensor<1x40x128xf32> + %1365 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%1364 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %1359[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %1366 = tosa.reshape %1365 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1367 = tensor.empty() : tensor<1x40x128xf32> + %1368 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%1367 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %1363[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %1369 = tosa.reshape %1368 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1370 = tosa.mul %1349, %1366 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_244 = tensor.extract_slice %1349[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_245 = tensor.extract_slice %1349[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1371 = tosa.negate %extracted_slice_245 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1372 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_246 = tensor.insert_slice %1371 into %1372[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_247 = tensor.insert_slice %extracted_slice_244 into %inserted_slice_246[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1373 = tosa.mul %inserted_slice_247, %1369 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1374 = tosa.add %1370, %1373 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1375 = tosa.mul %1352, %1366 
{shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_248 = tensor.extract_slice %1352[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_249 = tensor.extract_slice %1352[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1376 = tosa.negate %extracted_slice_249 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1377 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_250 = tensor.insert_slice %1376 into %1377[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_251 = tensor.insert_slice %extracted_slice_248 into %inserted_slice_250[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1378 = tosa.mul %inserted_slice_251, %1369 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1379 = tosa.add %1375, %1378 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1380 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1381 = tosa.transpose %1379, %1380 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %1382 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %1383 = tosa.add %1374, %1382 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1384 = tosa.reshape %1383 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1385 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32> + %1386 = tosa.add %1381, %1385 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32> + %1387 = tosa.reshape %1386 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %1388 = tosa.matmul %1384, %1387 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %1389 = tosa.reshape %1388 {new_shape = array} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1390 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %1391 = tosa.reciprocal %1390 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1392 = tosa.mul %1389, %1391 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1393 = tosa.add %1392, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32> + %1394 = tosa.reduce_max %1393 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %1395 = tosa.sub %1393, %1394 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %1396 = tosa.exp %1395 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1397 = tosa.reduce_sum %1396 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %1398 = tosa.reciprocal %1397 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + %1399 = tosa.mul %1396, %1398 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %1400 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %1401 = tosa.add %1399, %1400 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1402 = tosa.reshape %1401 {new_shape = array} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32> + %1403 = 
"tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %1404 = tosa.add %1355, %1403 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1405 = tosa.reshape %1404 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1406 = tosa.matmul %1402, %1405 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %1407 = tosa.reshape %1406 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1408 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1409 = tosa.transpose %1407, %1408 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %1410 = tosa.identity %1409 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32> + %1411 = tosa.reshape %1410 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %1412 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1413 = tosa.transpose %arg118, %1412 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1414 = tosa.reshape %1411 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_252 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1415 = linalg.matmul {cast = #linalg.type_fn} ins(%1414, %1413 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_252 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1416 = tosa.reshape %1415 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1417 = tosa.add %1319, %1416 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1418 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_253 = arith.constant 2 : i32 + %1419 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1417 : tensor<1x40x4096xf32>) outs(%1418 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_253 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %1420 = tosa.reduce_sum %1419 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1421 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1422 = tosa.reciprocal %1421 : (tensor<1xf32>) -> tensor<1xf32> + %1423 = tosa.mul %1422, %1420 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1424 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1425 = tosa.add %1423, %1424 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1426 = tosa.rsqrt %1425 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1427 = tosa.mul %1417, %1426 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1428 = tosa.reshape %arg119 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1429 = tosa.mul %1428, %1427 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1430 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1431 = tosa.transpose %arg120, %1430 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1432 = tosa.reshape %1429 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_254 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1433 = linalg.matmul {cast = #linalg.type_fn} ins(%1432, %1431 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_254 : 
tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1434 = tosa.reshape %1433 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1435 = tosa.sigmoid %1434 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1436 = tosa.mul %1434, %1435 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1437 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1438 = tosa.transpose %arg121, %1437 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1439 = tosa.reshape %1429 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_255 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1440 = linalg.matmul {cast = #linalg.type_fn} ins(%1439, %1438 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_255 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1441 = tosa.reshape %1440 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1442 = tosa.mul %1436, %1441 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1443 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1444 = tosa.transpose %arg122, %1443 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %1445 = tosa.reshape %1442 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_256 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1446 = linalg.matmul {cast = #linalg.type_fn} ins(%1445, %1444 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_256 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1447 = tosa.reshape %1446 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1448 = tosa.add %1417, %1447 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1449 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_257 = arith.constant 2 : i32 + %1450 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1448 : tensor<1x40x4096xf32>) outs(%1449 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_257 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %1451 = tosa.reduce_sum %1450 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1452 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1453 = tosa.reciprocal %1452 : (tensor<1xf32>) -> tensor<1xf32> + %1454 = tosa.mul %1453, %1451 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1455 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1456 = tosa.add %1454, %1455 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1457 = tosa.rsqrt %1456 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1458 = tosa.mul %1448, %1457 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1459 = tosa.reshape %arg123 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1460 = tosa.mul %1459, %1458 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1461 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1462 = tosa.transpose %arg124, %1461 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1463 = tosa.reshape %1460 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + 
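// Next decoder layer: the RMSNorm'd hidden state %1460 feeds three
+ // linalg.matmul projections against the transposed 4096x4096 weights
+ // %arg124, %arg125 and %arg126 -- by the surrounding pattern, the Q, K and V
+ // projections -- and each result is reshaped from 1x40x4096 into 32 heads of
+ // size 128 (1x40x32x128) and transposed to 1x32x40x128 before RoPE.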
%cst_258 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+ %1464 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1463, %1462 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_258 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+ %1465 = tosa.reshape %1464 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+ %1466 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+ %1467 = tosa.transpose %arg125, %1466 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+ %1468 = tosa.reshape %1460 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+ %cst_259 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+ %1469 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1468, %1467 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_259 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+ %1470 = tosa.reshape %1469 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+ %1471 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+ %1472 = tosa.transpose %arg126, %1471 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+ %1473 = tosa.reshape %1460 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+ %cst_260 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+ %1474 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1473, %1472 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_260 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+ %1475 = tosa.reshape %1474 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+ %1476 = tosa.reshape %1465 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+ %1477 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+ %1478 = tosa.transpose %1476, %1477 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+ %1479 = tosa.reshape %1470 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+ %1480 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+ %1481 = tosa.transpose %1479, %1480 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+ %1482 = tosa.reshape %1475 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+ %1483 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+ %1484 = tosa.transpose %1482, %1483 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+ %extracted_slice_261 = tensor.extract_slice %arg127[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+ %extracted_slice_262 = tensor.extract_slice %extracted_slice_261[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+ %extracted_slice_263 = tensor.extract_slice %extracted_slice_262[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+ %extracted_slice_264 = tensor.extract_slice %arg128[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+ %extracted_slice_265 = tensor.extract_slice %extracted_slice_264[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+ %extracted_slice_266 = tensor.extract_slice %extracted_slice_265[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+ %1485 = tensor.empty() : tensor<1x40x128xf32>
+ %1486 = linalg.generic
{indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_263 : tensor<1x1x40x128xf32>) outs(%1485 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %1487 = tensor.empty() : tensor<40x128xf32> + %1488 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%1486 : tensor<1x40x128xf32>) outs(%1487 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %1489 = tensor.empty() : tensor<1x40x128xf32> + %1490 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_266 : tensor<1x1x40x128xf32>) outs(%1489 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %1491 = tensor.empty() : tensor<40x128xf32> + %1492 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%1490 : tensor<1x40x128xf32>) outs(%1491 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %1493 = tensor.empty() : tensor<1x40x128xf32> + %1494 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%1493 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %1488[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %1495 = tosa.reshape %1494 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1496 = tensor.empty() : tensor<1x40x128xf32> + %1497 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%1496 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %1492[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %1498 = tosa.reshape %1497 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1499 = tosa.mul %1478, %1495 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_267 = tensor.extract_slice %1478[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_268 = tensor.extract_slice %1478[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1500 = tosa.negate %extracted_slice_268 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1501 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_269 = tensor.insert_slice %1500 into %1501[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_270 = tensor.insert_slice %extracted_slice_267 into %inserted_slice_269[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1502 = tosa.mul %inserted_slice_270, %1498 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1503 = tosa.add %1499, %1502 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1504 = tosa.mul %1481, %1495 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, 
tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_271 = tensor.extract_slice %1481[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_272 = tensor.extract_slice %1481[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1505 = tosa.negate %extracted_slice_272 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1506 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_273 = tensor.insert_slice %1505 into %1506[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_274 = tensor.insert_slice %extracted_slice_271 into %inserted_slice_273[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1507 = tosa.mul %inserted_slice_274, %1498 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1508 = tosa.add %1504, %1507 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1509 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1510 = tosa.transpose %1508, %1509 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %1511 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %1512 = tosa.add %1503, %1511 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1513 = tosa.reshape %1512 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1514 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32> + %1515 = tosa.add %1510, %1514 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32> + %1516 = tosa.reshape %1515 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %1517 = tosa.matmul %1513, %1516 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %1518 = tosa.reshape %1517 {new_shape = array} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1519 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %1520 = tosa.reciprocal %1519 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1521 = tosa.mul %1518, %1520 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1522 = tosa.add %1521, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32> + %1523 = tosa.reduce_max %1522 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %1524 = tosa.sub %1522, %1523 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %1525 = tosa.exp %1524 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1526 = tosa.reduce_sum %1525 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %1527 = tosa.reciprocal %1526 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + %1528 = tosa.mul %1525, %1527 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %1529 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %1530 = tosa.add %1528, %1529 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %1531 = tosa.reshape %1530 {new_shape = array} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32> + %1532 = "tosa.const"() <{value = dense<0.000000e+00> 
: tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %1533 = tosa.add %1484, %1532 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1534 = tosa.reshape %1533 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1535 = tosa.matmul %1531, %1534 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %1536 = tosa.reshape %1535 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1537 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1538 = tosa.transpose %1536, %1537 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %1539 = tosa.identity %1538 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32> + %1540 = tosa.reshape %1539 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %1541 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1542 = tosa.transpose %arg129, %1541 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1543 = tosa.reshape %1540 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_275 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1544 = linalg.matmul {cast = #linalg.type_fn} ins(%1543, %1542 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_275 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1545 = tosa.reshape %1544 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1546 = tosa.add %1448, %1545 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1547 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_276 = arith.constant 2 : i32 + %1548 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1546 : tensor<1x40x4096xf32>) outs(%1547 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_276 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %1549 = tosa.reduce_sum %1548 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1550 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1551 = tosa.reciprocal %1550 : (tensor<1xf32>) -> tensor<1xf32> + %1552 = tosa.mul %1551, %1549 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1553 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1554 = tosa.add %1552, %1553 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1555 = tosa.rsqrt %1554 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1556 = tosa.mul %1546, %1555 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1557 = tosa.reshape %arg130 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1558 = tosa.mul %1557, %1556 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1559 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1560 = tosa.transpose %arg131, %1559 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1561 = tosa.reshape %1558 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_277 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1562 = linalg.matmul {cast = #linalg.type_fn} ins(%1561, %1560 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_277 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1563 = 
tosa.reshape %1562 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1564 = tosa.sigmoid %1563 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1565 = tosa.mul %1563, %1564 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1566 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1567 = tosa.transpose %arg132, %1566 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1568 = tosa.reshape %1558 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_278 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1569 = linalg.matmul {cast = #linalg.type_fn} ins(%1568, %1567 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_278 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1570 = tosa.reshape %1569 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1571 = tosa.mul %1565, %1570 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1572 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1573 = tosa.transpose %arg133, %1572 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %1574 = tosa.reshape %1571 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_279 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1575 = linalg.matmul {cast = #linalg.type_fn} ins(%1574, %1573 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_279 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1576 = tosa.reshape %1575 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1577 = tosa.add %1546, %1576 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1578 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_280 = arith.constant 2 : i32 + %1579 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1577 : tensor<1x40x4096xf32>) outs(%1578 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_280 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %1580 = tosa.reduce_sum %1579 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1581 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1582 = tosa.reciprocal %1581 : (tensor<1xf32>) -> tensor<1xf32> + %1583 = tosa.mul %1582, %1580 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1584 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1585 = tosa.add %1583, %1584 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1586 = tosa.rsqrt %1585 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1587 = tosa.mul %1577, %1586 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1588 = tosa.reshape %arg134 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1589 = tosa.mul %1588, %1587 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1590 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1591 = tosa.transpose %arg135, %1590 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1592 = tosa.reshape %1589 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_281 = arith.constant dense<0.000000e+00> : 
tensor<40x4096xf32> + %1593 = linalg.matmul {cast = #linalg.type_fn} ins(%1592, %1591 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_281 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1594 = tosa.reshape %1593 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1595 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1596 = tosa.transpose %arg136, %1595 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1597 = tosa.reshape %1589 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_282 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1598 = linalg.matmul {cast = #linalg.type_fn} ins(%1597, %1596 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_282 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1599 = tosa.reshape %1598 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1600 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1601 = tosa.transpose %arg137, %1600 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1602 = tosa.reshape %1589 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_283 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1603 = linalg.matmul {cast = #linalg.type_fn} ins(%1602, %1601 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_283 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1604 = tosa.reshape %1603 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1605 = tosa.reshape %1594 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1606 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1607 = tosa.transpose %1605, %1606 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1608 = tosa.reshape %1599 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1609 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1610 = tosa.transpose %1608, %1609 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1611 = tosa.reshape %1604 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1612 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1613 = tosa.transpose %1611, %1612 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %extracted_slice_284 = tensor.extract_slice %arg138[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_285 = tensor.extract_slice %extracted_slice_284[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_286 = tensor.extract_slice %extracted_slice_285[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %extracted_slice_287 = tensor.extract_slice %arg139[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_288 = tensor.extract_slice %extracted_slice_287[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_289 = tensor.extract_slice %extracted_slice_288[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %1614 = tensor.empty() : tensor<1x40x128xf32> + %1615 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = 
["parallel", "parallel", "parallel"]} ins(%extracted_slice_286 : tensor<1x1x40x128xf32>) outs(%1614 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %1616 = tensor.empty() : tensor<40x128xf32> + %1617 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%1615 : tensor<1x40x128xf32>) outs(%1616 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %1618 = tensor.empty() : tensor<1x40x128xf32> + %1619 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_289 : tensor<1x1x40x128xf32>) outs(%1618 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %1620 = tensor.empty() : tensor<40x128xf32> + %1621 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%1619 : tensor<1x40x128xf32>) outs(%1620 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %1622 = tensor.empty() : tensor<1x40x128xf32> + %1623 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%1622 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %1617[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %1624 = tosa.reshape %1623 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1625 = tensor.empty() : tensor<1x40x128xf32> + %1626 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%1625 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %1621[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %1627 = tosa.reshape %1626 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1628 = tosa.mul %1607, %1624 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_290 = tensor.extract_slice %1607[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_291 = tensor.extract_slice %1607[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1629 = tosa.negate %extracted_slice_291 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1630 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_292 = tensor.insert_slice %1629 into %1630[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_293 = tensor.insert_slice %extracted_slice_290 into %inserted_slice_292[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1631 = tosa.mul %inserted_slice_293, %1627 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1632 = tosa.add %1628, %1631 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1633 = tosa.mul %1610, %1624 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + 
+    %extracted_slice_294 = tensor.extract_slice %1610[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_295 = tensor.extract_slice %1610[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %1634 = tosa.negate %extracted_slice_295 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %1635 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_296 = tensor.insert_slice %1634 into %1635[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_297 = tensor.insert_slice %extracted_slice_294 into %inserted_slice_296[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %1636 = tosa.mul %inserted_slice_297, %1627 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %1637 = tosa.add %1633, %1636 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %1638 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %1639 = tosa.transpose %1637, %1638 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32>
+    %1640 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %1641 = tosa.add %1632, %1640 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %1642 = tosa.reshape %1641 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %1643 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32>
+    %1644 = tosa.add %1639, %1643 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32>
+    %1645 = tosa.reshape %1644 {new_shape = array<i64: 32, 128, 40>} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32>
+    %1646 = tosa.matmul %1642, %1645 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32>
+    %1647 = tosa.reshape %1646 {new_shape = array<i64: 1, 32, 40, 40>} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %1648 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %1649 = tosa.reciprocal %1648 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %1650 = tosa.mul %1647, %1649 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %1651 = tosa.add %1650, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %1652 = tosa.reduce_max %1651 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %1653 = tosa.sub %1651, %1652 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %1654 = tosa.exp %1653 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %1655 = tosa.reduce_sum %1654 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %1656 = tosa.reciprocal %1655 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32>
+    %1657 = tosa.mul %1654, %1656 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %1658 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %1659 = tosa.add %1657, %1658 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %1660 = tosa.reshape %1659 {new_shape = array<i64: 32, 40, 40>} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32>
+    %1661 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %1662 = tosa.add %1613, %1661 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %1663 = tosa.reshape %1662 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %1664 = tosa.matmul %1660, %1663 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32>
+    %1665 = tosa.reshape %1664 {new_shape = array<i64: 1, 32, 40, 128>} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %1666 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %1667 = tosa.transpose %1665, %1666 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32>
+    %1668 = tosa.identity %1667 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32>
+    %1669 = tosa.reshape %1668 {new_shape = array<i64: 1, 40, 4096>} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32>
+    %1670 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %1671 = tosa.transpose %arg140, %1670 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %1672 = tosa.reshape %1669 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_298 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %1673 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1672, %1671 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_298 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %1674 = tosa.reshape %1673 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1675 = tosa.add %1577, %1674 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1676 = tensor.empty() : tensor<1x40x4096xf32>
+    %c2_i32_299 = arith.constant 2 : i32
+    %1677 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1675 : tensor<1x40x4096xf32>) outs(%1676 : tensor<1x40x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %4175 = math.fpowi %in, %c2_i32_299 : f32, i32
+      linalg.yield %4175 : f32
+    } -> tensor<1x40x4096xf32>
+    %1678 = tosa.reduce_sum %1677 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
+    %1679 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32>
+    %1680 = tosa.reciprocal %1679 : (tensor<1xf32>) -> tensor<1xf32>
+    %1681 = tosa.mul %1680, %1678 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %1682 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32>
+    %1683 = tosa.add %1681, %1682 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %1684 = tosa.rsqrt %1683 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %1685 = tosa.mul %1675, %1684 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32>
+    %1686 = tosa.reshape %arg141 {new_shape = array<i64: 1, 1, 4096>} : (tensor<4096xf32>) -> tensor<1x1x4096xf32>
+    %1687 = tosa.mul %1686, %1685 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1688 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %1689 = tosa.transpose %arg142, %1688 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32>
+    %1690 = tosa.reshape %1687 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_300 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32>
+    %1691 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1690, %1689 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_300 : tensor<40x11008xf32>) -> tensor<40x11008xf32>
+    %1692 = tosa.reshape %1691 {new_shape = array<i64: 1, 40, 11008>} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32>
+    %1693 = tosa.sigmoid %1692 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %1694 = tosa.mul %1692, %1693 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %1695 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %1696 = tosa.transpose %arg143, %1695 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32>
+    %1697 = tosa.reshape %1687 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_301 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32>
+    %1698 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1697, %1696 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_301 : tensor<40x11008xf32>) -> tensor<40x11008xf32>
+    %1699 = tosa.reshape %1698 {new_shape = array<i64: 1, 40, 11008>} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32>
+    %1700 = tosa.mul %1694, %1699 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %1701 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %1702 = tosa.transpose %arg144, %1701 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32>
+    %1703 = tosa.reshape %1700 {new_shape = array<i64: 40, 11008>} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32>
+    %cst_302 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %1704 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1703, %1702 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_302 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %1705 = tosa.reshape %1704 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1706 = tosa.add %1675, %1705 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1707 = tensor.empty() : tensor<1x40x4096xf32>
+    %c2_i32_303 = arith.constant 2 : i32
+    %1708 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1706 : tensor<1x40x4096xf32>) outs(%1707 : tensor<1x40x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %4175 = math.fpowi %in, %c2_i32_303 : f32, i32
+      linalg.yield %4175 : f32
+    } -> tensor<1x40x4096xf32>
+    %1709 = tosa.reduce_sum %1708 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
+    %1710 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32>
+    %1711 = tosa.reciprocal %1710 : (tensor<1xf32>) -> tensor<1xf32>
+    %1712 = tosa.mul %1711, %1709 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %1713 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32>
+    %1714 = tosa.add %1712, %1713 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %1715 = tosa.rsqrt %1714 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %1716 = tosa.mul %1706, %1715 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32>
+    %1717 = tosa.reshape %arg145 {new_shape = array<i64: 1, 1, 4096>} : (tensor<4096xf32>) -> tensor<1x1x4096xf32>
+    %1718 = tosa.mul %1717, %1716 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1719 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %1720 = tosa.transpose %arg146, %1719 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %1721 = tosa.reshape %1718 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_304 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %1722 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1721, %1720 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_304 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %1723 = tosa.reshape %1722 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1724 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %1725 = tosa.transpose %arg147, %1724 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %1726 = tosa.reshape %1718 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_305 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %1727 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1726, %1725 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_305 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %1728 = tosa.reshape %1727 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1729 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %1730 = tosa.transpose %arg148, %1729 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %1731 = tosa.reshape %1718 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_306 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %1732 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1731, %1730 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_306 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %1733 = tosa.reshape %1732 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1734 = tosa.reshape %1723 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %1735 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %1736 = tosa.transpose %1734, %1735 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %1737 = tosa.reshape %1728 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %1738 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %1739 = tosa.transpose %1737, %1738 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %1740 = tosa.reshape %1733 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %1741 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %1742 = tosa.transpose %1740, %1741 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_307 = tensor.extract_slice %arg149[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_308 = tensor.extract_slice %extracted_slice_307[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_309 = tensor.extract_slice %extracted_slice_308[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+    %extracted_slice_310 = tensor.extract_slice %arg150[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_311 = tensor.extract_slice %extracted_slice_310[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_312 = tensor.extract_slice %extracted_slice_311[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+    %1743 = tensor.empty() : tensor<1x40x128xf32>
+    %1744 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_309 : tensor<1x1x40x128xf32>) outs(%1743 : tensor<1x40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<1x40x128xf32>
+    %1745 = tensor.empty() : tensor<40x128xf32>
+    %1746 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%1744 : tensor<1x40x128xf32>) outs(%1745 : tensor<40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<40x128xf32>
+    %1747 = tensor.empty() : tensor<1x40x128xf32>
+    %1748 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_312 : tensor<1x1x40x128xf32>) outs(%1747 : tensor<1x40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<1x40x128xf32>
+    %1749 = tensor.empty() : tensor<40x128xf32>
+    %1750 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%1748 : tensor<1x40x128xf32>) outs(%1749 : tensor<40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<40x128xf32>
+    %1751 = tensor.empty() : tensor<1x40x128xf32>
+    %1752 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%1751 : tensor<1x40x128xf32>) {
+    ^bb0(%in: i64, %out: f32):
+      %4175 = arith.index_cast %in : i64 to index
+      %4176 = linalg.index 2 : index
+      %extracted = tensor.extract %1746[%4175, %4176] : tensor<40x128xf32>
+      linalg.yield %extracted : f32
+    } -> tensor<1x40x128xf32>
+    %1753 = tosa.reshape %1752 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+    %1754 = tensor.empty() : tensor<1x40x128xf32>
+    %1755 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%1754 : tensor<1x40x128xf32>) {
+    ^bb0(%in: i64, %out: f32):
+      %4175 = arith.index_cast %in : i64 to index
+      %4176 = linalg.index 2 : index
+      %extracted = tensor.extract %1750[%4175, %4176] : tensor<40x128xf32>
+      linalg.yield %extracted : f32
+    } -> tensor<1x40x128xf32>
+    %1756 = tosa.reshape %1755 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+    %1757 = tosa.mul %1736, %1753 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_313 = tensor.extract_slice %1736[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_314 = tensor.extract_slice %1736[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %1758 = tosa.negate %extracted_slice_314 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %1759 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_315 = tensor.insert_slice %1758 into %1759[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_316 = tensor.insert_slice %extracted_slice_313 into %inserted_slice_315[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %1760 = tosa.mul %inserted_slice_316, %1756 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %1761 = tosa.add %1757, %1760 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %1762 = tosa.mul %1739, %1753 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_317 = tensor.extract_slice %1739[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_318 = tensor.extract_slice %1739[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %1763 = tosa.negate %extracted_slice_318 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %1764 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_319 = tensor.insert_slice %1763 into %1764[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_320 = tensor.insert_slice %extracted_slice_317 into %inserted_slice_319[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %1765 = tosa.mul %inserted_slice_320, %1756 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %1766 = tosa.add %1762, %1765 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %1767 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %1768 = tosa.transpose %1766, %1767 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32>
+    %1769 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %1770 = tosa.add %1761, %1769 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %1771 = tosa.reshape %1770 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %1772 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32>
+    %1773 = tosa.add %1768, %1772 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32>
+    %1774 = tosa.reshape %1773 {new_shape = array<i64: 32, 128, 40>} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32>
+    %1775 = tosa.matmul %1771, %1774 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32>
+    %1776 = tosa.reshape %1775 {new_shape = array<i64: 1, 32, 40, 40>} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %1777 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %1778 = tosa.reciprocal %1777 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %1779 = tosa.mul %1776, %1778 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %1780 = tosa.add %1779, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %1781 = tosa.reduce_max %1780 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %1782 = tosa.sub %1780, %1781 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %1783 = tosa.exp %1782 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %1784 = tosa.reduce_sum %1783 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %1785 = tosa.reciprocal %1784 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32>
+    %1786 = tosa.mul %1783, %1785 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %1787 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %1788 = tosa.add %1786, %1787 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %1789 = tosa.reshape %1788 {new_shape = array<i64: 32, 40, 40>} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32>
+    %1790 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %1791 = tosa.add %1742, %1790 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %1792 = tosa.reshape %1791 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %1793 = tosa.matmul %1789, %1792 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32>
+    %1794 = tosa.reshape %1793 {new_shape = array<i64: 1, 32, 40, 128>} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %1795 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %1796 = tosa.transpose %1794, %1795 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32>
+    %1797 = tosa.identity %1796 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32>
+    %1798 = tosa.reshape %1797 {new_shape = array<i64: 1, 40, 4096>} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32>
+    %1799 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %1800 = tosa.transpose %arg151, %1799 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %1801 = tosa.reshape %1798 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_321 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %1802 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1801, %1800 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_321 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %1803 = tosa.reshape %1802 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1804 = tosa.add %1706, %1803 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1805 = tensor.empty() : tensor<1x40x4096xf32>
+    %c2_i32_322 = arith.constant 2 : i32
+    %1806 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1804 : tensor<1x40x4096xf32>) outs(%1805 : tensor<1x40x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %4175 = math.fpowi %in, %c2_i32_322 : f32, i32
+      linalg.yield %4175 : f32
+    } -> tensor<1x40x4096xf32>
+    %1807 = tosa.reduce_sum %1806 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
+    %1808 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32>
+    %1809 = tosa.reciprocal %1808 : (tensor<1xf32>) -> tensor<1xf32>
+    %1810 = tosa.mul %1809, %1807 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %1811 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32>
+    %1812 = tosa.add %1810, %1811 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %1813 = tosa.rsqrt %1812 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %1814 = tosa.mul %1804, %1813 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32>
+    %1815 = tosa.reshape %arg152 {new_shape = array<i64: 1, 1, 4096>} : (tensor<4096xf32>) -> tensor<1x1x4096xf32>
+    %1816 = tosa.mul %1815, %1814 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1817 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %1818 = tosa.transpose %arg153, %1817 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32>
+    %1819 = tosa.reshape %1816 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_323 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32>
+    %1820 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1819, %1818 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_323 : tensor<40x11008xf32>) -> tensor<40x11008xf32>
+    %1821 = tosa.reshape %1820 {new_shape = array<i64: 1, 40, 11008>} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32>
+    %1822 = tosa.sigmoid %1821 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %1823 = tosa.mul %1821, %1822 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %1824 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %1825 = tosa.transpose %arg154, %1824 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32>
+    %1826 = tosa.reshape %1816 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_324 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32>
+    %1827 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1826, %1825 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_324 : tensor<40x11008xf32>) -> tensor<40x11008xf32>
+    %1828 = tosa.reshape %1827 {new_shape = array<i64: 1, 40, 11008>} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32>
+    %1829 = tosa.mul %1823, %1828 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %1830 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %1831 = tosa.transpose %arg155, %1830 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32>
+    %1832 = tosa.reshape %1829 {new_shape = array<i64: 40, 11008>} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32>
+    %cst_325 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %1833 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1832, %1831 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_325 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %1834 = tosa.reshape %1833 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1835 = tosa.add %1804, %1834 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1836 = tensor.empty() : tensor<1x40x4096xf32>
+    %c2_i32_326 = arith.constant 2 : i32
+    %1837 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1835 : tensor<1x40x4096xf32>) outs(%1836 : tensor<1x40x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %4175 = math.fpowi %in, %c2_i32_326 : f32, i32
+      linalg.yield %4175 : f32
+    } -> tensor<1x40x4096xf32>
+    %1838 = tosa.reduce_sum %1837 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
+    %1839 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32>
+    %1840 = tosa.reciprocal %1839 : (tensor<1xf32>) -> tensor<1xf32>
+    %1841 = tosa.mul %1840, %1838 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %1842 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32>
+    %1843 = tosa.add %1841, %1842 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %1844 = tosa.rsqrt %1843 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %1845 = tosa.mul %1835, %1844 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32>
+    %1846 = tosa.reshape %arg156 {new_shape = array<i64: 1, 1, 4096>} : (tensor<4096xf32>) -> tensor<1x1x4096xf32>
+    %1847 = tosa.mul %1846, %1845 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1848 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %1849 = tosa.transpose %arg157, %1848 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %1850 = tosa.reshape %1847 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_327 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %1851 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1850, %1849 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_327 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %1852 = tosa.reshape %1851 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1853 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %1854 = tosa.transpose %arg158, %1853 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %1855 = tosa.reshape %1847 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_328 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %1856 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1855, %1854 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_328 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %1857 = tosa.reshape %1856 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1858 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %1859 = tosa.transpose %arg159, %1858 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %1860 = tosa.reshape %1847 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_329 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %1861 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1860, %1859 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_329 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %1862 = tosa.reshape %1861 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1863 = tosa.reshape %1852 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %1864 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %1865 = tosa.transpose %1863, %1864 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %1866 = tosa.reshape %1857 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %1867 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %1868 = tosa.transpose %1866, %1867 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %1869 = tosa.reshape %1862 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %1870 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %1871 = tosa.transpose %1869, %1870 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_330 = tensor.extract_slice %arg160[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_331 = tensor.extract_slice %extracted_slice_330[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_332 = tensor.extract_slice %extracted_slice_331[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+    %extracted_slice_333 = tensor.extract_slice %arg161[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_334 = tensor.extract_slice %extracted_slice_333[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_335 = tensor.extract_slice %extracted_slice_334[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+    %1872 = tensor.empty() : tensor<1x40x128xf32>
+    %1873 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_332 : tensor<1x1x40x128xf32>) outs(%1872 : tensor<1x40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<1x40x128xf32>
+    %1874 = tensor.empty() : tensor<40x128xf32>
+    %1875 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%1873 : tensor<1x40x128xf32>) outs(%1874 : tensor<40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<40x128xf32>
+    %1876 = tensor.empty() : tensor<1x40x128xf32>
+    %1877 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_335 : tensor<1x1x40x128xf32>) outs(%1876 : tensor<1x40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<1x40x128xf32>
+    %1878 = tensor.empty() : tensor<40x128xf32>
+    %1879 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%1877 : tensor<1x40x128xf32>) outs(%1878 : tensor<40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<40x128xf32>
+    %1880 = tensor.empty() : tensor<1x40x128xf32>
+    %1881 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%1880 : tensor<1x40x128xf32>) {
+    ^bb0(%in: i64, %out: f32):
+      %4175 = arith.index_cast %in : i64 to index
+      %4176 = linalg.index 2 : index
+      %extracted = tensor.extract %1875[%4175, %4176] : tensor<40x128xf32>
+      linalg.yield %extracted : f32
+    } -> tensor<1x40x128xf32>
+    %1882 = tosa.reshape %1881 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+    %1883 = tensor.empty() : tensor<1x40x128xf32>
+    %1884 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%1883 : tensor<1x40x128xf32>) {
+    ^bb0(%in: i64, %out: f32):
+      %4175 = arith.index_cast %in : i64 to index
+      %4176 = linalg.index 2 : index
+      %extracted = tensor.extract %1879[%4175, %4176] : tensor<40x128xf32>
+      linalg.yield %extracted : f32
+    } -> tensor<1x40x128xf32>
+    %1885 = tosa.reshape %1884 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+    %1886 = tosa.mul %1865, %1882 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_336 = tensor.extract_slice %1865[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_337 = tensor.extract_slice %1865[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %1887 = tosa.negate %extracted_slice_337 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %1888 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_338 = tensor.insert_slice %1887 into %1888[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_339 = tensor.insert_slice %extracted_slice_336 into %inserted_slice_338[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %1889 = tosa.mul %inserted_slice_339, %1885 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %1890 = tosa.add %1886, %1889 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %1891 = tosa.mul %1868, %1882 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_340 = tensor.extract_slice %1868[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_341 = tensor.extract_slice %1868[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %1892 = tosa.negate %extracted_slice_341 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %1893 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_342 = tensor.insert_slice %1892 into %1893[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_343 = tensor.insert_slice %extracted_slice_340 into %inserted_slice_342[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %1894 = tosa.mul %inserted_slice_343, %1885 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %1895 = tosa.add %1891, %1894 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %1896 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %1897 = tosa.transpose %1895, %1896 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32>
+    %1898 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %1899 = tosa.add %1890, %1898 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %1900 = tosa.reshape %1899 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %1901 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32>
+    %1902 = tosa.add %1897, %1901 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32>
+    %1903 = tosa.reshape %1902 {new_shape = array<i64: 32, 128, 40>} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32>
+    %1904 = tosa.matmul %1900, %1903 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32>
+    %1905 = tosa.reshape %1904 {new_shape = array<i64: 1, 32, 40, 40>} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %1906 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %1907 = tosa.reciprocal %1906 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %1908 = tosa.mul %1905, %1907 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %1909 = tosa.add %1908, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %1910 = tosa.reduce_max %1909 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %1911 = tosa.sub %1909, %1910 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %1912 = tosa.exp %1911 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %1913 = tosa.reduce_sum %1912 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %1914 = tosa.reciprocal %1913 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32>
+    %1915 = tosa.mul %1912, %1914 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %1916 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %1917 = tosa.add %1915, %1916 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %1918 = tosa.reshape %1917 {new_shape = array<i64: 32, 40, 40>} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32>
+    %1919 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %1920 = tosa.add %1871, %1919 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %1921 = tosa.reshape %1920 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %1922 = tosa.matmul %1918, %1921 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32>
+    %1923 = tosa.reshape %1922 {new_shape = array<i64: 1, 32, 40, 128>} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %1924 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %1925 = tosa.transpose %1923, %1924 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32>
+    %1926 = tosa.identity %1925 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32>
+    %1927 = tosa.reshape %1926 {new_shape = array<i64: 1, 40, 4096>} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32>
+    %1928 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %1929 = tosa.transpose %arg162, %1928 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %1930 = tosa.reshape %1927 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_344 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %1931 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1930, %1929 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_344 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %1932 = tosa.reshape %1931 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1933 = tosa.add %1835, %1932 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1934 = tensor.empty() : tensor<1x40x4096xf32>
+    %c2_i32_345 = arith.constant 2 : i32
+    %1935 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1933 : tensor<1x40x4096xf32>) outs(%1934 : tensor<1x40x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %4175 = math.fpowi %in, %c2_i32_345 : f32, i32
+      linalg.yield %4175 : f32
+    } -> tensor<1x40x4096xf32>
+    %1936 = tosa.reduce_sum %1935 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
+    %1937 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32>
+    %1938 = tosa.reciprocal %1937 : (tensor<1xf32>) -> tensor<1xf32>
+    %1939 = tosa.mul %1938, %1936 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %1940 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32>
+    %1941 = tosa.add %1939, %1940 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %1942 = tosa.rsqrt %1941 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %1943 = tosa.mul %1933, %1942 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32>
+    %1944 = tosa.reshape %arg163 {new_shape = array<i64: 1, 1, 4096>} : (tensor<4096xf32>) -> tensor<1x1x4096xf32>
+    %1945 = tosa.mul %1944, %1943 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1946 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %1947 = tosa.transpose %arg164, %1946 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32>
+    %1948 = tosa.reshape %1945 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_346 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32>
+    %1949 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1948, %1947 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_346 : tensor<40x11008xf32>) -> tensor<40x11008xf32>
+    %1950 = tosa.reshape %1949 {new_shape = array<i64: 1, 40, 11008>} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32>
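+    // SwiGLU feed-forward: the SiLU gate x * sigmoid(x) is applied to the
+    // 11008-wide gate projection %1950, multiplied elementwise with the
+    // up-projection, then down-projected back to the 4096-wide hidden size.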
+    %1951 = tosa.sigmoid %1950 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %1952 = tosa.mul %1950, %1951 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %1953 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %1954 = tosa.transpose %arg165, %1953 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32>
+    %1955 = tosa.reshape %1945 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_347 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32>
+    %1956 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1955, %1954 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_347 : tensor<40x11008xf32>) -> tensor<40x11008xf32>
+    %1957 = tosa.reshape %1956 {new_shape = array<i64: 1, 40, 11008>} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32>
+    %1958 = tosa.mul %1952, %1957 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %1959 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %1960 = tosa.transpose %arg166, %1959 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32>
+    %1961 = tosa.reshape %1958 {new_shape = array<i64: 40, 11008>} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32>
+    %cst_348 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %1962 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1961, %1960 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_348 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %1963 = tosa.reshape %1962 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1964 = tosa.add %1933, %1963 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1965 = tensor.empty() : tensor<1x40x4096xf32>
+    %c2_i32_349 = arith.constant 2 : i32
+    %1966 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1964 : tensor<1x40x4096xf32>) outs(%1965 : tensor<1x40x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %4175 = math.fpowi %in, %c2_i32_349 : f32, i32
+      linalg.yield %4175 : f32
+    } -> tensor<1x40x4096xf32>
+    %1967 = tosa.reduce_sum %1966 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
+    %1968 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32>
+    %1969 = tosa.reciprocal %1968 : (tensor<1xf32>) -> tensor<1xf32>
+    %1970 = tosa.mul %1969, %1967 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %1971 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32>
+    %1972 = tosa.add %1970, %1971 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %1973 = tosa.rsqrt %1972 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %1974 = tosa.mul %1964, %1973 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32>
+    %1975 = tosa.reshape %arg167 {new_shape = array<i64: 1, 1, 4096>} : (tensor<4096xf32>) -> tensor<1x1x4096xf32>
+    %1976 = tosa.mul %1975, %1974 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1977 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %1978 = tosa.transpose %arg168, %1977 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %1979 = tosa.reshape %1976 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_350 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %1980 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1979, %1978 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_350 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %1981 = tosa.reshape %1980 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1982 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %1983 = tosa.transpose %arg169, %1982 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %1984 = tosa.reshape %1976 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_351 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %1985 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1984, %1983 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_351 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %1986 = tosa.reshape %1985 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1987 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %1988 = tosa.transpose %arg170, %1987 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %1989 = tosa.reshape %1976 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_352 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %1990 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1989, %1988 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_352 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %1991 = tosa.reshape %1990 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %1992 = tosa.reshape %1981 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %1993 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %1994 = tosa.transpose %1992, %1993 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %1995 = tosa.reshape %1986 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %1996 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %1997 = tosa.transpose %1995, %1996 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %1998 = tosa.reshape %1991 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %1999 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2000 = tosa.transpose %1998, %1999 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_353 = tensor.extract_slice %arg171[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_354 = tensor.extract_slice %extracted_slice_353[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_355 = tensor.extract_slice %extracted_slice_354[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+    %extracted_slice_356 = tensor.extract_slice %arg172[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_357 = tensor.extract_slice %extracted_slice_356[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_358 = tensor.extract_slice %extracted_slice_357[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+    %2001 = tensor.empty() : tensor<1x40x128xf32>
+    %2002 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_355 : tensor<1x1x40x128xf32>) outs(%2001 : tensor<1x40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<1x40x128xf32>
+    %2003 = tensor.empty() : tensor<40x128xf32>
+    %2004 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%2002 : tensor<1x40x128xf32>) outs(%2003 : tensor<40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<40x128xf32>
+    %2005 = tensor.empty() : tensor<1x40x128xf32>
+    %2006 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_358 : tensor<1x1x40x128xf32>) outs(%2005 : tensor<1x40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<1x40x128xf32>
+    %2007 = tensor.empty() : tensor<40x128xf32>
+    %2008 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%2006 : tensor<1x40x128xf32>) outs(%2007 : tensor<40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<40x128xf32>
+    %2009 = tensor.empty() : tensor<1x40x128xf32>
+    %2010 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%2009 : tensor<1x40x128xf32>) {
+    ^bb0(%in: i64, %out: f32):
+      %4175 = arith.index_cast %in : i64 to index
+      %4176 = linalg.index 2 : index
+      %extracted = tensor.extract %2004[%4175, %4176] : tensor<40x128xf32>
+      linalg.yield %extracted : f32
+    } -> tensor<1x40x128xf32>
+    %2011 = tosa.reshape %2010 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+    %2012 = tensor.empty() : tensor<1x40x128xf32>
+    %2013 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%2012 : tensor<1x40x128xf32>) {
+    ^bb0(%in: i64, %out: f32):
+      %4175 = arith.index_cast %in : i64 to index
+      %4176 = linalg.index 2 : index
+      %extracted = tensor.extract %2008[%4175, %4176] : tensor<40x128xf32>
+      linalg.yield %extracted : f32
+    } -> tensor<1x40x128xf32>
+    %2014 = tosa.reshape %2013 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+    %2015 = tosa.mul %1994, %2011 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_359 = tensor.extract_slice %1994[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_360 = tensor.extract_slice %1994[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %2016 = tosa.negate %extracted_slice_360 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %2017 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_361 = tensor.insert_slice %2016 into %2017[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_362 = tensor.insert_slice %extracted_slice_359 into %inserted_slice_361[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %2018 = tosa.mul %inserted_slice_362, %2014 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2019 = tosa.add %2015, %2018 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2020 = tosa.mul %1997, %2011 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_363 = tensor.extract_slice %1997[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_364 = tensor.extract_slice %1997[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %2021 = tosa.negate %extracted_slice_364 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %2022 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_365 = tensor.insert_slice %2021 into %2022[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_366 = tensor.insert_slice %extracted_slice_363 into %inserted_slice_365[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %2023 = tosa.mul %inserted_slice_366, %2014 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2024 = tosa.add %2020, %2023 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2025 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2026 = tosa.transpose %2024, %2025 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32>
+    %2027 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %2028 = tosa.add %2019, %2027 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2029 = tosa.reshape %2028 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %2030 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32>
+    %2031 = tosa.add %2026, %2030 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32>
+    %2032 = tosa.reshape %2031 {new_shape = array<i64: 32, 128, 40>} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32>
+    %2033 = tosa.matmul %2029, %2032 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32>
+    %2034 = tosa.reshape %2033 {new_shape = array<i64: 1, 32, 40, 40>} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2035 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %2036 = tosa.reciprocal %2035 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2037 = tosa.mul %2034, %2036 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2038 = tosa.add %2037, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2039 = tosa.reduce_max %2038 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %2040 = tosa.sub %2038, %2039 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %2041 = tosa.exp %2040 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2042 = tosa.reduce_sum %2041 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %2043 = tosa.reciprocal %2042 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32>
+    %2044 = tosa.mul %2041, %2043 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %2045 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %2046 = tosa.add %2044, %2045 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2047 = tosa.reshape %2046 {new_shape = array<i64: 32, 40, 40>} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32>
+    %2048 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %2049 = tosa.add %2000, %2048 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2050 = tosa.reshape %2049 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %2051 = tosa.matmul %2047, %2050 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32>
+    %2052 = tosa.reshape %2051 {new_shape = array<i64: 1, 32, 40, 128>} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2053 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2054 = tosa.transpose %2052, %2053 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32>
+    %2055 = tosa.identity %2054 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32>
+    %2056 = tosa.reshape %2055 {new_shape = array<i64: 1, 40, 4096>} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32>
+    %2057 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2058 = tosa.transpose %arg173, %2057 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %2059 = tosa.reshape %2056 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_367 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2060 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2059, %2058 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_367 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2061 = tosa.reshape %2060 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2062 = tosa.add %1964, %2061 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2063 = tensor.empty() : tensor<1x40x4096xf32>
+    %c2_i32_368 = arith.constant 2 : i32
+    %2064 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2062 : tensor<1x40x4096xf32>) outs(%2063 : tensor<1x40x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %4175 = math.fpowi %in, %c2_i32_368 : f32, i32
+      linalg.yield %4175 : f32
+    } -> tensor<1x40x4096xf32>
+    %2065 = tosa.reduce_sum %2064 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
+    %2066 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32>
+    %2067 = tosa.reciprocal %2066 : (tensor<1xf32>) -> tensor<1xf32>
+    %2068 = tosa.mul %2067, %2065 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2069 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32>
+    %2070 = tosa.add %2068, %2069 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2071 = tosa.rsqrt %2070 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2072 = tosa.mul %2062, %2071 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32>
+    %2073 = tosa.reshape %arg174 {new_shape = array<i64: 1, 1, 4096>} : (tensor<4096xf32>) -> tensor<1x1x4096xf32>
+    %2074 = tosa.mul %2073, %2072 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2075 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2076 = tosa.transpose %arg175, %2075 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32>
+    %2077 = tosa.reshape %2074 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_369 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32>
+    %2078 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2077, %2076 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_369 : tensor<40x11008xf32>) -> tensor<40x11008xf32>
+    %2079 = tosa.reshape %2078 {new_shape = array<i64: 1, 40, 11008>} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2080 = tosa.sigmoid %2079 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2075 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2076 = tosa.transpose %arg175, %2075 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32>
+    %2077 = tosa.reshape %2074 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_369 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32>
+    %2078 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2077, %2076 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_369 : tensor<40x11008xf32>) -> tensor<40x11008xf32>
+    %2079 = tosa.reshape %2078 {new_shape = array<i64: 1, 40, 11008>} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2080 = tosa.sigmoid %2079 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2081 = tosa.mul %2079, %2080 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2082 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2083 = tosa.transpose %arg176, %2082 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32>
+    %2084 = tosa.reshape %2074 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_370 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32>
+    %2085 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2084, %2083 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_370 : tensor<40x11008xf32>) -> tensor<40x11008xf32>
+    %2086 = tosa.reshape %2085 {new_shape = array<i64: 1, 40, 11008>} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2087 = tosa.mul %2081, %2086 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2088 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2089 = tosa.transpose %arg177, %2088 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32>
+    %2090 = tosa.reshape %2087 {new_shape = array<i64: 40, 11008>} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32>
+    %cst_371 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2091 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2090, %2089 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_371 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2092 = tosa.reshape %2091 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2093 = tosa.add %2062, %2092 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2094 = tensor.empty() : tensor<1x40x4096xf32>
+    %c2_i32_372 = arith.constant 2 : i32
+    %2095 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2093 : tensor<1x40x4096xf32>) outs(%2094 : tensor<1x40x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %4175 = math.fpowi %in, %c2_i32_372 : f32, i32
+      linalg.yield %4175 : f32
+    } -> tensor<1x40x4096xf32>
+    %2096 = tosa.reduce_sum %2095 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
+    %2097 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32>
+    %2098 = tosa.reciprocal %2097 : (tensor<1xf32>) -> tensor<1xf32>
+    %2099 = tosa.mul %2098, %2096 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2100 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32>
+    %2101 = tosa.add %2099, %2100 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2102 = tosa.rsqrt %2101 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2103 = tosa.mul %2093, %2102 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32>
+    %2104 = tosa.reshape %arg178 {new_shape = array<i64: 1, 1, 4096>} : (tensor<4096xf32>) -> tensor<1x1x4096xf32>
+    %2105 = tosa.mul %2104, %2103 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2106 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2107 = tosa.transpose %arg179, %2106 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %2108 = tosa.reshape %2105 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_373 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2109 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2108, %2107 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_373 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2110 = tosa.reshape %2109 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2111 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2112 = tosa.transpose %arg180, %2111 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %2113 = tosa.reshape %2105 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_374 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2114 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2113, %2112 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_374 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2115 = tosa.reshape %2114 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2116 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2117 = tosa.transpose %arg181, %2116 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %2118 = tosa.reshape %2105 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_375 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2119 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2118, %2117 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_375 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2120 = tosa.reshape %2119 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2121 = tosa.reshape %2110 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %2122 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2123 = tosa.transpose %2121, %2122 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %2124 = tosa.reshape %2115 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %2125 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2126 = tosa.transpose %2124, %2125 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %2127 = tosa.reshape %2120 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %2128 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2129 = tosa.transpose %2127, %2128 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
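+    // Annotation (editor's note): rotary position embedding (RoPE). The slices
+    // below read the precomputed cos (%arg182) and sin (%arg183) caches, gather
+    // rows for each token position (%2), and combine q*cos + rotate_half(q)*sin
+    // for both Q and K.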
+    %extracted_slice_376 = tensor.extract_slice %arg182[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_377 = tensor.extract_slice %extracted_slice_376[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_378 = tensor.extract_slice %extracted_slice_377[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+    %extracted_slice_379 = tensor.extract_slice %arg183[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_380 = tensor.extract_slice %extracted_slice_379[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_381 = tensor.extract_slice %extracted_slice_380[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+    %2130 = tensor.empty() : tensor<1x40x128xf32>
+    %2131 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_378 : tensor<1x1x40x128xf32>) outs(%2130 : tensor<1x40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<1x40x128xf32>
+    %2132 = tensor.empty() : tensor<40x128xf32>
+    %2133 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%2131 : tensor<1x40x128xf32>) outs(%2132 : tensor<40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<40x128xf32>
+    %2134 = tensor.empty() : tensor<1x40x128xf32>
+    %2135 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_381 : tensor<1x1x40x128xf32>) outs(%2134 : tensor<1x40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<1x40x128xf32>
+    %2136 = tensor.empty() : tensor<40x128xf32>
+    %2137 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%2135 : tensor<1x40x128xf32>) outs(%2136 : tensor<40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<40x128xf32>
+    %2138 = tensor.empty() : tensor<1x40x128xf32>
+    %2139 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%2138 : tensor<1x40x128xf32>) {
+    ^bb0(%in: i64, %out: f32):
+      %4175 = arith.index_cast %in : i64 to index
+      %4176 = linalg.index 2 : index
+      %extracted = tensor.extract %2133[%4175, %4176] : tensor<40x128xf32>
+      linalg.yield %extracted : f32
+    } -> tensor<1x40x128xf32>
+    %2140 = tosa.reshape %2139 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+    %2141 = tensor.empty() : tensor<1x40x128xf32>
+    %2142 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%2141 : tensor<1x40x128xf32>) {
+    ^bb0(%in: i64, %out: f32):
+      %4175 = arith.index_cast %in : i64 to index
+      %4176 = linalg.index 2 : index
+      %extracted = tensor.extract %2137[%4175, %4176] : tensor<40x128xf32>
+      linalg.yield %extracted : f32
+    } -> tensor<1x40x128xf32>
+    %2143 = tosa.reshape %2142 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+    %2144 = tosa.mul %2123, %2140 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_382 = tensor.extract_slice %2123[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_383 = tensor.extract_slice %2123[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %2145 = tosa.negate %extracted_slice_383 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %2146 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_384 = tensor.insert_slice %2145 into %2146[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_385 = tensor.insert_slice %extracted_slice_382 into %inserted_slice_384[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %2147 = tosa.mul %inserted_slice_385, %2143 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2148 = tosa.add %2144, %2147 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2149 = tosa.mul %2126, %2140 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_386 = tensor.extract_slice %2126[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_387 = tensor.extract_slice %2126[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %2150 = tosa.negate %extracted_slice_387 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %2151 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_388 = tensor.insert_slice %2150 into %2151[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_389 = tensor.insert_slice %extracted_slice_386 into %inserted_slice_388[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %2152 = tosa.mul %inserted_slice_389, %2143 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2153 = tosa.add %2149, %2152 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
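+    // Annotation (editor's note): scaled dot-product attention. Scores are
+    // Q @ K^T divided by 11.3137083 (sqrt of the head dim 128), offset by the
+    // causal mask %29, then normalized with a numerically stable softmax
+    // (row max subtracted before exp); the result weights V.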
+    %2154 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2155 = tosa.transpose %2153, %2154 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32>
+    %2156 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %2157 = tosa.add %2148, %2156 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2158 = tosa.reshape %2157 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %2159 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32>
+    %2160 = tosa.add %2155, %2159 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32>
+    %2161 = tosa.reshape %2160 {new_shape = array<i64: 32, 128, 40>} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32>
+    %2162 = tosa.matmul %2158, %2161 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32>
+    %2163 = tosa.reshape %2162 {new_shape = array<i64: 1, 32, 40, 40>} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2164 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %2165 = tosa.reciprocal %2164 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2166 = tosa.mul %2163, %2165 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2167 = tosa.add %2166, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2168 = tosa.reduce_max %2167 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %2169 = tosa.sub %2167, %2168 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %2170 = tosa.exp %2169 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2171 = tosa.reduce_sum %2170 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %2172 = tosa.reciprocal %2171 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32>
+    %2173 = tosa.mul %2170, %2172 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %2174 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %2175 = tosa.add %2173, %2174 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2176 = tosa.reshape %2175 {new_shape = array<i64: 32, 40, 40>} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32>
+    %2177 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %2178 = tosa.add %2129, %2177 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2179 = tosa.reshape %2178 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %2180 = tosa.matmul %2176, %2179 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32>
+    %2181 = tosa.reshape %2180 {new_shape = array<i64: 1, 32, 40, 128>} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2182 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2183 = tosa.transpose %2181, %2182 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32>
+    %2184 = tosa.identity %2183 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32>
+    %2185 = tosa.reshape %2184 {new_shape = array<i64: 1, 40, 4096>} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32>
+    %2186 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2187 = tosa.transpose %arg184, %2186 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %2188 = tosa.reshape %2185 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_390 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2189 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2188, %2187 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_390 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2190 = tosa.reshape %2189 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2191 = tosa.add %2093, %2190 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2192 = tensor.empty() : tensor<1x40x4096xf32>
+    %c2_i32_391 = arith.constant 2 : i32
+    %2193 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2191 : tensor<1x40x4096xf32>) outs(%2192 : tensor<1x40x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %4175 = math.fpowi %in, %c2_i32_391 : f32, i32
+      linalg.yield %4175 : f32
+    } -> tensor<1x40x4096xf32>
+    %2194 = tosa.reduce_sum %2193 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
+    %2195 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32>
+    %2196 = tosa.reciprocal %2195 : (tensor<1xf32>) -> tensor<1xf32>
+    %2197 = tosa.mul %2196, %2194 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2198 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32>
+    %2199 = tosa.add %2197, %2198 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2200 = tosa.rsqrt %2199 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2201 = tosa.mul %2191, %2200 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32>
+    %2202 = tosa.reshape %arg185 {new_shape = array<i64: 1, 1, 4096>} : (tensor<4096xf32>) -> tensor<1x1x4096xf32>
+    %2203 = tosa.mul %2202, %2201 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2204 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2205 = tosa.transpose %arg186, %2204 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32>
+    %2206 = tosa.reshape %2203 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_392 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32>
+    %2207 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2206, %2205 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_392 : tensor<40x11008xf32>) -> tensor<40x11008xf32>
+    %2208 = tosa.reshape %2207 {new_shape = array<i64: 1, 40, 11008>} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2209 = tosa.sigmoid %2208 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2210 = tosa.mul %2208, %2209 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2211 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2212 = tosa.transpose %arg187, %2211 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32>
+    %2213 = tosa.reshape %2203 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_393 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32>
+    %2214 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2213, %2212 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_393 : tensor<40x11008xf32>) -> tensor<40x11008xf32>
+    %2215 = tosa.reshape %2214 {new_shape = array<i64: 1, 40, 11008>} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2216 = tosa.mul %2210, %2215 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2217 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2218 = tosa.transpose %arg188, %2217 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32>
+    %2219 = tosa.reshape %2216 {new_shape = array<i64: 40, 11008>} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32>
+    %cst_394 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2220 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2219, %2218 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_394 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2221 = tosa.reshape %2220 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2222 = tosa.add %2191, %2221 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2223 = tensor.empty() : tensor<1x40x4096xf32>
+    %c2_i32_395 = arith.constant 2 : i32
+    %2224 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2222 : tensor<1x40x4096xf32>) outs(%2223 : tensor<1x40x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %4175 = math.fpowi %in, %c2_i32_395 : f32, i32
+      linalg.yield %4175 : f32
+    } -> tensor<1x40x4096xf32>
+    %2225 = tosa.reduce_sum %2224 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
+    %2226 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32>
+    %2227 = tosa.reciprocal %2226 : (tensor<1xf32>) -> tensor<1xf32>
+    %2228 = tosa.mul %2227, %2225 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2229 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32>
+    %2230 = tosa.add %2228, %2229 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2231 = tosa.rsqrt %2230 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2232 = tosa.mul %2222, %2231 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32>
+    %2233 = tosa.reshape %arg189 {new_shape = array<i64: 1, 1, 4096>} : (tensor<4096xf32>) -> tensor<1x1x4096xf32>
+    %2234 = tosa.mul %2233, %2232 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
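+    // Annotation (editor's note): Q, K, and V projections of the next decoder
+    // layer, applying the weights %arg190, %arg191, and %arg192 to the
+    // RMS-normalized hidden state.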
+    %2235 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2236 = tosa.transpose %arg190, %2235 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %2237 = tosa.reshape %2234 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_396 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2238 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2237, %2236 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_396 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2239 = tosa.reshape %2238 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2240 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2241 = tosa.transpose %arg191, %2240 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %2242 = tosa.reshape %2234 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_397 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2243 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2242, %2241 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_397 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2244 = tosa.reshape %2243 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2245 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2246 = tosa.transpose %arg192, %2245 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %2247 = tosa.reshape %2234 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_398 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2248 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2247, %2246 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_398 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2249 = tosa.reshape %2248 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2250 = tosa.reshape %2239 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %2251 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2252 = tosa.transpose %2250, %2251 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %2253 = tosa.reshape %2244 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %2254 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2255 = tosa.transpose %2253, %2254 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %2256 = tosa.reshape %2249 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %2257 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2258 = tosa.transpose %2256, %2257 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_399 = tensor.extract_slice %arg193[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_400 = tensor.extract_slice %extracted_slice_399[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_401 = tensor.extract_slice %extracted_slice_400[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+    %extracted_slice_402 = tensor.extract_slice %arg194[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_403 = tensor.extract_slice %extracted_slice_402[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_404 = tensor.extract_slice %extracted_slice_403[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+    %2259 = tensor.empty() : tensor<1x40x128xf32>
+    %2260 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_401 : tensor<1x1x40x128xf32>) outs(%2259 : tensor<1x40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<1x40x128xf32>
+    %2261 = tensor.empty() : tensor<40x128xf32>
+    %2262 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%2260 : tensor<1x40x128xf32>) outs(%2261 : tensor<40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<40x128xf32>
+    %2263 = tensor.empty() : tensor<1x40x128xf32>
+    %2264 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_404 : tensor<1x1x40x128xf32>) outs(%2263 : tensor<1x40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<1x40x128xf32>
+    %2265 = tensor.empty() : tensor<40x128xf32>
+    %2266 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%2264 : tensor<1x40x128xf32>) outs(%2265 : tensor<40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<40x128xf32>
+    %2267 = tensor.empty() : tensor<1x40x128xf32>
+    %2268 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%2267 : tensor<1x40x128xf32>) {
+    ^bb0(%in: i64, %out: f32):
+      %4175 = arith.index_cast %in : i64 to index
+      %4176 = linalg.index 2 : index
+      %extracted = tensor.extract %2262[%4175, %4176] : tensor<40x128xf32>
+      linalg.yield %extracted : f32
+    } -> tensor<1x40x128xf32>
+    %2269 = tosa.reshape %2268 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+    %2270 = tensor.empty() : tensor<1x40x128xf32>
+    %2271 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%2270 : tensor<1x40x128xf32>) {
+    ^bb0(%in: i64, %out: f32):
+      %4175 = arith.index_cast %in : i64 to index
+      %4176 = linalg.index 2 : index
+      %extracted = tensor.extract %2266[%4175, %4176] : tensor<40x128xf32>
+      linalg.yield %extracted : f32
+    } -> tensor<1x40x128xf32>
+    %2272 = tosa.reshape %2271 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+    %2273 = tosa.mul %2252, %2269 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_405 = tensor.extract_slice %2252[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_406 = tensor.extract_slice %2252[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %2274 = tosa.negate %extracted_slice_406 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %2275 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_407 = tensor.insert_slice %2274 into %2275[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_408 = tensor.insert_slice %extracted_slice_405 into %inserted_slice_407[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %2276 = tosa.mul %inserted_slice_408, %2272 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2277 = tosa.add %2273, %2276 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2278 = tosa.mul %2255, %2269 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_409 = tensor.extract_slice %2255[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_410 = tensor.extract_slice %2255[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %2279 = tosa.negate %extracted_slice_410 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %2280 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_411 = tensor.insert_slice %2279 into %2280[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_412 = tensor.insert_slice %extracted_slice_409 into %inserted_slice_411[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %2281 = tosa.mul %inserted_slice_412, %2272 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2282 = tosa.add %2278, %2281 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2283 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2284 = tosa.transpose %2282, %2283 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32>
+    %2285 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %2286 = tosa.add %2277, %2285 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2287 = tosa.reshape %2286 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %2288 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32>
+    %2289 = tosa.add %2284, %2288 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32>
+    %2290 = tosa.reshape %2289 {new_shape = array<i64: 32, 128, 40>} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32>
+    %2291 = tosa.matmul %2287, %2290 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32>
+    %2292 = tosa.reshape %2291 {new_shape = array<i64: 1, 32, 40, 40>} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2293 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %2294 = tosa.reciprocal %2293 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2295 = tosa.mul %2292, %2294 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2296 = tosa.add %2295, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2297 = tosa.reduce_max %2296 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %2298 = tosa.sub %2296, %2297 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %2299 = tosa.exp %2298 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2300 = tosa.reduce_sum %2299 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %2301 = tosa.reciprocal %2300 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32>
+    %2302 = tosa.mul %2299, %2301 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %2303 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %2304 = tosa.add %2302, %2303 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2305 = tosa.reshape %2304 {new_shape = array<i64: 32, 40, 40>} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32>
+    %2306 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %2307 = tosa.add %2258, %2306 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2308 = tosa.reshape %2307 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %2309 = tosa.matmul %2305, %2308 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32>
+    %2310 = tosa.reshape %2309 {new_shape = array<i64: 1, 32, 40, 128>} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2311 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2312 = tosa.transpose %2310, %2311 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32>
+    %2313 = tosa.identity %2312 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32>
+    %2314 = tosa.reshape %2313 {new_shape = array<i64: 1, 40, 4096>} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32>
+    %2315 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2316 = tosa.transpose %arg195, %2315 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %2317 = tosa.reshape %2314 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_413 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2318 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2317, %2316 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_413 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2319 = tosa.reshape %2318 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2320 = tosa.add %2222, %2319 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2321 = tensor.empty() : tensor<1x40x4096xf32>
+    %c2_i32_414 = arith.constant 2 : i32
+    %2322 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2320 : tensor<1x40x4096xf32>) outs(%2321 : tensor<1x40x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %4175 = math.fpowi %in, %c2_i32_414 : f32, i32
+      linalg.yield %4175 : f32
+    } -> tensor<1x40x4096xf32>
+    %2323 = tosa.reduce_sum %2322 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
+    %2324 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32>
+    %2325 = tosa.reciprocal %2324 : (tensor<1xf32>) -> tensor<1xf32>
+    %2326 = tosa.mul %2325, %2323 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2327 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32>
+    %2328 = tosa.add %2326, %2327 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2329 = tosa.rsqrt %2328 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2330 = tosa.mul %2320, %2329 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32>
+    %2331 = tosa.reshape %arg196 {new_shape = array<i64: 1, 1, 4096>} : (tensor<4096xf32>) -> tensor<1x1x4096xf32>
+    %2332 = tosa.mul %2331, %2330 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2333 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2334 = tosa.transpose %arg197, %2333 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32>
+    %2335 = tosa.reshape %2332 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_415 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32>
+    %2336 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2335, %2334 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_415 : tensor<40x11008xf32>) -> tensor<40x11008xf32>
+    %2337 = tosa.reshape %2336 {new_shape = array<i64: 1, 40, 11008>} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2338 = tosa.sigmoid %2337 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2339 = tosa.mul %2337, %2338 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2340 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2341 = tosa.transpose %arg198, %2340 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32>
+    %2342 = tosa.reshape %2332 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_416 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32>
+    %2343 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2342, %2341 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_416 : tensor<40x11008xf32>) -> tensor<40x11008xf32>
+    %2344 = tosa.reshape %2343 {new_shape = array<i64: 1, 40, 11008>} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2345 = tosa.mul %2339, %2344 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2346 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2347 = tosa.transpose %arg199, %2346 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32>
+    %2348 = tosa.reshape %2345 {new_shape = array<i64: 40, 11008>} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32>
+    %cst_417 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2349 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2348, %2347 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_417 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2350 = tosa.reshape %2349 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2351 = tosa.add %2320, %2350 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2352 = tensor.empty() : tensor<1x40x4096xf32>
+    %c2_i32_418 = arith.constant 2 : i32
+    %2353 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2351 : tensor<1x40x4096xf32>) outs(%2352 : tensor<1x40x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %4175 = math.fpowi %in, %c2_i32_418 : f32, i32
+      linalg.yield %4175 : f32
+    } -> tensor<1x40x4096xf32>
+    %2354 = tosa.reduce_sum %2353 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
+    %2355 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32>
+    %2356 = tosa.reciprocal %2355 : (tensor<1xf32>) -> tensor<1xf32>
+    %2357 = tosa.mul %2356, %2354 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2358 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32>
+    %2359 = tosa.add %2357, %2358 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2360 = tosa.rsqrt %2359 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2361 = tosa.mul %2351, %2360 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32>
+    %2362 = tosa.reshape %arg200 {new_shape = array<i64: 1, 1, 4096>} : (tensor<4096xf32>) -> tensor<1x1x4096xf32>
+    %2363 = tosa.mul %2362, %2361 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
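+    // Annotation (editor's note): from here on, the same decoder-layer pattern
+    // (RMSNorm, Q/K/V projections, RoPE, attention, residual add, RMSNorm,
+    // SwiGLU MLP, residual add) repeats with the successive weight arguments
+    // (%arg201 onward).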
+    %2364 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2365 = tosa.transpose %arg201, %2364 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %2366 = tosa.reshape %2363 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_419 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2367 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2366, %2365 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_419 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2368 = tosa.reshape %2367 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2369 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2370 = tosa.transpose %arg202, %2369 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %2371 = tosa.reshape %2363 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_420 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2372 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2371, %2370 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_420 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2373 = tosa.reshape %2372 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2374 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2375 = tosa.transpose %arg203, %2374 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %2376 = tosa.reshape %2363 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_421 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2377 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2376, %2375 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_421 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2378 = tosa.reshape %2377 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2379 = tosa.reshape %2368 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %2380 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2381 = tosa.transpose %2379, %2380 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %2382 = tosa.reshape %2373 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %2383 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2384 = tosa.transpose %2382, %2383 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %2385 = tosa.reshape %2378 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %2386 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2387 = tosa.transpose %2385, %2386 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_422 = tensor.extract_slice %arg204[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_423 = tensor.extract_slice %extracted_slice_422[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_424 = tensor.extract_slice %extracted_slice_423[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+    %extracted_slice_425 = tensor.extract_slice %arg205[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_426 = tensor.extract_slice %extracted_slice_425[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_427 = tensor.extract_slice %extracted_slice_426[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+    %2388 = tensor.empty() : tensor<1x40x128xf32>
+    %2389 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_424 : tensor<1x1x40x128xf32>) outs(%2388 : tensor<1x40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<1x40x128xf32>
+    %2390 = tensor.empty() : tensor<40x128xf32>
+    %2391 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%2389 : tensor<1x40x128xf32>) outs(%2390 : tensor<40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<40x128xf32>
+    %2392 = tensor.empty() : tensor<1x40x128xf32>
+    %2393 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_427 : tensor<1x1x40x128xf32>) outs(%2392 : tensor<1x40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<1x40x128xf32>
+    %2394 = tensor.empty() : tensor<40x128xf32>
+    %2395 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%2393 : tensor<1x40x128xf32>) outs(%2394 : tensor<40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<40x128xf32>
+    %2396 = tensor.empty() : tensor<1x40x128xf32>
+    %2397 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%2396 : tensor<1x40x128xf32>) {
+    ^bb0(%in: i64, %out: f32):
+      %4175 = arith.index_cast %in : i64 to index
+      %4176 = linalg.index 2 : index
+      %extracted = tensor.extract %2391[%4175, %4176] : tensor<40x128xf32>
+      linalg.yield %extracted : f32
+    } -> tensor<1x40x128xf32>
+    %2398 = tosa.reshape %2397 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+    %2399 = tensor.empty() : tensor<1x40x128xf32>
+    %2400 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%2399 : tensor<1x40x128xf32>) {
+    ^bb0(%in: i64, %out: f32):
+      %4175 = arith.index_cast %in : i64 to index
+      %4176 = linalg.index 2 : index
+      %extracted = tensor.extract %2395[%4175, %4176] : tensor<40x128xf32>
+      linalg.yield %extracted : f32
+    } -> tensor<1x40x128xf32>
+    %2401 = tosa.reshape %2400 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+    %2402 = tosa.mul %2381, %2398 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_428 = tensor.extract_slice %2381[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_429 = tensor.extract_slice %2381[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %2403 = tosa.negate %extracted_slice_429 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %2404 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_430 = tensor.insert_slice %2403 into %2404[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_431 = tensor.insert_slice %extracted_slice_428 into %inserted_slice_430[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %2405 = tosa.mul %inserted_slice_431, %2401 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2406 = tosa.add %2402, %2405 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2407 = tosa.mul %2384, %2398 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_432 = tensor.extract_slice %2384[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_433 = tensor.extract_slice %2384[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %2408 = tosa.negate %extracted_slice_433 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %2409 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_434 = tensor.insert_slice %2408 into %2409[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_435 = tensor.insert_slice %extracted_slice_432 into %inserted_slice_434[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %2410 = tosa.mul %inserted_slice_435, %2401 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2411 = tosa.add %2407, %2410 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2412 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2413 = tosa.transpose %2411, %2412 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32>
+    %2414 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %2415 = tosa.add %2406, %2414 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2416 = tosa.reshape %2415 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %2417 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32>
+    %2418 = tosa.add %2413, %2417 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32>
+    %2419 = tosa.reshape %2418 {new_shape = array<i64: 32, 128, 40>} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32>
+    %2420 = tosa.matmul %2416, %2419 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32>
+    %2421 = tosa.reshape %2420 {new_shape = array<i64: 1, 32, 40, 40>} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2422 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %2423 = tosa.reciprocal %2422 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2424 = tosa.mul %2421, %2423 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2425 = tosa.add %2424, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2426 = tosa.reduce_max %2425 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %2427 = tosa.sub %2425, %2426 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %2428 = tosa.exp %2427 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2429 = tosa.reduce_sum %2428 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %2430 = tosa.reciprocal %2429 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32>
+    %2431 = tosa.mul %2428, %2430 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %2432 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %2433 = tosa.add %2431, %2432 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2434 = tosa.reshape %2433 {new_shape = array<i64: 32, 40, 40>} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32>
+    %2435 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %2436 = tosa.add %2387, %2435 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2437 = tosa.reshape %2436 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %2438 = tosa.matmul %2434, %2437 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32>
+    %2439 = tosa.reshape %2438 {new_shape = array<i64: 1, 32, 40, 128>} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2440 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2441 = tosa.transpose %2439, %2440 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32>
+    %2442 = tosa.identity %2441 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32>
+    %2443 = tosa.reshape %2442 {new_shape = array<i64: 1, 40, 4096>} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32>
+    %2444 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2445 = tosa.transpose %arg206, %2444 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %2446 = tosa.reshape %2443 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_436 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2447 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2446, %2445 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_436 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2448 = tosa.reshape %2447 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2449 = tosa.add %2351, %2448 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2450 = tensor.empty() : tensor<1x40x4096xf32>
+    %c2_i32_437 = arith.constant 2 : i32
+    %2451 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2449 : tensor<1x40x4096xf32>) outs(%2450 : tensor<1x40x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %4175 = math.fpowi %in, %c2_i32_437 : f32, i32
+      linalg.yield %4175 : f32
+    } -> tensor<1x40x4096xf32>
+    %2452 = tosa.reduce_sum %2451 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
+    %2453 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32>
+    %2454 = tosa.reciprocal %2453 : (tensor<1xf32>) -> tensor<1xf32>
+    %2455 = tosa.mul %2454, %2452 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2456 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32>
+    %2457 = tosa.add %2455, %2456 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2458 = tosa.rsqrt %2457 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2459 = tosa.mul %2449, %2458 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32>
+    %2460 = tosa.reshape %arg207 {new_shape = array<i64: 1, 1, 4096>} : (tensor<4096xf32>) -> tensor<1x1x4096xf32>
+    %2461 = tosa.mul %2460, %2459 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2462 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2463 = tosa.transpose %arg208, %2462 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32>
+    %2464 = tosa.reshape %2461 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_438 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32>
+    %2465 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2464, %2463 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_438 : tensor<40x11008xf32>) -> tensor<40x11008xf32>
+    %2466 = tosa.reshape %2465 {new_shape = array<i64: 1, 40, 11008>} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2467 = tosa.sigmoid %2466 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2468 = tosa.mul %2466, %2467 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2469 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2470 = tosa.transpose %arg209, %2469 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32>
+    %2471 = tosa.reshape %2461 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_439 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32>
+    %2472 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2471, %2470 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_439 : tensor<40x11008xf32>) -> tensor<40x11008xf32>
+    %2473 = tosa.reshape %2472 {new_shape = array<i64: 1, 40, 11008>} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2474 = tosa.mul %2468, %2473 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2475 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2476 = tosa.transpose %arg210, %2475 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32>
+    %2477 = tosa.reshape %2474 {new_shape = array<i64: 40, 11008>} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32>
+    %cst_440 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2478 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2477, %2476 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_440 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2479 = tosa.reshape %2478 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2480 = tosa.add %2449, %2479 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2481 = tensor.empty() : tensor<1x40x4096xf32>
+    %c2_i32_441 = arith.constant 2 : i32
+    %2482 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2480 : tensor<1x40x4096xf32>) outs(%2481 : tensor<1x40x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %4175 = math.fpowi %in, %c2_i32_441 : f32, i32
+      linalg.yield %4175 : f32
+    } -> tensor<1x40x4096xf32>
+    %2483 = tosa.reduce_sum %2482 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
+    %2484 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32>
+    %2485 = tosa.reciprocal %2484 : (tensor<1xf32>) -> tensor<1xf32>
+    %2486 = tosa.mul %2485, %2483 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2487 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32>
+    %2488 = tosa.add %2486, %2487 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2489 = tosa.rsqrt %2488 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2490 = tosa.mul %2480, %2489 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32>
+    %2491 = tosa.reshape %arg211 {new_shape = array<i64: 1, 1, 4096>} : (tensor<4096xf32>) -> tensor<1x1x4096xf32>
+    %2492 = tosa.mul %2491, %2490 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2493 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2494 = tosa.transpose %arg212, %2493 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %2495 = tosa.reshape %2492 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_442 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2496 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2495, %2494 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_442 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2497 = tosa.reshape %2496 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2498 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2499 = tosa.transpose %arg213, %2498 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %2500 = tosa.reshape %2492 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_443 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2501 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2500, %2499 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_443 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2502 = tosa.reshape %2501 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2503 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2504 = tosa.transpose %arg214, %2503 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %2505 = tosa.reshape %2492 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_444 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2506 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2505, %2504 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_444 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2507 = tosa.reshape %2506 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2508 = tosa.reshape %2497 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %2509 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2510 = tosa.transpose %2508, %2509 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %2511 = tosa.reshape %2502 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %2512 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2513 = tosa.transpose %2511, %2512 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %2514 = tosa.reshape %2507 {new_shape = array<i64: 1, 40, 32, 128>} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32>
+    %2515 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2516 = tosa.transpose %2514, %2515 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_445 = tensor.extract_slice %arg215[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_446 = tensor.extract_slice %extracted_slice_445[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_447 = tensor.extract_slice %extracted_slice_446[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+    %extracted_slice_448 = tensor.extract_slice %arg216[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_449 = tensor.extract_slice %extracted_slice_448[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+    %extracted_slice_450 = tensor.extract_slice %extracted_slice_449[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+    %2517 = tensor.empty() : tensor<1x40x128xf32>
+    %2518 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_447 : tensor<1x1x40x128xf32>) outs(%2517 : tensor<1x40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<1x40x128xf32>
+    %2519 = tensor.empty() : tensor<40x128xf32>
+    %2520 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%2518 : tensor<1x40x128xf32>) outs(%2519 : tensor<40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<40x128xf32>
linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%2518 : tensor<1x40x128xf32>) outs(%2519 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %2521 = tensor.empty() : tensor<1x40x128xf32> + %2522 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_450 : tensor<1x1x40x128xf32>) outs(%2521 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %2523 = tensor.empty() : tensor<40x128xf32> + %2524 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%2522 : tensor<1x40x128xf32>) outs(%2523 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %2525 = tensor.empty() : tensor<1x40x128xf32> + %2526 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%2525 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %2520[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %2527 = tosa.reshape %2526 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2528 = tensor.empty() : tensor<1x40x128xf32> + %2529 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%2528 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %2524[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %2530 = tosa.reshape %2529 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2531 = tosa.mul %2510, %2527 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_451 = tensor.extract_slice %2510[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_452 = tensor.extract_slice %2510[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2532 = tosa.negate %extracted_slice_452 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2533 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_453 = tensor.insert_slice %2532 into %2533[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_454 = tensor.insert_slice %extracted_slice_451 into %inserted_slice_453[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2534 = tosa.mul %inserted_slice_454, %2530 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2535 = tosa.add %2531, %2534 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2536 = tosa.mul %2513, %2527 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_455 = tensor.extract_slice %2513[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_456 = tensor.extract_slice %2513[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to 
tensor<1x32x40x64xf32> + %2537 = tosa.negate %extracted_slice_456 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2538 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_457 = tensor.insert_slice %2537 into %2538[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_458 = tensor.insert_slice %extracted_slice_455 into %inserted_slice_457[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2539 = tosa.mul %inserted_slice_458, %2530 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2540 = tosa.add %2536, %2539 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2541 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2542 = tosa.transpose %2540, %2541 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %2543 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %2544 = tosa.add %2535, %2543 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2545 = tosa.reshape %2544 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2546 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32> + %2547 = tosa.add %2542, %2546 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32> + %2548 = tosa.reshape %2547 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %2549 = tosa.matmul %2545, %2548 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %2550 = tosa.reshape %2549 {new_shape = array} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32> + %2551 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %2552 = tosa.reciprocal %2551 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %2553 = tosa.mul %2550, %2552 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %2554 = tosa.add %2553, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32> + %2555 = tosa.reduce_max %2554 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %2556 = tosa.sub %2554, %2555 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %2557 = tosa.exp %2556 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %2558 = tosa.reduce_sum %2557 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %2559 = tosa.reciprocal %2558 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + %2560 = tosa.mul %2557, %2559 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %2561 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %2562 = tosa.add %2560, %2561 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %2563 = tosa.reshape %2562 {new_shape = array} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32> + %2564 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %2565 = tosa.add %2516, %2564 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2566 = tosa.reshape %2565 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2567 = tosa.matmul %2563, %2566 : 
(tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %2568 = tosa.reshape %2567 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2569 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2570 = tosa.transpose %2568, %2569 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %2571 = tosa.identity %2570 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32> + %2572 = tosa.reshape %2571 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %2573 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2574 = tosa.transpose %arg217, %2573 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2575 = tosa.reshape %2572 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_459 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2576 = linalg.matmul {cast = #linalg.type_fn} ins(%2575, %2574 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_459 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2577 = tosa.reshape %2576 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2578 = tosa.add %2480, %2577 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2579 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_460 = arith.constant 2 : i32 + %2580 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2578 : tensor<1x40x4096xf32>) outs(%2579 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_460 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %2581 = tosa.reduce_sum %2580 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2582 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2583 = tosa.reciprocal %2582 : (tensor<1xf32>) -> tensor<1xf32> + %2584 = tosa.mul %2583, %2581 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2585 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2586 = tosa.add %2584, %2585 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2587 = tosa.rsqrt %2586 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2588 = tosa.mul %2578, %2587 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2589 = tosa.reshape %arg218 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2590 = tosa.mul %2589, %2588 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2591 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2592 = tosa.transpose %arg219, %2591 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %2593 = tosa.reshape %2590 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_461 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %2594 = linalg.matmul {cast = #linalg.type_fn} ins(%2593, %2592 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_461 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2595 = tosa.reshape %2594 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2596 = tosa.sigmoid %2595 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2597 = tosa.mul %2595, %2596 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2598 = 
"tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2599 = tosa.transpose %arg220, %2598 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %2600 = tosa.reshape %2590 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_462 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %2601 = linalg.matmul {cast = #linalg.type_fn} ins(%2600, %2599 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_462 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2602 = tosa.reshape %2601 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2603 = tosa.mul %2597, %2602 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2604 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2605 = tosa.transpose %arg221, %2604 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %2606 = tosa.reshape %2603 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_463 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2607 = linalg.matmul {cast = #linalg.type_fn} ins(%2606, %2605 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_463 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2608 = tosa.reshape %2607 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2609 = tosa.add %2578, %2608 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2610 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_464 = arith.constant 2 : i32 + %2611 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2609 : tensor<1x40x4096xf32>) outs(%2610 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_464 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %2612 = tosa.reduce_sum %2611 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2613 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2614 = tosa.reciprocal %2613 : (tensor<1xf32>) -> tensor<1xf32> + %2615 = tosa.mul %2614, %2612 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2616 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2617 = tosa.add %2615, %2616 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2618 = tosa.rsqrt %2617 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2619 = tosa.mul %2609, %2618 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2620 = tosa.reshape %arg222 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2621 = tosa.mul %2620, %2619 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2622 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2623 = tosa.transpose %arg223, %2622 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2624 = tosa.reshape %2621 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_465 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2625 = linalg.matmul {cast = #linalg.type_fn} ins(%2624, %2623 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_465 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2626 = tosa.reshape %2625 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2627 = "tosa.const"() 
<{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2628 = tosa.transpose %arg224, %2627 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2629 = tosa.reshape %2621 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_466 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2630 = linalg.matmul {cast = #linalg.type_fn} ins(%2629, %2628 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_466 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2631 = tosa.reshape %2630 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2632 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2633 = tosa.transpose %arg225, %2632 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2634 = tosa.reshape %2621 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_467 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2635 = linalg.matmul {cast = #linalg.type_fn} ins(%2634, %2633 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_467 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2636 = tosa.reshape %2635 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2637 = tosa.reshape %2626 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2638 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2639 = tosa.transpose %2637, %2638 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2640 = tosa.reshape %2631 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2641 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2642 = tosa.transpose %2640, %2641 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2643 = tosa.reshape %2636 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2644 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2645 = tosa.transpose %2643, %2644 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %extracted_slice_468 = tensor.extract_slice %arg226[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_469 = tensor.extract_slice %extracted_slice_468[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_470 = tensor.extract_slice %extracted_slice_469[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %extracted_slice_471 = tensor.extract_slice %arg227[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_472 = tensor.extract_slice %extracted_slice_471[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_473 = tensor.extract_slice %extracted_slice_472[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %2646 = tensor.empty() : tensor<1x40x128xf32> + %2647 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_470 : tensor<1x1x40x128xf32>) outs(%2646 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %2648 = tensor.empty() : tensor<40x128xf32> + %2649 = linalg.generic {indexing_maps = [#map7, #map3], 
iterator_types = ["parallel", "parallel"]} ins(%2647 : tensor<1x40x128xf32>) outs(%2648 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %2650 = tensor.empty() : tensor<1x40x128xf32> + %2651 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_473 : tensor<1x1x40x128xf32>) outs(%2650 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %2652 = tensor.empty() : tensor<40x128xf32> + %2653 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%2651 : tensor<1x40x128xf32>) outs(%2652 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %2654 = tensor.empty() : tensor<1x40x128xf32> + %2655 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%2654 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %2649[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %2656 = tosa.reshape %2655 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2657 = tensor.empty() : tensor<1x40x128xf32> + %2658 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%2657 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %2653[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %2659 = tosa.reshape %2658 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2660 = tosa.mul %2639, %2656 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_474 = tensor.extract_slice %2639[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_475 = tensor.extract_slice %2639[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2661 = tosa.negate %extracted_slice_475 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2662 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_476 = tensor.insert_slice %2661 into %2662[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_477 = tensor.insert_slice %extracted_slice_474 into %inserted_slice_476[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2663 = tosa.mul %inserted_slice_477, %2659 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2664 = tosa.add %2660, %2663 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2665 = tosa.mul %2642, %2656 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_478 = tensor.extract_slice %2642[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_479 = tensor.extract_slice %2642[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2666 = tosa.negate 
%extracted_slice_479 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2667 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_480 = tensor.insert_slice %2666 into %2667[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_481 = tensor.insert_slice %extracted_slice_478 into %inserted_slice_480[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2668 = tosa.mul %inserted_slice_481, %2659 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2669 = tosa.add %2665, %2668 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2670 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2671 = tosa.transpose %2669, %2670 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %2672 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %2673 = tosa.add %2664, %2672 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2674 = tosa.reshape %2673 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2675 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32> + %2676 = tosa.add %2671, %2675 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32> + %2677 = tosa.reshape %2676 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %2678 = tosa.matmul %2674, %2677 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %2679 = tosa.reshape %2678 {new_shape = array} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32> + %2680 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %2681 = tosa.reciprocal %2680 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %2682 = tosa.mul %2679, %2681 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %2683 = tosa.add %2682, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32> + %2684 = tosa.reduce_max %2683 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %2685 = tosa.sub %2683, %2684 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %2686 = tosa.exp %2685 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %2687 = tosa.reduce_sum %2686 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %2688 = tosa.reciprocal %2687 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + %2689 = tosa.mul %2686, %2688 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %2690 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %2691 = tosa.add %2689, %2690 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %2692 = tosa.reshape %2691 {new_shape = array} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32> + %2693 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %2694 = tosa.add %2645, %2693 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2695 = tosa.reshape %2694 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2696 = tosa.matmul %2692, %2695 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> 
tensor<32x40x128xf32> + %2697 = tosa.reshape %2696 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2698 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2699 = tosa.transpose %2697, %2698 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %2700 = tosa.identity %2699 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32> + %2701 = tosa.reshape %2700 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %2702 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2703 = tosa.transpose %arg228, %2702 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2704 = tosa.reshape %2701 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_482 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2705 = linalg.matmul {cast = #linalg.type_fn} ins(%2704, %2703 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_482 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2706 = tosa.reshape %2705 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2707 = tosa.add %2609, %2706 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2708 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_483 = arith.constant 2 : i32 + %2709 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2707 : tensor<1x40x4096xf32>) outs(%2708 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_483 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %2710 = tosa.reduce_sum %2709 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2711 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2712 = tosa.reciprocal %2711 : (tensor<1xf32>) -> tensor<1xf32> + %2713 = tosa.mul %2712, %2710 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2714 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2715 = tosa.add %2713, %2714 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2716 = tosa.rsqrt %2715 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2717 = tosa.mul %2707, %2716 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2718 = tosa.reshape %arg229 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2719 = tosa.mul %2718, %2717 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2720 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2721 = tosa.transpose %arg230, %2720 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %2722 = tosa.reshape %2719 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_484 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %2723 = linalg.matmul {cast = #linalg.type_fn} ins(%2722, %2721 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_484 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2724 = tosa.reshape %2723 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2725 = tosa.sigmoid %2724 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2726 = tosa.mul %2724, %2725 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2727 = "tosa.const"() <{value = dense<[1, 0]> : 
tensor<2xi32>}> : () -> tensor<2xi32> + %2728 = tosa.transpose %arg231, %2727 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %2729 = tosa.reshape %2719 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_485 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %2730 = linalg.matmul {cast = #linalg.type_fn} ins(%2729, %2728 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_485 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2731 = tosa.reshape %2730 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2732 = tosa.mul %2726, %2731 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2733 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2734 = tosa.transpose %arg232, %2733 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %2735 = tosa.reshape %2732 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_486 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2736 = linalg.matmul {cast = #linalg.type_fn} ins(%2735, %2734 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_486 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2737 = tosa.reshape %2736 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2738 = tosa.add %2707, %2737 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2739 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_487 = arith.constant 2 : i32 + %2740 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2738 : tensor<1x40x4096xf32>) outs(%2739 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_487 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %2741 = tosa.reduce_sum %2740 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2742 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2743 = tosa.reciprocal %2742 : (tensor<1xf32>) -> tensor<1xf32> + %2744 = tosa.mul %2743, %2741 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2745 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2746 = tosa.add %2744, %2745 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2747 = tosa.rsqrt %2746 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2748 = tosa.mul %2738, %2747 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2749 = tosa.reshape %arg233 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2750 = tosa.mul %2749, %2748 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2751 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2752 = tosa.transpose %arg234, %2751 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2753 = tosa.reshape %2750 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_488 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2754 = linalg.matmul {cast = #linalg.type_fn} ins(%2753, %2752 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_488 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2755 = tosa.reshape %2754 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2756 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () 
-> tensor<2xi32> + %2757 = tosa.transpose %arg235, %2756 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2758 = tosa.reshape %2750 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_489 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2759 = linalg.matmul {cast = #linalg.type_fn} ins(%2758, %2757 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_489 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2760 = tosa.reshape %2759 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2761 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2762 = tosa.transpose %arg236, %2761 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2763 = tosa.reshape %2750 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_490 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2764 = linalg.matmul {cast = #linalg.type_fn} ins(%2763, %2762 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_490 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2765 = tosa.reshape %2764 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2766 = tosa.reshape %2755 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2767 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2768 = tosa.transpose %2766, %2767 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2769 = tosa.reshape %2760 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2770 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2771 = tosa.transpose %2769, %2770 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2772 = tosa.reshape %2765 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2773 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2774 = tosa.transpose %2772, %2773 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %extracted_slice_491 = tensor.extract_slice %arg237[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_492 = tensor.extract_slice %extracted_slice_491[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_493 = tensor.extract_slice %extracted_slice_492[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %extracted_slice_494 = tensor.extract_slice %arg238[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_495 = tensor.extract_slice %extracted_slice_494[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_496 = tensor.extract_slice %extracted_slice_495[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %2775 = tensor.empty() : tensor<1x40x128xf32> + %2776 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_493 : tensor<1x1x40x128xf32>) outs(%2775 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %2777 = tensor.empty() : tensor<40x128xf32> + %2778 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%2776 : 
tensor<1x40x128xf32>) outs(%2777 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %2779 = tensor.empty() : tensor<1x40x128xf32> + %2780 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_496 : tensor<1x1x40x128xf32>) outs(%2779 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %2781 = tensor.empty() : tensor<40x128xf32> + %2782 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%2780 : tensor<1x40x128xf32>) outs(%2781 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %2783 = tensor.empty() : tensor<1x40x128xf32> + %2784 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%2783 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %2778[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %2785 = tosa.reshape %2784 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2786 = tensor.empty() : tensor<1x40x128xf32> + %2787 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%2786 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %2782[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %2788 = tosa.reshape %2787 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2789 = tosa.mul %2768, %2785 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_497 = tensor.extract_slice %2768[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_498 = tensor.extract_slice %2768[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2790 = tosa.negate %extracted_slice_498 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2791 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_499 = tensor.insert_slice %2790 into %2791[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_500 = tensor.insert_slice %extracted_slice_497 into %inserted_slice_499[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2792 = tosa.mul %inserted_slice_500, %2788 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2793 = tosa.add %2789, %2792 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2794 = tosa.mul %2771, %2785 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_501 = tensor.extract_slice %2771[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_502 = tensor.extract_slice %2771[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2795 = tosa.negate %extracted_slice_502 : (tensor<1x32x40x64xf32>) -> 
tensor<1x32x40x64xf32> + %2796 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_503 = tensor.insert_slice %2795 into %2796[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_504 = tensor.insert_slice %extracted_slice_501 into %inserted_slice_503[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2797 = tosa.mul %inserted_slice_504, %2788 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2798 = tosa.add %2794, %2797 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2799 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2800 = tosa.transpose %2798, %2799 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %2801 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %2802 = tosa.add %2793, %2801 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2803 = tosa.reshape %2802 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2804 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32> + %2805 = tosa.add %2800, %2804 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32> + %2806 = tosa.reshape %2805 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %2807 = tosa.matmul %2803, %2806 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %2808 = tosa.reshape %2807 {new_shape = array} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32> + %2809 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %2810 = tosa.reciprocal %2809 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %2811 = tosa.mul %2808, %2810 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %2812 = tosa.add %2811, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32> + %2813 = tosa.reduce_max %2812 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %2814 = tosa.sub %2812, %2813 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %2815 = tosa.exp %2814 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %2816 = tosa.reduce_sum %2815 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %2817 = tosa.reciprocal %2816 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + %2818 = tosa.mul %2815, %2817 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %2819 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %2820 = tosa.add %2818, %2819 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %2821 = tosa.reshape %2820 {new_shape = array} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32> + %2822 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %2823 = tosa.add %2774, %2822 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2824 = tosa.reshape %2823 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2825 = tosa.matmul %2821, %2824 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %2826 = tosa.reshape %2825 
{new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2827 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2828 = tosa.transpose %2826, %2827 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %2829 = tosa.identity %2828 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32> + %2830 = tosa.reshape %2829 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %2831 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2832 = tosa.transpose %arg239, %2831 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2833 = tosa.reshape %2830 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_505 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2834 = linalg.matmul {cast = #linalg.type_fn} ins(%2833, %2832 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_505 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2835 = tosa.reshape %2834 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2836 = tosa.add %2738, %2835 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2837 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_506 = arith.constant 2 : i32 + %2838 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2836 : tensor<1x40x4096xf32>) outs(%2837 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_506 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %2839 = tosa.reduce_sum %2838 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2840 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2841 = tosa.reciprocal %2840 : (tensor<1xf32>) -> tensor<1xf32> + %2842 = tosa.mul %2841, %2839 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2843 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2844 = tosa.add %2842, %2843 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2845 = tosa.rsqrt %2844 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2846 = tosa.mul %2836, %2845 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2847 = tosa.reshape %arg240 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2848 = tosa.mul %2847, %2846 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2849 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2850 = tosa.transpose %arg241, %2849 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %2851 = tosa.reshape %2848 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_507 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %2852 = linalg.matmul {cast = #linalg.type_fn} ins(%2851, %2850 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_507 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2853 = tosa.reshape %2852 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2854 = tosa.sigmoid %2853 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2855 = tosa.mul %2853, %2854 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2856 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2857 = tosa.transpose 
%arg242, %2856 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %2858 = tosa.reshape %2848 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_508 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %2859 = linalg.matmul {cast = #linalg.type_fn} ins(%2858, %2857 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_508 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2860 = tosa.reshape %2859 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2861 = tosa.mul %2855, %2860 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2862 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2863 = tosa.transpose %arg243, %2862 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %2864 = tosa.reshape %2861 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_509 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2865 = linalg.matmul {cast = #linalg.type_fn} ins(%2864, %2863 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_509 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2866 = tosa.reshape %2865 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2867 = tosa.add %2836, %2866 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2868 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_510 = arith.constant 2 : i32 + %2869 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2867 : tensor<1x40x4096xf32>) outs(%2868 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_510 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %2870 = tosa.reduce_sum %2869 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2871 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2872 = tosa.reciprocal %2871 : (tensor<1xf32>) -> tensor<1xf32> + %2873 = tosa.mul %2872, %2870 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2874 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2875 = tosa.add %2873, %2874 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2876 = tosa.rsqrt %2875 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2877 = tosa.mul %2867, %2876 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2878 = tosa.reshape %arg244 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2879 = tosa.mul %2878, %2877 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2880 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2881 = tosa.transpose %arg245, %2880 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2882 = tosa.reshape %2879 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_511 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2883 = linalg.matmul {cast = #linalg.type_fn} ins(%2882, %2881 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_511 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2884 = tosa.reshape %2883 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2885 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2886 = tosa.transpose %arg246, %2885 : 
(tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2887 = tosa.reshape %2879 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_512 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2888 = linalg.matmul {cast = #linalg.type_fn} ins(%2887, %2886 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_512 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2889 = tosa.reshape %2888 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2890 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2891 = tosa.transpose %arg247, %2890 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2892 = tosa.reshape %2879 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_513 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2893 = linalg.matmul {cast = #linalg.type_fn} ins(%2892, %2891 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_513 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2894 = tosa.reshape %2893 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2895 = tosa.reshape %2884 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2896 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2897 = tosa.transpose %2895, %2896 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2898 = tosa.reshape %2889 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2899 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2900 = tosa.transpose %2898, %2899 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2901 = tosa.reshape %2894 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2902 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2903 = tosa.transpose %2901, %2902 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %extracted_slice_514 = tensor.extract_slice %arg248[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_515 = tensor.extract_slice %extracted_slice_514[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_516 = tensor.extract_slice %extracted_slice_515[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %extracted_slice_517 = tensor.extract_slice %arg249[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_518 = tensor.extract_slice %extracted_slice_517[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_519 = tensor.extract_slice %extracted_slice_518[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %2904 = tensor.empty() : tensor<1x40x128xf32> + %2905 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_516 : tensor<1x1x40x128xf32>) outs(%2904 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %2906 = tensor.empty() : tensor<40x128xf32> + %2907 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%2905 : tensor<1x40x128xf32>) outs(%2906 : tensor<40x128xf32>) { + 
^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<40x128xf32>
+    %2908 = tensor.empty() : tensor<1x40x128xf32>
+    %2909 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_519 : tensor<1x1x40x128xf32>) outs(%2908 : tensor<1x40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<1x40x128xf32>
+    %2910 = tensor.empty() : tensor<40x128xf32>
+    %2911 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%2909 : tensor<1x40x128xf32>) outs(%2910 : tensor<40x128xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<40x128xf32>
+    %2912 = tensor.empty() : tensor<1x40x128xf32>
+    %2913 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%2912 : tensor<1x40x128xf32>) {
+    ^bb0(%in: i64, %out: f32):
+      %4175 = arith.index_cast %in : i64 to index
+      %4176 = linalg.index 2 : index
+      %extracted = tensor.extract %2907[%4175, %4176] : tensor<40x128xf32>
+      linalg.yield %extracted : f32
+    } -> tensor<1x40x128xf32>
+    %2914 = tosa.reshape %2913 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+    %2915 = tensor.empty() : tensor<1x40x128xf32>
+    %2916 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%2915 : tensor<1x40x128xf32>) {
+    ^bb0(%in: i64, %out: f32):
+      %4175 = arith.index_cast %in : i64 to index
+      %4176 = linalg.index 2 : index
+      %extracted = tensor.extract %2911[%4175, %4176] : tensor<40x128xf32>
+      linalg.yield %extracted : f32
+    } -> tensor<1x40x128xf32>
+    %2917 = tosa.reshape %2916 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+    %2918 = tosa.mul %2897, %2914 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_520 = tensor.extract_slice %2897[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_521 = tensor.extract_slice %2897[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %2919 = tosa.negate %extracted_slice_521 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %2920 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_522 = tensor.insert_slice %2919 into %2920[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_523 = tensor.insert_slice %extracted_slice_520 into %inserted_slice_522[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %2921 = tosa.mul %inserted_slice_523, %2917 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2922 = tosa.add %2918, %2921 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2923 = tosa.mul %2900, %2914 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %extracted_slice_524 = tensor.extract_slice %2900[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %extracted_slice_525 = tensor.extract_slice %2900[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32>
+    %2924 = tosa.negate %extracted_slice_525 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32>
+    %2925 = tensor.empty() : tensor<1x32x40x128xf32>
+    %inserted_slice_526 = tensor.insert_slice %2924 into %2925[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %inserted_slice_527 = tensor.insert_slice %extracted_slice_524 into %inserted_slice_526[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32>
+    %2926 = tosa.mul %inserted_slice_527, %2917 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2927 = tosa.add %2923, %2926 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2928 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2929 = tosa.transpose %2927, %2928 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32>
+    %2930 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %2931 = tosa.add %2922, %2930 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2932 = tosa.reshape %2931 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %2933 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32>
+    %2934 = tosa.add %2929, %2933 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32>
+    %2935 = tosa.reshape %2934 {new_shape = array<i64: 32, 128, 40>} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32>
+    %2936 = tosa.matmul %2932, %2935 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32>
+    %2937 = tosa.reshape %2936 {new_shape = array<i64: 1, 32, 40, 40>} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2938 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %2939 = tosa.reciprocal %2938 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2940 = tosa.mul %2937, %2939 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2941 = tosa.add %2940, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2942 = tosa.reduce_max %2941 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %2943 = tosa.sub %2941, %2942 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %2944 = tosa.exp %2943 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2945 = tosa.reduce_sum %2944 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %2946 = tosa.reciprocal %2945 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32>
+    %2947 = tosa.mul %2944, %2946 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %2948 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    %2949 = tosa.add %2947, %2948 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %2950 = tosa.reshape %2949 {new_shape = array<i64: 32, 40, 40>} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32>
+    %2951 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %2952 = tosa.add %2903, %2951 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2953 = tosa.reshape %2952 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %2954 = tosa.matmul %2950, %2953 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32>
+    %2955 = tosa.reshape %2954 {new_shape = array<i64: 1, 32, 40, 128>} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %2956 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32>
+    %2957 = tosa.transpose %2955, %2956 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32>
+    %2958 = tosa.identity %2957 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32>
+    %2959 = tosa.reshape %2958 {new_shape = array<i64: 1, 40, 4096>} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32>
+    %2960 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2961 = tosa.transpose %arg250, %2960 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %2962 = tosa.reshape %2959 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_528 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2963 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2962, %2961 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_528 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2964 = tosa.reshape %2963 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2965 = tosa.add %2867, %2964 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2966 = tensor.empty() : tensor<1x40x4096xf32>
+    %c2_i32_529 = arith.constant 2 : i32
+    %2967 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2965 : tensor<1x40x4096xf32>) outs(%2966 : tensor<1x40x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %4175 = math.fpowi %in, %c2_i32_529 : f32, i32
+      linalg.yield %4175 : f32
+    } -> tensor<1x40x4096xf32>
+    %2968 = tosa.reduce_sum %2967 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
+    %2969 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32>
+    %2970 = tosa.reciprocal %2969 : (tensor<1xf32>) -> tensor<1xf32>
+    %2971 = tosa.mul %2970, %2968 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2972 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32>
+    %2973 = tosa.add %2971, %2972 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2974 = tosa.rsqrt %2973 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %2975 = tosa.mul %2965, %2974 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32>
+    %2976 = tosa.reshape %arg251 {new_shape = array<i64: 1, 1, 4096>} : (tensor<4096xf32>) -> tensor<1x1x4096xf32>
+    %2977 = tosa.mul %2976, %2975 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2978 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2979 = tosa.transpose %arg252, %2978 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32>
+    %2980 = tosa.reshape %2977 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_530 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32>
+    %2981 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2980, %2979 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_530 : tensor<40x11008xf32>) -> tensor<40x11008xf32>
+    %2982 = tosa.reshape %2981 {new_shape = array<i64: 1, 40, 11008>} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2983 = tosa.sigmoid %2982 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2984 = tosa.mul %2982, %2983 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2985 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2986 = tosa.transpose %arg253, %2985 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32>
+    %2987 = tosa.reshape %2977 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_531 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32>
+    %2988 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2987, %2986 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_531 : tensor<40x11008xf32>) -> tensor<40x11008xf32>
+    %2989 = tosa.reshape %2988 {new_shape = array<i64: 1, 40, 11008>} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2990 = tosa.mul %2984, %2989 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+    %2991 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %2992 = tosa.transpose %arg254, %2991 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32>
+    %2993 = tosa.reshape %2990 {new_shape = array<i64: 40, 11008>} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32>
+    %cst_532 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %2994 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%2993, %2992 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_532 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %2995 = tosa.reshape %2994 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2996 = tosa.add %2965, %2995 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %2997 = tensor.empty() : tensor<1x40x4096xf32>
+    %c2_i32_533 = arith.constant 2 : i32
+    %2998 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2996 : tensor<1x40x4096xf32>) outs(%2997 : tensor<1x40x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %4175 = math.fpowi %in, %c2_i32_533 : f32, i32
+      linalg.yield %4175 : f32
+    } -> tensor<1x40x4096xf32>
+    %2999 = tosa.reduce_sum %2998 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32>
+    %3000 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32>
+    %3001 = tosa.reciprocal %3000 : (tensor<1xf32>) -> tensor<1xf32>
+    %3002 = tosa.mul %3001, %2999 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %3003 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32>
+    %3004 = tosa.add %3002, %3003 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %3005 = tosa.rsqrt %3004 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32>
+    %3006 = tosa.mul %2996, %3005 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32>
+    %3007 = tosa.reshape %arg255 {new_shape = array<i64: 1, 1, 4096>} : (tensor<4096xf32>) -> tensor<1x1x4096xf32>
+    %3008 = tosa.mul %3007, %3006 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32>
+    %3009 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %3010 = tosa.transpose %arg256, %3009 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32>
+    %3011 = tosa.reshape %3008 {new_shape = array<i64: 40, 4096>} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32>
+    %cst_534 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32>
+    %3012 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%3011, %3010 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_534 : tensor<40x4096xf32>) -> tensor<40x4096xf32>
+    %3013 = tosa.reshape %3012 {new_shape = array<i64: 1, 40, 4096>} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32>
+    %3014 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32>
+    %3015 = tosa.transpose %arg257, %3014 : (tensor<4096x4096xf32>, tensor<2xi32>) -> 
tensor<4096x4096xf32> + %3016 = tosa.reshape %3008 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_535 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3017 = linalg.matmul {cast = #linalg.type_fn} ins(%3016, %3015 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_535 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3018 = tosa.reshape %3017 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3019 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3020 = tosa.transpose %arg258, %3019 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3021 = tosa.reshape %3008 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_536 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3022 = linalg.matmul {cast = #linalg.type_fn} ins(%3021, %3020 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_536 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3023 = tosa.reshape %3022 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3024 = tosa.reshape %3013 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3025 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3026 = tosa.transpose %3024, %3025 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3027 = tosa.reshape %3018 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3028 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3029 = tosa.transpose %3027, %3028 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3030 = tosa.reshape %3023 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3031 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3032 = tosa.transpose %3030, %3031 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %extracted_slice_537 = tensor.extract_slice %arg259[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_538 = tensor.extract_slice %extracted_slice_537[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_539 = tensor.extract_slice %extracted_slice_538[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %extracted_slice_540 = tensor.extract_slice %arg260[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_541 = tensor.extract_slice %extracted_slice_540[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_542 = tensor.extract_slice %extracted_slice_541[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %3033 = tensor.empty() : tensor<1x40x128xf32> + %3034 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_539 : tensor<1x1x40x128xf32>) outs(%3033 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %3035 = tensor.empty() : tensor<40x128xf32> + %3036 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%3034 : tensor<1x40x128xf32>) outs(%3035 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield 
%in : f32 + } -> tensor<40x128xf32> + %3037 = tensor.empty() : tensor<1x40x128xf32> + %3038 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_542 : tensor<1x1x40x128xf32>) outs(%3037 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %3039 = tensor.empty() : tensor<40x128xf32> + %3040 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%3038 : tensor<1x40x128xf32>) outs(%3039 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %3041 = tensor.empty() : tensor<1x40x128xf32> + %3042 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%3041 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %3036[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %3043 = tosa.reshape %3042 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3044 = tensor.empty() : tensor<1x40x128xf32> + %3045 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%3044 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %3040[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %3046 = tosa.reshape %3045 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3047 = tosa.mul %3026, %3043 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_543 = tensor.extract_slice %3026[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_544 = tensor.extract_slice %3026[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3048 = tosa.negate %extracted_slice_544 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3049 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_545 = tensor.insert_slice %3048 into %3049[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_546 = tensor.insert_slice %extracted_slice_543 into %inserted_slice_545[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3050 = tosa.mul %inserted_slice_546, %3046 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3051 = tosa.add %3047, %3050 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3052 = tosa.mul %3029, %3043 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_547 = tensor.extract_slice %3029[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_548 = tensor.extract_slice %3029[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3053 = tosa.negate %extracted_slice_548 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3054 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_549 = 
tensor.insert_slice %3053 into %3054[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_550 = tensor.insert_slice %extracted_slice_547 into %inserted_slice_549[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3055 = tosa.mul %inserted_slice_550, %3046 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3056 = tosa.add %3052, %3055 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3057 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3058 = tosa.transpose %3056, %3057 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %3059 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %3060 = tosa.add %3051, %3059 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3061 = tosa.reshape %3060 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3062 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32> + %3063 = tosa.add %3058, %3062 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32> + %3064 = tosa.reshape %3063 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %3065 = tosa.matmul %3061, %3064 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %3066 = tosa.reshape %3065 {new_shape = array} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3067 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %3068 = tosa.reciprocal %3067 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3069 = tosa.mul %3066, %3068 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3070 = tosa.add %3069, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32> + %3071 = tosa.reduce_max %3070 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %3072 = tosa.sub %3070, %3071 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %3073 = tosa.exp %3072 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3074 = tosa.reduce_sum %3073 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %3075 = tosa.reciprocal %3074 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + %3076 = tosa.mul %3073, %3075 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %3077 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %3078 = tosa.add %3076, %3077 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3079 = tosa.reshape %3078 {new_shape = array} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32> + %3080 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %3081 = tosa.add %3032, %3080 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3082 = tosa.reshape %3081 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3083 = tosa.matmul %3079, %3082 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %3084 = tosa.reshape %3083 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3085 = "tosa.const"() 
<{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3086 = tosa.transpose %3084, %3085 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %3087 = tosa.identity %3086 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32> + %3088 = tosa.reshape %3087 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %3089 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3090 = tosa.transpose %arg261, %3089 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3091 = tosa.reshape %3088 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_551 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3092 = linalg.matmul {cast = #linalg.type_fn} ins(%3091, %3090 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_551 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3093 = tosa.reshape %3092 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3094 = tosa.add %2996, %3093 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3095 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_552 = arith.constant 2 : i32 + %3096 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3094 : tensor<1x40x4096xf32>) outs(%3095 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_552 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %3097 = tosa.reduce_sum %3096 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3098 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3099 = tosa.reciprocal %3098 : (tensor<1xf32>) -> tensor<1xf32> + %3100 = tosa.mul %3099, %3097 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3101 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3102 = tosa.add %3100, %3101 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3103 = tosa.rsqrt %3102 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3104 = tosa.mul %3094, %3103 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3105 = tosa.reshape %arg262 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3106 = tosa.mul %3105, %3104 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3107 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3108 = tosa.transpose %arg263, %3107 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3109 = tosa.reshape %3106 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_553 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3110 = linalg.matmul {cast = #linalg.type_fn} ins(%3109, %3108 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_553 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3111 = tosa.reshape %3110 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3112 = tosa.sigmoid %3111 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3113 = tosa.mul %3111, %3112 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3114 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3115 = tosa.transpose %arg264, %3114 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3116 = 
tosa.reshape %3106 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_554 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3117 = linalg.matmul {cast = #linalg.type_fn} ins(%3116, %3115 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_554 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3118 = tosa.reshape %3117 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3119 = tosa.mul %3113, %3118 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3120 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3121 = tosa.transpose %arg265, %3120 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %3122 = tosa.reshape %3119 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_555 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3123 = linalg.matmul {cast = #linalg.type_fn} ins(%3122, %3121 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_555 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3124 = tosa.reshape %3123 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3125 = tosa.add %3094, %3124 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3126 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_556 = arith.constant 2 : i32 + %3127 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3125 : tensor<1x40x4096xf32>) outs(%3126 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_556 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %3128 = tosa.reduce_sum %3127 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3129 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3130 = tosa.reciprocal %3129 : (tensor<1xf32>) -> tensor<1xf32> + %3131 = tosa.mul %3130, %3128 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3132 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3133 = tosa.add %3131, %3132 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3134 = tosa.rsqrt %3133 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3135 = tosa.mul %3125, %3134 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3136 = tosa.reshape %arg266 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3137 = tosa.mul %3136, %3135 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3138 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3139 = tosa.transpose %arg267, %3138 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3140 = tosa.reshape %3137 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_557 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3141 = linalg.matmul {cast = #linalg.type_fn} ins(%3140, %3139 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_557 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3142 = tosa.reshape %3141 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3143 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3144 = tosa.transpose %arg268, %3143 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3145 = tosa.reshape %3137 
{new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_558 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3146 = linalg.matmul {cast = #linalg.type_fn} ins(%3145, %3144 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_558 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3147 = tosa.reshape %3146 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3148 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3149 = tosa.transpose %arg269, %3148 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3150 = tosa.reshape %3137 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_559 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3151 = linalg.matmul {cast = #linalg.type_fn} ins(%3150, %3149 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_559 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3152 = tosa.reshape %3151 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3153 = tosa.reshape %3142 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3154 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3155 = tosa.transpose %3153, %3154 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3156 = tosa.reshape %3147 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3157 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3158 = tosa.transpose %3156, %3157 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3159 = tosa.reshape %3152 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3160 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3161 = tosa.transpose %3159, %3160 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %extracted_slice_560 = tensor.extract_slice %arg270[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_561 = tensor.extract_slice %extracted_slice_560[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_562 = tensor.extract_slice %extracted_slice_561[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %extracted_slice_563 = tensor.extract_slice %arg271[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_564 = tensor.extract_slice %extracted_slice_563[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_565 = tensor.extract_slice %extracted_slice_564[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %3162 = tensor.empty() : tensor<1x40x128xf32> + %3163 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_562 : tensor<1x1x40x128xf32>) outs(%3162 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %3164 = tensor.empty() : tensor<40x128xf32> + %3165 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%3163 : tensor<1x40x128xf32>) outs(%3164 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %3166 = 
tensor.empty() : tensor<1x40x128xf32> + %3167 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_565 : tensor<1x1x40x128xf32>) outs(%3166 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %3168 = tensor.empty() : tensor<40x128xf32> + %3169 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%3167 : tensor<1x40x128xf32>) outs(%3168 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %3170 = tensor.empty() : tensor<1x40x128xf32> + %3171 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%3170 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %3165[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %3172 = tosa.reshape %3171 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3173 = tensor.empty() : tensor<1x40x128xf32> + %3174 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%3173 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %3169[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %3175 = tosa.reshape %3174 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3176 = tosa.mul %3155, %3172 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_566 = tensor.extract_slice %3155[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_567 = tensor.extract_slice %3155[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3177 = tosa.negate %extracted_slice_567 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3178 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_568 = tensor.insert_slice %3177 into %3178[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_569 = tensor.insert_slice %extracted_slice_566 into %inserted_slice_568[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3179 = tosa.mul %inserted_slice_569, %3175 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3180 = tosa.add %3176, %3179 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3181 = tosa.mul %3158, %3172 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_570 = tensor.extract_slice %3158[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_571 = tensor.extract_slice %3158[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3182 = tosa.negate %extracted_slice_571 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3183 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_572 = tensor.insert_slice %3182 into %3183[0, 0, 0, 0] [1, 32, 
40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_573 = tensor.insert_slice %extracted_slice_570 into %inserted_slice_572[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3184 = tosa.mul %inserted_slice_573, %3175 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3185 = tosa.add %3181, %3184 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3186 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3187 = tosa.transpose %3185, %3186 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %3188 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %3189 = tosa.add %3180, %3188 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3190 = tosa.reshape %3189 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3191 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32> + %3192 = tosa.add %3187, %3191 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32> + %3193 = tosa.reshape %3192 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %3194 = tosa.matmul %3190, %3193 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %3195 = tosa.reshape %3194 {new_shape = array} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3196 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %3197 = tosa.reciprocal %3196 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3198 = tosa.mul %3195, %3197 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3199 = tosa.add %3198, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32> + %3200 = tosa.reduce_max %3199 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %3201 = tosa.sub %3199, %3200 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %3202 = tosa.exp %3201 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3203 = tosa.reduce_sum %3202 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %3204 = tosa.reciprocal %3203 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + %3205 = tosa.mul %3202, %3204 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %3206 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %3207 = tosa.add %3205, %3206 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3208 = tosa.reshape %3207 {new_shape = array} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32> + %3209 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %3210 = tosa.add %3161, %3209 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3211 = tosa.reshape %3210 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3212 = tosa.matmul %3208, %3211 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %3213 = tosa.reshape %3212 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3214 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> 
tensor<4xi32> + %3215 = tosa.transpose %3213, %3214 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %3216 = tosa.identity %3215 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32> + %3217 = tosa.reshape %3216 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %3218 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3219 = tosa.transpose %arg272, %3218 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3220 = tosa.reshape %3217 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_574 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3221 = linalg.matmul {cast = #linalg.type_fn} ins(%3220, %3219 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_574 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3222 = tosa.reshape %3221 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3223 = tosa.add %3125, %3222 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3224 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_575 = arith.constant 2 : i32 + %3225 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3223 : tensor<1x40x4096xf32>) outs(%3224 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_575 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %3226 = tosa.reduce_sum %3225 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3227 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3228 = tosa.reciprocal %3227 : (tensor<1xf32>) -> tensor<1xf32> + %3229 = tosa.mul %3228, %3226 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3230 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3231 = tosa.add %3229, %3230 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3232 = tosa.rsqrt %3231 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3233 = tosa.mul %3223, %3232 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3234 = tosa.reshape %arg273 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3235 = tosa.mul %3234, %3233 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3236 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3237 = tosa.transpose %arg274, %3236 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3238 = tosa.reshape %3235 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_576 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3239 = linalg.matmul {cast = #linalg.type_fn} ins(%3238, %3237 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_576 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3240 = tosa.reshape %3239 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3241 = tosa.sigmoid %3240 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3242 = tosa.mul %3240, %3241 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3243 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3244 = tosa.transpose %arg275, %3243 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3245 = tosa.reshape %3235 {new_shape = array} : 
(tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_577 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3246 = linalg.matmul {cast = #linalg.type_fn} ins(%3245, %3244 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_577 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3247 = tosa.reshape %3246 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3248 = tosa.mul %3242, %3247 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3249 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3250 = tosa.transpose %arg276, %3249 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %3251 = tosa.reshape %3248 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_578 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3252 = linalg.matmul {cast = #linalg.type_fn} ins(%3251, %3250 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_578 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3253 = tosa.reshape %3252 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3254 = tosa.add %3223, %3253 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3255 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_579 = arith.constant 2 : i32 + %3256 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3254 : tensor<1x40x4096xf32>) outs(%3255 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_579 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %3257 = tosa.reduce_sum %3256 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3258 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3259 = tosa.reciprocal %3258 : (tensor<1xf32>) -> tensor<1xf32> + %3260 = tosa.mul %3259, %3257 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3261 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3262 = tosa.add %3260, %3261 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3263 = tosa.rsqrt %3262 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3264 = tosa.mul %3254, %3263 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3265 = tosa.reshape %arg277 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3266 = tosa.mul %3265, %3264 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3267 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3268 = tosa.transpose %arg278, %3267 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3269 = tosa.reshape %3266 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_580 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3270 = linalg.matmul {cast = #linalg.type_fn} ins(%3269, %3268 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_580 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3271 = tosa.reshape %3270 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3272 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3273 = tosa.transpose %arg279, %3272 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3274 = tosa.reshape %3266 {new_shape = array} : 
(tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_581 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3275 = linalg.matmul {cast = #linalg.type_fn} ins(%3274, %3273 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_581 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3276 = tosa.reshape %3275 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3277 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3278 = tosa.transpose %arg280, %3277 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3279 = tosa.reshape %3266 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_582 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3280 = linalg.matmul {cast = #linalg.type_fn} ins(%3279, %3278 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_582 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3281 = tosa.reshape %3280 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3282 = tosa.reshape %3271 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3283 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3284 = tosa.transpose %3282, %3283 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3285 = tosa.reshape %3276 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3286 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3287 = tosa.transpose %3285, %3286 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3288 = tosa.reshape %3281 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3289 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3290 = tosa.transpose %3288, %3289 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %extracted_slice_583 = tensor.extract_slice %arg281[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_584 = tensor.extract_slice %extracted_slice_583[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_585 = tensor.extract_slice %extracted_slice_584[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %extracted_slice_586 = tensor.extract_slice %arg282[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_587 = tensor.extract_slice %extracted_slice_586[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_588 = tensor.extract_slice %extracted_slice_587[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %3291 = tensor.empty() : tensor<1x40x128xf32> + %3292 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_585 : tensor<1x1x40x128xf32>) outs(%3291 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %3293 = tensor.empty() : tensor<40x128xf32> + %3294 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%3292 : tensor<1x40x128xf32>) outs(%3293 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %3295 = tensor.empty() : 
tensor<1x40x128xf32> + %3296 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_588 : tensor<1x1x40x128xf32>) outs(%3295 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %3297 = tensor.empty() : tensor<40x128xf32> + %3298 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%3296 : tensor<1x40x128xf32>) outs(%3297 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %3299 = tensor.empty() : tensor<1x40x128xf32> + %3300 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%3299 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %3294[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %3301 = tosa.reshape %3300 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3302 = tensor.empty() : tensor<1x40x128xf32> + %3303 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%3302 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %3298[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %3304 = tosa.reshape %3303 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3305 = tosa.mul %3284, %3301 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_589 = tensor.extract_slice %3284[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_590 = tensor.extract_slice %3284[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3306 = tosa.negate %extracted_slice_590 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3307 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_591 = tensor.insert_slice %3306 into %3307[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_592 = tensor.insert_slice %extracted_slice_589 into %inserted_slice_591[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3308 = tosa.mul %inserted_slice_592, %3304 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3309 = tosa.add %3305, %3308 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3310 = tosa.mul %3287, %3301 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_593 = tensor.extract_slice %3287[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_594 = tensor.extract_slice %3287[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3311 = tosa.negate %extracted_slice_594 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3312 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_595 = tensor.insert_slice %3311 into %3312[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 
1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_596 = tensor.insert_slice %extracted_slice_593 into %inserted_slice_595[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3313 = tosa.mul %inserted_slice_596, %3304 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3314 = tosa.add %3310, %3313 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3315 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3316 = tosa.transpose %3314, %3315 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %3317 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %3318 = tosa.add %3309, %3317 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3319 = tosa.reshape %3318 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3320 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32> + %3321 = tosa.add %3316, %3320 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32> + %3322 = tosa.reshape %3321 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %3323 = tosa.matmul %3319, %3322 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %3324 = tosa.reshape %3323 {new_shape = array} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3325 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %3326 = tosa.reciprocal %3325 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3327 = tosa.mul %3324, %3326 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3328 = tosa.add %3327, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32> + %3329 = tosa.reduce_max %3328 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %3330 = tosa.sub %3328, %3329 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %3331 = tosa.exp %3330 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3332 = tosa.reduce_sum %3331 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %3333 = tosa.reciprocal %3332 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + %3334 = tosa.mul %3331, %3333 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %3335 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %3336 = tosa.add %3334, %3335 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3337 = tosa.reshape %3336 {new_shape = array} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32> + %3338 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %3339 = tosa.add %3290, %3338 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3340 = tosa.reshape %3339 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3341 = tosa.matmul %3337, %3340 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %3342 = tosa.reshape %3341 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3343 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3344 
= tosa.transpose %3342, %3343 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %3345 = tosa.identity %3344 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32> + %3346 = tosa.reshape %3345 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %3347 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3348 = tosa.transpose %arg283, %3347 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3349 = tosa.reshape %3346 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_597 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3350 = linalg.matmul {cast = #linalg.type_fn} ins(%3349, %3348 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_597 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3351 = tosa.reshape %3350 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3352 = tosa.add %3254, %3351 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3353 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_598 = arith.constant 2 : i32 + %3354 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3352 : tensor<1x40x4096xf32>) outs(%3353 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_598 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %3355 = tosa.reduce_sum %3354 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3356 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3357 = tosa.reciprocal %3356 : (tensor<1xf32>) -> tensor<1xf32> + %3358 = tosa.mul %3357, %3355 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3359 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3360 = tosa.add %3358, %3359 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3361 = tosa.rsqrt %3360 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3362 = tosa.mul %3352, %3361 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3363 = tosa.reshape %arg284 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3364 = tosa.mul %3363, %3362 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3365 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3366 = tosa.transpose %arg285, %3365 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3367 = tosa.reshape %3364 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_599 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3368 = linalg.matmul {cast = #linalg.type_fn} ins(%3367, %3366 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_599 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3369 = tosa.reshape %3368 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3370 = tosa.sigmoid %3369 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3371 = tosa.mul %3369, %3370 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3372 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3373 = tosa.transpose %arg286, %3372 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3374 = tosa.reshape %3364 {new_shape = array} : (tensor<1x40x4096xf32>) -> 
tensor<40x4096xf32> + %cst_600 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3375 = linalg.matmul {cast = #linalg.type_fn} ins(%3374, %3373 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_600 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3376 = tosa.reshape %3375 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3377 = tosa.mul %3371, %3376 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3378 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3379 = tosa.transpose %arg287, %3378 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %3380 = tosa.reshape %3377 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_601 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3381 = linalg.matmul {cast = #linalg.type_fn} ins(%3380, %3379 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_601 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3382 = tosa.reshape %3381 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3383 = tosa.add %3352, %3382 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3384 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_602 = arith.constant 2 : i32 + %3385 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3383 : tensor<1x40x4096xf32>) outs(%3384 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_602 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %3386 = tosa.reduce_sum %3385 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3387 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3388 = tosa.reciprocal %3387 : (tensor<1xf32>) -> tensor<1xf32> + %3389 = tosa.mul %3388, %3386 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3390 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3391 = tosa.add %3389, %3390 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3392 = tosa.rsqrt %3391 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3393 = tosa.mul %3383, %3392 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3394 = tosa.reshape %arg288 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3395 = tosa.mul %3394, %3393 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3396 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3397 = tosa.transpose %arg289, %3396 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3398 = tosa.reshape %3395 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_603 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3399 = linalg.matmul {cast = #linalg.type_fn} ins(%3398, %3397 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_603 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3400 = tosa.reshape %3399 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3401 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3402 = tosa.transpose %arg290, %3401 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3403 = tosa.reshape %3395 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + 
%cst_604 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3404 = linalg.matmul {cast = #linalg.type_fn} ins(%3403, %3402 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_604 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3405 = tosa.reshape %3404 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3406 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3407 = tosa.transpose %arg291, %3406 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3408 = tosa.reshape %3395 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_605 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3409 = linalg.matmul {cast = #linalg.type_fn} ins(%3408, %3407 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_605 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3410 = tosa.reshape %3409 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3411 = tosa.reshape %3400 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3412 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3413 = tosa.transpose %3411, %3412 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3414 = tosa.reshape %3405 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3415 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3416 = tosa.transpose %3414, %3415 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3417 = tosa.reshape %3410 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3418 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3419 = tosa.transpose %3417, %3418 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %extracted_slice_606 = tensor.extract_slice %arg292[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_607 = tensor.extract_slice %extracted_slice_606[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_608 = tensor.extract_slice %extracted_slice_607[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %extracted_slice_609 = tensor.extract_slice %arg293[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_610 = tensor.extract_slice %extracted_slice_609[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_611 = tensor.extract_slice %extracted_slice_610[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %3420 = tensor.empty() : tensor<1x40x128xf32> + %3421 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_608 : tensor<1x1x40x128xf32>) outs(%3420 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %3422 = tensor.empty() : tensor<40x128xf32> + %3423 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%3421 : tensor<1x40x128xf32>) outs(%3422 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %3424 = tensor.empty() : tensor<1x40x128xf32> + %3425 = linalg.generic 
{indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_611 : tensor<1x1x40x128xf32>) outs(%3424 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %3426 = tensor.empty() : tensor<40x128xf32> + %3427 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%3425 : tensor<1x40x128xf32>) outs(%3426 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %3428 = tensor.empty() : tensor<1x40x128xf32> + %3429 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%3428 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %3423[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %3430 = tosa.reshape %3429 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3431 = tensor.empty() : tensor<1x40x128xf32> + %3432 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%3431 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %3427[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %3433 = tosa.reshape %3432 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3434 = tosa.mul %3413, %3430 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_612 = tensor.extract_slice %3413[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_613 = tensor.extract_slice %3413[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3435 = tosa.negate %extracted_slice_613 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3436 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_614 = tensor.insert_slice %3435 into %3436[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_615 = tensor.insert_slice %extracted_slice_612 into %inserted_slice_614[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3437 = tosa.mul %inserted_slice_615, %3433 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3438 = tosa.add %3434, %3437 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3439 = tosa.mul %3416, %3430 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_616 = tensor.extract_slice %3416[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_617 = tensor.extract_slice %3416[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3440 = tosa.negate %extracted_slice_617 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3441 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_618 = tensor.insert_slice %3440 into %3441[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into 
tensor<1x32x40x128xf32> + %inserted_slice_619 = tensor.insert_slice %extracted_slice_616 into %inserted_slice_618[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3442 = tosa.mul %inserted_slice_619, %3433 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3443 = tosa.add %3439, %3442 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3444 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3445 = tosa.transpose %3443, %3444 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %3446 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %3447 = tosa.add %3438, %3446 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3448 = tosa.reshape %3447 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3449 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32> + %3450 = tosa.add %3445, %3449 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32> + %3451 = tosa.reshape %3450 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %3452 = tosa.matmul %3448, %3451 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %3453 = tosa.reshape %3452 {new_shape = array} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3454 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %3455 = tosa.reciprocal %3454 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3456 = tosa.mul %3453, %3455 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3457 = tosa.add %3456, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32> + %3458 = tosa.reduce_max %3457 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %3459 = tosa.sub %3457, %3458 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %3460 = tosa.exp %3459 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3461 = tosa.reduce_sum %3460 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %3462 = tosa.reciprocal %3461 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + %3463 = tosa.mul %3460, %3462 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %3464 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %3465 = tosa.add %3463, %3464 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3466 = tosa.reshape %3465 {new_shape = array} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32> + %3467 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %3468 = tosa.add %3419, %3467 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3469 = tosa.reshape %3468 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3470 = tosa.matmul %3466, %3469 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %3471 = tosa.reshape %3470 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3472 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3473 = tosa.transpose %3471, %3472 : 
(tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %3474 = tosa.identity %3473 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32> + %3475 = tosa.reshape %3474 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %3476 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3477 = tosa.transpose %arg294, %3476 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3478 = tosa.reshape %3475 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_620 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3479 = linalg.matmul {cast = #linalg.type_fn} ins(%3478, %3477 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_620 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3480 = tosa.reshape %3479 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3481 = tosa.add %3383, %3480 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3482 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_621 = arith.constant 2 : i32 + %3483 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3481 : tensor<1x40x4096xf32>) outs(%3482 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_621 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %3484 = tosa.reduce_sum %3483 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3485 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3486 = tosa.reciprocal %3485 : (tensor<1xf32>) -> tensor<1xf32> + %3487 = tosa.mul %3486, %3484 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3488 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3489 = tosa.add %3487, %3488 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3490 = tosa.rsqrt %3489 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3491 = tosa.mul %3481, %3490 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3492 = tosa.reshape %arg295 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3493 = tosa.mul %3492, %3491 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3494 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3495 = tosa.transpose %arg296, %3494 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3496 = tosa.reshape %3493 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_622 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3497 = linalg.matmul {cast = #linalg.type_fn} ins(%3496, %3495 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_622 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3498 = tosa.reshape %3497 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3499 = tosa.sigmoid %3498 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3500 = tosa.mul %3498, %3499 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3501 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3502 = tosa.transpose %arg297, %3501 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3503 = tosa.reshape %3493 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_623 = arith.constant 
dense<0.000000e+00> : tensor<40x11008xf32> + %3504 = linalg.matmul {cast = #linalg.type_fn} ins(%3503, %3502 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_623 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3505 = tosa.reshape %3504 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3506 = tosa.mul %3500, %3505 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3507 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3508 = tosa.transpose %arg298, %3507 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %3509 = tosa.reshape %3506 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_624 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3510 = linalg.matmul {cast = #linalg.type_fn} ins(%3509, %3508 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_624 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3511 = tosa.reshape %3510 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3512 = tosa.add %3481, %3511 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3513 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_625 = arith.constant 2 : i32 + %3514 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3512 : tensor<1x40x4096xf32>) outs(%3513 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_625 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %3515 = tosa.reduce_sum %3514 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3516 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3517 = tosa.reciprocal %3516 : (tensor<1xf32>) -> tensor<1xf32> + %3518 = tosa.mul %3517, %3515 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3519 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3520 = tosa.add %3518, %3519 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3521 = tosa.rsqrt %3520 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3522 = tosa.mul %3512, %3521 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3523 = tosa.reshape %arg299 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3524 = tosa.mul %3523, %3522 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3525 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3526 = tosa.transpose %arg300, %3525 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3527 = tosa.reshape %3524 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_626 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3528 = linalg.matmul {cast = #linalg.type_fn} ins(%3527, %3526 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_626 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3529 = tosa.reshape %3528 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3530 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3531 = tosa.transpose %arg301, %3530 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3532 = tosa.reshape %3524 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_627 = arith.constant dense<0.000000e+00> : 
tensor<40x4096xf32> + %3533 = linalg.matmul {cast = #linalg.type_fn} ins(%3532, %3531 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_627 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3534 = tosa.reshape %3533 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3535 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3536 = tosa.transpose %arg302, %3535 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3537 = tosa.reshape %3524 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_628 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3538 = linalg.matmul {cast = #linalg.type_fn} ins(%3537, %3536 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_628 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3539 = tosa.reshape %3538 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3540 = tosa.reshape %3529 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3541 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3542 = tosa.transpose %3540, %3541 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3543 = tosa.reshape %3534 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3544 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3545 = tosa.transpose %3543, %3544 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3546 = tosa.reshape %3539 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3547 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3548 = tosa.transpose %3546, %3547 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %extracted_slice_629 = tensor.extract_slice %arg303[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_630 = tensor.extract_slice %extracted_slice_629[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_631 = tensor.extract_slice %extracted_slice_630[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %extracted_slice_632 = tensor.extract_slice %arg304[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_633 = tensor.extract_slice %extracted_slice_632[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_634 = tensor.extract_slice %extracted_slice_633[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %3549 = tensor.empty() : tensor<1x40x128xf32> + %3550 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_631 : tensor<1x1x40x128xf32>) outs(%3549 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %3551 = tensor.empty() : tensor<40x128xf32> + %3552 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%3550 : tensor<1x40x128xf32>) outs(%3551 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %3553 = tensor.empty() : tensor<1x40x128xf32> + %3554 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = 
["parallel", "parallel", "parallel"]} ins(%extracted_slice_634 : tensor<1x1x40x128xf32>) outs(%3553 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %3555 = tensor.empty() : tensor<40x128xf32> + %3556 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%3554 : tensor<1x40x128xf32>) outs(%3555 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %3557 = tensor.empty() : tensor<1x40x128xf32> + %3558 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%3557 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %3552[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %3559 = tosa.reshape %3558 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3560 = tensor.empty() : tensor<1x40x128xf32> + %3561 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%3560 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %3556[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %3562 = tosa.reshape %3561 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3563 = tosa.mul %3542, %3559 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_635 = tensor.extract_slice %3542[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_636 = tensor.extract_slice %3542[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3564 = tosa.negate %extracted_slice_636 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3565 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_637 = tensor.insert_slice %3564 into %3565[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_638 = tensor.insert_slice %extracted_slice_635 into %inserted_slice_637[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3566 = tosa.mul %inserted_slice_638, %3562 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3567 = tosa.add %3563, %3566 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3568 = tosa.mul %3545, %3559 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_639 = tensor.extract_slice %3545[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_640 = tensor.extract_slice %3545[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3569 = tosa.negate %extracted_slice_640 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3570 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_641 = tensor.insert_slice %3569 into %3570[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_642 = 
tensor.insert_slice %extracted_slice_639 into %inserted_slice_641[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3571 = tosa.mul %inserted_slice_642, %3562 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3572 = tosa.add %3568, %3571 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3573 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3574 = tosa.transpose %3572, %3573 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %3575 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %3576 = tosa.add %3567, %3575 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3577 = tosa.reshape %3576 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3578 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32> + %3579 = tosa.add %3574, %3578 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32> + %3580 = tosa.reshape %3579 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %3581 = tosa.matmul %3577, %3580 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %3582 = tosa.reshape %3581 {new_shape = array} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3583 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %3584 = tosa.reciprocal %3583 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3585 = tosa.mul %3582, %3584 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3586 = tosa.add %3585, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32> + %3587 = tosa.reduce_max %3586 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %3588 = tosa.sub %3586, %3587 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %3589 = tosa.exp %3588 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3590 = tosa.reduce_sum %3589 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %3591 = tosa.reciprocal %3590 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + %3592 = tosa.mul %3589, %3591 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %3593 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %3594 = tosa.add %3592, %3593 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3595 = tosa.reshape %3594 {new_shape = array} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32> + %3596 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %3597 = tosa.add %3548, %3596 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3598 = tosa.reshape %3597 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3599 = tosa.matmul %3595, %3598 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %3600 = tosa.reshape %3599 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3601 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3602 = tosa.transpose %3600, %3601 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> 
tensor<1x40x32x128xf32> + %3603 = tosa.identity %3602 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32> + %3604 = tosa.reshape %3603 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %3605 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3606 = tosa.transpose %arg305, %3605 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3607 = tosa.reshape %3604 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_643 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3608 = linalg.matmul {cast = #linalg.type_fn} ins(%3607, %3606 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_643 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3609 = tosa.reshape %3608 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3610 = tosa.add %3512, %3609 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3611 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_644 = arith.constant 2 : i32 + %3612 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3610 : tensor<1x40x4096xf32>) outs(%3611 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_644 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %3613 = tosa.reduce_sum %3612 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3614 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3615 = tosa.reciprocal %3614 : (tensor<1xf32>) -> tensor<1xf32> + %3616 = tosa.mul %3615, %3613 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3617 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3618 = tosa.add %3616, %3617 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3619 = tosa.rsqrt %3618 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3620 = tosa.mul %3610, %3619 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3621 = tosa.reshape %arg306 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3622 = tosa.mul %3621, %3620 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3623 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3624 = tosa.transpose %arg307, %3623 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3625 = tosa.reshape %3622 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_645 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3626 = linalg.matmul {cast = #linalg.type_fn} ins(%3625, %3624 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_645 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3627 = tosa.reshape %3626 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3628 = tosa.sigmoid %3627 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3629 = tosa.mul %3627, %3628 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3630 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3631 = tosa.transpose %arg308, %3630 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3632 = tosa.reshape %3622 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_646 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> 
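+ // The matmul below completes the gated (SwiGLU-style) MLP of this layer:
+ // with g = gate_proj(x) and u = up_proj(x), the block computes
+ //   down_proj(SiLU(g) * u),   where SiLU(g) = g * sigmoid(g)
+ // (the tosa.sigmoid followed by tosa.mul above), and adds the result back
+ // to the residual stream.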
+ %3633 = linalg.matmul {cast = #linalg.type_fn} ins(%3632, %3631 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_646 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3634 = tosa.reshape %3633 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3635 = tosa.mul %3629, %3634 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3636 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3637 = tosa.transpose %arg309, %3636 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %3638 = tosa.reshape %3635 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_647 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3639 = linalg.matmul {cast = #linalg.type_fn} ins(%3638, %3637 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_647 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3640 = tosa.reshape %3639 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3641 = tosa.add %3610, %3640 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3642 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_648 = arith.constant 2 : i32 + %3643 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3641 : tensor<1x40x4096xf32>) outs(%3642 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_648 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %3644 = tosa.reduce_sum %3643 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3645 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3646 = tosa.reciprocal %3645 : (tensor<1xf32>) -> tensor<1xf32> + %3647 = tosa.mul %3646, %3644 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3648 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3649 = tosa.add %3647, %3648 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3650 = tosa.rsqrt %3649 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3651 = tosa.mul %3641, %3650 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3652 = tosa.reshape %arg310 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3653 = tosa.mul %3652, %3651 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3654 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3655 = tosa.transpose %arg311, %3654 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3656 = tosa.reshape %3653 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_649 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3657 = linalg.matmul {cast = #linalg.type_fn} ins(%3656, %3655 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_649 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3658 = tosa.reshape %3657 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3659 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3660 = tosa.transpose %arg312, %3659 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3661 = tosa.reshape %3653 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_650 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3662 = linalg.matmul 
{cast = #linalg.type_fn} ins(%3661, %3660 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_650 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3663 = tosa.reshape %3662 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3664 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3665 = tosa.transpose %arg313, %3664 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3666 = tosa.reshape %3653 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_651 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3667 = linalg.matmul {cast = #linalg.type_fn} ins(%3666, %3665 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_651 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3668 = tosa.reshape %3667 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3669 = tosa.reshape %3658 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3670 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3671 = tosa.transpose %3669, %3670 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3672 = tosa.reshape %3663 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3673 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3674 = tosa.transpose %3672, %3673 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3675 = tosa.reshape %3668 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3676 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3677 = tosa.transpose %3675, %3676 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %extracted_slice_652 = tensor.extract_slice %arg314[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_653 = tensor.extract_slice %extracted_slice_652[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_654 = tensor.extract_slice %extracted_slice_653[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %extracted_slice_655 = tensor.extract_slice %arg315[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_656 = tensor.extract_slice %extracted_slice_655[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_657 = tensor.extract_slice %extracted_slice_656[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %3678 = tensor.empty() : tensor<1x40x128xf32> + %3679 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_654 : tensor<1x1x40x128xf32>) outs(%3678 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %3680 = tensor.empty() : tensor<40x128xf32> + %3681 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%3679 : tensor<1x40x128xf32>) outs(%3680 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %3682 = tensor.empty() : tensor<1x40x128xf32> + %3683 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} 
ins(%extracted_slice_657 : tensor<1x1x40x128xf32>) outs(%3682 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %3684 = tensor.empty() : tensor<40x128xf32> + %3685 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%3683 : tensor<1x40x128xf32>) outs(%3684 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %3686 = tensor.empty() : tensor<1x40x128xf32> + %3687 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%3686 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %3681[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %3688 = tosa.reshape %3687 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3689 = tensor.empty() : tensor<1x40x128xf32> + %3690 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%3689 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %3685[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %3691 = tosa.reshape %3690 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3692 = tosa.mul %3671, %3688 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_658 = tensor.extract_slice %3671[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_659 = tensor.extract_slice %3671[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3693 = tosa.negate %extracted_slice_659 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3694 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_660 = tensor.insert_slice %3693 into %3694[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_661 = tensor.insert_slice %extracted_slice_658 into %inserted_slice_660[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3695 = tosa.mul %inserted_slice_661, %3691 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3696 = tosa.add %3692, %3695 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3697 = tosa.mul %3674, %3688 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_662 = tensor.extract_slice %3674[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_663 = tensor.extract_slice %3674[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3698 = tosa.negate %extracted_slice_663 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3699 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_664 = tensor.insert_slice %3698 into %3699[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_665 = tensor.insert_slice %extracted_slice_662 into 
%inserted_slice_664[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3700 = tosa.mul %inserted_slice_665, %3691 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3701 = tosa.add %3697, %3700 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3702 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3703 = tosa.transpose %3701, %3702 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %3704 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %3705 = tosa.add %3696, %3704 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3706 = tosa.reshape %3705 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3707 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32> + %3708 = tosa.add %3703, %3707 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32> + %3709 = tosa.reshape %3708 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %3710 = tosa.matmul %3706, %3709 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %3711 = tosa.reshape %3710 {new_shape = array} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3712 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %3713 = tosa.reciprocal %3712 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3714 = tosa.mul %3711, %3713 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3715 = tosa.add %3714, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32> + %3716 = tosa.reduce_max %3715 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %3717 = tosa.sub %3715, %3716 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %3718 = tosa.exp %3717 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3719 = tosa.reduce_sum %3718 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %3720 = tosa.reciprocal %3719 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + %3721 = tosa.mul %3718, %3720 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %3722 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %3723 = tosa.add %3721, %3722 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3724 = tosa.reshape %3723 {new_shape = array} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32> + %3725 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %3726 = tosa.add %3677, %3725 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3727 = tosa.reshape %3726 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3728 = tosa.matmul %3724, %3727 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %3729 = tosa.reshape %3728 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3730 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3731 = tosa.transpose %3729, %3730 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %3732 = tosa.identity 
%3731 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32> + %3733 = tosa.reshape %3732 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %3734 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3735 = tosa.transpose %arg316, %3734 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3736 = tosa.reshape %3733 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_666 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3737 = linalg.matmul {cast = #linalg.type_fn} ins(%3736, %3735 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_666 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3738 = tosa.reshape %3737 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3739 = tosa.add %3641, %3738 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3740 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_667 = arith.constant 2 : i32 + %3741 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3739 : tensor<1x40x4096xf32>) outs(%3740 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_667 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %3742 = tosa.reduce_sum %3741 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3743 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3744 = tosa.reciprocal %3743 : (tensor<1xf32>) -> tensor<1xf32> + %3745 = tosa.mul %3744, %3742 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3746 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3747 = tosa.add %3745, %3746 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3748 = tosa.rsqrt %3747 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3749 = tosa.mul %3739, %3748 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3750 = tosa.reshape %arg317 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3751 = tosa.mul %3750, %3749 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3752 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3753 = tosa.transpose %arg318, %3752 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3754 = tosa.reshape %3751 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_668 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3755 = linalg.matmul {cast = #linalg.type_fn} ins(%3754, %3753 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_668 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3756 = tosa.reshape %3755 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3757 = tosa.sigmoid %3756 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3758 = tosa.mul %3756, %3757 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3759 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3760 = tosa.transpose %arg319, %3759 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3761 = tosa.reshape %3751 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_669 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3762 = linalg.matmul {cast = #linalg.type_fn} 
ins(%3761, %3760 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_669 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3763 = tosa.reshape %3762 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3764 = tosa.mul %3758, %3763 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3765 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3766 = tosa.transpose %arg320, %3765 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %3767 = tosa.reshape %3764 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_670 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3768 = linalg.matmul {cast = #linalg.type_fn} ins(%3767, %3766 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_670 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3769 = tosa.reshape %3768 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3770 = tosa.add %3739, %3769 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3771 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_671 = arith.constant 2 : i32 + %3772 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3770 : tensor<1x40x4096xf32>) outs(%3771 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_671 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %3773 = tosa.reduce_sum %3772 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3774 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3775 = tosa.reciprocal %3774 : (tensor<1xf32>) -> tensor<1xf32> + %3776 = tosa.mul %3775, %3773 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3777 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3778 = tosa.add %3776, %3777 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3779 = tosa.rsqrt %3778 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3780 = tosa.mul %3770, %3779 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3781 = tosa.reshape %arg321 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3782 = tosa.mul %3781, %3780 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3783 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3784 = tosa.transpose %arg322, %3783 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3785 = tosa.reshape %3782 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_672 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3786 = linalg.matmul {cast = #linalg.type_fn} ins(%3785, %3784 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_672 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3787 = tosa.reshape %3786 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3788 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3789 = tosa.transpose %arg323, %3788 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3790 = tosa.reshape %3782 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_673 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3791 = linalg.matmul {cast = #linalg.type_fn} ins(%3790, %3789 : 
tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_673 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3792 = tosa.reshape %3791 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3793 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3794 = tosa.transpose %arg324, %3793 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3795 = tosa.reshape %3782 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_674 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3796 = linalg.matmul {cast = #linalg.type_fn} ins(%3795, %3794 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_674 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3797 = tosa.reshape %3796 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3798 = tosa.reshape %3787 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3799 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3800 = tosa.transpose %3798, %3799 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3801 = tosa.reshape %3792 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3802 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3803 = tosa.transpose %3801, %3802 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3804 = tosa.reshape %3797 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3805 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3806 = tosa.transpose %3804, %3805 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %extracted_slice_675 = tensor.extract_slice %arg325[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_676 = tensor.extract_slice %extracted_slice_675[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_677 = tensor.extract_slice %extracted_slice_676[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %extracted_slice_678 = tensor.extract_slice %arg326[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_679 = tensor.extract_slice %extracted_slice_678[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_680 = tensor.extract_slice %extracted_slice_679[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %3807 = tensor.empty() : tensor<1x40x128xf32> + %3808 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_677 : tensor<1x1x40x128xf32>) outs(%3807 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %3809 = tensor.empty() : tensor<40x128xf32> + %3810 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%3808 : tensor<1x40x128xf32>) outs(%3809 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %3811 = tensor.empty() : tensor<1x40x128xf32> + %3812 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_680 : tensor<1x1x40x128xf32>) 
outs(%3811 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %3813 = tensor.empty() : tensor<40x128xf32> + %3814 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%3812 : tensor<1x40x128xf32>) outs(%3813 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %3815 = tensor.empty() : tensor<1x40x128xf32> + %3816 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%3815 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %3810[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %3817 = tosa.reshape %3816 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3818 = tensor.empty() : tensor<1x40x128xf32> + %3819 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%3818 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %3814[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %3820 = tosa.reshape %3819 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3821 = tosa.mul %3800, %3817 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_681 = tensor.extract_slice %3800[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_682 = tensor.extract_slice %3800[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3822 = tosa.negate %extracted_slice_682 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3823 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_683 = tensor.insert_slice %3822 into %3823[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_684 = tensor.insert_slice %extracted_slice_681 into %inserted_slice_683[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3824 = tosa.mul %inserted_slice_684, %3820 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3825 = tosa.add %3821, %3824 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3826 = tosa.mul %3803, %3817 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_685 = tensor.extract_slice %3803[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_686 = tensor.extract_slice %3803[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3827 = tosa.negate %extracted_slice_686 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3828 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_687 = tensor.insert_slice %3827 into %3828[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_688 = tensor.insert_slice %extracted_slice_685 into %inserted_slice_687[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 
1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3829 = tosa.mul %inserted_slice_688, %3820 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3830 = tosa.add %3826, %3829 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3831 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3832 = tosa.transpose %3830, %3831 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %3833 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %3834 = tosa.add %3825, %3833 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3835 = tosa.reshape %3834 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3836 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32> + %3837 = tosa.add %3832, %3836 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32> + %3838 = tosa.reshape %3837 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %3839 = tosa.matmul %3835, %3838 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %3840 = tosa.reshape %3839 {new_shape = array} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3841 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %3842 = tosa.reciprocal %3841 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3843 = tosa.mul %3840, %3842 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3844 = tosa.add %3843, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32> + %3845 = tosa.reduce_max %3844 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %3846 = tosa.sub %3844, %3845 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %3847 = tosa.exp %3846 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3848 = tosa.reduce_sum %3847 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %3849 = tosa.reciprocal %3848 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + %3850 = tosa.mul %3847, %3849 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %3851 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %3852 = tosa.add %3850, %3851 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3853 = tosa.reshape %3852 {new_shape = array} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32> + %3854 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %3855 = tosa.add %3806, %3854 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3856 = tosa.reshape %3855 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3857 = tosa.matmul %3853, %3856 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %3858 = tosa.reshape %3857 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3859 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3860 = tosa.transpose %3858, %3859 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %3861 = tosa.identity %3860 : (tensor<1x40x32x128xf32>) -> 
tensor<1x40x32x128xf32> + %3862 = tosa.reshape %3861 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %3863 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3864 = tosa.transpose %arg327, %3863 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3865 = tosa.reshape %3862 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_689 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3866 = linalg.matmul {cast = #linalg.type_fn} ins(%3865, %3864 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_689 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3867 = tosa.reshape %3866 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3868 = tosa.add %3770, %3867 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3869 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_690 = arith.constant 2 : i32 + %3870 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3868 : tensor<1x40x4096xf32>) outs(%3869 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_690 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %3871 = tosa.reduce_sum %3870 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3872 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3873 = tosa.reciprocal %3872 : (tensor<1xf32>) -> tensor<1xf32> + %3874 = tosa.mul %3873, %3871 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3875 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3876 = tosa.add %3874, %3875 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3877 = tosa.rsqrt %3876 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3878 = tosa.mul %3868, %3877 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3879 = tosa.reshape %arg328 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3880 = tosa.mul %3879, %3878 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3881 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3882 = tosa.transpose %arg329, %3881 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3883 = tosa.reshape %3880 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_691 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3884 = linalg.matmul {cast = #linalg.type_fn} ins(%3883, %3882 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_691 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3885 = tosa.reshape %3884 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3886 = tosa.sigmoid %3885 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3887 = tosa.mul %3885, %3886 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3888 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3889 = tosa.transpose %arg330, %3888 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3890 = tosa.reshape %3880 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_692 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3891 = linalg.matmul {cast = #linalg.type_fn} ins(%3890, %3889 : 
tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_692 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3892 = tosa.reshape %3891 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3893 = tosa.mul %3887, %3892 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3894 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3895 = tosa.transpose %arg331, %3894 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %3896 = tosa.reshape %3893 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_693 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3897 = linalg.matmul {cast = #linalg.type_fn} ins(%3896, %3895 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_693 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3898 = tosa.reshape %3897 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3899 = tosa.add %3868, %3898 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3900 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_694 = arith.constant 2 : i32 + %3901 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3899 : tensor<1x40x4096xf32>) outs(%3900 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_694 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %3902 = tosa.reduce_sum %3901 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3903 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3904 = tosa.reciprocal %3903 : (tensor<1xf32>) -> tensor<1xf32> + %3905 = tosa.mul %3904, %3902 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3906 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3907 = tosa.add %3905, %3906 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3908 = tosa.rsqrt %3907 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3909 = tosa.mul %3899, %3908 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3910 = tosa.reshape %arg332 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3911 = tosa.mul %3910, %3909 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3912 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3913 = tosa.transpose %arg333, %3912 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3914 = tosa.reshape %3911 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_695 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3915 = linalg.matmul {cast = #linalg.type_fn} ins(%3914, %3913 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_695 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3916 = tosa.reshape %3915 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3917 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3918 = tosa.transpose %arg334, %3917 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3919 = tosa.reshape %3911 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_696 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3920 = linalg.matmul {cast = #linalg.type_fn} ins(%3919, %3918 : tensor<40x4096xf32>, 
tensor<4096x4096xf32>) outs(%cst_696 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3921 = tosa.reshape %3920 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3922 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3923 = tosa.transpose %arg335, %3922 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3924 = tosa.reshape %3911 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_697 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3925 = linalg.matmul {cast = #linalg.type_fn} ins(%3924, %3923 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_697 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3926 = tosa.reshape %3925 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3927 = tosa.reshape %3916 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3928 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3929 = tosa.transpose %3927, %3928 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3930 = tosa.reshape %3921 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3931 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3932 = tosa.transpose %3930, %3931 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3933 = tosa.reshape %3926 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3934 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3935 = tosa.transpose %3933, %3934 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %extracted_slice_698 = tensor.extract_slice %arg336[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_699 = tensor.extract_slice %extracted_slice_698[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_700 = tensor.extract_slice %extracted_slice_699[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %extracted_slice_701 = tensor.extract_slice %arg337[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_702 = tensor.extract_slice %extracted_slice_701[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_703 = tensor.extract_slice %extracted_slice_702[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %3936 = tensor.empty() : tensor<1x40x128xf32> + %3937 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_700 : tensor<1x1x40x128xf32>) outs(%3936 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %3938 = tensor.empty() : tensor<40x128xf32> + %3939 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%3937 : tensor<1x40x128xf32>) outs(%3938 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %3940 = tensor.empty() : tensor<1x40x128xf32> + %3941 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_703 : tensor<1x1x40x128xf32>) outs(%3940 : 
tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %3942 = tensor.empty() : tensor<40x128xf32> + %3943 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%3941 : tensor<1x40x128xf32>) outs(%3942 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %3944 = tensor.empty() : tensor<1x40x128xf32> + %3945 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%3944 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %3939[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %3946 = tosa.reshape %3945 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3947 = tensor.empty() : tensor<1x40x128xf32> + %3948 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%3947 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %3943[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %3949 = tosa.reshape %3948 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3950 = tosa.mul %3929, %3946 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_704 = tensor.extract_slice %3929[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_705 = tensor.extract_slice %3929[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3951 = tosa.negate %extracted_slice_705 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3952 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_706 = tensor.insert_slice %3951 into %3952[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_707 = tensor.insert_slice %extracted_slice_704 into %inserted_slice_706[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3953 = tosa.mul %inserted_slice_707, %3949 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3954 = tosa.add %3950, %3953 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3955 = tosa.mul %3932, %3946 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_708 = tensor.extract_slice %3932[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_709 = tensor.extract_slice %3932[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3956 = tosa.negate %extracted_slice_709 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3957 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_710 = tensor.insert_slice %3956 into %3957[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_711 = tensor.insert_slice %extracted_slice_708 into %inserted_slice_710[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : 
tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3958 = tosa.mul %inserted_slice_711, %3949 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3959 = tosa.add %3955, %3958 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3960 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3961 = tosa.transpose %3959, %3960 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %3962 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %3963 = tosa.add %3954, %3962 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3964 = tosa.reshape %3963 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3965 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32> + %3966 = tosa.add %3961, %3965 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32> + %3967 = tosa.reshape %3966 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %3968 = tosa.matmul %3964, %3967 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %3969 = tosa.reshape %3968 {new_shape = array} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3970 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %3971 = tosa.reciprocal %3970 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3972 = tosa.mul %3969, %3971 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3973 = tosa.add %3972, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32> + %3974 = tosa.reduce_max %3973 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %3975 = tosa.sub %3973, %3974 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %3976 = tosa.exp %3975 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3977 = tosa.reduce_sum %3976 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %3978 = tosa.reciprocal %3977 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + %3979 = tosa.mul %3976, %3978 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %3980 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %3981 = tosa.add %3979, %3980 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %3982 = tosa.reshape %3981 {new_shape = array} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32> + %3983 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %3984 = tosa.add %3935, %3983 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3985 = tosa.reshape %3984 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3986 = tosa.matmul %3982, %3985 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %3987 = tosa.reshape %3986 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3988 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3989 = tosa.transpose %3987, %3988 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %3990 = tosa.identity %3989 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32> + 
%3991 = tosa.reshape %3990 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %3992 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3993 = tosa.transpose %arg338, %3992 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3994 = tosa.reshape %3991 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_712 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3995 = linalg.matmul {cast = #linalg.type_fn} ins(%3994, %3993 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_712 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3996 = tosa.reshape %3995 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3997 = tosa.add %3899, %3996 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3998 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_713 = arith.constant 2 : i32 + %3999 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3997 : tensor<1x40x4096xf32>) outs(%3998 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_713 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %4000 = tosa.reduce_sum %3999 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %4001 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %4002 = tosa.reciprocal %4001 : (tensor<1xf32>) -> tensor<1xf32> + %4003 = tosa.mul %4002, %4000 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %4004 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %4005 = tosa.add %4003, %4004 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %4006 = tosa.rsqrt %4005 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %4007 = tosa.mul %3997, %4006 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %4008 = tosa.reshape %arg339 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %4009 = tosa.mul %4008, %4007 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %4010 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %4011 = tosa.transpose %arg340, %4010 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %4012 = tosa.reshape %4009 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_714 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %4013 = linalg.matmul {cast = #linalg.type_fn} ins(%4012, %4011 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_714 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %4014 = tosa.reshape %4013 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %4015 = tosa.sigmoid %4014 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %4016 = tosa.mul %4014, %4015 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %4017 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %4018 = tosa.transpose %arg341, %4017 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %4019 = tosa.reshape %4009 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_715 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %4020 = linalg.matmul {cast = #linalg.type_fn} ins(%4019, %4018 : tensor<40x4096xf32>, 
tensor<4096x11008xf32>) outs(%cst_715 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %4021 = tosa.reshape %4020 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %4022 = tosa.mul %4016, %4021 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %4023 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %4024 = tosa.transpose %arg342, %4023 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %4025 = tosa.reshape %4022 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_716 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %4026 = linalg.matmul {cast = #linalg.type_fn} ins(%4025, %4024 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_716 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %4027 = tosa.reshape %4026 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %4028 = tosa.add %3997, %4027 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %4029 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_717 = arith.constant 2 : i32 + %4030 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4028 : tensor<1x40x4096xf32>) outs(%4029 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_717 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %4031 = tosa.reduce_sum %4030 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %4032 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %4033 = tosa.reciprocal %4032 : (tensor<1xf32>) -> tensor<1xf32> + %4034 = tosa.mul %4033, %4031 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %4035 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %4036 = tosa.add %4034, %4035 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %4037 = tosa.rsqrt %4036 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %4038 = tosa.mul %4028, %4037 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %4039 = tosa.reshape %arg343 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %4040 = tosa.mul %4039, %4038 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %4041 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %4042 = tosa.transpose %arg344, %4041 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %4043 = tosa.reshape %4040 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_718 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %4044 = linalg.matmul {cast = #linalg.type_fn} ins(%4043, %4042 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_718 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %4045 = tosa.reshape %4044 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %4046 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %4047 = tosa.transpose %arg345, %4046 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %4048 = tosa.reshape %4040 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_719 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %4049 = linalg.matmul {cast = #linalg.type_fn} ins(%4048, %4047 : tensor<40x4096xf32>, tensor<4096x4096xf32>) 
outs(%cst_719 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %4050 = tosa.reshape %4049 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %4051 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %4052 = tosa.transpose %arg346, %4051 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %4053 = tosa.reshape %4040 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_720 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %4054 = linalg.matmul {cast = #linalg.type_fn} ins(%4053, %4052 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_720 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %4055 = tosa.reshape %4054 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %4056 = tosa.reshape %4045 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %4057 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %4058 = tosa.transpose %4056, %4057 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %4059 = tosa.reshape %4050 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %4060 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %4061 = tosa.transpose %4059, %4060 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %4062 = tosa.reshape %4055 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %4063 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %4064 = tosa.transpose %4062, %4063 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %extracted_slice_721 = tensor.extract_slice %arg347[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_722 = tensor.extract_slice %extracted_slice_721[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_723 = tensor.extract_slice %extracted_slice_722[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %extracted_slice_724 = tensor.extract_slice %arg348[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_725 = tensor.extract_slice %extracted_slice_724[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32> + %extracted_slice_726 = tensor.extract_slice %extracted_slice_725[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32> + %4065 = tensor.empty() : tensor<1x40x128xf32> + %4066 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_723 : tensor<1x1x40x128xf32>) outs(%4065 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %4067 = tensor.empty() : tensor<40x128xf32> + %4068 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%4066 : tensor<1x40x128xf32>) outs(%4067 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %4069 = tensor.empty() : tensor<1x40x128xf32> + %4070 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_726 : tensor<1x1x40x128xf32>) outs(%4069 : tensor<1x40x128xf32>) { + ^bb0(%in: f32, 
%out: f32): + linalg.yield %in : f32 + } -> tensor<1x40x128xf32> + %4071 = tensor.empty() : tensor<40x128xf32> + %4072 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%4070 : tensor<1x40x128xf32>) outs(%4071 : tensor<40x128xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<40x128xf32> + %4073 = tensor.empty() : tensor<1x40x128xf32> + %4074 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%4073 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %4068[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %4075 = tosa.reshape %4074 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %4076 = tensor.empty() : tensor<1x40x128xf32> + %4077 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<1x40xi64>) outs(%4076 : tensor<1x40x128xf32>) { + ^bb0(%in: i64, %out: f32): + %4175 = arith.index_cast %in : i64 to index + %4176 = linalg.index 2 : index + %extracted = tensor.extract %4072[%4175, %4176] : tensor<40x128xf32> + linalg.yield %extracted : f32 + } -> tensor<1x40x128xf32> + %4078 = tosa.reshape %4077 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %4079 = tosa.mul %4058, %4075 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_727 = tensor.extract_slice %4058[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_728 = tensor.extract_slice %4058[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %4080 = tosa.negate %extracted_slice_728 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %4081 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_729 = tensor.insert_slice %4080 into %4081[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_730 = tensor.insert_slice %extracted_slice_727 into %inserted_slice_729[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %4082 = tosa.mul %inserted_slice_730, %4078 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %4083 = tosa.add %4079, %4082 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %4084 = tosa.mul %4061, %4075 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_731 = tensor.extract_slice %4061[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_732 = tensor.extract_slice %4061[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %4085 = tosa.negate %extracted_slice_732 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %4086 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_733 = tensor.insert_slice %4085 into %4086[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_734 = tensor.insert_slice %extracted_slice_731 into %inserted_slice_733[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into 
tensor<1x32x40x128xf32> + %4087 = tosa.mul %inserted_slice_734, %4078 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %4088 = tosa.add %4084, %4087 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %4089 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %4090 = tosa.transpose %4088, %4089 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %4091 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %4092 = tosa.add %4083, %4091 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %4093 = tosa.reshape %4092 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %4094 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x128x40xf32>}> : () -> tensor<1x32x128x40xf32> + %4095 = tosa.add %4090, %4094 : (tensor<1x32x128x40xf32>, tensor<1x32x128x40xf32>) -> tensor<1x32x128x40xf32> + %4096 = tosa.reshape %4095 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %4097 = tosa.matmul %4093, %4096 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %4098 = tosa.reshape %4097 {new_shape = array} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32> + %4099 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %4100 = tosa.reciprocal %4099 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %4101 = tosa.mul %4098, %4100 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %4102 = tosa.add %4101, %29 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32> + %4103 = tosa.reduce_max %4102 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %4104 = tosa.sub %4102, %4103 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %4105 = tosa.exp %4104 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %4106 = tosa.reduce_sum %4105 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %4107 = tosa.reciprocal %4106 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + %4108 = tosa.mul %4105, %4107 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %4109 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32> + %4110 = tosa.add %4108, %4109 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + %4111 = tosa.reshape %4110 {new_shape = array} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32> + %4112 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32> + %4113 = tosa.add %4064, %4112 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %4114 = tosa.reshape %4113 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %4115 = tosa.matmul %4111, %4114 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %4116 = tosa.reshape %4115 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %4117 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %4118 = tosa.transpose %4116, %4117 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %4119 = tosa.identity %4118 : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32> + %4120 = tosa.reshape %4119 
{new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %4121 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %4122 = tosa.transpose %arg349, %4121 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %4123 = tosa.reshape %4120 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_735 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %4124 = linalg.matmul {cast = #linalg.type_fn} ins(%4123, %4122 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_735 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %4125 = tosa.reshape %4124 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %4126 = tosa.add %4028, %4125 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %4127 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_736 = arith.constant 2 : i32 + %4128 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4126 : tensor<1x40x4096xf32>) outs(%4127 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_736 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %4129 = tosa.reduce_sum %4128 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %4130 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %4131 = tosa.reciprocal %4130 : (tensor<1xf32>) -> tensor<1xf32> + %4132 = tosa.mul %4131, %4129 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %4133 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %4134 = tosa.add %4132, %4133 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %4135 = tosa.rsqrt %4134 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %4136 = tosa.mul %4126, %4135 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %4137 = tosa.reshape %arg350 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %4138 = tosa.mul %4137, %4136 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %4139 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %4140 = tosa.transpose %arg351, %4139 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %4141 = tosa.reshape %4138 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_737 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %4142 = linalg.matmul {cast = #linalg.type_fn} ins(%4141, %4140 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_737 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %4143 = tosa.reshape %4142 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %4144 = tosa.sigmoid %4143 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %4145 = tosa.mul %4143, %4144 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %4146 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %4147 = tosa.transpose %arg352, %4146 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %4148 = tosa.reshape %4138 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_738 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %4149 = linalg.matmul {cast = #linalg.type_fn} ins(%4148, %4147 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_738 : 
tensor<40x11008xf32>) -> tensor<40x11008xf32> + %4150 = tosa.reshape %4149 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %4151 = tosa.mul %4145, %4150 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %4152 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %4153 = tosa.transpose %arg353, %4152 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %4154 = tosa.reshape %4151 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_739 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %4155 = linalg.matmul {cast = #linalg.type_fn} ins(%4154, %4153 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_739 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %4156 = tosa.reshape %4155 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %4157 = tosa.add %4126, %4156 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %4158 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_740 = arith.constant 2 : i32 + %4159 = linalg.generic {indexing_maps = [#map5, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4157 : tensor<1x40x4096xf32>) outs(%4158 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %4175 = math.fpowi %in, %c2_i32_740 : f32, i32 + linalg.yield %4175 : f32 + } -> tensor<1x40x4096xf32> + %4160 = tosa.reduce_sum %4159 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %4161 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %4162 = tosa.reciprocal %4161 : (tensor<1xf32>) -> tensor<1xf32> + %4163 = tosa.mul %4162, %4160 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %4164 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %4165 = tosa.add %4163, %4164 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %4166 = tosa.rsqrt %4165 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %4167 = tosa.mul %4157, %4166 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %4168 = tosa.reshape %arg354 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %4169 = tosa.mul %4168, %4167 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %4170 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %4171 = tosa.transpose %arg355, %4170 : (tensor<32000x4096xf32>, tensor<2xi32>) -> tensor<4096x32000xf32> + %4172 = tosa.reshape %4169 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_741 = arith.constant dense<0.000000e+00> : tensor<40x32000xf32> + %4173 = linalg.matmul {cast = #linalg.type_fn} ins(%4172, %4171 : tensor<40x4096xf32>, tensor<4096x32000xf32>) outs(%cst_741 : tensor<40x32000xf32>) -> tensor<40x32000xf32> + %4174 = tosa.reshape %4173 {new_shape = array} : (tensor<40x32000xf32>) -> tensor<1x40x32000xf32> + return %4169, %4174 : tensor<1x40x4096xf32>, tensor<1x40x32000xf32> + } +} + diff --git a/examples/BuddyMatmul/.gitignore b/examples/BuddyMatmul/.gitignore new file mode 100644 index 000000000..80a243fa8 --- /dev/null +++ b/examples/BuddyMatmul/.gitignore @@ -0,0 +1 @@ +log.* diff --git a/examples/BuddyMatmul/linalg-batchmatmul-f32.mlir b/examples/BuddyMatmul/linalg-batchmatmul-f32.mlir new file mode 100644 index 000000000..58c914239 --- /dev/null +++ 
b/examples/BuddyMatmul/linalg-batchmatmul-f32.mlir @@ -0,0 +1,84 @@ +// RUN: buddy-opt %s \ +// RUN: -batchmatmul-optimize \ +// RUN: -convert-linalg-to-affine-loops \ +// RUN: -lower-affine \ +// RUN: -convert-vector-to-scf \ +// RUN: -convert-scf-to-cf \ +// RUN: -convert-vector-to-llvm \ +// RUN: -convert-math-to-llvm \ +// RUN: -convert-math-to-libm \ +// RUN: -convert-arith-to-llvm \ +// RUN: -convert-func-to-llvm \ +// RUN: -expand-strided-metadata \ +// RUN: -finalize-memref-to-llvm \ +// RUN: -reconcile-unrealized-casts \ +// RUN: | mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func.func private @printMemrefF32(memref<*xf32>) + +func.func @batch_matmul(%arg0: memref<?x?x?xf32>, %arg1: memref<?x?x?xf32>, %arg2: memref<?x?x?xf32>) { + linalg.batch_matmul + ins(%arg0, %arg1 : memref<?x?x?xf32>, memref<?x?x?xf32>) + outs(%arg2 : memref<?x?x?xf32>) + return +} + +func.func @alloc_f32(%arg0: index, %arg1: index, %arg2: index, %arg4: f32) -> memref<?x?x?xf32> { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %0 = memref.alloc(%arg0, %arg1, %arg2) : memref<?x?x?xf32> + scf.for %idx0 = %c0 to %arg0 step %c1 { + scf.for %idx1 = %c0 to %arg1 step %c1 { + scf.for %idx2 = %c0 to %arg2 step %c1 { + memref.store %arg4, %0[%idx0, %idx1, %idx2] : memref<?x?x?xf32> + } + } + } + return %0 : memref<?x?x?xf32> +} + +func.func @main(){ + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c576 = arith.constant 576 : index + %c1024 = arith.constant 1024 : index + %c1000 = arith.constant 1000 : index + %f0 = arith.constant 0.0 : f32 + %f2 = arith.constant 2.0 : f32 + %f3 = arith.constant 3.0 : f32 + + %m0 = call @alloc_f32(%c1, %c1, %c576, %f2) : (index, index, index, f32) -> memref<?x?x?xf32> + %m1 = call @alloc_f32(%c1, %c576, %c1024, %f3) : (index, index, index, f32) -> memref<?x?x?xf32> + %m2 = call @alloc_f32(%c1, %c1, %c1024, %f0) : (index, index, index, f32) -> memref<?x?x?xf32> + + call @batch_matmul(%m0, %m1, %m2) : (memref<?x?x?xf32>, memref<?x?x?xf32>, memref<?x?x?xf32>) -> () + + %printed_m2 = memref.cast %m2 : memref<?x?x?xf32> to memref<*xf32> + + // Every element accumulates 2.0 * 3.0 over the shared dimension of 576, i.e. 3456. + // CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 1, 1024] strides = [1024, 1024, 1] data = + // CHECK-NEXT: [ + // CHECK: [ + // CHECK: [3456{{(, 3456)*}}] + call @printMemrefF32(%printed_m2) : (memref<*xf32>) -> () + + %m3 = call @alloc_f32(%c1, %c1, %c1024, %f2) : (index, index, index, f32) -> memref<?x?x?xf32> + %m4 = call @alloc_f32(%c1, %c1024, %c1000, %f3) : (index, index, index, f32) -> memref<?x?x?xf32> + %m5 = call @alloc_f32(%c1, %c1, %c1000, %f0) : (index, index, index, f32) -> memref<?x?x?xf32> + + call @batch_matmul(%m3, %m4, %m5) : (memref<?x?x?xf32>, memref<?x?x?xf32>, memref<?x?x?xf32>) -> () + + %printed_m5 = memref.cast %m5 : memref<?x?x?xf32> to memref<*xf32> + + // Here the shared dimension is 1024, so every element is 2.0 * 3.0 * 1024 = 6144. + // CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 1, 1000] strides = [1000, 1000, 1] data = + // CHECK-NEXT: [ + // CHECK: [ + // CHECK: [6144{{(, 6144)*}}] + call @printMemrefF32(%printed_m5) : (memref<*xf32>) -> () + + return +} diff --git a/examples/BuddyMatmul/makefile b/examples/BuddyMatmul/makefile new file mode 100644 index 000000000..812e68b15 --- /dev/null +++ b/examples/BuddyMatmul/makefile @@ -0,0 +1,37 @@ +#!/bin/bash +BUDDY_BUILD_DIR := ../../build/ +LLVM_BUILD_DIR := ../../llvm/build/ +BUDDY_OPT := ${BUDDY_BUILD_DIR}/bin/buddy-opt +MLIR_OPT := ${LLVM_BUILD_DIR}/bin/mlir-opt +MLIR_TRANSLATE := ${LLVM_BUILD_DIR}/bin/mlir-translate +MLIR_CPU_RUNNER := ${LLVM_BUILD_DIR}/bin/mlir-cpu-runner +LLC := ${LLVM_BUILD_DIR}/bin/llc +OPT_FLAG := -O0 + +ifeq
($(shell uname),Linux) +MLIR_RUNNER_UTILS := ${LLVM_BUILD_DIR}/lib/libmlir_runner_utils.so +MLIR_C_RUNNER_UTILS := ${LLVM_BUILD_DIR}/lib/libmlir_c_runner_utils.so +MTRIPLE := x86_64-unknown-linux-gnu +else ifeq ($(shell uname),Darwin) +MLIR_RUNNER_UTILS := ${LLVM_BUILD_DIR}/lib/libmlir_runner_utils.dylib +MLIR_C_RUNNER_UTILS := ${LLVM_BUILD_DIR}/lib/libmlir_c_runner_utils.dylib +MTRIPLE := x86_64-apple-darwin +endif + +linalg-batchmatmul-f32-run: + @${BUDDY_OPT} ./linalg-batchmatmul-f32.mlir \ + -batchmatmul-optimize \ + -convert-linalg-to-affine-loops \ + -lower-affine \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ + -convert-vector-to-llvm \ + -convert-math-to-llvm \ + -convert-math-to-libm \ + -convert-arith-to-llvm \ + -convert-func-to-llvm \ + -expand-strided-metadata \ + -finalize-memref-to-llvm \ + -reconcile-unrealized-casts | \ + ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \ + -shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS} diff --git a/examples/BuddyMobileNetV3/.gitignore b/examples/BuddyMobileNetV3/.gitignore new file mode 100644 index 000000000..9eb1b1736 --- /dev/null +++ b/examples/BuddyMobileNetV3/.gitignore @@ -0,0 +1,7 @@ +# model params file +arg0.data +arg1.data + +# model mlir file +forward.mlir +subgraph0.mlir diff --git a/examples/BuddyMobileNetV3/CMakeLists.txt b/examples/BuddyMobileNetV3/CMakeLists.txt new file mode 100644 index 000000000..8557058a6 --- /dev/null +++ b/examples/BuddyMobileNetV3/CMakeLists.txt @@ -0,0 +1,75 @@ +add_custom_command( + OUTPUT ${BUDDY_EXAMPLES_DIR}/BuddyMobileNetV3/arg0.data + ${BUDDY_EXAMPLES_DIR}/BuddyMobileNetV3/arg1.data + ${BUDDY_EXAMPLES_DIR}/BuddyMobileNetV3/forward.mlir + ${BUDDY_EXAMPLES_DIR}/BuddyMobileNetV3/subgraph0.mlir + COMMAND python3 ${BUDDY_EXAMPLES_DIR}/BuddyMobileNetV3/buddy-mobilenetv3-import.py + COMMENT "Generating forward.mlir, subgraph0.mlir and parameter files" +) + + +add_custom_command( + OUTPUT forward.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyMobileNetV3/forward.mlir + -pass-pipeline + "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith), \ + empty-tensor-to-alloc-tensor, convert-elementwise-to-linalg, arith-bufferize, \ + func.func(linalg-bufferize, tensor-bufferize), func-bufferize)" | + ${LLVM_TOOLS_BINARY_DIR}/mlir-opt + -pass-pipeline + "builtin.module(func.func(buffer-deallocation-simplification, convert-linalg-to-loops), \ + eliminate-empty-tensors, func.func(llvm-request-c-wrappers), \ + convert-math-to-llvm, convert-math-to-libm, convert-scf-to-cf, \ + convert-arith-to-llvm, expand-strided-metadata, finalize-memref-to-llvm, \ + convert-func-to-llvm, reconcile-unrealized-casts)" | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + -o ${BUDDY_BINARY_DIR}/../examples/BuddyMobileNetV3/forward.o + DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyMobileNetV3/forward.mlir + COMMENT "Building forward.o" + VERBATIM) + + +add_custom_command( + OUTPUT subgraph0.o + COMMAND ${BUDDY_BINARY_DIR}/buddy-opt ${BUDDY_EXAMPLES_DIR}/BuddyMobileNetV3/subgraph0.mlir + -pass-pipeline + "builtin.module(func.func(tosa-to-linalg-named, tosa-to-arith, tosa-to-linalg, tosa-to-tensor))" | + ${BUDDY_BINARY_DIR}/buddy-opt + -convert-elementwise-to-linalg + -func-bufferize-dynamic-offset + -arith-bufferize + -func-bufferize + -tensor-bufferize + -linalg-bufferize + -finalizing-bufferize + 
-convert-linalg-to-loops + -lower-affine + -convert-scf-to-cf + -llvm-request-c-wrappers + -convert-math-to-llvm + -convert-math-to-libm + -convert-arith-to-llvm + -convert-func-to-llvm + -expand-strided-metadata + -finalize-memref-to-llvm + -reconcile-unrealized-casts | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + -o ${BUDDY_BINARY_DIR}/../examples/BuddyMobileNetV3/subgraph0.o + DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyMobileNetV3/subgraph0.mlir + buddy-opt + COMMENT "Building subgraph0.o" + VERBATIM) + +add_library(MOBILENETV3 STATIC subgraph0.o forward.o) + +SET_TARGET_PROPERTIES(MOBILENETV3 PROPERTIES LINKER_LANGUAGE C) + +add_executable(buddy-mobilenetv3-run buddy-mobilenetv3-main.cpp) +target_link_directories(buddy-mobilenetv3-run PRIVATE ${LLVM_LIBRARY_DIR}) + +set(BUDDY_MOBILENETV3_LIBS MOBILENETV3 mlir_c_runner_utils ${OpenCV_LIBS}) +target_link_libraries(buddy-mobilenetv3-run ${BUDDY_MOBILENETV3_LIBS}) diff --git a/examples/BuddyMobileNetV3/Labels.txt b/examples/BuddyMobileNetV3/Labels.txt new file mode 100644 index 000000000..fe811239d --- /dev/null +++ b/examples/BuddyMobileNetV3/Labels.txt @@ -0,0 +1,1001 @@ +background +tench +goldfish +great white shark +tiger shark +hammerhead +electric ray +stingray +cock +hen +ostrich +brambling +goldfinch +house finch +junco +indigo bunting +robin +bulbul +jay +magpie +chickadee +water ouzel +kite +bald eagle +vulture +great grey owl +European fire salamander +common newt +eft +spotted salamander +axolotl +bullfrog +tree frog +tailed frog +loggerhead +leatherback turtle +mud turtle +terrapin +box turtle +banded gecko +common iguana +American chameleon +whiptail +agama +frilled lizard +alligator lizard +Gila monster +green lizard +African chameleon +Komodo dragon +African crocodile +American alligator +triceratops +thunder snake +ringneck snake +hognose snake +green snake +king snake +garter snake +water snake +vine snake +night snake +boa constrictor +rock python +Indian cobra +green mamba +sea snake +horned viper +diamondback +sidewinder +trilobite +harvestman +scorpion +black and gold garden spider +barn spider +garden spider +black widow +tarantula +wolf spider +tick +centipede +black grouse +ptarmigan +ruffed grouse +prairie chicken +peacock +quail +partridge +African grey +macaw +sulphur-crested cockatoo +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser +goose +black swan +tusker +echidna +platypus +wallaby +koala +wombat +jellyfish +sea anemone +brain coral +flatworm +nematode +conch +snail +slug +sea slug +chiton +chambered nautilus +Dungeness crab +rock crab +fiddler crab +king crab +American lobster +spiny lobster +crayfish +hermit crab +isopod +white stork +black stork +spoonbill +flamingo +little blue heron +American egret +bittern +crane +limpkin +European gallinule +American coot +bustard +ruddy turnstone +red-backed sandpiper +redshank +dowitcher +oystercatcher +pelican +king penguin +albatross +grey whale +killer whale +dugong +sea lion +Chihuahua +Japanese spaniel +Maltese dog +Pekinese +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound +basset +beagle +bloodhound +bluetick +black-and-tan coonhound +Walker hound +English foxhound +redbone +borzoi +Irish wolfhound +Italian greyhound +whippet +Ibizan hound +Norwegian elkhound +otterhound +Saluki +Scottish deerhound +Weimaraner +Staffordshire bullterrier +American 
Staffordshire terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier +Airedale +cairn +Australian terrier +Dandie Dinmont +Boston bull +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier +Tibetan terrier +silky terrier +soft-coated wheaten terrier +West Highland white terrier +Lhasa +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla +English setter +Irish setter +Gordon setter +Brittany spaniel +clumber +English springer +Welsh springer spaniel +cocker spaniel +Sussex spaniel +Irish water spaniel +kuvasz +schipperke +groenendael +malinois +briard +kelpie +komondor +Old English sheepdog +Shetland sheepdog +collie +Border collie +Bouvier des Flandres +Rottweiler +German shepherd +Doberman +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard +Eskimo dog +malamute +Siberian husky +dalmatian +affenpinscher +basenji +pug +Leonberg +Newfoundland +Great Pyrenees +Samoyed +Pomeranian +chow +keeshond +Brabancon griffon +Pembroke +Cardigan +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf +white wolf +red wolf +coyote +dingo +dhole +African hunting dog +hyena +red fox +kit fox +Arctic fox +grey fox +tabby +tiger cat +Persian cat +Siamese cat +Egyptian cat +cougar +lynx +leopard +snow leopard +jaguar +lion +tiger +cheetah +brown bear +American black bear +ice bear +sloth bear +mongoose +meerkat +tiger beetle +ladybug +ground beetle +long-horned beetle +leaf beetle +dung beetle +rhinoceros beetle +weevil +fly +bee +ant +grasshopper +cricket +walking stick +cockroach +mantis +cicada +leafhopper +lacewing +dragonfly +damselfly +admiral +ringlet +monarch +cabbage butterfly +sulphur butterfly +lycaenid +starfish +sea urchin +sea cucumber +wood rabbit +hare +Angora +hamster +porcupine +fox squirrel +marmot +beaver +guinea pig +sorrel +zebra +hog +wild boar +warthog +hippopotamus +ox +water buffalo +bison +ram +bighorn +ibex +hartebeest +impala +gazelle +Arabian camel +llama +weasel +mink +polecat +black-footed ferret +otter +skunk +badger +armadillo +three-toed sloth +orangutan +gorilla +chimpanzee +gibbon +siamang +guenon +patas +baboon +macaque +langur +colobus +proboscis monkey +marmoset +capuchin +howler monkey +titi +spider monkey +squirrel monkey +Madagascar cat +indri +Indian elephant +African elephant +lesser panda +giant panda +barracouta +eel +coho +rock beauty +anemone fish +sturgeon +gar +lionfish +puffer +abacus +abaya +academic gown +accordion +acoustic guitar +aircraft carrier +airliner +airship +altar +ambulance +amphibian +analog clock +apiary +apron +ashcan +assault rifle +backpack +bakery +balance beam +balloon +ballpoint +Band Aid +banjo +bannister +barbell +barber chair +barbershop +barn +barometer +barrel +barrow +baseball +basketball +bassinet +bassoon +bathing cap +bath towel +bathtub +beach wagon +beacon +beaker +bearskin +beer bottle +beer glass +bell cote +bib +bicycle-built-for-two +bikini +binder +binoculars +birdhouse +boathouse +bobsled +bolo tie +bonnet +bookcase +bookshop +bottlecap +bow +bow tie +brass +brassiere +breakwater +breastplate +broom +bucket +buckle +bulletproof vest +bullet train +butcher shop +cab +caldron +candle +cannon +canoe +can opener +cardigan +car 
mirror +carousel +carpenter's kit +carton +car wheel +cash machine +cassette +cassette player +castle +catamaran +CD player +cello +cellular telephone +chain +chainlink fence +chain mail +chain saw +chest +chiffonier +chime +china cabinet +Christmas stocking +church +cinema +cleaver +cliff dwelling +cloak +clog +cocktail shaker +coffee mug +coffeepot +coil +combination lock +computer keyboard +confectionery +container ship +convertible +corkscrew +cornet +cowboy boot +cowboy hat +cradle +crane +crash helmet +crate +crib +Crock Pot +croquet ball +crutch +cuirass +dam +desk +desktop computer +dial telephone +diaper +digital clock +digital watch +dining table +dishrag +dishwasher +disk brake +dock +dogsled +dome +doormat +drilling platform +drum +drumstick +dumbbell +Dutch oven +electric fan +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa +file +fireboat +fire engine +fire screen +flagpole +flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn +frying pan +fur coat +garbage truck +gasmask +gas pump +goblet +go-kart +golf ball +golfcart +gondola +gong +gown +grand piano +greenhouse +grille +grocery store +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower +hand-held computer +handkerchief +hard disc +harmonica +harp +harvester +hatchet +holster +home theater +honeycomb +hook +hoopskirt +horizontal bar +horse cart +hourglass +iPod +iron +jack-o'-lantern +jean +jeep +jersey +jigsaw puzzle +jinrikisha +joystick +kimono +knee pad +knot +lab coat +ladle +lampshade +laptop +lawn mower +lens cap +letter opener +library +lifeboat +lighter +limousine +liner +lipstick +Loafer +lotion +loudspeaker +loupe +lumbermill +magnetic compass +mailbag +mailbox +maillot +maillot +manhole cover +maraca +marimba +mask +matchstick +maypole +maze +measuring cup +medicine chest +megalith +microphone +microwave +military uniform +milk can +minibus +miniskirt +minivan +missile +mitten +mixing bowl +mobile home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter +mountain bike +mountain tent +mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook +obelisk +oboe +ocarina +odometer +oil filter +organ +oscilloscope +overskirt +oxcart +oxygen mask +packet +paddle +paddlewheel +padlock +paintbrush +pajama +palace +panpipe +paper towel +parachute +parallel bars +park bench +parking meter +passenger car +patio +pay-phone +pedestal +pencil box +pencil sharpener +perfume +Petri dish +photocopier +pick +pickelhaube +picket fence +pickup +pier +piggy bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate +pitcher +plane +planetarium +plastic bag +plate rack +plow +plunger +Polaroid camera +pole +police van +poncho +pool table +pop bottle +pot +potter's wheel +power drill +prayer rug +printer +prison +projectile +projector +puck +punching bag +purse +quill +quilt +racer +racket +radiator +radio +radio telescope +rain barrel +recreational vehicle +reel +reflex camera +refrigerator +remote control +restaurant +revolver +rifle +rocking chair +rotisserie +rubber eraser +rugby ball +rule +running shoe +safe +safety pin +saltshaker +sandal +sarong +sax +scabbard +scale +school bus +schooner +scoreboard +screen +screw +screwdriver +seat belt +sewing machine +shield +shoe shop +shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule +sliding door +slot 
+snorkel +snowmobile +snowplow +soap dispenser +soccer ball +sock +solar dish +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web +spindle +sports car +spotlight +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch +stove +strainer +streetcar +stretcher +studio couch +stupa +submarine +suit +sundial +sunglass +sunglasses +sunscreen +suspension bridge +swab +sweatshirt +swimming trunks +swing +switch +syringe +table lamp +tank +tape player +teapot +teddy +television +tennis ball +thatch +theater curtain +thimble +thresher +throne +tile roof +toaster +tobacco shop +toilet seat +torch +totem pole +tow truck +toyshop +tractor +trailer truck +tray +trench coat +tricycle +trimaran +tripod +triumphal arch +trolleybus +trombone +tub +turnstile +typewriter keyboard +umbrella +unicycle +upright +vacuum +vase +vault +velvet +vending machine +vestment +viaduct +violin +volleyball +waffle iron +wall clock +wallet +wardrobe +warplane +washbasin +washer +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool +worm fence +wreck +yawl +yurt +web site +comic book +crossword puzzle +street sign +traffic light +book jacket +menu +plate +guacamole +consomme +hot pot +trifle +ice cream +ice lolly +French loaf +bagel +pretzel +cheeseburger +hotdog +mashed potato +head cabbage +broccoli +cauliflower +zucchini +spaghetti squash +acorn squash +butternut squash +cucumber +artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple +banana +jackfruit +custard apple +pomegranate +hay +carbonara +chocolate sauce +dough +meat loaf +pizza +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff +coral reef +geyser +lakeside +promontory +sandbar +seashore +valley +volcano +ballplayer +groom +scuba diver +rapeseed +daisy +yellow lady's slipper +corn +acorn +hip +buckeye +coral fungus +agaric +gyromitra +stinkhorn +earthstar +hen-of-the-woods +bolete +ear +toilet tissue diff --git a/examples/BuddyMobileNetV3/README.md b/examples/BuddyMobileNetV3/README.md new file mode 100644 index 000000000..1146addb6 --- /dev/null +++ b/examples/BuddyMobileNetV3/README.md @@ -0,0 +1,49 @@ +# Buddy Compiler MobileNetV3 Example + +## MobileNetV3 Model Inference + +0. Activate your python environment. + +1. Build buddy-mlir + +```bash +$ cd buddy-mlir +$ mkdir build && cd build +$ cmake -G Ninja .. \ + -DMLIR_DIR=$PWD/../llvm/build/lib/cmake/mlir \ + -DLLVM_DIR=$PWD/../llvm/build/lib/cmake/llvm \ + -DLLVM_ENABLE_ASSERTIONS=ON \ + -DCMAKE_BUILD_TYPE=RELEASE \ + -DBUDDY_MLIR_ENABLE_PYTHON_PACKAGES=ON \ + -DPython3_EXECUTABLE=$(which python3) \ + -DBUDDY_ENABLE_OPENCV=ON \ + -DOpenCV_DIR= +$ ninja +$ ninja check-buddy +``` + +2. Set the `PYTHONPATH` environment variable. + +Make sure you are in the build directory. + +```bash +$ export BUDDY_MLIR_BUILD_DIR=$PWD +$ export LLVM_MLIR_BUILD_DIR=$PWD/../llvm/build +$ export PYTHONPATH=${LLVM_MLIR_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_MLIR_BUILD_DIR}/python_packages:${PYTHONPATH} +``` + +3. Set the `MOBILENETV3_EXAMPLE_PATH` environment variable. + +```bash +$ export MOBILENETV3_EXAMPLE_PATH=${BUDDY_MLIR_BUILD_DIR}/../examples/BuddyMobileNetV3/ +``` + +4. Build and run the MobileNetV3 example + +```bash +$ cmake -G Ninja .. 
-DBUDDY_MOBILENETV3_EXAMPLES=ON
+$ ninja buddy-mobilenetv3-run
+$ cd bin
+$ ./buddy-mobilenetv3-run
+```
+
diff --git a/examples/BuddyMobileNetV3/buddy-mobilenetv3-import.py b/examples/BuddyMobileNetV3/buddy-mobilenetv3-import.py
new file mode 100644
index 000000000..2403800bf
--- /dev/null
+++ b/examples/BuddyMobileNetV3/buddy-mobilenetv3-import.py
@@ -0,0 +1,78 @@
+# ===- buddy-mobilenetv3-import.py ---------------------------------------------
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ===---------------------------------------------------------------------------
+#
+# This is the MobileNet V3 model AOT importer.
+#
+# ===---------------------------------------------------------------------------
+
+import os
+
+from pathlib import Path
+import numpy as np
+import torch
+import torchvision.models as models
+from torch._inductor.decomposition import decompositions as inductor_decomp
+
+from buddy.compiler.frontend import DynamoCompiler
+from buddy.compiler.graph import GraphDriver
+from buddy.compiler.graph.transform import simply_fuse
+from buddy.compiler.ops import tosa
+
+# Retrieve the MobileNet V3 example directory from the environment.
+model_path = os.environ.get("MOBILENETV3_EXAMPLE_PATH")
+if model_path is None:
+    raise EnvironmentError(
+        "The environment variable 'MOBILENETV3_EXAMPLE_PATH' is not set or is invalid."
+    )
+
+# `weights=` already selects the pretrained checkpoint; the legacy
+# `pretrained` flag must not be passed alongside it.
+model = models.mobilenet_v3_small(
+    weights=models.MobileNet_V3_Small_Weights.IMAGENET1K_V1
+)
+model = model.eval()
+
+# Initialize Dynamo Compiler with specific configurations as an importer.
+dynamo_compiler = DynamoCompiler(
+    primary_registry=tosa.ops_registry,
+    aot_autograd_decomposition=inductor_decomp,
+)
+data = torch.randn([1, 3, 224, 224])
+# Import the model into MLIR module and parameters.
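+# The Dynamo importer is expected to capture the whole model as a single
+# FX graph; the assertion below guards that assumption before lowering.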
+with torch.no_grad():
+    graphs = dynamo_compiler.importer(model, data)
+assert len(graphs) == 1
+graph = graphs[0]
+params = dynamo_compiler.imported_params[graph]
+pattern_list = [simply_fuse]
+graph.fuse_ops(pattern_list)
+driver = GraphDriver(graph)
+driver.subgraphs[0].lower_to_top_level_ir()
+path_prefix = os.path.dirname(os.path.abspath(__file__))
+with open(os.path.join(path_prefix, "subgraph0.mlir"), "w") as module_file:
+    print(driver.subgraphs[0]._imported_module, file=module_file)
+with open(os.path.join(path_prefix, "forward.mlir"), "w") as module_file:
+    print(driver.construct_main_graph(True), file=module_file)
+
+# Pack the model parameters into the raw data files consumed at runtime.
+float32_param = np.concatenate(
+    [
+        param.detach().numpy().reshape([-1])
+        for param in params
+        if param.dtype == torch.float32
+    ]
+)
+float32_param.tofile(Path(path_prefix) / "arg0.data")
+
+int64_param = np.concatenate(
+    [
+        param.detach().numpy().reshape([-1])
+        for param in params
+        if param.dtype == torch.int64
+    ]
+)
+int64_param.tofile(Path(path_prefix) / "arg1.data")
diff --git a/examples/BuddyMobileNetV3/buddy-mobilenetv3-main.cpp b/examples/BuddyMobileNetV3/buddy-mobilenetv3-main.cpp
new file mode 100644
index 000000000..0c5318b37
--- /dev/null
+++ b/examples/BuddyMobileNetV3/buddy-mobilenetv3-main.cpp
@@ -0,0 +1,166 @@
+//===- buddy-mobilenetv3-main.cpp -----------------------------------------===//
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+
+#include <buddy/Core/Container.h>
+#include <buddy/DIP/ImgContainer.h>
+#include <cassert>
+#include <cmath>
+#include <cstdlib>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <opencv2/opencv.hpp>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+constexpr size_t ParamsSize = 2554968;
+const std::string ImgName = "dog.png";
+
+// Declare the mobilenet C interface.
+extern "C" void _mlir_ciface_forward(MemRef<float, 2> *output,
+                                     MemRef<float, 1> *arg0,
+                                     MemRef<long long, 1> *arg1,
+                                     Img<float, 4> *input);
+
+const cv::Mat imagePreprocessing() {
+  // Get the directory of the MobileNetV3 example and construct the image path.
+  std::string mobilenetDir = getenv("MOBILENETV3_EXAMPLE_PATH");
+  std::string imgPath = mobilenetDir + "/images/" + ImgName;
+  // Read the image in grayscale mode.
+  cv::Mat inputImage = cv::imread(imgPath, cv::IMREAD_GRAYSCALE);
+  assert(!inputImage.empty() && "Could not read the image.");
+  cv::Mat resizedImage;
+  int imageWidth = 224;
+  int imageHeight = 224;
+  // Resize the image to 224x224 pixels.
+  cv::resize(inputImage, resizedImage, cv::Size(imageWidth, imageHeight), 0, 0,
+             cv::INTER_LINEAR);
+  return resizedImage;
+}
+
+/// Print [Log] label in bold blue format.
+void printLogLabel() { std::cout << "\033[34;1m[Log] \033[0m"; }
+
+void loadParameters(const std::string &floatParamPath,
+                    const std::string &int64ParamPath,
+                    MemRef<float, 1> &floatParam,
+                    MemRef<long long, 1> &int64Param) {
+  std::ifstream floatParamFile(floatParamPath,
+                               std::ios::in | std::ios::binary);
+  if (!floatParamFile.is_open()) {
+    std::string errMsg = "Failed to open float param file: " +
+                         std::filesystem::canonical(floatParamPath).string();
+    throw std::runtime_error(errMsg);
+  }
+  floatParamFile.read(reinterpret_cast<char *>(floatParam.getData()),
+                      floatParam.getSize() * sizeof(float));
+  if (floatParamFile.fail()) {
+    throw std::runtime_error("Failed to read float param file");
+  }
+  floatParamFile.close();
+
+  std::ifstream int64ParamFile(int64ParamPath,
+                               std::ios::in | std::ios::binary);
+  if (!int64ParamFile.is_open()) {
+    std::string errMsg = "Failed to open int64 param file: " +
+                         std::filesystem::canonical(int64ParamPath).string();
+    throw std::runtime_error(errMsg);
+  }
+  int64ParamFile.read(reinterpret_cast<char *>(int64Param.getData()),
+                      int64Param.getSize() * sizeof(long long));
+  if (int64ParamFile.fail()) {
+    throw std::runtime_error("Failed to read int64 param file");
+  }
+  int64ParamFile.close();
+}
+
+// Softmax function.
+void softmax(float *input, size_t size) {
+  size_t i;
+  float max_value = -INFINITY;
+  double sum = 0.0;
+  // Find the maximum value in the input array for numerical stability.
+  for (i = 0; i < size; ++i) {
+    if (max_value < input[i]) {
+      max_value = input[i];
+    }
+  }
+  // Calculate the sum of the exponentials of the input elements, normalized
+  // by the max value.
+  for (i = 0; i < size; ++i) {
+    sum += exp(input[i] - max_value);
+  }
+  // Normalize the input array with the softmax calculation.
+  for (i = 0; i < size; ++i) {
+    input[i] = exp(input[i] - max_value) / sum;
+  }
+}
+
+std::string getLabel(int idx) {
+  std::string mobilenetDir = getenv("MOBILENETV3_EXAMPLE_PATH");
+  std::ifstream in(mobilenetDir + "/Labels.txt");
+  assert(in.is_open() && "Could not read the label file.");
+  std::string label;
+  for (int i = 0; i < idx; ++i)
+    std::getline(in, label);
+  std::getline(in, label);
+  in.close();
+  return label;
+}
+
+int main() {
+  // Print the title of this example.
+  const std::string title = "MobileNetV3 Inference Powered by Buddy Compiler";
+  std::cout << "\033[33;1m" << title << "\033[0m" << std::endl;
+
+  // Preprocess the image to match the input requirements of the model.
+  cv::Mat image = imagePreprocessing();
+
+  // Define the sizes of the input and output tensors.
+  intptr_t sizesInput[4] = {1, 3, 224, 224};
+  intptr_t sizesOutput[2] = {1, 1000};
+
+  // Create input and output containers for the image and model output.
+  Img<float, 4> input(image, sizesInput, true);
+  MemRef<float, 2> output(sizesOutput);
+
+  // Load model parameters from the specified file.
+  std::string mobilenetDir = getenv("MOBILENETV3_EXAMPLE_PATH");
+  std::string paramsDir = mobilenetDir + "/arg0.data";
+  std::string intDir = mobilenetDir + "/arg1.data";
+  MemRef<float, 1> paramsContainerf32({ParamsSize});
+  MemRef<long long, 1> ParamsContainerInt64({34});
+  loadParameters(paramsDir, intDir, paramsContainerf32, ParamsContainerInt64);
+  // Call the forward function of the model.
+  _mlir_ciface_forward(&output, &paramsContainerf32, &ParamsContainerInt64,
+                       &input);
+
+  auto out = output.getData();
+  softmax(out, 1000);
+  // Find the classification and print the result.
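+  // For instance, logits {1.0, 2.0, 3.0} become probabilities of roughly
+  // {0.09, 0.24, 0.67} after softmax, so the scan below would report index 2
+  // with probability ~0.67.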
+  float maxVal = 0;
+  int maxIdx = 0;
+  // The output vector holds 1000 class scores; scan it for the top-1 result.
+  for (int i = 0; i < 1000; ++i) {
+    if (out[i] > maxVal) {
+      maxVal = out[i];
+      maxIdx = i;
+    }
+  }
+  std::cout << "Classification Index: " << maxIdx << std::endl;
+  std::cout << "Classification: " << getLabel(maxIdx) << std::endl;
+  std::cout << "Probability: " << maxVal << std::endl;
+
+  return 0;
+}
diff --git a/examples/BuddyMobileNetV3/images/curtain.png b/examples/BuddyMobileNetV3/images/curtain.png
new file mode 100644
index 000000000..1ae383d35
Binary files /dev/null and b/examples/BuddyMobileNetV3/images/curtain.png differ
diff --git a/examples/BuddyMobileNetV3/images/dog.png b/examples/BuddyMobileNetV3/images/dog.png
new file mode 100644
index 000000000..12f0e0dd1
Binary files /dev/null and b/examples/BuddyMobileNetV3/images/dog.png differ
diff --git a/examples/BuddyMobileNetV3/images/ice-cream.png b/examples/BuddyMobileNetV3/images/ice-cream.png
new file mode 100644
index 000000000..209d8999d
Binary files /dev/null and b/examples/BuddyMobileNetV3/images/ice-cream.png differ
diff --git a/examples/BuddyMobileNetV3/images/kite.png b/examples/BuddyMobileNetV3/images/kite.png
new file mode 100644
index 000000000..23ffe9613
Binary files /dev/null and b/examples/BuddyMobileNetV3/images/kite.png differ
diff --git a/examples/BuddyMobileNetV3/images/traffic-light.png b/examples/BuddyMobileNetV3/images/traffic-light.png
new file mode 100644
index 000000000..fa1a1e3f6
Binary files /dev/null and b/examples/BuddyMobileNetV3/images/traffic-light.png differ
diff --git a/examples/BuddyNext/.gitignore b/examples/BuddyNext/.gitignore
new file mode 100644
index 000000000..0194ea7a6
--- /dev/null
+++ b/examples/BuddyNext/.gitignore
@@ -0,0 +1,3 @@
+log.mlir
+log.ll
+log.s
diff --git a/examples/BuddyNext/makefile b/examples/BuddyNext/makefile
new file mode 100644
index 000000000..443907d35
--- /dev/null
+++ b/examples/BuddyNext/makefile
@@ -0,0 +1,232 @@
+BUDDY_OPT := ../../build/bin/buddy-opt
+MLIR_OPT := ../../llvm/build/bin/mlir-opt
+MLIR_TRANSLATE := ../../llvm/build/bin/mlir-translate
+MLIR_CPU_RUNNER := ../../llvm/build/bin/mlir-cpu-runner
+LLC := ../../llvm/build/bin/llc
+OPT_FLAG := -O0
+
+ifeq ($(shell uname),Linux)
+MLIR_RUNNER_UTILS := ../../llvm/build/lib/libmlir_runner_utils.so
+MLIR_C_RUNNER_UTILS := ../../llvm/build/lib/libmlir_c_runner_utils.so
+MTRIPLE := x86_64-unknown-linux-gnu
+else ifeq ($(shell uname),Darwin)
+MLIR_RUNNER_UTILS := ../../llvm/build/lib/libmlir_runner_utils.dylib
+MLIR_C_RUNNER_UTILS := ../../llvm/build/lib/libmlir_c_runner_utils.dylib
+MTRIPLE := x86_64-apple-darwin
+endif
+
+next-attention-lower:
+	@${MLIR_OPT} ./next-attention.mlir \
+	-pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | \
+	${MLIR_OPT} \
+	-arith-expand \
+	-eliminate-empty-tensors \
+	-empty-tensor-to-alloc-tensor \
+	-one-shot-bufferize \
+	-convert-linalg-to-affine-loops \
+	-affine-loop-fusion \
+	-lower-affine \
+	-func-bufferize \
+	-arith-bufferize \
+	-tensor-bufferize \
+	-buffer-deallocation \
+	-finalizing-bufferize \
+	-convert-vector-to-scf \
+	-expand-strided-metadata \
+	-convert-vector-to-llvm \
+	-memref-expand \
+	-arith-expand \
+	-convert-arith-to-llvm \
+	-finalize-memref-to-llvm \
+	-convert-scf-to-cf \
+	-convert-openmp-to-llvm \
+	-convert-arith-to-llvm \
+	-convert-math-to-llvm \
+	-convert-math-to-libm \
+	-convert-func-to-llvm \
+	-reconcile-unrealized-casts \
+	-o ./log.mlir
+
+next-attention-translate:
+	@${MLIR_OPT}
./next-attention.mlir \ + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | \ + ${MLIR_OPT} \ + -arith-expand \ + -eliminate-empty-tensors \ + -empty-tensor-to-alloc-tensor \ + -one-shot-bufferize \ + -convert-linalg-to-affine-loops \ + -affine-loop-fusion \ + -lower-affine \ + -func-bufferize \ + -arith-bufferize \ + -tensor-bufferize \ + -buffer-deallocation \ + -finalizing-bufferize \ + -convert-vector-to-scf \ + -expand-strided-metadata \ + -convert-vector-to-llvm \ + -memref-expand \ + -arith-expand \ + -convert-arith-to-llvm \ + -finalize-memref-to-llvm \ + -convert-scf-to-cf \ + -convert-openmp-to-llvm \ + -convert-arith-to-llvm \ + -convert-math-to-llvm \ + -convert-math-to-libm \ + -convert-func-to-llvm \ + -reconcile-unrealized-casts | \ + ${MLIR_TRANSLATE} --mlir-to-llvmir -o log.ll + +next-attention-run: + @${MLIR_OPT} ./next-attention.mlir \ + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | \ + ${MLIR_OPT} \ + -arith-expand \ + -eliminate-empty-tensors \ + -empty-tensor-to-alloc-tensor \ + -one-shot-bufferize \ + -convert-linalg-to-affine-loops \ + -affine-loop-fusion \ + -lower-affine \ + -func-bufferize \ + -arith-bufferize \ + -tensor-bufferize \ + -buffer-deallocation \ + -finalizing-bufferize \ + -convert-vector-to-scf \ + -expand-strided-metadata \ + -convert-vector-to-llvm \ + -memref-expand \ + -arith-expand \ + -convert-arith-to-llvm \ + -finalize-memref-to-llvm \ + -convert-scf-to-cf \ + -convert-openmp-to-llvm \ + -convert-arith-to-llvm \ + -convert-math-to-llvm \ + -convert-math-to-libm \ + -convert-func-to-llvm \ + -reconcile-unrealized-casts | \ + ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \ + -shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS} + +next-attention-loop-run: + @${MLIR_OPT} ./next-attention-loop.mlir \ + -affine-loop-fusion \ + -lower-affine \ + -func-bufferize \ + -arith-bufferize \ + -tensor-bufferize \ + -buffer-deallocation \ + -finalizing-bufferize \ + -convert-vector-to-scf \ + -expand-strided-metadata \ + -convert-vector-to-llvm \ + -memref-expand \ + -arith-expand \ + -convert-arith-to-llvm \ + -finalize-memref-to-llvm \ + -convert-scf-to-cf \ + -convert-openmp-to-llvm \ + -convert-arith-to-llvm \ + -convert-math-to-llvm \ + -convert-math-to-libm \ + -convert-func-to-llvm \ + -reconcile-unrealized-casts | \ + ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \ + -shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS} + +next-attention-fusion-run: + @${MLIR_OPT} ./next-attention-fusion.mlir \ + -affine-loop-fusion \ + -lower-affine \ + -func-bufferize \ + -arith-bufferize \ + -tensor-bufferize \ + -buffer-deallocation \ + -finalizing-bufferize \ + -convert-vector-to-scf \ + -expand-strided-metadata \ + -convert-vector-to-llvm \ + -memref-expand \ + -arith-expand \ + -convert-arith-to-llvm \ + -finalize-memref-to-llvm \ + -convert-scf-to-cf \ + -convert-openmp-to-llvm \ + -convert-arith-to-llvm \ + -convert-math-to-llvm \ + -convert-math-to-libm \ + -convert-func-to-llvm \ + -reconcile-unrealized-casts | \ + ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \ + -shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS} + +next-sigmoid-run: + @${MLIR_OPT} ./next-sigmoid.mlir \ + -pass-pipeline 
"builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | \ + ${MLIR_OPT} \ + -arith-expand \ + -eliminate-empty-tensors \ + -empty-tensor-to-alloc-tensor \ + -one-shot-bufferize \ + -convert-linalg-to-affine-loops \ + -affine-loop-fusion \ + -lower-affine \ + -func-bufferize \ + -arith-bufferize \ + -tensor-bufferize \ + -buffer-deallocation \ + -finalizing-bufferize \ + -convert-vector-to-scf \ + -expand-strided-metadata \ + -convert-vector-to-llvm \ + -memref-expand \ + -arith-expand \ + -convert-arith-to-llvm \ + -finalize-memref-to-llvm \ + -convert-scf-to-cf \ + -convert-openmp-to-llvm \ + -convert-arith-to-llvm \ + -convert-math-to-llvm \ + -convert-math-to-libm \ + -convert-func-to-llvm \ + -reconcile-unrealized-casts | \ + ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \ + -shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS} + +next-rope-run: + @${MLIR_OPT} ./next-rope.mlir \ + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | \ + ${MLIR_OPT} \ + -arith-expand \ + -eliminate-empty-tensors \ + -empty-tensor-to-alloc-tensor \ + -one-shot-bufferize \ + -convert-linalg-to-affine-loops \ + -affine-loop-fusion \ + -lower-affine \ + -func-bufferize \ + -arith-bufferize \ + -tensor-bufferize \ + -buffer-deallocation \ + -finalizing-bufferize \ + -convert-vector-to-scf \ + -expand-strided-metadata \ + -convert-vector-to-llvm \ + -memref-expand \ + -arith-expand \ + -convert-arith-to-llvm \ + -finalize-memref-to-llvm \ + -convert-scf-to-cf \ + -convert-openmp-to-llvm \ + -convert-arith-to-llvm \ + -convert-math-to-llvm \ + -convert-math-to-libm \ + -convert-func-to-llvm \ + -reconcile-unrealized-casts | \ + ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \ + -shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS} diff --git a/examples/BuddyNext/next-attention-fusion.mlir b/examples/BuddyNext/next-attention-fusion.mlir new file mode 100644 index 000000000..289bc5013 --- /dev/null +++ b/examples/BuddyNext/next-attention-fusion.mlir @@ -0,0 +1,240 @@ +// RUN: buddy-opt %s \ +// RUN: -affine-loop-fusion \ +// RUN: -lower-affine \ +// RUN: -func-bufferize \ +// RUN: -arith-bufferize \ +// RUN: -tensor-bufferize \ +// RUN: -buffer-deallocation \ +// RUN: -finalizing-bufferize \ +// RUN: -convert-vector-to-scf \ +// RUN: -expand-strided-metadata \ +// RUN: -convert-vector-to-llvm \ +// RUN: -memref-expand \ +// RUN: -arith-expand \ +// RUN: -convert-arith-to-llvm \ +// RUN: -finalize-memref-to-llvm \ +// RUN: -convert-scf-to-cf \ +// RUN: -convert-openmp-to-llvm \ +// RUN: -convert-arith-to-llvm \ +// RUN: -convert-math-to-llvm \ +// RUN: -convert-math-to-libm \ +// RUN: -convert-func-to-llvm \ +// RUN: -reconcile-unrealized-casts \ +// RUN: | mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \ +// RUN: | FileCheck %s + +module { + func.func private @rtclock() -> f64 + memref.global "private" constant @__constant_1x32x40x128xf32 : memref<1x32x40x128xf32> = dense<8.000000e+00> {alignment = 64 : i64} + memref.global "private" constant @__constant_1x1x40x40xf32 : memref<1x1x40x40xf32> = dense<4.000000e+00> {alignment = 64 : i64} + memref.global "private" constant @__constant_32x128x40xf32 : memref<32x128x40xf32> = 
dense<2.000000e+00> {alignment = 64 : i64}
+  memref.global "private" constant @__constant_32x40x128xf32 : memref<32x40x128xf32> = dense<3.000000e+00> {alignment = 64 : i64}
+  memref.global "private" constant @__constant_1x32x40x40xf32 : memref<1x32x40x40xf32> = dense<11.3137083> {alignment = 64 : i64}
+  func.func @kenerl(%arg0: tensor<32x40x128xf32>, %arg1: tensor<32x128x40xf32>, %arg2: tensor<1x1x40x40xf32>, %arg3: tensor<1x32x40x128xf32>) {
+    %t_start = call @rtclock() : () -> f64
+    %cst = arith.constant 0.0883883461 : f32
+    %c0 = arith.constant 0 : index
+    %cst_0 = arith.constant 0.000000e+00 : f32
+    %cst_1 = arith.constant 1.000000e+00 : f32
+    %cst_2 = arith.constant -3.40282347E+38 : f32
+    %0 = bufferization.to_memref %arg3 : memref<1x32x40x128xf32, strided<[?, ?, ?, ?], offset: ?>>
+    %1 = bufferization.to_memref %arg2 : memref<1x1x40x40xf32, strided<[?, ?, ?, ?], offset: ?>>
+    %2 = bufferization.to_memref %arg1 : memref<32x128x40xf32, strided<[?, ?, ?], offset: ?>>
+    %3 = bufferization.to_memref %arg0 : memref<32x40x128xf32, strided<[?, ?, ?], offset: ?>>
+
+    // MatMul
+    // %0 = tosa.matmul %t0, %t1 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32>
+    // Initialize MatMul Output.
+    %alloc = memref.alloc() {alignment = 64 : i64} : memref<32x40x40xf32>
+    affine.for %arg4 = 0 to 32 {
+      affine.for %arg5 = 0 to 40 {
+        affine.for %arg6 = 0 to 40 {
+          affine.store %cst_0, %alloc[%arg4, %arg5, %arg6] : memref<32x40x40xf32>
+        }
+      }
+    }
+    // Perform MatMul core operations: multiplication and addition.
+    affine.for %arg4 = 0 to 32 {
+      affine.for %arg5 = 0 to 40 {
+        affine.for %arg6 = 0 to 40 {
+          affine.for %arg7 = 0 to 128 {
+            %5 = affine.load %3[%arg4, %arg5, %arg7] : memref<32x40x128xf32, strided<[?, ?, ?], offset: ?>>
+            %6 = affine.load %2[%arg4, %arg7, %arg6] : memref<32x128x40xf32, strided<[?, ?, ?], offset: ?>>
+            %7 = affine.load %alloc[%arg4, %arg5, %arg6] : memref<32x40x40xf32>
+            %8 = arith.mulf %5, %6 : f32
+            %9 = arith.addf %7, %8 : f32
+            affine.store %9, %alloc[%arg4, %arg5, %arg6] : memref<32x40x40xf32>
+          }
+        }
+      }
+    }
+
+    // Fusion: Reshape + Constant + Reciprocal + Multiplication + Addition + Reduce Max
+    // %1 = tosa.reshape %0 {new_shape = array<i64: 1, 32, 40, 40>} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    // %2 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    // %3 = tosa.reciprocal %2 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    // %4 = tosa.mul %1, %3 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    // %5 = tosa.add %4, %t2 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32>
+    // %6 = tosa.reduce_max %5 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    %expand_shape = memref.expand_shape %alloc [[0, 1], [2], [3]] : memref<32x40x40xf32> into memref<1x32x40x40xf32>
+    %alloc_5 = memref.alloc() {alignment = 64 : i64} : memref<1x32x40x40xf32>
+    %alloc_6 = memref.alloc() {alignment = 64 : i64} : memref<1x32x40xf32>
+    affine.for %arg4 = 0 to 1 {
+      affine.for %arg5 = 0 to 32 {
+        affine.for %arg6 = 0 to 40 {
+          affine.store %cst_2, %alloc_6[%arg4, %arg5, %arg6] : memref<1x32x40xf32>
+        }
+      }
+    }
+    affine.for %arg4 = 0 to 1 {
+      affine.for %arg5 = 0 to 32 {
+        affine.for %arg6 = 0 to 40 {
+          affine.for %arg7 = 0 to 40 {
+            %5 = affine.load %expand_shape[%c0, %arg5, %arg6, %arg7] : memref<1x32x40x40xf32>
+            // Fusion point: reshape + constant + reciprocal -> %cst
+            %6 = arith.mulf %5, %cst : f32
+            // Fusion point: addition
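+            // (The additive mask is broadcast from the 1x1x40x40 operand;
+            // hence the %c0 indices on its first two dimensions in the load
+            // below.)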
%7 = affine.load %1[%c0, %c0, %arg6, %arg7] : memref<1x1x40x40xf32, strided<[?, ?, ?, ?], offset: ?>> + %8 = arith.addf %6, %7 : f32 + // Fusion point: reduce max + %9 = affine.load %alloc_6[%arg4, %arg5, %arg6] : memref<1x32x40xf32> + %10 = arith.cmpf ugt, %8, %9 : f32 + %11 = arith.select %10, %8, %9 : f32 + affine.store %11, %alloc_6[%arg4, %arg5, %arg6] : memref<1x32x40xf32> + affine.store %8, %alloc_5[%arg4, %arg5, %arg6, %arg7] : memref<1x32x40x40xf32> + } + } + } + } + + // Fusion: Subtraction + Exponentiation + Reduce Sum + // %7 = tosa.sub %5, %6 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + // %8 = tosa.exp %7 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + // %9 = tosa.reduce_sum %8 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + %expand_shape_7 = memref.expand_shape %alloc_6 [[0], [1], [2, 3]] : memref<1x32x40xf32> into memref<1x32x40x1xf32> + %alloc_9 = memref.alloc() {alignment = 64 : i64} : memref<1x32x40x40xf32> + %alloc_10 = memref.alloc() {alignment = 64 : i64} : memref<1x32x40xf32> + affine.for %arg4 = 0 to 1 { + affine.for %arg5 = 0 to 32 { + affine.for %arg6 = 0 to 40 { + affine.store %cst_0, %alloc_10[%arg4, %arg5, %arg6] : memref<1x32x40xf32> + } + } + } + affine.for %arg4 = 0 to 1 { + affine.for %arg5 = 0 to 32 { + affine.for %arg6 = 0 to 40 { + affine.for %arg7 = 0 to 40 { + %5 = affine.load %alloc_5[%c0, %arg5, %arg6, %arg7] : memref<1x32x40x40xf32> + %6 = affine.load %expand_shape_7[%c0, %arg5, %arg6, %c0] : memref<1x32x40x1xf32> + // Fusion point: subtraction + %7 = arith.subf %5, %6 : f32 + // Fusion point: exponentiation + %8 = math.exp %7 : f32 + // Fusion point: reduce sum + %9 = affine.load %alloc_10[%arg4, %arg5, %arg6] : memref<1x32x40xf32> + %10 = arith.addf %8, %9 : f32 + affine.store %10, %alloc_10[%arg4, %arg5, %arg6] : memref<1x32x40xf32> + affine.store %8, %alloc_9[%arg4, %arg5, %arg6, %arg7] : memref<1x32x40x40xf32> + } + } + } + } + + // Fusion: Reciprocal + Multiplication + // %10 = tosa.reciprocal %9 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32> + // %11 = tosa.mul %8, %10 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + %expand_shape_11 = memref.expand_shape %alloc_10 [[0], [1], [2, 3]] : memref<1x32x40xf32> into memref<1x32x40x1xf32> + %alloc_13 = memref.alloc() {alignment = 64 : i64} : memref<1x32x40x40xf32> + affine.for %arg4 = 0 to 1 { + affine.for %arg5 = 0 to 32 { + affine.for %arg6 = 0 to 40 { + // Fusion point: reciprocal + %5 = affine.load %expand_shape_11[%c0, %arg5, %arg6, %c0] : memref<1x32x40x1xf32> + %6 = arith.divf %cst_1, %5 : f32 + affine.for %arg7 = 0 to 40 { + // Fusion point: multiplication + %7 = affine.load %alloc_9[%c0, %arg5, %arg6, %arg7] : memref<1x32x40x40xf32> + %8 = arith.mulf %6, %7 : f32 + affine.store %8, %alloc_13[%arg4, %arg5, %arg6, %arg7] : memref<1x32x40x40xf32> + } + } + } + } + + // Prepare MatMul input memref. 
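+    // (The zero-valued tosa.add ops quoted below are identity operations;
+    // after fusion they reduce to the collapse_shape and memref.copy
+    // sequence that follows.)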
+    // %12 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    // %13 = tosa.add %11, %12 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    // %14 = tosa.reshape %13 {new_shape = array<i64: 32, 40, 40>} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32>
+    // %15 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    // %16 = tosa.add %t3, %15 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    // %17 = tosa.reshape %16 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %collapse_shape = memref.collapse_shape %alloc_13 [[0, 1], [2], [3]] : memref<1x32x40x40xf32> into memref<32x40x40xf32>
+    %alloc_14 = memref.alloc() {alignment = 64 : i64} : memref<1x32x40x128xf32>
+    // SSA value %0 is from %arg3
+    memref.copy %0, %alloc_14 : memref<1x32x40x128xf32, strided<[?, ?, ?, ?], offset: ?>> to memref<1x32x40x128xf32>
+    %collapse_shape_15 = memref.collapse_shape %alloc_14 [[0, 1], [2], [3]] : memref<1x32x40x128xf32> into memref<32x40x128xf32>
+
+    // MatMul
+    // %18 = tosa.matmul %14, %17 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32>
+    // Allocate space and initialize output.
+    %alloc_16 = memref.alloc() {alignment = 64 : i64} : memref<32x40x128xf32>
+    affine.for %arg4 = 0 to 32 {
+      affine.for %arg5 = 0 to 40 {
+        affine.for %arg6 = 0 to 128 {
+          affine.store %cst_0, %alloc_16[%arg4, %arg5, %arg6] : memref<32x40x128xf32>
+        }
+      }
+    }
+    // Perform MatMul core operations: multiplication and addition.
+    affine.for %arg4 = 0 to 32 {
+      affine.for %arg5 = 0 to 40 {
+        affine.for %arg6 = 0 to 128 {
+          affine.for %arg7 = 0 to 40 {
+            %5 = affine.load %collapse_shape[%arg4, %arg5, %arg7] : memref<32x40x40xf32>
+            %6 = affine.load %collapse_shape_15[%arg4, %arg7, %arg6] : memref<32x40x128xf32>
+            %7 = affine.load %alloc_16[%arg4, %arg5, %arg6] : memref<32x40x128xf32>
+            %8 = arith.mulf %5, %6 : f32
+            %9 = arith.addf %7, %8 : f32
+            affine.store %9, %alloc_16[%arg4, %arg5, %arg6] : memref<32x40x128xf32>
+          }
+        }
+      }
+    }
+
+    %t_end = call @rtclock() : () -> f64
+    %time = arith.subf %t_end, %t_start : f64
+
+    %cast = memref.cast %alloc_16 : memref<32x40x128xf32> to memref<*xf32>
+    %4 = bufferization.to_tensor %cast : memref<*xf32>
+
+    // All the elements of the MemRef are the same,
+    // only check the first line to verify the correctness.
+    // CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [32, 40, 128] strides = [5120, 128, 1] data =
+    // CHECK-NEXT: [
+    // CHECK-SAME: [
+    // CHECK-SAME: [8{{(, 8)*}}],
+
+    // Print results.
+    call @printMemrefF32(%4) : (tensor<*xf32>) -> ()
+    // Print timings.
+ vector.print %time : f64 + + return + } + func.func @main() { + %0 = memref.get_global @__constant_32x40x128xf32 : memref<32x40x128xf32> + %1 = bufferization.to_tensor %0 : memref<32x40x128xf32> + %2 = memref.get_global @__constant_32x128x40xf32 : memref<32x128x40xf32> + %3 = bufferization.to_tensor %2 : memref<32x128x40xf32> + %4 = memref.get_global @__constant_1x1x40x40xf32 : memref<1x1x40x40xf32> + %5 = bufferization.to_tensor %4 : memref<1x1x40x40xf32> + %6 = memref.get_global @__constant_1x32x40x128xf32 : memref<1x32x40x128xf32> + %7 = bufferization.to_tensor %6 : memref<1x32x40x128xf32> + call @kenerl(%1, %3, %5, %7) : (tensor<32x40x128xf32>, tensor<32x128x40xf32>, tensor<1x1x40x40xf32>, tensor<1x32x40x128xf32>) -> () + return + } + func.func private @printMemrefF32(tensor<*xf32>) +} diff --git a/examples/BuddyNext/next-attention-loop.mlir b/examples/BuddyNext/next-attention-loop.mlir new file mode 100644 index 000000000..e47f275d5 --- /dev/null +++ b/examples/BuddyNext/next-attention-loop.mlir @@ -0,0 +1,314 @@ +// RUN: buddy-opt %s \ +// RUN: -affine-loop-fusion \ +// RUN: -lower-affine \ +// RUN: -func-bufferize \ +// RUN: -arith-bufferize \ +// RUN: -tensor-bufferize \ +// RUN: -buffer-deallocation \ +// RUN: -finalizing-bufferize \ +// RUN: -convert-vector-to-scf \ +// RUN: -expand-strided-metadata \ +// RUN: -convert-vector-to-llvm \ +// RUN: -memref-expand \ +// RUN: -arith-expand \ +// RUN: -convert-arith-to-llvm \ +// RUN: -finalize-memref-to-llvm \ +// RUN: -convert-scf-to-cf \ +// RUN: -convert-openmp-to-llvm \ +// RUN: -convert-arith-to-llvm \ +// RUN: -convert-math-to-llvm \ +// RUN: -convert-math-to-libm \ +// RUN: -convert-func-to-llvm \ +// RUN: -reconcile-unrealized-casts \ +// RUN: | mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \ +// RUN: | FileCheck %s + +module { + func.func private @rtclock() -> f64 + memref.global "private" constant @__constant_1x32x40x128xf32 : memref<1x32x40x128xf32> = dense<8.000000e+00> {alignment = 64 : i64} + memref.global "private" constant @__constant_1x1x40x40xf32 : memref<1x1x40x40xf32> = dense<4.000000e+00> {alignment = 64 : i64} + memref.global "private" constant @__constant_32x128x40xf32 : memref<32x128x40xf32> = dense<2.000000e+00> {alignment = 64 : i64} + memref.global "private" constant @__constant_32x40x128xf32 : memref<32x40x128xf32> = dense<3.000000e+00> {alignment = 64 : i64} + memref.global "private" constant @__constant_1x32x40x40xf32 : memref<1x32x40x40xf32> = dense<11.3137083> {alignment = 64 : i64} + func.func @kenerl(%arg0: tensor<32x40x128xf32>, %arg1: tensor<32x128x40xf32>, %arg2: tensor<1x1x40x40xf32>, %arg3: tensor<1x32x40x128xf32>) { + %t_start = call @rtclock() : () -> f64 + %cst = arith.constant 0.0883883461 : f32 + %c0 = arith.constant 0 : index + %cst_0 = arith.constant 0.000000e+00 : f32 + %cst_1 = arith.constant 1.000000e+00 : f32 + %cst_2 = arith.constant -3.40282347E+38 : f32 + %0 = bufferization.to_memref %arg3 : memref<1x32x40x128xf32, strided<[?, ?, ?, ?], offset: ?>> + %1 = bufferization.to_memref %arg2 : memref<1x1x40x40xf32, strided<[?, ?, ?, ?], offset: ?>> + %2 = bufferization.to_memref %arg1 : memref<32x128x40xf32, strided<[?, ?, ?], offset: ?>> + %3 = bufferization.to_memref %arg0 : memref<32x40x128xf32, strided<[?, ?, ?], offset: ?>> + + // MatMul + // %0 = tosa.matmul %t0, %t1 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> 
tensor<32x40x40xf32>
+    // Initialize MatMul Output.
+    %alloc = memref.alloc() {alignment = 64 : i64} : memref<32x40x40xf32>
+    affine.for %arg4 = 0 to 32 {
+      affine.for %arg5 = 0 to 40 {
+        affine.for %arg6 = 0 to 40 {
+          affine.store %cst_0, %alloc[%arg4, %arg5, %arg6] : memref<32x40x40xf32>
+        }
+      }
+    }
+    // Perform MatMul core operations: multiplication and addition.
+    affine.for %arg4 = 0 to 32 {
+      affine.for %arg5 = 0 to 40 {
+        affine.for %arg6 = 0 to 40 {
+          affine.for %arg7 = 0 to 128 {
+            %5 = affine.load %3[%arg4, %arg5, %arg7] : memref<32x40x128xf32, strided<[?, ?, ?], offset: ?>>
+            %6 = affine.load %2[%arg4, %arg7, %arg6] : memref<32x128x40xf32, strided<[?, ?, ?], offset: ?>>
+            %7 = affine.load %alloc[%arg4, %arg5, %arg6] : memref<32x40x40xf32>
+            %8 = arith.mulf %5, %6 : f32
+            %9 = arith.addf %7, %8 : f32
+            affine.store %9, %alloc[%arg4, %arg5, %arg6] : memref<32x40x40xf32>
+          }
+        }
+      }
+    }
+
+    // Reshape + Constant + Reciprocal
+    // %1 = tosa.reshape %0 {new_shape = array<i64: 1, 32, 40, 40>} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    // %2 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    // %3 = tosa.reciprocal %2 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %expand_shape = memref.expand_shape %alloc [[0, 1], [2], [3]] : memref<32x40x40xf32> into memref<1x32x40x40xf32>
+    %alloc_3 = memref.alloc() {alignment = 64 : i64} : memref<1x32x40x40xf32>
+    affine.for %arg4 = 0 to 1 {
+      affine.for %arg5 = 0 to 32 {
+        affine.for %arg6 = 0 to 40 {
+          affine.for %arg7 = 0 to 40 {
+            affine.store %cst, %alloc_3[%arg4, %arg5, %arg6, %arg7] : memref<1x32x40x40xf32>
+          }
+        }
+      }
+    }
+
+    // Multiplication
+    // %4 = tosa.mul %1, %3 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %alloc_4 = memref.alloc() {alignment = 64 : i64} : memref<1x32x40x40xf32>
+    affine.for %arg4 = 0 to 1 {
+      affine.for %arg5 = 0 to 32 {
+        affine.for %arg6 = 0 to 40 {
+          affine.for %arg7 = 0 to 40 {
+            %5 = affine.load %expand_shape[%c0, %arg5, %arg6, %arg7] : memref<1x32x40x40xf32>
+            %6 = affine.load %alloc_3[%c0, %arg5, %arg6, %arg7] : memref<1x32x40x40xf32>
+            %7 = arith.mulf %5, %6 : f32
+            affine.store %7, %alloc_4[%arg4, %arg5, %arg6, %arg7] : memref<1x32x40x40xf32>
+          }
+        }
+      }
+    }
+
+    // Addition
+    // %5 = tosa.add %4, %t2 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32>
+    %alloc_5 = memref.alloc() {alignment = 64 : i64} : memref<1x32x40x40xf32>
+    affine.for %arg4 = 0 to 1 {
+      affine.for %arg5 = 0 to 32 {
+        affine.for %arg6 = 0 to 40 {
+          affine.for %arg7 = 0 to 40 {
+            %5 = affine.load %alloc_4[%c0, %arg5, %arg6, %arg7] : memref<1x32x40x40xf32>
+            %6 = affine.load %1[%c0, %c0, %arg6, %arg7] : memref<1x1x40x40xf32, strided<[?, ?, ?, ?], offset: ?>>
+            %7 = arith.addf %5, %6 : f32
+            affine.store %7, %alloc_5[%arg4, %arg5, %arg6, %arg7] : memref<1x32x40x40xf32>
+          }
+        }
+      }
+    }
+
+    // Reduce Max
+    // %6 = tosa.reduce_max %5 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+    // Initialize reduce max operation output.
+    %alloc_6 = memref.alloc() {alignment = 64 : i64} : memref<1x32x40xf32>
+    affine.for %arg4 = 0 to 1 {
+      affine.for %arg5 = 0 to 32 {
+        affine.for %arg6 = 0 to 40 {
+          affine.store %cst_2, %alloc_6[%arg4, %arg5, %arg6] : memref<1x32x40xf32>
+        }
+      }
+    }
+    // Perform reduce max operation.
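+    // The compare/select pair in the loop below implements a
+    // NaN-propagating maximum: `ugt` favors the incoming element, and the
+    // extra `uno` check keeps an accumulator that is already NaN, matching
+    // tosa.reduce_max semantics.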
+ affine.for %arg4 = 0 to 1 { + affine.for %arg5 = 0 to 32 { + affine.for %arg6 = 0 to 40 { + affine.for %arg7 = 0 to 40 { + %5 = affine.load %alloc_5[%arg4, %arg5, %arg6, %arg7] : memref<1x32x40x40xf32> + %6 = affine.load %alloc_6[%arg4, %arg5, %arg6] : memref<1x32x40xf32> + %7 = arith.cmpf ugt, %5, %6 : f32 + %8 = arith.select %7, %5, %6 : f32 + %9 = arith.cmpf uno, %6, %6 : f32 + %10 = arith.select %9, %6, %8 : f32 + affine.store %10, %alloc_6[%arg4, %arg5, %arg6] : memref<1x32x40xf32> + } + } + } + } + + // Subtraction + // %7 = tosa.sub %5, %6 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32> + // Allocate space and perform subtraction. + %expand_shape_7 = memref.expand_shape %alloc_6 [[0], [1], [2, 3]] : memref<1x32x40xf32> into memref<1x32x40x1xf32> + %alloc_8 = memref.alloc() {alignment = 64 : i64} : memref<1x32x40x40xf32> + affine.for %arg4 = 0 to 1 { + affine.for %arg5 = 0 to 32 { + affine.for %arg6 = 0 to 40 { + affine.for %arg7 = 0 to 40 { + %5 = affine.load %alloc_5[%c0, %arg5, %arg6, %arg7] : memref<1x32x40x40xf32> + %6 = affine.load %expand_shape_7[%c0, %arg5, %arg6, %c0] : memref<1x32x40x1xf32> + %7 = arith.subf %5, %6 : f32 + affine.store %7, %alloc_8[%arg4, %arg5, %arg6, %arg7] : memref<1x32x40x40xf32> + } + } + } + } + + // Exponentiation + // %8 = tosa.exp %7 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32> + // Allocate space and perform exponentiation. + %alloc_9 = memref.alloc() {alignment = 64 : i64} : memref<1x32x40x40xf32> + affine.for %arg4 = 0 to 1 { + affine.for %arg5 = 0 to 32 { + affine.for %arg6 = 0 to 40 { + affine.for %arg7 = 0 to 40 { + %5 = affine.load %alloc_8[%c0, %arg5, %arg6, %arg7] : memref<1x32x40x40xf32> + %6 = math.exp %5 : f32 + affine.store %6, %alloc_9[%arg4, %arg5, %arg6, %arg7] : memref<1x32x40x40xf32> + } + } + } + } + + // Reduce Sum + // %9 = tosa.reduce_sum %8 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32> + // Allocate space and initialize the output. + %alloc_10 = memref.alloc() {alignment = 64 : i64} : memref<1x32x40xf32> + affine.for %arg4 = 0 to 1 { + affine.for %arg5 = 0 to 32 { + affine.for %arg6 = 0 to 40 { + affine.store %cst_0, %alloc_10[%arg4, %arg5, %arg6] : memref<1x32x40xf32> + } + } + } + // Perform reduce sum operation. 
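+    // The accumulator was zero-initialized above, so each pass of the
+    // innermost loop below adds one element of axis 3 to the running sum.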
+    affine.for %arg4 = 0 to 1 {
+      affine.for %arg5 = 0 to 32 {
+        affine.for %arg6 = 0 to 40 {
+          affine.for %arg7 = 0 to 40 {
+            %5 = affine.load %alloc_9[%arg4, %arg5, %arg6, %arg7] : memref<1x32x40x40xf32>
+            %6 = affine.load %alloc_10[%arg4, %arg5, %arg6] : memref<1x32x40xf32>
+            %7 = arith.addf %5, %6 : f32
+            affine.store %7, %alloc_10[%arg4, %arg5, %arg6] : memref<1x32x40xf32>
+          }
+        }
+      }
+    }
+
+    // Reciprocal
+    // %10 = tosa.reciprocal %9 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32>
+    %expand_shape_11 = memref.expand_shape %alloc_10 [[0], [1], [2, 3]] : memref<1x32x40xf32> into memref<1x32x40x1xf32>
+    %alloc_12 = memref.alloc() {alignment = 64 : i64} : memref<1x32x40x1xf32>
+    affine.for %arg4 = 0 to 1 {
+      affine.for %arg5 = 0 to 32 {
+        affine.for %arg6 = 0 to 40 {
+          affine.for %arg7 = 0 to 1 {
+            %5 = affine.load %expand_shape_11[%c0, %arg5, %arg6, %c0] : memref<1x32x40x1xf32>
+            %6 = arith.divf %cst_1, %5 : f32
+            affine.store %6, %alloc_12[%arg4, %arg5, %arg6, %arg7] : memref<1x32x40x1xf32>
+          }
+        }
+      }
+    }
+
+    // Multiplication
+    // %11 = tosa.mul %8, %10 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+    %alloc_13 = memref.alloc() {alignment = 64 : i64} : memref<1x32x40x40xf32>
+    affine.for %arg4 = 0 to 1 {
+      affine.for %arg5 = 0 to 32 {
+        affine.for %arg6 = 0 to 40 {
+          affine.for %arg7 = 0 to 40 {
+            %5 = affine.load %alloc_9[%c0, %arg5, %arg6, %arg7] : memref<1x32x40x40xf32>
+            %6 = affine.load %alloc_12[%c0, %arg5, %arg6, %c0] : memref<1x32x40x1xf32>
+            %7 = arith.mulf %5, %6 : f32
+            affine.store %7, %alloc_13[%arg4, %arg5, %arg6, %arg7] : memref<1x32x40x40xf32>
+          }
+        }
+      }
+    }
+
+    // Prepare MatMul input memref.
+    // %12 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+    // %13 = tosa.add %11, %12 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+    // %14 = tosa.reshape %13 {new_shape = array<i64: 32, 40, 40>} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32>
+    // %15 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    // %16 = tosa.add %t3, %15 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    // %17 = tosa.reshape %16 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+    %collapse_shape = memref.collapse_shape %alloc_13 [[0, 1], [2], [3]] : memref<1x32x40x40xf32> into memref<32x40x40xf32>
+    %alloc_14 = memref.alloc() {alignment = 64 : i64} : memref<1x32x40x128xf32>
+    // SSA value %0 is from %arg3
+    memref.copy %0, %alloc_14 : memref<1x32x40x128xf32, strided<[?, ?, ?, ?], offset: ?>> to memref<1x32x40x128xf32>
+    %collapse_shape_15 = memref.collapse_shape %alloc_14 [[0, 1], [2], [3]] : memref<1x32x40x128xf32> into memref<32x40x128xf32>
+
+    // MatMul
+    // %18 = tosa.matmul %14, %17 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32>
+    // Allocate space and initialize output.
+    %alloc_16 = memref.alloc() {alignment = 64 : i64} : memref<32x40x128xf32>
+    affine.for %arg4 = 0 to 32 {
+      affine.for %arg5 = 0 to 40 {
+        affine.for %arg6 = 0 to 128 {
+          affine.store %cst_0, %alloc_16[%arg4, %arg5, %arg6] : memref<32x40x128xf32>
+        }
+      }
+    }
+    // Perform MatMul core operations: multiplication and addition.
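+    // A classic i-j-k matmul nest follows: the reduction dimension (%arg7,
+    // size 40) is innermost, so every output element accumulates 40
+    // multiply-add steps.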
+ affine.for %arg4 = 0 to 32 { + affine.for %arg5 = 0 to 40 { + affine.for %arg6 = 0 to 128 { + affine.for %arg7 = 0 to 40 { + %5 = affine.load %collapse_shape[%arg4, %arg5, %arg7] : memref<32x40x40xf32> + %6 = affine.load %collapse_shape_15[%arg4, %arg7, %arg6] : memref<32x40x128xf32> + %7 = affine.load %alloc_16[%arg4, %arg5, %arg6] : memref<32x40x128xf32> + %8 = arith.mulf %5, %6 : f32 + %9 = arith.addf %7, %8 : f32 + affine.store %9, %alloc_16[%arg4, %arg5, %arg6] : memref<32x40x128xf32> + } + } + } + } + + %t_end = call @rtclock() : () -> f64 + %time = arith.subf %t_end, %t_start : f64 + + %cast = memref.cast %alloc_16 : memref<32x40x128xf32> to memref<*xf32> + %4 = bufferization.to_tensor %cast : memref<*xf32> + + // All the elements of the MemRef are the same, + // only check the first line to verify the correctness. + // CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [32, 40, 128] strides = [5120, 128, 1] data = + // CHECK-NEXT: [ + // CHECK-SAME: [ + // CHECK-SAME: [8{{(, 8)*}}], + + // Print results. + call @printMemrefF32(%4) : (tensor<*xf32>) -> () + // Print timings. + vector.print %time : f64 + + return + } + func.func @main() { + %0 = memref.get_global @__constant_32x40x128xf32 : memref<32x40x128xf32> + %1 = bufferization.to_tensor %0 : memref<32x40x128xf32> + %2 = memref.get_global @__constant_32x128x40xf32 : memref<32x128x40xf32> + %3 = bufferization.to_tensor %2 : memref<32x128x40xf32> + %4 = memref.get_global @__constant_1x1x40x40xf32 : memref<1x1x40x40xf32> + %5 = bufferization.to_tensor %4 : memref<1x1x40x40xf32> + %6 = memref.get_global @__constant_1x32x40x128xf32 : memref<1x32x40x128xf32> + %7 = bufferization.to_tensor %6 : memref<1x32x40x128xf32> + call @kenerl(%1, %3, %5, %7) : (tensor<32x40x128xf32>, tensor<32x128x40xf32>, tensor<1x1x40x40xf32>, tensor<1x32x40x128xf32>) -> () + return + } + func.func private @printMemrefF32(tensor<*xf32>) +} diff --git a/examples/BuddyNext/next-attention.mlir b/examples/BuddyNext/next-attention.mlir new file mode 100644 index 000000000..36339be09 --- /dev/null +++ b/examples/BuddyNext/next-attention.mlir @@ -0,0 +1,91 @@ +// RUN: buddy-opt %s \ +// RUN: -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" \ +// RUN: | buddy-opt \ +// RUN: -arith-expand \ +// RUN: -eliminate-empty-tensors \ +// RUN: -empty-tensor-to-alloc-tensor \ +// RUN: -one-shot-bufferize \ +// RUN: -convert-linalg-to-affine-loops \ +// RUN: -affine-loop-fusion \ +// RUN: -lower-affine \ +// RUN: -func-bufferize \ +// RUN: -arith-bufferize \ +// RUN: -tensor-bufferize \ +// RUN: -buffer-deallocation \ +// RUN: -finalizing-bufferize \ +// RUN: -convert-vector-to-scf \ +// RUN: -expand-strided-metadata \ +// RUN: -convert-vector-to-llvm \ +// RUN: -memref-expand \ +// RUN: -arith-expand \ +// RUN: -convert-arith-to-llvm \ +// RUN: -finalize-memref-to-llvm \ +// RUN: -convert-scf-to-cf \ +// RUN: -convert-openmp-to-llvm \ +// RUN: -convert-arith-to-llvm \ +// RUN: -convert-math-to-llvm \ +// RUN: -convert-math-to-libm \ +// RUN: -convert-func-to-llvm \ +// RUN: -reconcile-unrealized-casts \ +// RUN: | mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func.func private @rtclock() -> f64 + +func.func @kenerl(%t0 : tensor<32x40x128xf32>, %t1 : tensor<32x128x40xf32>, %t2 : 
tensor<1x1x40x40xf32>, %t3 : tensor<1x32x40x128xf32>) {
+  %t_start = call @rtclock() : () -> f64
+
+  %0 = tosa.matmul %t0, %t1 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32>
+  %1 = tosa.reshape %0 {new_shape = array<i64: 1, 32, 40, 40>} : (tensor<32x40x40xf32>) -> tensor<1x32x40x40xf32>
+  %2 = "tosa.const"() <{value = dense<11.3137083> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+  %3 = tosa.reciprocal %2 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+  %4 = tosa.mul %1, %3 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+  %5 = tosa.add %4, %t2 : (tensor<1x32x40x40xf32>, tensor<1x1x40x40xf32>) -> tensor<1x32x40x40xf32>
+  %6 = tosa.reduce_max %5 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+  %7 = tosa.sub %5, %6 : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+  %8 = tosa.exp %7 : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+  %9 = tosa.reduce_sum %8 {axis = 3 : i32} : (tensor<1x32x40x40xf32>) -> tensor<1x32x40x1xf32>
+  %10 = tosa.reciprocal %9 : (tensor<1x32x40x1xf32>) -> tensor<1x32x40x1xf32>
+  %11 = tosa.mul %8, %10 {shift = 0 : i8} : (tensor<1x32x40x40xf32>, tensor<1x32x40x1xf32>) -> tensor<1x32x40x40xf32>
+  %12 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x40xf32>}> : () -> tensor<1x32x40x40xf32>
+  %13 = tosa.add %11, %12 : (tensor<1x32x40x40xf32>, tensor<1x32x40x40xf32>) -> tensor<1x32x40x40xf32>
+  %14 = tosa.reshape %13 {new_shape = array<i64: 32, 40, 40>} : (tensor<1x32x40x40xf32>) -> tensor<32x40x40xf32>
+  %15 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+  %16 = tosa.add %t3, %15 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+  %17 = tosa.reshape %16 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+  %18 = tosa.matmul %14, %17 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32>
+
+  %t_end = call @rtclock() : () -> f64
+  %time = arith.subf %t_end, %t_start : f64
+
+  %tensor_unranked = tensor.cast %18 : tensor<32x40x128xf32> to tensor<*xf32>
+
+  // All the elements of the MemRef are the same,
+  // only check the first line to verify the correctness.
+  // CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [32, 40, 128] strides = [5120, 128, 1] data =
+  // CHECK-NEXT: [
+  // CHECK-SAME: [
+  // CHECK-SAME: [8{{(, 8)*}}],
+
+  // Print results.
+  call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
+  // Print timings.
+ vector.print %time : f64 + + return +} + +func.func @main() { + + %c0 = arith.constant dense<3.0> : tensor<32x40x128xf32> + %c1 = arith.constant dense <2.0> : tensor<32x128x40xf32> + %c2 = arith.constant dense <4.0> : tensor<1x1x40x40xf32> + %c3 = arith.constant dense <8.0> : tensor<1x32x40x128xf32> + + call @kenerl(%c0, %c1, %c2, %c3) : (tensor<32x40x128xf32>, tensor<32x128x40xf32>, tensor<1x1x40x40xf32>, tensor<1x32x40x128xf32>) -> () + + return +} +func.func private @printMemrefF32(%ptr : tensor<*xf32>) diff --git a/examples/BuddyNext/next-rope.mlir b/examples/BuddyNext/next-rope.mlir new file mode 100644 index 000000000..091b2c220 --- /dev/null +++ b/examples/BuddyNext/next-rope.mlir @@ -0,0 +1,157 @@ +// RUN: buddy-opt %s \ +// RUN: -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" \ +// RUN: | buddy-opt \ +// RUN: -arith-expand \ +// RUN: -eliminate-empty-tensors \ +// RUN: -empty-tensor-to-alloc-tensor \ +// RUN: -one-shot-bufferize \ +// RUN: -convert-linalg-to-affine-loops \ +// RUN: -affine-loop-fusion \ +// RUN: -lower-affine \ +// RUN: -func-bufferize \ +// RUN: -arith-bufferize \ +// RUN: -tensor-bufferize \ +// RUN: -buffer-deallocation \ +// RUN: -finalizing-bufferize \ +// RUN: -convert-vector-to-scf \ +// RUN: -expand-strided-metadata \ +// RUN: -convert-vector-to-llvm \ +// RUN: -memref-expand \ +// RUN: -arith-expand \ +// RUN: -convert-arith-to-llvm \ +// RUN: -finalize-memref-to-llvm \ +// RUN: -convert-scf-to-cf \ +// RUN: -convert-openmp-to-llvm \ +// RUN: -convert-arith-to-llvm \ +// RUN: -convert-math-to-llvm \ +// RUN: -convert-math-to-libm \ +// RUN: -convert-func-to-llvm \ +// RUN: -reconcile-unrealized-casts \ +// RUN: | mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func.func private @rtclock() -> f64 + +#map = affine_map<(d0, d1, d2) -> (d1)> +#map1 = affine_map<(d0, d1, d2) -> (d0, d2)> +#map2 = affine_map<(d0, d1, d2) -> (d0, d1)> +#map3 = affine_map<(d0, d1) -> (d0, d1)> +#map4 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +#map5 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +#map6 = affine_map<(d0, d1, d2) -> (d0, 0, d1, d2)> +#map7 = affine_map<(d0, d1) -> (0, d0, d1)> + +func.func @kenerl(%arg0 : tensor<1x40x4096xf32>, %arg1 : tensor<1x40x4096xf32>, %arg2 : tensor<1x40x4096xf32>, %arg3 : tensor<1x1x2048x128xf32>, %arg4 : tensor<1x1x2048x128xf32>, %arg5 : tensor<1x40xi64>) { + %t_start = call @rtclock() : () -> f64 + + %57 = tosa.reshape %arg0 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %58 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %59 = tosa.transpose %57, %58 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + + %60 = tosa.reshape %arg1 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %61 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %62 = tosa.transpose %60, %61 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + + %63 = tosa.reshape %arg2 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %64 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %65 = tosa.transpose %63, %64 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> 
+
+ %extracted_slice_9 = tensor.extract_slice %arg3[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+ %extracted_slice_10 = tensor.extract_slice %extracted_slice_9[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+ %extracted_slice_11 = tensor.extract_slice %extracted_slice_10[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+ %extracted_slice_12 = tensor.extract_slice %arg4[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+ %extracted_slice_13 = tensor.extract_slice %extracted_slice_12[0, 0, 0, 0] [1, 1, 2048, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x2048x128xf32>
+ %extracted_slice_14 = tensor.extract_slice %extracted_slice_13[0, 0, 0, 0] [1, 1, 40, 128] [1, 1, 1, 1] : tensor<1x1x2048x128xf32> to tensor<1x1x40x128xf32>
+ %66 = tensor.empty() : tensor<1x40x128xf32>
+ %67 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_11 : tensor<1x1x40x128xf32>) outs(%66 : tensor<1x40x128xf32>) {
+ ^bb0(%in: f32, %out: f32):
+ linalg.yield %in : f32
+ } -> tensor<1x40x128xf32>
+ %68 = tensor.empty() : tensor<40x128xf32>
+ %69 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%67 : tensor<1x40x128xf32>) outs(%68 : tensor<40x128xf32>) {
+ ^bb0(%in: f32, %out: f32):
+ linalg.yield %in : f32
+ } -> tensor<40x128xf32>
+ %70 = tensor.empty() : tensor<1x40x128xf32>
+ %71 = linalg.generic {indexing_maps = [#map6, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_14 : tensor<1x1x40x128xf32>) outs(%70 : tensor<1x40x128xf32>) {
+ ^bb0(%in: f32, %out: f32):
+ linalg.yield %in : f32
+ } -> tensor<1x40x128xf32>
+ %72 = tensor.empty() : tensor<40x128xf32>
+ %73 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel"]} ins(%71 : tensor<1x40x128xf32>) outs(%72 : tensor<40x128xf32>) {
+ ^bb0(%in: f32, %out: f32):
+ linalg.yield %in : f32
+ } -> tensor<40x128xf32>
+ // The precompute_theta_pos_frequencies function is used to calculate the special values of RoPE according to: https://hyper.ai/wiki/29220
+ %74 = tensor.empty() : tensor<1x40x128xf32>
+ %75 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg5 : tensor<1x40xi64>) outs(%74 : tensor<1x40x128xf32>) {
+ ^bb0(%in: i64, %out: f32):
+ %4175 = arith.index_cast %in : i64 to index
+ %4176 = linalg.index 2 : index
+ %extracted = tensor.extract %69[%4175, %4176] : tensor<40x128xf32>
+ linalg.yield %extracted : f32
+ } -> tensor<1x40x128xf32>
+ %76 = tosa.reshape %75 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+ %77 = tensor.empty() : tensor<1x40x128xf32>
+ %78 = linalg.generic {indexing_maps = [#map2, #map5], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg5 : tensor<1x40xi64>) outs(%77 : tensor<1x40x128xf32>) {
+ ^bb0(%in: i64, %out: f32):
+ %4175 = arith.index_cast %in : i64 to index
+ %4176 = linalg.index 2 : index
+ %extracted = tensor.extract %73[%4175, %4176] : tensor<40x128xf32>
+ linalg.yield %extracted : f32
+ } -> tensor<1x40x128xf32>
+ %79 = tosa.reshape %78 {new_shape = array<i64: 1, 1, 40, 128>} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32>
+ %80 = tosa.mul %59, %76 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) ->
tensor<1x32x40x128xf32> + %extracted_slice_15 = tensor.extract_slice %59[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_16 = tensor.extract_slice %59[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %81 = tosa.negate %extracted_slice_16 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %82 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice = tensor.insert_slice %81 into %82[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_17 = tensor.insert_slice %extracted_slice_15 into %inserted_slice[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %83 = tosa.mul %inserted_slice_17, %79 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %84 = tosa.add %80, %83 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %85 = tosa.mul %62, %76 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_18 = tensor.extract_slice %62[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_19 = tensor.extract_slice %62[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %86 = tosa.negate %extracted_slice_19 : (tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %87 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_20 = tensor.insert_slice %86 into %87[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_21 = tensor.insert_slice %extracted_slice_18 into %inserted_slice_20[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + + %t_end = call @rtclock() : () -> f64 + %time = arith.subf %t_end, %t_start : f64 + + %tensor_unranked = tensor.cast %inserted_slice_21 : tensor<1x32x40x128xf32> to tensor<*xf32> + + // All the elements of the MemRef are the same, + // only check the first line to verify the correctness. + // CHECK: Unranked Memref base@ = {{.*}} rank = 4 offset = 0 sizes = [1, 32, 40, 128] strides = [163840, 5120, 128, 1] data = + // CHECK-NEXT: [ + // CHECK-SAME: [ + // CHECK-SAME: [ + // CHECK-SAME: [-3{{(, [-]?3)*}}], + + // Print results. + call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> () + // Print timings. 
+ vector.print %time : f64
+
+ return
+}
+
+func.func @main() {
+
+ %c2 = arith.constant dense<2.0> : tensor<1x40x4096xf32>
+ %c3 = arith.constant dense<3.0> : tensor<1x40x4096xf32>
+ %c4 = arith.constant dense<4.0> : tensor<1x40x4096xf32>
+ %c5 = arith.constant dense<5.0> : tensor<1x1x2048x128xf32>
+ %c6 = arith.constant dense<6.0> : tensor<1x1x2048x128xf32>
+ %c7 = arith.constant dense<7> : tensor<1x40xi64>
+
+ call @kernel(%c2, %c3, %c4, %c5, %c6, %c7) : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>, tensor<1x40x4096xf32>, tensor<1x1x2048x128xf32>, tensor<1x1x2048x128xf32>, tensor<1x40xi64>) -> ()
+
+ return
+}
+func.func private @printMemrefF32(%ptr : tensor<*xf32>)
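The slice/negate/insert sequence in next-rope.mlir above is the standard rotate-half formulation of rotary position embeddings (RoPE). The following is a minimal NumPy sketch of the same math; the names `precompute_theta_pos_frequencies`, `rotate_half`, and `apply_rope` are illustrative only, not buddy-mlir APIs, and in the MLIR file the cos/sin tables arrive precomputed as `%arg3`/`%arg4`.

```python
# Rotate-half RoPE sketch; mirrors the slice/negate/insert pattern above.
import numpy as np

def precompute_theta_pos_frequencies(head_dim, seq_len, base=10000.0):
    # One frequency per channel pair: theta_i = base^(-2i / head_dim).
    theta = base ** (-np.arange(0, head_dim, 2) / head_dim)
    freqs = np.outer(np.arange(seq_len), theta)    # (seq_len, head_dim/2)
    emb = np.concatenate([freqs, freqs], axis=-1)  # (seq_len, head_dim)
    return np.cos(emb), np.sin(emb)

def rotate_half(x):
    # Negate the second half of the channels and swap it in front,
    # just like the extract_slice / negate / insert_slice ops above.
    x1, x2 = np.split(x, 2, axis=-1)
    return np.concatenate([-x2, x1], axis=-1)

def apply_rope(x, cos, sin):
    # x: (batch, heads, seq_len, head_dim); cos/sin broadcast over heads.
    return x * cos + rotate_half(x) * sin
```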
diff --git a/examples/BuddyNext/next-sigmoid.mlir b/examples/BuddyNext/next-sigmoid.mlir
new file mode 100644
index 000000000..f49f2d794
--- /dev/null
+++ b/examples/BuddyNext/next-sigmoid.mlir
@@ -0,0 +1,70 @@
+// RUN: buddy-opt %s \
+// RUN: -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" \
+// RUN: | buddy-opt \
+// RUN: -arith-expand \
+// RUN: -eliminate-empty-tensors \
+// RUN: -empty-tensor-to-alloc-tensor \
+// RUN: -one-shot-bufferize \
+// RUN: -convert-linalg-to-affine-loops \
+// RUN: -affine-loop-fusion \
+// RUN: -lower-affine \
+// RUN: -func-bufferize \
+// RUN: -arith-bufferize \
+// RUN: -tensor-bufferize \
+// RUN: -buffer-deallocation \
+// RUN: -finalizing-bufferize \
+// RUN: -convert-vector-to-scf \
+// RUN: -expand-strided-metadata \
+// RUN: -convert-vector-to-llvm \
+// RUN: -memref-expand \
+// RUN: -arith-expand \
+// RUN: -convert-arith-to-llvm \
+// RUN: -finalize-memref-to-llvm \
+// RUN: -convert-scf-to-cf \
+// RUN: -convert-openmp-to-llvm \
+// RUN: -convert-arith-to-llvm \
+// RUN: -convert-math-to-llvm \
+// RUN: -convert-math-to-libm \
+// RUN: -convert-func-to-llvm \
+// RUN: -reconcile-unrealized-casts \
+// RUN: | mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \
+// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+func.func private @rtclock() -> f64
+
+func.func @kernel(%arg0 : tensor<1x40x11008xf32>) {
+ %t_start = call @rtclock() : () -> f64
+
+ %sigmoid = tosa.sigmoid %arg0 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32>
+
+ %t_end = call @rtclock() : () -> f64
+ %time = arith.subf %t_end, %t_start : f64
+
+ %tensor_unranked = tensor.cast %sigmoid : tensor<1x40x11008xf32> to tensor<*xf32>
+
+ // All the elements of the MemRef are the same,
+ // only check the first line to verify the correctness.
+ // CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 40, 11008] strides = [440320, 11008, 1] data =
+ // CHECK-NEXT: [
+ // CHECK-SAME: [
+ // CHECK-SAME: [0.952574{{(, 0.952574)*}}],
+
+ // Print results.
+ call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
+ // Print timings.
+ vector.print %time : f64
+
+ return
+}
+
+func.func @main() {
+
+ %c3 = arith.constant dense<3.0> : tensor<1x40x11008xf32>
+
+ call @kernel(%c3) : (tensor<1x40x11008xf32>) -> ()
+
+ return
+}
+func.func private @printMemrefF32(%ptr : tensor<*xf32>)
diff --git a/examples/BuddyPython/module_gen.py b/examples/BuddyPython/module_gen.py
index e2c722ceb..1f657d260 100644
--- a/examples/BuddyPython/module_gen.py
+++ b/examples/BuddyPython/module_gen.py
@@ -43,12 +43,11 @@ def foo(x, y):
     aot_autograd_decomposition=inductor_decomp,
 )
 
-# Pass the function and input data to the dynamo compiler's importer, the 
-# importer will first build a graph. Then, lower the graph to top-level IR. 
+# Pass the function and input data to the dynamo compiler's importer, the
+# importer will first build a graph. Then, lower the graph to top-level IR.
 # (tosa, linalg, etc.). Finally, accepts the generated module and weight parameters.
-graphs = dynamo_compiler.importer(foo, *(float32_in1, float32_in2))
+graphs = dynamo_compiler.importer(foo, float32_in1, float32_in2)
 graph = graphs[0]
-graph.lower_to_top_level_ir(do_params_pack=True)
+graph.lower_to_top_level_ir()
 print(graph._imported_module)
-print(dynamo_compiler.imported_params[graph])
diff --git a/examples/BuddyWhisper/.gitignore b/examples/BuddyWhisper/.gitignore
new file mode 100644
index 000000000..9dadf6451
--- /dev/null
+++ b/examples/BuddyWhisper/.gitignore
@@ -0,0 +1,6 @@
+# model params file
+arg0.data
+
+# model mlir file
+forward.mlir
+subgraph0.mlir
diff --git a/examples/BuddyWhisper/CMakeLists.txt b/examples/BuddyWhisper/CMakeLists.txt
new file mode 100644
index 000000000..756d6db08
--- /dev/null
+++ b/examples/BuddyWhisper/CMakeLists.txt
@@ -0,0 +1,95 @@
+add_custom_command(
+ OUTPUT ${BUDDY_EXAMPLES_DIR}/BuddyWhisper/forward.mlir ${BUDDY_EXAMPLES_DIR}/BuddyWhisper/subgraph0.mlir ${BUDDY_EXAMPLES_DIR}/BuddyWhisper/arg0.data
+ COMMAND ${Python3_EXECUTABLE} ${BUDDY_EXAMPLES_DIR}/BuddyWhisper/import-whisper.py
+ COMMENT "Generating forward.mlir, subgraph0.mlir and arg0.data..."
+) +set(PATTERN_ARG "test-generalize-pad-tensor") +add_custom_command( + OUTPUT forward.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyWhisper/forward.mlir + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith), empty-tensor-to-alloc-tensor, convert-elementwise-to-linalg, arith-bufferize, func.func(linalg-bufferize, tensor-bufferize), func-bufferize)" | + ${BUDDY_BINARY_DIR}/buddy-opt + -pass-pipeline "builtin.module( func.func(buffer-deallocation-simplification, convert-linalg-to-loops),matmul-parallel-vectorization-optimize, batchmatmul-optimize, eliminate-empty-tensors,func-bufferize-dynamic-offset, func.func(llvm-request-c-wrappers),convert-scf-to-openmp, convert-openmp-to-llvm, convert-math-to-llvm, convert-math-to-libm, convert-scf-to-cf, convert-arith-to-llvm, expand-strided-metadata, finalize-memref-to-llvm, convert-func-to-llvm, reconcile-unrealized-casts)" | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O0 -o ${BUDDY_BINARY_DIR}/../examples/BuddyWhisper/forward.o + DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyWhisper/forward.mlir + COMMENT "Building forward.o" + VERBATIM) + +add_custom_command( + OUTPUT subgraph0.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyWhisper/subgraph0.mlir + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith))" | + ${LLVM_TOOLS_BINARY_DIR}/mlir-opt + -test-linalg-transform-patterns=${PATTERN_ARG} | + ${BUDDY_BINARY_DIR}/buddy-opt + -arith-expand + -eliminate-empty-tensors + -convert-elementwise-to-linalg + -empty-tensor-to-alloc-tensor + -one-shot-bufferize + -matmul-parallel-vectorization-optimize + -batchmatmul-optimize + -convert-linalg-to-affine-loops + -affine-loop-fusion + -affine-parallelize + -lower-affine + -convert-scf-to-openmp + -func-bufferize-dynamic-offset + -tensor-bufferize + -convert-linalg-to-loops + -finalizing-bufferize + -convert-vector-to-scf + -expand-strided-metadata + -cse + -convert-vector-to-llvm + -memref-expand + -convert-arith-to-llvm + -finalize-memref-to-llvm + -convert-scf-to-cf + -llvm-request-c-wrappers + -convert-openmp-to-llvm + -convert-arith-to-llvm + -convert-math-to-llvm + -convert-math-to-libm + -convert-func-to-llvm + -reconcile-unrealized-casts | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 -o ${BUDDY_BINARY_DIR}/../examples/BuddyWhisper/subgraph0.o + DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyWhisper/subgraph0.mlir + COMMENT "Building subgraph0.o " + VERBATIM) + +add_library(WHISPER STATIC forward.o subgraph0.o) + +SET_SOURCE_FILES_PROPERTIES( + template.o + PROPERTIES + EXTERNAL_OBJECT true + GENERATED true) + +SET_TARGET_PROPERTIES( + WHISPER + PROPERTIES + LINKER_LANGUAGE C) + +set(BUDDY_WHISPER_FILES + whisper-main.cpp +) + +add_executable(buddy-whisper-run ${BUDDY_WHISPER_FILES}) +target_link_directories(buddy-whisper-run PRIVATE ${LLVM_LIBRARY_DIR}) + +set(BUDDY_WHISPER_LIBS + WHISPER + BuddyLibDAP + mlir_c_runner_utils + omp +) +if(BUDDY_MLIR_USE_MIMALLOC) + list(APPEND BUDDY_WHISPER_LIBS mimalloc) +endif() + +target_link_libraries(buddy-whisper-run ${BUDDY_WHISPER_LIBS}) diff --git a/examples/BuddyWhisper/README.md b/examples/BuddyWhisper/README.md new file mode 100644 index 000000000..644a42c23 --- /dev/null +++ 
b/examples/BuddyWhisper/README.md
@@ -0,0 +1,84 @@
+# Buddy Compiler WHISPER Example
+
+## Introduction
+This example shows how to use Buddy Compiler to compile a WHISPER model to MLIR code and then run it. The [model](https://huggingface.co/openai/whisper-base) is a pre-trained model for automatic speech recognition (ASR) and speech translation (ST).
+
+
+## How to run
+
+0. Enter the Python virtual environment.
+
+We recommend using anaconda3 to create a Python virtual environment. Install the Python packages listed in buddy-mlir/requirements.txt.
+
+```
+$ conda activate
+$ cd buddy-mlir
+$ pip install -r requirements.txt
+```
+
+1. Build and check LLVM/MLIR
+
+```
+$ cd buddy-mlir
+$ mkdir llvm/build
+$ cd llvm/build
+$ cmake -G Ninja ../llvm \
+    -DLLVM_ENABLE_PROJECTS="mlir;clang;openmp" \
+    -DLLVM_TARGETS_TO_BUILD="host;RISCV" \
+    -DLLVM_ENABLE_ASSERTIONS=ON \
+    -DOPENMP_ENABLE_LIBOMPTARGET=OFF \
+    -DCMAKE_BUILD_TYPE=RELEASE \
+    -DMLIR_ENABLE_BINDINGS_PYTHON=ON \
+    -DPython3_EXECUTABLE=$(which python3)
+$ ninja check-clang check-mlir omp
+```
+
+2. Build and check buddy-mlir
+
+```
+$ cd buddy-mlir
+$ mkdir build
+$ cd build
+$ cmake -G Ninja .. \
+    -DMLIR_DIR=$PWD/../llvm/build/lib/cmake/mlir \
+    -DLLVM_DIR=$PWD/../llvm/build/lib/cmake/llvm \
+    -DLLVM_ENABLE_ASSERTIONS=ON \
+    -DCMAKE_BUILD_TYPE=RELEASE \
+    -DBUDDY_MLIR_ENABLE_PYTHON_PACKAGES=ON \
+    -DPython3_EXECUTABLE=$(which python3)
+$ ninja
+$ ninja check-buddy
+```
+
+Set the `PYTHONPATH` environment variable. Make sure that the `PYTHONPATH` variable includes the directory of LLVM/MLIR python bindings and the directory of Buddy MLIR python packages.
+
+```bash
+$ export PYTHONPATH=/path-to-buddy-mlir/llvm/build/tools/mlir/python_packages/mlir_core:/path-to-buddy-mlir/build/python_packages:${PYTHONPATH}
+
+# For example:
+# Navigate to your buddy-mlir/build directory
+$ cd buddy-mlir/build
+$ export BUDDY_MLIR_BUILD_DIR=$PWD
+$ export LLVM_MLIR_BUILD_DIR=$PWD/../llvm/build
+$ export PYTHONPATH=${LLVM_MLIR_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_MLIR_BUILD_DIR}/python_packages:${PYTHONPATH}
+```
+
+3. Set the model environment variable.
+
+```bash
+$ export WHISPER_MODEL_PATH=/path-to-whisper-model/
+
+# For example:
+$ export WHISPER_MODEL_PATH=/home/xxx/whisper-base
+```
+
+4. Build and run the WHISPER example
+
+```bash
+$ cmake -G Ninja .. -DBUDDY_WHISPER_EXAMPLES=ON
+$ ninja buddy-whisper-run
+$ cd bin
+$ ./buddy-whisper-run
+```
+
+5. Enjoy it!
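One practical note on the generated artifacts: `arg0.data` (listed in the .gitignore above and produced by `import-whisper.py` below) is simply every model parameter flattened and concatenated as raw float32 values, so it can be sanity-checked with a short NumPy snippet. This is an illustrative check under that assumption, not part of the patch.

```python
import numpy as np

# arg0.data is written by import-whisper.py as a flat float32 dump of all
# model parameters; its element count should equal the parameter count of
# the imported Whisper model.
params = np.fromfile("arg0.data", dtype=np.float32)
print(params.size)
```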
diff --git a/examples/BuddyWhisper/audio.wav b/examples/BuddyWhisper/audio.wav
new file mode 100644
index 000000000..069c2329e
Binary files /dev/null and b/examples/BuddyWhisper/audio.wav differ
diff --git a/examples/BuddyWhisper/import-whisper.py b/examples/BuddyWhisper/import-whisper.py
new file mode 100644
index 000000000..449646a67
--- /dev/null
+++ b/examples/BuddyWhisper/import-whisper.py
@@ -0,0 +1,79 @@
+# ===- import-whisper.py -------------------------------------------------------
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ===---------------------------------------------------------------------------
+#
+# This is an example for the Whisper model.
+#
+# ===---------------------------------------------------------------------------
+
+import os
+import torch
+import torch._dynamo as dynamo
+from torch._inductor.decomposition import decompositions as inductor_decomp
+from transformers import WhisperForConditionalGeneration
+import numpy
+
+from buddy.compiler.frontend import DynamoCompiler
+from buddy.compiler.ops import tosa
+from buddy.compiler.graph import GraphDriver
+from buddy.compiler.graph.transform import simply_fuse
+
+# Retrieve the Whisper model path from environment variables.
+model_path = os.environ.get("WHISPER_MODEL_PATH")
+if model_path is None:
+    model_path = "openai/whisper-base"
+
+# Initialize the model from the specified model path.
+model = WhisperForConditionalGeneration.from_pretrained(model_path)
+model.config.use_cache = False
+
+# Generate placeholder for inputs.
+input_features = torch.zeros(size=(1, 80, 3000), dtype=torch.float32)
+decoder_input_ids = torch.zeros(size=(1, 448), dtype=torch.long)
+inputs = {
+    "input_features": input_features,
+    "decoder_input_ids": decoder_input_ids,
+}
+
+# Initialize Dynamo Compiler with specific configurations as an importer.
+dynamo_compiler = DynamoCompiler(
+    primary_registry=tosa.ops_registry,
+    aot_autograd_decomposition=inductor_decomp,
+)
+
+# Import the model into MLIR module and parameters.
+with torch.no_grad():
+    graphs = dynamo_compiler.importer(model, **inputs)
+
+assert len(graphs) == 1
+graph = graphs[0]
+params = dynamo_compiler.imported_params[graph]
+pattern_list = [simply_fuse]
+graphs[0].fuse_ops(pattern_list)
+driver = GraphDriver(graphs[0])
+driver.subgraphs[0].lower_to_top_level_ir()
+path_prefix = os.path.dirname(os.path.abspath(__file__))
+
+
+with open(os.path.join(path_prefix, "subgraph0.mlir"), "w") as module_file:
+    print(driver.subgraphs[0]._imported_module, file=module_file)
+
+with open(os.path.join(path_prefix, "forward.mlir"), "w") as module_file:
+    print(driver.construct_main_graph(True), file=module_file)
+
+all_param = numpy.concatenate(
+    [param.detach().numpy().reshape([-1]) for param in params]
+)
+all_param.tofile(os.path.join(path_prefix, "arg0.data"))
diff --git a/examples/BuddyWhisper/vocab.txt b/examples/BuddyWhisper/vocab.txt
new file mode 100644
index 000000000..50b9549e6
--- /dev/null
+++ b/examples/BuddyWhisper/vocab.txt
@@ -0,0 +1,51865 @@
+!
+\"
+#
+$
+%
+&
+'
+(
+)
+*
++
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+;
+<
+=
+>
+?
+@ +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +X +Y +Z +[ +\\ +] +^ +_ +` +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +{ +| +} +~ +¡ +¢ +£ +¤ +¥ +¦ +§ +¨ +© +ª +« +¬ +® +¯ +° +± +² +³ +´ +µ +¶ +· +¸ +¹ +º +» +¼ +½ +¾ +¿ +À +Á + +à +Ä +Å +Æ +Ç +È +É +Ê +Ë +Ì +Í +Î +Ï +Ð +Ñ +Ò +Ó +Ô +Õ +Ö +× +Ø +Ù +Ú +Û +Ü +Ý +Þ +ß +à +á +â +ã +ä +å +æ +ç +è +é +ê +ë +ì +í +î +ï +ð +ñ +ò +ó +ô +õ +ö +÷ +ø +ù +ú +û +ü +ý +þ +ÿ +Ā +ā +Ă +ă +Ą +ą +Ć +ć +Ĉ +ĉ +Ċ +ċ +Č +č +Ď +ď +Đ +đ +Ē +ē +Ĕ +ĕ +Ė +ė +Ę +ę +Ě +ě +Ĝ +ĝ +Ğ +ğ +Ġ +ġ +Ģ +ģ +Ĥ +ĥ +Ħ +ħ +Ĩ +ĩ +Ī +ī +Ĭ +ĭ +Į +į +İ +ı +IJ +ij +Ĵ +ĵ +Ķ +ķ +ĸ +Ĺ +ĺ +Ļ +ļ +Ľ +ľ +Ŀ +ŀ +Ł +ł +Ń +Ġt +Ġa +Ġth +in +er +Ġw +Ġs +ou +Ġthe +re +on +at +en +Ġc +it +is +Ġb +nd +Ġd +Ġm +Ġh +Ġo +ing +es +Ġp +Ġto +an +Ġf +or +ll +ĠI +Ġl +Ġy +ar +Ġg +Ġyou +ed +Ġand +Ġin +Ġof +as +Ġn +om +ic +Ġthat +us +et +ve +al +ow +le +Ġis +Ġe +Ġit +ot +'s +Ġbe +ion +ĠT +Ġwh +ĠA +ent +ĠS +Ġre +ay +Ġwe +Ġon +ere +Ġha +ut +ac +id +ig +os +ke +ver +im +ĠÐ +ĠTh +am +all +Ġfor +el +ch +ro +Ġthis +Ġst +ĠW +Ġu +ad +out +ir +ld +ct +Ġk +if +Ġgo +.. +о +ith +ly +ht +qu +Ġ- +Ġdo +Ġj +Ġhave +ĠB +Ġan +Ġwith +Ġare +Ġr +Ġde +Ġse +Ġso +Ġv +st +ill +ur +Ġli +ĠM +est +od +ally +'t +ust +Ġas +ĠC +ce +Ġme +а +е +il +ĠH +Ġwas +ter +th +Ġcan +ant +Ġcom +our +ight +ĠY +ation +ĠAnd +ol +Ġsh +ÑĤ +op +se +Ġnot +ĠSo +Ġne +un +Ġab +Ġlike +Ġat +ĠD +ie +Ġhe +Ġcon +Ġch +ore +Ġal +Ġor +Ġqu +ĠO +ome +ra +ul +ĠN +pp +Ġyour +ould +ĠP +Ġfr +ge +ers +'re +и +Ġthey +Ġwhat +use +Ġall +ĠThe +ĠL +ess +em +Ġkn +Ġjust +art +Ġpro +very +um +Ġlo +Ġì +Ġmy +ok +Ġex +ab +Ġthere +Ġbut +Ġknow +Ġsu +ĠG +Ñģ +ĠE +Ġma +оР+Ġen +Ġabout +ĠIt +ist +Ġwor +ri +ind +Ġone +ate +and +ink +Ġle +ort +'m +ĠF +ich +ÑĢ +ide +Ġget +Ġout +... +Ġwill +ãģ +ive +н +Ġfrom +ain +ĠWe +Ġup +pe +res +ca +ĠR +Ġif +Ġpl +Ġdon +ack +Ġ1 +Ġ\" +Ġtr +Ġus +ĠWh +ity +ĠJ +ĠYou +Ġhere +her +Ġsome +oug +ak +ard +Ġgoing +Ġun +ment +Ġthink +Ġpe +end +Ġ( +cause +Ġtim +ast +é +Ġour +Ġwant +ame +ies +Ġë +ud +ine +Ġreally +Ġte +Ġsee +ci +Ġby +so +ure +ose +Ġ[ +are +Ġmore +ah +one +ck +ople +аР+Ġthen +Ġthing +Ġthem +ven +ound +ost +ong +ect +Ġright +ag +Ġint +Ġpeople +Ġwhen +ous +pl +Ġtime +Ġim +Ġwho +Ġ2 +ap +Ġbecause +hing +Ġno +ice +Ġlook +Ġhas +Ġwould +Ġhow +act +Ġfe +nt +ough +Ġpr +ĠBut +Ġsay +Ñĥ +Ġnow +Ġman +Ġvery +Ġwork +iz +ĠK +iv +itt +Ġar +ep +Ġcl +Ġwhich +Ġco +ans +'ve +Ġsa +ff +'ll +Ġany +Ġact +Ġye +ber +ach +age +per +Ġalso +fer +Ġthese +Ġad +еР+ther +ace +ick +ake +reat +ire +ue +Ġag +ĠU +uch +ions +ry +00 +na +Ġdid +Ġque +Ġhad +Ġevery +ĠHe +Ġla +Ġway +Ġsp +ble +ĠThis +ass +Ġtheir +ite +Ġneed +Ġpart +Ġwere +Ġback +ip +own +omet +be +ase +Ġmake +irst +ia +ence +ang +ank +Ġgot +Ġpre +Ġcont +Ġother +pt +ĠThat +og +Ġgood +Ġinto +alk +Ġbeen +Ġam +Ġover +ually +Ġâ +ìĿ +Ġund +he +way +Ġgr +ÑĮ +Ġdif +Ġper +Ñı +ĠIn +Ġtw +ond +ars +int +orm +Ġlot +Ġwhere +Ġà +ĠV +Ġsomet +л +ens +Ġgu +Ġac +ug +Ñĭ +ı +Ġfirst +ree +Ġhis +ittle +Ġimp +Ġmo +av +Ġlittle +ĠWhat +Ġmuch +Ġz +Ġê +able +Ġп +Ġpo +Ġcomp +ne +Ġdis +Ġlet +ance +Ġher +Ġthings +Ġstart +ult +Ġapp +Ġres +Ġfo +Ġcould +Ġinter +Ġthose +Ġdes +Ġwell +Ġtwo +Ġkind +xt +ress +ely +ä +Ġbr +Ġthr +Ġв +Ġi +ish +Ġdiffer +Ġro +ĠSt +Ġsomething +Ġtake +Ġbo +ys +Ġshe +Ġtalk +lo +Ñĩ +Ġeven +к +ãĢ +Ġн +Ġbu +ĠIf +Ġdown +ĠCh +ade +ations +Ġuse +ord +Ġoff +Ġactually +Ġspe +du +ated +ater +oss +ning +ü +Ġdoes +ĠÑģ +Ġnew +Ġbet +vel +cess +ple +Ġhapp +ting +onna +Ġes +Ġday +Ġonly +ign +kay +sel +ents +ount +ild +ile +Ġsc +Ġhim +Ġagain +ving +Ġgonna +Ġcomm +Ġhel +other +Ġke +ical +Ġ3 +Ġel +Ġthrough +Ġcome +ark 
+day +ier +ó +Ġthan +ĠThey +Ġmay +Ġser +íķ +Ġcall +Ġdifferent +Ġshould +ĠThere +ary +ĠNow +ãĤ +thing +we +ory +fter +Ġput +ors +ial +ëĭ +Ġunder +Ġinc +ĠYe +ub +form +Ġvide +ภ+vers +Ġfeel +á +ody +ft +fore +Ġem +get +Ġsaid +ition +Ġrec +ious +atch +Ġtry +Ġhelp +Ġshow +д +Ġbit +ull +в +ÑĤо +gr +Ġplay +ife +ail +ĠYeah +Ġquest +Ġmany +Ġpers +Ġgreat +ÃŃ +Ġest +ng +ĠâĻ +ty +la +ĠOh +Ġ× +à® +ĠBe +ady +Ġmost +ction +ĠNo +Ġdoing +Ġbeing +Ġtoo +ces +Ġbl +.\" +Ġrem +iss +ons +>> +ru +wn +ont +ib +ell +Ġsm +oth +ual +Ġ>> +Ġph +les +oc +ful +Ġsec +ise +Ġadd +igh +ert +Ġsame +âĢ +Ġmean +Ġfind +ek +Ġend +-- +м +Ġstill +az +Ġ' +Ġmin +Ġyears +urn +Ġaround +self +Ġwr +bs +ought +ĠâĻª +Ġfl +ange +Ġafter +Ġpoint +mer +ved +Ġlong +oy +ä¸ +Ġcr +ways +Ġsy +Ġtra +Ġ20 +ave +Ġche +Ġent +Ġbefore +ph +Ġatt +ian +ily +Ġperson +Ġbig +Ġsch +Ġreal +Ġnext +Ġlove +Ġvideo +ĠLet +Ġfin +Ġmak +ible +Ġtoday +erm +ĠAl +ower +ann +ix +Ġpar +Ġstud +ö +Ġimport +te +Ġgive +ves +Ġdie +Ġdec +Ġtell +Ġк +ÑģÑĤ +Ġwhy +ically +ict +red +Ġbas +Ġsure +Ġbel +ating +Ġtak +Ġset +Ġlife +Ġdidn +ا +ob +und +ath +Ġop +Ġо +ait +Ġworld +Ġsupp +io +Ġcour +Ġи +ward +ен +Ġalways +up +Ġhand +ĠHow +cial +Ġcons +ĠÑ +Ġind +Ġ4 +ĠAs +Ġfun +ject +Ġimportant +Ġsur +ew +ates +Ġ5 +Ġdi +Ġmade +Ġins +Ġask +Ġet +Ġnum +Ġcar +ĠOkay +Ġsim +ik +Ġlast +ĠGo +Ġmus +Ġrel +ular +´ì +ĠWell +pect +ĠThank +Ġthree +ã +ãĥ +Ġinv +Ġgen +lic +Ġhappen +ëĬ +ien +ever +ов +Ġstr +ĠAll +Ġinst +ĠâĢ +Ġdef +Ġsl +Ġmight +ung +Ġyear +Ġown +Ġkeep +body +der +ĠÑĤ +Ġд +Ġanother +Ġmod +Ġev +Ġguys +Ġable +ão +que +ident +ĠYes +Ġits +Ġplace +Ġprodu +arn +Ġм +Ġrep +Ġexper +Ġfam +ities +ific +Ġhigh +ied +ool +iew +еÑĤ +ren +Ġdone +Ġ... +ëĬĶ +stem +ĠSe +Ġbetter +come +Ġdel +Ġty +Ġum +Ġho +ĠAn +Ġmon +ings +Ġsk +Ġob +com +blem +ope +stand +'d +ments +Ġele +ĠIs +Ġda +Ġreg +lease +ike +als +ize +ê° +Ġcare +Ġnever +ìĿ´ +ese +Ġmet +olog +ĠWhen +uck +еÑĢ +Ġé +Ġdat +ç +Ġexam +ility +Ġdet +cri +Ġused +ĠDo +Ġtrans +eg +ten +Ñİ +cus +Ġsecond +Ġbest +Ġhard +Ġide +Ġproblem +ê³ +ĠUn +Ñħ +ĠÎ +Ġwatch +ĠSh +atter +Ġpret +Ġder +Ġcourse +ÅŁ +ative +ics +Ġquestion +ute +ìĹ +ĠFor +ather +Ġcol +iend +Ġí +ĠZ +Ġdoesn +arch +Ġinterest +Ġpol +Ġcor +ience +Ġpres +Ġeach +Ġsystem +Ġfact +iel +ably +Ġer +Ġrun +ĠìĿ +Ġtop +ner +Ġthought +Ġeas +ient +Ġcre +ÑĪ +Ġcommun +ye +ready +llow +Ġeverything +omm +Ġmed +ļĶ +Ġcount +its +Ġcompl +hip +ÙĦ +ook +Ġtoget +Ġtogether +amp +Ġgame +Ġalready +ал +Ġcalled +ale +ÅĤ +ĠMy +Ġunderstand +Ġdr +Ġmom +ited +ол +Ġusing +zy +Ġnumber +ãĢģ +ced +Ġcle +но +ëĭ¤ +ince +Ġlooking +Ġpretty +Ġprob +ĠShe +Ġve +Ġgetting +Ġweek +Ġeff +uff +air +ues +ern +ĠQ +oup +ention +Ġside +ом +Ġform +Ġbus +Ġass +Ġed +ason +ween +âĢ¦ +Ġturn +Ġcur +Ġcoll +Ġdire +ĠGod +Ġ10 +Ġequ +Ġб +Ġopen +Ġsuch +ird +ак +Ġear +ÄĻ +gan +Ġpartic +Ġfriend +Ġexp +Ġext +Ġhome +Ġwater +ĠOn +ÑĤÑĮ +ork +ĠпÑĢ +Ġmove +ness +ense +ho +Ġchar +co +ins +Ġboth +Ġ19 +Ġgra +Ġbetween +á» +Ġìķ +ash +ĠRe +ai +alth +ures +ember +Ġav +Ġver +ê +oney +Ġthank +Ġmaybe +uc +ime +ê³ł +Ġaway +Ġname +ouse +Ġacc +Ġmusic +Ġchange +Ġpass +ger +Ġbuild +Ġval +iness +any +Ġfew +´ë +ta +Ġlist +Ã¥ +Ġold +Ġìŀ +Ġsort +Ġmem +Ġca +cept +Ġgener +Ġyeah +Ġwhile +Ġanything +ric +gram +Ġein +cy +uring +ĠDe +Ġpower +Ġcoming +Ġword +Ġ-- +Ġbelie +Ġfound +to +п +Ġmeans +Ġinform +ĠØ +ĠÑĩ +Ġsmall +000 +Ġcame +Ġíķ +wh +Ġworking +Ġexample +Ġpos +Ġdep +ê² +äº +ote +Ġdem +ì§ +ts +Ġvar +aut +Ġtri +chn +Ġhead +Ġwhole +×Ļ +ze +Ġtrying +Ġtem +Ġcou +ets +Ġ6 +Ġfil +velop +Ġcase +௠+Ġprobably +Ġokay +Ġplan +Ġsit +Ġschool +ĠThen +¸ë +me +Ġprocess +Ġfar +Ġread +Ġposs +Ġbre +Ġsol +icht +Ġsupport +ĠTo 
+ertain +Ġstarted +Ġcap +Ġleft +Ġdata +Ġtimes +ел +Ġwanted +ан +Ġtalking +Ġist +Ġhaving +ump +Ġcontin +Ġsub +Ġз +pr +ëĭĪ +ina +ż +Ġcreat +ode +×ķ +æĺ +!! +Ġterm +ism +од +ĠBecause +Ġwent +ider +Ġprov +Ġchild +Ġden +Ġlight +br +³Ð¾ +oh +Ġbook +ĠÙ +ution +ĠJust +ene +Ġfour +Ġvis +ê°Ģ +Ġhope +Ġmaking +ĠLe +ìķ +Ġopp +au +Ġmoney +Ġprogram +è +Ġstand +IN +Ġsign +Ġlearn +Ãł +ĠDon +Ġteam +Ġна +lud +Ġrest +ices +æľ +ĠÑĢ +Ġaut +Ġlead +ational +de +gy +Ġnice +Ġdas +Ġdist +Ġhum +ĠOne +æĪ +Ġcomes +Ġjo +Ġcent +Ġexpl +Ġmark +reen +led +gin +ìļĶ +Ġlevel +Ġconf +ush +Ġdevelop +Ġtest +eng +vious +ature +ем +ret +Ġje +Ġstuff +Ġclass +ows +Ġê· +Ġsi +Ġles +rop +çļ +Ġpor +Ġwar +ìĹIJ +Ġeveryone +Ġge +Ġcheck +ott +Ġsing +Ġart +Ġfollow +Ġ201 +ĠFr +ais +ìĸ +α +å° +ĠÃł +imes +Ġret +Ġchang +Ġpub +Ġinf +Ġtechn +ada +ives +Ġbeh +æĺ¯ +Ġlooks +ãĢĤ +з +ĠWhy +çļĦ +Ġenough +Ġbra +itch +ä» +Ġadv +б +Ġwithout +wer +meric +den +Ġcomplet +Ġidea +ters +ock +Ġdefin +Ġever +Ġgl +Ġonce +Ġbring +Ġsaying +Ġans +Ġhear +nect +Ġless +go +ream +ado +ìŀ +Ġmind +ente +Ġfull +Ġbad +Ġwom +Ġsomeone +Ġdu +Ġwon +Ġcontro +ortun +Ġhealth +Ġcho +ĠAr +Ġconc +Ġinformation +Ġstop +att +ately +ä½ +Ġgroup +ĠÑĥ +Ġquite +Ġresp +ER +ught +ê¸ +man +ized +ĠBr +Ġremember +Ġfamily +Ġbusiness +aw +Ġspec +Ġau +ĠOr +Äħ +Ġseen +Ġlar +Ġ7 +gg +bers +Ġdra +Ġmonth +Ġsays +Ġiss +Ġlive +Ġline +Ġmoment +Ġexc +els +Ġsound +Ġcool +Ġloc +Ġcertain +Ġdri +оÑĤ +ames +Ġmust +ny +иÑĤ +Ġkid +Ġinclud +ìĿĦ +ator +ÄŁ +ha +ared +Ġseem +й +ìĦ +Ġelse +Ġìł +irl +Ġ8 +Ġvo +Ġquestions +ines +ee +æĪij +ür +ĠAmeric +Ġstory +Ġserv +vern +ages +land +ĠâĢĵ +era +ĠCan +Ġpop +ether +Ġna +Ġorder +Ġmakes +Ġsince +con +ctor +Ġthough +Ġproduct +ли +Ġleg +Ġmeet +alf +ÑģÑı +unch +iter +ove +×ķ× +iet +ам +ital +Ġsuper +ling +Ġpay +Ġpara +Ġjob +ĠHere +Ġsw +ks +ption +ma +Ġbelieve +¬ë +Ġwait +ой +Ġunt +Ġquick +hr +ĠÑį +ĠPro +Ġmen +๠+Ġdays +Ġgoes +Ġspeak +ĠAt +ement +Ġmiss +Ġaw +Ġdesign +Ġproject +оÑĢ +ij +ants +ats +ĠChr +Ġ9 +Ġcut +Ġrequ +Ġне +ĠNot +aster +Ġmill +Ġparticular +Ġpie +Ġstudents +Ġfive +oun +ĠNe +Ġgi +Ġpas +Ġfree +ĠSp +lich +Ġprof +Ġeng +Ġprot +ĠLike +osed +Ġconnect +app +Ġë§ +iting +Ġblo +Ġlos +ists +Ġexperience +rent +Ġstay +Ġfood +ton +ruct +Ġhist +view +ining +most +ivers +bo +ãģĦ +ĠTr +gen +Ġplease +Ġcommunity +Ġce +AN +no +Ġbody +Ġhour +Ġvers +Ạ+cer +Ġê° +Ġreason +ĠRight +Ġlater +ÏĦ +Ġhouse +ĠX +он +Ġstate +fic +å¤ +ÅĽ +ield +Ġpri +Ġpast +Ġwalk +ology +ering +anna +Ġter +Ġhold +Ġorgan +ben +ο +ón +Ġeffect +Ġyourself +Ġplus +aj +ando +ural +Ġroom +lect +ê²Į +?\" +side +Ġbecome +ÑĨ +Ġ +ood +Ġconst +Ġnight +utes +ж +Ġbreak +Ġpain +Ġstep +ired +Ġnothing +Ġuntil +Ñĸ +ав +ÙĬ +Ġduring +ì§Ģ +less +oll +нÑĭ +ι +fect +iver +ıĦ +ither +ying +Ġbegin +×Ļ× +ivid +Ġç +Ġsal +Ġta +Ġpot +Ġ$ +Ġmar +Ġclear +Ġface +Ġgrow +Ġ* +Ġinside +Ġfriends +Ġleave +enn +Ġeasy +Ġarea +ality +oud +Ġeat +ÙĨ +Ġpur +orn +Ġsaw +Ġanswer +Ġfront +Ġbeaut +¼ë +Ġmatter +Ġson +ĠNew +Ġresult +ides +che +Ġfut +ps +Ġfocus +Ġinteresting +å¥ +Ġap +\". 
+Ġcreate +оÑģ +Ġpress +ross +Ġpick +line +Ġtook +ĠMay +row +Ġich +ĺë +Ġref +Ġmor +ract +arent +AR +Ġexact +Ġspace +work +ни +Ġbir +Ġdev +г +Ġtold +Ġpublic +cially +Ġview +ĠHey +med +llo +cc +Ġfac +Ġcouple +Ġheart +ler +Ġready +Ġalmost +aring +Ġhalf +ĠMe +avor +ique +Ġcharac +Ġpract +ON +ane +Ġil +на +Ġvi +lish +head +Ġleast +Ġbasically +ased +right +Ġyet +Ġtaking +Ġcountry +Ġwin +Ġisn +Ġpossible +Ġcam +Ġincre +Ġpat +Ġwanna +Ġconsider +Ġabs +Ġwithin +Ġhuman +Ġthinking +Ġoh +¡ľ +Ġqui +ases +Ġ0 +itely +ä¸į +Ġkill +Ġmil +Ġinvest +ister +Ġsuc +ional +elf +Ġwhether +Ġcontrol +Ġagainst +ots +ëĭĪëĭ¤ +ior +Ġpresent +Ġا +Ġwatching +ube +erv +Ġnicht +Ġgovern +ĠThese +Ġ: +uit +ugh +Ġworks +oo +Ġwir +Ġair +ĠTe +аз +ision +where +Ġtot +joy +ìĭ +Ġvol +Ġе +Ġclose +ĠAd +Ñī +ined +Ġuna +Ġê·¸ë +°ë +orry +Ġbro +Ġfilm +ift +20 +Ġtype +Ġhappened +ĠAm +Ġgirl +ĠAre +wards +Ġpour +Ġcolor +elt +аÑģ +Ġsense +lex +ĠWith +uss +rib +Ġrese +Ġnorm +Ġfuture +Ġdeal +ending +ey +Ġx +ero +ĠCl +uk +Ġwhatever +selves +Ġyoung +ìĬ +ĠMar +ĠChrist +Ġguess +Ġperform +Ġener +ron +Ġhit +Ġwond +Ġdirect +ĠEvery +Ġoften +Ġfa +Ġalong +Ġclick +ĠLook +Ġsitu +Ġhappy +ead +Ġago +Ġenc +Ġmyself +Ġcover +об +Ġmid +Ġcost +Ġten +ĠSch +Ġexpect +Ġwasn +Ġstrong +iful +Ġopportun +inal +yle +Ġshare +Ġtrue +Ġappro +Ġchall +Ġminutes +Ġchann +ĠëĤ +ε +li +Ġmess +ories +pecially +Ġwrong +Ġyes +ĠìĹ +iron +Ġallow +Ġsubs +Ġfore +Ġfight +Ġsocial +Ġcra +ana +Ġaff +Ġess +Ġways +Ġshort +Ġfall +Ġlaw +ĠWho +Ġenjoy +Ġcal +Ġaccess +fe +Ġnon +Ġacross +ery +viously +ĠEx +ided +Ġlink +ĠPr +Ġterms +aces +Ġland +azing +Ġ15 +Ġmult +Ġspecial +åĢ +iving +ìĿĢ +Ġtyp +Ġste +ĠÄ +Ġforward +åı +Ġfre +好 +Ġresearch +à¯į +аÑĤ +Ġmain +Ġrecord +Ġhu +Ġdefinitely +Ġeither +Ġlisten +Ġkey +Ġmarket +ĠÑĩÑĤо +ization +Ġvideos +Ġguy +Ġfig +Ġstra +ĠPl +ully +amos +Ġmention +Ġsong +Ġintern +ral +urs +Ġhon +Ġvalue +Ġbar +cle +ож +Äĩ +ľë +Ġzu +им +ä½ł +Ġsingle +Ġauch +cuss +Ġgets +Ġsometimes +å¾ +amb +mm +cing +Ġperfect +ĠBl +outh +ìł +Ġsci +par +Ġred +Ġpost +Ġmot +Ġelect +ĠEu +itive +ĠSome +Ġdescri +Ġcurrent +és +Ġtre +ĠEn +Ġmit +EN +Īë +ium +Ġheard +Ġsimple +lar +Ġeverybody +ilar +Ġneeds +Ġdiffic +ĠGood +ument +cent +Ġoper +аÑĤÑĮ +ety +Ġblack +Ġgiven +ones +Ġwel +éĢ +ĠìķĦ +Ġ30 +AT +Ġstat +ouch +ĠMr +аÑĢ +Ġsho +Ġcond +×Ķ +my +Ġchildren +Ġeu +ед +ìķĦ +tern +Ġuh +Ġhar +Ġprom +Ġpull +rew +Ġcompany +Ġbeautiful +ustom +íķĺ +ки +Ġstre +Ġamazing +ries +Ġsuccess +Ġmach +not +Ġdiscuss +Ġnat +¦¬ +Ġune +Ġdifficult +Ġris +ν +Ġcamp +Ġbuy +ä¸Ģ +Ġmag +po +ĠYour +Ġbehind +ica +ın +ĠOK +Ġlang +Ġwomen +Ġenv +Ġrece +Ġchannel +ially +ule +Ġ12 +thers +Ġbott +Ġreport +ently +fully +The +Ġsent +Ġevent +Ġenergy +lt +Ġwords +arr +dle +Ġahead +ards +ر +äºĨ +Ġtool +conom +еÑģ +Ġexactly +Ġfavor +Ġlow +Ġproper +ĠìŀĪ +Ġ! +Ġrelations +Ġmas +Ġkids +Ġentire +ude +Ùħ +ĠWhere +Ġones +Ġcity +olut +Ġsix +ability +ör +ili +ĠEs +Ġhappens +ains +Ġmodel +Ġpict +Ġespecially +Ġ100 +kt +Ġsoon +by +rodu +Ġann +Ġsubscri +ĠQu +Ġavail +iment +Ġvoc +ka +Ġ200 +aper +ĠInd +Ġì§ +hor +į° +jor +ил +Ġsqu +AU +arning +Ġг +IS +Ġл +ей +yes +åħ +ĠÐĴ +Ġorig +ого +Ġasked +ilt +ог +Ġcontinue +Ġìĺ +ram +Ġothers +ES +ohn +Ġlay +Ġbased +Ġpu +Ġappe +Ġlim +Ġprop +Ģë +min +Ġhot +ĠLa +Ġfast +Ġprotect +Ġamount +Ġaqu +Ġfund +Ġcustom +Ġcult +Ġhands +Ġhaven +Ġaud +Ġoutside +ĠAfter +aps +Ġanim +ploy +Ġhat +ĠFirst +Ġtreat +Ġep +Ġmater +Ġbuilding +Ġë° +åIJ +ìĦľ +za +ughter +ĠPe +ney +eter +atic +Ġeduc +기 +Ġmov +ĵ¤ +ama +ration +Ġsn +ÙĪ +Ġsum +Ġphot +ĠÐĿ +Ġ. 
+æľī +Ġfinish +itting +å® +Ġlarge +Ġìĸ +Ġwhite +ara +Ġmais +ĠHi +Ġdam +ĠاÙĦ +Ġbox +ĠHello +Ġsle +Ġopt +ried +¥¼ +Ġactiv +Ġnão +ĠCom +Ġplaying +Th +Ġavailable +Ġport +åĪ +ĠAh +Ġlas +Ġearly +Ġwonder +±° +Ġ18 +cul +Ġfunction +Ġmorning +lle +ients +ux +Ġcir +itions +Ġdeep +Ġpolit +yor +mp +aking +Įë +ĠMan +Ġmillion +Ġ/ +Ġindivid +Ġpan +Ġgovernment +Ġwrite +ĠTod +ament +ĠÏ +Ġwind +ĠEng +chen +Wh +ìľ +Ġident +ãģ§ +vent +urch +Ġhy +Ġya +Ġtrad +Ġrelationship +ú +Ġdou +OR +Ġswe +Ġneg +ination +Ġtext +ipp +Ġfine +ás +ĠDr +ĠCome +Ġmonths +,\" +ени +Ġhours +Ġpod +irt +Ġinvol +Ġcollect +Ġauf +Ġpa +Ġhistory +mb +ify +Ġ? +Ġbelow +asure +aby +Ġlangu +Ġant +Ġcomb +ato +Ġexist +Ġëĭ +Ġtakes +Ġcharacter +aff +Ġfield +Ġeconom +ief +Ġpiece +åľ +Ġreach +Ġê² +ony +Ġmaterial +Ġdig +Ġphys +Ġimpro +Ġsimilar +IC +Ġnet +yn +Ġposition +ÃŁ +Ġbene +read +Ġlearning +ume +Ġclean +ÑĤоÑĢ +Ġcook +Ġseems +Ġol +ĠUS +ĠJes +Ġà® +ential +iversity +acy +ĠÑı +olutely +rect +ĠPlease +Ġrepres +Ġtouch +men +Ġа +ión +ĠThanks +Ġang +Ġmajor +Ġitself +ills +\", +ians +Ġscreen +Ġhor +Ġknown +Ġenviron +Ġfinal +Ġfigure +ĠTw +Ġeyes +Ġimag +Ġseeing +Ġhair +rem +Ġapplic +ends +put +Ġnews +Ġcompletely +ughs +Ġknew +ified +ĠJe +ĠDid +Ġsituation +Ġflo +ms +Ġphone +Ġball +do +Ġparent +Ġsorry +ury +ин +ips +ад +Ġinstead +Ġhuge +Ġtu +Ġãģ +ĠGr +Ġdetail +ĠÐŁ +Ġindividual +Ġfire +Ġclos +Ġwer +une +Ġrunning +Ġconvers +Ġrecomm +Ġcomo +Ġsomebody +ĠJohn +ĠìĿ´ +ĠOur +ples +ĠPh +Ġanal +Ġ50 +Ġoffer +Ġ< +itional +gest +Ġvous +let +icy +Ġfeeling +LE +ros +Ġthird +ок +Ġseries +ĠAny +ised +old +Ġdraw +Ġservice +Ġcannot +bal +ãģĨ +Ġliving +ım +Ġdifference +Ġopportunity +Ġnear +orth +ken +Ġlocal +ت +ĠCon +Ġobject +Ġdass +ãģĻ +IJ× +Ġquickly +raph +Ġissues +éĢĻ +ĠAmerican +Ġprep +ences +Ġprofess +lling +of +Ġfoot +bre +Ġusually +Ġgeneral +da +ances +Ġdest +Ġocc +Ġmembers +Ġdans +Ġequal +zt +Ġbecom +Ġmoving +Ġspecific +ÃŃa +Ġfur +Ġnecess +Ġcommon +Ġattack +ĠÑįÑĤо +ĠToday +Ġuns +ĠGu +iod +Ġaccount +Ġgrand +Ġself +ĠEl +Ġtast +Ġcontent +Ġcu +Ħë +ĠMaybe +ĠJesus +ores +port +©´ +Ġgives +Ġnormal +ÑĢÑĥ +Ġimpact +är +Ġdies +Ġlab +sh +ios +ĠPres +ĠUnd +ĠOf +Ġfinally +Ġdoll +Ġvocê +ply +ĠAg +Ġtaken +Ġground +fort +Ġgave +ĠInst +Ġlost +Ġworked +Ġliter +Ġissue +Ġindust +Ġreturn +Ġhappening +Ġwants +ив +Ġproblems +ĠCar +Ŀ¼ +ĠAlso +Ġsize +Ġobviously +ĠSu +ĠSc +Ġrecommend +ources +astic +.... 
+Ġmi +lier +ĠEven +cia +Ġhur +va +Ġmass +Ġwouldn +unt +cks +Ġfelt +osp +light +олÑĮ +nie +Ġbottom +ĠбÑĭ +ored +ison +Ġgrad +Ġuma +Ġva +ĠìĤ +ression +ulation +ID +idence +Ġbur +Ġgone +lu +ìĸ´ì +Ġredu +Ġja +ìĿĺ +ita +Ġsoft +Ġça +ico +eral +ñ +af +Ġpoints +gu +Ġdé +apt +ax +ĠAlright +Ġcamera +Ġach +Ġпо +Ġsever +50 +Ġsie +Ïģ +Ġmal +Ġcomput +Ġmiddle +Ġcouldn +ming +Ġìĭ +ĠHis +Ġgames +Ġintrodu +Ġcell +por +Ġsleep +Ġë³ +iding +Ġou +Ġdeg +Ġdrink +Ġenvironment +ĠUnited +Ġtalked +Ġchoose +Ġjour +ege +ĠMin +Ġinte +Ġrather +Ġoffic +ка +aching +Ġmentioned +Ġfill +Ġtrack +Ġnie +Ġut +ĠвÑĭ +ibility +Ġvac +Ġrad +Ġpack +Ġsend +ĠDas +ĠAb +Ġengine +ãģĹ +Ġcompet +ô +ĠвÑģ +Ġdoor +Ġlonger +å°į +Ġlanguage +Ġextra +play +Ġwebs +umb +room +çľ +Ġbeginning +Ġrefer +AM +nen +igher +face +erc +Ġforget +Ġcomment +ек +лÑı +ror +że +ĠGe +Ġdark +Ġanyone +ante +ges +ìĬµ +Ñij +bed +je +ructure +Ġprim +ida +è¦ +ãģ¾ +Ġmix +Ġstarting +ĠìĿ´ë +Ġprovide +action +Ġmother +Ġperiod +Ġstick +ĠYouT +Ġtechnology +ê¹ +Ġbed +Ġgiving +Ġexplain +zen +imate +Ġrepresent +load +ĠHowever +Ġlives +uth +irit +ogn +Ġlik +Ġrespons +Ġpriv +Ġtom +ção +iam +Ġexcited +Ġcard +ground +Ġ×Ķ +Ġsens +Ġteach +ido +hod +Ġepis +Ġwelcome +Ġwall +ä¹ +Ġchance +hen +ĠС +ĠÄij +Ġsimply +ĠÑĤак +ring +ja +book +Ġseveral +ste +Ġcreated +ĠоÑĤ +Ġpush +== +Ġhigher +uf +ource +oke +Ġonline +Ġrele +Ġton +ensive +Ġfavorite +Ñĥд +Ġlooked +Ġvon +âĢĶ +Ġfür +Ġbutton +Ġbill +Ġchanges +!\" +Ġslow +ables +Ġdeath +ands +ateg +Ġthemselves +ãģ£ +Ġcop +ãģ® +Ġpersonal +ughing +Ġ11 +gar +ades +Ġneeded +Ġstudy +aged +ÑģÑĤв +ino +Ġdisc +ki +Ġaddress +ר +itten +esome +Ġж +¤ë +ura +Ġmu +Ġcontinu +for +Ġmatch +ãģ¦ +Ġstraight +IJë +ners +Ġdog +Ġdeb +ĠCO +Ġos +ged +came +Ġcorrect +ette +ĠSee +Ġincluding +ĠEuro +ester +Ġjump +ĠWhich +Ġкак +son +ya +ING +Ġeine +osh +ency +Ġmedia +Ġsubscribe +éĤ +Ġprin +Ġhab +ĠPer +ĠWas +Ġpage +itor +Ġtowards +Ġtried +enge +artment +Ġvari +Ġpaper +Ġpicture +Ġversion +Ġbrought +ware +ĠStates +Ġsich +ledge +Ġpercent +Ġgod +ec +ĠComm +Ġdecided +Ġselect +íķľ +). 
+urity +Ġfurther +Ġcomments +lement +Ġdream +Ġcenter +mi +Ġcas +Ġwoman +Ġroad +Ġfail +Ġbecame +lus +ilities +ãģ¯ +ĠCo +Ġmanage +Ġrecogn +Ġaction +Ġbenef +Ġearlier +׾ +Ġspeed +Ġment +Ġsoci +Ġshoot +ui +Ġä +Ġapply +vo +xim +Ġcause +Ġsurpr +Ġhaben +DI +Ġfather +ĠNext +ĠYouTube +Ġcode +Ġrole +gress +Ġgreen +ett +Ġbuilt +Ġflow +Ġbase +Ġtraining +Ġround +ĠWill +Ġpath +ĠRo +Ġinterested +ìĸ´ +Ġrespect +Ġchanged +ission +Ġstudent +ograph +Ġapproach +Ġshows +å°± +Ġtar +Ġcrit +Ġglo +ìĬµëĭĪëĭ¤ +Ġdead +ĠPresident +Ġthous +Ġbal +ster +ex +Ġabsolutely +Ġmic +Ġpractice +Ġquality +Ġlower +ogle +Ġsepar +ball +medi +Ġreview +ĠApp +Ġok +âĢĭ +Ġexperien +Ġconcern +entially +more +ĠJo +apan +ĠIch +istic +Ġfair +Ġwebsite +ires +ĠBy +Ġtravel +Ġrisk +Ġmir +Ġboard +Ġsen +Ġparents +ĠWow +Ġfeed +Ġsave +Ġserious +Ġinit +EL +undred +AS +Ġvan +orrow +Ġworth +Ġsearch +Ġ16 +Ġparts +ÑģÑĤÑĮ +Ġcompan +Ġmovie +Ġmethod +Ġill +Ġwish +dy +Ġitem +Ġminus +anger +Ġvoice +Ġskin +Ġareas +Ġeight +Ġobs +Ġ, +ай +Ġoil +Ġcy +Ġbaby +sy +Ġemploy +ĠKe +Ġplaces +Ġfix +Ġestá +ãģ¨ +ived +Ġlots +Ġseason +unk +alt +Ġtable +ĠТ +â +Ġattention +ãģª +ĠHer +Ġage +Ġpra +back +cil +Ġnetwork +rit +Ġdoc +Ġaren +igen +ĠëĦ +د +ender +Ġtotal +Ġprice +Ġcrazy +ìļ +iqu +though +You +Ùĩ +ãĤĵ +Ïħ +Ġsat +Ġbi +ĠDie +Ġsha +Ġthanks +uh +Ġstage +аж +ĠFl +Ġleav +Ġboy +Ġaf +ön +ĠGet +Ġaccept +Ġenter +Ġtur +ĠsiÄĻ +Ġhonest +ãĢĮ +Ġsam +Ġrepl +ging +Ġdevelopment +ĠAct +ora +ãĢį +ä¾ +Ġknows +Ġimage +ĠLord +иÑĤÑĮ +Ġweeks +Ġsex +Ķë +Ġhundred +Ġsounds +Ġlearned +Ġbud +ĠÑģÑĤ +Ġincred +âĻ +Ġnos +Ġdrop +Ġben +ĠÐĺ +Ġsafe +ata +Ġfuck +soci +Ġdan +Ġcross +10 +mo +vert +Ġ17 +zie +åķ +Ġdom +ĠBo +Ġsetting +Ġinvolved +arily +Ġsind +Ġsus +Ġworry +eth +ê¹Į +Ġsun +Ġhier +Ġcertainly +oul +orts +ĠEr +ĠUm +Ġcaus +Ġnatural +Ġü +Ġcry +ĠSec +Ġsom +æ² +Ġeducation +аеÑĤ +Ġmultip +Ġalone +Ġeye +Ġrate +ĠEurope +è¿ +mon +Ġfit +izing +pped +Ġpressure +the +иÑģ +ites +ĠAf +reci +attle +Ġservices +ĠGoogle +éģ +Ġcases +Ġdrive +Ġchalleng +uz +ĠMo +ìľ¼ë +val +åĢĭ +Ġfol +Ġì¢ +ffic +Ġra +Ġsin +Ġblue +Ġaffect +Ġmis +Ġshot +Ġоб +asing +Ġsignific +ĠChe +Ġê³ +Ġpositive +ì£ +Ġwie +Ġ40 +ording +ĠFrom +êµ +Ġbrand +Ġtrust +Ġple +Ġcommunic +Ġweight +Ġasking +Ġtax +ĠJapan +ãģŁ +Ġíķĺ +ops +ÏĤ +Ġputting +Ġroll +ĠAmerica +reg +ŀ× +atures +ension +ĠSomet +Ġoriginal +ping +ĠÅŁ +Ġproducts +ãĥ¼ +Ġcontact +olution +Ġgoal +Ġpow +Ġperformance +Ġblood +ators +ĠMich +Ġtemper +ĠDan +Ġsugg +ÑĤи +Ġimm +Ġoffice +Ġarri +Ġcomfort +ĠÐĶ +Ġsuggest +Ġplat +Ĥĺ +19 +Ġom +Ġseven +ĠCent +ille +Ġconcept +Ġbag +ün +ively +Ġdiv +mos +æī +Ġfeels +Ġir +akes +ley +Ġparticip +ĠÐļ +fl +just +Ġsil +ĠPa +AL +Ġgotta +Ġfan +Ġchallenge +Ġcompanies +ĠPeople + +Ġheroes +ĠBoston +Ġdependent +Ġmotivation +flix +Ġseam +кие +Ġdrain +oded +Ġguilty +ĠJenn +ingen +Ġgranted +ĠKelly +ĠSav +ĠUncle +ĠHonestly +ELI +Ġnavigate +Ġblessed +core +Ġearning +Ġsignals +Ġdisk +ials +Ġages +æħ +Ġparticle +ĠÑĩеÑĢ +Ġcann +Ġtier +Ġstatements +ê³łìļĶ +ĠëķĮ문ìĹIJ +ĠCho +Ġpolar +anç +ĠKenn +ĠNi +ĠFight +organ +éķ +ĠCha +ĠSÃŃ +ãĥª +Ġslic +Ġcertific +Ġtemplate +ĠFederal +Ġconsideration +Ġexplo +ĠMain +ĠNE +Ġalongside +Ġdressed +ĠPoint +Ġenvironments +Ġpróxim +Ġdaar +Ġprompt +Ġpursue +Ġentertainment +Ġthroat +Ġproblema +Ġmart +ì¼ +Ġprovider +ØĮ +Ġ×Ĺ +inte +making +Ġstroke +Ġtissue +Un +Ġprecious +ĠArts +inking +ĠÐŀн +ĠиÑģ +nah +ĠÐķÑģли +Ġcorners +Ġtricky +inch +lijk +Ġpressing +level +ANG +Ġradiation +ìĦł +Ġconfront +Ġvet +Ġrepresentative +Ġpropag +Ġcrap +ĠDec +Ġramp +епеÑĢÑĮ +ués +essen +cription +Ġbills +ĠMatthew +Ġanime +ất +Ġlowest +has +screen +ograp +ало +inton +ĠJah +èĢħ +itÃł +Ġkay 
+Ġrotation +ĠWere +abei +Ġtrials +Ġlever +ighty +Ġspoon +Ġhunt +cling +Ġdism +ĠболÑĮÑĪ +Ġassault +Ġíĺķ +Ġweekly +Ġmismo +Ġgenetic +ulpt +ĠStudent +Ġrealistic +Ġauthentic +æīĵ +asta +Ġarrested +Ġguidelines +Ġ׾×IJ +Ġдав +ĠComing +für +Ġrequests +ĥIJ +Ġanalyze +Ġinteress +Ġhalt +ĠOper +onom +Ġduck +Ġwithd +ser +ĠÏĮ +ĠHistory +Ġyoutube +ãĤį +Ġsaber +walk +font +Ġoverview +39 +üy +etti +Ġfrozen +Ġflesh +ÄŁi +ĠPM +ĠìĻĢ +é¢ +ÑĨии +Ġ기ë +íģ¬ +Ġprose +oooo +rates +WS +Ġautomatic +Ġcollecting +Åij +Ġneighbors +». +ĠExpl +Ġcircul +cover +weg +Ġsticks +Ġeller +Ġwww +Ġdorm +ĠExper +Ġstatistics +Ġemails +Ġgrave +imiz +HS +Ġuit +,' +Ġlaser +èī +ĠÑĤем +ÑĭÑĪ +ÑīÑij +Ġgenau +Ġtienen +Ġmeditation +ĠOrgan +Ġestimate +Ġ무ì +lets +ĠnÃły +Ġmindset +Ġreson +Ġmés +Ġnumerous +Ġvielleicht +ĠThird +uous +ĠDead +анд +HN +Ġracing +Ġagents +ĠUt +Ġtear +ĠHP +Ġchemistry +Ġsurvival +æĸ° +Ġconvinced +Ġ; +Ġregulations +ĠES +åĴĮ +300 +Ġense +Ġìµ +Ġdict +GA +ĠahÃŃ +åĭķ +Ġtej +ĠоÑģÑĤ +ĠElect +Ġintellectual +Ġbias +Ġburden +çĤ¹ +Ġìĸ´ëĸ» +Ġcheer +Ġsoph +Ġportfolio +uba +Ġestos +TV +For +Ġash +Ġkommer +Ġcollective +Ġwrest +ĠJetzt +ĠWat +reich +Ġprimer +active +Ġmie +icked +Ġhunting +Ġtestim +Ġcompassion +Ġر +Ġbrut +Ġsalad +обÑīе +Ġsolving +Ġfloating +ç· +Ġattractive +ÙĪÙĦ +Ġperd +iffer +Ġsculpt +hhh +ĠWeek +Ġenthus +Ġnad +Ġmerch +ĠíĻķ +Ġmile +好äºĨ +Ġθ +ĠëĤĺë +éĩį +38 +Ġchains +ĠAlmost +Ġtickets +rin +ĠCC +Ġdistributed +abetes +Ġtemperatures +Ġgained +Ġflexibility +Ġscreaming +Ġabroad +uno +Ġentrepreneurs +ĠNetwork +ĠCanadian +Ġprev +Ġsö +ĠÑĤебÑı +ĠPoke +ĠPod +ĠTurkey +çı¾åľ¨ +Ġabstract +Ġsnake +ĠAmy +ĠëĬIJëĤĮ +Ġbrave +ĠìŀĪìĸ´ìļĶ +ĠKal +Ġ2007 +ário +Ġmarked +gines +Ġalloc +ONG +Ġscientist +Ġesca +Ġracism +×ij× +ĠSams +ĠPenn +Ġloads +Ġந +über +Me +ixò +Ġperò +anne +Ġexpressed +меÑĢ +Ġmoet +Ġreturning +nia +Ġexpon +Pro +Ġloyal +ML +Ġlamp +Ġshy +Ġcomposition +ĠLy +Ġmagnetic +Ġpremier +Ġmeasured +Ġsummary +Ġattacked +Ġfinishing +ÐĹ +ç¥ +Ġsits +Ġhydrogen +Ġmai +ĠDeutsch +ası +Ġobtain +vie +Ġsoit +Ġë°Ķ +Ġlane +Ġconsegu +во +Ġease +akin +ĠFa +Ġuntuk +Ġburst +Ġcum +alım +úblic +idi +ĠRoyal +ĠKon +Ġcommonly +Ġremoving +Ġjur +ilib +Ġanch +íĸī +ượ +ĠÐľÑĭ +ĠAnth +ĠSÃ¥ +Ġinterrupt +Ġstere +ĠOS +onym +tery +ĠMaria +ê²ĥ +Ġexploring +Ġtransparent +Ġfate +ĠJung +Ġgrup +Ġdarker +ĠDoug +Ġmane +æĶ¾ +ại +dri +look +ĠDesign +Ġtutaj +Ġhorizontal +reon +orte +ĠCorrect +ĠSteven +Ġvine +02 +iÄĩ +Ġsiempre +ĠKey +åĥı +ĠGames +Ġnaar +Ġshocked +elve +ĠRose +ìĭ¬ +Ġstopping +ohl +ĠMix +Ġsuffered +Ġsigma +Ġweakness +ĠOw +ีà¹Ī +IF +Ġà®ħ +aded +ĠNetflix +anes +Ġremained +iry +Ġrip +ellt +Ġsilent +Ġproven +Ġtoxic +Ġalumin +Ġmultipl +aland +Ġ34 +06 +ĠBru +Ġìłķë§IJ +Just +boy +Ġshoe +Ġcreature +Ġheaded +ĠоÑĤк +æ± +Ġessence +Ġremarkable +Ġnúmer +Ġdrew +Ġpuzzle +ĠLibrary +ĠFu +ashes +kk +ĠIst +¦° +ĠBry +Ġceremony +Ġà®İ +Ġcri +equ +ãĤ¢ +Ġprize +Ġdimensions +ogram +Ġleather +Ġpopulations +uum +Ġvegan +Ñıд +Ġcómo +åĦ +Ġstrip +å£ +Ġvacation +ħķ +Ġmeals +ilipp +Ġents +aram +richt +Ġgrain +ĠSpain +Ġcheek +ĠAff +ION +ĠBring +Ġ38 +ielen +ulu +ĠболÑĮÑĪе +Ġannouncement +ĠÑĤÑĥÑĤ +ĠProphet +ardo +37 +Ġwoke +Ġtranslation +ĠNOT +ĠCL +ĠdÃ¼ÅŁ +ÑĨÑĸ +acer +ĠLoc +Ġperception +NO +Ġdiesen +Look +heart +aved +Ġboundary +Ġflows +Ñijм +Ġarguments +Ġelections +ıs +Ġheck +Ġsuitable +Ġfiber +ĠStra +xy +ĠHum +Ġmonthly +uper +Ġgolf +Ġlately +ĠGard +ĠRen +ĠAst +ĠFant +аÑģÑģ +Ġobser +ë¡ľ +Ġeasiest +įĶë +Ġwebsites +pol +Ġcocon +Ġà®ĩ +ĠVeg +Ġwalks +Ġintro +Ġdirected +ĠAnna +Ġëĵ¤ìĸ´ +ĠEastern +ĠSaint +ĠBow +Ġroast +ĠURL +Ġjeden +uras +aja +Ġsemi +Ġrapidly +Ġtargets +ĠControl +Ġbah +Ġreflection 
+Ġcreativity +holders +Ġìĺ¬ë +Ġamongst +Ġfeeding +ÑįÑĤомÑĥ +Ġвиде +Ġë§Įëĵ¤ +ĠSmart +Ġreliable +Ġvezes +Ġר +chuckles +azione +ĠWilliams +Ġaç +Ġslee +еÑī +Ġtimeline +Ġthorough +á»į +ĠOt +ạn +Ġimagination +Ġmechanics +rist +Ġclaimed +ÏĦη +ête +ĠHurry +ĠiPad +Ġconstru +ĠCla +ĠAls +ä¼ļ +utz +Ġcultures +Ġìĸ´ëĸ»ê²Į +Ġbelongs +Ġyer +ĠDoesn +Ġgeomet +Ġbid +Ġfoam +Ġhob +ĠBritain +Ġsubstance +Ġanniversary +ĠëĦĪ +Ġnoted +Ġgovernor +Ġstocks +31 +Ġdiye +ìĬ¤ë +Ġreb +zel +Ġmultiply +Ġoperator +Ħ¤ìļĶ +Ġwaters +Ġdär +Ġunser +ĠElizabeth +é«ĺ +Ġincreasingly +ĠGro +Ġengines +irs +Ø« +Ġtreasure +PC +inction +iri +Ġaccum +Ġvariation +Ġpom +Ġtitles +ĠFest +ós +Ġelder +nym +run +Ñıв +Ġinnovative +Ġnombre +Ġcoinc +Ġfranch +Ġentonces +Ġnichts +Ġexclusive +ĠCheers +ĠBi +uje +æŃ¡ +Ġpok +ĠPrem +Ġrocket +ELIPE +Ġhospitals +rium +Ġjuste +Ġhammer +Ġquantum +Ġresponses +lly +endi +Ġactively +Ġfridge +iate +long +Ġquem +Ġdeaths +Ġsuperior +cken +ìĿ´ìĹIJ +ktop +Ġgathered +£¨ +Ġdazu +Ġrecipes +Ġbuzz +cen +Ġanytime +onsense +Ġcircles +Ġsolved +Ġìĭł +Ġcoronavirus +ĠLuke +Ġbubb +Ġcontempor +rzy +ĠJane +Ġдом +Ġscrews +Ġhybrid +Ġcasual +Ġselbst +being +ĠÄIJ +ĠColumb +ĠÑħоÑĩ +Ġbucket +Ġevaluate +Ġidol +Ġreputation +ĠìĨĮë +ÙĪر +Ġhecho +Ġpoem +Ġsubjects +plant +ĠBeh +ĠSpeaking +Ġbatteries +Ġfollowers +öl +Ġgently +Ġsixt +Ġparameter +Ġikke +ĠTour +ĠDJ +otte +ĠJahren +Ġpreparation +ĠдÑĥм +Ġ800 +cop +iking +Ġ문 +ĠнÑĥ +ĠлеÑĤ +åIJĮ +ĠIde +Ġì¡°ê¸Ī +Ġlaughter +Ġmolecules +ĠRest +Ġobserved +dzie +Ġadvertising +erto +Ġmoins +ĠMIT +Ġexcit +Ġtum +Ġtyl +Ġinvested +Ġpharm +Ġunexpected +Ġphi +otype +weise +Ġgeç +jourd +Ġhorses +nÄħ +=\" +ĠSM +Ġfib +Ġclips +çķ¶ +å¦Ĥæŀľ +Ġregime +Ġrotate +rou +nik +Ġarmor +ðŁĺ +еÑĢа +度 +ĠOch +Ġrichtig +üzel +aneously +mek +éĮ¯ +ĠXiao +Ġexisted +worth +ãģ£ãģ¨ +Ġnaught +ĠheiÃŁt +ĠBal +Ġresid +ivot +omatic +Ġhired +Ġgradually +Ġonions +Ġcompat +Ġintim +Ġjew +Ġcontribution +ĠIre +acji +Ġslice +Ġimmun +ĠRus +Ġgrows +ĠSimilarly +Ġhardest +Ġstruck +Ġmeasurement +...] +they +ĠìłĢë +Ġsneak +Ġapplies +Ġнем +æĵ +×ijר +ĠЧÑĤо +Ġoutro +Ġinnocent +Ġmog +ĠSamsung +Ġmercy +Ġhandling +Ġintervention +idays +got +Ġcurric +Ġboundaries +Ġconfusing +Ŀ¼ëĬĶ +æĩ +Ġstitches +ÃŃvel +Ġtunnel +itä +Ġgost +imy +Ġczas +Ġmé +Ġcatal +ĠSimon +ĠLIAM +mic +ĠФ +Ġeyel +isas +ĠCPU +ĠDou +Ġnäch +Ġinfinity +Ġrif +ĠPeace +ĠCu +Ġminimal +Ġlistened +Ġpole +halb +Ġloaded +Ġsteady +ĠBesides +êm +Ġlap +Ġcoop +Ġfriendship +world +Ġgeh +Ġtylko +ĠLaura +Ġsurrounded +ĠEvent +Ġchap +ĠWonder +break +Ġdrove +Ġbroader +Ġchi +Fi +Ġgehen +Ġwestern +Ġintelligent +Ġpersist +Ġfounded +ãģĵãģ¨ +Ġhistoric +ĠfrÃ¥ +cksÃ¥ +Ġhandy +Ġsymp +Ġrows +Ġnutri +bur +ĠLeon +Ġsistema +Ġextensive +ĠÑĥв +íı +Ġnights +Ġcác +Ġcounting +ĠMust +allow +еÑģÑģ +Mom +Ġнадо +Ġbarrel +ãĥŀ +ARD +Ġinstallation +Ġinsect +Ġëħ¸ë +ujÄħ +ĠÄiji +Ġpacked +Ġfiction +Now +ĠYay +Ġpert +rons +unde +aches +Ġstyles +Ġaprès +oku +ĠVice +ınız +comm +Ġassigned +Ġinteractions +Ġacab +FELIPE +Ġrescue +Ġindustries +ĠAndy +Ġpraise +Ġflame +Ġsnack +íĤ +çģ +Ġswo +render +Ġboards +ĠÑĤом +enne +Ġpasta +Ġdevil +ĠFel +Ġhatte +Ġcolleg +eh +ì» +ãģĵãģ® +Ġproductive +forward +ип +Ġsmartphone +Ġinvis +Ġbum +Ġwhoa +ìŀĦ +ĠocksÃ¥ +ĠLang +ĠSyria +Ġsesi +ία +Ġapproval +48 +Ġодин +Ġëĸ +ĠHarr +ĠAdminist +Ġפ +ĠDean +fi +Ġcitizen +Ġshark +05 +Ġboil +Ġindicate +å¡ +Are +Ġlayout +Ġrefr +ĠPacific +AAAA +ĠAustralian +gression +Voice +алÑģÑı +Ġshelter +To +aupt +Ġevaluation +apor +Ġcurrency +Ġмного +igos +ãģ° +Ġoct +Ġroyal +è³ +asil +ĠChildren +Ġrien +Ġëĵľë +Ġbarrier +Ġejemplo +Ġek +ND +esp +ена +Ġpic +Ġkiller +Ġintegrate +Ġfewer +Ġdisabilities +Ġ.... 
+Ġtriangle +Ġfees +Ġwidely +emi +Ġoverwhelming +Ġzomb +Ġbere +Ġhood +ĠAye +ĠHarvard +ev +ĠÏĦοÏħ +Ġcups +ĠAuch +zona +Ġ1990 +ĠweiÃŁ +Ġcrunch +æ¥ +Ġзав +Ġmeasuring +Ġstations +ĠStephen +Ġshortly +Ġsigning +Ġcomedy +omo +Ġsuggestions +Ġsignature +ĠпÑĢив +Ġdisorder +aska +Ġworlds +Ġprecisely +norm +rav +ĠCivil +Inter +ĠCertain +Ġinjured +Ġsuggests +ĠGolden +Ġcyber +ĠØ´ +Ġtemporary +Ġcooper +Ġvoted +Ġought +ấy +xual +Ġpanels +Ġ95 +Ġhandsome +ĠпÑĢов +Ġpermit +Ġkein +Ġbadly +Ġnotifications +iza +ĠNotice +Ġinclusive +Ġanswering +ĠíĹ +uld +íħĮ +Ġnowadays +Ġ37 +Ġbolt +Ġstatic +ĠHop +Ġavant +ajo +Ġ맼ìŀĪ +Ġfifty +ĠFinal +Ġscores +ĠTap +Ġcyl +Ġconvince +Ġanyways +oda +Ġìķ¼ +Ġserves +ĠÑĤакой +ĠZoom +Ġsavings +ulo +Ġsouthern +viewer +Ġhoje +Ġseja +Ġrepresenting +Īëįĺ +lik +ĠSomebody +Ġbeast +Ġsticking +Ġinsist +Ġtalented +Ġexplaining +Ġattorney +éĥ¨ +Ġstairs +ĠDog +íĭ +Ġcig +Ġshaped +Ġsons +Ïģι +utt +ĠìĶ +Ġparad +ìĿ¸ëį° +Ġhorn +ĠJour +anno +Ġworldwide +åĬĽ +Ġparticipation +¦Ħ +Ġmów +Ġburned +Ġwriters +allah +ĠFund +Ġclever +ĠLeute +bin +Ġbeating +foot +ĠìĽIJ +ĠStudio +Ġvag +bey +rze +Ġopposition +Ġжиз +who +Ġê±´ +Ġtrace +ĠденÑĮ +Ġepid +Ġgesch +ĠNar +ĠBE +Ñĥй +ĠSign +edly +Ġclay +Ġinstantly +Ġgathering +ĠGalaxy +Ġbored +ĠBuddh +cé +Ġmam +Ġslope +Ġëĭ¤ìĿĮ +Ġschön +Ġpir +gef +amer +Ġhö +Ġcolleague +Ġpresents +adium +Ġவ +Ġfalar +beep +Ġdried +isms +Ġrope +Ġworkshop +Ġestud +Ġbands +Ġthemes +åħ¬ +ÙĬر +åIJİ +Ġreminder +ÑĤÑĥ +ĠBh +Ġcoconut +ĠÑģÑĤо +ĠChannel +Ġimmigration +äs +..... +主 +çĻ½ +stop +ĠкаÑĢ +Ġcoins +ĠÑĩаÑģ +Ġdestruction +lined +Ġbarriers +antine +Ġprinted +Ġcongratulations +ĠHeart +Ġinqu +tha +Ġhardly +ĠAven +Ġtinha +ĠSony +ĠNF +Ġgraduates +Ġsqueeze +eremy +ÏĦι +Ġepic +ĠJu +Ġolm +ĠLaughter +Ġbeliefs +ĠCru +ĠTrue +ĠSoul +oween +Ġromantic +Ġзв +Ġanos +ĠYup +éĺ¿ +dim +Ġinfer +Ġзам +Ġsoc +uka +Ġprecise +Ġdropping +Ġclue +Ġerrors +charge +ĠPu +ometer +Ġlambda +acional +ĠDong +Ġchamber +Ġthankful +ĠNu +ĠHawai +Ġinfo +Ġactivate +ĠQual +Ġqued +ÑĥлÑĮ +Ġcloth +åĸľ +Ġwichtig +55 +Ġotra +ographer +Ġcurios +Ġ1980 +Ġempres +dess +eur +Ġcluster +arter +obile +ĠYan +ĠAdv +Ġdiscipline +ĠìłķëıĦ +ĠPlace +ĠSelect +TE +ĠбÑĭла +Ġwhis +Ġbay +ĠDor +encing +Ġrepet +Ġficar +pad +Ġfog +uyor +Ġsnap +ibt +Ġsobie +Ġappointment +ĠRy +Ġceiling +ourse +Ġwrites +ĠAfghanistan +Ġmos +aze +Ġpenal +Ġcrystal +ICE +ê°IJ +éŁ +ĠTesla +Ġtheories +Ġappeal +Ġnewspaper +Ġcookies +æ© +ĠاÙĦÙĦ +Ġmaj +ĠGetting +kommen +ĠHeaven +ells +Ġdivine +Ä« +Ġakt +Ġhopes +ĠChen +wegen +*** +ĠFrage +Ġни +ู +minister +nesota +which +Ġexplicit +Ġverdad +Ġgraduated +ĠPhilipp +QL +ĠMI +Ġdevot +Ġcure +Ġclosest +ĠÃĦ +Ġsexy +ãģĽ +ĠDeath +oko +ugu +ĠAnne +itarian +esa +егод +ĠDur +Ġ000 +zeit +Ġtournament +Ġmelhor +ส +Ġindu +Ġflaw +Ġwars +ĠMind +ĠIron +ÑĤак +ĠVR +Ġsiz +ĠSouthern +Ġê·¸ëŁ¬ë +Ġawak +Ġìķŀ +Ġcube +believable +ifall +dis +Ġabandoned +mind +Ġparl +Ġclassical +èĭ +á»Ļt +ĠAuto +ĠBor +ç© +400 +ĠSociety +Ġsubtle +Ġmissions +Ġremembered +ĠEither +Ġdafür +ORD +Ġintensity +ESIN +ĠCup +Ġrarely +Ġtoys +ĠCharlie +ợ +Ġglaube +Ġrounds +TIN +Ġcapability +Ġderivative +Ġreferring +ĠdÃ¥ +ĠTALI +Ġcotton +Ġconfer +Ġcolumns +Ġliberal +Ġnunca +Ġμε +Ġindo +iben +ĠBeispiel +Ġê·¸ëłĩ +ĠÑĥÑĩ +Ġhoy +Ġfry +ĠScottish +èĬ +Ġciv +Ġconservative +Ġairpl +Ġsar +rus +Ġinvestments +Ġinfinite +Ġà®ķ +ĠTALIESIN +ĠGary +uell +Ġак +ĠCir +Ġritual +Ġ>>> +Ġtempt +ĠTech +ĠPokemon +Ġimprovements +Ġspare +Ġtranslate +Ġsonra +ĠFilm +wort +Ġми +Ġperiods +Ġjealous +ãģĦãģĦ +Ġtir +MI +Ġconducted +ĠìķĪëħķ +09 +ĠPolit +ĠWhereas +Ġmoisture +Ġsins +Ġkap +ĠÑįк +Ġbenim +Ġeliminate +Ġathletes +ĠManager +Ġfeatured 
+apore +äºĽ +Ġë°ľ +Ġperf +ĠThus +Ġdebut +обÑĢ +Ġseñ +Ġmysterious +words +Ķê°Ģ +Ġchecks +Ġvolunteer +Ġwashing +ĠMarvel +ĠAB +issors +!' +ĠFull +yeon +Ġweigh +ĠJOHN +Ġvos +Ġprocedures +Ġaddressed +ĠBerlin +puter +ĠBan +Ġmedication +Ġdrone +ĠÑĥб +ĠJean +Ġcaps +Ġdisappointed +Ġwore +ĠêµŃ +Ġorganize +ĠHalloween +Ġfantasy +yard +Ġnosotros +Ġjumped +Ġphotography +ĠName +rec +AB +Ġblessing +ĠShut +Ġbitter +pop +ãģĿãĤĮ +Ġdei +Ġfulfill +çIJĨ +Ġdengan +Ġbelo +ĠMeanwhile +Ġdepois +Ġdiabetes +Ġbund +ĠZealand +Ġdigest +Ġtires +Ġdod +agne +ết +Ġpeel +Ġзаб +Ġnodes +Ġtrends +ĠSwitch +ĠAward +ĠOrig +ĠHal +Ġestas +Ġ360 +Ġsimult +Ġcomic +ĠmÃł +Ġbalanced +ĠPrincess +Ġkilometers +ứ +Ġpartir +ì¤ij +soft +ĠView +Ġbiological +inst +44 +Ġmanera +Ġcomprehensive +ĠSab +Ġcrimes +yers +ĠCompany +ĠPhot +Ġpouco +iac +Ġbeim +inate +Ġsubsequ +ĠMayor +Ġcenturies +ères +ìŀĸìķĦìļĶ +Ġê·¸ëŁ¼ +ĠFrau +ĠOH +ĠëģĿ +ĠNah +ĠSeries +Ġovernight +íĴĪ +ĠâĢ¢ +Ġtrave +attered +Ġwarri +ĠGrund +ĠIndones +Ġscra +oby +ĠBrook +Ġcurs +Ġë¸ +Ġexplains +ramatic +Ġparticipating +Ġminut +Ġcontracts +Ġgegen +Ġdisappeared +ĠSN +Ġrobust +aph +Ġshrim +Ġdevast +cope +Ġmeets +Ġpeaceful +mate +Ġweld +Ġת +don +ÑĥÑĤÑĮ +Ġregistered +ĠNik +jin +Ġcav +Ġecht +iox +Ġflowing +ноÑģÑĤи +Ġtoe +Ġentity +ова +fits +ĠPatrick +ÑĤÑĢ +Ġleverage +Ġcorrel +iah +Ġstrings +istinct +Ġgue +archy +Ġtengo +ımız +Ġorbit +为 +ĠеÑīÑij +cake +Ġ׾×Ķ +ĠMinnesota +Ġbrake +owie +Ġcraw +기를 +Ġprogramme +ĠÑģлÑĥÑĩ +åıª +iences +ĠOui +ĠPers +imiento +ĠInvest +Ġslower +æĻĤåĢĻ +ĠBeth +Ġnurse +ĠSpring +Sp +Ġunemploy +ди +Ġgenius +ĠAaron +Ġê·¸ëŁ¬ +Ġei +ãģĹãĤĩ +Ġtanks +Ġaujourd +Ġcomplexity +ĠÑĢеÑĪ +Ġoldest +Ġletz +åħ¥ +Ġphenomenon +print +ĠBundes +itat +ê»ĺ +Ġ42 +ĠWi +Ġincom +Ġgek +Ġembrace +Ġties +oute +Ġdose +ĠFriends +ÑĭÑĤ +егоднÑı +Ġorg +Ħë¡ľ +óg +Ġexceed +Ġgods +Ġê±°ìĺĪìļĶ +Ġsociet +ĠUnivers +ität +Ġworden +Ġsmoking +Ġintens +abul +emia +èij +47 +fly +Ġ2006 +ĠSeriously +Ġprzez +æ¼ +cre +Ġnan +Ġmodes +оваÑĤÑĮ +ĠHang +emen +Ġbeneficial +Ġvoters +ĠBroad +Ġbent +Wow +Ġmul +åĵ¥ +ĠUC +Ġdamaged +ĠUkraine +Ġwipe +Ġstones +Ġmanagers +Ġrab +ÑģÑĤÑĢо +lat +Ġdece +Ġgraphic +Ġfoss +Ġdisagree +ĠAmen +Ġsecrets +hole +inkle +Ġfortunate +Ġì± +ìľĦ +èIJ¬ +Ġhabits +Ġburied +Ġhin +Ġvirtually +olas +ĠRP +ĠTab +low +Ġsacrific +Ġestimated +oln +Ùĭ +cur +ĠFeel +Ġcastle +Ġuseless +Ġdisg +ĠJacob +Ġgaan +Ġupside +Ġparece +ãĥ³ãĥ +Ġshipping +ĠCR +Ġdisrupt +acter +UND +fu +å®Į +ĠPick +ĠCharl +ĠBull +Ġenterprise +Ġpunishment +acking +Ġfraction +Ġtablet +Ġchord +Ġsimilarly +åħ¶å¯¦ +ĠToronto +Ġcourts +ÄŁl +eszcze +Ġpronoun +ĠSister +ĠMP +Ġgreatly +ĠDank +icop +Ġgarbage +Ġresolve +ĠSaf +ĠGun +Ġcompound +Ġë°° +ĠMusik +âĻ« +Ġchaos +ĠWhenever +Ġeuros +Ġorchest +Ġrefriger +alan +ื +ĠAmazing +Ġpud +agan +Ġjeszcze +isy +Ġaccuracy +ĠAma +isode +ëĮĢ +Ġinterpretation +ĠLiber +æ· +cam +Ġevolved +ĠKay +ÑĨÑĭ +Ġcreator +itas +Ġalarm +Ġcelebration +zent +Ġfuncion +Ġov +umbling +Ġ% +à¸Ī +Ġrestrictions +Ġнав +ĠKinder +Ġbanana +ÑĮÑı +Ġdiameter +Ġnorthern +urers +ĠPas +æĪijçļĦ +Ġworkforce +Ġjung +Ġguarante +Ġequilib +Ġsuite +Ġeuro +Ġdeliber +Ste +Ġdowntown +Ġchin +Ġcodes +edia +Ġsheep +reshold +wnie +ób +Ġunderlying +lia +jer +ÏĢÏĮ +çĿ +throp +Ġzap +Ġvacuum +ĠHab +Ġwrapped +ì¢ +Ġinventory +ма +Ġcoord +Ġplates +Ġsymm +Te +ĠwÅĤaÅĽnie +Ġreaches +Ġlonely +Script +lee +esser +Ġ걸 +ĠGesch +ĠMoving +Ġrép +ĠVill +åIJĪ +ĠRachel +Ġtemos +ONE +Ġstrain +Ġangel +ĠfÃ¥ +Tr +Ġacho +Ġhighlights +ĠWer +ĠCarl +Ġblur +Ġregards +· +илÑģÑı +Ġrecre +ĠYani +UCK +ł¸ +Ġelectrons +ĠSpiel +Ġved +Ú¾ +Ġbeam +Ġidiot +ëĵ¤ +наÑĩ +idd +Ġski +itative +Ġhypothes +ãģ§ãģĻãģŃ +enter 
+ĠìķĦëĭĪë +Ġihre +Ġpreview +angel +Ġdemon +Ġdus +Ġdic +ĠKom +LEY +...! +Ġsieht +ĠSonic +Ġtenho +anas +Ġdigit +ĠMaar +Ġundergrad +ouncer +uffy +Ġconversion +Ġdisconnect +Ġecho +omer +Ġcurriculum +Ġperché +Ġwand +..? +Ġrolled +Ġentrepreneur +Ġtheoret +ĠÑīо +Ġinsights +Ġzusammen +oin +rett +produ +Ġvisitors +eous +Ġgrandmother +Ġhumor +ĠниÑħ +zenia +inson +Ġreset +Ġbaseball +Ġmatching +ëĭ¤ê°Ģ +Ġpunto +ì¡ +Ġrede +Ġaddressing +Ġforecast +ĠBol +Ġcolored +Ġdocumentation +Ġexpectation +ĠNorthern +Ġcreo +Ġà®ļ +fon +Ġunsere +UM +Ġcopies +Ġexpanded +Ġveterans +ĠAlm +ĠвообÑīе +Ġpsychological +Ġnosso +Ġpayments +imeters +Ġ--> +ĠJennifer +Ġvolunteers +osse +orious +ĠбÑĭли +èĤ +ĠEss +ws +ĠBC +ĠIC +Woman +Ġvont +Ġethnic +ENN +имо +Ġlob +Ġoui +cs +Ġrehe +Ġìłģ +Ġchick +úsica +Ġkont +ĠDistrict +Ġpile +Ġав +ейÑģÑĤв +Ġ£ +Ġissued +Ġкомп +Ġprosper +Ġprofound +ĠDear +Ġãģĵ +Ġfunded +Ġbisa +ŀĺë +ף +ĠìĿĺ +Ġtwelve +ĠChampions +éĿŀ常 +Ñģл +Ġ2005 +pm +Ġonde +Ġdiffé +ĠChall +Ġdifficulties +Ġgarage +Ġdá +ünk +Ġ물 +Ġtran +Ġsubmitted +zw +ÙĪا +Ġark +ĠìĦ± +Ġgrocery +она +iere +Ġaest +Ġexhibition +Ġrés +Ġconsistency +Ġcookie +ней +Ġreplacement +æ²¹ +ĠSem +ĠìĤ¬ìļ© +800 +Ġgenes +Ġtransaction +ĠEL +Ġdurante +ibles +ĠEat +tail +issance +Ġtoss +Ġsurvived +Ġoffices +Ġsupportive +Where +Ġtoutes +Ġë§ī +Ġjokes +ieron +apers +Ġmature +ĠMarsh +Ġsido +kind +Ġrealmente +ĠChef +Ġquelque +Ġjudges +eft +ERS +Ġjet +Ġpersons +è» +izations +rik +Ġshops +ĠWy +Ġeleg +què +quoi +Ġjuga +Ġíķľë²Ī +ĠQuestion +ĠGlobal +Ġìķ½ê°Ħ +ĠStation +æİ¥ +ĠOhio +Ġsticky +Ġstressed +Ġgün +ĠíĿ +ÑģÑĤÑĥп +é¡Į +ĠPhD +immer +Ġmentor +Ġinvented +Ġreun +Ġinevit +ĠpolÃŃt +Ġexecute +ĠStory +Ġoutstanding +Ġguer +ĠRain +Ġchoses +ĠTit +ĠÑģеÑĢ +ĠSingapore +ĠNone +Ġchronic +°ëį° +Ġego +æł· +EST +ãģĤãĤĬ +ĠWang +ĠNAT +Ġaug +Ġdesktop +Ġeternal +ĠìĤ¬ìĭ¤ +ĠConstitution +ìĤ¬ë +×Ļ׾ +pres +ĠТÑĭ +Ġinterf +Ġlists +Ġfights +ften +ĠIowa +Ġmotivated +ĠHosp +Ġelsewhere +Ġpaths +Ġinstances +Bl +range +á»± +ĠSit +mana +Ġìĭľìŀij +Ġmình +ansas +Ġsna +Ġphilosoph +Ġpasse +Æ°á»Ŀi +akh +ental +Ġihn +ructor +ĠваÑĪ +Ġgenerous +Ġpivot +пол +Ġjamais +Ġcoment +ĠLew +odzi +ĠXbox +Ġвод +Ġconsent +īìŀ¥ +Ġdispar +lass +ĠGovernor +Beifall +Ġê°ľ +Ġbeloved +׳×ķ +sell +Ġhonored +leh +Ġwäre +unting +Ġfraud +ĠRAM +걸 +Ġkills +Ġeconomics +04 +пеÑĢ +Ġcoisas +ĠигÑĢ +ÃŃm +Ġmöchte +Ġìµľ +Ġstimul +Ġfastest +lv +Ġgén +ĠSounds +Ġ1970 +Ġhomework +speaking +Ġencouraging +Ġquery +Ġrevers +profit +Ġdy +Ġìŀij +ëĬĶëį°ìļĶ +Ġsoap +ĠGall +ĠCN +ĠAns +Ġfic +anks +Ġdessert +ĠìłĢíĿ¬ +ĠMaking +Ġcomeç +ê³Ħ +Ġassociation +Dad +hee +Ġhogy +Ġapro +Ġinvisible +American +íİ +Ġvibe +Ġemissions +Ġadvocate +Ġkicked +Ġvel +Ġsummar +Ġfreaking +chron +Ġpinch +Ġwszystk +iscal +Ġproved +Ġmindful +Ġtä +Ġnoises +Ġisolated +Ġcrossed +Ġê°ķ +ĠvoilÃł +Ġchore +ĠRA +Com +Ġrelaxed +atro +Ġprevention +Voiceover +OD +ĠCovid +Ġseparation +Ġ-[ +иÑĩего +çĻ¼ +ĠSD +bleep +Ġindependence +Ġpartial +Ġalgorithms +ĠAnyone +Ġassociate +hum +icular +Ġbạn +Ġbattles +Good +Applause +Ġbastante +Ġadvant +ĠSweet +Ġrefused +ãĤ¸ +ĠÑĤебе +plet +Ġencouraged +åĵ¦ +Ġmiracle +ĠBun +ĠVar +rimination +elect +ĠMult +Ġdelivering +eing +Ġcm +nehmen +ĠLine +Ġë§Į +enced +ĠSound +ĠContin +ijd +UNG +kle +Ġthreshold +Ġcompact +adt +Ġtoes +ĠPur +owned +mented +Ġdesigning +Ġvaccinated +Ġexhaust +Ġbasics +Ġconsists +ĠGuy +aczy +ĠmÃŃ +won +害 +Ġ85 +æĤ +Ġmum +Ġignor +Ġprinting +acular +pow +Ġexpanding +Ġgir +ĠCab +íĺ¸ +ÑĤÑĮÑģÑı +ĠìŬ룬ë¶Ħ +Ġangles +Ġterminal +ĠWon +ĠInteresting +Ġcrossing +Ġbonds +Ġpueden +Ġorb +ların +Ġcreepy +Ġnutrition +Ġallies +Ġwireless +Ġdesired +Ġcompute +ĠArizona +ĠBeautiful 
+Ġproduces +Ġnuestro +ted +Ġeligible +ĠÑģоз +icial +ĠHero +Ġconsume +Ġrobots +Ġpurchased +cción +Ġiz +ược +ίναι +ĠØ£ÙĨ +Ġshadows +ĠMedia +Ġprincess +Ġklar +Ġwooden +Ġusar +Ġgüzel +Ġslot +rade +ĠëĴ +Ġharmon +Ġingredient +orship +eki +Ġgrandfather +Ġexcitement +Ġpoliticians +..! +Ġouts +Ġseparately +ĠÑıк +ĠWelt +ĠPow +jan +Ġorientation +åıĭ +LC +agem +ÛĮÚº +åIJĹ +Ġbranches +aden +rente +ĠIhr +asm +Ġestão +ĠNic +Ġslave +Ġcompress +crowd +Ġclimbing +ĠManagement +ĠBah +Ġpanic +Ġkor +Ġcooling +Ġbind +Ġзад +Ġrack +Ġentit +Ġsends +Ġyourselves +des +ĠMuslims +Ġíļ +isma +cycle +unkt +ĠCore +Ġinjuries +Ġidentical +каÑı +ĠDeutschland +Ġее +isan +Ġtruc +leton +Ġbackup +Ġultra +Ġabund +illeurs +ĠbyÅĤo +åħĥ +orted +Ġearthqu +Ġкл +Ġobservation +Ġmaintenant +elen +Ġsettled +Ġpela +ĠEconom +ĠÕ +Ġsteering +ĠALL +ĠCher +Ġpatience +ĠSnow +Ġbor +Ġworthy +Ġcái +Ġק +Ġκα +dog +ĠKaren +illes +β +Ġagriculture +×ķף +ĠSean +Ġsensors +íķ´ë +agh +Ġpublicly +Ġpeux +ĠAlexander +Ġpriorit +Ġlazy +ardon +attering +Ġcostume +ست +è¿ĺ +Ġunw +ÐĽ +Ġthickness +quito +gunt +istas +neys +ĠëIJĺê²Į +ĠBrasil +Ġtoken +Ġaffili +lon +ĠfÃ¥r +ĠBeach +Ġwitch +ĠSeven +Ġpant +λλ +Ġcaptain +åĿ +Ġveut +Ġpouvoir +acz +ĠBarb +Ġutility +Ġcontemporary +Ġobtained +Ġpaintings +ear +Ġpean +ĠOg +Ġcust +лем +Ĥĺë +ĠIsso +Ġaconte +ĠTele +ĠAssistant +Ãī +íĸĪìĬµëĭĪëĭ¤ +Ġcounts +Ġbuck +ĠDeep +Ġtackle +Ġharsh +Ġdecides +éĹľ +.âĢĭ +éĤĬ +ĠAngel +Ġlaying +Ġcalories +Ġcontrolling +Ġadvantages +ĠÑįÑĤой +Ġapproaching +Ġthreats +akan +ematic +mann +ê³µ +mumbles +ació +Ġmaintaining +Ġfounder +lah +fight +Ġadmitted +âĢ¦. +ķĮ +abol +Ġusage +Ġnonsense +ĠPalest +Ġcontre +ĠDemocratic +ĠER +jekt +Ġarbit +Ġгол +ĠMichelle +icher +esh +ĠPho +ком +49 +ĠEnergy +οÏį +Ġcents +Ġrefers +Ġgospel +ĠSha +ĠShare +×Ļ׳ +Ġclinic +ĠëĦ£ +Ġequality +ugs +Ġshed +Ġplanes +Ġtoute +reck +Ġstrand +Ġbiology +Ġleague +ĠPok +Ġnúmero +ĠCoast +Ġconsistently +Ġnucle +OOOO +Ġobjet +Ġchor +Ġginger +Ġdabei +Ġcooperation +à¯į. +nten +ç¤ +lÃł +ìĸij +rado +Ġpassive +Ġgloves +Ġunderground +Ġlogical +Ġket +Ġfunctionality +¸ë¦¬ +Ġportal +eller +×Ļר +ĠTed +ĠGre +IJľ +Ġpersonnel +Ġemerging +ĠFür +Ġmeantime +usalem +ĠClear +Ġtrapped +Ġìļ° +Ġdispl +Ġmettre +Ġmunicip +Ġwithdraw +Ġspat +unes +Ġaccessibility +æĪij们 +Ġapare +Ġprospect +Ġназ +Ġcopper +ĠPRO +ÏħÏĦ +Ġattacking +ĠVin +ĠStone +Ġinvestigate +style +Ġλ +ë¡Ŀ +ë§Ī +Ġinspect +Ġliver +алиÑģÑĮ +Ġsera +halten +eman +Ġministry +'' +Ġdots +ãħĭãħĭãħĭãħĭ +ÑĥÑģÑĤ +ĠJak +AKE +Ġgaps +ucker +ĠинÑĤеÑĢеÑģ +ĠEmily +Ġinterval +Ġtender +ĠTechnology +game +Ġtrib +ÙĦا +ĠDevelopment +Ùħا +Ġwrist +Ġfires +Ġtargeted +ìłIJ +Ġsod +íļĮ +ĠolduÄŁ +Ġseasons +ventions +Ġнего +Ġsometime +лив +né +Ġtú +ĠDeus +Ġexecution +áp +ĠChange +ĠIndeed +Ġregulation +ĠHung +éis +Ġwishes +Ġjazz +Ġstructural +Ġblowing +ĠbyÄĩ +Ġthermal +phant +ÑĢÑĥз +анÑĤ +ĠPull +Ġconfusion +нÑĭми +Ġscenarios +ìłģìľ¼ë¡ľ +ĠдеÑĤ +Ġtattoo +Ġautre +Ġheating +Ġtreating +Ġпоним +Ġexclus +ĠLOL +wear +agle +Ġzurück +Ġrational +su +Ġdeter +ĠNative +à®ķள +ached +Ġãĥ +ĠEntonces +Ġhora +ìĿ´ìĹIJìļĶ +Ġlite +ë +Ġsixth +Ġболее +actor +Ġpsychology +缸 +Ġdemands +Ġpeer +Ġnewly +ĠWWE +Donald +ĠBox +Ġpine +Ġloading +ĠNico +ĠsÅĤ +omme +ART +Ġrecruit +Ġbugs +arents +ĠпÑĢоб +ĠInside +ipper +dramatic +Ġplanets +orde +Ġyoga +child +ĠMarie +ĠãģĤ +ĠBL +Ġfilmed +Ġrefresh +Ġtomatoes +Ġfet +Qué +Ġ!! 
+ĠëĤ´ë +rine +Ġinteractive +sal +annah +pez +ç¶ĵ +Ġunderstands +ĠTokyo +Ġlibraries +Ġreader +ijIJ +oz +ĠEnde +ĠFlo +Ġmild +Ġpoetry +Ġжив +æĦĽ +Ġbehave +Ġdoen +ĠSusan +page +raham +Ġcommunications +Ġtuning +Ġpac +Ġanxious +IO +Mark +Ġhiç +books +Ġpiss +Ġenabled +achelor +ĠFOR +Ġéc +ĠTR +ilst +hat +ĠìĿĮ +Ġtych +Ġjar +Ġbuilds +ĠArgent +Ġintermedi +Ġlou +Ġara +Ġassignment +Ġcabinet +Ġretirement +ãģ» +Ġdisabled +rica +Ġawards +Ġboots +Ġacknowled +Ġthy +Ġ구 +Ġsynd +ний +ilton +Ġprobl +ĠFal +Ġverdade +Ġ700 +ĠLearning +ocus +Ġpalace +Not +tain +cm +Ġmagnet +incoln +Ġfiguring +ĠLyn +ĠBoss +ĠVO +Ġdiagnosis +Ġequipped +watch +inos +aders +Ġshelf +Ġorganis +Ġnod +Ġkız +ppers +Ġrestore +Ġartic +ĠVoice +ıyorum +격 +Ġspreading +Ġhips +Ġward +ureau +Ġintersection +66 +Ġ39 +ç³ +Ġwaited +ì´ +hhhh +Ġdys +ĠEN +Ġbatch +Ġcaf +Ġmarker +大家好 +orable +ória +Ġstepped +Ġcelebrating +ана +Ġworn +ĠFol +Ġpla +Ġattempts +Ġtweet +Ġrust +gence +íĨµ +Ġrevel +Ġrecept +eness +Ġ(( +ãĥ¼ãĥ +!âĢĭ +ĠìĨIJ +Ġinfluenced +иж +ĠконеÑĩно +Ġcolleges +ioni +Ġsag +Ann +olar +Ġexpressions +Ġsuits +Ġownership +eland +piece +æĢİä¹Ī +Ġdespués +Ġtel +Ġinsult +Ġêµīìŀ¥ +ĠSmall +ĠFR +oka +berries +ĠAnton +елÑı +ÑıÑģ +Ġvalve +acts +Ġwoods +ண +Ġcultiv +Ġfá +ãģ¨ãģĦãģĨ +Ġcheers +Ġassumption +Ġfitness +ÃŃcul +Ġpodr +Ġweit +ĠHind +Ġdign +Ġзн +Ġsquad +Ġdestro +cere +shirt +immt +engers +Ġsä +kÅĤad +ĠÈĻ +Ġoccas +Ġì¤Ħ +Ġprocessor +ĠDM +ĠDaddy +Ġsooner +Ġstraightforward +Ġdepartments +ĠChrome +Ġworkplace +ĠPython +Ġmeng +ĠDAN +ĠIce +ĠëĪĪ +ĠGi +Ġhiring +Ġlanded +Ġdemocratic +iedz +ãģĺãĤĥ +Ġsev +icia +Ġespecial +ĠNous +Ġhät +Ġbou +pert +iesz +åijĢ +Ġvil +ÅĽli +Ġîn +Ġlosses +éķ· +Ġtoast +Ġrealm +ĠAustin +ĠInformation +Ġresume +Ġchase +Ġsalary +Ġë¶Ħ +лиÑĩ +ĠÑģлед +ĠFurther +Ġcaring +Ġvig +Ġvalor +è¿Ļ个 +ĠÑĩа +Ġanalytics +Ġglobe +ĠMAN +Ġnel +ìĿ´ìķ¼ +Ł¼ +Ġoy +íķĺìĦ¸ìļĶ +jen +Ġtroubles +ahaha +Ġchurches +uet +Ġmeasurements +bil +ì½ +ifully +инÑĥ +ĠWilson +¦´ +ĠíĮĮ +Ġì°¨ +Ġpúblic +ĠJerusalem +Ġnails +Ġspine +Ġhemos +Ġzn +quis +ĠLeben +Ġreferences +ITH +iper +ĠÑģебÑı +ìģ +ĠWa +state +§Ŀ +åħ± +ĠGener +Ġactress +ĠEnjoy +à¹ĥ +Ġ×Ĵ +Ġinfected +Ġshaking +Ġnick +ุ +Ġfot +Ġaccomplished +uke +Ġsheets +Ġfence +Ġnursing +Ġintroducing +Ġfeat +One +TO +Ġclubs +ĠBruce +onge +change +ĠBatman +åı° +ĠOfficer +Ġhydro +Ġsupplement +Ġcela +Ġlongest +Ġcompeting +Ġconhe +giving +Ġbrains +Ġloans +Ġwage +ĠClinton +ĠsÄĥ +aneous +Ġlord +ÑĢÑĥж +Ġquiz +Ġstiff +ĠLGB +sz +ME +mare +there +Ġnär +ĠMand +last +Ġdag +Ġhalfway +ĠBand +Ġëĭ¤ìĭľ +ĠAren +Ġile +PN +ento +Ġalgum +Ġsoccer +Ġblocked +ĠJonathan +Ġsew +ĠTestament +Ġvale +Ġbehavi +å§ĭ +Ġconna +ICH +Ġaudiences +ml +ammad +ĠìĤ´ì +IGH +Ġraces +emed +Ġmá»Ļt +ï +Ġovers +Ġdeclared +Ġsana +ĠUna +ĠÑĢе +ucks +Ġpairs +Ġange +Ne +Ġups +avy +ør +reek +Ġbehaviors +Ġreflected +Ġpriorities +Ġcondu +Ġretreat +Ġexpenses +Ġë´IJ +Ġtriple +Ġêµīìŀ¥íŀĪ +ält +Ġindigenous +Ġmining +Ġacceptable +Ġruin +CA +uine +Ġpipeline +ctic +êt +ĠвÑģего +Ġboun +ĠDigital +ĠBoom +ÑĨе +ĠлÑĥÑĩ +Ġasc +ĮĢë¡ľ +ĠGoodbye +Ġrender +enez +arre +ĠTHAT +bour +ición +ãĤŃ +Every +Ġwires +ĠParliament +nung +ateur +ĠSave +ĠPhys +Ġamor +ĠEve +Ġfright +Ġgamma +Ġmicros +mitt +ĠCode +ĠBey +pled +ĠиÑģполÑĮз +çĹ +ìĥī +她 +Ġmonet +ĠJahre +Ġluxury +Ġdeaf +Ġbetray +Ġê²° +ики +Ġdefeated +Ġundert +Ġweg +Ġcooler +ãģķãĤĵ +iami +éĤĦæľī +ĠJessica +ĠJoy +Ġsophistic +ении +ðĿĺ +Ġchili +ĠType +Ġproteins +Ġpresenting +alia +ìļ¸ +ĠMajor +Ġmolecule +umer +Ġcollapse +ĠAnyways +ĠMountain +anted +ãĢIJ +Ġвидео +æ°´ +Aud +Ġconqu +Ġvoll +Ġknit +Ġmembr +ĠMarket +Ġdari +Ġcalculated +ги +Ġshrimp +ĠMu +ĠпÑĢоÑĤ +Ġìĺģìĥģ 
+Ġproductivity +Ġcognitive +ĠHeb +ictions +ê²½ +Ġcré +för +Ġpraying +ashi +ĠTik +ór +wen +ÑĮÑİ +ixo +Ġ(\" +ĠÑĤел +Ġìĸ´ëĸ¤ +ĠпеÑĢед +ĠDrive +ãĢij +ĠEqu +Ġequilibrium +Ġdescribes +нее +42 +ĠCurrent +yy +Ġabsorb +Ġsoldier +ders +Ġtestimony +Ġdecline +ľë¡ľ +gage +Ġinspire +lapping +Ġspinning +Ġslavery +Ġfacial +Ġtraditions +ários +ĠHospital +Ġnest +ĠëĪĦ +Ġtoi +Ġfears +ìħ¨ +ĠMuh +Ġgraduation +Ġimpacted +Ġaunt +ĠLets +Ġaluminum +Ġdominant +ĠDavis +ĠNavy +Ġcompt +oples +Ġestava +è¥ +Ġscal +Ġpreserve +ĠOpp +Ġpractically +Ġmagnitude +Ġfitting +Ġcoordinate +Ġfurniture +ĠFamil +Ġexplosion +Ġdocumentary +ĠScript +Ġportray +mat +Ġscheduled +Ġdynamics +phy +aky +ĠUI +Che +Ġcontinuously +ĠProv +å°ij +Ñĥз +rah +Ġgerne +proof +Ġsecretary +ĠPatreon +scream +ĠKids +á»ĵi +Ġkg +Ġuncertainty +Ġкажд +Ġmitig +Ġreads +å·² +ĠRu +Ġpriest +Ġнед +Ġlimitations +Ġfloat +600 +ĠToy +ĠJimmy +Ġoffensive +eni +ĠXi +Ġeyebr +ĠTurk +Ġaccidentally +Ġohne +ĠSaud +95 +ĠDutch +анÑģ +ĠSeattle +Ġëĵ± +check +kÄĻ +Ġcontributions +Ġbeside +Ġquindi +Ġflew +æŶ +ذا +ĠLO +Ġwaist +ĠEV +Ġholidays +jon +Ġmisunder +Ñıн +Ġbout +Ġdimin +ẽ +ól +ĠGrace +Ġinputs +Ġdeny +Ġforming +ĠBild +Ġadequ +Ġfolk +Ġrejected +semb +Ġfrustrated +open +ĠBetter +ilon +Ġtowel +Ġdifferential +Ġsacred +Ġsail +éĩĮ +entimes +Ġgentleman +Ġiconic +Ġcomparing +Ġsagt +Ġtexts +Ġgrandma +Ġrolls +Ġcontents +ä¸į好 +оÑģÑģ +Ġsuspension +roit +¦¼ +Ġassez +Ġdort +ĠMath +ĠVictor +ĠJavaScript +ä¸įå°į +Ġenhan +ÅĻ +ĠBush +Ġpromotion +Ġkin +Ġmonsters +ĠColorado +Ġβ +íķ´ìļĶ +æŃ£ +ifferent +Ġnaked +Ġprod +etics +ĠWoman +Ġtreatments +Ġestoy +vé +Ġlifting +Ġyapt +ĠRober +Ġì¹ľ +Ġsubstitute +aku +ridge +Ġê±°ë +Ġresponded +Ġbé +ĠEngineer +Ġtransferred +ë² +Ġhaber +oop +ĠWE +Ġvest +Ġforty +ĠDS +Ġ2004 +Ġcoaching +nom +ĠBab +Ġnossa +ĠJake +Ġgy +Ġdeleg +Ġìŀł +ĠкÑĢаÑģ +Ġstandpoint +Ġdisad +Ġartwork +Ad +illo +ĠÄijược +ĠProm +ĠLib +Ġcriticism +Ġcontacts +ÑĢам +Ġachievement +ÐĶа +Ġdissol +ĠVegas +Ġstreams +ĠKent +ĠعÙĦÙī +Ġradius +Ġsucks +ĠAch +Ġfi +oust +ĠлÑİди +Ġpalette +ĠHaz +ĠAnthony +Ġtema +ĠCos +Ġsafer +αÏĤ +Ġcontrad +Ġmaior +Ġinflation +ĠSilver +Ġattending +íķľíħĮ +arto +Ġapplauding +Ġcomputing +ĠHat +æ» +know +makers +Ġconoc +Ġeducated +Ġmodified +Ġinclusion +mental +ŀIJ +isia +ĠÏĢοÏħ +Ġaun +ĠIreland +Ġkö +Ġcompliance +Ġinspiring +иÑĤелÑĮно +Ġdispos +ì°¨ +Ġwip +rical +rawd +Ġtres +Ġmobil +olutions +BO +Ġbounce +Ġassumed +ĠMedical +Ġfiscal +ĠngÆ°á»Ŀi +itionally +Ġstolen +ĠBM +Ġmechanisms +εί +Ġqualified +ĠìŀIJë +ughters +ĠHIV +ĠLots +Ġservers +Ġcarr +ĠTogether +Ġattracted +Ġkr +æĪijæĺ¯ +thur +inin +ĠHalf +ÈĽ +ĠPap +Ġreminded +ALL +Ġhelmet +Ġbottles +Ġprofessors +Ġseine +ÅĤÄħ +ãĥı +Ġê±°ìķ¼ +Ġ×¢×ľ +fun +ĠBird +Ġfighter +ĠëĶ°ë +ĠTool +Ġtin +inois +ë¶Ħ +×Ļף +ĠCAR +åIJį +irsty +Ġoutdoor +ĠNS +ãħİ +ffen +Ġlud +Hello +Ġroller +iele +ĠPoland +Ġapa +exp +Ġcertificate +ĠTown +аÑİÑĤÑģÑı +ilde +Ġdetermin +PR +Ġfreeze +Ġmainstream +Ġobjectives +blo +Ġtakie +åĵĪåĵĪ +Ġë°Ķë¡ľ +elet +ĠIV +ĠFast +Ġdere +emp +ĠDra +ĠìŀĪìĹĪ +Ġdiscrimination +Ġείναι +necess +æ® +ıģı +Ġposting +wiÅĽcie +Ġlub +Ġolive +Ġrim +Ġmodeling +Ġaño +ĠPakistan +Ġoverl +Ġinflam +NE +ìĹIJê²Į +Ġattended +Ġdealt +ĠAlt +ĠLincoln +Ġawake +Ġfilters +ĠWithin +czywiÅĽcie +Ġsû +ĠJohnny +Ġintegrity +Ġisolation +ĠEasy +ĠпÑĢин +ĠAlice +Ġsmiling +enix +,... 
+ζ +Ġbegun +Ġjewel +Ġconventional +Ġstatist +Ġhanded +Ġirre +Ġprohib +Ġsatellite +é¦Ļ +ĠIndust +Ġtraged +Ġtrava +Ġihm +Ġcruel +ĠAgora +ĠDoc +Ġzones +Ġmall +Ġtray +×ķ׳ +Ġirrit +Ġkans +ĠBeat +udge +ielle +Ġtrusted +Ġbikes +ĠÑĥп +ĠMember +wick +Ġcreators +Ġheritage +indistinct +Ġresur +ennen +Come +Ġfiring +ĠBueno +ĠТо +ikan +ettes +Ġkes +Ġtrips +Ġdivorce +ĠKl +Ġconsol +keep +기ê°Ģ +ĠReport +Ġhosting +Ġdiamond +Ġcomplic +Ġhelicop +Ġdepuis +ds +ĠChan +Ñıл +Ġscissors +ilation +Ġproportion +ERE +ĠÙĪاÙĦ +inta +Ġmuchas +uation +itis +æĬĬ +ÑıÑī +Ġniin +Ġemphasize +uela +Ġproducers +Ġrze +änder +ETH +æº +Ġconstitu +åĽ½ +Ġperformances +istle +gov +ĠLiter +Ġincorporate +Ġeducate +ĠNin +쪽 +ÙĩÙħ +eleration +×ķ×ij +ĠyaÅŁ +orous +ĠCas +Ġgrants +ëĬ¥ +amel +Ġê·¸ëłĩê²Į +ĠEste +ÑħодиÑĤ +ĠпоÑģле +Ġgent +Ġfocuses +alities +ĠRh +ë³´ +æ°ij +ĠDance +rr +Ġamer +Ġutilize +ĠlÃŃ +ĠAmong +Ġpregnancy +Ġloops +алоÑģÑĮ +ĠMoh +Ġcatching +Ġglob +Ġajud +Ġ[? +ĠAnal +looking +Ġsurfaces +Ġprogressive +Ġviral +08 +ξ +KA +Ġży +Ġpicks +annon +Ġbulk +ĠRoss +Ġdescribing +ĠGel +Ġlocally +Ġendless +Ġmassage +Ġcleaned +Ġtraveled +енÑĭ +Ġsentiment +igma +ĠNas +Ġchemicals +Ġrighteous +ĠMagic +Ġrelates +Ġtrucks +Ġ1960 +åĪ¥ +Ġappet +Ġsnacks +ĠSummer +Ġyüz +Ġpris +ĠMexican +Ġtransparen +Ġminority +Ġverte +Ġlassen +46 +лек +ép +ĠÑĦилÑĮ +Ġiyi +Ġspan +íķĺì§Ģ +Ġindicated +quar +Ġscholarship +ĠLGBT +Ġhistorically +óÅĤ +Ġminist +Ġpenet +ĠRap +Ġconservation +缴 +ĠHoney +ĠBei +idel +Ġresponsibilities +Ġmessy +ĠExcept +ORE +Ġinitiatives +Ġjunior +Ġdesigners +Ġexploration +Ġsponsor +Ġmobility +Ġinteg +lando +Ġbark +Ġindicates +ච+Ġemployer +å®ī +Ġcousin +Ġboiling +Ġchrom +Ġçal +Ġperpet +Ġcontained +Ġparks +Ы +ĠEngineering +Please +ĠStarting +hero +Ġlawyers +西 +Ġzd +Ġfranchise +rage +Ġintuit +ĠGL +reach +ĠElle +ĠnhÆ° +ĠNord +Ġbean +07 +Ġpleasant +å½ĵ +viron +Ġgradient +zus +ĠEM +Ġessay +ìĹIJìļĶ +ến +nu +ừ +ĠÃīs +Ġdenomin +ĠGirls +Ġpersonnes +ĠاÙĦØ£ +bild +ĠStat +Ġcompliment +ĠKate +Ġoptimal +Ġhid +دÙĬ +Ġquicker +wall +En +INE +??? +ì²´ +ĠAction +åŁ +Ġpenalty +ĠKaz +'? 
+Ġcried +Ġcanvas +fte +Ġexclud +¸ë¡ľ +Ġemphasis +Ġenzy +ĠHou +Ġoverseas +ÃŃamos +師 +öglich +Ġheadphones +cn +ĠAge +Ġakan +Ġcharacteristic +íķĺë©´ +gets +Ġë¶Ī +Ġrival +Ġborders +emente +emás +Ġyol +Ġcompe +enders +ından +Ġmöglich +Ġbubbles +natural +Ġarmed +Ġelabor +ĠìĿ´ë²Ī +Ġwashed +οÏħμε +è«ĭ +Ġflavors +Ġexiste +Ġprest +ĠThema +опÑĢоÑģ +eron +UE +eri +Ġconcer +Ġaixò +åħ© +Ġprotective +ĠзнаÑİ +ĠëĤł +ĠIII +Ġmeer +ĠShop +lli +ĠOrder +ĠMY +ĠGhost +ãĤĤãģĨ +adel +Ġstole +Ġreleasing +ĠComment +Ġtrains +ëªħ +Ġwissen +ensed +Ġdescend +Ġfier +Ġradi +Ġpersu +ç¢ +Ġмн +ĠDest +Ġworries +itet +bas +Ġstab +name +oric +ĠClose +Ġalumni +ĠSelf +ffe +itating +atherine +ĠRights +Ġellos +Ġwarrant +Ġnerve +Ġvegetable +ĠTeil +Ġê°ĻìĿ´ +RY +Ġsustainability +Ġsteht +Ġbrid +adaÅŁ +Ġtv +Ġduration +Ġpessoa +Ġmetrics +Ġadam +cas +аÑĢи +Ġevident +Ġdisplayed +ائ +Ġreck +ĠBuddha +Ġdele +ĠDiego +osph +Ġbla +ĠMik +ulator +Ġ2001 +Ġpromoting +ych +ĠEX +Ġlastly +Ġoutline +Ġspirits +Ġveux +Ġsubtract +ĠÅŁimdi +Ġpins +Ġburger +Ġmolto +ĠhabÃŃa +Ġë°ĺ +igu +erst +Ġnen +Ġbacon +itious +Ġcarries +Ġpromises +nde +ĠLeft +ĠLim +æ£ +Ġ44 +Ġcareers +Ġ주ë +Ġspeeds +qué +mad +market +isme +Ġ2003 +Ġrecess +ĠJUD +Ġracist +ĠSchl +Ġparler +Ġotros +ishes +Ġconverted +aaaa +ании +ĠArk +ĠChance +Ġelementary +εν +inks +Interviewer +Ġfreely +alah +Ġëĭ¤ë¥¸ +Ġrequested +Ġtorque +noÅĽci +oured +ĠStaff +Ġstain +ĠAlan +Ġvere +ĠWinter +Ġdefect +iedy +Ġbeats +Ġhá +umn +oons +itudes +Ġseit +oly +Ġreserv +Ġextr +Ġphysician +visor +Ġhandful +ĠNations +Ġì¢ĭìĿĢ +uccess +Ġupstairs +ĠSquare +Ġhein +ĠSeason +olis +Ġprince +Ġdefensive +ç½ +ĠмеÑģÑĤ +Ñĸй +ĠاÙĨ +umble +ê¹ĮìļĶ +Ġassass +Ġcircular +Ġqualities +Ġhmm +Ġblown +ĠLiz +ĠKur +ĠSA +Ġfindings +Ġcolours +Ġdelle +ĠIR +ĠAth +ĠDub +ĠOx +ĠØ® +Ġpockets +Ġgrill +Ġswitching +Ġpreferred +ĠWales +Ġexemplo +Ġchopped +Ġvaccination +Ġneuro +Ġspecify +ivos +Ġserá +Ġzie +Ġà®® +Ġresulting +ĠUgh +Ġmessed +CD +Ġpaar +Ġcomer +Ġcouch +ĠFestival +Ġ49 +vous +zens +種 +ĠKennedy +ĠTs +Ġë³´ìĹ +Ġdemonstration +Ġunto +Ġfrustrating +Ġlaboratory +Ġegy +Ġbeautifully +Ġìŀ¬ë +Ġalgu +Ġöyle +ä½łçľĭ +ĠPH +Ġfortune +Ġcleaner +ĠRobin +Ġsaus +ĠGeld +Ġkat +obs +Ġolur +Ġmatt +Ġquesta +Ġsuggestion +encer +оÑģÑĤ +Ġradar +Ġìŀ¡ +isha +ந +ãĤĵãģª +jes +Ġveel +ìĤ° +Ġauthors +ãĢİ +plan +Ġcollaborative +Ġinstinct +Ġfarming +auge +Edu +Ġmembership +Ġsimultaneously +Ġbake +Ġkä +Ġlectures +ÑĩеÑģ +Ġprendre +Ġcollaps +ĠSaya +ĠFut +Ġyog +ĠRather +رÙĬ +Ġcamps +олод +Ġsimulation +ĠMak +Laughs +Ġgrey +Ġsentences +yen +ĠUnless +Je +ĠSatan +ĠÑĤакже +ĠNA +Ġbron +Ġ?] 
+Ġsouls +Ġlightning +Ġimagined +Ġczyli +psilon +etta +Ġbelieving +Ġstrongest +ĠCON +Ġquelques +Ġimmigrants +Ġwallet +éĢĻæĺ¯ +ĠJersey +Ġimplications +Ġforb +ãĢı +Ġunbelievable +اء +Ġoperational +üs +ĠGM +Ġê·¸ëŁ°ëį° +Ġgracias +Ġentend +ĠRegard +rob +ĠÑĤеÑħ +èı +ĠRevolution +Ġwaar +ĠBiz +theless +Ġsponsored +quier +ĠìĿ¼ë +Ġtek +ĠëIJł +igkeit +ĠLuck +ĠCertainly +Ġtoll +ĠниÑĩего +ĠMoney +ĠÑģÑĤоÑĢ +ĠDouble +ĠWolf +Ġchunk +άν +ités +oning +Mar +Ġgrandes +Ġcollections +ĠEuropa +ĠаÑĢ +ĠâĢĭâĢĭâĢĭ +Ġê·¸ëŁ¬ë©´ +ĠобÑĬ +Ġãģª +Ġìĭľê°Ħ +ĠCustom +Ġì²ĺ +ÑĸлÑĮ +Ġindividually +íĹ +Ġdozen +Ġowe +ĠVictoria +åı¯èĥ½ +Ġbeet +urb +Ġanalog +ição +Ĥľ +soever +Ġmodo +Ġsubscribed +ìŀ¬ +Ġentities +çīĩ +Ġcloset +Ġresponding +Ġprinter +ĠStephan +ĠbyÅĤ +ĠDom +ĠFern +ĠPier +ĠwiÄĻc +Ġhence +Ġmodules +ãĥ¬ +ĠëĶ± +ĠDanny +ĠÑģебе +Ġvad +ĠìĹĦ +Ġsous +Ġsphere +BY +ĠPed +igned +Ġwheat +Ġunders +Ġevolve +Ġdeclar +Ġlightly +Ġidentifying +æĦıæĢĿ +Ġlegendary +Ġgenuine +Ġgrind +ĠUne +geben +Ġbicy +Ġjumps +Ġprovince +ziÄĻ +Ġ×IJ׳×Ļ +Ġhoc +Ġбл +ĠGrad +Ġrevenge +ĠاÙĦت +ooh +æĭľ +аÑĨии +å¹³ +Ġelectro +ĠëIJIJ +ãģ§ãģ¯ +Ġfals +riel +oker +ĠExcellent +ĠMorgan +Ġbrick +Ġsubstantial +Ġpollution +ĠTür +ĠEvet +Ġlung +ãģĸ +×Ļש +ommes +Ġrealizing +Ġhumble +ĠLock +Ġbod +Ġìĸ¸ +Ġpeers +uzz +Ġembedded +Ġclaro +Ġaggreg +Ġemployers +ĠRaj +Ġãģ¨ +ĠYi +Ġjeu +aters +Ġstrikes +nos +autres +dr +opher +ĠApparently +íĺĦ +Ġinfant +اب +ÑĤÑĭ +íĽ +Ú¯ +Ġredes +acaģım +ĠDAVID +ĠChicken +Ġperspectives +Ġviewer +Ġshar +ĠпÑĢоиз +ligt +eros +itable +илоÑģÑĮ +ĠdifÃŃ +´ëį° +Ġretired +Ġthats +zenie +beiten +Ġmycket +ĠRab +Ġinflamm +ì°® +Ġdum +Ġdaddy +æľŁ +Ġimmers +Ġplaylist +à¯Ĩ +Ġtraum +Ġrefuse +step +à®ļ +cup +Ġpops +rimin +ayım +Ġald +Ġunnecess +Ġdah +ĠIrish +Ġcompr +laÅŁ +TP +Ġtranslated +Sc +ceÄŁim +´IJ +Ġdrei +ĠлÑİдей +Ġquiero +Ġhele +zlich +Ġapples +Ġdistricts +Ġcredits +Ġasp +Ġëĭ¨ +oral +å½± +Ġstepping +ĠVa +Ġgains +65 +Ġnuestra +eday +assador +ĠLind +Ġcrops +ciendo +igue +Ġbana +Am +Ġpent +Ġaddiction +Ġpackaging +äd +ª¨ +Ġperquè +Ġcampaigns +Ġsteep +Ġneue +Ġembarrassed +Ġdistinction +itzer +åijĬ +Ġregistration +Ġllam +ĠAlmighty +liest +Ġuz +nak +çº +Ġteraz +iamente +Ġtransactions +Ġcôt +Ġswitched +Ġcombo +Ġprayers +Ġinternship +Ġaddresses +Ġcharity +ĠWOO +Ġbait +è¿ĩ +Ġ� +Ġfica +ĠTyler +aru +Ġatoms +ĠLevel +ĠпоÑĤом +Ġfame +ulk +Ġteaches +Ġrebuild +едÑĮ +ĠIndonesia +ushi +ĠShort +Ġensuring +fs +ele +Ġmarginal +Ġconclude +amt +Ġverify +ĠMcDonald +Ġskal +Ġreconst +ĠMann +Ġbasement +Ġtransformed +Ġoccasionally +zone +ĠDans +Ġкакой +Ġdiagnosed +ĠÏĦα +Ġcommands +Ġpresidential +Ġabb +Ġbracket +ĠLem +Ã¥ng +Ġfavorites +Ġrevol +ĠíĬ¹ +Ġharass +éħ +Ġcleans +ständ +Ġknocked +Ġpeoples +Ġmusicians +Ġmutual +ĠCold +88 +zej +atie +ĠHonor +Ġobsessed +ĠMUSIC +ĠBreak +úng +Ġmodify +Ġsöyle +Ġ×ŀ×Ķ +ĠOnline +fo +ĠMiller +Ġliking +Ġinhab +Ġgratitude +ĠJournal +arness +John +ĠGit +åīĽ +Ġsincere +ĠSci +ĠEli +Ġsymbols +Ġmanually +εÏĤ +ĠвÑĸд +ĠFat +Ġlabels +Ġsophisticated +umps +Ġreleases +Ġ47 +ĠOM +ê°Ģë +ĠBien +ĠRef +è¨ĺ +ĠSta +ĠEgg +Ġindicator +pson +Ġnasıl +Right +Ġconvey +Ġknot +Ġconnects +ulas +Ġpreced +Ġinequality +amiento +Ġreply +OY +Ġdismiss +ĠëIJľ +çĦ¡ +ĠÑħоÑĢоÑĪо +Ġméd +Ġrandomly +ĠOnt +uard +Ġpulls +ĠÑĤепеÑĢÑĮ +ĠNeed +ĠSoft +Ġstrengths +Ġgoed +umen +æŃ» +Ġíݸ +Ġдоб +Ġclarity +ĠAi +Ġballoon +ĠPand +ĠìķĦëĭ +Ġshiny +Ġsmallest +onia +hill +oting +Ġeing +Ġmerely +Ġseus +Ġнеп +ĠíĨµ +Ġguides +Ġspecialist +Ġsteak +ãĤĪãģĨ +Ġmigration +quele +Ġruined +Ġpupp +女 +Ġkend +angan +Ġpalm +Ġunfair +Ġzm +ĠDV +chester +иÑİ +Ġooh +erg +ATH +°© +åĵª +rison +Ġinvolving +Ġpartly +ançais +Ġvow +Ġprominent 
+Ġcryst +iba +Ġdeserves +Ġovert +Ġsensit +ĠWhe +Ġtighten +Ġintimid +Ġaliment +will +Ġstrengthen +ĠTan +åıĪ +ãģĹãģ¾ãģĻ +oni +ĠMun +Ġproph +Ġrehears +ĠKle +Ġveces +Ġwondered +oki +Ġsenses +´ìĭ +Æ°á»Ľ +ĠÈĻi +Ġmuchos +Ġwatches +ortunate +ĠJuan +ìŀĸìķĦ +ÑĢе +ei +ionen +Ġexperimental +Ġdaughters +à¸Ľ +Ġmentally +becca +aware +ìĦĿ +Ġwhatsoever +Ġenables +ĠLow +oid +à¸Ĭ +ód +غ +Ġconstructed +ĠLadies +Ġaccused +Ġан +Dan +Ġspawn +Ġcontainers +Ġartistic +ıp +Ġdiscl +Ġautres +inas +ĠNation +Ġnag +bean +whe +ľëıĦ +ĠSeoul +Ġíı¬ +ĠNich +Ġcomplement +Ġinterven +ĠModel +ĠOrange +namon +Ġcalculation +see +Ġustedes +Ġleb +Ġdoct +Ñĸн +Ġfoster +Ġelastic +ĠAhh +Ġace +ĠPink +ĠJeg +Ġdeer +ãģĹãģĦ +sis +Ġjako +ĠEmma +ÑģÑĤвенно +Ġportrait +Ġmaker +Ġaument +ÑĢоб +Ġairplane +Ġtransparency +Ġadjustment +ĠCDC +çon +Ġuploaded +ĠдейÑģÑĤв +ĠгоÑĤов +Ġiter +Ġcurse +ôn +merce +aran +Ġleak +çµIJ +Ġabsence +Ñģкий +Ġreaders +aler +Ġbeneath +ango +hetic +Ġfinns +Ġpoop +Ġduplic +Hi +igs +ologically +opp +Ġdizer +ĠAllen +Ġgli +Ġacceleration +Ġvitamin +ãĥŃ +vä +ĠAccess +à®Ļ +rás +Ġappreciated +Ġnah +Ġposter +Ġtale +Ġhighlighted +æĸĩ +żeli +Ġblockchain +Ġmicrow +Ġcinema +ĠChang +ĠSearch +usters +ĠZero +ĠDivision +ÑĢаÑģ +Ġscare +Ġjelly +ĠAdministration +SO +Ġlined +Ġê°Ħ +Ġgeben +Ġsoda +Ġwinners +³¼ +ÙĴ +ĠAmb +åķıé¡Į +åĶ +Ġpeg +å·± +43 +Ġraus +Ġrewards +Ġinclus +Ġhighway +Ġhah +Ġmultiplied +Ġsẽ +Ġdisciples +Ġning +Ġdressing +Ġattributes +ĠMosc +ĠGreece +Ġsek +ĠLearn +Ġjus +rendre +Ġpersonne +plete +Ġplacing +Ġluego +illance +ĠобÑī +Ġprovision +Ġlion +tra +boards +Ġbehaviour +hey +Ġsubscription +Ġprotagon +ãĥ£ +Ġvara +ĠÅŁu +Ġhaha +Ġteaspoon +æŁ +avoir +Ġcrypto +ĠÑģÑĤаÑĢ +ĠStore +abs +ĠStudents +Ġlaund +into +Ġapproached +°ľ +ÑĥÑİÑī +ĠLabor +otes +iatric +ĠgroÃŁ +utive +Ġид +ĠGib +Ġplacement +ĠdifÃŃcil +Ġfrog +ĠвÑģеÑħ +ĠJr +azed +ÑĥÑī +Ġê¼ +frame +аеÑĪÑĮ +Ġlockdown +åij³ +Ġmedi +Ġ×Ķ×ŀ× +ений +emale +ì¢ħ +ateral +Ġdistant +Ġbears +Ġjournalist +解 +ĠMarshall +ĠIhnen +uetooth +bag +ĠÄijã +ĠHighness +Ġì°į +ика +ĠWu +ĠFran +Ġpeng +Ġfon +Ġhypothesis +ĠÑĢÑĥ +Ġly +×ļ +ìĽĶ +ĠRadio +à¸ŀ +Dav +Ġembarrassing +ĠìŀĪìĸ´ +Ġcasting +Ġcage +ĠPsych +ĠìĿ¼ëĭ¨ +Ġž +imb +Ġdirectors +SH +ĠÏĦην +á»ģu +ĠkonuÅŁ +Ġoptional +quarters +iker +ĠSant +Ġverses +ë¶Ģ +Ġolar +ĠÏĩ +ãĥķ +Ġγια +ĠImm +Ġcontroversial +Ġersten +Ġrecip +ĠChristianity +Ġê´ľ +ordon +×ķש +Ġslash +ĠPf +ÑĥдÑĮ +×ķ×Ŀ +ĠPerry +Ġmamy +Ġbackgrounds +Ġà®İன +Ġpendant +ĠColumbia +Ġinverse +ĠÑĩеÑĢез +Ġsv +Ġdigging +41 +chem +Ġnavigation +ĠShin +ĠFront +PD +Ġbearing +ĠWasser +Ġwax +ĠCHRIS +ching +Ġpressed +El +ĠDal +onsin +Ġbinding +Ñģкой +poons +Ġmock +arest +кÑĢа +MM +Ġcorrupt +storm +Ġrefres +ĠCoach +llä +ĠTHIS +Ġparag +Ġìĵ° +pool +Ġbillions +Ġê¹Ģ +group +Ġwelcoming +cellence +ĠDuke +긴 +Ġprimera +ìł¸ +Ġpond +Ġstatue +Ġ구ë +Ġhatch +Ġinstrumental +Ġresidential +커 +Ġaccepting +oshi +date +ĠìĶ¨ +Ġplanted +Ġjoking +ĠìĦľ +Ġhated +ĠÑĢаÑģÑģк +Ġslept +Ġpackages +Ġislands +esen +ģı +Ġdiagon +ĠOsc +Ġmesh +Ġscales +arity +ĠDefense +ãģ¡ãĤĩ +ĠLewis +ĠÑģегоднÑı +Ġflies +uinely +ĠConsider +Ġstark +hew +ĠAsÃŃ +³´ë +Ġpropose +Ġíķĺë©´ +odo +ĠNormally +Ġheeft +ĠHarris +gro +ĠBlood +base +ĠiOS +Ġtouches +Ġinspir +Ġ×ĵ +Ġbinary +Ġì¶Ķ +Ġserial +Ġion +Ġunemployment +Ġodds +ĠFab +ĠFBI +BRUN +Ġweights +νο +atile +Ġnurses +Ġinvolvement +ĠíĶ¼ +Ġgovernance +ĠâĤ¬ +ÑĢÑĥп +ierra +íĺķ +ĠJerry +Ġbeard +Ġsalvation +ĠAlong +gentle +ĠKi +bol +ĠPlat +Ġhasht +è¿ij +Ġware +Ġpartie +ycz +Ġintr +Fih +nent +Ġcheat +ilen +Ġë¯ +orie +Ġfácil +etric +Ġaffecting +unciation +Ġaffairs +Ġbee +Ġviewing +Ġorang +ĠLan +ĠСÑĤ +ä¸ĸ +ĠMes +ĥģ +erie +Ġespa +Ġinterpre 
+Ġpossess +Ġpurely +rito +found +asma +ìłģìĿ¸ +Ġexamine +ĠÑĥм +Ġbesch +ĠTomorrow +ĠBlock +Ġvariant +Ġpreference +Ġcoaches +Ġmedications +ĠíĺĦ +Ġempire +ëĦ¤ +ĠIllinois +Ġcrispy +Ġthì +Ġbees +77 +Ġglow +èº +ĠStudies +åIJĦ +ĠChallenge +Ġunlikely +Ч +ıyorsun +DIE +Ġminimize +izard +Ġún +Ġencontrar +ĠKill +å» +Ġvanilla +ĠGrant +ĠGT +sea +Ġsought +вод +Ġnäm +ĠAunt +OWN +Ġpumpkin +stellen +Ġrag +егда +Ġstoryt +Ġforum +æ©Ł +Ġestaba +uche +Ġcongress +ĠRey +Ġdramatically +ĠSport +ĠYellow +Ġê³ĦìĨį +Ġdisgusting +ĠRecent +Ġacquired +Ġcables +çĶļ +din +Ġvisto +Ġcommunicating +ÑģÑĤавлÑı +еÑģÑĤо +ãĥ»ãĥ»ãĥ» +Ġrég +Ġsocks +Ġproces +because +Ġutter +Ġcolocar +Ġnewest +Ġgramm +表 +ä¸įçŁ¥éģĵ +Ġshifting +Ġcarrier +ĠÑģкоÑĢ +ĠSchw +Ġexecuted +Ġmaintained +ĠÏĨ +ĠMoses +Ġdisse +Ġhorr +ãĢľ +Ġrally +Ġallem +ĠEventually +Ġdiyor +lvania +Ġschnell +Ġê³¼ +Ġ매 +Ġstruggles +late +Ġclarify +ément +Ġmultiplic +ибо +Ġjourn +Ġfragr +Ġsurprisingly +Ġdesperate +52 +Ġsul +ĠRead +ĠFried +Ġmond +woo +Ġorganizing +ãģĹãĤĩãģĨ +ĠSoon +ĠвопÑĢоÑģ +ĠNur +ĠÐĹд +Ġspider +еÑģÑı +Ġtutorials +Ġnutrients +orer +Ġcoefficient +Ġarrangement +Ġpricing +nan +yu +BL +Ġtribe +ĠHoward +unks +Ġnewer +Ġprovin +Ġprediction +hos +Ġolsun +ĠAround +Ġvier +ĠÑģÑĤоÑĢон +Ġvalley +ĠEla +ifi +Ġgalaxy +Ġtranqu +Ġadvers +ĠTemple +iffs +igence +èĩªå·± +Ġkönnte +ĠÄijó +Did +Ġphotographs +ĠAWS +ÑĨиÑı +Ġguards +Ġappointed +ĠGil +Ġмом +Ġcod +ĠUnlike +Ġevenly +isconsin +Ġestou +Ġmnie +ĠExec +ĠMV +ĠEine +ä¿¡ +ĠRoger +ĠFac +ĠList +Ġfuer +аеÑĤе +omed +Ġattraction +èī² +Ġterrain +ĠDrop +Ġcorporations +Ġsciences +Ġthrone +ãģĦãģŁ +Ġaj +ĠRot +çī¹ +Ġsupporters +ĠBere +Here +Ġdiferentes +Ġsignificance +Ïĥη +æĪij覺å¾Ĺ +Ġclamp +ĠëĮĢë +Ġfabulous +rez +æĮģ +Ġassumptions +uther +wid +pot +è¿İ +Ġyan +ulin +ÑĢÑĭв +ĠSlow +ĠPennsy +Ġíķ´ìĦľ +Ġmeio +Ġwealthy +ĠEight +Ġpulse +Ġfriction +idity +ĠHoll +iyorum +Ġsounded +ĠCarr +Ġfork +âĺ +ĠPA +Ġconspir +Ġcoding +rt +ĠTyp +Ġìĸij +Ġпог +Ġmiser +ĠÑģмоÑĤÑĢ +ĠSweden +Ġolarak +ĠZhang +ĠChi +ĠTitan +Ġscreening +ĠSpider +ĠÅŀimdi +Ġobstacles +lara +Ġchallenged +pse +TON +ụ +ĠPi +Ġlagi +ieurs +Ġhurting +Ġneglect +Ġgenerating +Ġyoungest +Ġaudit +ĠÑĢез +Ïģά +Ġdonate +ĠPDF +Ġvisits +Ġcruise +PP +aser +Ġwsp +backs +ivals +ãģĨãĤĵ +Ġdeve +Ġproport +Ġcath +ĠEffect +Ġwinds +ĠìĻĶ +Ġcharts +Ġsama +Ġautomation +Ġпока +Ġolan +Ġboats +Ġcafe +Ġdenied +ĠMama +Ġblocking +ĠThor +Ġphenomenal +Ġstakeholders +Ġunos +ÑĥеÑĤ +ĠAbraham +ãģ§ãĤĤ +Ġdetection +Ġjuris +Ġpowered +zial +Ġwelfare +Ġupgrad +Ġmożna +ĠCase +cular +ĶìĿ´ +ãĥģ +ĠGuess +Ġcycles +ä¾ĭ +給 +rock +umi +Ġelite +Ġquè +åł± +ÑĤом +Ġshore +gunta +Ġku +Ġfaithful +ĠJeremy +aid +à· +ugal +å°įåķĬ +ĠVel +Ġvrai +stell +¨¸ +Ġkol +è½ +Ġquanto +ĠзаÑĢ +Ġ2002 +esy +Ġreserve +ĠмоменÑĤ +Ġdeployed +Ġdefining +Ġsau +Ġgaat +\") +Ġtransmit +Ġpublishing +Ġranking +Ġoffense +Ġ46 +pin +ĠTaking +Ġentitled +Ġgenuinely +Ġvariations +Ġfinde +Ġtau +Ġunfortunate +ĠRah +ports +ĠcÅ +Ġmonkey +Ġbrac +wei +lung +Ġartif +Ġsyrup +ĠÐĶав +Ġlifted +Ġchez +ĠAdvent +ĠStock +Ġdol +мен +иÑĪÑĮ +Ġyn +gio +det +Ġdesse +Ġgri +ĠChairman +çħ +Ġcuenta +anim +Ġcrab +Ġescal +Ġpremière +ĠGef +Ġdining +Ġseventh +Ġchasing +ĠTower +Ġbrutal +Ġfundamentally +ãģ¨ãģĨ +лениÑı +stage +Ġacquis +Ġcylinder +Ġcommander +mem +ĠUV +happy +Ġepsilon +Ġinvitation +Ġfarmer +chair +Ġdestiny +Ġsovere +ĠHebrew +Ġservant +Ġbew +Ġgast +uties +Ġadministrative +ĠCommand +éta +Ġnitrogen +ê·¼ +Ġabi +Ġvillain +Ġblanket +ĠSend +Ġbeaten +²Ħ +Ġvolunt +Ġscholar +ĠEmperor +Ġ43 +vable +ĠDus +ĠGU +Ġtargeting +www +Ġamendment +ìĨĮë +Ġting +Ġnasty +Ġgauge +ĠÑĢод +ĠHans +Your +αν +Ġprojet +ĠHawaii +Ġsuspicious 
+Ġschw +Ġremoval +Ġintrig +ĠMU +Ġponto +ा +ĠобÑĢаз +Ġguessing +pace +Ġmothers +Ġmillimeter +ление +没æľī +Ġavailability +icz +æѤ +Ġfract +Ġbases +km +ĠBTS +ĠField +Ġdzie +Ġsegundo +ĠëĤĺëĬĶ +Ġlegitimate +imas +Ġвн +Ġcorruption +Ġsmash +ĠValent +Ġaligned +ĠPennsylvania +Ġgab +ĠEun +enth +ĠMorning +Ġcandle +Ġbackpack +ĠIslamic +ações +Ġencry +Ġmushrooms +íĮĮ +dit +Ġtransit +ĠWisconsin +Ġparticipated +ĠIls +Ġunfold +¶Ģë +Ġprofits +Ġwarming +ĠGang +Ġnetworking +Ġmega +Ġthoroughly +lements +ĠHm +Ġdeciding +Ġemotionally +Ġexhausted +ĠÐŁÐ¾ÑĤ +cido +ĠHTML +Ġcopyright +Ġmelody +yim +Ġanders +oshop +Ġë³¼ +Ġathlete +ĠGE +Ġfrequent +Ġdesires +Ġneeding +ĠYun +Ġrifle +Ġlover +'T +Ġdense +Ġtão +Ġnotified +Ġidi +ìĹŃ +íĨ +Ġinteracting +Ġrapport +еÑĢи +ski +Ġbesser +Ġmanufacturer +ĠKyle +Ġaccountable +ĠSak +ĠPil +ĠDomin +Ġpresum +ĠÐĴÑģе +Ġvinegar +Ġguaranteed +çľĭåĪ° +Ġhandled +éŁ³ +cat +Ġcivilization +Ġaccomp +ĠVM +émon +Ġdeze +Ġgrades +Ġsollte +Ġstaring +×IJת +arnt +Ġhorizon +Ġtravail +hour +第ä¸Ģ +ĠED +ĠDak +Ġny +Ġconve +ĠCham +Ġfirms +ĠLiu +ĠÑģÑĤÑĢан +Ġlibert +Ġlenses +Ġintake +ĠвÑĭб +Ġmensen +hel +Ġpractition +Ġ350 +ãĤ³ +FO +Ġbeds +Ġancestors +ĠìĹĦì²Ń +Ġdisturb +ĠLastly +ĠSupport +ีà¹ī +ĠCorona +Ġenthusi +Ġвозм +ĠìĤ¬ëŀĮë +Ġ52 +bird +Ġreduces +ĠìŀĪìĿĦ +ĠGene +êµIJ +ÄĻp +ĠÃľber +Ġconcerning +user +Ġconcentrate +ĠWHAT +ishop +onymous +nold +Ġsuggesting +©° +ĠFish +........ +Ġvessel +Ġtrabajo +ãģµ +ĠOcean +å§IJ +yg +Ġtowns +del +Ġterrifying +ĠçalÄ±ÅŁ +Ġsino +Ġeats +Ġgez +Ġgeme +ĠìĻĦ +Ġcompart +Ġimplementing +ĠPotter +ĠGermans +ĠgÅĤ +Ġtennis +Ġcarpet +auer +ĠSaudi +yeong +Ġcurry +ĠForest +Ñĭл +Ġfifteen +Ġbolts +Ġ{\\ +¬´ +Ġsettlement +Ġlange +Ġbam +Get +íķĻ +Ġswap +ĠKhan +Ġcommence +Ġquarantine +Ġscored +çĸ +Ġ1950 +Ġthicker +Ġsûr +åı£ +ĠLarry +Ġallez +ìĭľëĬĶ +Ġgü +Ġspectacular +// +both +Ġstats +妳 +ĠNancy +Ġbunu +Ġcrust +Ġactivated +Ġê·¸ëŀ +outhe +Ġports +Ġneural +Ġjaw +Ġobservations +Ġvoit +aban +ải +¦¬ë¥¼ +omes +à¯ĭ +qui +Ġkindness +Ðij +Ġ41 +Ġmoderate +Ġangels +ĠTamb +èt +Ġchlor +ĠBilly +ì²ĺë +acon +Ġselecting +ĠDelta +Ġnull +denly +Ġciud +Ġtendency +Ġbreakdown +Ġmint +ÑĦоÑĢм +orph +Ġdawn +spr +ĠWILL +ächlich +Ġpuppy +700 +Ġத +Ġfails +ĠConc +Ġrelatives +Ġinviting +Ġautonom +Ġcomposed +Ġunity +Ġdecis +Ġaccessories +ĠCass +Ġbist +ĠTip +째 +Ġpunt +Ġráp +éĢ² +ANK +ãģļ +exist +Ġcompatible +Ġner +ĠемÑĥ +Ġaplic +Ġbapt +Ġfailing +ĠTamam +Ġoscill +Ġletzten +Ġrepeatedly +Ġjungle +ĠPush +hai +Ġη +Ġdeadly +Ñıж +wiÄħ +ĠCommon +ĠÎķ +Ġskate +TC +ĠMini +Ġhobby +ần +Ġroutes +Ġamigos +Ġconjun +Ġpartnerships +Ġnovo +Ġaver +Ġpouvez +bridge +Ġpreoc +him +Ġturb +Ġsob +ĠSnap +Ġì°¸ +minute +Ġtraject +ujÄĻ +Ġeager +Ġregulatory +Ġbanking +bling +ÑĪÑĮ +aż +Ġbizarre +itated +dire +Ġthreatened +Ġshining +Ġnesse +Ġcorps +ĠÑģÑĥ +Ġteles +Ġtemp +tem +Ġкан +Ġfever +New +Ġheavier +ĠSah +bud +Ġoutros +Ġì°¾ +Ġëªħ +arring +Ġê´ľì°® +ĠNap +Ġsemin +ĠThan +ifs +Ġdesen +ĠÑĤакое +Ġloses +ĠBalt +kon +ĠнапÑĢ +Ġvois +ĠMoscow +Ġchairs +his +Ġrefugees +kg +Ġkole +į¨ +аÑģибо +¦½ +ĠUniverse +ĠDirect +Ġcheating +ĠCin +Ġpatri +Ġadvise +ĠNether +Ġprimeiro +Ġmentioning +nut +56 +arı +Ġpetite +bled +Ġpensar +icio +IND +Ġveteran +Ġladder +Ġconsequence +ожал +ĠBurn +Ġrug +ĠMade +Ġgit +\"... 
+Ġcompetitors +Ġprzed +Ġapparent +ĠArgentina +ĠWorking +Ġcollaborate +woman +Ġretain +Ġleurs +Ġdashboard +×Ļ×ĵ +ĠEarly +BM +ĠеÑij +олог +Ġsatisfying +Ġoftentimes +Ġmapping +ünkü +arth +fold +Ġlaunching +Ġaura +Ġprecision +works +God +Ġstrap +ĠImper +Ġrivers +Ġ| +Ġcuer +regon +Ġarrival +каÑħ +ĠMiami +анÑĭ +Ġsurvivors +ĠSenior +David +Ġestado +Ġsectors +Ġpopping +Ġchim +ayı +Ġkunnen +Ġgallery +Ġsunlight +esehen +Ġyelling +ĠMein +ĠPhoenix +Ġmano +Ġhistoria +Ġoccurring +欸 +ì¸ +ади +å¾ħ +Ġinstitutional +ĠTut +ç² +Ġslaves +ãģ©ãģĨ +Ġforgiveness +Ġtwin +ĠHyun +нÑĮ +ĠKomm +andra +shot +ssä +ĠÑĨе +atta +Ġexpense +ĠGPU +ĠPast +ribly +ĠëŃIJìķ¼ +Ġгода +Ġrespir +æĿ± +ĠQueens +hops +Ġsérie +Ġpref +Ġcomed +Ġplut +ĠOverall +ĠãģĿ +Ġcush +Ġringing +Ġincorrect +ĠÑģÑĤÑĢ +Ġgeometry +Ġadvertis +ĠШ +Ġreviewed +ãģĤãģĤ +Ġdozens +Ġdetermination +ĠPhill +Ġcontributed +ĠCit +Ġpassengers +Ġcôté +Ġrever +Ġtechnological +Ġallen +Ġraining +avi +Ġsalty +Ġtyping +ĠÑĤе +Ġtilt +Ġì¹ĺ +ĠоÑĢ +ĠпÑĢÑıм +Ġrou +Ġarena +arat +åĪ« +HHHH +Ġmanufacturers +ĠEdward +Ġtuck +Ġblows +ingo +ĠMarc +ìķĦìĦľ +Mich +ĠClean +è´ +esto +ĠPack +Ġshaft +BRUNO +Ġaven +uur +ÑģколÑĮко +ê´Ģ +Ġautomated +Ġventure +Ġsurveillance +ĠGrow +ĠEmer +ĠдоÑĢ +Ġinvestor +ĠYok +Ġlatter +ĠNI +Ġfunctioning +ĠHamilton +Ġ51 +Ġmurdered +Ġanchor +Ġcuc +ĠSCP +ĠMadam +Ġconstraints +Ġbarn +anken +Ġë§İìĿĢ +ĠMotor +ĠDoing +Ġamen +etts +Ġinstructor +egt +ako +Ġposture +ivia +ĠPolish +Ġдва +Ġcolorful +Ġelbow +Ġparle +Ġpasser +Ġcondem +ortal +Ġfertil +اد +ĠColomb +Ġalignment +Ġastronaut +ĠMut +Ġsalmon +Ġstructured +ŀר +Ġclicks +Ġmiej +æĶ¿ +ãģĦãĤĦ +ĠRound +Ġrainbow +ĠVA +ãģĶãģĸ +ì§Ī +otz +, +Ġchords +ĠSanders +Ġë¶Ħë +Ben +Ġdarüber +ilians +Ġordering +ĠManh +Ġkilogram +ĠkarÅŁ +Ġgrasp +Ġghosts +alen +ĠJedi +Ġбли +Ġdownloaded +Ġconducting +ĠHak +Ġresearcher +ilan +good +ĠHannah +ĠdÃ¼ÅŁÃ¼n +ĠMessiah +uity +iona +Ġprobable +ĠYE +Ġindependently +Ġbuffer +burn +ourd +ĠMcK +Ġlingu +ujemy +еÑĢÑĤ +Ġintuitive +Ġcracks +appropri +nty +Ġgeen +Ġlend +Ġcertification +IDS +unter +pees +Ġtrump +Ġbankrupt +Ġfeas +èĹ +Ġduż +æ¸ħ +Ġviruses +Ġ58 +god +Ġжел +Ġstalk +Ind +achi +ĠCF +ĠCond +Ġsanct +Ġconten +Ġfreed +ĠRT +Ġmentors +족 +Ġportable +ĠPaulo +rane +HAHA +ĠSection +çĨ +hyun +ĠÎŃÏĩ +ĠPub +ĠIndepend +Ġcompounds +ĠÑģÑĭ +Ġmessaging +Ġdedication +Ġnoticing +Ġdevoted +ÑİÑĤÑģÑı +Ġsnakes +Ġbattlefield +pers +Ġdela +92 +Ġhai +illä +érer +every +Ġresponsive +×Ļ×ķ +opf +éī +Ĭ¸ +Because +Ġtourism +Ġê·¸ê²Į +×ķצ +Ġcans +stüt +Ġdonne +ĠDios +ĠUber +actory +Ġoriented +ĠHerm +Ġpatron +urf +bei +Ġprograma +ĠOhh +gener +Ġfist +ĠWendy +Ġanda +Ġguessed +Ġfreak +ä¸Ńåľĭ +ĠKings +chool +Ġoffline +ĠIndiana +ĠAlliance +Ġ53 +Ġparticul +ĠFocus +Ġinhabit +Ġê°ĻìĿĢëį° +ĠMcG +owski +ĠìĿ´ê±´ +ĠpaÅĦst +они +itta +Ġconfirmation +ĠBrooklyn +Ġnoodle +fund +itud +Ġgrandparents +Ġbarbecue +ειÏĤ +Ġá +Ġballot +ĠVeter +Ġpipes +igious +ĠGraph +ested +Ġë¸Įë +ĠKE +ãģ¡ãĤĩãģ£ãģ¨ +Ġeins +Ġhatred +ãģijãģ© +Ġdang +eeee +Ġarchae +ĠJesse +Ġdetected +Ġseni +burgh +Ġdisplacement +Ġdop +Ġconditioning +ĠнеÑģколÑĮко +Ġdisturbing +PH +Ġthinner +Ġwounded +ĠCuando +Ġcushion +Ġwhites +Ġpreferences +Ġì¤Ģë¹Ħ +Ġkaż +ĠGate +ĠPath +dles +à¸Ħร +imore +Ġë³´ìŬ +Ġdisciplines +á»ı +Ġmesma +ĠìĥĪë +Ġìĭ¬ +Ġging +Ġumbrella +IGHT +Ġpension +Ġcombining +SS +Ġrectangle +á»ĩt +Ġproxim +ĠCow +¸Į +Ġintentional +æķĻ +Ġdecid +ĠÑģкаж +ĠUma +iasm +buz +Ġdebris +Ġcass +ĠProp +iska +ëł¥ +esterol +ussian +ìĿ´ëŀij +Ġunlimited +Ġadmire +Ġtightly +Ġgenome +ĠJunior +venir +gus +ĠcÄĥ +ĠVlad +ĠíĤ +Ġrelativ +inci +Ġaunque +ĠBoys +ÑĨион +ĠSwiss +Ġphysicians +Ġíıī +ĠPET +Ġwounds +about +Ãłi +onz 
+urities +ĠÑĥвид +å·¦ +Ġmentality +Ġvariance +Ġsegunda +Ġvolcano +alie +à¥ĩ +Ġtiles +ĠTerry +ĠاÙĦÙĦÙĩ +Ġcanon +Ġscattered +pton +Ġdefinitions +Ġalgebra +oten +ablo +ijuana +Ġwrapping +Ġsesame +ĠнаÑĩина +ĠAlf +ĠÐłÐ¾ÑģÑģ +orno +Ġankle +Ġspecialty +Ġattempting +iliation +Ġ1920 +Ġphenomena +ĠProduct +ĠBuck +ĠAww +seen +Ġvoid +ĠFranklin +Ġadvocacy +ĠSep +Ġcoolest +ĠÑģÑĢазÑĥ +ĠQuand +Ġ900 +ĠTrad +dies +Ġhash +æĪijå°± +ä¹Łæĺ¯ +Ġpots +Ġsadly +Ġviable +ĠTiger +ĠONE +Ġneurons +owanie +ÄĹ +ĠShar +ĠLandes +Ġconferences +該 +Ġcredential +Ġlime +inee +xit +pay +Ġincons +Ġ>>: +èªį +Ġíŀĺë +Ġlesser +Ġspill +Ġpremise +Ġ365 +ĠHost +Ġtomar +×IJ׾ +ë²Ī +ĠWhats +Ġlightweight +ĠMap +fia +ellschaft +Ġvendors +uesto +ĠMister +ĠÐŁÑĢи +åı³ +hma +Ġintentionally +ĠTang +éĹ® +Ġidentification +Ġetcetera +ĠNee +ĠÑĤÑĢи +ê·¸ +Ġcryptocur +Ġinhale +Ġaddict +åIJĦä½į +Ġmau +ĠÑĤакаÑı +Ġë²Ħ +Ġcomprar +iedzieÄĩ +ĠоÑĤно +Ġbeginner +ĠмÑĥж +Ġobsc +Ġlimiting +ascular +Ġinspection +aci +Ġrejo +Mus +Ġzaten +Ġszcz +ĠMadrid +Ġvarieties +ĠestÃł +ĠShakes +Ġkits +Ġadminister +Ġlava +ĠgÃ¥ +試 +ת×Ļ +ĠWayne +Ġinstagram +Ġrated +paper +Ġbild +Ġpretending +Ġobserving +ĠÑģамом +Ġtror +Ġorganisms +Ġfalta +Ġhometown +ç± +Ġíĭ +Ġcheg +Ġì¡ +Ġcomma +isé +Ġlikelihood +avored +Ġgeldi +ников +Ġmedio +Ġjakie +ĠJup +Ġgreenhouse +Ġspit +кое +Ġкаж +ĠGram +ĠConference +Ġdeficit +sın +inse +uÄŁ +Ġricht +Ġcoincidence +åıį +Ġeurop +Ġbutterfly +pread +Ġìĸ¼ +èĢ¶ +Ġwavel +ĠInfin +ĠPlanet +Ġselfie +ientras +Ġarrog +oser +idal +ł×Ĺ׳×ķ +ütün +Ġfreshman +ĠMachine +ÏĥÏĦ +ĠDia +ìĿ´ëĭ¤ +ãģĵãģĨ +nea +Ġlisting +Ġconfigure +utor +Up +tschaft +rière +Ġupwards +ĠÑħоÑĩÑĥ +Ġsweep +Br +Ġexpressing +Ġunhappy +Ġmandatory +gender +ĠAÃŃ +Ġindicators +Ġoils +note +Ġsegur +ожеÑĤ +ynasty +Ġdistances +Ġmerge +BERT +Ġsurrender +Ġbuat +ĠAwards +Ġseñor +odox +Ġflavour +Ġabdom +Ġconfigur +86 +ĠDIY +Ġrigid +°ĺ +Ġcorporation +Ġgroom +jaw +ĠNear +ило +Ġopera +ĠInnov +иÑĢа +ĵ± +Ġspecified +Ġcosm +ĠFreedom +Ġclown +ĠNem +Ġвол +Ñijн +Ġcharger +à¹ģล +Ġinfluential +äsident +é¤ +ĠìĦłë +Ġvolumes +æIJ +Ġoutras +ĠTwitch +Ġfounding +Ġawhile +Ġcoil +ê°Ļ +Ġcả +ĠThrow +ĠHence +ommt +ĠBenjamin +глÑıд +Time +obic +Ġmour +Ġdread +ĠLÃł +ĠChile +Ġpreval +Ġvain +Ġartık +Ġpreserved +ĠоÑĤд +Ġwarehouse +Ġbeste +ĠSeveral +ĠSituation +Ġcardboard +Tod +erna +Ġgarant +Ġgesture +Ġhen +Ġspelling +osexual +Ġanne +Ġmice +ĠMeine +card +Ġrebell +Ġcerto +Ġìľłë +Ġverschied +ĠBos +Ġinvention +Ġtrze +Ġmanière +ĠChad +Ġspre +Ġorganisations +Ġpoorly +Ġanterior +Ġstair +кÑĢ +Ġatomic +Ġsympath +Ġcontinually +Ġkleine +ète +иÑī +οÏĤ +peut +Ġreposit +Ġentra +Em +Ġfinancing +Ġмног +Ġthesis +ĠComputer +eau +ĠTree +Ġbride +onsieur +shire +wic +DE +ĠìĪĺë +Ġacom +ĠPO +ersch +ĠпомоÑī +ĠArmen +Ġ죽 +Ġzor +Ġprints +ĠDass +港 +Ġdurable +ĠTransport +ìŀIJê°Ģ +Ġлег +Ġdét +ôle +amous +YN +Ġcliff +Ġgrammar +ĠÐŁÐ¾ÑįÑĤомÑĥ +ĠlÃłm +esch +Ġmiserable +Ġvolts +ĠCad +ukan +ÑĤив +rust +Ġìĺ¬ëĿ¼ +Ġverk +Ġchickens +ĠYoo +Ġoutfits +code +Ġhierarchy +netes +Ġcounterpart +Ġtôi +Ġted +ĠBart +ĠëĿ¼ +ĠGenau +Ġincoming +ĠABC +rique +ĠоÑĤп +qual +Ġincentive +Ġihren +׳×Ļ +loe +Ġ1930 +Ġbarg +Ġdiction +Ġönce +INS +Ġreh +isiaj +mouth +Ġscoring +lık +ĠìķĦ주 +ORIA +ĠEstados +Ġcompanion +Ġassemble +Ġpunished +Ġital +Ġprevents +istes +ĠKentucky +Ġlocate +Ġfasting +ãģ¨æĢĿ +ĥĢ +ĠSeb +ĠCrown +opia +Ġwhip +usz +ками +Ġdatabases +åŃĹ +Ġprosec +Ġ1997 +ĠìĤ´ì§Ŀ +ĠSolar +ĠPues +ĠZen +ollo +ĠGuru +Ġsqueez +ĠÐĹа +ĠÄį +ceptions +cca +izable +mand +Ġbreakthrough +Ġtablespoon +ĠSEC +ikh +ĠSão +Ġпло +amen +Ġprac +Ġdarling +Ġtaller +Ġrendering +Ġìļ°ë¦¬ê°Ģ +ĠÏĦηÏĤ +Ġmã +Ġesos +uerdo +ĠÑģÑĩиÑĤ +aller 
+ìĹĪìĸ´ìļĶ +Ġmillones +lerin +Ġpegar +onne +Ġenrollment +Ġliegt +Ġboa +wiÄĻ +bsp +Ġcycling +ĠBernie +Ġ1989 +ĠдалÑĮ +ĠDakota +ĠÑģвÑıз +ĠCP +Ġstare +íĤ¤ +Ġprosperity +Ġarrangements +Ġarriving +mä +Ġkayak +ipt +Ġpardon +Ġrelat +Ġverste +ĠFig +Ġfoil +ĠTalking +peare +Ġnoi +ĠпÑĢиÑĪ +Ġhockey +Ġado +ĠOUT +67 +Ġhormones +ĠAvenue +ĠSuperman +Ġprescription +ubernetes +CL +otive +NIS +ienen +Ġsadness +ĠVit +Ty +Ġstarter +Ġbede +Ġfoundations +Ġsore +åºĹ +ÑīеÑģÑĤв +ìļ°ë +ĠÑĩÑĥв +link +Ġmaneu +working +Ãłn +ĠAttack +ĠCart +veis +ĠResp +ensing +Ġì¢ĭìķĦìļĶ +Ġescuch +ĠRNA +Ĥ´ +Ġadop +Ġbending +عد +Ġmanages +usp +Ġtart +Ġrouter +Bo +Ġestablishing +Ġbalancing +Ġathletic +ĠSlo +Ġfills +Ġнаб +Ġдал +Ġposso +ĠVielen +Ġcritics +Ġlawsuit +ĠIsaac +ĠÑĦилÑĮм +Ġtras +Ġpraw +ĠCrazy +Ġneu +Ġkull +Ġtumor +ĠAPP +gate +ĠARE +98 +ĠSteam +Ġfucked +lage +ĠâĻ¬ +ĠMD +fy +Ġshells +ĠSeems +izers +Ġranges +ĠAntonio +ATION +ĠBaba +Ġìĥī +kun +Ġprayed +ÑĢÑı +ĠпÑĢоÑĤив +Ġseas +bury +Ġ×Ķש +Ġtrait +ĠDepending +Ġdre +Ġkönnt +ÑĨÑĥ +Ġlipstick +eez +ĠпÑĢимеÑĢ +Ġassignments +Bob +Ġmetals +Ġspecially +å°įä¸įå°į +ĠìĺĪë +ĠÅ¡ +Ġvista +Ġά +Ġtwins +Ġnotable +ĠSau +Ġdévelop +Ġçek +Ġpolynom +avam +Ġtambé +оном +Ġplasma +Ġefect +Ġläng +Ġcasi +Ñģа +ımı +ãģĻãĤĭ +ĵ¤ìĿĢ +Ġlabour +ossen +ĠPun +rif +Ġdoses +Ġoperates +илли +Ġjaar +staw +ĠìĤ¬ëŀij +Ġatm +Ġprotects +Ġimped +HO +Ġcima +Ġtoch +abis +Ġsendo +laus +Ġcurl +ĠNum +Ġsponsors +Ġdébut +ĠAlexa +ĠBür +ĠAmer +Ġcope +Ġизв +jal +Ġ1995 +apat +resse +ĠPrize +ĠClaire +ĠBrandon +Ġwszystko +Ġvalued +à¸Ļะ +Ġsect +Ġsecretly +Ġdiamonds +ĠEvan +ĠRPG +ãģ«ãģª +ĪëıĦ +ĠUniversal +Ġdoubts +ĠPin +wiÄħz +ļ© +Ġalbo +Ġbraucht +AUL +ĠMobile +grades +Ġschem +why +ĠNicht +pi +gle +Ġchorus +Ġgly +Ġreinforce +Ġmuff +ĠShen +ĠHola +Ñĥг +videmment +vial +acious +laimed +ĠRico +Ġvegg +Ġillustration +ĠButter +owad +Ġeux +Ġenfants +ĠLeader +ĠVillage +etically +ÙĨÙĬ +Ġstew +Ġsurprises +Ġcue +ĠGrandma +ĠCelsius +ĠRicht +enc +Ġpetition +Ġherb +Ġwicked +Ġschle +ocaly +Ġtransf +Ġtokens +ĠGray +ĠBBC +IK +Ġ1500 +zn +ĠNev +Ġkoy +Ġzar +Ġbullshit +ĠColombia +ulative +Ġwidespread +yect +kit +Ġempresa +Ġnour +Ġburns +atin +aired +Ġrevolutionary +ĠгодÑĥ +ĠLogan +Ġ1996 +ĠGraham +reb +ĠNHS +æľĽ +Ġcostumes +Ġnawet +Ġlovers +ĠLucy +ĠIndigenous +íķĺ기 +Ġimmunity +¥´ë +uito +Ġexcessive +Ġdonations +Ġ×Ķר +Ġ첫 +éīĦ +Ġdrying +melon +Ġsurveys +Ġ무ìĬ¨ +風 +aaa +Ġprobe +ancial +Ġlouder +Ġhotels +Ã¼ÄŁ +agner +Ġorigins +Ġë§Īì§Ģë§ī +Ġ** +Ġstrangers +ĠHaus +comed +Ġanthrop +Ġuso +ĠìķĦì§ģ +ĠYuan +ĠíķĦìļĶ +pler +ressive +Ġspraw +ĠStew +Ġ1994 +Ġelders +Ġmeinen +Ġjunt +Ġacoust +ĠWohn +Ġbananas +Ġprojection +ĠStick +legt +speed +ĠcÅ©ng +ĠWort +ĠBaltimore +ĠÑĨел +Ġdunno +å¼· +?, +ãĥīãĥ³ +ĠLocal +osto +ÐŃ +ода +ĠPortuguese +Ġtheirs +Ġdém +åı¦ +Ġdrauf +ĠBuddhist +erta +Ge +Ġcarrot +ĠWonderful +Ġsoak +Ġchairman +ggi +ICA +fried +Ġflick +ĠThroughout +Ġìļ°ë +Ġcough +Ġfluffy +school +Ġripped +-------- +ĠZukunft +Ġнеб +Ġsto +ĠBO +pent +ĠLawrence +ÏīÏĤ +sticks +ĠEins +ĠÑĢÑĭ +ĠStrong +Ġcaramel +Ġspite +azar +éĥ½æĺ¯ +Ġcritically +Ġobra +owitz +ĠZone +ĠÑĢек +Ġsug +arded +Ġgì +ffentlich +anche +ØŁ +astically +ìĿ¼ë +лав +Ġsimplest +ĠFriend +Ġquello +Ġambition +Ġabbiamo +åºķ +ĠÑĦоÑĢм +ĠEssa +Ġeducators +Ġstatistical +éĢĻéĤĬ +Ġchanger +Ġatau +étais +ĠShakespeare +ëIJĺ +Ġtriggers +Ġrealiz +Ġcelui +wheel +Ġloyalty +Ġscreams +kehr +ĠMega +east +Ġtops +ĠTotally +ountain +lord +Ġviolation +ĠGA +Ġnicer +ĠFresh +ĠMelissa +function +Ġrape +Ġexceptions +Ġsilicon +Ġliberty +Ġhouseholds +ãģįãģ¾ãģĻ +ĠCA +ĠÐŀб +Ġlib +ŀĮ +cific +Ġtropical +Ġinvestigating +HD +Ġadapter +ĠPitt +ancia +ĠShell +friendly 
+Ġconclusions +Ġturtle +Ġdecomp +Ġanimations +ĠÑģек +insi +Ġretention +kie +Ġinjection +ĠMadison +ì°° +Ġvient +Ġvaried +Ġviolin +ĠBil +Ġluckily +Ġhtt +lä +Ġranch +çľĭçľĭ +Ġsólo +ìķħ +ĠDerek +ĠScripture +оÑĢа +Ġclassrooms +avil +formed +Ġbeforehand +ĠGem +prech +Ġlin +Ġgreens +ÑĨев +ĠMercedes +Ġdrought +gasps +Ġabortion +Ġterribly +Ġsposób +Ġsecured +Ġatrás +Ġwavelength +Ġgrains +ective +Ġspacecraft +Ġtours +Ġprofes +Ġsurgeon +ĠPie +Ġideally +arner +UP +opard +sce +Ġimmense +ĠOrt +roller +ĠDallas +ĠNicholas +Ġsulf +ĠToyota +Ġquantities +ceans +Ġcui +ança +ĠCAN +itzerland +åĦ¿ +Ġzou +ĠCyber +legen +ĠInit +edu +Ġapert +Ġadjac +ouv +èĢĮä¸Ķ +rs +Ġcabbage +Ġwheelchair +inyl +ĠDynam +ĠìķĦëĭĪëĿ¼ +Ġling +hl +ĠмогÑĥ +Ġcrisp +Ġmij +Ġdug +nin +Ġbloss +Ġbelonging +Ġloudly +Ġminerals +Ġconcluded +Ġsearched +96 +ĠMeet +ĠSEO +ĠСк +ĠHob +otta +Ġpropaganda +Ġcinnamon +Ġhunter +Ġgemeins +Ġsculpture +ulsion +Ġväl +Ġmagazines +Ġcontroversy +ä¸Ģ樣 +Ġsequences +ãģĦãĤĭ +ĠíļĮ +Ġdeleted +使 +IJëıĦ +Ġvarying +ãĥĨ +Ġmounting +Ġaffair +Ġpathways +æ¦ +Ġdigo +亮 +Ġдок +Alex +Ġtobacco +ĠCV +Ġbothered +Ġambient +inky +ĠSL +Ġhates +Ġjeżeli +Ġcongreg +Ġelas +Ġdeuts +ĠStudios +chÄĻ +Ġdocumented +ĠCruz +ĠLen +ĠDouglas +ĠPortugal +enti +Ġspouse +Ġanalys +avia +Ġedited +Ġlại +built +Ġville +adora +Ġbracelet +Ġsushi +Ġpm +Ġtrails +Ġlug +Ġöver +Ġsorrow +Ġcolony +adox +Ġserie +anyak +ĠØ· +ĠGulf +æĺ¯ä¸įæĺ¯ +ĠPV +ĠSamuel +ĠKit +ĠRal +ontin +expl +Ġentries +Ġactivists +Ps +Ġsant +ĠÑĤоÑĩ +ĠBruno +keley +Ġtutto +éĶ +Ġvintage +Ġterrified +ĠпоÑħ +usive +owers +айÑĤ +ëıĻ +Ġtwisted +ĠThought +Ġtah +Ġshrink +Ġsheer +lit +Ġdalam +Ġdib +Ġvard +owane +Ġdobr +ĠRena +ĠÑģвоÑİ +ĠpaÃŃses +ĠEra +ãģ®ãģ§ +ĠBUT +sighs +Ġ그거 +ĠgroÃŁen +Ġ빨리 +Ġnerves +Ġconstit +Ġpreocup +ĠGay +ĠXu +keeper +heure +..) +ĠCalm +ĠUnidos +ĠìĿ´ê²ĥ +ĠAqui +ĠìłľìĿ¼ +dır +ì¦ĺ +your +ĠÑįÑĤим +2020 +Ġrund +ĠHO +ĠCatherine +ieli +Ġfusion +Ġideology +Ġforam +shaped +ĠíĽĦë +Ġwt +Ġretr +Ġpréc +Ġê°ij +Ġopenly +vity +구ìļĶ +Ġobstacle +Ġboo +Ġseiner +icorn +Ġeigenlijk +Ġheader +aremos +Ġsofter +ĠÐŁÐ¾Ð´ +Ġprejud +Ġdefines +ierte +Ġblending +Ġbelievers +ĠWochen +Ġникак +ĠÐļогда +ĠTypically +Ġíģ¬ +管 +cios +Ġmissiles +Ġsponge +ĠKitchen +Ġtren +ningen +Ġscrap +Ġserait +´ìł +ç¹ +Ġë°ĺë +Ġrestored +ĠprzykÅĤad +ĠKubernetes +Ġsait +Ġuw +Ġenabling +Ġtravers +amps +åıĹ +ĠOMG +ensor +Ġzosta +Ġpronounced +Ang +normal +Ġeconomies +tin +ĠChampion +izen +Ġarbeiten +ĠGospel +ĠZu +nga +Ġliteracy +ĠMans +Ġcirculation +Ġadap +ĠTotal +Ġmereka +Ġolacak +ÑģÑĤаÑĤи +Jack +Ġmund +Ġthief +bies +Ġê²ģ +aque +ĠÚ©ÛĮ +ĠScar +å² +Ġabol +Ġdevote +Ġ01 +Ġsitten +ĠVisual +week +some +ingt +Ġjournalism +ĠHir +ĠBachelor +inery +ÃľND +ãĥŁ +ç»Ļ +Ġcoloring +ĠCrist +Ġcelebrities +ĠÑĩиÑģ +ĠCrit +Ġdifferentiate +ĠÐľÐ½Ðµ +elim +Ġseafood +Ġalgumas +otherapy +æĪ° +Ġglaub +Ġarbitrary +gens +ĠбÑĥдем +Ġtav +Ġcreamy +ĠCountry +añ +меÑĤ +Ġhinter +Ġmism +Ġillustrate +ÃľNDNIS +Ġdecreasing +Ġweniger +AKI +ixon +Ġней +Ġfatto +Ġnerd +çł +Ġbitte +Per +Ġtane +Ġgöz +Ġforte +ĠEy +ĠнавеÑĢ +被 +ĠWordPress +ĠMis +ů +zäh +Ġintéress +osaurs +ĠFalls +Ġnessa +97 +Ġmuseums +Ġcorresponds +Ġsings +four +Ġeder +ĠCommunist +oa +nek +ĠWHO +Ġcorpo +Ġmessing +ÏĦαι +Ġbrushes +Ġbisc +ĠArbeits +ĠTax +Ġsele +Ġflags +oupe +Ġanticipated +ãĥij +ĠNad +Ġpoured +Ġml +Ġllama +Ġvisualize +Ġlisteners +ÙĦÙĥ +alten +Michael +Ġcosì +Õ¡Õ +opus +Ġíķ´ì£¼ +Ġhike +ĠAttorney +ĠHillary +uded +Ġíķĺì§Ģë§Į +Ġdove +Ġstorms +акÑģ +Ġdoctrine +Ġhex +iks +noÅĽÄĩ +Ġscripts +Ġδεν +ĠÑįÑĤиÑħ +ĠÐĨ +aber +ĠVas +Ġcentimeters +×ŀ×Ķ +ниб +Ġriders +ĠTrib +åĮħ +Ġtakże +Ġnoun +Ġicons +Ġsolely +minded +Ġdispon 
+ĠSwitzerland +Ġclusters +Ġqueda +ailing +Ġmanga +Ġ68 +ĦĪ +Ġtet +gins +haus +空 +å·¥ +ĠOP +oted +Ġnouveau +ALLY +ÙĪد +òn +Ġmortality +ĠGitHub +drop +Ġdisgu +Ġrecom +Ġlocals +Ġhomemade +amba +Ġpronunciation +Ġalphabet +анÑĮ +owany +iras +idency +OME +ĠÑĢаÑģÑģ +arak +viamente +Ġnonprofit +ĠYouTuber +Ġparenth +ĠBoo +vat +ĠStir +Ġprecip +Ġants +Ġally +ĠMaori +ĠëĮĢíķľ +åı¯æĺ¯ +ogene +ĠLabour +arette +Ġrecycling +ensa +Ġpursuit +Ġsak +ĠÐĹдеÑģÑĮ +Ġtolerance +Ġsaat +Ġclicked +âĻ¥ +Ġfacebook +ĠInto +Ġincentives +기ëĬĶ +ĠDennis +ĠWik +gesch +à¹Ģà¸Ľ +ĠÏĢα +ĠWhoo +Ġrounded +Ġdope +Ġcapturing +ĠWarri +Ġcivilian +Ġcharming +Ġesas +Ġsustained +Ġleaning +Ġabundance +ÃŃlia +алÑĮнÑĭй +Ġphải +acja +Ġê°ĻìķĦ +activ +าย +Ġ97 +Ġмой +cro +ĠJackie +ittees +bracht +ulent +Ġìłľë +Ġplugin +vantage +party +Ġsuas +Ġante +Ñĥл +ÐĿÐIJ +æĤ¨ +ĠÏĥÏħ +Ġmeth +Ġenthusiasm +ÑıÑĤÑģÑı +íĻĶë +Ġsynthetic +Ġseasoning +ĠLost +onomy +ĠSpark +Ġbure +Ġassured +Ġimagin +Ġcarro +Sha +Äħt +нÑĥÑĤÑĮ +ática +TY +Ġkern +ĠBrazilian +ð +Ġsuspended +ĠCarib +Ġbizim +ĠOliver +ãģ¶ +Tom +Ġплан +Ġnope +omething +Ġbeiden +ÑĨен +Ġfluct +ĠμοÏħ +Ġfathers +ĠBlake +Ġupward +ĠDash +ĠLil +ĠìĪĺëıĦ +Ġrevelation +Ġelevated +ĠJiang +LED +ĠThompson +ĠмогÑĥÑĤ +ÑģÑĤÑĢÑĥ +ifiers +Ġcomeback +Ġbuyers +ê²° +ĠSales +иÑĩе +ciones +Ġwhistle +Ġdull +LEX +Ġíķĺê²łìĬµëĭĪëĭ¤ +Ġcriminals +Ġdescent +ipple +ması +Ġfoolish +ĠдÑĥмаÑİ +tar +Ġmango +Ġchoreography +Matt +Ġterritor +Ġacaba +ĠEinstein +ĠIBM +ĠMetal +ĠCrystal +Ġrah +Ġfoul +ĠIslands +Ġintact +ĠRail +.: +Ġacá +ĠпÑĢоп +еÑĢе +ĠWrite +hehe +ĠFO +ĠÏĥÏĦη +Ġdoin +held +Ġappropriately +Ġdeliberately +Ġarchive +Ġgiveaway +ãģĵãģĵ +Ġfinale +лаÑģ +ено +Æ¡n +æ£Ĵ +ogo +çī© +ĠAudience +ãħł +Ġsubur +Ġheadache +аннÑı +ĠWitch +ĠSwedish +ĠBI +Ġerase +Ġkhi +Ġcommentary +ĠSultan +íĥĿ +ĠLeban +Ġë³´ìĭ +ĠPam +pekt +month +Ġgrounded +ê¾ +ĠÅŁekilde +250 +ĠSCH +ioso +Ġinaug +heimer +Ġreflecting +ĠRuth +ĠOil +Ġtrouver +uep +..] 
+ĠìŀĪë +Ġolha +Ġreasonably +Ġglitch +UB +ĠGran +Ġadalah +Ġlent +را +Ġtraction +Ġadjusting +´¤ +нибÑĥдÑĮ +Ġдоп +Ġstretched +Ġort +Ġcosine +viol +Ġìħ +cir +Ġbastard +ä¸ĩ +ĠÑħод +Ġquier +Ġpressures +ĠAnh +å¹¾ +Ġelles +ĠдÑĢÑĥз +ĠможеÑĤе +Ġchá» +ĠMé +ök +ầu +ìłĪ +zin +Ġcaution +iban +Ġjudging +ÑĥÑİÑĤ +Ġbaj +ĠСейÑĩаÑģ +ĠPoor +ĠNazi +Ġupbeat +yang +Ġweekends +ĠEssentially +Ġoluyor +Ġspatial +acker +Ġseller +Ġ×IJ×ķת +ij׾ +Ġvivid +ĠBond +ê¶Į +iskt +ãĤµ +Ġgoat +driver +Ġmug +ictional +Ġallt +ĠIniti +ĠRand +Ġfinishes +Ġê°Ī +Ġvitam +Ġteenagers +ĠMorris +ì¤Ħ +ĠOri +iya +Ġmyös +Step +ĠKre +辦 +Ġdinosaur +Ġëªĩ +affe +ĠëIJ©ëĭĪëĭ¤ +Ġzeg +åĪĩ +ĠManhattan +Ġsujet +uelle +stoff +Ġdür +Ġsubmar +eses +Ġaquele +Ġnou +ĠFaith +tz +ĠÑĤомÑĥ +aceut +liers +Ġbandwidth +Æ°á»Ŀ +Ġrespective +ĠAve +Ġspreadshe +ĠSent +icamente +Ġinfra +Ġlearners +Ġà®ī +aiah +renal +Ġmustard +Ġhabt +çĥ +ĠQué +Ġanalyzing +æ¯ı +Ġsolic +Ġ×Ķ×ķ×IJ +Ġcausa +Ġwelcomed +ĠSuccess +Ġfacile +ĠÐŁÐ¾ÑĤомÑĥ +schein +Ġfetch +Ġstrat +ĠÑģÑĤоиÑĤ +ìĹIJìĦľëĬĶ +ĠÑģпоÑģоб +mam +ĠserÃŃa +naments +writer +Ġconsulting +íĺĢ +ĠBerkeley +eu +asive +UU +ĠAnalyt +Ġsubmission +Ġmagnificent +enza +Ġecon +Ġprofiles +Ġincar +Ab +ĠNun +Ġhic +screaming +Ġresilient +åĪ© +grund +Ġconcur +Ġbereits +LD +Ġnurt +ìī +Ġfeast +Ġencuent +ĠMichel +Ġsuprem +\"] +Ġfeeds +ĠKollegen +isser +ĠFeng +ĠWen +mun +ĠtenÃŃa +ĠWrest +Ġìĺ¤ëĬĺìĿĢ +Ġstead +Ġrestoration +Ġdonated +Ġdels +Ġcensus +Ġdesperately +worthy +HE +ĠSpa +ĠBryan +Ġhj +ĠRaw +ìķĦë +ĠCamera +Ġzien +Ġstyl +ĠTW +ĠCheese +borne +Ġobl +ĠAlready +Ġunstable +Ġflames +post +Ha +romagn +ĠìĹĦë§Ī +dest +Ġkolej +Ġtemporarily +Ġdetermining +ĠGlass +ÑĢон +olan +Ġdominated +åĮĸ +____ +ĠÙĩذا +ĠDana +Ġdinheiro +aqu +민 +ĠÃłs +ĠJoey +ĠGriff +Ġattain +Ġtransitions +ĠLiterally +енд +ĠHaven +Ġgrabbing +Ġcrystals +ĠFourth +Ġcandles +ĠÑģлÑĥÑĩа +rico +Ġ5000 +etto +Ġundo +Ġkto +Ġdivert +Ġchir +Ġpersec +Ġhiking +Ġannouncements +çĶ± +зÑĭ +Ġauc +Ġsystemic +ĠRM +Ïĥα +ĠÐĶж +Ġyar +ĠWard +Ġpissed +Ġcarn +Ġautonomous +ãħİãħİ +sover +æ²ĴéĮ¯ +å¾Ī好 +Ġreflex +Ġgardens +Ġdated +ì± +amiÄĻ +Ġcontinuity +Ġcitizenship +Ġschwer +Ġzak +table +ĠÑģÑĩ +è§ģ +ĠÏĥε +Ġgenerates +구ëĤĺ +öh +óm +alam +ĠJUDY +ĠBug +Ġãģ¦ +Ġdrones +Ġágua +acaks +æļ +ĠÐļон +×ĸ×Ķ +Ġstrive +ĠAltern +Ġnearest +Ġproyect +tera +ĠASHLEY +Ġworm +Ġreplay +Ġtara +ĠIndians +ãĤ° +icaid +ĠìĪľ +Ġappealing +ĠWes +Ġmentions +Ġделе +Ġkw +Ġfragile +isz +ków +hang +color +Ġpresidente +87 +еÑĦ +çĪ¸ +Ġдобав +ĠNelson +áfic +ĠMICHAEL +Ġmechanic +Ġmetres +ĠoczywiÅĽcie +ĠCind +ĠogsÃ¥ +Ġlandsca +ACE +Ġheadlines +Ġcatalyst +ĠCatch +inkles +Ġpills +ordo +Ġimmigrant +Ġexamination +Ġaccidents +zÄħd +Ġquiere +Ġnella +Ġ67 +Ġpassa +Ġsuperfic +istor +Ġnov +ëĭµ +Ġmandate +isons +ĠVirtual +Ġselber +Ġcounseling +ĠNBA +Ġsept +Ġbeliever +Ġmarvel +ĠIntegr +ĠмÑĸ +Ġorph +Ġbackward +ĠGeneration +ĠPict +ĠÑĤоÑĤ +Ġtapi +prochen +Ġhallway +hte +ĠÛģÛĴ +ĠZum +èĢģ師 +achment +iquer +folg +ĠEddie +ĠKil +Ġwellness +stock +è¼ĥ +Ġkaç +Ġterrorism +Ġpointer +Of +heric +ĠUltimately +Ġmeses +ĠTrade +Ġpint +Ġtuition +Ġdisagre +Ġê²ĮìŀĦ +Ġmanuscript +Ġroomm +Ġoutputs +еÑĨи +Ġries +Ġsalud +otzdem +Ġmasses +ĠbyÅĤa +Ġclearing +Ġdiscourse +atson +Ġfolded +ĠJar +ÙĦÙī +900 +ĠÑĥÑģп +Ġprophecy +Ġinterfere +иÑħод +à¹Į +Ġthri +Ġ×ŀש +Ġlazım +Ġ1992 +Ġfuturo +Ġlocking +Ġembargo +ĠNeither +ivamente +ĠmÃ¥ste +Ġmik +Ġcollector +екоÑĤоÑĢ +ĠGand +Ġsentir +ĠMight +å¡Ķ +Ġganzen +UC +Ġrelating +SD +Ġmosquito +GR +Ġhollow +âĺħ +ĠWalker +Ġaffiliate +Ġduplicate +нем +Ġgrape +ĠOrganization +Ġsynt +Joe +Ġgeg +Ġrevealing +ĠEthan +outer +Ġyay +é«Ķ +лаÑĢ +Ġreportedly +Ġihrer +Ġrecognise +Ġbumper 
+ĠRandy +ĠVenus +tles +Ġappetite +Ġglucose +Ġchodzi +ĠFurthermore +tir +Ġconta +Ġintuition +Ġaltitude +Ġchunks +ĠJoshua +ıģım +rylic +leans +ĠíĶ¼ë +LL +Que +Ġgor +ĠзнаÑĩиÑĤ +Ġpoems +Ġexcel +Ġexplored +Ġpopul +Ġincluso +stä +ĠGavin +alling +ĠÏĦον +é© +arbeit +ĠGas +Ġglorious +rieben +Ġspam +Ġindoor +Ġthrust +ĠAld +ĠPrior +Ġonboard +ãģłãģķãģĦ +oca +ASH +£ł +ĠChristine +Ġdrawer +Ġnoon +Ġìŀĺë +Ġpermanently +æ·± +ĠнапÑĢимеÑĢ +Ġpodcasts +erapeut +prit +Ġstainless +ĠÚ©ÛĴ +Ġfamilia +ĠÑĢазÑĢ +unto +ĠÑģÑĤол +Ġhä +ĠHai +ĠPB +izon +Ġkonnte +Ġbüyük +Ġutilizar +ÚĨ +Ġaquesta +Ġmixer +udent +лекÑģ +ÅĤu +ĠÑģиÑģÑĤем +ĠноÑĢм +Ġfatal +Ġconsiderations +Ġvalidation +Ġoli +ĠkardeÅŁ +ĠGLORIA +Ġpall +еÑģÑĤе +Ġrectang +Ġmedieval +allahi +asti +ĠSyrian +Ġshear +Ġdebug +ĠMai +Ġknocking +ĠLex +ardan +rov +Ġmemorial +æ°£ +ooky +Ġstuffed +Ġpassé +Ġwig +Ĥł +Ġpróxima +Ġ1991 +ĠмеждÑĥ +Ġnuestros +ĠBeast +Ġsmo +atched +ologia +Ġмод +Ġgee +Ġconceptual +Ġô +Ġdecreases +Ġqueries +олÑĮÑĪ +ĠApart +Ġexempl +å±± +Ġfled +ĠOFF +ggak +Ġbead +hir +lies +ĠClearly +ılar +Ġchess +Ġwhichever +Ġ96 +ằ +Ġrespects +ĠмоÑĢ +Ġorganism +Ġgrandpa +ĠVie +è·Łä½ł +Ġflooding +Ġupgraded +ÑijÑĢ +Ġcheeks +Ġconquer +Ġstubborn +Ġpuzzles +Ġauction +Ġrelying +ĠPROF +ĠEsper +ĠÐľÐ£ +Ġhype +Ġpossibil +Ġimprison +ĠErn +ìĹĪìĬµëĭĪëĭ¤ +Ġenvie +Ġresurrection +ä¸įè¡Į +Ġsper +ĠVenezuela +som +Ġìŀłê¹ +Ġnouvelle +Ġcloses +Ġ1940 +Ġqua +ĠJared +ĠPir +Ġinde +Ġscrub +uku +Ġrequiring +Ġвами +Ġconsiderable +åIJĽ +ilia +Ġinne +Ġmeinem +Ġhardship +Ġtraps +roc +ĠìĦ¤ë +Ġresearching +ĠMargaret +Ġpenny +Ġbırak +Ñijл +Ġwool +Ġrhet +Ġflatten +çĩ +à¹Ģร +Ġpied +ĠChap +Ġunderm +Ġfret +Ġcrashed +ĠFrauen +Ø°Ùĩ +ivan +Ġliterary +latego +Ġspäter +Ġsimilarities +âĨ +ĠCoron +ĠCreek +Ġbosses +Ġaccompanied +Ġdebates +Ġassembled +ĠÃģ +ĠVai +Ġtract +Ġsimplement +ĠArin +Ġvulnerability +Ġhormone +IEL +OOK +Ġrelay +ĠAndrea +ril +Ġnecessity +aceutical +ÑİÑī +ousing +nahmen +Ġfootprint +map +ĠTier +annya +intend +åĸ® +å¢ +Ġdecorate +Ġzombies +ĠHyd +ĠSuz +Ġcampuses +ĠEmb +Ġthrottle +Ġadmin +Ġoportun +Ġmirrors +Ġidentities +ĠClin +Ġë¹Ħë +á¹£ +ĠOtt +Ġblues +Ġimpressions +-, +Ġvague +afe +Ġinferior +erald +Ġmedicines +Ġpregunta +osely +Ġtélé +ĠMonth +ĠLeaders +ĠEgyptian +Ġration +kers +heits +Ġrecht +Play +Ġeg +Ġpolls +ĠWOODR +Ġslots +jam +Both +ĠRat +ÑĢаж +ĠBright +ä¸Ģå®ļ +á»iji +urious +Ġsingers +Ġlogin +Ġtêm +lation +ĠMum +Æ°á»Ŀng +ĠEditor +åIJij +Ġinnovations +have +ĠSek +Ġweaker +ĠGob +After +´ì§Ģ +Ġë¬¸ìłľ +ãĥ¼ãĥ¼ +Ġdisadvantage +確 +Ġgaze +ĠMack +Ïģί +ĠKiss +ĠHolo +ĠBirth +izi +bab +ä¿Ŀ +ìĭľê³ł +деÑĢж +Ġsquat +кÑĥÑģ +uni +ĠComme +ĠWOODRUFF +ĠChampionship +Ġwelche +ĠYouth +zem +Ġodpow +Ġpersistent +rut +ìĶ© +íĸ¥ +lair +iku +Ġvendor +Ġchúng +Ġfinanci +Ġoverly +âu +Ġgluten +Ġ1800 +Ġdivisions +Ġciudad +Ġobed +Ġwarum +Ġeher +Ġelim +ĠÐĴо +Ġpeuvent +ĠWanna +Ġattendance +Ġassessments +ĠBog +Ġimagery +Ġcollectively +Ġinformal +ĠSchwe +Ġdeutlich +ĠChel +ĠPE +owed +Ġbanner +Ġshelves +ĠReturn +æĭ¿ +LAUGHS +Ġcongratulate +ĠNorway +Ġdwell +ĠCaribbean +Ġnorms +ĠAnimal +ĠValentine +Ġextending +ĠVou +orr +ĠCheng +¡ +ĠдоÑĢог +Ġveg +ĠhÃ¥ +ĠXin +Ġì¹´ë +emet +Ġhypoth +Ġinteressante +rices +IZ +ĠUSD +Ġrunner +ĠBag +Ġê½ +Ġcomeçar +Ġpigs +Ġweaknesses +Ph +ĠViol +ä¸įçĶ¨ +Ġdragging +ĠAquÃŃ +ĠCSS +Ġmillimeters +Ġestás +Ġacute +Ġdejar +iÄŁ +obra +Love +Ġsilk +**** +Ġjoins +Ġprol +Ġê°IJìĤ¬íķ©ëĭĪëĭ¤ +æĶ¯ +ØŃد +aghetti +änner +Ġstrang +Ġdoubled +Ġdescriptions +Ġstellen +Ġparti +ç«ĭ +²Ħë +ĠÃ¶ÄŁ +ighing +Ġangular +Ġnatuur +ĠShel +Æ°Æ¡ +Ġrays +Ġseper +start +vised +Ġrushed +Ġinternationally +Ġnivel +Ġboxing +fallen +á»ijc +Ġseinen 
+plicity +Ġcarboh +ĠTravis +uso +ĠPhase +Ġactivation +Ġopio +·¨ +Ġdecreased +Car +Ġbundle +Ġexpend +ormal +Ġadjacent +Ġmee +ĠоÑĢг +Ġtranscript +ĠLanguage +GS +è§ī +Ġseul +Ãłnh +Ġnya +nings +Ġìĭľë +ĠëĶ°ëĿ¼ +ĠAgr +ÃŃd +çķĻ +Ġaby +ĠNeo +ıyoruz +ĠThinking +aime +Ġvite +Ġtravés +Ġ×ij×¢ +Ġмед +Our +hoot +Ġliner +ĠPizza +Ġhyg +flies +ĠContinue +Ġdental +ĠTib +Ġregulate +lieÃŁ +ALK +ĠTae +길 +ĠBrexit +ĠGut +Ġoccupation +Ġzrobi +âm +Ġwhisk +ä¸ĸçķĮ +Ġkanske +omon +robe +Ġwarfare +Ġthá»ĥ +Ġjaki +Ġstrokes +Ġpeas +ĠDamit +HAN +Ġinterference +ĠминÑĥÑĤ +NER +outing +Ġtextures +Łī +owi +ĠíķĻ +Ġdens +Ġprotagonist +änn +Ġgoddess +Ġwollte +ijo +ĠWoche +ĠVPN +story +Ġkinderg +Ġfunnel +Ġdistress +ноÑģÑĤÑĮÑİ +Ġnoisy +ĠпÑĢодолж +Ġdaran +Ġenzyme +лож +Ġmute +Ġdwar +Ġاس +Ġkompl +Ġmerit +Ġfosse +ĠDrink +Ġfora +Ġwohl +Ġbreeze +Ġsanit +Ġdrin +ĠìĿ´ê±°ëĬĶ +Ġ62 +Ġì°¨ë +abytes +Ġdeeds +Ġй +ième +iggling +Ġ\"' +ĠÑĩаÑģÑĤÑĮ +ĠAnswer +Ġevangel +Ġ1080 +ĠVisit +icient +Ġreliability +ÑİÑģÑĮ +ĠEarlier +Ġfid +çŃīä¸Ģä¸ĭ +Ġsleeves +iyorsun +Ġbib +ĠAccount +Ñıли +ciplinary +zas +ĠбеÑĢ +Ġnecklace +Ġblender +ĠPhillips +eti +ĠJupiter +Ġprovoc +ĠYears +entre +acio +Ġkü +Ġantenna +Ġnovels +Ġfart +ĠSugar +ĠJudy +Ġcollapsed +ç° +ritis +ĠìĥģíĻ© +ÐĹЫ +ĠVerf +ranean +ereum +ĠTarget +Ġ88 +ĠÐĺз +ideo +Ġregression +ì¶ľ +Ġmówi +Ġstudios +iens +iph +Ġfrying +Ġfascinated +ĠWah +bucks +maya +ĠSaturn +ĠMommy +Ġratings +Ġautumn +Æ°Æ¡ng +Ġloser +Ġcentro +érieur +ĠFold +Ġsupervisor +ĠNobel +Ġunderest +obia +ĠвÑģÑı +Ġverw +Ġfuels +Ġartifacts +Ġë¶Ļ +ĠAutom +çļĦæĺ¯ +ÛĶ +×ķס +Ġihnen +Ġ59 +ounding +еÑĢÑĭ +inars +chant +Ġaddicted +Ġexplosive +Ġdispers +âĸĪ +axis +ARY +Ġlum +ĠÑĥÑģл +ĠØĮ +Ġrupees +ĠPearl +camp +tv +oya +Ġconcludes +Ġcollision +Ġbuyer +Ġplayground +Ġsprings +Ġfeminine +ĠRas +Ġincarcer +íĹĺ +Ġdialect +Ġclosure +Ġchatting +Ġbabe +Ġspotlight +Ġnotation +è·¯ +Star +ião +Ġtête +Ġtide +Ġjunto +Ġsenator +Ð¥ +Ġexcuses +Ġblink +Ġadmission +ĠLily +Ñĭми +Ġamigo +Ġlust +ëĭ¬ +Ġamino +äºĭæĥħ +Ġconsultant +ĠElectric +Ġëħ¸ëŀĺ +ujah +Ġshooter +ichten +ĠUkrainian +Ġaims +ĠEntertain +Ġmiracles +èŃ° +Ġzeigen +Ġlam +Ġress +ĠJill +ylan +Ġrook +Ġhaya +Ġpassport +adata +Ġjuicy +conf +лей +ĠSz +Ġintercept +ãģĤãĤĬãģĮãģ¨ãģĨãģĶãģĸ +ĠTeams +Ġmaken +irrel +ĠLIKE +áºŃy +êµ° +Ġshortage +Ġparadigm +Ġpapel +Ġastero +ãģ¾ãģŁ +Ġsollen +ĠMickey +ĠOrleans +Ġcholesterol +Ġgoose +ÑĨиÑİ +ãģĤãĤĭ +ĠFL +Ġголов +Ġtribute +ĠGam +Ġévidemment +ÑıÑħ +å®ŀ +çĶ° +Ġinappropri +uhan +Ġorganizational +ailed +Ġendure +Ġ76 +Ġshotgun +Ġlivre +Ġsuited +Ġwarmth +ĠSIM +Ġenvision +Ġdegrad +îne +Laughing +ĠWhoever +ĠBuddhism +Ġsprinkle +ceÄŁiz +Ġruins +Ġstarch +ĠHerz +Ġinjustice +Ġhumidity +ожалÑĥй +ĠObject +ĠIgn +ĠExam +igers +Ġthou +ĠSoy +ivas +Ġpoles +math +Ġвним +INGING +edral +Ġexplor +Ġroasted +Ġcrawl +Ġcoff +Ġanom +Ġwij +Ġimproves +Ġtreaty +Ġdiscovering +Ġstatute +Ġmercado +ĠÑģил +Ġintel +ĠChancellor +ĠMedicaid +ugi +Ġverbal +Ġdön +Ġscripture +Ġiteration +eks +ĠOxford +Ġwäh +ĠVad +ĠAK +ĠìķĦìĿ´ë +Ġiets +Ġneedles +ÙĥÙħ +Ġpasado +Ġalbums +Ġyea +etzen +ĦëıĦ +Ġdetermines +Ġthee +ĠPlaying +ärt +Ġצ +cled +Ġdownward +alone +Ġsolu +Ġpartition +Ġwz +dd +Ġpessoal +媽 +Ġfactories +Ġbleibt +มา +alsa +ĠNFL +Ġfuera +Ġreserved +ĠEarn +Ġhelt +Ġshortcut +Ġconvincing +space +Ġenforce +Ġcores +Ġefter +Ġrecession +xico +Ġproposition +arians +ropol +Ġ몰ë +ĠÎľ +ĠìļĶì¦ĺ +Ġactivist +Ġconviction +Ġzab +Ġcanceled +ÑĤоÑĩно +Ġή +éĢĻ樣åŃIJ +nite +Ġfundra +buzzer +ело +ications +Ġzona +Ġteens +Ġmethodology +Ġì¤ijìļĶ +than +ĠUl +ĠGrey +Ġhog +INK +ĠSung +ĠClaud +ĠCNN +Ġdelivers +alin +ĠAdobe +othe +ĠDeswegen +ำ +Ġwerde +Ġgrease 
+Ġupgrades +ĠFinland +accept +Ġinterrog +bee +Ġãģ« +Ġprede +ĠNep +ĠCambridge +Ġgraphs +Ġhaunted +Ñģем +æ§ +åħĭ +Some +ĠMall +Ġrehearsal +ĠUrban +ĠLag +Ġnim +ê°ķ +Ġpositioned +Ġavoided +EMA +Ġllegar +Ġrápido +Ġgouvern +Ġhing +Ġdealer +Ġreforms +Ġfatty +кол +ĠAce +Ġnep +Ġì²Ń +Ġcomputation +ĠStream +bourne +tur +Por +Ġsleepy +Ġbanget +ãģĤãģ® +Ġweighs +Ġbleiben +ĠGren +Ġunions +ĠêµIJ +Ġaprender +uitar +ĠJest +uming +ĠPlayer +ĠExtrem +Ġinteger +аÑĩе +Ġconcerts +×ķ׼ +ĠtrochÄĻ +ĠRepe +éĩįè¦ģ +à¹Ĥ +żen +Ġsounding +Ġanonymous +Ġexca +ĠIranian +Ġenergetic +Ġwives +ĠÑĨвеÑĤ +Ġais +ãģĭãģª +Ġsudah +Ġunderwear +Ġcrunchy +ĠPain +Ġgerçek +redict +Ġmisma +ÑĸÑĤ +Ġsurviving +ÎŃÏĤ +Ġparticipant +ĠHessen +árias +Ġsubway +istä +Ġcoral +Ġmarijuana +ĠMemorial +ÑĪий +riz +Ġsatellites +Ġlease +ĠCameron +umph +Ġclassmates +ähän +ÑģÑĤве +Ġhue +ĵ¤ìĿĦ +Ġproportional +Ġnoss +Ġlaps +rÃ¥ +Ġbitcoin +ÐĹЫÐļÐIJ +Ġ충 +ĠÙĦÙĦ +ĠMort +ĠEsp +arnos +ĠÑģказал +Ġänd +åħĦ +×Ļ×Ļ×Ŀ +ĠGeb +gehen +Inaudible +borough +ÑĦÑĦ +Ġfellowship +ĠPaper +Ġcurved +ĠGEOR +Ġcalculator +ĠCatal +ĠvÃło +Ġbypass +леÑĤ +à³ +trans +rencies +ì¡Į +igent +Ġtasted +Ġoceans +uft +ervice +ĠÐľÐ£ÐĹЫÐļÐIJ +ĠClassic +Ġrespectively +~) +ître +ĠNash +Ġzit +ĠìĽĥ +ĠëĨĴ +quote +ĠUns +Ġtac +Ġproves +ĠPortland +bly +Ġere +ì¶Ķ +Ġépoca +ĠÑĤÑĭÑģÑıÑĩ +76 +Ġhade +ĠFro +ĠpolÃŃtica +tag +ĠíķŃ +Ġschö +arett +Ġprovisions +Ġmotors +Ġimaging +Ġdok +ulously +Ġmeille +çİ°åľ¨ +ëIJ +ĠISO +ĠSTEM +ĠBowl +Ġtowers +ĠEe +ĠPerformance +Ġloin +cussion +Ġcoastal +iale +compass +Ġspells +Ġdisappointing +Ġë²Ī째 +EER +Ġversatile +asury +Ġenfin +Ġdownside +Ġguiding +ĠاÙĦÙĤ +Ġninety +charged +ĠFans +Ġphilosophical +Ġgarn +ĠmÃ¥nga +Ġwillingness +Ġportions +aben +Ġï +¿ +raul +Ġsprint +ifen +ıyla +ĠкÑĥп +ãģıãģłãģķãģĦ +Ġensuite +ĠCapitol +Ġ63 +ĠговоÑĢиÑĤ +Ġappointments +æī¾ +omiast +Ġcareg +Ġpublisher +Ġheraus +Ġεί +ĠVS +ãģĿãģĹãģ¦ +ä¸Ńåħ± +Ġsacrifices +third +Ġhumanitarian +ĠëĤ´ì +imon +Ġinequ +Ġzob +Ġcomfortably +ĠDinge +Ġcancelled +ĠPSAKI +ĠRobinson +Ġfins +)? 
+ĠHistor +ĠÑĩеловека +Ġtbsp +text +kim +Ġupdating +Ġgeld +feld +ı¼ +Ġmä +Ġcafé +ÖĢ +ĠSri +ĠRegion +ĠHahaha +Ġfinances +ĠاÙĦØ´ +Ġbunk +ruk +haft +Ġlateral +Ġextensions +ĠìķĦìĿ´ +Ġdefinite +ĠZhao +ĠLuis +sty +Ġcasos +ĠKlim +Ġ1993 +Ġrealization +Ġhistorian +Ġcracked +ëĤ´ +Ġsystème +ĠCIA +ĠÑĤво +ospheric +Ġflee +Ġrất +ĠRegardless +Ġreluct +Ġtimely +ĠJulian +GM +éĴ +adura +é£Ł +Ġdresses +çģ£ +ĠëĶĶ +Ġnominated +Ġadvocates +ymph +Ġrecordings +Ġdeviation +Ġprioritize +Ġspiral +ĠYOUR +Ġtranspose +ampoo +ĠìĽIJëŀĺ +ĠVision +Ġpolite +Ġhamb +ĠPatient +æ¯Ķè¼ĥ +íģ¬ë +Ġsia +Ġê³³ +Ġže +è§Ģ +Ġsupermarket +ë¹ +ĠSierra +Ġgrilled +ĠUpon +Ġabsent +Ġmec +ĠApollo +Ġpunk +ĠPaÅĦst +ĠÑģвой +Ġ거기 +Girl +Ġskinny +ĠPremier +Ġterritories +Ġliability +Ġjerk +ratic +Ġdancers +ĠÑĥÑĢов +Ġê´Ģë +only +ĠStu +Ġskeleton +ĠëŃIJë +Ġзакон +ıkt +ĠMIKE +Ġlö +mie +Ġreiter +ãģĵãĤĮãģ¯ +ĠKolleg +ĠAdams +licher +Ġçocuk +Ñıг +Ġblush +Ġsunshine +Ġez +ĠDevil +Ġ길 +ĠãģĬ +add +Ġlicensed +Ġvinyl +ĠCzech +imag +Ġcracking +Ġìº +Ġudah +Ġsommes +Ġìĸ¼êµ +waÄĩ +Ġfres +åij½ +ĠWalmart +ĠТепеÑĢÑĮ +atisf +CI +lang +Ġdiffusion +çĶ· +Ġsomos +ĠMakes +æĪijæĥ³ +ĠRicky +Ġmucha +íķ¨ +Ġhorsepower +asia +Ġfibers +Ġerm +Ñģкие +Ġjeste +Ġfirefight +Ġcuisine +Ġbesonders +dig +Ġì¢ħ +ĠÑĥж +Ġtracing +Ġcertains +ĠApply +ÑĭваÑĤÑĮ +çĮ +Ġbru +ĠYES +ĠBai +ĠDit +ĠBis +Ġunle +ÑģÑĤаÑĤоÑĩно +ĠAwak +..\" +Ġ125 +Ġrooted +Ġcautious +const +Ġorchestra +çľ¼ +ĠвнÑĥÑĤ +Ġquelqu +ĠоÑĤвеÑĤ +ĠMethod +ì¹ľ +ĠμαÏĤ +lü +ĠìķĦê¹Į +Ġnaming +Char +ĠSicher +Ġprivileged +ĠFly +Ġãģĭ +áºŃt +Ġadvances +ĠZelda +Ġandra +Ġgrinding +ĠEdition +pf +Ġwarriors +Ġhedge +Ġunseren +ĠÑģÑİда +eliness +Ġpersonalities +Ġfö +'M +ĠÑĤоÑĩно +Ġshipped +Ġmeteor +Ġsurroundings +ĠFill +uesta +ĠPersonal +ĠAlle +ORT +ä¹ħ +ĠSche +VI +Ġcomparable +damn +Ġditch +YAN +ismus +Ġpickup +Ġdak +ĠEP +best +ĠSue +ällt +Ġpopcorn +Ġfolding +home +иваеÑĤ +å·²ç¶ĵ +Ġannot +chuck +Ġfierce +Ġdamaging +Ġflop +Ġpasar +Ġreef +ĠÑģвоей +Ġzoo +overs +jets +Ġprès +ĠSilicon +teok +ĠSeth +atamente +Ġtransmitted +Ġreplicate +Ġslim +ĠCream +æĦŁãģĺ +Ġsidewalk +ìĪĺë +ĠжизнÑĮ +ĠMonica +ä¾ĨäºĨ +Ġcopied +ĠTerra +istent +ç³» +Ġоно +Ġwhale +ĠWITH +лÑĥÑĪ +å½±çīĩ +ĠEen +ĠÑģвои +Ġordin +Ġplural +Ġspokes +Ġdispute +Ġsensible +Ġpreaching +Ġktórzy +pted +avier +Ġpistol +ĠTapi +ĠÅĤ +ffff +Ġacrylic +Ġignorance +ĠZiel +rans +Ġwelding +mid +æĪijä¸į +Ġзаним +Ġlanes +Ġmines +Ġmoms +×ķ×Ĺ +ĠChamber +tier +Ġmodest +ĠìĹ¬ê¸°ìĦľ +Ġunas +Ġwrench +handed +Ġsaturated +ĠFang +ĠCommissioner +र +Ġ×ĸ +ĠLouisiana +ĠMask +Ġcubes +ìĶ¨ +Ġvidéos +ĠnÃ¥gon +Ġrider +Ġì¶ľ +Ġsón +ĠLatino +bank +íķ´ì£¼ +ĠBrend +Ġsexuality +..., +Ġforgetting +ĠÛĮ +ĠAvengers +ĠBonjour +cessor +кÑĢаÑĹ +cence +Ġgeograph +culo +оÑģÑĤÑĮ +Ġsweating +íĥĢ +Ġsymmetry +tsÃ¥ +Ġjan +ĠFerr +é¦ĸ +Ġambassador +ziÄĻk +Ġmusun +ĠÑĥÑĤ +ĠLG +issent +commun +Ġcours +Ġdevelops +Ġbronze +Ġsubstances +driven +주ìĦ¸ìļĶ +Ġaos +åĦĦ +ĠPROFESS +half +Ġsorted +ĠBomb +лаг +ĠMalaysia +ĠChristina +Ġteammate +èģŀ +FT +Ġkı +hearted +++ +ogenic +Ġbells +ĠOuais +Ġspecialists +бÑĭ +depth +lasses +gies +ĠCoffee +Ġmarking +Ġfoll +uli +Ġadhesive +ĠBot +ĠPunkt +eye +ĠBub +elong +åĪ¶ +ĠпÑĢик +Ġdonor +84 +Ġenfor +Ġcatches +Ġbricks +Ġknitting +ĠKnowing +oks +HY +ride +ĠFantasy +iman +Ġpse +Ġìĺ¨ +Ġвд +Ġrestra +Ġevaluated +ÑĢев +Ġfortunately +Ġchegar +رب +Ġdomains +ibi +arry +Ġshutter +Ġficou +Mike +Ġinclu +Ġdonors +Ġapl +ĠLower +Ġimported +Ġacademy +Ġfinals +Ġdisappears +ÙĬا +Ġadministrator +js +Ġcutter +Ġranging +örper +Ġconstraint +ĠTable +ĠShan +vic +ĠFix +ĠSwift +ounces +ĠWarum +Ġlettuce +appelle +Ġshave +Ġbás +Ġ77 +ĠOoo +ao +ĠMcM +ĠDrew +Ġlump +Ġlashes 
+scheinlich +Rep +inis +ĠCette +Ġcomposite +emetery +Ġsorte +ĠFinancial +оне +rones +ĠVoy +Ġtéc +ł¹ +ĠNinja +ĠCorin +еннÑı +ìĿ´ìĹĪ +Ġnich +Ġdetective +âĢ¦\" +Ïĥε +Ŀ¼ëıĦ +Ġë³Ģ +Ġë¸Ķë +Ġprope +ĠWright +Ġ×Ķת +ĠShi +ĠãģŁ +Ġinvestigations +éĤĦæĺ¯ +ĠPowerPoint +ĠChu +Ġìĺ¤í +ĠìĻĦìłĦ +ĠFragen +unning +Ġpourrait +Ġtextbook +мÑĭ +Ġfahren +ĠÑĤоÑĢ +Ġlakes +ünde +Int +ĠMetro +Ġmansion +Ġаб +ĠZhou +Ġcorridor +Ġescol +Ġindicating +iaÅĤa +Ġmommy +Ġarchives +Ġfounders +engine +ĠDieu +Ġsickness +Ġë³´ëĭĪê¹Į +Ġarb +Ġned +ĠChop +Ġcovid +Ġslam +Ġpublications +DC +Ġspends +æ¾ +Ġrefugee +Ġdile +Ġ×IJ×ĸ +ificar +ĠSach +Gu +Ġreload +???? +ĠjeÅĽli +ĠÑģоÑģÑĤо +Ġsimplicity +Ġbullying +Ġмол +Ġrealidad +Ġunclear +appa +levant +ĠISIS +ĠWatson +Ġdein +ĠMicro +íķľë +üg +Ġdevam +Ġtweeted +å°İ +Ġunderstandable +atan +Ġversa +Ġpreca +Ġvá»ģ +ĠCopy +ĠOracle +Ġmindfulness +Ġdiscret +ernen +ĠPle +Have +Ġisolate +Ġdeu +Ġseventy +ĠHills +Ġarcade +ĠÑģпеÑĨи +Ġsiguiente +ĠBÃľNDNIS +liga +ĠвÑģÑĤÑĢеÑĩ +ôm +Ġtweets +Ġschauen +Ġcritique +ĠðŁİµ +Ġstatt +ĠÑģамое +ância +Ġsupernatural +Ġplugged +Fl +ynı +ĠTambién +Ġencouragement +ĠServer +ëĤľ +upa +Ġaston +Ġhears +ÑĢаÑħ +Ġsche +Ġrats +Ġrecuper +Ġunten +ĠFighting +Ġacademics +示 +ĠSü +ÑģкиÑħ +Ġpaired +ĢìĿĦ +Ġárea +Ġsweetness +åıĬ +Ġdefer +Ġmuitas +ĠAudio +Ġlocker +ÙĬد +ĠÑģÑĤав +Ġbuena +ANS +Ġdetector +avo +bek +Ġαν +íݸ +Ġdragged +Ġдолжен +Ãĸ +رة +ìĿ´ì§Ģ +Ġcelle +cking +ĠاÙĦج +ĠCanvas +Ġespañ +Ġglimp +Ġspreads +ongo +ĠMason +ĠIng +Ġê°ĢëĬ¥ +ÏĦικ +Ġsecular +Ġbater +Ġinquiry +Ġenergies +Ġmanufactured +Ġvegetarian +Ġpineapple +ÑıÑĤа +Ġpractitioners +2000 +Ġíķ´ìļĶ +ĠìŬ룬ë¶Ħëĵ¤ +Ġë¶Īë +ĠJefferson +ĠJoan +Ġtram +容 +chmal +ĠHait +á¹ĩ +Ġunreal +Ġsymbolic +Ġstealth +Ġsplash +ĠEntertainment +Ġmetallic +?\". +è¶Ĭ +around +Ġdespair +ĠNevada +ĠFinance +Ġkrie +ĠLux +ĠSmash +keeping +Ġзаг +Ġnarciss +Ġdzisiaj +Ġtolerate +oard +Ġlinking +ĠEconomic +Ġì¼ +Ġmorph +ĠNak +ĠBaker +aton +rings +ĠPeng +ĠAirport +ãģĭãģ£ãģŁ +íķĺëĭ¤ +§ģ +prints +Ġhadi +Ġempir +ĠLives +anners +Ġним +ĠPROFESSOR +Ġpositively +antom +Ġbadge +kelt +Ġinterfer +Ġfulfilling +Ġvisualization +éĹľä¿Ĥ +ĠPrice +�� +Ġscenery +Ġprone +Ġwizard +Ġbanyak +verb +sky +Ġwished +Ġrailway +Ġüzer +Ġalguien +ĠAW +ĠколиÑĩе +Ġreacting +ĠBuch +ึ +Ġanth +Ġsih +Ġhust +ĠScreen +ilant +aho +Ġfragrance +Ġelevation +ĠMediter +Ġë¿ +Ġéqu +Ġwraps +Ġinert +Ġrecreate +лаÑĤ +Ġboleh +Ġharassment +unky +Ġglimpse +regierung +Ġfutur +Ġrepository +Ġengra +Ġtrafficking +assis +ĠTrek +Ġë²Į +Ġë§Īë +ĠKab +aniu +give +Ġdinosaurs +Ġfeather +Ġattitudes +Ġplum +ĠRS +ĠAnfang +illery +ĠìĬ¤ +MY +Ġtrzeba +Ġskies +ĠAj +urable +CU +ĠShane +Ġdeparture +ĠTON +ieten +rats +æ°Ĺ +isu +Ġbord +Ġinterestingly +çĻ» +oughing +Ġrushing +Ġvolatility +Ġpyt +Ġformats +ĠзаÑĤ +Ġê¼Ń +Ġwhatnot +Ġcomport +sw +orean +ĠRelax +Ġclan +ĠAH +Ġpew +Ġdictionary +Take +shirts +ĠHugh +ĠعÙĦÙĬ +ĠPic +Ġenrolled +Ġjednak +Ġofferings +Ġcoraz +Life +Ġ!!! 
+Ġcler +ĠVideos +ĠRodrig +ĠIdent +ĠPos +ĠStage +ĠRace +Ġenact +ãģĦãģ¾ãģĹãģŁ +ĠGy +ĠHispan +Ġdefence +ĠCampbell +matic +Ġrelev +Ġpeach +Ħ¸ìļĶ +Ġparadise +Ġceremon +Ġannoyed +æĮĩ +lax +Ġexploit +Ġclause +eker +ĠBloom +nant +ateurs +Ġheights +Even +Ñģон +Ġoutrage +ĠVietnamese +ãģ¯ãģ¯ +TR +Ġeer +Ġcannon +ĠComb +IJë§Į +è»Ĭ +Ġê²ĥëıĦ +Ġaccomplishments +ĠAnalytics +Ġshaping +reiben +Ġbachelor +Ġfingert +acked +Ġpyramid +ĠStewart +ást +Ġsurvivor +Ġduct +Ġdealers +æ´» +عÙħ +лин +Ġede +×ķ×¢ +ĠÙĥاÙĨ +ĠÏĦι +Ġchooses +ĠOwn +гоÑĤов +hire +алÑĮнÑĭе +ĠÐĽÑİ +ĠоÑģÑĤав +tech +Ġdroit +Ġsubjective +enes +Ġdivis +avez +Ġmaneuver +à¹Ħà¸Ķ +adece +ĠEns +acial +ĠProtection +ĸ´ +Ġformally +Ġwyd +inguém +Ġziem +Ġrecruiting +×Ļ×ļ +nem +Ġforbidden +ĠBapt +×IJ׳×Ļ +Ġsubset +ĠMagaz +nement +Ġaquela +ragon +Ġcommittees +Ġétaient +udi +ĠDawn +Ġbore +Ġcomposer +ĠwiÄĻcej +anga +Ġdislike +ĠDays +åŁº +Ġparal +Ġmientras +Ġheavens +ãģĴ +heid +Ġtraders +once +Ġmascara +ĠÏĢÏģο +Ġwhisper +ĠMusk +éĽĨ +ĠFamilie +Allah +ĠOlivia +ĠPros +Ġolika +ilim +Ġrépond +ĠPeters +Ġå¾Ī +Ġbites +Ġvic +ĠNY +emption +Ġ450 +Ġvisuals +Ġlieu +ücken +ĠSteel +ĠGP +wait +Ġnoticeable +ucha +Ġrehabil +Ġrejection +ĠÑģледÑĥÑİÑī +Ġslider +Ġregarded +Ġgravit +ĠReserve +count +Ġbreeding +Ġlonge +aleb +Ġknight +Ġвой +Ġprésent +ĤĺìļĶ +ĠSpecifically +Ġposes +Ġveure +okay +emas +Ġãģ§ãģĻ +ĠmajÄħ +Ġwebinars +Ġcannabis +Ġdamals +ĠNorthwest +Ġpada +Ġcrowds +Ġfutures +Ġän +Ġcivilians +ĠSachen +æį +Ġtraces +Ġë¨¹ê³ł +QU +é¡ĺãģĦ +ĠIF +anın +ìĤ´ +Ġbiblical +ĠVed +Ġstoring +ÑĢавлÑı +æĩī該 +Ġnast +Ġdö +ÑĢоп +elia +Ġsideways +ĠUnderstand +ĠQur +Ġperpend +ĠMillionen +Ġwatermelon +ĠDivine +ultur +abord +Ġsuccesses +Ġhombre +Ġcarp +Ġsuscept +ungkin +Ġkij +ulus +اج +Ġnotch +Ġpolynomial +å¹² +å© +Ġúnico +Ġtelescope +Ġpolitique +kiem +ĠÎŃνα +Ġaggregate +ĠGeoff +Ġtril +ĠGRA +Ġsubscriber +imet +ĠдоллаÑĢ +oping +Ġtherapeut +ĠCancer +Ġparade +Ġirrig +âĻªâĻª +Ġclearer +Ġbog +ĠMaur +าà¸ĩ +ĠShanghai +achte +ĠKol +elujah +Ġhav +ĠCrime +sek +Ġë¡ľ +ienna +ĠGor +èĽ +ĠпоÑĤÑĢ +ĠкажеÑĤÑģÑı +ĠLift +ĠSort +ĠPsal +Ġping +ĵĿ +phis +ĠFUCK +ĠSyn +Ġbamboo +¬ìĺģ +cuts +Ġmmm +Ġfunktioniert +Ġ_ +ÃŃcio +Stop +Ġimaginary +Ġnotamment +ĠInitiative +ãĥ¥ +ĠKurt +Ġloosen +Ġbuscar +çģ« +Ġzelf +Ġprops +åĽī +Ġmoeten +Ġmilli +Ġhalls +ĠMatch +Ġbrackets +ĠCou +æ¦Ĥ +ĠÐľÐ°ÑĢ +ISA +Ġcigarette +Ġcompetitions +ĠMIN +Ġbehö +voor +Ġust +ĠZi +ĠOcc +ulates +Ġballoons +Ġpronto +ĠMiy +ĠFile +ĠклаÑģÑģ +нÑĥл +Ġcereal +Ġincrement +Ġrefined +åı¦å¤ĸ +prising +ĠRF +Ġrespectful +Ġloot +asket +Ġdeixa +ingle +Ġfunciona +ĠRevel +Ġsober +Ġperforms +ĠGentle +ãĤ¨ +Ġrecipient +ĠHause +Ġëĥ +From +Ġministers +Ġparadox +å°±æĺ¯èªª +Ġtasting +Ġ×Ķ×Ĺ +Ġreuse +ĠLane +ĠÑģовеÑĢÑĪ +Ġremembers +Ġfeminist +Ġcommitments +Ġprojected +Ġgaz +iyoruz +Ġobligations +Ro +zar +Ġchw +ĠJAM +ĠbÄĻdÄħ +aspberry +ĠмеÑģÑĤо +ë²ķ +Ġregulated +Ġwicht +ĠTrevor +Ġsecondly +ĠIhre +elsh +Ġreporters +ÑĤоÑĢа +oyo +GI +Ġinterconnect +éIJĺ +OSH +æŃ² +Ġbrass +Ġignoring +ä»ĬæĹ¥ +infect +Ġprojekt +oret +ÏĦαν +ĠÑĤип +Ġmutta +Ġunboxing +Ħ° +å¡Ĭ +Ġadvised +ĠDenver +Ġseverely +ĠMhm +Ġflipped +Ġpien +Ġkommun +ĠFRE +Ġà®ĩà®° +ainted +Ġknives +Ġhabl +Ġgeworden +arettes +CS +ĠмаленÑĮ +Ġgalax +Ġninete +ê±°ëĤĺ +Ġsis +Ġadvisory +Ġdrilling +ĠWouldn +ünf +gestellt +ĠHelen +Ġ×ŀ×IJ +apolis +Ġrzeczy +Ġterra +Ġhep +Ġalgún +ikk +Ġastronom +ĠStarbucks +kÄħ +Ġpatrol +Ġì½Ķ +Ġgon +ĠãĢIJ +Ġsonst +Ġencounters +Ġretrou +Ġsharks +Ġdor +ĠRever +Ġevapor +Ġreservoir +Ġalleged +uler +Ġverm +Ġcommerce +Ġfitted +gem +Ġtactical +Ġlith +éīĦå¡Ķ +had +è®Ĭ +Ġcarbohyd +Ġlengths +ιο +Ġdemographic +Rob +ĠSkin +ccoli +Ġsimplified 
+Ġreadily +ĠCum +adesh +ĠDÃ¥ +usst +igne +eton +Ġmenor +qi +OOM +à¸Ńà¸Ļ +Ġpsychiat +Ġeighty +Ġмилли +ĠTob +edo +網 +ĠÄijến +Ġcircuits +ĠLAUGH +icism +emor +Ġregener +egree +Ġbureauc +ĠAlber +ä¹ĭå¾Į +ĠWor +夫 +Ġresin +ĠbyÅĤy +ĠIG +à¯į, +Ġ78 +Ġweeds +ĠMyth +93 +æ¿ +ĠëĤĺìĻĶ +év +á½ +ören +çar +ĠPAUL +Ġdisadvant +Ġpositioning +Ġcocktail +Ġagrees +nn +ĠSally +Ms +Ġinherent +Ġmonetary +Ġnatur +ĠNh +ĠImport +Ġleben +Ġwi +ussy +Ġobes +Ġwandering +Ġìĭłë +Äħda +etchup +Ġdisposal +ĠJA +ĠCer +zilla +Ġvirgin +ĠSlide +andel +Ġrighteousness +ĠΣ +Ġideia +ä½łå¥½ +иÑĢоваÑĤÑĮ +ר×IJ +Comment +Ġprelim +ĠVale +Ġì§ĢëĤľ +ĠVanc +OMAN +ĠпÑĸд +Ġyum +stre +cem +Ġpocz +Ġfragment +ĠÑģлÑĥÑĩае +Ġundergo +ĠHank +ceks +ĠFPS +Ġocur +Ġdeterior +注 +Ġempresas +Paul +Ġ))) +ĠвÑĢемени +Ġscold +×Ļ×¢ +Ġsuspected +Ġaccessing +Ġsubstit +Ġhistorians +ä»» +Ġдело +Ġsocied +rone +Ġreden +Ġextends +epherd +Ġbalcon +ä¸įèµ· +ĠSolo +Ġpolitician +олÑĮно +Ġirgendw +Ġtraumatic +Ġrapper +ĠROBERT +Really +æģ¯ +Ġlineup +ASE +Ġcontractor +ĠCorporation +gor +ĠTodo +ÑģÑĤÑĢой +FBE +Ġnewsletter +ĠkoÅĦ +alties +ĠпÑĢиÑĩ +ĠHeavy +Ġswords +Ġmanipulation +Ġfunk +ĠvÃ¥r +ĠTaliban +Ġë°¥ +Ġacne +ürü +Ġdeswegen +ĠDust +Ġsilic +Ġhooks +Ġblij +Ġpetits +Ġfilme +ĠBereich +ĠSaid +Ġimposed +Ġdiary +ĠгоÑĢ +ĠGates +Ġalta +å¸Į +Ġchcia +pleasant +Ġë°Ŀ +Ġmożemy +ĠAustria +Ġbroker +Ġsucked +èĢĥ +Ġcompartment +Ġclone +Ġ×Ķ×¢ +ĠDanke +Ġnochmal +езд +Ġadrenal +Ġkleinen +ãģ¾ãģĹãĤĩãģĨ +Ġsubsequently +Ġdecentral +Ġgenetics +Ġê´ij +Ġmonitors +ĠApplic +ĠReporter +wert +Ġwiem +ĠMovement +Ġinterviewing +Ġhairs +Ġpuò +ĠChelsea +Ġcoher +Ġcot +Ġzas +Ġpatches +Ġlah +Ñĥнк +ĠReagan +ĠMarco +city +Ġdefender +Ġdecoration +iji +Ġlitter +Ш +Ġjego +REW +ĠPik +ĠHee +ĠIv +Ġиде +ĠTheater +ĠÑĩаÑģÑĤо +Ġsweater +Ġhighlighting +Ġainsi +Ġdiplomatic +ĠNevertheless +å³ +ASON +Ġpúblico +Ġferm +reated +cod +Ġ물ë +Ġmister +ĠVancouver +Ġrecognizes +ecd +Ġcomplications +encial +ãģĹãģı +Ġê°Ģì§Ģ +ĠUltimate +Ġvaig +ĠMerry +×ķ×Ĵ +ĠMarcus +總 +owego +Ġmente +Sm +Ġaja +ĠTao +Ġjudicial +Ġentrepreneurship +Ġнемного +Ġpis +Ġerg +Ġchrist +ĠCurt +ĠÑĢаÑģп +λε +ensch +ÃŃre +Ġfocal +ĠDiamond +avÃŃa +Ġhanno +ĠSquad +Ġassociations +ĠCreative +Ġmessenger +Ġbegging +Ġdecimal +ĠdÄ±ÅŁ +Ġmetadata +sels +ĠÄ°ÅŁ +ữa +Ġdifficile +dı +Ġslaughter +ĠVerg +Ġ×Ĵ×Ŀ +ç°¡ +æĮī +ĠTea +asses +Ok +Ġsynthes +otiation +Ġpainter +Ġelbows +Ġarchitectural +ĠÑĢад +Ġglor +image +ampa +culiar +ł¨ +Ġteve +ĠStelle +ĠBam +Ġì´Ī +asis +ipedia +ĠGI +ĠActive +çĦ¶åIJİ +azi +ãĤĮãģ¦ +ĠLucky +íķ© +ĠпÑĢиÑħод +Ġrunway +Ġauthentication +Ġposible +Ġsupplements +Ġsurgical +Gen +Ġfeasible +DO +Ġoutlook +Ġintervals +Ġanecd +Ãłng +Ġstraps +ĠShu +udd +issenschaft +Ġporte +Ġcommitting +Ġalley +Ġcovenant +ĠPedro +lessness +ĠSolid +ĠMolly +ĠнекоÑĤоÑĢ +Ġcooperate +åĮĹ +ollen +Ġtuna +Ġkindergarten +ĠSiz +Ġdużo +ĠMBA +ĠGEORGE +ĠFisher +å¿ĺ +ĠCaesar +ĠкÑĢаÑģив +ĠDelhi +zym +Ġexplicar +ê°Ģì§Ģ +uns +grow +ĠпÑĢиÑģ +Ġ86 +Ġstating +Ġmassa +chter +Ġì»¬ëŁ¬ +Ġdeputy +SM +noc +Ġgeography +ĠEnterprise +ĠCant +öz +Ġunpack +ĠíĻĶë +Ġsearches +Ġpresidency +Ġtrivial +Ġpige +oubt +ãĤļ +ì¼ĢìĿ´ +Ġbudgets +Ġub +Ġpne +ĠYale +ĠÅŁÃ¶yle +regular +Ġimperfect +ARA +ĠfamÃŃlia +urm +ĠAdventure +ãĥĬ +cis +emark +Ġnego +Ġinappropriate +ĠпÑĢиз +ĠÑĢол +Ġdreamed +Bry +Ġshuttle +Ġpillars +Ġbik +inum +ĠÑĥÑģ +ĠNebr +Ġperpendicular +Ġbooked +bery +Ġvikt +bear +esus +Ġвозможно +¨¹ +Ġpresumably +ĠMemphis +Ġambulance +×ķ×ŀר +Ġthumbnail +Ġmodification +éĩı +Ġinterpreted +Ġpromo +Ġκά +ĠεÏĢ +Ġacoustic +ĠDB +åĵİ +Ġnonetheless +oule +Ġpequ +Ġknob +ãĤ£ +ĠëıĮìķĦ +Ġpurchases +ĠÃĩünkü +Ġdividing +perform +raction +healthy +ĠTitle 
+Ġuk +Ġcerca +Ġarguably +Ġfale +ë³µ +Ġgamers +Ġutilizing +Ġoffended +Ġtava +alı +Ġmedian +Ġinfectious +ĠAnnie +Ġsmartphones +Ġparole +åĸĿ +ĠEpic +zza +Ġunified +Ġê·¸ëķĮ +Ġcurtain +ĠÄĥ +Ġsexually +Ġunserem +ĠConvention +Ġallegedly +Ya +ĠHoo +enment +æĢª +íĽĦ +Ġgigantic +Ġnoting +Ġrebo +ĠJama +ĠAlz +Ġborrowed +침 +Ġperipher +оÑĤа +ĠGB +ĠGear +Ġeconomically +Ġtelefon +Ġqueremos +ĠдалÑĮÑĪе +Ġras +ĠTeach +icios +atos +Ġpledge +bau +ĠHimself +Link +Ġespero +Ġchromos +ĠPER +Ġerle +Ġpodium +ços +Ġnieu +Ġfen +ĠGOD +ĠChocolate +werk +Ġtừ +Ġsuppress +λη +Ġ240 +Ġsitä +Ġhonesty +ĠBio +ĠBard +ĠобÑīем +ĠмÑĥз +Ġmarble +ĠÑĨенÑĤ +Ġprocure +Ġrotor +bern +Ġtuh +Ġheadset +atem +Ġwarranty +à®´ +Ġfiling +ιά +Ġcomprendre +Ġimpulse +Ġsalv +written +Ġinstitute +Kim +ĠLGBTQ +ficiente +His +ĠαÏħÏĦÏĮ +Ġteenage +orus +ĠÑĢазб +See +ĠConserv +á»ģn +fulness +Ġstrawberries +ĠAbu +ион +Ġolla +NOISE +ĠEmploy +Ġwiped +urger +Ġmodifications +Ġíķĺì§Ģ +Ġfootsteps +Ġhonors +Ġadul +Ġflipping +ĠHU +ZY +Ġintegrating +بر +ulla +Ġnatuurlijk +ĠíĹĪ +ĠEthereum +ÙĬÙĦ +wed +Ġpeaks +ĠKes +Ġbloom +Ġcrashing +Ġ911 +ĠоÑĤлиÑĩ +Ġcontrollers +ĠDod +ĠвмеÑģÑĤе +Ġsortir +å¥ĩ +ĠStraight +ĠGracias +Ġgroove +Ġtogg +Ġìĭ¶ìĿĢ +éro +Ġoutward +ĠWA +ĠRocky +Ġscam +Ġhayat +ignty +âĦ +plings +Ġantibiotics +Ġä¸Ģ +Ġnevertheless +jang +commerce +Ġspoiler +Ġglove +Ġchatter +ĠBY +~? +Ġíĺ¸ +Ġdemol +wechsel +imir +Ġraid +еÑĢÑħ +ìŀIJ기 +enf +Ġcommented +Ġoptimized +Ġconvicted +Ġbats +ĠSB +ĠAur +ĠTong +Ġimplicit +ĠJanet +Ġreag +ãģ² +ĠAdvanced +Ġimpose +ש×Ķ +Ġschemes +ougher +abolic +Ġê±°ì£ł +Ġslowing +Ġwtedy +Ġdestructive +ĠопÑĢед +Ġlandmark +ĠëıĪ +ĠWalking +ẹ +Ġtijd +ĠKN +ĠQuant +ìĺ¤ë +ĠкÑĢÑĥ +Ġperder +Ġnove +ände +ĠãģĹ +bia +Ġcustody +Ġbiod +æĿ±è¥¿ +Ġdirecting +...âĢĭ +Ġreloc +Ġdemande +ãĤĵãģł +ĠoÄŁlum +Ġодна +ĠMilk +åı· +ĠKra +ĠHonda +Ġpue +Ġelekt +Ġbeginners +Ġspear +ÃŃnh +ĠLuft +Ġnig +ĠSchools +Ġforums +ĠQin +ppo +Ġzag +ĠЮ +Ġtoothp +ĠStyle +ì´Ī +Ġpunct +Ġreps +ĠAly +Ġamendments +Ġöz +Ġdigits +urai +Ġchaotic +ĠMasters +eon +ĠCash +ĠCuz +Ġbedeutet +Ġscanning +Ġжд +неÑĤ +Ġcertainty +jek +Ġdijo +ĠClimate +Ġrinse +Ġkrij +veland +Ġsoundtrack +ĠSafe +ĠNova +94 +Ġathe +ĠVerb +oler +ìĿ´ì£ł +Ġvin +Ġrespiratory +ĠStudy +ĠCAM +Ġavocado +ĠZhen +Ġlatency +Ġfeathers +Ġcontar +ĠвеÑī +Ġfark +Ġblended +Ġexploded +ĠXX +ĠBenim +Ġalguém +istoire +Ġconfidential +Ġmast +Ġì¿ +geh +Ġdisrespect +ĠSystems +Æ°a +Ed +Ġwys +Ġexotic +Ġglowing +ùng +ounge +èĦ +аниз +Ġpalav +ĠSword +Ġgim +ĠCrow +Ġpotent +bish +Ġabused +ĠJed +Ġgambling +ĠSpect +Ġinvestigators +æĻļ +Ġratt +Ġdob +ĠDES +hog +ĠоÑĤкÑĢÑĭ +íĮħ +ĠденÑĮги +Ġíĺ¹ +Ġ머리 +Ġsaturation +Ġinherited +ĠInnovation +ìĹĪëįĺ +Ġtangible +Ġdepri +hed +Ġпомог +Ġsliced +à¥į +Ġthế +Å¥ +68 +Ġcorona +Ġgifted +Ġsoir +Ġhumility +ĠìĿ´ê±¸ +Ġflaws +ĠпÑĢакÑĤи +Ġkald +waż +yw +ãĤĵãģ§ãģĻ +irteen +Ġcrochets +¦¬ê°Ģ +ĠìłĦìĹIJ +Ġdese +æ¥Ń +Ġмаг +ĠdziaÅĤ +Ġlég +changing +Ġllev +ÅĦsk +çĶ» +Ġ1984 +orns +ĠWelsh +Ġpharmaceutical +Ġpumping +ĠShaw +punk +Ġvault +Ġkinetic +Ġhurricane +ĠIncluding +ức +ĠGrandpa +anship +é¦Ļ港 +ĠвÑĭÑħод +нож +ľł +utta +Ġê²ģëĭĪëĭ¤ +Ġbaz +ĠпоÑĪ +Ġpeculiar +zyÄĩ +ĠEllie +Ġlearns +ĠKrishna +Ġconsecut +Ġempath +ĠDin +Ġtraded +ĠBoris +uggage +olla +Ġназв +Ġeternity +Ġвп +èmes +Ġgrapp +bé +ĠпÑĢедÑģÑĤав +ĠFC +įëĭĪëĭ¤ +even +ĠNebraska +ortune +Ġkarena +ĠAgent +Ġsting +ĠPI +Ġmunicipal +powered +Ġconsegue +ĠManchester +Ġrainy +Ġbli +Ġkost +Ġhalten +ĠAhhh +insula +erting +ĠاÙĦÙģ +Ġrelacion +Ġkomen +Ġdome +Ġpriests +ĠIntrodu +rophe +shore +velt +clipse +ĠÑĢÑĥÑģ +×Ļס +Ġsabemos +ĠHolland +ogi +anki +ĠMats +Ġsmoked +ullie +Ġeurope +ĠдейÑģÑĤвиÑĤелÑĮно +Ġbardziej 
+Ġtransforming +ĠEz +opath +Ġìĸ¸ëĭĪ +ĠÑģÑĤан +ằng +ัà¹ī +ĠOuch +Ġclearance +ustain +Ġsolidarity +Ġproving +ĠÐĺн +ĠÑģÑĬ +Ġprolong +адно +Ġsos +ĠDeal +Ġ170 +mons +Ġзем +Ġlogged +Ġlifelong +Ġsensory +Ġbehold +ĠFAR +ètement +ĠFederation +Ġdodge +ĠShir +Ġdragons +ĠArctic +Äħż +Åį +º +Ġdenke +ĠpodrÃŃa +cole +ÑĥлÑĮÑĤаÑĤ +Ġsystematic +ама +chos +Ġclinics +ĠBS +Ġtales +usions +ĠíĪ¬ +Ġpreservation +Ġlore +ĠProtest +Ỽ +å¸Ĥ +Ġacknowledged +ĠIsaiah +ĠëķĮëĬĶ +Ġ×ĺ +Ġcompetitor +Ġadvancing +zip +Ġtenth +ĠLaure +Ġhints +Ġexercising +ŀľë +ĠIntelligence +uated +OUT +oped +Ġautonomy +Ġbranding +ĠMediterranean +Ñĸк +Ġscrewdriver +Ġsupre +Ġstap +Ġjurisdiction +ĠSettings +Ġforefront +ĠFemale +comfort +Ġmultiplication +ĠMurray +Ġbob +ĠTas +Ġtahu +Ġonun +etter +Ġprophets +lag +Ġrevenues +Ġprá +Ġuploading +Ġmachinery +ascal +ĠEstá +ĠGoth +ĠBald +ĠSaw +Ġstripes +ìłij +Ġpowin +æĹ¥æľ¬ +Ġhostile +Ġdarum +Ġprevented +ожалÑĥйÑģÑĤа +Ġalgunas +Ġhopeless +Ġznaj +Ġreadings +Ġcraving +tat +ĠPig +Ġliar +çĪ± +Ġmultiplayer +Ġdale +ĠCourse +íģ¼ +ĠKita +Ġcustoms +Ġresponds +endra +è¦ĸ +Ġmetro +Ñģол +Ġmitigate +Ġoppression +ĠæĪijåĢij +quinho +Ġammo +Ġenfer +Ġpony +Ġounces +°Ķ +ĠìĪĺê°Ģ +Ġdicho +ĠDeb +Ġwonders +ĠRoose +Ġprizes +ĠALEX +Ġthankfully +Ġtissues +ĠÑĢавно +ĠLuna +intelligible +ĠìĻ¸ +ê°ij +ĠHeat +ĠÑģид +ĠQui +Ġions +Ġaccommodation +便 +ĠKart +ienst +Ġtarde +Ġsoaked +ĠCasey +Ġì´Ŀ +ĠÑĢÑĥб +Ġdifferenti +Ġleftover +Ġexchanges +second +Ġfirstly +Ġbuilder +rien +Ġdw +Ġbouncing +?< +ologÃŃa +wealth +Ġmeditate +ĵ¤ìĿĺ +ĠCraft +è§īå¾Ĺ +æĻ® +riv +ĠAgainst +Ġceramic +espère +Ġcompetent +ĠHopkins +Ġkilos +Ġgravel +Ġpiston +Ġfriendships +Ġescre +Ġvoz +ĠGesellschaft +Ġunterstüt +Ġmuj +Ġwarnings +pos +ĠProfessional +wszy +odle +bands +Ġteamwork +stellung +Ġdx +åįĬ +Ġattorneys +Ġweitere +ãħĭãħĭãħĭ +ĠOriginal +×Ļ×Ĺ +Ġbroadcasting +ĠпеÑĢвÑĭй +uchi +Ġheure +Ġgrabs +ĠWOR +ĠPlaid +Min +Ġpaz +ĠPuis +umu +itates +Ġcoats +Ġbuen +Ġheir +Ġpneum +שר +enser +ĠJUDGE +Ġblonde +á¹Ľ +Ġgak +Ġsık +Ġquoted +Ġequipo +Ġwishing +ÃŃcia +Ġverbs +çµĦ +ĠCanadians +Ġgoverning +ĠEvans +Euro +Ġgenres +Ġunterschied +ĠBecky +³¼ê²ĮìļĶ +Ġeinge +ĠRaise +oland +ĠStrateg +Ġeres +ĠVeterans +Ġbreakout +Ġsanté +Ġadel +Ġinvestigated +Ġpeur +Ġagile +Ġrailroad +anska +Ġей +Ġexpos +atories +ĠContent +Ġtruths +ĠTrail +Ġgua +Ġpores +Ġwritings +ĠUhr +ĠThats +Ġicing +OC +ĠProduction +Ġcarne +ISS +Ġninguém +non +Ġvicious +×ķ×Ķ +Ġreconnect +Ġcentres +ĠKem +Ġcrease +ĠìĿ´ë¯¸ +айÑĤеÑģÑĮ +ĠбоÑĢ +ĠHayır +ĠÑģÑĥд +Ġúnica +owaÅĤ +Ġadher +hua +ZZ +Ġpreciso +Ġcurrents +Ġseasoned +ĠIoT +ĠBishop +è¨Ī +sted +ĠBernard +ì¤ĺ +æ²» +ĠGlenn +Ġktórym +ืà¹Ī +Ġastrolog +ĠKot +å¤ľ +Ġparfois +Ġforwards +ĠWiÄĻ +ĠÎĺ +Ġnano +è»į +sub +ĠBrill +Ġgrit +Ġcited +gado +Ġmelts +Ġforcé +âĸĪâĸĪ +Ġbajo +Ġdiscretion +°° +ativity +Ġsituated +ãĥ«ãĤ¯ +Ñīее +åľ°æĸ¹ +ĠпÑĢинÑĨип +amaz +Ġaquarium +Ġdissolve +ĠGods +Super +Ġamid +zk +ĠãģĦ +éłIJ +ampf +Ġhela +'! 
+Ġdevelopmental +ĠDise +ĠÑĢабоÑĤаеÑĤ +Ġsnapshot +好好 +Õ¸ +ĠYue +ĠHulk +ĠDoom +ĠFelix +Ġréf +Male +ç·Ĭ +phants +ENS +ĠMechan +ĠGolf +åĨįè¦ĭ +Ġgenerosity +ätze +Ġunlocked +ĠãĤĴ +íĥģ +ocalypse +Alright +Ġê°ľë +Ġ×IJ×ij׾ +ĠKeeping +Ġcollaborating +chief +ĠFernando +Ġchefs +ĠíĶ¼ë¶Ģ +Ġskipped +Ġpersonn +Ġaxe +chez +Ġextraction +ĠAV +ĠGibbs +Ġíľ +Ġsı +IAM +View +ĠGRANT +Ġ몸 +Ġverification +Ġdepicted +ĠMoz +oux +Ġtul +Ġscanner +Ġcomedian +ĠVolks +ĠJEFF +è¨Ĥéĸ± +§Ħ +Ġdistraction +rá +ĠINTER +Ġsincer +Ġ×ŀת +Ġש׳ +Ġconstructive +arf +ĠëĪĦë +Ġeco +ramos +Ġrenewed +inement +ĠUb +ĠPepper +ì§Ģê°Ģ +ĠDarwin +Ġmerchand +Ġvárias +èce +NG +ĠìľĦíķ´ìĦľ +ĠакÑĤив +ĠUnters +عÙĦ +Ġintric +omma +ieving +ĠCaroline +åĵģ +ĠPRES +Ġperformer +Ġautour +ãģ¾ãģĽãĤĵ +Ġutterly +Ġsynthesis +Ġlesbian +Ġretrieve +Ġmaneira +Ġimpair +Ġmentoring +ĠSouls +ĠGoPro +ÑĢаÑĤÑĮ +Ġcose +ĠSSD +IRE +Ġupfront +ĠAun +Ġgamer +Ġlitt +Ġaggression +ĠLikewise +ĠBetty +ĠDart +ĠDLC +ishment +ìŀ¥ìĿĦ +Ġ对 +ç»ı +cream +ĠBabylon +Ġnug +brar +Ġaynı +amily +bike +ahahaha +loyd +Ġmira +Ġperme +ĠGaming +Ġfirmware +Ma +Ġassisted +atics +Ġìķŀìľ¼ë¡ľ +ĠMental +niejs +ĠIz +owÄħ +Ġtougher +Ġdeed +èĭ¦ +Ġstylish +ĠTools +ĠHamp +Ġsunscreen +Ġarticulate +iye +иÑĦ +ĠSpread +ĠHAVE +Ġswirl +Ġsponsoring +ä»ĭ +iovascular +mesi +Ġrelaxation +ĠÑģвоиÑħ +Ġmargins +ĠsaÄŁ +ĠPride +ĠÏĦοÏħÏĤ +иÑĨи +enci +Does +Ġcorpse +Ġendurance +Ġíŀĺ +ì¹´ +Ġhaircut +Ġinterrupted +Ġwindy +ĠCaleb +ÏģÏĩ +ĠPourquoi +Ġholistic +uclear +ĠWhole +士 +Act +Ġgallon +cade +ĠRegional +roads +ĠSchne +áng +Ġизмен +ãĤĪãģŃ +Ġmenus +Ġsplitting +Ġpriced +ĠÎĵ +Ġusername +ĠÐŀÑĩ +Ġcompressed +yin +Ġguardian +Ġgoof +Ġchecklist +Ġinterchange +Ġexpedition +Ġextern +Ġinfrared +engo +Ġdenying +Ġpackets +onent +BB +ĠIncre +Ġsini +ÃŁer +èg +maal +generation +Ġminorities +Ġllevar +Ġnomination +Ġconsid +Ġ×ľ×¢ +muÅŁ +ĠEsc +Ġnumerator +Ġkaik +Ġktórych +iesen +Ġvê +ĠUSS +ĠPrivate +Ġодно +Ġalém +ÃŃtulo +Ġlimb +Ġforgiven +Ġdisclosure +ÏĦί +Ġningún +Ġtherapeutic +Ġnegotiating +ĠNike +enseful +Ġincap +Ġflagship +town +âĪ +ĠÏĢολ +Ġwolves +Ġviolations +ĠArnold +Ġintervene +Ġheater +Ġrecursos +Ġmaid +ê²¼ +ĠдавайÑĤе +ĠCelebr +Ġcape +ĠSty +ainen +site +bij +ĠполÑĮз +Ġframed +Ġpublishers +ĠÑĩÑĥÑĤÑĮ +Ġtemptation +Ġcerteza +Ġexempt +ìĬ¹ +selling +ĠTask +hoon +ĠCoc +ĠParks +Ġrepetition +ĠÑĤÑĥда +Ġensl +ĠdeÄŁiÅŁ +ĠOrlando +ĠMainten +æŃ¢ +ocument +ĠHC +Ġscooter +ĠнапиÑģ +Ġtighter +Ġtease +Ġremoves +Ġkijken +ĠÑģÑĥÑīеÑģÑĤв +Ġthé +ĠвÑĭглÑıд +Ġrelieve +Ġmitä +Ġstationary +öff +pable +Ġarter +Ġdéf +rative +Ġconect +Ġsaddle +ĠDiane +Ġcommemor +fendim +SÃŃ +Ġíģ´ë +Ġmange +atte +Ġarrogant +Ġrobotic +ĠgiÃł +æĺ¯çļĦ +Ġneighbourhood +isson +Ġдвиж +ĠRI +ĠNorman +brand +amation +Ġrazor +Ġmurders +ĠÑĤÑĥ +Ġwszystkim +Ġutilities +Ġmicroscop +ê¿ +Ġdaqui +ollar +ĠÐĶавайÑĤе +Ġannée +Ġkilometres +Ġhomosexual +Ġarchitects +ãģ¡ãģ¯ +Ġniye +LER +Ġmicrophones +ĠStunden +Ġconsecutive +ienda +vänd +DER +Ġlifts +ĠMeat +Ġsavez +íĸĪëįĺ +Men +Ġdismant +거를 +Ġinsulation +Ġscall +Ġspooky +Ġparc +Ġballet +ĠWhatsApp +Ġfranc +Ġdeliberate +ĠíħĮ +Ġmars +ĠZur +Pr +disciplinary +Ġobsession +ме +Ġmarching +ĠEmergency +iguous +Ġszy +ĠLands +Ġboarding +ĠпоÑĩÑĤи +Ġenvy +Ġcompassionate +Ġmerci +Ġdesirable +dale +Ġcanım +ĠAntar +temps +Ġconfigured +ĠCompared +neh +icating +Ġnickel +ÙĪÙĤ +ÙĥÙĪÙĨ +opes +Ġformulas +ĠÐķÑģÑĤÑĮ +Ġpobl +ĠPJ +ĠLud +ä»ĬåĽŀ +ĠBrid +ĠHog +ĠBris +Jen +Ġshading +ĠYas +Ġdisturbed +Ġrecommending +Ġcé +ĠHOW +ìĹĪìĸ´ +Ġreversed +ĠInterestingly +ioxid +åħŃ +Ġìĺ¤ì¼ĢìĿ´ +ếu +xx +Ġouais +ĠYouTubers +ĠRosa +ĠHaupt +jadi +Ġvlogs +Ġcultura +ĠLeadership +ĠHep +Ġillum +´ëıĻ +Ġcustomized 
+Ġmarca +Ġquatro +Ġнаг +ĠSpaceX +ĠEigen +asting +ĠolduÄŁu +Ġforts +ãģī +riment +iencia +Ġtenir +roffen +Ġ1979 +Ġcie +ĠëIJĺê³ł +Ġescri +ÏĮÏĤ +íı¬ +uzzy +Cong +ìĿ¸ìĿ´ +Great +sil +éch +ãģ¨ãģĭ +Ġmultic +ĠDisk +²ķ +Ġfazla +Ġlevant +Ġabajo +urry +stru +Ġ먹ëĬĶ +Ġaccessory +Ġдвиг +ĠRid +2019 +Ġdownstream +æķ¸ +Ġkaz +utan +Ġcharcoal +Ġafect +wu +Ġcontexts +Ġfeared +ĠìĦ¤ +Ġhistories +Ġfas +ensible +Ġcocoa +illar +geons +Ġspirituality +ĠPew +Ġpharmacy +Ġpassions +Ġbos +Ġallá +Ġthriving +ĠReact +Ġoccupy +Ġwithdrawal +Ġallowance +ĠFraktion +Ġbuddies +Ġidle +Ġdissolved +Ġprevalent +Ġmilitar +Ġsensing +Ġpojaw +Ġancora +Ġabundant +Ġhairst +ãģĤãĤĮ +Ġtwee +Ġnächste +ĠMöglichkeit +Ġhoo +ufficient +Ġfantast +Ġedible +Ġëĸ¨ìĸ´ì +ìĽĥ +Ġvein +ucci +Ġdevotion +Ġconcealer +income +Ġrecycled +ĠìĬ¤íĥĢ +Ġpontos +Ġdessus +Ġvérit +Ġreflections +ĠAA +Ġtakeaway +bare +ĠContact +eil +ĠHear +Ġmirac +ĠGerilim +ĠÑģамÑĭй +Ġvivo +Ġkilograms +ĠCrim +ût +78 +Ġsincerely +raz +Ġë³µ +Ġarriv +Ġconception +ĠPersian +Ġsjäl +Ġstarring +ĠìķĦ무 +ĠForever +еÑģÑĤÑĮ +Ġveil +Ġsubtit +odka +ĠоÑĤноÑĪ +Ġcooks +енÑı +Kay +Ġniños +ĠPhone +Ġstitching +Ġfingerprint +é¢ĺ +λά +Ġdedicate +ĠLob +Ġblacks +ĠBle +bout +ĠÄijang +Ġeks +Ġsquash +ĠKü +odi +ĠnÆ°á»Ľc +Ġvoyage +Ġplayful +ĠØ¥ÙĦÙī +anic +Ġcondemn +ĠBöyle +ĠPolize +ãĤ¿ãĥ¼ +Ġayuda +Ġpam +à¹Ħà¸Ľ +ĠKathy +един +нова +Ġbrig +eger +Ġeagle +Ġvisions +ĠíķŃìĥģ +Ġshitty +Ġhott +ĠBritt +utors +ENTE +æĽ² +Ġphon +ĠBing +ĠподдеÑĢж +spring +æĸ¯ +etten +Ġpilgr +Ġediyor +енÑĤÑĭ +aggio +Ġjul +Ġcomprend +teil +Ġز +Ġperformers +Ġinfamous +ĠMK +çª +æ³ģ +otle +eff +ĠHash +Ġcoward +ĠBRA +ĠDD +Ġcomida +Ġplata +Ġflap +ĠMehr +ribution +ĠYemen +Ġmysteries +ĠÄ°yi +Ġstell +Ġeyeliner +Ġdeles +Ġnailed +Ġillnesses +Ġstacks +Ġtrabajar +flower +ciu +Ġcrude +Ġsubstantially +Ġhomem +Ġnephew +Ġstamps +Ġcarbs +ÑĮÑĤе +mooth +Ġtunnels +acie +æ³¢ +ĠSeñ +ĠHera +ĠìķĦëĭĪìĹIJìļĶ +ĠWyoming +ĠHDMI +ĠLis +ución +Ġsteer +оÑİ +иÑĤа +NT +Ġìĸ¼êµ´ +Ġpalms +Ġneon +ованиÑı +Ġfiltering +Ġjouer +ĠHö +ĠнеÑģ +ê²łìĸ´ìļĶ +Ġ81 +Ġstoryline +Ġprzep +Ġthanking +ĠBoeing +Ġsoftly +jem +алÑĮнÑĭÑħ +Ġflashlight +ĠпÑĥ +ĠWOMAN +ắc +ÃŃch +Ġluxurious +Ġwün +Ġimpactful +Ġconson +reu +irring +ifter +Ġconstituents +èIJ½ +Ġ94 +ĠTou +gom +ĠìĥĿê°ģìĿĦ +Ġstereotypes +Ġmożli +åĪĨ享 +Ĥ¨ +Ġpencils +ĠÑģлож +Ġihrem +ĠBesch +ĠKoh +ĠEntscheid +Ġlek +Ġförs +Ġtotalmente +Ġlively +Ġentropy +Ġdiscern +ĠÐĹна +Ġdov +Ġmythology +è¨ĺå¾Ĺ +apanese +Ġapproximate +аÑĤив +ifiable +ĠSeo +åĢĴ +´ìĭ¬íŀĪ +Ġìĺ· +Ġtemporal +ĠiT +Ġestat +ким +Ġsprink +Ġgrund +Ġinfantry +Ġschaffen +ç´Ħ +Ġank +riages +ĠYeon +ĠMoroc +Ġinvasive +ģĶ +Ġparenting +ĠRis +ibile +Ġmods +å½¢ +ĠпÑĢовеÑĢ +ĠThing +ĠWherever +Ġacknowledging +Ġpawn +ummer +orb +69 +Ġretrouve +Ġrelies +ĠHighway +Ġawe +ãģ§ãģĻãģĭ +itaire +Ġapplicant +Ġaisle +worm +Ġpayload +Ġcarre +ĠBach +æł¼ +Ġì¹ľêµ¬ë +ние +ĠitÃŃs +onnaise +sol +èı¯ +algia +Ġrocking +Ġbesten +rites +^^ +иной +Ġbaixo +Ġ기ìĸµ +оÑĤÑĢи +sim +Ġincarn +ëĭ¤ìĿĮ +Ġlick +sided +Ġ71 +forder +Ġresonance +Ġtegen +Ġmetaph +owser +Ġ×IJ׳×Ĺ׳×ķ +?ãĢį +Ġspielen +Ġvolley +ĶìĿ´íģ¬ìĹħ +looked +Ġsentenced +Ġmultiplying +Ġideals +Ġwahrscheinlich +Ġdeposits +bilir +Ġeffet +illon +Īë§Į +Ġtestimon +Ġzawsze +ĠпÑĢоÑĨеÑģÑģ +ĠLav +ä¸įéĮ¯ +Ġtravailler +Ġlaisse +ĠMountains +ĠÑĢоб +Ġexamined +itus +Was +лÑĭ +Ġattributed +ĠìĬ¹ +ĠBaron +Ġgep +Ġattent +ĠCollection +Ġtheat +ĠCai +Ġwells +Ġhumano +çĹħ +ĠHast +ĠÑħоÑĤÑı +czas +Ġpermits +Ġlegg +Ġepo +ĠFen +Ġthi +ĠFoi +Ġélect +Ġ83 +Ġoverth +Ġè¬Ŀè¬Ŀ +Ġtenant +è²· +Next +Ġpraised +security +ĠImpact +为ä»Ģä¹Ī +Ġvouch +Ġnegó +Ġunve +Ġcriticize +ĠKenya +Ġtactic +Ġlogr +Ġpois +Ġpapa +speaks 
+ðŁij +ispers +Ġsurplus +Ġcolder +åįĹ +åIJ¬ +plets +ĠVienna +ĠLead +Ġaerial +ĠTah +енÑĤов +ĠGreeks +Cam +Ġmáxim +Ġkuin +chio +Ġdemonstrates +anos +ĠCert +ĠÑįн +Ġblogs +ĠìĦľìļ¸ +Ġbeams +иков +Ġprompted +Ġfrightening +ĠPorsche +ãģĪãģ¦ +larını +Ġchilling +isphere +Ġflashing +ĠKard +bread +Ġexh +Ġtycker +Ġecological +ĠMae +Ġ×ŀ×IJ×ķ×ĵ +ĠëĤĺëıĦ +лон +yss +Ġpergunt +Ġprix +izzard +Ġcancers +Ġ91 +susp +ĠItem +ÅŁa +Ġpest +ĠtakÄħ +Ġlymph +ĠPatri +fill +Ġreconna +Ġoptimism +Ġmimic +Ġì²ľ +ĠMadame +ocy +lining +åijĬ訴 +erme +Ġfolders +ĠczÅĤ +uchar +Ġcurso +Ġbreach +ниÑĤÑĮ +ĠpamiÄĻ +Ġelig +Ġautop +Flow +Ġprogrammed +ĠProcess +Ġfigur +ĠSF +ĠEles +Ġprogrammes +Ġdizzy +ìĭľê°Ħ +Ġлибо +Ġsniff +ĠSebastian +ĠHye +Ġ4000 +Ġpermite +æ¢Ŀ +ĠзаÑī +Ġguit +ĠDais +Ġaccordance +Ġmodular +ogeneous +æĭį +Ġpouquinho +Ġartillery +Ġlubric +Ġvolcan +ĠNH +ðŁ¤ +Ġdean +Rh +Ġministre +åĿIJ +ĠInv +ĠBulgar +ĠDaten +èİ +Im +Ġoriginated +ĠNixon +integr +Ġlacks +ĠNacht +ìĸ´ëĤĺ +camera +Ġradish +kiye +Ġanges +Ġpréf +juk +ĠBee +ĠBU +ĠвоÑģп +ĠBT +êmes +ĠStück +ĠInk +æĪĸèĢħ +ĠSergeant +ĠMultip +Ġhiçbir +ĠСам +ĠDé +olph +ìĸ¸ +Ġimpat +ĠìķĬê³ł +ĠÑĤакого +ĠнавеÑĢное +Ġunpredictable +Ġmend +ĠìĹĨìĸ´ìļĶ +ĠjakieÅĽ +Ġanni +Ġdonné +ĠKirsty +Ġrectangular +Ġempezar +ĠExchange +ê°Ķ +Ġéconom +ãģĵãĤĵ +elin +reibt +Ġ×Ķפ +Ġcemetery +Ġespañol +olin +лÑİд +Ġgrâce +allen +ĠPhilos +ĠErst +ĠìĥĪ +ĠVid +Give +OH +μο +ĠPare +Ġmetabolism +Ġmaple +Ġaxle +ĠDy +Ġkomme +Ïİν +Ġgreatness +Ġverified +Ġspé +ĠFahrenheit +ĠBren +ĠConfeder +Ġhistoire +Ġeliminating +ĠAdding +ĠAbi +æĿİ +Ġhospitality +tim +Ġbonito +Ġpartes +ĠдÑĢÑĥгиÑħ +ĠShay +ĠSed +Ġregrets +Ñıми +Ġtenants +éĢŁ +ĠPTS +Ġdevi +ĠLate +uez +Ġsöyl +ãĤ» +Ġìŀ¬ë°Į +Ġtoggle +Ġmasking +алÑĮного +Ġpersön +Ġamerican +fik +ĠRGB +enson +ĠKA +wwww +ĠÑĢег +metics +Ġeducator +ãĤ·ãĥ«ãĤ¯ +park +елÑĮзÑı +arus +ÑĢеÑĤ +Ġfeito +Ġchoir +Ġlargo +Ġeens +Ġwatts +ĠSingle +Ġsusceptible +icer +ĠвклÑİÑĩ +Ġpus +íĻĺ +Eng +Ġfantas +Ġspecification +Ġconfronted +ĠColumbus +ивеÑĤ +arım +Ġcaffeine +munition +Ġmigrants +lide +itations +ĠGeme +ẫ +Ġplanner +Ġstimulate +Ġaproxim +ceu +ĠNom +Ġvog +ĠÑĢаÑģÑĤ +Ġenseñ +Ġsellers +Ġguten +zd +Cal +Ġdescript +Ġreconciliation +zinho +á¹ĩa +ãģĺãĤĥãģĤ +acyj +ĠCOL +saw +ĠíĻķìĿ¸ +Ġvarit +Ġpartnering +Ġdetention +Ġbombing +clapping +iencies +ondu +AME +Ġê°ĻìĬµëĭĪëĭ¤ +cÃŃa +ĠпоÑģÑĤо +ĠASMR +Ġhomepage +Ġsiè +antha +ĠPoll +Ġigen +cych +Ġê°ijìŀIJ기 +Ġconsiderably +ä»ĸçļĦ +ĠArist +Ġwithstand +Ġqualitative +ĠKraft +ĠÑįлекÑĤ +ĠBead +екÑĤив +Ġcrushing +ì³IJ +Ġnavy +ÙĪÚº +sho +Ġoak +ippers +Ġsoils +Ġpigment +Ġevitar +ãĥĩ +Ġfuse +ĠDale +:\" +Ġcomplètement +Ġkel +à¹Ĩ +Ġquatre +ĠUM +Ġë§IJë +æł¹ +ÃŃr +Ġleisure +ĠHousing +Ġfolds +estion +ARS +Ġmash +urpose +Ġaccumulated +ĠStuff +èªŀ +Ġtapes +ĠÑģилÑĮно +ĠLOVE +Ġ1982 +Ġscars +Ġcapitalist +ĠNed +Ġsoften +Ġnotably +Ġforcément +ĠRaum +ĠнеобÑħод +Ġtrademark +Ġfertig +Ġ?! 
+æĹł +Ġreinforced +Ġrecharge +ĠPutting +Ġvillains +Ġhandic +Ġadvertisement +تÙĬ +ĠÑģÑĥм +ĠRiley +×ķ×ij× +京 +Os +از +Boy +Ġsquish +ocket +Ġtestify +æ¼Ķ +Ġ׾×ŀ× +ĠмаÑģÑģ +manuel +ĠArkansas +iffe +Ġanalysts +ĠDeaf +Ġjó +Ġgroceries +ĠWheel +ĠÑĢиÑģ +Ġcòn +ĠCob +Ġprisons +ève +ĠCabinet +Ġposed +Ġguerre +ĠLloyd +Ġclerk +Ġcrises +ĠSho +ĠOre +ĠFootball +ĠAdvis +ĠZheng +èį +ĠAMY +Ġunfor +Ġmonaster +Ġcompile +Ġimmortal +atable +Ġparano +Ġtiver +ĠSteph +ĠFuÃŁ +Ġdiscontin +Ġripe +Ġhacking +Ġsiendo +Ġseguro +altres +Ġanderes +Ġ리ë +Ġexports +æŃ¥ +Ġtabii +Ġ기ëĭ¤ë +Ġbothering +Ġpickle +ĠBRIAN +Ġaltar +ĠпÑĢиб +Ġtransferring +ĠVors +ĠÙĩÙĪ +ĠZa +ĠFrances +Ġbrowse +emit +Ġchewing +ĠFreddy +Ġeditors +älle +ĠíĮĢ +ĠSque +ĠCultural +awk +ĠSache +ĠCarbon +ắt +FL +ĠNGO +peÅĤ +ĠSou +Ġhvor +unintelligible +Ġë²ķ +Ġ° +iin +Ġ×¢×Ŀ +Ġderrière +Ġczym +ĠApost +Ġregarder +Ġagrade +ĠCandy +Ġmare +Ġintroduces +birds +Ġuniquely +Ġmuk +Ġcooker +Ġcrews +Ġjeito +ERT +¶Ħë +nisse +Ġef +Ġcarte +ĠYak +ĠPAT +ино +bokki +Ġmates +Ġdistint +Ġì½Ķë¡ľëĤĺ +Ġyıl +Ġκάν +Ġconfigurations +enga +recht +Happy +ãĤĦãģ£ãģ¦ +invest +Ġreconstruct +ĠÑįÑĤомÑĥ +Ġmosque +raum +Ġvoyez +ĠNBC +ĠìŀIJìĭł +Ġsturdy +Ġкап +Ġansch +alid +Ġmasih +ĠREP +Ġì½Ķë +Ġdeduct +Ġsalir +wurf +ilot +ĠMutter +olds +ĠFEMA +ĠBib +Ġneighboring +Ġbliss +Ġíĺ¼ +лиÑģÑĮ +ĠÑĤÑĢеб +Ġå°±æĺ¯ +Ġgrenade +Ġegal +Ġfinely +Ġpetals +Ġkeer +Ġchyba +Ġskipping +Ġthirteen +Ġgravy +ĠSAT +61 +Ġног +Ġmins +ITE +Ġsozial +íķĺë©´ìĦľ +ruktur +Ġвозмож +ĠопÑıÑĤÑĮ +Ġarth +ĠCuban +Ġtreasures +Ġfertilizer +Ġawakening +Ġë°±ìĭł +Ġrall +Ġdepict +ĠPablo +Ġnineteen +Ġwatt +Ġentirety +KS +ĠWoods +Sch +ĠÚ©ÙĪ +ĠDry +ãģŀ +uve +Ġreconstruction +Ġanatomy +Ī를 +Ġbaba +Ġlistener +Ġsharpen +ĠPeru +ĠвÑĭз +Ġrecreation +Ġinitiate +Ġcalor +ĠNaj +gee +ĠFeels +ĠSnapchat +ĠTet +ĠNest +ĠDaf +ĠFinish +ĠÑĤаким +úc +izens +Ġspins +Ġembry +Ġpassages +Ġcient +Ġjustification +ä»ĸ說 +Ġolmaz +Ġflooded +Ġemoji +Ġembracing +Ġdiscard +ĠBasic +agog +ĠìľĦíķ´ +Ġasylum +erin +Ġfim +Ġninja +Ġautomate +Ġallergic +ÿÿÿÿ +amam +ĠмаÑĢ +ĠOi +äus +Ġinduct +ĠBEN +ĠzÅĤ +Ġkażdy +ĠAMP +nÄĽ +Sure +Ġquil +Ġespec +rok +BSCRI +Ġliebe +pus +achsen +Ġcricket +ëĬIJ +ĠFrame +ekkür +arb +ĠpÅĻ +иÑģÑģ +Ġzeggen +Ġdoubles +ĠDre +test +insp +boys +Ġmão +ĠVerse +Ġmuscular +ĠMALE +Ġdulu +Ġoccasional +Lo +conomic +Ġvak +Ġremedy +å¤ł +ĠâĻªâĻªâĻª +vem +Ġönem +ĠkarÅŁÄ± +ĠSharp +hur +Ġë°©ë²ķ +Ġgrandson +Ġaktiv +ĠThrones +ĠìķĪìĹIJ +Ġtots +Ġsubd +ĠPaula +Ġgraves +ĠBrent +ĠникÑĤо +Ġsöz +Ġcrec +ĠVladimir +çĸ« +Ġпой +Ġ\"- +Ġpsy +atri +idan +Ġaún +Ġstandardized +ì¹ĺë +ĠкÑĢов +ĠZhu +something +Ġ750 +Ġmujeres +Ġait +éĹ´ +agu +Ġcorrected +ikka +eled +ĠCareer +owym +Ġroommate +Ġdescendants +ĠNapoleon +ĠÐĶо +íĸĪìĸ´ìļĶ +Ġbunun +ĠMicha +ç·ļ +Ġdescob +PI +Ġpalabra +Ġtracked +Ġdependence +ĠBarack +åģĩ +Ġfertility +ĠSouthwest +Ġincomplete +Ġcomunic +Ġcompris +ĠRestaur +Ġacron +κα +Ġapprentices +Ġmusst +ĠAbr +Ġpentru +ĠConsort +ĠAvec +Ġdumplings +LR +Ġwszystkie +Ġswamp +нев +uggle +Ġwatercolor +Ġproton +ĠEspaña +ocking +овал +Ġtakim +Very +Ġdementia +ĠÅŁeyi +Jac +ĠMacBook +ĠLiv +fficients +ĠHunt +Ġoverlay +æĦŁè¦º +ĠSkype +punkt +Ġconfined +ĠAdrian +رÙĥ +ĠJeep +Ġenquanto +Ġanest +оÑĤвеÑĤ +ĠменÑĮ +Ġirrigation +á»ijn +Ġeighteen +ĠPon +Ġrescued +Ġ1983 +rü +jae +ĠJeong +Ġamazingly +ĠFDP +Ġbackstage +cue +ĠÏĥÏĦην +ĠاÙĦص +Ġlivestock +ĠWarner +Ġmajors +ãĥģãĥ£ +Ġcooperative +ĠBrady +rained +rieb +Ġ×ij×ŀ× +ĠдоволÑĮно +ĠFE +Ġleaked +ĠMercury +Ġpersuade +Ġtransformer +ĠNorweg +ĠìŬ룬 +ĠzrobiÄĩ +Ġcardiovascular +ĠCrash +Ġgossip +аÑģÑĤÑĮ +Ġ쪽 +Ġswept +ĠHorn +ĠAté +Ġbukan +ĠKaw +KY +ĠStories +Gary +Ġgardening +ĠQuickly 
+ĠFalcon +Ġovat +cı +ĠComplet +ĠDate +ĠпÑĢим +Ġläuft +ĠAudrey +ĠWent +ĠpelÃŃcul +Ġcarriage +Ġunacceptable +nymi +ĠÑģлÑĭÑĪ +Ġterre +uellement +EEEE +Ġpharmac +hões +Ġzich +Ġmigrate +ĠFry +ñana +ĠMuito +EOVER +Ġfortress +ĠCompan +ĠJSON +ordnung +Ġwarto +Ġungef +ìħĶìĦľ +ĠÑĢок +Ġpaddle +Jared +Ġsubmitting +Ġlatch +Ġfug +ĠкоÑģ +ĠEf +Ġlaunches +Ġft +otechn +Ġtravelled +اÙģ +éģķ +Ġproch +Ġdedim +83 +Ġrebound +ĠLU +path +ĠÑģпÑĢав +Ġöl +ĠíĤ¤ +Ġprivat +Ġtractor +ĠAttention +Ser +Ġcoses +ária +pal +ĠìĿĢ +Ġsuccessor +Ġconnectors +ĠÑĥÑģÑĤанов +Ġgenocide +Ġsufficiently +ĠAixò +Ġstabilize +Ġcongest +Ġcarving +Ġzost +ĠбÑĭÑģÑĤÑĢо +Ġshortest +Ġlivel +Ġ89 +éģĬ +Ġerk +Ġportraits +à¥Ģ +èĺ +boat +llah +ANC +Ġempirical +ĠEcho +ĠNederland +è¿Ļä¹Ī +Net +Ġcuidado +ĠRoma +Ġcalf +Ġgiants +ĠExplorer +ĠCollect +alition +ĠDestiny +Ġausge +ĠEdu +ĠClo +Ġearrings +ĠTrack +ĠROS +ĠBelle +çĻ¾ +Ġpueda +Ġdaytime +Ġsupplier +ĠSV +ĠExhale +Ġgalera +course +Ġcentimeter +ĠBast +mud +Ġsangat +ĠPhysical +Ġprivately +Ġtrata +lynn +illi +Ġë©ĶìĿ´íģ¬ìĹħ +Ġcrystall +Ġpods +ản +inator +ĠRecords +å®ĺ +ÄŁimiz +issement +hare +hadow +ĠDK +ĠìķĮê³ł +Ġwyn +Ġrequesting +ĠDonna +ĠìĹ´ìĭ¬íŀĪ +inea +Ġexert +ĠDuncan +ĠвеÑĩ +ĠHah +à¤Ĥ +ĠLif +ĠFinding +ĠNov +Ġзнак +ĠоÑĦ +ĠQuè +Ġquarterback +ĠÑĦак +Ġbipartisan +ÄŁin +Ġnécess +Ġreferendum +Ġcompiler +Ġprobabil +еди +Ġtrader +æĺĵ +ĠRum +geme +Ġdio +ĠbÄĻdziemy +ĠÏĢά +꾸 +×ķ×ĺ +Ġà¤ķ +Ġблаг +Ġscalp +ĠPause +Ġcaption +Ġendanger +Ġenlar +Ġrotten +ãĥĥãĥĪ +Ġwah +èĤī +Ġdzi +ĠInstall +Ay +Ġcrear +енÑĤа +Ġweighing +Ġbutterflies +ĠGast +äºķ +horn +warz +ICEOVER +ĠнайÑĤи +Ġcoefficients +ç°¡åĸ® +ĠSpencer +ĠHigher +Ġcowork +å¨ĺ +ĠкоÑĤоÑĢое +Ġmonit +Ġdysfunction +ĠÑģÑĤанов +Ġtournaments +Ġoyster +BN +Ġtrud +slow +ĠPenny +ĠOdys +ær +Ġfou +Ġenjoyment +аÑĤÑĭ +ĠwyglÄħda +алÑĮнаÑı +ĠProtect +Ġmoy +Ġclaw +Ġsuspicion +Ġsacrificed +Ġgosto +Big +Ġaggressively +Ġvorne +ãĥł +Ġblamed +ĠSehr +פר +cito +Ġseals +Ġmujer +ĠWeird +Ġforens +Ġcontributes +estra +Ġpog +LOL +Ġhacerlo +оÑĤÑĮ +fiction +79 +λο +大æ¦Ĥ +声 +ĠÑĤоб +ĠGS +ĠClara +itez +Ġadvocating +ĠíĶĦë +sung +Ġvertices +Ġnavigating +Ġeuropé +çļĨ +Ġslowed +Ġforeground +ĠIndustrial +Ġadore +ìĭŃ +Ġcréer +æŀĹ +chnitt +Ġunaware +Ġcurly +entar +Ġler +Ġprohibited +ĠHeroes +ĠReed +uca +Ġsmok +Ġkunna +zeitig +immen +ĠLun +ĠабÑģолÑİÑĤ +Ġdegli +Ġvillagers +Ġpreset +zept +uds +Ġemit +ä½łè¦ģ +Ġëī +ëĬĶì§Ģ +нако +Ġosób +Ġ1969 +ĠÐIJÑĢ +Ġmanchmal +ĠBrock +Ġmantra +ĠWIL +bach +inä +elas +keln +Ġdisciple +Ġqualc +Ġdehyd +ìĿ´ëĿ¼ëĬĶ +Af +ìĦ±ìĿ´ +Ryan +Ġpuppet +ĠдÑĢÑĥгие +Ġrud +Ġpending +Plus +ĠìķĬìĿĦ +Ġbá»ĭ +ĠSega +çe +Ġprogrammer +bli +Ġunl +Ġenslaved +Ġsociété +Äģh +Ġinheritance +ĠBangl +ermaid +Ġpractitioner +ĠStalin +ĠUser +cible +Ġcardiac +ĠKoreans +Ġdumped +Ġ×Ķ×Ļ×Ķ +áis +Ġhydraulic +oubtedly +ĠPit +Ġpicnic +Ġbehöver +ĠÑģмог +Ġbraking +é»ij +utar +ĠìĦ¸ë +ubl +Ġüz +Ġmajesty +Ġbers +utable +Ġhotter +çħ§ +ÛĮÙĨ +Ġbiases +Ġsubjected +Ġnaughty +Ġcircus +ãģĹãģĭ +ĠImmedi +ĠStefan +ĠTriple +enk +Ġwit +Ġrecycle +emie +dated +Ġunload +Ġpopula +chin +Ġyields +Ġenglish +ĠBonnie +Ġspiders +Ãģ +Ġerosion +éĥ¨åĪĨ +ĠNICK +иÑıÑħ +Ġimpart +Ġкни +Ġresolutions +Ġlithium +Ġconvergence +ĠTara +Ġдве +ths +ĠCindy +æĪijè¦ģ +幫 +ĠDIE +Ġassurance +ĠопиÑģ +Ġbuckets +Ġcues +ĠQuiet +Ġsimilarity +Ġfoundational +ĠMinist +滿 +Ġpian +Ġcentr +Ġnumb +Ġmonks +ujourd +enzie +Ġskateboard +Ġdlatego +ĠÑģоÑĤ +ĠAE +Ġmasterpiece +ĠSolomon +ĠReddit +Ġriot +abl +ĠJazz +Ġelectromagnetic +Ġinsecure +ĠCompet +geries +обод +ł×ķ +ðŁĴ +Ġsenators +ĠBrisbane +ĠAlb +uttering +ĠAllow +zero +Ġpai +ĠÐIJлекÑģ +ĠDisplay +ĠBlade +ĠApps +Ġpä +ĠдеÑģÑı +Ġquella +ĠGao 
+еннÑĭÑħ +Ġspoilers +Ġgallons +ĠÙĦÙĬ +ĠZion +æľīä¸Ģ +onie +ragt +ĠChand +Ġë³ij +Ġblunt +Ġusu +ĠKad +rakt +Ġcinematic +Ġammunition +rene +Ġfourteen +ĠCarn +crit +Ġtenure +vu +Ġprincipalmente +Ġalleen +éĢĻä¸Ģ +Ġkomplett +Ġdüny +James +Ġreceptor +Ġoneself +guru +Ġmerchant +liness +Ġoverlooked +Ġharmonic +éķ¿ +ieso +×ķ×ŀ +colm +ĠпÑĢоекÑĤ +ĠAda +اس +Tim +Ġrecurring +Ġproceeds +ĠParticularly +ĠDownload +etrical +Ġmatrices +Ġproyecto +ancies +ĠUhm +Ġcaves +Ġìĸ´ëł¤ +ĠLeaf +ĠобÑĭÑĩ +ĠìĿ´ìľł +Europe +ĠtÄħ +Ġpuls +Ġtakiego +ÐĿе +GU +Ġfors +Ïģγ +Ġfotos +Ġ)) +Ġ멤ë +Ġaquilo +ĠKurd +ï¸ı +ptic +ĠDort +Ġmisery +auso +åĬŁ +chuckling +ĠRidge +ĠíĸĪìĬµëĭĪëĭ¤ +Ġ*** +客 +ĠHmmm +Ġgeographic +Ġanys +Ġtalvez +Ġskelet +Ġsignatures +Ġliters +IJë©´ +ĠÑģвоего +Ġskiing +ĠÐľÐ¾Ñģ +Ġadopting +Ġhaft +Ġsymmetric +ĠLiqu +Ġthyroid +Ġmisin +lude +Ġhull +ĠXD +ĠGust +zeich +Ġvibrations +Ġesemp +ĠвÑģÑİ +ĠQuem +Ġübrig +ĠSke +ĠLynch +rooms +artet +fest +Ġfrüher +Ġlure +ä¸į好æĦıæĢĿ +ĠìķĮìķĦ +ĠWIN +ĠRYAN +ĠкоÑĤоÑĢÑĥÑİ +ĠKash +Ġ×Ķ×ŀ +Ġsafeg +ĠHallelujah +ĠдвÑĥÑħ +Ġstaple +Ġsediment +ĠActs +Ġblaming +Ġmainland +Ġsporting +Ġdecorations +Ġexecuting +Ġparan +ĠDollar +Ġprojections +Ġcommissioned +Ġbour +öm +Ġsteamed +ĠëŃĺ +Ġpetrol +Ġcelular +帶 +ĠHungary +Ġrented +ĠваÑĢи +bbie +Ġsécur +üll +Ġswings +between +ĠиÑĤ +estro +Ġniemand +ĠìĤ¼ +ĠPardon +esses +ĠMID +Ġcentralized +ĠAlien +culos +Ġcrise +裡éĿ¢ +Ġclasse +beitet +iÄŁi +Ġwhales +Ġperimeter +Ġtying +Ġstrony +Ġlikewise +ĠPunch +Da +ĠBaptist +Ġsorting +Ġiv +Ġíķ© +Ġrehab +Ġeta +river +Ġsai +ãģĦãģŁãģł +odus +ãģĬé¡ĺãģĦãģĹãģ¾ãģĻ +Ġessayer +Ġturtles +ĠHazrat +Ġfabrics +Ġcavity +Ġponieważ +Ġschlecht +Ġsalsa +ÅŁekkür +Ġseating +Ġeconomists +Ġmang +Ġseguinte +Ġrang +Ġratios +Ġconstell +Ġlongtemps +uating +Ġspoiled +Ġrecipients +Ġsniper +ä¹ĭåīį +ìĬµëĭĪê¹Į +Ġwp +ĠLINKE +Ġflare +ĠAdri +ñas +Ġbackl +mÃ¤ÃŁ +ĠBend +Ġworkloads +ĠÑģÑĥп +Ġ1975 +имÑģÑı +ане +Ġмон +Ġaspirations +ĠAer +ĠговоÑĢиÑĤÑĮ +ĠQian +å¦Ī +Ġcompromised +Ġyolk +лаÑģÑĤ +Ġhemen +rove +dens +ĠкомменÑĤ +Ġ--- +Ġfluores +ноÑģ +ĠLiverpool +ĠÑģобой +ĠZwe +Ġlumin +ĠOG +Ḡ+holm +profits +SN +Ġproportions +Ġmica +ĠBoh +ĠAtlas +Ġunsure +Ġtouring +Ġnied +ĠtÄĻ +Ġimperative +Ġdemek +ĠSheriff +rance +Ġhomeland +ĠHail +ĠGanz +ymm +Mon +åĨ· +vida +Ġdesarroll +æĬĢ +Ġintriguing +ĠHugo +ĠãĤĤ +é¬ +аÑĨ +ĠWiÄĻc +atted +ĠìķĦëĭĪê³ł +ĠVari +ád +Ġsurreal +Ġdisparities +Ġmó +ullen +ĠìŀĪëĭ¤ê³ł +ĠпожалÑĥйÑģÑĤа +Ġmains +Ġeject +Ġmethane +Ġmarginalized +Ġchilli +rès +Ġyem +ä½łæĺ¯ +ĠChun +Ġdebts +Ġdownloading +ĠAthens +isierung +ryn +Ġtekn +ĠQuindi +éľĢ +Ġtaraf +Ġhé +Ġconsciously +Ġfixes +uckle +mayın +Ġfrei +Ġspa +Ġì§Ħíĸī +ĠاÙĦØ° +ĠÑĥк +lett +ĠolmuÅŁ +Ġcheesy +าà¸ģ +naire +Ġwiden +Ġlien +Ġescaping +iggs +ĠBlick +cÄħ +ĠìĦľë +Ġ×Ķס +ĠвпеÑĢ +ophone +iell +ĠSUBSCRI +Ġlions +Ġê·¸ê²ĥ +Ġinspires +Ġguarantees +Ġcomeça +ĠGrowing +Ġneglig +ĠFrankf +Ġgegeben +ĠÄijầu +Ġendlich +Ġìį¨ +ĠTT +ĠLith +ÏĢα +astern +ĠAzer +Ġlunar +hic +ĠнаÑĢод +Ġnenhum +è·ij +ĠSalvador +ĠProgress +Ġprivileges +ĠëıĻìķĪ +Ġantagon +ĠImpf +Ġdescub +ĠLei +ĠìĥĪë¡ľ +Ñĩе +Ġdólares +ĠMeghan +ĠWire +too +aying +usc +Ġtud +Ġappeals +educ +Ġpane +Ġji +Ġdecks +ĠAlter +Ġå°± +ìĦ¤ +åĪĨéIJĺ +Ġproductions +ĠWILLIAM +Ġimplied +Ġfulfillment +ĠAah +Ġsaja +xus +ĠÎļαι +Ãłs +ucch +око +ĠDiscord +ĠSY +jsk +ĠWallace +unction +Daniel +Ġköt +ijah +Ġmarche +Ġdisgr +Ġmungkin +Ġalma +³µ +Ġextensively +ĠFloren +ĠAllison +ãĤ± +ÙĬÙħ +Ġjuven +ĠRenaissance +Ġfundraising +ĠChaos +Ġparaly +Ġnarrator +Ġecosystems +Ash +Ġmitigation +ĠAujourd +ĠIdee +!, +Ġ½ +Ġlandlord +Ġdefects +Ġacre +ulsive +Ġalgae +pek +Ġemba +ĠRoc +éĽ¢ +ksom +äche +Ġleuk +Ġleveraging 
+Ġê·¸ëłĩì§Ģ +ĠPalm +Ġäven +Ġlis +ĠInsp +ĠRita +ĠAbb +ithm +Ġsupervision +Ġrevisit +ĠpiÄĻ +Ġeuh +Ġfades +Ġmotto +åį¡ +езж +ĠShim +Ġrelevance +Ġoo +Ġostat +nica +Ġchoix +ĠFaculty +Ġì¤ijìĹIJ +ĠAbove +ĠнеболÑĮÑĪ +Ġsequencing +Ġnutrient +Ġconquered +Ġdigestive +Ġbackdrop +ĠLori +ailable +Game +Ġneglected +omorph +illah +Ġkne +Ġsiitä +Ġworkspace +ĠVenice +ĠKne +Ñīо +ħĢ +ĠHass +Ġvita +Ŀ¼ë©´ +Ġlays +ências +érica +ĠLl +æ±Ĥ +ĠCoca +ĠWHY +èĪŀ +Ġrouting +Ġpermissions +Ġdings +prend +program +Ġcrocod +bral +AAAAAAAA +agit +ĠNä +Ġgekommen +atten +Ġreferenced +Ġpairing +ĠPartner +ĠCoronavirus +ÑĸÑģ +è½ī +Ġ×Ķ×ĵ +ĠespecÃŃfic +arsi +quelle +Ġspontaneous +çĨ± +Ġê²ĥìĿĦ +ĠÐŁÐ¾Ñģле +ĠاÙĦد +ĠShout +Ġнал +Ġdisguise +ĠJord +Ġwee +Ġmiejsc +Ġserum +Ġplaisir +Ġcredible +ĠbÃ¥ +ĠAJ +mares +Ġrods +Ġeran +ãģ¾ãģĤ +Ġpää +ĠUA +ĠUnknown +ĠÙĦÙħ +ĠRabbi +Ġlaat +Ġhairstyle +Ġغ +éģĭ +Ġcach +ĠWriting +оÑĩки +abad +Ġstraighten +--\" +wife +Ġhottest +Ġpunya +ĠFashion +griff +ĠQR +otch +ĠÐľÐ¾Ð¶ÐµÑĤ +Cloud +ĠStrike +ĠHein +Ġ羣çļĦ +Ġlei +ĠFlow +wegs +Ġhabr +åīĽåīĽ +nahme +Ìģ +Ġpleasing +opping +Ġ구ëıħ +Ġdran +Ġbangs +Ġ79 +Ġsket +Ġcaval +ĠMacron +Ġweighted +Ġmuted +Ġnuestras +EEP +Ġmathematic +ĠMRI +agus +Ġtherapies +θε +Ġunpl +Ġcommencer +full +Ġtowels +Ġprue +Ġlicenses +׼×ķ׾ +ĠÐŁÐ¾ÑĩемÑĥ +Ġpointless +Bye +Ġeligibility +Ġscrape +Ġabusive +ĠMant +Ġjeunes +tal +ĠPrincip +ĠOrthodox +Ġmelod +ĠмаÑĤеÑĢи +Ġprosecutor +Ġopioid +ĠÑĥвеÑĢ +ĠBeen +Ġìłijì¢ħ +Ġdynasty +Ġajuda +Ġentreg +Ġweighed +Ġeure +ĠBem +Ġabnormal +82 +ĠJR +ĠAkt +ĠBri +út +Ġstagn +!* +Ġwegen +Ġleaking +ĠWords +ĠMau +Ġvue +ĠLiam +анием +Ġclinicians +ĠPump +Ġförst +?... +Ġautomotive +ĠOwen +zusagen +ĠHundred +Ġdecentralized +Ġbulbs +Ġ׾׼ +Ġprovinces +ĠMilan +81 +kas +Ġëĵ£ +Ġforça +Ġrightly +島 +rÄħ +Ġvenues +Ġwai +Ġpredicting +ĠWiFi +Ġê¶ģê¸Ī +رÙĪ +Ġ×Ķ×ĸ +century +Ġgradual +ĠProbleme +ĠìĹħ +Ġcoping +ĠBrus +Ġpeanuts +irtschaft +Ġзал +ĠTroy +Ġsperm +ĠMitar +ĠTürkiye +grand +¦Ń +Ġ×ŀס +Ġpans +ĠKnowledge +berly +ĠÐķго +Ġdanced +ĠFrost +ĠBurg +Ġbiting +ìłķìĿĦ +meal +Ġheroic +Ġmotherboard +ĠLicht +ãģ£ãģ +llan +айн +ĠÑĢÑıд +Ġà¹Ģภ+onen +irie +Art +rang +νη +Ġnewborn +Ġamis +ĠاÙĪر +Ġsophom +ĠCareful +Ġprospects +ensen +Ġthrill +ĠViá»ĩt +Adam +rition +entric +uden +Ġcertificates +Ġashes +調 +playing +Ġsadece +Ġost +Ġairplanes +ÑĢок +oner +Ġmagnesium +Ġgoddamn +Ġ1972 +ĠSchule +Ġtemat +Ġpartout +à¯Ĥ +Ġinve +ĠScientists +ĠHudson +winning +ceksin +Ġcongressional +oru +Ġropes +вед +Ġmadre +Ġferry +ĠCohen +ĠPred +Ġvagy +ĠбеÑģп +Ġmultim +Ġdrainage +Ġsimulator +giggles +ĠStadium +обÑī +Ġnotices +Ġcrawling +Ġgroupe +åı¸ +ĠktoÅĽ +ĠYoga +Ġmedida +ĠÑħваÑĤ +ĠLite +Ġrav +orama +Ġdiscord +ĠDIRE +Ġteh +ĠNurs +ç²ī +Ġpitched +Ġbarking +ĠCoke +wiad +Ġpopulated +éĻ¤ +pelled +Ġбог +Ġpewno +ĠCube +Ġrecruited +éĢĻ種 +ĠCara +ıģını +imated +ĠÑĪкол +icional +ĠпÑĢоÑĦ +Ġcontamination +Ġúltimos +Ġfearful +Ġelephants +usi +ĠiTunes +ĠSwami +ê¼ +ĠìĦ¤ëªħ +ĠRichards +Ġmagnets +ĠRichtung +ĠLegion +èıľ +Ġkitty +Ġkissed +Ġwatering +Ġcono +ĠPalestine +idir +Ġmaze +Ġfluids +ĠProducer +ĠKrsna +好åķ¦ +laf +Ġ×IJ×ķ +Ġmiesz +ĠXing +ointed +sein +ĠFuk +ĠDepression +ĠDuty +ĠPanther +Ġsund +Ġrefere +Ġexclusion +Ġnaval +ĠWinston +Ġslogan +Ġhypothetical +Ġelevate +ëł¹ +Ġcabeça +ĠGesund +meter +ĠìķĦëĭĪë©´ +Ġcloudy +âĢ¦? 
+ĠSchritt +ĠJS +ìį +ĠSprings +ĠBatter +·° +Ġtailor +ĠPTSD +ĠGent +ĠbaÄŁ +Ġspatula +Ġcray +ĠLegisl +Ġsú +Ġleve +าม +Ġerad +Ġdong +Ġderm +ĠBanks +icho +åħĪçĶŁ +ĠFranz +ravel +éģĶ +оло +Ġflute +ĠEk +Ġjoyful +Ġchased +ĠLarge +Over +Ġentrepreneurial +Ġconsiders +Ñĥем +opa +Ġdormir +ĠElementary +Ġprzypad +ÑĥÑģка +ĠоÑĩеÑĢ +ugene +Ġtenido +Ġlugares +ë¥ +ĠÑĩаÑģÑĤ +Ġsao +Ġbraid +ĠVere +ĠReich +ĠPoss +Ġinan +wand +ref +Ġmontrer +Ġ1981 +çķª +asında +Ġchrome +ĠTrinity +Ġexploitation +ĠSense +ĠCMS +ĠNoble +ĠìĦłíĥĿ +Ġswelling +electronic +]? +Ġbrushing +Ġliquidity +ĠHook +ĠConnor +ĠAlum +Ġgucken +suite +Ġwiele +Ġbarrels +ĠRegel +ĠMent +ĠTrip +ĠBrush +ĠErik +urate +ÉĻr +ĠCyr +ouble +ĠBecca +Ġpasswords +ű +borg +Ġvendo +ĠClaus +ĠFaz +indest +Ġdeceased +Ġcomparisons +ĠLCD +ĠPork +Ġeventual +Ġpatreon +Ġinability +Ġextinction +Ġì¢ĭìķĦíķĺëĬĶ +ĠÑģоÑģ +aju +Ġ×ij×IJ× +Ġsofort +Ġdestined +ĠRin +Ġmouths +ĠNatürlich +Ġpreserving +Ġlimp +黨 +ocused +инг +Ġexposing +Ġξ +ëį +laugh +Ġhiss +ãģłãģĭãĤī +Ġindie +Ġdetal +ÑĢавÑģÑĤв +Ġtrên +æķ° +Ġogni +Ġsimplemente +Ġ1978 +Ġgoo +Ġ1967 +Ġgenug +hö +Ġhistó +å®Ł +Ġlobster +cendo +Ġteil +Ġallevi +0000 +OLD +Ġpesos +Ġbonuses +Ġami +Ġrevival +ĠHorse +Ġsack +Talk +Ġmulher +ĠпоÑģÑĤоÑıн +ĠHood +Huh +Ġë¶ģ +Ġhyung +ĠMeeting +Ġimporta +Ġì°¾ìķĦ +ĠVern +Ġstripped +Ġrefuses +Ġqualifications +opl +ĢëıĦ +ixÃŃ +Ġdiab +itime +flows +Ġinac +ĠGong +Ġmeaningless +Ġcourageous +Ġmicrobi +azy +hist +Ġvolunteering +VIE +Ġviolated +Ġsympathy +ĠEdit +好åĥı +electric +product +Ġpandemia +Ġgeometric +ĠConvers +gre +Ġglut +isted +ĠاÙĦÙĥ +ĠChain +ĠPresent +ĠYin +ĠÑģог +ĠVlog +Ġìĸ´ë¨¸ +Ġdonn +Ġhitch +ucking +ãģĬãģĦ +wald +risk +Ġhari +ĠKens +ĠIdol +Ġвнимание +Ġtodd +Ġsmashed +Ġinvari +ĠконÑĤÑĢ +Ġautistic +ìŀ¥ëĭĺ +Res +дÑĭ +chau +Ġselv +Ġhätten +ि +Ġexpects +Ïģη +Ġaçık +ĠHTTP +leÅŁ +Ġsweeping +ĠBeta +Ġcounterparts +abile +ĠSims +Cs +Ġrepar +squ +Ġprovincial +Ġshareholders +Ġrunter +Ġgedacht +ĠTeen +Ġgrands +çĶ¢ +agles +Ġrocky +vens +Ġrivals +unal +Ġreacts +ë© +Ġmercury +ĠLuigi +Ġог +ĠJUST +Ġlod +Ġcortex +wig +Ġlakh +ì¤ijìĹIJ +ĠVic +ĠMund +Ġmapped +ĠDell +ĠDruck +Ġlifes +алÑĮное +ividual +adım +Ġatrav +ĠFlug +ĠKlein +ê±°ìķ¼ +หà¸Ļ +Ġappli +ா? +üyorum +ĠинÑĤеÑĢеÑģно +Ġdisinfect +>- +Ġchampagne +Ġkla +opers +Trans +ĠDesert +Ġcultivate +ĠFucking +idelity +ĠÑĤан +Ġincub +Ġtemu +Ġlearner +founder +ĠSyl +ãĤĢ +Ġfato +zier +ĠìĹĨìĿ´ +ĠìĪ¨ +Ġpsycho +ĠÑĤелеÑĦ +Ġregarde +Ġrepresentations +Ġlitigation +Ġspann +ults +bior +è¦ĭãģ¦ +ä¸įå¤ļ +ĠSurvey +ĠLEDs +Ġträ +Ġlên +Ġantioxid +еÑĢом +Ġinduction +Ġfooled +ätzlich +ĠговоÑĢÑıÑĤ +ĠFact +umbai +Ġwiggle +NOUN +Ġdévelopp +ĠClaro +Ġì¸ +ë¬ +ãģªãĤĵãģł +Ġaccumulate +Ġmaintains +ëĦ +ĠFighter +íĨł +Ġmatin +Ġcoupon +Ġstunt +Ġdebuted +å¾ħãģ£ãģ¦ +Ġprag +иваем +73 +Ġexpres +Ġìĺ¤ë¹ł +ĠпеÑĢÑģон +Ġcalculus +Ġabrupt +ĠInspector +ourt +æĸĻ +źniej +intense +Ba +Ġlounge +Ġasthma +ĠHiç +ª» +Ġeditorial +Ġseize +Ġkır +Ġmouve +Ġtierra +Ġtestosterone +Ġrh +ĠKingston +ELLE +ĠRepresentative +Ġ1974 +Ġiba +Ts +Ġsorta +Ġ(?) 
+ĠتÙĪ +ĠëĤ´ëł¤ +Ġbekommt +Ġspiritually +Ġdistorted +Mad +Ġreim +ánh +ĠOttoman +ĠRelig +ĠEls +Ġretained +ĠLaughs +æĢ» +ĠSAS +ĠколиÑĩеÑģÑĤво +×ķתר +Ġinnovate +Ġkork +ĠÑĢаÑģÑģказÑĭв +ondere +ivi +aye +ounty +ĠполÑĥÑĩаеÑĤÑģÑı +Ġbuns +åħ« +Ġyüzden +Ġsurgeries +Ø£ÙĨ +Ġbankruptcy +welt +Ġsiamo +Ġdarkest +ĠHann +gga +Ġformas +ĠDj +named +Ġshields +ueller +ĠFew +Ġlace +Ġfurious +ĠYU +Ġsocietal +Ġjudgement +ĠDos +Ġjab +laws +Ġreinvent +ĠKatherine +ĠChoi +adows +Ġrans +oden +ĠMidwest +nın +Ġdeport +ĠDip +ç´ħ +Ġatención +ĠCourtney +ividad +ĠÚ©Ûģ +Ġefficacy +ĠBrooks +Ġreferral +ĠконÑĨ +Ġmalicious +Ġkir +ĠGoddess +Ġfunky +Ġinterim +ĠKörper +Ġìĸ¼ë§ +kur +Ġкли +Ġtrucs +gesetz +Ġzug +ĠGlück +ĠMinute +Ġprestigious +Ġniez +Ġconcentrations +лаÑģÑĤи +ĠSis +ĠVitamin +kov +ĠPBS +Ġнее +Ġretailers +Ġconventions +ĠSamantha +Ġproudly +Jordan +ĠJASON +atk +Ġtriste +Ġstär +Ġreiterate +Ġposterior +Ġ1973 +ĠPine +ĠJuliet +Ġpedir +kil +Ġoverlapping +Ġexclude +Ġeconóm +Ġaccepts +ĠSter +決 +Ġìļ´ëıĻ +estab +Ġtug +arg +Ġlivro +اص +Ġseams +Ġburaya +Ġello +ĠTM +ĠPaw +ĠIndex +Exc +Ġinspirational +Ġdunk +è°ģ +akter +Ġconditioner +ĠSalut +ÅĤec +Ġìī½ +ĠÑĥзна +ĠRomeo +fruit +ĠYO +Ġchá»ī +бÑĥ +bons +Ġreproductive +Ġorada +Ġíļ¨ +Ġtentar +Ġmañana +ãĤ¬ +Ġsolvent +Jessica +ĠLegal +Ġtua +Ġsic +ĠEQ +aukee +ìĭľëĭ¤ +ĠÅŀu +Ġadhere +ĠTul +Ġà®Ĩ +Ġtextbooks +ĠFifth +Ġexperi +Ġchic +Ġheap +inely +atra +Two +Ġhelemaal +Ġfren +æݨ +Ġbisher +اش +ĠìĦłìĥĿ +ĠTages +Ġsá»± +Ġbullied +ؤ +Ġbenefited +ĠPreviously +ĠÑįÑĦÑĦ +Ùį +Ġsenate +ĠMorm +ijke +ĠFlu +Ġincorporating +jack +ĠпиÑĤ +Ġimply +Ġhacks +ĠRICH +ĠкваÑĢ +ĠпÑĢекÑĢаÑģ +Ġdependency +Ġìļ© +Ġì±ħ +Ġwährend +Ġsulla +ĠPittsburgh +Ġesempio +¼ë¡ľ +prot +ĠRosen +ĠIndependence +Ġparsley +iegen +Ġhaw +Ġaquell +ĠCAP +ĠÑĢабоÑĤаÑĤÑĮ +ĠCliff +ionar +Ġsecuring +æĪijåĢijçļĦ +νε +Ġutilis +Ġcoule +ĠPing +Ġtrek +Ġfak +Ġenorme +Ġìĭ« +让 +Ġdoubling +ĠнÑĢавиÑĤÑģÑı +Ġhed +hoven +ĠStanding +ĠmÃŃn +ĠJimin +Ġmonarch +Ġcoke +Ġmr +Ġclic +Ãį +Ġimpeachment +Ġdurability +Ġvarios +Ġcommercials +Ġgreetings +ĠRi +ĠAppreci +ìŀĪëĬĶ +Ġrésult +ért +Ġsalute +Ġpoderia +Ġsunrise +veck +Ġreluctant +Ġcommissioner +念 +âte +ĠKenny +ĠSiri +ãĥĥãĥĹ +ĠëĬĺ +ĠEE +Ġunch +кон +ĠاÙĦØ¥ +Ġbelts +Ġhass +ĠмоÑı +Ġdisplaced +Ġabra +ÎŃλ +Ġscratches +Ġcomet +Ġauthorization +ĠLLC +Ġproduk +Ġrehabilitation +åŀ +ÑĸÑĩ +uding +olit +Ġ105 +Ġexpands +Ġaltri +ĠKomment +Ġanf +Pl +ĠMana +fed +Ġbri +Ġora +Gs +ĠGur +uckland +Ġjunction +Ġironic +ĠFeed +Ġprakt +ĠHammer +ĮëıĦ +ĠTracy +çµ± +ĠAside +него +ĠиÑģполÑĮзоваÑĤÑĮ +Ġzaj +Ġequitable +Ġcurb +ĠãģĵãĤĮ +Ġderivatives +Ġpuppies +ĠKenneth +ĠCompl +igram +ĠGarcia +)\" +ĠHarbor +estial +Ġä¾Ĩ +Ġers +æ¹ +Ġunwanted +Ġbelang +аго +emb +dos +ĠìĻľë +ĠBudget +Ġbattling +ØŃت +kok +наÑĩала +Ġplag +Ġcantidad +Ġgrupos +Ġplugins +lerini +ĠимееÑĤ +Ġsozusagen +olics +Ġpueblo +Ġreminis +rän +ĠMorrison +Ġlinha +Ġbreaths +ĠTaste +Ġenfrent +ĠDocker +Ġден +Ġethnicity +Ġwob +Ġsuffers +Ġtransitioning +ĠRange +ÄĻdzy +ĠкаÑĤ +Ġsyner +Ġdonut +Ġprobabilities +ĠOmar +Which +uish +isin +Ġdemos +ĠìłĢ기 +Ġëĺijê°Ļ +Ġедин +Ġcerve +Ġjoka +IAN +Ġkilometer +Ġhorizontally +ĠBhag +Ġ-> +ĠMonitor +Ġknowledgeable +Ġfav +Ġpinned +ĠeBay +icker +Ġìŀłê¹IJë§Į +ĠXiaomi +Ġcapit +Ġnp +Ġ1965 +hoe +Ġnok +ĠSage +ĠнелÑĮзÑı +ĠTow +gam +Ġdicen +ĠSUBSCRIBE +Ġreboot +Ġpaj +Ġë³´ìŬë +Ġthicken +ĠReality +idän +Na +Ġê²ĥìĿĢ +!!) 
+Ġroutines +Ġодного +Ġexting +Ġì¦Ŀ +Ġsulfur +Ġcarve +Ġasteroid +ĠWarrior +Ġphotographers +Ġpell +Ġcrossover +æĪijçŁ¥éģĵ +Ġhacemos +ĠNej +Ġsettling +Ġirm +ĠBooks +ientôt +Ġespacio +ĠScholars +Ġdoomed +ĠIRS +wohl +Ġsegue +ĠëĪĦê°Ģ +Ġpratic +BT +ĠConsidering +ĠBuffalo +Ġtrainings +Ġgebru +ĠGleich +Ġpirates +Ġenvelop +Ġreopen +imat +Ġtee +Ġsued +feh +Ġ×Ķק +Ġdiets +Ġjuntos +asto +Ġmisunderstood +Ġruim +Ġclassify +ĠпÑĢодÑĥк +Ġinse +Ġillustrated +Ġcorrosion +Ġaccred +ĠAuntie +ĠпÑĢивеÑĤ +ĠLIVE +Ġrek +Ġreceipt +åĪ°åºķ +ĠBarbie +ĠSnake +turn +Jeff +ãģĬãģĬ +ķĦ +VOICEOVER +coll +Ġrunners +ìłľë +osos +moon +Ġkeynote +ĠInstit +SPEAK +Ġplugs +Ġcurv +ĠYuri +ĠTheres +ĠPs +ĠμÏĢο +Ġconverter +Ġrefine +Ġbadass +Ġοι +Ġregen +azzi +ÙĬÙģ +Ġseized +Ġiçer +ilee +Ġupstream +Ġbuds +Ġpim +Ġíķĺ루 +Ġalluded +Ġthemed +Ġconsisting +Ġbons +unuz +ĠпÑĢовод +ĠLovely +à¥ĭ +Ġparach +ĠStaats +éļĬ +Ġselective +Ġfase +ĠGeorget +Ġcocaine +Ġreproduction +ĠLara +ĠLD +Ġgh +Jon +ĠlÃ¥ +ĠëijIJë +Ġtyped +ĠBana +ëĵľë +Ġsavory +ĠZomb +standen +Ġpedestrian +Ġdifférents +Ġìĭ¸ +èī¯ +Ġcomplained +ç¦ı +ĠÐļÑĤо +Ġ׾פ +aliÅĽmy +Ġmortar +Ġverdict +Ġsuficiente +ĠMillion +mittel +inals +ĠاÙĦØ® +аÑİÑģÑĮ +ĠmiÄĻdzy +ĠOle +Ġinvert +czyÄĩ +озможно +starter +Ġauditor +ĠScout +chien +ĠSverige +uffled +Ġzehn +ĠAuckland +Ġargent +Ġ1976 +ĠHoe +Ġbothers +Ġsocialist +Ġpliers +Ġemergen +ĠXP +еÑĢов +More +ĠLevi +ĠAnders +ibilidad +ĠParents +Ġinduced +ìĸ´ì¤ +Ġbalances +ĠвÑĭÑĪ +Ġsubmarine +Start +Ġdries +Ġvolver +Ġticking +cott +Ġfaj +prés +ĠSabb +ĠзаÑĩ +ĠпокÑĥп +Ġbaptized +ĠBrilliant +ĠÐijог +Ġmots +bits +Ġlattice +æĪijè·Łä½ł +Ġcoriander +Ġresidency +ync +Ġpierwszy +ĠKnock +ĠZap +ĠÐķв +견 +å°ıå¿ĥ +Ġuneven +ĠJas +odor +ç¿Ĵ +74 +ĠSite +Ġaconteceu +ympt +Ġtrilogy +Ġlantern +ĠZucker +vari +welling +ĠPotato +gomery +Ġreacted +ĠChron +Ġjede +beeld +Ġtwent +Ġlact +æ¨Ĥ +Ġrése +Ġrelent +Ġfurnace +Ġwidget +Ġearthquakes +ĠAdjust +ilit +ĠØ£ÙĪ +Ġhearings +Ġdefendant +irsiniz +Ġbask +cja +ľ¨ +Ġrifles +Ġinstal +ĠForgive +pical +ĠÐŀÑĩенÑĮ +Ġpetites +Ġhp +Ġrenowned +ĠInn +Ġ주ìĦ¸ìļĶ +Ġemphasized +éĹ®é¢ĺ +ĠìŀĪì£ł +Ġê²ĥìľ¼ë¡ľ +ãĤĨ +Åĵ +gili +Dave +Ġexhausting +ÅĤug +Ġschema +μά +cycl +Ġautant +Ġparcel +Ġmateria +ĠBerry +ĠÑģами +Ġextracted +ĠSaying +ismatic +ĠпопÑĢоб +Ġneuron +graph +ľë©´ +Ġenclosure +ĠJohann +Ġaftermath +ÑĤоб +Ġuży +Ġsamp +360 +ĠMei +Ġtaco +Ġreceptors +Ġpunches +ĠHoje +ĠÙĩÙĨا +=\"# +ĠAngular +Ġmusique +Ġrol +Ġñ +sterreich +Ġclam +ĠTreasury +chemical +Ġapar +Ġappend +Ġforbid +ĠHamburg +аков +Ġê¸Ī +ilda +Ġpreparations +ĠmogÄħ +Ġcamino +Eric +ĠBlind +èĪĩ +å¹´çļĦ +ĠDiscovery +ì¸ł +çĪ¶ +Ġinterpreter +Ġbred +ĠPsalm +Ġdefended +ìī¬ +ĠErfahr +ĠPeach +Ġmoons +ĠOst +Ġspécial +Ġarriver +ĠWis +uci +Ġrobotics +IVE +Ġsiege +arla +Ġseparates +ĠTC +íı° +quisite +Ġparentheses +ике +ç«Ļ +Ġtrous +建 +ĠÑģилÑĮ +Ġbeers +ĠплаÑĤ +ãģĻãģĶãģĦ +Ġsola +Ġdès +mingham +ikte +Ġoops +Ġtwitch +å°ĩ +ÏĪ +ĠShouldn +uvre +Ġleer +criptions +Ġeyeshadow +ĠGuo +ĠPowell +Ġsupuesto +Ġana +rals +ĠMontreal +Ġsurfing +ĠÐŁÐµÑĢв +×ŀ×ķ +Ġmilliseconds +Ġsuburbs +Ġplaneta +ÑĥÑĪка +hrlich +ĠHY +ĠسÛĴ +ĠMM +ĠEff +åı¯æĦĽ +ĠHS +anson +Ġì§ģìłij +Ġsuo +Ġdeploying +Ġkunt +tering +Ġerect +ìŀ¥ìĿ´ +ĠìĿĮìĭĿ +Ġspecimen +!... 
+æĪij說 +Ġligne +Ġkonst +adequ +Ġìĥģíĥľ +Ġaccessed +ĠPole +kill +Ġë²Ħë +Ġauthenticity +Ġappelle +ulle +Ġrevision +Ġgoats +гли +Ġpau +ĠRanger +ĠImag +author +Ġeve +ĠMessenger +Ġnay +Ġwholes +ätte +Ġonwards +ĠDepois +ĠíijľíĺĦ +ĠSARS +Ġwszystkich +Ġdestru +umbing +Ġcompatibility +Ġmisinformation +odore +ĠFavor +eko +ıĮ +waukee +ĠTeaching +ĠKO +Ġbetting +Ġquests +Ġvivre +ĠмÑĥзÑĭ +Ġsaga +Ġswell +Ġgehe +æĢİ麼樣 +ĠоÑĢганиз +Ġgide +ĠGross +Ġdalej +Ġclaws +á»Ļc +Ġprejudice +Ġinsign +ihood +Ġpled +Ġdónde +ĠPolitical +Ġpremises +undert +عت +onnen +Ġespaço +Ġfé +ĠHarrison +ĠCensus +Ġcardio +Ġdiy +Ġmilieu +Ġjournée +ĠRelease +NIE +ĠMuk +idée +á»įi +Ġiçinde +ŀĻ +Ġresonate +Ġmoles +ĠFlying +ĠGloria +ĠPastor +ĠArena +好ä¸į好 +NON +олов +ĠallÃŃ +omat +ìĸ´ëıĦ +ĠcaracterÃŃst +Ġdeclining +ÑĸÑı +anco +ĠInform +Ġbargain +Ġbushes +ĠNaturally +Ġrechts +ĠTensor +ĠPatricia +Ġprincipio +ĠMumbai +Ġwomb +Ġnostra +Ġdilemma +Ġirgendwann +Ġ1964 +ĠenergÃŃa +ĠнаÑĢ +Ġsegregation +ĠAthlet +Ġ», +Ġyeni +ĠSeit +Ġvenom +Ġdakika +ĠëıĮë +ĠÃīl +Ġfus +ĠMog +¦½ëĭĪëĭ¤ +Ġremar +ĠTeddy +Ġbreasts +icans +æĶ¶çľĭ +kap +ĠhÆ¡n +ĠJP +ãĥ³ãĤ¿ +Ġresurrect +ĠìĿ¸ë +herical +Ġfotograf +ĠJosé +Ġlivelihood +Ġbibli +teri +Ġvorstellen +ĠAAA +Ġassessing +YA +Ġsplend +Ġexcav +Ġbaptism +yll +wow +Mac +Ġplastics +teokbokki +Ġintéressant +Ġcommanded +Ġfamously +ĠÐĺли +ĠManuel +Ġsouthwest +Ġdeformation +ÃŃculo +ĠнаÑħодиÑĤÑģÑı +ĠPatter +degree +ĠczÄĻsto +\"- +Ġìħĭ +Ġmanger +ĠTrustee +Ģ리 +Ġpuntos +ivable +Ġvolatile +ĠëĬIJ +Ġinstability +Ġciel +ciÄħ +Ġpurity +ноÑģÑĤ +Sil +edar +åĻ¨ +NOUNCER +Ġspelled +GER +Ġsanctuary +Ġaccelerating +Ġscout +ĠпÑĢев +fahren +ãģĵãģ¡ãĤī +ĠëĤĺìĺ¨ +ĠpoczÄħt +ĠMeu +kaar +³´ê³ł +akra +Down +ĠÃĦr +ĠElite +Ġallons +Ġmayonnaise +ĠSustain +prisingly +Ġsupervis +Ġê·¸ëłĩì£ł +Ġunemployed +Ġfreshly +Ġ×ŀ×¢ +ĠDh +Ġtackling +Ġogr +Ġì´Īë +ãĤĪãĤį +Ġloft +arah +ĠAirl +ĠDir +ĠÐľÐ¾Ð¶Ð½Ð¾ +Ġbooking +ĠCRA +Ġhttps +Ġchoke +Ġgown +Ġnoite +Ġzac +istol +Ġsecre +Ġresembles +Ġcuad +ìĤ¬ê°Ģ +show +Ġblanc +Ġagu +ĠPrint +asted +ĠWeather +ipl +Ġobscure +Ġconte +oughs +); +ĠDame +ä¸Ģ缴 +Ġclarification +Ġintimacy +Ġuphold +ĠMirror +Ġwagon +xide +Ġclog +apper +ĠImmediately +úde +Ġtouchdown +Ġrooft +аÑĪа +Ġçıkt +Ġlaisser +ĠUnreal +ensitive +Ġ123 +Ġplaster +Ġducks +Ġetme +Ġbishop +brevi +Ġbic +ä¸ĭåİ» +Ġruntime +Ġambitions +маÑĤ +ĠWein +ĠMari +ĠíĬ¸ë +Ġresolver +ĠngÃły +ĠRise +ãĤĪãģĨãģ« +ĠCrus +Ġmerchandise +Ġeli +Ġstatewide +Ġowl +éģł +æĶ¹ +Ġtwisting +Ġcontaminated +ĠCommerce +hythm +ĠÃĪ +Ġìĭ¤ë +Ġmusste +uir +Ġsums +ĠSomewhere +ãĥİ +Ġkami +Ġaired +ĠANDREW +Ġêº +Ġviendo +Ġantibody +Ġabsolument +Ġprotesters +ĠQuébec +stadt +Shaun +Ġchambers +ĠWear +ĠEffects +Ġhazards +Ġnei +Ġcorazón +Ġá¼ +ĠSG +Ķ© +ĠìĹŃìĭľ +Ġcomfy +ĠCody +Ġpensando +Ġganska +ĠAcross +öllig +abyte +Ġwedge +Ġkalian +Ġsigue +endes +ĠGroÃŁ +Ġutiliser +Ġflown +аниÑİ +Ġlevar +restrial +Ġillustrations +Ġaslında +BLEEP +ĠдоÑģÑĤ +Ġturret +Ġsuitcase +ziÄĻki +Ġsketches +Ġacred +ĠRei +Ġtsun +ĠSag +Ġthirds +ĠKIRBY +rai +Ġhumanos +Ġrecommends +Ġextraordinarily +Ġcommencement +KN +opez +Ġ×ijש +Ġlethal +ĠEstamos +Ġinspector +ĠSeok +eun +Ġoffshore +Ġgettin +years +ĠSilence +ĠNatur +upun +Ġtrzy +Ġnoget +Ġhamburger +ĠPraise +énd +Ġ1971 +ylie +krit +ĠìĥĿê°ģìĿ´ +çļ® +Ġmomentos +Ġesté +Ġdissemin +Ġgigs +Ġdesaf +Ġavis +ĠZoo +ĠìķĬìĿĢ +häng +åı¥ +hake +ĠBism +Ġrethink +ĠMalcolm +Ġidentifies +lower +ixel +ĠtvÃ¥ +ked +ierz +Ġöffentlich +Ġproclaim +soon +lol +Ġloi +Ġbitten +rollo +Ġsermon +Ġesqu +Ġjackets +Ġgráfic +ĠпоказÑĭв +Ġcabeza +chodzi +Ġpelvis +Ġnostalgia +Ġbrew +Ġshortcuts +ĠAdemás +Ġsuperficial +åħ©åĢĭ +Ġboca +ĠæĪijæĺ¯ +imentos 
[diff truncated: the remainder of this hunk adds a large byte-level BPE tokenizer vocabulary as data, one `+`-prefixed token per line (e.g. `+Ġsprouts`, `+ĠJonas`); the several thousand token lines are omitted here, as the list carries no reviewable content.]
+Ġض +Ġarrests +ĠCSV +ĠAzerbaijan +ortic +ĠDX +ĠAdventures +Ġabus +ĠFau +Ġschlimm +Ġrattling +Ġconsumes +ĠTolkien +Ġresurrected +ĠXY +íĬ¸ê°Ģ +ĠвÑĭÑģÑĤÑĥп +ĠAngie +żenia +Mic +ĠSheila +achtet +Ġoverst +Ġlâ +Ġineffective +æĿ¡ +æĢİä¹ĪäºĨ +å¿Ļ +Ġwichtiger +Ġvino +Ġpum +Ġangled +ĠPione +ĠMỹ +ãģĿãĤĮãģ¯ +woÅĽÄĩ +draw +ัà¹Ī +markets +Ġcafes +ĠCem +âĿ¤ +ĠSuit +MK +Ġemphasizes +Ġtortilla +Ġmejorar +ĠSurviv +casting +Ġeducación +ĠGum +uely +ĠìĹ¬ê¸°ëĬĶ +Ġstretchy +ença +Ġwithhold +Ġexiting +Ġenthalpy +ĠTransit +ılmÄ±ÅŁ +alies +Ġsalvar +Ġleaned +ĠgroÃŁes +Ġfitt +аки +Sarah +Ġhostel +Ġfingerna +ĠnadziejÄĻ +wives +Rec +Ġspool +аÑĤов +ĠEnemy +Ġfury +Ġdetta +ĠFay +éļ¨ +ÑıÑİÑĤ +Ġaproximadamente +Ġsilos +Ġmagist +Ġcree +ĠKrank +ĠDOWN +Ġstartled +Ġreborn +ĠUmwelt +ĠSuzanne +ниÑĨÑĭ +outez +ĠJAC +yards +radas +rau +ipts +hail +Ġparagraphs +Ġmeglio +Ġisolating +Ġaceite +ĠHarsh +Ġcyst +ĠBlockchain +ĠÑħоÑĢоÑĪий +Ġvirtuous +Ġinvestigación +Ġdevoir +Ġmasturb +ĠSale +ÙĬرة +ĠΧ +ĠStraÃŁen +Ġdikk +Ġafore +ĠJungkook +Ġchociaż +ĠDebatte +Ġweirdly +Ġviaje +regist +Help +Ġkinderen +Ġformulated +Ġenfim +ĠTowards +коÑĹ +ivering +ĠдеÑĤи +charger +Ġpurl +Ġacademically +ĠNurse +Ġdeleting +ayo +Ġrefusal +Ġdepicts +ĠDracula +Ġtoasted +ĠZombie +ĠSuperior +ĠBold +Ġquizzes +Ġgle +450 +Ġcomeço +ynn +Ġverst +ĠOlaf +Ġpomoc +ĠSask +ëĺ +ĠTCP +ĠProperty +íķĺì£ł +à¸ľà¸¡ +boom +aros +ĠÑĢоÑģÑģий +ĠбÑĭваеÑĤ +åĩºåİ» +ĠìĿ´ìķ¼ê¸°ë¥¼ +Ġcombien +vacc +Ġebenfalls +para +Ġзм +Ġdesperation +ordre +Ġש׾×Ļ +Ġgenerously +ĠÐŀк +Ġorbiting +> +<|startoftranscript|> +<|en|> +<|zh|> +<|de|> +<|es|> +<|ru|> +<|ko|> +<|fr|> +<|ja|> +<|pt|> +<|tr|> +<|pl|> +<|ca|> +<|nl|> +<|ar|> +<|sv|> +<|it|> +<|id|> +<|hi|> +<|fi|> +<|vi|> +<|he|> +<|uk|> +<|el|> +<|ms|> +<|cs|> +<|ro|> +<|da|> +<|hu|> +<|ta|> +<|no|> +<|th|> +<|ur|> +<|hr|> +<|bg|> +<|lt|> +<|la|> +<|mi|> +<|ml|> +<|cy|> +<|sk|> +<|te|> +<|fa|> +<|lv|> +<|bn|> +<|sr|> +<|az|> +<|sl|> +<|kn|> +<|et|> +<|mk|> +<|br|> +<|eu|> +<|is|> +<|hy|> +<|ne|> +<|mn|> +<|bs|> +<|kk|> +<|sq|> +<|sw|> +<|gl|> +<|mr|> +<|pa|> +<|si|> +<|km|> +<|sn|> +<|yo|> +<|so|> +<|af|> +<|oc|> +<|ka|> +<|be|> +<|tg|> +<|sd|> +<|gu|> +<|am|> +<|yi|> +<|lo|> +<|uz|> +<|fo|> +<|ht|> +<|ps|> +<|tk|> +<|nn|> +<|mt|> +<|sa|> +<|lb|> +<|my|> +<|bo|> +<|tl|> +<|mg|> +<|as|> +<|tt|> +<|haw|> +<|ln|> +<|ha|> +<|ba|> +<|jw|> +<|su|> +<|translate|> +<|transcribe|> +<|startoflm|> +<|startofprev|> +<|nocaptions|> +<|notimestamps|> +<|0.00|> +<|0.02|> +<|0.04|> +<|0.06|> +<|0.08|> +<|0.10|> +<|0.12|> +<|0.14|> +<|0.16|> +<|0.18|> +<|0.20|> +<|0.22|> +<|0.24|> +<|0.26|> +<|0.28|> +<|0.30|> +<|0.32|> +<|0.34|> +<|0.36|> +<|0.38|> +<|0.40|> +<|0.42|> +<|0.44|> +<|0.46|> +<|0.48|> +<|0.50|> +<|0.52|> +<|0.54|> +<|0.56|> +<|0.58|> +<|0.60|> +<|0.62|> +<|0.64|> +<|0.66|> +<|0.68|> +<|0.70|> +<|0.72|> +<|0.74|> +<|0.76|> +<|0.78|> +<|0.80|> +<|0.82|> +<|0.84|> +<|0.86|> +<|0.88|> +<|0.90|> +<|0.92|> +<|0.94|> +<|0.96|> +<|0.98|> +<|1.00|> +<|1.02|> +<|1.04|> +<|1.06|> +<|1.08|> +<|1.10|> +<|1.12|> +<|1.14|> +<|1.16|> +<|1.18|> +<|1.20|> +<|1.22|> +<|1.24|> +<|1.26|> +<|1.28|> +<|1.30|> +<|1.32|> +<|1.34|> +<|1.36|> +<|1.38|> +<|1.40|> +<|1.42|> +<|1.44|> +<|1.46|> +<|1.48|> +<|1.50|> +<|1.52|> +<|1.54|> +<|1.56|> +<|1.58|> +<|1.60|> +<|1.62|> +<|1.64|> +<|1.66|> +<|1.68|> +<|1.70|> +<|1.72|> +<|1.74|> +<|1.76|> +<|1.78|> +<|1.80|> +<|1.82|> +<|1.84|> +<|1.86|> +<|1.88|> +<|1.90|> +<|1.92|> +<|1.94|> +<|1.96|> +<|1.98|> +<|2.00|> +<|2.02|> +<|2.04|> +<|2.06|> +<|2.08|> +<|2.10|> +<|2.12|> +<|2.14|> +<|2.16|> +<|2.18|> +<|2.20|> +<|2.22|> +<|2.24|> +<|2.26|> 
+<|2.28|> +<|2.30|> +<|2.32|> +<|2.34|> +<|2.36|> +<|2.38|> +<|2.40|> +<|2.42|> +<|2.44|> +<|2.46|> +<|2.48|> +<|2.50|> +<|2.52|> +<|2.54|> +<|2.56|> +<|2.58|> +<|2.60|> +<|2.62|> +<|2.64|> +<|2.66|> +<|2.68|> +<|2.70|> +<|2.72|> +<|2.74|> +<|2.76|> +<|2.78|> +<|2.80|> +<|2.82|> +<|2.84|> +<|2.86|> +<|2.88|> +<|2.90|> +<|2.92|> +<|2.94|> +<|2.96|> +<|2.98|> +<|3.00|> +<|3.02|> +<|3.04|> +<|3.06|> +<|3.08|> +<|3.10|> +<|3.12|> +<|3.14|> +<|3.16|> +<|3.18|> +<|3.20|> +<|3.22|> +<|3.24|> +<|3.26|> +<|3.28|> +<|3.30|> +<|3.32|> +<|3.34|> +<|3.36|> +<|3.38|> +<|3.40|> +<|3.42|> +<|3.44|> +<|3.46|> +<|3.48|> +<|3.50|> +<|3.52|> +<|3.54|> +<|3.56|> +<|3.58|> +<|3.60|> +<|3.62|> +<|3.64|> +<|3.66|> +<|3.68|> +<|3.70|> +<|3.72|> +<|3.74|> +<|3.76|> +<|3.78|> +<|3.80|> +<|3.82|> +<|3.84|> +<|3.86|> +<|3.88|> +<|3.90|> +<|3.92|> +<|3.94|> +<|3.96|> +<|3.98|> +<|4.00|> +<|4.02|> +<|4.04|> +<|4.06|> +<|4.08|> +<|4.10|> +<|4.12|> +<|4.14|> +<|4.16|> +<|4.18|> +<|4.20|> +<|4.22|> +<|4.24|> +<|4.26|> +<|4.28|> +<|4.30|> +<|4.32|> +<|4.34|> +<|4.36|> +<|4.38|> +<|4.40|> +<|4.42|> +<|4.44|> +<|4.46|> +<|4.48|> +<|4.50|> +<|4.52|> +<|4.54|> +<|4.56|> +<|4.58|> +<|4.60|> +<|4.62|> +<|4.64|> +<|4.66|> +<|4.68|> +<|4.70|> +<|4.72|> +<|4.74|> +<|4.76|> +<|4.78|> +<|4.80|> +<|4.82|> +<|4.84|> +<|4.86|> +<|4.88|> +<|4.90|> +<|4.92|> +<|4.94|> +<|4.96|> +<|4.98|> +<|5.00|> +<|5.02|> +<|5.04|> +<|5.06|> +<|5.08|> +<|5.10|> +<|5.12|> +<|5.14|> +<|5.16|> +<|5.18|> +<|5.20|> +<|5.22|> +<|5.24|> +<|5.26|> +<|5.28|> +<|5.30|> +<|5.32|> +<|5.34|> +<|5.36|> +<|5.38|> +<|5.40|> +<|5.42|> +<|5.44|> +<|5.46|> +<|5.48|> +<|5.50|> +<|5.52|> +<|5.54|> +<|5.56|> +<|5.58|> +<|5.60|> +<|5.62|> +<|5.64|> +<|5.66|> +<|5.68|> +<|5.70|> +<|5.72|> +<|5.74|> +<|5.76|> +<|5.78|> +<|5.80|> +<|5.82|> +<|5.84|> +<|5.86|> +<|5.88|> +<|5.90|> +<|5.92|> +<|5.94|> +<|5.96|> +<|5.98|> +<|6.00|> +<|6.02|> +<|6.04|> +<|6.06|> +<|6.08|> +<|6.10|> +<|6.12|> +<|6.14|> +<|6.16|> +<|6.18|> +<|6.20|> +<|6.22|> +<|6.24|> +<|6.26|> +<|6.28|> +<|6.30|> +<|6.32|> +<|6.34|> +<|6.36|> +<|6.38|> +<|6.40|> +<|6.42|> +<|6.44|> +<|6.46|> +<|6.48|> +<|6.50|> +<|6.52|> +<|6.54|> +<|6.56|> +<|6.58|> +<|6.60|> +<|6.62|> +<|6.64|> +<|6.66|> +<|6.68|> +<|6.70|> +<|6.72|> +<|6.74|> +<|6.76|> +<|6.78|> +<|6.80|> +<|6.82|> +<|6.84|> +<|6.86|> +<|6.88|> +<|6.90|> +<|6.92|> +<|6.94|> +<|6.96|> +<|6.98|> +<|7.00|> +<|7.02|> +<|7.04|> +<|7.06|> +<|7.08|> +<|7.10|> +<|7.12|> +<|7.14|> +<|7.16|> +<|7.18|> +<|7.20|> +<|7.22|> +<|7.24|> +<|7.26|> +<|7.28|> +<|7.30|> +<|7.32|> +<|7.34|> +<|7.36|> +<|7.38|> +<|7.40|> +<|7.42|> +<|7.44|> +<|7.46|> +<|7.48|> +<|7.50|> +<|7.52|> +<|7.54|> +<|7.56|> +<|7.58|> +<|7.60|> +<|7.62|> +<|7.64|> +<|7.66|> +<|7.68|> +<|7.70|> +<|7.72|> +<|7.74|> +<|7.76|> +<|7.78|> +<|7.80|> +<|7.82|> +<|7.84|> +<|7.86|> +<|7.88|> +<|7.90|> +<|7.92|> +<|7.94|> +<|7.96|> +<|7.98|> +<|8.00|> +<|8.02|> +<|8.04|> +<|8.06|> +<|8.08|> +<|8.10|> +<|8.12|> +<|8.14|> +<|8.16|> +<|8.18|> +<|8.20|> +<|8.22|> +<|8.24|> +<|8.26|> +<|8.28|> +<|8.30|> +<|8.32|> +<|8.34|> +<|8.36|> +<|8.38|> +<|8.40|> +<|8.42|> +<|8.44|> +<|8.46|> +<|8.48|> +<|8.50|> +<|8.52|> +<|8.54|> +<|8.56|> +<|8.58|> +<|8.60|> +<|8.62|> +<|8.64|> +<|8.66|> +<|8.68|> +<|8.70|> +<|8.72|> +<|8.74|> +<|8.76|> +<|8.78|> +<|8.80|> +<|8.82|> +<|8.84|> +<|8.86|> +<|8.88|> +<|8.90|> +<|8.92|> +<|8.94|> +<|8.96|> +<|8.98|> +<|9.00|> +<|9.02|> +<|9.04|> +<|9.06|> +<|9.08|> +<|9.10|> +<|9.12|> +<|9.14|> +<|9.16|> +<|9.18|> +<|9.20|> +<|9.22|> +<|9.24|> +<|9.26|> +<|9.28|> +<|9.30|> +<|9.32|> +<|9.34|> +<|9.36|> 
+<|9.38|> +<|9.40|> +<|9.42|> +<|9.44|> +<|9.46|> +<|9.48|> +<|9.50|> +<|9.52|> +<|9.54|> +<|9.56|> +<|9.58|> +<|9.60|> +<|9.62|> +<|9.64|> +<|9.66|> +<|9.68|> +<|9.70|> +<|9.72|> +<|9.74|> +<|9.76|> +<|9.78|> +<|9.80|> +<|9.82|> +<|9.84|> +<|9.86|> +<|9.88|> +<|9.90|> +<|9.92|> +<|9.94|> +<|9.96|> +<|9.98|> +<|10.00|> +<|10.02|> +<|10.04|> +<|10.06|> +<|10.08|> +<|10.10|> +<|10.12|> +<|10.14|> +<|10.16|> +<|10.18|> +<|10.20|> +<|10.22|> +<|10.24|> +<|10.26|> +<|10.28|> +<|10.30|> +<|10.32|> +<|10.34|> +<|10.36|> +<|10.38|> +<|10.40|> +<|10.42|> +<|10.44|> +<|10.46|> +<|10.48|> +<|10.50|> +<|10.52|> +<|10.54|> +<|10.56|> +<|10.58|> +<|10.60|> +<|10.62|> +<|10.64|> +<|10.66|> +<|10.68|> +<|10.70|> +<|10.72|> +<|10.74|> +<|10.76|> +<|10.78|> +<|10.80|> +<|10.82|> +<|10.84|> +<|10.86|> +<|10.88|> +<|10.90|> +<|10.92|> +<|10.94|> +<|10.96|> +<|10.98|> +<|11.00|> +<|11.02|> +<|11.04|> +<|11.06|> +<|11.08|> +<|11.10|> +<|11.12|> +<|11.14|> +<|11.16|> +<|11.18|> +<|11.20|> +<|11.22|> +<|11.24|> +<|11.26|> +<|11.28|> +<|11.30|> +<|11.32|> +<|11.34|> +<|11.36|> +<|11.38|> +<|11.40|> +<|11.42|> +<|11.44|> +<|11.46|> +<|11.48|> +<|11.50|> +<|11.52|> +<|11.54|> +<|11.56|> +<|11.58|> +<|11.60|> +<|11.62|> +<|11.64|> +<|11.66|> +<|11.68|> +<|11.70|> +<|11.72|> +<|11.74|> +<|11.76|> +<|11.78|> +<|11.80|> +<|11.82|> +<|11.84|> +<|11.86|> +<|11.88|> +<|11.90|> +<|11.92|> +<|11.94|> +<|11.96|> +<|11.98|> +<|12.00|> +<|12.02|> +<|12.04|> +<|12.06|> +<|12.08|> +<|12.10|> +<|12.12|> +<|12.14|> +<|12.16|> +<|12.18|> +<|12.20|> +<|12.22|> +<|12.24|> +<|12.26|> +<|12.28|> +<|12.30|> +<|12.32|> +<|12.34|> +<|12.36|> +<|12.38|> +<|12.40|> +<|12.42|> +<|12.44|> +<|12.46|> +<|12.48|> +<|12.50|> +<|12.52|> +<|12.54|> +<|12.56|> +<|12.58|> +<|12.60|> +<|12.62|> +<|12.64|> +<|12.66|> +<|12.68|> +<|12.70|> +<|12.72|> +<|12.74|> +<|12.76|> +<|12.78|> +<|12.80|> +<|12.82|> +<|12.84|> +<|12.86|> +<|12.88|> +<|12.90|> +<|12.92|> +<|12.94|> +<|12.96|> +<|12.98|> +<|13.00|> +<|13.02|> +<|13.04|> +<|13.06|> +<|13.08|> +<|13.10|> +<|13.12|> +<|13.14|> +<|13.16|> +<|13.18|> +<|13.20|> +<|13.22|> +<|13.24|> +<|13.26|> +<|13.28|> +<|13.30|> +<|13.32|> +<|13.34|> +<|13.36|> +<|13.38|> +<|13.40|> +<|13.42|> +<|13.44|> +<|13.46|> +<|13.48|> +<|13.50|> +<|13.52|> +<|13.54|> +<|13.56|> +<|13.58|> +<|13.60|> +<|13.62|> +<|13.64|> +<|13.66|> +<|13.68|> +<|13.70|> +<|13.72|> +<|13.74|> +<|13.76|> +<|13.78|> +<|13.80|> +<|13.82|> +<|13.84|> +<|13.86|> +<|13.88|> +<|13.90|> +<|13.92|> +<|13.94|> +<|13.96|> +<|13.98|> +<|14.00|> +<|14.02|> +<|14.04|> +<|14.06|> +<|14.08|> +<|14.10|> +<|14.12|> +<|14.14|> +<|14.16|> +<|14.18|> +<|14.20|> +<|14.22|> +<|14.24|> +<|14.26|> +<|14.28|> +<|14.30|> +<|14.32|> +<|14.34|> +<|14.36|> +<|14.38|> +<|14.40|> +<|14.42|> +<|14.44|> +<|14.46|> +<|14.48|> +<|14.50|> +<|14.52|> +<|14.54|> +<|14.56|> +<|14.58|> +<|14.60|> +<|14.62|> +<|14.64|> +<|14.66|> +<|14.68|> +<|14.70|> +<|14.72|> +<|14.74|> +<|14.76|> +<|14.78|> +<|14.80|> +<|14.82|> +<|14.84|> +<|14.86|> +<|14.88|> +<|14.90|> +<|14.92|> +<|14.94|> +<|14.96|> +<|14.98|> +<|15.00|> +<|15.02|> +<|15.04|> +<|15.06|> +<|15.08|> +<|15.10|> +<|15.12|> +<|15.14|> +<|15.16|> +<|15.18|> +<|15.20|> +<|15.22|> +<|15.24|> +<|15.26|> +<|15.28|> +<|15.30|> +<|15.32|> +<|15.34|> +<|15.36|> +<|15.38|> +<|15.40|> +<|15.42|> +<|15.44|> +<|15.46|> +<|15.48|> +<|15.50|> +<|15.52|> +<|15.54|> +<|15.56|> +<|15.58|> +<|15.60|> +<|15.62|> +<|15.64|> +<|15.66|> +<|15.68|> +<|15.70|> +<|15.72|> +<|15.74|> +<|15.76|> +<|15.78|> +<|15.80|> +<|15.82|> +<|15.84|> +<|15.86|> +<|15.88|> 
+<|15.90|> +<|15.92|> +<|15.94|> +<|15.96|> +<|15.98|> +<|16.00|> +<|16.02|> +<|16.04|> +<|16.06|> +<|16.08|> +<|16.10|> +<|16.12|> +<|16.14|> +<|16.16|> +<|16.18|> +<|16.20|> +<|16.22|> +<|16.24|> +<|16.26|> +<|16.28|> +<|16.30|> +<|16.32|> +<|16.34|> +<|16.36|> +<|16.38|> +<|16.40|> +<|16.42|> +<|16.44|> +<|16.46|> +<|16.48|> +<|16.50|> +<|16.52|> +<|16.54|> +<|16.56|> +<|16.58|> +<|16.60|> +<|16.62|> +<|16.64|> +<|16.66|> +<|16.68|> +<|16.70|> +<|16.72|> +<|16.74|> +<|16.76|> +<|16.78|> +<|16.80|> +<|16.82|> +<|16.84|> +<|16.86|> +<|16.88|> +<|16.90|> +<|16.92|> +<|16.94|> +<|16.96|> +<|16.98|> +<|17.00|> +<|17.02|> +<|17.04|> +<|17.06|> +<|17.08|> +<|17.10|> +<|17.12|> +<|17.14|> +<|17.16|> +<|17.18|> +<|17.20|> +<|17.22|> +<|17.24|> +<|17.26|> +<|17.28|> +<|17.30|> +<|17.32|> +<|17.34|> +<|17.36|> +<|17.38|> +<|17.40|> +<|17.42|> +<|17.44|> +<|17.46|> +<|17.48|> +<|17.50|> +<|17.52|> +<|17.54|> +<|17.56|> +<|17.58|> +<|17.60|> +<|17.62|> +<|17.64|> +<|17.66|> +<|17.68|> +<|17.70|> +<|17.72|> +<|17.74|> +<|17.76|> +<|17.78|> +<|17.80|> +<|17.82|> +<|17.84|> +<|17.86|> +<|17.88|> +<|17.90|> +<|17.92|> +<|17.94|> +<|17.96|> +<|17.98|> +<|18.00|> +<|18.02|> +<|18.04|> +<|18.06|> +<|18.08|> +<|18.10|> +<|18.12|> +<|18.14|> +<|18.16|> +<|18.18|> +<|18.20|> +<|18.22|> +<|18.24|> +<|18.26|> +<|18.28|> +<|18.30|> +<|18.32|> +<|18.34|> +<|18.36|> +<|18.38|> +<|18.40|> +<|18.42|> +<|18.44|> +<|18.46|> +<|18.48|> +<|18.50|> +<|18.52|> +<|18.54|> +<|18.56|> +<|18.58|> +<|18.60|> +<|18.62|> +<|18.64|> +<|18.66|> +<|18.68|> +<|18.70|> +<|18.72|> +<|18.74|> +<|18.76|> +<|18.78|> +<|18.80|> +<|18.82|> +<|18.84|> +<|18.86|> +<|18.88|> +<|18.90|> +<|18.92|> +<|18.94|> +<|18.96|> +<|18.98|> +<|19.00|> +<|19.02|> +<|19.04|> +<|19.06|> +<|19.08|> +<|19.10|> +<|19.12|> +<|19.14|> +<|19.16|> +<|19.18|> +<|19.20|> +<|19.22|> +<|19.24|> +<|19.26|> +<|19.28|> +<|19.30|> +<|19.32|> +<|19.34|> +<|19.36|> +<|19.38|> +<|19.40|> +<|19.42|> +<|19.44|> +<|19.46|> +<|19.48|> +<|19.50|> +<|19.52|> +<|19.54|> +<|19.56|> +<|19.58|> +<|19.60|> +<|19.62|> +<|19.64|> +<|19.66|> +<|19.68|> +<|19.70|> +<|19.72|> +<|19.74|> +<|19.76|> +<|19.78|> +<|19.80|> +<|19.82|> +<|19.84|> +<|19.86|> +<|19.88|> +<|19.90|> +<|19.92|> +<|19.94|> +<|19.96|> +<|19.98|> +<|20.00|> +<|20.02|> +<|20.04|> +<|20.06|> +<|20.08|> +<|20.10|> +<|20.12|> +<|20.14|> +<|20.16|> +<|20.18|> +<|20.20|> +<|20.22|> +<|20.24|> +<|20.26|> +<|20.28|> +<|20.30|> +<|20.32|> +<|20.34|> +<|20.36|> +<|20.38|> +<|20.40|> +<|20.42|> +<|20.44|> +<|20.46|> +<|20.48|> +<|20.50|> +<|20.52|> +<|20.54|> +<|20.56|> +<|20.58|> +<|20.60|> +<|20.62|> +<|20.64|> +<|20.66|> +<|20.68|> +<|20.70|> +<|20.72|> +<|20.74|> +<|20.76|> +<|20.78|> +<|20.80|> +<|20.82|> +<|20.84|> +<|20.86|> +<|20.88|> +<|20.90|> +<|20.92|> +<|20.94|> +<|20.96|> +<|20.98|> +<|21.00|> +<|21.02|> +<|21.04|> +<|21.06|> +<|21.08|> +<|21.10|> +<|21.12|> +<|21.14|> +<|21.16|> +<|21.18|> +<|21.20|> +<|21.22|> +<|21.24|> +<|21.26|> +<|21.28|> +<|21.30|> +<|21.32|> +<|21.34|> +<|21.36|> +<|21.38|> +<|21.40|> +<|21.42|> +<|21.44|> +<|21.46|> +<|21.48|> +<|21.50|> +<|21.52|> +<|21.54|> +<|21.56|> +<|21.58|> +<|21.60|> +<|21.62|> +<|21.64|> +<|21.66|> +<|21.68|> +<|21.70|> +<|21.72|> +<|21.74|> +<|21.76|> +<|21.78|> +<|21.80|> +<|21.82|> +<|21.84|> +<|21.86|> +<|21.88|> +<|21.90|> +<|21.92|> +<|21.94|> +<|21.96|> +<|21.98|> +<|22.00|> +<|22.02|> +<|22.04|> +<|22.06|> +<|22.08|> +<|22.10|> +<|22.12|> +<|22.14|> +<|22.16|> +<|22.18|> +<|22.20|> +<|22.22|> +<|22.24|> +<|22.26|> +<|22.28|> +<|22.30|> +<|22.32|> +<|22.34|> 
+<|22.36|> +<|22.38|> +<|22.40|> +<|22.42|> +<|22.44|> +<|22.46|> +<|22.48|> +<|22.50|> +<|22.52|> +<|22.54|> +<|22.56|> +<|22.58|> +<|22.60|> +<|22.62|> +<|22.64|> +<|22.66|> +<|22.68|> +<|22.70|> +<|22.72|> +<|22.74|> +<|22.76|> +<|22.78|> +<|22.80|> +<|22.82|> +<|22.84|> +<|22.86|> +<|22.88|> +<|22.90|> +<|22.92|> +<|22.94|> +<|22.96|> +<|22.98|> +<|23.00|> +<|23.02|> +<|23.04|> +<|23.06|> +<|23.08|> +<|23.10|> +<|23.12|> +<|23.14|> +<|23.16|> +<|23.18|> +<|23.20|> +<|23.22|> +<|23.24|> +<|23.26|> +<|23.28|> +<|23.30|> +<|23.32|> +<|23.34|> +<|23.36|> +<|23.38|> +<|23.40|> +<|23.42|> +<|23.44|> +<|23.46|> +<|23.48|> +<|23.50|> +<|23.52|> +<|23.54|> +<|23.56|> +<|23.58|> +<|23.60|> +<|23.62|> +<|23.64|> +<|23.66|> +<|23.68|> +<|23.70|> +<|23.72|> +<|23.74|> +<|23.76|> +<|23.78|> +<|23.80|> +<|23.82|> +<|23.84|> +<|23.86|> +<|23.88|> +<|23.90|> +<|23.92|> +<|23.94|> +<|23.96|> +<|23.98|> +<|24.00|> +<|24.02|> +<|24.04|> +<|24.06|> +<|24.08|> +<|24.10|> +<|24.12|> +<|24.14|> +<|24.16|> +<|24.18|> +<|24.20|> +<|24.22|> +<|24.24|> +<|24.26|> +<|24.28|> +<|24.30|> +<|24.32|> +<|24.34|> +<|24.36|> +<|24.38|> +<|24.40|> +<|24.42|> +<|24.44|> +<|24.46|> +<|24.48|> +<|24.50|> +<|24.52|> +<|24.54|> +<|24.56|> +<|24.58|> +<|24.60|> +<|24.62|> +<|24.64|> +<|24.66|> +<|24.68|> +<|24.70|> +<|24.72|> +<|24.74|> +<|24.76|> +<|24.78|> +<|24.80|> +<|24.82|> +<|24.84|> +<|24.86|> +<|24.88|> +<|24.90|> +<|24.92|> +<|24.94|> +<|24.96|> +<|24.98|> +<|25.00|> +<|25.02|> +<|25.04|> +<|25.06|> +<|25.08|> +<|25.10|> +<|25.12|> +<|25.14|> +<|25.16|> +<|25.18|> +<|25.20|> +<|25.22|> +<|25.24|> +<|25.26|> +<|25.28|> +<|25.30|> +<|25.32|> +<|25.34|> +<|25.36|> +<|25.38|> +<|25.40|> +<|25.42|> +<|25.44|> +<|25.46|> +<|25.48|> +<|25.50|> +<|25.52|> +<|25.54|> +<|25.56|> +<|25.58|> +<|25.60|> +<|25.62|> +<|25.64|> +<|25.66|> +<|25.68|> +<|25.70|> +<|25.72|> +<|25.74|> +<|25.76|> +<|25.78|> +<|25.80|> +<|25.82|> +<|25.84|> +<|25.86|> +<|25.88|> +<|25.90|> +<|25.92|> +<|25.94|> +<|25.96|> +<|25.98|> +<|26.00|> +<|26.02|> +<|26.04|> +<|26.06|> +<|26.08|> +<|26.10|> +<|26.12|> +<|26.14|> +<|26.16|> +<|26.18|> +<|26.20|> +<|26.22|> +<|26.24|> +<|26.26|> +<|26.28|> +<|26.30|> +<|26.32|> +<|26.34|> +<|26.36|> +<|26.38|> +<|26.40|> +<|26.42|> +<|26.44|> +<|26.46|> +<|26.48|> +<|26.50|> +<|26.52|> +<|26.54|> +<|26.56|> +<|26.58|> +<|26.60|> +<|26.62|> +<|26.64|> +<|26.66|> +<|26.68|> +<|26.70|> +<|26.72|> +<|26.74|> +<|26.76|> +<|26.78|> +<|26.80|> +<|26.82|> +<|26.84|> +<|26.86|> +<|26.88|> +<|26.90|> +<|26.92|> +<|26.94|> +<|26.96|> +<|26.98|> +<|27.00|> +<|27.02|> +<|27.04|> +<|27.06|> +<|27.08|> +<|27.10|> +<|27.12|> +<|27.14|> +<|27.16|> +<|27.18|> +<|27.20|> +<|27.22|> +<|27.24|> +<|27.26|> +<|27.28|> +<|27.30|> +<|27.32|> +<|27.34|> +<|27.36|> +<|27.38|> +<|27.40|> +<|27.42|> +<|27.44|> +<|27.46|> +<|27.48|> +<|27.50|> +<|27.52|> +<|27.54|> +<|27.56|> +<|27.58|> +<|27.60|> +<|27.62|> +<|27.64|> +<|27.66|> +<|27.68|> +<|27.70|> +<|27.72|> +<|27.74|> +<|27.76|> +<|27.78|> +<|27.80|> +<|27.82|> +<|27.84|> +<|27.86|> +<|27.88|> +<|27.90|> +<|27.92|> +<|27.94|> +<|27.96|> +<|27.98|> +<|28.00|> +<|28.02|> +<|28.04|> +<|28.06|> +<|28.08|> +<|28.10|> +<|28.12|> +<|28.14|> +<|28.16|> +<|28.18|> +<|28.20|> +<|28.22|> +<|28.24|> +<|28.26|> +<|28.28|> +<|28.30|> +<|28.32|> +<|28.34|> +<|28.36|> +<|28.38|> +<|28.40|> +<|28.42|> +<|28.44|> +<|28.46|> +<|28.48|> +<|28.50|> +<|28.52|> +<|28.54|> +<|28.56|> +<|28.58|> +<|28.60|> +<|28.62|> +<|28.64|> +<|28.66|> +<|28.68|> +<|28.70|> +<|28.72|> +<|28.74|> +<|28.76|> +<|28.78|> +<|28.80|> 
+<|28.82|>
+<|28.84|>
+<|28.86|>
+<|28.88|>
+<|28.90|>
+<|28.92|>
+<|28.94|>
+<|28.96|>
+<|28.98|>
+<|29.00|>
+<|29.02|>
+<|29.04|>
+<|29.06|>
+<|29.08|>
+<|29.10|>
+<|29.12|>
+<|29.14|>
+<|29.16|>
+<|29.18|>
+<|29.20|>
+<|29.22|>
+<|29.24|>
+<|29.26|>
+<|29.28|>
+<|29.30|>
+<|29.32|>
+<|29.34|>
+<|29.36|>
+<|29.38|>
+<|29.40|>
+<|29.42|>
+<|29.44|>
+<|29.46|>
+<|29.48|>
+<|29.50|>
+<|29.52|>
+<|29.54|>
+<|29.56|>
+<|29.58|>
+<|29.60|>
+<|29.62|>
+<|29.64|>
+<|29.66|>
+<|29.68|>
+<|29.70|>
+<|29.72|>
+<|29.74|>
+<|29.76|>
+<|29.78|>
+<|29.80|>
+<|29.82|>
+<|29.84|>
+<|29.86|>
+<|29.88|>
+<|29.90|>
+<|29.92|>
+<|29.94|>
+<|29.96|>
+<|29.98|>
+<|30.00|>
\ No newline at end of file
diff --git a/examples/BuddyWhisper/whisper-main.cpp b/examples/BuddyWhisper/whisper-main.cpp
new file mode 100644
index 000000000..7d69ea307
--- /dev/null
+++ b/examples/BuddyWhisper/whisper-main.cpp
@@ -0,0 +1,183 @@
+//===- whisper-main.cpp ---------------------------------------------------===//
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an example for Whisper Model Inference.
+//
+//===----------------------------------------------------------------------===//
+
+#include <buddy/Core/Container.h>
+#include <buddy/DAP/DAP.h>
+#include <buddy/LLM/TextContainer.h>
+#include <algorithm>
+#include <chrono>
+#include <cstddef>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <string>
+
+using namespace std;
+using namespace buddy;
+using namespace dap;
+
+constexpr size_t ParamsSize = 99148800;
+constexpr size_t MaxVocabSize = 51865;
+constexpr size_t MaxTokenLength = 448;
+
+/// Declare Whisper forward function.
+extern "C" void _mlir_ciface_forward(MemRef<float, 3> *, MemRef<float, 1> *,
+                                     MemRef<float, 3> *, MemRef<size_t, 2> *);
+
+// -----------------------------------------------------------------------------
+// Helper Functions
+// -----------------------------------------------------------------------------
+
+/// Print [Log] label in bold blue format.
+void printLogLabel() { std::cout << "\033[34;1m[Log] \033[0m"; }
+
+/// Print information for each iteration.
+void printIterInfo(size_t iterIdx, std::string str, double time) {
+  std::cout << "\033[32;1m[Iteration " << iterIdx << "] \033[0m";
+  std::cout << "Token: " << str << " | "
+            << "Time: " << time << "s" << std::endl;
+}
+
+/// Load parameters into data container.
+void loadParameters(const std::string &paramFilePath,
+                    MemRef<float, 1> &params) {
+  const auto loadStart = std::chrono::high_resolution_clock::now();
+  std::ifstream paramFile(paramFilePath, std::ios::in | std::ios::binary);
+  if (!paramFile.is_open()) {
+    throw std::runtime_error("[Error] Failed to open params file!");
+  }
+  printLogLabel();
+  std::cout << "Loading params..." << std::endl;
+  printLogLabel();
+  std::cout << "Params file: " << std::filesystem::canonical(paramFilePath)
+            << std::endl;
+  paramFile.read(reinterpret_cast<char *>(params.getData()),
+                 sizeof(float) * (params.getSize()));
+  if (paramFile.fail()) {
+    throw std::runtime_error("Error occurred while reading params file!");
+  }
+  paramFile.close();
+  const auto loadEnd = std::chrono::high_resolution_clock::now();
+  const std::chrono::duration<double, std::milli> loadTime =
+      loadEnd - loadStart;
+  printLogLabel();
+  std::cout << "Params load time: " << (double)(loadTime.count()) / 1000
+            << "s\n"
+            << std::endl;
+}
+
+/// Conduct audio data preprocessing.
+void runPreprocess(dap::Audio<double, 1> &rawAudioContainer,
+                   MemRef<float, 3> &audioFeatures) {
+  printLogLabel();
+  std::cout << "Preprocessing audio..." << std::endl;
+  const auto loadStart = std::chrono::high_resolution_clock::now();
+  dap::whisperPreprocess(&rawAudioContainer, &audioFeatures);
+  const auto loadEnd = std::chrono::high_resolution_clock::now();
+  const std::chrono::duration<double, std::milli> loadTime =
+      loadEnd - loadStart;
+  printLogLabel();
+  std::cout << "Audio preprocess time: " << (double)(loadTime.count()) / 1000
+            << "s\n"
+            << std::endl;
+}
+
+/// Find the index of the max value.
+int findMaxIndex(const float *start, const float *end) {
+  return std::distance(start, std::max_element(start, end));
+}
+
+// -----------------------------------------------------------------------------
+// Whisper Inference Main Entry
+// -----------------------------------------------------------------------------
+
+int main() {
+
+  /// Print the title of this example.
+  const std::string title = "Whisper Inference Powered by Buddy Compiler";
+  std::cout << "\033[33;1m" << title << "\033[0m" << std::endl;
+
+  /// Define directories of vocabulary and parameter file.
+  const std::string vocabDir = "../../examples/BuddyWhisper/vocab.txt";
+  const std::string paramsDir = "../../examples/BuddyWhisper/arg0.data";
+
+  /// Initialize data containers
+  //  - Result containers.
+  //  - Output container.
+  //  - Parameters container.
+  Text<size_t, 2> outputContainer;
+  Audio<double, 1> rawAudioContainer("../../examples/BuddyWhisper/audio.wav");
+  MemRef<float, 3> audioInput({1, 80, 3000});
+  MemRef<float, 3> resultContainer[2] = {
+      MemRef<float, 3>({1, 1500, 512}, false, 0),
+      MemRef<float, 3>({1, 448, MaxVocabSize}, false, 0),
+  };
+  MemRef<size_t, 2> textContainer({1, MaxTokenLength}, 50258);
+  MemRef<float, 1> paramsContainer({ParamsSize});
+
+  /// Fill data into containers
+  //  - Output: register vocabulary.
+  //  - Parameters: load parameters from the `arg0` file into the container.
+  //  - Input: compute audioInput.
+  outputContainer.loadVocab(vocabDir);
+  loadParameters(paramsDir, paramsContainer);
+  runPreprocess(rawAudioContainer, audioInput);
+
+  /// Run Whisper Inference
+  //  - Perform the forward function.
+  //  - Find and append the generated token.
+  //  - Continue iterating until the terminal condition is met.
+
+  for (size_t i = 0; i < MaxTokenLength - 1; i++) {
+    const auto inferenceStart = std::chrono::high_resolution_clock::now();
+    // Execute the forward pass of the model.
+    _mlir_ciface_forward(resultContainer, &paramsContainer, &audioInput,
+                         &textContainer);
+    const auto inferenceEnd = std::chrono::high_resolution_clock::now();
+    const std::chrono::duration<double, std::milli> inferenceTime =
+        inferenceEnd - inferenceStart;
+
+    // Determine the generated token.
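+    // resultContainer[1] holds the decoder logits of shape [1, 448, 51865]:
+    // row i scores the token that follows position i in the token buffer,
+    // so greedy decoding takes the argmax of the current row.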
+ const float *startPtr = resultContainer[1].getData() + i * MaxVocabSize; + const float *endPtr = startPtr + MaxVocabSize; + + int maxIndex = findMaxIndex(startPtr, endPtr); + std::string tok = outputContainer.getStr(maxIndex); + // Print the generated token and inference time. + printIterInfo(i, tok, inferenceTime.count() / 1000); + + // Stop if the end token (50257, <|endoftext|>) is generated. + if (maxIndex == 50257) { + break; + } + // Append the generated token into the output container. + textContainer.getData()[i + 1] = maxIndex; + outputContainer.appendTokenIdx(maxIndex); + + free(resultContainer[0].release()); + free(resultContainer[1].release()); + } + + /// Print the final result + std::cout << "\033[33;1m[Output]\033[0m " << outputContainer.revertWhisper() + << std::endl; + + return 0; +} diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 7ec0d3b4f..3aa1195d1 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -16,6 +16,14 @@ if (BUDDY_LENET_EXAMPLES) add_subdirectory(BuddyLeNet) endif() +if(BUDDY_WHISPER_EXAMPLES) + add_subdirectory(BuddyWhisper) +endif() + +if (BUDDY_MOBILENETV3_EXAMPLES) + add_subdirectory(BuddyMobileNetV3) +endif() + if(BUDDY_DSL_EXAMPLES) add_subdirectory(ToyDSL) endif() @@ -31,6 +39,7 @@ set(BUDDY_EXAMPLES_DEPENDS FileCheck count not buddy-opt buddy-translate + mlir-cpu-runner ) add_lit_testsuite(check-examples "Checking the buddy-mlir examples..." diff --git a/examples/ConvOpt/CMakeLists.txt b/examples/ConvOpt/CMakeLists.txt index 83aa26b68..e01f2b46c 100644 --- a/examples/ConvOpt/CMakeLists.txt +++ b/examples/ConvOpt/CMakeLists.txt @@ -16,14 +16,14 @@ message(STATUS "Spliting size: ${SPLITING_SIZE}") add_custom_command(OUTPUT conv2d.o COMMAND ${CMAKE_BINARY_DIR}/bin/buddy-opt ${BUDDY_EXAMPLES_DIR}/ConvOpt/conv2d.mlir -conv-vectorization="strip-mining=${SPLITING_SIZE}" -lower-affine -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm -llvm-request-c-wrappers -convert-func-to-llvm -reconcile-unrealized-casts | - ${LLVM_MLIR_BINARY_DIR}/mlir-translate --mlir-to-llvmir | - ${LLVM_MLIR_BINARY_DIR}/llc -mtriple=${BUDDY_TARGET_TRIPLE} -mattr=${BUDDY_OPT_ATTR} --filetype=obj -o ${BUDDY_BINARY_DIR}/../examples/ConvOpt/conv2d.o + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate --mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llc -mtriple=${BUDDY_TARGET_TRIPLE} -mattr=${BUDDY_OPT_ATTR} --filetype=obj -o ${BUDDY_BINARY_DIR}/../examples/ConvOpt/conv2d.o DEPENDS buddy-opt) # add_custom_command(OUTPUT conv2d.o -# COMMAND ${LLVM_MLIR_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/ConvOpt/conv2d.mlir -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --finalize-memref-to-llvm -convert-func-to-llvm='emit-c-wrappers=1' -reconcile-unrealized-casts | -# ${LLVM_MLIR_BINARY_DIR}/mlir-translate --mlir-to-llvmir | -# ${LLVM_MLIR_BINARY_DIR}/llc -mtriple=${BUDDY_OPT_TRIPLE} -mattr=${BUDDY_OPT_ATTR} --filetype=obj -o ${BUDDY_BINARY_DIR}/../examples/ConvOpt/conv2d.o +# COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/ConvOpt/conv2d.mlir -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --finalize-memref-to-llvm -convert-func-to-llvm='emit-c-wrappers=1' -reconcile-unrealized-casts | +# ${LLVM_TOOLS_BINARY_DIR}/mlir-translate --mlir-to-llvmir | +# ${LLVM_TOOLS_BINARY_DIR}/llc -mtriple=${BUDDY_OPT_TRIPLE} -mattr=${BUDDY_OPT_ATTR} --filetype=obj -o ${BUDDY_BINARY_DIR}/../examples/ConvOpt/conv2d.o # DEPENDS buddy-opt) 
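+# The custom command above builds conv2d.o in three stages: buddy-opt applies
+# the strip-mining vectorization and lowers to the LLVM dialect, mlir-translate
+# emits LLVM IR, and llc compiles that IR into an object file for the static
+# library below.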
 add_library(Conv2D STATIC conv2d.o)
diff --git a/examples/DAPDialect/CMakeLists.txt b/examples/DAPDialect/CMakeLists.txt
index b147d5604..dff9b10ff 100644
--- a/examples/DAPDialect/CMakeLists.txt
+++ b/examples/DAPDialect/CMakeLists.txt
@@ -20,6 +20,7 @@ add_executable(buddy-fir FIRLowpass.cpp)
 add_dependencies(buddy-fir buddy-opt)
 target_link_libraries(buddy-fir
   BuddyLibDAP
+  mlir_c_runner_utils
 )
 
 #-------------------------------------------------------------------------------
@@ -30,6 +31,7 @@ add_executable(buddy-biquad biquad.cpp)
 add_dependencies(buddy-biquad buddy-opt)
 target_link_libraries(buddy-biquad
   BuddyLibDAP
+  mlir_c_runner_utils
 )
 
 #-------------------------------------------------------------------------------
@@ -40,10 +42,23 @@ add_executable(buddy-iir-scalar IIRLowpass.cpp)
 add_dependencies(buddy-iir-scalar buddy-opt)
 target_link_libraries(buddy-iir-scalar
   BuddyLibDAP
+  mlir_c_runner_utils
 )
 
 add_executable(buddy-iir-vectorization IIRVectorization.cpp)
 add_dependencies(buddy-iir-vectorization buddy-opt)
 target_link_libraries(buddy-iir-vectorization
-  BuddyLibDAPVectorization
+  BuddyLibDAP
+  mlir_c_runner_utils
+)
+
+#-------------------------------------------------------------------------------
+# Buddy DAP Dialect WhisperPreprocess Operation
+#-------------------------------------------------------------------------------
+
+add_executable(buddy-whisper-preprocess WhisperPreprocess.cpp)
+add_dependencies(buddy-whisper-preprocess buddy-opt)
+target_link_libraries(buddy-whisper-preprocess
+  BuddyLibDAP
+  mlir_c_runner_utils
 )
diff --git a/examples/DAPDialect/FIRLowpass.cpp b/examples/DAPDialect/FIRLowpass.cpp
index cfce56091..3a8217730 100644
--- a/examples/DAPDialect/FIRLowpass.cpp
+++ b/examples/DAPDialect/FIRLowpass.cpp
@@ -14,45 +14,76 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements an end to end example for fir filter in buddy-mlir. It
-// generates coefficients for a filter and apply it on a piece of mono audio,
-// then saves the audio.
-// This file will be linked with the object file generated by mlir to generate
-// the executable file.
+// An end-to-end example of an FIR (Finite Impulse Response) operation in
+// buddy-mlir.
 //
 //===----------------------------------------------------------------------===//
 
 #include <buddy/DAP/DAP.h>
+#include <chrono>
 #include <iostream>
 
 using namespace dap;
 using namespace std;
 
-int main(int argc, char *argv[]) {
-  string fileName = "../../tests/Interface/core/NASA_Mars.wav";
-  ;
-  string saveFileName = "FIR_NASA_Mars.wav";
-  if (argc >= 2) {
-    fileName = argv[1];
-  }
-  if (argc == 3) {
-    saveFileName = argv[2];
-  }
-  cout << "Usage: FIRLowpass [loadPath] [savePath]" << endl;
-  cout << "Current specified path: \n";
-  cout << "Load: " << fileName << endl;
-  cout << "Save: " << saveFileName << endl;
+// Print [Log] label in bold blue format.
+void printLogLabel() { std::cout << "\033[34;1m[Log] \033[0m"; }
+
+int main() {
+  // Print the title of this example.
+  const std::string title = "FIR Operation Powered by Buddy Compiler";
+  std::cout << "\033[33;1m" << title << "\033[0m" << std::endl;
+
+  // Generate the kernel for a FIR filter operation.
+  // Params:
+  //   Input kernel: Stores generated kernel data.
+  //   Type: Specifies the window type from the WINDOW_TYPE enum class.
+  //   Length: The length of the filter.
+  //   Cutoff: The lowpass cutoff frequency.
+  //   Argument: Filter-specific arguments, with size limited by the
+  //   WINDOW_TYPE.
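+  // The generated kernel h is applied as a discrete convolution,
+  //   y[n] = sum_{k=0}^{len-1} h[k] * x[n - k],
+  // which is what the dap::fir call below computes over the audio samples.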
   intptr_t kernelSize = 100;
   MemRef<float, 1> kernel(&kernelSize);
-  dap::firLowpass<float, 1>(kernel, dap::WINDOW_TYPE::BLACKMANHARRIS7,
-                            kernelSize, 0.3, nullptr);
-  auto aud = dap::Audio<float, 1>(fileName);
-  aud.getAudioFile().printSummary();
-  dap::Audio<float, 1> output;
-  output.fetchMetadata(aud.getAudioFile());
-  output.getAudioFile().setAudioBuffer(nullptr);
-  dap::fir(&aud.getMemRef(), &kernel, &output.getMemRef());
-  cout << "Saving file:" << endl;
-  cout << (output.save(saveFileName) ? "OK" : "ERROR") << endl;
+  dap::firLowpass<float, 1>(/*input=*/kernel,
+                            /*type=*/dap::WINDOW_TYPE::BLACKMANHARRIS7,
+                            /*len=*/kernelSize, /*cutoff=*/0.3,
+                            /*args=*/nullptr);
+
+  // Initialize data containers.
+  // Params:
+  //   Input container: Stores the raw audio data.
+  // Returns:
+  //   Output memory reference: Provides a MemRef for saving the output.
+  Audio<float, 1> inputContainer("../../tests/Interface/core/TestAudio.wav");
+  intptr_t samplesNum = static_cast<intptr_t>(inputContainer.getSamplesNum());
+  MemRef<float, 1> outputMemRef(&samplesNum);
+
+  // Apply the FIR filter operation to the audio data.
+  printLogLabel();
+  std::cout << "Running FIR operation..." << std::endl;
+  const auto loadStart = std::chrono::high_resolution_clock::now();
+  dap::fir(&inputContainer, &kernel, &outputMemRef);
+  const auto loadEnd = std::chrono::high_resolution_clock::now();
+  const std::chrono::duration<double, std::milli> loadTime =
+      loadEnd - loadStart;
+  printLogLabel();
+  std::cout << "Audio processing time: " << (double)(loadTime.count()) / 1000
+            << "s\n"
+            << std::endl;
+
+  // Convert a MemRef object to an Audio object and set the metadata.
+  Audio<float, 1> outputContainer(std::move(outputMemRef));
+  outputContainer.setBitDepth(inputContainer.getBitDepth());
+  outputContainer.setSamplesNum(inputContainer.getSamplesNum());
+  outputContainer.setChannelsNum(inputContainer.getChannelsNum());
+  outputContainer.setSampleRate(inputContainer.getSampleRate());
+
+  // Save the processed data to an audio file.
+  std::string saveFileName = "FIRTestAudio.wav";
+  outputContainer.saveToFile(saveFileName, "wave");
+  printLogLabel();
+  std::cout << "Processed audio data saved in: " << saveFileName << "\n"
+            << std::endl;
+
   return 0;
 }
diff --git a/examples/DAPDialect/IIRLowpass.cpp b/examples/DAPDialect/IIRLowpass.cpp
index 1b69ec08b..ec5de06c9 100644
--- a/examples/DAPDialect/IIRLowpass.cpp
+++ b/examples/DAPDialect/IIRLowpass.cpp
@@ -14,52 +14,81 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements an end to end example for iir filter in buddy-mlir. It
-// generates coefficients for a filter and apply it on a piece of mono audio,
-// then saves the audio.
-// This file will be linked with the object file generated by mlir to generate
-// the executable file.
+// An end-to-end example of the scalar version IIR (Infinite Impulse Response)
+// operation in buddy-mlir.
 //
 //===----------------------------------------------------------------------===//
 
 #include <buddy/DAP/DAP.h>
+#include <chrono>
 #include <iostream>
 
 using namespace dap;
 using namespace std;
 
+// Print [Log] label in bold blue format.
+void printLogLabel() { std::cout << "\033[34;1m[Log] \033[0m"; }
+
 int main(int argc, char *argv[]) {
-  string fileName = "../../tests/Interface/core/NASA_Mars.wav";
-  string saveFileName = "IIR_LOWPASS_NASA_Mars.wav";
-  if (argc >= 2) {
-    fileName = argv[1];
-  }
-  if (argc == 3) {
-    saveFileName = argv[2];
-  }
-  cout << "Usage: IIRLowpass [loadPath] [savePath]" << endl;
-  cout << "Current specified path: \n";
-  cout << "Load: " << fileName << endl;
-  cout << "Save: " << saveFileName << endl;
-  // Order of butterworth filter
+  // Print the title of this example.
+  const std::string title =
+      "Scalar Version IIR Operation Powered by Buddy Compiler";
+  std::cout << "\033[33;1m" << title << "\033[0m" << std::endl;
+
+  // Allocate kernel MemRef for an IIR filter operation.
+  // Params:
+  //   Order: The order of the butterworth filter.
+  //   Parameter size: Each SOS matrix has 6 parameters.
   int order = 8;
-  // Each SOS matrix has 6 paramters.
   intptr_t kernelSize[2] = {int(order / 2), 6};
   MemRef<float, 2> kernel(kernelSize);
-  // cutoff frequency = 1000, fs = 48000.
-  dap::iirLowpass<float, 2>(kernel, dap::butterworth(order), 1000,
-                            48000);
-  auto aud = dap::Audio<float, 1>(fileName);
-  aud.getAudioFile().printSummary();
-  dap::Audio<float, 1> output;
-  output.fetchMetadata(aud.getAudioFile());
-  output.getAudioFile().setAudioBuffer(nullptr);
+  // Generate the kernel for an IIR filter operation.
+  // Params:
+  //   Input kernel: Stores generated kernel data.
+  //   Lowpass filter: Supports butterworth filter up to order 12 for now.
+  //   Lowpass frequency: The lowpass cutoff frequency.
+  //   Sampling frequency: The rate at which the input data is sampled.
+  dap::iirLowpass<float, 2>(/*kernel=*/kernel,
+                            /*filter=*/dap::butterworth(order),
+                            /*frequency=*/1000,
+                            /*fs=*/48000);
+
+  // Initialize data containers.
+  // Params:
+  //   Input container: Stores the raw audio data.
+  // Returns:
+  //   Output memory reference: Provides a MemRef for saving the output.
+  Audio<float, 1> inputContainer("../../tests/Interface/core/TestAudio.wav");
+  intptr_t samplesNum = static_cast<intptr_t>(inputContainer.getSamplesNum());
+  MemRef<float, 1> outputMemRef(&samplesNum);
+
+  // Apply scalar version IIR operation to the audio data.
+  printLogLabel();
+  std::cout << "Running scalar version IIR operation..." << std::endl;
+  const auto loadStart = std::chrono::high_resolution_clock::now();
+  dap::IIR(&inputContainer, &kernel, &outputMemRef);
+  const auto loadEnd = std::chrono::high_resolution_clock::now();
+  const std::chrono::duration<double, std::milli> loadTime =
+      loadEnd - loadStart;
+  printLogLabel();
+  std::cout << "Audio processing time: " << (double)(loadTime.count()) / 1000
+            << "s\n"
+            << std::endl;
 
-  dap::IIR(&aud.getMemRef(), &kernel, &output.getMemRef());
+  // Convert a MemRef object to an Audio object and set the metadata.
+  Audio<float, 1> outputContainer(std::move(outputMemRef));
+  outputContainer.setBitDepth(inputContainer.getBitDepth());
+  outputContainer.setSamplesNum(inputContainer.getSamplesNum());
+  outputContainer.setChannelsNum(inputContainer.getChannelsNum());
+  outputContainer.setSampleRate(inputContainer.getSampleRate());
 
-  cout << "Saving file:" << endl;
-  cout << (output.save(saveFileName) ? "OK" : "ERROR") << endl;
+  // Save the processed data to an audio file.
+  std::string saveFileName = "ScalarVersionIIRTestAudio.wav";
+  outputContainer.saveToFile(saveFileName, "wave");
+  printLogLabel();
+  std::cout << "Processed audio data saved in: " << saveFileName << "\n"
+            << std::endl;
 
   return 0;
 }
diff --git a/examples/DAPDialect/IIRVectorization.cpp b/examples/DAPDialect/IIRVectorization.cpp
index c7d0c1955..e766c8588 100644
--- a/examples/DAPDialect/IIRVectorization.cpp
+++ b/examples/DAPDialect/IIRVectorization.cpp
@@ -14,53 +14,82 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements an end to end example for iir filter in buddy-mlir. It
-// generates coefficients for a filter and apply it on a piece of mono audio,
-// then saves the audio.
-// This file will be linked with the object file which use dap vectorization
-// pass to generate the executable file.
+// An end-to-end example of the vectorized IIR (Infinite Impulse Response)
+// operation in buddy-mlir.
 //
 //===----------------------------------------------------------------------===//
 
 #include <buddy/DAP/DAP.h>
+#include <chrono>
 #include <iostream>
 
 using namespace dap;
 using namespace std;
 
-int main(int argc, char *argv[]) {
-  string fileName = "../../tests/Interface/core/NASA_Mars.wav";
-  string saveFileName = "IIR_VECTORIZATION_PASS_NASA_Mars.wav";
-  if (argc >= 2) {
-    fileName = argv[1];
-  }
-  if (argc == 3) {
-    saveFileName = argv[2];
-  }
-  cout << "Usage: IIRVectorizationPass [loadPath] [savePath]" << endl;
-  cout << "Current specified path: \n";
-  cout << "Load: " << fileName << endl;
-  cout << "Save: " << saveFileName << endl;
-  // Order for butterworth filter.
+// Print [Log] label in bold blue format.
+void printLogLabel() { std::cout << "\033[34;1m[Log] \033[0m"; }
+
+int main() {
+  // Print the title of this example.
+  const std::string title =
+      "Vectorized IIR Operation Powered by Buddy Compiler";
+  std::cout << "\033[33;1m" << title << "\033[0m" << std::endl;
+
+  // Allocate kernel MemRef for an IIR filter operation.
+  // Params:
+  //   Order: The order of the butterworth filter.
+  //   Parameter size: Each SOS matrix has 6 parameters.
   int order = 8;
-  // Each SOS matrix has 6 paramters.
   intptr_t kernelSize[2] = {int(order / 2), 6};
   MemRef<float, 2> kernel(kernelSize);
-  // cutoff frequency = 1000, fs = 48000.
-  dap::iirLowpass<float, 2>(kernel, dap::butterworth(order), 1000,
-                            48000);
-  auto aud = dap::Audio<float, 1>(fileName);
-  aud.getAudioFile().printSummary();
-  dap::Audio<float, 1> output;
-  output.fetchMetadata(aud.getAudioFile());
-  output.getAudioFile().setAudioBuffer(nullptr);
+  // Generate the kernel for an IIR filter operation.
+  // Params:
+  //   Input kernel: Stores generated kernel data.
+  //   Lowpass filter: Supports butterworth filter up to order 12 for now.
+  //   Lowpass frequency: The lowpass cutoff frequency.
+  //   Sampling frequency: The rate at which the input data is sampled.
+  dap::iirLowpass<float, 2>(/*kernel=*/kernel,
+                            /*filter=*/dap::butterworth(order),
+                            /*frequency=*/1000,
+                            /*fs=*/48000);
+
+  // Initialize data containers.
+  // Params:
+  //   Input container: Stores the raw audio data.
+  // Returns:
+  //   Output memory reference: Provides a MemRef for saving the output.
+  Audio<float, 1> inputContainer("../../tests/Interface/core/TestAudio.wav");
+  intptr_t samplesNum = static_cast<intptr_t>(inputContainer.getSamplesNum());
+  MemRef<float, 1> outputMemRef(&samplesNum);
 
-  dap::IIR(&aud.getMemRef(), &kernel, &output.getMemRef(),
+  // Apply vectorized IIR operation to the audio data.
+  printLogLabel();
+  std::cout << "Running vectorized IIR operation..." << std::endl;
+  const auto loadStart = std::chrono::high_resolution_clock::now();
+  dap::IIR(&inputContainer, &kernel, &outputMemRef,
           /*isVectorization=*/true);
+  const auto loadEnd = std::chrono::high_resolution_clock::now();
+  const std::chrono::duration<double, std::milli> loadTime =
+      loadEnd - loadStart;
+  printLogLabel();
+  std::cout << "Audio processing time: " << (double)(loadTime.count()) / 1000
+            << "s\n"
+            << std::endl;
+
+  // Convert a MemRef object to an Audio object and set the metadata.
+  Audio<float, 1> outputContainer(std::move(outputMemRef));
+  outputContainer.setBitDepth(inputContainer.getBitDepth());
+  outputContainer.setSamplesNum(inputContainer.getSamplesNum());
+  outputContainer.setChannelsNum(inputContainer.getChannelsNum());
+  outputContainer.setSampleRate(inputContainer.getSampleRate());
 
-  cout << "Saving file:" << endl;
-  cout << (output.save(saveFileName) ? "OK" : "ERROR") << endl;
+  // Save the processed data to an audio file.
+  std::string saveFileName = "VectorizedIIRTestAudio.wav";
+  outputContainer.saveToFile(saveFileName, "wave");
+  printLogLabel();
+  std::cout << "Processed audio data saved in: " << saveFileName << "\n"
+            << std::endl;
 
   return 0;
 }
diff --git a/examples/DAPDialect/WhisperPreprocess.cpp b/examples/DAPDialect/WhisperPreprocess.cpp
new file mode 100644
index 000000000..db69ac836
--- /dev/null
+++ b/examples/DAPDialect/WhisperPreprocess.cpp
@@ -0,0 +1,77 @@
+//===- WhisperPreprocess.cpp ----------------------------------------------===//
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+//
+// An example of the Whisper Preprocessor operation.
+//
+//===----------------------------------------------------------------------===//
+
+#include <buddy/DAP/DAP.h>
+#include <chrono>
+#include <fstream>
+#include <iostream>
+
+using namespace dap;
+using namespace std;
+
+// Print [Log] label in bold blue format.
+void printLogLabel() { std::cout << "\033[34;1m[Log] \033[0m"; }
+
+// Write preprocessing results to a text file.
+void printResult(MemRef<float, 3> &outputMemRef) {
+  ofstream fout("whisperPreprocessResult.txt");
+  // Print title.
+  fout << "-----------------------------------------" << std::endl;
+  fout << "[ Whisper Preprocess Result ]" << std::endl;
+  fout << "-----------------------------------------" << std::endl;
+  // Print result data.
+  for (int i = 0; i < 240000; ++i) {
+    fout << outputMemRef[i] << std::endl;
+  }
+  fout.close();
+}
+
+int main() {
+  // Print the title of this example.
+  const std::string title = "Whisper Preprocess Powered by Buddy Compiler";
+  std::cout << "\033[33;1m" << title << "\033[0m" << std::endl;
+
+  // Initialize data containers.
+  // Params:
+  //   Input container: Stores raw audio data.
+  // Returns:
+  //   Output memory reference: Features formatted as memref<1x80x3000xf32>.
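+  //   The 1x80x3000 layout is Whisper's standard input: 80 log-Mel bins
+  //   across 3000 frames, i.e. 30 seconds of 16 kHz audio at a 10 ms hop,
+  //   matching the audioInput container in whisper-main.cpp.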
+  Audio<double, 1> inputContainer("../../examples/BuddyWhisper/audio.wav");
+  float *outputAlign = new float[240000];
+  intptr_t outputSizes[3] = {1, 80, 3000};
+  MemRef<float, 3> outputMemRef(outputAlign, outputSizes);
+
+  // Compute audio features from raw audio data.
+  printLogLabel();
+  std::cout << "Preprocessing audio..." << std::endl;
+  const auto loadStart = std::chrono::high_resolution_clock::now();
+  dap::whisperPreprocess(&inputContainer, &outputMemRef);
+  const auto loadEnd = std::chrono::high_resolution_clock::now();
+  const std::chrono::duration<double, std::milli> loadTime =
+      loadEnd - loadStart;
+  printLogLabel();
+  std::cout << "Audio preprocess time: " << (double)(loadTime.count()) / 1000
+            << "s\n"
+            << std::endl;
+
+  // printResult(outputMemRef);
+
+  return 0;
+}
diff --git a/examples/DAPDialect/biquad.cpp b/examples/DAPDialect/biquad.cpp
index 14a78084a..e606c2d0e 100644
--- a/examples/DAPDialect/biquad.cpp
+++ b/examples/DAPDialect/biquad.cpp
@@ -14,45 +14,70 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements an end to end example for biquad filter in buddy-mlir.
-// It generates coefficients for a filter and apply it on a piece of mono audio,
-// then saves the audio.
-// This file will be linked with the object file generated by mlir to generate
-// the executable file.
+// An end-to-end example of a biquad operation in buddy-mlir.
 //
 //===----------------------------------------------------------------------===//
 
 #include <buddy/DAP/DAP.h>
+#include <chrono>
 #include <iostream>
 
 using namespace dap;
 using namespace std;
 
-int main(int argc, char *argv[]) {
-  string fileName = "../../tests/Interface/core/NASA_Mars.wav";
-  string saveFileName = "BIQUAD_NASA_Mars.wav";
-  if (argc >= 2) {
-    fileName = argv[1];
-  }
-  if (argc == 3) {
-    saveFileName = argv[2];
-  }
-  cout << "Usage: BiquadLowpass [loadPath] [savePath]" << endl;
-  cout << "Current specified path: \n";
-  cout << "Load: " << fileName << endl;
-  cout << "Save: " << saveFileName << endl;
+// Print [Log] label in bold blue format.
+void printLogLabel() { std::cout << "\033[34;1m[Log] \033[0m"; }
+
+int main() {
+  // Print the title of this example.
+  const std::string title = "Biquad Operation Powered by Buddy Compiler";
+  std::cout << "\033[33;1m" << title << "\033[0m" << std::endl;
+
+  // Generate the kernel for a biquad filter operation.
+  // Params:
+  //   Input kernel: Stores generated kernel data.
+  //   Frequency: Normalized frequency (frequency_Hz / samplerate_Hz).
+  //   Quality factor: Defines the filter's bandwidth relative to its
+  //   center frequency.
   intptr_t kernelSize = 6;
   MemRef<float, 1> kernel(&kernelSize);
-  dap::biquadLowpass<float, 1>(kernel, 0.3, -1.0);
-  auto aud = dap::Audio<float, 1>(fileName);
-  aud.getAudioFile().printSummary();
-  dap::Audio<float, 1> output;
-  output.fetchMetadata(aud.getAudioFile());
-  output.getAudioFile().setAudioBuffer(nullptr);
+  dap::biquadLowpass<float, 1>(kernel, /*frequency=*/0.3, /*Q=*/-1.0);
+
+  // Initialize data containers.
+  // Params:
+  //   Input container: Stores the raw audio data.
+  // Returns:
+  //   Output memory reference: Provides a MemRef for saving the output.
+  Audio<float, 1> inputContainer("../../tests/Interface/core/TestAudio.wav");
+  intptr_t samplesNum = static_cast<intptr_t>(inputContainer.getSamplesNum());
+  MemRef<float, 1> outputMemRef(&samplesNum);
+
+  // Apply the biquad filter operation to the audio data.
+  printLogLabel();
+  std::cout << "Running biquad operation..."
<< std::endl; + const auto loadStart = std::chrono::high_resolution_clock::now(); + dap::biquad(&inputContainer, &kernel, &outputMemRef); + const auto loadEnd = std::chrono::high_resolution_clock::now(); + const std::chrono::duration loadTime = + loadEnd - loadStart; + printLogLabel(); + std::cout << "Audio processing time: " << (double)(loadTime.count()) / 1000 + << "s\n" + << std::endl; + + // Convert a MemRef object to an Audio object and set the metadata. + Audio outputContainer(std::move(outputMemRef)); + outputContainer.setBitDepth(inputContainer.getBitDepth()); + outputContainer.setSamplesNum(inputContainer.getSamplesNum()); + outputContainer.setChannelsNum(inputContainer.getChannelsNum()); + outputContainer.setSampleRate(inputContainer.getSampleRate()); - dap::biquad(&aud.getMemRef(), &kernel, &output.getMemRef()); + // Save the processed data to an audio file. + std::string saveFileName = "BiquadTestAudio.wav"; + outputContainer.saveToFile(saveFileName, "wave"); + printLogLabel(); + std::cout << "Processed audio data saved in: " << saveFileName << "\n" + << std::endl; - cout << "Saving file:" << endl; - cout << (output.save(saveFileName) ? "OK" : "ERROR") << endl; return 0; } diff --git a/examples/MLIRCF/.gitignore b/examples/MLIRCF/.gitignore new file mode 100644 index 000000000..790429d34 --- /dev/null +++ b/examples/MLIRCF/.gitignore @@ -0,0 +1,3 @@ +log* +core +a.out diff --git a/examples/MLIRCF/cf-iteration-exit.mlir b/examples/MLIRCF/cf-iteration-exit.mlir new file mode 100644 index 000000000..89281c9e3 --- /dev/null +++ b/examples/MLIRCF/cf-iteration-exit.mlir @@ -0,0 +1,47 @@ +// RUN: buddy-opt %s \ +// RUN: -convert-vector-to-llvm \ +// RUN: -convert-func-to-llvm \ +// RUN: -reconcile-unrealized-casts \ +// RUN: | mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// The example is equivalent to the following code. 
+// int main() {
+//   int val = 0;
+//   for (int i = 1; i < 5; i++) {
+//     val += 5;
+//     if (i == 3) {
+//       std::cout << val << std::endl;
+//       return 0;
+//     }
+//   }
+//   return 0;
+// }
+
+module {
+  func.func @main() {
+    %c0 = arith.constant 0 : index
+    %c3 = arith.constant 3 : index
+    %c5 = arith.constant 5 : index
+    %c1 = arith.constant 1 : index
+    %cst_0 = arith.constant 0.000000e+00 : f32
+    %cst_5 = arith.constant 5.000000e+00 : f32
+    cf.br ^bb1(%c0, %cst_0 : index, f32)
+  ^bb1(%0: index, %1: f32):
+    %2 = arith.cmpi slt, %0, %c5 : index
+    cf.cond_br %2, ^bb2, ^bb4(%1: f32)
+  ^bb2:
+    %3 = arith.addf %1, %cst_5 : f32
+    %4 = arith.addi %0, %c1 : index
+    cf.br ^bb3 (%4, %3 : index, f32)
+  ^bb3(%iter_idx: index, %iter_var: f32):
+    %eq = arith.cmpi eq, %iter_idx, %c3 : index
+    cf.cond_br %eq, ^bb4(%iter_var: f32), ^bb1(%iter_idx, %iter_var: index, f32)
+  ^bb4(%ret_var: f32):
+    // CHECK: 15
+    vector.print %ret_var : f32
+    return
+  }
+}
diff --git a/examples/MLIRCF/makefile b/examples/MLIRCF/makefile
new file mode 100644
index 000000000..5837ebf44
--- /dev/null
+++ b/examples/MLIRCF/makefile
@@ -0,0 +1,44 @@
+#!/bin/bash
+BUDDY_OPT := ../../build/bin/buddy-opt
+MLIR_OPT := ../../llvm/build/bin/mlir-opt
+MLIR_TRANSLATE := ../../llvm/build/bin/mlir-translate
+MLIR_CPU_RUNNER := ../../llvm/build/bin/mlir-cpu-runner
+LLC := ../../llvm/build/bin/llc
+OPT_FLAG := -O0
+CLANG := ../../llvm/build/bin/clang
+MLIR_LIB := ../../llvm/build/lib/
+BUDDY_LIB := ../../build/midend/lib/
+
+ifeq ($(shell uname),Linux)
+MLIR_RUNNER_UTILS := ../../llvm/build/lib/libmlir_runner_utils.so
+MLIR_C_RUNNER_UTILS := ../../llvm/build/lib/libmlir_c_runner_utils.so
+MLIR_ASYNC_RUNTIME := ../../llvm/build/lib/libmlir_async_runtime.so
+MTRIPLE := x86_64-unknown-linux-gnu
+else ifeq ($(shell uname),Darwin)
+MLIR_RUNNER_UTILS := ../../llvm/build/lib/libmlir_runner_utils.dylib
+MLIR_C_RUNNER_UTILS := ../../llvm/build/lib/libmlir_c_runner_utils.dylib
+MLIR_ASYNC_RUNTIME := ../../llvm/build/lib/libmlir_async_runtime.dylib
+MTRIPLE := x86_64-apple-darwin
+endif
+
+cf-iteration-exit-lower:
+	@${MLIR_OPT} ./cf-iteration-exit.mlir \
+		-convert-vector-to-llvm \
+		-convert-func-to-llvm \
+		-reconcile-unrealized-casts \
+		-o ./log.mlir
+
+cf-iteration-exit-translate:
+	@${MLIR_OPT} ./cf-iteration-exit.mlir \
+		-convert-vector-to-llvm \
+		-convert-func-to-llvm \
+		-reconcile-unrealized-casts | \
+	${MLIR_TRANSLATE} --mlir-to-llvmir -o log.ll
+
+cf-iteration-exit-run:
+	@${MLIR_OPT} ./cf-iteration-exit.mlir \
+		-convert-vector-to-llvm \
+		-convert-func-to-llvm \
+		-reconcile-unrealized-casts | \
+	${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \
+		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}
diff --git a/examples/MLIRLinalg/linalg-batch-matmul-dync.mlir b/examples/MLIRLinalg/linalg-batch-matmul-dync.mlir
new file mode 100644
index 000000000..1b910e4a3
--- /dev/null
+++ b/examples/MLIRLinalg/linalg-batch-matmul-dync.mlir
@@ -0,0 +1,67 @@
+// RUN: buddy-opt %s \
+// RUN:   -convert-linalg-to-loops -lower-affine -convert-scf-to-cf \
+// RUN:   -convert-vector-to-llvm -finalize-memref-to-llvm -convert-arith-to-llvm \
+// RUN:   -convert-func-to-llvm -reconcile-unrealized-casts \
+// RUN: | mlir-cpu-runner -e main -entry-point-result=void \
+// RUN:   -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \
+// RUN:   -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+module {
+  func.func private @printMemrefF32(memref<*xf32>)
+
+  // Definition for the batch matrix multiplication function
diff --git a/examples/MLIRLinalg/linalg-batch-matmul-dync.mlir b/examples/MLIRLinalg/linalg-batch-matmul-dync.mlir
new file mode 100644
index 000000000..1b910e4a3
--- /dev/null
+++ b/examples/MLIRLinalg/linalg-batch-matmul-dync.mlir
@@ -0,0 +1,67 @@
+// RUN: buddy-opt %s \
+// RUN:   -convert-linalg-to-loops -lower-affine -convert-scf-to-cf \
+// RUN:   -convert-vector-to-llvm -finalize-memref-to-llvm -convert-arith-to-llvm \
+// RUN:   -convert-func-to-llvm -reconcile-unrealized-casts \
+// RUN: | mlir-cpu-runner -e main -entry-point-result=void \
+// RUN:   -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \
+// RUN:   -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+module {
+  func.func private @printMemrefF32(memref<*xf32>)
+
+  // Definition for the batch matrix multiplication function.
+  func.func @buddy_batchmatmul_f32(%A: memref<?x?x?xf32>, %B: memref<?x?x?xf32>, %C: memref<?x?x?xf32>) {
+    linalg.batch_matmul
+      ins(%A, %B : memref<?x?x?xf32>, memref<?x?x?xf32>)
+      outs(%C : memref<?x?x?xf32>)
+    return
+  }
+
+  func.func @main() {
+    // Set up dims.
+    %cBatch = arith.constant 10 : index
+    %cM = arith.constant 2 : index
+    %cN = arith.constant 5 : index
+    %cK = arith.constant 4 : index
+
+    // Set init values.
+    %cf1 = arith.constant 1.0 : f32
+    %cf2 = arith.constant 2.0 : f32
+    %c0 = arith.constant 0.0 : f32
+
+    %A = memref.alloc(%cBatch, %cM, %cK) : memref<?x?x?xf32>
+    %B = memref.alloc(%cBatch, %cK, %cN) : memref<?x?x?xf32>
+    %C = memref.alloc(%cBatch, %cM, %cN) : memref<?x?x?xf32>
+
+    linalg.fill
+      ins(%cf1 : f32)
+      outs(%A : memref<?x?x?xf32>)
+
+    linalg.fill
+      ins(%cf2 : f32)
+      outs(%B : memref<?x?x?xf32>)
+
+    linalg.fill
+      ins(%c0 : f32)
+      outs(%C : memref<?x?x?xf32>)
+
+    call @buddy_batchmatmul_f32(%A, %B, %C) : (memref<?x?x?xf32>, memref<?x?x?xf32>, memref<?x?x?xf32>) -> ()
+
+    // Print output. Every element of the 10x2x5 result is 4 * (1.0 * 2.0) = 8.
+    // CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [10, 2, 5] strides = [10, 5, 1] data =
+    // CHECK-NEXT: [
+    // CHECK-SAME: [
+    // CHECK-SAME: [8, 8, 8, 8, 8],
+    // CHECK-NEXT: [8, 8, 8, 8, 8]
+    %print_C = memref.cast %C : memref<?x?x?xf32> to memref<*xf32>
+    call @printMemrefF32(%print_C) : (memref<*xf32>) -> ()
+
+    memref.dealloc %C : memref<?x?x?xf32>
+    memref.dealloc %B : memref<?x?x?xf32>
+    memref.dealloc %A : memref<?x?x?xf32>
+    return
+  }
+}
diff --git a/examples/MLIRLinalg/linalg-conv2d_nhwc_fhwc.mlir b/examples/MLIRLinalg/linalg-conv2d_nhwc_fhwc.mlir
new file mode 100644
index 000000000..2c8cc171e
--- /dev/null
+++ b/examples/MLIRLinalg/linalg-conv2d_nhwc_fhwc.mlir
@@ -0,0 +1,96 @@
+// RUN: buddy-opt %s \
+// RUN:   -convert-linalg-to-loops -lower-affine -convert-scf-to-cf \
+// RUN:   -convert-vector-to-llvm -finalize-memref-to-llvm -convert-arith-to-llvm \
+// RUN:   -convert-func-to-llvm -reconcile-unrealized-casts \
+// RUN: | mlir-cpu-runner -e main -entry-point-result=void \
+// RUN:   -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \
+// RUN:   -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+module {
+  func.func private @printMemrefF32(memref<*xf32>)
+  func.func @alloc_2d_filled_f32(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: f32) -> memref<?x?x?x?xf32> {
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %0 = memref.alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32>
+    scf.for %arg5 = %c0 to %arg0 step %c1 {
+      scf.for %arg6 = %c0 to %arg1 step %c1 {
+        scf.for %arg7 = %c0 to %arg2 step %c1 {
+          scf.for %arg8 = %c0 to %arg3 step %c1 {
+            %iarg8 = arith.index_cast %arg8 : index to i32
+            %loopf = arith.sitofp %iarg8 : i32 to f32
+            memref.store %loopf, %0[%arg5, %arg6, %arg7, %arg8] : memref<?x?x?x?xf32>
+          }
+        }
+      }
+    }
+    return %0 : memref<?x?x?x?xf32>
+  }
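+
+  // Note: the helper above stores the innermost loop index (%loopf) rather
+  // than the %arg4 fill value, so the test data varies along the innermost
+  // (channel) dimension. With 18 channels and a 4x4 filter, every output
+  // element below works out to f + 16 * sum(c^2, c = 0..17) = f + 28560,
+  // where f is the output channel index.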
+  func.func @conv_2d_nhwc_fhwc(%arg0: memref<?x?x?x?xf32>, %arg1: memref<?x?x?x?xf32>, %arg2: memref<?x?x?x?xf32>) {
+    linalg.conv_2d_nhwc_fhwc
+      ins(%arg0, %arg1 : memref<?x?x?x?xf32>, memref<?x?x?x?xf32>)
+      outs(%arg2 : memref<?x?x?x?xf32>)
+    return
+  }
+  func.func @main() {
+    // Input (image, filter) and output values.
+    %cst = arith.constant 0.500000e+00 : f32
+    %cst_0 = arith.constant 0.000000e+00 : f32
+
+    %current_image_n = arith.constant 2 : index
+    %current_image_c = arith.constant 18 : index
+    %current_image_h = arith.constant 8 : index
+    %current_image_w = arith.constant 8 : index
+
+    %current_filter_f = arith.constant 2 : index
+    %current_filter_c = arith.constant 18 : index
+    %current_filter_h = arith.constant 4 : index
+    %current_filter_w = arith.constant 4 : index
+
+    %current_output_n = arith.constant 2 : index
+    %current_output_c = arith.constant 2 : index
+    %current_output_h = arith.constant 5 : index
+    %current_output_w = arith.constant 5 : index
+
+    // Image.
+    %image = call @alloc_2d_filled_f32(%current_image_n, %current_image_h, %current_image_w, %current_image_c, %cst) : (index, index, index, index, f32) -> memref<?x?x?x?xf32>
+    // Filter.
+    %filter = call @alloc_2d_filled_f32(%current_filter_f, %current_filter_h, %current_filter_w, %current_filter_c, %cst) : (index, index, index, index, f32) -> memref<?x?x?x?xf32>
+    // Output.
+    %output = call @alloc_2d_filled_f32(%current_output_n, %current_output_h, %current_output_w, %current_output_c, %cst_0) : (index, index, index, index, f32) -> memref<?x?x?x?xf32>
+
+    call @conv_2d_nhwc_fhwc(%image, %filter, %output) : (memref<?x?x?x?xf32>, memref<?x?x?x?xf32>, memref<?x?x?x?xf32>) -> ()
+
+    %3 = memref.cast %output : memref<?x?x?x?xf32> to memref<*xf32>
+
+    // Print output (see the note after the fill helper for the 28560 values).
+    // CHECK: Unranked Memref base@ = {{.*}} rank = 4 offset = 0 sizes = [2, 5, 5, 2] strides = [50, 10, 2, 1] data =
+    // CHECK-COUNT-50: [28560, 28561]
+    call @printMemrefF32(%3) : (memref<*xf32>) -> ()
+
+    memref.dealloc %output : memref<?x?x?x?xf32>
+    memref.dealloc %image : memref<?x?x?x?xf32>
+    memref.dealloc %filter : memref<?x?x?x?xf32>
+    return
+  }
+}
diff --git a/examples/MLIRLinalg/linalg-depthwise_conv_2d_nhwc_hwc.mlir b/examples/MLIRLinalg/linalg-depthwise_conv_2d_nhwc_hwc.mlir
new file mode 100644
index 000000000..4fc2a5fc1
--- /dev/null
+++ b/examples/MLIRLinalg/linalg-depthwise_conv_2d_nhwc_hwc.mlir
@@ -0,0 +1,77 @@
+// RUN: buddy-opt %s \
+// RUN:   -convert-linalg-to-loops -lower-affine -convert-scf-to-cf \
+// RUN:   -convert-vector-to-llvm -finalize-memref-to-llvm -convert-arith-to-llvm \
+// RUN:   -convert-func-to-llvm -reconcile-unrealized-casts \
+// RUN: | mlir-cpu-runner -e main -entry-point-result=void \
+// RUN:   -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \
+// RUN:   -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+module {
+  func.func private @printMemrefF32(memref<*xf32>)
+
+  func.func @alloc_2d_filled_f32(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: f32) -> memref<?x?x?x?xf32> {
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %0 = memref.alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32>
+    scf.for %arg5 = %c0 to %arg0 step %c1 {
+      scf.for %arg6 = %c0 to %arg1 step %c1 {
+        scf.for %arg7 = %c0 to %arg2 step %c1 {
+          scf.for %arg8 = %c0 to %arg3 step %c1 {
+            %iarg8 = arith.index_cast %arg8 : index to i32
+            %loopf = arith.sitofp %iarg8 : i32 to f32
+            memref.store %loopf, %0[%arg5, %arg6, %arg7, %arg8] : memref<?x?x?x?xf32>
+          }
+        }
+      }
+    }
+    return %0 : memref<?x?x?x?xf32>
+  }
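+
+  // The filter of linalg.depthwise_conv_2d_nhwc_hwc is rank-3 (HWC), so the
+  // rank-4 helper above cannot allocate it. The 3-D variant below is a
+  // minimal sketch added here (mirroring the 4-D helper, including its
+  // index-based fill) so that the filter allocation in @main type-checks.
+  func.func @alloc_3d_filled_f32(%arg0: index, %arg1: index, %arg2: index, %arg3: f32) -> memref<?x?x?xf32> {
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %0 = memref.alloc(%arg0, %arg1, %arg2) : memref<?x?x?xf32>
+    scf.for %arg4 = %c0 to %arg0 step %c1 {
+      scf.for %arg5 = %c0 to %arg1 step %c1 {
+        scf.for %arg6 = %c0 to %arg2 step %c1 {
+          // Fill with the innermost loop index, as the 4-D helper does.
+          %iarg6 = arith.index_cast %arg6 : index to i32
+          %loopf = arith.sitofp %iarg6 : i32 to f32
+          memref.store %loopf, %0[%arg4, %arg5, %arg6] : memref<?x?x?xf32>
+        }
+      }
+    }
+    return %0 : memref<?x?x?xf32>
+  }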
+
+  func.func @depthwise_conv_2d_nhwc_hwc(%arg0: memref<?x?x?x?xf32>, %arg1: memref<?x?x?xf32>, %arg2: memref<?x?x?x?xf32>) {
+    linalg.depthwise_conv_2d_nhwc_hwc
+      {dilations = dense<[1,1]> : tensor<2xi64>, strides = dense<[1,1]> : tensor<2xi64>}
+      ins(%arg0, %arg1 : memref<?x?x?x?xf32>, memref<?x?x?xf32>)
+      outs(%arg2 : memref<?x?x?x?xf32>)
+    return
+  }
+
+  func.func @main() {
+    // Constants for input image, filter, and output sizes.
+    %cst = arith.constant 0.500000e+00 : f32
+    %cst_0 = arith.constant 0.000000e+00 : f32
+
+    %image_n = arith.constant 2 : index
+    %image_h = arith.constant 8 : index
+    %image_w = arith.constant 8 : index
+    %image_c = arith.constant 18 : index
+
+    %filter_h = arith.constant 4 : index
+    %filter_w = arith.constant 4 : index
+    %filter_c = arith.constant 18 : index
+
+    %output_n = arith.constant 2 : index
+    %output_h = arith.constant 5 : index
+    %output_w = arith.constant 5 : index
+    %output_c = arith.constant 18 : index
+
+    // Allocate and fill image, filter, and output.
+    %image = call @alloc_2d_filled_f32(%image_n, %image_h, %image_w, %image_c, %cst) : (index, index, index, index, f32) -> memref<?x?x?x?xf32>
+    %filter = call @alloc_3d_filled_f32(%filter_h, %filter_w, %filter_c, %cst) : (index, index, index, f32) -> memref<?x?x?xf32>
+    %output = call @alloc_2d_filled_f32(%output_n, %output_h, %output_w, %output_c, %cst_0) : (index, index, index, index, f32) -> memref<?x?x?x?xf32>
+
+    // Call depthwise convolution.
+    call @depthwise_conv_2d_nhwc_hwc(%image, %filter, %output) : (memref<?x?x?x?xf32>, memref<?x?x?xf32>, memref<?x?x?x?xf32>) -> ()
+
+    %output_cast = memref.cast %output : memref<?x?x?x?xf32> to memref<*xf32>
+
+    // Print the output.
+    // CHECK: Unranked Memref base@ = {{.*}} rank = 4 offset = 0 sizes = [2, 5, 5, 18] strides = [450, 90, 18, 1] data =
+    call @printMemrefF32(%output_cast) : (memref<*xf32>) -> ()
+
+    // Deallocate memory.
+    memref.dealloc %output : memref<?x?x?x?xf32>
+    memref.dealloc %image : memref<?x?x?x?xf32>
+    memref.dealloc %filter : memref<?x?x?xf32>
+    return
+  }
+}
diff --git a/examples/MLIRLinalg/linalg-matmul-opt-f32.mlir b/examples/MLIRLinalg/linalg-matmul-opt-f32.mlir
index 5111b57db..53148b0d0 100644
--- a/examples/MLIRLinalg/linalg-matmul-opt-f32.mlir
+++ b/examples/MLIRLinalg/linalg-matmul-opt-f32.mlir
@@ -1,4 +1,4 @@
-// RUN: buddy-opt -matmul-paralell-vectorization-optimize -verify-diagnostics -expand-strided-metadata -lower-affine \
+// RUN: buddy-opt -matmul-parallel-vectorization-optimize -verify-diagnostics -expand-strided-metadata -lower-affine \
 // RUN: -convert-linalg-to-loops -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm \
 // RUN: -llvm-request-c-wrappers -convert-func-to-llvm -reconcile-unrealized-casts %s \
 // RUN: | mlir-cpu-runner -O0 -e buddy_matmul_f32 -entry-point-result=void \
diff --git a/examples/MLIRLinalg/linalg-matmul-opt-i8.mlir b/examples/MLIRLinalg/linalg-matmul-opt-i8.mlir
index 9a7b72e5e..26aa92cbe 100644
--- a/examples/MLIRLinalg/linalg-matmul-opt-i8.mlir
+++ b/examples/MLIRLinalg/linalg-matmul-opt-i8.mlir
@@ -1,4 +1,4 @@
-// RUN: buddy-opt -matmul-paralell-vectorization-optimize -verify-diagnostics -expand-strided-metadata \
+// RUN: buddy-opt -matmul-parallel-vectorization-optimize -verify-diagnostics -expand-strided-metadata \
 // RUN: -lower-affine -convert-vector-to-llvm -finalize-memref-to-llvm -convert-scf-to-cf \
 // RUN: -convert-linalg-to-loops -convert-scf-to-cf -llvm-request-c-wrappers -convert-func-to-llvm \
 // RUN: -reconcile-unrealized-casts %s \
diff --git a/examples/MLIRLinalg/makefile b/examples/MLIRLinalg/makefile
index f214fa7f6..12f639f67 100644
--- a/examples/MLIRLinalg/makefile
+++ b/examples/MLIRLinalg/makefile
@@ -60,6 +60,37 @@ linalg-conv2d-tiling-run:
	-convert-func-to-llvm 
-reconcile-unrealized-casts | \ ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void -shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS} +linalg-conv2d_nhwc_fhwc-optimize-lower: + @${BUDDY_OPT} linalg-conv2d_nhwc_fhwc.mlir \ + -conv-nhwc-fhwc-optimize="vec-size=16" \ + -o ./log.mlir + +linalg-conv2d_nhwc_fhwc-tile-optimize-lower: + @${BUDDY_OPT} linalg-conv2d_nhwc_fhwc.mlir \ + -conv-nhwc-fhwc-tile-optimize="vec-size=16 tiling-height=2 tiling-width=3" \ + -o ./log.mlir + +linalg-conv2d_nhwc_fhwc-optimize-run: + @${BUDDY_OPT} linalg-conv2d_nhwc_fhwc.mlir ${MLIR_OPT_OPTIONS} \ + -conv-nhwc-fhwc-optimize="vec-size=16" \ + -lower-affine -convert-scf-to-cf \ + -convert-vector-to-llvm -finalize-memref-to-llvm -convert-arith-to-llvm \ + -convert-func-to-llvm -reconcile-unrealized-casts | \ + ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void -shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS} + +linalg-conv2d_nhwc_fhwc-tile-optimize-run: + @${BUDDY_OPT} linalg-conv2d_nhwc_fhwc.mlir ${MLIR_OPT_OPTIONS} \ + -conv-nhwc-fhwc-tile-optimize="vec-size=16 tiling-height=2 tiling-width=3" \ + -lower-affine -convert-scf-to-cf \ + -convert-vector-to-llvm -finalize-memref-to-llvm -convert-arith-to-llvm \ + -convert-func-to-llvm -reconcile-unrealized-casts | \ + ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void -shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS} + +linalg-depthwise_conv_2d_nhwc_hwc-optimize-lower: + @${BUDDY_OPT} linalg-depthwise_conv_2d_nhwc_hwc.mlir \ + -depthwise-conv-nhwc-hwc-optimize="vec-size=16" \ + -o ./log.mlir + linalg-generic-lower: @${MLIR_OPT} ./linalg-generic.mlir \ -convert-linalg-to-loops -lower-affine -convert-scf-to-cf \ @@ -177,6 +208,16 @@ linalg-batch-matmul-optimize-lower: -batchmatmul-optimize="vector-size=64" \ -o ./log.mlir +linalg-batch-matmul-tile-optimize-lower: + @${BUDDY_OPT} linalg-batch-matmul-dync.mlir ${MLIR_OPT_OPTIONS} \ + -batchmatmul-tile-optimize="vec-size=64 kernel-m=4 kernel-n=2" \ + -o ./log.mlir + +linalg-batch-matmul-scf-optimize-lower: + @${BUDDY_OPT} linalg-batch-matmul-dync.mlir ${MLIR_OPT_OPTIONS} \ + -batchmatmul-scf-optimize="vector-size=64" \ + -o ./log.mlir + linalg-batch-matmul-optimize-translate: @${BUDDY_OPT} linalg-batch-matmul-f32.mlir ${MLIR_OPT_OPTIONS} \ -batchmatmul-optimize="vector-size=64" \ @@ -248,7 +289,7 @@ linalg-batch-matmul-i8-optimize-translate: linalg-matmul-parallized-vectorized-optmize-run: @${BUDDY_OPT} linalg-matmul-opt-f32.mlir ${MLIR_OPT_OPTIONS} \ - -matmul-paralell-vectorization-optimize="vector-size=128" \ + -matmul-parallel-vectorization-optimize="vector-size=128" \ -convert-linalg-to-loops \ -expand-strided-metadata \ -lower-affine \ @@ -263,12 +304,12 @@ linalg-matmul-parallized-vectorized-optmize-run: linalg-matmul-parallized-vectorized-optmize-lower: @${BUDDY_OPT} linalg-matmul-opt-f32.mlir ${MLIR_OPT_OPTIONS} \ - -matmul-paralell-vectorization-optimize="vector-size=128" \ + -matmul-parallel-vectorization-optimize="vector-size=128" \ -o ./log.mlir linalg-matmul-parallized-vectorized-optmize-translate: @${BUDDY_OPT} linalg-matmul-opt-f32.mlir ${MLIR_OPT_OPTIONS} \ - -matmul-paralell-vectorization-optimize="vector-size=128" \ + -matmul-parallel-vectorization-optimize="vector-size=128" \ -convert-linalg-to-loops \ -expand-strided-metadata \ -lower-affine \ @@ -282,7 +323,7 @@ linalg-matmul-parallized-vectorized-optmize-translate: linalg-matmul-i8-parallized-vectorized-optmize-run: @${BUDDY_OPT} linalg-matmul-opt-i8.mlir 
${MLIR_OPT_OPTIONS} \ - -matmul-paralell-vectorization-optimize="vector-size=128" \ + -matmul-parallel-vectorization-optimize="vector-size=128" \ -convert-linalg-to-loops \ -expand-strided-metadata \ -lower-affine \ @@ -297,12 +338,12 @@ linalg-matmul-i8-parallized-vectorized-optmize-run: linalg-matmul-i8-parallized-vectorized-optmize-lower: @${BUDDY_OPT} linalg-matmul-opt-i8.mlir ${MLIR_OPT_OPTIONS} \ - -matmul-paralell-vectorization-optimize="vector-size=128" \ + -matmul-parallel-vectorization-optimize="vector-size=128" \ -o ./log.mlir linalg-matmul-i8-parallized-vectorized-optmize-translate: @${BUDDY_OPT} linalg-matmul-opt-i8.mlir ${MLIR_OPT_OPTIONS} \ - -matmul-paralell-vectorization-optimize="vector-size=128" \ + -matmul-parallel-vectorization-optimize="vector-size=128" \ -convert-linalg-to-loops \ -expand-strided-metadata \ -lower-affine \ diff --git a/examples/MLIRVector/makefile b/examples/MLIRVector/makefile index 681335c7f..ccc9e9af2 100644 --- a/examples/MLIRVector/makefile +++ b/examples/MLIRVector/makefile @@ -43,17 +43,20 @@ vector-load-run: vector-broadcast-lower: @${MLIR_OPT} ./vector-broadcast.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts -o ./log.mlir vector-broadcast-translate: @${MLIR_OPT} ./vector-broadcast.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir -o log.ll vector-broadcast-asm-x86: @${MLIR_OPT} ./vector-broadcast.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -62,6 +65,7 @@ vector-broadcast-asm-x86: vector-broadcast-asm-rv: @${MLIR_OPT} ./vector-broadcast.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -72,6 +76,7 @@ vector-broadcast-asm-rv: run-targets += vector-broadcast-run vector-broadcast-run: @${MLIR_OPT} ./vector-broadcast.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=i32 \ @@ -79,17 +84,20 @@ vector-broadcast-run: vector-fma-lower: @${MLIR_OPT} ./vector-fma.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts -o ./log.mlir vector-fma-translate: @${MLIR_OPT} ./vector-fma.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir -o log.ll vector-fma-asm-x86: @${MLIR_OPT} ./vector-fma.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -98,6 +106,7 @@ vector-fma-asm-x86: vector-fma-asm-rv: @${MLIR_OPT} ./vector-fma.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -108,6 +117,7 @@ 
vector-fma-asm-rv: run-targets += vector-fma-run vector-fma-run: @${MLIR_OPT} ./vector-fma.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=i32 \ @@ -115,17 +125,20 @@ vector-fma-run: vector-long-lower: @${MLIR_OPT} ./vector-long.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts -o ./log.mlir vector-long-translate: @${MLIR_OPT} ./vector-long.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir -o log.ll vector-long-asm-x86: @${MLIR_OPT} ./vector-long.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -134,6 +147,7 @@ vector-long-asm-x86: vector-long-asm-rv: @${MLIR_OPT} ./vector-long.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -144,6 +158,7 @@ vector-long-asm-rv: run-targets += vector-long-run vector-long-run: @${MLIR_OPT} ./vector-long.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=i32 \ @@ -187,6 +202,7 @@ vector-shape-cast-translate: run-targets += vector-shape-cast-run vector-shape-cast-run: @${MLIR_OPT} ./vector-shape-cast.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ -split-input-file -verify-diagnostics \ --reconcile-unrealized-casts | \ @@ -209,6 +225,7 @@ vector-type-cast-translate: run-targets += vector-type-cast-run vector-type-cast-run: @${MLIR_OPT} ./vector-type-cast.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ -split-input-file -verify-diagnostics \ --reconcile-unrealized-casts | \ @@ -253,6 +270,7 @@ vector-shuffle-translate: run-targets += vector-shuffle-run vector-shuffle-run: @${MLIR_OPT} ./vector-shuffle.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ -split-input-file -verify-diagnostics \ --reconcile-unrealized-casts | \ @@ -275,6 +293,7 @@ vector-splat-translate: run-targets += vector-splat-run vector-splat-run: @${MLIR_OPT} ./vector-splat.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ -split-input-file -verify-diagnostics \ --reconcile-unrealized-casts | \ @@ -297,6 +316,7 @@ vector-insert-translate: run-targets += vector-insert-run vector-insert-run: @${MLIR_OPT} ./vector-insert.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ -split-input-file -verify-diagnostics \ --reconcile-unrealized-casts | \ @@ -319,6 +339,7 @@ vector-reduction-translate: run-targets += vector-reduction-run vector-reduction-run: @${MLIR_OPT} ./vector-reduction.mlir \ + -convert-vector-to-scf 
-convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ -split-input-file -verify-diagnostics \ --reconcile-unrealized-casts | \ @@ -341,6 +362,7 @@ vector-outerproduct-translate: run-targets += vector-outerproduct-run vector-outerproduct-run: @${MLIR_OPT} ./vector-outerproduct.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ -split-input-file -verify-diagnostics \ --reconcile-unrealized-casts | \ @@ -363,6 +385,7 @@ vector-create-mask-translate: run-targets += vector-create-mask-run vector-create-mask-run: @${MLIR_OPT} ./vector-create-mask.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=i32 \ @@ -384,6 +407,7 @@ vector-extract-translate: run-targets += vector-extract-run vector-extract-run: @${MLIR_OPT} ./vector-extract.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=i32 \ @@ -405,6 +429,7 @@ vector-maskedload-translate: run-targets += vector-maskedload-run vector-maskedload-run: @${MLIR_OPT} ./vector-maskedload.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ -split-input-file -verify-diagnostics \ --reconcile-unrealized-casts | \ @@ -427,6 +452,7 @@ vector-maskedstore-translate: run-targets += vector-maskedstore-run vector-maskedstore-run: @${MLIR_OPT} ./vector-maskedstore.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ -split-input-file -verify-diagnostics \ --reconcile-unrealized-casts | \ @@ -449,6 +475,7 @@ vector-extract-strided-slice-translate: run-targets += vector-extract-strided-slice-run vector-extract-strided-slice-run: @${MLIR_OPT} ./vector-extract-strided-slice.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=i32 \ @@ -470,6 +497,7 @@ vector-constant-mask-translate: run-targets += vector-constant-mask-run vector-constant-mask-run: @${MLIR_OPT} ./vector-constant-mask.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=i32 \ @@ -491,6 +519,7 @@ vector-expandload-translate: run-targets += vector-expandload-run vector-expandload-run: @${MLIR_OPT} ./vector-expandload.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=i32 \ @@ -512,6 +541,7 @@ vector-compressstore-translate: run-targets += vector-compressstore-run vector-compressstore-run: @${MLIR_OPT} ./vector-compressstore.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=i32 \ @@ -533,6 +563,7 @@ vector-insert-strided-slice-translate: 
run-targets += vector-insert-strided-slice-run vector-insert-strided-slice-run: @${MLIR_OPT} ./vector-insert-strided-slice.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=i32 \ @@ -554,6 +585,7 @@ vector-scatter-translate: run-targets += vector-scatter-run vector-scatter-run: @${MLIR_OPT} ./vector-scatter.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ -split-input-file -verify-diagnostics \ --reconcile-unrealized-casts | \ @@ -576,6 +608,7 @@ vector-gather-translate: run-targets += vector-gather-run vector-gather-run: @${MLIR_OPT} ./vector-gather.mlir \ + -convert-vector-to-scf -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ -split-input-file -verify-diagnostics \ --reconcile-unrealized-casts | \ @@ -598,7 +631,7 @@ vector-transfer-read-translate: run-targets += vector-transfer-read-run vector-transfer-read-run: @${MLIR_OPT} ./vector-transfer-read.mlir \ - --convert-vector-to-scf --lower-affine --convert-scf-to-cf \ + --convert-vector-to-scf --lower-affine --convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=i32 \ @@ -669,3 +702,27 @@ vector-store-run: --reconcile-unrealized-casts | \ ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=i32 \ -shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS} + +vector-iteration-lower: + @${MLIR_OPT} ./vector-iteration.mlir \ + --lower-affine \ + -convert-vector-to-scf -convert-scf-to-cf \ + --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ + --reconcile-unrealized-casts -o ./log.mlir + +vector-iteration-translate: + @${MLIR_OPT} ./vector-iteration.mlir \ + --lower-affine \ + -convert-vector-to-scf -convert-scf-to-cf \ + --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ + --reconcile-unrealized-casts | \ + ${MLIR_TRANSLATE} --mlir-to-llvmir -o log.ll + +vector-iteration-run: + @${MLIR_OPT} ./vector-iteration.mlir \ + --lower-affine \ + -convert-vector-to-scf -convert-scf-to-cf \ + --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ + --reconcile-unrealized-casts | \ + ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=i32 \ + -shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS} diff --git a/examples/MLIRVector/vector-iteration.mlir b/examples/MLIRVector/vector-iteration.mlir new file mode 100644 index 000000000..22bd42580 --- /dev/null +++ b/examples/MLIRVector/vector-iteration.mlir @@ -0,0 +1,32 @@ +// RUN: buddy-opt %s \ +// RUN: -lower-affine \ +// RUN: -convert-vector-to-scf -convert-scf-to-cf \ +// RUN: -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm \ +// RUN: -reconcile-unrealized-casts \ +// RUN: | mlir-cpu-runner -e main -entry-point-result=i32 \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \ +// RUN: | FileCheck %s + +memref.global "private" @gv : memref<4x4xf32> = dense<[[0. , 1. , 2. , 3. 
], + [10., 11., 12., 13.], + [20., 21., 22., 23.], + [30., 31., 32., 33.]]> + +func.func @main() -> i32 { + %mem = memref.get_global @gv : memref<4x4xf32> + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index + %sum_0 = arith.constant dense<0.000000e+00> : vector<4xf32> + %sum = affine.for %i = 0 to 3 iter_args(%sum_iter = %sum_0) -> (vector<4xf32>) { + %load_vec1 = vector.load %mem[%c0, %c0] : memref<4x4xf32>, vector<4xf32> + %load_vec2 = vector.load %mem[%i, %c0] : memref<4x4xf32>, vector<4xf32> + %sum_next = vector.fma %load_vec1, %load_vec2, %sum_iter : vector<4xf32> + affine.yield %sum_next : vector<4xf32> + } + // CHECK: ( 0, 33, 72, 117 ) + vector.print %sum : vector<4xf32> + %ret = arith.constant 0 : i32 + return %ret : i32 +} diff --git a/examples/RVVDialect/makefile b/examples/RVVDialect/makefile index d30c64a00..dea63bd25 100644 --- a/examples/RVVDialect/makefile +++ b/examples/RVVDialect/makefile @@ -1,18 +1,48 @@ #!/bin/bash -BUDDY_OPT := ../../build/bin/buddy-opt -BUDDY_TRANSLATE := ../../build/bin/buddy-translate -LLC := ../../llvm/build/bin/llc + +# Build Directories +MLIR_BUILD_DIR := ../../llvm/build/ +BUDDY_MLIR_BUILD_DIR := ../../build/ +CROSS_BUDDY_MLIR_BUILD_DIR := ../../build-cross-rv/ +CROSS_LLVM_BUILD_DIR := ../../llvm/build-cross-clang-rv/ +CROSS_MLIR_BUILD_DIR := ../../llvm/build-cross-mlir-rv/ + +# Buddy MLIR Tools +BUDDY_OPT := ${BUDDY_MLIR_BUILD_DIR}/bin/buddy-opt +BUDDY_TRANSLATE := ${BUDDY_MLIR_BUILD_DIR}/bin/buddy-translate + +# Core LLVM/MLIR Tools +MLIR_OPT := ${MLIR_BUILD_DIR}/bin/mlir-opt +MLIR_TRANSLATE := ${MLIR_BUILD_DIR}/bin/mlir-translate +MLIR_CPU_RUNNER := ${MLIR_BUILD_DIR}/bin/mlir-cpu-runner +LLC := ${MLIR_BUILD_DIR}/bin/llc +LOCAL_CLANG := ${MLIR_BUILD_DIR}/bin/clang + +# RISC-V GNU Toolchain +RISCV_GNU_TOOLCHAIN := ${BUDDY_MLIR_BUILD_DIR}/thirdparty/riscv-gnu-toolchain +RISCV_GNU_TOOLCHAIN_SYSROOT := ${RISCV_GNU_TOOLCHAIN}/sysroot +QEMU := ${RISCV_GNU_TOOLCHAIN}/bin/qemu-riscv64 + +# Cross Compiled Toolchain +CROSS_BUDDY_MLIR_LIB := ${CROSS_BUDDY_MLIR_BUILD_DIR}/lib/ +CROSS_LLI := ${CROSS_LLVM_BUILD_DIR}/bin/lli +CROSS_MLIR_CPU_RUNNER := ${CROSS_MLIR_BUILD_DIR}/bin/mlir-cpu-runner +CROSS_MLIR_C_RUNNER_UTILS := ${CROSS_MLIR_BUILD_DIR}/lib/libmlir_c_runner_utils.so +CROSS_MLIR_RUNNER_UTILS := ${CROSS_MLIR_BUILD_DIR}/lib/libmlir_runner_utils.so +CROSS_MLIR_LIB := ${CROSS_MLIR_BUILD_DIR}/lib + +# Optimization Flag OPT_FLAG := -O0 -RISCV_GNU_TOOLCHAIN := ../../thirdparty/build-riscv-gnu-toolchain -RISCV_GNU_TOOLCHAIN_SYSROOT := ../../thirdparty/build-riscv-gnu-toolchain/sysroot -QEMU := ../../thirdparty/qemu/build/riscv64-linux-user/qemu-riscv64 -LOCAL_CLANG := ../../thirdparty/build-local-clang/bin/clang -CROSS_LLI := ../../thirdparty/build-cross-clang/bin/lli -CROSS_MLIR_CPU_RUNNER := ../../thirdparty/build-cross-mlir/bin/mlir-cpu-runner -CROSS_MLIR_C_RUNNER_UTILS := ../../thirdparty/build-cross-mlir/lib/libmlir_c_runner_utils.so -CROSS_MLIR_RUNNER_UTILS := ../../thirdparty/build-cross-mlir/lib/libmlir_runner_utils.so -CROSS_MLIR_LIB := ../../thirdparty/build-cross-mlir/lib +ifeq ($(shell uname),Linux) +MLIR_RUNNER_UTILS := ${MLIR_BUILD_DIR}/lib/libmlir_runner_utils.so +MLIR_C_RUNNER_UTILS := ${MLIR_BUILD_DIR}//lib/libmlir_c_runner_utils.so +MTRIPLE := x86_64-unknown-linux-gnu +else ifeq ($(shell uname),Darwin) +MLIR_RUNNER_UTILS := ${MLIR_BUILD_DIR}/lib/libmlir_runner_utils.dylib +MLIR_C_RUNNER_UTILS := ${MLIR_BUILD_DIR}/lib/libmlir_c_runner_utils.dylib +MTRIPLE := x86_64-apple-darwin +endif 
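+
+# Note: the QEMU invocations below use "-cpu max" instead of the older
+# "-cpu rv64,x-v=true,vlen=128" form. On recent QEMU releases, "max" enables
+# all supported extensions (including V), and the experimental "x-v" flag is
+# no longer accepted; a specific VLEN can still be selected through the CPU's
+# vlen property if required.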
rvv-setvl-lower: @${BUDDY_OPT} ./rvv-setvl.mlir \ @@ -43,7 +73,7 @@ rvv-setvl-128-run: -convert-func-to-llvm \ -reconcile-unrealized-casts | \ ${BUDDY_TRANSLATE} -buddy-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} -march=riscv64 -mattr=+m,+d,+v \ -dlopen=${CROSS_MLIR_C_RUNNER_UTILS} @@ -56,7 +86,7 @@ rvv-setvl-256-run: -convert-func-to-llvm \ -reconcile-unrealized-casts | \ ${BUDDY_TRANSLATE} -buddy-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=256 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} -march=riscv64 -mattr=+m,+d,+v \ -dlopen=${CROSS_MLIR_C_RUNNER_UTILS} @@ -87,7 +117,7 @@ rvv-rsqrt-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-mul-add-lower: @${BUDDY_OPT} ./rvv-mul-add.mlir \ @@ -122,7 +152,7 @@ rvv-mul-add-run: -reconcile-unrealized-casts | \ ${BUDDY_TRANSLATE} --buddy-to-llvmir | \ ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} \ - -cpu rv64,x-v=true,vlen=128 \ + -cpu max \ ${CROSS_LLI} -march=riscv64 -mattr=+m,+d,+v \ -dlopen=${CROSS_MLIR_C_RUNNER_UTILS} \ -dlopen=${CROSS_MLIR_RUNNER_UTILS} @@ -132,6 +162,8 @@ rvv-stripmining-lower: -convert-scf-to-cf \ -convert-math-to-llvm \ -lower-rvv \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ -convert-vector-to-llvm \ -finalize-memref-to-llvm \ -convert-func-to-llvm \ @@ -143,6 +175,8 @@ rvv-stripmining-translate: -convert-scf-to-cf \ -convert-math-to-llvm \ -lower-rvv \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ -convert-vector-to-llvm \ -finalize-memref-to-llvm \ -convert-func-to-llvm \ @@ -154,13 +188,15 @@ rvv-stripmining-run: -convert-scf-to-cf \ -convert-math-to-llvm \ -lower-rvv \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ -convert-vector-to-llvm \ -finalize-memref-to-llvm \ -convert-func-to-llvm \ -reconcile-unrealized-casts | \ ${BUDDY_TRANSLATE} --buddy-to-llvmir | \ ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} \ - -cpu rv64,x-v=true,vlen=128 \ + -cpu max \ ${CROSS_LLI} -march=riscv64 -mattr=+m,+d,+v \ -dlopen=${CROSS_MLIR_C_RUNNER_UTILS} \ -dlopen=${CROSS_MLIR_RUNNER_UTILS} @@ -170,6 +206,8 @@ rvv-stripmining-aot: -convert-scf-to-cf \ -convert-math-to-llvm \ -lower-rvv \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ -convert-vector-to-llvm \ -finalize-memref-to-llvm \ -convert-func-to-llvm \ @@ -182,4 +220,4 @@ rvv-stripmining-aot: -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out diff --git a/examples/RVVExperiment/makefile b/examples/RVVExperiment/makefile index ba424d425..6cadb07cd 100644 --- a/examples/RVVExperiment/makefile +++ b/examples/RVVExperiment/makefile @@ -1,25 +1,50 @@ #!/bin/bash -BUDDY_OPT := ../../build/bin/buddy-opt -BUDDY_TRANSLATE := ../../build/bin/buddy-translate -MLIR_OPT := ../../llvm/build/bin/mlir-opt -MLIR_TRANSLATE := ../../llvm/build/bin/mlir-translate -MLIR_CPU_RUNNER := ../../llvm/build/bin/mlir-cpu-runner -LLI := ../../llvm/build/bin/lli -LLC := ../../llvm/build/bin/llc -OPT := ../../llvm/build/bin/opt 
+ +# Build Directories +MLIR_BUILD_DIR := ../../llvm/build/ +BUDDY_MLIR_BUILD_DIR := ../../build/ +CROSS_BUDDY_MLIR_BUILD_DIR := ../../build-cross-rv/ +CROSS_LLVM_BUILD_DIR := ../../llvm/build-cross-clang-rv/ +CROSS_MLIR_BUILD_DIR := ../../llvm/build-cross-mlir-rv/ + +# Buddy MLIR Tools +BUDDY_OPT := ${BUDDY_MLIR_BUILD_DIR}/bin/buddy-opt +BUDDY_TRANSLATE := ${BUDDY_MLIR_BUILD_DIR}/bin/buddy-translate + +# Core LLVM/MLIR Tools +MLIR_OPT := ${MLIR_BUILD_DIR}/bin/mlir-opt +MLIR_TRANSLATE := ${MLIR_BUILD_DIR}/bin/mlir-translate +MLIR_CPU_RUNNER := ${MLIR_BUILD_DIR}/bin/mlir-cpu-runner +LLC := ${MLIR_BUILD_DIR}/bin/llc +LLI := ${MLIR_BUILD_DIR}/bin/lli +OPT := ${MLIR_BUILD_DIR}/bin/opt +LOCAL_CLANG := ${MLIR_BUILD_DIR}/bin/clang + +# RISC-V GNU Toolchain +RISCV_GNU_TOOLCHAIN := ${BUDDY_MLIR_BUILD_DIR}/thirdparty/riscv-gnu-toolchain +RISCV_GNU_TOOLCHAIN_SYSROOT := ${RISCV_GNU_TOOLCHAIN}/sysroot +QEMU := ${RISCV_GNU_TOOLCHAIN}/bin/qemu-riscv64 + +# Cross Compiled Toolchain +CROSS_BUDDY_MLIR_LIB := ${CROSS_BUDDY_MLIR_BUILD_DIR}/lib/ +CROSS_LLI := ${CROSS_LLVM_BUILD_DIR}/bin/lli +CROSS_MLIR_CPU_RUNNER := ${CROSS_MLIR_BUILD_DIR}/bin/mlir-cpu-runner +CROSS_MLIR_C_RUNNER_UTILS := ${CROSS_MLIR_BUILD_DIR}/lib/libmlir_c_runner_utils.so +CROSS_MLIR_RUNNER_UTILS := ${CROSS_MLIR_BUILD_DIR}/lib/libmlir_runner_utils.so +CROSS_MLIR_LIB := ${CROSS_MLIR_BUILD_DIR}/lib + +# Optimization Flag OPT_FLAG := -O3 -MLIR_RUNNER_UTILS := ../../llvm/build/lib/libmlir_runner_utils.so -MLIR_C_RUNNER_UTILS := ../../llvm/build/lib/libmlir_c_runner_utils.so - -RISCV_GNU_TOOLCHAIN := ../../thirdparty/build-riscv-gnu-toolchain -RISCV_GNU_TOOLCHAIN_SYSROOT := ../../thirdparty/build-riscv-gnu-toolchain/sysroot -QEMU := ../../thirdparty/qemu/build/riscv64-linux-user/qemu-riscv64 -LOCAL_CLANG := ../../thirdparty/build-local-clang/bin/clang -CROSS_LLI := ../../thirdparty/build-cross-clang/bin/lli -CROSS_MLIR_CPU_RUNNER := ../../thirdparty/build-cross-mlir/bin/mlir-cpu-runner -CROSS_MLIR_C_RUNNER_UTILS := ../../thirdparty/build-cross-mlir/lib/libmlir_c_runner_utils.so -CROSS_MLIR_RUNNER_UTILS := ../../thirdparty/build-cross-mlir/lib/libmlir_runner_utils.so -CROSS_MLIR_LIB := ../../thirdparty/build-cross-mlir/lib + +ifeq ($(shell uname),Linux) +MLIR_RUNNER_UTILS := ${MLIR_BUILD_DIR}/lib/libmlir_runner_utils.so +MLIR_C_RUNNER_UTILS := ${MLIR_BUILD_DIR}//lib/libmlir_c_runner_utils.so +MTRIPLE := x86_64-unknown-linux-gnu +else ifeq ($(shell uname),Darwin) +MLIR_RUNNER_UTILS := ${MLIR_BUILD_DIR}/lib/libmlir_runner_utils.dylib +MLIR_C_RUNNER_UTILS := ${MLIR_BUILD_DIR}/lib/libmlir_c_runner_utils.dylib +MTRIPLE := x86_64-apple-darwin +endif MLIR_VECTOR_EXAMPLES := ../MLIRVector @@ -53,7 +78,7 @@ rvv-scalable-run-128: --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${BUDDY_TRANSLATE} --buddy-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} --dlopen=${CROSS_MLIR_RUNNER_UTILS} @@ -67,16 +92,15 @@ rvv-scalable-aot-128: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} 
-cpu max a.out -# Note: this target will trigger an error to show the limitation. -rvv-scalable-run-256-error: +rvv-scalable-run-256: @${BUDDY_OPT} ./rvv-scalable.mlir \ -convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${BUDDY_TRANSLATE} --buddy-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=256 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} --dlopen=${CROSS_MLIR_RUNNER_UTILS} @@ -86,12 +110,6 @@ rvv-insert-extract-intrinsics-asm: -mattr=+m,+d,+v -riscv-v-vector-bits-min=256 \ --filetype=asm -o log.s -rvv-insert-extract-intrinsics-asm-error: - @${LLC} ./rvv-insert-extract-intrinsics.ll \ - -mtriple riscv64 -target-abi lp64d \ - -mattr=+m,+d,+v -riscv-v-vector-bits-min=128 \ - --filetype=asm -o log.s - rvv-c-setvl-translate: @${LOCAL_CLANG} -march=rv64gcv --target=riscv64-unknown-linux-gnu \ --sysroot=${RISCV_GNU_TOOLCHAIN_SYSROOT} --gcc-toolchain=${RISCV_GNU_TOOLCHAIN} \ @@ -107,7 +125,7 @@ rvv-c-setvl-run: @${LOCAL_CLANG} -march=rv64gcv --target=riscv64-unknown-linux-gnu \ --sysroot=${RISCV_GNU_TOOLCHAIN_SYSROOT} --gcc-toolchain=${RISCV_GNU_TOOLCHAIN} \ ./rvv-c-setvl.c -fPIC -S -emit-llvm -o log.ll - @${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + @${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --entry-function=main --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic log.ll rvv-setvl-translate: @@ -131,7 +149,7 @@ rvv-setvl-run: --lower-rvv -convert-vector-to-llvm -convert-arith-to-llvm -convert-func-to-llvm \ -reconcile-unrealized-casts | \ ${BUDDY_TRANSLATE} --buddy-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} --dlopen=${CROSS_MLIR_RUNNER_UTILS} @@ -151,7 +169,7 @@ rvv-vscale-128-run: --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${BUDDY_TRANSLATE} --buddy-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} --dlopen=${CROSS_MLIR_RUNNER_UTILS} @@ -161,7 +179,7 @@ rvv-vscale-256-run: --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${BUDDY_TRANSLATE} --buddy-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=256 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} --dlopen=${CROSS_MLIR_RUNNER_UTILS} @@ -171,7 +189,7 @@ rvv-vscale-512-run: --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${BUDDY_TRANSLATE} --buddy-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=512 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ 
--dlopen=${CROSS_MLIR_C_RUNNER_UTILS} --dlopen=${CROSS_MLIR_RUNNER_UTILS} @@ -185,7 +203,7 @@ rvv-vscale-128-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vscale-256-aot: @${BUDDY_OPT} ./rvv-vscale.mlir \ @@ -197,7 +215,7 @@ rvv-vscale-256-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=256 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vscale-512-aot: @${BUDDY_OPT} ./rvv-vscale.mlir \ @@ -209,7 +227,7 @@ rvv-vscale-512-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=512 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-loop-mask-asm: @${MLIR_OPT} ./rvv-loop-mask.mlir \ @@ -228,7 +246,7 @@ rvv-loop-mask-run: -convert-vector-to-llvm -finalize-memref-to-llvm -convert-arith-to-llvm \ -convert-func-to-llvm -reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} --dlopen=${CROSS_MLIR_RUNNER_UTILS} @@ -240,15 +258,21 @@ rvv-vp-intrinsic-lower: rvv-vp-intrinsic-translate: @${BUDDY_OPT} ./rvv-vp-intrinsic.mlir \ - --convert-scf-to-cf \ - --lower-rvv --lower-bud --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ + -lower-vector-exp \ + --lower-rvv \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ + --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ ${BUDDY_TRANSLATE} --buddy-to-llvmir -o log.ll rvv-vp-intrinsic-asm: @${BUDDY_OPT} ./rvv-vp-intrinsic.mlir \ - --convert-scf-to-cf \ - --lower-rvv --lower-bud --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ + -lower-vector-exp \ + --lower-rvv \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ + --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ ${BUDDY_TRANSLATE} --buddy-to-llvmir | \ ${LLC} ${OPT_FLAG} -mtriple riscv64 -target-abi lp64d \ @@ -257,50 +281,60 @@ rvv-vp-intrinsic-asm: rvv-vp-intrinsic-run: @${BUDDY_OPT} ./rvv-vp-intrinsic.mlir \ - --convert-scf-to-cf \ - --lower-rvv -lower-vector-exp --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ + -lower-vector-exp \ + --lower-rvv \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ + --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ ${BUDDY_TRANSLATE} --buddy-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ 
${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} --dlopen=${CROSS_MLIR_RUNNER_UTILS} rvv-vp-intrinsic-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic.mlir \ - --convert-scf-to-cf \ - --lower-rvv -lower-vector-exp --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ + -lower-vector-exp \ + --lower-rvv \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ + --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ ${BUDDY_TRANSLATE} --buddy-to-llvmir | \ ${LLC} -mtriple riscv64 -target-abi lp64d -mattr=+m,+d,+v -riscv-v-vector-bits-min=128 --filetype=obj -o log.o @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-sh-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-sh.mlir \ - --convert-scf-to-cf \ - --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ + --lower-rvv \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ + --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ ${BUDDY_TRANSLATE} --buddy-to-llvmir | \ ${LLC} -mtriple riscv64 -mattr=+v -riscv-v-vector-bits-min=128 --filetype=obj -o log.o @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-sh-jit: @${BUDDY_OPT} ./rvv-vp-intrinsic-sh.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ ${BUDDY_TRANSLATE} --buddy-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+v \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} --dlopen=${CROSS_MLIR_RUNNER_UTILS} rvv-vp-intrinsic-add-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-add.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ @@ -309,20 +343,22 @@ rvv-vp-intrinsic-add-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-add-jit: @${BUDDY_OPT} ./rvv-vp-intrinsic-add.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ ${BUDDY_TRANSLATE} --buddy-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} 
-L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} --dlopen=${CROSS_MLIR_RUNNER_UTILS} rvv-vp-intrinsic-and-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-and.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ @@ -331,20 +367,22 @@ rvv-vp-intrinsic-and-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-and-jit: @${BUDDY_OPT} ./rvv-vp-intrinsic-and.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ ${BUDDY_TRANSLATE} --buddy-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} --dlopen=${CROSS_MLIR_RUNNER_UTILS} rvv-vp-intrinsic-div-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-div.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ @@ -353,10 +391,11 @@ rvv-vp-intrinsic-div-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-mul-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-mul.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ @@ -365,10 +404,11 @@ rvv-vp-intrinsic-mul-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-sub-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-sub.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ @@ -377,10 +417,11 @@ rvv-vp-intrinsic-sub-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-fneg-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-fneg.mlir 
\ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ @@ -389,10 +430,11 @@ rvv-vp-intrinsic-fneg-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-ext-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-ext.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ @@ -401,10 +443,11 @@ rvv-vp-intrinsic-ext-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-to-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-to.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ @@ -413,10 +456,11 @@ rvv-vp-intrinsic-to-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-trunc-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-trunc.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ @@ -425,10 +469,11 @@ rvv-vp-intrinsic-trunc-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-rem-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-rem.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ @@ -437,10 +482,11 @@ rvv-vp-intrinsic-rem-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-fma-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-fma.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ @@ 
-449,10 +495,11 @@ rvv-vp-intrinsic-fma-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-merge-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-merge.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ @@ -461,10 +508,11 @@ rvv-vp-intrinsic-merge-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-select-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-select.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ @@ -473,10 +521,11 @@ rvv-vp-intrinsic-select-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-or-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-or.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ @@ -485,10 +534,11 @@ rvv-vp-intrinsic-or-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-xor-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-xor.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ @@ -497,10 +547,11 @@ rvv-vp-intrinsic-xor-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-max-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-max.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ @@ -509,10 +560,11 @@ rvv-vp-intrinsic-max-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o 
a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-min-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-min.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ @@ -521,10 +573,12 @@ rvv-vp-intrinsic-min-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out +# TODO: Fix Me rvv-vp-intrinsic-memory-aot: @${BUDDY_OPT} rvv-vp-intrinsic-memory.mlir \ + -convert-vector-to-scf \ -convert-linalg-to-loops -lower-affine -convert-scf-to-cf \ --lower-rvv \ -convert-vector-to-llvm -finalize-memref-to-llvm -convert-arith-to-llvm \ @@ -534,11 +588,14 @@ rvv-vp-intrinsic-memory-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out +# TODO: Fix Me rvv-vp-intrinsic-memory-scalable-aot: @${BUDDY_OPT} rvv-vp-intrinsic-memory-scalable.mlir \ - -convert-linalg-to-loops -lower-affine -convert-scf-to-cf \ + -convert-linalg-to-loops -lower-affine \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --lower-rvv \ -convert-vector-to-llvm -finalize-memref-to-llvm -convert-arith-to-llvm \ -convert-func-to-llvm -reconcile-unrealized-casts | \ @@ -547,10 +604,11 @@ rvv-vp-intrinsic-memory-scalable-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-fma-scalable-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-fma-scalable.mlir \ + -convert-vector-to-scf \ -convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ @@ -559,10 +617,11 @@ rvv-vp-intrinsic-fma-scalable-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-fneg-scalable-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-fneg-scalable.mlir \ + -convert-vector-to-scf \ -convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ @@ -571,10 +630,11 @@ rvv-vp-intrinsic-fneg-scalable-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - 
@LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-sh-scalable-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-sh-scalable.mlir \ + -convert-vector-to-scf \ -convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ @@ -583,10 +643,11 @@ rvv-vp-intrinsic-sh-scalable-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-add-scalable-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-add-scalable.mlir \ + -convert-vector-to-scf \ -convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ @@ -595,10 +656,11 @@ rvv-vp-intrinsic-add-scalable-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-and-scalable-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-and-scalable.mlir \ + -convert-vector-to-scf \ -convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ @@ -607,10 +669,11 @@ rvv-vp-intrinsic-and-scalable-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-div-scalable-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-div-scalable.mlir \ + -convert-vector-to-scf \ -convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ @@ -619,10 +682,11 @@ rvv-vp-intrinsic-div-scalable-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-ext-scalable-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-ext-scalable.mlir \ + -convert-vector-to-scf \ -convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ @@ -631,10 +695,11 @@ rvv-vp-intrinsic-ext-scalable-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu 
max a.out rvv-vp-intrinsic-max-scalable-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-max-scalable.mlir \ + -convert-vector-to-scf \ -convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ @@ -643,10 +708,11 @@ rvv-vp-intrinsic-max-scalable-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-merge-scalable-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-merge-scalable.mlir \ + -convert-vector-to-scf \ -convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ @@ -655,10 +721,11 @@ rvv-vp-intrinsic-merge-scalable-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-min-scalable-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-min-scalable.mlir \ + -convert-vector-to-scf \ -convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ @@ -667,10 +734,11 @@ rvv-vp-intrinsic-min-scalable-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-mul-scalable-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-mul-scalable.mlir \ + -convert-vector-to-scf \ -convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ @@ -679,10 +747,11 @@ rvv-vp-intrinsic-mul-scalable-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-or-scalable-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-or-scalable.mlir \ + -convert-vector-to-scf \ -convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ @@ -691,10 +760,11 @@ rvv-vp-intrinsic-or-scalable-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-rem-scalable-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-rem-scalable.mlir \ + -convert-vector-to-scf \ -convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm 
--finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ @@ -703,10 +773,11 @@ rvv-vp-intrinsic-rem-scalable-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-select-scalable-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-select-scalable.mlir \ + -convert-vector-to-scf \ -convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ @@ -715,10 +786,11 @@ rvv-vp-intrinsic-select-scalable-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-sub-scalable-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-sub-scalable.mlir \ + -convert-vector-to-scf \ -convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ @@ -727,10 +799,11 @@ rvv-vp-intrinsic-sub-scalable-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-to-scalable-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-to-scalable.mlir \ + -convert-vector-to-scf \ -convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ @@ -739,10 +812,11 @@ rvv-vp-intrinsic-to-scalable-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-trunc-scalable-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-trunc-scalable.mlir \ + -convert-vector-to-scf \ -convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ @@ -751,10 +825,11 @@ rvv-vp-intrinsic-trunc-scalable-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-xor-scalable-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-xor-scalable.mlir \ + -convert-vector-to-scf \ -convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ @@ -763,10 +838,11 @@ rvv-vp-intrinsic-xor-scalable-aot: 
@${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-rem-error: @${BUDDY_OPT} ./rvv-vp-intrinsic-rem-error.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ @@ -775,10 +851,11 @@ rvv-vp-intrinsic-rem-error: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-fmul-reduce-error: @${BUDDY_OPT} ./rvv-vp-intrinsic-fmul-reduce-error.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ @@ -787,10 +864,11 @@ rvv-vp-intrinsic-fmul-reduce-error: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out rvv-vp-intrinsic-mul-reduce-aot: @${BUDDY_OPT} ./rvv-vp-intrinsic-mul-reduce.mlir \ + -convert-vector-to-scf \ --convert-scf-to-cf \ --lower-rvv --convert-vector-to-llvm --finalize-memref-to-llvm --convert-arith-to-llvm \ --convert-func-to-llvm --reconcile-unrealized-casts | \ @@ -799,7 +877,7 @@ rvv-vp-intrinsic-mul-reduce-aot: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o -mabi=lp64d \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out ################################################################################ # Reuse MLIR Vector Examples @@ -822,12 +900,14 @@ rvv-load-run: --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} rvv-broadcast-asm: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-broadcast.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -838,15 +918,19 @@ rvv-broadcast-asm: run-targets += rvv-broadcast-run rvv-broadcast-run: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-broadcast.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ 
--reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} rvv-fma-asm: - @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/rvv-fma.mlir \ + @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-fma.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -857,15 +941,19 @@ rvv-fma-asm: run-targets += rvv-fma-run rvv-fma-run: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-fma.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} rvv-long-asm: - @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/rvv-long.mlir \ + @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-long.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -876,10 +964,12 @@ rvv-long-asm: run-targets += rvv-long-run rvv-long-run: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-long.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} @@ -900,12 +990,14 @@ rvv-transpose-run: -convert-vector-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm \ -reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} rvv-shape-cast-asm: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-shape-cast.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -916,15 +1008,19 @@ rvv-shape-cast-asm: run-targets += rvv-shape-cast-run rvv-shape-cast-run: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-shape-cast.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} rvv-bitcast-asm: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-bitcast.mlir 
\ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -935,15 +1031,19 @@ rvv-bitcast-asm: run-targets += rvv-bitcast-run rvv-bitcast-run: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-bitcast.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} rvv-shuffle-asm: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-shuffle.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -954,15 +1054,19 @@ rvv-shuffle-asm: run-targets += rvv-shuffle-run rvv-shuffle-run: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-shuffle.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} rvv-splat-asm: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-splat.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -973,15 +1077,19 @@ rvv-splat-asm: run-targets += rvv-splat-run rvv-splat-run: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-splat.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} rvv-insert-asm: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-insert.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -992,15 +1100,19 @@ rvv-insert-asm: run-targets += rvv-insert-run rvv-insert-run: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-insert.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} rvv-reduction-asm: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-reduction.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ 
--convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -1011,15 +1123,19 @@ rvv-reduction-asm: run-targets += rvv-reduction-run rvv-reduction-run: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-reduction.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} rvv-outerproduct-asm: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-outerproduct.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -1030,15 +1146,19 @@ rvv-outerproduct-asm: run-targets += rvv-outerproduct-run rvv-outerproduct-run: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-outerproduct.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} rvv-createmask-asm: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-createmask.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -1049,15 +1169,19 @@ rvv-createmask-asm: run-targets += rvv-create-mask-run rvv-create-mask-run: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-create-mask.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} rvv-extract-asm: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-extract.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -1068,15 +1192,19 @@ rvv-extract-asm: run-targets += rvv-extract-run rvv-extract-run: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-extract.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} rvv-maskedload-asm: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-maskedload.mlir \ + -convert-vector-to-scf \ + 
-convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -1087,15 +1215,19 @@ rvv-maskedload-asm: run-targets += rvv-maskedload-run rvv-maskedload-run: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-maskedload.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} rvv-maskedstore-asm: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-maskedstore.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -1106,15 +1238,19 @@ rvv-maskedstore-asm: run-targets += rvv-maskedstore-run rvv-maskedstore-run: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-maskedstore.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} --dlopen=${CROSS_MLIR_RUNNER_UTILS} rvv-extract-strided-slice-asm: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-extract-strided-slice.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -1125,15 +1261,19 @@ rvv-extract-strided-slice-asm: run-targets += rvv-extract-strided-slice-run rvv-extract-strided-slice-run: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-extract-strided-slice.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} rvv-constant-mask-asm: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-constant-mask.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -1144,15 +1284,19 @@ rvv-constant-mask-asm: run-targets += rvv-constant-mask-run rvv-constant-mask-run: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-constant-mask.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ 
--dlopen=${CROSS_MLIR_C_RUNNER_UTILS} rvv-expand-load-asm: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-expandload.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -1163,15 +1307,19 @@ rvv-expand-load-asm: run-targets += rvv-expand-load-run rvv-expand-load-run: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-expandload.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} rvv-compressstore-asm: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-compressstore.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -1182,15 +1330,19 @@ rvv-compressstore-asm: run-targets += rvv-compressstore-run rvv-compressstore-run: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-compressstore.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} --dlopen=${CROSS_MLIR_RUNNER_UTILS} rvv-insert-strided-slice-asm: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-insert-strided-slice.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -1201,15 +1353,19 @@ rvv-insert-strided-slice-asm: run-targets += rvv-insert-strided-slice-run rvv-insert-strided-slice-run: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-insert-strided-slice.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \ --dlopen=${CROSS_MLIR_C_RUNNER_UTILS} rvv-scatter-asm: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-scatter.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ @@ -1220,9 +1376,11 @@ rvv-scatter-asm: run-targets += rvv-scatter-run rvv-scatter-run: @${MLIR_OPT} ${MLIR_VECTOR_EXAMPLES}/vector-scatter.mlir \ + -convert-vector-to-scf \ + -convert-scf-to-cf \ --convert-vector-to-llvm --finalize-memref-to-llvm --convert-func-to-llvm \ --reconcile-unrealized-casts | \ ${MLIR_TRANSLATE} --mlir-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L 
${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \
	${CROSS_LLI} --march=riscv64 -mattr=+m,+d,+v -jit-linker=jitlink -relocation-model=pic \
	--dlopen=${CROSS_MLIR_C_RUNNER_UTILS} --dlopen=${CROSS_MLIR_RUNNER_UTILS}
diff --git a/examples/RVVExperiment/rvv-c-setvl.c b/examples/RVVExperiment/rvv-c-setvl.c
index c8d1ccfbb..4a8489d55 100644
--- a/examples/RVVExperiment/rvv-c-setvl.c
+++ b/examples/RVVExperiment/rvv-c-setvl.c
@@ -3,7 +3,7 @@
 int main() {
   int avl = 70;
-  int vl = vsetvl_e32m2(avl);
+  int vl = __riscv_vsetvl_e32m2(avl);
 
   printf("vl: %d\n", vl);
   return 0;
diff --git a/examples/VectorExpDialect/makefile b/examples/VectorExpDialect/makefile
index 8d55cc14a..ab85a8a2c 100644
--- a/examples/VectorExpDialect/makefile
+++ b/examples/VectorExpDialect/makefile
@@ -1,30 +1,46 @@
 #!/bin/bash
-BUDDY_OPT := ../../build/bin/buddy-opt
-BUDDY_TRANSLATE := ../../build/bin/buddy-translate
-MLIR_OPT := ../../llvm/build/bin/mlir-opt
-MLIR_TRANSLATE := ../../llvm/build/bin/mlir-translate
-MLIR_CPU_RUNNER := ../../llvm/build/bin/mlir-cpu-runner
-LLC := ../../llvm/build/bin/llc
-OPT_FLAG := -O0
-RISCV_GNU_TOOLCHAIN := ../../thirdparty/build-riscv-gnu-toolchain
-RISCV_GNU_TOOLCHAIN_SYSROOT := ../../thirdparty/build-riscv-gnu-toolchain/sysroot
-QEMU := ../../thirdparty/qemu/build/riscv64-linux-user/qemu-riscv64
-LOCAL_CLANG := ../../thirdparty/build-local-clang/bin/clang
-CROSS_LLI := ../../thirdparty/build-cross-clang/bin/lli
-CROSS_MLIR_CPU_RUNNER := ../../thirdparty/build-cross-mlir/bin/mlir-cpu-runner
-CROSS_MLIR_C_RUNNER_UTILS := ../../thirdparty/build-cross-mlir/lib/libmlir_c_runner_utils.so
-CROSS_MLIR_RUNNER_UTILS := ../../thirdparty/build-cross-mlir/lib/libmlir_runner_utils.so
-CROSS_MLIR_LIB := ../../thirdparty/build-cross-mlir/lib
-CROSS_BUDDY_MLIR_LIB := ../../thirdparty/build-cross-buddy-mlir/lib/
+# Build Directories
+MLIR_BUILD_DIR := ../../llvm/build/
+BUDDY_MLIR_BUILD_DIR := ../../build/
+CROSS_BUDDY_MLIR_BUILD_DIR := ../../build-cross-rv/
+CROSS_LLVM_BUILD_DIR := ../../llvm/build-cross-clang-rv/
+CROSS_MLIR_BUILD_DIR := ../../llvm/build-cross-mlir-rv/
+
+# Buddy MLIR Tools
+BUDDY_OPT := ${BUDDY_MLIR_BUILD_DIR}/bin/buddy-opt
+BUDDY_TRANSLATE := ${BUDDY_MLIR_BUILD_DIR}/bin/buddy-translate
+
+# Core LLVM/MLIR Tools
+MLIR_OPT := ${MLIR_BUILD_DIR}/bin/mlir-opt
+MLIR_TRANSLATE := ${MLIR_BUILD_DIR}/bin/mlir-translate
+MLIR_CPU_RUNNER := ${MLIR_BUILD_DIR}/bin/mlir-cpu-runner
+LLC := ${MLIR_BUILD_DIR}/bin/llc
+LOCAL_CLANG := ${MLIR_BUILD_DIR}/bin/clang
+
+# RISC-V GNU Toolchain
+RISCV_GNU_TOOLCHAIN := ${BUDDY_MLIR_BUILD_DIR}/thirdparty/riscv-gnu-toolchain
+RISCV_GNU_TOOLCHAIN_SYSROOT := ${RISCV_GNU_TOOLCHAIN}/sysroot
+QEMU := ${RISCV_GNU_TOOLCHAIN}/bin/qemu-riscv64
+
+# Cross Compiled Toolchain
+CROSS_BUDDY_MLIR_LIB := ${CROSS_BUDDY_MLIR_BUILD_DIR}/lib/
+CROSS_LLI := ${CROSS_LLVM_BUILD_DIR}/bin/lli
+CROSS_MLIR_CPU_RUNNER := ${CROSS_MLIR_BUILD_DIR}/bin/mlir-cpu-runner
+CROSS_MLIR_C_RUNNER_UTILS := ${CROSS_MLIR_BUILD_DIR}/lib/libmlir_c_runner_utils.so
+CROSS_MLIR_RUNNER_UTILS := ${CROSS_MLIR_BUILD_DIR}/lib/libmlir_runner_utils.so
+CROSS_MLIR_LIB := ${CROSS_MLIR_BUILD_DIR}/lib
+
+# Optimization Flag
+OPT_FLAG := -O0
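+
+# Note on QEMU flags: the run targets below use `-cpu max`, which enables every
+# extension the emulator supports, including RVV. Older QEMU releases exposed
+# vectors only behind the experimental `-cpu rv64,x-v=true,vlen=128` switches.
+# If a fixed VLEN is still wanted, recent QEMU accepts a spelling like the one
+# sketched below; the exact property names vary across QEMU versions, so treat
+# this as an assumption to verify against the QEMU build in use:
+#   ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,v=true,vlen=128 a.out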
 ifeq ($(shell uname),Linux)
-MLIR_RUNNER_UTILS := ../../llvm/build/lib/libmlir_runner_utils.so
-MLIR_C_RUNNER_UTILS := ../../llvm/build/lib/libmlir_c_runner_utils.so
+MLIR_RUNNER_UTILS := ${MLIR_BUILD_DIR}/lib/libmlir_runner_utils.so
+MLIR_C_RUNNER_UTILS := ${MLIR_BUILD_DIR}/lib/libmlir_c_runner_utils.so
 MTRIPLE := x86_64-unknown-linux-gnu
 else ifeq ($(shell uname),Darwin)
-MLIR_RUNNER_UTILS := ../../llvm/build/lib/libmlir_runner_utils.dylib
-MLIR_C_RUNNER_UTILS := ../../llvm/build/lib/libmlir_c_runner_utils.dylib
+MLIR_RUNNER_UTILS := ${MLIR_BUILD_DIR}/lib/libmlir_runner_utils.dylib
+MLIR_C_RUNNER_UTILS := ${MLIR_BUILD_DIR}/lib/libmlir_c_runner_utils.dylib
 MTRIPLE := x86_64-apple-darwin
 endif
@@ -39,6 +55,8 @@ vector-exp-load-original-lower:
 
 vector-exp-load-original-translate:
	@${BUDDY_OPT} ./vector-exp-load-original.mlir \
+		-convert-vector-to-scf \
+		-convert-scf-to-cf \
		-convert-vector-to-llvm \
		-finalize-memref-to-llvm \
		-convert-func-to-llvm \
@@ -47,6 +65,8 @@ vector-exp-load-original-translate:
 
 vector-exp-load-original-asm:
	@${BUDDY_OPT} ./vector-exp-load-original.mlir \
+		-convert-vector-to-scf \
+		-convert-scf-to-cf \
		-convert-vector-to-llvm \
		-finalize-memref-to-llvm \
		-convert-func-to-llvm \
@@ -59,6 +79,8 @@ vector-exp-load-original-asm:
 
 vector-exp-config-lower:
	@${BUDDY_OPT} ./vector-exp-predication.mlir \
		-lower-vector-exp \
+		-convert-vector-to-scf \
+		-convert-scf-to-cf \
		-convert-vector-to-llvm \
		-finalize-memref-to-llvm \
		-convert-func-to-llvm \
@@ -68,6 +90,8 @@ vector-exp-config-lower:
 
 vector-exp-config-translate:
	@${BUDDY_OPT} ./vector-exp-predication.mlir \
		-lower-vector-exp \
+		-convert-vector-to-scf \
+		-convert-scf-to-cf \
		-convert-vector-to-llvm \
		-finalize-memref-to-llvm \
		-convert-func-to-llvm \
@@ -77,6 +101,8 @@ vector-exp-config-translate:
 
 vector-exp-config-run:
	@${BUDDY_OPT} ./vector-exp-predication.mlir \
		-lower-vector-exp \
+		-convert-vector-to-scf \
+		-convert-scf-to-cf \
		-convert-vector-to-llvm \
		-finalize-memref-to-llvm \
		-convert-func-to-llvm \
@@ -87,6 +113,8 @@ vector-exp-config-run:
 
 vector-exp-predication-memory-lower:
	@${BUDDY_OPT} ./vector-exp-predication-memory.mlir \
		-lower-vector-exp \
+		-convert-vector-to-scf \
+		-convert-scf-to-cf \
		-convert-vector-to-llvm \
		-finalize-memref-to-llvm \
		-convert-func-to-llvm \
@@ -96,13 +124,14 @@ vector-exp-predication-memory-lower:
 
 vector-exp-predication-memory-run:
	@${BUDDY_OPT} ./vector-exp-predication-memory.mlir \
		-lower-vector-exp \
+		-convert-vector-to-scf \
+		-convert-scf-to-cf \
		-convert-vector-to-llvm \
		-finalize-memref-to-llvm \
		-convert-func-to-llvm \
		-reconcile-unrealized-casts |\
	${BUDDY_TRANSLATE} -buddy-to-llvmir | \
-	${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} \
-	-cpu rv64,x-v=true,vlen=128 \
+	${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \
	${CROSS_LLI} -march=riscv64 -mattr=+m,+d,+v \
	-dlopen=${CROSS_MLIR_C_RUNNER_UTILS} \
	-dlopen=${CROSS_MLIR_RUNNER_UTILS}
@@ -119,8 +148,7 @@ vector-exp-predication-matmul-run:
		-convert-func-to-llvm \
		-reconcile-unrealized-casts |\
	${BUDDY_TRANSLATE} -buddy-to-llvmir | \
-	${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} \
-	-cpu rv64,x-v=true,vlen=128 \
+	${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \
	${CROSS_LLI} -march=riscv64 -mattr=+m,+d,+v \
	-dlopen=${CROSS_MLIR_C_RUNNER_UTILS} \
	-dlopen=${CROSS_MLIR_RUNNER_UTILS}
@@ -141,7 +169,7 @@ vector-exp-predication-matmul-aot:
	@${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o \
	-L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \
	-o a.out
-	@LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out
+	@LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out
 
 vector-exp-predication-matmul-elf:
	@${BUDDY_OPT} ./vector-exp-predication-matmul.mlir \
@@ -212,7 +240,7 @@ vector-exp-add-mask-run:
		-reconcile-unrealized-casts | \
	${BUDDY_TRANSLATE} -buddy-to-llvmir | \
	${QEMU} -L
${RISCV_GNU_TOOLCHAIN_SYSROOT} \ - -cpu rv64,x-v=true,vlen=128 \ + -cpu max \ ${CROSS_LLI} -march=riscv64 -mattr=+m,+d,+v \ -dlopen=${CROSS_MLIR_C_RUNNER_UTILS} \ -dlopen=${CROSS_MLIR_RUNNER_UTILS} @@ -243,8 +271,7 @@ vector-exp-add-predication-run: -convert-func-to-llvm \ -reconcile-unrealized-casts | \ ${BUDDY_TRANSLATE} -buddy-to-llvmir | \ - ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} \ - -cpu rv64,x-v=true,vlen=128 \ + ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max \ ${CROSS_LLI} -march=riscv64 -mattr=+m,+d,+v \ -dlopen=${CROSS_MLIR_C_RUNNER_UTILS} \ -dlopen=${CROSS_MLIR_RUNNER_UTILS} @@ -291,4 +318,4 @@ vector-exp-dynamic-vector-run: @${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-gcc log.o \ -L${CROSS_MLIR_LIB} -lmlir_runner_utils -lmlir_c_runner_utils \ -o a.out - @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu rv64,x-v=true,vlen=128 a.out + @LD_LIBRARY_PATH=${CROSS_MLIR_LIB} ${QEMU} -L ${RISCV_GNU_TOOLCHAIN_SYSROOT} -cpu max a.out diff --git a/examples/VectorExpDialect/vector-exp-predication-matmul.mlir b/examples/VectorExpDialect/vector-exp-predication-matmul.mlir index 557b38f9e..fc4aee47e 100644 --- a/examples/VectorExpDialect/vector-exp-predication-matmul.mlir +++ b/examples/VectorExpDialect/vector-exp-predication-matmul.mlir @@ -85,8 +85,8 @@ func.func @main() -> i32 { call @matmul(%mem_i32, %mem_i32, %result_mem) : (memref<10x10xi32>, memref<10x10xi32>, memref<10x10xi32>) -> () - // %print_result_mem = memref.cast %result_mem : memref<10x10xi32> to memref<*xi32> - // call @printMemrefI32(%print_result_mem) : (memref<*xi32>) -> () + %print_result_mem = memref.cast %result_mem : memref<10x10xi32> to memref<*xi32> + call @printMemrefI32(%print_result_mem) : (memref<*xi32>) -> () %ret = arith.constant 0 : i32 return %ret : i32 diff --git a/examples/lit.cfg.py b/examples/lit.cfg.py index 91693e444..c1c4c05bd 100644 --- a/examples/lit.cfg.py +++ b/examples/lit.cfg.py @@ -39,8 +39,11 @@ 'BuddyLeNet', 'BuddyBert', 'BuddyLlama', + 'BuddyWhisper', 'BuddyBert', + 'BuddyMobileNetV3', 'BuddyResNet18', + 'BuddyGPU', 'ConvOpt', 'DAPDialect', 'DIPDialect', diff --git a/flake.lock b/flake.lock index 7bdd04677..bd7992239 100644 --- a/flake.lock +++ b/flake.lock @@ -5,11 +5,11 @@ "systems": "systems" }, "locked": { - "lastModified": 1694529238, - "narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=", + "lastModified": 1710146030, + "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=", "owner": "numtide", "repo": "flake-utils", - "rev": "ff7b65b44d01cf9ba6a71320833626af21126384", + "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a", "type": "github" }, "original": { @@ -20,11 +20,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1699099776, - "narHash": "sha256-X09iKJ27mGsGambGfkKzqvw5esP1L/Rf8H3u3fCqIiU=", + "lastModified": 1722813957, + "narHash": "sha256-IAoYyYnED7P8zrBFMnmp7ydaJfwTnwcnqxUElC1I26Y=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "85f1ba3e51676fa8cc604a3d863d729026a6b8eb", + "rev": "cb9a96f23c491c081b38eab96d22fa958043c9fa", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index 8f94e2aec..c3af6d9d5 100644 --- a/flake.nix +++ b/flake.nix @@ -9,36 +9,17 @@ outputs = { self, nixpkgs, flake-utils }@inputs: let overlay = import ./nix/overlay.nix; - pkgsForSys = system: import nixpkgs { overlays = [ overlay ]; inherit system; }; in flake-utils.lib.eachDefaultSystem (system: let - pkgs = pkgsForSys system; - mkLLVMShell = pkgs.mkShell.override { stdenv = pkgs.llvmPkgs.stdenv; }; + pkgs = import 
nixpkgs { overlays = [ overlay ]; inherit system; };
       in
       {
         # Help other use packages in this flake
         legacyPackages = pkgs;

-        devShells.default = mkLLVMShell {
-          buildInputs = with pkgs; [
-            # buddy-mlir build tools
-            cmake
-            ninja
-            python3
-            llvmPkgs.bintools # For ld.lld
-
-            # buddy-mlir libraries
-            libjpeg
-            libpng
-            zlib-ng
-          ];
-
-          postHook = ''
-            export PATH="${pkgs.clang-tools}/bin:$PATH"
-          '';
-        };
+        devShells.default = pkgs.buddy-mlir.devShell;

         formatter = pkgs.nixpkgs-fmt;
       }) //
diff --git a/frontend/Interfaces/buddy/Core/Container.h b/frontend/Interfaces/buddy/Core/Container.h
index db8b66c17..6e3ff18d5 100644
--- a/frontend/Interfaces/buddy/Core/Container.h
+++ b/frontend/Interfaces/buddy/Core/Container.h
@@ -132,7 +132,7 @@ MemRef<T, N>::MemRef(intptr_t sizes[N], T init) : MemRef(sizes) {
 
 template <typename T, size_t N>
 MemRef<T, N>::MemRef(intptr_t sizes[N], bool needMalloc, intptr_t offset)
-    : offset(offset), aligned(nullptr), allocated(nullptr) {
+    : allocated(nullptr), aligned(nullptr), offset(offset) {
   for (size_t i = 0; i < N; i++) {
     this->sizes[i] = sizes[i];
   }
@@ -152,7 +152,7 @@ MemRef<T, N>::MemRef(std::vector<size_t> sizes, T init) : MemRef(sizes) {
 
 template <typename T, size_t N>
 MemRef<T, N>::MemRef(std::vector<size_t> sizes, bool needMalloc, intptr_t offset)
-    : offset(offset), aligned(nullptr), allocated(nullptr) {
+    : allocated(nullptr), aligned(nullptr), offset(offset) {
   if (sizes.size() != N) {
     throw std::runtime_error("Invalid number of dimensions.");
   }
diff --git a/frontend/Interfaces/buddy/DAP/AudioContainer.h b/frontend/Interfaces/buddy/DAP/AudioContainer.h
index 9bc924574..7c3901e73 100644
--- a/frontend/Interfaces/buddy/DAP/AudioContainer.h
+++ b/frontend/Interfaces/buddy/DAP/AudioContainer.h
@@ -14,6 +14,13 @@
 //
 //===----------------------------------------------------------------------===//
 //
+// The audio decoding process in this file references the `AudioFile` library,
+// which is hereby acknowledged.
+// For the license of the `AudioFile` library,
+// please see: https://github.com/adamstark/AudioFile/blob/master/LICENSE
+//
+//===----------------------------------------------------------------------===//
+//
 // Audio container descriptor.
 //
 //===----------------------------------------------------------------------===//
@@ -21,79 +28,592 @@
 #ifndef FRONTEND_INTERFACES_BUDDY_DAP_AUDIOCONTAINER
 #define FRONTEND_INTERFACES_BUDDY_DAP_AUDIOCONTAINER
 
-#include "AudioFile.h"
 #include "buddy/Core/Container.h"
+#include <algorithm>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <vector>
 
 namespace dap {
-
-// Audio container.
-// - T represents the type of the elements.
-// - N represents the number of audio channels (Normally would be 1 or 2).
-// If N is smaller than channels from the file, only previous N channels will be
-// manipulated.
-template <typename T, size_t N> class Audio {
+template <typename T, size_t N> class Audio : public MemRef<T, N> {
 public:
-  Audio() : audioFile(), data(nullptr) {}
-  explicit Audio(std::string filename) : audioFile(filename), data(nullptr) {}
-  void fetchMetadata(const AudioFile<T> &aud);
-  bool save(std::string filename);
-  AudioFile<T> &getAudioFile() {
-    moveToAudioFile();
-    return audioFile;
-  }
-  MemRef<T, N> &getMemRef() {
-    moveToMemRef();
-    return *data;
-  }
-
-protected:
-  void moveToMemRef();
-  void moveToAudioFile();
-  AudioFile<T> audioFile;
-  MemRef<T, N> *data;
+  // Constructor to initialize the Audio MemRef object with a file name.
+  Audio(std::string filename);
+  // Constructor to convert MemRef object to Audio MemRef object. Member
+  // variables are initialized with default values.
+  Audio(MemRef<T, N> &&memref) noexcept;
+
+  // Retrieve the name of the audio format.
+  std::string getFormatName() const {
+    switch (this->audioFormat) {
+    case AudioFormat::WAV:
+      return "WAV";
+    default:
+      return "Unsupported format";
+    }
+  }
+  // Returns the number of bits per sample.
+  int getBitDepth() const { return static_cast<int>(this->bitsPerSample); }
+  // Returns the number of samples per channel.
+  size_t getSamplesNum() const { return this->numSamples; }
+  // Returns the number of audio channels.
+  int getChannelsNum() const { return static_cast<int>(this->numChannels); }
+  // Returns the sampling rate in samples per second.
+  int getSampleRate() const { return static_cast<int>(this->sampleRate); }
+
+  // Sets the number of bits per sample.
+  void setBitDepth(int bitDepth) {
+    this->bitsPerSample = static_cast<uint16_t>(bitDepth);
+  }
+  // Sets the number of samples per channel.
+  void setSamplesNum(size_t samplesNum) { this->numSamples = samplesNum; }
+  // Sets the number of audio channels.
+  void setChannelsNum(int channelsNum) {
+    this->numChannels = static_cast<uint16_t>(channelsNum);
+  }
+  // Sets the sampling rate in samples per second.
+  void setSampleRate(int sampleRate) {
+    this->sampleRate = static_cast<uint32_t>(sampleRate);
+  }
+
+  // Create an Audio File with file name and format.
+  bool saveToFile(std::string filename, std::string format);
+
+private:
+  // Sample bit depth.
+  uint16_t bitsPerSample;
+  // Number of samples per channel.
+  size_t numSamples;
+  // Number of audio channels.
+  uint16_t numChannels;
+  // Samples per second (Hz).
+  uint32_t sampleRate;
+  // Enum to represent supported audio formats.
+  enum class AudioFormat {
+    ERROR, // Represents an error or unsupported format.
+    WAV,   // WAV format.
+  } audioFormat;
+  // Enum to represent byte order of data.
+  enum class Endianness { LittleEndian, BigEndian };
+
+  // Decoders for multiple audio file formats.
+  // Decode a WAV file into MemRef format.
+  bool decodeWaveFile(const std::vector<uint8_t> &fileData);
+
+  // Encoders for multiple audio file formats.
+  // Encode a MemRef into WAV format.
+  bool EncodeWaveFile(std::vector<uint8_t> &fileData);
+
+  // Helper functions for decoding and data manipulation.
+  // Find the index of a specified chunk in the audio file.
+  size_t getIndexOfChunk(const std::vector<uint8_t> &fileData,
+                         const std::string &chunkHeaderID, size_t startIndex,
+                         Endianness endianness = Endianness::LittleEndian);
+  // Convert four bytes to a 32-bit integer according to byte order of data.
+  int32_t fourBytesToI32(const std::vector<uint8_t> &fileData,
+                         size_t startIndex,
+                         Endianness endianness = Endianness::LittleEndian);
+  // Convert two bytes to a 16-bit integer according to byte order of data.
+  int16_t twoBytesToI16(const std::vector<uint8_t> &fileData, size_t startIndex,
+                        Endianness endianness = Endianness::LittleEndian);
+  // Normalize 8-bit unsigned integer sample to a range of -1.0 to 1.0.
+  T oneByteToSample(uint8_t data) {
+    return static_cast<T>(data - 128) / static_cast<T>(128.);
+  }
+  // Normalize 16-bit signed integer sample to a range of -1.0 to 1.0.
+  T twoBytesToSample(int16_t data) {
+    return static_cast<T>(data) / static_cast<T>(32768.);
+  }
+
+  // Helper functions for encoding and data manipulation.
+  // Converts each character in the string to a byte.
+  void stringToBytes(std::vector<uint8_t> &fileData, const std::string &str) {
+    for (size_t i = 0; i < str.size(); i++)
+      fileData.push_back(static_cast<uint8_t>(str[i]));
+  }
+  // Converts a 32-bit integer to four bytes according to byte order of data.
+  void i32ToFourBytes(std::vector<uint8_t> &fileData, int32_t num,
+                      Endianness endianness = Endianness::LittleEndian);
+  // Converts a 16-bit integer to two bytes according to byte order of data.
+  void i16ToTwoBytes(std::vector<uint8_t> &fileData, int16_t num,
+                     Endianness endianness = Endianness::LittleEndian);
+  // Converts an audio sample to an 8-bit PCM format (one byte).
+  uint8_t sampleToOneByte(T sample);
+  // Converts an audio sample to a 16-bit PCM format (two bytes).
+  int16_t sampleToI16(T sample);
+};
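+
+// A minimal usage sketch of the container above (the file names are
+// hypothetical; assumes a 16-bit PCM mono WAV input):
+//
+//   dap::Audio<float, 1> aud("input.wav");   // decode into a 1-D MemRef
+//   int rate = aud.getSampleRate();          // e.g. 44100
+//   size_t num = aud.getSamplesNum();        // samples per channel
+//   aud.saveToFile("output.wav", "wav");     // re-encode as PCM WAV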
-template <typename T, size_t N> bool Audio<T, N>::save(std::string filename) {
-  if (!this->audioFile.samples) {
-    auto temp = this->data->release();
-    if constexpr (std::is_same_v<T, float>) {
-      for (int i = 0; i < audioFile.numSamples; i++) {
-        if (temp[i] != temp[i]) { // To handle NaN values
-          temp[i] = 0.9999999;
-        } else { // Clamp the values between -1.0 to 1.0
-          temp[i] = std::clamp(temp[i], float(-1.0), float(0.9999999));
-        }
+// Audio Container Constructor.
+// Constructs an audio container object from the audio file path.
+template <typename T, size_t N> Audio<T, N>::Audio(std::string filePath) {
+  // ---------------------------------------------------------------------------
+  // 1. Read the audio file into a std::vector.
+  // ---------------------------------------------------------------------------
+  // Open the file in binary mode and position the file pointer at the end of
+  // the file.
+  std::ifstream file(filePath, std::ios::binary | std::ios::ate);
+  // Check if the file was successfully opened.
+  if (!file) {
+    throw std::runtime_error("Error: Unable to open file at " + filePath);
+  }
+  // Get the size of the file.
+  size_t dataLength = file.tellg();
+  // Move file pointer to the beginning of the file.
+  file.seekg(0, std::ios::beg);
+  // Create a vector to store the data.
+  std::vector<uint8_t> fileData(dataLength);
+  // Read the data.
+  if (!file.read(reinterpret_cast<char *>(fileData.data()), dataLength)) {
+    throw std::runtime_error("Error: Unable to read data from " + filePath);
+  }
+  // ---------------------------------------------------------------------------
+  // 2. Determine the audio format and decode the audio data into MemRef.
+  // ---------------------------------------------------------------------------
+  std::string header(fileData.begin(), fileData.begin() + 4);
+  // Check the file header to determine the format.
+  if (header == "RIFF") {
+    this->audioFormat = AudioFormat::WAV;
+    bool success = decodeWaveFile(fileData);
+    if (!success) {
+      this->audioFormat = AudioFormat::ERROR;
+      throw std::runtime_error("Failed to decode WAV file from " + filePath);
+    };
+  } else {
+    this->audioFormat = AudioFormat::ERROR;
+    throw std::runtime_error("Unsupported audio format detected in file " +
+                             filePath);
+  }
+}
+
+// Constructs an audio container object from a MemRef object. Initializes
+// metadata with default values.
+template <typename T, size_t N>
+Audio<T, N>::Audio(MemRef<T, N> &&memref) noexcept
+    : MemRef<T, N>(std::move(memref)), bitsPerSample(0), numSamples(0),
+      numChannels(0), sampleRate(0) {}
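+
+// For reference, the RIFF descriptor checked by the constructor above is the
+// first 12 bytes of a WAV file, e.g. (hex):
+//   52 49 46 46  24 08 00 00  57 41 56 45
+//   'R' 'I' 'F' 'F' |  size   | 'W' 'A' 'V' 'E'
+// The size field is little-endian, so fourBytesToI32 assembles these four
+// bytes as 0x00000824 = 2084: the total file length minus the 8 bytes already
+// consumed by "RIFF" and the size field itself.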
+
+// Create Audio File.
+// Save Audio MemRef to the specified file path using the desired format.
+template <typename T, size_t N>
+bool Audio<T, N>::saveToFile(std::string filePath, std::string format) {
+  // ---------------------------------------------------------------------------
+  // 1. Determine the audio format and encode the MemRef into file data.
+  // ---------------------------------------------------------------------------
+  // Convert the string to lowercase before comparison, ensuring that case
+  // variations are handled without repeating conditions.
+  std::transform(format.begin(), format.end(), format.begin(), ::tolower);
+  // Vector for storing bytes in a specific audio format.
+  std::vector<uint8_t> fileData;
+  // Select encoder.
+  if (format == "wav" || format == "wave") {
+    bool success = EncodeWaveFile(fileData);
+    if (!success) {
+      std::cerr << "Failed to encode WAVE file." << std::endl;
+      return false;
+    }
+  } else {
+    std::cerr << "Unsupported: The encoding method for " << format
+              << " format is not yet supported." << std::endl;
+    return false;
+  }
+  // ---------------------------------------------------------------------------
+  // 2. Write std::vector into audio file.
+  // ---------------------------------------------------------------------------
+  std::ofstream outputFile(filePath, std::ios::binary);
+
+  if (outputFile.is_open()) {
+    for (size_t i = 0; i < fileData.size(); i++) {
+      char value = static_cast<char>(fileData[i]);
+      outputFile.write(&value, sizeof(char));
+    }
+
+    outputFile.close();
+
+    return true;
+  }
+
+  return false;
+}
+
+// WAV Audio File Decoder
+template <typename T, size_t N>
+bool Audio<T, N>::decodeWaveFile(const std::vector<uint8_t> &fileData) {
+  // This container class only cares about the data and key information in the
+  // audio file, so only the format and data chunk are decoded here.
+  // Find the starting indices of critical chunks within the WAV file.
+  size_t indexOfFormatChunk = getIndexOfChunk(fileData, "fmt ", 12);
+  size_t indexOfDataChunk = getIndexOfChunk(fileData, "data", 12);
+
+  // Decode the 'format' chunk to obtain format specifications.
+  // Format sub-chunk:
+  //   sub-chunk ID:       char[4]  | 4 bytes | "fmt "
+  //   sub-chunk size:     uint32_t | 4 bytes
+  //   audio format:       uint16_t | 2 bytes | 1 for PCM
+  //   number of channels: uint16_t | 2 bytes
+  //   sample rate:        uint32_t | 4 bytes
+  //   byte rate:          uint32_t | 4 bytes
+  //   block align:        uint16_t | 2 bytes
+  //   bits per sample:    uint16_t | 2 bytes
+  std::string formatChunkID(fileData.begin() + indexOfFormatChunk,
+                            fileData.begin() + indexOfFormatChunk + 4);
+  // uint32_t fmtChunkSize = fourBytesToI32(fileData, indexOfFormatChunk + 4);
+  // uint16_t audioFormat = twoBytesToI16(fileData, indexOfFormatChunk + 8);
+  this->numChannels = twoBytesToI16(fileData, indexOfFormatChunk + 10);
+  this->sampleRate = fourBytesToI32(fileData, indexOfFormatChunk + 12);
+  // byteRate = sampleRate * numChannels * bitsPerSample / 8
+  // uint32_t byteRate = fourBytesToI32(fileData, indexOfFormatChunk + 16);
+  // blockAlign = numChannels * bitsPerSample / 8
+  uint16_t blockAlign = twoBytesToI16(fileData, indexOfFormatChunk + 20);
+  this->bitsPerSample = twoBytesToI16(fileData, indexOfFormatChunk + 22);
+  uint16_t numBytesPerSample = static_cast<uint16_t>(this->bitsPerSample) / 8;
+
+  // Decode `data` chunk.
+  // Data sub-chunk:
+  //   sub-chunk ID:   char[4]  | 4 bytes | "data"
+  //   sub-chunk size: uint32_t | 4 bytes
+  //   data            | remains
+  std::string dataChunkID(fileData.begin() + indexOfDataChunk,
+                          fileData.begin() + indexOfDataChunk + 4);
+  int32_t dataChunkSize = fourBytesToI32(fileData, indexOfDataChunk + 4);
+  this->numSamples = dataChunkSize / blockAlign;
+  // size_t numSamplesPerChannels = this->numSamples / this->numChannels;
+  size_t samplesStartIndex = indexOfDataChunk + 8;
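+
+  // Worked example (illustrative numbers): for 16-bit stereo PCM,
+  // blockAlign = 2 * 16 / 8 = 4 bytes, so a 176400-byte data chunk holds
+  // 176400 / 4 = 44100 samples per channel, i.e. one second at 44100 Hz.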
+// WAV Audio File Decoder
+template <typename T, size_t N>
+bool Audio<T, N>::decodeWaveFile(const std::vector<uint8_t> &fileData) {
+  // This container class only cares about the data and key information in the
+  // audio file, so only the format and data chunk are decoded here.
+  // Find the starting indices of critical chunks within the WAV file.
+  size_t indexOfFormatChunk = getIndexOfChunk(fileData, "fmt ", 12);
+  size_t indexOfDataChunk = getIndexOfChunk(fileData, "data", 12);
+
+  // Decode the 'format' chunk to obtain format specifications.
+  // Format sub-chunk:
+  //   sub-chunk ID: char[4] | 4 bytes | "fmt "
+  //   sub-chunk size: uint32_t | 4 bytes
+  //   audio format: uint16_t | 2 bytes | 1 for PCM
+  //   number of channels: uint16_t | 2 bytes
+  //   sample rate: uint32_t | 4 bytes
+  //   byte rate: uint32_t | 4 bytes
+  //   block align: uint16_t | 2 bytes
+  //   bits per sample: uint16_t | 2 bytes
+  std::string formatChunkID(fileData.begin() + indexOfFormatChunk,
+                            fileData.begin() + indexOfFormatChunk + 4);
+  // uint32_t fmtChunkSize = fourBytesToI32(fileData, indexOfFormatChunk + 4);
+  // uint16_t audioFormat = twoBytesToI16(fileData, indexOfFormatChunk + 8);
+  this->numChannels = twoBytesToI16(fileData, indexOfFormatChunk + 10);
+  this->sampleRate = fourBytesToI32(fileData, indexOfFormatChunk + 12);
+  // byteRate = sampleRate * numChannels * bitsPerSample / 8
+  // uint32_t byteRate = fourBytesToI32(fileData, indexOfFormatChunk + 16);
+  // blockAlign = numChannels * bitsPerSample / 8
+  uint16_t blockAlign = twoBytesToI16(fileData, indexOfFormatChunk + 20);
+  this->bitsPerSample = twoBytesToI16(fileData, indexOfFormatChunk + 22);
+  uint16_t numBytesPerSample = static_cast<uint16_t>(this->bitsPerSample) / 8;
+
+  // Decode the 'data' chunk.
+  // Data sub-chunk:
+  //   sub-chunk ID: char[4] | 4 bytes | "data"
+  //   sub-chunk size: uint32_t | 4 bytes
+  //   data | remains
+  std::string dataChunkID(fileData.begin() + indexOfDataChunk,
+                          fileData.begin() + indexOfDataChunk + 4);
+  int32_t dataChunkSize = fourBytesToI32(fileData, indexOfDataChunk + 4);
+  this->numSamples = dataChunkSize / blockAlign;
+  // size_t numSamplesPerChannels = this->numSamples / this->numChannels;
+  size_t samplesStartIndex = indexOfDataChunk + 8;
+
+  // Audio MemRef layout defaults to 1 dimension.
+  // Sample values from multiple channels are stored together.
+  if (N == 1) {
+    this->sizes[0] = this->numSamples;
+  } else if (N == this->numChannels) {
+    // TODO: add conversion from 1 dimension to multi-dimension
+    std::cerr << "Unsupported: The MemRef layout of multi-dimensional channels "
+                 "is not yet supported."
+              << std::endl;
+    return false;
+  } else {
+    std::cerr << "Error: dimension mismatch (audio file channel: "
+              << this->numChannels << " MemRef layout channel: " << N << ")"
+              << std::endl;
+    return false;
+  }
+
+  // Allocate memory for MemRef.
+  this->setStrides();
+  size_t size = this->product(this->sizes);
+  this->allocated = (T *)malloc(sizeof(T) * size);
+  this->aligned = this->allocated;
+
+  // Sample data type: 8 bit
+  if (this->bitsPerSample == 8) {
+    size_t memrefIndex = 0;
+    for (size_t i = 0; i < this->numSamples; i++) {
+      for (size_t channel = 0; channel < this->numChannels; channel++) {
+        size_t sampleIndex =
+            samplesStartIndex + (blockAlign * i) + channel * numBytesPerSample;
+        this->aligned[memrefIndex] = oneByteToSample(fileData[sampleIndex]);
+        memrefIndex++;
+      }
+    }
+  }
+  // Sample data type: 16 bit
+  else if (this->bitsPerSample == 16) {
+    size_t memrefIndex = 0;
+    for (size_t i = 0; i < this->numSamples; i++) {
+      for (size_t channel = 0; channel < this->numChannels; channel++) {
+        size_t sampleIndex =
+            samplesStartIndex + (blockAlign * i) + channel * numBytesPerSample;
+        int16_t dataTwoBytes = twoBytesToI16(fileData, sampleIndex);
+        this->aligned[memrefIndex] = twoBytesToSample(dataTwoBytes);
+        memrefIndex++;
+      }
+    }
+  }
+  // Other data types are not currently supported.
+  else {
+    std::cerr << "Unsupported audio data type." << std::endl;
+    return false;
+  }
+
+  return true;
+}
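The fmt-chunk fields decoded above are redundant in a useful way: blockAlign and byteRate can be recomputed from the channel count, sample rate, and bit depth, which makes a handy sanity check when debugging a decoder. A small worked example for CD-style audio:

```
#include <cstdint>
#include <iostream>

int main() {
  // 16-bit stereo PCM at 44.1 kHz.
  uint16_t numChannels = 2, bitsPerSample = 16;
  uint32_t sampleRate = 44100;
  uint16_t blockAlign = numChannels * bitsPerSample / 8; // 4 bytes per frame
  uint32_t byteRate = sampleRate * blockAlign;           // 176400 bytes/s
  // One second of audio: the data chunk holds sampleRate frames.
  uint32_t dataChunkSize = sampleRate * blockAlign;      // 176400 bytes
  uint32_t numSamples = dataChunkSize / blockAlign;      // 44100 frames
  std::cout << blockAlign << " " << byteRate << " " << numSamples << "\n";
}
```

Note that numSamples here counts frames (one sample per channel), matching the dataChunkSize / blockAlign division in the decoder.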
+// WAV Audio File Encoder
+template <typename T, size_t N>
+bool Audio<T, N>::EncodeWaveFile(std::vector<uint8_t> &fileData) {
+  // Encode the 'header' chunk.
+  // RIFF chunk descriptor
+  //   chunk ID: char[4] | 4 bytes | "RIFF"
+  //   chunk size: uint32_t | 4 bytes
+  //   format: char[4] | 4 bytes | "WAVE"
+  stringToBytes(fileData, "RIFF");
+  int16_t audioFormat = this->bitsPerSample == 32 ? 0 : 1;
+  // Size of the 'format' sub-chunk; does not include the metadata length.
+  int32_t formatChunkSize = audioFormat == 1 ? 16 : 18;
+  // Size of the 'data' sub-chunk; does not include the metadata length.
+  int32_t dataChunkSize =
+      this->numSamples * this->numChannels * this->bitsPerSample / 8;
+  // The file size in bytes includes the header chunk size (4, not counting
+  // RIFF and WAVE), the format chunk size (formatChunkSize plus 8 bytes of
+  // metadata), and the data chunk size (dataChunkSize plus 8 bytes of
+  // metadata).
+  int32_t fileSizeInBytes = 4 + formatChunkSize + 8 + dataChunkSize + 8;
+  i32ToFourBytes(fileData, fileSizeInBytes);
+  stringToBytes(fileData, "WAVE");
+
+  // Encode the 'format' chunk.
+  // Format sub-chunk:
+  //   sub-chunk ID: char[4] | 4 bytes | "fmt "
+  //   sub-chunk size: uint32_t | 4 bytes
+  //   audio format: uint16_t | 2 bytes | 1 for PCM
+  //   number of channels: uint16_t | 2 bytes
+  //   sample rate: uint32_t | 4 bytes
+  //   byte rate: uint32_t | 4 bytes
+  //   block align: uint16_t | 2 bytes
+  //   bits per sample: uint16_t | 2 bytes
+  stringToBytes(fileData, "fmt ");
+  i32ToFourBytes(fileData, formatChunkSize);
+  i16ToTwoBytes(fileData, audioFormat);
+  i16ToTwoBytes(fileData, static_cast<int16_t>(this->numChannels));
+  i32ToFourBytes(fileData, static_cast<int32_t>(this->sampleRate));
+  int16_t numBytesPerBlock =
+      static_cast<int16_t>(dataChunkSize / this->numSamples);
+  int32_t numBytesPerSecond =
+      static_cast<int32_t>(this->sampleRate * numBytesPerBlock);
+  i32ToFourBytes(fileData, numBytesPerSecond);
+  i16ToTwoBytes(fileData, numBytesPerBlock);
+  i16ToTwoBytes(fileData, static_cast<int16_t>(this->bitsPerSample));
+
+  // Encode the 'data' chunk.
+  // Data sub-chunk:
+  //   sub-chunk ID: char[4] | 4 bytes | "data"
+  //   sub-chunk size: uint32_t | 4 bytes
+  //   data | remains
+  stringToBytes(fileData, "data");
+  i32ToFourBytes(fileData, dataChunkSize);
+
+  // Sample data length: 8 bit
+  if (this->bitsPerSample == 8) {
+    size_t memrefIndex = 0;
+    for (size_t i = 0; i < this->numSamples; i++) {
+      for (size_t channel = 0; channel < this->numChannels; channel++) {
+        uint8_t byte = sampleToOneByte(this->aligned[memrefIndex]);
+        fileData.push_back(byte);
+        memrefIndex++;
+      }
+    }
+  }
+  // Sample data length: 16 bit
+  else if (this->bitsPerSample == 16) {
+    size_t memrefIndex = 0;
+    for (size_t i = 0; i < this->numSamples; i++) {
+      for (size_t channel = 0; channel < this->numChannels; channel++) {
+        int16_t sampleAsInt = sampleToI16(this->aligned[memrefIndex]);
+        i16ToTwoBytes(fileData, sampleAsInt);
+        memrefIndex++;
       }
     }
-    this->audioFile.samples.reset(temp);
   }
-  return this->audioFile.save(filename);
+  // Other data lengths are not yet supported.
+  else {
+    std::cerr << "Unsupported audio data length: " << this->bitsPerSample
+              << " bit" << std::endl;
+    return false;
+  }
+
+  return true;
+}
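The size bookkeeping in the encoder is easy to check by hand. For example, for 3000 mono 16-bit samples:

```
#include <cstdint>
#include <iostream>

int main() {
  int32_t numSamples = 3000, numChannels = 1, bitsPerSample = 16;
  int32_t formatChunkSize = 16; // PCM
  int32_t dataChunkSize = numSamples * numChannels * bitsPerSample / 8; // 6000
  // 4 ("WAVE") + format chunk (16 + 8 metadata) + data chunk (6000 + 8).
  int32_t fileSizeInBytes = 4 + formatChunkSize + 8 + dataChunkSize + 8; // 6036
  // The on-disk file is this value plus 8 ("RIFF" tag and the size field).
  std::cout << fileSizeInBytes << "\n";
}
```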
+
+// Locates the start index of a specific chunk in a WAV file data buffer.
+// Params:
+//   fileData: Vector containing the raw binary data of the WAV file.
+//   chunkHeaderID: The 4-byte identifier for the chunk (e.g., "fmt ", "data").
+//   startIndex: Index to start the search from within the fileData.
+//   endianness: Byte order used to interpret multi-byte values in the chunk
+//   size.
+// Returns:
+//   The index of the start of the chunk, or 0 if the chunk is not found.
+template <typename T, size_t N>
+size_t Audio<T, N>::getIndexOfChunk(const std::vector<uint8_t> &fileData,
+                                    const std::string &chunkHeaderID,
+                                    size_t startIndex, Endianness endianness) {
+  constexpr int dataLen = 4;
+  if (chunkHeaderID.size() != dataLen) {
+    assert(false && "Chunk header ID must be exactly 4 characters long");
+    return -1;
+  }
+  size_t i = startIndex;
+  while (i < fileData.size() - dataLen) {
+    // Check if the current bytes match the chunk header ID.
+    if (memcmp(&fileData[i], chunkHeaderID.data(), dataLen) == 0) {
+      return i;
+    }
+    // Skip to the next chunk: advance by the size of the current chunk.
+    // Move the index to the size part of the chunk.
+    i += dataLen;
+    // Prevent reading beyond the vector size.
+    if (i + dataLen > fileData.size())
+      break;
+    // Get the size of the chunk.
+    auto chunkSize = fourBytesToI32(fileData, i, endianness);
+    if (chunkSize < 0) {
+      assert(false && "Invalid chunk size encountered");
+      return -1;
+    }
+    // Move to the next chunk header.
+    i += (dataLen + chunkSize);
+  }
+  // Return 0 if the chunk is not found.
+  return 0;
+}
+
+// Converts four bytes from the file data array to a 32-bit integer based on
+// endianness.
+// Params:
+//   fileData: Vector containing the raw binary data.
+//   startIndex: Index in fileData where the 4-byte sequence starts.
+//   endianness: Specifies the byte order (LittleEndian or BigEndian).
+// Returns:
+//   The 32-bit integer converted from the byte sequence.
+template <typename T, size_t N>
+int32_t Audio<T, N>::fourBytesToI32(const std::vector<uint8_t> &fileData,
+                                    size_t startIndex, Endianness endianness) {
+  // Ensure the index is within the bounds to prevent out-of-range access.
+  if (startIndex + 3 >= fileData.size()) {
+    throw std::out_of_range("Index out of range for fourBytesToI32");
+  }
+  // Use uint32_t to prevent sign extension and maintain an accurate binary
+  // representation during bit operations.
+  uint32_t result;
+  if (endianness == Endianness::LittleEndian) {
+    result = (static_cast<uint32_t>(fileData[startIndex + 3]) << 24) |
+             (static_cast<uint32_t>(fileData[startIndex + 2]) << 16) |
+             (static_cast<uint32_t>(fileData[startIndex + 1]) << 8) |
+             static_cast<uint32_t>(fileData[startIndex]);
+  } else {
+    result = (static_cast<uint32_t>(fileData[startIndex]) << 24) |
+             (static_cast<uint32_t>(fileData[startIndex + 1]) << 16) |
+             (static_cast<uint32_t>(fileData[startIndex + 2]) << 8) |
+             static_cast<uint32_t>(fileData[startIndex + 3]);
+  }
+  // Convert the unsigned result to signed int32_t to match the function's
+  // return type.
+  return static_cast<int32_t>(result);
+}
+
+// Converts two bytes from the file data array to a 16-bit integer based on
+// endianness.
+// Params:
+//   fileData: Vector containing the raw binary data.
+//   startIndex: Index in fileData where the 2-byte sequence starts.
+//   endianness: Specifies the byte order (LittleEndian or BigEndian).
+// Returns:
+//   The 16-bit integer converted from the byte sequence.
+template <typename T, size_t N>
+int16_t Audio<T, N>::twoBytesToI16(const std::vector<uint8_t> &fileData,
+                                   size_t startIndex, Endianness endianness) {
+  // Ensure the index is within the bounds to prevent out-of-range access.
+  if (startIndex + 1 >= fileData.size()) {
+    throw std::out_of_range("Index out of range for twoBytesToI16");
+  }
+  // Use uint16_t to prevent sign extension and maintain an accurate binary
+  // representation during bit operations.
+  uint16_t result;
+  if (endianness == Endianness::LittleEndian) {
+    result = (static_cast<uint16_t>(fileData[startIndex + 1]) << 8) |
+             static_cast<uint16_t>(fileData[startIndex]);
+  } else {
+    result = (static_cast<uint16_t>(fileData[startIndex]) << 8) |
+             static_cast<uint16_t>(fileData[startIndex + 1]);
+  }
+  // Convert the unsigned result to signed int16_t to match the function's
+  // return type.
+  return static_cast<int16_t>(result);
+}
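A quick way to convince yourself of the byte-order handling: the same two bytes decode to different values under each convention.

```
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  std::vector<uint8_t> bytes = {0x01, 0x02};
  // Little-endian: low byte first -> 0x0201 = 513.
  int16_t le = static_cast<int16_t>((bytes[1] << 8) | bytes[0]);
  // Big-endian: high byte first -> 0x0102 = 258.
  int16_t be = static_cast<int16_t>((bytes[0] << 8) | bytes[1]);
  std::cout << le << " " << be << "\n"; // 513 258
}
```

WAV chunk sizes and PCM samples are little-endian on disk, which is why LittleEndian is the default throughout this container.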
+
+// Converts a 32-bit integer to four bytes based on endianness.
+// Params:
+//   fileData: Vector containing the raw binary data.
+//   num: A 32-bit integer prepared for conversion.
+//   endianness: Specifies the byte order (LittleEndian or BigEndian).
 template <typename T, size_t N>
-void Audio<T, N>::fetchMetadata(const AudioFile<T> &aud) {
-  this->audioFile.setBitDepth(aud.getBitDepth());
-  this->audioFile.setSampleRate(aud.getSampleRate());
-  this->audioFile.numSamples = aud.numSamples;
-  this->audioFile.numChannels = aud.numChannels;
-  this->audioFile.setAudioBuffer(nullptr);
+void Audio<T, N>::i32ToFourBytes(std::vector<uint8_t> &fileData, int32_t num,
+                                 Endianness endianness) {
+  // Use uint8_t to prevent sign extension and maintain an accurate binary
+  // representation during bit operations.
+  uint8_t bytes[4];
+  if (endianness == Endianness::LittleEndian) {
+    bytes[3] = static_cast<uint8_t>(num >> 24) & 0xFF;
+    bytes[2] = static_cast<uint8_t>(num >> 16) & 0xFF;
+    bytes[1] = static_cast<uint8_t>(num >> 8) & 0xFF;
+    bytes[0] = static_cast<uint8_t>(num) & 0xFF;
+  } else {
+    bytes[0] = static_cast<uint8_t>(num >> 24) & 0xFF;
+    bytes[1] = static_cast<uint8_t>(num >> 16) & 0xFF;
+    bytes[2] = static_cast<uint8_t>(num >> 8) & 0xFF;
+    bytes[3] = static_cast<uint8_t>(num) & 0xFF;
+  }
+  // Append the converted bytes to the fileData vector.
+  for (size_t i = 0; i < 4; i++)
+    fileData.push_back(bytes[i]);
 }
-template <typename T, size_t N> void Audio<T, N>::moveToMemRef() {
-  if (data)
-    delete data;
-  intptr_t sizes[N];
-  for (size_t i = 0; i < N; ++i) {
-    sizes[i] = audioFile.numSamples;
-  }
-  data = new MemRef<T, N>(audioFile.samples, sizes);
+
+// Converts a 16-bit integer to two bytes based on endianness.
+// Params:
+//   fileData: Vector containing the raw binary data.
+//   num: A 16-bit integer prepared for conversion.
+//   endianness: Specifies the byte order (LittleEndian or BigEndian).
+template <typename T, size_t N>
+void Audio<T, N>::i16ToTwoBytes(std::vector<uint8_t> &fileData, int16_t num,
+                                Endianness endianness) {
+  // Use uint8_t to prevent sign extension and maintain an accurate binary
+  // representation during bit operations.
+  uint8_t bytes[2];
+  if (endianness == Endianness::LittleEndian) {
+    bytes[1] = static_cast<uint8_t>(num >> 8) & 0xFF;
+    bytes[0] = static_cast<uint8_t>(num) & 0xFF;
+  } else {
+    bytes[0] = static_cast<uint8_t>(num >> 8) & 0xFF;
+    bytes[1] = static_cast<uint8_t>(num) & 0xFF;
+  }
+  // Append the converted bytes to the fileData vector.
+  fileData.push_back(bytes[0]);
+  fileData.push_back(bytes[1]);
 }
-template <typename T, size_t N> void Audio<T, N>::moveToAudioFile() {
-  if (data) {
-    auto temp = data->release();
-    audioFile.setAudioBuffer(temp);
+
+// Converts an audio sample to an 8-bit PCM format (one byte).
+// Params:
+//   sample: A floating-point value representing the audio sample.
+// Returns:
+//   An 8-bit unsigned integer representing the sample as one byte.
+template <typename T, size_t N> uint8_t Audio<T, N>::sampleToOneByte(T sample) {
+  if (std::isnan(sample)) {
+    // Handle the corner case of NaN (Not a Number). Reset NaN to 1.
+    sample = static_cast<T>(1.);
+  } else {
+    // Restrict the sample value to the range [-1.0, 1.0].
+    sample = std::min(sample, static_cast<T>(1.));
+    sample = std::max(sample, static_cast<T>(-1.));
   }
+  // Convert a normalized floating-point audio sample to the [0, 255] range.
+  sample = (sample + static_cast<T>(1.)) / static_cast<T>(2.);
+  return static_cast<uint8_t>(sample * 255.);
 }
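The two quantizers map the normalized range differently: 8-bit WAV PCM is unsigned and centered near 128, while 16-bit PCM is signed and symmetric. A few spot checks, mirroring the container's conversion logic in a standalone sketch:

```
#include <cmath>
#include <cstdint>
#include <iostream>

// Same clamping and scaling as the container's converters, for float samples.
uint8_t toOneByte(float s) {
  s = std::isnan(s) ? 1.f : std::fmin(std::fmax(s, -1.f), 1.f);
  return static_cast<uint8_t>((s + 1.f) / 2.f * 255.f);
}
int16_t toI16(float s) {
  s = std::isnan(s) ? 1.f : std::fmin(std::fmax(s, -1.f), 1.f);
  return static_cast<int16_t>(s * 32767.f);
}

int main() {
  std::cout << int(toOneByte(0.f)) << " " << toI16(0.f) << "\n";   // 127 0
  std::cout << int(toOneByte(1.f)) << " " << toI16(1.f) << "\n";   // 255 32767
  std::cout << int(toOneByte(-1.f)) << " " << toI16(-1.f) << "\n"; // 0 -32767
}
```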
+
+// Converts an audio sample to a 16-bit PCM format (two bytes).
+// Params:
+//   sample: A floating-point value representing the audio sample.
+// Returns:
+//   A 16-bit signed integer representing the sample as two bytes.
+template <typename T, size_t N> int16_t Audio<T, N>::sampleToI16(T sample) {
+  if (std::isnan(sample)) {
+    // Handle the corner case of NaN (Not a Number). Reset NaN to 1.
+    sample = static_cast<T>(1.);
+  } else {
+    // Restrict the sample value to the range [-1.0, 1.0].
+    sample = std::min(sample, static_cast<T>(1.));
+    sample = std::max(sample, static_cast<T>(-1.));
+  }
+  // Convert a normalized floating-point audio sample to the [-32767, 32767]
+  // range.
+  return static_cast<int16_t>(sample * 32767.);
+}
 } // namespace dap
 
 #endif // FRONTEND_INTERFACES_BUDDY_DAP_AUDIOCONTAINER
diff --git a/frontend/Interfaces/buddy/DAP/DAP.h b/frontend/Interfaces/buddy/DAP/DAP.h
index 5f86565cc..48fd2afbf 100644
--- a/frontend/Interfaces/buddy/DAP/DAP.h
+++ b/frontend/Interfaces/buddy/DAP/DAP.h
@@ -21,10 +21,10 @@
 #ifndef FRONTEND_INTERFACES_BUDDY_DAP_DAP
 #define FRONTEND_INTERFACES_BUDDY_DAP_DAP
 
-#include "AudioFile.h"
 #include "buddy/DAP/AudioContainer.h"
 #include "buddy/DAP/DSP/Biquad.h"
 #include "buddy/DAP/DSP/FIR.h"
 #include "buddy/DAP/DSP/IIR.h"
+#include "buddy/DAP/DSP/WhisperPreprocess.h"
 
 #endif // FRONTEND_INTERFACES_BUDDY_DAP_DAP
diff --git a/frontend/Interfaces/buddy/DAP/DSP/WhisperPreprocess.h b/frontend/Interfaces/buddy/DAP/DSP/WhisperPreprocess.h
new file mode 100644
index 000000000..a6c3ef3b2
--- /dev/null
+++ b/frontend/Interfaces/buddy/DAP/DSP/WhisperPreprocess.h
@@ -0,0 +1,54 @@
+//===- WhisperPreprocess.h ------------------------------------------------===//
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+//
+// Header file for the whisper preprocess operation in DAP dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FRONTEND_INTERFACES_BUDDY_DAP_DSP_WHISPERPREPROCESS
+#define FRONTEND_INTERFACES_BUDDY_DAP_DSP_WHISPERPREPROCESS
+
+#include "buddy/Core/Container.h"
+#include "buddy/DAP/AudioContainer.h"
+#include "buddy/DAP/DSP/IIRDesign.h"
+
+namespace dap {
+namespace detail {
+// Declare the whisper preprocess C interface.
+extern "C" {
+// The original MLIR function:
+// ```mlir
+// func.func @buddy_whisperPreprocess(%in : memref<?xf64>) ->
+//     memref<1x80x3000xf32>
+// ```
+//
+// After applying the '-llvm-request-c-wrappers' pass:
+// The result of the function (memref<1x80x3000xf32>) is modified to be the
+// first operand.
+void _mlir_ciface_buddy_whisperPreprocess(MemRef<float, 3> *outputFeatures,
+                                          MemRef<double, 1> *inputRawSpeech);
+}
+} // namespace detail
+
+// Function for the Whisper preprocess.
+void whisperPreprocess(MemRef<double, 1> *inputRawSpeech,
+                       MemRef<float, 3> *outputFeatures) {
+  detail::_mlir_ciface_buddy_whisperPreprocess(outputFeatures, inputRawSpeech);
+}
+
+} // namespace dap
+
+#endif // FRONTEND_INTERFACES_BUDDY_DAP_DSP_WHISPERPREPROCESS
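A hedged sketch of how this wrapper might be driven end to end. It assumes the raw speech was decoded into a `dap::Audio<double, 1>` container (which derives from `MemRef<double, 1>` per the constructor above) and that `MemRef` exposes a sizes-array constructor as in buddy's Container.h; the input path is hypothetical:

```
#include "buddy/DAP/DAP.h"

int main() {
  // Raw speech samples as a 1-D f64 container (hypothetical path).
  dap::Audio<double, 1> rawSpeech("speech.wav");
  // Output buffer matching the fixed 1x80x3000 feature shape.
  intptr_t sizes[3] = {1, 80, 3000};
  MemRef<float, 3> features(sizes);
  dap::whisperPreprocess(&rawSpeech, &features);
  return 0;
}
```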
diff --git a/frontend/Interfaces/buddy/DIP/ImgContainer.h b/frontend/Interfaces/buddy/DIP/ImgContainer.h
new file mode 100644
index 000000000..2ea30648f
--- /dev/null
+++ b/frontend/Interfaces/buddy/DIP/ImgContainer.h
@@ -0,0 +1,254 @@
+//===- ImgContainer.h -----------------------------------------------------===//
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+//
+// Image container descriptor (without OpenCV dependency).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FRONTEND_INTERFACES_BUDDY_DIP_IMGCONTAINER
+#define FRONTEND_INTERFACES_BUDDY_DIP_IMGCONTAINER
+
+#include "buddy/Core/Container.h"
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+namespace dip {
+enum ImageModes {
+  DIP_GRAYSCALE = 0,
+  DIP_RGB = 1,
+};
+
+template <typename T, size_t N> class Image : public MemRef<T, N> {
+public:
+  // Constructor initializes the image by loading from a file.
+  // Params:
+  //   filename: Specifies the path to the image file.
+  //   mode: Specifies the image mode (e.g., DIP_GRAYSCALE, DIP_RGB).
+  //   norm: Indicates whether to normalize pixel values (default is false).
+  Image(std::string filename, ImageModes mode, bool norm = false);
+
+  // Retrieves the name of the current image format as a string.
+  std::string getFormatName() const {
+    switch (this->imageFormat) {
+    case ImageFormat::BMP:
+      return "BMP";
+    default:
+      return "Unsupported format";
+    }
+  }
+  // Returns the width of the image in pixels.
+  size_t getWidth() const { return this->width; }
+  // Returns the height of the image in pixels.
+  size_t getHeight() const { return this->height; }
+  // Returns the bit depth of the image.
+  int getBitDepth() const { return this->bitDepth; }
+
+private:
+  // Enum to represent supported image formats.
+  enum class ImageFormat {
+    ERROR, // Represents an error or unsupported format.
+    BMP,   // BMP file format.
+  } imageFormat;
+  // Mode of the image (e.g., DIP_GRAYSCALE, DIP_RGB).
+  ImageModes imageMode;
+  // Width of the image in pixels.
+  size_t width;
+  // Height of the image in pixels.
+  size_t height;
+  // Bit depth of the image.
+  int bitDepth;
+  // Normalization flag.
+  bool isNorm;
+  // Determines the image format from raw file data.
+  void determineFormat(const std::vector<uint8_t> &fileData);
+  // Decodes a BMP image from raw file data.
+  bool decodeBMP(const std::vector<uint8_t> &fileData);
+};
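For reference when reading the decoder below: in the 54-byte BMP header, the pixel-array offset lives at byte 10, the width at 18, the height at 22, the bit depth at 28, and the compression tag at 30, all little-endian. A standalone sketch of pulling those fields on a little-endian host (file name hypothetical):

```
#include <cstdint>
#include <fstream>
#include <iostream>
#include <vector>

int main() {
  std::ifstream file("image.bmp", std::ios::binary); // hypothetical path
  std::vector<uint8_t> header(54);
  if (!file.read(reinterpret_cast<char *>(header.data()), header.size()))
    return 1;
  uint32_t pixelDataOffset = *reinterpret_cast<uint32_t *>(&header[10]);
  uint32_t width = *reinterpret_cast<uint32_t *>(&header[18]);
  uint32_t height = *reinterpret_cast<uint32_t *>(&header[22]);
  uint16_t bitDepth = *reinterpret_cast<uint16_t *>(&header[28]);
  std::cout << width << "x" << height << " @ " << bitDepth
            << " bpp, pixel data at offset " << pixelDataOffset << "\n";
}
```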
+
+// Image Container Constructor
+// Constructs an image container object from the image file path.
+template <typename T, size_t N>
+Image<T, N>::Image(std::string filePath, ImageModes mode, bool norm)
+    : imageMode(mode), isNorm(norm) {
+  // ---------------------------------------------------------------------------
+  // 1. Read the image file into a std::vector<uint8_t>.
+  // ---------------------------------------------------------------------------
+  // Open the file in binary mode and position the file pointer at the end of
+  // the file.
+  std::ifstream file(filePath, std::ios::binary | std::ios::ate);
+  // Check if the file was successfully opened.
+  if (!file) {
+    throw std::runtime_error("Error: Unable to open file at " + filePath);
+  }
+  // Get the size of the file.
+  size_t dataLength = file.tellg();
+  // Move the file pointer to the beginning of the file.
+  file.seekg(0, std::ios::beg);
+  // Create a vector to store the data.
+  std::vector<uint8_t> fileData(dataLength);
+  // Read the data.
+  if (!file.read(reinterpret_cast<char *>(fileData.data()), dataLength)) {
+    throw std::runtime_error("Error: Unable to read data from " + filePath);
+  }
+  file.close();
+
+  // ---------------------------------------------------------------------------
+  // 2. Determine the image format and decode the image data into MemRef.
+  // ---------------------------------------------------------------------------
+  // Determine the image format from the raw file data.
+  determineFormat(fileData);
+  if (this->imageFormat == ImageFormat::BMP) {
+    bool success = decodeBMP(fileData);
+    if (!success) {
+      this->imageFormat = ImageFormat::ERROR;
+      throw std::runtime_error("Failed to decode BMP file from " + filePath);
+    };
+  } else {
+    throw std::runtime_error("Unsupported image file format.");
+  }
+}
+
+// Determines the image format by inspecting the header of the file data.
+template <typename T, size_t N>
+void Image<T, N>::determineFormat(const std::vector<uint8_t> &fileData) {
+  if (fileData.size() > 2 && fileData[0] == 'B' && fileData[1] == 'M') {
+    this->imageFormat = ImageFormat::BMP;
+  } else {
+    this->imageFormat = ImageFormat::ERROR;
+  }
+}
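The grayscale path in the BMP decoder below uses the classic BT.601 luma weights, gray = 0.299 R + 0.587 G + 0.114 B. For a pure red pixel (255, 0, 0) this truncates 76.245 down to 76:

```
#include <iostream>

int main() {
  int r = 255, g = 0, b = 0;
  int gray = static_cast<int>(0.299 * r + 0.587 * g + 0.114 * b);
  std::cout << gray << "\n"; // 76
}
```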
+
+// BMP Image File Decoder
+template <typename T, size_t N>
+bool Image<T, N>::decodeBMP(const std::vector<uint8_t> &fileData) {
+  // Check if the provided data is large enough to contain a minimal BMP header
+  // (54 bytes).
+  if (fileData.size() < 54) {
+    throw std::runtime_error("Invalid BMP File: too small to contain header");
+  }
+
+  // Extract image information from the BMP header.
+  this->width = *reinterpret_cast<const uint32_t *>(&fileData[18]);
+  this->height = *reinterpret_cast<const uint32_t *>(&fileData[22]);
+  this->bitDepth = *reinterpret_cast<const uint16_t *>(&fileData[28]);
+  uint32_t compression = *reinterpret_cast<const uint32_t *>(&fileData[30]);
+  size_t pixelDataOffset = *reinterpret_cast<const uint32_t *>(&fileData[10]);
+
+  // Currently, only the BI_RGB (value 0) compression method is supported.
+  if (compression != 0) {
+    std::cerr << "Unsupported BMP file compression method." << std::endl;
+    return false;
+  }
+
+  // Currently, only the NCHW format with 4 dimensions is supported.
+  if (N == 4) {
+    if (this->imageMode == ImageModes::DIP_GRAYSCALE) {
+      // TODO: Add batch setting.
+      this->sizes[0] = 1;
+      this->sizes[1] = 1;
+      this->sizes[2] = this->height;
+      this->sizes[3] = this->width;
+      this->setStrides();
+      size_t size = this->product(this->sizes);
+      this->allocated = (T *)malloc(sizeof(T) * size);
+      this->aligned = this->allocated;
+      // Fill data into the MemRef container.
+      size_t memrefIndex = 0;
+      if (this->bitDepth == 32) {
+        // BMP files are stored upside-down.
+        for (size_t i = this->height; i > 0; i--) {
+          for (size_t j = 0; j < this->width; j++) {
+            // Locate the current pixel.
+            size_t pixelIndex =
+                pixelDataOffset + (((i - 1) * this->width) + j) * 4;
+            // Extract the blue, green, and red values from the current pixel.
+            int bluePixel =
+                *reinterpret_cast<const uint8_t *>(&fileData[pixelIndex]);
+            int greenPixel =
+                *reinterpret_cast<const uint8_t *>(&fileData[pixelIndex + 1]);
+            int redPixel =
+                *reinterpret_cast<const uint8_t *>(&fileData[pixelIndex + 2]);
+            // Calculate the gray scale value.
+            int grayScaleValue = static_cast<int>(
+                0.299 * redPixel + 0.587 * greenPixel + 0.114 * bluePixel);
+            // Store the gray scale value into the MemRef container.
+            this->aligned[memrefIndex] =
+                this->isNorm ? static_cast<T>(grayScaleValue) / 255
+                             : static_cast<T>(grayScaleValue);
+            memrefIndex++;
+          }
+        }
+      } else {
+        std::cerr << "Unsupported: " << this->bitDepth << "bit depth."
+                  << std::endl;
+        return false;
+      }
+    } else if (this->imageMode == ImageModes::DIP_RGB) {
+      // TODO: Add batch setting.
+      this->sizes[0] = 1;
+      this->sizes[1] = 3;
+      this->sizes[2] = this->height;
+      this->sizes[3] = this->width;
+      this->setStrides();
+      size_t size = this->product(this->sizes);
+      this->allocated = (T *)malloc(sizeof(T) * size);
+      this->aligned = this->allocated;
+      // Fill data into the MemRef container.
+      size_t memrefIndex = 0;
+      size_t colorStride = this->height * this->width;
+      if (this->bitDepth == 32) {
+        // BMP files are stored upside-down.
+        for (size_t i = height; i > 0; i--) {
+          for (size_t j = 0; j < width; j++) {
+            // Locate the current pixel.
+            size_t pixelIndex = pixelDataOffset + (((i - 1) * width) + j) * 4;
+            // Extract the blue, green, and red values from the current pixel.
+            int bluePixel =
+                *reinterpret_cast<const uint8_t *>(&fileData[pixelIndex]);
+            int greenPixel =
+                *reinterpret_cast<const uint8_t *>(&fileData[pixelIndex + 1]);
+            int redPixel =
+                *reinterpret_cast<const uint8_t *>(&fileData[pixelIndex + 2]);
+            // Store the values into the MemRef container in RGB order
+            // (BGR -> RGB).
+            this->aligned[memrefIndex] = this->isNorm
+                                             ? static_cast<T>(redPixel) / 255
+                                             : static_cast<T>(redPixel);
+            this->aligned[memrefIndex + colorStride] =
+                this->isNorm ? static_cast<T>(greenPixel) / 255
+                             : static_cast<T>(greenPixel);
+            this->aligned[memrefIndex + 2 * colorStride] =
+                this->isNorm ? static_cast<T>(bluePixel) / 255
+                             : static_cast<T>(bluePixel);
+            memrefIndex++;
+          }
+        }
+      } else {
+        std::cerr << "Unsupported: " << this->bitDepth << "bit depth."
+                  << std::endl;
+        return false;
+      }
+    }
+  } else {
+    std::cerr << "Unsupported: " << N << " dimension layout." << std::endl;
+    return false;
+  }
+  return true;
+}
+
+} // namespace dip
+
+#endif // FRONTEND_INTERFACES_BUDDY_DIP_IMGCONTAINER
diff --git a/frontend/Interfaces/buddy/LLM/TextContainer.h b/frontend/Interfaces/buddy/LLM/TextContainer.h
index 28432b3c1..30adb2742 100644
--- a/frontend/Interfaces/buddy/LLM/TextContainer.h
+++ b/frontend/Interfaces/buddy/LLM/TextContainer.h
@@ -79,6 +79,7 @@ template <typename T, size_t N> class Text : public MemRef<T, N> {
   // Tokens are identified by ids and thick underlines are replaced with
   // whitespaces.
   std::string revertLlama();
+  std::string revertWhisper();
 
   // Get sequence length
   size_t getTokenCnt() { return this->tokenCnt; }
@@ -346,6 +347,39 @@ template <typename T, size_t N> std::string Text<T, N>::revertLlama() {
   return dst;
 }
 
+template <typename T, size_t N> std::string Text<T, N>::revertWhisper() {
+  std::string dst;
+
+  const int PAD_ID = 50257;
+  const int CLS_ID = 50258;
+  const int SEP_ID = 50257;
+  const int TRAN_ID = 50359;
+  const int NOTIMESTAMPS_ID = 50363;
+
+  for (size_t i = 0; i < this->tokenCnt; i++) {
+    int id = this->aligned[i];
+    // pad / start / type timestamps / language
+    if (id == PAD_ID || id == CLS_ID || id == TRAN_ID ||
+        id == NOTIMESTAMPS_ID ||
+        (id >= 50259 && id <= 50357))
+      continue;
+    if (id == SEP_ID)
+      break;
+    // Replace each "Ġ" (the byte-level BPE word-boundary marker) with a space.
+ std::string token = this->idToTokenVec[id]; + size_t pos = token.find("Ġ"); + while (pos != std::string::npos) { + token.replace(pos, 2, " "); + pos = token.find("Ġ", pos + 1); + } + dst.append(token); + } + if (dst[0] == ' ') { + dst.erase(0, 1); + } + return dst; +} + template void Text::loadVocab(const std::string &vocab) { // TODO-LOW: If in the future, there are more vocab file types to support, diff --git a/frontend/Interfaces/lib/CMakeLists.txt b/frontend/Interfaces/lib/CMakeLists.txt index 9f6f61b29..6a98a18b9 100644 --- a/frontend/Interfaces/lib/CMakeLists.txt +++ b/frontend/Interfaces/lib/CMakeLists.txt @@ -21,13 +21,13 @@ add_custom_command(OUTPUT DIP.o -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | - ${LLVM_MLIR_BINARY_DIR}/mlir-translate --mlir-to-llvmir | - ${LLVM_MLIR_BINARY_DIR}/llc + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate --mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llc -mtriple=${BUDDY_TARGET_TRIPLE} -mattr=${BUDDY_OPT_ATTR} --filetype=obj -o ${CMAKE_CURRENT_BINARY_DIR}/DIP.o - DEPENDS buddy-opt + DEPENDS mlir-translate llc buddy-opt ) add_library(BuddyLibDIP STATIC DIP.o) @@ -50,23 +50,42 @@ add_custom_command(OUTPUT DAP.o -llvm-request-c-wrappers -convert-func-to-llvm -reconcile-unrealized-casts | - ${LLVM_MLIR_BINARY_DIR}/mlir-translate --mlir-to-llvmir | - ${LLVM_MLIR_BINARY_DIR}/llc + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate --mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llc -mtriple=${BUDDY_TARGET_TRIPLE} -mattr=${BUDDY_OPT_ATTR} --filetype=obj -o ${CMAKE_CURRENT_BINARY_DIR}/DAP.o - DEPENDS buddy-opt + DEPENDS mlir-translate llc buddy-opt ) -add_library(BuddyLibDAP STATIC DAP.o) - -SET_TARGET_PROPERTIES(BuddyLibDAP PROPERTIES - LINKER_LANGUAGE CXX - ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_DIRECTORY} +add_custom_command(OUTPUT DAP-extend.o + COMMAND ${CMAKE_BINARY_DIR}/bin/buddy-opt ${CMAKE_CURRENT_SOURCE_DIR}/DAP-extend.mlir + -extend-dap + -one-shot-bufferize + -convert-linalg-to-loops + -convert-scf-to-cf + -expand-strided-metadata + -lower-affine + -convert-vector-to-llvm + -memref-expand + -arith-expand + -convert-arith-to-llvm + -finalize-memref-to-llvm + -convert-math-to-llvm + -llvm-request-c-wrappers + -convert-func-to-llvm + -reconcile-unrealized-casts | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llc + -mtriple=${BUDDY_TARGET_TRIPLE} + -mattr=${BUDDY_OPT_ATTR} + -filetype=obj -relocation-model=pic + -o ${CMAKE_CURRENT_BINARY_DIR}/DAP-extend.o + DEPENDS mlir-translate llc buddy-opt ) - add_custom_command(OUTPUT DAPVectorization.o +add_custom_command(OUTPUT DAPVectorization.o COMMAND cat ${CMAKE_CURRENT_SOURCE_DIR}/DAP.mlir | sed 's/buddy_fir/buddy_fir_vectorization/' | sed 's/buddy_iir/buddy_iir_vectorization/' | @@ -83,18 +102,22 @@ SET_TARGET_PROPERTIES(BuddyLibDAP PROPERTIES -llvm-request-c-wrappers -convert-func-to-llvm -reconcile-unrealized-casts | - ${LLVM_MLIR_BINARY_DIR}/mlir-translate -mlir-to-llvmir | - ${LLVM_MLIR_BINARY_DIR}/llc + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llc -mtriple=${BUDDY_TARGET_TRIPLE} -mattr=${BUDDY_OPT_ATTR} -filetype=obj -o ${CMAKE_CURRENT_BINARY_DIR}/DAPVectorization.o - DEPENDS buddy-opt + DEPENDS mlir-translate llc buddy-opt ) -add_library(BuddyLibDAPVectorization STATIC DAPVectorization.o) +add_library(BuddyLibDAP STATIC + DAP.o + DAP-extend.o + DAPVectorization.o + ) -SET_TARGET_PROPERTIES(BuddyLibDAPVectorization PROPERTIES +SET_TARGET_PROPERTIES(BuddyLibDAP PROPERTIES LINKER_LANGUAGE CXX 
   ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_DIRECTORY}
 )
diff --git a/frontend/Interfaces/lib/DAP-extend.mlir b/frontend/Interfaces/lib/DAP-extend.mlir
new file mode 100644
index 000000000..c77fe3873
--- /dev/null
+++ b/frontend/Interfaces/lib/DAP-extend.mlir
@@ -0,0 +1,4 @@
+func.func @buddy_whisperPreprocess(%in : memref<?xf64>) -> memref<1x80x3000xf32> {
+  %out = dap.whisper_preprocess %in : memref<?xf64> to memref<1x80x3000xf32>
+  return %out : memref<1x80x3000xf32>
+}
diff --git a/frontend/Python/frontend.py b/frontend/Python/frontend.py
index bd92a8074..f30eb2a28 100644
--- a/frontend/Python/frontend.py
+++ b/frontend/Python/frontend.py
@@ -158,6 +158,8 @@ def __init__(
             "where.self": WhereOp,
             "sqrt.default": SqrtOp,
             "reciprocal.default": ReciprocalOp,
+            "clamp_min.default": ClampMinOp,
+            "clamp_max.default": ClampMaxOp,
         }
 
     @property
diff --git a/frontend/Python/graph/operation.py b/frontend/Python/graph/operation.py
index 903b12865..14bfbf275 100644
--- a/frontend/Python/graph/operation.py
+++ b/frontend/Python/graph/operation.py
@@ -469,3 +469,13 @@ class SqrtOp(Op):
     def __init__(self) -> None:
         super().__init__()
         self._op_type = OpType.ElementwiseType
+
+class ClampMinOp(Op):
+    def __init__(self) -> None:
+        super().__init__()
+        self._op_type = OpType.ElementwiseType
+
+class ClampMaxOp(Op):
+    def __init__(self) -> None:
+        super().__init__()
+        self._op_type = OpType.ElementwiseType
diff --git a/frontend/Python/ops/func.py b/frontend/Python/ops/func.py
index ad6e512be..a7dcc5e11 100644
--- a/frontend/Python/ops/func.py
+++ b/frontend/Python/ops/func.py
@@ -106,7 +106,10 @@ def param_extract(
         TensorDType.Int64: ir.IntegerType.get_signless(64),
     }
     memref_element_type = dtype_mapping[node.tensor_meta["dtype"]]
-    output_shape = list(node.tensor_meta["shape"])
+    if len(node.tensor_meta["shape"]) == 0:
+        output_shape = [1]
+    else:
+        output_shape = list(node.tensor_meta["shape"])
     subview_size = functools.reduce(lambda x, y: x * y, output_shape)
     offset_attr = ir._denseI64ArrayAttr([offset], None)
     size_attr = ir._denseI64ArrayAttr([subview_size], None)
diff --git a/frontend/Python/ops/math.py b/frontend/Python/ops/math.py
index 19820c2b3..f1afc2161 100644
--- a/frontend/Python/ops/math.py
+++ b/frontend/Python/ops/math.py
@@ -28,7 +28,8 @@ def erf_op(node, symbol_table):
 
 def sqrt_op(node, symbol_table):
     input_tensor = symbol_table.get((str(node.args[0]), 0))
-    return math.SqrtOp(input_tensor)
+    op = math.SqrtOp(input_tensor)
+    return op
 
 
 ops_registry = {
diff --git a/frontend/Python/ops/tosa.py b/frontend/Python/ops/tosa.py
index e5fe9a4e3..5de51ca56 100644
--- a/frontend/Python/ops/tosa.py
+++ b/frontend/Python/ops/tosa.py
@@ -21,9 +21,10 @@
 import array
 from typing import Dict, List, Tuple, Union
 import numpy
+import sys
 
 import mlir.ir as ir
-from mlir.dialects import tensor, tosa
+from mlir.dialects import tensor, tosa, arith, linalg
 
 from ..graph import TensorDType
 from ..graph import (
@@ -57,6 +58,8 @@
     SigmoidOp,
     ReciprocalOp,
     MeanOp,
+    ClampMinOp,
+    ClampMaxOp,
 )
 from .utils import *
 
@@ -220,7 +223,7 @@ def addmm_op(
 def bmm_op(node: BatchMatmulOp, symbol_table) -> ir.Operation:
     """
     Import batch matrix multiplication operation.
-    From buddy graph ir's `BatchMatmulOp` operator to MLIR TOSA `matmul`
+    From buddy graph ir's `BatchMatmulOp` operator to MLIR TOSA `matmul`
     operation.
""" input_ = symbol_table.get((str(node.args[0]), 0)) @@ -962,57 +965,56 @@ def maxpool2d_op(node: MaxPool2dOp, symbol_table): ) return op + +# TODO: Rename convolution2d_op -> convolution_op def convolution2d_op(node: Conv2dOp, symbol_table): """ Import the convolution operation. From Buddy Conv2dOp to MLIR TOSA `conv2d` operation. + arg[0]: Tensor input + arg[1]: Tensor weight + arg[2]: Tensor? bias + arg[3]: SymInt[] stride + arg[4]: SymInt[] padding + arg[5]: SymInt[] dilation + arg[6]: bool transposed + arg[7]: SymInt[] output_padding + arg[8]: SymInt groups """ + # Get arguments from convolution node. assert len(node.args) == 9 - input1 = symbol_table.get((str(node.args[0]), 0)) - weight = symbol_table.get((str(node.args[1]), 0)) + input = node.args[0] + weight = node.args[1] + bias = node.args[2] + stride = node.args[3] + input_padding = node.args[4] + dilation = node.args[5] is_kernel_transposed = node.args[6] + out_padding = node.args[7] + groups = node.args[8] + + # Prepare input, weight, and output information. + input_val = symbol_table.get((str(input), 0)) + input_shape = list(ir.RankedTensorType(input_val.type).shape) + weight_val = symbol_table.get((str(weight), 0)) + weight_shape = ir.RankedTensorType(weight_val.type).shape dtype = node.tensor_meta["dtype"] result_element_type = mlir_element_type_get(dtype) - if node._layout.find("NCHW") != -1: - perm_list = [0, 2, 3, 1] - perm_const_op = tosa.ConstOp( - ir.DenseElementsAttr.get(memoryview(array.array("i", perm_list))) - ) - out_shape = list(ir.RankedTensorType(input1.type).shape) - perm_shape = [] - perm_shape.append(out_shape[0]) - perm_shape.append(out_shape[2]) - perm_shape.append(out_shape[3]) - perm_shape.append(out_shape[1]) - permute_result_type = ir.RankedTensorType.get( - perm_shape, result_element_type - ) - input1 = tosa.TransposeOp( - permute_result_type, input1, perm_const_op.results[0] - ).result - if node._layout.find("FCHW") != -1: - perm_list = [0, 2, 3, 1] - perm_const_op = tosa.ConstOp( - ir.DenseElementsAttr.get(memoryview(array.array("i", perm_list))) - ) - out_shape = list(ir.RankedTensorType(weight.type).shape) - perm_shape = [] - perm_shape.append(out_shape[0]) - perm_shape.append(out_shape[2]) - perm_shape.append(out_shape[3]) - perm_shape.append(out_shape[1]) - permute_result_type = ir.RankedTensorType.get( - perm_shape, result_element_type - ) - weight = tosa.TransposeOp( - permute_result_type, weight, perm_const_op.results[0] - ).result + out_shape = node.tensor_meta["shape"] + + # Prepare Depthwise Conv2D information + is_grouped = (list(weight_shape)[1] == 1) and (groups != 1) + is_depthwise = (groups == list(weight_shape)[0]) and is_grouped + + # Prepare input channel and output channel. if is_kernel_transposed: - in_channels = list(ir.RankedTensorType(weight.type).shape)[0] - out_channels = list(ir.RankedTensorType(weight.type).shape)[1] + in_channels = list(weight_shape)[0] + out_channels = list(weight_shape)[1] * groups else: - in_channels = list(ir.RankedTensorType(weight.type).shape)[1] - out_channels = list(ir.RankedTensorType(weight.type).shape)[0] + in_channels = list(weight_shape)[1] * groups + out_channels = list(weight_shape)[0] + + # Prepare bias tensor. 
if len(node._parents) == 2: new_size_tensor_type = ir.RankedTensorType.get( [out_channels], result_element_type @@ -1023,74 +1025,229 @@ def convolution2d_op(node: Conv2dOp, symbol_table): ) bias_tensor = tosa.ConstOp(new_size_attr).results[0] else: - bias_tensor = symbol_table.get((str(node.args[2]), 0)) - assert input1 != None and weight != None and bias_tensor != None - stride = node.args[3] - input_padding = node.args[4] - if len(input_padding) == 1: - input_padding = [input_padding[0]] * 4 - elif len(input_padding) == 2: - input_padding = [input_padding[0]] * 2 + [input_padding[1]] * 2 - dilation = node.args[5] - groups = node.args[8] - out_shape = node.tensor_meta["shape"] - if node._layout.find("NCHW") != -1: - perm_shape = [] - perm_shape.append(out_shape[0]) - perm_shape.append(out_shape[2]) - perm_shape.append(out_shape[3]) - perm_shape.append(out_shape[1]) - out_shape = perm_shape - output = ir.RankedTensorType.get(out_shape, result_element_type) + bias_tensor = symbol_table.get((str(bias), 0)) + + # Prepare attributes. + dilation_attr = ir._denseI64ArrayAttr(dilation, None) stride_attr = ir._denseI64ArrayAttr(stride, None) - assert groups == 1, 'tosa.conv2d only support one group' - if is_kernel_transposed: - if sum(input_padding) > 0 or sum(dilation) > len(dilation): - raise NotImplementedError - out_padding = node.args[7] - for i in range(len(out_padding), 4): - out_padding = [0] + out_padding - out_padding_attr = ir._denseI64ArrayAttr(out_padding, None) - out_shape_attr = ir._denseI64ArrayAttr(out_shape, None) - op = tosa.TransposeConv2DOp( - output, - input1, - weight, - bias_tensor, - out_padding_attr, - stride_attr, - out_shape_attr, - ) - else: + + # Convolution 2D + if len(weight_shape) == 4: + # Prepare input padding. + if len(input_padding) == 1: + input_padding = [input_padding[0]] * 4 + elif len(input_padding) == 2: + input_padding = [input_padding[0]] * 2 + [input_padding[1]] * 2 + # Prepare input_padding attributes. input_padding_attr = ir._denseI64ArrayAttr(input_padding, None) - dilation_attr = ir._denseI64ArrayAttr(dilation, None) - op = tosa.Conv2DOp( - output, - input1, - weight, - bias_tensor, - input_padding_attr, - stride_attr, - dilation_attr, + # If the input layout is NCHW, then convert to NHWC. + if node._layout.find("NCHW") != -1: + perm_list = [0, 2, 3, 1] + perm_const_op = tosa.ConstOp( + ir.DenseElementsAttr.get( + memoryview(array.array("i", perm_list)) + ) + ) + perm_shape = [] + perm_shape.append(input_shape[0]) + perm_shape.append(input_shape[2]) + perm_shape.append(input_shape[3]) + perm_shape.append(input_shape[1]) + permute_result_type = ir.RankedTensorType.get( + perm_shape, result_element_type + ) + input_val = tosa.TransposeOp( + permute_result_type, input_val, perm_const_op.results[0] + ).result + # If the output layout is NCHW, then convert to NHWC + if node._layout.find("NCHW") != -1: + perm_shape = [] + perm_shape.append(out_shape[0]) + perm_shape.append(out_shape[2]) + perm_shape.append(out_shape[3]) + perm_shape.append(out_shape[1]) + out_shape = perm_shape + output_type = ir.RankedTensorType.get(out_shape, result_element_type) + + # Depthwise Conv2D Operation. 
+ if is_depthwise is True: + # If groups == in_channels,out_channels == in_channels + if node._layout.find("FCHW") != -1: + perm_list = [2, 3, 0, 1] + perm_const_op = tosa.ConstOp( + ir.DenseElementsAttr.get( + memoryview(array.array("i", perm_list)) + ) + ) + perm_shape = [] + perm_shape.append(weight_shape[2]) + perm_shape.append(weight_shape[3]) + perm_shape.append(weight_shape[0]) + perm_shape.append(weight_shape[1]) + permute_result_type = ir.RankedTensorType.get( + perm_shape, result_element_type + ) + weight_depthwise = tosa.TransposeOp( + permute_result_type, weight_val, perm_const_op.results[0] + ).result + op = tosa.DepthwiseConv2DOp( + output_type, + input_val, + weight_depthwise, + bias_tensor, + input_padding_attr, + stride_attr, + dilation_attr, + ) + else: + # Transpose Conv2D Operation. + if is_kernel_transposed: + if sum(input_padding) > 0 or sum(dilation) > len(dilation): + raise NotImplementedError + for i in range(len(out_padding), 4): + out_padding = [0] + out_padding + out_padding_attr = ir._denseI64ArrayAttr(out_padding, None) + out_shape_attr = ir._denseI64ArrayAttr(out_shape, None) + op = tosa.TransposeConv2DOp( + output_type, + input_val, + weight_val, + bias_tensor, + out_padding_attr, + stride_attr, + out_shape_attr, + ) + # Generic Conv2D Operation. + else: + if node._layout.find("FCHW") != -1: + perm_list = [0, 2, 3, 1] + perm_const_op = tosa.ConstOp( + ir.DenseElementsAttr.get( + memoryview(array.array("i", perm_list)) + ) + ) + perm_shape = [] + perm_shape.append(weight_shape[0]) + perm_shape.append(weight_shape[2]) + perm_shape.append(weight_shape[3]) + perm_shape.append(weight_shape[1]) + permute_result_type = ir.RankedTensorType.get( + perm_shape, result_element_type + ) + weight_val = tosa.TransposeOp( + permute_result_type, + weight_val, + perm_const_op.results[0], + ).result + op = tosa.Conv2DOp( + output_type, + input_val, + weight_val, + bias_tensor, + input_padding_attr, + stride_attr, + dilation_attr, + ) + # Output transpose + if node._layout.find("NCHW") != -1: + perm_list = [0, 3, 1, 2] + perm_const_op = tosa.ConstOp( + ir.DenseElementsAttr.get( + memoryview(array.array("i", perm_list)) + ) + ) + perm_shape = [] + perm_shape.append(out_shape[0]) + perm_shape.append(out_shape[3]) + perm_shape.append(out_shape[1]) + perm_shape.append(out_shape[2]) + permute_result_type = ir.RankedTensorType.get( + perm_shape, result_element_type + ) + op = tosa.TransposeOp( + permute_result_type, op.result, perm_const_op.results[0] + ) + # Convolution 1D + elif len(weight_shape) == 3: + # Prepare input with padding. 
+ if input_padding[0] != 0: + input_shape = list(ir.RankedTensorType(input_val.type).shape) + padded_type = ir.RankedTensorType.get( + [ + input_shape[0], + input_shape[1], + input_shape[2] + 2 * input_padding[0], + ], + result_element_type, + ) + pad_values_type = ir.RankedTensorType.get( + [3, 2], ir.IntegerType.get_signless(32) + ) + pad_values = ir.DenseElementsAttr.get( + numpy.array( + [[0, 0], [0, 0], [input_padding[0], input_padding[0]]], + dtype=numpy.int32, + ), + type=pad_values_type, + ) + pad_constant = arith.ConstantOp(pad_values_type, pad_values).result + input_val = tosa.PadOp(padded_type, input_val, pad_constant) + output_type = ir.RankedTensorType.get(out_shape, result_element_type) + output_conv = tensor.EmptyOp(list(out_shape), result_element_type) + assert groups == 1, "only support one group" + # Con1D Operation Without Bias + conv_op = linalg.conv_1d_ncw_fcw( + input_val, + weight_val, + outs=[output_conv], + strides=stride_attr, + dilations=dilation_attr, ) - if node._layout.find("NCHW") != -1: - perm_list = [0, 3, 1, 2] - perm_const_op = tosa.ConstOp( - ir.DenseElementsAttr.get(memoryview(array.array("i", perm_list))) + output = tensor.EmptyOp(list(out_shape), result_element_type) + generic_map = ir.AffineMap.get_permutation( + [i for i in range(len(list(out_shape)))] ) - perm_shape = [] - perm_shape.append(out_shape[0]) - perm_shape.append(out_shape[3]) - perm_shape.append(out_shape[1]) - perm_shape.append(out_shape[2]) - permute_result_type = ir.RankedTensorType.get( - perm_shape, result_element_type + loop_type = [ + ir.Attribute.parse("#linalg.iterator_type") + ] * len(list(out_shape)) + loop_type[1] = ir.Attribute.parse("#linalg.iterator_type") + # Add Bias To Conv2d. + op = linalg.GenericOp( + [output_type], + [conv_op, bias_tensor], + [output], + ir.ArrayAttr.get( + [ + ir.AffineMapAttr.get( + generic_map.get_submap( + [i for i in range(len(list(out_shape)))] + ) + ), + ir.AffineMapAttr.get(generic_map.get_submap([1])), + ir.AffineMapAttr.get( + generic_map.get_submap( + [i for i in range(len(list(out_shape)))] + ) + ), + ] + ), + ir.ArrayAttr.get(loop_type), ) - op = tosa.TransposeOp( - permute_result_type, op.result, perm_const_op.results[0] + block = ir.Block.create_at_start( + op.region, + [ + result_element_type, + ir.RankedTensorType(bias_tensor.type).element_type, + result_element_type, + ], ) + add_op = arith.AddFOp(block.arguments[1], block.arguments[0]) + block.append(add_op) + block.append(linalg.YieldOp([add_op.result])) + return op + def relu_op(node: ReluOp, symbol_table): """ Import the tensor relu operation. @@ -1111,6 +1268,7 @@ def relu_op(node: ReluOp, symbol_table): return op + def iota_op(node: IotaOp, symbol_table): """ Import the tensor iota operation. @@ -1132,6 +1290,7 @@ def iota_op(node: IotaOp, symbol_table): return op + def sigmoid_op(node: SigmoidOp, symbol_table): """ Import the tensor sigmoid operation. @@ -1214,6 +1373,60 @@ def mean_op(node: MeanOp, symbol_table): return ret +def clamp_min_op(node: ClampMinOp, symbol_table): + """ + Creates a TOSA clamp operation to set a minimum value for a tensor. + + Retrieves the input tensor and its minimum clamp value from the symbol table, + setting the maximum clamp value to the highest possible for the data type. + The operation ensures no values are below the specified minimum. + + Parameters: + - node (ClampMinOp): Node with tensor and minimum value details. + - symbol_table (dict): Dictionary mapping identifiers to values or nodes. 
+
+    Returns:
+    - tosa.ClampOp: Configured TOSA clamp operation with minimum clamping.
+    """
+    input1 = symbol_table.get((str(node.args[0]), 0), node.args[0])
+    min_value = symbol_table.get((str(node.args[1]), 0), node.args[1])
+    tensor_type = input1.type
+    min_value_int = round(min_value)
+    min_int = ir.IntegerAttr.get(ir.IntegerType.get_signless(64), min_value_int)
+    max_int = ir.IntegerAttr.get(ir.IntegerType.get_signless(64), sys.maxsize)
+    min_fp = ir.FloatAttr.get(ir.F32Type.get(), min_value)
+    max_fp = ir.FloatAttr.get(ir.F32Type.get(), float("inf"))
+    op = tosa.ClampOp(tensor_type, input1, min_int, max_int, min_fp, max_fp)
+    return op
+
+
+def clamp_max_op(node: ClampMaxOp, symbol_table):
+    """
+    Creates a TOSA clamp operation to set a maximum value for a tensor.
+
+    Retrieves the input tensor and its maximum clamp value from the symbol table,
+    setting the minimum clamp value to the lowest possible for the data type.
+    The operation ensures no values exceed the specified maximum.
+
+    Parameters:
+    - node (ClampMaxOp): Node with tensor and maximum value details.
+    - symbol_table (dict): Dictionary mapping identifiers to values or nodes.
+
+    Returns:
+    - tosa.ClampOp: Configured TOSA clamp operation with maximum clamping.
+    """
+    input1 = symbol_table.get((str(node.args[0]), 0), node.args[0])
+    max_value = symbol_table.get((str(node.args[1]), 0), node.args[1])
+    tensor_type = input1.type
+    max_value_int = round(max_value)
+    min_int = ir.IntegerAttr.get(ir.IntegerType.get_signless(64), -sys.maxsize)
+    max_int = ir.IntegerAttr.get(ir.IntegerType.get_signless(64), max_value_int)
+    min_fp = ir.FloatAttr.get(ir.F32Type.get(), -float("inf"))
+    max_fp = ir.FloatAttr.get(ir.F32Type.get(), max_value)
+    op = tosa.ClampOp(tensor_type, input1, min_int, max_int, min_fp, max_fp)
+    return op
+
+
 ops_registry = {
     "AddOp": add_op,
     "MulOp": mul_op,
@@ -1246,4 +1459,6 @@ def mean_op(node: MeanOp, symbol_table):
     "SigmoidOp": sigmoid_op,
     "ReciprocalOp": reciprocal_op,
     "MeanOp": mean_op,
+    "ClampMinOp": clamp_min_op,
+    "ClampMaxOp": clamp_max_op,
 }
diff --git a/midend/include/Dialect/DAP/DAPOps.td b/midend/include/Dialect/DAP/DAPOps.td
index 9e7d894b9..70d7a21fe 100644
--- a/midend/include/Dialect/DAP/DAPOps.td
+++ b/midend/include/Dialect/DAP/DAPOps.td
@@ -50,8 +50,7 @@ def DAP_FirOp : DAP_Op<"fir"> {
   }];
 }
 
-def DAP_BiquadOp : DAP_Op<"biquad">
-{
+def DAP_BiquadOp : DAP_Op<"biquad"> {
   let summary = [{Biquad filter, a infinite impulse response (IIR) filter.
 
     ```mlir
@@ -94,4 +93,49 @@ def DAP_IirOp : DAP_Op<"iir"> {
   }];
 }
 
+def DAP_RFFT400Op : DAP_Op<"rfft400"> {
+  let summary = "RFFT operation for length 400.";
+  let description = [{
+    The RFFT algorithm is designed to handle real-valued input signals. Real
+    signals exhibit conjugate symmetry in the frequency domain, meaning that
+    the positive and negative frequency components are complex conjugates of
+    each other. This symmetry property allows the RFFT algorithm to compute
+    only half of the frequency spectrum, reducing computational costs.
+
+    Example:
+
+    ```mlir
+    dap.rfft400 %data : memref<400xf64>
+    ```
+  }];
+
+  let arguments = (ins AnyRankedOrUnrankedMemRef:$memref);
+  let assemblyFormat = [{
+    $memref attr-dict `:` type($memref)
+  }];
+}
+
+def DAP_WhisperPreprocessOp : DAP_Op<"whisper_preprocess"> {
+  let summary = "preprocessor for Whisper model";
+  let description = [{
+    Preprocessor for the Whisper model; performs feature extraction on the
+    input audio.
+    The input MemRef stores the raw speech data, and the output MemRef
+    contains the computed features with shape memref<1x80x3000xf32>.
+
+    Example:
+
+    ```mlir
+    %output = dap.whisper_preprocess %input : memref<?xf64> to memref<1x80x3000xf32>
+    ```
+  }];
+
+  let arguments = (ins Arg<AnyRankedOrUnrankedMemRef, "input memref",
+                        [MemRead]>:$memrefI);
+  let results = (outs Res<AnyRankedOrUnrankedMemRef, "output memref",
+                        [MemAlloc]>:$memrefO);
+  let assemblyFormat = [{
+    $memrefI attr-dict `:` type($memrefI) `to` type($memrefO)
+  }];
+}
+
 #endif // DAP_DAPOPS_TD
diff --git a/midend/lib/Conversion/CMakeLists.txt b/midend/lib/Conversion/CMakeLists.txt
index bd3c7f150..cfe12a8d6 100644
--- a/midend/lib/Conversion/CMakeLists.txt
+++ b/midend/lib/Conversion/CMakeLists.txt
@@ -3,6 +3,7 @@ add_subdirectory(LowerBud)
 add_subdirectory(LowerDIP)
 add_subdirectory(LowerRVV)
 add_subdirectory(LowerDAP)
+add_subdirectory(ExtendDAP)
 add_subdirectory(DAPVectorization)
 add_subdirectory(MatMulOptimization)
 add_subdirectory(TransposeOptimization)
@@ -13,3 +14,4 @@ add_subdirectory(LowerLinalgToGemmini)
 add_subdirectory(SchedulingOnDevices)
 add_subdirectory(LowerSche)
 add_subdirectory(FuncBufferize)
+add_subdirectory(DepthwiseConvOptimization)
diff --git a/midend/lib/Conversion/ConvOptimization/CMakeLists.txt b/midend/lib/Conversion/ConvOptimization/CMakeLists.txt
index fc88a92ef..336c95a30 100644
--- a/midend/lib/Conversion/ConvOptimization/CMakeLists.txt
+++ b/midend/lib/Conversion/ConvOptimization/CMakeLists.txt
@@ -1,3 +1,5 @@
 add_mlir_library(ConvOptimization
   ConvOptimize.cpp
+  ConvNhwcFhwcOptimize.cpp
+  ConvNhwcFhwcOptimizeTile.cpp
 )
diff --git a/midend/lib/Conversion/ConvOptimization/ConvNhwcFhwcOptimize.cpp b/midend/lib/Conversion/ConvOptimization/ConvNhwcFhwcOptimize.cpp
new file mode 100644
index 000000000..e4bc67e36
--- /dev/null
+++ b/midend/lib/Conversion/ConvOptimization/ConvNhwcFhwcOptimize.cpp
@@ -0,0 +1,276 @@
+//====- ConvNhwcFhwcOptimize.cpp ----------------------------------------===//
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Conv2DNhwcFhwcOp optimization.
+//
+//===----------------------------------------------------------------------===//
+
+#include <mlir/Dialect/Affine/IR/AffineOps.h>
+#include <mlir/Dialect/Linalg/IR/Linalg.h>
+#include <mlir/Dialect/Vector/IR/VectorOps.h>
+#include <mlir/Pass/Pass.h>
+#include <mlir/Transforms/DialectConversion.h>
+
+using namespace mlir;
+using namespace vector;
+
+//===----------------------------------------------------------------------===//
+// Rewrite Pattern
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+class ConvNhwcFhwcOptimizePattern : public ConversionPattern {
+public:
+  explicit ConvNhwcFhwcOptimizePattern(MLIRContext *context,
+                                       int64_t vecSizeParam)
+      : ConversionPattern(linalg::Conv2DNhwcFhwcOp::getOperationName(), 1,
+                          context) {
+    vecSize = vecSizeParam;
+  }
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> /*operands*/,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto convOp = dyn_cast_or_null<linalg::Conv2DNhwcFhwcOp>(op);
+    auto loc = op->getLoc();
+
+    // Some constants we need.
+    const Value c0 =
+        rewriter.create<arith::ConstantOp>(loc, rewriter.getIndexAttr(0));
+    const Value c1 =
+        rewriter.create<arith::ConstantOp>(loc, rewriter.getIndexAttr(1));
+
+    const Value vecSizeValue =
+        rewriter.create<arith::ConstantOp>(loc, rewriter.getIndexAttr(vecSize));
+    const AffineExpr d0 = rewriter.getAffineDimExpr(0);
+    const AffineExpr d1 = rewriter.getAffineDimExpr(1);
+    const AffineExpr s0 = rewriter.getAffineSymbolExpr(0);
+
+    Value input = op->getOperand(0);
+    Value filter = op->getOperand(1);
+    Value output = op->getOperand(2);
+
+    int strHeight, strWidth, dilHeight, dilWidth;
+
+    // Strides.
+    if (!convOp.getStrides()) {
+      strHeight = 1;
+      strWidth = 1;
+    } else {
+      strHeight = convOp.getStrides().getValues<int64_t>()[0];
+      strWidth = convOp.getStrides().getValues<int64_t>()
+          [convOp.getStrides().getValues<int64_t>().size() - 1];
+    }
+
+    // Dilations.
+    if (!convOp.getDilations()) {
+      dilHeight = 1;
+      dilWidth = 1;
+    } else {
+      dilHeight = convOp.getDilations().getValues<int64_t>()[0];
+      dilWidth = convOp.getDilations().getValues<int64_t>()
+          [convOp.getDilations().getValues<int64_t>().size() - 1];
+    }
+
+    ShapedType inputTy = input.getType().cast<ShapedType>();
+    Type elemTy = inputTy.getElementType();
+    VectorType vecTy = VectorType::get(vecSize, elemTy);
+
+    const Value zeroElementType =
+        rewriter.create<arith::ConstantOp>(loc, rewriter.getZeroAttr(elemTy));
+
+    // Dims
+    Value N = rewriter.create<memref::DimOp>(loc, output, 0);  // N
+    Value OH = rewriter.create<memref::DimOp>(loc, output, 1); // OH
+    Value OW = rewriter.create<memref::DimOp>(loc, output, 2); // OW
+    Value OC = rewriter.create<memref::DimOp>(loc, output, 3); // OC
+    Value IC = rewriter.create<memref::DimOp>(loc, input, 3);  // IC
+    Value FH = rewriter.create<memref::DimOp>(loc, filter, 1); // FH
+    Value FW = rewriter.create<memref::DimOp>(loc, filter, 2); // FW
+
+    // clang-format off
+    // Step 1: Create the outermost loops.
+    // Create the scf::ForallOp operation for N, OH, OW, OC.
+    auto outputForAllOp = rewriter.create<scf::ForallOp>(
+        loc, SmallVector<OpFoldResult>({N, OH, OW, OC}), ValueRange{},
+        std::nullopt, // No mapping specified in this example
+        [&](OpBuilder &nestedBuilder, Location nestedLoc,
+            ValueRange loopIndices) {
+          Value ivN = loopIndices[0];  // Index for the first dimension N
+          Value ivOH = loopIndices[1]; // Index for the second dimension OH
+          Value ivOW = loopIndices[2]; // Index for the third dimension OW
+          Value ivOC = loopIndices[3]; // Index for the fourth dimension OC
+
+          Value addRes = nestedBuilder.create<memref::LoadOp>(
+              loc, output, ValueRange{ivN, ivOH, ivOW, ivOC});
+          // IC
+          auto forOp = nestedBuilder.create<scf::ForOp>(
+              nestedLoc, c0, IC, vecSizeValue, ValueRange{addRes},
+              [&](OpBuilder &builder, Location loc, Value ivIC,
+                  ValueRange iargs) {
+                Value tVec;
+                if (isa<IntegerType>(elemTy)) {
+                  tVec = builder.create<vector::BroadcastOp>(loc, vecTy,
+                                                             zeroElementType);
+                } else {
+                  tVec = builder.create<vector::SplatOp>(loc, vecTy,
+                                                         zeroElementType);
+                }
+
+                Value remainLen = builder.create<affine::AffineMinOp>(
+                    loc,
+                    AffineMap::get(2, 1, {-d0 + s0, d1}, builder.getContext()),
+                    ValueRange{ivIC, vecSizeValue, IC});
+                Value remainMask = builder.create<vector::CreateMaskOp>(
+                    loc, VectorType::get({vecSize}, rewriter.getI1Type()),
+                    ValueRange{remainLen});
+
+                // FH
+                auto forOp = builder.create<scf::ForOp>(
+                    loc, c0, FH, c1, ValueRange{tVec},
+                    [&](OpBuilder &builder, Location loc, Value ivFH,
+                        ValueRange iargs) {
+                      Value rowInput = builder.create<affine::AffineApplyOp>(
+                          loc,
+                          AffineMap::get(2, 0, d0 * strHeight + d1 * dilHeight),
+                          ValueRange{ivOH, ivFH});
+                      Value rowFilter = ivFH;
+                      // FW
+                      auto forOp = builder.create<scf::ForOp>(
+                          loc, c0, FW, c1, ValueRange{iargs[0]},
+                          [&](OpBuilder &builder, Location loc, Value ivFW,
+                              ValueRange iargs) {
+                            Value columnInput =
+                                builder.create<affine::AffineApplyOp>(
+                                    loc,
+                                    AffineMap::get(
+                                        2, 0, d0 * strWidth + d1 * dilWidth),
+                                    ValueRange{ivOW, ivFW});
+                            Value columnFilter = ivFW;
+                            Value iVec = builder.create<vector::LoadOp>(
+                                loc, vecTy, input,
+                                ValueRange{ivN, rowInput, columnInput, ivIC});
+                            Value fVec = builder.create<vector::LoadOp>(
+                                loc, vecTy, filter,
+                                ValueRange{ivOC, rowFilter, columnFilter,
+                                           ivIC});
+                            Value tVecNext;
+                            if (isa<IntegerType>(elemTy)) {
+                              Value mulVec = builder.create<arith::MulIOp>(
+                                  loc, iVec, fVec);
+                              tVecNext = builder.create<arith::AddIOp>(
+                                  loc, mulVec, iargs[0]);
+                            } else {
+                              tVecNext = builder.create<vector::FMAOp>(
+                                  loc, vecTy, iVec, fVec, iargs[0]);
+                            }
+
+                            builder.create<scf::YieldOp>(loc,
+                                                         ValueRange{tVecNext});
+                          });
+                      builder.create<scf::YieldOp>(
+                          loc, ValueRange{forOp.getResult(0)});
+                    });
+                auto reduceVecOp = builder.create<vector::ReductionOp>(
+                    loc, vector::CombiningKind::ADD, forOp.getResult(0));
+                auto maskedOp =
+                    cast<vector::MaskOp>(mlir::vector::maskOperation(
+                        builder, reduceVecOp, remainMask));
+                Value reduceVec = maskedOp->getResult(0);
+                Value addNext;
+                if (isa<IntegerType>(elemTy)) {
+                  addNext =
+                      builder.create<arith::AddIOp>(loc, iargs[0], reduceVec);
+                } else {
+                  addNext =
+                      builder.create<arith::AddFOp>(loc, iargs[0], reduceVec);
+                }
+                builder.create<scf::YieldOp>(loc, ValueRange{addNext});
+              });
+
+          nestedBuilder.create<memref::StoreOp>(
+              loc, forOp.getResult(0), output,
+              ValueRange{ivN, ivOH, ivOW, ivOC});
+          nestedBuilder.create<scf::InParallelOp>(nestedLoc);
+        });
+    // clang-format on
+
+    rewriter.eraseOp(op);
+    return success();
+  }
+
+private:
+  int64_t vecSize;
+};
+} // end anonymous namespace
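The remainLen/remainMask pair in the pattern above implements tail masking for the channel loop: each step processes min(IC - ivIC, vecSize) lanes, so the final iteration is partially masked instead of requiring a scalar epilogue. The equivalent scalar computation:

```
#include <algorithm>
#include <iostream>

int main() {
  int IC = 50, vecSize = 16;
  for (int iv = 0; iv < IC; iv += vecSize) {
    int remainLen = std::min(IC - iv, vecSize); // lanes enabled this step
    std::cout << remainLen << " ";              // prints: 16 16 16 2
  }
  std::cout << "\n";
}
```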
+
+//===----------------------------------------------------------------------===//
+// ConvNhwcFhwcOptimizePass
+//===----------------------------------------------------------------------===//
+
+namespace {
+class ConvNhwcFhwcOptimizePass
+    : public PassWrapper<ConvNhwcFhwcOptimizePass, OperationPass<ModuleOp>> {
+public:
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ConvNhwcFhwcOptimizePass)
+  StringRef getArgument() const final { return "conv-nhwc-fhwc-optimize"; }
+  StringRef getDescription() const final {
+    return "Conv2d NHWC FHWC optimize.";
+  }
+  ConvNhwcFhwcOptimizePass() = default;
+  ConvNhwcFhwcOptimizePass(const ConvNhwcFhwcOptimizePass &) {}
+  explicit ConvNhwcFhwcOptimizePass(int64_t vecSizeParam) {
+    vecSize = vecSizeParam;
+  }
+
+  void runOnOperation() override;
+
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry.insert<linalg::LinalgDialect, memref::MemRefDialect,
+                    scf::SCFDialect, affine::AffineDialect, VectorDialect>();
+  }
+
+  Option<int64_t> vecSize{*this, "vec-size", llvm::cl::desc("Vector size."),
+                          llvm::cl::init(16)};
+};
+} // end anonymous namespace.
+
+void ConvNhwcFhwcOptimizePass::runOnOperation() {
+  MLIRContext *context = &getContext();
+  ModuleOp module = getOperation();
+
+  ConversionTarget target(*context);
+  target.addLegalDialect<arith::ArithDialect, affine::AffineDialect,
+                         scf::SCFDialect, memref::MemRefDialect,
+                         VectorDialect>();
+  target.addLegalOp<ModuleOp, func::FuncOp, func::ReturnOp>();
+  target.addLegalOp<linalg::FillOp>();
+
+  RewritePatternSet patterns(context);
+  patterns.add<ConvNhwcFhwcOptimizePattern>(context, vecSize);
+
+  if (failed(applyPartialConversion(module, target, std::move(patterns))))
+    signalPassFailure();
+}
+
+namespace mlir {
+namespace buddy {
+void registerConvNhwcFhwcOptimizePass() {
+  PassRegistration<ConvNhwcFhwcOptimizePass>();
+}
+} // namespace buddy
+} // namespace mlir
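With this registration, the rewrite becomes a standalone pass: the argument string above exposes it as `-conv-nhwc-fhwc-optimize`, and the declared option makes the vector width adjustable via standard MLIR pass-option syntax (e.g. `-conv-nhwc-fhwc-optimize="vec-size=32"`), with `vec-size` defaulting to 16 as set by `llvm::cl::init`.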
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Conv2DNhwcFhwcOp tile optimize. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +using namespace mlir; +using namespace vector; + +//===----------------------------------------------------------------------===// +// Rewrite Pattern +//===----------------------------------------------------------------------===// + +namespace { + +class ConvNhwcFhwcTileOptimizePattern : public ConversionPattern { +public: + explicit ConvNhwcFhwcTileOptimizePattern(MLIRContext *context, + int64_t vecSizeParam, + int64_t tilingOHParam, + int64_t tilingOWParam, + int64_t tilingOCParam) + : ConversionPattern(linalg::Conv2DNhwcFhwcOp::getOperationName(), 1, + context) { + vecSize = vecSizeParam; + tilingOH = tilingOHParam; + tilingOW = tilingOWParam; + tilingOC = tilingOCParam; + } + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef /*operands*/, + ConversionPatternRewriter &rewriter) const override { + auto convOp = dyn_cast_or_null(op); + auto loc = op->getLoc(); + + // Some constant we need. + const Value c0 = + rewriter.create(loc, rewriter.getIndexAttr(0)); + const Value c1 = + rewriter.create(loc, rewriter.getIndexAttr(1)); + + const Value vecSizeValue = + rewriter.create(loc, rewriter.getIndexAttr(vecSize)); + const AffineExpr d0 = rewriter.getAffineDimExpr(0); + const AffineExpr d1 = rewriter.getAffineDimExpr(1); + const AffineExpr s0 = rewriter.getAffineSymbolExpr(0); + + Value input = op->getOperand(0); + Value filter = op->getOperand(1); + Value output = op->getOperand(2); + + int strHeight, strWidth, dilHeight, dilWidth; + + // Strides. + if (!convOp.getStrides()) { + strHeight = 1; + strWidth = 1; + } else { + strHeight = convOp.getStrides().getValues()[0]; + strWidth = convOp.getStrides().getValues() + [convOp.getStrides().getValues().size() - 1]; + } + + // Dilations. 
+ if (!convOp.getDilations()) { + dilHeight = 1; + dilWidth = 1; + } else { + dilHeight = convOp.getDilations().getValues()[0]; + dilWidth = convOp.getDilations().getValues() + [convOp.getDilations().getValues().size() - 1]; + } + + ShapedType inputTy = input.getType().cast(); + Type elemTy = inputTy.getElementType(); + VectorType vecTy = VectorType::get(vecSize, elemTy); + + const Value zeroElementType = + rewriter.create(loc, rewriter.getZeroAttr(elemTy)); + + // Dims + Value N = rewriter.create(loc, output, 0); // N + Value OH = rewriter.create(loc, output, 1); // OH + Value OW = rewriter.create(loc, output, 2); // OW + Value OC = rewriter.create(loc, output, 3); // OC + Value IC = rewriter.create(loc, input, 3); // IC + Value FH = rewriter.create(loc, filter, 1); // FH + Value FW = rewriter.create(loc, filter, 2); // FW + + auto tilingUpperBound = + AffineMap::get(2, 1, {d0 + d1, s0}, rewriter.getContext()); + + Value stepOH = rewriter.create( + loc, AffineMap::get(1, 0, d0.ceilDiv(tilingOH)), OH); + Value stepOW = rewriter.create( + loc, AffineMap::get(1, 0, d0.ceilDiv(tilingOW)), OW); + Value stepOC = rewriter.create( + loc, AffineMap::get(1, 0, d0.ceilDiv(tilingOC)), OC); + + // clang format off + // Step 1: Create outer most loops. + // Create the scf::ForallOp operation For N,OH,OW,OC + rewriter.create( + loc, SmallVector{c0, c0, c0, c0}, + SmallVector({N, OH, OW, OC}), + SmallVector({c1, stepOH, stepOW, stepOC}), + ValueRange{}, + std::nullopt, // No mapping specified in this example + [&](OpBuilder &nestedBuilder, Location nestedLoc, + ValueRange loopIndices) { + Value ivN = loopIndices[0]; // Index for the first dimension N + + Value ubOH = nestedBuilder.create( + loc, tilingUpperBound, + ValueRange{loopIndices[1], stepOH, + OH}); // ub for the second dimension OH + Value ubOW = nestedBuilder.create( + loc, tilingUpperBound, + ValueRange{loopIndices[2], stepOW, + OW}); // ub for the second dimension OW + Value ubOC = nestedBuilder.create( + loc, tilingUpperBound, + ValueRange{loopIndices[3], stepOC, + OC}); // ub for the second dimension OC + + rewriter.create( + loc, + SmallVector{loopIndices[1], loopIndices[2], + loopIndices[3]}, + SmallVector({ubOH, ubOW, ubOC}), + SmallVector({c1, c1, c1}), ValueRange{}, + std::nullopt, // No mapping specified in this example + [&](OpBuilder &nestedBuilder, Location nestedLoc, + ValueRange loopIndices) { + Value ivOH = loopIndices[0]; // Index for the first dimension OH + Value ivOW = loopIndices[1]; // Index for the first dimension OW + Value ivOC = loopIndices[2]; // Index for the first dimension OC + + Value addRes = nestedBuilder.create( + loc, output, ValueRange{ivN, ivOH, ivOW, ivOC}); + // IC + auto forOp = nestedBuilder.create( + nestedLoc, c0, IC, vecSizeValue, ValueRange{addRes}, + [&](OpBuilder &builder, Location loc, Value ivIC, + ValueRange iargs) { + Value tVec; + if (isa(elemTy)) { + tVec = builder.create( + loc, vecTy, zeroElementType); + } else { + tVec = builder.create(loc, vecTy, + zeroElementType); + } + + Value remainLen = builder.create( + loc, + AffineMap::get(2, 1, {-d0 + s0, d1}, + builder.getContext()), + ValueRange{ivIC, vecSizeValue, IC}); + Value remainMask = builder.create( + loc, VectorType::get({vecSize}, rewriter.getI1Type()), + ValueRange{remainLen}); + + // FH + auto forOp = builder.create( + loc, c0, FH, c1, ValueRange{tVec}, + [&](OpBuilder &builder, Location loc, Value ivFH, + ValueRange iargs) { + Value rowInput = + builder.create( + loc, + AffineMap::get( + 2, 0, d0 * strHeight + d1 * dilHeight), + 
ValueRange{ivOH, ivFH}); + Value rowFilter = ivFH; + // FW + auto forOp = builder.create( + loc, c0, FW, c1, ValueRange{iargs[0]}, + [&](OpBuilder &builder, Location loc, + Value ivFW, ValueRange iargs) { + Value columnInput = + builder.create( + loc, + AffineMap::get(2, 0, + d0 * strWidth + + d1 * dilWidth), + ValueRange{ivOW, ivFW}); + Value columnFilter = + builder.create( + loc, AffineMap::get(1, 0, d0), ivFW); + Value iVec = builder.create( + loc, vecTy, input, + ValueRange{ivN, rowInput, columnInput, + ivIC}); + Value fVec = builder.create( + loc, vecTy, filter, + ValueRange{ivOC, rowFilter, columnFilter, + ivIC}); + Value tVecNext; + if (isa(elemTy)) { + Value mulVec = + builder.create(loc, iVec, + fVec); + tVecNext = builder.create( + loc, mulVec, iargs[0]); + } else { + tVecNext = builder.create( + loc, vecTy, iVec, fVec, iargs[0]); + } + + builder.create( + loc, ValueRange{tVecNext}); + }); + builder.create( + loc, ValueRange{forOp.getResult(0)}); + }); + auto reduceVecOp = builder.create( + loc, vector::CombiningKind::ADD, forOp.getResult(0)); + auto maskedOp = + cast(mlir::vector::maskOperation( + builder, reduceVecOp, remainMask)); + Value reduceVec = maskedOp->getResult(0); + Value addNext; + if (isa(elemTy)) { + addNext = builder.create(loc, iargs[0], + reduceVec); + } else { + addNext = builder.create(loc, iargs[0], + reduceVec); + } + builder.create(loc, ValueRange{addNext}); + }); + + nestedBuilder.create( + loc, forOp.getResult(0), output, + ValueRange{ivN, ivOH, ivOW, ivOC}); + nestedBuilder.create(nestedLoc); + }); + nestedBuilder.create(nestedLoc); + }); + // clang format on + + rewriter.eraseOp(op); + return success(); + } + +private: + int64_t vecSize; + int64_t tilingOH; + int64_t tilingOW; + int64_t tilingOC; +}; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// ConvNhwcFhwcTileOptimizePass +//===----------------------------------------------------------------------===// + +namespace { +class ConvNhwcFhwcTileOptimizePass + : public PassWrapper> { +public: + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ConvNhwcFhwcTileOptimizePass) + StringRef getArgument() const final { return "conv-nhwc-fhwc-tile-optimize"; } + StringRef getDescription() const final { + return "Conv2d NHWC FHWC optimize with Tile."; + } + ConvNhwcFhwcTileOptimizePass() = default; + ConvNhwcFhwcTileOptimizePass(const ConvNhwcFhwcTileOptimizePass &) {} + explicit ConvNhwcFhwcTileOptimizePass(int64_t vecSizeParam) { + vecSize = vecSizeParam; + } + + void runOnOperation() override; + + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } + + Option vecSize{*this, "vec-size", llvm::cl::desc("Vector size."), + llvm::cl::init(16)}; + Option tilingOH{*this, "tiling-height", + llvm::cl::desc("tiling the output height."), + llvm::cl::init(1)}; + Option tilingOW{*this, "tiling-width", + llvm::cl::desc("tiling the output width."), + llvm::cl::init(1)}; + Option tilingOC{*this, "tiling-channel", + llvm::cl::desc("tiling the output channel."), + llvm::cl::init(1)}; +}; +} // end anonymous namespace. 
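A scalar sketch of the tiling bounds computed by the pattern above (illustrative only, not part of the patch; names are hypothetical): `stepOH`/`stepOW`/`stepOC` are `ceilDiv` tile sizes, and `tilingUpperBound` ({d0 + d1, s0}, seemingly applied as an affine min) clamps the ragged last tile at the dimension extent.

```
#include <algorithm>
#include <cstdint>

// Scalar model of one tiled dimension: the outer forall steps by
// stepOH = ceilDiv(OH, tilingOH); the inner forall covers
// [ivOH, min(ivOH + stepOH, OH)), so the last tile is clamped.
void forEachRowTile(int64_t OH, int64_t tilingOH) {
  int64_t stepOH = (OH + tilingOH - 1) / tilingOH; // d0.ceilDiv(tilingOH)
  for (int64_t ivOH = 0; ivOH < OH; ivOH += stepOH) {
    int64_t ubOH = std::min(ivOH + stepOH, OH);    // tilingUpperBound
    for (int64_t oh = ivOH; oh < ubOH; ++oh) {
      // process output row `oh` within this tile
    }
  }
}
```

With the default `tiling-height`/`tiling-width`/`tiling-channel` of 1, each step equals the full extent, so the loop structure degenerates to the untiled pattern.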
+
+void ConvNhwcFhwcTileOptimizePass::runOnOperation() {
+  MLIRContext *context = &getContext();
+  ModuleOp module = getOperation();
+
+  ConversionTarget target(*context);
+  target
+      .addLegalDialect();
+  target.addLegalOp();
+  target.addLegalOp();
+
+  RewritePatternSet patterns(context);
+  patterns.add(context, vecSize, tilingOH,
+               tilingOW, tilingOC);
+
+  if (failed(applyPartialConversion(module, target, std::move(patterns))))
+    signalPassFailure();
+}
+
+namespace mlir {
+namespace buddy {
+void registerConvNhwcFhwcTileOptimizePass() {
+  PassRegistration();
+}
+} // namespace buddy
+} // namespace mlir
diff --git a/midend/lib/Conversion/ConvOptimization/ConvOptimize.cpp b/midend/lib/Conversion/ConvOptimization/ConvOptimize.cpp
index 043b66498..308bbcf05 100644
--- a/midend/lib/Conversion/ConvOptimization/ConvOptimize.cpp
+++ b/midend/lib/Conversion/ConvOptimization/ConvOptimize.cpp
@@ -2,7 +2,7 @@
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
+// You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
@@ -35,19 +35,25 @@
 namespace {
 
 class ConvOptimizePattern : public ConversionPattern {
 public:
-  explicit ConvOptimizePattern(MLIRContext *context, int64_t vecSizeParam, int64_t kernelMParam, int64_t kernelNParam)
-      : ConversionPattern(linalg::Conv2DNchwFchwOp::getOperationName(), 1, context) {
+  explicit ConvOptimizePattern(MLIRContext *context, int64_t vecSizeParam,
+                               int64_t kernelMParam, int64_t kernelNParam)
+      : ConversionPattern(linalg::Conv2DNchwFchwOp::getOperationName(), 1,
+                          context) {
     vecSize = vecSizeParam;
     kernelM = kernelMParam;
     kernelN = kernelNParam;
   }
 
-  LogicalResult matchAndRewrite(Operation *op, ArrayRef /*operands*/, ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef /*operands*/,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = op->getLoc();
 
     // Some constant we need.
-    const Value c0 = rewriter.create(loc, rewriter.getIndexAttr(0));
-    const Value cf0 = rewriter.create(loc, rewriter.getF32FloatAttr(0.));
+    const Value c0 =
+        rewriter.create(loc, rewriter.getIndexAttr(0));
+    const Value cf0 =
+        rewriter.create(loc, rewriter.getF32FloatAttr(0.));
 
     const AffineExpr d0 = rewriter.getAffineDimExpr(0);
     const AffineExpr d1 = rewriter.getAffineDimExpr(1);
@@ -63,88 +69,198 @@ class ConvOptimizePattern : public ConversionPattern {
     VectorType vecTy = VectorType::get(vecSize, elemTy);
 
     // Dims
-    Value a = rewriter.create(loc, output, 0);
-    Value b = rewriter.create(loc, output, 1);
-    Value c = rewriter.create(loc, output, 2);
-    Value d = rewriter.create(loc, output, 3);
-    Value e = rewriter.create(loc, input, 1);
-    Value f = rewriter.create(loc, filter, 2);
-    Value g = rewriter.create(loc, filter, 3);
+    Value N = rewriter.create(loc, output, 0);  // N
+    Value OC = rewriter.create(loc, output, 1); // OC
+    Value OH = rewriter.create(loc, output, 2); // OH
+    Value OW = rewriter.create(loc, output, 3); // OW
+    Value IC = rewriter.create(loc, input, 1);  // IC
+    Value FH = rewriter.create(loc, filter, 2); // FH
+    Value FW = rewriter.create(loc, filter, 3); // FW
 
     // memref<1xvector>
     MemRefType bufferTy = MemRefType::get(1, vecTy);
     Value buffer = rewriter.create(loc, bufferTy);
 
     // Step 1: Create outer most loops.
- affine::buildAffineLoopNest(rewriter, loc, c0, a, 1, [&](OpBuilder &, Location loc, ValueRange ivRange) { - Value ivA = ivRange.front(); - affine::buildAffineLoopNest(rewriter, loc, c0, b, 1, [&](OpBuilder &, Location loc, ValueRange ivRange) { - Value ivB = ivRange.front(); - affine::buildAffineLoopNest(rewriter, loc, c0, d, 1, [&](OpBuilder &, Location loc, ValueRange ivRange) { - Value ivD = ivRange.front(); - affine::buildAffineLoopNest(rewriter, loc, c0, c, 1, [&](OpBuilder &builder, Location loc, ValueRange ivRange) { - Value ivC = ivRange.front(); - Value t = builder.create(loc, vecTy, cf0); - builder.create(loc, t, buffer, c0); - affine::buildAffineLoopNest(rewriter, loc, c0, e, 1, [&](OpBuilder &builder, Location loc, ValueRange ivRange) { - Value ivE = ivRange.front(); - - Value fixed = builder.create(loc, AffineMap::get(1, 0, d0.ceilDiv(kernelM) * kernelM), ValueRange{f}); - - affine::buildAffineLoopNest(rewriter, loc, c0, fixed, kernelM, [&]([[maybe_unused]] OpBuilder &builder, Location loc, ValueRange ivRange) { - Value ivF = ivRange.front(); - affine::buildAffineLoopNest(rewriter, loc, c0, g, kernelN * vecSize, [&](OpBuilder &builder, Location loc, ValueRange ivRange) { - Value ivG = ivRange.front(); - - SmallVector iList; - SmallVector fList; - for (int i = 0; i < kernelM; ++i) { - Value rowInput = builder.create(loc, AffineMap::get(2, 0, d0 + i + d1), ValueRange{ivC, ivF}); - Value rowFilter = builder.create(loc, AffineMap::get(1, 0, d0 + i), ivF); - for (int j = 0; j < kernelN; ++j) { - Value columnInput = builder.create(loc, AffineMap::get(2, 0, d0 + d1 + j * vecSize), ValueRange{ivD, ivG}); - Value columnFilter = builder.create(loc, AffineMap::get(1, 0, d0 + j * vecSize), ivG); - - Value i = builder.create(loc, vecTy, input, ValueRange{ivA, ivE, rowInput, columnInput}); - - auto protectedF = - builder.create(loc, vecTy, IntegerSet::get(1, 1, {s0 - 1 - d0}, {false}), ValueRange{rowFilter, f}, true); - - // if row in range, read normally. - auto thenBuilder = protectedF.getThenBodyBuilder(); - Value normalReadVec = thenBuilder.create(loc, vecTy, filter, ValueRange{ivB, ivE, rowFilter, columnFilter}); - thenBuilder.create(loc, normalReadVec); - - // if row out of range, give back a empty vector. 
-          auto elseBuilder = protectedF.getElseBodyBuilder();
-          Value emptyVec = elseBuilder.create(loc, vecTy, cf0);
-          elseBuilder.create(loc, emptyVec);
-
-          iList.push_back(i);
-          fList.push_back(protectedF->getOpResult(0));
-        }
-      }
-      Value lastResult = builder.create(loc, buffer, c0);
-      for (int i = 0; i < kernelM; ++i) {
-        for (int j = 0; j < kernelN; ++j) {
-          lastResult = builder.create(loc, vecTy, iList[i * kernelN + j], fList[i * kernelN + j], lastResult);
-        }
-      }
-
-      builder.create(loc, lastResult, buffer, c0);
-    });
+    affine::buildAffineLoopNest(
+        rewriter, loc, c0, N, 1,
+        [&](OpBuilder &, Location loc, ValueRange ivRange) {
+          Value ivA = ivRange.front();
+          affine::buildAffineLoopNest(
+              rewriter, loc, c0, OC, 1,
+              [&](OpBuilder &, Location loc, ValueRange ivRange) {
+                Value ivB = ivRange.front();
+                affine::buildAffineLoopNest(
+                    rewriter, loc, c0, OW, 1,
+                    [&](OpBuilder &, Location loc, ValueRange ivRange) {
+                      Value ivD = ivRange.front();
+                      affine::buildAffineLoopNest(
+                          rewriter, loc, c0, OH, 1,
+                          [&](OpBuilder &builder, Location loc,
+                              ValueRange ivRange) {
+                            Value ivC = ivRange.front();
+                            Value t = builder.create(loc, vecTy, cf0);
+                            builder.create(loc, t, buffer, c0);
+                            affine::buildAffineLoopNest(
+                                rewriter, loc, c0, IC, 1,
+                                [&](OpBuilder &builder, Location loc,
+                                    ValueRange ivRange) {
+                                  Value ivE = ivRange.front();
+
+                                  Value fixed =
+                                      builder.create(
+                                          loc,
+                                          AffineMap::get(1, 0,
+                                                         d0.ceilDiv(kernelM) *
+                                                             kernelM),
+                                          ValueRange{FH});
+
+                                  affine::buildAffineLoopNest(
+                                      rewriter, loc, c0, fixed, kernelM,
+                                      [&]([[maybe_unused]] OpBuilder &builder,
+                                          Location loc, ValueRange ivRange) {
+                                        Value ivF = ivRange.front();
+                                        affine::buildAffineLoopNest(
+                                            rewriter, loc, c0, FW,
+                                            kernelN * vecSize,
+                                            [&](OpBuilder &builder,
+                                                Location loc,
+                                                ValueRange ivRange) {
+                                              Value ivG = ivRange.front();
+
+                                              SmallVector iList;
+                                              SmallVector fList;
+                                              for (int i = 0; i < kernelM;
+                                                   ++i) {
+                                                Value rowInput = builder.create<
+                                                    affine::AffineApplyOp>(
+                                                    loc,
+                                                    AffineMap::get(2, 0,
+                                                                   d0 + i + d1),
+                                                    ValueRange{ivC, ivF});
+                                                Value rowFilter =
+                                                    builder.create<
+                                                        affine::AffineApplyOp>(
+                                                        loc,
+                                                        AffineMap::get(1, 0,
+                                                                       d0 + i),
+                                                        ivF);
+                                                for (int j = 0; j < kernelN;
+                                                     ++j) {
+                                                  Value columnInput =
+                                                      builder.create<
+                                                          affine::
+                                                              AffineApplyOp>(
+                                                          loc,
+                                                          AffineMap::get(
+                                                              2, 0,
+                                                              d0 + d1 +
+                                                                  j * vecSize),
+                                                          ValueRange{ivD, ivG});
+                                                  Value columnFilter =
+                                                      builder.create<
+                                                          affine::
+                                                              AffineApplyOp>(
+                                                          loc,
+                                                          AffineMap::get(
+                                                              1, 0,
+                                                              d0 + j * vecSize),
+                                                          ivG);
+
+                                                  Value i = builder.create<
+                                                      TransferReadOp>(
+                                                      loc, vecTy, input,
+                                                      ValueRange{ivA, ivE,
+                                                                 rowInput,
+                                                                 columnInput});
+
+                                                  auto protectedF =
+                                                      builder.create<
+                                                          affine::AffineIfOp>(
+                                                          loc, vecTy,
+                                                          IntegerSet::get(
+                                                              1, 1,
+                                                              {s0 - 1 - d0},
+                                                              {false}),
+                                                          ValueRange{rowFilter,
+                                                                     FH},
+                                                          true);
+
+                                                  // if row in range, read
+                                                  // normally.
+                                                  auto thenBuilder =
+                                                      protectedF
+                                                          .getThenBodyBuilder();
+                                                  Value normalReadVec =
+                                                      thenBuilder.create<
+                                                          TransferReadOp>(
+                                                          loc, vecTy, filter,
+                                                          ValueRange{
+                                                              ivB, ivE,
+                                                              rowFilter,
+                                                              columnFilter});
+                                                  thenBuilder.create<
+                                                      affine::AffineYieldOp>(
+                                                      loc, normalReadVec);
+
+                                                  // if row out of range, give
+                                                  // back an empty vector.
+                                                  auto elseBuilder =
+                                                      protectedF
+                                                          .getElseBodyBuilder();
+                                                  Value emptyVec =
+                                                      elseBuilder
+                                                          .create(
+                                                              loc, vecTy, cf0);
+                                                  elseBuilder.create<
+                                                      affine::AffineYieldOp>(
+                                                      loc, emptyVec);
+
+                                                  iList.push_back(i);
+                                                  fList.push_back(
+                                                      protectedF->getOpResult(
+                                                          0));
+                                                }
+                                              }
+                                              Value lastResult =
+                                                  builder
+                                                      .create(
+                                                          loc, buffer, c0);
+                                              for (int i = 0; i < kernelM;
+                                                   ++i) {
+                                                for (int j = 0; j < kernelN;
+                                                     ++j) {
+                                                  lastResult = builder.create<
+                                                      vector::FMAOp>(
+                                                      loc, vecTy,
+                                                      iList[i * kernelN + j],
+                                                      fList[i * kernelN + j],
+                                                      lastResult);
+                                                }
+                                              }
+
+                                              builder.create(
+                                                  loc, lastResult, buffer, c0);
+                                            });
+                                      });
+                                });
+
+                            Value reduceVec =
+                                builder.create(loc, buffer, c0);
+                            Value reducedRes =
+                                builder.create(
+                                    loc, vector::CombiningKind::ADD, reduceVec);
+                            Value bias = builder.create(
+                                loc, output, ValueRange{ivA, ivB, ivC, ivD});
+                            Value addRes = builder.create(
+                                loc, bias, reducedRes);
+                            builder.create(
+                                loc, addRes, output,
+                                ValueRange{ivA, ivB, ivC, ivD});
+                          });
+                    });
       });
-      });
-
-      Value reduceVec = builder.create(loc, buffer, c0);
-      Value reducedRes = builder.create(loc, vector::CombiningKind::ADD, reduceVec);
-      Value bias = builder.create(loc, output, ValueRange{ivA, ivB, ivC, ivD});
-      Value addRes = builder.create(loc, bias, reducedRes);
-      builder.create(loc, addRes, output, ValueRange{ivA, ivB, ivC, ivD});
-    });
     });
-    });
-    });
 
     rewriter.create(loc, buffer);
@@ -164,14 +280,16 @@ class ConvOptimizePattern : public ConversionPattern {
 //===----------------------------------------------------------------------===//
 
 namespace {
-class ConvOptimizePass : public PassWrapper> {
+class ConvOptimizePass
+    : public PassWrapper> {
 public:
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ConvOptimizePass)
   StringRef getArgument() const final { return "conv-optimize"; }
   StringRef getDescription() const final { return "Conv optimize."; }
   ConvOptimizePass() = default;
   ConvOptimizePass(const ConvOptimizePass &) {}
-  explicit ConvOptimizePass(int64_t vecSizeParam, int64_t kernelMParam, int64_t kernelNParam) {
+  explicit ConvOptimizePass(int64_t vecSizeParam, int64_t kernelMParam,
+                            int64_t kernelNParam) {
     vecSize = vecSizeParam;
     kernelM = kernelMParam;
     kernelN = kernelNParam;
@@ -180,14 +298,23 @@ class ConvOptimizePass : public PassWrapper
   void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert();
+    registry.insert();
   }
 
-  Option vecSize{*this, "vec-size", llvm::cl::desc("Vector size using in kernel."), llvm::cl::init(16)};
+  Option vecSize{*this, "vec-size",
+                 llvm::cl::desc("Vector size used in kernel."),
+                 llvm::cl::init(16)};
 
-  Option kernelM{*this, "kernel-m", llvm::cl::desc("Specify how many rows kernel will contain."), llvm::cl::init(4)};
+  Option kernelM{
+      *this, "kernel-m",
+      llvm::cl::desc("Specify how many rows kernel will contain."),
+      llvm::cl::init(4)};
 
-  Option kernelN{*this, "kernel-n", llvm::cl::desc("Specify how many columns kernel will cantain."), llvm::cl::init(2)};
+  Option kernelN{
+      *this, "kernel-n",
+      llvm::cl::desc("Specify how many columns kernel will contain."),
+      llvm::cl::init(2)};
 };
 } // end anonymous namespace.
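A scalar sketch of the register blocking used by ConvOptimizePattern above (illustrative only, not part of the patch; the flat indexing is hypothetical, and FW is assumed to divide evenly by the column block, as the loop above assumes): the FH loop is padded up to a multiple of `kernelM`, and the `affine.if` on `s0 - 1 - d0` substitutes a zero vector for filter rows past `FH`, so the unrolled `kernelM * kernelN` grid of FMAs never contributes out-of-range filter data.

```
#include <cstdint>

// Scalar model of the kernelM x kernelN blocking: filter rows past FH
// contribute zero, mirroring the affine.if guard around the filter read.
float blockedFilterDot(const float *input, const float *filter, int64_t FH,
                       int64_t FW, int64_t kernelM, int64_t kernelN) {
  int64_t fixedFH = ((FH + kernelM - 1) / kernelM) * kernelM; // ceilDiv * kernelM
  float acc = 0.0f;
  for (int64_t f = 0; f < fixedFH; f += kernelM)
    for (int64_t g = 0; g < FW; g += kernelN)
      for (int64_t i = 0; i < kernelM; ++i)   // unrolled in the pattern
        for (int64_t j = 0; j < kernelN; ++j) {
          int64_t row = f + i, col = g + j;
          // The pattern reads a zero vector via affine.if; the input read is
          // likewise modeled as zero so the padded rows add nothing.
          float fv = (row < FH) ? filter[row * FW + col] : 0.0f;
          float iv = (row < FH) ? input[row * FW + col] : 0.0f;
          acc += iv * fv; // one lane of vector.fma
        }
  return acc;
}
```

With the defaults kernel-m=4 and kernel-n=2, each innermost iteration materializes eight input/filter vector pairs (iList/fList) before chaining them through vector.fma.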
@@ -196,7 +323,9 @@ void ConvOptimizePass::runOnOperation() { ModuleOp module = getOperation(); ConversionTarget target(*context); - target.addLegalDialect(); + target + .addLegalDialect(); target.addLegalOp(); target.addLegalOp(); diff --git a/midend/lib/Conversion/ConvVectorization/GEMMPointwiseConv2DNhwcHwcf.cpp b/midend/lib/Conversion/ConvVectorization/GEMMPointwiseConv2DNhwcHwcf.cpp index 55c876dd6..918a1388d 100644 --- a/midend/lib/Conversion/ConvVectorization/GEMMPointwiseConv2DNhwcHwcf.cpp +++ b/midend/lib/Conversion/ConvVectorization/GEMMPointwiseConv2DNhwcHwcf.cpp @@ -122,8 +122,7 @@ class GEMMPointwiseConvPattern : public ConversionPattern { namespace { class PointwiseConvToGemmPass - : public PassWrapper> { + : public PassWrapper> { public: MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(PointwiseConvToGemmPass) StringRef getArgument() const final { return "pointwise-conv-to-gemm"; } @@ -144,14 +143,20 @@ class PointwiseConvToGemmPass void PointwiseConvToGemmPass::runOnOperation() { MLIRContext *context = &getContext(); + ModuleOp module = getOperation(); ConversionTarget target(*context); - target.addLegalDialect(); + target + .addLegalDialect(); target.addLegalOp(); target.addLegalOp(); + + RewritePatternSet patterns(context); + patterns.add(context); + if (failed(applyPartialConversion(module, target, std::move(patterns)))) + signalPassFailure(); } namespace mlir { diff --git a/midend/lib/Conversion/DepthwiseConvOptimization/CMakeLists.txt b/midend/lib/Conversion/DepthwiseConvOptimization/CMakeLists.txt new file mode 100644 index 000000000..8493e2a60 --- /dev/null +++ b/midend/lib/Conversion/DepthwiseConvOptimization/CMakeLists.txt @@ -0,0 +1,3 @@ +add_mlir_library(DepthwiseConvOptimization + DepthwiseConvNhwcHwc.cpp + ) diff --git a/midend/lib/Conversion/DepthwiseConvOptimization/DepthwiseConvNhwcHwc.cpp b/midend/lib/Conversion/DepthwiseConvOptimization/DepthwiseConvNhwcHwc.cpp new file mode 100644 index 000000000..04bf76f76 --- /dev/null +++ b/midend/lib/Conversion/DepthwiseConvOptimization/DepthwiseConvNhwcHwc.cpp @@ -0,0 +1,331 @@ +//====- DepthwiseConvNhwcHwc.cpp +//--------------------------------------------------===// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +// +// This file implements the DepthwiseConvNhwcHwc optimize. 
+// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +using namespace mlir; +using namespace vector; + +//===----------------------------------------------------------------------===// +// Rewrite Pattern +//===----------------------------------------------------------------------===// + +namespace { + +class DepthwiseConv2DNhwcHwcOptimizePattern : public ConversionPattern { +public: + explicit DepthwiseConv2DNhwcHwcOptimizePattern(MLIRContext *context, + int64_t vecSizeParam) + : ConversionPattern(linalg::DepthwiseConv2DNhwcHwcOp::getOperationName(), + 1, context) { + vecSize = vecSizeParam; + } + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef /*operands*/, + ConversionPatternRewriter &rewriter) const override { + auto convOp = dyn_cast_or_null(op); + auto loc = op->getLoc(); + + // Some constant we need. + const Value c0 = + rewriter.create(loc, rewriter.getIndexAttr(0)); + const Value c1 = + rewriter.create(loc, rewriter.getIndexAttr(1)); + + const Value vecSizeValue = + rewriter.create(loc, rewriter.getIndexAttr(vecSize)); + const AffineExpr d0 = rewriter.getAffineDimExpr(0); + const AffineExpr d1 = rewriter.getAffineDimExpr(1); + const AffineExpr s0 = rewriter.getAffineSymbolExpr(0); + + Value input = op->getOperand(0); + Value filter = op->getOperand(1); + Value output = op->getOperand(2); + + int strHeight, strWidth, dilHeight, dilWidth; + + // Strides. + if (!convOp.getStrides()) { + strHeight = 1; + strWidth = 1; + } else { + strHeight = convOp.getStrides().getValues()[0]; + strWidth = convOp.getStrides().getValues() + [convOp.getStrides().getValues().size() - 1]; + } + + // Dilations. + if (!convOp.getDilations()) { + dilHeight = 1; + dilWidth = 1; + } else { + dilHeight = convOp.getDilations().getValues()[0]; + dilWidth = convOp.getDilations().getValues() + [convOp.getDilations().getValues().size() - 1]; + } + + ShapedType inputTy = input.getType().cast(); + Type elemTy = inputTy.getElementType(); + VectorType vecTy = VectorType::get(vecSize, elemTy); + + const Value zeroElementType = + rewriter.create(loc, rewriter.getZeroAttr(elemTy)); + + Value zeroElementTypeVec; + if (isa(elemTy)) { + zeroElementTypeVec = + rewriter.create(loc, vecTy, zeroElementType); + } else { + zeroElementTypeVec = + rewriter.create(loc, vecTy, zeroElementType); + } + // Dims + Value N = rewriter.create(loc, output, 0); // N + Value OH = rewriter.create(loc, output, 1); // OH + Value OW = rewriter.create(loc, output, 2); // OW + Value OC = rewriter.create(loc, output, 3); // OC/FC/IC + + Value applyOC = rewriter.create( + loc, AffineMap::get(1, 0, d0.floorDiv(vecSize) * vecSize), OC); + Value tailLength = rewriter.create( + loc, AffineMap::get(1, 0, d0 % vecSize), ValueRange{OC}); + Value maskVector = rewriter.create( + loc, VectorType::get({vecSize}, rewriter.getI1Type()), + ValueRange{tailLength}); + + Value FH = rewriter.create(loc, filter, 0); // FH + Value FW = rewriter.create(loc, filter, 1); // FW + + // clang format off + // Step 1: Create outer most loops. 
+ // Create the scf::ForallOp operation For N,OH,OW + auto outputForAllOp = rewriter.create( + loc, SmallVector({N, OH, OW}), ValueRange{}, + std::nullopt, // No mapping specified in this example + [&](OpBuilder &nestedBuilder, Location nestedLoc, + ValueRange loopIndices) { + Value ivN = loopIndices[0]; // Index for the first dimension N + Value ivOH = loopIndices[1]; // Index for the second dimension OH + Value ivOW = loopIndices[2]; // Index for the third dimension OW + // OC + nestedBuilder.create( + nestedLoc, c0, applyOC, vecSizeValue, ValueRange{std::nullopt}, + [&](OpBuilder &builder, Location loc, Value ivOC, + ValueRange iargs) { + Value tVec = builder.create( + loc, vecTy, output, ValueRange{ivN, ivOH, ivOW, ivOC}); + + // FH + auto forOp = builder.create( + loc, c0, FH, c1, ValueRange{tVec}, + [&](OpBuilder &builder, Location loc, Value ivFH, + ValueRange iargs) { + Value rowInput = builder.create( + loc, + AffineMap::get(2, 0, d0 * strHeight + d1 * dilHeight), + ValueRange{ivOH, ivFH}); + Value rowFilter = ivFH; + // FW + auto forOp = builder.create( + loc, c0, FW, c1, ValueRange{iargs[0]}, + [&](OpBuilder &builder, Location loc, Value ivFW, + ValueRange iargs) { + Value columnInput = + builder.create( + loc, + AffineMap::get( + 2, 0, d0 * strWidth + d1 * dilWidth), + ValueRange{ivOW, ivFW}); + Value columnFilter = + builder.create( + loc, AffineMap::get(1, 0, d0), ivFW); + Value iVec = builder.create( + loc, vecTy, input, + ValueRange{ivN, rowInput, columnInput, ivOC}); + Value fVec = builder.create( + loc, vecTy, filter, + ValueRange{rowFilter, columnFilter, ivOC}); + Value tVecNext; + if (isa(elemTy)) { + Value mulVec = builder.create( + loc, iVec, fVec); + tVecNext = builder.create( + loc, mulVec, iargs[0]); + } else { + tVecNext = builder.create( + loc, vecTy, iVec, fVec, iargs[0]); + } + + builder.create(loc, + ValueRange{tVecNext}); + }); + builder.create( + loc, ValueRange{forOp.getResult(0)}); + }); + builder.create( + loc, forOp.getResult(0), output, + ValueRange{ivN, ivOH, ivOW, ivOC}); + + builder.create(loc, ValueRange{std::nullopt}); + }); + + // applyOC + Value condition = nestedBuilder.create( + loc, arith::CmpIPredicate::sgt, tailLength, c0); + nestedBuilder.create( + loc, condition, [&](OpBuilder &builder, Location loc) { + Value tVec = builder.create( + loc, vecTy, output, ValueRange{ivN, ivOH, ivOW, applyOC}, + maskVector, zeroElementTypeVec); + // FH + auto forOp = builder.create( + loc, c0, FH, c1, ValueRange{tVec}, + [&](OpBuilder &builder, Location loc, Value ivFH, + ValueRange iargs) { + Value rowInput = builder.create( + loc, + AffineMap::get(2, 0, d0 * strHeight + d1 * dilHeight), + ValueRange{ivOH, ivFH}); + Value rowFilter = ivFH; + // FW + auto forOp = builder.create( + loc, c0, FW, c1, ValueRange{iargs[0]}, + [&](OpBuilder &builder, Location loc, Value ivFW, + ValueRange iargs) { + Value columnInput = + builder.create( + loc, + AffineMap::get( + 2, 0, d0 * strWidth + d1 * dilWidth), + ValueRange{ivOW, ivFW}); + Value columnFilter = + builder.create( + loc, AffineMap::get(1, 0, d0), ivFW); + Value iVec = builder.create( + loc, vecTy, input, + ValueRange{ivN, rowInput, columnInput, applyOC}, + maskVector, zeroElementTypeVec); + Value fVec = builder.create( + loc, vecTy, filter, + ValueRange{rowFilter, columnFilter, applyOC}, + maskVector, zeroElementTypeVec); + Value tVecNext; + if (isa(elemTy)) { + Value mulVec = builder.create( + loc, iVec, fVec); + tVecNext = builder.create( + loc, mulVec, iargs[0]); + } else { + tVecNext = builder.create( + 
loc, vecTy, iVec, fVec, iargs[0]); + } + + builder.create(loc, + ValueRange{tVecNext}); + }); + builder.create( + loc, ValueRange{forOp.getResult(0)}); + }); + builder.create( + loc, output, ValueRange{ivN, ivOH, ivOW, applyOC}, + maskVector, forOp.getResult(0)); + builder.create(loc, ValueRange{std::nullopt}); + }); + + nestedBuilder.create(nestedLoc); + }); + // clang format on + + rewriter.eraseOp(op); + return success(); + } + +private: + int64_t vecSize; +}; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// DepthwiseConv2DNhwcHwcOptimizePass +//===----------------------------------------------------------------------===// + +namespace { +class DepthwiseConv2DNhwcHwcOptimizePass + : public PassWrapper> { +public: + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID( + DepthwiseConv2DNhwcHwcOptimizePass) + StringRef getArgument() const final { + return "depthwise-conv-nhwc-hwc-optimize"; + } + StringRef getDescription() const final { + return "Depthwise Conv2d NHWC HWC optimize."; + } + DepthwiseConv2DNhwcHwcOptimizePass() = default; + DepthwiseConv2DNhwcHwcOptimizePass( + const DepthwiseConv2DNhwcHwcOptimizePass &) {} + explicit DepthwiseConv2DNhwcHwcOptimizePass(int64_t vecSizeParam) { + vecSize = vecSizeParam; + } + + void runOnOperation() override; + + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } + + Option vecSize{*this, "vec-size", llvm::cl::desc("Vector size."), + llvm::cl::init(16)}; +}; +} // end anonymous namespace. + +void DepthwiseConv2DNhwcHwcOptimizePass::runOnOperation() { + MLIRContext *context = &getContext(); + ModuleOp module = getOperation(); + + ConversionTarget target(*context); + target + .addLegalDialect(); + target.addLegalOp(); + target.addLegalOp(); + + RewritePatternSet patterns(context); + patterns.add(context, vecSize); + + if (failed(applyPartialConversion(module, target, std::move(patterns)))) + signalPassFailure(); +} + +namespace mlir { +namespace buddy { +void registerDepthwiseConv2DNhwcHwcOptimizePass() { + PassRegistration(); +} +} // namespace buddy +} // namespace mlir diff --git a/midend/lib/Conversion/ExtendDAP/CMakeLists.txt b/midend/lib/Conversion/ExtendDAP/CMakeLists.txt new file mode 100644 index 000000000..5ecaa64c9 --- /dev/null +++ b/midend/lib/Conversion/ExtendDAP/CMakeLists.txt @@ -0,0 +1,3 @@ +add_mlir_library(ExtendDAPPass + ExtendDAPPass.cpp + ) diff --git a/midend/lib/Conversion/ExtendDAP/ExtendDAPPass.cpp b/midend/lib/Conversion/ExtendDAP/ExtendDAPPass.cpp new file mode 100644 index 000000000..20918fda9 --- /dev/null +++ b/midend/lib/Conversion/ExtendDAP/ExtendDAPPass.cpp @@ -0,0 +1,1637 @@ +//====- ExtendDAPPass.cpp - Extend DAP Dialect Lowering Pass -------------===// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +// +// This file defines Extend DAP dialect lowering pass. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/Linalg/Transforms/Transforms.h" +#include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/Pass/Pass.h" + +#include "DAP/DAPDialect.h" +#include "DAP/DAPOps.h" +#include + +using namespace mlir; +using namespace buddy; +using namespace vector; +using namespace mlir::arith; +using namespace mlir::linalg; +using namespace mlir::bufferization; + +//===----------------------------------------------------------------------===// +// Rewrite Pattern +//===----------------------------------------------------------------------===// +Value initMelFilter(PatternRewriter &rewriter, Location loc, Value c0, Value c1, + Value f0) { + FloatType f64Ty = rewriter.getF64Type(); + std::vector data{ + 0.024862593984176087, 0.0019908218880980706, 0.022871772096078023, + 0.003981643776196141, 0.020880950207979945, 0.005972465664294215, + 0.018890128319881874, 0.007963287552392284, 0.016899306431783803, + 0.00995410944049036, 0.014908484543685726, 0.011944931328588433, + 0.012917662655587655, 0.013935753216686492, 0.0109268407674896, + 0.015926575104784558, 0.008936018879391525, 0.017917396992882653, + 0.006945196991293433, 0.019908218880980738, 0.004954375103195362, + 0.021899040769078785, 0.0029635532150973053, 0.02388986265717686, + 0.000972731326999214, 0.025880684545274913, 0.025835325311175383, + 0.0010180905610987637, 0.023844503423077285, 0.003008912449196894, + 0.0218536815349792, 0.004999734337294947, 0.019862859646881146, + 0.00699055622539301, 0.017872037758783092, 0.008981378113491093, + 0.015881215870684983, 0.010972200001589204, 0.013890393982586886, + 0.012963021889687279, 0.011899572094488815, 0.01495384377778533, + 0.009908750206390747, 0.016944665665883398, 0.007917928318292721, + 0.01893548755398142, 0.00592710643019463, 0.020874010059259842, + 0.0040404255283634505, 0.02211421726709443, 0.003318606124028175, + 0.021736724240202378, 0.0036109675065762467, 0.020497701500567712, + 0.0047621938624405336, 0.018486659689787407, 0.00659261778657699, + 0.015856038061722075, 0.00896277117173217, 0.01273876809538182, + 0.011751329556399702, 0.009250369503549623, 0.014853145171184273, + 0.005490841259941731, 0.018177473406255133, 0.0015463665611462304, + 0.0028155461301291296, 0.016329520204672005, 0.0074201889869586046, + 0.011181051149095055, 0.012018863908450889, 0.006065350444974551, + 0.016561277418378373, 0.0010297985194884273, 0.004360878822945124, + 0.012770536755428743, 0.009707189146398206, 0.00698640273562069, + 0.014854299940667337, 0.0014180475372245899, 0.004391219533926463, + 0.011486922519974862, 0.010089744452471235, 0.005411105098683222, + 0.00040022287019040627, 0.01473556574143922, 0.006518189694660876, + 0.008278412940789993, 0.012277561276489102, 0.0021878083895293813, + 0.003967812818254116, 0.01018448003033658, 0.009981875463793392, + 0.0038694348523115605, 0.002286485205918727, 0.011274894758755125, + 0.00846622203459276, 0.00482029386344711, 0.0013397691078130697, + 0.01167825163503901, 0.0076086824024487005, 0.005156961757386885, + 0.0010091040034666294, 0.011507895445653202, 0.007301822420859842, + 0.00498210435635522, 0.00119016554305424, 0.010863499380371759, + 0.007451189902152603, 
0.004385921781217367, 0.001791381421388157, + 0.009832492179588035, 0.007973956151288167, 0.003447454713364045, + 0.00273258990979288, 0.008491348038548032, 0.008797688393881514, + 0.00223576473247454, 0.003943828025486854, 0.00690675179681992, + 0.00985924113208423, 0.0008110002442122107, 0.005364237465034378, + 0.005136650837642653, 0.0008692337979845255, 0.009462301431073093, + 0.0069410774768914035, 0.0032312041128928558, 0.0027783994364693502, + 0.0072370497293947405, 0.008628834806348044, 0.0012336377872858904, + 0.004773912819246566, 0.004943322320664174, 0.0009189908321450893, + 0.008653006854042458, 0.006818502698028209, 0.0026164354511310182, + 0.0032485836741847724, 0.006051854749492107, 0.008880466967901061, + 0.0002863667968266877, 0.005574480003847867, 0.0034677978994882394, + 0.0022684930397946727, 0.006649229002149792, 0.007871464954585431, + 0.0009245911230515482, 0.004809896965650232, 0.003870811942679267, + 0.0017483289767150309, 0.006817032762306986, 0.007283343633866355, + 0.00117032890964782, 0.00444812418116144, 0.0038987290660258203, + 0.001612904728456526, 0.006627129222403821, 0.007047320122030166, + 0.001089299240400779, 0.004421714754438077, 0.003615982700014719, + 0.0017961093868459883, 0.006142666159628658, 0.007102936106246769, + 0.000739228059530935, 0.00467144758787073, 0.003079108186777991, + 0.002239959069494691, 0.005418988314025048, 0.007397185776342812, + 0.0001706867135296284, 0.005145462616431531, 0.0023375742945811327, + 0.0028937394565202498, 0.004504461875632637, 0.0006420162966089689, + 0.006671349456684142, 0.005798479593256033, 0.0014345343541053727, + 0.003713231338714718, 0.0034412191129693185, 0.0016279830841734021, + 0.005447903871833263, 0.006591092774765041, 0.0004075043790786325, + 0.004660011565279688, 0.0022658304669981606, 0.002728930355794336, + 0.0041241565549176885, 0.0007978491463089834, 0.0059824826428372165, + 0.005700822451952891, 0.001009910787399597, 0.003912510374718906, + 0.0027308466911522586, 0.0021241982974849216, 0.00445178259490492, + 0.00033588622025093644, 0.006172718498657583, 0.005150905140435021, + 0.001294369078080886, 0.003494806955712255, 0.002888072359801849, + 0.0018387087709894891, 0.004481775641522813, 0.0001826105862667237, + 0.006075478923243776, 0.004886660120436632, 0.0013131388257841057, + 0.0033530009608540383, 0.0027890160764299436, 0.001819341801271444, + 0.004264893327075782, 0.0002856826416888495, 0.00574077057772162, + 0.00485997904767977, 0.0011121684505728251, 0.003439706723569149, + 0.002478930810892349, 0.002019434399458528, 0.003845693171211873, + 0.0005991620753479068, 0.0052124555315313965, 0.005028639927429189, + 0.0007317548848252071, 0.0037133714976539506, 0.001997469461541363, + 0.002398103067878713, 0.0032631840382575193, 0.0010828346381034754, + 0.004528898614973675, 0.005355687096081724, 0.00020713973879943928, + 0.004137659389003323, 0.001379277219478372, 0.002919631681924923, + 0.0025514147001573046, 0.0017016039748465222, 0.0037235521808362372, + 0.0004835762677681217, 0.00489568966151517, 0.004680894973891053, + 0.0006545265135735772, 0.003552918766430338, 0.001740005261220558, + 0.002424942558969623, 0.0028254840088675383, 0.001296966351508908, + 0.003910962756514519, 0.0001689901440481931, 0.0049964415041615, + 0.004270978712160627, 0.0008546266928695816, 0.003226396296503344, + 0.001859853580513147, 0.00218181388084606, 0.002865080468156712, + 0.0011372314651887762, 0.003870307355800277, 9.264904953149247e-05, + 0.004875534243443842, 0.00408313784805493, 
0.0008483414885398006, + 0.0031157837355450563, 0.0017792497148243077, 0.0021484296230351824, + 0.002710157941108815, 0.0011810755105253086, 0.003641066167393322, + 0.00021372139801543514, 0.00457197439367783, 0.004079728686464056, + 0.0006716204322456959, 0.0031838932167458007, 0.0015337045412328826, + 0.0022880577470275453, 0.0023957886502200695, 0.0013922222773092897, + 0.0032578727592072563, 0.0004963868075910343, 0.004119956868194443, + 0.004227725348621998, 0.00035597961441710105, 0.003398120989238948, + 0.0011543279262957885, 0.0025685166298558978, 0.0019526762381744762, + 0.0017389122704728477, 0.0027510245500531635, 0.0009093079110897976, + 0.0035493728619318513, 7.970355170674751e-05, 0.004347721173810539, + 0.0037299628548962886, 0.0006682946412839993, 0.0029616929924361443, + 0.001407619285405272, 0.0021934231299760003, 0.002146943929526545, + 0.001425153267515856, 0.0028862685736478176, 0.000656883405055712, + 0.0036255932177690904, 0.004154576575021074, 9.92650919068254e-05, + 0.00344310661360058, 0.0007839298194100073, 0.0027316366521800855, + 0.0014685945469131891, 0.002020166690759591, 0.002153259274416371, + 0.0013086967293390965, 0.002837924001919553, 0.0005972267679186017, + 0.0035225887294227346, 0.00399151753267442, 0.00010181095051366054, + 0.0033326481291776314, 0.0007358568907029797, 0.002673778725680842, + 0.0013699028308922986, 0.002014909322184054, 0.0020039487710816176, + 0.0013560399186872648, 0.002637994711270937, 0.0006971705151904762, + 0.003272040651460256, 3.8301111693687365e-05, 0.0039060865916495753, + 0.0033682563798377797, 0.000553036427908215, 0.0027580986579326967, + 0.0011402059404032124, 0.0021479409360276127, 0.0017273754528982098, + 0.0015377832141225299, 0.002314544965393207, 0.0009276254922174464, + 0.002901714477888205, 0.00031746777031236304, 0.003488883990383202, + 0.0035233401613614045, 0.0002608386658672823, 0.0029582927579996227, + 0.0008045974293106004, 0.0023932453546378412, 0.0013483561927539183, + 0.0018281979512760594, 0.0018921149561972363, 0.0012631505479142777, + 0.0024358737196405545, 0.0006981031445524962, 0.0029796324830838727, + 0.00013305574119071463, 0.003523391246527191, 0.0032513804428682767, + 0.0003849812001174835, 0.0027281082517878774, 0.0008885386678154236, + 0.0022048360607074776, 0.0013920961355133636, 0.0016815638696270783, + 0.0018956536032113034, 0.001158291678546679, 0.002399211070909244, + 0.0006350194874662795, 0.0029027685386071836, 0.0001117472963858801, + 0.003406326006305124, 0.0031327631853624, 0.0003667416797849643, + 0.002648177672131103, 0.0008330700230168817, 0.0021635921588998063, + 0.001299398366248799, 0.00167900664566851, 0.0017657267094807168, + 0.0011944211324372133, 0.002232055052712634, 0.0007098356192059166, + 0.0026983833959445514, 0.00022525010597461998, 0.003164711739176469, + 0.0031413131931234614, 0.002692554165534394, 0.002243795137945327, + 0.0017950361103562596, 0.001346277082767192, 0.0008975180551781247, + 0.0004487590275890572}; + Value melFilterData = rewriter.create( + loc, DenseFPElementsAttr::get(RankedTensorType::get(391, f64Ty), + ArrayRef(data))); + + IndexType idxTy = rewriter.getIndexType(); + std::vector D1Index{ + 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, + 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, + 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, + 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, + 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, + 38, 39, 39, 40, 40, 41, 41, 42, 42, 43, 43, 44, 44, 45, 45, + 46, 46, 
47, 47, 48, 48, 49, 49, 50, 50, 51, 51, 52, 52, 53, + 53, 54, 54, 55, 55, 56, 56, 57, 57, 58, 58, 59, 59, 60, 60, + 61, 61, 62, 62, 63, 63, 64, 64, 65, 65, 66, 66, 67, 67, 68, + 68, 69, 69, 70, 70, 71, 71, 72, 72, 73, 73, 74, 74, 75, 75, + 76, 76, 77, 77, 78, 78, 79, 79, 80, 80, 81, 81, 82, 82, 83, + 83, 84, 84, 85, 85, 86, 86, 87, 87, 88, 88, 89, 89, 90, 90, + 91, 91, 92, 92, 93, 93, 94, 94, 95, 95, 96, 96, 97, 97, 98, + 98, 99, 99, 100, 100, 101, 101, 102, 102, 103, 103, 104, 104, 105, 105, + 106, 106, 107, 107, 108, 108, 109, 109, 110, 110, 111, 111, 112, 112, 113, + 113, 114, 114, 115, 115, 116, 116, 117, 117, 118, 118, 119, 119, 120, 120, + 121, 121, 122, 122, 123, 123, 124, 124, 125, 125, 126, 126, 127, 127, 128, + 128, 129, 129, 130, 130, 131, 131, 132, 132, 133, 133, 134, 134, 135, 135, + 136, 136, 137, 137, 138, 138, 139, 139, 140, 140, 141, 141, 142, 142, 143, + 143, 144, 144, 145, 145, 146, 146, 147, 147, 148, 148, 149, 149, 150, 150, + 151, 151, 152, 152, 153, 153, 154, 154, 155, 155, 156, 156, 157, 157, 158, + 158, 159, 159, 160, 160, 161, 161, 162, 162, 163, 163, 164, 164, 165, 165, + 166, 166, 167, 167, 168, 168, 169, 169, 170, 170, 171, 171, 172, 172, 173, + 173, 174, 174, 175, 175, 176, 176, 177, 177, 178, 178, 179, 179, 180, 180, + 181, 181, 182, 182, 183, 183, 184, 184, 185, 185, 186, 186, 187, 187, 188, + 188, 189, 189, 190, 190, 191, 191, 192, 192, 193, 194, 195, 196, 197, 198, + 199}; + Value dim1Index = rewriter.create( + loc, DenseElementsAttr::get(RankedTensorType::get(391, idxTy), + ArrayRef(D1Index))); + + std::vector D2Index{ + 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, + 9, 10, 10, 11, 11, 12, 12, 13, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, + 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, + 28, 29, 29, 30, 30, 31, 31, 32, 32, 33, 33, 34, 33, 34, 34, 35, 35, 36, + 36, 37, 36, 37, 37, 38, 38, 39, 38, 39, 39, 40, 39, 40, 40, 41, 41, 42, + 41, 42, 42, 43, 42, 43, 43, 44, 43, 44, 44, 45, 44, 45, 45, 46, 45, 46, + 46, 47, 46, 47, 47, 48, 47, 48, 48, 49, 48, 49, 49, 50, 49, 50, 49, 50, + 50, 51, 50, 51, 51, 52, 51, 52, 51, 52, 52, 53, 52, 53, 53, 54, 53, 54, + 53, 54, 54, 55, 54, 55, 54, 55, 55, 56, 55, 56, 55, 56, 56, 57, 56, 57, + 56, 57, 57, 58, 57, 58, 57, 58, 58, 59, 58, 59, 58, 59, 58, 59, 59, 60, + 59, 60, 59, 60, 60, 61, 60, 61, 60, 61, 60, 61, 61, 62, 61, 62, 61, 62, + 61, 62, 62, 63, 62, 63, 62, 63, 62, 63, 63, 64, 63, 64, 63, 64, 63, 64, + 64, 65, 64, 65, 64, 65, 64, 65, 65, 66, 65, 66, 65, 66, 65, 66, 66, 67, + 66, 67, 66, 67, 66, 67, 66, 67, 67, 68, 67, 68, 67, 68, 67, 68, 67, 68, + 68, 69, 68, 69, 68, 69, 68, 69, 68, 69, 69, 70, 69, 70, 69, 70, 69, 70, + 69, 70, 70, 71, 70, 71, 70, 71, 70, 71, 70, 71, 71, 72, 71, 72, 71, 72, + 71, 72, 71, 72, 71, 72, 72, 73, 72, 73, 72, 73, 72, 73, 72, 73, 73, 74, + 73, 74, 73, 74, 73, 74, 73, 74, 73, 74, 74, 75, 74, 75, 74, 75, 74, 75, + 74, 75, 74, 75, 74, 75, 75, 76, 75, 76, 75, 76, 75, 76, 75, 76, 75, 76, + 76, 77, 76, 77, 76, 77, 76, 77, 76, 77, 76, 77, 76, 77, 77, 78, 77, 78, + 77, 78, 77, 78, 77, 78, 77, 78, 77, 78, 78, 79, 78, 79, 78, 79, 78, 79, + 78, 79, 78, 79, 78, 79, 79, 79, 79, 79, 79, 79, 79}; + Value dim2Index = rewriter.create( + loc, DenseElementsAttr::get(RankedTensorType::get(391, idxTy), + ArrayRef(D2Index))); + + RankedTensorType melFilterType = RankedTensorType::get({201, 80}, f64Ty); + Value melFilter = rewriter.create(loc, melFilterType, f0); + auto mTp = + MemRefType::get(melFilterType.getShape(), melFilterType.getElementType()); + Value melFilterMemRef = + 
rewriter.create(loc, mTp, melFilter); + + // TODO : remove tomemref & totensor, and use insert to replace store. !! + Value c391 = rewriter.create(loc, 391); + Value number, d1, d2; + // rewriter.create(loc, c0, c391, c1, std::nullopt, + // [&](OpBuilder &builder, Location loc, Value iv, ValueRange iargs) { + // number = builder.create(loc, melFilterData, iv); + // d1 = builder.create(loc, dim1Index, iv); + // d2 = builder.create(loc, dim2Index, iv); + // builder.create(loc, number, melFilterMemRef, + // ValueRange{d1, d2}); builder.create(loc, std::nullopt); + // }); + auto loopOp = rewriter.create(loc, c0, c391, c1); + rewriter.setInsertionPointToStart(loopOp.getBody()); + + Value iv = loopOp.getInductionVar(); + number = rewriter.create(loc, melFilterData, iv); + d1 = rewriter.create(loc, dim1Index, iv); + d2 = rewriter.create(loc, dim2Index, iv); + rewriter.create(loc, number, melFilterMemRef, + ValueRange{d1, d2}); + + rewriter.setInsertionPointAfter(loopOp); + + Value newMelFilter = rewriter.create( + loc, melFilterMemRef, /*restrict=*/true, /*writable=*/false); + + return newMelFilter; +} + +Value getHanningWindow400(PatternRewriter &rewriter, Location loc) { + FloatType f64Ty = rewriter.getF64Type(); + std::vector hanningWindow400{0.0, + 6.168375916970614e-05, + 0.0002467198171342, + 0.0005550625190150482, + 0.0009866357858642205, + 0.001541333133436018, + 0.002219017698460002, + 0.003019522272410202, + 0.0039426493427611176, + 0.0049881711417212315, + 0.00615582970243117, + 0.007445336922613066, + 0.00885637463565564, + 0.01038859468911707, + 0.012041619030626338, + 0.013815039801161721, + 0.015708419435684517, + 0.017721290771101017, + 0.019853157161528523, + 0.02210349260083494, + 0.024471741852423234, + 0.02695732058622735, + 0.029559615522887273, + 0.03227798458506631, + 0.035111757055874326, + 0.03806023374435674, + 0.04112268715800954, + 0.044298361682277465, + 0.04758647376699032, + 0.05098621211969223, + 0.054496737905816106, + 0.05811718495565327, + 0.06184665997806821, + 0.06568424278090434, + 0.06962898649802812, + 0.07367991782295402, + 0.07783603724899257, + 0.08209631931586497, + 0.08645971286271914, + 0.09092514128748835, + 0.09549150281252633, + 0.10015767075645471, + 0.1049224938121548, + 0.10978479633083521, + 0.11474337861210543, + 0.11979701719998453, + 0.1249444651847702, + 0.1301844525106951, + 0.13551568628929433, + 0.14093685111840565, + 0.14644660940672627, + 0.15204360170384285, + 0.15772644703565564, + 0.1634937432451133, + 0.16934406733817414, + 0.17527597583490823, + 0.18128800512565513, + 0.1873786718321474, + 0.1935464731735117, + 0.19978988733705805, + 0.2061073738537635, + 0.21249737397836072, + 0.21895831107393465, + 0.22548859100093405, + 0.23208660251050156, + 0.2387507176420256, + 0.24547929212481434, + 0.2522706657837962, + 0.2591231629491423, + 0.2660350928697134, + 0.2730047501302266, + 0.2800304150720424, + 0.28711035421746367, + 0.2942428206974456, + 0.30142605468260963, + 0.30865828381745525, + 0.31593772365766115, + 0.3232625781103715, + 0.3306310398773543, + 0.3380412909009253, + 0.34549150281252644, + 0.3529798373838481, + 0.3605044469803854, + 0.36806347501731357, + 0.3756550564175726, + 0.38327731807204724, + 0.39092837930172886, + 0.3986063523217438, + 0.4063093427071377, + 0.41403544986029517, + 0.4217827674798846, + 0.4295493840312088, + 0.4373333832178479, + 0.44513284445447737, + 0.45294584334074284, + 0.4607704521360776, + 0.4686047402353433, + 0.4764467746451787, + 0.48429462046093585, + 0.49214634134408974, + 0.5, + 
0.5078536586559104, + 0.5157053795390641, + 0.5235532253548213, + 0.5313952597646567, + 0.5392295478639225, + 0.5470541566592572, + 0.5548671555455227, + 0.5626666167821522, + 0.5704506159687914, + 0.5782172325201155, + 0.5859645501397047, + 0.5936906572928624, + 0.6013936476782563, + 0.6090716206982714, + 0.6167226819279528, + 0.6243449435824273, + 0.6319365249826864, + 0.6394955530196147, + 0.647020162616152, + 0.6545084971874737, + 0.6619587090990747, + 0.6693689601226458, + 0.6767374218896286, + 0.6840622763423391, + 0.6913417161825449, + 0.6985739453173903, + 0.7057571793025544, + 0.7128896457825363, + 0.7199695849279575, + 0.7269952498697734, + 0.7339649071302867, + 0.7408768370508576, + 0.7477293342162038, + 0.7545207078751857, + 0.7612492823579744, + 0.7679133974894983, + 0.7745114089990659, + 0.7810416889260654, + 0.7875026260216393, + 0.7938926261462367, + 0.8002101126629421, + 0.8064535268264883, + 0.8126213281678527, + 0.8187119948743449, + 0.8247240241650918, + 0.8306559326618259, + 0.8365062567548867, + 0.8422735529643444, + 0.8479563982961571, + 0.8535533905932737, + 0.8590631488815944, + 0.8644843137107058, + 0.8698155474893048, + 0.8750555348152298, + 0.8802029828000155, + 0.8852566213878946, + 0.8902152036691648, + 0.8950775061878451, + 0.8998423292435453, + 0.9045084971874737, + 0.9090748587125117, + 0.9135402871372809, + 0.9179036806841352, + 0.9221639627510075, + 0.9263200821770461, + 0.9303710135019718, + 0.9343157572190957, + 0.9381533400219317, + 0.9418828150443468, + 0.9455032620941839, + 0.9490137878803078, + 0.9524135262330098, + 0.9557016383177226, + 0.9588773128419905, + 0.9619397662556434, + 0.9648882429441257, + 0.9677220154149337, + 0.9704403844771128, + 0.9730426794137726, + 0.9755282581475768, + 0.977896507399165, + 0.9801468428384715, + 0.982278709228899, + 0.9842915805643155, + 0.9861849601988383, + 0.9879583809693737, + 0.9896114053108829, + 0.9911436253643444, + 0.9925546630773869, + 0.9938441702975689, + 0.9950118288582788, + 0.996057350657239, + 0.9969804777275899, + 0.99778098230154, + 0.998458666866564, + 0.9990133642141358, + 0.9994449374809851, + 0.9997532801828658, + 0.9999383162408303, + 1.0, + 0.9999383162408303, + 0.9997532801828658, + 0.9994449374809851, + 0.9990133642141358, + 0.998458666866564, + 0.99778098230154, + 0.9969804777275899, + 0.996057350657239, + 0.9950118288582788, + 0.9938441702975689, + 0.9925546630773869, + 0.9911436253643444, + 0.9896114053108829, + 0.9879583809693737, + 0.9861849601988383, + 0.9842915805643155, + 0.982278709228899, + 0.9801468428384715, + 0.977896507399165, + 0.9755282581475768, + 0.9730426794137726, + 0.9704403844771128, + 0.9677220154149337, + 0.9648882429441257, + 0.9619397662556434, + 0.9588773128419905, + 0.9557016383177226, + 0.9524135262330098, + 0.9490137878803078, + 0.9455032620941839, + 0.9418828150443468, + 0.9381533400219317, + 0.9343157572190957, + 0.9303710135019718, + 0.9263200821770461, + 0.9221639627510075, + 0.9179036806841352, + 0.9135402871372809, + 0.9090748587125117, + 0.9045084971874737, + 0.8998423292435453, + 0.8950775061878451, + 0.8902152036691648, + 0.8852566213878946, + 0.8802029828000155, + 0.8750555348152298, + 0.8698155474893048, + 0.8644843137107058, + 0.8590631488815944, + 0.8535533905932737, + 0.8479563982961571, + 0.8422735529643444, + 0.8365062567548867, + 0.8306559326618259, + 0.8247240241650918, + 0.8187119948743449, + 0.8126213281678527, + 0.8064535268264883, + 0.8002101126629421, + 0.7938926261462367, + 0.7875026260216393, + 0.7810416889260654, + 
0.7745114089990659, + 0.7679133974894983, + 0.7612492823579744, + 0.7545207078751857, + 0.7477293342162038, + 0.7408768370508576, + 0.7339649071302867, + 0.7269952498697734, + 0.7199695849279575, + 0.7128896457825363, + 0.7057571793025544, + 0.6985739453173903, + 0.6913417161825449, + 0.6840622763423391, + 0.6767374218896286, + 0.6693689601226458, + 0.6619587090990747, + 0.6545084971874737, + 0.647020162616152, + 0.6394955530196147, + 0.6319365249826864, + 0.6243449435824273, + 0.6167226819279528, + 0.6090716206982714, + 0.6013936476782563, + 0.5936906572928624, + 0.5859645501397047, + 0.5782172325201155, + 0.5704506159687914, + 0.5626666167821522, + 0.5548671555455227, + 0.5470541566592572, + 0.5392295478639225, + 0.5313952597646567, + 0.5235532253548213, + 0.5157053795390641, + 0.5078536586559104, + 0.5, + 0.49214634134408974, + 0.48429462046093585, + 0.4764467746451787, + 0.4686047402353433, + 0.4607704521360776, + 0.45294584334074284, + 0.44513284445447737, + 0.4373333832178479, + 0.4295493840312088, + 0.4217827674798846, + 0.41403544986029517, + 0.4063093427071377, + 0.3986063523217438, + 0.39092837930172886, + 0.38327731807204724, + 0.3756550564175726, + 0.36806347501731357, + 0.3605044469803854, + 0.3529798373838481, + 0.34549150281252644, + 0.3380412909009253, + 0.3306310398773543, + 0.3232625781103715, + 0.31593772365766115, + 0.30865828381745525, + 0.30142605468260963, + 0.2942428206974456, + 0.28711035421746367, + 0.2800304150720424, + 0.2730047501302266, + 0.2660350928697134, + 0.2591231629491423, + 0.2522706657837962, + 0.24547929212481434, + 0.2387507176420256, + 0.23208660251050156, + 0.22548859100093405, + 0.21895831107393465, + 0.21249737397836072, + 0.2061073738537635, + 0.19978988733705805, + 0.1935464731735117, + 0.1873786718321474, + 0.18128800512565513, + 0.17527597583490823, + 0.16934406733817414, + 0.1634937432451133, + 0.15772644703565564, + 0.15204360170384285, + 0.14644660940672627, + 0.14093685111840565, + 0.13551568628929433, + 0.1301844525106951, + 0.1249444651847702, + 0.11979701719998453, + 0.11474337861210543, + 0.10978479633083521, + 0.1049224938121548, + 0.10015767075645471, + 0.09549150281252633, + 0.09092514128748835, + 0.08645971286271914, + 0.08209631931586497, + 0.07783603724899257, + 0.07367991782295402, + 0.06962898649802812, + 0.06568424278090434, + 0.06184665997806821, + 0.05811718495565327, + 0.054496737905816106, + 0.05098621211969223, + 0.04758647376699032, + 0.044298361682277465, + 0.04112268715800954, + 0.03806023374435674, + 0.035111757055874326, + 0.03227798458506631, + 0.029559615522887273, + 0.02695732058622735, + 0.024471741852423234, + 0.02210349260083494, + 0.019853157161528523, + 0.017721290771101017, + 0.015708419435684517, + 0.013815039801161721, + 0.012041619030626338, + 0.01038859468911707, + 0.00885637463565564, + 0.007445336922613066, + 0.00615582970243117, + 0.0049881711417212315, + 0.0039426493427611176, + 0.003019522272410202, + 0.002219017698460002, + 0.001541333133436018, + 0.0009866357858642205, + 0.0005550625190150482, + 0.0002467198171342, + 6.168375916970614e-05}; + Value window = rewriter.create( + loc, DenseFPElementsAttr::get(RankedTensorType::get(400, f64Ty), + ArrayRef(hanningWindow400))); + return window; +} + +// Implement numpy reflect padding, low for left padding length, high for right +// padding length +Value padReflect(PatternRewriter &rewriter, Location loc, Value c0, Value c1, + Value input, int64_t low, int64_t high) { + Value lowPadLen = rewriter.create(loc, low); + Value highPadLen = 
rewriter.create(loc, high); + SmallVector lowValues; + SmallVector highValues; + lowValues.push_back(lowPadLen); + highValues.push_back(c0); + + FloatType f64Ty = rewriter.getF64Type(); + IndexType idxTy = rewriter.getIndexType(); + // Pad left part(low) for input tensor + int64_t inputSize = + llvm::cast(input.getType()).getShape()[0]; + int64_t lowPaddedSize = inputSize + low; + auto padOp1 = rewriter.create( + loc, RankedTensorType::get(lowPaddedSize, f64Ty), input, lowValues, + highValues); + + Region *padOpRegion1 = &padOp1.getRegion(); + int64_t sourceRank1 = llvm::cast(input.getType()).getRank(); + SmallVector blockArgTypes1(sourceRank1, idxTy); + SmallVector blockArgLocs1(sourceRank1, loc); + + // Create Block for padOp1 and insert operations + OpBuilder::InsertPoint ip1(rewriter.saveInsertionPoint()); + rewriter.createBlock(padOpRegion1, padOpRegion1->end(), blockArgTypes1, + blockArgLocs1); + Value iv1 = padOp1.getRegion().front().getArgument(0); + Value idx1 = rewriter.create(loc, lowPadLen, iv1); + Value elem1 = rewriter.create(loc, input, idx1); + rewriter.create(loc, elem1); + rewriter.restoreInsertionPoint(ip1); + lowValues.clear(); + highValues.clear(); + + Value lowPaddedInput = padOp1.getResult(); + + // Pad right part(high) for lowPaddedInput tensor + lowValues.push_back(c0); + highValues.push_back(highPadLen); + int64_t highPaddedSize = lowPaddedSize + high; + Value lowPaddedInputDim = + rewriter.create(loc, lowPaddedInput, c0); + Value symIndex = rewriter.create(loc, lowPaddedInputDim, c1); + auto padOp2 = rewriter.create( + loc, RankedTensorType::get(highPaddedSize, f64Ty), lowPaddedInput, + lowValues, highValues); + Region *padOpRegion2 = &padOp2.getRegion(); + int64_t sourceRank2 = + llvm::cast(lowPaddedInput.getType()).getRank(); + SmallVector blockArgTypes2(sourceRank2, idxTy); + SmallVector blockArgLocs2(sourceRank2, loc); + + OpBuilder::InsertPoint ip2(rewriter.saveInsertionPoint()); + rewriter.createBlock(padOpRegion2, padOpRegion2->end(), blockArgTypes2, + blockArgLocs2); + Value iv2 = padOp2.getRegion().front().getArgument(0); + Value sub = rewriter.create(loc, iv2, symIndex); + Value idx2 = rewriter.create(loc, symIndex, sub); + Value elem2 = rewriter.create(loc, lowPaddedInput, idx2); + rewriter.create(loc, elem2); + rewriter.restoreInsertionPoint(ip2); + lowValues.clear(); + highValues.clear(); + + return padOp2.getResult(); +} + +inline Value WA(OpBuilder &builder, Location loc, Value wa, Value x, Value i, + Value ido, Value c1) { + Value idom1 = builder.create(loc, ido, c1); + Value tmp1 = builder.create(loc, x, idom1); + Value index = builder.create(loc, tmp1, i); + return builder.create(loc, wa, index); +} + +inline Value CC(OpBuilder &builder, Location loc, Value cc, Value a, Value b, + Value c, Value ido, Value l1) { + Value tmp1 = builder.create(loc, l1, c); + Value tmp2 = builder.create(loc, tmp1, b); + Value tmp3 = builder.create(loc, tmp2, ido); + Value index = builder.create(loc, tmp3, a); + return builder.create(loc, cc, index); +} + +inline void CH(OpBuilder &builder, Location loc, Value ch, Value a, Value b, + Value c, Value ido, Value cdim, Value toWrite) { + Value tmp1 = builder.create(loc, cdim, c); + Value tmp2 = builder.create(loc, tmp1, b); + Value tmp3 = builder.create(loc, tmp2, ido); + Value index = builder.create(loc, tmp3, a); + builder.create(loc, toWrite, ch, index); + return; +} + +inline std::vector PM(OpBuilder &builder, Location loc, Value c, + Value d) { + return {builder.create(loc, c, d), + builder.create(loc, c, d)}; +} + 
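Two hedged reading aids for the code above, neither part of the original patch. First, the 400-entry table in `getHanningWindow400` matches the periodic Hann window w[n] = 0.5 * (1 - cos(2*pi*n / 400)), checked against the peak value 1.0 at n = 200 and the tail value 6.168e-05 at n = 399. Second, `padReflect` mirrors NumPy's `np.pad(x, (low, high), mode="reflect")`: the two `tensor.pad` regions read mirrored indices without duplicating the edge samples. The names `hanning400Ref` and `padReflectRef` below are hypothetical.

```
#include <cmath>
#include <cstdint>
#include <vector>

// Regenerates the hanningWindow400 table: periodic Hann window of length 400.
double hanning400Ref(int n) {
  const double pi = 3.14159265358979323846;
  return 0.5 * (1.0 - std::cos(2.0 * pi * n / 400.0));
}

// Scalar equivalent of padReflect: mirror `low` samples on the left and
// `high` samples on the right, without duplicating the edge elements,
// matching the index arithmetic in the two tensor.pad regions above.
std::vector<double> padReflectRef(const std::vector<double> &in, int64_t low,
                                  int64_t high) {
  const int64_t n = static_cast<int64_t>(in.size());
  std::vector<double> out(low + n + high);
  for (int64_t i = 0; i < low; ++i)
    out[i] = in[low - i];             // left mirror: first pad region
  for (int64_t i = 0; i < n; ++i)
    out[low + i] = in[i];             // original samples
  for (int64_t i = 0; i < high; ++i)
    out[low + n + i] = in[n - 2 - i]; // right mirror: second pad region
  return out;
}
```

For instance, padReflectRef({1, 2, 3, 4}, 2, 2) returns {3, 2, 1, 2, 3, 4, 3, 2}; the Whisper lowering later uses low = high = 200 around the 480000-sample buffer.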
+inline std::vector MULPM(OpBuilder &builder, Location loc, Value c, + Value d, Value e, Value f) { + Value tmp1 = builder.create(loc, c, e); + Value tmp2 = builder.create(loc, d, f); + Value tmp3 = builder.create(loc, c, f); + Value tmp4 = builder.create(loc, d, e); + return {builder.create(loc, tmp1, tmp2), + builder.create(loc, tmp3, tmp4)}; +} + +void radf4Extend(OpBuilder &opBuilder, Location loc, Value cc, Value ch, + Value wa, Value ido, Value l1, Value cdim, Value c0, Value c1, + Value c2, Value c3) { + opBuilder.create( + loc, c0, l1, c1, std::nullopt, + [&](OpBuilder &builder, Location loc, Value k, ValueRange kargs) { + builder.create( + loc, c2, ido, c2, std::nullopt, + [&](OpBuilder &b, Location loc, Value i, ValueRange iargs) { + Value ic = b.create(loc, ido, i); + Value icm1 = b.create(loc, ic, c1); + Value im1 = b.create(loc, i, c1); + Value im2 = b.create(loc, i, c2); + + Value wa0im2 = WA(b, loc, wa, c0, im2, ido, c1); + Value wa0im1 = WA(b, loc, wa, c0, im1, ido, c1); + Value ccim1k1 = CC(b, loc, cc, im1, k, c1, ido, l1); + Value ccik1 = CC(b, loc, cc, i, k, c1, ido, l1); + std::vector cr2_ci2 = + MULPM(b, loc, wa0im2, wa0im1, ccim1k1, ccik1); + + Value wa1im2 = WA(b, loc, wa, c1, im2, ido, c1); + Value wa1im1 = WA(b, loc, wa, c1, im1, ido, c1); + Value ccim1k2 = CC(b, loc, cc, im1, k, c2, ido, l1); + Value ccik2 = CC(b, loc, cc, i, k, c2, ido, l1); + std::vector cr3_ci3 = + MULPM(b, loc, wa1im2, wa1im1, ccim1k2, ccik2); + + Value wa2im2 = WA(b, loc, wa, c2, im2, ido, c1); + Value wa2im1 = WA(b, loc, wa, c2, im1, ido, c1); + Value ccim1k3 = CC(b, loc, cc, im1, k, c3, ido, l1); + Value ccik3 = CC(b, loc, cc, i, k, c3, ido, l1); + std::vector cr4_ci4 = + MULPM(b, loc, wa2im2, wa2im1, ccim1k3, ccik3); + + std::vector tr1_tr4 = PM(b, loc, cr4_ci4[0], cr2_ci2[0]); + std::vector ti1_ti4 = PM(b, loc, cr2_ci2[1], cr4_ci4[1]); + Value ccim1k0 = CC(b, loc, cc, im1, k, c0, ido, l1); + std::vector tr2_tr3 = PM(b, loc, ccim1k0, cr3_ci3[0]); + Value ccik0 = CC(b, loc, cc, i, k, c0, ido, l1); + std::vector ti2_ti3 = PM(b, loc, ccik0, cr3_ci3[1]); + + std::vector chtmp0 = PM(b, loc, tr2_tr3[0], tr1_tr4[0]); + CH(b, loc, ch, im1, c0, k, ido, cdim, chtmp0[0]); + CH(b, loc, ch, icm1, c3, k, ido, cdim, chtmp0[1]); + + std::vector chtmp1 = PM(b, loc, ti1_ti4[0], ti2_ti3[0]); + CH(b, loc, ch, i, c0, k, ido, cdim, chtmp1[0]); + CH(b, loc, ch, ic, c3, k, ido, cdim, chtmp1[1]); + + std::vector chtmp2 = PM(b, loc, tr2_tr3[1], ti1_ti4[1]); + CH(b, loc, ch, im1, c2, k, ido, cdim, chtmp2[0]); + CH(b, loc, ch, icm1, c1, k, ido, cdim, chtmp2[1]); + + std::vector chtmp3 = PM(b, loc, tr1_tr4[1], ti2_ti3[1]); + CH(b, loc, ch, i, c2, k, ido, cdim, chtmp3[0]); + CH(b, loc, ch, ic, c1, k, ido, cdim, chtmp3[1]); + + b.create(loc, std::nullopt); + }); + + builder.create(loc, std::nullopt); + }); + + return; +} + +void radf4(OpBuilder &opBuilder, Location loc, Value cc, Value ch, Value wa, + Value ido, Value l1, Value c0, Value c1, Value c2, Value c3) { + FloatType f64Ty = opBuilder.getF64Type(); + Value cdim = opBuilder.create(loc, 4); + Value hsqt2 = opBuilder.create( + loc, APFloat(double(0.70710678118654752440)), f64Ty); + Value idom1 = opBuilder.create(loc, ido, c1); + + opBuilder.create( + loc, c0, l1, c1, std::nullopt, + [&](OpBuilder &builder, Location loc, Value iv, ValueRange iargs) { + Value cc0k3 = CC(builder, loc, cc, c0, iv, c3, ido, l1); + Value cc0k1 = CC(builder, loc, cc, c0, iv, c1, ido, l1); + std::vector tr1_tmp0 = PM(builder, loc, cc0k3, cc0k1); + CH(builder, loc, ch, c0, c2, iv, ido, 
cdim, tr1_tmp0[1]); + + Value cc0k0 = CC(builder, loc, cc, c0, iv, c0, ido, l1); + Value cc0k2 = CC(builder, loc, cc, c0, iv, c2, ido, l1); + std::vector tr2_tmp1 = PM(builder, loc, cc0k0, cc0k2); + CH(builder, loc, ch, idom1, c1, iv, ido, cdim, tr2_tmp1[1]); + + std::vector tmp2_tmp3 = + PM(builder, loc, tr2_tmp1[0], tr1_tmp0[0]); + CH(builder, loc, ch, c0, c0, iv, ido, cdim, tmp2_tmp3[0]); + CH(builder, loc, ch, idom1, c3, iv, ido, cdim, tmp2_tmp3[1]); + + builder.create(loc, std::nullopt); + }); + + Value reminder = opBuilder.create(loc, ido, c2); + Value condition0 = opBuilder.create( + loc, arith::CmpIPredicate::eq, reminder, c0); + opBuilder.create( + loc, condition0, [&](OpBuilder &builder, Location loc) { + Value negHsqt2 = builder.create( + loc, APFloat(double(-0.70710678118654752440)), f64Ty); + + builder.create( + loc, c0, l1, c1, std::nullopt, + [&](OpBuilder &b, Location loc, Value iv, ValueRange iargs) { + Value ccidom1k1 = CC(b, loc, cc, idom1, iv, c1, ido, l1); + Value ccidom1k3 = CC(b, loc, cc, idom1, iv, c3, ido, l1); + Value tmp0 = b.create(loc, ccidom1k1, ccidom1k3); + Value ti1 = b.create(loc, negHsqt2, tmp0); + + Value tmp1 = b.create(loc, ccidom1k1, ccidom1k3); + Value tr1 = b.create(loc, hsqt2, tmp1); + + Value ccidom1k0 = CC(b, loc, cc, idom1, iv, c0, ido, l1); + std::vector tmp2_tmp3 = PM(b, loc, ccidom1k0, tr1); + CH(b, loc, ch, idom1, c0, iv, ido, cdim, tmp2_tmp3[0]); + CH(b, loc, ch, idom1, c2, iv, ido, cdim, tmp2_tmp3[1]); + + Value ccidom1k2 = CC(b, loc, cc, idom1, iv, c2, ido, l1); + std::vector tmp4_tmp5 = PM(b, loc, ti1, ccidom1k2); + CH(b, loc, ch, c0, c3, iv, ido, cdim, tmp4_tmp5[0]); + CH(b, loc, ch, c0, c1, iv, ido, cdim, tmp4_tmp5[1]); + + b.create(loc, std::nullopt); + }); + + builder.create(loc, std::nullopt); + }); + + Value condition1 = + opBuilder.create(loc, arith::CmpIPredicate::sgt, ido, c2); + opBuilder.create( + loc, condition1, [&](OpBuilder &builder, Location loc) { + radf4Extend(builder, loc, cc, ch, wa, ido, l1, cdim, c0, c1, c2, c3); + builder.create(loc, std::nullopt); + }); + + return; +} + +void radf5Extend(OpBuilder &opBuilder, Location loc, Value cc, Value ch, + Value wa, Value ido, Value l1, Value cdim, Value tr11, + Value tr12, Value ti11, Value ti12, Value c0, Value c1, + Value c2, Value c3, Value c4) { + opBuilder.create( + loc, c0, l1, c1, std::nullopt, + [&](OpBuilder &builder, Location loc, Value k, ValueRange kargs) { + builder.create( + loc, c2, ido, c2, std::nullopt, + [&](OpBuilder &b, Location loc, Value i, ValueRange iargs) { + Value ic = b.create(loc, ido, i); + Value icm1 = b.create(loc, ic, c1); + Value im1 = b.create(loc, i, c1); + Value im2 = b.create(loc, i, c2); + + Value wa0im2 = WA(b, loc, wa, c0, im2, ido, c1); + Value wa0im1 = WA(b, loc, wa, c0, im1, ido, c1); + Value ccim1k1 = CC(b, loc, cc, im1, k, c1, ido, l1); + Value ccik1 = CC(b, loc, cc, i, k, c1, ido, l1); + std::vector dr2_di2 = + MULPM(b, loc, wa0im2, wa0im1, ccim1k1, ccik1); + + Value wa1im2 = WA(b, loc, wa, c1, im2, ido, c1); + Value wa1im1 = WA(b, loc, wa, c1, im1, ido, c1); + Value ccim1k2 = CC(b, loc, cc, im1, k, c2, ido, l1); + Value ccik2 = CC(b, loc, cc, i, k, c2, ido, l1); + std::vector dr3_di3 = + MULPM(b, loc, wa1im2, wa1im1, ccim1k2, ccik2); + + Value wa2im2 = WA(b, loc, wa, c2, im2, ido, c1); + Value wa2im1 = WA(b, loc, wa, c2, im1, ido, c1); + Value ccim1k3 = CC(b, loc, cc, im1, k, c3, ido, l1); + Value ccik3 = CC(b, loc, cc, i, k, c3, ido, l1); + std::vector dr4_di4 = + MULPM(b, loc, wa2im2, wa2im1, ccim1k3, ccik3); + + Value wa3im2 
= WA(b, loc, wa, c3, im2, ido, c1); + Value wa3im1 = WA(b, loc, wa, c3, im1, ido, c1); + Value ccim1k4 = CC(b, loc, cc, im1, k, c4, ido, l1); + Value ccik4 = CC(b, loc, cc, i, k, c4, ido, l1); + std::vector dr5_di5 = + MULPM(b, loc, wa3im2, wa3im1, ccim1k4, ccik4); + + std::vector cr2_ci5 = PM(b, loc, dr5_di5[0], dr2_di2[0]); + std::vector ci2_cr5 = PM(b, loc, dr2_di2[1], dr5_di5[1]); + std::vector cr3_ci4 = PM(b, loc, dr4_di4[0], dr3_di3[0]); + std::vector ci3_cr4 = PM(b, loc, dr3_di3[1], dr4_di4[1]); + + Value ccim1k0 = CC(b, loc, cc, im1, k, c0, ido, l1); + Value tmpch0 = b.create(loc, ccim1k0, cr2_ci5[0]); + Value chim10k = b.create(loc, tmpch0, cr3_ci4[0]); + CH(b, loc, ch, im1, c0, k, ido, cdim, chim10k); + + Value ccik0 = CC(b, loc, cc, i, k, c0, ido, l1); + Value tmpch1 = b.create(loc, ccik0, ci2_cr5[0]); + Value chi0k = b.create(loc, tmpch1, ci3_cr4[0]); + CH(b, loc, ch, i, c0, k, ido, cdim, chi0k); + + Value tmp0 = b.create(loc, tr11, cr2_ci5[0]); + Value tmp1 = b.create(loc, ccim1k0, tmp0); + Value tmp2 = b.create(loc, tr12, cr3_ci4[0]); + Value tr2 = b.create(loc, tmp1, tmp2); + + Value tmp3 = b.create(loc, tr11, ci2_cr5[0]); + Value tmp4 = b.create(loc, ccik0, tmp3); + Value tmp5 = b.create(loc, tr12, ci3_cr4[0]); + Value ti2 = b.create(loc, tmp4, tmp5); + + Value tmp6 = b.create(loc, tr12, cr2_ci5[0]); + Value tmp7 = b.create(loc, ccim1k0, tmp6); + Value tmp8 = b.create(loc, tr11, cr3_ci4[0]); + Value tr3 = b.create(loc, tmp7, tmp8); + + Value tmp9 = b.create(loc, tr12, ci2_cr5[0]); + Value tmp10 = b.create(loc, ccik0, tmp9); + Value tmp11 = b.create(loc, tr11, ci3_cr4[0]); + Value ti3 = b.create(loc, tmp10, tmp11); + + std::vector tr5_tr4 = + MULPM(b, loc, ci2_cr5[1], ci3_cr4[1], ti11, ti12); + std::vector ti5_ti4 = + MULPM(b, loc, cr2_ci5[1], cr3_ci4[1], ti11, ti12); + + std::vector chtmp0 = PM(b, loc, tr2, tr5_tr4[0]); + CH(b, loc, ch, im1, c2, k, ido, cdim, chtmp0[0]); + CH(b, loc, ch, icm1, c1, k, ido, cdim, chtmp0[1]); + + std::vector chtmp1 = PM(b, loc, ti5_ti4[0], ti2); + CH(b, loc, ch, i, c2, k, ido, cdim, chtmp1[0]); + CH(b, loc, ch, ic, c1, k, ido, cdim, chtmp1[1]); + + std::vector chtmp2 = PM(b, loc, tr3, tr5_tr4[1]); + CH(b, loc, ch, im1, c4, k, ido, cdim, chtmp2[0]); + CH(b, loc, ch, icm1, c3, k, ido, cdim, chtmp2[1]); + + std::vector chtmp3 = PM(b, loc, ti5_ti4[1], ti3); + CH(b, loc, ch, i, c4, k, ido, cdim, chtmp3[0]); + CH(b, loc, ch, ic, c3, k, ido, cdim, chtmp3[1]); + + b.create(loc, std::nullopt); + }); + + builder.create(loc, std::nullopt); + }); + + return; +} + +void radf5(OpBuilder &builder, Location loc, Value cc, Value ch, Value wa, + Value ido, Value l1, Value c0, Value c1, Value c2, Value c3, + Value c4) { + FloatType f64Ty = builder.getF64Type(); + Value cdim = builder.create(loc, 5); + Value tr11 = builder.create( + loc, APFloat(double(0.3090169943749474241)), f64Ty); + Value tr12 = builder.create( + loc, APFloat(double(-0.8090169943749474241)), f64Ty); + Value ti11 = builder.create( + loc, APFloat(double(0.95105651629515357212)), f64Ty); + Value ti12 = builder.create( + loc, APFloat(double(0.58778525229247312917)), f64Ty); + Value idom1 = builder.create(loc, ido, c1); + + builder.create( + loc, c0, l1, c1, std::nullopt, + [&](OpBuilder &b, Location loc, Value iv, ValueRange iargs) { + Value cc0k4 = CC(b, loc, cc, c0, iv, c4, ido, l1); + Value cc0k1 = CC(b, loc, cc, c0, iv, c1, ido, l1); + std::vector cr2_ci5 = PM(b, loc, cc0k4, cc0k1); + + Value cc0k3 = CC(b, loc, cc, c0, iv, c3, ido, l1); + Value cc0k2 = CC(b, loc, cc, c0, iv, c2, ido, l1); + 
std::vector cr3_ci4 = PM(b, loc, cc0k3, cc0k2); + + Value cc0k0 = CC(b, loc, cc, c0, iv, c0, ido, l1); + Value tmpch0 = b.create(loc, cc0k0, cr2_ci5[0]); + Value ch0 = b.create(loc, tmpch0, cr3_ci4[0]); + CH(b, loc, ch, c0, c0, iv, ido, cdim, ch0); + + Value tmpch1 = b.create(loc, tr11, cr2_ci5[0]); + Value tmpch2 = b.create(loc, tr12, cr3_ci4[0]); + Value tmpch3 = b.create(loc, cc0k0, tmpch1); + Value ch1 = b.create(loc, tmpch2, tmpch3); + CH(b, loc, ch, idom1, c1, iv, ido, cdim, ch1); + + Value tmpch4 = b.create(loc, ti11, cr2_ci5[1]); + Value tmpch5 = b.create(loc, ti12, cr3_ci4[1]); + Value ch2 = b.create(loc, tmpch4, tmpch5); + CH(b, loc, ch, c0, c2, iv, ido, cdim, ch2); + + Value tmpch6 = b.create(loc, tr12, cr2_ci5[0]); + Value tmpch7 = b.create(loc, tr11, cr3_ci4[0]); + Value tmpch8 = b.create(loc, tmpch6, tmpch7); + Value ch3 = b.create(loc, cc0k0, tmpch8); + CH(b, loc, ch, idom1, c3, iv, ido, cdim, ch3); + + Value tmpch9 = b.create(loc, ti12, cr2_ci5[1]); + Value tmpch10 = b.create(loc, ti11, cr3_ci4[1]); + Value ch4 = b.create(loc, tmpch9, tmpch10); + CH(b, loc, ch, c0, c4, iv, ido, cdim, ch4); + + b.create(loc, std::nullopt); + }); + + Value condition = + builder.create(loc, arith::CmpIPredicate::ne, ido, c1); + builder.create(loc, condition, [&](OpBuilder &b, Location loc) { + radf5Extend(b, loc, cc, ch, wa, ido, l1, cdim, tr11, tr12, ti11, ti12, c0, + c1, c2, c3, c4); + b.create(loc, std::nullopt); + }); + + return; +} + +// Calculate abspower of bufferMem and store result to a specific line in the +// resultMem +void absPower(OpBuilder &builder, Location loc, Value bufferMem, + Value resultMem, Value idx, Value c0, Value c1, Value c2) { + Value c200 = builder.create(loc, 200); + Value c398 = builder.create(loc, 398); + Value c399 = builder.create(loc, 399); + Value power = builder.create(loc, 2); + + Value firstNum = builder.create(loc, bufferMem, c0); + Value firstPow = builder.create(loc, firstNum, power); + builder.create(loc, firstPow, resultMem, + ValueRange{idx, c0}); + + Value lastNum = builder.create(loc, bufferMem, c399); + Value lastPow = builder.create(loc, lastNum, power); + builder.create(loc, lastPow, resultMem, + ValueRange{idx, c200}); + + builder.create( + loc, c1, c398, c2, ValueRange{c1}, + [&](OpBuilder &b, Location loc, Value iv, ValueRange iargs) { + Value j = b.create(loc, iv, c1); + Value num1 = b.create(loc, bufferMem, iv); + Value num2 = b.create(loc, bufferMem, j); + Value pow1 = b.create(loc, num1, power); + Value pow2 = b.create(loc, num2, power); + Value add = b.create(loc, pow1, pow2); + b.create(loc, add, resultMem, + ValueRange{idx, iargs[0]}); + + Value indexNext = b.create(loc, iargs[0], c1); + + b.create(loc, indexNext); + }); + + return; +} + +// Compute Log Mel Spectrogram +Value spectrogram(PatternRewriter &rewriter, Location loc, Value f0, Value c0, + Value c1, Value c2, Value c3, Value c4, Value c5, Value input, + Value window, Value melFilters) { + FloatType f64Ty = rewriter.getF64Type(); + + Value numFrames = rewriter.create(loc, 3001); + Value hopLength = rewriter.create(loc, 160); + Value c400 = rewriter.create(loc, 400); + + MemRefType spectrogramTy = MemRefType::get({3001, 201}, f64Ty); + Value spectrogram = rewriter.create(loc, spectrogramTy); + + RankedTensorType tensorTy0 = RankedTensorType::get({400}, f64Ty); + MemRefType mTp = MemRefType::get({400}, f64Ty); + + // #mulf_trait for 'linalg.generic' operation. 
+ AffineMap mulFIdMap = + AffineMap::getMultiDimIdentityMap(1, rewriter.getContext()); + SmallVector mulFIndexingMaps = {mulFIdMap, mulFIdMap, mulFIdMap}; + SmallVector mulFIteratorTypes = { + utils::IteratorType::parallel}; + + rewriter.create( + loc, c0, numFrames, c1, ValueRange{c0}, + [&](OpBuilder &builder, Location loc, Value iv, ValueRange iargs) { + auto extractSliceOp = rewriter.create( + loc, input, iargs[0], c400, c1); + Value buffer400 = extractSliceOp.getResult(); + Value buffer = + rewriter.create(loc, tensorTy0, buffer400); + + // 'linalg.generic' operation use #mulf_trait. + auto mulfOp = rewriter.create( + loc, /*resultTensorTypes=*/tensorTy0, + /*inputs=*/ValueRange{buffer, window}, + /*outputs=*/ValueRange{buffer}, mulFIndexingMaps, mulFIteratorTypes, + [&](OpBuilder &b, Location loc, ValueRange args) { + Value elem = b.create(loc, args[0], args[1]); + b.create(loc, elem); + }); + Value multiplied = mulfOp.getResult(0); + + Value bufferMem = + builder.create(loc, mTp, multiplied); + + // Compute 'dap.rfft400' operation, result stores in `bufferMem`. + builder.create(loc, bufferMem); + + // Store the result in a single line specified by `iv`. + absPower(builder, loc, bufferMem, spectrogram, iv, c0, c1, c2); + + Value timestepNext = + builder.create(loc, iargs[0], hopLength); + + builder.create(loc, timestepNext); + }); + + // TODO: check alloc and dealloc + // MemRefType melFiltersTransposeTy = MemRefType::get({80, 201}, f64Ty); + // Value alloc0 = rewriter.create(loc, + // melFiltersTransposeTy); Value init0 = + // rewriter.create(loc, alloc0); + Value init0 = + rewriter.create(loc, ArrayRef{80, 201}, f64Ty); + auto transposeOp0 = rewriter.create( + loc, /*input=*/melFilters, + /*init=*/init0, + /*permutation=*/ArrayRef{1, 0}); + Value melFiltersT = transposeOp0.getResult()[0]; + + Value gram = rewriter.create( + loc, spectrogram, /*restrict=*/true, /*writable=*/false); + Value init1 = rewriter.create( + loc, ArrayRef{201, 3001}, f64Ty); + auto transposeOp1 = rewriter.create( + loc, /*input=*/gram, + /*init=*/init1, + /*permutation=*/ArrayRef{1, 0}); + Value spectrogramT = transposeOp1.getResult()[0]; + + rewriter.create(loc, spectrogram); + + Value init2 = + rewriter.create(loc, ArrayRef{80, 3001}, f64Ty); + auto matmulOp = rewriter.create( + loc, /*inputs=*/ValueRange{melFiltersT, spectrogramT}, + /*outputs=*/ValueRange{init2}); + Value matMulResult = matmulOp.getResultTensors()[0]; + + // Initialize a tensor with constant `1e-10`. + RankedTensorType tensorTy1 = RankedTensorType::get({80, 3001}, f64Ty); + Value cMelFloor = rewriter.create( + loc, APFloat(double(0.0000000001)), f64Ty); + Value melFloor = rewriter.create(loc, tensorTy1, cMelFloor); + + auto linalgMaxOp = rewriter.create( + loc, /*input=*/ValueRange{melFloor, matMulResult}, + /*outputs=*/ValueRange{melFloor}); + Value spectrogramMax = linalgMaxOp.getResultTensors()[0]; + + // #log10_trait for 'linalg.generic' operation. + AffineMap log10IdMap = + AffineMap::getMultiDimIdentityMap(2, rewriter.getContext()); + SmallVector log10IndexingMaps = {log10IdMap, log10IdMap}; + SmallVector log10IteratorTypes = { + utils::IteratorType::parallel, utils::IteratorType::parallel}; + + // 'linalg.generic' operation use #log10_trait. 
+ auto log10Op = rewriter.create( + loc, /*resultTensorTypes=*/tensorTy1, + /*inputs=*/ValueRange{spectrogramMax}, + /*outputs=*/ValueRange{spectrogramMax}, log10IndexingMaps, + log10IteratorTypes, [&](OpBuilder &b, Location loc, ValueRange args) { + Value elem = b.create(loc, args[0]); + b.create(loc, elem); + }); + Value spectrogramLog10 = log10Op.getResult(0); + + return spectrogramLog10; +} + +namespace { +class DAPRFFT400Lowering : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + explicit DAPRFFT400Lowering(MLIRContext *context) + : OpRewritePattern(context) {} + + LogicalResult matchAndRewrite(dap::RFFT400Op op, + PatternRewriter &rewriter) const override { + auto loc = op->getLoc(); + auto ctx = op->getContext(); + Value bufferMem = op->getOperand(0); + + Value c0 = rewriter.create(loc, 0); + Value c1 = rewriter.create(loc, 1); + Value c2 = rewriter.create(loc, 2); + Value c3 = rewriter.create(loc, 3); + Value c4 = rewriter.create(loc, 4); + Value c5 = rewriter.create(loc, 5); + + FloatType f64Ty = rewriter.getF64Type(); + Value f0 = + rewriter.create(loc, APFloat(double(0.0)), f64Ty); + int64_t inputLength = 400; + + // Generate ch MemRef + RankedTensorType tensorTy = RankedTensorType::get({inputLength}, f64Ty); + MemRefType m25Ty = MemRefType::get({inputLength}, f64Ty); + Value chTensor = rewriter.create(loc, tensorTy, f0); + Value ch = rewriter.create(loc, m25Ty, chTensor); + + // Generate wa MemRefs + std::vector tw0Vec{ + 0.999877, 0.015707, 0.999507, 0.031411, 0.998890, 0.047106, + 0.998027, 0.062791, 0.996917, 0.078459, 0.995562, 0.094108, + 0.993961, 0.109734, 0.992115, 0.125333, 0.990024, 0.140901, + 0.987688, 0.156434, 0.985109, 0.171929, 0.982287, 0.187381, + 0.979223, 0.202787, 0.975917, 0.218143, 0.972370, 0.233445, + 0.968583, 0.248690, 0.964557, 0.263873, 0.960294, 0.278991, + 0.955793, 0.294040, 0.951057, 0.309017, 0.946085, 0.323917, + 0.940881, 0.338738, 0.935444, 0.353475, 0.929776, 0.368125, + 0.923880, 0.382683, 0.917755, 0.397148, 0.911403, 0.411514, + 0.904827, 0.425779, 0.898028, 0.439939, 0.891007, 0.453990, + 0.883766, 0.467930, 0.876307, 0.481754, 0.868632, 0.495459, + 0.860742, 0.509041, 0.852640, 0.522499, 0.844328, 0.535827, + 0.835807, 0.549023, 0.827081, 0.562083, 0.818150, 0.575005, + 0.809017, 0.587785, 0.799685, 0.600420, 0.790155, 0.612907, + 0.780430, 0.625243, 0.770513, 0.637424, 0.760406, 0.649448, + 0.750111, 0.661312, 0.739631, 0.673013, 0.728969, 0.684547, + 0.718126, 0.695913, 0.000000, 0.999507, 0.031411, 0.998027, + 0.062791, 0.995562, 0.094108, 0.992115, 0.125333, 0.987688, + 0.156434, 0.982287, 0.187381, 0.975917, 0.218143, 0.968583, + 0.248690, 0.960294, 0.278991, 0.951057, 0.309017, 0.940881, + 0.338738, 0.929776, 0.368125, 0.917755, 0.397148, 0.904827, + 0.425779, 0.891007, 0.453990, 0.876307, 0.481754, 0.860742, + 0.509041, 0.844328, 0.535827, 0.827081, 0.562083, 0.809017, + 0.587785, 0.790155, 0.612907, 0.770513, 0.637424, 0.750111, + 0.661312, 0.728969, 0.684547, 0.707107, 0.707107, 0.684547, + 0.728969, 0.661312, 0.750111, 0.637424, 0.770513, 0.612907, + 0.790155, 0.587785, 0.809017, 0.562083, 0.827081, 0.535827, + 0.844328, 0.509041, 0.860742, 0.481754, 0.876307, 0.453990, + 0.891007, 0.425779, 0.904827, 0.397148, 0.917755, 0.368125, + 0.929776, 0.338738, 0.940881, 0.309017, 0.951057, 0.278991, + 0.960294, 0.248690, 0.968583, 0.218143, 0.975917, 0.187381, + 0.982287, 0.156434, 0.987688, 0.125333, 0.992115, 0.094108, + 0.995562, 0.062791, 0.998027, 0.031411, 0.999507, 0.000000, + 0.998890, 
0.047106, 0.995562, 0.094108, 0.990024, 0.140901, + 0.982287, 0.187381, 0.972370, 0.233445, 0.960294, 0.278991, + 0.946085, 0.323917, 0.929776, 0.368125, 0.911403, 0.411514, + 0.891007, 0.453990, 0.868632, 0.495459, 0.844328, 0.535827, + 0.818150, 0.575005, 0.790155, 0.612907, 0.760406, 0.649448, + 0.728969, 0.684547, 0.695913, 0.718126, 0.661312, 0.750111, + 0.625243, 0.780430, 0.587785, 0.809017, 0.549023, 0.835807, + 0.509041, 0.860742, 0.467930, 0.883766, 0.425779, 0.904827, + 0.382683, 0.923880, 0.338738, 0.940881, 0.294040, 0.955793, + 0.248690, 0.968583, 0.202787, 0.979223, 0.156434, 0.987688, + 0.109734, 0.993961, 0.062791, 0.998027, 0.015707, 0.999877, + -0.031411, 0.999507, -0.078459, 0.996917, -0.125333, 0.992115, + -0.171929, 0.985109, -0.218143, 0.975917, -0.263873, 0.964557, + -0.309017, 0.951057, -0.353475, 0.935444, -0.397148, 0.917755, + -0.439939, 0.898028, -0.481754, 0.876307, -0.522499, 0.852640, + -0.562083, 0.827081, -0.600420, 0.799685, -0.637424, 0.770513, + -0.673013, 0.739631, 0.000000}; + Value wa0Tensor = rewriter.create( + loc, DenseFPElementsAttr::get(RankedTensorType::get({297}, f64Ty), + ArrayRef(tw0Vec))); + Value wa0 = rewriter.create( + loc, MemRefType::get({297}, f64Ty), wa0Tensor); + + std::vector tw1Vec{ + 0.998027, 0.062791, 0.992115, 0.125333, 0.982287, 0.187381, + 0.968583, 0.248690, 0.951057, 0.309017, 0.929776, 0.368125, + 0.904827, 0.425779, 0.876307, 0.481754, 0.844328, 0.535827, + 0.809017, 0.587785, 0.770513, 0.637424, 0.728969, 0.684547, + 0.992115, 0.125333, 0.968583, 0.248690, 0.929776, 0.368125, + 0.876307, 0.481754, 0.809017, 0.587785, 0.728969, 0.684547, + 0.637424, 0.770513, 0.535827, 0.844328, 0.425779, 0.904827, + 0.309017, 0.951057, 0.187381, 0.982287, 0.062791, 0.998027, + 0.982287, 0.187381, 0.929776, 0.368125, 0.844328, 0.535827, + 0.728969, 0.684547, 0.587785, 0.809017, 0.425779, 0.904827, + 0.248690, 0.968583, 0.062791, 0.998027, -0.125333, 0.992115, + -0.309017, 0.951057, -0.481754, 0.876307, -0.637424, 0.770513}; + Value wa1Tensor = rewriter.create( + loc, DenseFPElementsAttr::get(RankedTensorType::get({72}, f64Ty), + ArrayRef(tw1Vec))); + Value wa1 = rewriter.create( + loc, MemRefType::get({72}, f64Ty), wa1Tensor); + + std::vector tw2Vec{0.968583, 0.248690, 0.876307, 0.481754, + 0.876307, 0.481754, 0.535827, 0.844328, + 0.728969, 0.684547, 0.062791, 0.998027, + 0.535827, 0.844328, -0.425779, 0.904827}; + Value wa2Tensor = rewriter.create( + loc, DenseFPElementsAttr::get(RankedTensorType::get({16}, f64Ty), + ArrayRef(tw2Vec))); + Value wa2 = rewriter.create( + loc, MemRefType::get({16}, f64Ty), wa2Tensor); + + Value c16 = rewriter.create(loc, 16); + Value c25 = rewriter.create(loc, 25); + Value c80 = rewriter.create(loc, 80); + Value c100 = rewriter.create(loc, 100); + + radf5(rewriter, loc, bufferMem, ch, wa2, c1, c80, c0, c1, c2, c3, c4); + radf5(rewriter, loc, ch, bufferMem, wa2, c5, c16, c0, c1, c2, c3, c4); + radf4(rewriter, loc, bufferMem, ch, wa1, c25, c4, c0, c1, c2, c3); + radf4(rewriter, loc, ch, bufferMem, wa0, c100, c1, c0, c1, c2, c3); + + rewriter.eraseOp(op); + return success(); + } +}; + +class DAPWhisperPreprocessLowering + : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + explicit DAPWhisperPreprocessLowering(MLIRContext *context) + : OpRewritePattern(context) {} + + LogicalResult matchAndRewrite(dap::WhisperPreprocessOp op, + PatternRewriter &rewriter) const override { + auto loc = op->getLoc(); + auto ctx = op->getContext(); + Value input = op->getOperand(0); + + Value c0 
= rewriter.create(loc, 0); + Value c1 = rewriter.create(loc, 1); + Value c2 = rewriter.create(loc, 2); + Value c3 = rewriter.create(loc, 3); + Value c4 = rewriter.create(loc, 4); + Value c5 = rewriter.create(loc, 5); + Value c80 = rewriter.create(loc, 80); + Value c3000 = rewriter.create(loc, 3000); + Value c480000 = rewriter.create(loc, 480000); + + FloatType f32 = FloatType::getF32(ctx); + FloatType f64 = FloatType::getF64(ctx); + + Value inputFeatures = rewriter.create( + loc, input, /*restrict=*/true, /*writable=*/false); + Value inputFeaturesSize = + rewriter.create(loc, inputFeatures, c0); + Value padConstantHigh = + rewriter.create(loc, c480000, inputFeaturesSize); + + // Pad inputFeatures to MaxLength = 480000 + SmallVector paddedShape; + paddedShape.push_back(480000); + + SmallVector lowValues; + SmallVector highValues; + lowValues.push_back(c0); + highValues.push_back(padConstantHigh); + + Value f0 = + rewriter.create(loc, APFloat(double(0.0)), f64); + auto padConstantOp = rewriter.create( + loc, RankedTensorType::get(paddedShape, f64), inputFeatures, lowValues, + highValues, f0); + Value paddedInput = padConstantOp.getResult(); + + // Generate melFilter with 391 numbers + Value melFilter = initMelFilter(rewriter, loc, c0, c1, f0); + + // Generate hanning window with length 400 + Value window = getHanningWindow400(rewriter, loc); + + // Reflect pad for paddedInput, both left and right part pad with length 200 + Value finalPaddedInput = + padReflect(rewriter, loc, c0, c1, paddedInput, 200, 200); + Value logSpec = spectrogram(rewriter, loc, f0, c0, c1, c2, c3, c4, c5, + finalPaddedInput, window, melFilter); + + auto extractSliceOp = rewriter.create( + loc, /*source=*/logSpec, + /*offsets=*/ValueRange{c0, c0}, + /*sizes=*/ValueRange{c80, c3000}, + /*strides=*/ValueRange{c1, c1}); + Value logSpecCut = extractSliceOp.getResult(); + + Value maxInit = + rewriter.create(loc, APFloat(double(-10.0)), f64); + auto forOp0 = rewriter.create( + loc, c0, c80, c1, maxInit, + [&](OpBuilder &builder, Location loc, Value i, ValueRange iargs0) { + auto forOp1 = builder.create( + loc, c0, c3000, c1, iargs0[0], + [&](OpBuilder &b, Location loc, Value j, ValueRange iargs1) { + Value elem = b.create(loc, logSpecCut, + ValueRange{i, j}); + Value larger = + b.create(loc, elem, iargs1[0]); + b.create(loc, larger); + }); + + Value maxNext = forOp1.getResults()[0]; + builder.create(loc, maxNext); + }); + Value maxNum = forOp0.getResults()[0]; + + Value f8 = rewriter.create(loc, APFloat(double(8.0)), f64); + Value maxNumMinus8 = rewriter.create(loc, maxNum, f8); + Value logSpecFloor = rewriter.create( + loc, RankedTensorType::get({80, 3000}, f64), maxNumMinus8); + + auto linalgMaxOp = rewriter.create( + loc, /*input=*/ValueRange{logSpecCut, logSpecFloor}, + /*outputs=*/ValueRange{logSpecFloor}); + Value logSpecMax = linalgMaxOp.getResultTensors()[0]; + + Value f0F32 = + rewriter.create(loc, APFloat(float(0.0)), f32); + Value f4 = rewriter.create(loc, APFloat(double(4.0)), f64); + RankedTensorType resultTy = RankedTensorType::get({80, 3000}, f32); + Value InputFeaturesF32 = + rewriter.create(loc, resultTy, f0F32); + + // #tail_processing_trait for 'linalg.generic' operation. + AffineMap IdMap = + AffineMap::getMultiDimIdentityMap(2, rewriter.getContext()); + SmallVector IndexingMaps = {IdMap, IdMap}; + SmallVector IteratorTypes = { + utils::IteratorType::parallel, utils::IteratorType::parallel}; + + // 'linalg.generic' operation use #tail_processing_trait. 
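The max reduction above and the #tail_processing_trait generic created below together normalize the log-mel output for Whisper: clamp each value to within 8 of the global maximum, then map it through (x + 4) / 4 and narrow to f32. A hedged scalar sketch follows; `whisperNormalizeRef` is a hypothetical name, and the 80 x 3000 shape comes from the extract_slice above.

```
#include <algorithm>

// Scalar sketch of the normalization tail built below.
void whisperNormalizeRef(const double in[80][3000], float out[80][3000]) {
  double maxNum = -10.0; // same seed as maxInit above
  for (int i = 0; i < 80; ++i)
    for (int j = 0; j < 3000; ++j)
      maxNum = std::max(maxNum, in[i][j]);
  const double floorVal = maxNum - 8.0; // logSpecFloor
  for (int i = 0; i < 80; ++i)
    for (int j = 0; j < 3000; ++j)
      out[i][j] =
          static_cast<float>((std::max(in[i][j], floorVal) + 4.0) / 4.0);
}
```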
+ auto tailProcessOp = rewriter.create( + loc, /*resultTensorTypes=*/resultTy, + /*inputs=*/ValueRange{logSpecMax}, + /*outputs=*/ValueRange{InputFeaturesF32}, IndexingMaps, IteratorTypes, + [&](OpBuilder &b, Location loc, ValueRange args) { + Value add4 = b.create(loc, args[0], f4); + Value div4 = b.create(loc, add4, f4); + Value elem = b.create(loc, f32, div4); + b.create(loc, elem); + }); + Value result = tailProcessOp.getResult(0); + + // Compute reassociation indices [[0, 1], 2] + SmallVector> reassociationIndices( + resultTy.getRank()); + int64_t index = 0; + for (index = 0; index <= 1; index++) { + reassociationIndices[0].push_back(index); + } + reassociationIndices[1].push_back(index); + + RankedTensorType expandTy = RankedTensorType::get({1, 80, 3000}, f32); + + Value resultExpand = rewriter.create( + loc, /*resultType=*/expandTy, /*src=*/result, + /*reassociation=*/reassociationIndices); + + auto resultMemTp = + MemRefType::get(expandTy.getShape(), expandTy.getElementType()); + Value resultMemRef = rewriter.create( + loc, resultMemTp, resultExpand); + + // Replace 'dap.whisper_preprocess' operation with the generated result. The + // replaced op is erased. + rewriter.replaceOp(op, resultMemRef); + return success(); + } +}; + +} // end anonymous namespace + +void populateExtendDAPConversionPatterns(RewritePatternSet &patterns) { + patterns.add(patterns.getContext()); + patterns.add(patterns.getContext()); + // TODO: extract operators +} + +//===----------------------------------------------------------------------===// +// ExtendDAPPass +//===----------------------------------------------------------------------===// + +namespace { +class ExtendDAPPass + : public PassWrapper> { +public: + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ExtendDAPPass) + ExtendDAPPass() = default; + ExtendDAPPass(const ExtendDAPPass &) {} + + StringRef getArgument() const final { return "extend-dap"; } + StringRef getDescription() const final { return "Extend DAP Dialect."; } + + void runOnOperation() override; + + void getDependentDialects(DialectRegistry &registry) const override { + registry.insert(); + registry.insert(); + registry.insert(); + registry.insert(); + registry.insert(); + registry.insert(); + registry.insert(); + registry.insert(); + registry.insert(); + // Buddy Compiler designed dialect + registry.insert(); + } +}; +} // end anonymous namespace. + +void ExtendDAPPass::runOnOperation() { + MLIRContext *context = &getContext(); + ModuleOp module = getOperation(); + + ConversionTarget target(*context); + // Add legal dialects. + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + // Add legal operations.
+ target.addLegalOp(); + + RewritePatternSet patterns(context); + populateExtendDAPConversionPatterns(patterns); + + if (failed(applyPartialConversion(module, target, std::move(patterns)))) + signalPassFailure(); +} + +namespace mlir { +namespace buddy { +void registerExtendDAPPass() { PassRegistration(); } +} // namespace buddy +} // namespace mlir diff --git a/midend/lib/Conversion/MatMulOptimization/BatchMatMulOptimize.cpp b/midend/lib/Conversion/MatMulOptimization/BatchMatMulOptimize.cpp index 757ac8ae9..9b81a4748 100644 --- a/midend/lib/Conversion/MatMulOptimization/BatchMatMulOptimize.cpp +++ b/midend/lib/Conversion/MatMulOptimization/BatchMatMulOptimize.cpp @@ -81,8 +81,19 @@ class BatchMatMulOptimizePattern : public ConversionPattern { const Value zeroElementType = rewriter.create( loc, rewriter.getZeroAttr(elementType)); - const Value zeroElementTypeVec = rewriter.create( - loc, VectorType::get({affineVectorSize}, elementType), zeroElementType); + + const Value zeroElementTypeVec = + isa(elementType) + ? rewriter + .create( + loc, VectorType::get({affineVectorSize}, elementType), + zeroElementType) + .getResult() + : rewriter + .create( + loc, VectorType::get({affineVectorSize}, elementType), + zeroElementType) + .getResult(); // Get dimensions of input tensors. Value batch = rewriter.create(loc, A, 0); @@ -90,7 +101,8 @@ class BatchMatMulOptimizePattern : public ConversionPattern { Value bCol = rewriter.create(loc, B, 2); Value bRow = rewriter.create(loc, B, 1); - // Calculate the length of the tail, which might not fit in a vector. + // Calculate the length of the tail, which might not fit in a + // vector. Value tailLength = rewriter.create( loc, AffineMap::get(1, 0, d0 % affineVectorSize), ValueRange{bCol}); @@ -148,14 +160,14 @@ class BatchMatMulOptimizePattern : public ConversionPattern { C.getType().cast().getDimSize(2) % affineVectorSize != 0) { - // Depending on the position, use either full vectors or tail - // vectors. + // Depending on the position, use either full vectors or + // tail vectors. affine::AffineIfOp branchingOp = builder.create( loc, IntegerSet::get( 1, 1, {d0 * -affineVectorSize + s0 - affineVectorSize}, {false}), - ValueRange{loopVarBatchIdx, bCol}, true); + ValueRange{loopVarColOfB, bCol}, true); // Branch handling full vector operations. OpBuilder trueBranchBuilder = branchingOp.getThenBodyBuilder(); @@ -192,9 +204,9 @@ class BatchMatMulOptimizePattern : public ConversionPattern { loopVarColOfB}); Value computedVec; - // Compute the result vector either through integer - // multiplication and addition or fused multiply-add - // based on the element type. + // Compute the result vector either through + // integer multiplication and addition or fused + // multiply-add based on the element type. if (isa(elementType)) { Value mulVec = builder.create(loc, aVec, bVec); @@ -248,9 +260,9 @@ class BatchMatMulOptimizePattern : public ConversionPattern { maskVector, zeroElementTypeVec); Value computedVec; - // Compute the result vector either through integer - // multiplication and addition or fused multiply-add - // based on the element type. + // Compute the result vector either through + // integer multiplication and addition or fused + // multiply-add based on the element type. 
if (isa(elementType)) { Value mulVec = builder.create(loc, aVec, bVec); @@ -301,9 +313,9 @@ class BatchMatMulOptimizePattern : public ConversionPattern { loopVarColOfB}); Value computedVec; - // Compute the result vector either through integer - // multiplication and addition or fused multiply-add - // based on the element type. + // Compute the result vector either through + // integer multiplication and addition or fused + // multiply-add based on the element type. if (isa(elementType)) { Value mulVec = builder.create(loc, aVec, bVec); diff --git a/midend/lib/Conversion/MatMulOptimization/BatchMatMulSCFOptimize.cpp b/midend/lib/Conversion/MatMulOptimization/BatchMatMulSCFOptimize.cpp new file mode 100644 index 000000000..a3d079be2 --- /dev/null +++ b/midend/lib/Conversion/MatMulOptimization/BatchMatMulSCFOptimize.cpp @@ -0,0 +1,281 @@ +//===- BatchMatMulSCFOptimize.cpp -----------------------------------------===// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +// +// This file implements the BatchMatMul SCF vectorization optimization. +// +//===----------------------------------------------------------------------===// +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/IR/AffineExpr.h" +#include "mlir/IR/AffineMap.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/IntegerSet.h" +#include "mlir/IR/ValueRange.h" +#include "llvm/ADT/ArrayRef.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace mlir; +using namespace vector; +using namespace affine; + +//===----------------------------------------------------------------------===// +// Rewrite Pattern +//===----------------------------------------------------------------------===// + +namespace { + +class BatchMatMulSCFOptimizePattern : public ConversionPattern { +private: + int64_t vecSize; + +public: + explicit BatchMatMulSCFOptimizePattern(MLIRContext *context, + int64_t vecSizeParam) + : ConversionPattern(linalg::BatchMatmulOp::getOperationName(), 1, + context) { + vecSize = vecSizeParam; + } + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef /*operands*/, + ConversionPatternRewriter &rewriter) const override { + auto loc = op->getLoc(); + + // Retrieve input tensors A, B, and C. + Value A = op->getOperand(0); + Value B = op->getOperand(1); + Value C = op->getOperand(2); + + // Acquire the element type of input tensors. + Type elementType = A.getType().cast().getElementType(); + + // Define constants.
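Before the constants and the loop nest are built below, a hedged scalar restatement of what the pattern computes may help; `batchMatMulRef` is a hypothetical name. The innermost j loop is the one the pattern walks in vecSize-wide strips, finishing with a masked load/store for the remainder columns.

```
#include <cstdint>

// Scalar reference for the SCF-vectorized pattern: broadcast one element of
// A and accumulate into a whole row of C at a time.
void batchMatMulRef(int64_t BATCH, int64_t M, int64_t K, int64_t N,
                    const double *A, const double *B, double *C) {
  for (int64_t b = 0; b < BATCH; ++b)        // parallel loop over batches
    for (int64_t i = 0; i < M; ++i)          // rows of A
      for (int64_t k = 0; k < K; ++k) {      // rows of B
        double a = A[(b * M + i) * K + k];   // broadcast element in the pass
        for (int64_t j = 0; j < N; ++j)      // vectorized, vecSize per step
          C[(b * M + i) * N + j] += a * B[(b * K + k) * N + j];
      }
}
```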
+ const Value c0 = + rewriter.create(loc, rewriter.getIndexAttr(0)); + const Value c1 = + rewriter.create(loc, rewriter.getIndexAttr(1)); + const Value cVecSize = + rewriter.create(loc, rewriter.getIndexAttr(vecSize)); + const AffineExpr d0 = rewriter.getAffineDimExpr(0); + const AffineExpr d1 = rewriter.getAffineDimExpr(1); + const AffineExpr d2 = rewriter.getAffineDimExpr(2); + const AffineExpr s0 = rewriter.getAffineSymbolExpr(0); + const AffineExpr zeroAffine = rewriter.getAffineConstantExpr(0); + + const Value zeroElementType = rewriter.create( + loc, rewriter.getZeroAttr(elementType)); + + // Get dimensions of input tensors. + Value batch = rewriter.create(loc, A, 0); + Value aRow = rewriter.create(loc, A, 1); + Value bCol = rewriter.create(loc, B, 2); + Value bRow = rewriter.create(loc, B, 1); + + VectorType vecTy = VectorType::get({vecSize}, elementType); + Value zeroElementTypeVec; + if (isa(elementType)) + zeroElementTypeVec = + rewriter.create(loc, vecTy, zeroElementType); + else + zeroElementTypeVec = + rewriter.create(loc, vecTy, zeroElementType); + // Calculate the length of the tail, which might not fit in a + // vector. + Value tailLength = rewriter.create( + loc, AffineMap::get(1, 0, d0 % vecSize), ValueRange{bCol}); + + // Generate a mask vector based on the tail length. + Value maskVector = rewriter.create( + loc, VectorType::get({vecSize}, rewriter.getI1Type()), + ValueRange{tailLength}); + + Value ApplyBCol = rewriter.create( + loc, AffineMap::get(1, 0, d0.floorDiv(vecSize) * vecSize), bCol); + + rewriter.create( + loc, SmallVector({c0}), + SmallVector({batch}), + SmallVector({c1}), ValueRange{}, + std::nullopt, // No mapping specified in this example + [&](OpBuilder &builder, Location loc, ValueRange loopIndices) { + Value loopVarBatchIdx = loopIndices[0]; + builder.create( + loc, c0, aRow, c1, ValueRange{std::nullopt}, + [&](OpBuilder &builder, Location loc, Value loopVarRowOfA, + ValueRange iargs) { + builder.create( + loc, c0, bRow, c1, ValueRange{std::nullopt}, + [&](OpBuilder &builder, Location loc, Value loopVarRowOfB, + ValueRange iargs) { + Value aElement = builder.create( + loc, A, + ValueRange{loopVarBatchIdx, loopVarRowOfA, + loopVarRowOfB}); + Value aVec = builder.create( + loc, vecTy, aElement); + builder.create( + loc, c0, ApplyBCol, cVecSize, + ValueRange{std::nullopt}, + [&](OpBuilder &builder, Location loc, + Value loopVarColOfB, ValueRange iargs) { + Value bVec = builder.create( + loc, vecTy, B, + ValueRange{loopVarBatchIdx, loopVarRowOfB, + loopVarColOfB}); + + Value cVec = builder.create( + loc, vecTy, C, + ValueRange{loopVarBatchIdx, loopVarRowOfA, + loopVarColOfB}); + Value computedVec; + + if (isa(elementType)) { + Value mulVec = builder.create( + loc, aVec, bVec); + computedVec = builder.create( + loc, mulVec, cVec); + } else { + computedVec = builder.create( + loc, aVec, bVec, cVec); + } + builder.create( + loc, computedVec, C, + ValueRange{loopVarBatchIdx, loopVarRowOfA, + loopVarColOfB}); + builder.create( + loc, ValueRange{std::nullopt}); + }); + Value condition = builder.create( + loc, arith::CmpIPredicate::sgt, tailLength, c0); + builder.create( + loc, condition, + [&](OpBuilder &builder, Location loc) { + Value bVec = builder.create( + loc, vecTy, B, + ValueRange{loopVarBatchIdx, loopVarRowOfB, + ApplyBCol}, + maskVector, zeroElementTypeVec); + + Value cVec = builder.create( + loc, vecTy, C, + ValueRange{loopVarBatchIdx, loopVarRowOfA, + ApplyBCol}, + maskVector, zeroElementTypeVec); + + Value computedVec; + + if (isa(elementType)) { + 
Value mulVec = builder.create( + loc, aVec, bVec); + computedVec = builder.create( + loc, mulVec, cVec); + } else { + computedVec = builder.create( + loc, aVec, bVec, cVec); + } + + builder.create( + loc, C, + ValueRange{loopVarBatchIdx, loopVarRowOfA, + ApplyBCol}, + maskVector, computedVec); + builder.create(loc); + }); + builder.create(loc, + ValueRange{std::nullopt}); + }); + builder.create(loc, ValueRange{std::nullopt}); + }); + + builder.create(loc); + }); + + rewriter.eraseOp(op); + return success(); + } +}; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// BatchMatMulSCFOptimize +//===----------------------------------------------------------------------===// + +/// This pass partially lowers linalg batch matmul operations to a mixture of +/// SCF + Vector operations. +namespace { +class BatchMatMulSCFOptimize + : public PassWrapper> { +public: + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(BatchMatMulSCFOptimize) + StringRef getArgument() const final { return "batchmatmul-scf-optimize"; } + StringRef getDescription() const final { + return "BatchMatMul SCF Optimization."; + } + BatchMatMulSCFOptimize() = default; + BatchMatMulSCFOptimize(const BatchMatMulSCFOptimize &) {} + explicit BatchMatMulSCFOptimize(int64_t vecSizeParam) { + vecSize = vecSizeParam; + } + + void runOnOperation() override; + + void getDependentDialects(DialectRegistry &registry) const override { + registry.insert(); + } + + Option vecSize{*this, "vector-size", + llvm::cl::desc("Strip mining size."), + llvm::cl::init(16)}; +}; +} // end anonymous namespace. + +void BatchMatMulSCFOptimize::runOnOperation() { + MLIRContext *context = &getContext(); + ModuleOp module = getOperation(); + + ConversionTarget target(*context); + target + .addLegalDialect(); + target.addLegalOp(); + target.addLegalOp(); + + RewritePatternSet patterns(context); + patterns.add(context, vecSize); + + if (failed(applyPartialConversion(module, target, std::move(patterns)))) + signalPassFailure(); +} +// Pass registration; referenced from buddy-opt.cpp. +namespace mlir { +namespace buddy { +void registerBatchMatMulSCFOptimize() { + PassRegistration(); +} +} // namespace buddy +} // namespace mlir diff --git a/midend/lib/Conversion/MatMulOptimization/BatchMatMulTileOptimize.cpp b/midend/lib/Conversion/MatMulOptimization/BatchMatMulTileOptimize.cpp new file mode 100644 index 000000000..91d10c645 --- /dev/null +++ b/midend/lib/Conversion/MatMulOptimization/BatchMatMulTileOptimize.cpp @@ -0,0 +1,353 @@ +//===- BatchMatMulTileOptimize.cpp ----------------------------------------===// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +// +// This file implements the BatchMatMul tile optimization.
+// +//===----------------------------------------------------------------------===// +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/IR/AffineExpr.h" +#include "mlir/IR/AffineMap.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/IntegerSet.h" +#include "mlir/IR/ValueRange.h" +#include "llvm/ADT/ArrayRef.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace mlir; +using namespace vector; +using namespace affine; + +//===----------------------------------------------------------------------===// +// Rewrite Pattern +//===----------------------------------------------------------------------===// + +namespace { + +class BatchMatMulTileOptimizePattern : public ConversionPattern { +private: + int64_t vecSize, kernelM, kernelN; + +public: + explicit BatchMatMulTileOptimizePattern(MLIRContext *context, + int64_t vecSizeParam, + int64_t kernelMParam, + int64_t kernelNParam) + : ConversionPattern(linalg::BatchMatmulOp::getOperationName(), 1, + context) { + vecSize = vecSizeParam; + kernelM = kernelMParam; + kernelN = kernelNParam; + } + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef /*operands*/, + ConversionPatternRewriter &rewriter) const override { + auto loc = op->getLoc(); + + // Retrieve input tensors A, B, and C. + Value A = op->getOperand(0); + Value B = op->getOperand(1); + Value C = op->getOperand(2); + + // Acquire the element type of input tensors. + Type elementType = A.getType().cast().getElementType(); + ShapedType ATy = A.getType().cast(); + + // Define constants. + const Value c0 = + rewriter.create(loc, rewriter.getIndexAttr(0)); + const Value c1 = + rewriter.create(loc, rewriter.getIndexAttr(1)); + + const AffineExpr d0 = rewriter.getAffineDimExpr(0); + const AffineExpr d1 = rewriter.getAffineDimExpr(1); + const AffineExpr d2 = rewriter.getAffineDimExpr(2); + const AffineExpr s0 = rewriter.getAffineSymbolExpr(0); + const AffineExpr s1 = rewriter.getAffineSymbolExpr(1); + const AffineExpr s2 = rewriter.getAffineSymbolExpr(2); + + const AffineExpr zeroAffine = rewriter.getAffineConstantExpr(0); + + // Get dimensions of input tensors. + Value batch = rewriter.create(loc, A, 0); + Value M = rewriter.create(loc, A, 1); // aRow + Value K = rewriter.create(loc, B, 1); // bRow + Value N = rewriter.create(loc, B, 2); // bCol + + SmallVector reducedValues = llvm::to_vector<4>( + llvm::map_range(ArrayRef{}, + [](const LoopReduction &red) { return red.value; })); + + // Configs + int64_t kNLen = vecSize * kernelN; + + // Create the primary parallel batch level loop. + AffineParallelOp parallelBatchLoop = + rewriter.create( + loc, ValueRange(reducedValues).getTypes(), ValueRange{batch}, + ArrayRef{ + rewriter.getNamedAttr("lowerBoundsGroups", + rewriter.getI32TensorAttr({1})), + rewriter.getNamedAttr("upperBoundsGroups", + rewriter.getI32TensorAttr({1})), + rewriter.getNamedAttr( + "lowerBoundsMap", + AffineMapAttr::get(AffineMap::get(0, 0, {zeroAffine}, + rewriter.getContext()))), + rewriter.getNamedAttr("upperBoundsMap", + AffineMapAttr::get(AffineMap::get( + 1, 0, {d0}, rewriter.getContext()))), + rewriter.getNamedAttr("reductions", rewriter.getArrayAttr({})), + rewriter.getNamedAttr("steps", rewriter.getI64ArrayAttr({1}))}); + + // Create the loop body for the parallel loop. 
+ Block *loopBody = new Block(); + rewriter.setInsertionPointToStart(loopBody); + loopBody->addArgument(rewriter.getIndexType(), loc); + Value loopVarBatchIdx = loopBody->getArguments()[0]; + + // Prefetching data from tensor 'A' for better cache utilization. + rewriter.create( + loc, A, AffineMap::get(3, 0, {d0, d1, d2}, rewriter.getContext()), + ArrayRef{loopVarBatchIdx, M, K}, false, 3, true); + + // build loop body + affine::buildAffineLoopNest( + rewriter, loc, {c0}, {N}, kNLen, + [&](OpBuilder &builder, Location loc, ValueRange ivRange) { + auto ivJ = ivRange.front(); + affine::buildAffineLoopNest( + builder, loc, {c0}, {M}, kernelM, + [&](OpBuilder &builder, Location loc, ValueRange ivRange) { + Value ivI = ivRange.front(); + SmallVector cptrs; + + const VectorType vTy = + VectorType::get(vecSize, ATy.getElementType()); + + for (int i = 0; i < kernelM; i++) { + Value fixedIV = builder.create( + loc, + AffineMap::get(1, 1, {d0 + i, s0 - 1}, + builder.getContext()), + SmallVector{ivI, M}); + MemRefType resTy = MemRefType::get( + ATy.getShape(), ATy.getElementType(), + AffineMap::get(3, 3, d1 * s2 + d0 * s1 + s0 + d2)); + auto cptr = builder.create( + loc, resTy, C, + SmallVector{loopVarBatchIdx, fixedIV, c0}, + SmallVector{c1, c1, N}, + SmallVector{c1, c1, c1}); + cptrs.push_back(cptr); + } + affine::buildAffineLoopNest( + builder, loc, {c0}, {K}, 1, + [&](OpBuilder &builder, Location loc, ValueRange ivRange) { + Value ivK = ivRange.front(); + SmallVector bs; + + for (int j = 0; j < kernelN; j++) { + Value fixedJV = ivJ; + if (j != 0) { + fixedJV = builder.create( + loc, AffineMap::get(1, 0, d0 + j * vecSize), ivJ); + } + bs.push_back(builder.create( + loc, vTy, B, + ValueRange{loopVarBatchIdx, ivK, fixedJV})); + } + + for (int i = 0; i < kernelM; ++i) { + Value fixedIV = ivI; + if (i != 0) { + fixedIV = builder.create( + loc, + AffineMap::get(1, 0, {d0 + i}, + builder.getContext()), + SmallVector{ivI}); + } + affine::AffineIfOp mBranchingOp = + builder.create( + loc, + IntegerSet::get(1, 1, {-d0 + s0 - 1}, {false}), + ValueRange{fixedIV, M}, false); + OpBuilder mTrueBranchBuilder = + mBranchingOp.getThenBodyBuilder(); + Value ksubAElement = + mTrueBranchBuilder.create( + loc, A, + ValueRange{loopVarBatchIdx, fixedIV, ivK}); + + for (int j = 0; j < kernelN; j++) { + Value fixedJV = ivJ; + if (j != 0) { + fixedJV = + mTrueBranchBuilder + .create( + loc, + AffineMap::get(1, 0, d0 + j * vecSize), + ivJ); + } + Value vecC = mTrueBranchBuilder.create( + loc, vTy, cptrs[i], ValueRange{c0, c0, fixedJV}); + if (isa(elementType)) { + Value vecA = + mTrueBranchBuilder.create( + loc, vTy, ksubAElement); + Value vecMul = + mTrueBranchBuilder.create( + loc, vTy, vecA, bs[j]); + vecC = mTrueBranchBuilder.create( + loc, vTy, vecMul, vecC); + } else { + Value vecA = + mTrueBranchBuilder.create( + loc, vTy, ksubAElement); + vecC = mTrueBranchBuilder.create( + loc, vTy, vecA, bs[j], vecC); + } + // store vecC + Value tailLength = + mTrueBranchBuilder.create( + loc, AffineMap::get(2, 0, -d0 + d1), + ValueRange{fixedJV, N}); + affine::AffineIfOp nBranchingOp = + mTrueBranchBuilder.create( + loc, + IntegerSet::get(1, 0, {-vecSize + d0}, + {false}), + ValueRange{tailLength}, true); + // Calculate the length of the tail, which might not + // fit in a vector. 
+ OpBuilder nTrueBranchBuilder = + nBranchingOp.getThenBodyBuilder(); + nTrueBranchBuilder.create( + loc, vecC, cptrs[i], ValueRange{c0, c0, fixedJV}); + OpBuilder nFalseBranchBuilder = + nBranchingOp.getElseBodyBuilder(); + // Generate a mask vector based on the tail length. + Value maskVector = + nFalseBranchBuilder.create( + loc, + VectorType::get({vecSize}, + rewriter.getI1Type()), + ValueRange{tailLength}); + nFalseBranchBuilder.create( + loc, cptrs[i], ValueRange{c0, c0, fixedJV}, + maskVector, vecC); + } + } + }); + }); + }); + + rewriter.create(loc); + + // Finalize the loop and erase the original operation. + parallelBatchLoop.getRegion().push_back(loopBody); + rewriter.setInsertionPointAfter(parallelBatchLoop); + + rewriter.eraseOp(op); + return success(); + } +}; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// BatchMatMulTileOptimizePass +//===----------------------------------------------------------------------===// + +/// This pass partially lowers linalg batch matmul operations to a mixture of +/// Affine + Vector operations. +namespace { +class BatchMatMulTileOptimizePass + : public PassWrapper> { +public: + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(BatchMatMulTileOptimizePass) + StringRef getArgument() const final { return "batchmatmul-tile-optimize"; } + StringRef getDescription() const final { + return "BatchMatMul Tile Optimization."; + } + BatchMatMulTileOptimizePass() = default; + BatchMatMulTileOptimizePass(const BatchMatMulTileOptimizePass &) {} + explicit BatchMatMulTileOptimizePass(int64_t vecSizeParam, + int64_t kernelMParam, + int64_t kernelNParam) { + vecSize = vecSizeParam; + kernelM = kernelMParam; + kernelN = kernelNParam; + } + + void runOnOperation() override; + + void getDependentDialects(DialectRegistry &registry) const override { + registry.insert(); + } + + Option vecSize{*this, "vec-size", + llvm::cl::desc("Strip mining size."), + llvm::cl::init(16)}; + + Option kernelM{*this, "kernel-m", + llvm::cl::desc("Micro-kernel tile size along the M dimension."), + llvm::cl::init(4)}; + + Option kernelN{*this, "kernel-n", + llvm::cl::desc("Micro-kernel tile count along the N dimension, in vectors."), + llvm::cl::init(2)}; +}; +} // end anonymous namespace.
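For orientation, here is the register-tiling strategy of the pattern above restated as a hedged scalar sketch; `tiledBatchMatMulRef` is a hypothetical name, and kNLen stands for kernelN * vecSize as in the code. Each (j, i) tile keeps kernelN vectors of B plus one broadcast element of A live per k step, and columns past the last full vector take the masked path.

```
#include <algorithm>
#include <cstdint>

// Scalar shape of the tiled loop nest above: batch parallel, then N in
// kNLen-wide column tiles, then M in kernelM-row tiles, then the K
// reduction, with a small register tile updated innermost.
void tiledBatchMatMulRef(int64_t BATCH, int64_t M, int64_t K, int64_t N,
                         int64_t kernelM, int64_t kNLen, const double *A,
                         const double *B, double *C) {
  for (int64_t b = 0; b < BATCH; ++b)                  // affine.parallel
    for (int64_t j = 0; j < N; j += kNLen)             // column tiles
      for (int64_t i = 0; i < M; i += kernelM)         // row tiles
        for (int64_t k = 0; k < K; ++k)                // reduction
          for (int64_t ii = i; ii < std::min(i + kernelM, M); ++ii) {
            double a = A[(b * M + ii) * K + k];        // broadcast element
            for (int64_t jj = j; jj < std::min(j + kNLen, N); ++jj)
              C[(b * M + ii) * N + jj] += a * B[(b * K + k) * N + jj];
          }
}
```

Keeping the tile in registers is presumably where the speedup comes from: each B vector loaded for a (k, j) pair is reused across all kernelM rows of C before being evicted, which the straightforward vectorized loop cannot do.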
+ +void BatchMatMulTileOptimizePass::runOnOperation() { + MLIRContext *context = &getContext(); + ModuleOp module = getOperation(); + + ConversionTarget target(*context); + target + .addLegalDialect(); + target.addLegalOp(); + target.addLegalOp(); + + RewritePatternSet patterns(context); + patterns.add(context, vecSize, kernelM, + kernelN); + + if (failed(applyPartialConversion(module, target, std::move(patterns)))) + signalPassFailure(); +} +// Pass registration; referenced from buddy-opt.cpp. +namespace mlir { +namespace buddy { +void registerBatchMatMulTileOptimizePass() { + PassRegistration(); +} +} // namespace buddy +} // namespace mlir diff --git a/midend/lib/Conversion/MatMulOptimization/CMakeLists.txt b/midend/lib/Conversion/MatMulOptimization/CMakeLists.txt index 8e726863e..2803af674 100644 --- a/midend/lib/Conversion/MatMulOptimization/CMakeLists.txt +++ b/midend/lib/Conversion/MatMulOptimization/CMakeLists.txt @@ -1,8 +1,10 @@ add_mlir_library(MatMulOptimization - BatchMatMulOptimize.cpp MatMulOptimize.cpp MatMulVectorization.cpp MatMulParallelVectorization.cpp + BatchMatMulOptimize.cpp + BatchMatMulTileOptimize.cpp + BatchMatMulSCFOptimize.cpp LINK_LIBS PUBLIC BuddyUtils ) @@ -11,6 +13,14 @@ add_mlir_library(BatchMatMulOptimization BatchMatMulOptimize.cpp ) +add_mlir_library(BatchMatMulTileOptimization + BatchMatMulTileOptimize.cpp +) + +add_mlir_library(BatchMatMulSCFOptimization + BatchMatMulSCFOptimize.cpp +) + add_mlir_library(MatMulParallelVectorization MatMulParallelVectorization.cpp ) diff --git a/midend/lib/Conversion/MatMulOptimization/MatMulParallelVectorization.cpp b/midend/lib/Conversion/MatMulOptimization/MatMulParallelVectorization.cpp index d10c80e3a..23d0ef4e7 100644 --- a/midend/lib/Conversion/MatMulOptimization/MatMulParallelVectorization.cpp +++ b/midend/lib/Conversion/MatMulOptimization/MatMulParallelVectorization.cpp @@ -14,7 +14,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements the matmul-paralell-vectorization optimization. +// This file implements the matmul-parallel-vectorization optimization.
 //
 //===----------------------------------------------------------------------===//
@@ -318,7 +318,7 @@ class MatMulParallelVectorizationPass
 public:
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(MatMulParallelVectorizationPass)
   StringRef getArgument() const final {
-    return "matmul-paralell-vectorization-optimize";
+    return "matmul-parallel-vectorization-optimize";
   }
   StringRef getDescription() const final {
     return "MatMulParallelVectorization Optimization.";
diff --git a/nix/buddy-llvm.nix b/nix/buddy-llvm.nix
new file mode 100644
index 000000000..af5bc1c86
--- /dev/null
+++ b/nix/buddy-llvm.nix
@@ -0,0 +1,76 @@
+{ stdenv
+, cmake
+, ninja
+, python3
+, fetchFromGitHub
+}:
+
+let
+  pythonEnv = python3.withPackages (ps: [
+    ps.numpy
+    ps.pybind11
+    ps.pyyaml
+    ps.ml-dtypes
+  ]);
+in
+stdenv.mkDerivation rec {
+  name = "llvm-for-buddy-mlir";
+  version = "6c59f0e1b0fb56c909ad7c9aad4bde37dc006ae0";
+  src = fetchFromGitHub {
+    owner = "llvm";
+    repo = "llvm-project";
+    rev = version;
+    hash = "sha256-bMJJ2q1hSh7m0ewclHOmIe7lOHv110rz/P7D3pw8Uiw=";
+  };
+
+  requiredSystemFeatures = [ "big-parallel" ];
+
+  propagatedBuildInputs = [
+    pythonEnv
+  ];
+
+  nativeBuildInputs = [
+    cmake
+    ninja
+  ];
+
+  cmakeDir = "../llvm";
+  cmakeFlags = [
+    "-DLLVM_ENABLE_PROJECTS=mlir"
+    "-DLLVM_TARGETS_TO_BUILD=host;RISCV"
+    "-DLLVM_ENABLE_ASSERTIONS=ON"
+    "-DCMAKE_BUILD_TYPE=Release"
+    # required for the MLIR Python bindings
+    "-DMLIR_ENABLE_BINDINGS_PYTHON=ON"
+    # required for LLVM test utilities (not, FileCheck, ...)
+    "-DLLVM_INSTALL_UTILS=ON"
+  ];
+
+  outputs = [ "out" "lib" "dev" ];
+
+  postInstall = ''
+    # buddy-mlir has a custom RVV backend that requires LLVM backend headers,
+    # and those headers in turn require this config.h header file. For LLVM
+    # itself, config.h is only meant to be used during the build phase, so it
+    # is not installed by `cmake install`. Work around that by copying it
+    # manually.
+    cp -v "include/llvm/Config/config.h" "$dev/include/llvm/Config/config.h"
+
+    # move llvm-config to $dev to resolve a circular dependency
+    moveToOutput "bin/llvm-config*" "$dev"
+
+    # move all lib files to $lib except lib/cmake
+    moveToOutput "lib" "$lib"
+    moveToOutput "lib/cmake" "$dev"
+
+    # patch configuration files so each path points to the new $lib or $dev paths
+    substituteInPlace "$dev/lib/cmake/llvm/LLVMConfig.cmake" \
+      --replace 'set(LLVM_BINARY_DIR "''${LLVM_INSTALL_PREFIX}")' 'set(LLVM_BINARY_DIR "'"$lib"'")'
+    substituteInPlace \
+      "$dev/lib/cmake/llvm/LLVMExports-release.cmake" \
+      "$dev/lib/cmake/mlir/MLIRTargets-release.cmake" \
+      --replace "\''${_IMPORT_PREFIX}/lib/lib" "$lib/lib/lib" \
+      --replace "\''${_IMPORT_PREFIX}/lib/objects-Release" "$lib/lib/objects-Release" \
+      --replace "$out/bin/llvm-config" "$dev/bin/llvm-config" # patch path for llvm-config
+  '';
+}
diff --git a/nix/buddy-mlir.nix b/nix/buddy-mlir.nix
index b59d82275..db10c6281 100644
--- a/nix/buddy-mlir.nix
+++ b/nix/buddy-mlir.nix
@@ -1,51 +1,68 @@
-{ cmake, ninja, python3, llvmPackages_16, fetchFromGitHub, libjpeg, libpng, zlib-ng }:
+{ lib
+, stdenv
+, buddy-llvm
+, cmake
+, ninja
+, llvmPkgs
+, libjpeg
+, libpng
+, zlib-ng
+, ccls
+}:
 let
-  # Using git submodule to obtain the llvm source is really slow.
-  # So here I use tarball to save time from git index.
-  llvmSrc = fetchFromGitHub {
-    owner = "llvm";
-    repo = "llvm-project";
-    rev = "6c59f0e1b0fb56c909ad7c9aad4bde37dc006ae0";
-    sha256 = "sha256-bMJJ2q1hSh7m0ewclHOmIe7lOHv110rz/P7D3pw8Uiw";
-  };
-in
-# Use clang instead of gcc to build
-llvmPackages_16.stdenv.mkDerivation {
-  pname = "buddy-mlir";
-  version = "unstable-2023-11-07+rev=38bfd56";
-
-  srcs = [
-    llvmSrc
-    ../.
-  ];
-  sourceRoot = "llvm-project";
-  unpackPhase = ''
-    sourceArray=($srcs)
-    cp -r ''${sourceArray[0]} llvm-project
-    cp -r ''${sourceArray[1]} buddy-mlir
+  self = stdenv.mkDerivation {
+    pname = "buddy-mlir";
+    version = "unstable-2024-07-18";
 
-    # Directories copied from nix store are read only
-    chmod -R u+w llvm-project buddy-mlir
-  '';
+    src = with lib.fileset; toSource {
+      root = ./..;
+      fileset = unions [
+        ./../backend
+        ./../cmake
+        ./../examples
+        ./../frontend
+        ./../midend
+        ./../tests
+        ./../tools
+        ./../thirdparty
+        ./../CMakeLists.txt
+        ./../flake.lock
+        ./../flake.nix
+      ];
+    };
 
-  # Tablegen in latest commit have bug. See llvm-projects issue #68166
-  prePatch = "pushd $NIX_BUILD_TOP/llvm-project";
-  patches = [ ./tblgen.patch ];
-  postPatch = "popd";
+    nativeBuildInputs = [
+      cmake
+      ninja
+      llvmPkgs.bintools
+    ];
 
-  nativeBuildInputs = [ cmake ninja python3 llvmPackages_16.bintools libjpeg libpng zlib-ng ];
+    buildInputs = [
+      buddy-llvm
+    ];
 
-  cmakeDir = "../llvm";
-  cmakeFlags = [
-    "-DCMAKE_BUILD_TYPE=Release"
-    "-DLLVM_ENABLE_PROJECTS=mlir"
-    "-DLLVM_TARGETS_TO_BUILD=host;RISCV"
-    "-DLLVM_ENABLE_ASSERTIONS=ON"
-    "-DLLVM_USE_LINKER=lld"
+    cmakeFlags = [
+      "-DMLIR_DIR=${buddy-llvm.dev}/lib/cmake/mlir"
+      "-DLLVM_DIR=${buddy-llvm.dev}/lib/cmake/llvm"
+      "-DLLVM_MAIN_SRC_DIR=${buddy-llvm.src}/llvm"
+      "-DBUDDY_MLIR_ENABLE_PYTHON_PACKAGES=ON"
+      "-DCMAKE_BUILD_TYPE=Release"
+    ];
 
-    "-DLLVM_EXTERNAL_PROJECTS=buddy-mlir"
-    "-DLLVM_EXTERNAL_BUDDY_MLIR_SOURCE_DIR=../../buddy-mlir"
-  ];
+    passthru = {
+      llvm = buddy-llvm;
+      devShell = self.overrideAttrs (old: {
+        nativeBuildInputs = old.nativeBuildInputs ++ [
+          libjpeg
+          libpng
+          zlib-ng
+          ccls
+        ];
+      });
+    };
 
-  checkTarget = "check-mlir check-buddy";
-}
+    # No need to run the checks here; they also take too much time to finish.
+    doCheck = false;
+  };
+in
+self
diff --git a/nix/overlay.nix b/nix/overlay.nix
index 19c97fc33..767f23bdd 100644
--- a/nix/overlay.nix
+++ b/nix/overlay.nix
@@ -1,6 +1,8 @@
 final: prev: {
   # Add an alias here can help future migration
-  llvmPkgs = final.llvmPackages_16;
-  buddy-mlir = final.callPackage ./buddy-mlir.nix { };
+  llvmPkgs = final.llvmPackages_17;
+  # Use clang instead of gcc to compile, to avoid a gcc 13 miscompilation issue.
+  buddy-llvm = final.callPackage ./buddy-llvm.nix { stdenv = final.llvmPkgs.stdenv; };
+  buddy-mlir = final.callPackage ./buddy-mlir.nix { stdenv = final.llvmPkgs.stdenv; };
 }
diff --git a/requirements.txt b/requirements.txt
index f2a1232fb..9818b8ec7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,3 +9,6 @@ protobuf
 pybind11 == 2.11.1
 torchvision
 tabulate
+datasets
+soundfile
+librosa
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 2456107a3..3340ed14b 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -13,7 +13,9 @@ set(BUDDY_TEST_DEPENDS
   buddy-translate
   buddy-container-test
   buddy-audio-container-test
+  buddy-new-image-container-test
   buddy-text-container-test
+  mlir-cpu-runner
 )
 
 if(BUDDY_ENABLE_OPENCV)
diff --git a/tests/Interface/core/AudioContainerTest.cpp b/tests/Interface/core/AudioContainerTest.cpp
index a31c7800f..684584c3a 100644
--- a/tests/Interface/core/AudioContainerTest.cpp
+++ b/tests/Interface/core/AudioContainerTest.cpp
@@ -20,22 +20,72 @@
 
 // RUN: buddy-audio-container-test 2>&1 | FileCheck %s
 
+#include "AudioFile.h"
 #include <buddy/DAP/AudioContainer.h>
 #include <iostream>
 
 using namespace std;
 
 int main() {
-  dap::Audio<float, 1> aud("../../../../tests/Interface/core/NASA_Mars.wav");
-  auto &audioFile = aud.getAudioFile();
+  // ---------------------------------------------------------------------------
+  // 1. Print Decoded Results Using the Buddy Audio Container
+  // ---------------------------------------------------------------------------
+
+  // Read and decode the audio file with the Buddy Audio Container.
+  dap::Audio<float, 1> aud("../../../../tests/Interface/core/TestAudio.wav");
+
+  // CHECK: WAV
+  fprintf(stderr, "%s\n", aud.getFormatName().c_str());
+  // CHECK: 16
+  fprintf(stderr, "%d\n", aud.getBitDepth());
+  // CHECK: 77040
+  fprintf(stderr, "%lu\n", aud.getSamplesNum());
+  // CHECK: 1
+  fprintf(stderr, "%d\n", aud.getChannelsNum());
+  // CHECK: 16000
+  fprintf(stderr, "%d\n", aud.getSampleRate());
+  // CHECK: -0.000153
+  fprintf(stderr, "%f\n", aud.getData()[3]);
+  // CHECK: -0.000275
+  fprintf(stderr, "%f\n", aud.getData()[4]);
+
+  // ---------------------------------------------------------------------------
+  // 2. Compare Encoded Results Between the Buddy Audio Container and AudioFile.h
+  // ---------------------------------------------------------------------------
+
+  // Encode the audio data and save it to a file using the Buddy Audio Container.
+  string filePath = "./buddyEncodeResult.wav";
+  aud.saveToFile(filePath, "WAVE");
+
+  // Print metadata and sample values using the Buddy Audio Container.
+  dap::Audio<float, 1> audContainer(filePath);
+  // CHECK: 16
+  fprintf(stderr, "%d\n", audContainer.getBitDepth());
+  // CHECK: 77040
+  fprintf(stderr, "%lu\n", audContainer.getSamplesNum());
+  // CHECK: 1
+  fprintf(stderr, "%d\n", audContainer.getChannelsNum());
+  // CHECK: 16000
+  fprintf(stderr, "%d\n", audContainer.getSampleRate());
+  // CHECK: -0.000122
+  fprintf(stderr, "%f\n", audContainer.getData()[3]);
+  // CHECK: -0.000244
+  fprintf(stderr, "%f\n", audContainer.getData()[4]);
+
+  // Print metadata and sample values using the third-party library (AudioFile.h).
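+  // Note: the re-decoded samples above (-0.000122 / -0.000244) differ from
+  // the originals (-0.000153 / -0.000275) by about 3.05e-5, i.e., one least
+  // significant bit of 16-bit PCM (1 / 32768), which is consistent with
+  // quantization during the encode/decode round trip.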
+  AudioFile<float> audFile(filePath);
+  // CHECK: 16
+  fprintf(stderr, "%d\n", audFile.getBitDepth());
+  // CHECK: 77040
+  fprintf(stderr, "%d\n", audFile.getNumSamplesPerChannel());
   // CHECK: 1
-  fprintf(stderr, "%u\n", audioFile.getNumChannels());
-  // CHECK: 24
-  fprintf(stderr, "%u\n", audioFile.getBitDepth());
-  // CHECK: 2000000
-  fprintf(stderr, "%u\n", audioFile.getNumSamplesPerChannel());
-  // CHECK: 100000
-  fprintf(stderr, "%u\n", audioFile.getSampleRate());
+  fprintf(stderr, "%d\n", audFile.getNumChannels());
+  // CHECK: 16000
+  fprintf(stderr, "%d\n", audFile.getSampleRate());
+  // CHECK: -0.000122
+  fprintf(stderr, "%f\n", audFile.getSample(0, 3));
+  // CHECK: -0.000244
+  fprintf(stderr, "%f\n", audFile.getSample(0, 4));
 
   return 0;
 }
diff --git a/tests/Interface/core/CMakeLists.txt b/tests/Interface/core/CMakeLists.txt
index c82cb5a28..f6c6da4c3 100644
--- a/tests/Interface/core/CMakeLists.txt
+++ b/tests/Interface/core/CMakeLists.txt
@@ -17,10 +17,14 @@ if(BUDDY_MLIR_ENABLE_DIP_LIB OR BUDDY_ENABLE_OPENCV)
   )
 endif()
 
+_add_test_executable(buddy-new-image-container-test
+  NewImageContainerTest.cpp
+)
+
 _add_test_executable(buddy-audio-container-test
   AudioContainerTest.cpp
 )
 
 _add_test_executable(buddy-text-container-test
   TextContainerTest.cpp
-)
\ No newline at end of file
+)
diff --git a/tests/Interface/core/NewImageContainerTest.cpp b/tests/Interface/core/NewImageContainerTest.cpp
new file mode 100644
index 000000000..87b49804a
--- /dev/null
+++ b/tests/Interface/core/NewImageContainerTest.cpp
@@ -0,0 +1,58 @@
+//===- NewImageContainerTest.cpp ------------------------------------------===//
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the image container test file.
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: buddy-new-image-container-test 2>&1 | FileCheck %s
+
+#include <buddy/DIP/ImgContainer.h>
+
+int main() {
+  //===--------------------------------------------------------------------===//
+  // Test new image container - bmp format image.
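+  // Note: with norm = true, the 8-bit grayscale values are scaled to [0, 1],
+  // so the first pixel 7 becomes 7 / 255 ≈ 0.027451 (see the CHECKs below).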
+  //===--------------------------------------------------------------------===//
+  // Default Gray Scale
+  dip::Image<float, 4> bmpGrayDefault(
+      "../../../../tests/Interface/core/TestImage.bmp", dip::DIP_GRAYSCALE);
+  // CHECK: BMP
+  fprintf(stderr, "%s\n", bmpGrayDefault.getFormatName().c_str());
+  // CHECK: 28
+  fprintf(stderr, "%ld\n", bmpGrayDefault.getWidth());
+  // CHECK: 28
+  fprintf(stderr, "%ld\n", bmpGrayDefault.getHeight());
+  // CHECK: 32
+  fprintf(stderr, "%d\n", bmpGrayDefault.getBitDepth());
+  // CHECK: 7
+  fprintf(stderr, "%f\n", bmpGrayDefault.getData()[0]);
+  // Gray Scale + Normalization
+  dip::Image<float, 4> bmpGrayNorm(
+      "../../../../tests/Interface/core/TestImage.bmp", dip::DIP_GRAYSCALE,
+      true /* norm */);
+  // CHECK: BMP
+  fprintf(stderr, "%s\n", bmpGrayNorm.getFormatName().c_str());
+  // CHECK: 28
+  fprintf(stderr, "%ld\n", bmpGrayNorm.getWidth());
+  // CHECK: 28
+  fprintf(stderr, "%ld\n", bmpGrayNorm.getHeight());
+  // CHECK: 32
+  fprintf(stderr, "%d\n", bmpGrayNorm.getBitDepth());
+  // CHECK: 0.027451
+  fprintf(stderr, "%f\n", bmpGrayNorm.getData()[0]);
+
+  return 0;
+}
diff --git a/tests/Interface/core/TestAudio.wav b/tests/Interface/core/TestAudio.wav
new file mode 100644
index 000000000..069c2329e
Binary files /dev/null and b/tests/Interface/core/TestAudio.wav differ
diff --git a/tests/Interface/core/TestImage.bmp b/tests/Interface/core/TestImage.bmp
new file mode 100644
index 000000000..7a9e02a29
Binary files /dev/null and b/tests/Interface/core/TestImage.bmp differ
diff --git a/tests/lit.cfg.py b/tests/lit.cfg.py
index 4cf5e245f..53b10eaa8 100644
--- a/tests/lit.cfg.py
+++ b/tests/lit.cfg.py
@@ -67,6 +67,7 @@
     "buddy-translate",
     "buddy-container-test",
     "buddy-audio-container-test",
+    "buddy-new-image-container-test",
     "buddy-text-container-test",
     "mlir-cpu-runner",
 ]
diff --git a/tests/lit.site.cfg.py.in b/tests/lit.site.cfg.py.in
index 6a5e5f37e..90681ef25 100644
--- a/tests/lit.site.cfg.py.in
+++ b/tests/lit.site.cfg.py.in
@@ -28,7 +28,7 @@ config.enable_libcxx = "@LLVM_ENABLE_LIBCXX@"
 config.host_ldflags = '@HOST_LDFLAGS@'
 config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
 config.llvm_host_triple = '@LLVM_HOST_TRIPLE@'
-config.llvm_build_dir = "@LLVM_PROJECT_BUILD_DIR@"
+config.llvm_build_dir = "@LLVM_BINARY_DIR@"
 config.host_arch = "@HOST_ARCH@"
 config.buddy_src_root = "@CMAKE_SOURCE_DIR@"
 config.buddy_obj_root = "@CMAKE_BINARY_DIR@"
diff --git a/thirdparty/.gitignore b/thirdparty/.gitignore
deleted file mode 100644
index 550f1113f..000000000
--- a/thirdparty/.gitignore
+++ /dev/null
@@ -1,12 +0,0 @@
-# RISC-V GNU Toolchain
-/riscv-gnu-toolchain
-
-# SiFive QEMU
-/qemu
-
-# RVV Environment
-/build-riscv-gnu-toolchain
-/build-local-clang
-/build-cross-clang
-/build-cross-mlir
-/build-cross-buddy-mlir
diff --git a/thirdparty/build-rvv-env.sh b/thirdparty/build-rvv-env.sh
deleted file mode 100755
index b88fcf74c..000000000
--- a/thirdparty/build-rvv-env.sh
+++ /dev/null
@@ -1,169 +0,0 @@
-#!/bin/bash
-num_thread=""
-if [ -n "$1" ]; then
-  num_thread="$1"
-  echo "Number of threads was set to $num_thread for make"
-fi
-#-------------------------------------------------------------------------------
-# Clone riscv-gnu-toolchain
-#-------------------------------------------------------------------------------
-
-if [ ! -d "riscv-gnu-toolchain" ]
-then
-  git clone git@github.com:riscv-collab/riscv-gnu-toolchain.git
-  cd riscv-gnu-toolchain
-  git checkout rvv-next
-  git submodule update --init --recursive
-  cd ..
-else
-  echo "riscv-gnu-toolchain was cloned already"
-fi
-
-#-------------------------------------------------------------------------------
-# Build riscv-gnu-toolchain
-#-------------------------------------------------------------------------------
-
-if [ ! -d "build-riscv-gnu-toolchain" ]
-then
-  cd riscv-gnu-toolchain
-  mkdir build-linux
-  cd build-linux
-  ../configure --prefix=$PWD/../../build-riscv-gnu-toolchain
-  make linux -j $num_thread
-  cd ../..
-else
-  echo "riscv-gnu-toolchain was built already"
-fi
-
-#-------------------------------------------------------------------------------
-# Clone and build QEMU for RVV
-#-------------------------------------------------------------------------------
-
-# TODO: test qemu in riscv-gnu-toolchain master branch
-
-# cd ..
-# mkdir build-qemu
-# cd build-qemu
-# ../configure --prefix=$PWD/../build-qemu
-# make build-qemu -j
-
-if [ ! -d "qemu" ]
-then
-  git clone git@github.com:sifive/qemu.git
-  cd qemu
-  git checkout 856da0e94f
-  mkdir build
-  cd build
-  ../configure
-  make -j $num_thread
-  cd ../..
-else
-  echo "qemu was cloned and built already"
-fi
-
-#-------------------------------------------------------------------------------
-# Build local clang
-#-------------------------------------------------------------------------------
-
-if [ ! -d "build-local-clang" ]
-then
-  mkdir build-local-clang
-  cd build-local-clang
-  cmake -G Ninja ../../llvm/llvm \
-    -DLLVM_TARGETS_TO_BUILD="host;RISCV" \
-    -DLLVM_ENABLE_PROJECTS="clang" \
-    -DCMAKE_BUILD_TYPE=RELEASE
-  ninja
-  cd ..
-else
-  echo "native clang was built already"
-fi
-
-#-------------------------------------------------------------------------------
-# Build cross clang and lli
-#-------------------------------------------------------------------------------
-
-if [ ! -d "build-cross-clang" ]
-then
-  mkdir build-cross-clang
-  cd build-cross-clang
-  cmake -G Ninja ../../llvm/llvm \
-    -DLLVM_ENABLE_PROJECTS="clang" \
-    -DLLVM_TARGETS_TO_BUILD="RISCV" \
-    -DCMAKE_SYSTEM_NAME=Linux \
-    -DCMAKE_C_COMPILER=$PWD/../build-local-clang/bin/clang \
-    -DCMAKE_CXX_COMPILER=$PWD/../build-local-clang/bin/clang++ \
-    -DCMAKE_C_FLAGS="--target=riscv64-unknown-linux-gnu --sysroot=$PWD/../build-riscv-gnu-toolchain/sysroot --gcc-toolchain=$PWD/../build-riscv-gnu-toolchain" \
-    -DCMAKE_CXX_FLAGS="--target=riscv64-unknown-linux-gnu --sysroot=$PWD/../build-riscv-gnu-toolchain/sysroot --gcc-toolchain=$PWD/../build-riscv-gnu-toolchain" \
-    -DLLVM_TABLEGEN=$PWD/../build-local-clang/bin/llvm-tblgen \
-    -DCLANG_TABLEGEN=$PWD/../build-local-clang/bin/clang-tblgen \
-    -DLLVM_DEFAULT_TARGET_TRIPLE=riscv64-unknown-linux-gnu \
-    -DLLVM_TARGET_ARCH=RISCV64 \
-    -DCMAKE_BUILD_TYPE=Release \
-    -DLLVM_ENABLE_ZSTD=Off
-  ninja clang lli
-  cd ..
-else
-  echo "clang cross-compiler for riscv64 was built already"
-fi
-
-#-------------------------------------------------------------------------------
-# Build cross MLIR
-#-------------------------------------------------------------------------------
-
-if [ ! -d "build-cross-mlir" ]
-d "build-cross-mlir" ] -then - mkdir build-cross-mlir - cd build-cross-mlir - cmake -G Ninja ../../llvm/llvm \ - -DLLVM_ENABLE_PROJECTS="mlir" \ - -DLLVM_BUILD_EXAMPLES=OFF \ - -DCMAKE_CROSSCOMPILING=True \ - -DLLVM_TARGET_ARCH=RISCV64 \ - -DLLVM_TARGETS_TO_BUILD=RISCV \ - -DCMAKE_BUILD_TYPE=Release \ - -DLLVM_ENABLE_ASSERTIONS=ON \ - -DLLVM_NATIVE_ARCH=RISCV \ - -DLLVM_HOST_TRIPLE=riscv64-unknown-linux-gnu \ - -DLLVM_DEFAULT_TARGET_TRIPLE=riscv64-unknown-linux-gnu \ - -DCMAKE_C_COMPILER=$PWD/../build-local-clang/bin/clang \ - -DCMAKE_CXX_COMPILER=$PWD/../build-local-clang/bin/clang++ \ - -DCMAKE_C_FLAGS="--target=riscv64-unknown-linux-gnu --sysroot=$PWD/../build-riscv-gnu-toolchain/sysroot --gcc-toolchain=$PWD/../build-riscv-gnu-toolchain" \ - -DCMAKE_CXX_FLAGS="--target=riscv64-unknown-linux-gnu --sysroot=$PWD/../build-riscv-gnu-toolchain/sysroot --gcc-toolchain=$PWD/../build-riscv-gnu-toolchain" \ - -DMLIR_TABLEGEN=$PWD/../../llvm/build/bin/mlir-tblgen \ - -DLLVM_TABLEGEN=$PWD/../../llvm/build/bin/llvm-tblgen \ - -DMLIR_LINALG_ODS_YAML_GEN=$PWD/../../llvm/build/bin/mlir-linalg-ods-yaml-gen \ - -DMLIR_PDLL_TABLEGEN=$PWD/../../llvm/build/bin/mlir-pdll \ - -DLLVM_ENABLE_ZSTD=Off - ninja -else - echo "mlir for riscv64 was built already" -fi - -#------------------------------------------------------------------------------- -# Build cross Buddy-MLIR -#------------------------------------------------------------------------------- - -if [ ! -d "build-cross-buddy-mlir" ] -then - mkdir build-cross-buddy-mlir - cd build-cross-buddy-mlir - cmake -G Ninja ../../ \ - -DCMAKE_SYSTEM_NAME=Linux \ - -DMLIR_DIR=$PWD/../build-cross-mlir/lib/cmake/mlir \ - -DLLVM_DIR=$PWD/../build-cross-mlir/lib/cmake/llvm \ - -DCMAKE_CROSSCOMPILING=True \ - -DLLVM_TARGETS_TO_BUILD=RISCV \ - -DCMAKE_BUILD_TYPE=Release \ - -DLLVM_ENABLE_ASSERTIONS=ON \ - -DLLVM_NATIVE_ARCH=RISCV \ - -DLLVM_HOST_TRIPLE=riscv64-unknown-linux-gnu \ - -DCMAKE_C_COMPILER=$PWD/../build-local-clang/bin/clang \ - -DCMAKE_CXX_COMPILER=$PWD/../build-local-clang/bin/clang++ \ - -DCMAKE_C_FLAGS="--target=riscv64-unknown-linux-gnu --sysroot=$PWD/../build-riscv-gnu-toolchain/sysroot --gcc-toolchain=$PWD/../build-riscv-gnu-toolchain" \ - -DCMAKE_CXX_FLAGS="--target=riscv64-unknown-linux-gnu --sysroot=$PWD/../build-riscv-gnu-toolchain/sysroot --gcc-toolchain=$PWD/../build-riscv-gnu-toolchain" \ - -DLLVM_ENABLE_ZSTD=Off - ninja StaticMLIRCRunnerUtils StaticMLIRRunnerUtils -else - echo "buddy-mlir libs for riscv64 was built already" -fi diff --git a/thirdparty/mimalloc b/thirdparty/mimalloc index 4e50d6714..81a771161 160000 --- a/thirdparty/mimalloc +++ b/thirdparty/mimalloc @@ -1 +1 @@ -Subproject commit 4e50d6714d471b72b2285e25a3df6c92db944593 +Subproject commit 81a771161e37c8559c442fff099115cd1977db1e diff --git a/thirdparty/riscv-gnu-toolchain b/thirdparty/riscv-gnu-toolchain new file mode 160000 index 000000000..3fb1523f2 --- /dev/null +++ b/thirdparty/riscv-gnu-toolchain @@ -0,0 +1 @@ +Subproject commit 3fb1523f2df35da6b1008f13a214e4b4370f7b5a diff --git a/tools/buddy-llc/CMakeLists.txt b/tools/buddy-llc/CMakeLists.txt index 06b2a9c32..cac8ac20a 100644 --- a/tools/buddy-llc/CMakeLists.txt +++ b/tools/buddy-llc/CMakeLists.txt @@ -29,7 +29,7 @@ set(LLVM_LINK_COMPONENTS ) add_llvm_tool(buddy-llc - ${LLVM_PROJECT_SOURCE_DIR}/llvm/tools/llc/llc.cpp + ${LLVM_MAIN_SRC_DIR}/tools/llc/llc.cpp DEPENDS buddy_intrinsics_gen diff --git a/tools/buddy-opt/CMakeLists.txt b/tools/buddy-opt/CMakeLists.txt index 69a897cef..94109d28d 100644 --- 
+++ b/tools/buddy-opt/CMakeLists.txt
@@ -20,14 +20,18 @@ target_link_libraries(buddy-opt
   LowerDIPPass
   BuddyDAP
   LowerDAPPass
+  ExtendDAPPass
   DAPVectorization
   BuddyRVV
   LowerRVVPass
   MatMulOptimization
   BatchMatMulOptimization
+  BatchMatMulTileOptimization
+  BatchMatMulSCFOptimization
   MatMulParallelVectorization
   TransposeOptimization
   ConvOptimization
+  DepthwiseConvOptimization
   VectorExp
   LowerVectorExpPass
   BuddyGemmini
diff --git a/tools/buddy-opt/buddy-opt.cpp b/tools/buddy-opt/buddy-opt.cpp
index 5a1c28634..a40fda18f 100644
--- a/tools/buddy-opt/buddy-opt.cpp
+++ b/tools/buddy-opt/buddy-opt.cpp
@@ -40,30 +40,37 @@
 #include "DAP/DAPOps.h"
 #include "DIP/DIPDialect.h"
 #include "DIP/DIPOps.h"
-#include "RVV/RVVDialect.h"
-#include "VectorExp/VectorExpDialect.h"
-#include "VectorExp/VectorExpOps.h"
 #include "Gemmini/GemminiDialect.h"
 #include "Gemmini/GemminiOps.h"
+#include "RVV/RVVDialect.h"
 #include "Sche/ScheDialect.h"
 #include "Sche/ScheOps.h"
+#include "VectorExp/VectorExpDialect.h"
+#include "VectorExp/VectorExpOps.h"
 
 namespace mlir {
 namespace buddy {
 void registerConvVectorizationPass();
 void registerPointwiseConvToGemmPass();
+void registerPointwiseConvToGemmForNhwcFhwcPass();
 void registerPoolingVectorizationPass();
 void registerLowerBudPass();
 void registerLowerDIPPass();
+void registerBatchMatMulOptimizePass();
+void registerBatchMatMulTileOptimizePass();
+void registerBatchMatMulSCFOptimize();
 void registerLowerDAPPass();
+void registerExtendDAPPass();
 void registerDAPVectorizePass();
 void registerLowerRVVPass();
-void registerBatchMatMulOptimizePass();
 void registerMatMulOptimizePass();
 void registerMatMulVectorizationPass();
 void registerMatMulParallelVectorizationPass();
 void registerTransposeOptimizationPass();
 void registerConvOptimizePass();
+void registerConvNhwcFhwcOptimizePass();
+void registerConvNhwcFhwcTileOptimizePass();
+void registerDepthwiseConv2DNhwcHwcOptimizePass();
 void registerLowerVectorExpPass();
 void registerLowerGemminiPass();
 void registerLowerLinalgToGemminiPass();
@@ -77,6 +84,7 @@ int main(int argc, char **argv) {
   // Register all MLIR passes.
   mlir::registerAllPasses();
   mlir::buddy::registerPointwiseConvToGemmPass();
+  // mlir::buddy::registerPointwiseConvToGemmForNhwcFhwcPass();
   // Register Vectorization of Convolution.
   mlir::buddy::registerConvVectorizationPass();
   // Register Vectorization of Pooling.
@@ -84,6 +92,7 @@
   mlir::buddy::registerLowerBudPass();
   mlir::buddy::registerLowerDIPPass();
   mlir::buddy::registerLowerDAPPass();
+  mlir::buddy::registerExtendDAPPass();
   // Register Vectorization of DAP Dialect.
   mlir::buddy::registerDAPVectorizePass();
   mlir::buddy::registerLowerRVVPass();
@@ -93,11 +102,16 @@
   // Register Several Optimize Pass.
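+  // The passes below cover MatMul, BatchMatMul (base, tile, and SCF
+  // variants), Transpose, and the Conv2D/DepthwiseConv2D layouts.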
   mlir::buddy::registerMatMulOptimizePass();
+  mlir::buddy::registerBatchMatMulOptimizePass();
+  mlir::buddy::registerBatchMatMulTileOptimizePass();
+  mlir::buddy::registerBatchMatMulSCFOptimize();
   mlir::buddy::registerMatMulVectorizationPass();
   mlir::buddy::registerMatMulParallelVectorizationPass();
-  mlir::buddy::registerBatchMatMulOptimizePass();
   mlir::buddy::registerTransposeOptimizationPass();
   mlir::buddy::registerConvOptimizePass();
+  mlir::buddy::registerConvNhwcFhwcOptimizePass();
+  mlir::buddy::registerConvNhwcFhwcTileOptimizePass();
+  mlir::buddy::registerDepthwiseConv2DNhwcHwcOptimizePass();
   mlir::buddy::registerDeviceSchedulePass();
   mlir::buddy::registerLowerSchePass();
   mlir::buddy::registerFuncBufferizeDynamicOffsetPass();
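Once these hooks run in main(), the new passes are addressable by their registered pipeline names. A minimal driver sketch (the option values are the defaults shown earlier; `buildTilePipeline` is a hypothetical helper, not part of this patch):

```cpp
#include "mlir/Pass/PassManager.h"
#include "mlir/Pass/PassRegistry.h"
#include "mlir/Support/LogicalResult.h"

// Parse a textual pipeline that runs the tile-optimize pass on the module,
// mirroring:
//   buddy-opt --batchmatmul-tile-optimize="vec-size=16 kernel-m=4 kernel-n=2"
mlir::LogicalResult buildTilePipeline(mlir::PassManager &pm) {
  return mlir::parsePassPipeline(
      "builtin.module(batchmatmul-tile-optimize{vec-size=16 kernel-m=4 "
      "kernel-n=2})",
      pm);
}
```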