From 3479af17f3b1cdee1ea3a6f2b47a6500b3b8ae28 Mon Sep 17 00:00:00 2001 From: GokayGulsoy Date: Sun, 10 Dec 2023 20:45:45 +0300 Subject: [PATCH] measurement files added --- ECRv2/.vscode/c_cpp_properties.json | 4 +- ECRv2/README.md | 15 +- ECRv2/src/cudnn/Makefile | 304 ------------- ECRv2/src/cudnn/README.md | 5 - ECRv2/src/cudnn/cudnn_half.cu | 426 ----------------- ECRv2/src/cudnn/samples_common.mk | 34 -- ECRv2/src/cudnn/time.txt | 32 -- ECRv2/src/cudnn/time_fast/time_resnet.txt | 47 -- ECRv2/src/cudnn/time_fast/time_vgg.txt | 16 - .../src/cudnn/time_fft_tiling/time_resnet.txt | 47 -- ECRv2/src/cudnn/time_fft_tiling/time_vgg.txt | 16 - ECRv2/src/cudnn/time_gemm/time_resnet.txt | 47 -- ECRv2/src/cudnn/time_gemm/time_vgg.txt | 16 - ECRv2/src/cudnn/time_im_gemm/time_resnet.txt | 47 -- ECRv2/src/cudnn/time_im_gemm/time_vgg.txt | 16 - ECRv2/src/cudnn/time_resnet/batchsize32.txt | 47 -- ECRv2/src/cudnn/time_resnet/cudnn_half.cu | 427 ------------------ ECRv2/src/cudnn/time_sparsity/batchsize32.txt | 9 - ECRv2/src/cudnn/time_sparsity/cudnn_half.cu | 425 ----------------- .../cudnn/time_stride/stride1_batchsize32.txt | 16 - .../cudnn/time_stride/stride2_batchsize32.txt | 16 - .../cudnn/time_stride/stride3_batchsize32.txt | 16 - ECRv2/src/cudnn/time_vgg/batchsize32.txt | 16 - ECRv2/src/cudnn/time_vgg/cudnn_half.cu | 426 ----------------- ECRv2/times_vgg/singleECR_times.txt | 16 + dataset/resnet/kernel_name.txt | 2 +- dataset/vggdata/all_conv_name.txt | 2 +- dataset/vggdata/kernel_name.txt | 32 +- speedup/resnet/cuDNN_fast | 0 speedup/resnet/cuDNN_fft | 49 ++ speedup/resnet/cuDNN_gemm.txt | 49 ++ speedup/resnet/cuDNN_imp_gemm.tx | 49 ++ speedup/resnet/singleECR.txt | 49 ++ speedup/resnet/speedup.ipynb | 6 +- 34 files changed, 246 insertions(+), 2478 deletions(-) delete mode 100644 ECRv2/src/cudnn/Makefile delete mode 100644 ECRv2/src/cudnn/README.md delete mode 100644 ECRv2/src/cudnn/cudnn_half.cu delete mode 100644 ECRv2/src/cudnn/samples_common.mk delete mode 100644 ECRv2/src/cudnn/time.txt delete mode 100644 ECRv2/src/cudnn/time_fast/time_resnet.txt delete mode 100644 ECRv2/src/cudnn/time_fast/time_vgg.txt delete mode 100644 ECRv2/src/cudnn/time_fft_tiling/time_resnet.txt delete mode 100644 ECRv2/src/cudnn/time_fft_tiling/time_vgg.txt delete mode 100644 ECRv2/src/cudnn/time_gemm/time_resnet.txt delete mode 100644 ECRv2/src/cudnn/time_gemm/time_vgg.txt delete mode 100644 ECRv2/src/cudnn/time_im_gemm/time_resnet.txt delete mode 100644 ECRv2/src/cudnn/time_im_gemm/time_vgg.txt delete mode 100644 ECRv2/src/cudnn/time_resnet/batchsize32.txt delete mode 100644 ECRv2/src/cudnn/time_resnet/cudnn_half.cu delete mode 100644 ECRv2/src/cudnn/time_sparsity/batchsize32.txt delete mode 100644 ECRv2/src/cudnn/time_sparsity/cudnn_half.cu delete mode 100644 ECRv2/src/cudnn/time_stride/stride1_batchsize32.txt delete mode 100644 ECRv2/src/cudnn/time_stride/stride2_batchsize32.txt delete mode 100644 ECRv2/src/cudnn/time_stride/stride3_batchsize32.txt delete mode 100644 ECRv2/src/cudnn/time_vgg/batchsize32.txt delete mode 100644 ECRv2/src/cudnn/time_vgg/cudnn_half.cu create mode 100644 ECRv2/times_vgg/singleECR_times.txt create mode 100644 speedup/resnet/cuDNN_fast create mode 100644 speedup/resnet/cuDNN_fft create mode 100644 speedup/resnet/cuDNN_gemm.txt create mode 100644 speedup/resnet/cuDNN_imp_gemm.tx create mode 100644 speedup/resnet/singleECR.txt diff --git a/ECRv2/.vscode/c_cpp_properties.json b/ECRv2/.vscode/c_cpp_properties.json index b1d7633..f898253 100644 --- a/ECRv2/.vscode/c_cpp_properties.json +++ 
b/ECRv2/.vscode/c_cpp_properties.json
@@ -4,7 +4,9 @@
             "name": "Linux",
             "includePath": [
                 "${workspaceFolder}/**",
-                "/usr/local/cuda-12.1/targets/x86_64-linux/include"
+                "/usr/local/cuda-12.1/targets/x86_64-linux/include/**",
+                "/home/gokay/llvm-project/llvm/include/**",
+                "/usr/include/llvm-16"
             ],
             "defines": [],
             "compilerPath": "/home/gokay/llvm-project/build/bin/clang",
diff --git a/ECRv2/README.md b/ECRv2/README.md
index 911bf19..5285821 100644
--- a/ECRv2/README.md
+++ b/ECRv2/README.md
@@ -1,6 +1,8 @@
 To use the LLVM library together with CUDA, CMake can be used:
-## Requirements
+
+## Requirements
+
 - ninja
 - cmake
 - clang and lld
@@ -23,24 +25,27 @@
 cmake -G Ninja -S .. -B . \
 ninja
 ```
 
-# singleECR
+
+# singleECR
+
 The singleECR driver allows us to run a single convolution operation per feature and kernel. When the code is built, its binary is placed under the build directory. The time measurement is printed to the console.
 
 ## Usage
+
 Feature and kernel paths are provided via command-line arguments. Optionally, an output file can be provided for the resulting convolution.
 
 ```shell
 ./singleECR --kernel <kernel_path> --feature <feature_path>
 ```
+
 For example,
 
 ```shell
 ./singleECR --kernel ../../dataset/resnet/kernel/layer3.2.conv2.weight --feature ../../dataset/resnet/feature/feature38 --output singleECR_result.txt
 ```
+
 ## Testing
-(TODO_GOKAY A python script that takes in the kernel, feature and optionally the output of singlECR)
 
 ```shell
-python3 verify_convolution.py --kernel ../../dataset/resnet/kernel/layer3.2.conv2.weight --feature ../../dataset/resnet/feature/feature38 --test_output singleECR_result.txt
+python3 conv_test.py --kernel ../../dataset/resnet/kernel/layer3.2.conv2.weight --feature ../../dataset/resnet/feature/feature38 --test_output singleECR_result.txt
 ```
-
diff --git a/ECRv2/src/cudnn/Makefile b/ECRv2/src/cudnn/Makefile
deleted file mode 100644
index 4b30a18..0000000
--- a/ECRv2/src/cudnn/Makefile
+++ /dev/null
@@ -1,304 +0,0 @@
-################################################################################
-#
-# Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
-#
-# NOTICE TO USER:
-#
-# This source code is subject to NVIDIA ownership rights under U.S. and
-# international Copyright laws.
-#
-# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
-# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
-# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
-# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
-# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
-# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
-# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
-# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
-# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
-# OR PERFORMANCE OF THIS SOURCE CODE.
-#
-# U.S. Government End Users. This source code is a "commercial item" as
-# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
-# "commercial computer software" and "commercial computer software
-# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
-# and is provided to the U.S. Government only as a commercial end item.
-# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
-# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
-# source code with only those rights set forth herein.
-# -################################################################################ -# -# Makefile project only supported on Mac OS X and Linux Platforms) -# -################################################################################ - -# Location of the CUDA Toolkit -CUDA_PATH ?= "/usr/local/cuda-12.1" - -############################## -# start deprecated interface # -############################## -ifeq ($(x86_64),1) - $(info WARNING - x86_64 variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=x86_64 instead) - TARGET_ARCH ?= x86_64 -endif -ifeq ($(ARMv7),1) - $(info WARNING - ARMv7 variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=armv7l instead) - TARGET_ARCH ?= armv7l -endif -ifeq ($(aarch64),1) - $(info WARNING - aarch64 variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=aarch64 instead) - TARGET_ARCH ?= aarch64 -endif -ifeq ($(ppc64le),1) - $(info WARNING - ppc64le variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=ppc64le instead) - TARGET_ARCH ?= ppc64le -endif -ifneq ($(GCC),) - $(info WARNING - GCC variable has been deprecated) - $(info WARNING - please use HOST_COMPILER=$(GCC) instead) - HOST_COMPILER ?= $(GCC) -endif -ifneq ($(abi),) - $(error ERROR - abi variable has been removed) -endif -############################ -# end deprecated interface # -############################ - -# architecture -HOST_ARCH := $(shell uname -m) -TARGET_ARCH ?= $(HOST_ARCH) -ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 ppc64le armv7l)) - ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 ppc64le)) - TARGET_SIZE := 64 - else ifneq (,$(filter $(TARGET_ARCH),armv7l)) - TARGET_SIZE := 32 - endif - else - TARGET_SIZE := $(shell getconf LONG_BIT) - endif -else - $(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!) -endif -ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-ppc64le)) - $(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!) - endif -endif - -# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l -ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32) - TARGET_ARCH = armv7l -endif - -# operating system -HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]") -TARGET_OS ?= $(HOST_OS) -ifeq (,$(filter $(TARGET_OS),linux darwin qnx android)) - $(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!) -endif - -# host compiler -ifeq ($(TARGET_OS),darwin) - ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' 
-f1` \>= 5),1) - HOST_COMPILER ?= clang++ - endif -else ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) - ifeq ($(TARGET_OS),linux) - HOST_COMPILER ?= arm-linux-gnueabihf-g++ - else ifeq ($(TARGET_OS),qnx) - ifeq ($(QNX_HOST),) - $(error ERROR - QNX_HOST must be passed to the QNX host toolchain) - endif - ifeq ($(QNX_TARGET),) - $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain) - endif - export QNX_HOST - export QNX_TARGET - HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++ - else ifeq ($(TARGET_OS),android) - HOST_COMPILER ?= arm-linux-androideabi-g++ - endif - else ifeq ($(TARGET_ARCH),aarch64) - ifeq ($(TARGET_OS), linux) - HOST_COMPILER ?= aarch64-linux-gnu-g++ - else ifeq ($(TARGET_OS),qnx) - ifeq ($(QNX_HOST),) - $(error ERROR - QNX_HOST must be passed to the QNX host toolchain) - endif - ifeq ($(QNX_TARGET),) - $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain) - endif - export QNX_HOST - export QNX_TARGET - HOST_COMPILER ?= $(QNX_HOST)/usr/bin/aarch64-unknown-nto-qnx7.0.0-g++ - else ifeq ($(TARGET_OS), android) - HOST_COMPILER ?= aarch64-linux-android-clang++ - endif - else ifeq ($(TARGET_ARCH),ppc64le) - HOST_COMPILER ?= powerpc64le-linux-gnu-g++ - endif -endif -HOST_COMPILER ?= g++ -NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER) - -# internal flags -NVCCFLAGS := -m${TARGET_SIZE} -CCFLAGS := -LDFLAGS := - -# build flags -ifeq ($(TARGET_OS),darwin) - LDFLAGS += -rpath $(CUDA_PATH)/lib - CCFLAGS += -arch $(HOST_ARCH) -else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux) - LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3 - CCFLAGS += -mfloat-abi=hard -else ifeq ($(TARGET_OS),android) - LDFLAGS += -pie - CCFLAGS += -fpie -fpic -fexceptions -endif - -ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux) - ifneq ($(TARGET_FS),) - GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6) - ifeq ($(GCCVERSIONLTEQ46),1) - CCFLAGS += --sysroot=$(TARGET_FS) - endif - LDFLAGS += --sysroot=$(TARGET_FS) - LDFLAGS += -rpath-link=$(TARGET_FS)/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf - endif - endif - ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux) - ifneq ($(TARGET_FS),) - GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6) - ifeq ($(GCCVERSIONLTEQ46),1) - CCFLAGS += --sysroot=$(TARGET_FS) - endif - LDFLAGS += --sysroot=$(TARGET_FS) - LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L $(TARGET_FS)/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L $(TARGET_FS)/usr/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L $(TARGET_FS)/usr/lib/aarch64-linux-gnu - LDFLAGS += --unresolved-symbols=ignore-in-shared-libs - CCFLAGS += -isystem=$(TARGET_FS)/usr/include - CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu - endif - endif -endif - -ifeq ($(TARGET_OS),qnx) - CCFLAGS += -DWIN_INTERFACE_CUSTOM - LDFLAGS += -lsocket -endif - -# Install directory of different arch -CUDA_INSTALL_TARGET_DIR := -ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux) - CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux) - CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android) - CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android) - CUDA_INSTALL_TARGET_DIR = 
targets/aarch64-linux-androideabi/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx) - CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx) - CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/ -else ifeq ($(TARGET_ARCH),ppc64le) - CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/ -endif - -# Debug build flags -ifeq ($(dbg),1) - NVCCFLAGS += -g -G - BUILD_TYPE := debug -else - BUILD_TYPE := release -endif - -ALL_CCFLAGS := -ALL_CCFLAGS += $(NVCCFLAGS) -ALL_CCFLAGS += $(EXTRA_NVCCFLAGS) -ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS)) -ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS)) - -SAMPLE_ENABLED := 1 - -ALL_LDFLAGS := -ALL_LDFLAGS += $(ALL_CCFLAGS) -ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS)) -ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS)) - -# Common includes and paths for CUDA -INCLUDES := -I../../common/inc -LIBRARIES := - -################################################################################ - -# Gencode arguments -SMS ?= 75 - -ifeq ($(SMS),) -$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<) -SAMPLE_ENABLED := 0 -endif - -ifeq ($(GENCODE_FLAGS),) -# Generate SASS code for each SM architecture listed in $(SMS) -$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm))) - -# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility -HIGHEST_SM := $(lastword $(sort $(SMS))) -ifneq ($(HIGHEST_SM),) -GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) -endif -endif - -LIBRARIES += -lcublas -lcudnn - -ifeq ($(SAMPLE_ENABLED),0) -EXEC ?= @echo "[@]" -endif - -################################################################################ - -# Target rules -all: build - -build: cudnn.cubin - -check.deps: -ifeq ($(SAMPLE_ENABLED),0) - @echo "Sample will be waived due to the above missing dependencies" -else - @echo "Sample is ready - all dependencies have been met" -endif - -cudnn.o:cudnn.cu - $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $< - -cudnn.cubin: cudnn.o - $(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES) - $(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE) - $(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE) - -run: build - $(EXEC) ./cscc - -clean: - rm -f cscc cscc.o - rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/cscc - -clobber: clean diff --git a/ECRv2/src/cudnn/README.md b/ECRv2/src/cudnn/README.md deleted file mode 100644 index 95018f7..0000000 --- a/ECRv2/src/cudnn/README.md +++ /dev/null @@ -1,5 +0,0 @@ -Change CUDA path with `CUDA_PATH` variable if it's in a non-standard location. 
-
-```
-CUDA_PATH=/opt/cuda make
-```
diff --git a/ECRv2/src/cudnn/cudnn_half.cu b/ECRv2/src/cudnn/cudnn_half.cu
deleted file mode 100644
index 11957e3..0000000
--- a/ECRv2/src/cudnn/cudnn_half.cu
+++ /dev/null
@@ -1,426 +0,0 @@
-#include <iostream>
-#include <iomanip>
-#include <fstream>
-#include <string>
-#include <vector>
-#include <cstring>
-#include <cstdlib>
-#include <cuda_runtime.h>
-#include <cudnn.h>
-#include "cuda_fp16.h"
-
-using namespace std;
-
-#define CUDA_CALL(f) \
-    { \
-        cudaError_t err = (f); \
-        if (err != cudaSuccess) \
-        { \
-            std::cout \
-                << " Error occurred: " << err << std::endl; \
-            std::exit(1); \
-        } \
-    }
-
-#define CUDNN_CALL(f) \
-    { \
-        cudnnStatus_t err = (f); \
-        if (err != CUDNN_STATUS_SUCCESS) \
-        { \
-            std::cout \
-                << " Error occurred: " << err << std::endl; \
-            std::exit(1); \
-        } \
-    }
-
-void print(const float *data, int n, int c, int h, int w)
-{
-    std::vector<float> buffer(1 << 20);
-    CUDA_CALL(cudaMemcpy(
-        buffer.data(), data,
-        n * c * h * w * sizeof(float),
-        cudaMemcpyDeviceToHost));
-    int a = 0;
-    for (int i = 0; i < n; ++i)
-    {
-        for (int j = 0; j < c; ++j)
-        {
-            std::cout << "n=" << i << ", c=" << j << ":" << std::endl;
-            for (int k = 0; k < h; ++k)
-            {
-                for (int l = 0; l < w; ++l)
-                {
-                    std::cout << std::setw(4) << std::right << buffer[a];
-                    ++a;
-                }
-                std::cout << std::endl;
-            }
-        }
-    }
-    std::cout << std::endl;
-}
-
-float *LoadKernel(string name, int *&kernel_width, int *&kernel_height, int batch_size, int index)
-{
-    // ifstream kernel_shape("/home/syt/conv_pool/conv_pool/dataset/kernel/kernel_shape/" + name);
-    // for (int i = 0; i < 2; i++)
-    // {
-    //     kernel_shape >> *kernel_width >> *kernel_height;
-    // }
-    int shape[49] = {7,
-                     1, 3, 1, 1, 3, 1, 1, 3, 1,
-                     1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1,
-                     1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1,
-                     1, 3, 1, 1, 3, 1, 1, 3, 1};
-    *kernel_width = *kernel_height = shape[index];
-    float *kernel = new float[*kernel_width * *kernel_height * batch_size];
-    for (int i = 0; i < batch_size; i++)
-    {
-        ifstream kernel_data("dataset/resnetdataset_all/kernel/" + name);
-        for (int j = i * (*kernel_width * *kernel_height); j < (i + 1) * (*kernel_width * *kernel_height); j++)
-            kernel_data >> kernel[j];
-        kernel_data.close();
-    }
-
-    return kernel;
-}
-
-half *LoadvggKernel(string name, int *&kernel_width, int *&kernel_height, int batch_size)
-{
-    float temp;
-    *kernel_width = *kernel_height = 3;
-    half *kernel = new half[*kernel_width * *kernel_height * batch_size];
-    for (int i = 0; i < batch_size; i++)
-    {
-        ifstream kernel_data("/home/lfa/fsy/syt/conv_pool/dataset/vggdata/kernel/" + name + ".txt");
-        for (int j = i * (*kernel_width * *kernel_height); j < (i + 1) * (*kernel_width * *kernel_height); j++)
-        {
-            kernel_data >> temp;
-            kernel[j] = __float2half(temp);
-        }
-        kernel_data.close();
-    }
-
-    return kernel;
-}
-
-float *LoadConvWeight(int *&fea_width, int *&fea_height, int batch_size, int index)
-{
-    string *name = new string[49];
-
-    ifstream fea_name("dataset/resnetdataset_all/feature_name.txt");
-    for (int i = 0; i < 49; i++)
-    {
-        fea_name >> name[i];
-    }
-    fea_name.close();
-
-    // int shape[11] = {224, 112, 56, 56, 56, 28, 28, 28, 14, 14, 14};
-    // int shape = 28;
-    int shape[49] = {224,
-                     56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
-                     28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
-                     14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-                     7, 7, 7, 7, 7, 7, 7};
-
-    *fea_width = *fea_height = shape[index];
-    // *fea_width = *fea_height = shape;
-
-    float *feature = new float[batch_size * *fea_width * *fea_height];
-
-    for (int i = 0; i < batch_size; i++)
-    {
-
ifstream fea_data("dataset/resnetdataset_all/feature/" + name[index]); - for (int j = i * (*fea_width * *fea_height); j < (i + 1) * (*fea_width * *fea_height); j++) - fea_data >> feature[j]; - fea_data.close(); - } - - return feature; -} - -half *LoadVggConvWeigth(int *&fea_width, int *&fea_height, int batch_size, int index) -{ - string *name = new string[16]; - - ifstream fea_name("/home/lfa/fsy/syt/conv_pool/dataset/vggdata/all_conv_name.txt"); - for (int i = 0; i < 16; i++) - { - fea_name >> name[i]; - } - fea_name.close(); - - // int shape[11] = {224, 112, 56, 56, 56, 28, 28, 28, 14, 14, 14}; - // int shape = 28; - int shape[16] = {224, 224, 112, 112, 56, 56, 56, 56, 28, 28, 28, 28, 14, 14, 14, 14}; - - *fea_width = *fea_height = shape[index]; - // *fea_width = *fea_height = shape; - float temp; - half *feature = new half[batch_size * *fea_width * *fea_height]; - - for (int i = 0; i < batch_size; i++) - { - ifstream fea_data("/home/lfa/fsy/syt/conv_pool/dataset/vggdata/feature/" + name[index]); - for (int j = i * (*fea_width * *fea_height); j < (i + 1) * (*fea_width * *fea_height); j++) - { - fea_data >> temp; - feature[j]=__float2half(temp); - } - fea_data.close(); - } - - return feature; -} - -float *LoadspConvWeigth(int *&fea_width, int *&fea_height, int batch_size, int index) -{ - string *name = new string[9]; - - ifstream fea_name("sparsity/dataset/sparsity_name.txt"); - for (int i = 0; i < 9; i++) - { - fea_name >> name[i]; - } - fea_name.close(); - - // int shape[11] = {224, 112, 56, 56, 56, 28, 28, 28, 14, 14, 14}; - int shape = 28; - - // *fea_width = *fea_height = shape[index]; - *fea_width = *fea_height = shape; - - float *feature = new float[batch_size * *fea_width * *fea_height]; - - for (int i = 0; i < batch_size; i++) - { - ifstream fea_data("sparsity/dataset/" + name[index]); - for (int j = i * (*fea_width * *fea_height); j < (i + 1) * (*fea_width * *fea_height); j++) - fea_data >> feature[j]; - fea_data.close(); - } - - return feature; -} - -int main(int argc, char *argv[]) -{ - int batch_size = atoi(argv[1]); - // load kernel file name - // string kername_name[49]; - // ifstream open_kernel("/home/syt/conv_pool/conv_pool/dataset/resnetdataset_all/kernel_name.txt"); - string kername_name[16]; - ifstream open_kernel("/home/lfa/fsy/syt/conv_pool/dataset/vggdata/kernel_name.txt"); - for (int i = 0; i < 16; i++) - { - open_kernel >> kername_name[i]; - } - open_kernel.close(); - - ofstream time_file(string("/home/lfa/fsy/syt/conv_pool/ECR/cudnn/time_2080/half_batchsize") + argv[1] + string(".txt")); - // ofstream time_file(string("/home/syt/conv_pool/conv_pool/ECR/cudnn/time_gemm/batchsize") + argv[1] + string(".txt")); - - for (int t = 0; t < 16; t++) - { - cudnnHandle_t cudnn; - CUDNN_CALL(cudnnCreate(&cudnn)); - - // input - // int batch_size = 2; - int *fea_width_ = new int; - int *fea_height_ = new int; - half *matrix; - // matrix = LoadConvWeight(fea_width_, fea_height_, batch_size, i); - matrix = LoadVggConvWeigth(fea_width_, fea_height_, batch_size, t); - // matrix = LoadspConvWeigth(fea_width_, fea_height_, batch_size, i); - - const int in_n = batch_size; - const int in_c = 1; - const int in_h = *fea_width_; - const int in_w = *fea_height_; - const int in_size = in_h * in_w * in_c * in_n; - - // cout << "in:" << in_n << " " << in_c << " " << in_h << " " << in_w << endl; - - // filter - // int stride = 1; - int *kernel_width_ = new int; - int *kernel_height_ = new int; - half *kernel; - - // kernel = LoadKernel(kername_name[i], kernel_width_, kernel_height_, 
batch_size, i);
-
-        kernel = LoadvggKernel(kername_name[t], kernel_width_, kernel_height_, batch_size);
-        const int filt_k = 1;
-        const int filt_c = 1;
-        const int filt_h = *kernel_width_;
-        const int filt_w = *kernel_height_;
-        const int file_size = filt_h * filt_w * filt_c * filt_k;
-
-        // cout << "ker:" << filt_k << " " << filt_c << " " << filt_h << " " << filt_w << endl;
-
-        // start timing
-        cudaEvent_t start, stop;
-        float elapsed_time = 0.0;
-        cudaEventCreate(&start);
-        cudaEventCreate(&stop);
-        cudaEventRecord(start, 0);
-
-        cudnnTensorDescriptor_t in_desc;
-        CUDNN_CALL(cudnnCreateTensorDescriptor(&in_desc));
-        CUDNN_CALL(cudnnSetTensor4dDescriptor(
-            in_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_HALF,
-            in_n, in_c, in_h, in_w));
-
-        half *in_data;
-        CUDA_CALL(cudaMalloc(
-            &in_data, in_n * in_c * in_h * in_w * sizeof(float) / 2));
-
-        cudnnFilterDescriptor_t filt_desc;
-        CUDNN_CALL(cudnnCreateFilterDescriptor(&filt_desc));
-        CUDNN_CALL(cudnnSetFilter4dDescriptor(
-            filt_desc, CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW,
-            filt_k, filt_c, filt_h, filt_w));
-
-        half *filt_data;
-        CUDA_CALL(cudaMalloc(
-            &filt_data, filt_k * filt_c * filt_h * filt_w * sizeof(float) / 2));
-
-        // convolution
-        const int pad_h = 0;
-        const int pad_w = 0;
-        const int str_h = 1;
-        const int str_w = 1;
-        const int dil_h = 1;
-        const int dil_w = 1;
-
-        cudnnConvolutionDescriptor_t conv_desc;
-        CUDNN_CALL(cudnnCreateConvolutionDescriptor(&conv_desc));
-        CUDNN_CALL(cudnnSetConvolution2dDescriptor(
-            conv_desc,
-            pad_h, pad_w, str_h, str_w, dil_h, dil_w,
-            CUDNN_CONVOLUTION, CUDNN_DATA_HALF));
-        // cudnnSetConvolutionMathType(conv_desc, CUDNN_TENSOR_OP_MATH);
-        // output
-        int out_n;
-        int out_c;
-        int out_h;
-        int out_w;
-
-        CUDNN_CALL(cudnnGetConvolution2dForwardOutputDim(
-            conv_desc, in_desc, filt_desc,
-            &out_n, &out_c, &out_h, &out_w));
-        // cout << "out:" << out_n << " " << out_c << " " << out_h << " " << out_w << endl;
-
-        cudnnTensorDescriptor_t out_desc;
-        CUDNN_CALL(cudnnCreateTensorDescriptor(&out_desc));
-        CUDNN_CALL(cudnnSetTensor4dDescriptor(
-            out_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_HALF,
-            out_n, out_c, out_h, out_w));
-
-        half *out_data;
-        CUDA_CALL(cudaMalloc(
-            &out_data, out_n * out_c * out_h * out_w * sizeof(float) / 2));
-
-        // algorithm
-        cudnnConvolutionFwdAlgo_t algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
-
-        // = CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED;
-        // = CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD;
-        // = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
-        // = CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING;
-        // = CUDNN_CONVOLUTION_FWD_ALGO_GEMM;
-
-        // CUDNN_CALL(cudnnGetConvolutionForwardAlgorithm(
-        //     cudnn,
-        //     in_desc, filt_desc, conv_desc, out_desc,
-        //     CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, 0, &algo));
-
-        // workspace
-        size_t ws_size;
-        CUDNN_CALL(cudnnGetConvolutionForwardWorkspaceSize(
-            cudnn, in_desc, filt_desc, conv_desc, out_desc, algo, &ws_size));
-
-        half *ws_data;
-        CUDA_CALL(cudaMalloc(&ws_data, ws_size));
-
-        // perform
-        float alpha = 1.0;
-        float beta = 0.0;
-
-        /*
-        float input_data[25 * 3] = {0, 0, 15, 8, 0, 22, 0, 0, 23, 0, 0, 8, 0, 0, 0, 0, 0, 19, 0, 0, 10, 4, 0, 0, 22,
-                                    0, 0, 15, 8, 0, 22, 0, 0, 23, 0, 0, 8, 0, 0, 0, 0, 0, 19, 0, 0, 10, 4, 0, 0, 22,
-                                    0, 0, 15, 8, 0, 22, 0, 0, 23, 0, 0, 8, 0, 0, 0, 0, 0, 19, 0, 0, 10, 4, 0, 0, 22};
-        float kernel_data[9 * 3] = {0, 1, 0, 1, 0, 1, 0, 1, 0,
-                                    0, 1, 0, 1, 0, 1, 0, 1, 0,
-                                    0, 1, 0, 1, 0, 1, 0, 1, 0};
-        */
-
-        cudaMemcpy(in_data, matrix, in_size * sizeof(float) / 2, cudaMemcpyHostToDevice);
-        cudaMemcpy(filt_data, kernel, file_size * sizeof(float) / 2, cudaMemcpyHostToDevice);
-
-        CUDNN_CALL(cudnnConvolutionForward(
-            cudnn,
-            &alpha, in_desc, in_data, filt_desc, filt_data,
-            conv_desc, algo, ws_data, ws_size,
-            &beta, out_desc, out_data));
-
-        // results
-        // std::cout << "in_data:" << std::endl;
-        // print(in_data, in_n, in_c, in_h, in_w);
-
-        // std::cout << "filt_data:" << std::endl;
-        // print(filt_data, filt_k, filt_c, filt_h, filt_w);
-
-        // std::cout << "out_data:" << std::endl;
-        // print(out_data, out_n, out_c, out_h, out_w);
-
-        int result_size = out_n * out_c * out_h * out_w;
-        half *result = new half[result_size];
-        cudaMemcpy(result, out_data, result_size * sizeof(float) / 2, cudaMemcpyDeviceToHost);
-
-        // finalizing
-        CUDA_CALL(cudaFree(ws_data));
-        CUDA_CALL(cudaFree(out_data));
-        CUDNN_CALL(cudnnDestroyTensorDescriptor(out_desc));
-        CUDNN_CALL(cudnnDestroyConvolutionDescriptor(conv_desc));
-        CUDA_CALL(cudaFree(filt_data));
-        CUDNN_CALL(cudnnDestroyFilterDescriptor(filt_desc));
-        CUDA_CALL(cudaFree(in_data));
-        CUDNN_CALL(cudnnDestroyTensorDescriptor(in_desc));
-        CUDNN_CALL(cudnnDestroy(cudnn));
-
-        // stop timing and compute the elapsed time
-        cudaEventRecord(stop, 0);
-        cudaEventSynchronize(stop);
-        cudaEventElapsedTime(&elapsed_time, start, stop);
-        // cout << elapsed_time << endl;
-        cudaEventDestroy(start);
-        cudaEventDestroy(stop);
-
-        // write the elapsed time to the time file
-        // ofstream time_file("time/time_batchsize1.txt", ios::app);
-        time_file << elapsed_time << endl;
-        // time_file.close();
-        // write the results to a file
-        // string file_name = "/home/lfa/fsy/syt/conv_pool/ECR/cudnn/output_vgg/output" + to_string(i);
-        // // string file_name = "/home/syt/conv_pool/conv_pool/ECR/cudnn/out_gemm/output" + to_string(i);
-
-        // ofstream output_file(file_name);
-        // for (int i = 0; i < result_size; i++)
-        // {
-        //     output_file << result[i] << " ";
-        //     if ((i + 1) % out_w == 0)
-        //         output_file << "\n";
-        // }
-        // output_file.close();
-        // cout << "res:" << result[1] << endl;
-        delete[] result;
-    }
-
-    time_file.close();
-
-    return 0;
-}
\ No newline at end of file
diff --git a/ECRv2/src/cudnn/samples_common.mk b/ECRv2/src/cudnn/samples_common.mk
deleted file mode 100644
index 1e15155..0000000
--- a/ECRv2/src/cudnn/samples_common.mk
+++ /dev/null
@@ -1,34 +0,0 @@
-# Setting SMS for all samples
-# architecture
-
-ifneq ($(TARGET_ARCH), ppc64le)
-CUDA_VERSION := $(shell cat $(CUDA_PATH)/include/cuda.h |grep "define CUDA_VERSION" |awk '{print $$3}')
-else
-CUDA_VERSION := $(shell cat $(CUDA_PATH)/targets/ppc64le-linux/include/cuda.h |grep "define CUDA_VERSION" |awk '{print $$3}')
-endif
-
-#Link against cublasLt for CUDA 10.1 and up.
-CUBLASLT:=false -ifeq ($(shell test $(CUDA_VERSION) -ge 10010; echo $$?),0) -CUBLASLT:=true -endif -$(info Linking agains cublasLt = $(CUBLASLT)) - -ifeq ($(CUDA_VERSION),8000 ) -SMS_VOLTA = -else -ifneq ($(TARGET_ARCH), ppc64le) -ifeq ($(CUDA_VERSION), $(filter $(CUDA_VERSION), 9000 9010 9020)) -SMS_VOLTA ?= 70 -else -ifeq ($(TARGET_OS), darwin) -SMS_VOLTA ?= 70 -else -SMS_VOLTA ?= 70 72 75 -endif #ifneq ($(TARGET_OS), darwin) -endif #ifeq ($(CUDA_VERSION), $(filter $(CUDA_VERSION), 9000 9010 9020)) -else -SMS_VOLTA ?= 70 -endif #ifneq ($(TARGET_ARCH), ppc64le) -endif #ifeq ($(CUDA_VERSION),8000 ) -SMS ?= 30 35 50 53 60 61 62 $(SMS_VOLTA) diff --git a/ECRv2/src/cudnn/time.txt b/ECRv2/src/cudnn/time.txt deleted file mode 100644 index 1a3fe39..0000000 --- a/ECRv2/src/cudnn/time.txt +++ /dev/null @@ -1,32 +0,0 @@ -1.36141 -1.18371 -1.18614 -1.15264 -1.28784 -1.22947 -1.28982 -1.12986 -1.15386 -1.09363 -1.12003 -1.5615 -1.20592 -1.08922 -1.12413 -1.12688 -2.23859 -1.07363 -1.10995 -1.0784 -1.09808 -1.0943 -1.1201 -1.07677 -1.10061 -1.09296 -1.09658 -1.08029 -1.10288 -1.06099 -1.07981 -1.10906 diff --git a/ECRv2/src/cudnn/time_fast/time_resnet.txt b/ECRv2/src/cudnn/time_fast/time_resnet.txt deleted file mode 100644 index 6c58ca9..0000000 --- a/ECRv2/src/cudnn/time_fast/time_resnet.txt +++ /dev/null @@ -1,47 +0,0 @@ -1.498842 -1.52997 -1.352204 -1.34096 -1.338912 -1.339878 -1.342982 -1.341786 -1.362706 -1.394674 -1.366912 -1.103188 -1.118106 -1.106032 -1.119622 -1.094246 -1.126834 -1.127086 -1.133862 -1.128378 -1.155532 -1.131976 -1.127642 -1.069882 -1.04939 -1.106502 -1.051886 -1.042112 -1.051886 -1.054822 -1.051218 -1.075576 -1.050414 -1.054728 -1.05929 -1.065522 -1.048626 -1.044998 -1.044262 -1.05353 -1.067334 -1.061644 -1.026886 -1.071644 -1.029904 -1.02352 -1.058042 diff --git a/ECRv2/src/cudnn/time_fast/time_vgg.txt b/ECRv2/src/cudnn/time_fast/time_vgg.txt deleted file mode 100644 index 770d087..0000000 --- a/ECRv2/src/cudnn/time_fast/time_vgg.txt +++ /dev/null @@ -1,16 +0,0 @@ -8.244428 -10.53496 -2.811636 -4.446938 -1.50843 -1.434862 -1.385626 -3.294054 -1.21101 -1.133334 -1.118854 -2.962208 -1.162046 -1.0798 -1.053594 -2.959078 diff --git a/ECRv2/src/cudnn/time_fft_tiling/time_resnet.txt b/ECRv2/src/cudnn/time_fft_tiling/time_resnet.txt deleted file mode 100644 index 384949b..0000000 --- a/ECRv2/src/cudnn/time_fft_tiling/time_resnet.txt +++ /dev/null @@ -1,47 +0,0 @@ -1.1328 -1.34643 -1.06483 -1.02045 -1.07802 -1.00634 -1.00922 -1.05632 -1.06554 -1.04966 -1.1297 -0.683584 -0.679712 -0.707904 -0.736672 -0.725984 -0.733312 -0.732096 -0.731776 -0.745856 -0.750208 -0.734208 -0.771488 -0.663328 -0.671968 -0.679232 -0.6808 -0.658656 -0.674592 -0.657152 -0.658112 -0.6824 -0.659936 -0.65728 -0.671712 -0.712576 -0.659392 -0.702144 -0.678176 -0.663328 -0.700128 -0.649696 -0.674624 -0.663712 -0.651584 -0.648288 -0.70624 diff --git a/ECRv2/src/cudnn/time_fft_tiling/time_vgg.txt b/ECRv2/src/cudnn/time_fft_tiling/time_vgg.txt deleted file mode 100644 index af81e1a..0000000 --- a/ECRv2/src/cudnn/time_fft_tiling/time_vgg.txt +++ /dev/null @@ -1,16 +0,0 @@ -8.785606 -11.63882 -2.561178 -4.449632 -1.238618 -1.17013 -1.143816 -3.094618 -0.9470848 -0.9143168 -0.9148032 -2.78231 -0.8726848 -0.8750272 -0.8773312 -2.698176 diff --git a/ECRv2/src/cudnn/time_gemm/time_resnet.txt b/ECRv2/src/cudnn/time_gemm/time_resnet.txt deleted file mode 100644 index 3574be5..0000000 --- a/ECRv2/src/cudnn/time_gemm/time_resnet.txt +++ /dev/null @@ -1,47 +0,0 @@ -1.457616 -1.546673 -1.425665 -1.416509 -1.443509 -1.459167 
-1.49969 -1.484921 -1.511067 -1.505296 -1.475314 -1.131281 -1.514733 -1.108248 -1.143115 -1.142843 -1.129089 -1.137077 -1.134342 -1.14011 -1.13191 -1.144483 -1.136984 -1.076832 -1.050228 -1.042749 -1.059286 -1.05894 -1.05298 -1.069438 -1.05886 -1.050689 -1.055591 -1.066893 -1.266105 -1.101788 -1.059091 -1.062666 -1.06943 -1.063473 -1.063801 -1.037766 -1.033702 -1.078181 -1.05837 -1.037567 -1.041084 diff --git a/ECRv2/src/cudnn/time_gemm/time_vgg.txt b/ECRv2/src/cudnn/time_gemm/time_vgg.txt deleted file mode 100644 index 4af5b24..0000000 --- a/ECRv2/src/cudnn/time_gemm/time_vgg.txt +++ /dev/null @@ -1,16 +0,0 @@ -8.098009 -8.85654 -2.712784 -2.490717 -1.494214 -1.398752 -1.367342 -1.387395 -1.129809 -1.099786 -1.102848 -1.123874 -1.066295 -1.040196 -1.048977 -1.042723 diff --git a/ECRv2/src/cudnn/time_im_gemm/time_resnet.txt b/ECRv2/src/cudnn/time_im_gemm/time_resnet.txt deleted file mode 100644 index 428065b..0000000 --- a/ECRv2/src/cudnn/time_im_gemm/time_resnet.txt +++ /dev/null @@ -1,47 +0,0 @@ -1.259628 -1.358336 -1.16967 -1.1581 -1.225676 -1.16604 -1.176308 -1.23748 -1.229562 -1.245146 -1.305364 -0.9357312 -0.9341312 -0.9417088 -0.994272 -0.9396344 -0.9637892 -0.9418176 -0.942624 -0.9506492 -0.9449728 -0.9389632 -0.9567104 -0.85792 -0.8622912 -0.8721152 -0.870976 -0.860832 -0.8793216 -0.865536 -0.8981576 -0.8808896 -0.852704 -0.8786752 -0.8802112 -0.8541632 -0.8895552 -0.8734144 -0.8550912 -1.0387704 -0.873888 -0.8481536 -0.8515328 -0.8708224 -0.8738048 -0.8366592 -0.8618752 diff --git a/ECRv2/src/cudnn/time_im_gemm/time_vgg.txt b/ECRv2/src/cudnn/time_im_gemm/time_vgg.txt deleted file mode 100644 index 61744f7..0000000 --- a/ECRv2/src/cudnn/time_im_gemm/time_vgg.txt +++ /dev/null @@ -1,16 +0,0 @@ -8.115706 -10.66202 -2.246182 -4.257288 -1.24889 -1.17317 -1.093254 -3.125716 -0.91088 -0.9024384 -0.8913536 -2.780704 -0.8514624 -0.8350848 -0.8339392 -2.669132 diff --git a/ECRv2/src/cudnn/time_resnet/batchsize32.txt b/ECRv2/src/cudnn/time_resnet/batchsize32.txt deleted file mode 100644 index dde5c53..0000000 --- a/ECRv2/src/cudnn/time_resnet/batchsize32.txt +++ /dev/null @@ -1,47 +0,0 @@ -1.457233 -1.439345 -1.422449 -1.390892 -1.404805 -1.399081 -1.437757 -1.429952 -1.445194 -1.42816 -1.470265 -1.127645 -1.092218 -1.13515 -1.143337 -1.136884 -1.123715 -1.121395 -1.1275 -1.120337 -1.129813 -1.155315 -1.152745 -1.086163 -1.053969 -1.079892 -1.073841 -1.070092 -1.067156 -1.060364 -1.069228 -1.062227 -1.056918 -1.061615 -1.054527 -1.175946 -1.133718 -1.080154 -1.075609 -1.050516 -1.053262 -1.06012 -1.035418 -1.063372 -1.029721 -1.034117 -1.03581 diff --git a/ECRv2/src/cudnn/time_resnet/cudnn_half.cu b/ECRv2/src/cudnn/time_resnet/cudnn_half.cu deleted file mode 100644 index b4953e9..0000000 --- a/ECRv2/src/cudnn/time_resnet/cudnn_half.cu +++ /dev/null @@ -1,427 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "cuda_fp16.h" - -using namespace std; - -#define CUDA_CALL(f) \ - { \ - cudaError_t err = (f); \ - if (err != cudaSuccess) \ - { \ - std::cout \ - << " Error occurred: " << err << std::endl; \ - std::exit(1); \ - } \ - } - -#define CUDNN_CALL(f) \ - { \ - cudnnStatus_t err = (f); \ - if (err != CUDNN_STATUS_SUCCESS) \ - { \ - std::cout \ - << " Error occurred: " << err << std::endl; \ - std::exit(1); \ - } \ - } - -void print(const float *data, int n, int c, int h, int w) -{ - std::vector buffer(1 << 20); - CUDA_CALL(cudaMemcpy( - buffer.data(), data, - n * c * h * w * sizeof(float), - cudaMemcpyDeviceToHost)); - int a = 0; - for 
(int i = 0; i < n; ++i) - { - for (int j = 0; j < c; ++j) - { - std::cout << "n=" << i << ", c=" << j << ":" << std::endl; - for (int k = 0; k < h; ++k) - { - for (int l = 0; l < w; ++l) - { - std::cout << std::setw(4) << std::right << buffer[a]; - ++a; - } - std::cout << std::endl; - } - } - } - std::cout << std::endl; -} - -float *LoadKernel(string name, int *&kernel_width, int *&kernel_height, int batch_size, int index) -{ - // ifstream kernel_shape("/home/syt/conv_pool/conv_pool/dataset/kernel/kernel_shape/" + name); - // for (int i = 0; i < 2; i++) - // { - // kernel_shape >> *kernel_width >> *kernel_height; - // } - int shape[49] = {7, - 1, 3, 1, 1, 3, 1, 1, 3, 1, - 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, - 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, - 1, 3, 1, 1, 3, 1, 1, 3, 1}; - *kernel_width = *kernel_height = shape[index]; - float *kernel = new float[*kernel_width * *kernel_height * batch_size]; - for (int i = 0; i < batch_size; i++) - { - ifstream kernel_data("dataset/resnet/kernel/" + name); - for (int j = i * (*kernel_width * *kernel_height); j < (i + 1) * (*kernel_width * *kernel_height); j++) - kernel_data >> kernel[j]; - kernel_data.close(); - } - - return kernel; -} - -half *LoadvggKernel(string name, int *&kernel_width, int *&kernel_height, int batch_size) -{ - float temp; - *kernel_width = *kernel_height = 3; - half *kernel = new half[*kernel_width * *kernel_height * batch_size]; - for (int i = 0; i < batch_size; i++) - { - ifstream kernel_data("dataset/vggdata/kernel/" + name +".txt"); - for (int j = i * (*kernel_width * *kernel_height); j < (i + 1) * (*kernel_width * *kernel_height); j++) - { - kernel_data >> temp; - kernel[j]=__float2half(temp); - } - kernel_data.close(); - } - - return kernel; -} - -half *LoadConvWeight(int *&fea_width, int *&fea_height, int batch_size, int index) -{ - string *name = new string[49]; - - ifstream fea_name("dataset/resnet/feature_name.txt"); - for (int i = 0; i < 49; i++) - { - fea_name >> name[i]; - } - fea_name.close(); - - // int shape[11] = {224, 112, 56, 56, 56, 28, 28, 28, 14, 14, 14}; - // int shape = 28; - int shape[49] = {224, - 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 7, 7, 7, 7, 7, 7, 7}; - - *fea_width = *fea_height = shape[index]; - // *fea_width = *fea_height = shape; - float temp; - float *feature = new float[batch_size * *fea_width * *fea_height]; - - for (int i = 0; i < batch_size; i++) - { - ifstream fea_data("dataset/resnet/kernel/" + name[index]); - for (int j = i * (*fea_width * *fea_height); j < (i + 1) * (*fea_width * *fea_height); j++) - fea_data >> temp; - feature[j] = __float2half(temp); - fea_data.close(); - } - - return feature; -} - -half *LoadVggConvWeigth(int *&fea_width, int *&fea_height, int batch_size, int index) -{ - string *name = new string[16]; - - ifstream fea_name("/home/lfa/fsy/syt/conv_pool/dataset/vggdata/all_conv_name.txt"); - for (int i = 0; i < 16; i++) - { - fea_name >> name[i]; - } - fea_name.close(); - - // int shape[11] = {224, 112, 56, 56, 56, 28, 28, 28, 14, 14, 14}; - // int shape = 28; - int shape[16] = {224, 224, 112, 112, 56, 56, 56, 56, 28, 28, 28, 28, 14, 14, 14, 14}; - - *fea_width = *fea_height = shape[index]; - // *fea_width = *fea_height = shape; - float temp; - half *feature = new half[batch_size * *fea_width * *fea_height]; - - for (int i = 0; i < batch_size; i++) - { - ifstream 
fea_data("/home/lfa/fsy/syt/conv_pool/dataset/vggdata/feature/" + name[index]); - for (int j = i * (*fea_width * *fea_height); j < (i + 1) * (*fea_width * *fea_height); j++) - { - fea_data >> temp; - feature[j]=__float2half(temp); - } - fea_data.close(); - } - - return feature; -} - -float *LoadspConvWeigth(int *&fea_width, int *&fea_height, int batch_size, int index) -{ - string *name = new string[9]; - - ifstream fea_name("sparsity/dataset/sparsity_name.txt"); - for (int i = 0; i < 9; i++) - { - fea_name >> name[i]; - } - fea_name.close(); - - // int shape[11] = {224, 112, 56, 56, 56, 28, 28, 28, 14, 14, 14}; - int shape = 28; - - // *fea_width = *fea_height = shape[index]; - *fea_width = *fea_height = shape; - - float *feature = new float[batch_size * *fea_width * *fea_height]; - - for (int i = 0; i < batch_size; i++) - { - ifstream fea_data("sparsity/dataset/" + name[index]); - for (int j = i * (*fea_width * *fea_height); j < (i + 1) * (*fea_width * *fea_height); j++) - fea_data >> feature[j]; - fea_data.close(); - } - - return feature; -} - -int main(int argc, char *argv[]) -{ - int batch_size = atoi(argv[1]); - // load kernel file name - string kername_name[49]; - // ifstream open_kernel("/home/syt/conv_pool/conv_pool/dataset/resnetdataset_all/kernel_name.txt"); - // string kername_name[16]; - ifstream open_kernel("dataset/resnet/kernel_name.txt"); - for (int i = 1; i < 49; i++) - { - open_kernel >> kername_name[i]; - } - open_kernel.close(); - - ofstream time_file(string("ECR/ECR/time_resnet/batchsize") + argv[1] + string(".txt")); - // ofstream time_file(string("/home/syt/conv_pool/conv_pool/ECR/cudnn/time_gemm/batchsize") + argv[1] + string(".txt")); - // 1-47 - for (int t = 1; t < 48; t++) - { - cudnnHandle_t cudnn; - CUDNN_CALL(cudnnCreate(&cudnn)); - - // input - // int batch_size = 2; - int *fea_width_ = new int; - int *fea_height_ = new int; - half *matrix; - matrix = LoadConvWeight(fea_width_, fea_height_, batch_size, i); - // matrix = LoadVggConvWeigth(fea_width_, fea_height_, batch_size, t); - // matrix = LoadspConvWeigth(fea_width_, fea_height_, batch_size, i); - - const int in_n = batch_size; - const int in_c = 1; - const int in_h = *fea_width_; - const int in_w = *fea_height_; - const int in_size = in_h * in_w * in_c * in_n; - - // cout << "in:" << in_n << " " << in_c << " " << in_h << " " << in_w << endl; - - // filter - // int stride = 1; - int *kernel_width_ = new int; - int *kernel_height_ = new int; - half *kernel; - - kernel = LoadKernel(kername_name[i], kernel_width_, kernel_height_, batch_size, i); - - // kernel = LoadvggKernel(kername_name[t], kernel_width_, kernel_height_, batch_size); - const int filt_k = 1; - const int filt_c = 1; - const int filt_h = *kernel_width_; - const int filt_w = *kernel_height_; - const int file_size = filt_h * filt_w * filt_c * filt_k; - - // cout << "ker:" << filt_k << " " << filt_c << " " << filt_h << " " << filt_w << endl; - - // 记录时间 - cudaEvent_t start, stop; - float elapsed_time = 0.0; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, 0); - - cudnnTensorDescriptor_t in_desc; - CUDNN_CALL(cudnnCreateTensorDescriptor(&in_desc)); - CUDNN_CALL(cudnnSetTensor4dDescriptor( - in_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_HALF, - in_n, in_c, in_h, in_w)); - - half *in_data; - CUDA_CALL(cudaMalloc( - &in_data, in_n * in_c * in_h * in_w * sizeof(float)/2)); - - cudnnFilterDescriptor_t filt_desc; - CUDNN_CALL(cudnnCreateFilterDescriptor(&filt_desc)); - CUDNN_CALL(cudnnSetFilter4dDescriptor( - filt_desc, 
CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW, - filt_k, filt_c, filt_h, filt_w)); - - half *filt_data; - CUDA_CALL(cudaMalloc( - &filt_data, filt_k * filt_c * filt_h * filt_w * sizeof(float)/2)); - - // convolution - const int pad_h = 0; - const int pad_w = 0; - const int str_h = 1; - const int str_w = 1; - const int dil_h = 1; - const int dil_w = 1; - - cudnnConvolutionDescriptor_t conv_desc; - CUDNN_CALL(cudnnCreateConvolutionDescriptor(&conv_desc)); - CUDNN_CALL(cudnnSetConvolution2dDescriptor( - conv_desc, - pad_h, pad_w, str_h, str_w, dil_h, dil_w, - CUDNN_CONVOLUTION, CUDNN_DATA_HALF)); - // cudnnSetConvolutionMathType(conv_desc, CUDNN_TENSOR_OP_MATH); - // output - int out_n; - int out_c; - int out_h; - int out_w; - - CUDNN_CALL(cudnnGetConvolution2dForwardOutputDim( - conv_desc, in_desc, filt_desc, - &out_n, &out_c, &out_h, &out_w)); - // cout << "out:" << out_n << " " << out_c << " " << out_h << " " << out_w << endl; - - cudnnTensorDescriptor_t out_desc; - CUDNN_CALL(cudnnCreateTensorDescriptor(&out_desc)); - CUDNN_CALL(cudnnSetTensor4dDescriptor( - out_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_HALF, - out_n, out_c, out_h, out_w)); - - half *out_data; - CUDA_CALL(cudaMalloc( - &out_data, out_n * out_c * out_h * out_w * sizeof(float)/2)); - - // algorithm - cudnnConvolutionFwdAlgo_t algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM; - - // = CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED; - // = CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD; - // = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM; - // = CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING; - // = CUDNN_CONVOLUTION_FWD_ALGO_GEMM; - - // CUDNN_CALL(cudnnGetConvolutionForwardAlgorithm( - // cudnn, - // in_desc, filt_desc, conv_desc, out_desc, - // CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, 0, &algo)); - - // workspace - size_t ws_size; - CUDNN_CALL(cudnnGetConvolutionForwardWorkspaceSize( - cudnn, in_desc, filt_desc, conv_desc, out_desc, algo, &ws_size)); - - half *ws_data; - CUDA_CALL(cudaMalloc(&ws_data, ws_size)); - - // perform - float alpha = 1.0; - float beta = 0.0; - - /* - float input_data[25 * 3] = {0, 0, 15, 8, 0, 22, 0, 0, 23, 0, 0, 8, 0, 0, 0, 0, 0, 19, 0, 0, 10, 4, 0, 0, 22, - 0, 0, 15, 8, 0, 22, 0, 0, 23, 0, 0, 8, 0, 0, 0, 0, 0, 19, 0, 0, 10, 4, 0, 0, 22, - 0, 0, 15, 8, 0, 22, 0, 0, 23, 0, 0, 8, 0, 0, 0, 0, 0, 19, 0, 0, 10, 4, 0, 0, 22}; - float kernel_data[9 * 3] = {0, 1, 0, 1, 0, 1, 0, 1, 0, - 0, 1, 0, 1, 0, 1, 0, 1, 0, - 0, 1, 0, 1, 0, 1, 0, 1, 0}; - */ - - cudaMemcpy(in_data, matrix, in_size * sizeof(float)/2, cudaMemcpyHostToDevice); - cudaMemcpy(filt_data, kernel, file_size * sizeof(float)/2, cudaMemcpyHostToDevice); - - CUDNN_CALL(cudnnConvolutionForward( - cudnn, - &alpha, in_desc, in_data, filt_desc, filt_data, - conv_desc, algo, ws_data, ws_size, - &beta, out_desc, out_data)); - - // results - // std::cout << "in_data:" << std::endl; - // print(in_data, in_n, in_c, in_h, in_w); - - // std::cout << "filt_data:" << std::endl; - // print(filt_data, filt_k, filt_c, filt_h, filt_w); - - // std::cout << "out_data:" << std::endl; - // print(out_data, out_n, out_c, out_h, out_w); - - int result_size = out_n * out_c * out_h * out_w; - half *result = new half[result_size]; - cudaMemcpy(result, out_data, result_size * sizeof(float)/2, cudaMemcpyDeviceToHost); - - // finalizing - CUDA_CALL(cudaFree(ws_data)); - CUDA_CALL(cudaFree(out_data)); - CUDNN_CALL(cudnnDestroyTensorDescriptor(out_desc)); - CUDNN_CALL(cudnnDestroyConvolutionDescriptor(conv_desc)); - CUDA_CALL(cudaFree(filt_data)); - CUDNN_CALL(cudnnDestroyFilterDescriptor(filt_desc)); - 
CUDA_CALL(cudaFree(in_data)); - CUDNN_CALL(cudnnDestroyTensorDescriptor(in_desc)); - CUDNN_CALL(cudnnDestroy(cudnn)); - - // 计算时间 - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - cudaEventElapsedTime(&elapsed_time, start, stop); - // cout << elapsed_time << endl; - cudaEventDestroy(start); - cudaEventDestroy(stop); - - // 写入时间 - // ofstream time_file("time/time_batchsize1.txt", ios::app); - time_file << elapsed_time << endl; - // time_file.close(); - // 结果写入文件 - // string file_name = "/home/lfa/fsy/syt/conv_pool/ECR/cudnn/output_vgg/output" + to_string(i); - // // string file_name = "/home/syt/conv_pool/conv_pool/ECR/cudnn/out_gemm/output" + to_string(i); - - // ofstream output_file(file_name); - // for (int i = 0; i < result_size; i++) - // { - // output_file << result[i] << " "; - // if ((i + 1) % out_w == 0) - // output_file << "\n"; - // } - // output_file.close(); - // cout << "res:" << result[1] << endl; - memset(result,0,sizeof(result)); - free(result); - } - - time_file.close(); - - return 0; -} \ No newline at end of file diff --git a/ECRv2/src/cudnn/time_sparsity/batchsize32.txt b/ECRv2/src/cudnn/time_sparsity/batchsize32.txt deleted file mode 100644 index 27af9f6..0000000 --- a/ECRv2/src/cudnn/time_sparsity/batchsize32.txt +++ /dev/null @@ -1,9 +0,0 @@ -1.13853 -0.752192 -0.740608 -0.760096 -0.781472 -0.746464 -0.805792 -0.7592 -0.760256 diff --git a/ECRv2/src/cudnn/time_sparsity/cudnn_half.cu b/ECRv2/src/cudnn/time_sparsity/cudnn_half.cu deleted file mode 100644 index 510279f..0000000 --- a/ECRv2/src/cudnn/time_sparsity/cudnn_half.cu +++ /dev/null @@ -1,425 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "cuda_fp16.h" - -using namespace std; - -#define CUDA_CALL(f) \ - { \ - cudaError_t err = (f); \ - if (err != cudaSuccess) \ - { \ - std::cout \ - << " Error occurred: " << err << std::endl; \ - std::exit(1); \ - } \ - } - -#define CUDNN_CALL(f) \ - { \ - cudnnStatus_t err = (f); \ - if (err != CUDNN_STATUS_SUCCESS) \ - { \ - std::cout \ - << " Error occurred: " << err << std::endl; \ - std::exit(1); \ - } \ - } - -void print(const float *data, int n, int c, int h, int w) -{ - std::vector buffer(1 << 20); - CUDA_CALL(cudaMemcpy( - buffer.data(), data, - n * c * h * w * sizeof(float), - cudaMemcpyDeviceToHost)); - int a = 0; - for (int i = 0; i < n; ++i) - { - for (int j = 0; j < c; ++j) - { - std::cout << "n=" << i << ", c=" << j << ":" << std::endl; - for (int k = 0; k < h; ++k) - { - for (int l = 0; l < w; ++l) - { - std::cout << std::setw(4) << std::right << buffer[a]; - ++a; - } - std::cout << std::endl; - } - } - } - std::cout << std::endl; -} - -float *LoadKernel(string name, int *&kernel_width, int *&kernel_height, int batch_size, int index) -{ - // ifstream kernel_shape("/home/syt/conv_pool/conv_pool/dataset/kernel/kernel_shape/" + name); - // for (int i = 0; i < 2; i++) - // { - // kernel_shape >> *kernel_width >> *kernel_height; - // } - int shape[49] = {7, - 1, 3, 1, 1, 3, 1, 1, 3, 1, - 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, - 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, - 1, 3, 1, 1, 3, 1, 1, 3, 1}; - *kernel_width = *kernel_height = shape[index]; - float *kernel = new float[*kernel_width * *kernel_height * batch_size]; - for (int i = 0; i < batch_size; i++) - { - ifstream kernel_data("dataset/resnetdataset_all/kernel/" + name); - for (int j = i * (*kernel_width * *kernel_height); j < (i + 1) * (*kernel_width * *kernel_height); j++) - kernel_data >> kernel[j]; - kernel_data.close(); - } - - 
return kernel; -} - -half *LoadvggKernel(string name, int *&kernel_width, int *&kernel_height, int batch_size) -{ - float temp; - *kernel_width = *kernel_height = 3; - half *kernel = new half[*kernel_width * *kernel_height * batch_size]; - for (int i = 0; i < batch_size; i++) - { - ifstream kernel_data("/home/lfa/fsy/syt/conv_pool/dataset/vggdata/kernel/" + name +".txt"); - for (int j = i * (*kernel_width * *kernel_height); j < (i + 1) * (*kernel_width * *kernel_height); j++) - { - kernel_data >> temp; - kernel[j]=__float2half(temp); - } - kernel_data.close(); - } - - return kernel; -} - -float *LoadConvWeight(int *&fea_width, int *&fea_height, int batch_size, int index) -{ - string *name = new string[49]; - - ifstream fea_name("dataset/resnetdataset_all/feature_name.txt"); - for (int i = 0; i < 49; i++) - { - fea_name >> name[i]; - } - fea_name.close(); - - // int shape[11] = {224, 112, 56, 56, 56, 28, 28, 28, 14, 14, 14}; - // int shape = 28; - int shape[49] = {224, - 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 7, 7, 7, 7, 7, 7, 7}; - - *fea_width = *fea_height = shape[index]; - // *fea_width = *fea_height = shape; - - float *feature = new float[batch_size * *fea_width * *fea_height]; - - for (int i = 0; i < batch_size; i++) - { - ifstream fea_data("dataset/resnetdataset_all/feature/" + name[index]); - for (int j = i * (*fea_width * *fea_height); j < (i + 1) * (*fea_width * *fea_height); j++) - fea_data >> feature[j]; - fea_data.close(); - } - - return feature; -} - -half *LoadVggConvWeigth(int *&fea_width, int *&fea_height, int batch_size, int index) -{ - string *name = new string[16]; - - ifstream fea_name("/home/lfa/fsy/syt/conv_pool/dataset/vggdata/all_conv_name.txt"); - for (int i = 0; i < 16; i++) - { - fea_name >> name[i]; - } - fea_name.close(); - - // int shape[11] = {224, 112, 56, 56, 56, 28, 28, 28, 14, 14, 14}; - // int shape = 28; - int shape[16] = {224, 224, 112, 112, 56, 56, 56, 56, 28, 28, 28, 28, 14, 14, 14, 14}; - - *fea_width = *fea_height = shape[index]; - // *fea_width = *fea_height = shape; - float temp; - half *feature = new half[batch_size * *fea_width * *fea_height]; - - for (int i = 0; i < batch_size; i++) - { - ifstream fea_data("/home/lfa/fsy/syt/conv_pool/dataset/vggdata/feature/" + name[index]); - for (int j = i * (*fea_width * *fea_height); j < (i + 1) * (*fea_width * *fea_height); j++) - { - fea_data >> temp; - feature[j]=__float2half(temp); - } - fea_data.close(); - } - - return feature; -} - -half *LoadspConvWeigth(int *&fea_width, int *&fea_height, int batch_size, int index) -{ - string *name = new string[9]; - - ifstream fea_name("sparsity/dataset/sparsity_name.txt"); - for (int i = 0; i < 9; i++) - { - fea_name >> name[i]; - } - fea_name.close(); - - // int shape[11] = {224, 112, 56, 56, 56, 28, 28, 28, 14, 14, 14}; - int shape = 28; - - // *fea_width = *fea_height = shape[index]; - *fea_width = *fea_height = shape; - - float *feature = new float[batch_size * *fea_width * *fea_height]; - float temp; - for (int i = 0; i < batch_size; i++) - { - ifstream fea_data("dataset/sparsity/dataset/" + name[index]); - for (int j = i * (*fea_width * *fea_height); j < (i + 1) * (*fea_width * *fea_height); j++) - fea_data >> temp; - feature[j] = __float2half(temp); - fea_data.close(); - } - - return feature; -} - -int main(int argc, char *argv[]) -{ - int batch_size = atoi(argv[1]); - // load kernel file name - // string kername_name[49]; - 
// ifstream open_kernel("/home/syt/conv_pool/conv_pool/dataset/resnetdataset_all/kernel_name.txt"); - string kername_name[9]; - ifstream open_kernel("dataset/sparsity/dataset/sparsity_name.txt"); - for (int i = 0; i < 9; i++) - { - open_kernel >> kername_name[i]; - } - open_kernel.close(); - - ofstream time_file("ECR/ECR/time_sparsity/batchsize" + argv[1] + string(".txt")); - // ofstream time_file(string("/home/syt/conv_pool/conv_pool/ECR/cudnn/time_gemm/batchsize") + argv[1] + string(".txt")); - - for (int t = 0; t < 9; t++) - { - cudnnHandle_t cudnn; - CUDNN_CALL(cudnnCreate(&cudnn)); - - // input - // int batch_size = 2; - int *fea_width_ = new int; - int *fea_height_ = new int; - half *matrix; - // matrix = LoadConvWeight(fea_width_, fea_height_, batch_size, i); - matrix = LoadVggConvWeigth(fea_width_, fea_height_, batch_size, t); - // matrix = LoadspConvWeigth(fea_width_, fea_height_, batch_size, i); - - const int in_n = batch_size; - const int in_c = 1; - const int in_h = *fea_width_; - const int in_w = *fea_height_; - const int in_size = in_h * in_w * in_c * in_n; - - // cout << "in:" << in_n << " " << in_c << " " << in_h << " " << in_w << endl; - - // filter - // int stride = 1; - int *kernel_width_ = new int; - int *kernel_height_ = new int; - half *kernel - - kernel = LoadspConvWeigth(kername_name[t], kernel_width_, kernel_height_, batch_size); - const int filt_k = 1; - const int filt_c = 1; - const int filt_h = *kernel_width_; - const int filt_w = *kernel_height_; - const int file_size = filt_h * filt_w * filt_c * filt_k; - - // cout << "ker:" << filt_k << " " << filt_c << " " << filt_h << " " << filt_w << endl; - - // 记录时间 - cudaEvent_t start, stop; - float elapsed_time = 0.0; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, 0); - - cudnnTensorDescriptor_t in_desc; - CUDNN_CALL(cudnnCreateTensorDescriptor(&in_desc)); - CUDNN_CALL(cudnnSetTensor4dDescriptor( - in_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_HALF, - in_n, in_c, in_h, in_w)); - - half *in_data; - CUDA_CALL(cudaMalloc( - &in_data, in_n * in_c * in_h * in_w * sizeof(float)/2)); - - cudnnFilterDescriptor_t filt_desc; - CUDNN_CALL(cudnnCreateFilterDescriptor(&filt_desc)); - CUDNN_CALL(cudnnSetFilter4dDescriptor( - filt_desc, CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW, - filt_k, filt_c, filt_h, filt_w)); - - half *filt_data; - CUDA_CALL(cudaMalloc( - &filt_data, filt_k * filt_c * filt_h * filt_w * sizeof(float)/2)); - - // convolution - const int pad_h = 0; - const int pad_w = 0; - const int str_h = 1; - const int str_w = 1; - const int dil_h = 1; - const int dil_w = 1; - - cudnnConvolutionDescriptor_t conv_desc; - CUDNN_CALL(cudnnCreateConvolutionDescriptor(&conv_desc)); - CUDNN_CALL(cudnnSetConvolution2dDescriptor( - conv_desc, - pad_h, pad_w, str_h, str_w, dil_h, dil_w, - CUDNN_CONVOLUTION, CUDNN_DATA_HALF)); - // cudnnSetConvolutionMathType(conv_desc, CUDNN_TENSOR_OP_MATH); - // output - int out_n; - int out_c; - int out_h; - int out_w; - - CUDNN_CALL(cudnnGetConvolution2dForwardOutputDim( - conv_desc, in_desc, filt_desc, - &out_n, &out_c, &out_h, &out_w)); - // cout << "out:" << out_n << " " << out_c << " " << out_h << " " << out_w << endl; - - cudnnTensorDescriptor_t out_desc; - CUDNN_CALL(cudnnCreateTensorDescriptor(&out_desc)); - CUDNN_CALL(cudnnSetTensor4dDescriptor( - out_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_HALF, - out_n, out_c, out_h, out_w)); - - half *out_data; - CUDA_CALL(cudaMalloc( - &out_data, out_n * out_c * out_h * out_w * sizeof(float)/2)); - - // algorithm - cudnnConvolutionFwdAlgo_t 
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM; - - // = CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED; - // = CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD; - // = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM; - // = CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING; - // = CUDNN_CONVOLUTION_FWD_ALGO_GEMM; - - // CUDNN_CALL(cudnnGetConvolutionForwardAlgorithm( - // cudnn, - // in_desc, filt_desc, conv_desc, out_desc, - // CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, 0, &algo)); - - // workspace - size_t ws_size; - CUDNN_CALL(cudnnGetConvolutionForwardWorkspaceSize( - cudnn, in_desc, filt_desc, conv_desc, out_desc, algo, &ws_size)); - - half *ws_data; - CUDA_CALL(cudaMalloc(&ws_data, ws_size)); - - // perform - float alpha = 1.0; - float beta = 0.0; - - /* - float input_data[25 * 3] = {0, 0, 15, 8, 0, 22, 0, 0, 23, 0, 0, 8, 0, 0, 0, 0, 0, 19, 0, 0, 10, 4, 0, 0, 22, - 0, 0, 15, 8, 0, 22, 0, 0, 23, 0, 0, 8, 0, 0, 0, 0, 0, 19, 0, 0, 10, 4, 0, 0, 22, - 0, 0, 15, 8, 0, 22, 0, 0, 23, 0, 0, 8, 0, 0, 0, 0, 0, 19, 0, 0, 10, 4, 0, 0, 22}; - float kernel_data[9 * 3] = {0, 1, 0, 1, 0, 1, 0, 1, 0, - 0, 1, 0, 1, 0, 1, 0, 1, 0, - 0, 1, 0, 1, 0, 1, 0, 1, 0}; - */ - - cudaMemcpy(in_data, matrix, in_size * sizeof(half), cudaMemcpyHostToDevice); - cudaMemcpy(filt_data, kernel, file_size * sizeof(half), cudaMemcpyHostToDevice); - - CUDNN_CALL(cudnnConvolutionForward( - cudnn, - &alpha, in_desc, in_data, filt_desc, filt_data, - conv_desc, algo, ws_data, ws_size, - &beta, out_desc, out_data)); - - // results - // std::cout << "in_data:" << std::endl; - // print(in_data, in_n, in_c, in_h, in_w); - - // std::cout << "filt_data:" << std::endl; - // print(filt_data, filt_k, filt_c, filt_h, filt_w); - - // std::cout << "out_data:" << std::endl; - // print(out_data, out_n, out_c, out_h, out_w); - - int result_size = out_n * out_c * out_h * out_w; - half *result = new half[result_size]; - cudaMemcpy(result, out_data, result_size * sizeof(half), cudaMemcpyDeviceToHost); - - // finalizing - CUDA_CALL(cudaFree(ws_data)); - CUDA_CALL(cudaFree(out_data)); - CUDNN_CALL(cudnnDestroyTensorDescriptor(out_desc)); - CUDNN_CALL(cudnnDestroyConvolutionDescriptor(conv_desc)); - CUDA_CALL(cudaFree(filt_data)); - CUDNN_CALL(cudnnDestroyFilterDescriptor(filt_desc)); - CUDA_CALL(cudaFree(in_data)); - CUDNN_CALL(cudnnDestroyTensorDescriptor(in_desc)); - CUDNN_CALL(cudnnDestroy(cudnn)); - - // stop timing and read the elapsed time - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - cudaEventElapsedTime(&elapsed_time, start, stop); - // cout << elapsed_time << endl; - cudaEventDestroy(start); - cudaEventDestroy(stop); - - // write the elapsed time - // ofstream time_file("time/time_batchsize1.txt", ios::app); - time_file << elapsed_time << endl; - // time_file.close(); - // write the results to a file - // string file_name = "/home/lfa/fsy/syt/conv_pool/ECR/cudnn/output_vgg/output" + to_string(i); - // // string file_name = "/home/syt/conv_pool/conv_pool/ECR/cudnn/out_gemm/output" + to_string(i); - - // ofstream output_file(file_name); - // for (int i = 0; i < result_size; i++) - // { - // output_file << result[i] << " "; - // if ((i + 1) % out_w == 0) - // output_file << "\n"; - // } - // output_file.close(); - // cout << "res:" << result[1] << endl; - delete[] result; // result, matrix and kernel were allocated with new[], so delete[] them - delete[] matrix; - delete[] kernel; - } - - time_file.close(); - - return 0; -} \ No newline at end of file diff --git a/ECRv2/src/cudnn/time_stride/stride1_batchsize32.txt b/ECRv2/src/cudnn/time_stride/stride1_batchsize32.txt deleted file mode 100644 index e4fc1a6..0000000 --- a/ECRv2/src/cudnn/time_stride/stride1_batchsize32.txt +++ /dev/null @@ -1,16
+0,0 @@ -9.5097 -9.18128 -2.11053 -2.51846 -1.3369 -1.0785 -1.07843 -1.04432 -0.675456 -0.665248 -0.677344 -0.661664 -0.634176 -0.64832 -0.623584 -0.625184 diff --git a/ECRv2/src/cudnn/time_stride/stride2_batchsize32.txt b/ECRv2/src/cudnn/time_stride/stride2_batchsize32.txt deleted file mode 100644 index 17a9268..0000000 --- a/ECRv2/src/cudnn/time_stride/stride2_batchsize32.txt +++ /dev/null @@ -1,16 +0,0 @@ -2.41258 -2.78582 -1.14509 -1.09994 -0.693184 -0.698336 -0.68096 -0.792736 -0.718176 -0.673184 -0.625184 -0.630048 -0.633344 -0.604992 -0.608 -0.657824 diff --git a/ECRv2/src/cudnn/time_stride/stride3_batchsize32.txt b/ECRv2/src/cudnn/time_stride/stride3_batchsize32.txt deleted file mode 100644 index c22652d..0000000 --- a/ECRv2/src/cudnn/time_stride/stride3_batchsize32.txt +++ /dev/null @@ -1,16 +0,0 @@ -2.46832 -2.43386 -1.50442 -1.0936 -0.707168 -0.702176 -0.959616 -0.688096 -0.64704 -0.626816 -0.6352 -0.625856 -0.632512 -0.610688 -0.618016 -0.6104 diff --git a/ECRv2/src/cudnn/time_vgg/batchsize32.txt b/ECRv2/src/cudnn/time_vgg/batchsize32.txt deleted file mode 100644 index 1284b5e..0000000 --- a/ECRv2/src/cudnn/time_vgg/batchsize32.txt +++ /dev/null @@ -1,16 +0,0 @@ -7.31648 -7.462711 -2.263564 -2.198837 -1.348295 -1.302553 -1.356063 -1.344357 -1.105748 -1.085286 -1.096859 -1.105219 -1.060116 -1.038973 -1.057504 -1.043154 diff --git a/ECRv2/src/cudnn/time_vgg/cudnn_half.cu b/ECRv2/src/cudnn/time_vgg/cudnn_half.cu deleted file mode 100644 index e2868ba..0000000 --- a/ECRv2/src/cudnn/time_vgg/cudnn_half.cu +++ /dev/null @@ -1,426 +0,0 @@ -#include <cudnn.h> -#include <cuda_runtime.h> -#include <iostream> -#include <iomanip> -#include <fstream> -#include <string> -#include <vector> -#include <cstdlib> -#include <cstring> -#include "cuda_fp16.h" - -using namespace std; - -#define CUDA_CALL(f) \ - { \ - cudaError_t err = (f); \ - if (err != cudaSuccess) \ - { \ - std::cout \ - << " Error occurred: " << err << std::endl; \ - std::exit(1); \ - } \ - } - -#define CUDNN_CALL(f) \ - { \ - cudnnStatus_t err = (f); \ - if (err != CUDNN_STATUS_SUCCESS) \ - { \ - std::cout \ - << " Error occurred: " << err << std::endl; \ - std::exit(1); \ - } \ - } - -void print(const float *data, int n, int c, int h, int w) -{ - std::vector<float> buffer(1 << 20); - CUDA_CALL(cudaMemcpy( - buffer.data(), data, - n * c * h * w * sizeof(float), - cudaMemcpyDeviceToHost)); - int a = 0; - for (int i = 0; i < n; ++i) - { - for (int j = 0; j < c; ++j) - { - std::cout << "n=" << i << ", c=" << j << ":" << std::endl; - for (int k = 0; k < h; ++k) - { - for (int l = 0; l < w; ++l) - { - std::cout << std::setw(4) << std::right << buffer[a]; - ++a; - } - std::cout << std::endl; - } - } - } - std::cout << std::endl; -} - -float *LoadKernel(string name, int *&kernel_width, int *&kernel_height, int batch_size, int index) -{ - // ifstream kernel_shape("/home/syt/conv_pool/conv_pool/dataset/kernel/kernel_shape/" + name); - // for (int i = 0; i < 2; i++) - // { - // kernel_shape >> *kernel_width >> *kernel_height; - // } - int shape[49] = {7, - 1, 3, 1, 1, 3, 1, 1, 3, 1, - 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, - 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, - 1, 3, 1, 1, 3, 1, 1, 3, 1}; - *kernel_width = *kernel_height = shape[index]; - float *kernel = new float[*kernel_width * *kernel_height * batch_size]; - for (int i = 0; i < batch_size; i++) - { - ifstream kernel_data("dataset/resnetdataset_all/kernel/" + name); - for (int j = i * (*kernel_width * *kernel_height); j < (i + 1) * (*kernel_width * *kernel_height); j++) - kernel_data >> kernel[j]; - kernel_data.close(); - } - - return kernel; -} - -half
*LoadvggKernel(string name, int *&kernel_width, int *&kernel_height, int batch_size) -{ - float temp; - *kernel_width = *kernel_height = 3; - half *kernel = new half[*kernel_width * *kernel_height * batch_size]; - for (int i = 0; i < batch_size; i++) - { - ifstream kernel_data("dataset/vggdata/kernel/" + name +".txt"); - for (int j = i * (*kernel_width * *kernel_height); j < (i + 1) * (*kernel_width * *kernel_height); j++) - { - kernel_data >> temp; - kernel[j]=__float2half(temp); - } - kernel_data.close(); - } - - return kernel; -} - -float *LoadConvWeight(int *&fea_width, int *&fea_height, int batch_size, int index) -{ - string *name = new string[49]; - - ifstream fea_name("dataset/resnetdataset_all/feature_name.txt"); - for (int i = 0; i < 49; i++) - { - fea_name >> name[i]; - } - fea_name.close(); - - // int shape[11] = {224, 112, 56, 56, 56, 28, 28, 28, 14, 14, 14}; - // int shape = 28; - int shape[49] = {224, - 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 7, 7, 7, 7, 7, 7, 7}; - - *fea_width = *fea_height = shape[index]; - // *fea_width = *fea_height = shape; - - float *feature = new float[batch_size * *fea_width * *fea_height]; - - for (int i = 0; i < batch_size; i++) - { - ifstream fea_data("dataset/resnetdataset_all/feature/" + name[index]); - for (int j = i * (*fea_width * *fea_height); j < (i + 1) * (*fea_width * *fea_height); j++) - fea_data >> feature[j]; - fea_data.close(); - } - - return feature; -} - -half *LoadVggConvWeigth(int *&fea_width, int *&fea_height, int batch_size, int index) -{ - string *name = new string[16]; - - ifstream fea_name("dataset/vggdata/all_conv_name.txt"); - for (int i = 0; i < 16; i++) - { - fea_name >> name[i]; - } - fea_name.close(); - - // int shape[11] = {224, 112, 56, 56, 56, 28, 28, 28, 14, 14, 14}; - // int shape = 28; - int shape[16] = {224, 224, 112, 112, 56, 56, 56, 56, 28, 28, 28, 28, 14, 14, 14, 14}; - - *fea_width = *fea_height = shape[index]; - // *fea_width = *fea_height = shape; - float temp; - half *feature = new half[batch_size * *fea_width * *fea_height]; - - for (int i = 0; i < batch_size; i++) - { - ifstream fea_data("dataset/vggdata/feature/" + name[index]); - for (int j = i * (*fea_width * *fea_height); j < (i + 1) * (*fea_width * *fea_height); j++) - { - fea_data >> temp; - feature[j]=__float2half(temp); - } - fea_data.close(); - } - - return feature; -} - -float *LoadspConvWeigth(int *&fea_width, int *&fea_height, int batch_size, int index) -{ - string *name = new string[9]; - - ifstream fea_name("sparsity/dataset/sparsity_name.txt"); - for (int i = 0; i < 9; i++) - { - fea_name >> name[i]; - } - fea_name.close(); - - // int shape[11] = {224, 112, 56, 56, 56, 28, 28, 28, 14, 14, 14}; - int shape = 28; - - // *fea_width = *fea_height = shape[index]; - *fea_width = *fea_height = shape; - - float *feature = new float[batch_size * *fea_width * *fea_height]; - - for (int i = 0; i < batch_size; i++) - { - ifstream fea_data("sparsity/dataset/" + name[index]); - for (int j = i * (*fea_width * *fea_height); j < (i + 1) * (*fea_width * *fea_height); j++) - fea_data >> feature[j]; - fea_data.close(); - } - - return feature; -} - -int main(int argc, char *argv[]) -{ - int batch_size = atoi(argv[1]); - // load kernel file name - // string kername_name[49]; - // ifstream open_kernel("/home/syt/conv_pool/conv_pool/dataset/resnetdataset_all/kernel_name.txt"); - string kername_name[16]; - ifstream 
open_kernel("dataset/vggdata/kernel_name.txt"); - for (int i = 0; i < 16; i++) - { - open_kernel >> kername_name[i]; - } - open_kernel.close(); - - ofstream time_file(string("ECR/cudnn/time_vgg/batchsize") + argv[1] + string(".txt")); // string() makes operator+ concatenate the path pieces - // ofstream time_file(string("/home/syt/conv_pool/conv_pool/ECR/cudnn/time_gemm/batchsize") + argv[1] + string(".txt")); - - for (int t = 0; t < 16; t++) - { - cudnnHandle_t cudnn; - CUDNN_CALL(cudnnCreate(&cudnn)); - - // input - // int batch_size = 2; - int *fea_width_ = new int; - int *fea_height_ = new int; - half *matrix; - // matrix = LoadConvWeight(fea_width_, fea_height_, batch_size, i); - matrix = LoadVggConvWeigth(fea_width_, fea_height_, batch_size, t); - // matrix = LoadspConvWeigth(fea_width_, fea_height_, batch_size, i); - - const int in_n = batch_size; - const int in_c = 1; - const int in_h = *fea_width_; - const int in_w = *fea_height_; - const int in_size = in_h * in_w * in_c * in_n; - - // cout << "in:" << in_n << " " << in_c << " " << in_h << " " << in_w << endl; - - // filter - // int stride = 1; - int *kernel_width_ = new int; - int *kernel_height_ = new int; - half *kernel; - - // kernel = LoadKernel(kername_name[i], kernel_width_, kernel_height_, batch_size, i); - - kernel = LoadvggKernel(kername_name[t], kernel_width_, kernel_height_, batch_size); - const int filt_k = 1; - const int filt_c = 1; - const int filt_h = *kernel_width_; - const int filt_w = *kernel_height_; - const int file_size = filt_h * filt_w * filt_c * filt_k; - - // cout << "ker:" << filt_k << " " << filt_c << " " << filt_h << " " << filt_w << endl; - - // start timing - cudaEvent_t start, stop; - float elapsed_time = 0.0; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, 0); - - cudnnTensorDescriptor_t in_desc; - CUDNN_CALL(cudnnCreateTensorDescriptor(&in_desc)); - CUDNN_CALL(cudnnSetTensor4dDescriptor( - in_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_HALF, - in_n, in_c, in_h, in_w)); - - half *in_data; - CUDA_CALL(cudaMalloc( - &in_data, in_n * in_c * in_h * in_w * sizeof(half))); - - cudnnFilterDescriptor_t filt_desc; - CUDNN_CALL(cudnnCreateFilterDescriptor(&filt_desc)); - CUDNN_CALL(cudnnSetFilter4dDescriptor( - filt_desc, CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW, - filt_k, filt_c, filt_h, filt_w)); - - half *filt_data; - CUDA_CALL(cudaMalloc( - &filt_data, filt_k * filt_c * filt_h * filt_w * sizeof(half))); - - // convolution - const int pad_h = 0; - const int pad_w = 0; - const int str_h = 1; - const int str_w = 1; - const int dil_h = 1; - const int dil_w = 1; - - cudnnConvolutionDescriptor_t conv_desc; - CUDNN_CALL(cudnnCreateConvolutionDescriptor(&conv_desc)); - CUDNN_CALL(cudnnSetConvolution2dDescriptor( - conv_desc, - pad_h, pad_w, str_h, str_w, dil_h, dil_w, - CUDNN_CONVOLUTION, CUDNN_DATA_HALF)); - // cudnnSetConvolutionMathType(conv_desc, CUDNN_TENSOR_OP_MATH); - // output - int out_n; - int out_c; - int out_h; - int out_w; - - CUDNN_CALL(cudnnGetConvolution2dForwardOutputDim( - conv_desc, in_desc, filt_desc, - &out_n, &out_c, &out_h, &out_w)); - // cout << "out:" << out_n << " " << out_c << " " << out_h << " " << out_w << endl; - - cudnnTensorDescriptor_t out_desc; - CUDNN_CALL(cudnnCreateTensorDescriptor(&out_desc)); - CUDNN_CALL(cudnnSetTensor4dDescriptor( - out_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_HALF, - out_n, out_c, out_h, out_w)); - - half *out_data; - CUDA_CALL(cudaMalloc( - &out_data, out_n * out_c * out_h * out_w * sizeof(half))); - - // algorithm - cudnnConvolutionFwdAlgo_t algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM; -
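- // sketch (not in the original file): on cuDNN 7+ the algorithm can be chosen at run time instead of hard-coded, e.g. - // cudnnConvolutionFwdAlgoPerf_t perf[8]; int returned = 0; - // CUDNN_CALL(cudnnGetConvolutionForwardAlgorithm_v7( - // cudnn, in_desc, filt_desc, conv_desc, out_desc, 8, &returned, perf)); - // perf[0].algo then holds the algorithm cuDNN ranks fastest for these descriptors.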
- // = CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED; - // = CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD; - // = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM; - // = CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING; - // = CUDNN_CONVOLUTION_FWD_ALGO_GEMM; - - // CUDNN_CALL(cudnnGetConvolutionForwardAlgorithm( - // cudnn, - // in_desc, filt_desc, conv_desc, out_desc, - // CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, 0, &algo)); - - // workspace - size_t ws_size; - CUDNN_CALL(cudnnGetConvolutionForwardWorkspaceSize( - cudnn, in_desc, filt_desc, conv_desc, out_desc, algo, &ws_size)); - - half *ws_data; - CUDA_CALL(cudaMalloc(&ws_data, ws_size)); - - // perform - float alpha = 1.0; - float beta = 0.0; - - /* - float input_data[25 * 3] = {0, 0, 15, 8, 0, 22, 0, 0, 23, 0, 0, 8, 0, 0, 0, 0, 0, 19, 0, 0, 10, 4, 0, 0, 22, - 0, 0, 15, 8, 0, 22, 0, 0, 23, 0, 0, 8, 0, 0, 0, 0, 0, 19, 0, 0, 10, 4, 0, 0, 22, - 0, 0, 15, 8, 0, 22, 0, 0, 23, 0, 0, 8, 0, 0, 0, 0, 0, 19, 0, 0, 10, 4, 0, 0, 22}; - float kernel_data[9 * 3] = {0, 1, 0, 1, 0, 1, 0, 1, 0, - 0, 1, 0, 1, 0, 1, 0, 1, 0, - 0, 1, 0, 1, 0, 1, 0, 1, 0}; - */ - - cudaMemcpy(in_data, matrix, in_size * sizeof(half), cudaMemcpyHostToDevice); - cudaMemcpy(filt_data, kernel, file_size * sizeof(half), cudaMemcpyHostToDevice); - - CUDNN_CALL(cudnnConvolutionForward( - cudnn, - &alpha, in_desc, in_data, filt_desc, filt_data, - conv_desc, algo, ws_data, ws_size, - &beta, out_desc, out_data)); - - // results - // std::cout << "in_data:" << std::endl; - // print(in_data, in_n, in_c, in_h, in_w); - - // std::cout << "filt_data:" << std::endl; - // print(filt_data, filt_k, filt_c, filt_h, filt_w); - - // std::cout << "out_data:" << std::endl; - // print(out_data, out_n, out_c, out_h, out_w); - - int result_size = out_n * out_c * out_h * out_w; - half *result = new half[result_size]; - cudaMemcpy(result, out_data, result_size * sizeof(half), cudaMemcpyDeviceToHost); - - // finalizing - CUDA_CALL(cudaFree(ws_data)); - CUDA_CALL(cudaFree(out_data)); - CUDNN_CALL(cudnnDestroyTensorDescriptor(out_desc)); - CUDNN_CALL(cudnnDestroyConvolutionDescriptor(conv_desc)); - CUDA_CALL(cudaFree(filt_data)); - CUDNN_CALL(cudnnDestroyFilterDescriptor(filt_desc)); - CUDA_CALL(cudaFree(in_data)); - CUDNN_CALL(cudnnDestroyTensorDescriptor(in_desc)); - CUDNN_CALL(cudnnDestroy(cudnn)); - - // stop timing and read the elapsed time - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - cudaEventElapsedTime(&elapsed_time, start, stop); - // cout << elapsed_time << endl; - cudaEventDestroy(start); - cudaEventDestroy(stop); - - // write the elapsed time - // ofstream time_file("time/time_batchsize1.txt", ios::app); - time_file << elapsed_time << endl; - // time_file.close(); - // write the results to a file - // string file_name = "/home/lfa/fsy/syt/conv_pool/ECR/cudnn/output_vgg/output" + to_string(i); - // // string file_name = "/home/syt/conv_pool/conv_pool/ECR/cudnn/out_gemm/output" + to_string(i); - - // ofstream output_file(file_name); - // for (int i = 0; i < result_size; i++) - // { - // output_file << result[i] << " "; - // if ((i + 1) % out_w == 0) - // output_file << "\n"; - // } - // output_file.close(); - // cout << "res:" << result[1] << endl; - delete[] result; // result, matrix and kernel were allocated with new[], so delete[] them - delete[] matrix; - delete[] kernel; - } - - time_file.close(); - - return 0; -} \ No newline at end of file diff --git a/ECRv2/times_vgg/singleECR_times.txt b/ECRv2/times_vgg/singleECR_times.txt new file mode 100644 index 0000000..053d857 --- /dev/null +++ b/ECRv2/times_vgg/singleECR_times.txt @@ -0,0 +1,16 @@ +Measured time: 0.136416 +Measured time: 0.986848 +Measured time: 0.540448 +Measured time: 0.542496
+Measured time: 0.334304 +Measured time: 0.338272 +Measured time: 0.376416 +Measured time: 0.340064 +Measured time: 0.239392 +Measured time: 0.243744 +Measured time: 0.24096 +Measured time: 0.237184 +Measured time: 0.224032 +Measured time: 0.238336 +Measured time: 0.246112 +Measured time: 0.1368 diff --git a/dataset/resnet/kernel_name.txt b/dataset/resnet/kernel_name.txt index d913b29..fdac1e0 100644 --- a/dataset/resnet/kernel_name.txt +++ b/dataset/resnet/kernel_name.txt @@ -46,4 +46,4 @@ layer4.1.conv2.weight layer4.1.conv3.weight layer4.2.conv1.weight layer4.2.conv2.weight -layer4.2.conv3.weight \ No newline at end of file +layer4.2.conv3.weight diff --git a/dataset/vggdata/all_conv_name.txt b/dataset/vggdata/all_conv_name.txt index d917be2..94b447b 100644 --- a/dataset/vggdata/all_conv_name.txt +++ b/dataset/vggdata/all_conv_name.txt @@ -13,4 +13,4 @@ feature_26__28_28.txt feature_29__14_14.txt feature_31__14_14.txt feature_33__14_14.txt -feature_35__14_14.txt \ No newline at end of file +feature_35__14_14.txt diff --git a/dataset/vggdata/kernel_name.txt b/dataset/vggdata/kernel_name.txt index 049b660..d67e76e 100644 --- a/dataset/vggdata/kernel_name.txt +++ b/dataset/vggdata/kernel_name.txt @@ -1,16 +1,16 @@ -features.0.weight -features.2.weight -features.5.weight -features.7.weight -features.10.weight -features.12.weight -features.14.weight -features.16.weight -features.19.weight -features.21.weight -features.23.weight -features.25.weight -features.28.weight -features.30.weight -features.32.weight -features.34.weight \ No newline at end of file +features.0.weight.txt +features.2.weight.txt +features.5.weight.txt +features.7.weight.txt +features.10.weight.txt +features.12.weight.txt +features.14.weight.txt +features.16.weight.txt +features.19.weight.txt +features.21.weight.txt +features.23.weight.txt +features.25.weight.txt +features.28.weight.txt +features.30.weight.txt +features.32.weight.txt +features.34.weight.txt diff --git a/speedup/resnet/cuDNN_fast b/speedup/resnet/cuDNN_fast new file mode 100644 index 0000000..e69de29 diff --git a/speedup/resnet/cuDNN_fft b/speedup/resnet/cuDNN_fft new file mode 100644 index 0000000..b1c7874 --- /dev/null +++ b/speedup/resnet/cuDNN_fft @@ -0,0 +1,49 @@ +Measured time: 8.62298 +Measured time: 0.840128 +Measured time: 0.572864 +Measured time: 0.605728 +Measured time: 0.601792 +Measured time: 0.555808 +Measured time: 0.565696 +Measured time: 0.601664 +Measured time: 0.623968 +Measured time: 0.604608 +Measured time: 0.715872 +Measured time: 0.542624 +Measured time: 0.53648 +Measured time: 0.570528 +Measured time: 0.522624 +Measured time: 0.562688 +Measured time: 0.569184 +Measured time: 0.524544 +Measured time: 0.559968 +Measured time: 0.541312 +Measured time: 0.517248 +Measured time: 0.543808 +Measured time: 0.543776 +Measured time: 0.655936 +Measured time: 0.523232 +Measured time: 0.537856 +Measured time: 0.572896 +Measured time: 0.567968 +Measured time: 0.536096 +Measured time: 0.65584 +Measured time: 0.51088 +Measured time: 0.611776 +Measured time: 0.537056 +Measured time: 0.509952 +Measured time: 0.546304 +Measured time: 0.573504 +Measured time: 0.529952 +Measured time: 0.64896 +Measured time: 0.62672 +Measured time: 0.52288 +Measured time: 0.599488 +Measured time: 0.531488 +Measured time: 0.526304 +Measured time: 0.555904 +Measured time: 0.558848 +Measured time: 0.530336 +Measured time: 0.536096 +Measured time: 0.512736 + diff --git a/speedup/resnet/cuDNN_gemm.txt b/speedup/resnet/cuDNN_gemm.txt new file mode 100644 index 0000000..1837be2 
--- /dev/null +++ b/speedup/resnet/cuDNN_gemm.txt @@ -0,0 +1,49 @@ +Measured time: 0.591488 +Measured time: 0.556512 +Measured time: 0.600864 +Measured time: 0.521696 +Measured time: 0.561472 +Measured time: 0.51952 +Measured time: 0.541568 +Measured time: 0.52384 +Measured time: 0.648832 +Measured time: 0.839616 +Measured time: 0.542048 +Measured time: 0.542144 +Measured time: 0.600512 +Measured time: 0.51696 +Measured time: 0.522432 +Measured time: 0.60672 +Measured time: 0.517504 +Measured time: 0.505792 +Measured time: 0.51184 +Measured time: 0.520064 +Measured time: 0.520576 +Measured time: 0.506784 +Measured time: 0.560256 +Measured time: 0.511968 +Measured time: 0.571264 +Measured time: 0.746624 +Measured time: 0.548608 +Measured time: 0.654208 +Measured time: 0.645408 +Measured time: 0.521728 +Measured time: 0.524352 +Measured time: 0.52192 +Measured time: 0.512096 +Measured time: 0.522912 +Measured time: 0.577408 +Measured time: 0.545984 +Measured time: 0.704896 +Measured time: 0.526496 +Measured time: 0.519776 +Measured time: 0.574656 +Measured time: 0.503264 +Measured time: 0.540352 +Measured time: 0.534624 +Measured time: 0.52304 +Measured time: 0.536448 +Measured time: 0.602144 +Measured time: 0.52032 +Measured time: 0.52544 + diff --git a/speedup/resnet/cuDNN_imp_gemm.tx b/speedup/resnet/cuDNN_imp_gemm.tx new file mode 100644 index 0000000..6cf77bf --- /dev/null +++ b/speedup/resnet/cuDNN_imp_gemm.tx @@ -0,0 +1,49 @@ +Measured time: 0.514272 +Measured time: 0.669888 +Measured time: 0.526432 +Measured time: 0.521856 +Measured time: 0.58384 +Measured time: 0.53104 +Measured time: 0.572064 +Measured time: 0.54096 +Measured time: 0.524896 +Measured time: 0.537056 +Measured time: 0.567456 +Measured time: 0.541568 +Measured time: 0.53792 +Measured time: 0.528768 +Measured time: 0.511168 +Measured time: 0.572448 +Measured time: 0.531296 +Measured time: 0.566336 +Measured time: 0.598336 +Measured time: 0.501664 +Measured time: 0.538368 +Measured time: 0.529536 +Measured time: 0.525472 +Measured time: 0.589056 +Measured time: 0.592128 +Measured time: 0.573312 +Measured time: 0.615904 +Measured time: 0.598208 +Measured time: 0.536064 +Measured time: 0.520032 +Measured time: 0.57664 +Measured time: 0.543328 +Measured time: 0.663456 +Measured time: 0.825056 +Measured time: 0.624608 +Measured time: 0.528288 +Measured time: 0.604736 +Measured time: 0.582688 +Measured time: 0.508096 +Measured time: 0.589376 +Measured time: 0.54144 +Measured time: 0.517952 +Measured time: 0.584224 +Measured time: 0.535136 +Measured time: 0.5312 +Measured time: 0.530144 +Measured time: 0.57824 +Measured time: 0.708896 + diff --git a/speedup/resnet/singleECR.txt b/speedup/resnet/singleECR.txt new file mode 100644 index 0000000..641e000 --- /dev/null +++ b/speedup/resnet/singleECR.txt @@ -0,0 +1,49 @@ +Measured time: 0.18864 +Measured time: 0.270976 +Measured time: 0.190208 +Measured time: 0.217824 +Measured time: 0.352704 +Measured time: 0.239424 +Measured time: 0.193088 +Measured time: 0.379744 +Measured time: 0.193664 +Measured time: 0.205568 +Measured time: 0.352384 +Measured time: 0.167872 +Measured time: 0.170112 +Measured time: 0.240608 +Measured time: 0.26048 +Measured time: 0.176928 +Measured time: 0.24176 +Measured time: 0.166144 +Measured time: 0.17376 +Measured time: 0.245152 +Measured time: 0.162848 +Measured time: 0.16928 +Measured time: 0.279104 +Measured time: 0.171776 +Measured time: 0.172864 +Measured time: 0.248128 +Measured time: 0.170496 +Measured time: 0.169888 +Measured time: 0.243072 
+Measured time: 0.170592 +Measured time: 0.172448 +Measured time: 0.24064 +Measured time: 0.1696 +Measured time: 0.166176 +Measured time: 0.243072 +Measured time: 0.165248 +Measured time: 0.171904 +Measured time: 0.231264 +Measured time: 0.162944 +Measured time: 0.169472 +Measured time: 0.29504 +Measured time: 0.167232 +Measured time: 0.1672 +Measured time: 0.23264 +Measured time: 0.172672 +Measured time: 0.169216 +Measured time: 0.229152 +Measured time: 0.169536 + diff --git a/speedup/resnet/speedup.ipynb b/speedup/resnet/speedup.ipynb index 31527d9..aec7f22 100644 --- a/speedup/resnet/speedup.ipynb +++ b/speedup/resnet/speedup.ipynb @@ -38,16 +38,16 @@ "\n", "\n", "path_tc_c = \"../../ECR/cudnn/time_resnet/batchsize32.txt\"\n", - "path_tc_p = \"../../PECR/cudnn/time_resnet/batchsize32.txt\"\n", + "#path_tc_p = \"../../PECR/cudnn/time_resnet/batchsize32.txt\"\n", "\n", "time_c = pd.read_csv(path_tc_c, header=None, dtype=float)\n", - "time_p = pd.read_csv(path_tc_p, header=None, dtype=float)\n", + "#time_p = pd.read_csv(path_tc_p, header=None, dtype=float)\n", "\n", "time_cudnn = []\n", - "time_cudnn.append(time_p.iat[0, 0])\n", + "#time_cudnn.append(time_p.iat[0, 0])\n", "for i in range(0, 47):\n", " time_cudnn.append(time_c.iat[i, 0])\n", - "time_cudnn.append(time_p.iat[1, 0])\n", + "#time_cudnn.append(time_p.iat[1, 0])\n", "\n", "# print(time_cudnn)" ]