Skip to content

Commit

Permalink
1. opt O0&O1 2. O1 need fix mnist bug
Browse files Browse the repository at this point in the history
  • Loading branch information
Zepan committed Aug 30, 2022
1 parent b28173f commit 6579d8e
Show file tree
Hide file tree
Showing 12 changed files with 721 additions and 86 deletions.
10 changes: 5 additions & 5 deletions benchmark.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,11 @@ Optimization for RISC-V MCU which have V-extend instructions (like T-Head C906),
BL808 C906 core run mbnet 0.25, 128x128x3 input (mdl in psram, VLEN=128, cpu run in 384M, O2)
|ARCH|MDL_TYPE|OPT0 time|OPT1 time|
|---|---|---|---|
|TM_ARCH_CPU | INT8| 315ms| 194ms|
|TM_ARCH_CPU | FP32| 300ms| 258ms|
|TM_ARCH_RV64V| INT8| 178ms| 141ms|
|TM_ARCH_RV64V| FP32| 182ms| 156ms|
|TM_ARCH_RV64V| FP16| 161ms| 125ms|
|TM_ARCH_CPU | INT8| 185ms| 150ms|
|TM_ARCH_CPU | FP32| 258ms| 215ms|
|TM_ARCH_RV64V| INT8| 148ms| 113ms|
|TM_ARCH_RV64V| FP32| 190ms| 145ms|
|TM_ARCH_RV64V| FP16| 154ms| 97ms|

# compare to other infer library
## NNoM
Expand Down
8 changes: 4 additions & 4 deletions examples/auto_test/auto_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def runcmd(cmd):
cmd="cd ../../tools/ && python3 h5_to_tflite.py h5/mnist_valid.h5 tflite/mnist_valid_f.tflite 0 && python3 tflite2tmdl.py tflite/mnist_valid_f.tflite tmdl/mnist_valid_f.tmdl fp32 1 28,28,1 10"
res = runcmd(cmd)
print(res[-1])
if res[-1] == "Saved to tmdl/mnist_valid_f.tmdl, tmdl/mnist_valid_f.h":
if res[-1] == "Saved to tinymaix model header to tmdl/mnist_valid_f.h":
print("====Step1.1.1: OK~")
else:
print("====Step1.1.1: ERR!!!")
Expand All @@ -50,7 +50,7 @@ def runcmd(cmd):
cmd="cd ../../tools/ && python3 h5_to_tflite.py h5/mnist_valid.h5 tflite/mnist_valid_q.tflite 1 quant_img_mnist/ 0to1 && python3 tflite2tmdl.py tflite/mnist_valid_q.tflite tmdl/mnist_valid_q.tmdl int8 1 28,28,1 10"
res = runcmd(cmd)
print(res[-1])
if res[-1] == "Saved to tmdl/mnist_valid_q.tmdl, tmdl/mnist_valid_q.h":
if res[-1] == "Saved to tinymaix model header to tmdl/mnist_valid_q.h":
print("====Step1.2.1: OK~")
else:
print("====Step1.2.1: ERR!!!")
Expand Down Expand Up @@ -80,7 +80,7 @@ def runcmd(cmd):
cmd="cd ../../tools/ && python3 h5_to_tflite.py h5/mbnet128_0.25.h5 tflite/mbnet128_0.25_f.tflite 0 && python3 tflite2tmdl.py tflite/mbnet128_0.25_f.tflite tmdl/mbnet128_0.25_f.tmdl fp32 1 128,128,3 1000"
res = runcmd(cmd)
print(res[-1])
if res[-1] == "Saved to tmdl/mbnet128_0.25_f.tmdl, tmdl/mbnet128_0.25_f.h":
if res[-1] == "Saved to tinymaix model header to tmdl/mbnet128_0.25_f.h":
print("====Step2.1.1: OK~")
else:
print("====Step2.1.1: ERR!!!")
Expand Down Expand Up @@ -109,7 +109,7 @@ def runcmd(cmd):
cmd="cd ../../tools/ && python3 h5_to_tflite.py h5/mbnet128_0.25.h5 tflite/mbnet128_0.25_q.tflite 1 quant_img128/ 0to1 && python3 tflite2tmdl.py tflite/mbnet128_0.25_q.tflite tmdl/mbnet128_0.25_q.tmdl int8 1 128,128,3 1000"
res = runcmd(cmd)
print(res[-1])
if res[-1] == "Saved to tmdl/mbnet128_0.25_q.tmdl, tmdl/mbnet128_0.25_q.h":
if res[-1] == "Saved to tinymaix model header to tmdl/mbnet128_0.25_q.h":
print("====Step2.2.1: OK~")
else:
print("====Step2.2.1: ERR!!!")
Expand Down
20 changes: 11 additions & 9 deletions include/tm_port.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ limitations under the License.

/******************************* PORT CONFIG ************************************/
#define TM_ARCH TM_ARCH_CPU
#define TM_OPT_LEVEL TM_OPT1
#define TM_OPT_LEVEL TM_OPT0
#define TM_MDL_TYPE TM_MDL_INT8
#define TM_FASTSCALE (0) //enable if your chip don't have FPU, may speed up 1/3, but decrease accuracy
#define TM_ENABLE_STAT (1) //enable mdl stat functions
Expand Down Expand Up @@ -62,18 +62,20 @@ limitations under the License.
#if TM_EN_PERF
#define TM_GET_TICK(x) __ASM volatile("csrr %0, mcycle" : "=r"(x)); //edit your self

#define TM_TICK_PERUS (1000) //sysconf(_SC_CLK_TCK)/1000000)
#define TM_PERF_INIT() uint64_t _perf_t0, _perf_t1;
#define TM_PERF_REG(x) uint64_t x=0;
#define TM_PERF_START() TM_GET_TICK(_perf_t0);
#define TM_PERF_ADD(x) {TM_GET_TICK(_perf_t1);(x)+=(_perf_t1-_perf_t0);TM_GET_TICK(_perf_t0);};
#define TM_TICK_PERUS (380) //sysconf(_SC_CLK_TCK)/1000000)
#define TM_PERF_REG(x) uint64_t x=0;
#define TM_PERF_EXTREG(x) extern uint64_t x;
#define TM_PERF_INIT(x) uint64_t _##x##_t0, _##x##_t1;
#define TM_PERF_START(x) TM_GET_TICK(_##x##_t0);
#define TM_PERF_ADD(x) {TM_GET_TICK(_##x##_t1);(x)+=(_##x##_t1-_##x##_t0);TM_GET_TICK(_##x##_t0);};
#define TM_PERF_PRINT(x) TM_PRINTF("PERF "#x": %ld us\r\n", (x)/TM_TICK_PERUS)
#else
#define TM_GET_TICK(x)
#define TM_TICK_PERUS
#define TM_PERF_INIT()
#define TM_PERF_REG(x)
#define TM_PERF_START()
#define TM_PERF_REG(x)
#define TM_PERF_EXTREG(x)
#define TM_PERF_INIT(x)
#define TM_PERF_START(x)
#define TM_PERF_ADD(x)
#define TM_PERF_PRINT(x)
#endif
Expand Down
37 changes: 37 additions & 0 deletions src/arch_cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,35 @@ TM_INLINE void tm_dot_prod(mtype_t* sptr, mtype_t* kptr,uint32_t size, sumtype_t
return;
}

/*
 * Compute two dot products of the same input vector against two kernels
 * in a single pass over the input.
 *
 * sptr   : input vector, `size` elements are read.
 * kptr   : two kernels stored back to back; the first is read from
 *          kptr[0 .. size-1], the second from kptr[size .. 2*size-1].
 * size   : number of elements in each dot product.
 * result : receives two sums; result[0] is the dot product with the first
 *          kernel, result[1] the dot product with the second kernel.
 */
TM_INLINE void tm_dot_prod_pack2(mtype_t* sptr, mtype_t* kptr, uint32_t size, sumtype_t* result)
{
    sumtype_t sum0 = 0;
    sumtype_t sum1 = 0;
    mtype_t* kptr0 = kptr;          //first kernel
    mtype_t* kptr1 = kptr+size;     //second kernel, directly after the first

    uint32_t i = 0;
    uint32_t cnt = (size>>3)<<3;    //largest multiple of 8 that is <= size

    //main part, manually unrolled 8x; i and cnt are both multiples of 8 here
    for(; i < cnt; ){
        sum0 += sptr[i]*kptr0[i]; sum1 += sptr[i]*kptr1[i]; i++;
        sum0 += sptr[i]*kptr0[i]; sum1 += sptr[i]*kptr1[i]; i++;
        sum0 += sptr[i]*kptr0[i]; sum1 += sptr[i]*kptr1[i]; i++;
        sum0 += sptr[i]*kptr0[i]; sum1 += sptr[i]*kptr1[i]; i++;
        sum0 += sptr[i]*kptr0[i]; sum1 += sptr[i]*kptr1[i]; i++;
        sum0 += sptr[i]*kptr0[i]; sum1 += sptr[i]*kptr1[i]; i++;
        sum0 += sptr[i]*kptr0[i]; sum1 += sptr[i]*kptr1[i]; i++;
        sum0 += sptr[i]*kptr0[i]; sum1 += sptr[i]*kptr1[i]; i++;
    }

    //tail: remaining (size % 8) elements.
    //Each kernel must accumulate into its own sum; adding the second
    //kernel's products to sum0 corrupts both outputs when size%8 != 0.
    for(; i < size; i++){
        sum0 += sptr[i]*kptr0[i];
        sum1 += sptr[i]*kptr1[i];
    }

    result[0] = sum0;
    result[1] = sum1;
    return;
}

TM_INLINE void tm_dot_prod_gap_3x3x1(mtype_t* sptr, mtype_t* kptr, uint32_t* k_oft, sumtype_t* result)
{
*result = sptr[k_oft[0]]*kptr[0] + sptr[k_oft[1]]*kptr[1] + sptr[k_oft[2]]*kptr[2] + \
Expand All @@ -48,6 +77,14 @@ TM_INLINE void tm_dot_prod_gap_3x3x1(mtype_t* sptr, mtype_t* kptr, uint32_t* k_o
return;
}

/*
 * Dot product of nine consecutive input elements with nine consecutive
 * kernel elements.
 *
 * sptr   : input, elements sptr[0..8] are read.
 * kptr   : kernel, elements kptr[0..8] are read.
 * result : receives the single resulting sum.
 */
TM_INLINE void tm_dot_prod_3x3x1(mtype_t* sptr, mtype_t* kptr, sumtype_t* result)
{
    //kept as one left-to-right expression so the arithmetic type and the
    //order of additions stay exactly as written
    result[0] =
        sptr[0]*kptr[0] +
        sptr[1]*kptr[1] +
        sptr[2]*kptr[2] +
        sptr[3]*kptr[3] +
        sptr[4]*kptr[4] +
        sptr[5]*kptr[5] +
        sptr[6]*kptr[6] +
        sptr[7]*kptr[7] +
        sptr[8]*kptr[8];
}



#else
Expand Down
Loading

0 comments on commit 6579d8e

Please sign in to comment.