diff --git a/.gitignore b/.gitignore
index 956d025..479353d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -68,6 +68,7 @@ logic
 digit5x7
 pima
 save_test
+blas_perf
 .vscode
 mnist
 build/
diff --git a/Makefile b/Makefile
index da029bb..5cdd15b 100644
--- a/Makefile
+++ b/Makefile
@@ -11,11 +11,11 @@ OBJS = ann.o tensor.o
 DEPS = ann.h tensor.h ann_config.h
 
 # use cblas
-#CFLAGS += -g -O3 -DUSE_BLAS -DCBLAS -I.
+#CFLAGS += -g -O2 -DUSE_BLAS -DCBLAS -I.
 #LFLAGS += -L. -lcblas
 
 # use openblas
-CFLAGS += -g -O3 -DUSE_BLAS -I"/opt/OpenBLAS/include"
+CFLAGS += -g -O2 -DUSE_BLAS -I"/opt/OpenBLAS/include"
 LFLAGS += -L/opt/OpenBLAS/lib/ -lopenblas
 
 #-DMKL_ILP64 -m64 -I"${MKLROOT}/include"
@@ -24,7 +24,7 @@ LFLAGS += -L/opt/OpenBLAS/lib/ -lopenblas
 # -DMKL_ILP64 -m64 -I"${MKLROOT}/include"
 # ${MKLROOT}/lib/libmkl_intel_ilp64.a ${MKLROOT}/lib/libmkl_tbb_thread.a ${MKLROOT}/lib/libmkl_core.a -L${TBBROOT}/lib -ltbb -lc++ -lpthread -lm -ldl
 
-all: mnist logic digit5x7 save_test save_test_binary
+all: mnist logic digit5x7 save_test save_test_binary blas_perf
 
 $(TARGET): $(OBJS) mnist.o
 	$(CC) $(LFLAGS) -o $@ $^
@@ -41,6 +41,9 @@ save_test: $(OBJS) save_test.o
 save_test_binary: $(OBJS) save_test_binary.o
 	$(CC) $(LFLAGS) -o $@ $^
 
+blas_perf: $(OBJS) blas_perf.o
+	$(CC) $(LFLAGS) -o $@ $^
+
 %.o: %.c $(DEPS)
 	$(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@
 
diff --git a/blas_perf.c b/blas_perf.c
new file mode 100644
index 0000000..f36543f
--- /dev/null
+++ b/blas_perf.c
@@ -0,0 +1,164 @@
+//------------------------------------------------------
+//
+// Copyright 2023 Mark Seminatore. All rights reserved.
+//------------------------------------------------------
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <cblas.h>
+
+#ifdef WIN32
+#	include <windows.h>
+#endif
+
+#define MAX_SIZE 1024
+
+// Benchmark operands. They are never explicitly filled, so as file-scope
+// objects they start out zeroed; only the call timings are reported.
+float x[MAX_SIZE], y[MAX_SIZE];
+float a[MAX_SIZE * MAX_SIZE], b[MAX_SIZE * MAX_SIZE], c[MAX_SIZE * MAX_SIZE];
+
+// A single timestamp in the platform's native representation.
+struct timer
+{
+#ifdef WIN32
+	LARGE_INTEGER t;
+#else
+	struct timespec t;
+#endif
+
+};
+
+//------------------------------------------------------
+// Capture the current time into *t.
+//
+// Both branches read a wall-clock source. The POSIX branch
+// uses CLOCK_MONOTONIC instead of CLOCK_PROCESS_CPUTIME_ID
+// because process CPU time adds up the time spent by every
+// thread, which overstates the duration of a threaded BLAS
+// call and understates its GFlops.
+//------------------------------------------------------
+void timer_get_time(struct timer* t)
+{
+#ifdef WIN32
+	QueryPerformanceCounter(&t->t);
+#else
+	clock_gettime(CLOCK_MONOTONIC, &t->t);
+#endif
+
+}
+
+//------------------------------------------------------
+// Return the time from *t1 to *t2 in seconds.
+//------------------------------------------------------
+float timer_get_delta(struct timer *t1, struct timer *t2)
+{
+	float dt;
+
+#ifdef WIN32
+	LARGE_INTEGER freq;
+	QueryPerformanceFrequency(&freq);
+
+	dt = (t2->t.QuadPart - t1->t.QuadPart) / (float)freq.QuadPart;
+
+#else
+	// The nanosecond difference is negative when t2 has a smaller tv_nsec
+	// than t1; summing in double keeps short intervals from being rounded
+	// away when the two terms nearly cancel.
+	double seconds = (double)(t2->t.tv_sec - t1->t.tv_sec);
+	double ns = (double)(t2->t.tv_nsec - t1->t.tv_nsec);
+	dt = (float)(seconds + ns / 1000000000.0);
+#endif
+
+	return dt;
+}
+
+//------------------------------------------------------
+// Convert a floating-point operation count and a duration
+// in seconds to GFlops. Yields 0 when dt is not positive
+// (interval below timer resolution) instead of dividing
+// by zero.
+//------------------------------------------------------
+static float gflops(float flops, float dt)
+{
+	return dt > 0.0f ? flops / 1000000000 / dt : 0.0f;
+}
+
+//------------------------------------------------------
+// Time one cblas_sgemm() call for each square size
+// 4, 8, ..., MAX_SIZE and print the achieved GFlops.
+//------------------------------------------------------
+void test_gemm(void)
+{
+	struct timer t1, t2;
+	float dt;
+
+	printf("Testing performance of cblas_sgemm()\n\n");
+
+	for (int i = 4; i <= MAX_SIZE; i <<= 1)
+	{
+		// A is m x k, B is k x n, C is m x n; all square here
+		const int m = i, n = i, k = i;
+
+		timer_get_time(&t1);
+
+		// row-major, untransposed: lda = k, ldb = n, ldc = n
+		cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0f, a, k, b, n, 1.0f, c, n);
+
+		timer_get_time(&t2);
+
+		dt = timer_get_delta(&t1, &t2);
+
+		// 2 * m * n * k operations: one multiply and one add per term
+		printf("%4d: %5.2f GFlops in %5.2fs\n", i, gflops(2.0f * m * n * k, dt), dt);
+	}
+}
+
+//------------------------------------------------------
+// Time one cblas_sger() call for each square size
+// 2, 4, ..., MAX_SIZE and print the achieved GFlops.
+// NOTE(review): main() does not call this yet.
+//------------------------------------------------------
+void test_ger(void)
+{
+	struct timer t1, t2;
+	float dt;
+
+	printf("Testing performance of cblas_sger()\n\n");
+
+	for (int i = 2; i <= MAX_SIZE; i <<= 1)
+	{
+		const int m = i, n = i;
+
+		timer_get_time(&t1);
+
+		// row-major m x n matrix: lda = n
+		cblas_sger(CblasRowMajor, m, n, 1.0f, x, 1, y, 1, a, n);
+
+		timer_get_time(&t2);
+
+		dt = timer_get_delta(&t1, &t2);
+
+		printf("%4d: %5.2f GFlops in %5.2fs\n", i, gflops(2.0f * m * n, dt), dt);
+	}
+}
+
+//------------------------------------------------------
+// Print the OpenBLAS configuration, core name and
+// processor/thread counts, then run the GEMM benchmark.
+//------------------------------------------------------
+int main(int argc, char *argv[])
+{
+	(void)argc;
+	(void)argv;
+
+	//cblas_init();
+
+	printf( "%s\n", openblas_get_config());
+	printf(" CPU uArch: %s\n", openblas_get_corename());
+	printf("Cores/Threads: %d/%d\n\n", openblas_get_num_procs(), openblas_get_num_threads());
+
+	test_gemm();
+
+	return 0;
+}
diff --git a/mnist.c b/mnist.c
index 1aefe45..c78d330 100644
--- a/mnist.c
+++ b/mnist.c
@@ -167,9 +167,9 @@ int main(int argc, char *argv[])
 	cblas_init();
 	if (threads != -1)
 		cblas_set_num_threads(threads);
-//	printf( "%s\n", openblas_get_config());
-//	printf(" CPU uArch: %s\n", openblas_get_corename());
-//	printf(" Cores/Threads: %d/%d\n", openblas_get_num_procs(), openblas_get_num_threads());
+	printf( "%s\n", cblas_get_config());
+	printf(" CPU uArch: %s\n", cblas_get_corename());
+	printf(" Cores/Threads: %d/%d\n", cblas_get_num_procs(), cblas_get_num_threads());
 #else
 	if (threads != -1)
 		openblas_set_num_threads(threads);