Skip to content

Commit

Permalink
add perf test
Browse files Browse the repository at this point in the history
  • Loading branch information
mseminatore committed Dec 8, 2023
1 parent e6b3cc7 commit a891575
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 6 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ logic
digit5x7
pima
save_test
blas_perf
.vscode
mnist
build/
Expand Down
9 changes: 6 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ OBJS = ann.o tensor.o
DEPS = ann.h tensor.h ann_config.h

# use cblas
#CFLAGS += -g -O3 -DUSE_BLAS -DCBLAS -I.
#CFLAGS += -g -O2 -DUSE_BLAS -DCBLAS -I.
#LFLAGS += -L. -lcblas

# use openblas
CFLAGS += -g -O3 -DUSE_BLAS -I"/opt/OpenBLAS/include"
CFLAGS += -g -O2 -DUSE_BLAS -I"/opt/OpenBLAS/include"
LFLAGS += -L/opt/OpenBLAS/lib/ -lopenblas

#-DMKL_ILP64 -m64 -I"${MKLROOT}/include"
Expand All @@ -24,7 +24,7 @@ LFLAGS += -L/opt/OpenBLAS/lib/ -lopenblas
# -DMKL_ILP64 -m64 -I"${MKLROOT}/include"
# ${MKLROOT}/lib/libmkl_intel_ilp64.a ${MKLROOT}/lib/libmkl_tbb_thread.a ${MKLROOT}/lib/libmkl_core.a -L${TBBROOT}/lib -ltbb -lc++ -lpthread -lm -ldl

all: mnist logic digit5x7 save_test save_test_binary
all: mnist logic digit5x7 save_test save_test_binary blas_perf

$(TARGET): $(OBJS) mnist.o
$(CC) $(LFLAGS) -o $@ $^
Expand All @@ -41,6 +41,9 @@ save_test: $(OBJS) save_test.o
save_test_binary: $(OBJS) save_test_binary.o
$(CC) $(LFLAGS) -o $@ $^

blas_perf: $(OBJS) blas_perf.o
$(CC) $(LFLAGS) -o $@ $^

%.o: %.c $(DEPS)
$(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@

Expand Down
133 changes: 133 additions & 0 deletions blas_perf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
//------------------------------------------------------
//
// Copyright 2023 Mark Seminatore. All rights reserved.
//------------------------------------------------------
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <cblas.h>

#ifdef WIN32
# include <Windows.h>
#endif

// Largest vector length / square-matrix dimension exercised by the tests
#define MAX_SIZE 1024

// Vector operands (passed to cblas_sger as x and y)
float x[MAX_SIZE];
float y[MAX_SIZE];

// Matrix operands, each holding MAX_SIZE * MAX_SIZE floats
float a[MAX_SIZE * MAX_SIZE];
float b[MAX_SIZE * MAX_SIZE];
float c[MAX_SIZE * MAX_SIZE];

// Platform-specific timestamp holder. The single member `t` is a
// LARGE_INTEGER when WIN32 is defined and a struct timespec otherwise.
struct timer
{
#ifndef WIN32
    struct timespec t;
#else
    LARGE_INTEGER t;
#endif
};

//------------------------------------------------------
//
//------------------------------------------------------
void timer_get_time(struct timer* t)
{
#ifdef WIN32
QueryPerformanceCounter(&t->t);
#else
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t->t);
#endif

}

//------------------------------------------------------
// Return the time elapsed from *t1 to *t2, in seconds.
//
// t1 - earlier timestamp
// t2 - later timestamp
//
// The difference is computed in double precision and
// narrowed to float only at the end. Summing the second
// and nanosecond parts in float loses most of the
// significant digits of a short interval whenever tv_nsec
// wraps (seconds differ by 1, nanoseconds by almost -1e9).
//------------------------------------------------------
float timer_get_delta(struct timer *t1, struct timer *t2)
{
    double dt;

#ifdef WIN32
    LARGE_INTEGER freq;
    QueryPerformanceFrequency(&freq);

    dt = (double)(t2->t.QuadPart - t1->t.QuadPart) / (double)freq.QuadPart;
#else
    // the nanosecond difference may be negative; the sum is still correct
    const double seconds = (double)(t2->t.tv_sec - t1->t.tv_sec);
    const double nanos = (double)(t2->t.tv_nsec - t1->t.tv_nsec);

    dt = seconds + nanos / 1e9;
#endif

    return (float)dt;
}

//------------------------------------------------------
// Time one cblas_sgemm() call for each square size
// 4, 8, 16, ... MAX_SIZE and print the achieved GFlops.
//
// Computes c = a * b + c on the global buffers with
// m = n = k = i, counted as 2 * m * n * k flops.
//------------------------------------------------------
void test_gemm(void)
{
    struct timer t1, t2;

    printf("Testing performance of cblas_sgemm()\n\n");

    for (int i = 4; i <= MAX_SIZE; i *= 2)
    {
        // plain int dimensions: CBLAS_INDEX is the index-return type,
        // not the type of the dimension arguments
        const int m = i, n = i, k = i;

        timer_get_time(&t1);

        // row-major, no transpose: lda = k, ldb = n, ldc = n
        cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0f, a, k, b, n, 1.0f, c, n);

        timer_get_time(&t2);

        const float dt = timer_get_delta(&t1, &t2);

        // a zero interval (timer resolution) would otherwise divide by zero
        const double gflops = (dt > 0.0f) ? 2.0 * m * n * k / 1e9 / dt : 0.0;

        printf("%4d: %5.2f GFlops in %5.2fs\n", i, gflops, dt);
    }
}

//------------------------------------------------------
// Time one cblas_sger() call for each square size
// 2, 4, 8, ... MAX_SIZE and print the achieved GFlops.
//
// Computes the rank-1 update a = x * y' + a on the global
// buffers with m = n = i, counted as 2 * m * n flops.
//------------------------------------------------------
void test_ger(void)
{
    struct timer t1, t2;

    printf("Testing performance of cblas_sger()\n\n");

    for (int i = 2; i <= MAX_SIZE; i *= 2)
    {
        // plain int dimensions: CBLAS_INDEX is the index-return type,
        // not the type of the dimension arguments
        const int m = i, n = i;

        timer_get_time(&t1);

        // row-major: the leading dimension of a is its column count n
        cblas_sger(CblasRowMajor, m, n, 1.0f, x, 1, y, 1, a, n);

        timer_get_time(&t2);

        const float dt = timer_get_delta(&t1, &t2);

        // a zero interval (timer resolution) would otherwise divide by zero
        const double gflops = (dt > 0.0f) ? 2.0 * m * n / 1e9 / dt : 0.0;

        printf("%4d: %5.2f GFlops in %5.2fs\n", i, gflops, dt);
    }
}

//------------------------------------------------------
// Entry point: print the OpenBLAS build configuration,
// core name and processor/thread counts, then run the
// sgemm benchmark. Command-line arguments are ignored.
//
// NOTE(review): test_ger() is defined in this file but is
// not invoked here - confirm whether that is intentional.
//------------------------------------------------------
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    const char *config = openblas_get_config();
    printf( "%s\n", config);

    const char *core_name = openblas_get_corename();
    printf(" CPU uArch: %s\n", core_name);

    printf("Cores/Threads: %d/%d\n\n", openblas_get_num_procs(), openblas_get_num_threads());

    test_gemm();

    return 0;
}
6 changes: 3 additions & 3 deletions mnist.c
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,9 @@ int main(int argc, char *argv[])
cblas_init();
if (threads != -1)
cblas_set_num_threads(threads);
// printf( "%s\n", openblas_get_config());
// printf(" CPU uArch: %s\n", openblas_get_corename());
// printf(" Cores/Threads: %d/%d\n", openblas_get_num_procs(), openblas_get_num_threads());
printf( "%s\n", cblas_get_config());
printf(" CPU uArch: %s\n", cblas_get_corename());
printf(" Cores/Threads: %d/%d\n", cblas_get_num_procs(), cblas_get_num_threads());
#else
if (threads != -1)
openblas_set_num_threads(threads);
Expand Down

0 comments on commit a891575

Please sign in to comment.