Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade intel gemm conv tests #103

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions code/intel/convolution/mkl_conv/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ ifeq ($(MKLLIB), mklml_intel)
EXTRALIB = -L$(MKLROOT)/lib -lmklml_intel
endif
ifeq ($(MKLLIB), mkl_rt)
EXTRALIB = -L$(MKLROOT)/lib/intel64/ -lmkl_rt \
EXTRALIB = -L$(MKLROOT)/lib -lmkl_rt \
-Wl,-rpath,$(MKLROOT)/lib/intel64
endif
EXTRALIB += -liomp5 -lpthread -lm -ldl
EXTRALIB += -L$(MKLROOT)/lib -liomp5 -lpthread -lm -ldl
endif

ifeq ($(CONVLIB),MKLDNN)
Expand All @@ -43,7 +43,7 @@ ifeq ($(MKLDNNROOT),)
to the install directory.)
endif
EXTRACXXFLAGS = -I$(MKLDNNROOT)/include -DUSE_MKLDNN
EXTRALIB = -L$(MKLDNNROOT)/lib -lmkldnn -Wl,-rpath,$(MKLDNNROOT)/lib
EXTRALIB = -L$(MKLDNNROOT)/lib -lmkldnn -lmklml_intel -Wl,-rpath,$(MKLDNNROOT)/lib
endif

ifeq ($(DEBUG), 1)
Expand All @@ -53,7 +53,7 @@ OPTFLAGS = -O3
endif

CXX = icpc
CXXFLAGS = -Wall -std=c++11 $(OPTFLAGS) $(EXTRACXXFLAGS) -fopenmp -I../../../kernels
CXXFLAGS = -Wall -std=c++11 $(OPTFLAGS) $(EXTRACXXFLAGS) -fopenmp -I../../../kernelss
geoffreyblake marked this conversation as resolved.
Show resolved Hide resolved
LFLAGS = -lrt
OBJS = std_conv_bench.o

Expand Down
272 changes: 209 additions & 63 deletions code/intel/convolution/mkl_conv/std_conv_bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@
#include <float.h>
#include <time.h>
#include <assert.h>
#include <getopt.h>

#include <stdexcept>
#include <tuple>
#include <vector>
#include <string>
#include <iostream>

struct conv_problem {
int minibatch;
Expand Down Expand Up @@ -51,8 +53,6 @@ struct conv_problem {
#define INFERENCE_SERVER 1
#define INFERENCE_DEVICE 2

#define ITERS 1000

// Calculates convolution output dimension using the definition from Caffe
static inline int calc_out_dim(
int input_dim, int filter_dim, int padd, int stride)
Expand Down Expand Up @@ -352,10 +352,6 @@ static void usage()
printf(
"Usage: <executable> [OPTIONS]\n"
"\n"
"Output control:\n"
" --csv-output Produce CSV output\n"
" --original-output Produce output in the original format\n"
"\n"
"Control flops calculations:\n"
" --no-skip-padding Count ops with padding zeroes (default)\n"
" --skip-padding Do not count ops with padding zeroes\n"
Expand All @@ -367,8 +363,21 @@ static void usage()
" (AVX512_4VNNI CPUs)\n"
"Problem set control:\n"
" --training Training data set (default)\n"
" --inference-server Server inference data set\n"
" --inference-device Device inference data set\n"
" --inference Server inference data set\n"
" --device Device inference data set\n"
"Custom convolution definition:\n"
" --w Width\n"
" --h Height\n"
" --c \n"
" --n \n",
" --k \n",
" --filter_w \n",
" --filter_h \n",
" --pad_w \n",
" --pad_h \n",
" --wstride \n",
" --hstride \n",
" --repeat Number of times to test convolution (default: 50)\n",
"\n"
);
exit(-1);
Expand All @@ -377,36 +386,185 @@ static void usage()
int main(int argc, char **argv)
{
bool skip_padding = false;
bool csv_output = false;
int precision = PREC_F32;
std::vector<int> modes
= {FWD_CONVOLUTION, BWD_F_CONVOLUTION, BWD_D_CONVOLUTION};
std::vector<int> modes = {FWD_CONVOLUTION};
int problem_set = TRAINING;

for(argc--, argv++; argc; argv++, argc--) {
if (*argv == std::string("--csv-output"))
csv_output = true;
else if (*argv == std::string("--original-output"))
csv_output = false;
else if (*argv == std::string("--skip-padding"))
skip_padding = true;
else if (*argv == std::string("--no-skip-padding"))
skip_padding = false;
else if (*argv == std::string("--f32"))
precision = PREC_F32;
else if (*argv == std::string("--u8s8u8"))
precision = PREC_U8S8U8;
else if (*argv == std::string("--s16s16s32"))
precision = PREC_S16S16S32;
else if (*argv == std::string("--inference-device"))
problem_set = INFERENCE_DEVICE;
else if (*argv == std::string("--inference-server"))
problem_set = INFERENCE_SERVER;
else if (*argv == std::string("--training"))
problem_set = TRAINING;
else
usage();
}
// DEFAULTS
int ITERS = 50;
std::vector<std::tuple<unsigned int, unsigned int, unsigned int, unsigned int,
unsigned int, unsigned int, unsigned int,
unsigned int, unsigned int, unsigned int, unsigned int> > *problems = nullptr;
unsigned int w, h, c, n, k, filter_w, filter_h, pad_w, pad_h, wstride, hstride;
w = 151; h = 40; c = 1; n = 1; k = 32; filter_w = 20;
filter_h = 5; pad_w = 8; pad_h = 8; wstride = 8; hstride = 2;

// Use getopt_long so the benchmark can either run one of the built-in
// problem sets or a single custom convolution described on the command line
static struct option long_options[] = {
{"training", no_argument, 0, 0}, // These will run the full tests and override customization
{"inference", no_argument, 0, 0},
{"device", no_argument, 0, 0},
{"repeat", required_argument, 0, 0},
{"w", required_argument, 0, 0},
{"h", required_argument, 0, 0},
{"c", required_argument, 0, 0},
{"n", required_argument, 0, 0},
{"k", required_argument, 0, 0},
{"filter_w", required_argument, 0, 0},
{"filter_h", required_argument, 0, 0},
{"pad_w", required_argument, 0, 0},
{"pad_h", required_argument, 0, 0},
{"wstride", required_argument, 0, 0},
{"hstride", required_argument, 0, 0},
{"no-skip-padding", no_argument, 0, 0},
{"skip-padding", no_argument, 0, 0},
{"f32", no_argument, 0, 0},
{"u8s8u8", no_argument, 0, 0},
{"s16s16s32", no_argument, 0, 0},
{0, 0, 0, 0}
};

int opt;
do {
int option_index = 0;
opt = getopt_long(argc, argv, "", long_options, &option_index);
switch (opt) {
case -1:
break;
case 0:
switch (option_index) {
case 0:
if (problems == nullptr) {
problems = &training_set;
modes = {FWD_CONVOLUTION, BWD_F_CONVOLUTION, BWD_D_CONVOLUTION};
std::cout << "Running the training benchmark set" << std::endl;
}
break;
case 1:
if (problems == nullptr) {
problems = &inference_server_set;
std::cout << "Running the inference server set" << std::endl;
}
break;
case 2:
if (problems == nullptr) {
problems = &inference_device_set;
std::cout << "Running the inference device set" << std::endl;
}
break;
case 3:
ITERS = std::atoi(optarg);
if (ITERS <= 0) {
std::cerr << "Invalid repeat parameter spec'ed" << std::endl;
return 0;
}
break;
case 4:
w = std::atoi(optarg);
if (w <= 0) {
std::cerr << "Invalid w parameter spec'ed" << std::endl;
return 0;
}
break;
case 5:
h = std::atoi(optarg);
if (h <= 0) {
std::cerr << "Invalid h parameter spec'ed" << std::endl;
return 0;
}
break;
case 6:
c = std::atoi(optarg);
if (c <= 0) {
std::cerr << "Invalid c parameter spec'ed" << std::endl;
return 0;
}
break;
case 7:
n = std::atoi(optarg);
if (n <= 0) {
std::cerr << "Invalid n parameter spec'ed" << std::endl;
return 0;
}
break;
case 8:
k = std::atoi(optarg);
if (k <= 0) {
std::cerr << "Invalid k parameter spec'ed" << std::endl;
return 0;
}
break;
case 9:
filter_w = std::atoi(optarg);
if (filter_w <= 0) {
std::cerr << "Invalid filter_w paramter spec'ed" << std::endl;
return 0;
}
break;
case 10:
filter_h = std::atoi(optarg);
if (filter_h <= 0) {
std::cerr << "Invalid filter_h parameter spec'ed" << std::endl;
return 0;
}
break;
case 11:
pad_w = std::atoi(optarg);
if (pad_w < 0) {
std::cerr << "Invalid pad_w parameter spec'ed" << std::endl;
return 0;
}
break;
case 12:
pad_h = std::atoi(optarg);
if (pad_h < 0) {
std::cerr << "Invalid pad_h parameter spec'ed" << std::endl;
return 0;
}
break;
case 13:
wstride = std::atoi(optarg);
if (wstride <= 0) {
std::cerr << "Invalid wstride parameter spec'ed" << std::endl;
return 0;
}
break;
case 14:
hstride = std::atoi(optarg);
if (hstride <= 0) {
std::cerr << "Invalid hstride parameter spec'ed" << std::endl;
return 0;
}
break;
case 15:
skip_padding = false;
break;
case 16:
skip_padding = true;
break;
case 17:
precision = PREC_F32;
break;
case 18:
precision = PREC_U8S8U8;
break;
case 19:
precision = PREC_S16S16S32;
break;
default:
break;
}
break;
case '?':
usage();
return 0;
break;
default:
usage();
return 0;
break;
}
} while (opt != -1);

#ifdef USE_MKL
if (precision != PREC_F32) {
Expand All @@ -416,43 +574,31 @@ int main(int argc, char **argv)
}
#endif

#ifdef USE_MKLDNN
if (precision != PREC_F32 || problem_set != TRAINING)
modes = {FWD_CONVOLUTION};
#endif
if (problems == nullptr) {
problems = new std::vector<std::tuple<unsigned int, unsigned int, unsigned int, unsigned int,
unsigned int, unsigned int, unsigned int,
unsigned int, unsigned int, unsigned int, unsigned int> >();
problems->push_back(std::tuple<unsigned int, unsigned int, unsigned int, unsigned int,
unsigned int, unsigned int, unsigned int,
unsigned int, unsigned int, unsigned int, unsigned int>(w, h, c, n, k, filter_w,
filter_h, pad_w, pad_h, wstride, hstride));
}

const char *conv_mode_strs[] = {"FWD", "BWD_F", "BWD_D"};
const char *skip_padding_strs[]
= {"w/ padding in flops", "w/o padding in flops"};

const auto &problems = (problem_set == TRAINING
? training_set
: (problem_set == INFERENCE_DEVICE
? inference_device_set
: inference_server_set));
= {"w/ padding in flops", "w/o padding in flops"};

printf("OP,w,h,c,n,k,filter_w,filter_h,pad_w,pad_h,wstride,hstride,usecs,gops\n");
for (auto m : modes) {
if (!csv_output)
printf(" %s Convolution\n", conv_mode_strs[m]);
for (const auto& problem : problems) {
for (const auto& problem : *problems) {
conv_problem p;
std::tie(p.w, p.h, p.ic, p.minibatch, p.oc, p.fw, p.fh,
p.pad_w, p.pad_h, p.stride_w, p.stride_h) = problem;
p.iters = ITERS;
auto r = bench_conv(p, m, precision, skip_padding);
if (csv_output)
printf("%s,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%e,%e,%e,%e\n",
conv_mode_strs[m], skip_padding,
p.minibatch, p.w, p.h, p.ic, p.oc, p.fw, p.fh,
p.stride_w, p.stride_h, p.pad_w, p.pad_h,
r.min_ms, r.max_gflops, r.avg_ms, r.avg_gflops);
else
printf("W=%d, H=%d, C=%d, N=%d, K=%d, S=%d, R=%d | "
"%s %s min(ms) %.2f; max(gflop/s) %.2f;"
"avg(ms) %.2f; avg(gflop/s) %.2f;\n",
p.w, p.h, p.ic, p.minibatch, p.oc, p.fw, p.fh,
conv_mode_strs[m], skip_padding_strs[skip_padding],
r.min_ms, r.max_gflops, r.avg_ms, r.avg_gflops);
printf("%s,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%f,%f\n",
conv_mode_strs[m], p.w, p.h, p.ic, p.minibatch, p.oc,
p.fw, p.fh,p.pad_w,p.pad_h,p.stride_h,p.stride_w,r.avg_ms*1000.0, r.avg_gflops);
fflush(0);
}
}
Expand Down
6 changes: 3 additions & 3 deletions code/intel/gemm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
# ******************************************************************************

CC = icc
CFLAGS = -O2 -Wall -I$(MKLROOT)/include -I../../kernels -qopenmp -std=c++11
CFLAGS = -O2 -Wall -I$(MKLROOT)/include -I../../kernels -fopenmp -std=c++11

EXTRALIB = -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKLROOT)/lib/intel64/libmkl_intel_thread.a $(MKLROOT)/lib/intel64/libmkl_core.a -Wl,--end-group -liomp5 -lpthread -lm -ldl
EXTRALIB = -Wl,--start-group $(MKLROOT)/lib/libmkl_intel_lp64.a $(MKLROOT)/lib/libmkl_intel_thread.a $(MKLROOT)/lib/libmkl_core.a -Wl,--end-group -L$(MKLROOT)/lib/ -liomp5 -lpthread -lm -ldl

all : sbench sbench_pack
all : sbench sbench_pack ibench_s8u8s32

ibench_s8u8s32 : ibench_s8u8s32.o
$(CC) $(CFLAGS) $^ $(EXTRALIB) -o $@
Expand Down
Loading