Skip to content

Commit

Permalink
simplify + cpp 11
Browse files Browse the repository at this point in the history
  • Loading branch information
sachaarbonel committed Nov 6, 2024
1 parent 9843e4e commit 3f71fe8
Show file tree
Hide file tree
Showing 6 changed files with 254 additions and 589 deletions.
209 changes: 54 additions & 155 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -98,36 +98,13 @@ endif

# In GNU make default CXX is g++ instead of c++. Let's fix that so that users
# of non-gcc compilers don't have to provide g++ alias or wrapper.
# Detect operating system and set compilers accordingly
ifeq ($(UNAME_S),Linux)
DEFCC := gcc
DEFCXX := g++

# Use g++-10 on Linux if available
GCC_VERSION := 10
GXX_PATH := /usr/bin/g++-$(GCC_VERSION)
ifneq ($(wildcard $(GXX_PATH)),)
DEFCXX := $(GXX_PATH)
else
$(warning g++-$(GCC_VERSION) not found. Using default g++)
endif
else ifeq ($(UNAME_S),Darwin)
DEFCC := cc
DEFCXX := c++
DEFCC := cc
DEFCXX := c++
ifeq ($(origin CC),default)
CC := $(DEFCC)
endif

# Override CC and CXX if not set
ifeq ($(origin CC), default)
CC := $(DEFCC)
endif
ifeq ($(origin CXX), default)
CXX := $(DEFCXX)
endif

# Enable CUDA only on Linux
ifeq ($(UNAME_S),Linux)
# Uncomment or set GGML_CUDA as needed
GGML_CUDA := 1
ifeq ($(origin CXX),default)
CXX := $(DEFCXX)
endif

# Mac OS + Arm can report x86_64
Expand Down Expand Up @@ -205,17 +182,16 @@ endif

# keep standard at C11 and C++11
MK_CPPFLAGS = -Iggml/include -Iggml/src -Iinclude -Isrc -Iexamples
MK_CFLAGS = -std=c11 -fPIC
MK_CXXFLAGS = -std=c++14 -fPIC
MK_NVCCFLAGS = -std=c++14 --expt-relaxed-constexpr -Xcompiler "-std=c++14"
MK_NVCCFLAGS += --compiler-bindir=$(CXX)
MK_CFLAGS = -std=c11 -fPIC
MK_CXXFLAGS = -std=c++11 -fPIC
MK_NVCCFLAGS = -std=c++11

ifndef WHISPER_NO_CCACHE
CCACHE := $(shell which ccache)
ifdef CCACHE
export CCACHE_SLOPPINESS = time_macros
$(info I ccache found, compilation results will be cached. Disable with WHISPER_NO_CCACHE.)
CC := $(CCACHE) $(CC)
CC := $(CCACHE) $(CC)
CXX := $(CCACHE) $(CXX)
else
$(info I ccache not found. Consider installing it for faster compilation.)
Expand Down Expand Up @@ -269,7 +245,7 @@ ifdef GGML_SCHED_MAX_COPIES
endif

ifdef WHISPER_DEBUG
MK_CFLAGS += -O0 -g
MK_CFLAGS += -O0 -g
MK_CXXFLAGS += -O0 -g
MK_LDFLAGS += -g
MK_NVCCFLAGS += -O0 -g
Expand All @@ -279,7 +255,7 @@ ifdef WHISPER_DEBUG
endif
else
MK_CPPFLAGS += -DNDEBUG
MK_CFLAGS += -O3
MK_CFLAGS += -O3
MK_CXXFLAGS += -O3
MK_NVCCFLAGS += -O3
endif
Expand Down Expand Up @@ -383,13 +359,13 @@ endif

# Architecture specific
# TODO: probably these flags need to be tweaked on some architectures
# feel free to update the Makefile for your architecture and send a pull request or issue
# feel free to update the Makefile for your architecture and send a pull request or issue

ifndef RISCV

ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
# Use all CPU extensions that are available:
MK_CFLAGS += -march=native -mtune=native
MK_CFLAGS += -march=native -mtune=native
HOST_CXXFLAGS += -march=native -mtune=native

# Usage AVX-only
Expand Down Expand Up @@ -478,7 +454,7 @@ ifndef GGML_NO_ACCELERATE
MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
MK_LDFLAGS += -framework Accelerate
OBJ_GGML += ggml/src/ggml-blas.o
OBJ_GGML += ggml/src/ggml-blas.o
endif
endif # GGML_NO_ACCELERATE

Expand All @@ -492,29 +468,29 @@ ifdef GGML_OPENBLAS
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
MK_LDFLAGS += $(shell pkg-config --libs openblas)
OBJ_GGML += ggml/src/ggml-blas.o
OBJ_GGML += ggml/src/ggml-blas.o
endif # GGML_OPENBLAS

ifdef GGML_OPENBLAS64
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
MK_LDFLAGS += $(shell pkg-config --libs openblas64)
OBJ_GGML += ggml/src/ggml-blas.o
OBJ_GGML += ggml/src/ggml-blas.o
endif # GGML_OPENBLAS64

ifdef GGML_BLIS
MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
MK_LDFLAGS += -lblis -L/usr/local/lib
OBJ_GGML += ggml/src/ggml-blas.o
OBJ_GGML += ggml/src/ggml-blas.o
endif # GGML_BLIS

ifdef GGML_RPC
MK_CPPFLAGS += -DGGML_USE_RPC
OBJ_GGML += ggml/src/ggml-rpc.o
OBJ_GGML += ggml/src/ggml-rpc.o
endif # GGML_RPC

OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu))
OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmq*.cu))
OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu))
OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmq*.cu))

ifdef GGML_CUDA_FA_ALL_QUANTS
OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*.cu))
Expand All @@ -533,7 +509,7 @@ ifdef GGML_CUDA

#MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
#MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcufft -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
MK_CPPFLAGS += -DGGML_USE_CUDA -DWHISPER_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
MK_NVCCFLAGS += -use_fast_math

Expand Down Expand Up @@ -653,7 +629,7 @@ endif # GGML_CUDA
ifdef GGML_VULKAN
MK_CPPFLAGS += -DGGML_USE_VULKAN
MK_LDFLAGS += $(shell pkg-config --libs vulkan)
OBJ_GGML += ggml/src/ggml-vulkan.o ggml/src/ggml-vulkan-shaders.o
OBJ_GGML += ggml/src/ggml-vulkan.o ggml/src/ggml-vulkan-shaders.o

ifdef GGML_VULKAN_CHECK_RESULTS
MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS
Expand Down Expand Up @@ -693,7 +669,7 @@ $(_ggml_vk_header): $(_ggml_vk_source)

$(_ggml_vk_source): $(_ggml_vk_shader_deps) vulkan-shaders-gen
$(_ggml_vk_genshaders_cmd) \
--glslc $(GLSLC_CMD) \
--glslc $(GLSLC_CMD) \
--input-dir $(_ggml_vk_input_dir) \
--target-hpp $(_ggml_vk_header) \
--target-cpp $(_ggml_vk_source)
Expand All @@ -705,15 +681,15 @@ endif # GGML_VULKAN

ifdef GGML_HIPBLAS
ifeq ($(wildcard /opt/rocm),)
ROCM_PATH ?= /usr
ROCM_PATH ?= /usr
AMDGPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
else
ROCM_PATH ?= /opt/rocm
AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
endif

GGML_CUDA_DMMV_X ?= 32
GGML_CUDA_MMV_Y ?= 1
GGML_CUDA_DMMV_X ?= 32
GGML_CUDA_MMV_Y ?= 1
GGML_CUDA_KQUANTS_ITER ?= 2

MK_CPPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUDA
Expand Down Expand Up @@ -773,13 +749,13 @@ endif

ifdef GGML_METAL_EMBED_LIBRARY
MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY
OBJ_GGML += ggml/src/ggml-metal-embed.o
OBJ_GGML += ggml/src/ggml-metal-embed.o
endif
endif # GGML_METAL

ifdef WHISPER_COREML
MK_CXXFLAGS += -DWHISPER_USE_COREML
LDFLAGS += -framework Foundation -framework CoreML
LDFLAGS += -framework Foundation -framework CoreML

ifdef WHISPER_COREML_ALLOW_FALLBACK
MK_CXXFLAGS += -DWHISPER_COREML_ALLOW_FALLBACK
Expand All @@ -802,12 +778,12 @@ ggml/src/ggml-metal-embed.o: \
@echo "Embedding Metal library"
@sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal.metal > ggml/src/ggml-metal-embed.metal
$(eval TEMP_ASSEMBLY=$(shell mktemp))
@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)
@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)
@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)
@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)
@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)
@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)
@echo ".incbin \"ggml/src/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)
@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)
@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)
@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)
@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)
@$(AS) $(TEMP_ASSEMBLY) -o $@
@rm -f ${TEMP_ASSEMBLY}
endif
Expand Down Expand Up @@ -863,8 +839,8 @@ include scripts/get-flags.mk

# combine build flags with cmdline overrides
override CPPFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS)
override CFLAGS := $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS)
BASE_CXXFLAGS := $(MK_CXXFLAGS) $(CXXFLAGS)
override CFLAGS := $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS)
BASE_CXXFLAGS := $(MK_CXXFLAGS) $(CXXFLAGS)
override CXXFLAGS := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS) $(CPPFLAGS)
override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS)
override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS)
Expand All @@ -889,102 +865,25 @@ $(info I whisper.cpp build info: )
$(info I UNAME_S: $(UNAME_S))
$(info I UNAME_P: $(UNAME_P))
$(info I UNAME_M: $(UNAME_M))
$(info I CFLAGS: $(CFLAGS))
$(info I CFLAGS: $(CFLAGS))
$(info I CXXFLAGS: $(CXXFLAGS))
$(info I NVCCFLAGS: $(NVCCFLAGS))
$(info I LDFLAGS: $(LDFLAGS))
$(info I CC: $(shell $(CC) --version | head -n 1))
$(info I CXX: $(shell $(CXX) --version | head -n 1))
$(info I CC: $(shell $(CC) --version | head -n 1))
$(info I CXX: $(shell $(CXX) --version | head -n 1))
ifdef GGML_CUDA
ifeq ($(UNAME_S),Linux)
# CUDA path setup
ifneq ($(wildcard /opt/cuda),)
CUDA_PATH ?= /opt/cuda
else
CUDA_PATH ?= /usr/local/cuda
endif

# Include paths
MK_CPPFLAGS += -DGGML_USE_CUDA -DWHISPER_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
MK_CPPFLAGS += -I/usr/local/cuda/include -I/usr/local/cuda/targets/x86_64-linux/include

# Linker flags
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcufft -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib

# NVCC flags
MK_NVCCFLAGS += -std=c++14 --expt-relaxed-constexpr -Xcompiler "-std=c++14" --compiler-bindir=$(CXX) -O3 -use_fast_math --forward-unknown-to-host-compiler -Wno-deprecated-gpu-targets -arch=compute_75 \
-DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DK_QUANTS_PER_ITERATION=2 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128

# Additional compiler flags
CUDA_CXXFLAGS += -std=c++14 -fPIC -O3 -Wall -Wextra -Wpedantic \
-Wcast-qual -Wno-unused-function -Wmissing-declarations \
-Wmissing-noreturn -pthread -fopenmp -Wno-array-bounds \
-Wno-format-truncation -Wextra-semi -Wno-pedantic

# Object files
OBJ_GGML += ggml/src/ggml-cuda.o
OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
OBJ_GGML += $(OBJ_CUDA_TMPL)

# Fatal warnings
ifdef WHISPER_FATAL_WARNINGS
MK_NVCCFLAGS += -Werror all-warnings
endif # WHISPER_FATAL_WARNINGS

# Forward unknown to host compiler
ifndef JETSON_EOL_MODULE_DETECT
MK_NVCCFLAGS += --forward-unknown-to-host-compiler
endif # JETSON_EOL_MODULE_DETECT

# Debug flags
ifdef WHISPER_DEBUG
MK_NVCCFLAGS += -lineinfo
endif # WHISPER_DEBUG

ifdef GGML_CUDA_DEBUG
MK_NVCCFLAGS += --device-debug
endif # GGML_CUDA_DEBUG

# NVCC definition
ifdef GGML_CUDA_NVCC
NVCC = $(CCACHE) $(GGML_CUDA_NVCC)
else
NVCC = $(CCACHE) nvcc
endif # GGML_CUDA_NVCC

# Architecture-specific NVCC flags
ifdef CUDA_DOCKER_ARCH
MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
else ifndef CUDA_POWER_ARCH
MK_NVCCFLAGS += -arch=compute_75
endif # CUDA_DOCKER_ARCH / CUDA_POWER_ARCH

# Ensure 'define' starts at the first column
define NVCC_COMPILE
$(NVCC) $(MK_NVCCFLAGS) $(MK_CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
endef

# CUDA object rules
ggml/src/ggml-cuda/%.o: \
ggml/src/ggml-cuda/%.cu \
ggml/include/ggml.h \
ggml/src/ggml-common.h \
ggml/src/ggml-cuda/common.cuh
$(NVCC_COMPILE)

ggml/src/ggml-cuda.o: \
ggml/src/ggml-cuda.cu \
ggml/include/ggml.h \
ggml/include/ggml-backend.h \
ggml/include/ggml-cuda.h \
ggml/src/ggml-backend-impl.h \
ggml/src/ggml-common.h \
$(wildcard ggml/src/ggml-cuda/*.cuh)
$(NVCC_COMPILE)

endif # ifeq ($(UNAME_S),Linux)
endif # ifdef GGML_CUDA
$(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])')
ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)

ifndef CUDA_DOCKER_ARCH
ifndef CUDA_POWER_ARCH
$(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via environment variable CUDA_DOCKER_ARCH, e.g. by running "export CUDA_DOCKER_ARCH=compute_XX" on Unix-like systems, where XX is the minimum compute capability that the code needs to run on. A list with compute capabilities can be found here: https://developer.nvidia.com/cuda-gpus )
endif # CUDA_POWER_ARCH
endif # CUDA_DOCKER_ARCH

endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1)
endif # GGML_CUDA
$(info )

ifdef DEPRECATE_WARNING
Expand Down Expand Up @@ -1034,14 +933,14 @@ ggml/src/ggml-quants.o: \
ggml/include/ggml.h \
ggml/src/ggml-quants.h \
ggml/src/ggml-common.h
$(CC) $(CFLAGS) -c $< -o $@
$(CC) $(CFLAGS) -c $< -o $@

ggml/src/ggml-aarch64.o: \
ggml/src/ggml-aarch64.c \
ggml/include/ggml.h \
ggml/src/ggml-aarch64.h \
ggml/src/ggml-common.h
$(CC) $(CFLAGS) -c $< -o $@
$(CC) $(CFLAGS) -c $< -o $@

ggml/src/ggml-blas.o: \
ggml/src/ggml-blas.cpp \
Expand Down Expand Up @@ -1199,7 +1098,7 @@ lsp: examples/lsp/lsp.cpp \
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)

# TODO: disabled until update
# https://github.com/ggerganov/whisper.cpp/issues/1818
# https://github.com/ggerganov/whisper.cpp/issues/1818
#talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp \
# $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
# $(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
Expand Down Expand Up @@ -1276,7 +1175,7 @@ tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 larg
@for f in samples/*.wav; do \
echo "----------------------------------------------" ; \
echo "[+] Running $@ on $$f ... (run 'ffplay $$f' to listen)" ; \
echo "----------------------------------------------" ; \
echo "----------------------------------------------" ; \
echo "" ; \
./main -m models/ggml-$@.bin -f $$f ; \
echo "" ; \
Expand Down
Loading

0 comments on commit 3f71fe8

Please sign in to comment.