From 4b918ea724733bc558c39bf0bc587f51321e0e4a Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 15 Nov 2024 09:34:48 +0100
Subject: [PATCH 1/2] chore(deps): bump llama-cpp to ae8de6d50a09d49545e0afab2e50cc4acfb280e2

Signed-off-by: Ettore Di Giacinto
---
Notes (not part of the commit message):
  * Moves the CPPLLAMA_VERSION pin in the top-level Makefile from
    fb4a0ec0833c... to ae8de6d50a09....
  * For BUILD_TYPE=hipblas, the exported variable GGML_HIPBLAS=1 becomes
    GGML_HIP=1 and the CMake option -DGGML_HIPBLAS=ON becomes -DGGML_HIP=ON,
    in both Makefile and backend/cpp/llama/Makefile. The BUILD_TYPE value
    itself and the -lhipblas/-lrocblas entries in CGO_LDFLAGS are unchanged.
  * Presumably the newly pinned llama.cpp revision renamed the option;
    confirm against upstream before merging.
  * NOTE(review): line breaks and leading tabs were reconstructed from the
    hunk line counts. If a hunk fails to match, retry with
    `git apply --ignore-whitespace`. The From:/Signed-off-by: lines carry no
    e-mail address; it likely needs to be filled in before `git am`.

 Makefile                   | 6 +++---
 backend/cpp/llama/Makefile | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index d51556924782..337c894796e9 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=fb4a0ec0833c71cff5a1a367ba375447ce6106eb
+CPPLLAMA_VERSION?=ae8de6d50a09d49545e0afab2e50cc4acfb280e2
 
 # go-rwkv version
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
@@ -138,10 +138,10 @@ ifeq ($(BUILD_TYPE),hipblas)
 	export CC=$(ROCM_HOME)/llvm/bin/clang
 	# llama-ggml has no hipblas support, so override it here.
 	export STABLE_BUILD_TYPE=
-	export GGML_HIPBLAS=1
+	export GGML_HIP=1
 	GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
 	AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
-	CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
+	CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
 	CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
 endif
 
diff --git a/backend/cpp/llama/Makefile b/backend/cpp/llama/Makefile
index 176cace67819..bd67d11077d4 100644
--- a/backend/cpp/llama/Makefile
+++ b/backend/cpp/llama/Makefile
@@ -22,7 +22,7 @@ else ifeq ($(BUILD_TYPE),clblas)
 	CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
 # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
 else ifeq ($(BUILD_TYPE),hipblas)
-	CMAKE_ARGS+=-DGGML_HIPBLAS=ON
+	CMAKE_ARGS+=-DGGML_HIP=ON
 # If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
 # But if it's OSX without metal, disable it here
 else ifeq ($(OS),Darwin)
From 6a1b7d881d86832312f32f33669de466877e6839 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 15 Nov 2024 10:18:16 +0100
Subject: [PATCH 2/2] fix(metal): metal file has moved

Signed-off-by: Ettore Di Giacinto
---
Notes (not part of the commit message):
  * Only touches the BUILD_TYPE=metal branch of the
    backend-assets/grpc/llama-cpp-fallback recipe: the file copied into
    backend-assets/grpc/ changes from build/bin/default.metallib to
    build/bin/ggml-metal.metal. The destination directory is unchanged and is
    still shared by every binary (see the TODO in the context lines).
  * NOTE(review): the copied artifact changes from a .metallib to a .metal
    file. Confirm the runtime can load it from backend-assets/grpc/ without
    further companion files.

 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 337c894796e9..426c2b3351a1 100644
--- a/Makefile
+++ b/Makefile
@@ -761,7 +761,7 @@ backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/ll
 	cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
 # TODO: every binary should have its own folder instead, so can have different metal implementations
 ifeq ($(BUILD_TYPE),metal)
-	cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
+	cp backend/cpp/llama-fallback/llama.cpp/build/bin/ggml-metal.metal backend-assets/grpc/
 endif
 
 backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp