Skip to content

Commit

Permalink
sync : ggml vulkan (ggml/0)
Browse files Browse the repository at this point in the history
ggml-ci
  • Loading branch information
ggerganov committed Aug 20, 2024
1 parent bd98759 commit fc27f9f
Show file tree
Hide file tree
Showing 39 changed files with 1,091 additions and 144,997 deletions.
4 changes: 2 additions & 2 deletions ggml/src/ggml-cann/Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ DOXYFILE_ENCODING = UTF-8
# title of most generated pages and in a few other places.
# The default value is: My Project.

PROJECT_NAME = "whisper.cpp"
PROJECT_NAME = "llama.cpp"

# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
# could be handy for archiving the generated documentation or if some version
Expand All @@ -44,7 +44,7 @@ PROJECT_NUMBER =
# for a project that appears at the top of each page and should give viewer a
# quick idea about the purpose of the project. Keep the description short.

PROJECT_BRIEF = "Port of OpenAI's Whisper model in C/C++"
PROJECT_BRIEF = "llama inference engine"

# With the PROJECT_LOGO tag one can specify a logo or an icon that is included
# in the documentation. The maximum height of the logo should not exceed 55
Expand Down
144,957 changes: 0 additions & 144,957 deletions ggml/src/ggml-vulkan-shaders.hpp

This file was deleted.

7 changes: 7 additions & 0 deletions ggml/src/vulkan-shaders/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Builds and installs the `vulkan-shaders-gen` executable from
# vulkan-shaders-gen.cpp, linked against the platform thread library.
find_package(Threads REQUIRED)

set(TARGET vulkan-shaders-gen)
add_executable(${TARGET} vulkan-shaders-gen.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_compile_features(${TARGET} PRIVATE cxx_std_11)
# An executable has no consumers to propagate usage requirements to, so the
# dependency is PRIVATE; refer to the target through ${TARGET} like every
# other command in this file.
target_link_libraries(${TARGET} PRIVATE Threads::Threads)
6 changes: 4 additions & 2 deletions ggml/src/vulkan-shaders/add.comp
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
#include "generic_binary_head.comp"

void main() {
if (gl_GlobalInvocationID.x >= p.ne) {
const uint idx = get_idx();

if (idx >= p.ne) {
return;
}

data_d[p.d_offset + dst_idx(gl_GlobalInvocationID.x)] = D_TYPE(FLOAT_TYPE(data_a[src0_idx(gl_GlobalInvocationID.x)]) + FLOAT_TYPE(data_b[src1_idx(gl_GlobalInvocationID.x)]));
data_d[p.d_offset + dst_idx(idx)] = D_TYPE(FLOAT_TYPE(data_a[src0_idx(idx)]) + FLOAT_TYPE(data_b[src1_idx(idx)]));
}
8 changes: 5 additions & 3 deletions ggml/src/vulkan-shaders/clamp.comp
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
#include "generic_unary_head.comp"

void main() {
if (gl_GlobalInvocationID.x >= p.ne) {
const uint idx = get_idx();

if (idx >= p.ne) {
return;
}

const FLOAT_TYPE val = FLOAT_TYPE(data_a[src0_idx(gl_GlobalInvocationID.x)]);
data_d[p.d_offset + dst_idx(gl_GlobalInvocationID.x)] = D_TYPE(val < p.param1 ? p.param1 : (val > p.param2 ? p.param2 : val));
const FLOAT_TYPE val = FLOAT_TYPE(data_a[src0_idx(idx)]);
data_d[p.d_offset + dst_idx(idx)] = D_TYPE(val < p.param1 ? p.param1 : (val > p.param2 ? p.param2 : val));
}
35 changes: 35 additions & 0 deletions ggml/src/vulkan-shaders/concat.comp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#version 450

#include "types.comp"
#include "generic_binary_head.comp"

// Concatenation kernel: every invocation writes one destination element,
// taking it from data_a when the element's coordinates lie inside src0's
// extents and from data_b otherwise. The concatenation axis is p.param3.
void main() {
    // Linear element index rebuilt from the 3D invocation id
    // (x stride 1, y stride 512, z stride 262144 = 512 * 512).
    const uint flat = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * 512 + gl_GlobalInvocationID.z * 262144;
    const int axis = p.param3;

    if (flat >= p.ne) {
        return;
    }

    // Split the linear index into destination coordinates (c3, c2, c1, c0)
    // using the destination extents ne20..ne22.
    const uint plane  = p.ne21*p.ne20;
    const uint volume = p.ne22*plane;

    const uint c3   = flat / volume;
    const uint rem3 = flat % volume;
    const uint c2   = rem3 / plane;
    const uint rem2 = rem3 % plane;
    const uint c1   = rem2 / p.ne20;
    const uint c0   = rem2 % p.ne20;

    // Per-axis shift applied to src1 coordinates: src0's extent along the
    // concatenation axis, zero along every other axis.
    uint shift[4] = {0, 0, 0, 0};
    shift[axis] = axis == 0 ? p.ne00 : (axis == 1 ? p.ne01 : (axis == 2 ? p.ne02 : p.ne03));

    const uint a_pos = c3*p.nb03 + c2*p.nb02 + c1*p.nb01 + c0*p.nb00;
    // The subtraction may wrap for elements that belong to src0; the wrapped
    // index is never dereferenced because from_a selects data_a for them.
    const uint b_pos = (c3 - shift[3])*p.nb13 + (c2 - shift[2])*p.nb12 + (c1 - shift[1])*p.nb11 + (c0 - shift[0])*p.nb10;
    const uint d_pos = c3*p.nb23 + c2*p.nb22 + c1*p.nb21 + c0*p.nb20;

    const bool from_a = c0 < p.ne00 && c1 < p.ne01 && c2 < p.ne02 && c3 < p.ne03;

#ifndef OPTIMIZATION_ERROR_WORKAROUND
    data_d[p.d_offset + d_pos] = D_TYPE(from_a ? data_a[a_pos] : data_b[b_pos]);
#else
    // Workaround build: same store without the explicit D_TYPE conversion.
    data_d[p.d_offset + d_pos] = from_a ? data_a[a_pos] : data_b[b_pos];
#endif
}
8 changes: 5 additions & 3 deletions ggml/src/vulkan-shaders/copy.comp
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
#include "generic_unary_head.comp"

void main() {
if (gl_GlobalInvocationID.x >= p.ne) {
const uint idx = get_idx();

if (idx >= p.ne) {
return;
}

#ifndef OPTIMIZATION_ERROR_WORKAROUND
data_d[p.d_offset + dst_idx(gl_GlobalInvocationID.x)] = D_TYPE(data_a[src0_idx(gl_GlobalInvocationID.x)]);
data_d[p.d_offset + dst_idx(idx)] = D_TYPE(data_a[src0_idx(idx)]);
#else
data_d[p.d_offset + dst_idx(gl_GlobalInvocationID.x)] = data_a[src0_idx(gl_GlobalInvocationID.x)];
data_d[p.d_offset + dst_idx(idx)] = data_a[src0_idx(idx)];
#endif
}
15 changes: 15 additions & 0 deletions ggml/src/vulkan-shaders/cos.comp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#version 450

#include "types.comp"
#include "generic_unary_head.comp"

// Element-wise cosine: data_d[dst] = cos(data_a[src]) for every element
// index below p.ne.
void main() {
    const uint elem = get_idx();

    // Invocations past the last element have nothing to do.
    if (elem >= p.ne) {
        return;
    }

    // Evaluate in FLOAT_TYPE, then convert to the destination type.
    const FLOAT_TYPE x = FLOAT_TYPE(data_a[src0_idx(elem)]);
    const uint out_pos = p.d_offset + dst_idx(elem);
    data_d[out_pos] = D_TYPE(cos(x));
}
8 changes: 8 additions & 0 deletions ggml/src/vulkan-shaders/dequant_funcs.comp
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,11 @@ vec2 dequantize(uint ib, uint iqs, uint a_offset) {
return vec2(int(data_a[a_offset + ib].qs[iqs]), int(data_a[a_offset + ib].qs[iqs + 1])) * d;
}
#endif

#if defined(DATA_A_IQ4_NL)
// Dequantizes one packed byte of an IQ4_NL block into two values.
//   ib       - block index (relative to a_offset)
//   iqs      - index of the byte inside the block's qs array
//   a_offset - offset added to ib when addressing data_a
// The low nibble yields .x and the high nibble yields .y; both nibbles are
// looked up in kvalues_iq4nl and scaled by the block's d.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint packed = uint(data_a[a_offset + ib].qs[iqs]);
    const float scale = float(data_a[a_offset + ib].d);
    const vec2 codes = vec2(kvalues_iq4nl[packed & 0xF], kvalues_iq4nl[packed >> 4]);
    return codes * scale;
}
#endif
30 changes: 30 additions & 0 deletions ggml/src/vulkan-shaders/dequant_iq4_nl.comp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#version 450

#include "dequant_head.comp"

layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

layout (binding = 0) readonly buffer A {block_iq4_nl data_a[];};
layout (binding = 1) writeonly buffer D {D_TYPE data_b[];};

// Dequantizes IQ4_NL blocks from data_a into data_b. Each invocation
// expands 8 packed bytes of one block into 16 output values.
void main() {
    // A 256-wide workgroup is split into four 64-wide slices.
    const uint slice = gl_WorkGroupID.x * 4 + gl_LocalInvocationID.x / 64;

    const uint lane = gl_LocalInvocationID.x % 64;
    const uint half_sel = lane / 32;   // 0 or 1: which 8-byte half of qs
    const uint row = lane % 32;        // block within the slice
    const uint block = 32 * slice + row;
    if (block >= p.nel / 32) {
        return;
    }

    const uint q_base = 8 * half_sel;
    const uint out_base = 1024 * slice + 32 * row + q_base;

    const float scale = float(data_a[block].d);

    [[unroll]] for (uint k = 0; k < 8; ++k) {
        // The low nibble goes to output k, the high nibble to output k + 16.
        const uint packed = uint(data_a[block].qs[q_base + k]);
        data_b[out_base + k + 0] = D_TYPE(scale * kvalues_iq4nl[packed & 0xF]);
        data_b[out_base + k + 16] = D_TYPE(scale * kvalues_iq4nl[packed >> 4]);
    }
}
10 changes: 4 additions & 6 deletions ggml/src/vulkan-shaders/dequant_q4_0.comp
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,13 @@ void main() {
return;
}

const uint b_idx = 1024*i + 32*ir + 8*il;
const uint q_idx = 8*il;
const uint b_idx = 1024*i + 32*ir + q_idx;

const float d = float(data_a[ib].d);
const float dm = -8.0f * d;

const uint q_idx = 8*il;

[[unroll]] for (uint l = 0; l < 8; ++l) {
data_b[b_idx + l + 0] = D_TYPE(d * (data_a[ib].qs[q_idx + l] & 0xF) + dm);
data_b[b_idx + l + 16] = D_TYPE(d * (data_a[ib].qs[q_idx + l] >> 4) + dm);
data_b[b_idx + l + 0] = D_TYPE(d * ((data_a[ib].qs[q_idx + l] & 0xF) - 8.0f));
data_b[b_idx + l + 16] = D_TYPE(d * ((data_a[ib].qs[q_idx + l] >> 4) - 8.0f));
}
}
6 changes: 4 additions & 2 deletions ggml/src/vulkan-shaders/div.comp
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
#include "generic_binary_head.comp"

void main() {
if (gl_GlobalInvocationID.x >= p.ne) {
const uint idx = get_idx();

if (idx >= p.ne) {
return;
}

data_d[p.d_offset + dst_idx(gl_GlobalInvocationID.x)] = D_TYPE(FLOAT_TYPE(data_a[src0_idx(gl_GlobalInvocationID.x)]) / FLOAT_TYPE(data_b[src1_idx(gl_GlobalInvocationID.x)]));
data_d[p.d_offset + dst_idx(idx)] = D_TYPE(FLOAT_TYPE(data_a[src0_idx(idx)]) / FLOAT_TYPE(data_b[src1_idx(idx)]));
}
2 changes: 1 addition & 1 deletion ggml/src/vulkan-shaders/gelu.comp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ layout (binding = 1) writeonly buffer D {D_TYPE data_d[];};
void main() {
const float GELU_COEF_A = 0.044715f;
const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876f;
const uint i = gl_GlobalInvocationID.x;
const uint i = gl_GlobalInvocationID.z * 262144 + gl_GlobalInvocationID.y * 512 + gl_GlobalInvocationID.x;

if (i >= p.KX) {
return;
Expand Down
23 changes: 23 additions & 0 deletions ggml/src/vulkan-shaders/gelu_quick.comp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#version 450

#include "generic_head.comp"
#include "types.comp"

#extension GL_EXT_control_flow_attributes : enable

layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

layout (binding = 0) readonly buffer X {A_TYPE data_a[];};
layout (binding = 1) writeonly buffer D {D_TYPE data_d[];};

// Quick GELU: data_d[i] = x * 1 / (1 + exp(-1.702 * x)) with x = data_a[i],
// for every i below p.KX.
void main() {
    const float GELU_QUICK_COEF = -1.702f;

    // Linear element index rebuilt from the 3D invocation id
    // (x stride 1, y stride 512, z stride 262144 = 512 * 512).
    const uint elem = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * 512 + gl_GlobalInvocationID.z * 262144;

    if (elem >= p.KX) {
        return;
    }

    const float x = float(data_a[elem]);
    const float gate = 1.0f / (1.0f + exp(GELU_QUICK_COEF * x));
    data_d[elem] = D_TYPE(x * gate);
}
6 changes: 5 additions & 1 deletion ggml/src/vulkan-shaders/generic_binary_head.comp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ layout (push_constant) uniform parameter
uint ne10; uint ne11; uint ne12; uint ne13; uint nb10; uint nb11; uint nb12; uint nb13;
uint ne20; uint ne21; uint ne22; uint ne23; uint nb20; uint nb21; uint nb22; uint nb23;
uint d_offset;
float param1; float param2;
float param1; float param2; int param3;
} p;

layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
Expand All @@ -16,6 +16,10 @@ layout (binding = 0) readonly buffer A {A_TYPE data_a[];};
layout (binding = 1) readonly buffer B {B_TYPE data_b[];};
layout (binding = 2) writeonly buffer D {D_TYPE data_d[];};

// Returns the linear element index of this invocation, combining the 3D
// invocation id with strides 1 (x), 512 (y) and 262144 = 512 * 512 (z).
// NOTE(review): presumably the host dispatch splits large element counts
// across y/z with these strides - confirm against the dispatch code.
uint get_idx() {
    const uvec3 gid = gl_GlobalInvocationID;
    return gid.z * 262144 + gid.y * 512 + gid.x;
}

uint src0_idx(uint idx) {
const uint i03 = idx / (p.ne02*p.ne01*p.ne00);
const uint i03_offset = i03 * p.ne02*p.ne01*p.ne00;
Expand Down
4 changes: 4 additions & 0 deletions ggml/src/vulkan-shaders/generic_unary_head.comp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
layout (binding = 0) readonly buffer A {A_TYPE data_a[];};
layout (binding = 1) writeonly buffer D {D_TYPE data_d[];};

// Returns the linear element index of this invocation, combining the 3D
// invocation id with strides 1 (x), 512 (y) and 262144 = 512 * 512 (z).
// NOTE(review): presumably the host dispatch splits large element counts
// across y/z with these strides - confirm against the dispatch code.
uint get_idx() {
    const uvec3 gid = gl_GlobalInvocationID;
    return gid.z * 262144 + gid.y * 512 + gid.x;
}

uint src0_idx(uint idx) {
const uint i03 = idx / (p.ne02*p.ne01*p.ne00);
const uint i03_offset = i03 * p.ne02*p.ne01*p.ne00;
Expand Down
66 changes: 66 additions & 0 deletions ggml/src/vulkan-shaders/group_norm.comp
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#version 450

#include "generic_head.comp"
#include "types.comp"

#extension GL_EXT_control_flow_attributes : enable
#define BLOCK_SIZE 512

layout(local_size_x = BLOCK_SIZE, local_size_y = 1, local_size_z = 1) in;

layout (binding = 0) readonly buffer X {A_TYPE data_a[];};
layout (binding = 1) writeonly buffer D {D_TYPE data_d[];};

shared float tmp[BLOCK_SIZE];

// Group normalization. One workgroup handles one group of p.KX consecutive
// elements: it computes the group's mean and variance with two shared-memory
// tree reductions, then writes (x - mean) * inversesqrt(variance + eps).
void main() {
    const uint group_size = p.KX;
    const float eps = p.param1;

    const uint tid = gl_LocalInvocationID.x;
    // Each thread starts at its own offset inside the group and strides by
    // BLOCK_SIZE. The end bound is the group's end and must NOT include tid:
    // with `start + group_size` every thread with tid > 0 would run past the
    // group and touch elements of the following group.
    const uint start = gl_WorkGroupID.x * group_size + tid;
    const uint end = (gl_WorkGroupID.x + 1) * group_size;

    tmp[tid] = 0.0f;

    // Calculate mean: per-thread partial sum of the group's elements
    [[unroll]] for (uint col = start; col < end; col += BLOCK_SIZE) {
        tmp[tid] += float(data_a[col]);
    }

    // sum up partial sums; the total ends up in tmp[0]
    barrier();
    [[unroll]] for (int s = BLOCK_SIZE / 2; s > 0; s >>= 1) {
        if (tid < s) {
            tmp[tid] += tmp[tid + s];
        }
        barrier();
    }

    const float mean = tmp[0] / group_size;
    // Every thread must have read tmp[0] before tmp is reused below.
    barrier();
    tmp[tid] = 0.0f;

    // Calculate variance; the centered values are stored in data_d so the
    // final pass only has to scale them
    [[unroll]] for (uint col = start; col < end; col += BLOCK_SIZE) {
        const float xi = float(data_a[col]) - mean;
        data_d[col] = D_TYPE(xi);
        tmp[tid] += xi * xi;
    }

    // sum up partial sums; the total ends up in tmp[0]
    barrier();
    [[unroll]] for (int s = BLOCK_SIZE / 2; s > 0; s >>= 1) {
        if (tid < s) {
            tmp[tid] += tmp[tid + s];
        }
        barrier();
    }

    const float variance = tmp[0] / group_size;
    const float scale = inversesqrt(variance + eps);

    // Each thread rescales exactly the elements it wrote in the variance pass.
    // NOTE(review): data_d is declared `writeonly` in this file's buffer
    // layout, yet `*=` reads it - confirm this is accepted by all target
    // compilers/drivers.
    [[unroll]] for (uint col = start; col < end; col += BLOCK_SIZE) {
        data_d[col] *= D_TYPE(scale);
    }
}
57 changes: 57 additions & 0 deletions ggml/src/vulkan-shaders/im2col.comp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#version 450

#extension GL_EXT_shader_16bit_storage : require

layout (push_constant) uniform parameter
{
uint batch_offset; uint offset_delta;
uint IC;
uint IW; uint IH;
uint OW; uint OH;
uint KW; uint KH;
uint pelements;
uint CHW;
int s0; int s1;
int p0; int p1;
int d0; int d1;
} p;

#include "types.comp"

#define BLOCK_SIZE 256

layout(local_size_x = BLOCK_SIZE, local_size_y = 1, local_size_z = 1) in;

layout (binding = 0) readonly buffer X {A_TYPE data_a[];};
layout (binding = 1) writeonly buffer D {D_TYPE data_d[];};

// im2col: every invocation writes one element of the column buffer data_d,
// either a copy of the matching input element from data_a or zero when the
// sampled position falls outside the input (padding).
void main() {
    const uint i = gl_GlobalInvocationID.x;
    if (i >= p.pelements) {
        return;
    }

    // Decompose the x invocation index into kernel column (kx), kernel row
    // (ky) and output column (ix).
    const uint ksize = p.OW * (p.KH > 1 ? p.KW : 1);
    const uint kx = i / ksize;
    const uint kd = kx * ksize;
    const uint ky = (i - kd) / p.OW;
    const uint ix = i % p.OW;

    // y carries the output row; z carries batch and input channel combined.
    const uint oh = gl_GlobalInvocationID.y;
    const uint batch = gl_GlobalInvocationID.z / p.IC;
    const uint ic = gl_GlobalInvocationID.z % p.IC;

    // Sampled input position (stride s*, dilation d*, padding p*). These are
    // unsigned: a position left of / above the input wraps around to a very
    // large value instead of becoming negative.
    const uint iiw = ix * p.s0 + kx * p.d0 - p.p0;
    const uint iih = oh * p.s1 + ky * p.d1 - p.p1;

    const uint offset_dst =
        ((batch * p.OH + oh) * p.OW + ix) * p.CHW +
        (ic * (p.KW * p.KH) + ky * p.KW + kx);

    // A single upper-bound test covers both sides of the input: wrapped
    // "negative" positions are far above IH / IW. (A `< 0` comparison on a
    // uint is always false, so it is not written here.)
    if (iih >= p.IH || iiw >= p.IW) {
        data_d[offset_dst] = D_TYPE(0.0f);
    } else {
        const uint offset_src = ic * p.offset_delta + batch * p.batch_offset;
        data_d[offset_dst] = D_TYPE(data_a[offset_src + iih * p.IW + iiw]);
    }
}
Loading

0 comments on commit fc27f9f

Please sign in to comment.