forked from antirez/gguf-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgguflib.h
212 lines (197 loc) · 7.65 KB
/
gguflib.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
/* Copyright (C) 2024 Salvatore Sanfilippo <[email protected]>
* See LICENSE for licensing info.
*
* GGUF enums / structures are partially adapted from
* the official GGUF implementation at https://github.com/ggerganov/ggml/
*/
#ifndef GGUFLIB_H
#define GGUFLIB_H
#include <stddef.h>
#include <stdint.h>
/* ============================ Enums and structures ======================== */
/* Flags accepted by several functions of this library. A given flag keeps
 * the same meaning in every function that takes it. */
#define GGUF_NONE       0           /* No flags. */
#define GGUF_OVERWRITE  (1 << 0)    /* Overwrite the destination object. */
/* Tensor types. Every id is spelled out explicitly. Ids 4 and 5 are left
 * unused on purpose: they belonged to GGUF_TYPE_Q4_2 and GGUF_TYPE_Q4_3,
 * whose support has been removed. */
enum gguf_tensor_type {
    GGUF_TYPE_F32     = 0,
    GGUF_TYPE_F16     = 1,
    GGUF_TYPE_Q4_0    = 2,
    GGUF_TYPE_Q4_1    = 3,
    /* 4: GGUF_TYPE_Q4_2, support has been removed. */
    /* 5: GGUF_TYPE_Q4_3, support has been removed. */
    GGUF_TYPE_Q5_0    = 6,
    GGUF_TYPE_Q5_1    = 7,
    GGUF_TYPE_Q8_0    = 8,
    GGUF_TYPE_Q8_1    = 9,
    GGUF_TYPE_Q2_K    = 10,
    GGUF_TYPE_Q3_K    = 11,
    GGUF_TYPE_Q4_K    = 12,
    GGUF_TYPE_Q5_K    = 13,
    GGUF_TYPE_Q6_K    = 14,
    GGUF_TYPE_Q8_K    = 15,
    GGUF_TYPE_IQ2_XXS = 16,
    GGUF_TYPE_IQ2_XS  = 17,
    GGUF_TYPE_IQ3_XXS = 18,
    GGUF_TYPE_IQ1_S   = 19,
    GGUF_TYPE_IQ4_NL  = 20,
    GGUF_TYPE_IQ3_S   = 21,
    GGUF_TYPE_IQ2_S   = 22,
    GGUF_TYPE_IQ4_XS  = 23,
    GGUF_TYPE_I8      = 24,
    GGUF_TYPE_I16     = 25,
    GGUF_TYPE_I32     = 26,
    GGUF_TYPE_I64     = 27,
    GGUF_TYPE_F64     = 28,
    GGUF_TYPE_IQ1_M   = 29,
    GGUF_TYPE_BF16    = 30,
    /* Not a type: one past the highest id above (31). */
    GGUF_TYPE_COUNT
};
/* Types of the values stored in the key-value metadata, plus two special
 * markers used only by the callbacks of gguf_do_with_value(). */
enum gguf_value_type {
    GGUF_VALUE_TYPE_UINT8   = 0,  /* 8-bit unsigned integer. */
    GGUF_VALUE_TYPE_INT8    = 1,  /* 8-bit signed integer. */
    GGUF_VALUE_TYPE_UINT16  = 2,  /* 16-bit unsigned little-endian integer. */
    GGUF_VALUE_TYPE_INT16   = 3,  /* 16-bit signed little-endian integer. */
    GGUF_VALUE_TYPE_UINT32  = 4,  /* 32-bit unsigned little-endian integer. */
    GGUF_VALUE_TYPE_INT32   = 5,  /* 32-bit signed little-endian integer. */
    GGUF_VALUE_TYPE_FLOAT32 = 6,  /* 32-bit IEEE754 floating point number. */

    /* Boolean: a 1-byte value where 0 is false and 1 is true. Anything
     * else is invalid, and should be treated as either the model being
     * invalid or the reader being buggy. */
    GGUF_VALUE_TYPE_BOOL    = 7,

    /* UTF-8 non-null-terminated string, with length prepended. */
    GGUF_VALUE_TYPE_STRING  = 8,

    /* Array of other values, with the length and type prepended. Arrays
     * can be nested, and the length of the array is the number of
     * elements in the array, not the number of bytes. */
    GGUF_VALUE_TYPE_ARRAY   = 9,

    GGUF_VALUE_TYPE_UINT64  = 10, /* 64-bit unsigned little-endian integer. */
    GGUF_VALUE_TYPE_INT64   = 11, /* 64-bit signed little-endian integer. */
    GGUF_VALUE_TYPE_FLOAT64 = 12, /* 64-bit IEEE754 floating point number. */

    /* Special values used by the callbacks of gguf_do_with_value(). */
    GGUF_VALUE_TYPE_ARRAY_START = 100,
    GGUF_VALUE_TYPE_ARRAY_END   = 101
};
/* A string in GGUF: a 64-bit byte count followed by the bytes themselves. */
struct gguf_string {
    uint64_t len;   /* The length of the string, in bytes. */
    char string[];  /* UTF-8 data, NOT null-terminated: always use 'len'. */
};
/* Union of possible values: one member per scalar value type, plus the
 * string and the array header representations. */
union gguf_value {
    uint8_t uint8;
    int8_t int8;
    uint16_t uint16;
    int16_t int16;
    uint32_t uint32;
    int32_t int32;
    float float32;
    uint64_t uint64;
    int64_t int64;
    double float64;
    uint8_t boolval;
    struct gguf_string string;

    /* Array header. It must be packed so that 'len' follows 'type' with no
     * padding in between (12 bytes in total). MSVC has no
     * __attribute__((packed)), so there the same layout is requested with
     * #pragma pack; every other compiler uses the attribute. The selection
     * is done with plain conditionals: no reserved identifier such as
     * __attribute__ is ever redefined as a macro. */
#ifdef _MSC_VER
#pragma pack(push, 1)
#endif
    struct {
        /* Any value type is valid, including arrays. */
        uint32_t type;
        /* Number of elements, not bytes. */
        uint64_t len;
        /* The array of values follow... */
    }
#ifndef _MSC_VER
    __attribute__((packed))
#endif
    array;
#ifdef _MSC_VER
#pragma pack(pop)
#endif
};
/* Header of a GGUF file. */
struct gguf_header {
    /* Magic number to announce that this is a GGUF file.
     * Must be `GGUF` at the byte level: `0x47` `0x47` `0x55` `0x46`. */
    uint32_t magic;

    /* The version of the format implemented.
     * Must be `3` for version described in this spec. */
    uint32_t version;

    /* The number of tensors in the file. This is explicit, instead of
     * being included in the metadata, to ensure it is always present for
     * loading the tensors. */
    uint64_t tensor_count;

    /* The number of metadata key-value pairs. */
    uint64_t metadata_kv_count;
};
/* How a metadata key (and its value) is represented in this library API. */
typedef struct {
    const char *name;       /* Key name; its length is given by 'namelen'. */
    size_t namelen;         /* Length of 'name'. */
    uint32_t type;          /* Type of the value.
                             * NOTE(review): presumably one of
                             * enum gguf_value_type -- confirm. */
    union gguf_value *val;  /* Pointer to the value itself. */
} gguf_key;
/* How a tensor is represented in this library API. */

/* Capacity of the 'dim' array. Future-proof: actual limit is 4. */
#define GGUF_TENSOR_MAX_DIM 8

typedef struct {
    const char *name;
    size_t namelen;
    /* Tensor type (enum gguf_tensor_type). */
    uint32_t type;
    /* Number of dimensions of the tensor. */
    uint32_t ndim;
    /* Dimensions (Eg. [512, 1024, 1, 1]). */
    uint64_t dim[GGUF_TENSOR_MAX_DIM];
    /* Offset from start of file. */
    uint64_t offset;
    /* Total size in bytes. */
    uint64_t bsize;
    /* Total number of parameters. */
    uint64_t num_weights;
    /* Pointer to the mmaped file. */
    uint8_t *weights_data;
} gguf_tensor;
/* The context handed back as a pointer by gguf_open() and gguf_create();
 * the other functions of the library take it as their 'ctx' argument. */
typedef struct {
    int fd;
#ifdef _WIN32
    void *mapping;
#endif
    /* Memory mapped data. */
    uint8_t *data;
    /* Total file size. */
    uint64_t size;
    /* GGUF file header info. */
    struct gguf_header *header;
    /* Number of key-value pairs yet to read. */
    uint64_t left_kv;
    /* Number of tensors yet to read. */
    uint64_t left_tensors;
    /* Offset of the next item to parse. */
    uint64_t off;
    /* Offset of tensor data section. This is only set when all the
     * kv/tensor header entries are processed. Initially 0. */
    uint64_t data_off;
    /* File data alignment. Default: 32 bytes. */
    uint64_t alignment;
} gguf_ctx;
/* =============================== Prototypes =============================== */
/* Functions returning a context, and functions managing an existing one.
 * NOTE(review): 'flags' presumably takes GGUF_NONE / GGUF_OVERWRITE --
 * confirm against the implementation. */
gguf_ctx *gguf_open(const char *filename, int flags);
gguf_ctx *gguf_create(const char *filename, int flags);
int gguf_remap(gguf_ctx *ctx, int for_write);
void gguf_rewind(gguf_ctx *ctx);
void gguf_close(gguf_ctx *ctx);

/* Functions filling a caller-provided gguf_key / gguf_tensor.
 * NOTE(review): the meaning of the int return value is not visible in this
 * header -- check the implementation before relying on it. */
int gguf_get_key(gguf_ctx *ctx, gguf_key *key);
int gguf_get_tensor(gguf_ctx *ctx, gguf_tensor *tensor);

/* Type id to name lookups. */
const char *gguf_get_value_type_name(uint32_t type);
const char *gguf_get_tensor_type_name(uint32_t type);

/* Value processing. The callback of gguf_do_with_value() receives the
 * 'privdata' pointer together with a type, a value, 'in_array' and
 * 'array_len'; the type may also be one of the special
 * GGUF_VALUE_TYPE_ARRAY_START / GGUF_VALUE_TYPE_ARRAY_END markers. */
void gguf_do_with_value(gguf_ctx *ctx,
                        uint32_t type,
                        union gguf_value *val,
                        void *privdata,
                        uint64_t in_array,
                        uint64_t array_len,
                        void (*callback)(void *privdata,
                                         uint32_t type,
                                         union gguf_value *val,
                                         uint64_t in_array,
                                         uint64_t array_len));
void gguf_print_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val,
                      int full);

/* Functions appending content through a context. */
int gguf_append_kv(gguf_ctx *ctx, const char *keyname, uint64_t keylen,
                   uint32_t type, void *val, uint64_t len);
int gguf_append_tensor_info(gguf_ctx *ctx, const char *tensorname,
                            uint64_t namelen, uint32_t num_dim,
                            uint64_t *dim, uint32_t type, uint64_t offset);
int gguf_append_tensor_data(gguf_ctx *ctx, void *tensor,
                            uint64_t tensor_size);

/* Alignment and section-skipping helpers. */
uint64_t gguf_get_alignment_padding(uint64_t alignment, uint64_t offset);
void gguf_skip_key_values_section(gguf_ctx *ctx);

/* Tensor conversions, each returning a pointer to the converted weights.
 * NOTE(review): ownership of the returned buffer is not visible in this
 * header -- confirm who must release it. */
float *gguf_tensor_to_float(gguf_tensor *tensor);
int16_t *gguf_tensor_to_f16(gguf_tensor *tensor);
int16_t *gguf_tensor_to_bf16(gguf_tensor *tensor);
#endif