gguflib.h

/* Copyright (C) 2024 Salvatore Sanfilippo <antirez@gmail.com>
 * See LICENSE for licensing info.
 *
 * GGUF enums / structures are partially adapted
 * the official GGUF implementation at from https://github.com/ggerganov/ggml/
 */

#ifndef GGUFLIB_H
#define GGUFLIB_H

#include <stdint.h>

/* ============================ Enums and structures ======================== */

/* Flags that can be used in different functions with the same meaning. */
#define GGUF_NONE           0           // No flags.
#define GGUF_OVERWRITE      (1<<0)      // Overwrite the destination object.

enum gguf_tensor_type {
    GGUF_TYPE_F32  = 0,
    GGUF_TYPE_F16  = 1,
    GGUF_TYPE_Q4_0 = 2,
    GGUF_TYPE_Q4_1 = 3,
    // GGUF_TYPE_Q4_2 = 4, support has been removed
    // GGUF_TYPE_Q4_3 (5) support has been removed
    GGUF_TYPE_Q5_0 = 6,
    GGUF_TYPE_Q5_1 = 7,
    GGUF_TYPE_Q8_0 = 8,
    GGUF_TYPE_Q8_1 = 9,
    GGUF_TYPE_Q2_K = 10,
    GGUF_TYPE_Q3_K = 11,
    GGUF_TYPE_Q4_K = 12,
    GGUF_TYPE_Q5_K = 13,
    GGUF_TYPE_Q6_K = 14,
    GGUF_TYPE_Q8_K = 15,
    GGUF_TYPE_IQ2_XXS = 16,
    GGUF_TYPE_IQ2_XS = 17,
    GGUF_TYPE_IQ3_XXS = 18,
    GGUF_TYPE_IQ1_S = 19,
    GGUF_TYPE_IQ4_NL = 20,
    GGUF_TYPE_IQ3_S = 21,
    GGUF_TYPE_IQ2_S = 22,
    GGUF_TYPE_IQ4_XS = 23,
    GGUF_TYPE_I8 = 24,
    GGUF_TYPE_I16 = 25,
    GGUF_TYPE_I32 = 26,
    GGUF_TYPE_I64 = 27,
    GGUF_TYPE_F64 = 28,
    GGUF_TYPE_IQ1_M = 29,
    GGUF_TYPE_BF16 = 30,
    GGUF_TYPE_COUNT,
};

enum gguf_value_type {
    // The value is a 8-bit unsigned integer.
    GGUF_VALUE_TYPE_UINT8 = 0,
    // The value is a 8-bit signed integer.
    GGUF_VALUE_TYPE_INT8 = 1,
    // The value is a 16-bit unsigned little-endian integer.
    GGUF_VALUE_TYPE_UINT16 = 2,
    // The value is a 16-bit signed little-endian integer.
    GGUF_VALUE_TYPE_INT16 = 3,
    // The value is a 32-bit unsigned little-endian integer.
    GGUF_VALUE_TYPE_UINT32 = 4,
    // The value is a 32-bit signed little-endian integer.
    GGUF_VALUE_TYPE_INT32 = 5,
    // The value is a 32-bit IEEE754 floating point number.
    GGUF_VALUE_TYPE_FLOAT32 = 6,
    // The value is a boolean.
    // 1-byte value where 0 is false and 1 is true.
    // Anything else is invalid, and should be treated as either the model
    // being invalid or the reader being buggy.
    GGUF_VALUE_TYPE_BOOL = 7,
    // The value is a UTF-8 non-null-terminated string, with length prepended.
    GGUF_VALUE_TYPE_STRING = 8,
    // The value is an array of other values, with the length and type
    // prepended. Arrays can be nested, and the length of the array is the
    // number of elements in the array, not the number of bytes.
    GGUF_VALUE_TYPE_ARRAY = 9,
    // The value is a 64-bit unsigned little-endian integer.
    GGUF_VALUE_TYPE_UINT64 = 10,
    // The value is a 64-bit signed little-endian integer.
    GGUF_VALUE_TYPE_INT64 = 11,
    // The value is a 64-bit IEEE754 floating point number.
    GGUF_VALUE_TYPE_FLOAT64 = 12,
    // Special values used by the callbacks of gguf_do_with_value().
    GGUF_VALUE_TYPE_ARRAY_START = 100,
    GGUF_VALUE_TYPE_ARRAY_END = 101
};

// A string in GGUF.
struct gguf_string {
    // The length of the string, in bytes.
    uint64_t len;
    // The string as a UTF-8 non-null-terminated string.
    char string[];
};

// Union of possible values.
union gguf_value {
    uint8_t uint8;
    int8_t int8;
    uint16_t uint16;
    int16_t int16;
    uint32_t uint32;
    int32_t int32;
    float float32;
    uint64_t uint64;
    int64_t int64;
    double float64;
    uint8_t boolval;
    struct gguf_string string;
#ifdef _MSC_VER
#pragma pack(push, 1)
#define __attribute__(x)
#endif
    struct {
        // Any value type is valid, including arrays.
        uint32_t type;
        // Number of elements, not bytes
        uint64_t len;
        // The array of values follow...
    } __attribute__((packed)) array;
#ifdef _MSC_VER
#pragma pack(pop)
#undef __attribute__
#endif
};

// Header
struct gguf_header {
    // Magic number to announce that this is a GGUF file.
    // Must be `GGUF` at the byte level: `0x47` `0x47` `0x55` `0x46`.
    uint32_t magic;
    // The version of the format implemented.
    // Must be `3` for version described in this spec.
    uint32_t version;
    // The number of tensors in the file.
    // This is explicit, instead of being included in the metadata, to ensure
    // it is always present for loading the tensors.
    uint64_t tensor_count;
    // The number of metadata key-value pairs.
    uint64_t metadata_kv_count;
};

/* Key representation in this library API. */
typedef struct {
    const char *name;
    size_t namelen;
    uint32_t type;
    union gguf_value *val;
} gguf_key;

/* Tensor representation in this library API. */
#define GGUF_TENSOR_MAX_DIM 8           // Future-proof: actual limit is 4.
typedef struct {
    const char *name;
    size_t namelen;
    uint32_t type;                      // Tensor type (enum gguf_tensor_type).
    uint32_t ndim;                      // Number of dimensions of the tensor.
    uint64_t dim[GGUF_TENSOR_MAX_DIM];  // Dimensions (Eg. [512, 1024, 1, 1]).
    uint64_t offset;                    // Offset from start of file.
    uint64_t bsize;                     // Total size in bytes.
    uint64_t num_weights;               // Total number of parameters.
    uint8_t *weights_data;              // Pointer to the mmaped file.
} gguf_tensor;

/* The context you get after opening a GGUF file with gguf_init(). */
typedef struct {
    int fd;
#ifdef _WIN32
    void* mapping;
#endif
    uint8_t *data;  // Memory mapped data.
    uint64_t size;  // Total file size.
    struct gguf_header *header;     // GUFF file header info.
    uint64_t left_kv;               // Number of key-value pairs yet to read.
    uint64_t left_tensors;          // Number of tensors yet to read.
    uint64_t off;                   // Offset of the next item to parse.
    uint64_t data_off;              // Offset of tensor data section. This
                                    // is only set when all the kv/tensor header
                                    // entries are processed. Initially 0.
    uint64_t alignment;             // File data alignment. Default: 32 bytes.
} gguf_ctx;

/* =============================== Prototypes =============================== */

gguf_ctx *gguf_open(const char *filename, int flags);
gguf_ctx *gguf_create(const char *filename, int flags);
int gguf_remap(gguf_ctx *ctx, int for_write);
void gguf_rewind(gguf_ctx *ctx);
void gguf_close(gguf_ctx *ctx);
int gguf_get_key(gguf_ctx *ctx, gguf_key *key);
int gguf_get_tensor(gguf_ctx *ctx, gguf_tensor *tensor);
const char *gguf_get_value_type_name(uint32_t type);
const char *gguf_get_tensor_type_name(uint32_t type);
void gguf_do_with_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val,
                        void *privdata, uint64_t in_array, uint64_t array_len,
                        void(*callback)(void *privdata, uint32_t type,
                                     union gguf_value *val, uint64_t in_array,
                                     uint64_t array_len));
void gguf_print_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val, int full);
int gguf_append_kv(gguf_ctx *ctx, const char *keyname, uint64_t keylen, uint32_t type, void *val, uint64_t len);
int gguf_append_tensor_info(gguf_ctx *ctx, const char *tensorname, uint64_t namelen, uint32_t num_dim, uint64_t *dim, uint32_t type, uint64_t offset);
int gguf_append_tensor_data(gguf_ctx *ctx, void *tensor, uint64_t tensor_size);
uint64_t gguf_get_alignment_padding(uint64_t alignment, uint64_t offset);
void gguf_skip_key_values_section(gguf_ctx *ctx);
float *gguf_tensor_to_float(gguf_tensor *tensor);
int16_t *gguf_tensor_to_f16(gguf_tensor *tensor);
int16_t *gguf_tensor_to_bf16(gguf_tensor *tensor);

#endif