Skip to content

Commit

Permalink
Merge branch 'ggerganov:master' into dev/da/set-utf-8-msvc
Browse files Browse the repository at this point in the history
  • Loading branch information
drasticactions authored Aug 30, 2024
2 parents ea469af + c4e1861 commit c6cb40c
Show file tree
Hide file tree
Showing 105 changed files with 6,374 additions and 147,417 deletions.
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -971,7 +971,8 @@ $(LIB_WHISPER): \
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)

$(LIB_WHISPER_S): \
$(OBJ_WHISPER)
$(OBJ_WHISPER) \
$(OBJ_GGML)
ar rcs $(LIB_WHISPER_S) $^

# common
Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ Supported platforms:
- [x] [WebAssembly](examples/whisper.wasm)
- [x] Windows ([MSVC](https://github.com/ggerganov/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggerganov/whisper.cpp/issues/168)]
- [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/discussions/166)
- [x] [docker](https://github.com/ggerganov/whisper.cpp/pkgs/container/whisper.cpp)
- [x] [Docker](https://github.com/ggerganov/whisper.cpp/pkgs/container/whisper.cpp)

The entire high-level implementation of the model is contained in [whisper.h](whisper.h) and [whisper.cpp](whisper.cpp).
The entire high-level implementation of the model is contained in [whisper.h](include/whisper.h) and [whisper.cpp](src/whisper.cpp).
The rest of the code is part of the [`ggml`](https://github.com/ggerganov/ggml) machine learning library.

Having such a lightweight implementation of the model allows to easily integrate it in different platforms and applications.
Expand All @@ -55,8 +55,8 @@ Or you can even run it straight in the browser: [talk.wasm](examples/talk.wasm)

## Implementation details

- The core tensor operations are implemented in C ([ggml.h](ggml.h) / [ggml.c](ggml.c))
- The transformer model and the high-level C-style API are implemented in C++ ([whisper.h](whisper.h) / [whisper.cpp](whisper.cpp))
- The core tensor operations are implemented in C ([ggml.h](ggml/include/ggml.h) / [ggml.c](ggml/src/ggml.c))
- The transformer model and the high-level C-style API are implemented in C++ ([whisper.h](include/whisper.h) / [whisper.cpp](src/whisper.cpp))
- Sample usage is demonstrated in [main.cpp](examples/main)
- Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](examples/stream)
- Various other examples are available in the [examples](examples) folder
Expand Down Expand Up @@ -751,7 +751,7 @@ took to execute it. The results are summarized in the following Github issue:
[Benchmark results](https://github.com/ggerganov/whisper.cpp/issues/89)
Additionally a script to run whisper.cpp with different models and audio files is provided [bench.py](bench.py).
Additionally a script to run whisper.cpp with different models and audio files is provided [bench.py](scripts/bench.py).
You can run it with the following command, by default it will run against any standard model in the models folder.
Expand Down
2 changes: 1 addition & 1 deletion bindings/go/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ GGML_METAL_PATH_RESOURCES := $(abspath ../..)
BUILD_DIR := build
MODELS_DIR := models
EXAMPLES_DIR := $(wildcard examples/*)
INCLUDE_PATH := $(abspath ../..)
INCLUDE_PATH := $(abspath ../../include):$(abspath ../../ggml/include)
LIBRARY_PATH := $(abspath ../..)

ifeq ($(UNAME_S),Darwin)
Expand Down
14 changes: 14 additions & 0 deletions bindings/go/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,18 @@ func (p *Params) SetAudioCtx(n int) {
p.audio_ctx = C.int(n)
}

func (p *Params) SetMaxContext(n int) {
p.n_max_text_ctx = C.int(n)
}

func (p *Params) SetBeamSize(n int) {
p.beam_search.beam_size = C.int(n)
}

func (p *Params) SetEntropyThold(t float32) {
p.entropy_thold = C.float(t)
}

// Set initial prompt
func (p *Params) SetInitialPrompt(prompt string) {
p.initial_prompt = C.CString(prompt)
Expand Down Expand Up @@ -149,6 +161,8 @@ func (p *Params) String() string {
str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
str += fmt.Sprintf(" audio_ctx=%d", p.audio_ctx)
str += fmt.Sprintf(" initial_prompt=%s", C.GoString(p.initial_prompt))
str += fmt.Sprintf(" entropy_thold=%f", p.entropy_thold)
str += fmt.Sprintf(" beam_size=%d", p.beam_search.beam_size)
if p.translate {
str += " translate"
}
Expand Down
15 changes: 15 additions & 0 deletions bindings/go/pkg/whisper/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,21 @@ func (context *context) SetAudioCtx(n uint) {
context.params.SetAudioCtx(int(n))
}

// Set maximum number of text context tokens to store
func (context *context) SetMaxContext(n int) {
context.params.SetMaxContext(n)
}

// Set Beam Size
func (context *context) SetBeamSize(n int) {
context.params.SetBeamSize(n)
}

// Set Entropy threshold
func (context *context) SetEntropyThold(t float32) {
context.params.SetEntropyThold(t)
}

// Set initial prompt
func (context *context) SetInitialPrompt(prompt string) {
context.params.SetInitialPrompt(prompt)
Expand Down
3 changes: 3 additions & 0 deletions bindings/go/pkg/whisper/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ type Context interface {
SetTokenTimestamps(bool) // Set token timestamps flag
SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
SetAudioCtx(uint) // Set audio encoder context
SetMaxContext(n int) // Set maximum number of text context tokens to store
SetBeamSize(n int) // Set Beam Size
SetEntropyThold(t float32) // Set Entropy threshold
SetInitialPrompt(prompt string) // Set initial prompt

// Process mono audio data and return any errors.
Expand Down
2 changes: 1 addition & 1 deletion bindings/go/whisper.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
// CGO

/*
#cgo LDFLAGS: -lwhisper -lm -lstdc++
#cgo LDFLAGS: -lwhisper -lm -lstdc++ -fopenmp
#cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
#include <whisper.h>
#include <stdlib.h>
Expand Down
42 changes: 37 additions & 5 deletions examples/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <thread>
#include <ctime>
#include <fstream>
#include <sstream>

#define COMMON_SAMPLE_RATE 16000

Expand Down Expand Up @@ -286,12 +287,43 @@ void sam_print_usage(int argc, char ** argv, const sam_params & params);
// Terminal utils
//


// Terminal color map. 10 colors grouped in ranges [0.0, 0.1, ..., 0.9]
// Lowest is red, middle is yellow, highest is green.
#define SQR(X) ((X) * (X))
#define UNCUBE(x) x < 48 ? 0 : x < 115 ? 1 : (x - 35) / 40

/**
* Quantizes 24-bit RGB to xterm256 code range [16,256).
*/
static int rgb2xterm256(int r, int g, int b) {
unsigned char cube[] = {0, 0137, 0207, 0257, 0327, 0377};
int av, ir, ig, ib, il, qr, qg, qb, ql;
av = r * .299 + g * .587 + b * .114 + .5;
ql = (il = av > 238 ? 23 : (av - 3) / 10) * 10 + 8;
qr = cube[(ir = UNCUBE(r))];
qg = cube[(ig = UNCUBE(g))];
qb = cube[(ib = UNCUBE(b))];
if (SQR(qr - r) + SQR(qg - g) + SQR(qb - b) <=
SQR(ql - r) + SQR(ql - g) + SQR(ql - b))
return ir * 36 + ig * 6 + ib + 020;
return il + 0350;
}

static std::string set_xterm256_foreground(int r, int g, int b) {
int x = rgb2xterm256(r, g, b);
std::ostringstream oss;
oss << "\033[38;5;" << x << "m";
return oss.str();
}

// Lowest is red, middle is yellow, highest is green. Color scheme from
// Paul Tol; it is colorblind friendly https://personal.sron.nl/~pault/
const std::vector<std::string> k_colors = {
"\033[38;5;196m", "\033[38;5;202m", "\033[38;5;208m", "\033[38;5;214m", "\033[38;5;220m",
"\033[38;5;226m", "\033[38;5;190m", "\033[38;5;154m", "\033[38;5;118m", "\033[38;5;82m",
set_xterm256_foreground(220, 5, 12),
set_xterm256_foreground(232, 96, 28),
set_xterm256_foreground(241, 147, 45),
set_xterm256_foreground(246, 193, 65),
set_xterm256_foreground(247, 240, 86),
set_xterm256_foreground(144, 201, 135),
set_xterm256_foreground( 78, 178, 101),
};

//
Expand Down
21 changes: 21 additions & 0 deletions examples/talk-llama/llama-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,24 @@ void llama_log_callback_default(ggml_log_level level, const char * text, void *
#define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
#define LLAMA_LOG_WARN(...) llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
#define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)

//
// helpers
//

static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
if (search.empty()) {
return;
}
std::string builder;
builder.reserve(s.length());
size_t pos = 0;
size_t last_pos = 0;
while ((pos = s.find(search, last_pos)) != std::string::npos) {
builder.append(s, last_pos, pos - last_pos);
builder.append(replace);
last_pos = pos + search.length();
}
builder.append(s, last_pos, std::string::npos);
s = std::move(builder);
}
4 changes: 2 additions & 2 deletions examples/talk-llama/llama-sampling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,14 @@ void llama_sample_top_k_impl(struct llama_sampling * smpl, llama_token_data_arra
constexpr float bucket_low = -10.0f;
constexpr float bucket_high = 10.0f;
constexpr float bucket_scale = nbuckets/(bucket_high - bucket_low);
constexpr float bucker_inter = -bucket_low * bucket_scale;
constexpr float bucket_inter = -bucket_low * bucket_scale;

std::vector<int> bucket_idx(candidates->size);
std::vector<int> histo(nbuckets, 0);

for (int i = 0; i < (int)candidates->size; ++i) {
const float val = candidates->data[i].logit;
int ib = int(bucket_scale * val + bucker_inter); //nbuckets * (val - bucket_low) / (bucket_high - bucket_low);
int ib = int(bucket_scale * val + bucket_inter); //nbuckets * (val - bucket_low) / (bucket_high - bucket_low);
ib = std::max(0, std::min(nbuckets-1, ib));
bucket_idx[i] = ib;
++histo[ib];
Expand Down
41 changes: 22 additions & 19 deletions examples/talk-llama/llama-vocab.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,6 @@
// helpers
//

static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
std::string result;
for (size_t pos = 0; ; pos += search.length()) {
auto new_pos = s.find(search, pos);
if (new_pos == std::string::npos) {
result += s.substr(pos, s.size() - pos);
break;
}
result += s.substr(pos, new_pos - pos) + replace;
pos = new_pos;
}
s = std::move(result);
}

LLAMA_ATTRIBUTE_FORMAT(1, 2)
static std::string format(const char * fmt, ...) {
va_list ap;
Expand Down Expand Up @@ -335,6 +321,21 @@ struct llm_tokenizer_spm {

// TODO: there are a lot of common parts between spm and bpe tokenizers, should be refactored and reused

template<typename T, typename Container = std::vector<T>, typename Compare = std::less<typename Container::value_type>>
class llama_priority_queue : public std::priority_queue<T, Container, Compare> {
public:
using std::priority_queue<T, Container, Compare>::priority_queue;

T pop_move() {
T item = std::move(this->c.front());
std::pop_heap(this->c.begin(), this->c.end(), this->comp);
this->c.pop_back();
return item;
}

void pop() = delete;
};

struct llm_bigram_bpe {
struct comparator {
bool operator()(const llm_bigram_bpe & l, const llm_bigram_bpe & r) const {
Expand All @@ -343,7 +344,7 @@ struct llm_bigram_bpe {
};

using queue_storage = std::vector<llm_bigram_bpe>;
using queue = std::priority_queue<llm_bigram_bpe, queue_storage, comparator>;
using queue = llama_priority_queue<llm_bigram_bpe, queue_storage, comparator>;
llm_symbol::index left;
llm_symbol::index right;
std::string text;
Expand Down Expand Up @@ -402,6 +403,7 @@ struct llm_tokenizer_bpe {
case LLAMA_VOCAB_PRE_TYPE_COMMAND_R:
case LLAMA_VOCAB_PRE_TYPE_SMOLLM:
case LLAMA_VOCAB_PRE_TYPE_CODESHELL:
case LLAMA_VOCAB_PRE_TYPE_EXAONE:
regex_exprs = {
"\\p{N}",
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
Expand All @@ -424,6 +426,8 @@ struct llm_tokenizer_bpe {
};
break;
case LLAMA_VOCAB_PRE_TYPE_PORO:
case LLAMA_VOCAB_PRE_TYPE_BLOOM:
case LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH:
regex_exprs = {
" ?[^(\\s|.,!?…。,、।۔،)]+",
};
Expand Down Expand Up @@ -531,8 +535,7 @@ struct llm_tokenizer_bpe {

// build token(s)
while (!work_queue.empty()) {
auto bigram = work_queue.top();
work_queue.pop();
auto bigram = work_queue.pop_move();

auto & left_symbol = symbols[bigram.left];
auto & right_symbol = symbols[bigram.right];
Expand Down Expand Up @@ -1480,11 +1483,11 @@ llama_token llama_token_pad_impl(const struct llama_vocab & vocab) {
return vocab.special_pad_id;
}

int32_t llama_add_bos_token_impl(const struct llama_vocab & vocab) {
bool llama_add_bos_token_impl(const struct llama_vocab & vocab) {
return vocab.tokenizer_add_bos;
}

int32_t llama_add_eos_token_impl(const struct llama_vocab & vocab) {
bool llama_add_eos_token_impl(const struct llama_vocab & vocab) {
return vocab.tokenizer_add_eos;
}

Expand Down
4 changes: 2 additions & 2 deletions examples/talk-llama/llama-vocab.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ llama_token llama_token_sep_impl(const struct llama_vocab & vocab);
llama_token llama_token_nl_impl (const struct llama_vocab & vocab);
llama_token llama_token_pad_impl(const struct llama_vocab & vocab);

int32_t llama_add_bos_token_impl(const struct llama_vocab & vocab);
int32_t llama_add_eos_token_impl(const struct llama_vocab & vocab);
bool llama_add_bos_token_impl(const struct llama_vocab & vocab);
bool llama_add_eos_token_impl(const struct llama_vocab & vocab);

llama_token llama_token_prefix_impl(const struct llama_vocab & vocab);
llama_token llama_token_middle_impl(const struct llama_vocab & vocab);
Expand Down
Loading

0 comments on commit c6cb40c

Please sign in to comment.