Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix the decoding issues #1768

Open
wants to merge 58 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
71a65e7
Add files via upload
bobqianic Jan 14, 2024
8301f88
Add files via upload
bobqianic Jan 14, 2024
1226204
Add files via upload
bobqianic Jan 15, 2024
c53c33b
revert change
bobqianic Jan 15, 2024
dfef69e
Delete server directory
bobqianic Jan 15, 2024
7499e3c
Merge pull request #1 from bobqianic/bobqianic-patch-1
bobqianic Jan 15, 2024
6648641
Add files via upload
bobqianic Jan 15, 2024
9d0ebd1
Add files via upload
bobqianic Jan 15, 2024
c8528a7
Add files via upload
bobqianic Jan 15, 2024
7047d32
Merge pull request #2 from bobqianic/patch
bobqianic Jan 15, 2024
96a9349
Add files via upload
bobqianic Jan 15, 2024
4b3a211
Fix ruby and go bindings
bobqianic Jan 15, 2024
3818acb
Add files via upload
bobqianic Jan 15, 2024
b5c4d5c
Add files via upload
bobqianic Jan 15, 2024
80589d2
Revert some changes
bobqianic Jan 15, 2024
271c321
Revert some changes
bobqianic Jan 15, 2024
5ea1d91
Merge branch 'ggerganov:master' into fix-decoding
bobqianic Jan 15, 2024
41df3f0
Remove hallucination by using `token_nosp`
bobqianic Jan 16, 2024
2676819
edit some comments
bobqianic Jan 16, 2024
327a3dd
Fix tokenizer (mostly)
bobqianic Jan 20, 2024
3e05c3d
Merge branch 'master' into fix-decoding
bobqianic Jan 20, 2024
23f0b0b
Update Makefile
bobqianic Jan 20, 2024
a1e6f20
fix CI
bobqianic Jan 20, 2024
938691b
Update WhisperCppTest.java
bobqianic Jan 20, 2024
eda72d3
Add files via upload
bobqianic Jan 20, 2024
a12d40f
Update CMakeLists.txt
bobqianic Jan 20, 2024
cda677e
Update unicode.h
bobqianic Jan 20, 2024
4b079bf
Merge branch 'master' into fix-decoding
bobqianic Jan 31, 2024
6e3b7d4
Restore WhisperCppTest.java
bobqianic Jan 31, 2024
7592693
Update whisper.cpp
bobqianic Jan 31, 2024
db49f1b
Merge pull request #3 from bobqianic/restore_best_of
bobqianic Jan 31, 2024
98e9c69
Fix audio feature seeking error
bobqianic Feb 1, 2024
6a2674c
Fix potential heap-buffer-overflow
bobqianic Feb 1, 2024
850fa2f
Add files via upload
bobqianic Feb 2, 2024
5ef0ea2
Add files via upload
bobqianic Feb 2, 2024
6766747
Add files via upload
bobqianic Feb 2, 2024
e2e5177
Merge pull request #4 from bobqianic/update
bobqianic Feb 2, 2024
8a46034
Add files via upload
bobqianic Feb 5, 2024
7a5a2e9
Add files via upload
bobqianic Feb 5, 2024
a0d4348
Merge pull request #5 from bobqianic/push
bobqianic Feb 5, 2024
b3305eb
Add files via upload
bobqianic Feb 5, 2024
09a735e
Update ruby_whisper.cpp
bobqianic Feb 5, 2024
0f5b5be
Update test_whisper.rb
bobqianic Feb 5, 2024
4cc4b89
Update params.go
bobqianic Feb 5, 2024
9fbe59f
Merge pull request #6 from bobqianic/fix-binding
bobqianic Feb 5, 2024
49e7a7f
Update context.go
bobqianic Feb 5, 2024
891a453
Update interface.go
bobqianic Feb 5, 2024
7baa7a6
Merge pull request #7 from bobqianic/fix-go
bobqianic Feb 5, 2024
c0277e3
revert logsumexp implementation
bobqianic Feb 6, 2024
b6d89b0
Add heuristic mode
bobqianic Feb 9, 2024
3512527
Bug Fix
bobqianic Feb 9, 2024
e091189
Add heuristic mode
bobqianic Feb 9, 2024
de4f87f
Bug Fix 2
bobqianic Feb 9, 2024
476dff4
Merge pull request #8 from bobqianic/heuristic
bobqianic Feb 9, 2024
f38b659
Merge branch 'master' into fix-decoding
bobqianic Jun 24, 2024
2b61aec
Update whisper.cpp
bobqianic Jun 24, 2024
a53175a
Add files via upload
bobqianic Jun 25, 2024
7ea8a64
Merge pull request #12 from bobqianic/base
bobqianic Jun 25, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -712,6 +712,7 @@ add_library(${TARGET}
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS}
unicode.h
whisper.h
whisper.cpp
)
Expand Down
9 changes: 5 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ ggml-blas.o: ggml-blas.cpp ggml-blas.h

WHISPER_OBJ += ggml.o ggml-alloc.o ggml-backend.o ggml-quants.o

whisper.o: whisper.cpp whisper.h whisper-mel.hpp ggml.h ggml-cuda.h
whisper.o: whisper.cpp whisper.h unicode.h whisper-mel.hpp ggml.h ggml-cuda.h
$(CXX) $(CXXFLAGS) -c $< -o $@

ifndef WHISPER_COREML
Expand Down Expand Up @@ -476,9 +476,10 @@ CC_SDL=`sdl2-config --cflags --libs`

SRC_COMMON = examples/common.cpp examples/common-ggml.cpp examples/grammar-parser.cpp
SRC_COMMON_SDL = examples/common-sdl.cpp
SRC_CONSOLE = examples/console.cpp

main: examples/main/main.cpp $(SRC_COMMON) $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o main $(LDFLAGS)
$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) $(SRC_CONSOLE) $(WHISPER_OBJ) -o main $(LDFLAGS)
./main -h

bench: examples/bench/bench.cpp $(WHISPER_OBJ)
Expand All @@ -491,7 +492,7 @@ server: examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o server $(LDFLAGS) $(LWINSOCK2)

stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)
$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_CONSOLE) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)

command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o command $(CC_SDL) $(LDFLAGS)
Expand All @@ -500,7 +501,7 @@ lsp: examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o lsp $(CC_SDL) $(LDFLAGS)

talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o talk $(CC_SDL) $(LDFLAGS)
$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_CONSOLE) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o talk $(CC_SDL) $(LDFLAGS)

talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp examples/talk-llama/unicode.cpp examples/talk-llama/unicode-data.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp examples/talk-llama/unicode.cpp examples/talk-llama/unicode-data.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o talk-llama $(CC_SDL) $(LDFLAGS)
Expand Down
4 changes: 0 additions & 4 deletions bindings/go/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,6 @@ func (p *Params) SetTranslate(v bool) {
p.translate = toBool(v)
}

// SetSplitOnWord stores v, converted with toBool, in the split_on_word field of p.
func (p *Params) SetSplitOnWord(v bool) {
p.split_on_word = toBool(v)
}

// SetNoContext stores v, converted with toBool, in the no_context field of p.
func (p *Params) SetNoContext(v bool) {
p.no_context = toBool(v)
}
Expand Down
52 changes: 26 additions & 26 deletions bindings/javascript/package.json
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
{
"name": "whisper.cpp",
"version": "1.6.2",
"description": "Whisper speech recognition",
"main": "whisper.js",
"scripts": {
"test": "echo \"todo: add tests\" && exit 0"
},
"repository": {
"type": "git",
"url": "git+https://github.com/ggerganov/whisper.cpp"
},
"keywords": [
"openai",
"whisper",
"speech-to-text",
"speech-recognition",
"transformer"
],
"author": "Georgi Gerganov",
"license": "MIT",
"bugs": {
"url": "https://github.com/ggerganov/whisper.cpp/issues"
},
"homepage": "https://github.com/ggerganov/whisper.cpp#readme"
}
{
"name": "whisper.cpp",
"version": "1.6.2",
"description": "Whisper speech recognition",
"main": "whisper.js",
"scripts": {
"test": "echo \"todo: add tests\" && exit 0"
},
"repository": {
"type": "git",
"url": "git+https://github.com/ggerganov/whisper.cpp"
},
"keywords": [
"openai",
"whisper",
"speech-to-text",
"speech-recognition",
"transformer"
],
"author": "Georgi Gerganov",
"license": "MIT",
"bugs": {
"url": "https://github.com/ggerganov/whisper.cpp/issues"
},
"homepage": "https://github.com/ggerganov/whisper.cpp#readme"
}
463 changes: 463 additions & 0 deletions bindings/ruby/ext/unicode.h

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ if (WHISPER_FFMPEG)
endif()

add_library(${TARGET} STATIC
console.h
console.cpp
common.h
common.cpp
common-ggml.h
Expand Down
29 changes: 25 additions & 4 deletions examples/command/command.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "common-sdl.h"
#include "common.h"
#include "console.h"
#include "whisper.h"
#include "grammar-parser.h"

Expand Down Expand Up @@ -57,9 +58,9 @@ struct whisper_params {
std::string suppress_regex;
};

void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
void whisper_print_usage(int argc, const char ** argv, const whisper_params & params);

bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
bool whisper_params_parse(int argc, const char ** argv, whisper_params & params) {
for (int i = 1; i < argc; i++) {
std::string arg = argv[i];

Expand Down Expand Up @@ -99,7 +100,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
return true;
}

void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
void whisper_print_usage(int /*argc*/, const char ** argv, const whisper_params & params) {
fprintf(stderr, "\n");
fprintf(stderr, "usage: %s [options]\n", argv[0]);
fprintf(stderr, "\n");
Expand Down Expand Up @@ -678,7 +679,7 @@ int process_general_transcription(struct whisper_context * ctx, audio_async & au
return 0;
}

int main(int argc, char ** argv) {
int run(int argc, const char ** argv) {
whisper_params params;

if (whisper_params_parse(argc, argv, params) == false) {
Expand Down Expand Up @@ -775,3 +776,23 @@ int main(int argc, char ** argv) {

return ret_val;
}

// Program entry points. Both variants call console::init, register
// console::cleanup to run at process exit, and then forward the command-line
// arguments to run() as an array of `const char *`.
#if _WIN32
// Windows entry point: receives wide-character arguments and converts each one
// with console::UTF16toUTF8 before handing them to run().
int wmain(int argc, const wchar_t ** argv_UTF16LE) {
    console::init(true, true);
    atexit([]() { console::cleanup(); });

    // `buffer` owns the converted strings; `argv_UTF8` only holds pointers into
    // it, so both must stay alive until run() returns.
    std::vector<std::string> buffer(argc);

    // One extra slot, left as nullptr, so that argv_UTF8[argc] is a null
    // pointer exactly like the argv array a regular main() receives. Without
    // it, code that peeks at argv[argc] would read past the end of the vector.
    std::vector<const char*> argv_UTF8(argc + 1, nullptr);

    for (int i = 0; i < argc; ++i) {
        buffer[i] = console::UTF16toUTF8(argv_UTF16LE[i]);
        argv_UTF8[i] = buffer[i].c_str();
    }
    return run(argc, argv_UTF8.data());
}
#else
// Non-Windows entry point: the arguments are passed to run() unchanged.
int main(int argc, const char ** argv_UTF8) {
    console::init(true, true);
    atexit([]() { console::cleanup(); });
    return run(argc, argv_UTF8);
}
#endif
11 changes: 10 additions & 1 deletion examples/common.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#define _USE_MATH_DEFINES // for M_PI

#include "common.h"
#include "console.h"

// third-party utilities
// use your favorite implementations
Expand Down Expand Up @@ -674,7 +675,11 @@ bool read_wav(const std::string & fname, std::vector<float>& pcmf32, std::vector
return false;
}
}
#if _WIN32
else if (drwav_init_file_w(&wav, console::UTF8toUTF16(fname).c_str(), nullptr) == false) {
#else
else if (drwav_init_file(&wav, fname.c_str(), nullptr) == false) {
#endif
#if defined(WHISPER_FFMPEG)
if (ffmpeg_decode_audio(fname, wav_data) != 0) {
fprintf(stderr, "error: failed to ffmpeg decode '%s' \n", fname.c_str());
Expand Down Expand Up @@ -889,7 +894,11 @@ int timestamp_to_sample(int64_t t, int n_samples, int whisper_sample_rate) {

// Reports whether `fileName` can be opened for reading.
//
// Returns true when an input stream constructed on the path is in the good()
// state, false otherwise.
//
// Exactly one `infile` is defined per platform: an additional unguarded
// definition in front of the #ifdef would redefine the variable in both
// branches and fail to compile.
bool is_file_exist(const char *fileName)
{
#ifdef _WIN32
    // On Windows the name is converted with console::UTF8toUTF16 and the
    // stream is opened through the wide-character path.
    std::wifstream infile(console::UTF8toUTF16(fileName).c_str());
#else
    std::ifstream infile(fileName);
#endif
    return infile.good();
}

Expand Down
Loading
Loading