Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix the decoding issues #1768

Open
wants to merge 58 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 48 commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
71a65e7
Add files via upload
bobqianic Jan 14, 2024
8301f88
Add files via upload
bobqianic Jan 14, 2024
1226204
Add files via upload
bobqianic Jan 15, 2024
c53c33b
revert change
bobqianic Jan 15, 2024
dfef69e
Delete server directory
bobqianic Jan 15, 2024
7499e3c
Merge pull request #1 from bobqianic/bobqianic-patch-1
bobqianic Jan 15, 2024
6648641
Add files via upload
bobqianic Jan 15, 2024
9d0ebd1
Add files via upload
bobqianic Jan 15, 2024
c8528a7
Add files via upload
bobqianic Jan 15, 2024
7047d32
Merge pull request #2 from bobqianic/patch
bobqianic Jan 15, 2024
96a9349
Add files via upload
bobqianic Jan 15, 2024
4b3a211
Fix ruby and go bindings
bobqianic Jan 15, 2024
3818acb
Add files via upload
bobqianic Jan 15, 2024
b5c4d5c
Add files via upload
bobqianic Jan 15, 2024
80589d2
Revert some changes
bobqianic Jan 15, 2024
271c321
Revert some changes
bobqianic Jan 15, 2024
5ea1d91
Merge branch 'ggerganov:master' into fix-decoding
bobqianic Jan 15, 2024
41df3f0
Remove hallucination by using `token_nosp`
bobqianic Jan 16, 2024
2676819
edit some comments
bobqianic Jan 16, 2024
327a3dd
Fix tokenizer (mostly)
bobqianic Jan 20, 2024
3e05c3d
Merge branch 'master' into fix-decoding
bobqianic Jan 20, 2024
23f0b0b
Update Makefile
bobqianic Jan 20, 2024
a1e6f20
fix CI
bobqianic Jan 20, 2024
938691b
Update WhisperCppTest.java
bobqianic Jan 20, 2024
eda72d3
Add files via upload
bobqianic Jan 20, 2024
a12d40f
Update CMakeLists.txt
bobqianic Jan 20, 2024
cda677e
Update unicode.h
bobqianic Jan 20, 2024
4b079bf
Merge branch 'master' into fix-decoding
bobqianic Jan 31, 2024
6e3b7d4
Restore WhisperCppTest.java
bobqianic Jan 31, 2024
7592693
Update whisper.cpp
bobqianic Jan 31, 2024
db49f1b
Merge pull request #3 from bobqianic/restore_best_of
bobqianic Jan 31, 2024
98e9c69
Fix audio feature seeking error
bobqianic Feb 1, 2024
6a2674c
Fix potential heap-buffer-overflow
bobqianic Feb 1, 2024
850fa2f
Add files via upload
bobqianic Feb 2, 2024
5ef0ea2
Add files via upload
bobqianic Feb 2, 2024
6766747
Add files via upload
bobqianic Feb 2, 2024
e2e5177
Merge pull request #4 from bobqianic/update
bobqianic Feb 2, 2024
8a46034
Add files via upload
bobqianic Feb 5, 2024
7a5a2e9
Add files via upload
bobqianic Feb 5, 2024
a0d4348
Merge pull request #5 from bobqianic/push
bobqianic Feb 5, 2024
b3305eb
Add files via upload
bobqianic Feb 5, 2024
09a735e
Update ruby_whisper.cpp
bobqianic Feb 5, 2024
0f5b5be
Update test_whisper.rb
bobqianic Feb 5, 2024
4cc4b89
Update params.go
bobqianic Feb 5, 2024
9fbe59f
Merge pull request #6 from bobqianic/fix-binding
bobqianic Feb 5, 2024
49e7a7f
Update context.go
bobqianic Feb 5, 2024
891a453
Update interface.go
bobqianic Feb 5, 2024
7baa7a6
Merge pull request #7 from bobqianic/fix-go
bobqianic Feb 5, 2024
c0277e3
revert logsumexp implementation
bobqianic Feb 6, 2024
b6d89b0
Add heuristic mode
bobqianic Feb 9, 2024
3512527
Bug Fix
bobqianic Feb 9, 2024
e091189
Add heuristic mode
bobqianic Feb 9, 2024
de4f87f
Bug Fix 2
bobqianic Feb 9, 2024
476dff4
Merge pull request #8 from bobqianic/heuristic
bobqianic Feb 9, 2024
f38b659
Merge branch 'master' into fix-decoding
bobqianic Jun 24, 2024
2b61aec
Update whisper.cpp
bobqianic Jun 24, 2024
a53175a
Add files via upload
bobqianic Jun 25, 2024
7ea8a64
Merge pull request #12 from bobqianic/base
bobqianic Jun 25, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,7 @@ add_library(${TARGET}
${GGML_SOURCES_METAL}
${GGML_SOURCES_CUDA}
${GGML_SOURCES_OPENCL}
unicode.h
whisper.h
whisper.cpp
)
Expand Down
13 changes: 7 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h

WHISPER_OBJ += ggml.o ggml-alloc.o ggml-backend.o ggml-quants.o

whisper.o: whisper.cpp whisper.h ggml.h ggml-cuda.h
whisper.o: whisper.cpp whisper.h unicode.h ggml.h ggml-cuda.h
$(CXX) $(CXXFLAGS) -c $< -o $@

ifndef WHISPER_COREML
Expand Down Expand Up @@ -358,9 +358,10 @@ CC_SDL=`sdl2-config --cflags --libs`

SRC_COMMON = examples/common.cpp examples/common-ggml.cpp
SRC_COMMON_SDL = examples/common-sdl.cpp
SRC_CONSOLE = examples/console.cpp

main: examples/main/main.cpp $(SRC_COMMON) $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o main $(LDFLAGS)
$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) $(SRC_CONSOLE) $(WHISPER_OBJ) -o main $(LDFLAGS)
./main -h

bench: examples/bench/bench.cpp $(WHISPER_OBJ)
Expand All @@ -373,19 +374,19 @@ server: examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o server $(LDFLAGS) $(LWINSOCK2)

stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)
$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_CONSOLE) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)

command: examples/command/command.cpp examples/grammar-parser.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
command: examples/command/command.cpp examples/grammar-parser.cpp $(SRC_COMMON) $(SRC_CONSOLE) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) examples/command/command.cpp examples/grammar-parser.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o command $(CC_SDL) $(LDFLAGS)

lsp: examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o lsp $(CC_SDL) $(LDFLAGS)

talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o talk $(CC_SDL) $(LDFLAGS)
$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_CONSOLE) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o talk $(CC_SDL) $(LDFLAGS)

talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o talk-llama $(CC_SDL) $(LDFLAGS)
$(CXX) $(CXXFLAGS) examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_CONSOLE) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o talk-llama $(CC_SDL) $(LDFLAGS)

#
# Audio samples
Expand Down
4 changes: 0 additions & 4 deletions bindings/go/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,6 @@ func (p *Params) SetTranslate(v bool) {
p.translate = toBool(v)
}

func (p *Params) SetSplitOnWord(v bool) {
p.split_on_word = toBool(v)
}

func (p *Params) SetNoContext(v bool) {
p.no_context = toBool(v)
}
Expand Down
4 changes: 0 additions & 4 deletions bindings/go/pkg/whisper/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,6 @@ func (context *context) SetSpeedup(v bool) {
context.params.SetSpeedup(v)
}

func (context *context) SetSplitOnWord(v bool) {
context.params.SetSplitOnWord(v)
}

// Set number of threads to use
func (context *context) SetThreads(v uint) {
context.params.SetThreads(int(v))
Expand Down
1 change: 0 additions & 1 deletion bindings/go/pkg/whisper/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ type Context interface {
SetDuration(time.Duration) // Set duration
SetThreads(uint) // Set number of threads to use
SetSpeedup(bool) // Set speedup flag
SetSplitOnWord(bool) // Set split on word flag
SetTokenThreshold(float32) // Set timestamp token probability threshold
SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
SetMaxSegmentLength(uint) // Set max segment length in characters
Expand Down
52 changes: 26 additions & 26 deletions bindings/javascript/package.json
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
{
"name": "whisper.cpp",
"version": "1.5.4",
"description": "Whisper speech recognition",
"main": "whisper.js",
"scripts": {
"test": "echo \"todo: add tests\" && exit 0"
},
"repository": {
"type": "git",
"url": "git+https://github.com/ggerganov/whisper.cpp"
},
"keywords": [
"openai",
"whisper",
"speech-to-text",
"speech-recognition",
"transformer"
],
"author": "Georgi Gerganov",
"license": "MIT",
"bugs": {
"url": "https://github.com/ggerganov/whisper.cpp/issues"
},
"homepage": "https://github.com/ggerganov/whisper.cpp#readme"
}
{
"name": "whisper.cpp",
"version": "1.5.4",
"description": "Whisper speech recognition",
"main": "whisper.js",
"scripts": {
"test": "echo \"todo: add tests\" && exit 0"
},
"repository": {
"type": "git",
"url": "git+https://github.com/ggerganov/whisper.cpp"
},
"keywords": [
"openai",
"whisper",
"speech-to-text",
"speech-recognition",
"transformer"
],
"author": "Georgi Gerganov",
"license": "MIT",
"bugs": {
"url": "https://github.com/ggerganov/whisper.cpp/issues"
},
"homepage": "https://github.com/ggerganov/whisper.cpp#readme"
}
8 changes: 0 additions & 8 deletions bindings/ruby/ext/ruby_whisper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -305,12 +305,6 @@ static VALUE ruby_whisper_params_get_token_timestamps(VALUE self) {
static VALUE ruby_whisper_params_set_token_timestamps(VALUE self, VALUE value) {
BOOL_PARAMS_SETTER(self, token_timestamps, value)
}
static VALUE ruby_whisper_params_get_split_on_word(VALUE self) {
BOOL_PARAMS_GETTER(self, split_on_word)
}
static VALUE ruby_whisper_params_set_split_on_word(VALUE self, VALUE value) {
BOOL_PARAMS_SETTER(self, split_on_word, value)
}
ggerganov marked this conversation as resolved.
Show resolved Hide resolved
static VALUE ruby_whisper_params_get_speed_up(VALUE self) {
BOOL_PARAMS_GETTER(self, speed_up)
}
Expand Down Expand Up @@ -406,8 +400,6 @@ void Init_whisper() {
rb_define_method(cParams, "suppress_non_speech_tokens=", ruby_whisper_params_set_suppress_non_speech_tokens, 1);
rb_define_method(cParams, "token_timestamps", ruby_whisper_params_get_token_timestamps, 0);
rb_define_method(cParams, "token_timestamps=", ruby_whisper_params_set_token_timestamps, 1);
rb_define_method(cParams, "split_on_word", ruby_whisper_params_get_split_on_word, 0);
rb_define_method(cParams, "split_on_word=", ruby_whisper_params_set_split_on_word, 1);
rb_define_method(cParams, "speed_up", ruby_whisper_params_get_speed_up, 0);
rb_define_method(cParams, "speed_up=", ruby_whisper_params_set_speed_up, 1);
rb_define_method(cParams, "diarize", ruby_whisper_params_get_diarize, 0);
Expand Down
Loading
Loading