Skip to content

Commit

Permalink
Implement async dictionary loading
Browse files Browse the repository at this point in the history
  • Loading branch information
wengxt committed Apr 21, 2024
1 parent 3cd9791 commit 65e1d2e
Show file tree
Hide file tree
Showing 5 changed files with 165 additions and 21 deletions.
1 change: 1 addition & 0 deletions im/pinyin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ set(PINYIN_SOURCES
pinyin.cpp
customphrase.cpp
symboldictionary.cpp
workerthread.cpp
)

add_library(pinyin MODULE ${PINYIN_SOURCES})
Expand Down
57 changes: 37 additions & 20 deletions im/pinyin/pinyin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
// We want to keep cloudpinyin logic but don't call it.
#include "../../modules/cloudpinyin/cloudpinyin_public.h"
#include "config.h"
#include "workerthread.h"
#include <cstdint>
#include <ctime>
#include <fcitx-utils/capabilityflags.h>
Expand All @@ -20,6 +21,7 @@
#include <fcitx/candidatelist.h>
#include <fcitx/event.h>
#include <fcitx/userinterface.h>
#include <future>
#include <libime/pinyin/pinyincorrectionprofile.h>
#include <memory>
#include <string>
Expand Down Expand Up @@ -980,7 +982,8 @@ std::string PinyinEngine::evaluateCustomPhrase(InputContext *inputContext,

PinyinEngine::PinyinEngine(Instance *instance)
: instance_(instance),
factory_([this](InputContext &) { return new PinyinState(this); }) {
factory_([this](InputContext &) { return new PinyinState(this); }),
worker_(instance->eventDispatcher()) {
ime_ = std::make_unique<libime::PinyinIME>(
std::make_unique<libime::PinyinDictionary>(),
std::make_unique<libime::UserLanguageModel>(
Expand Down Expand Up @@ -1142,24 +1145,37 @@ void PinyinEngine::loadSymbols(const StandardPathFile &file) {
}
}

void PinyinEngine::loadDict(const StandardPathFile &file) {
void PinyinEngine::loadDict(StandardPathFile file,
std::list<std::unique_ptr<TaskToken>> &taskTokens) {
if (file.fd() < 0) {
return;
}
try {
PINYIN_DEBUG() << "Loading pinyin dict " << file.path();
boost::iostreams::stream_buffer<
boost::iostreams::file_descriptor_source>
buffer(file.fd(),
boost::iostreams::file_descriptor_flags::never_close_handle);
std::istream in(&buffer);
ime_->dict()->addEmptyDict();
ime_->dict()->load(ime_->dict()->dictSize() - 1, in,
libime::PinyinDictFormat::Binary);
} catch (const std::exception &e) {
PINYIN_ERROR() << "Failed to load pinyin dict " << file.path() << ": "
<< e.what();
}
ime_->dict()->addEmptyDict();
PINYIN_DEBUG() << "Loading pinyin dict " << file.path();
auto path = file.path();
std::packaged_task<libime::PinyinDictionary::TrieType()> task(
[file = std::move(file)]() {
boost::iostreams::stream_buffer<
boost::iostreams::file_descriptor_source>
buffer(file.fd(), boost::iostreams::file_descriptor_flags::
never_close_handle);
std::istream in(&buffer);
auto trie = libime::PinyinDictionary::load(
in, libime::PinyinDictFormat::Binary);
return trie;
});
taskTokens.push_back(worker_.addTask(
std::move(task),
[this, index = ime_->dict()->dictSize() - 1,
path](std::shared_future<libime::PinyinDictionary::TrieType> &future) {
try {
PINYIN_DEBUG() << "Load pinyin dict " << path << " finished.";
ime_->dict()->setTrie(index, future.get());
} catch (const std::exception &e) {
PINYIN_ERROR() << "Failed to load pinyin dict " << path << ": "
<< e.what();
}
}));
}

void PinyinEngine::loadBuiltInDict() {
Expand All @@ -1172,7 +1188,7 @@ void PinyinEngine::loadBuiltInDict() {
{
auto file = standardPath.open(StandardPath::Type::PkgData,
"pinyin/chaizi.dict", O_RDONLY);
loadDict(file);
loadDict(std::move(file), persistentTask_);
}
{
auto file = standardPath.open(StandardPath::Type::Data,
Expand All @@ -1183,7 +1199,7 @@ void PinyinEngine::loadBuiltInDict() {
LIBIME_INSTALL_PKGDATADIR "/extb.dict",
O_RDONLY);
}
loadDict(file);
loadDict(std::move(file), persistentTask_);
}
if (ime_->dict()->dictSize() !=
libime::TrieDictionary::UserDict + 1 + NumBuiltInDict) {
Expand All @@ -1202,15 +1218,16 @@ void PinyinEngine::loadExtraDict() {
FCITX_ASSERT(ime_->dict()->dictSize() >=
libime::TrieDictionary::UserDict + NumBuiltInDict + 1)
<< "Dict size: " << ime_->dict()->dictSize();
tasks_.clear();
ime_->dict()->removeFrom(libime::TrieDictionary::UserDict + NumBuiltInDict +
1);
for (const auto &file : files) {
for (auto &file : files) {
if (disableFiles.count(stringutils::concat(file.first, ".disable"))) {
PINYIN_DEBUG() << "Dictionary: " << file.first << " is disabled.";
continue;
}
PINYIN_DEBUG() << "Loading extra dictionary: " << file.first;
loadDict(file.second);
loadDict(std::move(file.second), tasks_);
}
}

Expand Down
7 changes: 6 additions & 1 deletion im/pinyin/pinyin.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include "customphrase.h"
#include "symboldictionary.h"
#include "workerthread.h"
#include <fcitx-config/configuration.h>
#include <fcitx-config/iniparser.h>
#include <fcitx-config/option.h>
Expand Down Expand Up @@ -362,7 +363,8 @@ class PinyinEngine final : public InputMethodEngineV3 {
void loadExtraDict();
void loadCustomPhrase();
void loadSymbols(const StandardPathFile &file);
void loadDict(const StandardPathFile &file);
void loadDict(StandardPathFile file,
std::list<std::unique_ptr<TaskToken>> &taskTokens);

Instance *instance_;
PinyinEngineConfig config_;
Expand All @@ -380,6 +382,9 @@ class PinyinEngine final : public InputMethodEngineV3 {
std::unique_ptr<HandlerTableEntry<EventHandler>> event_;
CustomPhraseDict customPhrase_;
SymbolDict symbols_;
WorkerThread worker_;
std::list<std::unique_ptr<TaskToken>> persistentTask_;
std::list<std::unique_ptr<TaskToken>> tasks_;

FCITX_ADDON_DEPENDENCY_LOADER(quickphrase, instance_->addonManager());
FCITX_ADDON_DEPENDENCY_LOADER(fullwidth, instance_->addonManager());
Expand Down
56 changes: 56 additions & 0 deletions im/pinyin/workerthread.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* SPDX-FileCopyrightText: 2024-2024 CSSlayer <wengxt@gmail.com>
*
* SPDX-License-Identifier: LGPL-2.1-or-later
*
*/
#include "workerthread.h"
#include <condition_variable>
#include <fcitx-utils/eventdispatcher.h>
#include <memory>
#include <mutex>
#include <thread>

WorkerThread::WorkerThread(fcitx::EventDispatcher &dispatcher)
: dispatcher_(dispatcher), thread_(&WorkerThread::runThread, this) {}

// Asks the worker thread to stop and waits for it to finish.
WorkerThread::~WorkerThread() {
    {
        // Set the exit flag and wake the worker while holding the mutex so
        // the change is observed by the predicate check in run().
        const std::scoped_lock guard(mutex_);
        exit_ = true;
        condition_.notify_one();
    }
    if (!thread_.joinable()) {
        return;
    }
    thread_.join();
}

std::unique_ptr<TaskToken>
WorkerThread::addTaskImpl(std::function<void()> task,
std::function<void()> onDone) {
auto token = std::make_unique<TaskToken>();
std::lock_guard<std::mutex> lock(mutex_);
queue_.push({.task = std::move(task),
.callback = std::move(onDone),
.context = token->watch()});
condition_.notify_one();
return token;
}

void WorkerThread::run() {
while (true) {
Task task;
{
std::unique_lock lock(mutex_);
condition_.wait(lock, [this] { return exit_ || !queue_.empty(); });
if (exit_) {
break;
}

task = std::move(queue_.front());
queue_.pop();
}
task.task();
dispatcher_.scheduleWithContext(task.context, std::move(task.callback));
}
}
65 changes: 65 additions & 0 deletions im/pinyin/workerthread.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* SPDX-FileCopyrightText: 2024-2024 CSSlayer <[email protected]>
*
* SPDX-License-Identifier: LGPL-2.1-or-later
*
*/
#ifndef _PINYIN_WORKERTHREAD_H_
#define _PINYIN_WORKERTHREAD_H_

#include <condition_variable>
#include <fcitx-utils/eventdispatcher.h>
#include <fcitx-utils/macros.h>
#include <fcitx-utils/trackableobject.h>
#include <functional>
#include <future>
#include <list>
#include <memory>
#include <mutex>
#include <queue>
#include <thread>
#include <utility>

// Handle for one task queued on a WorkerThread. The worker keeps only a
// watch() reference to the token and passes that reference as the context
// when it schedules the task's completion callback on the dispatcher.
// NOTE(review): presumably destroying the token makes the dispatcher skip the
// pending callback (scheduleWithContext semantics) — confirm against
// fcitx-utils.
class TaskToken : public fcitx::TrackableObject<TaskToken> {};

// A single background thread that executes queued tasks in FIFO order and
// forwards each task's completion callback to an fcitx::EventDispatcher.
class WorkerThread {
public:
    // Starts the worker thread. Only a reference to `dispatcher` is stored,
    // so the dispatcher must outlive this object. Marked explicit so an
    // EventDispatcher never converts to a WorkerThread implicitly.
    explicit WorkerThread(fcitx::EventDispatcher &dispatcher);
    // Requests the thread to exit and joins it. Tasks that are still queued
    // when the exit request is observed are not executed.
    ~WorkerThread();

    // The running thread holds a pointer to this object, so the object must
    // never be copied.
    WorkerThread(const WorkerThread &) = delete;
    WorkerThread &operator=(const WorkerThread &) = delete;

    // Queues `task` for execution on the worker thread.
    //
    // After the task has run, `onDone` is handed to the dispatcher and is
    // invoked with a std::shared_future<Ret>& holding the task's result (or
    // the exception it threw, rethrown by future.get()).
    //
    // Returns the token associated with this task; the result must not be
    // discarded.
    // NOTE(review): presumably dropping the token before the callback is
    // dispatched cancels the callback — confirm against
    // fcitx::EventDispatcher::scheduleWithContext.
    template <typename Ret, typename OnDone>
    FCITX_NODISCARD std::unique_ptr<TaskToken>
    addTask(std::packaged_task<Ret()> task, OnDone onDone) {
        // The future must be obtained before the task is moved away.
        std::future<Ret> future = task.get_future();
        // std::function requires a copyable callable, while packaged_task is
        // move-only; hold it through a shared_ptr.
        std::function<void()> taskFunction =
            [task = std::make_shared<decltype(task)>(
                 std::move(task))]() mutable { (*task)(); };
        std::function<void()> callback = [onDone = std::move(onDone),
                                          future = future.share()]() mutable {
            onDone(future);
        };

        return addTaskImpl(std::move(taskFunction), std::move(callback));
    }

private:
    // Type-erased implementation behind addTask().
    std::unique_ptr<TaskToken> addTaskImpl(std::function<void()> task,
                                           std::function<void()> onDone);
    // Static thread entry point that forwards to run().
    static void runThread(WorkerThread *self) { self->run(); }
    // Worker loop executed on thread_.
    void run();

    // One queue entry: the work, its completion callback, and a reference
    // to the token handed back to the caller.
    struct Task {
        std::function<void()> task;
        std::function<void()> callback;
        fcitx::TrackableObjectReference<TaskToken> context;
    };

    fcitx::EventDispatcher &dispatcher_;
    // Guards queue_ and exit_.
    std::mutex mutex_;
    std::queue<Task, std::list<Task>> queue_;
    bool exit_ = false;
    std::condition_variable condition_;

    // Must be the last member: the thread starts running in the constructor
    // and uses the members above, which therefore have to be initialised
    // first.
    std::thread thread_;
};

#endif

0 comments on commit 65e1d2e

Please sign in to comment.