Skip to content

Commit

Permalink
Add key_len() as a pre-calculated length of key() in Unicode characters.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 719078472
  • Loading branch information
hiroyuki-komatsu committed Jan 24, 2025
1 parent dd56d86 commit 50357d4
Show file tree
Hide file tree
Showing 12 changed files with 44 additions and 30 deletions.
7 changes: 3 additions & 4 deletions src/converter/converter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ bool Converter::CommitPartialSuggestionSegmentValue(
DCHECK_LT(0, segment->candidates_size());
const Segment::Candidate &submitted_candidate = segment->candidate(0);
const bool auto_partial_suggestion =
Util::CharsLen(submitted_candidate.key) != Util::CharsLen(segment->key());
Util::CharsLen(submitted_candidate.key) != segment->key_len();
segment->set_key(current_segment_key);

Segment *new_segment = segments->insert_segment(raw_segment_index + 1);
Expand Down Expand Up @@ -431,12 +431,11 @@ bool Converter::ResizeSegment(Segments *segments,
return false;
}

absl::string_view key = segments->conversion_segment(segment_index).key();
if (key.empty()) {
const size_t key_len = segments->conversion_segment(segment_index).key_len();
if (key_len == 0) {
return false;
}

const int key_len = Util::CharsLen(key);
const int new_size = key_len + offset_length;
if (new_size <= 0 || new_size > std::numeric_limits<uint8_t>::max()) {
return false;
Expand Down
5 changes: 4 additions & 1 deletion src/converter/segments.cc
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ Segment::Segment(const Segment &x)
: removed_candidates_for_debug_(x.removed_candidates_for_debug_),
segment_type_(x.segment_type_),
key_(x.key_),
key_len_(x.key_len_),
meta_candidates_(x.meta_candidates_) {
DeepCopyCandidates(x.candidates_);
}
Expand All @@ -237,6 +238,7 @@ Segment &Segment::operator=(const Segment &x) {
removed_candidates_for_debug_ = x.removed_candidates_for_debug_;
segment_type_ = x.segment_type_;
key_ = x.key_;
key_len_ = x.key_len_;
meta_candidates_ = x.meta_candidates_;

clear_candidates();
Expand Down Expand Up @@ -400,6 +402,7 @@ void Segment::move_candidate(int old_idx, int new_idx) {
// Resets this segment: calls clear_candidates(), empties key_ and
// meta_candidates_, zeroes the cached key length, and sets the segment type
// back to FREE.
void Segment::Clear() {
clear_candidates();
key_.clear();
// Keep the cached character count consistent with the now-empty key_.
key_len_ = 0;
meta_candidates_.clear();
segment_type_ = FREE;
}
Expand Down Expand Up @@ -635,7 +638,7 @@ bool Segments::Resize(size_t start_index, absl::Span<const uint8_t> new_sizes) {
size_t modified_segments_size = 0;
for (const Segment &segment : all().drop(start_index)) {
absl::StrAppend(&key, segment.key());
key_len += Util::CharsLen(segment.key());
key_len += segment.key_len();
++modified_segments_size;
if (key_len >= total_size) {
break;
Expand Down
7 changes: 7 additions & 0 deletions src/converter/segments.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
#include "base/container/freelist.h"
#include "base/number_util.h"
#include "base/strings/assign.h"
#include "base/util.h"
#include "converter/lattice.h"
#include "testing/friend_test.h"

Expand Down Expand Up @@ -348,9 +349,14 @@ class Segment final {
}

// Returns the key of this segment by const reference.
const std::string &key() const { return key_; }

// Returns the length of the key in Unicode characters (e.g. 1 for "あ").
// This is the stored key_len_, which set_key() recomputes with
// Util::CharsLen() whenever the key is assigned, so no counting happens here.
size_t key_len() const { return key_len_; }

// Assigns `key` to key_ (forwarded to strings::Assign) and recomputes
// key_len_ from the new key_ with Util::CharsLen().
template <typename T>
void set_key(T &&key) {
strings::Assign(key_, std::forward<T>(key));
// Measure key_ rather than `key`: `key` has been forwarded above and may
// have been moved from.
key_len_ = Util::CharsLen(key_);
}

// check if the specified index is valid or not.
Expand Down Expand Up @@ -434,6 +440,7 @@ class Segment final {
// for partial suggestion or not.
// You should detect that by using both Composer and Segments.
std::string key_;
// Length of key_ in Unicode characters, as computed by Util::CharsLen() in
// set_key(). Returned by key_len().
size_t key_len_ = 0;
std::deque<Candidate *> candidates_;
std::vector<Candidate> meta_candidates_;
std::vector<std::unique_ptr<Candidate>> pool_;
Expand Down
1 change: 1 addition & 0 deletions src/converter/segments_matchers.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ MATCHER_P(EqualsSegment, segment, "") {
}
COMPARE_PROPERTY(segment_type);
COMPARE_PROPERTY(key);
COMPARE_PROPERTY(key_len);
#undef COMPARE_PROPERTY

// Compare candidates.
Expand Down
14 changes: 12 additions & 2 deletions src/converter/segments_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -798,6 +798,14 @@ TEST(CandidateTest, InnerSegmentIterator) {
}
}

// Verifies that key_len() reports the key length in Unicode characters (not
// bytes) and stays in sync with every change to the key, including the empty
// key and Clear().
TEST(SegmentTest, KeyLength) {
  Segment segment;
  // key_len_ is default-initialized to 0, so a fresh segment reports 0.
  EXPECT_EQ(segment.key_len(), 0);

  // ASCII: one character per byte.
  segment.set_key("test");
  EXPECT_EQ(segment.key_len(), 4);

  // Multi-byte UTF-8: three characters encoded in nine bytes.
  segment.set_key("あいう");
  EXPECT_EQ(segment.key_len(), 3);

  // Assigning an empty key must bring the length back to 0.
  segment.set_key("");
  EXPECT_EQ(segment.key_len(), 0);

  // Clear() empties the key and must reset the cached length with it.
  segment.set_key("あいう");
  segment.Clear();
  EXPECT_TRUE(segment.key().empty());
  EXPECT_EQ(segment.key_len(), 0);
}

TEST(SegmentTest, Copy) {
Segment src;

Expand All @@ -813,16 +821,18 @@ TEST(SegmentTest, Copy) {
// Test copy constructor.
Segment dest(src);
EXPECT_EQ(dest.key(), src.key());
EXPECT_EQ(dest.key_len(), src.key_len());
EXPECT_EQ(dest.segment_type(), src.segment_type());
EXPECT_EQ(dest.candidate(0).key, src.candidate(0).key);
EXPECT_EQ(dest.candidate(1).key, src.candidate(1).key);
EXPECT_EQ(dest.meta_candidate(0).key, src.meta_candidate(0).key);

// Test copy assignment.
dest.add_candidate()->key = "dummy";
dest.add_candidate()->key = "dummy";
dest.add_candidate()->key = "placeholder";
dest.add_candidate()->key = "placeholder";
dest = src;
EXPECT_EQ(dest.key(), src.key());
EXPECT_EQ(dest.key_len(), src.key_len());
EXPECT_EQ(dest.segment_type(), src.segment_type());
EXPECT_EQ(dest.candidate(0).key, src.candidate(0).key);
EXPECT_EQ(dest.candidate(1).key, src.candidate(1).key);
Expand Down
4 changes: 2 additions & 2 deletions src/prediction/dictionary_prediction_aggregator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ bool HasHistoryKeyLongerThanOrEqualTo(const Segments &segments,
bool IsLongKeyForRealtimeCandidates(const Segments &segments) {
constexpr int kFewResultThreshold = 8;
return (segments.segments_size() > 0 &&
Util::CharsLen(segments.segment(0).key()) >= kFewResultThreshold);
segments.segment(0).key_len() >= kFewResultThreshold);
}

size_t GetMaxSizeForRealtimeCandidates(const ConversionRequest &request,
Expand Down Expand Up @@ -617,7 +617,7 @@ PredictionTypes DictionaryPredictionAggregator::AggregatePrediction(
}

const std::string &key = segments.conversion_segment(0).key();
const size_t key_len = Util::CharsLen(key);
const size_t key_len = segments.conversion_segment(0).key_len();

// TODO(toshiyuki): Check if we can remove this SUGGESTION check.
// i.e. can we return NO_PREDICTION here for both of SUGGESTION and
Expand Down
6 changes: 2 additions & 4 deletions src/prediction/dictionary_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -334,8 +334,7 @@ void DictionaryPredictor::RewriteResultsForPrediction(
}

if (!is_mixed_conversion) {
const size_t input_key_len =
Util::CharsLen(segments.conversion_segment(0).key());
const size_t input_key_len = segments.conversion_segment(0).key_len();
RemoveMissSpelledCandidates(input_key_len, results);
}
}
Expand All @@ -347,9 +346,8 @@ void DictionaryPredictor::MaybePopulateTypingCorrectedResults(
return;
}

const size_t key_len = Util::CharsLen(segments.conversion_segment(0).key());
constexpr int kMinTypingCorrectionKeyLen = 3;
if (key_len < kMinTypingCorrectionKeyLen) {
if (segments.conversion_segment(0).key_len() < kMinTypingCorrectionKeyLen) {
return;
}

Expand Down
9 changes: 4 additions & 5 deletions src/prediction/user_history_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1190,18 +1190,17 @@ bool UserHistoryPredictor::PredictForRequest(const ConversionRequest &request,
return false;
}

const size_t input_key_len =
Util::CharsLen(segments->conversion_segment(0).key());
const bool is_empty_input = segments->conversion_segment(0).key().empty();
const Entry *prev_entry = LookupPrevEntry(*segments);
if (input_key_len == 0 && prev_entry == nullptr) {
if (is_empty_input && prev_entry == nullptr) {
MOZC_VLOG(1) << "If input_key_len is 0, prev_entry must be set";
return false;
}

const auto &params = request.request().decoder_experiment_params();

const bool is_zero_query =
((request_type == ZERO_QUERY_SUGGESTION) && (input_key_len == 0));
(request_type == ZERO_QUERY_SUGGESTION) && is_empty_input;
size_t max_prediction_size =
request.max_user_history_prediction_candidates_size();
size_t max_prediction_char_coverage =
Expand Down Expand Up @@ -1514,7 +1513,7 @@ bool UserHistoryPredictor::InsertCandidates(RequestType request_type,
LOG(ERROR) << "Unknown mode";
return false;
}
const uint32_t input_key_len = Util::CharsLen(segment->key());
const uint32_t input_key_len = segment->key_len();

const int filter_mode =
request.request()
Expand Down
3 changes: 1 addition & 2 deletions src/rewriter/date_rewriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1240,8 +1240,7 @@ std::optional<std::string> GetNDigits(const composer::ComposerData &composer,
// Note that there is only one segment in the Segments, but in some cases,
// such as partial conversion, the length of segment.key() differs from the
// length of the whole composition.
const std::string raw =
composer.GetRawSubString(0, Util::CharsLen(segment.key()));
const std::string raw = composer.GetRawSubString(0, segment.key_len());
if (validated = VaridateNDigits(raw, n); validated) {
return validated.value();
}
Expand Down
10 changes: 4 additions & 6 deletions src/rewriter/transliteration_rewriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -249,18 +249,17 @@ bool TransliterationRewriter::FillT13nsFromComposer(
bool modified = false;
size_t composition_pos = 0;
for (Segment &segment : segments->conversion_segments()) {
const std::string &key = segment.key();
if (key.empty()) {
const size_t composition_len = segment.key_len();
if (composition_len == 0) {
continue;
}
const size_t composition_len = Util::CharsLen(key);
std::vector<std::string> t13ns;
request.composer().GetSubTransliterations(composition_pos, composition_len,
&t13ns);
composition_pos += composition_len;

ModifyT13ns(request, segment, &t13ns);
modified |= SetTransliterations(t13ns, key, &segment);
modified |= SetTransliterations(t13ns, segment.key(), &segment);
}
return modified;
}
Expand Down Expand Up @@ -347,8 +346,7 @@ bool TransliterationRewriter::AddRawNumberT13nCandidates(
// Note that there is only one segment in the Segments, but in some cases,
// such as partial conversion, the length of segment.key() differs from the
// length of the whole composition.
const std::string raw =
composer.GetRawSubString(0, Util::CharsLen(segment->key()));
const std::string raw = composer.GetRawSubString(0, segment->key_len());
if (raw.empty()) {
return false;
}
Expand Down
2 changes: 1 addition & 1 deletion src/rewriter/user_boundary_history_rewriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ class SegmentsKey {
byte_indexs.push_back(byte_index);
byte_index += key.size();

const size_t key_size = Util::CharsLen(key);
const size_t key_size = segment.key_len();
if (key_size > 255) { // too long segment
return std::nullopt;
}
Expand Down
6 changes: 3 additions & 3 deletions src/session/session_converter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ bool SessionConverter::ConvertToTransliteration(
size_t offset = 0;
for (const Segment &segment :
segments_.conversion_segments().drop(segment_index_ + 1)) {
offset += Util::CharsLen(segment.key());
offset += segment.key_len();
}
ResizeSegmentWidth(composer, offset);
}
Expand Down Expand Up @@ -759,7 +759,7 @@ void SessionConverter::CommitSegmentsInternal(

// Accumulate the size of i-th segment's key.
// The caller will remove corresponding characters from the composer.
*consumed_key_size += Util::CharsLen(segment.key());
*consumed_key_size += segment.key_len();

// Collect candidate's id for each segment.
candidate_ids.push_back(GetCandidateIndexForConverter(i));
Expand Down Expand Up @@ -1292,7 +1292,7 @@ size_t SessionConverter::GetConsumedPreeditSize(const size_t index,
segments_.conversion_segment(i).candidate(id);
DCHECK(
!(candidate.attributes & Segment::Candidate::PARTIALLY_KEY_CONSUMED));
result += Util::CharsLen(segments_.conversion_segment(i).key());
result += segments_.conversion_segment(i).key_len();
}
return result;
}
Expand Down

0 comments on commit 50357d4

Please sign in to comment.