| // Copyright 2010-2015, Google Inc. |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following disclaimer |
| // in the documentation and/or other materials provided with the |
| // distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #include "converter/segments.h" |
| |
| #include <algorithm> |
| #include <sstream> // For DebugString() |
| #include <string> |
| |
| #include "base/freelist.h" |
| #include "base/logging.h" |
| #include "base/util.h" |
| |
| namespace mozc { |
| namespace { |
| const size_t kMaxHistorySize = 32; |
| const size_t kMaxConversionCandidatesSize = 200; |
| } |
| |
| StringPiece Segment::Candidate::functional_key() const { |
| return key.size() <= content_key.size() ? |
| StringPiece() : StringPiece(key.data() + content_key.size(), |
| key.size() - content_key.size()); |
| } |
| |
| StringPiece Segment::Candidate::functional_value() const { |
| return value.size() <= content_value.size() ? |
| StringPiece() : StringPiece(value.data() + content_value.size(), |
| value.size() - content_value.size()); |
| } |
| |
| void Segment::Candidate::CopyFrom(const Candidate &src) { |
| Init(); |
| |
| key = src.key; |
| value = src.value; |
| content_key = src.content_key; |
| content_value = src.content_value; |
| consumed_key_size = src.consumed_key_size; |
| |
| prefix = src.prefix; |
| suffix = src.suffix; |
| usage_id = src.usage_id; |
| description = src.description; |
| usage_title = src.usage_title; |
| usage_description = src.usage_description; |
| |
| cost = src.cost; |
| wcost = src.wcost; |
| structure_cost = src.structure_cost; |
| |
| lid = src.lid; |
| rid = src.rid; |
| |
| attributes = src.attributes; |
| |
| style = src.style; |
| command = src.command; |
| |
| inner_segment_boundary = src.inner_segment_boundary; |
| } |
| |
| bool Segment::Candidate::IsValid() const { |
| if (inner_segment_boundary.empty()) { |
| return true; |
| } |
| // The sums of the lengths of key, value, content key and content value |
| // components must coincide with those of key, value, content_key and |
| // content_value, respectively. |
| size_t sum_key_len = 0, sum_value_len = 0; |
| size_t sum_content_key_len = 0, sum_content_value_len = 0; |
| for (InnerSegmentIterator iter(this); !iter.Done(); iter.Next()) { |
| sum_key_len += iter.GetKey().size(); |
| sum_value_len += iter.GetValue().size(); |
| sum_content_key_len += iter.GetContentKey().size(); |
| sum_content_value_len += iter.GetContentValue().size(); |
| } |
| if (sum_key_len != key.size() || |
| sum_value_len != value.size() || |
| sum_content_key_len != content_key.size() || |
| sum_content_value_len != content_value.size()) { |
| return false; |
| } |
| return true; |
| } |
| |
| bool Segment::Candidate::EncodeLengths( |
| size_t key_len, size_t value_len, |
| size_t content_key_len, size_t content_value_len, uint32 *result) { |
| if (key_len > kuint8max || value_len > kuint8max || |
| content_key_len > kuint8max || content_value_len > kuint8max) { |
| return false; |
| } |
| *result = (static_cast<uint32>(key_len) << 24) | |
| (static_cast<uint32>(value_len) << 16) | |
| (static_cast<uint32>(content_key_len) << 8) | |
| static_cast<uint32>(content_value_len); |
| return true; |
| } |
| |
| bool Segment::Candidate::PushBackInnerSegmentBoundary( |
| size_t key_len, size_t value_len, |
| size_t content_key_len, size_t content_value_len) { |
| uint32 encoded; |
| if (EncodeLengths(key_len, value_len, content_key_len, content_value_len, |
| &encoded)) { |
| inner_segment_boundary.push_back(encoded); |
| return true; |
| } |
| return false; |
| } |
| |
| string Segment::Candidate::DebugString() const { |
| stringstream os; |
| os << "(key=" << key |
| << " ckey=" << content_key |
| << " val=" << value |
| << " cval=" << content_value |
| << " cost=" << cost |
| << " scost=" << structure_cost |
| << " wcost=" << wcost |
| << " lid=" << lid |
| << " rid=" << rid |
| << " attributes=" << attributes |
| << " consumed_key_size=" << consumed_key_size; |
| if (!prefix.empty()) { |
| os << " prefix=" << prefix; |
| } |
| if (!suffix.empty()) { |
| os << " suffix=" << suffix; |
| } |
| if (!description.empty()) { |
| os << " description=" << description; |
| } |
| if (!inner_segment_boundary.empty()) { |
| os << " segbdd="; |
| for (size_t i = 0; i < inner_segment_boundary.size(); ++i) { |
| const uint32 encoded_lengths = inner_segment_boundary[i]; |
| const int key_len = encoded_lengths >> 24; |
| const int value_len = (encoded_lengths >> 16) & 0xff; |
| const int content_key_len = (encoded_lengths >> 8) & 0xff; |
| const int content_value_len = encoded_lengths & 0xff; |
| os << Util::StringPrintf("<%d,%d,%d,%d>", key_len, value_len, |
| content_key_len, content_value_len); |
| } |
| } |
| os << ")" << endl; |
| return os.str(); |
| } |
| |
| void Segment::Candidate::InnerSegmentIterator::Next() { |
| DCHECK_LT(index_, candidate_->inner_segment_boundary.size()); |
| const uint32 encoded_lengths = candidate_->inner_segment_boundary[index_++]; |
| key_offset_ += encoded_lengths >> 24; |
| value_offset_ += (encoded_lengths >> 16) & 0xff; |
| } |
| |
| StringPiece Segment::Candidate::InnerSegmentIterator::GetKey() const { |
| DCHECK_LT(index_, candidate_->inner_segment_boundary.size()); |
| const uint32 encoded_lengths = candidate_->inner_segment_boundary[index_]; |
| return StringPiece(key_offset_, encoded_lengths >> 24); |
| } |
| |
| StringPiece Segment::Candidate::InnerSegmentIterator::GetValue() const { |
| DCHECK_LT(index_, candidate_->inner_segment_boundary.size()); |
| const uint32 encoded_lengths = candidate_->inner_segment_boundary[index_]; |
| return StringPiece(value_offset_, (encoded_lengths >> 16) & 0xff); |
| } |
| |
| StringPiece Segment::Candidate::InnerSegmentIterator::GetContentKey() const { |
| DCHECK_LT(index_, candidate_->inner_segment_boundary.size()); |
| const uint32 encoded_lengths = candidate_->inner_segment_boundary[index_]; |
| return StringPiece(key_offset_, (encoded_lengths >> 8) & 0xff); |
| } |
| |
| StringPiece Segment::Candidate::InnerSegmentIterator::GetContentValue() const { |
| DCHECK_LT(index_, candidate_->inner_segment_boundary.size()); |
| const uint32 encoded_lengths = candidate_->inner_segment_boundary[index_]; |
| return StringPiece(value_offset_, encoded_lengths & 0xff); |
| } |
| |
| Segment::Segment() |
| : segment_type_(FREE), |
| pool_(new ObjectPool<Candidate>(16)) {} |
| |
| Segment::~Segment() {} |
| |
| Segment::SegmentType Segment::segment_type() const { |
| return segment_type_; |
| } |
| |
| void Segment::set_segment_type( |
| const Segment::SegmentType &segment_type) { |
| segment_type_ = segment_type; |
| } |
| |
| const string& Segment::key() const { |
| return key_; |
| } |
| |
| void Segment::set_key(const string &key) { |
| key_ = key; |
| } |
| |
| const Segment::Candidate &Segment::candidate(int i) const { |
| if (i < 0) { |
| return meta_candidate(-i-1); |
| } |
| DCHECK(i < candidates_.size()); |
| return *candidates_[i]; |
| } |
| |
| Segment::Candidate *Segment::mutable_candidate(int i) { |
| if (i < 0) { |
| const size_t meta_index = -i-1; |
| DCHECK_LT(meta_index, meta_candidates_.size()); |
| return &meta_candidates_[meta_index]; |
| } |
| DCHECK_LT(i, candidates_.size()); |
| return candidates_[i]; |
| } |
| |
| int Segment::indexOf(const Segment::Candidate *candidate) { |
| if (candidate == NULL) { |
| return static_cast<int>(candidates_size()); |
| } |
| |
| for (int i = 0; i < static_cast<int>(candidates_.size()); ++i) { |
| if (candidates_[i] == candidate) { |
| return i; |
| } |
| } |
| |
| for (int i = 0; i < static_cast<int>(meta_candidates_.size()); ++i) { |
| if (&(meta_candidates_[i]) == candidate) { |
| return -i-1; |
| } |
| } |
| |
| return static_cast<int>(candidates_size()); |
| } |
| |
| size_t Segment::candidates_size() const { |
| return candidates_.size(); |
| } |
| |
| void Segment::clear_candidates() { |
| pool_->Free(); |
| candidates_.clear(); |
| } |
| |
| Segment::Candidate *Segment::push_back_candidate() { |
| Candidate *candidate = pool_->Alloc(); |
| candidate->Init(); |
| candidates_.push_back(candidate); |
| return candidate; |
| } |
| |
| Segment::Candidate *Segment::push_front_candidate() { |
| Candidate *candidate = pool_->Alloc(); |
| candidate->Init(); |
| candidates_.push_front(candidate); |
| return candidate; |
| } |
| |
| Segment::Candidate *Segment::add_candidate() { |
| return push_back_candidate(); |
| } |
| |
| Segment::Candidate *Segment::insert_candidate(int i) { |
| if (i < 0 || i > static_cast<int>(candidates_.size())) { |
| LOG(WARNING) << "invalid index"; |
| return NULL; |
| } |
| Candidate *candidate = pool_->Alloc(); |
| candidate->Init(); |
| candidates_.insert(candidates_.begin() + i, candidate); |
| return candidate; |
| } |
| |
| void Segment::pop_front_candidate() { |
| if (!candidates_.empty()) { |
| Candidate *c = candidates_.front(); |
| pool_->Release(c); |
| candidates_.pop_front(); |
| } |
| } |
| |
| void Segment::pop_back_candidate() { |
| if (!candidates_.empty()) { |
| Candidate *c = candidates_.back(); |
| pool_->Release(c); |
| candidates_.pop_back(); |
| } |
| } |
| |
| void Segment::erase_candidate(int i) { |
| if (i < 0 || i >= static_cast<int>(candidates_size())) { |
| LOG(WARNING) << "invalid index"; |
| return; |
| } |
| pool_->Release(mutable_candidate(i)); |
| candidates_.erase(candidates_.begin() + i); |
| } |
| |
| void Segment::erase_candidates(int i, size_t size) { |
| const size_t end = i + size; |
| if (i < 0 || |
| i >= static_cast<int>(candidates_size()) || |
| end > candidates_size()) { |
| LOG(WARNING) << "invalid index"; |
| return; |
| } |
| for (int j = i; j < static_cast<int>(end); ++j) { |
| pool_->Release(mutable_candidate(j)); |
| } |
| candidates_.erase(candidates_.begin() + i, |
| candidates_.begin() + end); |
| } |
| |
| size_t Segment::meta_candidates_size() const { |
| return meta_candidates_.size(); |
| } |
| |
| void Segment::clear_meta_candidates() { |
| meta_candidates_.clear(); |
| } |
| |
| const vector<Segment::Candidate> &Segment::meta_candidates() const { |
| return meta_candidates_; |
| } |
| |
| vector<Segment::Candidate> *Segment::mutable_meta_candidates() { |
| return &meta_candidates_; |
| } |
| |
| const Segment::Candidate &Segment::meta_candidate(size_t i) const { |
| if (i >= meta_candidates_.size()) { |
| LOG(ERROR) << "Invalid index number of meta_candidate: " << i; |
| i = 0; |
| } |
| return meta_candidates_[i]; |
| } |
| |
| Segment::Candidate *Segment::mutable_meta_candidate(size_t i) { |
| if (i >= meta_candidates_.size()) { |
| LOG(ERROR) << "Invalid index number of meta_candidate: " << i; |
| i = 0; |
| } |
| return &meta_candidates_[i]; |
| } |
| |
| Segment::Candidate *Segment::add_meta_candidate() { |
| Candidate candidate; |
| candidate.Init(); |
| meta_candidates_.push_back(candidate); |
| return &meta_candidates_[meta_candidates_size()-1]; |
| } |
| |
| void Segment::move_candidate(int old_idx, int new_idx) { |
| // meta candidates |
| if (old_idx < 0) { |
| const int meta_idx = -old_idx-1; |
| DCHECK_LT(meta_idx, meta_candidates_size()); |
| Candidate *c = insert_candidate(new_idx); |
| *c = meta_candidates_[meta_idx]; |
| return; |
| } |
| |
| // normal segment |
| if (old_idx < 0 || old_idx >= static_cast<int>(candidates_size()) || |
| new_idx >= static_cast<int>(candidates_size()) || old_idx == new_idx) { |
| VLOG(1) << "old_idx and new_idx are the same"; |
| return; |
| } |
| if (old_idx > new_idx) { // promotion |
| Candidate *c = candidates_[old_idx]; |
| for (int i = old_idx; i >= new_idx + 1; --i) { |
| candidates_[i] = candidates_[i - 1]; |
| } |
| candidates_[new_idx] = c; |
| } else { // demotion |
| Candidate *c = candidates_[old_idx]; |
| for (int i = old_idx; i < new_idx; ++i) { |
| candidates_[i] = candidates_[i + 1]; |
| } |
| candidates_[new_idx] = c; |
| } |
| } |
| |
| void Segment::Clear() { |
| clear_candidates(); |
| key_.clear(); |
| meta_candidates_.clear(); |
| segment_type_ = FREE; |
| } |
| |
| void Segment::CopyFrom(const Segment &src) { |
| Clear(); |
| |
| key_ = src.key(); |
| segment_type_ = src.segment_type(); |
| |
| for (size_t i = 0; i < src.candidates_size(); ++i) { |
| Candidate *candidate = add_candidate(); |
| candidate->CopyFrom(src.candidate(i)); |
| } |
| |
| for (size_t i = 0; i < src.meta_candidates_size(); ++i) { |
| Candidate *meta_candidate = add_meta_candidate(); |
| meta_candidate->CopyFrom(src.meta_candidate(i)); |
| } |
| } |
| |
| string Segment::DebugString() const { |
| stringstream os; |
| os << "[segtype=" << segment_type() |
| << " key=" << key() << endl; |
| const int size = |
| static_cast<int>(candidates_size() + meta_candidates_size()); |
| for (int l = 0; l < size; ++l) { |
| int j = 0; |
| if (l < meta_candidates_size()) { |
| j = -l - 1; |
| } else { |
| j = l - meta_candidates_size(); |
| } |
| os << " cand " << j << " " << candidate(j).DebugString(); |
| } |
| os << "]" << endl; |
| return os.str(); |
| } |
| |
| void Segments::RevertEntry::CopyFrom(const RevertEntry &src) { |
| revert_entry_type = src.revert_entry_type; |
| id = src.id; |
| timestamp = src.timestamp; |
| key = src.key; |
| } |
| |
| Segments::Segments() |
| : max_history_segments_size_(0), |
| max_prediction_candidates_size_(0), |
| max_conversion_candidates_size_(kMaxConversionCandidatesSize), |
| resized_(false), |
| user_history_enabled_(true), |
| request_type_(Segments::CONVERSION), |
| pool_(new ObjectPool<Segment>(32)), |
| cached_lattice_(new Lattice()) {} |
| |
| Segments::~Segments() {} |
| |
| Segments::RequestType Segments::request_type() const { |
| return request_type_; |
| } |
| |
| void Segments::set_request_type(Segments::RequestType request_type) { |
| request_type_ = request_type; |
| } |
| |
| void Segments::set_user_history_enabled(bool user_history_enabled) { |
| user_history_enabled_ = user_history_enabled; |
| } |
| |
| bool Segments::user_history_enabled() const { |
| return user_history_enabled_; |
| } |
| |
| const Segment &Segments::segment(size_t i) const { |
| return *segments_[i]; |
| } |
| |
| Segment *Segments::mutable_segment(size_t i) { |
| return segments_[i]; |
| } |
| |
| const Segment &Segments::history_segment(size_t i) const { |
| return *segments_[i]; |
| } |
| |
| Segment *Segments::mutable_history_segment(size_t i) { |
| return segments_[i]; |
| } |
| |
| const Segment &Segments::conversion_segment(size_t i) const { |
| return *segments_[i + history_segments_size()]; |
| } |
| |
| Segment *Segments::mutable_conversion_segment(size_t i) { |
| return segments_[i + history_segments_size()]; |
| } |
| |
| Segment *Segments::add_segment() { |
| return push_back_segment(); |
| } |
| |
| Segment *Segments::insert_segment(size_t i) { |
| Segment *segment = pool_->Alloc(); |
| segment->Clear(); |
| segments_.insert(segments_.begin() + i, segment); |
| return segment; |
| } |
| |
| Segment *Segments::push_back_segment() { |
| Segment *segment = pool_->Alloc(); |
| segment->Clear(); |
| segments_.push_back(segment); |
| return segment; |
| } |
| |
| Segment *Segments::push_front_segment() { |
| Segment *segment = pool_->Alloc(); |
| segment->Clear(); |
| segments_.push_front(segment); |
| return segment; |
| } |
| |
| size_t Segments::segments_size() const { |
| return segments_.size(); |
| } |
| |
| size_t Segments::history_segments_size() const { |
| size_t result = 0; |
| for (size_t i = 0; i < segments_size(); ++i) { |
| if (segment(i).segment_type() != Segment::HISTORY && |
| segment(i).segment_type() != Segment::SUBMITTED) { |
| break; |
| } |
| ++result; |
| } |
| return result; |
| } |
| |
| size_t Segments::conversion_segments_size() const { |
| return (segments_size() - history_segments_size()); |
| } |
| |
| void Segments::erase_segment(size_t i) { |
| if (i >= segments_size()) { |
| return; |
| } |
| pool_->Release(mutable_segment(i)); |
| segments_.erase(segments_.begin() + i); |
| } |
| |
| void Segments::erase_segments(size_t i, size_t size) { |
| const size_t end = i + size; |
| if (i >= segments_size() || end > segments_size()) { |
| return; |
| } |
| for (size_t j = i ; j < end; ++j) { |
| pool_->Release(mutable_segment(j)); |
| } |
| segments_.erase(segments_.begin() + i, |
| segments_.begin() + end); |
| } |
| |
| void Segments::pop_front_segment() { |
| if (!segments_.empty()) { |
| Segment *seg = segments_.front(); |
| pool_->Release(seg); |
| segments_.pop_front(); |
| } |
| } |
| |
| void Segments::pop_back_segment() { |
| if (!segments_.empty()) { |
| Segment *seg = segments_.back(); |
| pool_->Release(seg); |
| segments_.pop_back(); |
| } |
| } |
| |
| void Segments::Clear() { |
| clear_segments(); |
| clear_revert_entries(); |
| } |
| |
| void Segments::CopyFrom(const Segments &src) { |
| Clear(); |
| max_history_segments_size_ = src.max_history_segments_size(); |
| max_prediction_candidates_size_ = src.max_prediction_candidates_size(); |
| max_conversion_candidates_size_ = src.max_conversion_candidates_size(); |
| resized_ = src.resized(); |
| user_history_enabled_ = src.user_history_enabled(); |
| |
| request_type_ = src.request_type(); |
| |
| for (size_t i = 0; i < src.segments_size(); ++i) { |
| Segment *segment = add_segment(); |
| segment->CopyFrom(src.segment(i)); |
| } |
| |
| for (size_t i = 0; i < src.revert_entries_size(); ++i) { |
| RevertEntry *revert_entry = push_back_revert_entry(); |
| revert_entry->CopyFrom(src.revert_entry(i)); |
| } |
| } |
| |
| void Segments::clear_segments() { |
| pool_->Free(); |
| resized_ = false; |
| segments_.clear(); |
| } |
| |
| void Segments::clear_history_segments() { |
| while (!segments_.empty()) { |
| Segment *seg = segments_.front(); |
| if (seg->segment_type() != Segment::HISTORY && |
| seg->segment_type() != Segment::SUBMITTED) { |
| break; |
| } |
| pop_front_segment(); |
| } |
| } |
| |
| void Segments::clear_conversion_segments() { |
| const size_t size = history_segments_size(); |
| for (size_t i = size; i < segments_size(); ++i) { |
| pool_->Release(mutable_segment(i)); |
| } |
| resized_ = false; |
| segments_.resize(size); |
| } |
| |
| size_t Segments::max_history_segments_size() const { |
| return max_history_segments_size_; |
| } |
| |
| void Segments::set_max_history_segments_size(size_t max_history_segments_size) { |
| max_history_segments_size_ = |
| max(static_cast<size_t>(0), |
| min(max_history_segments_size, kMaxHistorySize)); |
| } |
| |
| void Segments::set_resized(bool resized) { |
| resized_ = resized; |
| } |
| |
| bool Segments::resized() const { |
| return resized_; |
| } |
| |
| size_t Segments::max_prediction_candidates_size() const { |
| return max_prediction_candidates_size_; |
| } |
| |
| void Segments::set_max_prediction_candidates_size(size_t size) { |
| max_prediction_candidates_size_ = size; |
| } |
| |
| size_t Segments::max_conversion_candidates_size() const { |
| return max_conversion_candidates_size_; |
| } |
| |
| void Segments::set_max_conversion_candidates_size(size_t size) { |
| max_conversion_candidates_size_ = size; |
| } |
| |
| void Segments::clear_revert_entries() { |
| revert_entries_.clear(); |
| } |
| |
| size_t Segments::revert_entries_size() const { |
| return revert_entries_.size(); |
| } |
| |
| Segments::RevertEntry *Segments::push_back_revert_entry() { |
| revert_entries_.resize(revert_entries_.size() + 1); |
| Segments::RevertEntry *entry = &revert_entries_.back(); |
| entry->revert_entry_type = Segments::RevertEntry::CREATE_ENTRY; |
| entry->id = 0; |
| entry->timestamp = 0; |
| entry->key.clear(); |
| return entry; |
| } |
| |
| const Segments::RevertEntry &Segments::revert_entry(size_t i) const { |
| return revert_entries_[i]; |
| } |
| |
| Segments::RevertEntry *Segments::mutable_revert_entry(size_t i) { |
| return &revert_entries_[i]; |
| } |
| |
| Lattice *Segments::mutable_cached_lattice() { |
| return cached_lattice_.get(); |
| } |
| |
| string Segments::DebugString() const { |
| stringstream os; |
| os << "{" << endl; |
| for (size_t i = 0; i < segments_size(); ++i) { |
| os << " seg " << i << " " << segment(i).DebugString(); |
| } |
| os << "}" << endl; |
| return os.str(); |
| } |
| |
| } // namespace mozc |