| // Copyright 2010-2015, Google Inc. |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following disclaimer |
| // in the documentation and/or other materials provided with the |
| // distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #ifndef MOZC_CONVERTER_SEGMENTS_H_ |
| #define MOZC_CONVERTER_SEGMENTS_H_ |
| |
| #include <deque> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include "base/number_util.h" |
| #include "base/port.h" |
| #include "base/scoped_ptr.h" |
| #include "base/string_piece.h" |
| #include "converter/lattice.h" |
| |
| namespace mozc { |
| |
// Forward declarations. Only the names are introduced here; none of these
// types is defined in this header.
class Lattice;
struct Node;
template <class T>
class ObjectPool;

namespace composer {
class Composer;
}  // namespace composer
| |
| class Segment { |
| public: |
| enum SegmentType { |
| FREE, // FULL automatic conversion. |
| FIXED_BOUNDARY, // cannot consist of multiple segments. |
| FIXED_VALUE, // cannot consist of multiple segments. |
| // and result is also fixed |
| SUBMITTED, // submitted node |
| HISTORY // history node. It is hidden from user. |
| }; |
| |
| struct Candidate { |
| enum Attribute { |
| DEFAULT_ATTRIBUTE = 0, |
| // this was the best candidate before learning |
| BEST_CANDIDATE = 1 << 0, |
| // this candidate was reranked by user |
| RERANKED = 1 << 1, |
| // don't save it in history |
| NO_HISTORY_LEARNING = 1 << 2, |
| // don't save it in suggestion |
| NO_SUGGEST_LEARNING = 1 << 3, |
| // NO_HISTORY_LEARNING | NO_SUGGEST_LEARNING |
| NO_LEARNING = (1 << 2 | 1 << 3), |
| // learn it with left/right context |
| CONTEXT_SENSITIVE = 1 << 4, |
| // has "did you mean" |
| SPELLING_CORRECTION = 1 << 5, |
| // No need to have full/half width expansion |
| NO_VARIANTS_EXPANSION = 1 << 6, |
| // No need to have extra descriptions |
| NO_EXTRA_DESCRIPTION = 1 << 7, |
| // was generated by real-time conversion |
| REALTIME_CONVERSION = 1 << 8, |
| // contains tokens in user dictionary. |
| USER_DICTIONARY = 1 << 9, |
| // command candidate. e.g., incognito mode. |
| COMMAND_CANDIDATE = 1 << 10, |
| // key characters are consumed partially. |
| // Consumed size is |consumed_key_size|. |
| // If not set, all the key characters are consumed. |
| PARTIALLY_KEY_CONSUMED = 1 << 11, |
| // Typing correction candidate. |
| // - Special description should be shown when the candidate is created |
| // by a dictionary predictor. |
| // - No description should be shown when the candidate is loaded from |
| // history. |
| // - Otherwise following unexpected behavior can be observed. |
| // 1. Type "やんしょん" and submit "マンション" (annotated with "補正"). |
| // 2. Type "まんしょん". |
| // 3. "マンション" (annotated with "補正") is shown as a candidate |
| // regardless of a user's correct typing. |
| TYPING_CORRECTION = 1 << 12, |
| // Auto partial suggestion candidate. |
| // - Special description should be shown when the candidate is created |
| // by a dictionary predictor. |
| // - No description should be shown when the candidate is loaded from |
| // history. |
| AUTO_PARTIAL_SUGGESTION = 1 << 13, |
| // Predicted from user prediction history. |
| USER_HISTORY_PREDICTION = 1 << 14, |
| }; |
| |
| enum Command { |
| DEFAULT_COMMAND = 0, |
| ENABLE_INCOGNITO_MODE, // enables "incognito mode". |
| DISABLE_INCOGNITO_MODE, // disables "incognito mode". |
| ENABLE_PRESENTATION_MODE, // enables "presentation mode". |
| DISABLE_PRESENTATION_MODE, // disables "presentation mode". |
| }; |
| |
| string key; // reading |
| string value; // surface form |
| string content_key; |
| string content_value; |
| |
| size_t consumed_key_size; |
| |
| // Meta information |
| string prefix; |
| string suffix; |
| // Description including description type and message |
| string description; |
| |
| // Usage ID |
| int32 usage_id; |
| // Title of the usage containing basic form of this candidate. |
| string usage_title; |
| // Content of the usage. |
| string usage_description; |
| |
| // Context "sensitive" candidate cost. |
| // Taking adjacent words/nodes into consideration. |
| // Basically, canidate is sorted by this cost. |
| int32 cost; |
| // Context "free" candidate cost |
| // NOT taking adjacent words/nodes into consideration. |
| int32 wcost; |
| // (cost without transition cost between left/right boundaries) |
| // Cost of only transitions (cost without word cost adjacent context) |
| int32 structure_cost; |
| |
| // lid of left-most node |
| uint16 lid; |
| // rid of right-most node |
| uint16 rid; |
| |
| // Attributes of this candidate. Can set multiple attributes |
| // defined in enum |Attribute|. |
| uint32 attributes; |
| |
| // Candidate style. This is not a bit-field. |
| // The style is defined in enum |Style|. |
| NumberUtil::NumberString::Style style; |
| |
| // Command of this candidate. This is not a bit-field. |
| // The style is defined in enum |Command|. |
| Command command; |
| |
| // Boundary information for realtime conversion. This will be set only for |
| // realtime conversion result candidates. Each element is the encoded |
| // lengths of key, value, content key and content value. |
| vector<uint32> inner_segment_boundary; |
| |
| static bool EncodeLengths(size_t key_len, size_t value_len, |
| size_t content_key_len, |
| size_t content_value_len, |
| uint32 *result); |
| |
| // This function ignores error, so be careful when using this. |
| static uint32 EncodeLengths(size_t key_len, size_t value_len, |
| size_t content_key_len, |
| size_t content_value_len) { |
| uint32 result; |
| EncodeLengths(key_len, value_len, content_key_len, content_value_len, |
| &result); |
| return result; |
| } |
| |
| // Inserts a new element to |inner_segment_boundary|. If one of four |
| // lengths is longer than 255, this method returns false. |
| bool PushBackInnerSegmentBoundary(size_t key_len, size_t value_len, |
| size_t content_key_len, |
| size_t content_value_len); |
| |
| // Iterates inner segments. Usage example: |
| // for (InnerSegmentIterator iter(&cand); !iter.Done(); iter.Next()) { |
| // StringPiece s = iter.GetContentKey(); |
| // ... |
| // } |
| class InnerSegmentIterator { |
| public: |
| explicit InnerSegmentIterator(const Candidate *candidate) |
| : candidate_(candidate), key_offset_(candidate->key.data()), |
| value_offset_(candidate->value.data()), |
| index_(0) {} |
| |
| bool Done() const { |
| return index_ == candidate_->inner_segment_boundary.size(); |
| } |
| |
| void Next(); |
| StringPiece GetKey() const; |
| StringPiece GetValue() const; |
| StringPiece GetContentKey() const; |
| StringPiece GetContentValue() const; |
| |
| private: |
| const Candidate *candidate_; |
| const char *key_offset_; |
| const char *value_offset_; |
| size_t index_; |
| }; |
| |
| void Init() { |
| key.clear(); |
| value.clear(); |
| content_value.clear(); |
| content_key.clear(); |
| consumed_key_size = 0; |
| prefix.clear(); |
| suffix.clear(); |
| description.clear(); |
| usage_title.clear(); |
| usage_description.clear(); |
| cost = 0; |
| structure_cost = 0; |
| wcost = 0; |
| lid = 0; |
| rid = 0; |
| usage_id = 0; |
| attributes = 0; |
| style = NumberUtil::NumberString::DEFAULT_STYLE; |
| command = DEFAULT_COMMAND; |
| inner_segment_boundary.clear(); |
| } |
| |
| Candidate() : cost(0), wcost(0), structure_cost(0), |
| lid(0), rid(0), attributes(0), |
| style(NumberUtil::NumberString::DEFAULT_STYLE), |
| command(DEFAULT_COMMAND) {} |
| |
| // Returns functional key. |
| // functional_key = |
| // key.substr(content_key.size(), key.size() - content_key.size()); |
| StringPiece functional_key() const; |
| |
| // Returns functional value. |
| // functional_value = |
| // value.substr(content_value.size(), value.size() - content_value.size()); |
| StringPiece functional_value() const; |
| |
| void CopyFrom(const Candidate &src); |
| bool IsValid() const; |
| string DebugString() const; |
| }; |
| |
| Segment(); |
| ~Segment(); |
| |
| SegmentType segment_type() const; |
| void set_segment_type(const SegmentType &segment_type); |
| |
| const string& key() const; |
| void set_key(const string &key); |
| |
| // Candidate manupluations |
| // getter |
| const Candidate &candidate(int i) const; |
| |
| // setter |
| Candidate *mutable_candidate(int i); |
| |
| // return the index of candidate |
| // if candidate is not found, return candidates_size() |
| int indexOf(const Candidate *candidate); |
| |
| // push and insert candidates |
| Candidate *push_front_candidate(); |
| Candidate *push_back_candidate(); |
| Candidate *add_candidate(); // alias of push_back_candidate() |
| Candidate *insert_candidate(int i); |
| |
| // get size of candidates |
| size_t candidates_size() const; |
| |
| // erase candidate |
| void pop_front_candidate(); |
| void pop_back_candidate(); |
| void erase_candidate(int i); |
| void erase_candidates(int i, size_t size); |
| |
| // erase all candidates |
| // do not erase meta candidates |
| void clear_candidates(); |
| |
| // meta candidates |
| // TODO(toshiyuki): Integrate meta candidates to candidate and delete these |
| size_t meta_candidates_size() const; |
| void clear_meta_candidates(); |
| const vector<Candidate> &meta_candidates() const; |
| vector<Candidate> *mutable_meta_candidates(); |
| const Candidate &meta_candidate(size_t i) const; |
| Candidate *mutable_meta_candidate(size_t i); |
| Candidate *add_meta_candidate(); |
| |
| // move old_idx-th-candidate to new_index |
| void move_candidate(int old_idx, int new_idx); |
| |
| void Clear(); |
| void CopyFrom(const Segment &src); |
| |
| // Keep clear() method as other modules are still using the old method |
| void clear() { Clear(); } |
| |
| string DebugString() const; |
| |
| private: |
| SegmentType segment_type_; |
| // Note that |key_| is shorter than usual when partial suggestion is |
| // performed. |
| // For example if the preedit text is "しれ|ません", there is only a segment |
| // whose |key_| is "しれ". |
| // There is no way to detect by using only a segment whether this segment is |
| // for partial suggestion or not. |
| // You should detect that by using both Composer and Segments. |
| string key_; |
| deque<Candidate *> candidates_; |
| vector<Candidate> meta_candidates_; |
| scoped_ptr<ObjectPool<Candidate> > pool_; |
| DISALLOW_COPY_AND_ASSIGN(Segment); |
| }; |
| |
| // Segments is basically an array of Segment. |
| // Note that there are two types of Segment |
| // a) History Segment (SegmentType == HISTORY OR SUBMITTED) |
| // Segments user entered just before the transacton |
| // b) Conversion Segment |
| // Current segments user inputs |
| // |
| // Array of segment is represented as an array as follows |
| // segments_array[] = {HS_0,HS_1,...HS_N, CS0, CS1, CS2...} |
| // |
| // * segment(i) and mutable_segment(int i) |
| // access segment regardless of History/Conversion distinctions |
| // |
| // * history_segment(i) and mutable_history_segment(i) |
| // access only History Segment |
| // |
| // conversion_segment(i) and mutable_conversion_segment(i) |
| // access only Conversion Segment |
| // segment(i + history_segments_size()) == conversion_segment(i) |
| class Segments { |
| public: |
| enum RequestType { |
| CONVERSION, // normal conversion |
| REVERSE_CONVERSION, // reverse conversion |
| PREDICTION, // show prediction with user tab key |
| SUGGESTION, // show prediction automatically |
| PARTIAL_PREDICTION, // show prediction using the text before cursor |
| PARTIAL_SUGGESTION, // show suggestion using the text before cursor |
| }; |
| |
| // Client of segments can remember any string which can be used |
| // to revert the last Finish operation. |
| // "id" can be used for identifying the purpose of the key; |
| struct RevertEntry { |
| enum RevertEntryType { |
| CREATE_ENTRY, |
| UPDATE_ENTRY, |
| }; |
| uint16 revert_entry_type; |
| // UserHitoryPredictor uses '1' for now. |
| // Do not use duplicate keys. |
| uint16 id; |
| uint32 timestamp; |
| string key; |
| RevertEntry() : revert_entry_type(0), id(0), timestamp(0) {} |
| |
| void CopyFrom(const RevertEntry &src); |
| }; |
| |
| RequestType request_type() const; |
| void set_request_type(RequestType request_type); |
| |
| // enable/disable user history |
| void set_user_history_enabled(bool user_history_enabled); |
| bool user_history_enabled() const; |
| |
| // getter |
| const Segment &segment(size_t i) const; |
| const Segment &conversion_segment(size_t i) const; |
| const Segment &history_segment(size_t i) const; |
| |
| // setter |
| Segment *mutable_segment(size_t i); |
| Segment *mutable_conversion_segment(size_t i); |
| Segment *mutable_history_segment(size_t i); |
| |
| // push and insert segments |
| Segment *push_front_segment(); |
| Segment *push_back_segment(); |
| Segment *add_segment(); // alias of push_back_segment() |
| Segment *insert_segment(size_t i); |
| |
| // get size of segments |
| size_t segments_size() const; |
| size_t history_segments_size() const; |
| size_t conversion_segments_size() const; |
| |
| // erase segment |
| void pop_front_segment(); |
| void pop_back_segment(); |
| void erase_segment(size_t i); |
| void erase_segments(size_t i, size_t size); |
| |
| // erase all segments |
| void clear_history_segments(); |
| void clear_conversion_segments(); |
| void clear_segments(); |
| |
| void set_max_history_segments_size(size_t max_history_segments_size); |
| size_t max_history_segments_size() const; |
| |
| // Let predictor know the maximum size of |
| // candidates prediction/suggestion can generate. |
| void set_max_prediction_candidates_size(size_t size); |
| size_t max_prediction_candidates_size() const; |
| |
| // Let converter know the maximum size of |
| // candidates converter can generate. |
| // NOTE: This field is used as an "optional" field. |
| // Rewriter might insert more than |size| candiates. |
| // Default setting is 200. |
| void set_max_conversion_candidates_size(size_t size); |
| size_t max_conversion_candidates_size() const; |
| |
| bool resized() const; |
| void set_resized(bool resized); |
| |
| // clear segments |
| void Clear(); |
| |
| // Copy segments from src |
| void CopyFrom(const Segments &src); |
| |
| // Dump Segments structure |
| string DebugString() const; |
| |
| // Revert entries |
| void clear_revert_entries(); |
| size_t revert_entries_size() const; |
| RevertEntry *push_back_revert_entry(); |
| const RevertEntry &revert_entry(size_t i) const; |
| RevertEntry *mutable_revert_entry(size_t i); |
| |
| // setter |
| Lattice *mutable_cached_lattice(); |
| |
| Segments(); |
| virtual ~Segments(); |
| |
| private: |
| size_t max_history_segments_size_; |
| size_t max_prediction_candidates_size_; |
| size_t max_conversion_candidates_size_; |
| bool resized_; |
| bool user_history_enabled_; |
| |
| RequestType request_type_; |
| scoped_ptr<ObjectPool<Segment> > pool_; |
| deque<Segment *> segments_; |
| vector<RevertEntry> revert_entries_; |
| scoped_ptr<Lattice> cached_lattice_; |
| |
| DISALLOW_COPY_AND_ASSIGN(Segments); |
| }; |
| |
| } // namespace mozc |
| |
| #endif // MOZC_CONVERTER_SEGMENTS_H_ |