| // Copyright 2010-2015, Google Inc. |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following disclaimer |
| // in the documentation and/or other materials provided with the |
| // distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #ifndef MOZC_STORAGE_LOUDS_LOUDS_TRIE_H_ |
| #define MOZC_STORAGE_LOUDS_LOUDS_TRIE_H_ |
| |
| #include "base/port.h" |
| #include "base/string_piece.h" |
| #include "storage/louds/key_expansion_table.h" |
| #include "storage/louds/louds.h" |
| #include "storage/louds/simple_succinct_bit_vector_index.h" |
| |
| namespace mozc { |
| namespace storage { |
| namespace louds { |
| |
| class LoudsTrie { |
| public: |
| // The max depth of the trie. |
| static const size_t kMaxDepth = 256; |
| |
| // This class stores a traversal state. |
| using Node = Louds::Node; |
| |
| // Interface which is called back when the result is found. |
| class Callback { |
| public: |
| enum ResultType { |
| // Finishes the current (prefix or predictive) search. |
| SEARCH_DONE, |
| |
| // Continues the current (prefix or predictive) search. |
| SEARCH_CONTINUE, |
| |
| // Finished the (prefix or predictive) search of the current edge, |
| // but still continues to search for other edges. |
| SEARCH_CULL, |
| }; |
| |
| virtual ~Callback() { |
| } |
| |
| // The callback will be invoked when a target word is found. |
| // "s" may not be null-terminated. |
| virtual ResultType Run(const char *s, size_t len, int key_id) = 0; |
| |
| protected: |
| Callback() {} |
| }; |
| |
| LoudsTrie() : edge_character_(nullptr) {} |
| ~LoudsTrie() {} |
| |
| // Opens the binary image, and constructs the data structure. |
| // This method doesn't own the "data", so it is caller's reponsibility |
| // to keep the data alive until Close is invoked. |
| // See .cc file for the detailed format of the binary image. |
| bool Open(const uint8 *data); |
| |
| // Destructs the internal data structure explicitly (the destructor will do |
| // clean up too). |
| void Close(); |
| |
| // Generic APIs for tree traversal, some of which are delegated from Louds |
| // class; see louds.h. |
| |
| // Returns true if |node| is in a valid state (returns true both for terminal |
| // and non-terminal nodes). |
| bool IsValidNode(const Node &node) const { return louds_.IsValidNode(node); } |
| |
| // Returns true if |node| is a terminal node. |
| bool IsTerminalNode(const Node &node) const { |
| return terminal_bit_vector_.Get(node.node_id() - 1); |
| } |
| |
| // Returns the label of the edge from |node|'s parent (predecessor) to |node|. |
| char GetEdgeLabelToParentNode(const Node &node) const { |
| return edge_character_[node.node_id() - 1]; |
| } |
| |
| // Computes the ID of key that reaches to |node|. |
| // REQUIRES: |node| is a terminal node. |
| int GetKeyIdOfTerminalNode(const Node &node) const { |
| return terminal_bit_vector_.Rank1(node.node_id() - 1); |
| } |
| |
| // Initializes a node corresponding to |key_id|. |
| // REQUIRES: |key_id| is a valid ID. |
| void GetTerminalNodeFromKeyId(int key_id, Node *node) const { |
| const int node_id = terminal_bit_vector_.Select1(key_id + 1) + 1; |
| louds_.InitNodeFromNodeId(node_id, node); |
| } |
| Node GetTerminalNodeFromKeyId(int key_id) const { |
| Node node; |
| GetTerminalNodeFromKeyId(key_id, &node); |
| return node; |
| } |
| |
| // Restores the key string corresponding to |key_id|. The caller is |
| // responsible for allocating a buffer for the result StringPiece, which needs |
| // to be passed in |buf|. The returned StringPiece points to a piece of |
| // |buf|. |
| // REQUIRES: |buf| is longer than kMaxDepth + 1. |
| StringPiece RestoreKeyString(int key_id, char *buf) const; |
| |
| // Methods for moving node exported from Louds class; see louds.h. |
| void MoveToFirstChild(Node *node) const { |
| louds_.MoveToFirstChild(node); |
| } |
| Node MoveToFirstChild(Node node) const { |
| MoveToFirstChild(&node); |
| return node; |
| } |
| static void MoveToNextSibling(Node *node) { |
| Louds::MoveToNextSibling(node); |
| } |
| static Node MoveToNextSibling(Node node) { |
| MoveToNextSibling(&node); |
| return node; |
| } |
| |
| // Traverses a trie for |key|, starting from |node|, and modifies |node| to |
| // the destination terminal node. Here, |node| is not necessarily the root. |
| // Returns false if there's no node reachable by |key|. |
| bool Traverse(StringPiece key, Node *node) const; |
| |
| // Higher level APIs. |
| |
| // Returns true if |key| is in this trie. |
| bool HasKey(StringPiece key) const { |
| Node node; // Root |
| return Traverse(key, &node) && IsTerminalNode(node); |
| } |
| |
| // Searches this trie for the key that exactly matches the given key. Returns |
| // -1 if such key doesn't exist. |
| // NOTE: When you only need to check if |key| is in this trie, use HasKey() |
| // method, which is more efficient. |
| int ExactSearch(StringPiece key) const; |
| |
| // TODO(noriyukit): The following search methods rely on Callback. However, |
| // this results in the nested callback to implement SystemDictionary's lookup |
| // methods (i.e., inside implementations of DictionaryInterface::Callback, |
| // LoudsTrie::Callback needs to be called; see system_dictionary.cc. This is |
| // somewhat inefficient because both requires virtual method calls. |
| // Therefore, it'd be better to implement the following search methods |
| // directly in system_dictionary.cc using more generic APIs defined above. |
| |
| // Searches the trie structure, and invokes callback->Run when for each word |
| // which is a prefix of the key is found. |
| void PrefixSearch(StringPiece key, Callback *callback) const { |
| PrefixSearchWithKeyExpansion( |
| key, KeyExpansionTable::GetDefaultInstance(), callback); |
| } |
| |
| void PrefixSearchWithKeyExpansion( |
| StringPiece key, const KeyExpansionTable &key_expansion_table, |
| Callback *callback) const; |
| |
| // Searches the trie structure, and invokes callback->Run when for each word |
| // which begins with key is found. |
| void PredictiveSearch(StringPiece key, Callback *callback) const { |
| PredictiveSearchWithKeyExpansion( |
| key, KeyExpansionTable::GetDefaultInstance(), callback); |
| } |
| |
| void PredictiveSearchWithKeyExpansion( |
| StringPiece key, const KeyExpansionTable &key_expansion_table, |
| Callback *callback) const; |
| |
| private: |
| Louds louds_; // Tree structure representation by LOUDS. |
| |
| // Bit-vector to represent whether each node in LOUDS tree is terminal. |
| // This bit vector doesn't include "super root" in the LOUDS. |
| // In other words, id=1 in louds_ corresponds to id=0 in terminal_bit_vector_, |
| // id=10 in louds_ corresponds to id=9 in terminal_bit_vector_, and so on. |
| // TODO(noriyukit): Simplify the id-mapping by introducing a bit for the |
| // super root in this bit vector. |
| SimpleSuccinctBitVectorIndex terminal_bit_vector_; |
| |
| // A sequence of characters, annotated to each edge. |
| // This array also doesn't have an entry for super root. |
| // In other words, id=2 in louds_ corresponds to edge_character_[1]. |
| const char *edge_character_; |
| |
| DISALLOW_COPY_AND_ASSIGN(LoudsTrie); |
| }; |
| |
| } // namespace louds |
| } // namespace storage |
| } // namespace mozc |
| |
| #endif // MOZC_STORAGE_LOUDS_LOUDS_TRIE_H_ |