src/prediction/dictionary_predictor.h - mozc - Git at Google

 // Copyright 2010-2014, Google Inc.
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
 //
 //     * Redistributions of source code must retain the above copyright
 // notice, this list of conditions and the following disclaimer.
 //     * Redistributions in binary form must reproduce the above
 // copyright notice, this list of conditions and the following disclaimer
 // in the documentation and/or other materials provided with the
 // distribution.
 //     * Neither the name of Google Inc. nor the names of its
 // contributors may be used to endorse or promote products derived from
 // this software without specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #ifndef MOZC_PREDICTION_DICTIONARY_PREDICTOR_H_
 #define MOZC_PREDICTION_DICTIONARY_PREDICTOR_H_

 #include <functional>
 #include <string>
 #include <vector>

 #include "base/util.h"
 #include "dictionary/dictionary_token.h"
 #include "prediction/predictor_interface.h"
 // for FRIEND_TEST()
 #include "testing/base/public/gunit_prod.h"

 namespace mozc {

 class ConnectorInterface;
 class ConversionRequest;
 class ConverterInterface;
 class DictionaryInterface;
 class ImmutableConverterInterface;
 class POSMatcher;
 class SegmenterInterface;
 class Segments;
 class SuggestionFilter;

 // Dictionary-based predictor
 class DictionaryPredictor : public PredictorInterface {
  public:
   // Initializes a predictor with given references to submodules. Note that
   // pointers are not owned by the class and to be deleted by the caller.
   DictionaryPredictor(const ConverterInterface *converter,
                       const ImmutableConverterInterface *immutable_converter,
                       const DictionaryInterface *dictionary,
                       const DictionaryInterface *suffix_dictionary,
                       const ConnectorInterface *connector,
                       const SegmenterInterface *segmenter,
                       const POSMatcher *pos_matcher,
                       const SuggestionFilter *suggestion_filter);
   virtual ~DictionaryPredictor();

   virtual bool PredictForRequest(const ConversionRequest &request,
                                  Segments *segments) const;

   virtual const string &GetPredictorName() const { return predictor_name_; }

  protected:
   // Protected members for unittesting
   // For use util method accessing private members, made them protected.
   // http://code.google.com/p/googletest/wiki/FAQ
   enum PredictionType {
     // don't need to show any suggestions.
     NO_PREDICTION = 0,
     // suggests from current key user is now typing
     UNIGRAM = 1,
     // suggests from the previous history key user typed before.
     BIGRAM = 2,
     // suggests from immutable_converter
     REALTIME = 4,
     // add suffixes like "さん", "が" which matches to the pevious context.
     SUFFIX =  8,
     // add English words.
     ENGLISH = 16,
     // add prediciton to type corrected keys
     TYPING_CORRECTION = 32,

     // Suggests from |converter_|. The difference from REALTIME is that it uses
     // the full converter with rewriter, history, etc.
     // TODO(noriyukit): This label should be integrated with REALTIME. This is
     // why 65536 is used to indicate that it is a temporary assignment.
     REALTIME_TOP = 65536,
   };
   // Bitfield to store a set of PredictionType.
   typedef int32 PredictionTypes;

   struct Result {
     Result() : types(NO_PREDICTION), wcost(0), cost(0), lid(0), rid(0),
                candidate_attributes(0), consumed_key_size(0) {}

     void InitializeByTokenAndTypes(const Token &token, PredictionTypes types);
     void SetTypesAndTokenAttributes(PredictionTypes prediction_types,
                                     Token::AttributesBitfield token_attr);

     string key;
     string value;
     // Indicating which PredictionType creates this instance.
     // UNIGRAM, BIGRAM, REALTIME, SUFFIX, ENGLISH or TYPING_CORRECTION
     // is set exclusively.
     // TODO(matsuzakit): Using PredictionTypes both as input and output
     //                   makes the code complex. Let's split them.
     PredictionTypes types;
     // Context "insensitive" candidate cost.
     int wcost;
     // Context "sensitive" candidate cost.
     int cost;
     int lid;
     int rid;
     // Boundary information for realtime conversion.
     // This will be set only for realtime conversion result candidates.
     // This contains inner segment size for key and value.
     // If the candidate key and value are
     // "わたしの|なまえは|なかのです", " 私の|名前は|中野です",
     // |inner_segment_boundary| have [(4,2), (4, 3), (5, 4)].
     vector<uint32> inner_segment_boundary;
     uint32 candidate_attributes;
     size_t consumed_key_size;
   };

   // On MSVS2008/2010, Constructors of TestableDictionaryPredictor::Result
   // causes a compile error even if you change the access right of it to public.
   // You can use TestableDictionaryPredictor::MakeEmptyResult() instead.
   static Result MakeEmptyResult() {
     return Result();
   }

   class PredictiveLookupCallback;
   class PredictiveBigramLookupCallback;
   class ResultWCostLess;
   class ResultCostLess;

   void AggregateRealtimeConversion(PredictionTypes types,
                                    const ConversionRequest &request,
                                    Segments *segments,
                                    vector<Result> *results) const;

   void AggregateUnigramPrediction(PredictionTypes types,
                                   const ConversionRequest &request,
                                   const Segments &segments,
                                   vector<Result> *results) const;

   void AggregateBigramPrediction(PredictionTypes types,
                                  const ConversionRequest &request,
                                  const Segments &segments,
                                  vector<Result> *results) const;

   void AggregateSuffixPrediction(PredictionTypes types,
                                  const ConversionRequest &request,
                                  const Segments &segments,
                                  vector<Result> *results) const;

   void AggregateEnglishPrediction(PredictionTypes types,
                                   const ConversionRequest &request,
                                   const Segments &segments,
                                   vector<Result> *results) const;

   void AggregateTypeCorrectingPrediction(PredictionTypes types,
                                          const ConversionRequest &request,
                                          const Segments &segments,
                                          vector<Result> *results) const;

   void ApplyPenaltyForKeyExpansion(const Segments &segments,
                                    vector<Result> *results) const;

   bool AddPredictionToCandidates(const ConversionRequest &request,
                                  Segments *segments,
                                  vector<Result> *results) const;

  private:
   FRIEND_TEST(DictionaryPredictorTest, GetPredictionTypes);
   FRIEND_TEST(DictionaryPredictorTest,
               GetPredictionTypesTestWithTypingCorrection);
   FRIEND_TEST(DictionaryPredictorTest,
               GetPredictionTypesTestWithZeroQuerySuggestion);
   FRIEND_TEST(DictionaryPredictorTest, IsZipCodeRequest);
   FRIEND_TEST(DictionaryPredictorTest, GetRealtimeCandidateMaxSize);
   FRIEND_TEST(DictionaryPredictorTest, GetRealtimeCandidateMaxSizeForMixed);
   FRIEND_TEST(DictionaryPredictorTest,
               GetRealtimeCandidateMaxSizeWithActualConverter);
   FRIEND_TEST(DictionaryPredictorTest, GetCandidateCutoffThreshold);
   FRIEND_TEST(DictionaryPredictorTest, AggregateUnigramPrediction);
   FRIEND_TEST(DictionaryPredictorTest, AggregateBigramPrediction);
   FRIEND_TEST(DictionaryPredictorTest, AggregateSuffixPrediction);
   FRIEND_TEST(DictionaryPredictorTest, ZeroQuerySuggestionAfterNumbers);
   FRIEND_TEST(DictionaryPredictorTest, TriggerNumberZeroQuerySuggestion);
   FRIEND_TEST(DictionaryPredictorTest, GetHistoryKeyAndValue);
   FRIEND_TEST(DictionaryPredictorTest, RealtimeConversionStartingWithAlphabets);
   FRIEND_TEST(DictionaryPredictorTest, IsAggressiveSuggestion);
   FRIEND_TEST(DictionaryPredictorTest,
               RealtimeConversionWithSpellingCorrection);
   FRIEND_TEST(DictionaryPredictorTest, GetMissSpelledPosition);
   FRIEND_TEST(DictionaryPredictorTest, RemoveMissSpelledCandidates);
   FRIEND_TEST(DictionaryPredictorTest, ConformCharacterWidthToPreference);
   FRIEND_TEST(DictionaryPredictorTest, SetLMCost);
   FRIEND_TEST(DictionaryPredictorTest, SetDescription);
   FRIEND_TEST(DictionaryPredictorTest, SetDebugDescription);

   // Returns false if no results were aggregated.
   bool AggregatePrediction(const ConversionRequest &request,
                            Segments *segments,
                            vector<Result> *results) const;

   void SetCost(const ConversionRequest &request,
                const Segments &segments, vector<Result> *results) const;

   // Removes prediciton by setting NO_PREDICTION to result type if necessary.
   void RemovePrediction(const ConversionRequest &request,
                         const Segments &segments,
                         vector<Result> *results) const;

   // Adds prediction results from history key and value.
   void AddBigramResultsFromHistory(const string &history_key,
                                    const string &history_value,
                                    const ConversionRequest &request,
                                    const Segments &segments,
                                    vector<Result> *results) const;

   // Changes the prediction type for irrelevant bigram candidate.
   void CheckBigramResult(const Token &history_token,
                          const Util::ScriptType history_ctype,
                          const Util::ScriptType last_history_ctype,
                          Result *result) const;

   void GetPredictiveResults(const DictionaryInterface &dictionary,
                             const string &history_key,
                             const ConversionRequest &request,
                             const Segments &segments,
                             PredictionTypes types,
                             size_t lookup_limit,
                             vector<Result> *results) const;

   void GetPredictiveResultsForBigram(const DictionaryInterface &dictionary,
                                      const string &history_key,
                                      const string &history_value,
                                      const ConversionRequest &request,
                                      const Segments &segments,
                                      PredictionTypes types,
                                      size_t lookup_limit,
                                      vector<Result> *results) const;

   // Performs a custom look up for English words where case-conversion might be
   // applied to lookup key and/or output results.
   void GetPredictiveResultsForEnglish(const DictionaryInterface &dictionary,
                                       const string &history_key,
                                       const ConversionRequest &request,
                                       const Segments &segments,
                                       PredictionTypes types,
                                       size_t lookup_limit,
                                       vector<Result> *results) const;

   // Performs look-ups using type-corrected queries from composer. Usually
   // involves multiple look-ups from dictionary.
   void GetPredictiveResultsUsingTypingCorrection(
       const DictionaryInterface &dictionary,
       const string &history_key,
       const ConversionRequest &request,
       const Segments &segments,
       PredictionTypes types,
       size_t lookup_limit,
       vector<Result> *results) const;

   // Returns the position of misspelled character position.
   //
   // Example1:
   // key: "れみおめろん"
   // value: "レミオロメン"
   // returns 3
   //
   // Example3:
   // key: "ろっぽんぎ"5
   // value: "六本木"
   // returns 5 (charslen("六本木"))
   size_t GetMissSpelledPosition(const string &key,
                                 const string &value) const;

   // Returns language model cost of |token| given prediciton type |type|.
   // |rid| is the right id of previous word (token).
   // If |rid| is uknown, set 0 as a default value.
   int GetLMCost(const Result &result, int rid) const;

   // Given the results aggregated by aggregates, remove
   // miss-spelled results from the |results|.
   // we don't directly remove miss-spelled result but set
   // result[i].type = NO_PREDICTION.
   //
   // Here's the basic step of removal:
   // Case1:
   // result1: "あぼがど" => "アボガド"
   // result2: "あぼがど" => "アボカド" (spelling correction)
   // result3: "あぼかど" => "アボカド"
   // In this case, we can remove result 1 and 2.
   // If there exists the same result2.key in result1,3 and
   // the same result2.value in result1,3, we can remove the
   // 1) spelling correction candidate 2) candidate having
   // the same key as the spelling correction candidate.
   //
   // Case2:
   // result1: "あぼかど" => "アボカド"
   // result2: "あぼがど" => "アボカド" (spelling correction)
   // In this case, remove result2.
   //
   // Case3:
   // result1: "あぼがど" => "アボガド"
   // result2: "あぼがど" => "アボカド" (spelling correction)
   // In this case,
   //   a) user input: あ,あぼ,あぼ => remove result1, result2
   //   b) user input: あぼが,あぼがど  => remove result1
   //
   // let |same_key_size| and |same_value_size| be the number of
   // non-spelling-correction-candidates who have the same key/value as
   // spelling-correction-candidate respectively.
   //
   // if (same_key_size > 0 && same_value_size > 0) {
   //   remove spelling correction and candidates having the
   //   same key as the spelling correction.
   // } else if (same_key_size == 0 && same_value_size > 0) {
   //   remove spelling correction
   // } else {
   //   do nothing.
   // }
   void RemoveMissSpelledCandidates(size_t request_key_len,
                                    vector<Result> *results) const;

   // Scoring function which takes prediction bounus into account.
   // It basically reranks the candidate by lang_prob * (1 + remain_len).
   // This algorithm is mainly used for desktop.
   void SetPredictionCost(const Segments &segments,
                          vector<Result> *results) const;

   // Language model-based scoring function.
   // This algorithm is mainly used for mobile.
   void SetLMCost(const Segments &segments,
                  vector<Result> *results) const;

   // Returns true if the suggestion is classified
   // as "aggressive".
   bool IsAggressiveSuggestion(
       size_t query_len, size_t key_len, int cost,
       bool is_suggestion, size_t total_candidates_size) const;

   // Gets history key/value.
   // Returns false if history segments are
   // not found.
   bool GetHistoryKeyAndValue(const Segments &segments,
                              string *key, string *value) const;

   // Returns a bitfield of PredictionType.
   static PredictionTypes GetPredictionTypes(const ConversionRequest &request,
                                             const Segments &segments);

   // Returns true if the realtime conversion should be used.
   // TODO(hidehiko): add Config and Request instances into the arguments
   //   to represent the dependency explicitly.
   static bool ShouldRealTimeConversionEnabled(const ConversionRequest &request,
                                               const Segments &segments);

   // Returns true if key consistes of '0'-'9' or '-'
   static bool IsZipCodeRequest(const string &key);

   // Returns max size of realtime candidates.
   size_t GetRealtimeCandidateMaxSize(const Segments &segments,
                                      bool mixed_conversion,
                                      size_t max_size) const;

   // Aggregates unigram candidate for non mixed conversion.
   void AggregateUnigramCandidate(const ConversionRequest &request,
                                  const Segments &segments,
                                  vector<Result> *results) const;

   // Aggregates unigram candidate for mixed conversion.
   // This reduces redundant candidates.
   void AggregateUnigramCandidateForMixedConversion(
       const ConversionRequest &request,
       const Segments &segments,
       vector<Result> *results) const;

   // Returns cutoff threshold of unigram candidates.
   // AggregateUnigramPrediction method does not return any candidates
   // if there are too many (>= cutoff threshold) eligible candidates.
   // This behavior prevents a user from seeing too many prefix-match
   // candidates.
   size_t GetCandidateCutoffThreshold(const Segments &segments) const;

   // Generates a top conversion result from |converter_| and adds its result to
   // |results|.
   bool PushBackTopConversionResult(const ConversionRequest &request,
                                    const Segments &segments,
                                    vector<Result> *results) const;

   // Sets candidate description.
   static void SetDescription(PredictionTypes types,
                              uint32 attributes,
                              string *description);
   // Description for DEBUG mode.
   static void SetDebugDescription(PredictionTypes types,
                                   string *description);

   const ConverterInterface *converter_;
   const ImmutableConverterInterface *immutable_converter_;
   const DictionaryInterface *dictionary_;
   const DictionaryInterface *suffix_dictionary_;
   const ConnectorInterface *connector_;
   const SegmenterInterface *segmenter_;
   const SuggestionFilter *suggestion_filter_;
   const uint16 counter_suffix_word_id_;
   const string predictor_name_;

   DISALLOW_COPY_AND_ASSIGN(DictionaryPredictor);
 };
 }  // namespace mozc

 #endif  // MOZC_PREDICTION_DICTIONARY_PREDICTOR_H_
	// Copyright 2010-2014, Google Inc.
	// All rights reserved.
	//
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions are
	// met:
	//
	// * Redistributions of source code must retain the above copyright
	// notice, this list of conditions and the following disclaimer.
	// * Redistributions in binary form must reproduce the above
	// copyright notice, this list of conditions and the following disclaimer
	// in the documentation and/or other materials provided with the
	// distribution.
	// * Neither the name of Google Inc. nor the names of its
	// contributors may be used to endorse or promote products derived from
	// this software without specific prior written permission.
	//
	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	#ifndef MOZC_PREDICTION_DICTIONARY_PREDICTOR_H_
	#define MOZC_PREDICTION_DICTIONARY_PREDICTOR_H_

	#include <functional>
	#include <string>
	#include <vector>

	#include "base/util.h"
	#include "dictionary/dictionary_token.h"
	#include "prediction/predictor_interface.h"
	// for FRIEND_TEST()
	#include "testing/base/public/gunit_prod.h"

	namespace mozc {

	class ConnectorInterface;
	class ConversionRequest;
	class ConverterInterface;
	class DictionaryInterface;
	class ImmutableConverterInterface;
	class POSMatcher;
	class SegmenterInterface;
	class Segments;
	class SuggestionFilter;

	// Dictionary-based predictor
	class DictionaryPredictor : public PredictorInterface {
	public:
	// Initializes a predictor with given references to submodules. Note that
	// pointers are not owned by the class and to be deleted by the caller.
	DictionaryPredictor(const ConverterInterface *converter,
	const ImmutableConverterInterface *immutable_converter,
	const DictionaryInterface *dictionary,
	const DictionaryInterface *suffix_dictionary,
	const ConnectorInterface *connector,
	const SegmenterInterface *segmenter,
	const POSMatcher *pos_matcher,
	const SuggestionFilter *suggestion_filter);
	virtual ~DictionaryPredictor();

	virtual bool PredictForRequest(const ConversionRequest &request,
	Segments *segments) const;

	virtual const string &GetPredictorName() const { return predictor_name_; }

	protected:
	// Protected members for unittesting
	// For use util method accessing private members, made them protected.
	// http://code.google.com/p/googletest/wiki/FAQ
	enum PredictionType {
	// don't need to show any suggestions.
	NO_PREDICTION = 0,
	// suggests from current key user is now typing
	UNIGRAM = 1,
	// suggests from the previous history key user typed before.
	BIGRAM = 2,
	// suggests from immutable_converter
	REALTIME = 4,
	// add suffixes like "さん", "が" which matches to the pevious context.
	SUFFIX = 8,
	// add English words.
	ENGLISH = 16,
	// add prediciton to type corrected keys
	TYPING_CORRECTION = 32,

	// Suggests from \|converter_\|. The difference from REALTIME is that it uses
	// the full converter with rewriter, history, etc.
	// TODO(noriyukit): This label should be integrated with REALTIME. This is
	// why 65536 is used to indicate that it is a temporary assignment.
	REALTIME_TOP = 65536,
	};
	// Bitfield to store a set of PredictionType.
	typedef int32 PredictionTypes;

	struct Result {
	Result() : types(NO_PREDICTION), wcost(0), cost(0), lid(0), rid(0),
	candidate_attributes(0), consumed_key_size(0) {}

	void InitializeByTokenAndTypes(const Token &token, PredictionTypes types);
	void SetTypesAndTokenAttributes(PredictionTypes prediction_types,
	Token::AttributesBitfield token_attr);

	string key;
	string value;
	// Indicating which PredictionType creates this instance.
	// UNIGRAM, BIGRAM, REALTIME, SUFFIX, ENGLISH or TYPING_CORRECTION
	// is set exclusively.
	// TODO(matsuzakit): Using PredictionTypes both as input and output
	// makes the code complex. Let's split them.
	PredictionTypes types;
	// Context "insensitive" candidate cost.
	int wcost;
	// Context "sensitive" candidate cost.
	int cost;
	int lid;
	int rid;
	// Boundary information for realtime conversion.
	// This will be set only for realtime conversion result candidates.
	// This contains inner segment size for key and value.
	// If the candidate key and value are
	// "わたしの\|なまえは\|なかのです", " 私の\|名前は\|中野です",
	// \|inner_segment_boundary\| have [(4,2), (4, 3), (5, 4)].
	vector<uint32> inner_segment_boundary;
	uint32 candidate_attributes;
	size_t consumed_key_size;
	};

	// On MSVS2008/2010, Constructors of TestableDictionaryPredictor::Result
	// causes a compile error even if you change the access right of it to public.
	// You can use TestableDictionaryPredictor::MakeEmptyResult() instead.
	static Result MakeEmptyResult() {
	return Result();
	}

	class PredictiveLookupCallback;
	class PredictiveBigramLookupCallback;
	class ResultWCostLess;
	class ResultCostLess;

	void AggregateRealtimeConversion(PredictionTypes types,
	const ConversionRequest &request,
	Segments *segments,
	vector<Result> *results) const;

	void AggregateUnigramPrediction(PredictionTypes types,
	const ConversionRequest &request,
	const Segments &segments,
	vector<Result> *results) const;

	void AggregateBigramPrediction(PredictionTypes types,
	const ConversionRequest &request,
	const Segments &segments,
	vector<Result> *results) const;

	void AggregateSuffixPrediction(PredictionTypes types,
	const ConversionRequest &request,
	const Segments &segments,
	vector<Result> *results) const;

	void AggregateEnglishPrediction(PredictionTypes types,
	const ConversionRequest &request,
	const Segments &segments,
	vector<Result> *results) const;

	void AggregateTypeCorrectingPrediction(PredictionTypes types,
	const ConversionRequest &request,
	const Segments &segments,
	vector<Result> *results) const;

	void ApplyPenaltyForKeyExpansion(const Segments &segments,
	vector<Result> *results) const;

	bool AddPredictionToCandidates(const ConversionRequest &request,
	Segments *segments,
	vector<Result> *results) const;

	private:
	FRIEND_TEST(DictionaryPredictorTest, GetPredictionTypes);
	FRIEND_TEST(DictionaryPredictorTest,
	GetPredictionTypesTestWithTypingCorrection);
	FRIEND_TEST(DictionaryPredictorTest,
	GetPredictionTypesTestWithZeroQuerySuggestion);
	FRIEND_TEST(DictionaryPredictorTest, IsZipCodeRequest);
	FRIEND_TEST(DictionaryPredictorTest, GetRealtimeCandidateMaxSize);
	FRIEND_TEST(DictionaryPredictorTest, GetRealtimeCandidateMaxSizeForMixed);
	FRIEND_TEST(DictionaryPredictorTest,
	GetRealtimeCandidateMaxSizeWithActualConverter);
	FRIEND_TEST(DictionaryPredictorTest, GetCandidateCutoffThreshold);
	FRIEND_TEST(DictionaryPredictorTest, AggregateUnigramPrediction);
	FRIEND_TEST(DictionaryPredictorTest, AggregateBigramPrediction);
	FRIEND_TEST(DictionaryPredictorTest, AggregateSuffixPrediction);
	FRIEND_TEST(DictionaryPredictorTest, ZeroQuerySuggestionAfterNumbers);
	FRIEND_TEST(DictionaryPredictorTest, TriggerNumberZeroQuerySuggestion);
	FRIEND_TEST(DictionaryPredictorTest, GetHistoryKeyAndValue);
	FRIEND_TEST(DictionaryPredictorTest, RealtimeConversionStartingWithAlphabets);
	FRIEND_TEST(DictionaryPredictorTest, IsAggressiveSuggestion);
	FRIEND_TEST(DictionaryPredictorTest,
	RealtimeConversionWithSpellingCorrection);
	FRIEND_TEST(DictionaryPredictorTest, GetMissSpelledPosition);
	FRIEND_TEST(DictionaryPredictorTest, RemoveMissSpelledCandidates);
	FRIEND_TEST(DictionaryPredictorTest, ConformCharacterWidthToPreference);
	FRIEND_TEST(DictionaryPredictorTest, SetLMCost);
	FRIEND_TEST(DictionaryPredictorTest, SetDescription);
	FRIEND_TEST(DictionaryPredictorTest, SetDebugDescription);

	// Returns false if no results were aggregated.
	bool AggregatePrediction(const ConversionRequest &request,
	Segments *segments,
	vector<Result> *results) const;

	void SetCost(const ConversionRequest &request,
	const Segments &segments, vector<Result> *results) const;

	// Removes prediciton by setting NO_PREDICTION to result type if necessary.
	void RemovePrediction(const ConversionRequest &request,
	const Segments &segments,
	vector<Result> *results) const;

	// Adds prediction results from history key and value.
	void AddBigramResultsFromHistory(const string &history_key,
	const string &history_value,
	const ConversionRequest &request,
	const Segments &segments,
	vector<Result> *results) const;

	// Changes the prediction type for irrelevant bigram candidate.
	void CheckBigramResult(const Token &history_token,
	const Util::ScriptType history_ctype,
	const Util::ScriptType last_history_ctype,
	Result *result) const;

	void GetPredictiveResults(const DictionaryInterface &dictionary,
	const string &history_key,
	const ConversionRequest &request,
	const Segments &segments,
	PredictionTypes types,
	size_t lookup_limit,
	vector<Result> *results) const;

	void GetPredictiveResultsForBigram(const DictionaryInterface &dictionary,
	const string &history_key,
	const string &history_value,
	const ConversionRequest &request,
	const Segments &segments,
	PredictionTypes types,
	size_t lookup_limit,
	vector<Result> *results) const;

	// Performs a custom look up for English words where case-conversion might be
	// applied to lookup key and/or output results.
	void GetPredictiveResultsForEnglish(const DictionaryInterface &dictionary,
	const string &history_key,
	const ConversionRequest &request,
	const Segments &segments,
	PredictionTypes types,
	size_t lookup_limit,
	vector<Result> *results) const;

	// Performs look-ups using type-corrected queries from composer. Usually
	// involves multiple look-ups from dictionary.
	void GetPredictiveResultsUsingTypingCorrection(
	const DictionaryInterface &dictionary,
	const string &history_key,
	const ConversionRequest &request,
	const Segments &segments,
	PredictionTypes types,
	size_t lookup_limit,
	vector<Result> *results) const;

	// Returns the position of misspelled character position.
	//
	// Example1:
	// key: "れみおめろん"
	// value: "レミオロメン"
	// returns 3
	//
	// Example3:
	// key: "ろっぽんぎ"5
	// value: "六本木"
	// returns 5 (charslen("六本木"))
	size_t GetMissSpelledPosition(const string &key,
	const string &value) const;

	// Returns language model cost of \|token\| given prediciton type \|type\|.
	// \|rid\| is the right id of previous word (token).
	// If \|rid\| is uknown, set 0 as a default value.
	int GetLMCost(const Result &result, int rid) const;

	// Given the results aggregated by aggregates, remove
	// miss-spelled results from the \|results\|.
	// we don't directly remove miss-spelled result but set
	// result[i].type = NO_PREDICTION.
	//
	// Here's the basic step of removal:
	// Case1:
	// result1: "あぼがど" => "アボガド"
	// result2: "あぼがど" => "アボカド" (spelling correction)
	// result3: "あぼかど" => "アボカド"
	// In this case, we can remove result 1 and 2.
	// If there exists the same result2.key in result1,3 and
	// the same result2.value in result1,3, we can remove the
	// 1) spelling correction candidate 2) candidate having
	// the same key as the spelling correction candidate.
	//
	// Case2:
	// result1: "あぼかど" => "アボカド"
	// result2: "あぼがど" => "アボカド" (spelling correction)
	// In this case, remove result2.
	//
	// Case3:
	// result1: "あぼがど" => "アボガド"
	// result2: "あぼがど" => "アボカド" (spelling correction)
	// In this case,
	// a) user input: あ,あぼ,あぼ => remove result1, result2
	// b) user input: あぼが,あぼがど => remove result1
	//
	// let \|same_key_size\| and \|same_value_size\| be the number of
	// non-spelling-correction-candidates who have the same key/value as
	// spelling-correction-candidate respectively.
	//
	// if (same_key_size > 0 && same_value_size > 0) {
	// remove spelling correction and candidates having the
	// same key as the spelling correction.
	// } else if (same_key_size == 0 && same_value_size > 0) {
	// remove spelling correction
	// } else {
	// do nothing.
	// }
	void RemoveMissSpelledCandidates(size_t request_key_len,
	vector<Result> *results) const;

	// Scoring function which takes prediction bounus into account.
	// It basically reranks the candidate by lang_prob * (1 + remain_len).
	// This algorithm is mainly used for desktop.
	void SetPredictionCost(const Segments &segments,
	vector<Result> *results) const;

	// Language model-based scoring function.
	// This algorithm is mainly used for mobile.
	void SetLMCost(const Segments &segments,
	vector<Result> *results) const;

	// Returns true if the suggestion is classified
	// as "aggressive".
	bool IsAggressiveSuggestion(
	size_t query_len, size_t key_len, int cost,
	bool is_suggestion, size_t total_candidates_size) const;

	// Gets history key/value.
	// Returns false if history segments are
	// not found.
	bool GetHistoryKeyAndValue(const Segments &segments,
	string key, string value) const;

	// Returns a bitfield of PredictionType.
	static PredictionTypes GetPredictionTypes(const ConversionRequest &request,
	const Segments &segments);

	// Returns true if the realtime conversion should be used.
	// TODO(hidehiko): add Config and Request instances into the arguments
	// to represent the dependency explicitly.
	static bool ShouldRealTimeConversionEnabled(const ConversionRequest &request,
	const Segments &segments);

	// Returns true if key consistes of '0'-'9' or '-'
	static bool IsZipCodeRequest(const string &key);

	// Returns max size of realtime candidates.
	size_t GetRealtimeCandidateMaxSize(const Segments &segments,
	bool mixed_conversion,
	size_t max_size) const;

	// Aggregates unigram candidate for non mixed conversion.
	void AggregateUnigramCandidate(const ConversionRequest &request,
	const Segments &segments,
	vector<Result> *results) const;

	// Aggregates unigram candidate for mixed conversion.
	// This reduces redundant candidates.
	void AggregateUnigramCandidateForMixedConversion(
	const ConversionRequest &request,
	const Segments &segments,
	vector<Result> *results) const;

	// Returns cutoff threshold of unigram candidates.
	// AggregateUnigramPrediction method does not return any candidates
	// if there are too many (>= cutoff threshold) eligible candidates.
	// This behavior prevents a user from seeing too many prefix-match
	// candidates.
	size_t GetCandidateCutoffThreshold(const Segments &segments) const;

	// Generates a top conversion result from \|converter_\| and adds its result to
	// \|results\|.
	bool PushBackTopConversionResult(const ConversionRequest &request,
	const Segments &segments,
	vector<Result> *results) const;

	// Sets candidate description.
	static void SetDescription(PredictionTypes types,
	uint32 attributes,
	string *description);
	// Description for DEBUG mode.
	static void SetDebugDescription(PredictionTypes types,
	string *description);

	const ConverterInterface *converter_;
	const ImmutableConverterInterface *immutable_converter_;
	const DictionaryInterface *dictionary_;
	const DictionaryInterface *suffix_dictionary_;
	const ConnectorInterface *connector_;
	const SegmenterInterface *segmenter_;
	const SuggestionFilter *suggestion_filter_;
	const uint16 counter_suffix_word_id_;
	const string predictor_name_;

	DISALLOW_COPY_AND_ASSIGN(DictionaryPredictor);
	};
	} // namespace mozc

	#endif // MOZC_PREDICTION_DICTIONARY_PREDICTOR_H_