// Copyright 2010-2015, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "rewriter/variants_rewriter.h"

#include <string>
#include <vector>

#include "base/logging.h"
#include "base/number_util.h"
#include "base/util.h"
#include "config/character_form_manager.h"
#include "converter/conversion_request.h"
#include "converter/segments.h"
#include "dictionary/pos_matcher.h"
#include "session/commands.pb.h"

namespace mozc {

using config::CharacterFormManager;

// Description strings attached to candidates.  Every literal is UTF-8 written
// with hex escapes; the comment above each definition shows the text it
// encodes, followed by an English gloss.

// The following four strings are the same in every build configuration.
// "[全]" (full-width marker)
const char *VariantsRewriter::kFullWidth = "[\xE5\x85\xA8]";
// "[半]" (half-width marker)
const char *VariantsRewriter::kHalfWidth = "[\xE5\x8D\x8A]";
// "<もしかして>" ("did you mean")
const char *VariantsRewriter::kDidYouMean =
    "<\xE3\x82\x82\xE3\x81\x97\xE3\x81\x8B\xE3\x81\x97\xE3\x81\xA6>";
// "円記号" ("yen sign")
const char *VariantsRewriter::kYenKigou =
    "\xE5\x86\x86\xE8\xA8\x98\xE5\x8F\xB7";

#ifdef OS_ANDROID
// Under OS_ANDROID the script-type labels are empty strings.
const char *VariantsRewriter::kHiragana = "";
const char *VariantsRewriter::kKatakana = "";
const char *VariantsRewriter::kNumber = "";
const char *VariantsRewriter::kAlphabet = "";
const char *VariantsRewriter::kKanji = "";
// "<機種依存>" ("platform dependent", shortened form)
const char *VariantsRewriter::kPlatformDependent = "<\xE6\xA9\x9F\xE7\xA8\xAE"
    "\xE4\xBE\x9D\xE5\xAD\x98>";
#else  // !OS_ANDROID
// "ひらがな" ("hiragana")
const char *VariantsRewriter::kHiragana =
    "\xE3\x81\xB2\xE3\x82\x89\xE3\x81\x8C\xE3\x81\xAA";
// "カタカナ" ("katakana")
const char *VariantsRewriter::kKatakana =
    "\xE3\x82\xAB\xE3\x82\xBF\xE3\x82\xAB\xE3\x83\x8A";
// "数字" ("number")
const char *VariantsRewriter::kNumber = "\xE6\x95\xB0\xE5\xAD\x97";
// "アルファベット" ("alphabet")
const char *VariantsRewriter::kAlphabet = "\xE3\x82\xA2\xE3\x83\xAB"
    "\xE3\x83\x95\xE3\x82\xA1\xE3\x83\x99\xE3\x83\x83\xE3\x83\x88";
// "漢字" ("kanji")
const char *VariantsRewriter::kKanji = "\xe6\xbc\xa2\xe5\xad\x97";
// "<機種依存文字>" ("platform dependent character")
const char *VariantsRewriter::kPlatformDependent = "<\xE6\xA9\x9F\xE7\xA8\xAE"
    "\xE4\xBE\x9D\xE5\xAD\x98\xE6\x96\x87\xE5\xAD\x97>";
#endif  // OS_ANDROID

// Appends |src| to |*dst|.  When |*dst| already holds text, a single ' ' is
// inserted between the existing text and |src|.  An empty |src| leaves |*dst|
// untouched.  |dst| must not be null (enforced with CHECK).
void AppendString(const string &src, string *dst) {
  CHECK(dst);
  if (src.empty()) {
    return;  // Nothing to add, and no separator either.
  }
  if (!dst->empty()) {
    dst->push_back(' ');
  }
  *dst += src;
}

// Return true if all charcters in |value| are UNKNOWN_SCRIPT
// and FormType of |value| are consistent, e.g. all fullwith or
// all halfwidth.
// Example:
// "&-()" => true (all symbol and all half)
// "&-（）" => false (all symbol but contains both full/half width)
// "google" => false (not symbol)
bool HasCharacterFormDescription(const string &value) {
  if (value.empty()) {
    return false;
  }
  Util::FormType prev = Util::UNKNOWN_FORM;

  for (ConstChar32Iterator iter(value); !iter.Done(); iter.Next()) {
    const char32 ucs4 = iter.Get();
    const Util::FormType type = Util::GetFormType(ucs4);
    if (prev != Util::UNKNOWN_FORM && prev != type) {
      return false;
    }
    if (Util::UNKNOWN_SCRIPT != Util::GetScriptType(ucs4)) {
      return false;
    }
    prev = type;
  }
  return true;
}

// Stores the |pos_matcher| pointer in pos_matcher_; nothing else is done
// here.  RewriteSegment() dereferences pos_matcher_, so it has to point to a
// live POSMatcher whenever rewriting runs.
// NOTE(review): ownership of |pos_matcher| is not visible in this file;
// presumably the caller keeps it alive -- confirm against the header.
VariantsRewriter::VariantsRewriter(const POSMatcher *pos_matcher)
    : pos_matcher_(pos_matcher) {}

// Empty destructor body: pos_matcher_ is not deleted here.
VariantsRewriter::~VariantsRewriter() {}

// static
// Sets the description of |candidate| with the flag set FULL_HALF_WIDTH,
// CHARACTER_FORM, PLATFORM_DEPENDENT_CHARACTER, ZIPCODE and
// SPELLING_CORRECTION.  See SetDescription() for what each flag enables.
void VariantsRewriter::SetDescriptionForCandidate(
    const POSMatcher &pos_matcher,
    Segment::Candidate *candidate) {
  const int description_type = FULL_HALF_WIDTH |
                               CHARACTER_FORM |
                               PLATFORM_DEPENDENT_CHARACTER |
                               ZIPCODE |
                               SPELLING_CORRECTION;
  SetDescription(pos_matcher, description_type, candidate);
}

// static
// Sets the description of |candidate| with the flag set FULL_HALF_WIDTH,
// FULL_HALF_WIDTH_WITH_UNKNOWN, CHARACTER_FORM, PLATFORM_DEPENDENT_CHARACTER
// and SPELLING_CORRECTION.  ZIPCODE is not included.  RewriteSegment() uses
// this for meta candidates.
void VariantsRewriter::SetDescriptionForTransliteration(
    const POSMatcher &pos_matcher,
    Segment::Candidate *candidate) {
  const int description_type = FULL_HALF_WIDTH |
                               FULL_HALF_WIDTH_WITH_UNKNOWN |
                               CHARACTER_FORM |
                               PLATFORM_DEPENDENT_CHARACTER |
                               SPELLING_CORRECTION;
  SetDescription(pos_matcher, description_type, candidate);
}

// static
// Sets the description of |candidate| with the flag set
// PLATFORM_DEPENDENT_CHARACTER, ZIPCODE and SPELLING_CORRECTION.  Neither the
// script-type label nor the full/half width marker is requested.
void VariantsRewriter::SetDescriptionForPrediction(
    const POSMatcher &pos_matcher,
    Segment::Candidate *candidate) {
  const int description_type = PLATFORM_DEPENDENT_CHARACTER |
                               ZIPCODE |
                               SPELLING_CORRECTION;
  SetDescription(pos_matcher, description_type, candidate);
}

// static
void VariantsRewriter::SetDescription(const POSMatcher &pos_matcher,
                                      int description_type,
                                      Segment::Candidate *candidate) {
  string description;
  string character_form_message;

  // Add Character form.
  if (description_type & CHARACTER_FORM) {
    const Util::ScriptType type =
        Util::GetScriptTypeWithoutSymbols(candidate->value);
    switch (type) {
      case Util::HIRAGANA:
        character_form_message = kHiragana;
        // don't need to set full/half, because hiragana only has
        // full form
        description_type &= ~FULL_HALF_WIDTH;
        break;
      case Util::KATAKANA:
        // character_form_message = "カタカナ";
        character_form_message = kKatakana;
        break;
      case Util::NUMBER:
        // character_form_message = "数字";
        character_form_message = kNumber;
        break;
      case Util::ALPHABET:
        // character_form_message = "アルファベット";
        character_form_message = kAlphabet;
        break;
      case Util::KANJI:
      case Util::EMOJI:
        // don't need to have full/half annotation for kanji and emoji,
        // since it's obvious
        description_type &= ~FULL_HALF_WIDTH;
        break;
      case Util::UNKNOWN_SCRIPT:   // mixed character
        if ((description_type & FULL_HALF_WIDTH_WITH_UNKNOWN) ||
            HasCharacterFormDescription(candidate->value)) {
          description_type |= FULL_HALF_WIDTH;
        } else {
          description_type &= ~FULL_HALF_WIDTH;
        }
        break;
      default:
        // Do nothing
        break;
    }
  }

  // If candidate already has a description, clear it.
  // Currently, character_form_message is treated as a "default"
  // description.
  if (!candidate->description.empty()) {
    character_form_message.clear();
  }

  // full/half char description
  if (description_type & FULL_HALF_WIDTH) {
    const Util::FormType form = Util::GetFormType(candidate->value);
    switch (form) {
      case Util::FULL_WIDTH:
        // description = "[全]";
        description = kFullWidth;
        break;
      case Util::HALF_WIDTH:
        // description = "[半]";
        description = kHalfWidth;
        break;
      default:
        break;
    }
  } else if (description_type & FULL_WIDTH) {
    // description = "[全]";
    description = kFullWidth;
  } else if (description_type & HALF_WIDTH) {
    // description = "[半]";
    description = kHalfWidth;
  }

  // add character_form_message
  AppendString(character_form_message, &description);

  // add main message
  if (candidate->value == "\x5C" || candidate->value == "\xEF\xBC\xBC") {
    // if "\" (harlf-width backslash) or "＼" (full-width backslash)
    // AppendString("バックスラッシュ", &description);
    AppendString("\xE3\x83\x90\xE3\x83\x83\xE3\x82\xAF\xE3\x82\xB9"
                 "\xE3\x83\xA9\xE3\x83\x83\xE3\x82\xB7\xE3\x83\xA5",
                 &description);
  } else if (candidate->value == "\xC2\xA5") {
    // if "¥" (harlf-width Yen sign), append kYenKigou and kPlatformDependent.
    AppendString(kYenKigou, &description);
    AppendString(kPlatformDependent, &description);
  } else if (candidate->value == "\xEF\xBF\xA5") {
    // if "￥" (full-width Yen sign), append only kYenKigou
    AppendString(kYenKigou, &description);
  } else {
    AppendString(candidate->description, &description);
  }

  // Platform dependent char description
  if (description_type & PLATFORM_DEPENDENT_CHARACTER &&
      Util::GetCharacterSet(candidate->value) >= Util::JISX0212) {
    AppendString(kPlatformDependent, &description);
  }

  // The follwoing description tries to overwrite exisiting description.
  // TODO(taku): reconsider this behavior.
  // Zipcode description
  if ((description_type & ZIPCODE) &&
      pos_matcher.IsZipcode(candidate->lid) &&
      candidate->lid == candidate->rid) {
    description = candidate->content_key;
    // Append default description because it may contain extra description.
    AppendString(candidate->description, &description);
  }

  // The follwoing description tries to overwrite exisiting description.
  // TODO(taku): reconsider this behavior.
  // Spelling Correction description
  if ((description_type & SPELLING_CORRECTION) &&
      (candidate->attributes & Segment::Candidate::SPELLING_CORRECTION)) {
    description = kDidYouMean;
    // Add prefix to distinguish this candidate.
    candidate->prefix = "\xE2\x86\x92 ";  // "→ "
    // Append default description because it may contain extra description.
    AppendString(candidate->description, &description);
  }

  // set new description
  candidate->description = description;
  candidate->attributes |= Segment::Candidate::NO_EXTRA_DESCRIPTION;
}

// Always returns RewriterInterface::ALL; |request| is not consulted.
int VariantsRewriter::capability(const ConversionRequest &request) const {
  return RewriterInterface::ALL;
}

// Sets descriptions on the candidates of |seg| and rewrites regular
// candidates for which GenerateAlternatives() produces a default and an
// alternative character form:
//   EXPAND_VARIANT: a new candidate holding the default form is inserted in
//                   front of the original, and the original is rewritten to
//                   the alternative form.
//   SELECT_VARIANT: the original is rewritten in place to the default form.
// Candidates already marked NO_EXTRA_DESCRIPTION are left untouched.
// Returns true if at least one regular candidate was rewritten.  Updates that
// only set a description (including those on meta candidates) do not count.
// |seg| must not be null.
bool VariantsRewriter::RewriteSegment(RewriteType type, Segment *seg) const {
  CHECK(seg);
  bool modified = false;

  // Meta candidates are addressed with negative indices: -1, -2, ...
  for (size_t i = 0; i < seg->meta_candidates_size(); ++i) {
    Segment::Candidate *candidate =
        seg->mutable_candidate(-static_cast<int>(i) - 1);
    DCHECK(candidate);
    if (candidate->attributes & Segment::Candidate::NO_EXTRA_DESCRIPTION) {
      continue;
    }
    SetDescriptionForTransliteration(*pos_matcher_, candidate);
  }

  // Regular candidates.  The buffers are declared outside the loop and
  // reused; GenerateAlternatives() clears them on every call.
  string default_value, alternative_value;
  string default_content_value, alternative_content_value;
  vector<uint32> default_inner_segment_boundary;
  vector<uint32> alternative_inner_segment_boundary;
  for (size_t i = 0; i < seg->candidates_size(); ++i) {
    Segment::Candidate *original_candidate = seg->mutable_candidate(i);
    DCHECK(original_candidate);

    if (original_candidate->attributes &
        Segment::Candidate::NO_EXTRA_DESCRIPTION) {
      continue;
    }

    if (original_candidate->attributes &
        Segment::Candidate::NO_VARIANTS_EXPANSION) {
      // Expansion is disabled for this candidate: description only.
      SetDescriptionForCandidate(*pos_matcher_, original_candidate);
      VLOG(1) << "Canidate has NO_NORMALIZATION node";
      continue;
    }

    if (!GenerateAlternatives(*original_candidate,
                              &default_value,
                              &alternative_value,
                              &default_content_value,
                              &alternative_content_value,
                              &default_inner_segment_boundary,
                              &alternative_inner_segment_boundary)) {
      // No alternative form exists: description only.
      SetDescriptionForCandidate(*pos_matcher_, original_candidate);
      continue;
    }

    CharacterFormManager::FormType default_form
        = CharacterFormManager::UNKNOWN_FORM;
    CharacterFormManager::FormType alternative_form
        = CharacterFormManager::UNKNOWN_FORM;

    int default_description_type =
        (CHARACTER_FORM | PLATFORM_DEPENDENT_CHARACTER |
         ZIPCODE | SPELLING_CORRECTION);

    int alternative_description_type =
        (CHARACTER_FORM | PLATFORM_DEPENDENT_CHARACTER |
         ZIPCODE | SPELLING_CORRECTION);

    // When the manager can tell the forms of the pair apart, request a fixed
    // width marker for each side; otherwise let SetDescription() derive the
    // marker from each value.
    if (CharacterFormManager::GetFormTypesFromStringPair(
            default_value,
            &default_form,
            alternative_value,
            &alternative_form)) {
      if (default_form == CharacterFormManager::HALF_WIDTH) {
        default_description_type |= HALF_WIDTH;
      } else if (default_form == CharacterFormManager::FULL_WIDTH) {
        default_description_type |= FULL_WIDTH;
      }
      if (alternative_form == CharacterFormManager::HALF_WIDTH) {
        alternative_description_type |= HALF_WIDTH;
      } else if (alternative_form == CharacterFormManager::FULL_WIDTH) {
        alternative_description_type |= FULL_WIDTH;
      }
    } else {
      default_description_type     |= FULL_HALF_WIDTH;
      alternative_description_type |= FULL_HALF_WIDTH;
    }

    if (type == EXPAND_VARIANT) {
      // Insert the default candidate at position |i| and rewrite the
      // original (now at |i+1|) to the alternative.
      Segment::Candidate *new_candidate = seg->insert_candidate(i);
      DCHECK(new_candidate);

      new_candidate->Init();
      new_candidate->key = original_candidate->key;
      new_candidate->value = default_value;
      new_candidate->content_key = original_candidate->content_key;
      new_candidate->content_value = default_content_value;
      new_candidate->cost = original_candidate->cost;
      new_candidate->structure_cost = original_candidate->structure_cost;
      new_candidate->lid = original_candidate->lid;
      new_candidate->rid = original_candidate->rid;
      new_candidate->description = original_candidate->description;
      // Install the boundary computed for the default form, as the
      // SELECT_VARIANT branch does.  Both buffers are cleared by the next
      // GenerateAlternatives() call, so swapping them away is safe.
      new_candidate->inner_segment_boundary.swap(
          default_inner_segment_boundary);
      SetDescription(*pos_matcher_, default_description_type, new_candidate);

      original_candidate->value = alternative_value;
      original_candidate->content_value = alternative_content_value;
      // The value lengths may have changed, so the old boundary can no longer
      // be trusted; replace it with the one computed for the alternative.
      original_candidate->inner_segment_boundary.swap(
          alternative_inner_segment_boundary);
      SetDescription(*pos_matcher_,
                     alternative_description_type, original_candidate);
      ++i;  // skip inserted candidate
    } else if (type == SELECT_VARIANT) {
      // Rewrite original to default
      original_candidate->value = default_value;
      original_candidate->content_value = default_content_value;
      original_candidate->inner_segment_boundary.swap(
          default_inner_segment_boundary);
      SetDescription(*pos_matcher_,
                     default_description_type, original_candidate);
    }
    modified = true;
  }
  return modified;
}

// Tries to generate the default and the alternative character form of
// |original|.  All six output arguments are cleared first.  Returns false when
// no rewrite happens.
// - If |original| has no inner segment boundary, or fails IsValid(), the
//   whole value and content value are converted and both boundary outputs
//   stay empty.
// - Otherwise each inner segment is converted separately, the pieces are
//   concatenated, and a boundary entry is emitted per inner segment for both
//   forms.  The result is true if at least one inner segment was converted.
bool VariantsRewriter::GenerateAlternatives(
    const Segment::Candidate &original,
    string *default_value,
    string *alternative_value,
    string *default_content_value,
    string *alternative_content_value,
    vector<uint32> *default_inner_segment_boundary,
    vector<uint32> *alternative_inner_segment_boundary) const {
  // Start from empty outputs.
  default_inner_segment_boundary->clear();
  alternative_inner_segment_boundary->clear();
  default_content_value->clear();
  alternative_content_value->clear();
  default_value->clear();
  alternative_value->clear();

  const config::CharacterFormManager *manager =
      CharacterFormManager::GetCharacterFormManager();

  // TODO(noriyukit): Some rewriter may rewrite key and/or value and make the
  // inner segment boundary inconsistent.  Ideally, it should always be valid.
  // Accessing inner segments with broken boundary information is very
  // dangerous, so validity is checked here and the inner segment boundary of
  // an invalid candidate is ignored.
  const bool is_valid = original.IsValid();
  VLOG_IF(2, !is_valid) << "Invalid candidate: " << original.DebugString();
  const bool use_inner_segments =
      is_valid && !original.inner_segment_boundary.empty();

  if (!use_inner_segments) {
    // Whole-string conversion.
    const bool converted = manager->ConvertConversionStringWithAlternative(
        original.value, default_value, alternative_value);
    if (!converted) {
      return false;
    }
    if (original.value == original.content_value) {
      // Same text, so reuse the result just computed.
      *default_content_value = *default_value;
      *alternative_content_value = *alternative_value;
    } else {
      manager->ConvertConversionStringWithAlternative(
          original.content_value, default_content_value,
          alternative_content_value);
    }
    return true;
  }

  // Per-inner-segment conversion.
  bool any_converted = false;
  string scratch;
  string piece_default, piece_alternative;
  string piece_default_content, piece_alternative_content;
  Segment::Candidate::InnerSegmentIterator iter(&original);
  while (!iter.Done()) {
    iter.GetValue().CopyToString(&scratch);
    piece_default.clear();
    piece_alternative.clear();
    if (manager->ConvertConversionStringWithAlternative(
            scratch, &piece_default, &piece_alternative)) {
      any_converted = true;
    } else {
      // Nothing to convert: both forms are the inner segment's own value.
      iter.GetValue().CopyToString(&piece_default);
      iter.GetValue().CopyToString(&piece_alternative);
    }

    if (iter.GetValue() != iter.GetContentValue()) {
      piece_default_content.clear();
      piece_alternative_content.clear();
      iter.GetContentValue().CopyToString(&scratch);
      manager->ConvertConversionStringWithAlternative(
          scratch, &piece_default_content, &piece_alternative_content);
    } else {
      piece_default_content = piece_default;
      piece_alternative_content = piece_alternative;
    }

    // Record the lengths of this inner segment for each form.  The key and
    // content key are not rewritten, so their lengths come from |iter|.
    default_inner_segment_boundary->push_back(
        Segment::Candidate::EncodeLengths(
            iter.GetKey().size(),
            piece_default.size(),
            iter.GetContentKey().size(),
            piece_default_content.size()));
    alternative_inner_segment_boundary->push_back(
        Segment::Candidate::EncodeLengths(
            iter.GetKey().size(),
            piece_alternative.size(),
            iter.GetContentKey().size(),
            piece_alternative_content.size()));

    *default_value += piece_default;
    *alternative_value += piece_alternative;
    *default_content_value += piece_default_content;
    *alternative_content_value += piece_alternative_content;

    iter.Next();
  }
  return any_converted;
}

void VariantsRewriter::Finish(const ConversionRequest &request,
                              Segments *segments) {
  if (segments->request_type() != Segments::CONVERSION) {
    return;
  }

  // save character form
  for (int i = 0; i < segments->conversion_segments_size(); ++i) {
    const Segment &segment = segments->conversion_segment(i);
    if (segment.candidates_size() <= 0 ||
        segment.segment_type() != Segment::FIXED_VALUE ||
        segment.candidate(0).attributes &
        Segment::Candidate::NO_HISTORY_LEARNING) {
      continue;
    }

    const Segment::Candidate &candidate = segment.candidate(0);
    if (candidate.attributes & Segment::Candidate::NO_VARIANTS_EXPANSION) {
      continue;
    }

    switch (candidate.style) {
      case NumberUtil::NumberString::NUMBER_SEPARATED_ARABIC_HALFWIDTH:
        // treat NUMBER_SEPARATED_ARABIC as half_width num
        CharacterFormManager::GetCharacterFormManager()->
            SetCharacterForm("0", config::Config::HALF_WIDTH);
        break;
      case NumberUtil::NumberString::NUMBER_SEPARATED_ARABIC_FULLWIDTH:
        // treat NUMBER_SEPARATED_WIDE_ARABIC as full_width num
        CharacterFormManager::GetCharacterFormManager()->
            SetCharacterForm("0", config::Config::FULL_WIDTH);
        break;
      default:
        CharacterFormManager::GetCharacterFormManager()->
            GuessAndSetCharacterForm(candidate.value);
        break;
    }
  }
}

// Calls ClearHistory() on the manager returned by
// CharacterFormManager::GetCharacterFormManager().
void VariantsRewriter::Clear() {
  CharacterFormManager::GetCharacterFormManager()->ClearHistory();
}

bool VariantsRewriter::Rewrite(const ConversionRequest &request,
                               Segments *segments) const {
  CHECK(segments);
  bool modified = false;

  const RewriteType type = ((segments->request_type() == Segments::SUGGESTION) ?
                            SELECT_VARIANT : EXPAND_VARIANT);

  for (size_t i = segments->history_segments_size();
       i < segments->segments_size(); ++i) {
    Segment *seg = segments->mutable_segment(i);
    DCHECK(seg);
    modified |= RewriteSegment(type, seg);
  }

  return modified;
}

}  // namespace mozc
