// Copyright 2010-2015, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "rewriter/number_rewriter.h"

#include <algorithm>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

#include "base/logging.h"
#include "base/number_util.h"
#include "base/util.h"
#include "config/config.pb.h"
#include "config/config_handler.h"
#include "converter/conversion_request.h"
#include "converter/segments.h"
#include "data_manager/data_manager_interface.h"
#include "dictionary/pos_matcher.h"
#include "rewriter/number_compound_util.h"
#include "session/commands.pb.h"

namespace mozc {
namespace {

// Rewrite type
enum RewriteType {
  NO_REWRITE = 0,
  ARABIC_FIRST,  // arabic candidates first ordering
  KANJI_FIRST,  // kanji candidates first ordering
};

struct RewriteCandidateInfo {
  RewriteType type;
  int position;
  Segment::Candidate candidate;
};

// Returns rewrite type for the given segment and base candidate information.
// base_candidate_pos: the index of the base candidate.
// *arabic_candidate: arabic candidate using numeric style conversion.
// POS information, cost, etc will be copied from base candidate.
RewriteType GetRewriteTypeAndBase(
    const CounterSuffixEntry *suffix_array,
    size_t suffix_array_size,
    const Segment &seg,
    int base_candidate_pos,
    const POSMatcher &pos_matcher,
    Segment::Candidate *arabic_candidate) {
  DCHECK(arabic_candidate);

  const Segment::Candidate &c = seg.candidate(base_candidate_pos);
  if (!number_compound_util::IsNumber(suffix_array, suffix_array_size,
                                      pos_matcher, c)) {
    return NO_REWRITE;
  }

  if (Util::GetScriptType(c.content_value) == Util::NUMBER) {
    arabic_candidate->CopyFrom(c);
    arabic_candidate->inner_segment_boundary.clear();
    DCHECK(arabic_candidate->IsValid());
    return ARABIC_FIRST;
  }

  string half_width_new_content_value;
  Util::FullWidthToHalfWidth(c.content_key, &half_width_new_content_value);
  // Try to get normalized kanji_number and arabic_number.
  // If it failed, do nothing.
  // Retain suffix for later use.
  string number_suffix, kanji_number, arabic_number;
  if (!NumberUtil::NormalizeNumbersWithSuffix(c.content_value,
                                              true,  // trim_reading_zeros
                                              &kanji_number,
                                              &arabic_number,
                                              &number_suffix) ||
      arabic_number == half_width_new_content_value) {
    return NO_REWRITE;
  }
  const string suffix(c.value, c.content_value.size(),
                      c.value.size() - c.content_value.size());
  arabic_candidate->Init();
  arabic_candidate->value = arabic_number + number_suffix + suffix;
  arabic_candidate->content_value = arabic_number + number_suffix;
  arabic_candidate->key = c.key;
  arabic_candidate->content_key = c.content_key;
  arabic_candidate->cost = c.cost;
  arabic_candidate->structure_cost = c.structure_cost;
  arabic_candidate->lid = c.lid;
  arabic_candidate->rid = c.rid;
  DCHECK(arabic_candidate->IsValid());
  return KANJI_FIRST;
}

void GetRewriteCandidateInfos(
    const CounterSuffixEntry *suffix_array, size_t suffix_array_size,
    const Segment &seg,
    const POSMatcher &pos_matcher,
    vector<RewriteCandidateInfo> *rewrite_candidate_info) {
  DCHECK(rewrite_candidate_info);
  RewriteCandidateInfo info;

  for (size_t i = 0; i < seg.candidates_size(); ++i) {
    const RewriteType type = GetRewriteTypeAndBase(
        suffix_array, suffix_array_size,
        seg, i, pos_matcher, &info.candidate);
    if (type == NO_REWRITE) {
      continue;
    }
    info.type = type;
    info.position = i;
    rewrite_candidate_info->push_back(info);
  }
}

// If top candidate is Kanji numeric, we want to expand at least
// 5 candidates apart from base candidate.
// http://b/issue?id=2872048
const int kArabicNumericOffset = 5;

void PushBackCandidate(const string &value, const string &desc,
                       NumberUtil::NumberString::Style style,
                       vector<Segment::Candidate> *results) {
  bool found = false;
  for (vector<Segment::Candidate>::const_iterator it = results->begin();
       it != results->end(); ++it) {
    if (it->value == value) {
      found = true;
      break;
    }
  }
  if (!found) {
    Segment::Candidate cand;
    cand.value = value;
    cand.description = desc;
    cand.style = style;
    results->push_back(cand);
  }
}

void SetCandidatesInfo(const Segment::Candidate &arabic_cand,
                       vector<Segment::Candidate> *candidates) {
  const string suffix(
      arabic_cand.value, arabic_cand.content_value.size(),
      arabic_cand.value.size() - arabic_cand.content_value.size());

  for (vector<Segment::Candidate>::iterator it = candidates->begin();
       it != candidates->end(); ++it) {
    it->content_value.assign(it->value);
    it->value.append(suffix);
  }
}

class CheckValueOperator {
 public:
  explicit CheckValueOperator(const string &v) : find_value_(v) {}
  bool operator() (const Segment::Candidate &cand) const {
    return (cand.value == find_value_);
  }

 private:
  const string &find_value_;
};

// If we have the candidates to be inserted before the base candidate,
// delete them.
// TODO(toshiyuki): Delete candidates between base pos and insert pos
// if necessary.
void EraseExistingCandidates(
    const vector<Segment::Candidate> &results,
    int base_candidate_pos,
    Segment *seg,
    vector<RewriteCandidateInfo> *rewrite_candidate_info_list) {
  DCHECK(seg);
  // Remember base candidate value
  for (int pos = base_candidate_pos - 1; pos >= 0; --pos) {
    // Simple liner search. |results| size is small. (at most 10 or so)
    const vector<Segment::Candidate>::const_iterator iter =
        find_if(results.begin(), results.end(),
                CheckValueOperator(seg->candidate(pos).value));
    if (iter == results.end()) {
      continue;
    }

    seg->erase_candidate(pos);

    // Adjust position in rewrite_candidate_info.
    for (size_t i = 0; i < rewrite_candidate_info_list->size(); ++i) {
      if ((*rewrite_candidate_info_list)[i].position > pos) {
        --(*rewrite_candidate_info_list)[i].position;
      }
    }
  }
}

// This is a utility function for InsertCandidate and UpdateCandidate.
// Do not use this function directly.
void MergeCandidateInfoInternal(const Segment::Candidate &base_cand,
                                const Segment::Candidate &result_cand,
                                Segment::Candidate *cand) {
  DCHECK(cand);
  cand->key = base_cand.key;
  cand->value = result_cand.value;
  cand->content_key = base_cand.content_key;
  cand->content_value = result_cand.content_value;
  cand->consumed_key_size = base_cand.consumed_key_size;
  cand->cost = base_cand.cost;
  cand->lid = base_cand.lid;
  cand->rid = base_cand.rid;
  cand->style = result_cand.style;

  if (base_cand.attributes & Segment::Candidate::PARTIALLY_KEY_CONSUMED) {
    cand->description.assign("\xE9\x83\xA8\xE5\x88\x86");  // "部分"
    if (!result_cand.description.empty()) {
      cand->description.append(1, '\n').append(result_cand.description);
    }
  } else {
    cand->description.assign(result_cand.description);
  }

  // Don't want to have FULL_WIDTH form for Hex/Oct/BIN..etc.
  if (cand->style == NumberUtil::NumberString::NUMBER_HEX ||
      cand->style == NumberUtil::NumberString::NUMBER_OCT ||
      cand->style == NumberUtil::NumberString::NUMBER_BIN) {
    cand->attributes |= Segment::Candidate::NO_VARIANTS_EXPANSION;
  }
  cand->attributes |=
      base_cand.attributes & Segment::Candidate::PARTIALLY_KEY_CONSUMED;
}

void InsertCandidate(Segment *segment,
                     int32 insert_position,
                     const Segment::Candidate &base_cand,
                     const Segment::Candidate &result_cand) {
  DCHECK(segment);
  Segment::Candidate *c = segment->insert_candidate(insert_position);
  c->Init();
  MergeCandidateInfoInternal(base_cand, result_cand, c);
}

void UpdateCandidate(Segment *segment,
                     int32 update_position,
                     const Segment::Candidate &base_cand,
                     const Segment::Candidate &result_cand) {
  DCHECK(segment);
  Segment::Candidate *c = segment->mutable_candidate(update_position);
  // Do not call |c->Init()| for an existing candidate.
  // There are two major reasons.
  // 1) Future design change may introduce another field into
  //    Segment::Candidate. In such situation, simply calling |c->Init()|
  //    for an existing candidate may result in unexpeced data loss.
  // 2) In order to preserve existing attribute information such as
  //    Segment::Candidate::USER_DICTIONARY bit in |c|, we cannot not call
  //    |c->Init()|. Note that neither |base_cand| nor |result[0]| has
  //    valid value in its |attributes|.
  MergeCandidateInfoInternal(base_cand, result_cand, c);
}

void InsertConvertedCandidates(const vector<Segment::Candidate> &results,
                               const Segment::Candidate &base_cand,
                               int base_candidate_pos,
                               int insert_pos, Segment *seg) {
  if (results.empty()) {
    return;
  }
  if (base_candidate_pos >= seg->candidates_size()) {
    LOG(WARNING) << "Invalid base candidate pos";
    return;
  }
  // First, insert top candidate
  // If we find the base candidate is equal to the converted
  // special form candidates, we will rewrite it.
  // Otherwise, we will insert top candidate just below the base.
  // Sometimes original base candidate is different from converted candidate
  // For example, "千万" v.s. "一千万", or "一二三" v.s. "百二十三".
  // We don't want to rewrite "千万" to "一千万".
  {
    const string &base_value = seg->candidate(base_candidate_pos).value;
    vector<Segment::Candidate>::const_iterator itr =
        find_if(results.begin(), results.end(), CheckValueOperator(base_value));
    if (itr != results.end() &&
        itr->style != NumberUtil::NumberString::NUMBER_KANJI &&
        itr->style != NumberUtil::NumberString::NUMBER_KANJI_ARABIC) {
      // Update exsisting base candidate
      UpdateCandidate(seg, base_candidate_pos, base_cand, results[0]);
    } else {
      // Insert candidate just below the base candidate
      InsertCandidate(seg, base_candidate_pos + 1, base_cand, results[0]);
      ++insert_pos;
    }
  }

  // Insert others
  for (size_t i = 1; i < results.size(); ++i) {
    InsertCandidate(seg, insert_pos++, base_cand, results[i]);
  }
}

int GetInsertPos(int base_pos, const Segment &segment, RewriteType type) {
  if (type == ARABIC_FIRST) {
    // +2 for arabic half_width full_width expansion
    return min(base_pos + 2, static_cast<int>(segment.candidates_size()));
  } else {
    return min(base_pos + kArabicNumericOffset,
               static_cast<int>(segment.candidates_size()));
  }
}

void InsertHalfArabic(const string &half_arabic,
                      vector<NumberUtil::NumberString> *output) {
  output->push_back(
      NumberUtil::NumberString(half_arabic, "",
                               NumberUtil::NumberString::DEFAULT_STYLE));
}

void GetNumbers(RewriteType type, bool exec_radix_conversion,
                const string &arabic_content_value,
                vector<NumberUtil::NumberString> *output) {
  DCHECK(output);
  if (type == ARABIC_FIRST) {
    InsertHalfArabic(arabic_content_value, output);
    NumberUtil::ArabicToWideArabic(arabic_content_value, output);
    NumberUtil::ArabicToSeparatedArabic(arabic_content_value, output);
    NumberUtil::ArabicToKanji(arabic_content_value, output);
    NumberUtil::ArabicToOtherForms(arabic_content_value, output);
  } else if (type == KANJI_FIRST) {
    NumberUtil::ArabicToKanji(arabic_content_value, output);
    InsertHalfArabic(arabic_content_value, output);
    NumberUtil::ArabicToWideArabic(arabic_content_value, output);
    NumberUtil::ArabicToSeparatedArabic(arabic_content_value, output);
    NumberUtil::ArabicToOtherForms(arabic_content_value, output);
  }

  if (exec_radix_conversion) {
    NumberUtil::ArabicToOtherRadixes(arabic_content_value, output);
  }
}

bool RewriteOneSegment(
    const CounterSuffixEntry *suffix_array, size_t suffix_array_size,
    const POSMatcher &pos_matcher, bool exec_radix_conversion, Segment *seg) {
  DCHECK(seg);
  bool modified = false;
  vector<RewriteCandidateInfo> rewrite_candidate_infos;
  GetRewriteCandidateInfos(suffix_array, suffix_array_size,
                           *seg, pos_matcher, &rewrite_candidate_infos);

  for (int i = rewrite_candidate_infos.size() - 1; i >= 0; --i) {
    const RewriteCandidateInfo &info = rewrite_candidate_infos[i];
    if (info.candidate.content_value.size() > info.candidate.value.size()) {
      LOG(ERROR) << "Invalid content_value/value: ";
      break;
    }

    string arabic_content_value;
    Util::FullWidthToHalfWidth(
        info.candidate.content_value, &arabic_content_value);
    if (Util::GetScriptType(arabic_content_value) != Util::NUMBER) {
      if (Util::GetFirstScriptType(arabic_content_value) == Util::NUMBER) {
        // Rewrite for number suffix
        const int insert_pos = min(info.position + 1,
                                   static_cast<int>(seg->candidates_size()));
        InsertCandidate(seg, insert_pos, info.candidate, info.candidate);
        modified = true;
        continue;
      }
      LOG(ERROR) << "arabic_content_value is not number: "
                 << arabic_content_value;
      break;
    }
    vector<NumberUtil::NumberString> output;
    GetNumbers(info.type, exec_radix_conversion, arabic_content_value, &output);
    vector<Segment::Candidate> converted_numbers;
    for (int j = 0; j < output.size(); ++j) {
      PushBackCandidate(output[j].value, output[j].description,
                        output[j].style, &converted_numbers);
    }
    SetCandidatesInfo(info.candidate, &converted_numbers);

    // Caution!!!: This invocation will update the data inside of the
    // rewrite_candidate_infos. Thus, |info| also can be updated as well
    // regardless of whether it's const reference-ness.
    EraseExistingCandidates(converted_numbers, info.position,
                            seg, &rewrite_candidate_infos);
    int insert_pos = GetInsertPos(info.position, *seg, info.type);
    DCHECK_LT(info.position, insert_pos);
    InsertConvertedCandidates(
        converted_numbers, info.candidate, info.position, insert_pos, seg);
    modified = true;
  }
  return modified;
}
}  // namespace

NumberRewriter::NumberRewriter(const DataManagerInterface *data_manager)
    : pos_matcher_(data_manager->GetPOSMatcher()) {
  data_manager->GetCounterSuffixSortedArray(&suffix_array_,
                                            &suffix_array_size_);
}

NumberRewriter::~NumberRewriter() {}

int NumberRewriter::capability(const ConversionRequest &request) const {
  if (request.request().mixed_conversion()) {
    return RewriterInterface::ALL;
  }
  return RewriterInterface::CONVERSION;
}

bool NumberRewriter::Rewrite(const ConversionRequest &request,
                             Segments *segments) const {
  DCHECK(segments);
  if (!GET_CONFIG(use_number_conversion)) {
    VLOG(2) << "no use_number_conversion";
    return false;
  }

  bool modified = false;
  // Radix conversion is done only for conversion mode.
  // Showing radix candidates is annoying for an user.
  const bool exec_radix_conversion =
      (segments->conversion_segments_size() == 1 &&
       segments->request_type() == Segments::CONVERSION);

  for (size_t i = 0; i < segments->conversion_segments_size(); ++i) {
    Segment *seg = segments->mutable_conversion_segment(i);
    modified |= RewriteOneSegment(suffix_array_, suffix_array_size_,
                                  *pos_matcher_, exec_radix_conversion, seg);
  }

  return modified;
}

}  // namespace mozc
