blob: 99f642b1185e3793483a6e93314de69e979f815d [file] [log] [blame]
// Copyright 2010-2015, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "gui/character_pad/unicode_util.h"
#include <QtCore/QtCore>
#include <QtGui/QtGui>
#include <algorithm>
#include <string>
#include "base/logging.h"
#include "base/util.h"
#include "gui/character_pad/data/cp932_map.h"
#include "gui/character_pad/data/unicode_data.h"
#include "gui/character_pad/data/unihan_data.h"
namespace mozc {
namespace {
// Ordering functor for table records that expose a |ucs4| member.
// Returns true when |lhs.ucs4| is strictly smaller than |rhs.ucs4|; no other
// member of the record is consulted.
template <typename Record>
struct UnicodeDataCompare {
  bool operator()(const Record &lhs, const Record &rhs) const {
    return lhs.ucs4 < rhs.ucs4;
  }
};
bool ExtractFirstUCS4Char(const QString &str, char32 *ucs4) {
const QVector<uint> ucs4s = str.toUcs4();
// Due to QTBUG-25536, QString::toUcs4() is not reliable on Qt 4.8.0/4.8.1.
// https://bugreports.qt-project.org/browse/QTBUG-25536
// Nevertheless, we should be able to get the first character.
if (ucs4s.size() < 1) {
return false;
}
DCHECK(ucs4);
*ucs4 = static_cast<char32>(ucs4s[0]);
return true;
}
// TODO(taku): move it to base/util
// Converts a Shift-JIS (CP932) code to the corresponding EUC-JP code.
//  - 0x00-0x7F (ASCII) is returned unchanged.
//  - 0xA1-0xDF (half-width kana) is returned as 0x8E followed by the byte.
//  - A double-byte code whose lead byte is 0x81-0x9F or 0xE0-0xEF and whose
//    trail byte is 0x40-0x7E or 0x80-0xFC is converted arithmetically.
// Every other value yields 0, the same "no mapping" value this file uses for
// failed table lookups. In particular, lead bytes 0xF0-0xFC would need an
// EUC lead byte above 0xFF, which cannot be expressed in the 16-bit result.
uint16 SjisToEUC(uint16 code) {
  if (code < 0x80) {  // ascii
    return code;
  }
  if (code >= 0xa1 && code <= 0xdf) {  // halfwidth kana
    return (0x8e << 8) | code;
  }
  const uint16 lo = code & 0xff;
  const uint16 hi = (code >> 8) & 0xff;
  // Reject anything that is not a well-formed double-byte code with a
  // two-byte EUC-JP form. Without this check the arithmetic below either
  // goes negative (hi == 0) or overflows 8 bits (hi >= 0xf0).
  const bool lead_ok =
      (hi >= 0x81 && hi <= 0x9f) || (hi >= 0xe0 && hi <= 0xef);
  const bool trail_ok = (lo >= 0x40 && lo <= 0xfc && lo != 0x7f);
  if (!lead_ok || !trail_ok) {
    return 0;
  }
  // Each Shift-JIS lead byte covers two EUC rows; the trail byte selects
  // which one (>= 0x9f selects the second row of the pair).
  const bool upper_block = (hi >= 0xe0);
  if (lo >= 0x9f) {
    return ((hi * 2 - (upper_block ? 0xe0 : 0x60)) << 8) | (lo + 2);
  }
  return ((hi * 2 - (upper_block ? 0xe1 : 0x61)) << 8) |
         (lo + (lo >= 0x7f ? 0x60 : 0x61));
}
// Looks up the first code point of |str| in kCP932MapData and returns the
// |sjis| value of the matching entry. Returns 0 when |str| has no code point
// or when no entry has the same |ucs4|.
// The binary search assumes kCP932MapData is sorted by |ucs4|.
uint16 LookupCP932Data(const QString &str) {
  char32 code_point = 0;
  if (!ExtractFirstUCS4Char(str, &code_point)) {
    return 0;
  }
  // Only |ucs4| is read by the comparator, so the other fields of the probe
  // are left unset.
  CP932MapData probe;
  probe.ucs4 = code_point;
  const CP932MapData *const table_end = kCP932MapData + kCP932MapDataSize;
  const CP932MapData *const hit = std::lower_bound(
      kCP932MapData, table_end, probe, UnicodeDataCompare<CP932MapData>());
  const bool found = (hit != table_end) && (hit->ucs4 == probe.ucs4);
  if (!found) {
    return 0;
  }
  return hit->sjis;
}
// Looks up the first code point of |str| in kUnihanData.
// Returns a pointer to the matching table entry, or NULL when |str| has no
// code point or no entry has the same |ucs4|.
// The binary search assumes kUnihanData is sorted by |ucs4|.
const UnihanData *LookupUnihanData(const QString &str) {
  char32 code_point = 0;
  if (!ExtractFirstUCS4Char(str, &code_point)) {
    return NULL;
  }
  // Only |ucs4| is read by the comparator, so the other fields of the probe
  // are left unset.
  UnihanData probe;
  probe.ucs4 = code_point;
  const UnihanData *const table_end = kUnihanData + kUnihanDataSize;
  const UnihanData *const hit = std::lower_bound(
      kUnihanData, table_end, probe, UnicodeDataCompare<UnihanData>());
  const bool found = (hit != table_end) && (hit->ucs4 == code_point);
  return found ? hit : NULL;
}
// Looks up the first code point of |str| in kUnicodeData and returns the
// |description| of the matching entry as a QString. Returns an empty string
// when |str| has no code point or no entry has the same |ucs4|.
// The binary search assumes kUnicodeData is sorted by |ucs4|.
const QString LookupUnicodeData(const QString &str) {
  char32 code_point = 0;
  if (!ExtractFirstUCS4Char(str, &code_point)) {
    return QString("");
  }
  UnicodeData probe;
  probe.ucs4 = code_point;
  probe.description = NULL;
  const UnicodeData *const table_end = kUnicodeData + kUnicodeDataSize;
  const UnicodeData *const hit = std::lower_bound(
      kUnicodeData, table_end, probe, UnicodeDataCompare<UnicodeData>());
  const bool found = (hit != table_end) && (hit->ucs4 == code_point);
  if (!found) {
    return QString("");
  }
  return QString(hit->description);
}
// Formats the first code point of |str| as "U+" followed by at least four
// upper-case hexadecimal digits. Returns "" when |str| has no code point.
QString toCodeInUcs4(const QString &str) {
  char32 code_point = 0;
  const bool has_char = ExtractFirstUCS4Char(str, &code_point);
  if (!has_char) {
    return "";
  }
  QString formatted;
  formatted.sprintf("U+%04X", code_point);
  return formatted;
}
// Returns the UTF-8 encoding of |str| as upper-case two-digit hex bytes,
// each followed by a single space (so the result ends with a space).
// Returns "--" when |str| is empty.
QString toHexUTF8(const QString &str) {
  if (str.isEmpty()) {
    return QString("--");
  }
  const QByteArray utf8 = str.toUtf8();
  const char *const begin = utf8.constData();
  const char *const end = begin + utf8.size();
  QString hex;
  for (const char *p = begin; p != end; ++p) {
    QString byte_text;
    // Cast through uint8 so bytes >= 0x80 are not sign-extended.
    byte_text.sprintf("%02X ", static_cast<uint8>(*p));
    hex += byte_text;
  }
  return hex;
}
// Renders a 16-bit code as upper-case hex bytes.
//  - 0 is rendered as "--".
//  - A code whose high byte is zero is rendered as one byte ("XX").
//  - Anything else is rendered as two space-separated bytes ("XX XX").
QString Hexify(uint16 code) {
  if (code == 0) {
    return QString("--");
  }
  const uint16 upper_byte = (code >> 8) & 0xFF;
  const uint16 lower_byte = code & 0xFF;
  QString formatted;
  if (upper_byte != 0) {
    formatted.sprintf("%02X %02X", upper_byte, lower_byte);
  } else {
    formatted.sprintf("%02X", lower_byte);
  }
  return formatted;
}
// Returns the hex rendering (see Hexify) of the code that LookupCP932Data
// finds for the first character of |str|.
QString toHexSJIS(const QString &str) {
  const uint16 sjis_code = LookupCP932Data(str);
  return Hexify(sjis_code);
}
// Returns the hex rendering (see Hexify) of the first character of |str|
// after converting its LookupCP932Data code with SjisToEUC.
QString toHexEUC(const QString &str) {
  const uint16 sjis_code = LookupCP932Data(str);
  const uint16 euc_code = SjisToEUC(sjis_code);
  return Hexify(euc_code);
}
// Converts a UTF-8 reading string to a QString for display.
// When the system locale's language is Japanese, |str| is first passed
// through Util::LowerString and Util::RomanjiToHiragana; otherwise it is
// returned as-is.
QString toJapaneseReading(const char *str) {
  if (QLocale::system().language() != QLocale::Japanese) {
    return QString::fromUtf8(str);
  }
  string lowered = str;
  Util::LowerString(&lowered);
  string converted;
  Util::RomanjiToHiragana(lowered, &converted);
  return QString::fromUtf8(converted.c_str());
}
} // namespace
// static
QString UnicodeUtil::GetToolTip(const QFont &font, const QString &text) {
QString info = QString::fromLatin1
("<center><span style=\"font-size: 24pt; font-family: %1\">").arg
(font.family());
info += Qt::escape(text);
info += "</span></center>";
const QString desc = LookupUnicodeData(text);
if (!desc.isEmpty()) {
info += "<center><span>";
info += Qt::escape(desc);
info += "</span></center>";
}
info += "<table border=0>";
const UnihanData *unihan = LookupUnihanData(text);
if (unihan != NULL) {
if (unihan->japanese_kun != NULL) {
info += "<tr><td>" + QObject::tr("Kun Reading") + ":</td><td>";
info += Qt::escape(toJapaneseReading(unihan->japanese_kun));
info += "</td></tr>";
}
if (unihan->japanese_on != NULL) {
info += "<tr><td>" + QObject::tr("On Reading") + ":</td><td>";
info += Qt::escape(toJapaneseReading(unihan->japanese_on));
info += "</td></tr>";
}
// Since radical/total_storkes defined in Unihan database are not
// reliable, we currently don't want to display them.
// if (unihan->radical != NULL) {
// info += "<tr><td>" + QObject::tr("Radical") + ":</td><td>";
// info += Qt::escape(QString::fromUtf8(unihan->radical));
// info += "</td></tr>";
// }
// if (unihan->total_strokes > 0) {
// QString tmp;
// tmp.sprintf("%d", unihan->total_strokes);
// info += "<tr><td>" + QObject::tr("Total Strokes") + ":</td><td>";
// info += Qt::escape(tmp);
// info += "</td></tr>";
// }
if (unihan->IRG_jsource != NULL) {
info += "<tr><td>" + QObject::tr("Source") + ":</td><td>";
info += Qt::escape(QString::fromUtf8(unihan->IRG_jsource));
info += "</td></tr>";
}
}
info += "<tr><td>" + QObject::tr("Unicode") + ":</td><td>";
info += toCodeInUcs4(text) + "</td></tr>";
info += "<tr><td>UTF-8: </td><td>";
info += toHexUTF8(text) + "</td></tr>";
info += "<tr><td>Shift-JIS: </td><td>";
info += toHexSJIS(text) + "</td></tr>";
info += "<tr><td>EUC-JP: </td><td>";
info += toHexEUC(text) + "</td></tr>";
info += "</table>";
return info;
}
} // namespace mozc