blob: d661c6e962788ad3429064b8fc1029b87a87df6d [file] [log] [blame]
// Copyright 2010-2015, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "unix/emacs/mozc_emacs_helper_lib.h"
#include <algorithm>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include "base/logging.h"
#include "base/number_util.h"
#include "base/port.h"
#include "base/protobuf/descriptor.h"
#include "base/util.h"
#include "session/commands.pb.h"
#include "session/key_parser.h"
namespace mozc {
namespace emacs {
namespace {
// Forward declarations of the file-local printers defined in the anonymous
// namespace near the end of this file.  PrintMessage() calls PrintField()
// before its definition appears, so the prototypes are needed here.

// Appends one field of 'message' (its name and its value(s)) to 'output'.
void PrintField(
const protobuf::Message &message,
const protobuf::Reflection &reflection,
const protobuf::FieldDescriptor &field,
vector<string>* output);
// Appends a single value of 'field' to 'output'.  'index' selects the element
// of a repeated field and is ignored for a non-repeated field.
void PrintFieldValue(
const protobuf::Message &message,
const protobuf::Reflection &reflection,
const protobuf::FieldDescriptor &field,
int index,
vector<string>* output);
} // namespace
// Parses a line, which must be a single complete command in form of:
// '(' EVENT_ID COMMAND [ARGUMENT]... ')'
// where EVENT_ID is an arbitrary integer used to identify the response
// according to the command (see 'emacs-event-id' in a response).
// Normally it's just a sequence number of transactions.
// COMMAND is one of 'CreateSession', 'DeleteSession' and 'SendKey'.
// ARGUMENTs depend on a command.
// An input line must be surrounded by a pair of parentheses,
// like a S-expression.
void ParseInputLine(
const string &line, uint32 *event_id, uint32 *session_id,
mozc::commands::Input *input) {
CHECK(event_id);
CHECK(session_id);
CHECK(input);
vector<string> tokens;
if (!TokenizeSExpr(line, &tokens) ||
tokens.size() < 4 || // Must be at least '(' EVENT_ID COMMAND ')'.
tokens.front() != "(" || tokens.back() != ")") {
ErrorExit(kErrScanError, "S expression in the wrong format");
}
// Read an event ID (a sequence number).
if (!NumberUtil::SafeStrToUInt32(tokens[1], event_id)) {
ErrorExit(kErrWrongTypeArgument, "Event ID is not an integer");
}
// Read a command.
const string &func = tokens[2];
if (func == "SendKey") { // SendKey is a most-frequently-used command.
input->set_type(mozc::commands::Input::SEND_KEY);
} else if (func == "CreateSession") {
input->set_type(mozc::commands::Input::CREATE_SESSION);
} else if (func == "DeleteSession") {
input->set_type(mozc::commands::Input::DELETE_SESSION);
} else {
// Mozc has SendTestKey and SendCommand commands in addition to the above.
// But this code doesn't support them because of no need so far.
ErrorExit(kErrVoidFunction, "Unknown function");
}
switch (input->type()) {
case mozc::commands::Input::CREATE_SESSION: {
// Suppose: (EVENT_ID CreateSession)
if (tokens.size() != 4) {
ErrorExit(kErrWrongNumberOfArguments, "Wrong number of arguments");
}
break;
}
case mozc::commands::Input::DELETE_SESSION: {
// Suppose: (EVENT_ID DeleteSession SESSION_ID)
if (tokens.size() != 5) {
ErrorExit(kErrWrongNumberOfArguments, "Wrong number of arguments");
}
// Parse session ID.
if (!NumberUtil::SafeStrToUInt32(tokens[3], session_id)) {
ErrorExit(kErrWrongTypeArgument, "Session ID is not an integer");
}
break;
}
case mozc::commands::Input::SEND_KEY: {
// Suppose: (EVENT_ID SendKey SESSION_ID KEY...)
if (tokens.size() < 6) {
ErrorExit(kErrWrongNumberOfArguments, "Wrong number of arguments");
}
// Parse session ID.
if (!NumberUtil::SafeStrToUInt32(tokens[3], session_id)) {
ErrorExit(kErrWrongTypeArgument, "Session ID is not an integer");
}
// Parse keys.
vector<string> keys;
string key_string;
for (int i = 4; i < tokens.size() - 1; ++i) {
if (isdigit(tokens[i][0])) { // Numeric key code
uint32 key_code;
if (!NumberUtil::SafeStrToUInt32(tokens[i], &key_code) ||
key_code > 255) {
ErrorExit(kErrWrongTypeArgument, "Wrong character code");
}
keys.push_back(string(1, static_cast<char>(key_code)));
} else if (tokens[i][0] == '\"') { // String literal
if (!key_string.empty()) {
ErrorExit(kErrWrongTypeArgument, "Wrong number of key strings");
}
if (!UnquoteString(tokens[i], &key_string)) {
ErrorExit(kErrWrongTypeArgument, "Wrong key string literal");
}
} else { // Key symbol
keys.push_back(tokens[i]);
}
}
if (!mozc::KeyParser::ParseKeyVector(keys, input->mutable_key()) &&
// If there are any unsupported key symbols, falls back to
// mozc::commands::KeyEvent::UNDEFINED_KEY.
!mozc::KeyParser::ParseKey("undefinedkey", input->mutable_key())) {
DLOG(FATAL); // Code must not reach here.
}
if (!key_string.empty()) {
input->mutable_key()->set_key_string(key_string);
}
break;
}
default:
DLOG(FATAL); // Code must not reach here.
}
}
// Serializes a protocol buffer into S-expression text.
//   - 'message' and 'group' become an alist (associative list)
//   - 'repeated' becomes a list
//   - every other type is written as is
//
// 'message' is the protocol buffer to serialize.  The resulting text
// fragments are appended to 'output', which must not be NULL: an opening
// parenthesis, one entry per field reported by Reflection::ListFields(), and
// a closing parenthesis.
//
// This function never outputs newlines except for ones in strings.
void PrintMessage(
    const protobuf::Message &message,
    vector<string>* output) {
  DCHECK(output);

  typedef vector<const protobuf::FieldDescriptor*> FieldList;
  const protobuf::Reflection &reflection = *message.GetReflection();
  FieldList field_list;
  reflection.ListFields(message, &field_list);

  output->push_back("(");
  for (FieldList::const_iterator it = field_list.begin();
       it != field_list.end(); ++it) {
    PrintField(message, reflection, **it, output);
  }
  output->push_back(")");
}
// Utilities
// Returns a normalized copy of 'symbol':
//   - the copy is lowercased with mozc::Util::LowerString()
//   - every underscore('_') is then turned into a dash('-')
string NormalizeSymbol(const string &symbol) {
  string normalized(symbol);
  mozc::Util::LowerString(&normalized);
  for (string::iterator it = normalized.begin();
       it != normalized.end(); ++it) {
    if (*it == '_') {
      *it = '-';
    }
  }
  return normalized;
}
// Wraps 'str' in double quotes so that it forms a string literal in
// S-expression.
//   - backslash is converted to backslash + backslash
//   - double-quote is converted to backslash + double-quote
//
// Control characters, including newline('\n'), in a given string remain as is.
string QuoteString(const string &str) {
  // Backslashes must be doubled first; otherwise the backslashes introduced
  // for the double quotes would get doubled as well.
  string backslash_escaped;
  mozc::Util::StringReplace(str, "\\", "\\\\", true, &backslash_escaped);
  string fully_escaped;
  mozc::Util::StringReplace(backslash_escaped, "\"", "\\\"", true,
                            &fully_escaped);

  string quoted(1, '\"');
  quoted.append(fully_escaped);
  quoted.push_back('\"');
  return quoted;
}
// Strips the surrounding double quotes from 'input' and resolves backslash
// escapes in between.
//
// 'input' must begin and end with a double quote.  A backslash followed by
// one of a, b, t, n, v, f, r, e, s, d yields the corresponding control
// character (or space for 's'); a backslash followed by any other character
// yields that character itself.
//
// Returns true and stores the result in '*output' on success.  Returns false
// and leaves '*output' empty if the quotes are missing, a bare double quote
// appears inside the string, or the body ends in the middle of an escape.
bool UnquoteString(const string &input, string *output) {
  DCHECK(output);
  output->clear();

  const size_t length = input.length();
  if (length < 2 || input[0] != '\"' || input[length - 1] != '\"') {
    return false;  // wrong format
  }

  string unescaped;
  unescaped.reserve(input.size());
  bool after_backslash = false;
  // Walk over the body only, i.e. everything between the two quotes.
  for (size_t pos = 1; pos + 1 < length; ++pos) {
    const char ch = input[pos];
    if (after_backslash) {
      after_backslash = false;
      char decoded = ch;  // Unknown escapes stand for the character itself.
      switch (ch) {
        case 'a': decoded = '\a'; break;    // control-g
        case 'b': decoded = '\b'; break;    // backspace
        case 't': decoded = '\t'; break;    // tab
        case 'n': decoded = '\n'; break;    // newline
        case 'v': decoded = '\v'; break;    // vertical tab
        case 'f': decoded = '\f'; break;    // formfeed
        case 'r': decoded = '\r'; break;    // carriage return
        case 'e': decoded = '\x1b'; break;  // escape
        case 's': decoded = ' '; break;     // space
        case 'd': decoded = '\x7f'; break;  // delete
      }
      unescaped.push_back(decoded);
      continue;
    }
    if (ch == '\\') {
      after_backslash = true;
      continue;
    }
    if (ch == '\"') {
      // Double-quote w/o the escape sign must not appear inside a quoted
      // string.
      return false;
    }
    unescaped.push_back(ch);
  }

  if (after_backslash) {  // The body ended with a dangling backslash.
    return false;
  }
  output->swap(unescaped);
  return true;
}
// Tokenizes the given string as an S-expression.  Returns true on success, in
// which case '*output' is replaced with the tokens.  On failure '*output' is
// left untouched.  'output' must not be NULL.
//
// Token kinds:
//   - each of ( ) [ ] ' ` is a token on its own
//   - a double-quoted string, kept verbatim including both quotes; inside it
//     a backslash escapes the following character
//   - an atom: a run of graphic characters that ends at white space, at a
//     non-graphic character, or at any of ; ( ) [ ] ' ` "
// White space separates tokens and ';' starts a comment running up to the
// next newline or the end of the input.  A non-graphic character outside of
// a string, as well as an unterminated string, makes the function fail.
//
// This function implements very simple tokenization and is NOT conforming to
// the definition of S-expression.  For example, this function does not return
// an error for the input "\'".
bool TokenizeSExpr(const string &input, vector<string> *output) {
  DCHECK(output);

  vector<string> results;
  const string::const_iterator end = input.end();
  string::const_iterator i = input.begin();
  // Every branch below advances 'i' by itself and never moves it beyond
  // 'end'; in particular a comment that reaches the end of the input must
  // not be followed by another increment.
  while (i != end) {
    // <cctype> classifiers are only defined for unsigned char values and EOF.
    const unsigned char uc = static_cast<unsigned char>(*i);
    if (isspace(uc)) {  // Skip white space.
      ++i;
      continue;
    }
    if (!isgraph(uc)) {
      return false;  // unrecognized control character
    }

    switch (*i) {
      case ';':  // comment
        // Stop on the newline (skipped as white space by the next iteration)
        // or at the end of the input.
        while (i != end && *i != '\n') {
          ++i;
        }
        break;
      case '(': case ')':  // list parentheses
      case '[': case ']':  // vector parentheses
      case '\'':           // quote
      case '`':            // quasiquote
        results.push_back(string(1, *i));
        ++i;
        break;
      case '\"': {  // string
        const string::const_iterator start = i;
        ++i;  // Skip the opening quote.
        for (bool escape = false; ; ++i) {
          if (i == end) {
            return false;  // unexpected end of string
          }
          if (escape) {
            escape = false;
          } else if (*i == '\\') {
            escape = true;
          } else if (*i == '\"') {
            break;
          }
        }
        ++i;  // Consume the closing quote.
        results.push_back(string(start, i));
        break;
      }
      default: {  // must be atom
        const string::const_iterator start = i;
        for (++i; i != end; ++i) {
          if (!isgraph(static_cast<unsigned char>(*i))) {
            break;
          }
          bool is_special_char = false;
          switch (*i) {
            case ';':            // comment
            case '(': case ')':  // list parentheses
            case '[': case ']':  // vector parentheses
            case '\'':           // quote
            case '`':            // quasiquote
            case '\"':           // string
              is_special_char = true;
              break;
            default:
              break;
          }
          if (is_special_char) {
            break;
          }
        }
        // 'i' stays on the character that terminated the atom so that the
        // next iteration handles it.
        results.push_back(string(start, i));
        break;
      }
    }
  }

  output->swap(results);
  return true;
}
// Prints an error message in S-expression and terminates with status code 1.
void ErrorExit(const string &error, const string &message) {
fprintf(stdout, "((error . %s)(message . %s))\n",
error.c_str(), QuoteString(message).c_str());
exit(1);
}
// Clears the 'usages' sub-message inside the 'candidates' of 'output'.
// Returns true if 'output' has candidates which in turn have usages (and
// they were cleared); returns false and changes nothing otherwise.
bool RemoveUsageData(mozc::commands::Output *output) {
  const bool has_usages =
      output->has_candidates() && output->candidates().has_usages();
  if (has_usages) {
    output->mutable_candidates()->mutable_usages()->Clear();
  }
  return has_usages;
}
namespace {
// Prints one entry of a protocol buffer in S-expression.
// An entry is a parenthesized pair of the normalized field name and its
// value(s):
//   - a non-repeated field is printed as a cons cell, "(name . value)"
//   - a repeated field is printed as a list, "(name value value ...)";
//     message elements are appended without a separating space
//
// 'message' is the protocol buffer being printed and 'reflection' must be
// its reflection object.  'field' is the descriptor of the field in
// 'message' to print; it may be either non-repeated or repeated.
// 'output' is a pseudo output stream receiving the key and the value(s).
void PrintField(
    const protobuf::Message &message,
    const protobuf::Reflection &reflection,
    const protobuf::FieldDescriptor &field,
    vector<string>* output) {
  output->push_back("(");
  output->push_back(NormalizeSymbol(field.name()));

  if (field.is_repeated()) {
    output->push_back(" ");  // Print objects as a list.
    const bool needs_separator =
        field.cpp_type() != protobuf::FieldDescriptor::CPPTYPE_MESSAGE;
    const int size = reflection.FieldSize(message, &field);
    for (int index = 0; index < size; ++index) {
      if (needs_separator && index > 0) {
        output->push_back(" ");
      }
      PrintFieldValue(message, reflection, field, index, output);
    }
  } else {
    output->push_back(" . ");  // Print an object as a value.
    PrintFieldValue(message, reflection, field, -1 /* dummy arg */, output);
  }

  output->push_back(")");
}
// Prints a single value of a protocol buffer field in S-expression.
//   - 32-bit integers and floating point numbers are printed as is
//   - 64-bit integers are printed as a decimal number inside a string
//   - bool is printed as "t" or "nil"
//   - enum is printed as a normalized symbol
//   - string is printed as a quoted string
//   - message and group are printed as an alist
//
// 'message' is the protocol buffer being printed and 'reflection' must be
// its reflection object.  'field' is the descriptor of the field in
// 'message' to print; it may be either non-repeated or repeated.  For a
// repeated field 'index' selects the element to print; otherwise 'index' is
// ignored.  'output' is a pseudo output stream receiving the value.
void PrintFieldValue(
    const protobuf::Message &message,
    const protobuf::Reflection &reflection,
    const protobuf::FieldDescriptor &field,
    int index,
    vector<string>* output) {
// Reads the value via the accessor that matches the field's cardinality.
#define GET_FIELD_VALUE(METHOD_TYPE)                                 \
  (field.is_repeated() ?                                             \
   reflection.GetRepeated##METHOD_TYPE(message, &field, index) :     \
   reflection.Get##METHOD_TYPE(message, &field))

  typedef protobuf::FieldDescriptor Descriptor;
  switch (field.cpp_type()) {
    // Number (integer and floating point)
    //
    // Since Emacs does not support 64-bit integers, it supports only
    // 60-bit integers on 64-bit version, and 28-bit on 32-bit version,
    // we escape 64-bit values into a string as a workaround.
    // We don't need any 64-bit values on Emacs so far, and 32-bit
    // integer values have never got over 28-bit yet.
    case Descriptor::CPPTYPE_INT32:
      output->push_back(mozc::Util::StringPrintf(
          "%d", static_cast<int32>(GET_FIELD_VALUE(Int32))));
      break;
    case Descriptor::CPPTYPE_INT64:  // as a string
      output->push_back(mozc::Util::StringPrintf(
          "\"%" GG_LL_FORMAT "d\"",
          static_cast<int64>(GET_FIELD_VALUE(Int64))));
      break;
    case Descriptor::CPPTYPE_UINT32:
      output->push_back(mozc::Util::StringPrintf(
          "%u", static_cast<uint32>(GET_FIELD_VALUE(UInt32))));
      break;
    case Descriptor::CPPTYPE_UINT64:  // as a string
      output->push_back(mozc::Util::StringPrintf(
          "\"%" GG_LL_FORMAT "u\"",
          static_cast<uint64>(GET_FIELD_VALUE(UInt64))));
      break;
    case Descriptor::CPPTYPE_DOUBLE:
      output->push_back(mozc::Util::StringPrintf(
          "%f", static_cast<double>(GET_FIELD_VALUE(Double))));
      break;
    case Descriptor::CPPTYPE_FLOAT:
      output->push_back(mozc::Util::StringPrintf(
          "%f", static_cast<float>(GET_FIELD_VALUE(Float))));
      break;

    case Descriptor::CPPTYPE_BOOL:  // bool
      output->push_back(GET_FIELD_VALUE(Bool) ? "t" : "nil");
      break;

    case Descriptor::CPPTYPE_ENUM:  // enum
      output->push_back(NormalizeSymbol(GET_FIELD_VALUE(Enum)->name()));
      break;

    case Descriptor::CPPTYPE_STRING: {  // string
      // 'scratch' is handed to the *StringReference() accessors as their
      // scratch buffer, and 'value' refers to whatever they return.
      string scratch;
      const string &value = field.is_repeated() ?
          reflection.GetRepeatedStringReference(
              message, &field, index, &scratch) :
          reflection.GetStringReference(message, &field, &scratch);
      output->push_back(QuoteString(value));
      break;
    }

    // message and group
    case Descriptor::CPPTYPE_MESSAGE:
      PrintMessage(GET_FIELD_VALUE(Message), output);
      break;
  }

#undef GET_FIELD_VALUE
}
} // namespace
} // namespace emacs
} // namespace mozc