blob: a648bbd961c2361ea8fa2385cb3f63a002e634c3 [file] [log] [blame]
#include "test-tool.h"
/*
 * Written to <stdout> by cmd__xml_encode() in place of input that it does
 * not pass through: disallowed control characters and malformed multi-byte
 * sequences.
 */
static const char *utf8_replace_character = "�";
/*
 * Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded
 * in an XML file.
 *
 * ASCII handling:
 *  - & ' " < > are written as the named entities &amp; &apos; &quot; &lt;
 *    and &gt;
 *  - tab, LF and CR are written as numeric character references
 *  - every other byte below 0x20 is replaced by utf8_replace_character
 *  - the remaining bytes up to 0x7f are copied unchanged
 *
 * Multi-byte handling: a lead byte announcing a 2-, 3- or 4-byte sequence
 * is buffered together with its continuation bytes (10xxxxxx) and the
 * sequence is copied verbatim once complete.  Only this byte structure is
 * checked; the decoded code point itself is not inspected.  A stray
 * continuation byte, an unrecognized lead byte, or a sequence that is cut
 * short (by a non-continuation byte or by the end of the input) is replaced
 * by utf8_replace_character.
 *
 * argc and argv are not used.  Returns 0 once <stdin> is exhausted; calls
 * die_errno() if reading <stdin> fails.
 */
int cmd__xml_encode(int argc, const char **argv)
{
	/*
	 * tmp collects the bytes of the multi-byte sequence in progress;
	 * tmp2 points at the next free slot in tmp, or is NULL when no
	 * sequence is pending.  remaining counts the continuation bytes
	 * still expected.
	 */
	unsigned char buf[1024], tmp[4], *tmp2 = NULL;
	/* cur = 0, len = 1 makes the first "++cur == len" trigger a read */
	ssize_t cur = 0, len = 1, remaining = 0;
	unsigned char ch;

	for (;;) {
		if (++cur == len) {
			len = xread(0, buf, sizeof(buf));
			if (!len) {
				/*
				 * End of input in the middle of a multi-byte
				 * sequence: report the truncated sequence the
				 * same way as any other malformed one instead
				 * of dropping it silently.
				 */
				if (tmp2)
					fputs(utf8_replace_character, stdout);
				return 0;
			}
			if (len < 0)
				die_errno("Could not read <stdin>");
			cur = 0;
		}
		ch = buf[cur];

		if (tmp2) {
			if ((ch & 0xc0) != 0x80) {
				/*
				 * Not a continuation byte: give up on the
				 * pending sequence and step back so that the
				 * "++cur" above revisits this byte as the
				 * start of something new.
				 */
				fputs(utf8_replace_character, stdout);
				tmp2 = NULL;
				cur--;
				continue;
			}
			*tmp2 = ch;
			tmp2++;
			if (--remaining == 0) {
				/* sequence complete: copy it through as-is */
				fwrite(tmp, tmp2 - tmp, 1, stdout);
				tmp2 = NULL;
			}
			continue;
		}

		if (!(ch & 0x80)) {
			/* 0xxxxxxx */
			if (ch == '&')
				fputs("&amp;", stdout);
			else if (ch == '\'')
				fputs("&apos;", stdout);
			else if (ch == '"')
				fputs("&quot;", stdout);
			else if (ch == '<')
				fputs("&lt;", stdout);
			else if (ch == '>')
				fputs("&gt;", stdout);
			else if (ch >= 0x20)
				fputc(ch, stdout);
			else if (ch == 0x09 || ch == 0x0a || ch == 0x0d)
				fprintf(stdout, "&#x%02x;", ch);
			else
				fputs(utf8_replace_character, stdout);
		} else if ((ch & 0xe0) == 0xc0) {
			/* 110XXXXx 10xxxxxx */
			tmp[0] = ch;
			remaining = 1;
			tmp2 = tmp + 1;
		} else if ((ch & 0xf0) == 0xe0) {
			/* 1110XXXX 10Xxxxxx 10xxxxxx */
			tmp[0] = ch;
			remaining = 2;
			tmp2 = tmp + 1;
		} else if ((ch & 0xf8) == 0xf0) {
			/* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
			tmp[0] = ch;
			remaining = 3;
			tmp2 = tmp + 1;
		} else
			/* stray continuation byte or unrecognized lead byte */
			fputs(utf8_replace_character, stdout);
	}

	return 0;
}