/*
 * 2014 lovewilliam <ztong@vt.edu>
 * from http://www.lemoda.net/c/ucs2-to-utf8/ucs2-to-utf8.c
 */

/* Encode a single Unicode code point as a NUL-terminated UTF-8 sequence.

   Input: a Unicode code point, "ucs2" (0 .. 0x10FFFF, excluding the
   surrogate range 0xD800 .. 0xDFFF).

   Output: UTF-8 bytes in buffer "utf8", followed by a zero byte.

   Return value: the number of bytes written into "utf8" (1 to 4, not
   counting the terminating zero byte), or a negative error code:
     UNICODE_SURROGATE_PAIR  "ucs2" is a surrogate (0xD800 .. 0xDFFF);
     UNICODE_BAD_INPUT       "ucs2" is negative or above 0x10FFFF, or
                             "utf8" is a null pointer.
   Nothing is written to "utf8" when an error is returned.

   The buffer "utf8" must have room for at least FIVE bytes: up to four
   encoded bytes plus the terminating zero byte. */

#define UNICODE_SURROGATE_PAIR -2
#define UNICODE_BAD_INPUT -1

int ucs2_to_utf8 (int ucs2, unsigned char * utf8)
{
    /* Reject negative values up front: they would otherwise satisfy the
       "< 0x80" test below and be emitted as a bogus single byte. */
    if (!utf8 || ucs2 < 0) {
        return UNICODE_BAD_INPUT;
    }

    /* 1 byte: 0xxxxxxx */
    if (ucs2 < 0x80) {
        utf8[0] = (unsigned char) ucs2;
        utf8[1] = '\0';
        return 1;
    }

    /* 2 bytes: 110xxxxx 10xxxxxx */
    if (ucs2 < 0x800) {
        utf8[0] = (unsigned char) ((ucs2 >> 6) | 0xC0);
        utf8[1] = (unsigned char) ((ucs2 & 0x3F) | 0x80);
        utf8[2] = '\0';
        return 2;
    }

    /* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx (upper bound 0xFFFF inclusive) */
    if (ucs2 <= 0xFFFF) {
        if (ucs2 >= 0xD800 && ucs2 <= 0xDFFF) {
            /* Ill-formed: surrogates are not valid scalar values. */
            return UNICODE_SURROGATE_PAIR;
        }
        utf8[0] = (unsigned char) ((ucs2 >> 12) | 0xE0);
        utf8[1] = (unsigned char) (((ucs2 >> 6) & 0x3F) | 0x80);
        utf8[2] = (unsigned char) ((ucs2 & 0x3F) | 0x80);
        utf8[3] = '\0';
        return 3;
    }

    /* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
       (upper bound 0x10FFFF inclusive, the last Unicode code point) */
    if (ucs2 <= 0x10FFFF) {
        /* http://tidy.sourceforge.net/cgi-bin/lxr/source/src/utf8.c#L380 */
        utf8[0] = (unsigned char) (0xF0 | (ucs2 >> 18));
        utf8[1] = (unsigned char) (0x80 | ((ucs2 >> 12) & 0x3F));
        utf8[2] = (unsigned char) (0x80 | ((ucs2 >> 6) & 0x3F));
        utf8[3] = (unsigned char) (0x80 | (ucs2 & 0x3F));
        utf8[4] = '\0';
        return 4;
    }

    /* Beyond the Unicode code space. */
    return UNICODE_BAD_INPUT;
}