cave-story-solaris/DoConfig/fltk/src/xutf8/utf8Utils.c
2019-05-02 18:20:47 +01:00

229 lines
5.4 KiB
C

/* "$Id: $"
*
* Author: Jean-Marc Lienher ( http://oksid.ch )
* Copyright 2000-2003 by O'ksi'D.
*
* This library is free software. Distribution and use rights are outlined in
* the file "COPYING" which should have been included with this file. If this
* file is missing or damaged, see the license at:
*
* http://www.fltk.org/COPYING.php
*
* Please report all bugs and problems on the following page:
*
* http://www.fltk.org/str.php
*/
/*
* Unicode to UTF-8 conversion functions.
*/
#if !defined(WIN32) && !defined(__APPLE__)
#include "../Xutf8.h"
/*** NOTE : all functions are LIMITED to 24 bits Unicode values !!! ***/
/*
 * Decodes the first UTF-8 sequence found in buf into a Unicode value.
 *
 * buf : bytes to decode; only read when len is at least 1
 * len : number of bytes available in buf
 * ucs : receives the decoded value, or '?' when decoding fails
 *
 * Returns the byte length (1 to 5) of the decoded sequence.
 * Returns -1 (and stores '?' in *ucs) if the sequence is not valid:
 * empty or truncated input, a missing continuation byte, a continuation
 * byte used as lead byte, an overlong encoding, a 5 byte sequence whose
 * value does not fit in 24 bits, or a lead byte of 0xFC and above.
 */
int
XConvertUtf8ToUcs(const unsigned char *buf,
                  int len,
                  unsigned int *ucs) {
  /* Check len before anything else so that buf[0] is never read from
   * an empty buffer. */
  if (len > 0) {
    const unsigned int lead = buf[0];

    if (lead < 0x80) {
      /* 0x00000000 - 0x0000007F */
      *ucs = lead;
      return 1;
    }
    if ((lead & 0xE0) == 0xC0) {
      /* 0x00000080 - 0x000007FF */
      if (len > 1
          && (buf[1] & 0xC0) == 0x80) {
        *ucs = ((lead & 0x1F) << 6) |
               (buf[1] & 0x3Fu);
        /* smaller values are overlong encodings */
        if (*ucs > 0x0000007F) {
          return 2;
        }
      }
    } else if ((lead & 0xF0) == 0xE0) {
      /* 0x00000800 - 0x0000FFFF */
      if (len > 2
          && (buf[1] & 0xC0) == 0x80
          && (buf[2] & 0xC0) == 0x80) {
        *ucs = ((lead & 0x0F) << 12) |
               ((buf[1] & 0x3Fu) << 6) |
               (buf[2] & 0x3Fu);
        if (*ucs > 0x000007FF) {
          return 3;
        }
      }
    } else if ((lead & 0xF8) == 0xF0) {
      /* 0x00010000 - 0x001FFFFF */
      if (len > 3
          && (buf[1] & 0xC0) == 0x80
          && (buf[2] & 0xC0) == 0x80
          && (buf[3] & 0xC0) == 0x80) {
        *ucs = ((lead & 0x07) << 18) |
               ((buf[1] & 0x3Fu) << 12) |
               ((buf[2] & 0x3Fu) << 6) |
               (buf[3] & 0x3Fu);
        if (*ucs > 0x0000FFFF) {
          return 4;
        }
      }
    } else if ((lead & 0xFC) == 0xF8) {
      /* 0x00200000 - 0x03FFFFFF, of which only values below
       * 0x01000000 are accepted */
      if (len > 4
          && (buf[1] & 0xC0) == 0x80
          && (buf[2] & 0xC0) == 0x80
          && (buf[3] & 0xC0) == 0x80
          && (buf[4] & 0xC0) == 0x80) {
        *ucs = ((lead & 0x03) << 24) |
               ((buf[1] & 0x3Fu) << 18) |
               ((buf[2] & 0x3Fu) << 12) |
               ((buf[3] & 0x3Fu) << 6) |
               (buf[4] & 0x3Fu);
        if (*ucs > 0x001FFFFF && *ucs < 0x01000000) {
          return 5;
        }
      }
    }
    /* Remaining lead bytes (0x80 - 0xBF and 0xFC - 0xFF) are rejected. */
  }
  *ucs = (unsigned int) '?'; /* bad utf-8 string */
  return -1;
}
/*
 * Encodes the Unicode value ucs as a UTF-8 byte sequence written to buf.
 * No terminating NUL byte is written.
 *
 * Returns the number of bytes written (1 to 5).
 * Returns -1 and writes a single '?' to buf if ucs is 0x01000000 or more.
 *
 * NOTE : the buffer (buf) must be at least 5 bytes long !!!
 */
int
XConvertUcsToUtf8(unsigned int ucs,
                  char *buf) {
  int nbytes;
  int pos;
  unsigned int lead_mark;

  if (ucs <= 0x00007F) {
    /* plain ASCII is stored unchanged */
    buf[0] = (char) ucs;
    return 1;
  }

  /* Select the sequence length and the marker bits of its lead byte. */
  if (ucs <= 0x0007FF) {
    nbytes = 2;
    lead_mark = 0xC0;
  } else if (ucs <= 0x00FFFF) {
    nbytes = 3;
    lead_mark = 0xE0;
  } else if (ucs <= 0x1FFFFF) {
    nbytes = 4;
    lead_mark = 0xF0;
  } else if (ucs <= 0xFFFFFF) {
    nbytes = 5;
    lead_mark = 0xF8;
  } else {
    buf[0] = '?';
    return -1;
  }

  /* Fill the continuation bytes from the last one backwards, taking the
   * low 6 bits of the value each time. */
  for (pos = nbytes - 1; pos > 0; pos--) {
    buf[pos] = (char) (0x80 | (ucs & 0x3F));
    ucs >>= 6;
  }
  /* Whatever is left of the value goes into the lead byte. */
  buf[0] = (char) (lead_mark | ucs);
  return nbytes;
}
/*
 * Returns the byte length of the first UTF-8 char in buf, looking at
 * no more than len bytes.
 * The result is the return value of XConvertUtf8ToUcs; the decoded
 * Unicode value itself is thrown away.
 * (returns -1 if not valid)
 */
int
XUtf8CharByteLen(const unsigned char *buf,
                 int len) {
  unsigned int discarded;
  int nbytes;

  nbytes = XConvertUtf8ToUcs(buf, len, &discarded);
  return nbytes;
}
/*
 * Returns the quantity of Unicode chars in the first len bytes of the
 * UTF-8 string buf.
 * A position that XUtf8CharByteLen does not report as a valid char is
 * counted as one char and skipped as a single byte.
 */
int
XCountUtf8Char(const unsigned char *buf,
               int len) {
  int count = 0;
  int pos;
  int step;

  for (pos = 0; pos < len; pos += step) {
    step = XUtf8CharByteLen(buf + pos, len - pos);
    if (step < 1) {
      /* invalid sequence: always advance by one byte */
      step = 1;
    }
    count++;
  }
  return count;
}
/*
 * Same job as XConvertUtf8ToUcs, but no sanity check is done on the
 * decoded data: the sequence length is taken from the lead byte alone,
 * the trailing bytes are not checked to be continuation bytes and
 * overlong or out of range values are not rejected.
 *
 * buf : bytes to decode; only read when len is at least 1
 * len : number of bytes available in buf
 * ucs : receives the decoded value, or '?' when decoding fails
 *
 * Returns the byte length (1 to 5) announced by the lead byte.
 * Returns -1 (and stores '?' in *ucs) if len is too small for that
 * length, if the lead byte is a continuation byte (0x80 - 0xBF) or if it
 * is 0xFC and above.
 */
int
XFastConvertUtf8ToUcs(const unsigned char *buf,
                      int len,
                      unsigned int *ucs) {
  /* Check len before anything else so that buf[0] is never read from
   * an empty buffer. */
  if (len > 0) {
    const unsigned int lead = buf[0];

    if (lead < 0x80) {
      /* 0x00000000 - 0x0000007F */
      *ucs = lead;
      return 1;
    }
    /* Trailing bytes are unvalidated here, so they keep a 7 bit mask
     * and are combined by addition: changing either would alter the
     * result for malformed input. */
    if ((lead & 0xE0) == 0xC0) {
      /* 0x00000080 - 0x000007FF */
      if (len > 1) {
        *ucs = ((lead & 0x1F) << 6) +
               (buf[1] & 0x7Fu);
        return 2;
      }
    } else if ((lead & 0xF0) == 0xE0) {
      /* 0x00000800 - 0x0000FFFF */
      if (len > 2) {
        *ucs = ((lead & 0x0F) << 12) +
               ((buf[1] & 0x7Fu) << 6) +
               (buf[2] & 0x7Fu);
        return 3;
      }
    } else if ((lead & 0xF8) == 0xF0) {
      /* 0x00010000 - 0x001FFFFF */
      if (len > 3) {
        *ucs = ((lead & 0x07) << 18) +
               ((buf[1] & 0x7Fu) << 12) +
               ((buf[2] & 0x7Fu) << 6) +
               (buf[3] & 0x7Fu);
        return 4;
      }
    } else if ((lead & 0xFC) == 0xF8) {
      /* 0x00200000 - 0x03FFFFFF */
      if (len > 4) {
        *ucs = ((lead & 0x03) << 24) +
               ((buf[1] & 0x7Fu) << 18) +
               ((buf[2] & 0x7Fu) << 12) +
               ((buf[3] & 0x7Fu) << 6) +
               (buf[4] & 0x7Fu);
        return 5;
      }
    }
    /* Remaining lead bytes (0x80 - 0xBF and 0xFC - 0xFF) are rejected. */
  }
  *ucs = (unsigned int) '?'; /* bad utf-8 string */
  return -1;
}
#endif /* X11 only */
/*
* End of "$Id: $".
*/