229 lines
5.4 KiB
C
229 lines
5.4 KiB
C
/* "$Id: $"
|
|
*
|
|
* Author: Jean-Marc Lienher ( http://oksid.ch )
|
|
* Copyright 2000-2003 by O'ksi'D.
|
|
*
|
|
* This library is free software. Distribution and use rights are outlined in
|
|
* the file "COPYING" which should have been included with this file. If this
|
|
* file is missing or damaged, see the license at:
|
|
*
|
|
* http://www.fltk.org/COPYING.php
|
|
*
|
|
* Please report all bugs and problems on the following page:
|
|
*
|
|
* http://www.fltk.org/str.php
|
|
*/
|
|
|
|
/*
|
|
* Unicode to UTF-8 conversion functions.
|
|
*/
|
|
|
|
#if !defined(WIN32) && !defined(__APPLE__)
|
|
|
|
#include "../Xutf8.h"
|
|
|
|
/*** NOTE : all functions are LIMITED to 24 bits Unicode values !!! ***/
|
|
|
|
/*
 * Decodes the first character of a UTF-8 byte sequence into a Unicode value.
 *
 *   buf  bytes to decode; only inspected when len > 0
 *   len  number of bytes available in buf
 *   ucs  receives the decoded value, or '?' when decoding fails
 *
 * Returns the number of bytes consumed (1 to 5), or -1 if the sequence is
 * empty, truncated, starts with a continuation byte, contains a byte that
 * is not a continuation byte where one is required, or is an overlong
 * encoding.  5-byte sequences are accepted only for values below
 * 0x01000000; 6-byte sequences are never accepted.
 */
int
XConvertUtf8ToUcs(const unsigned char *buf,
                  int                  len,
                  unsigned int        *ucs) {

  /* Nothing may be read from buf unless at least one byte is available. */
  if (len > 0) {
    unsigned int lead = buf[0];
    unsigned int value;

    if ((lead & 0x80) == 0) {
      /* 0x00000000 - 0x0000007F */
      *ucs = lead;
      return 1;
    }

    if ((lead & 0xE0) == 0xC0) {
      /* 0x00000080 - 0x000007FF */
      if (len > 1 && (buf[1] & 0xC0) == 0x80) {
        value = ((lead & 0x1F) << 6) +
                (buf[1] & 0x3F);
        /* a smaller value here would be an overlong encoding */
        if (value > 0x0000007F) {
          *ucs = value;
          return 2;
        }
      }
    } else if ((lead & 0xF0) == 0xE0) {
      /* 0x00000800 - 0x0000FFFF */
      if (len > 2
          && (buf[1] & 0xC0) == 0x80
          && (buf[2] & 0xC0) == 0x80) {
        value = ((lead & 0x0F) << 12) +
                ((unsigned int) (buf[1] & 0x3F) << 6) +
                (buf[2] & 0x3F);
        if (value > 0x000007FF) {
          *ucs = value;
          return 3;
        }
      }
    } else if ((lead & 0xF8) == 0xF0) {
      /* 0x00010000 - 0x001FFFFF */
      if (len > 3
          && (buf[1] & 0xC0) == 0x80
          && (buf[2] & 0xC0) == 0x80
          && (buf[3] & 0xC0) == 0x80) {
        value = ((lead & 0x07) << 18) +
                ((unsigned int) (buf[1] & 0x3F) << 12) +
                ((unsigned int) (buf[2] & 0x3F) << 6) +
                (buf[3] & 0x3F);
        if (value > 0x0000FFFF) {
          *ucs = value;
          return 4;
        }
      }
    } else if ((lead & 0xFC) == 0xF8) {
      /* 0x00200000 - 0x03FFFFFF, of which only the 24-bit part is accepted */
      if (len > 4
          && (buf[1] & 0xC0) == 0x80
          && (buf[2] & 0xC0) == 0x80
          && (buf[3] & 0xC0) == 0x80
          && (buf[4] & 0xC0) == 0x80) {
        value = ((lead & 0x03) << 24) +
                ((unsigned int) (buf[1] & 0x3F) << 18) +
                ((unsigned int) (buf[2] & 0x3F) << 12) +
                ((unsigned int) (buf[3] & 0x3F) << 6) +
                (buf[4] & 0x3F);
        if (value > 0x001FFFFF && value < 0x01000000) {
          *ucs = value;
          return 5;
        }
      }
    }
    /*
     * Anything else is rejected: a continuation byte (10xxxxxx) in lead
     * position, a 6-byte lead (1111110x), or 0xFE / 0xFF.
     */
  }

  *ucs = (unsigned int) '?'; /* bad utf-8 string */
  return -1;
}
|
|
|
|
/*
 * Encodes one Unicode value as a UTF-8 byte sequence.
 *
 *   ucs  value to encode; must be below 0x01000000
 *   buf  output buffer, at least 5 bytes long; no terminator is written
 *
 * Returns the number of bytes written (1 to 5).  For values of 0x01000000
 * and above, a single '?' is stored in buf[0] and -1 is returned.
 */
int
XConvertUcsToUtf8(unsigned int  ucs,
                  char         *buf) {

  unsigned int marker;   /* length-tag bits of the leading byte */
  int          nbytes;
  int          pos;

  if (ucs < 0x000080) {
    buf[0] = (char) ucs;
    return 1;
  }

  if (ucs < 0x000800) {
    nbytes = 2;
    marker = 0xC0;
  } else if (ucs < 0x010000) {
    nbytes = 3;
    marker = 0xE0;
  } else if (ucs < 0x00200000) {
    nbytes = 4;
    marker = 0xF0;
  } else if (ucs < 0x01000000) {
    nbytes = 5;
    marker = 0xF8;
  } else {
    buf[0] = '?';
    return -1;
  }

  /* Emit the continuation bytes last-to-first, six payload bits at a time. */
  for (pos = nbytes - 1; pos > 0; pos--) {
    buf[pos] = (char) (0x80 | (ucs & 0x3F));
    ucs >>= 6;
  }

  /* Whatever is left of the value belongs in the leading byte. */
  buf[0] = (char) (marker | ucs);
  return nbytes;
}
|
|
|
|
/*
 * Reports how many bytes the first UTF-8 character in buf occupies.
 *
 *   buf  bytes to examine
 *   len  number of bytes available in buf
 *
 * Returns whatever XConvertUtf8ToUcs() returns for the same input: the
 * byte length of the character, or -1 if it is not valid.  The decoded
 * value itself is discarded.
 */
int
XUtf8CharByteLen(const unsigned char *buf,
                 int                  len) {
  unsigned int discarded;
  int          nbytes;

  nbytes = XConvertUtf8ToUcs(buf, len, &discarded);
  return nbytes;
}
|
|
|
|
/*
 * Counts the Unicode characters in a UTF-8 buffer.
 *
 *   buf  bytes to scan
 *   len  number of bytes available in buf
 *
 * Each step measures the character at the current offset with
 * XUtf8CharByteLen().  When that reports a length below 1, the scan
 * advances by a single byte and that byte is still counted as one
 * character.  Returns 0 when len is not positive.
 */
int
XCountUtf8Char(const unsigned char *buf,
               int                  len) {

  int count = 0;
  int pos;

  for (pos = 0; pos < len; count++) {
    int step = XUtf8CharByteLen(buf + pos, len - pos);

    /* always move forward, even across an undecodable byte */
    pos += (step < 1) ? 1 : step;
  }
  return count;
}
|
|
|
|
/*
 * Decodes the first character of a UTF-8 byte sequence without validating
 * it: the sequence length is taken from the leading byte alone, and the
 * following bytes are not checked to be continuation bytes, nor is the
 * result checked for overlong encodings or range.
 *
 *   buf  bytes to decode; only inspected when len > 0
 *   len  number of bytes available in buf
 *   ucs  receives the decoded value, or '?' on failure
 *
 * Returns the number of bytes consumed (1 to 5), or -1 if len is too small
 * for the length announced by the leading byte, or if the leading byte is
 * a continuation byte, a 6-byte lead, or 0xFE / 0xFF.
 */
int
XFastConvertUtf8ToUcs(const unsigned char *buf,
                      int                  len,
                      unsigned int        *ucs) {

  /* Nothing may be read from buf unless at least one byte is available. */
  if (len > 0) {
    unsigned int lead = buf[0];

    if ((lead & 0x80) == 0) {
      /* 0x00000000 - 0x0000007F */
      *ucs = lead;
      return 1;
    }

    /*
     * Trailing bytes are masked with 0x7F rather than 0x3F: only the top
     * bit is stripped, so for well-formed input the result is identical,
     * and malformed input is deliberately not detected here.
     */
    if ((lead & 0xE0) == 0xC0) {
      /* 0x00000080 - 0x000007FF */
      if (len > 1) {
        *ucs = ((lead & 0x1F) << 6) +
               (buf[1] & 0x7F);
        return 2;
      }
    } else if ((lead & 0xF0) == 0xE0) {
      /* 0x00000800 - 0x0000FFFF */
      if (len > 2) {
        *ucs = ((lead & 0x0F) << 12) +
               ((unsigned int) (buf[1] & 0x7F) << 6) +
               (buf[2] & 0x7F);
        return 3;
      }
    } else if ((lead & 0xF8) == 0xF0) {
      /* 0x00010000 - 0x001FFFFF */
      if (len > 3) {
        *ucs = ((lead & 0x07) << 18) +
               ((unsigned int) (buf[1] & 0x7F) << 12) +
               ((unsigned int) (buf[2] & 0x7F) << 6) +
               (buf[3] & 0x7F);
        return 4;
      }
    } else if ((lead & 0xFC) == 0xF8) {
      /* 0x00200000 - 0x03FFFFFF */
      if (len > 4) {
        *ucs = ((lead & 0x03) << 24) +
               ((unsigned int) (buf[1] & 0x7F) << 18) +
               ((unsigned int) (buf[2] & 0x7F) << 12) +
               ((unsigned int) (buf[3] & 0x7F) << 6) +
               (buf[4] & 0x7F);
        return 5;
      }
    }
    /* continuation byte in lead position, 6-byte lead, 0xFE or 0xFF */
  }

  *ucs = (unsigned int) '?'; /* bad utf-8 string */
  return -1;
}
|
|
|
|
#endif /* X11 only */
|
|
|
|
/*
|
|
* End of "$Id: $".
|
|
*/
|