Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
author Sam Lantinga <slouken@libsdl.org>
Mon, 13 Mar 2006 01:08:00 +0000
changeset 1501 73dc5d39bbf8
parent 1500 f58c88a4dff5
child 1502 d403a39389da
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
configure.in
include/SDL_config.h.in
include/SDL_stdinc.h
src/stdlib/SDL_iconv.c
src/stdlib/SDL_string.c
test/Makefile.in
test/testiconv.c
test/utf8.txt
--- a/configure.in	Sun Mar 12 01:47:23 2006 +0000
+++ b/configure.in	Mon Mar 13 01:08:00 2006 +0000
@@ -94,7 +94,7 @@
 
     dnl Check for C library headers
     AC_HEADER_STDC
-    AC_CHECK_HEADERS(sys/types.h stdio.h stdlib.h stddef.h stdarg.h malloc.h memory.h string.h strings.h inttypes.h stdint.h ctype.h math.h signal.h)
+    AC_CHECK_HEADERS(sys/types.h stdio.h stdlib.h stddef.h stdarg.h malloc.h memory.h string.h strings.h inttypes.h stdint.h ctype.h math.h iconv.h signal.h)
 
     dnl Check for typedefs, structures, etc.
     AC_TYPE_SIZE_T
@@ -116,7 +116,7 @@
     if test x$ac_cv_func_strtod = xyes; then
         AC_DEFINE(HAVE_STRTOD)
     fi
-    AC_CHECK_FUNCS(malloc calloc realloc free getenv putenv unsetenv qsort abs bcopy memset memcpy memmove strlen strlcpy strlcat strdup _strrev _strupr _strlwr strchr strrchr strstr itoa _ltoa _uitoa _ultoa strtol strtoul _i64toa _ui64toa strtoll strtoull atoi atof strcmp strncmp stricmp strcasecmp sscanf snprintf vsnprintf sigaction setjmp nanosleep)
+    AC_CHECK_FUNCS(malloc calloc realloc free getenv putenv unsetenv qsort abs bcopy memset memcpy memmove strlen strlcpy strlcat strdup _strrev _strupr _strlwr strchr strrchr strstr itoa _ltoa _uitoa _ultoa strtol strtoul _i64toa _ui64toa strtoll strtoull atoi atof strcmp strncmp stricmp strcasecmp strncasecmp sscanf snprintf vsnprintf iconv sigaction setjmp nanosleep)
 
     AC_CHECK_LIB(m, pow, [BUILD_LDFLAGS="$BUILD_LDFLAGS -lm"])
 fi
--- a/include/SDL_config.h.in	Sun Mar 12 01:47:23 2006 +0000
+++ b/include/SDL_config.h.in	Mon Mar 13 01:08:00 2006 +0000
@@ -68,6 +68,7 @@
 #undef HAVE_STDINT_H
 #undef HAVE_CTYPE_H
 #undef HAVE_MATH_H
+#undef HAVE_ICONV_H
 #undef HAVE_SIGNAL_H
 #undef HAVE_ALTIVEC_H
 
@@ -118,9 +119,11 @@
 #undef HAVE_STRNCMP
 #undef HAVE_STRICMP
 #undef HAVE_STRCASECMP
+#undef HAVE_STRNCASECMP
 #undef HAVE_SSCANF
 #undef HAVE_SNPRINTF
 #undef HAVE_VSNPRINTF
+#undef HAVE_ICONV
 #undef HAVE_SIGACTION
 #undef HAVE_SETJMP
 #undef HAVE_NANOSLEEP
--- a/include/SDL_stdinc.h	Sun Mar 12 01:47:23 2006 +0000
+++ b/include/SDL_stdinc.h	Mon Mar 13 01:08:00 2006 +0000
@@ -70,6 +70,9 @@
 #if HAVE_CTYPE_H
 # include <ctype.h>
 #endif
+#if HAVE_ICONV_H
+# include <iconv.h>
+#endif
 
 /* The number of elements in an array */
 #define SDL_arraysize(array)	(sizeof(array)/sizeof(array[0]))
@@ -518,6 +521,12 @@
 extern DECLSPEC int SDLCALL SDL_strcasecmp(const char *str1, const char *str2);
 #endif
 
+#if HAVE_STRNCASECMP
+#define SDL_strncasecmp strncasecmp
+#else
+extern DECLSPEC int SDLCALL SDL_strncasecmp(const char *str1, const char *str2, size_t maxlen);
+#endif
+
 #if HAVE_SSCANF
 #define SDL_sscanf      sscanf
 #else
@@ -536,6 +545,32 @@
 extern DECLSPEC int SDLCALL SDL_vsnprintf(char *text, size_t maxlen, const char *fmt, va_list ap);
 #endif
 
+/* SDL_iconv() reports failure with these codes, whichever backend is in use */
+#define SDL_ICONV_ERROR		((size_t)-1)
+#define SDL_ICONV_E2BIG		((size_t)-2)
+#define SDL_ICONV_EILSEQ	((size_t)-3)
+#define SDL_ICONV_EINVAL	((size_t)-4)
+
+#if HAVE_ICONV
+#define SDL_iconv_t     iconv_t
+#define SDL_iconv_open  iconv_open
+#define SDL_iconv_close iconv_close
+extern DECLSPEC size_t SDLCALL SDL_iconv(SDL_iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
+#else
+typedef struct _SDL_iconv_t *SDL_iconv_t;
+extern DECLSPEC SDL_iconv_t SDLCALL SDL_iconv_open(const char *tocode, const char *fromcode);
+extern DECLSPEC int SDLCALL SDL_iconv_close(SDL_iconv_t cd);
+extern DECLSPEC size_t SDLCALL SDL_iconv(SDL_iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
+#endif
+/* This function converts a string between encodings in one pass, returning a
+   string that must be freed with SDL_free() or NULL on error.  The helper
+   macros are fully parenthesized so that the casts survive e.g. indexing. */
+extern DECLSPEC char * SDLCALL SDL_iconv_string(const char *tocode, const char *fromcode, char *inbuf, size_t inbytesleft);
+#define SDL_iconv_utf8_ascii(S)		SDL_iconv_string("ASCII", "UTF-8", S, SDL_strlen(S)+1)
+#define SDL_iconv_utf8_latin1(S)	SDL_iconv_string("LATIN1", "UTF-8", S, SDL_strlen(S)+1)
+#define SDL_iconv_utf8_ucs2(S)		((Uint16 *)SDL_iconv_string("UCS-2", "UTF-8", S, SDL_strlen(S)+1))
+#define SDL_iconv_utf8_ucs4(S)		((Uint32 *)SDL_iconv_string("UCS-4", "UTF-8", S, SDL_strlen(S)+1))
+
 /* Ends C function definitions when using C++ */
 #ifdef __cplusplus
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/stdlib/SDL_iconv.c	Mon Mar 13 01:08:00 2006 +0000
@@ -0,0 +1,809 @@
+/*
+    SDL - Simple DirectMedia Layer
+    Copyright (C) 1997-2006 Sam Lantinga
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    Sam Lantinga
+    slouken@libsdl.org
+*/
+#include "SDL_config.h"
+
+/* This file contains portable iconv functions for SDL */
+
+#include "SDL_stdinc.h"
+#include "SDL_endian.h"
+
+#ifdef HAVE_ICONV
+
+#include <errno.h>
+
+/* Wrap the system iconv(), translating errno into the SDL_ICONV_* codes */
+size_t SDL_iconv(SDL_iconv_t cd,
+                 char **inbuf, size_t *inbytesleft,
+                 char **outbuf, size_t *outbytesleft)
+{
+	size_t converted = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
+	if ( converted != (size_t)-1 ) {
+		return converted;
+	}
+	switch ( errno ) {
+	    case E2BIG:
+		return SDL_ICONV_E2BIG;
+	    case EILSEQ:
+		return SDL_ICONV_EILSEQ;
+	    case EINVAL:
+		return SDL_ICONV_EINVAL;
+	}
+	return SDL_ICONV_ERROR;
+}
+
+#else
+
+#define UNICODE_BOM	0xFEFF
+
+#define UNKNOWN_ASCII	'?'
+#define UNKNOWN_UNICODE	0xFFFD
+
+enum {
+	ENCODING_UNKNOWN,	/* Name not present in the encodings[] table */
+	ENCODING_ASCII,
+	ENCODING_LATIN1,
+	ENCODING_UTF8,
+	ENCODING_UTF16,		/* Needs byte order marker */
+	ENCODING_UTF16BE,
+	ENCODING_UTF16LE,
+	ENCODING_UTF32,		/* Needs byte order marker */
+	ENCODING_UTF32BE,
+	ENCODING_UTF32LE,
+	ENCODING_UCS2,		/* Native byte order assumed */
+	ENCODING_UCS4		/* Native byte order assumed; no trailing comma (C89) */
+};
+#if SDL_BYTEORDER == SDL_BIG_ENDIAN
+#define ENCODING_UTF16NATIVE	ENCODING_UTF16BE
+#define ENCODING_UTF32NATIVE	ENCODING_UTF32BE
+#else
+#define ENCODING_UTF16NATIVE	ENCODING_UTF16LE
+#define ENCODING_UTF32NATIVE	ENCODING_UTF32LE
+#endif
+
+struct _SDL_iconv_t
+{
+	int src_fmt;	/* ENCODING_* value SDL_iconv() decodes the input with */
+	int dst_fmt;	/* ENCODING_* value SDL_iconv() encodes the output with */
+};
+
+static const struct {
+	const char *name;	/* alias, matched case-insensitively */
+	int format;		/* ENCODING_* value the alias stands for */
+} encodings[] = {		/* read-only lookup table for SDL_iconv_open() */
+	{ "ASCII",	ENCODING_ASCII },
+	{ "US-ASCII",	ENCODING_ASCII },
+	{ "LATIN1",	ENCODING_LATIN1 },
+	{ "ISO-8859-1",	ENCODING_LATIN1 },
+	{ "UTF8",	ENCODING_UTF8 },
+	{ "UTF-8",	ENCODING_UTF8 },
+	{ "UTF16",	ENCODING_UTF16 },
+	{ "UTF-16",	ENCODING_UTF16 },
+	{ "UTF16BE",	ENCODING_UTF16BE },
+	{ "UTF-16BE",	ENCODING_UTF16BE },
+	{ "UTF16LE",	ENCODING_UTF16LE },
+	{ "UTF-16LE",	ENCODING_UTF16LE },
+	{ "UTF32",	ENCODING_UTF32 },
+	{ "UTF-32",	ENCODING_UTF32 },
+	{ "UTF32BE",	ENCODING_UTF32BE },
+	{ "UTF-32BE",	ENCODING_UTF32BE },
+	{ "UTF32LE",	ENCODING_UTF32LE },
+	{ "UTF-32LE",	ENCODING_UTF32LE },
+	{ "UCS2",	ENCODING_UCS2 },
+	{ "UCS-2",	ENCODING_UCS2 },
+	{ "UCS4",	ENCODING_UCS4 },
+	{ "UCS-4",	ENCODING_UCS4 },
+};
+
+SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode)
+{
+	/* Resolve both encoding names (case-insensitively) through encodings[] */
+	int from = ENCODING_UNKNOWN;
+	int to = ENCODING_UNKNOWN;
+	SDL_iconv_t cd;
+	int n;
+	for ( n = 0; n < SDL_arraysize(encodings); ++n ) {
+		const char *alias = encodings[n].name;
+		if ( from == ENCODING_UNKNOWN && SDL_strcasecmp(fromcode, alias) == 0 ) {
+			from = encodings[n].format;
+		}
+		if ( to == ENCODING_UNKNOWN && SDL_strcasecmp(tocode, alias) == 0 ) {
+			to = encodings[n].format;
+		}
+		if ( from != ENCODING_UNKNOWN && to != ENCODING_UNKNOWN ) {
+			break;
+		}
+	}
+	if ( from == ENCODING_UNKNOWN || to == ENCODING_UNKNOWN ) {
+		return (SDL_iconv_t)-1;	/* unsupported encoding name */
+	}
+	cd = (SDL_iconv_t)SDL_malloc(sizeof(*cd));
+	if ( !cd ) {
+		return (SDL_iconv_t)-1;	/* out of memory */
+	}
+	cd->src_fmt = from;
+	cd->dst_fmt = to;
+	return cd;
+}
+
+/* Portable converter: decodes each input character to UCS-4 and re-encodes it.
+   Returns the number of characters converted, or an SDL_ICONV_* error code. */
+size_t SDL_iconv(SDL_iconv_t cd, char **inbuf, size_t *inbytesleft,
+                 char **outbuf, size_t *outbytesleft)
+{
+	char *src, *dst;
+	size_t srclen, dstlen;
+	Uint32 ch = 0;
+	size_t total;
+
+	if ( !inbuf || !*inbuf ) {
+		/* Reset the context */
+		return 0;
+	}
+	if ( !outbuf || !*outbuf || !outbytesleft || !*outbytesleft ) {
+		return SDL_ICONV_E2BIG;
+	}
+	src = *inbuf;
+	srclen = (inbytesleft ? *inbytesleft : 0);
+	dst = *outbuf;
+	dstlen = *outbytesleft;
+
+	switch ( cd->src_fmt ) {
+	    case ENCODING_UTF16:
+		/* Scan for a byte order marker: FF FE is little-endian, FE FF is big-endian */
+		{
+			Uint8 *p = (Uint8 *)src;
+			size_t n = srclen / 2;
+			while ( n ) {
+				if ( p[0] == 0xFF && p[1] == 0xFE ) {
+					cd->src_fmt = ENCODING_UTF16LE;
+					break;
+				} else if ( p[0] == 0xFE && p[1] == 0xFF ) {
+					cd->src_fmt = ENCODING_UTF16BE;
+					break;
+				}
+				p += 2;
+				--n;
+			}
+			if ( n == 0 ) {
+				/* We can't tell, default to host order */
+				cd->src_fmt = ENCODING_UTF16NATIVE;
+			}
+		}
+		break;
+	    case ENCODING_UTF32:
+		/* Scan for a byte order marker: FF FE 00 00 is little-endian, 00 00 FE FF is big-endian */
+		{
+			Uint8 *p = (Uint8 *)src;
+			size_t n = srclen / 4;
+			while ( n ) {
+				if ( p[0] == 0xFF && p[1] == 0xFE &&
+				     p[2] == 0x00 && p[3] == 0x00 ) {
+					cd->src_fmt = ENCODING_UTF32LE;
+					break;
+				} else if ( p[0] == 0x00 && p[1] == 0x00 &&
+				            p[2] == 0xFE && p[3] == 0xFF ) {
+					cd->src_fmt = ENCODING_UTF32BE;
+					break;
+				}
+				p += 4;
+				--n;
+			}
+			if ( n == 0 ) {
+				/* We can't tell, default to host order */
+				cd->src_fmt = ENCODING_UTF32NATIVE;
+			}
+		}
+		break;
+	}
+
+	switch ( cd->dst_fmt ) {
+	    case ENCODING_UTF16:
+		/* Host order plus a BOM; commit it now so that an E2BIG retry keeps it */
+		if ( dstlen < 2 ) {
+			return SDL_ICONV_E2BIG;
+		}
+		*(Uint16 *)dst = UNICODE_BOM;
+		*outbuf = (dst += 2);
+		*outbytesleft = (dstlen -= 2);
+		cd->dst_fmt = ENCODING_UTF16NATIVE;
+		break;
+	    case ENCODING_UTF32:
+		/* Host order plus a BOM; commit it now so that an E2BIG retry keeps it */
+		if ( dstlen < 4 ) {
+			return SDL_ICONV_E2BIG;
+		}
+		*(Uint32 *)dst = UNICODE_BOM;
+		*outbuf = (dst += 4);
+		*outbytesleft = (dstlen -= 4);
+		cd->dst_fmt = ENCODING_UTF32NATIVE;
+		break;
+	}
+
+	total = 0;
+	while ( srclen > 0 ) {
+		/* Decode a character */
+		switch ( cd->src_fmt ) {
+		    case ENCODING_ASCII:
+			{
+				Uint8 *p = (Uint8 *)src;
+				ch = (Uint32)(p[0] & 0x7F);
+				++src;
+				--srclen;
+			}
+			break;
+		    case ENCODING_LATIN1:
+			{
+				Uint8 *p = (Uint8 *)src;
+				ch = (Uint32)p[0];
+				++src;
+				--srclen;
+			}
+			break;
+		    case ENCODING_UTF8: /* RFC 3629 */
+			{
+				Uint8 *p = (Uint8 *)src;
+				size_t left = 0;
+				SDL_bool overlong = SDL_FALSE;
+				if ( p[0] >= 0xFC ) {
+					if ( (p[0] & 0xFE) != 0xFC ) {
+						/* Skip illegal sequences
+						return SDL_ICONV_EILSEQ;
+						*/
+						ch = UNKNOWN_UNICODE;
+					} else {
+						if ( p[0] == 0xFC && srclen > 1 && (p[1] & 0xFC) == 0x80 ) {
+							overlong = SDL_TRUE;
+						}
+						ch = (Uint32)(p[0] & 0x01);
+						left = 5;
+					}
+				} else if ( p[0] >= 0xF8 ) {
+					if ( (p[0] & 0xFC) != 0xF8 ) {
+						/* Skip illegal sequences
+						return SDL_ICONV_EILSEQ;
+						*/
+						ch = UNKNOWN_UNICODE;
+					} else {
+						if ( p[0] == 0xF8 && srclen > 1 && (p[1] & 0xF8) == 0x80 ) {
+							overlong = SDL_TRUE;
+						}
+						ch = (Uint32)(p[0] & 0x03);
+						left = 4;
+					}
+				} else if ( p[0] >= 0xF0 ) {
+					if ( (p[0] & 0xF8) != 0xF0 ) {
+						/* Skip illegal sequences
+						return SDL_ICONV_EILSEQ;
+						*/
+						ch = UNKNOWN_UNICODE;
+					} else {
+						if ( p[0] == 0xF0 && srclen > 1 && (p[1] & 0xF0) == 0x80 ) {
+							overlong = SDL_TRUE;
+						}
+						ch = (Uint32)(p[0] & 0x07);
+						left = 3;
+					}
+				} else if ( p[0] >= 0xE0 ) {
+					if ( (p[0] & 0xF0) != 0xE0 ) {
+						/* Skip illegal sequences
+						return SDL_ICONV_EILSEQ;
+						*/
+						ch = UNKNOWN_UNICODE;
+					} else {
+						if ( p[0] == 0xE0 && srclen > 1 && (p[1] & 0xE0) == 0x80 ) {
+							overlong = SDL_TRUE;
+						}
+						ch = (Uint32)(p[0] & 0x0F);
+						left = 2;
+					}
+				} else if ( p[0] >= 0xC0 ) {
+					if ( (p[0] & 0xE0) != 0xC0 ) {
+						/* Skip illegal sequences
+						return SDL_ICONV_EILSEQ;
+						*/
+						ch = UNKNOWN_UNICODE;
+					} else {
+						if ( (p[0] & 0xDE) == 0xC0 ) {
+							overlong = SDL_TRUE;
+						}
+						ch = (Uint32)(p[0] & 0x1F);
+						left = 1;
+					}
+				} else {
+					if ( (p[0] & 0x80) != 0x00 ) {
+						/* Skip illegal sequences
+						return SDL_ICONV_EILSEQ;
+						*/
+						ch = UNKNOWN_UNICODE;
+					} else {
+						ch = (Uint32)p[0];
+					}
+				}
+				++src;
+				--srclen;
+				if ( srclen < left ) {
+					return SDL_ICONV_EINVAL;
+				}
+				while ( left-- ) {
+					++p;
+					if ( (p[0] & 0xC0) != 0x80 ) {
+						/* Skip illegal sequences
+						return SDL_ICONV_EILSEQ;
+						*/
+						ch = UNKNOWN_UNICODE;
+						break;
+					}
+					ch <<= 6;
+					ch |= (p[0] & 0x3F);
+					++src;
+					--srclen;
+				}
+				if ( overlong ) {
+					/* Potential security risk
+					return SDL_ICONV_EILSEQ;
+					*/
+					ch = UNKNOWN_UNICODE;
+				}
+				if ( (ch >= 0xD800 && ch <= 0xDFFF) ||
+				     (ch == 0xFFFE || ch == 0xFFFF) ) {
+					/* Skip illegal sequences
+					return SDL_ICONV_EILSEQ;
+					*/
+					ch = UNKNOWN_UNICODE;
+				}
+			}
+			break;
+		    case ENCODING_UTF16BE: /* RFC 2781 */
+			{
+				Uint8 *p = (Uint8 *)src;
+				Uint16 W1, W2;
+				if ( srclen < 2 ) {
+					return SDL_ICONV_EINVAL;
+				}
+				W1 = ((Uint32)p[0] << 8) |
+				      (Uint32)p[1];
+				src += 2;
+				srclen -= 2;
+				if ( W1 < 0xD800 || W1 > 0xDFFF ) {
+					ch = (Uint32)W1;
+					break;
+				}
+				if ( W1 > 0xDBFF ) {
+					/* Skip illegal sequences
+					return SDL_ICONV_EILSEQ;
+					*/
+					ch = UNKNOWN_UNICODE;
+					break;
+				}
+				if ( srclen < 2 ) {
+					return SDL_ICONV_EINVAL;
+				}
+				p = (Uint8 *)src;
+				W2 = ((Uint32)p[0] << 8) |
+				      (Uint32)p[1];
+				src += 2;
+				srclen -= 2;
+				if ( W2 < 0xDC00 || W2 > 0xDFFF ) {
+					/* Skip illegal sequences
+					return SDL_ICONV_EILSEQ;
+					*/
+					ch = UNKNOWN_UNICODE;
+					break;
+				}
+				ch = (((Uint32)(W1 & 0x3FF) << 10) |
+				      (Uint32)(W2 & 0x3FF)) + 0x10000;
+			}
+			break;
+		    case ENCODING_UTF16LE: /* RFC 2781 */
+			{
+				Uint8 *p = (Uint8 *)src;
+				Uint16 W1, W2;
+				if ( srclen < 2 ) {
+					return SDL_ICONV_EINVAL;
+				}
+				W1 = ((Uint32)p[1] << 8) |
+				      (Uint32)p[0];
+				src += 2;
+				srclen -= 2;
+				if ( W1 < 0xD800 || W1 > 0xDFFF ) {
+					ch = (Uint32)W1;
+					break;
+				}
+				if ( W1 > 0xDBFF ) {
+					/* Skip illegal sequences
+					return SDL_ICONV_EILSEQ;
+					*/
+					ch = UNKNOWN_UNICODE;
+					break;
+				}
+				if ( srclen < 2 ) {
+					return SDL_ICONV_EINVAL;
+				}
+				p = (Uint8 *)src;
+				W2 = ((Uint32)p[1] << 8) |
+				      (Uint32)p[0];
+				src += 2;
+				srclen -= 2;
+				if ( W2 < 0xDC00 || W2 > 0xDFFF ) {
+					/* Skip illegal sequences
+					return SDL_ICONV_EILSEQ;
+					*/
+					ch = UNKNOWN_UNICODE;
+					break;
+				}
+				ch = (((Uint32)(W1 & 0x3FF) << 10) |
+				      (Uint32)(W2 & 0x3FF)) + 0x10000;
+			}
+			break;
+		    case ENCODING_UTF32BE:
+			{
+				Uint8 *p = (Uint8 *)src;
+				if ( srclen < 4 ) {
+					return SDL_ICONV_EINVAL;
+				}
+				ch = ((Uint32)p[0] << 24) |
+				     ((Uint32)p[1] << 16) |
+				     ((Uint32)p[2] << 8) |
+				      (Uint32)p[3];
+				src += 4;
+				srclen -= 4;
+			}
+			break;
+		    case ENCODING_UTF32LE:
+			{
+				Uint8 *p = (Uint8 *)src;
+				if ( srclen < 4 ) {
+					return SDL_ICONV_EINVAL;
+				}
+				ch = ((Uint32)p[3] << 24) |
+				     ((Uint32)p[2] << 16) |
+				     ((Uint32)p[1] << 8) |
+				      (Uint32)p[0];
+				src += 4;
+				srclen -= 4;
+			}
+			break;
+		    case ENCODING_UCS2:
+			{
+				Uint16 *p = (Uint16 *)src;
+				if ( srclen < 2 ) {
+					return SDL_ICONV_EINVAL;
+				}
+				ch = *p;
+				src += 2;
+				srclen -= 2;
+			}
+			break;
+		    case ENCODING_UCS4:
+			{
+				Uint32 *p = (Uint32 *)src;
+				if ( srclen < 4 ) {
+					return SDL_ICONV_EINVAL;
+				}
+				ch = *p;
+				src += 4;
+				srclen -= 4;
+			}
+			break;
+		}
+
+		/* Encode a character */
+		switch ( cd->dst_fmt ) {
+		    case ENCODING_ASCII:
+			{
+				Uint8 *p = (Uint8 *)dst;
+				if ( dstlen < 1 ) {
+					return SDL_ICONV_E2BIG;
+				}
+				if ( ch > 0x7F ) {
+					*p = UNKNOWN_ASCII;
+				} else {
+					*p = (Uint8)ch;
+				}
+				++dst;
+				--dstlen;
+			}
+			break;
+		    case ENCODING_LATIN1:
+			{
+				Uint8 *p = (Uint8 *)dst;
+				if ( dstlen < 1 ) {
+					return SDL_ICONV_E2BIG;
+				}
+				if ( ch > 0xFF ) {
+					*p = UNKNOWN_ASCII;
+				} else {
+					*p = (Uint8)ch;
+				}
+				++dst;
+				--dstlen;
+			}
+			break;
+		    case ENCODING_UTF8: /* RFC 3629 */
+			{
+				Uint8 *p = (Uint8 *)dst;
+				if ( ch > 0x7FFFFFFF ) {
+					ch = UNKNOWN_UNICODE;
+				}
+				if ( ch <= 0x7F ) {
+					if ( dstlen < 1 ) {
+						return SDL_ICONV_E2BIG;
+					}
+					*p = (Uint8)ch;
+					++dst;
+					--dstlen;
+				} else if ( ch <= 0x7FF ) {
+					if ( dstlen < 2 ) {
+						return SDL_ICONV_E2BIG;
+					}
+					p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F);
+					p[1] = 0x80 | (Uint8)(ch & 0x3F);
+					dst += 2;
+					dstlen -= 2;
+				} else if ( ch <= 0xFFFF ) {
+					if ( dstlen < 3 ) {
+						return SDL_ICONV_E2BIG;
+					}
+					p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F);
+					p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
+					p[2] = 0x80 | (Uint8)(ch & 0x3F);
+					dst += 3;
+					dstlen -= 3;
+				} else if ( ch <= 0x1FFFFF ) {
+					if ( dstlen < 4 ) {
+						return SDL_ICONV_E2BIG;
+					}
+					p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07);
+					p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
+					p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
+					p[3] = 0x80 | (Uint8)(ch & 0x3F);
+					dst += 4;
+					dstlen -= 4;
+				} else if ( ch <= 0x3FFFFFF ) {
+					if ( dstlen < 5 ) {
+						return SDL_ICONV_E2BIG;
+					}
+					p[0] = 0xF8 | (Uint8)((ch >> 24) & 0x03);
+					p[1] = 0x80 | (Uint8)((ch >> 18) & 0x3F);
+					p[2] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
+					p[3] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
+					p[4] = 0x80 | (Uint8)(ch & 0x3F);
+					dst += 5;
+					dstlen -= 5;
+				} else {
+					if ( dstlen < 6 ) {
+						return SDL_ICONV_E2BIG;
+					}
+					p[0] = 0xFC | (Uint8)((ch >> 30) & 0x01);
+					p[1] = 0x80 | (Uint8)((ch >> 24) & 0x3F);
+					p[2] = 0x80 | (Uint8)((ch >> 18) & 0x3F);
+					p[3] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
+					p[4] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
+					p[5] = 0x80 | (Uint8)(ch & 0x3F);
+					dst += 6;
+					dstlen -= 6;
+				}
+			}
+			break;
+		    case ENCODING_UTF16BE: /* RFC 2781 */
+			{
+				Uint8 *p = (Uint8 *)dst;
+				if ( ch > 0x10FFFF ) {
+					ch = UNKNOWN_UNICODE;
+				}
+				if ( ch < 0x10000 ) {
+					if ( dstlen < 2 ) {
+						return SDL_ICONV_E2BIG;
+					}
+					p[0] = (Uint8)(ch >> 8);
+					p[1] = (Uint8)ch;
+					dst += 2;
+					dstlen -= 2;
+				} else {
+					Uint16 W1, W2;
+					if ( dstlen < 4 ) {
+						return SDL_ICONV_E2BIG;
+					}
+					ch = ch - 0x10000;
+					W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
+					W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
+					p[0] = (Uint8)(W1 >> 8);
+					p[1] = (Uint8)W1;
+					p[2] = (Uint8)(W2 >> 8);
+					p[3] = (Uint8)W2;
+					dst += 4;
+					dstlen -= 4;
+				}
+			}
+			break;
+		    case ENCODING_UTF16LE: /* RFC 2781 */
+			{
+				Uint8 *p = (Uint8 *)dst;
+				if ( ch > 0x10FFFF ) {
+					ch = UNKNOWN_UNICODE;
+				}
+				if ( ch < 0x10000 ) {
+					if ( dstlen < 2 ) {
+						return SDL_ICONV_E2BIG;
+					}
+					p[1] = (Uint8)(ch >> 8);
+					p[0] = (Uint8)ch;
+					dst += 2;
+					dstlen -= 2;
+				} else {
+					Uint16 W1, W2;
+					if ( dstlen < 4 ) {
+						return SDL_ICONV_E2BIG;
+					}
+					ch = ch - 0x10000;
+					W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
+					W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
+					p[1] = (Uint8)(W1 >> 8);
+					p[0] = (Uint8)W1;
+					p[3] = (Uint8)(W2 >> 8);
+					p[2] = (Uint8)W2;
+					dst += 4;
+					dstlen -= 4;
+				}
+			}
+			break;
+		    case ENCODING_UTF32BE:
+			{
+				Uint8 *p = (Uint8 *)dst;
+				if ( ch > 0x7FFFFFFF ) {
+					ch = UNKNOWN_UNICODE;
+				}
+				if ( dstlen < 4 ) {
+					return SDL_ICONV_E2BIG;
+				}
+				p[0] = (Uint8)(ch >> 24);
+				p[1] = (Uint8)(ch >> 16);
+				p[2] = (Uint8)(ch >> 8);
+				p[3] = (Uint8)ch;
+				dst += 4;
+				dstlen -= 4;
+			}
+			break;
+		    case ENCODING_UTF32LE:
+			{
+				Uint8 *p = (Uint8 *)dst;
+				if ( ch > 0x7FFFFFFF ) {
+					ch = UNKNOWN_UNICODE;
+				}
+				if ( dstlen < 4 ) {
+					return SDL_ICONV_E2BIG;
+				}
+				p[3] = (Uint8)(ch >> 24);
+				p[2] = (Uint8)(ch >> 16);
+				p[1] = (Uint8)(ch >> 8);
+				p[0] = (Uint8)ch;
+				dst += 4;
+				dstlen -= 4;
+			}
+			break;
+		    case ENCODING_UCS2:
+			{
+				Uint16 *p = (Uint16 *)dst;
+				if ( ch > 0xFFFF ) {
+					ch = UNKNOWN_UNICODE;
+				}
+				if ( dstlen < 2 ) {
+					return SDL_ICONV_E2BIG;
+				}
+				*p = (Uint16)ch;
+				dst += 2;
+				dstlen -= 2;
+			}
+			break;
+		    case ENCODING_UCS4:
+			{
+				Uint32 *p = (Uint32 *)dst;
+				if ( ch > 0x7FFFFFFF ) {
+					ch = UNKNOWN_UNICODE;
+				}
+				if ( dstlen < 4 ) {
+					return SDL_ICONV_E2BIG;
+				}
+				*p = ch;
+				dst += 4;
+				dstlen -= 4;
+			}
+			break;
+		}
+
+		/* Update state */
+		*inbuf = src;
+		*inbytesleft = srclen;
+		*outbuf = dst;
+		*outbytesleft = dstlen;
+		++total;
+	}
+	return total;
+}
+
+int SDL_iconv_close(SDL_iconv_t cd)
+{
+	if ( cd != NULL && cd != (SDL_iconv_t)-1 ) {	/* only real descriptors own memory */
+		SDL_free(cd);
+	}
+	return 0;	/* closing never fails */
+}
+
+#endif /* !HAVE_ICONV */
+
+/* Convert inbytesleft bytes at inbuf from fromcode to tocode in one pass,
+   growing the result as needed.  Returns a buffer for SDL_free(), or NULL. */
+char *SDL_iconv_string(const char *tocode, const char *fromcode, char *inbuf, size_t inbytesleft)
+{
+	SDL_iconv_t cd;
+	char *string, *outbuf;
+	size_t stringsize, outbytesleft;
+	size_t retCode = 0;
+
+	cd = SDL_iconv_open(tocode, fromcode);
+	if ( cd == (SDL_iconv_t)-1 ) {
+		return NULL;
+	}
+	stringsize = inbytesleft > 4 ? inbytesleft : 4;
+	string = SDL_malloc(stringsize);
+	if ( !string ) {
+		SDL_iconv_close(cd);
+		return NULL;
+	}
+	outbuf = string;
+	outbytesleft = stringsize;
+	SDL_memset(outbuf, 0, 4);
+	while ( inbytesleft > 0 ) {
+		retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+		switch (retCode) {
+		    case SDL_ICONV_E2BIG:
+			{
+				/* Take the offset first: outbuf dangles once realloc moves the block */
+				size_t used = (size_t)(outbuf - string);
+				char *grown = SDL_realloc(string, stringsize * 2);
+				if ( !grown ) {
+					SDL_free(string);
+					SDL_iconv_close(cd);
+					return NULL;
+				}
+				string = grown;
+				stringsize *= 2;
+				outbuf = string + used;
+				outbytesleft = stringsize - used;
+				SDL_memset(outbuf, 0, 4);
+			}
+			break;
+		    case SDL_ICONV_EILSEQ:
+			/* Try skipping some input data - not perfect, but... */
+			++inbuf;
+			--inbytesleft;
+			break;
+		    case SDL_ICONV_EINVAL:
+		    case SDL_ICONV_ERROR:
+			/* We can't continue... */
+			inbytesleft = 0;
+			break;
+		}
+	}
+	SDL_iconv_close(cd);
+	return string;
+}
--- a/src/stdlib/SDL_string.c	Sun Mar 12 01:47:23 2006 +0000
+++ b/src/stdlib/SDL_string.c	Mon Mar 13 01:08:00 2006 +0000
@@ -661,12 +661,12 @@
 }
 #endif
 
-#ifndef HAVE_STRCASECMP
+#if !defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)
 int SDL_strcasecmp(const char *str1, const char *str2)
 {
     char a = 0;
     char b = 0;
-    while (*str1 && *str2) {
+    while ( *str1 && *str2 ) {
         a = SDL_tolower(*str1);
         b = SDL_tolower(*str2);
         if ( a != b )
@@ -678,6 +678,24 @@
 }
 #endif
 
+#ifndef HAVE_STRNCASECMP
+/* Compare at most maxlen characters, ignoring case; the result is <0, 0 or >0.
+   The terminator takes part in the comparison, so "abc" differs from "abcd". */
+int SDL_strncasecmp(const char *str1, const char *str2, size_t maxlen)
+{
+    char a = 0;
+    char b = 0;
+    for ( ; maxlen; ++str1, ++str2, --maxlen ) {
+        a = SDL_tolower((unsigned char)*str1);
+        b = SDL_tolower((unsigned char)*str2);
+        if ( a != b || a == '\0' ) {
+            break;
+        }
+    }
+    return (int)((unsigned char)a - (unsigned char)b);
+}
+#endif
+
 #ifndef HAVE_SSCANF
 int SDL_sscanf(const char *text, const char *fmt, ...)
 {
--- a/test/Makefile.in	Sun Mar 12 01:47:23 2006 +0000
+++ b/test/Makefile.in	Mon Mar 13 01:08:00 2006 +0000
@@ -7,7 +7,7 @@
 CFLAGS  = @CFLAGS@
 LIBS	= @LIBS@
 
-TARGETS = checkkeys$(EXE) graywin$(EXE) loopwave$(EXE) testalpha$(EXE) testbitmap$(EXE) testblitspeed$(EXE) testcdrom$(EXE) testdyngl$(EXE) testerror$(EXE) testfile$(EXE) testgamma$(EXE) testgl$(EXE) testhread$(EXE) testjoystick$(EXE) testkeys$(EXE) testlock$(EXE) testoverlay2$(EXE) testoverlay$(EXE) testpalette$(EXE) testplatform$(EXE) testsem$(EXE) testsprite$(EXE) testtimer$(EXE) testver$(EXE) testvidinfo$(EXE) testwin$(EXE) testwm$(EXE) threadwin$(EXE) torturethread$(EXE)
+TARGETS = checkkeys$(EXE) graywin$(EXE) loopwave$(EXE) testalpha$(EXE) testbitmap$(EXE) testblitspeed$(EXE) testcdrom$(EXE) testdyngl$(EXE) testerror$(EXE) testfile$(EXE) testgamma$(EXE) testgl$(EXE) testhread$(EXE) testiconv$(EXE) testjoystick$(EXE) testkeys$(EXE) testlock$(EXE) testoverlay2$(EXE) testoverlay$(EXE) testpalette$(EXE) testplatform$(EXE) testsem$(EXE) testsprite$(EXE) testtimer$(EXE) testver$(EXE) testvidinfo$(EXE) testwin$(EXE) testwm$(EXE) threadwin$(EXE) torturethread$(EXE)
 
 all: $(TARGETS)
 
@@ -50,6 +50,9 @@
 testhread$(EXE): $(srcdir)/testhread.c
 	$(CC) -o $@ $? $(CFLAGS) $(LIBS)
 
+testiconv$(EXE): $(srcdir)/testiconv.c
+	$(CC) -o $@ $? $(CFLAGS) $(LIBS)
+
 testjoystick$(EXE): $(srcdir)/testjoystick.c
 	$(CC) -o $@ $? $(CFLAGS) $(LIBS)
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/testiconv.c	Mon Mar 13 01:08:00 2006 +0000
@@ -0,0 +1,85 @@
+
+#include <stdio.h>
+
+#include "SDL.h"
+
+static SDL_bool testutf16(char *data)
+{
+	/* data is read as a 0-terminated array of Uint32 values */
+	Uint32 *cp;
+	for ( cp = (Uint32 *)data; *cp; ++cp ) {
+		if ( *cp > 0x10FFFF ) {
+			return SDL_FALSE;	/* a value above 0x10FFFF was found */
+		}
+	}
+	return SDL_TRUE;
+}
+
+static size_t widelen(char *data)	/* length in Uint32 units, terminator excluded */
+{
+	Uint32 *start = (Uint32 *)data;
+	Uint32 *end = start;
+	while ( *end ) {
+		++end;
+	}
+	return (size_t)(end - start);
+}
+
+/* Round-trips each line of a UTF-8 file (argv[1], default "utf8.txt") through every format */
+int main(int argc, char *argv[])
+{
+	const char * formats[] = {
+		"UTF8", "UTF-8",
+		"UTF16BE", "UTF-16BE",
+		"UTF16LE", "UTF-16LE",
+		"UTF32BE", "UTF-32BE",
+		"UTF32LE", "UTF-32LE",
+		"UCS4", "UCS-4",
+	};
+	char buffer[BUFSIZ];
+	char *ucs4, *test[2];
+	size_t len;
+	FILE *file;
+	int i, errors = 0;
+
+	if ( !argv[1] ) {
+		argv[1] = "utf8.txt";
+	}
+	file = fopen(argv[1], "rb");
+	if ( !file ) {
+		fprintf(stderr, "Unable to open %s\n", argv[1]);
+		return (1);
+	}
+	while ( fgets(buffer, sizeof(buffer), file) ) {
+		/* Convert to UCS-4; a NULL result is counted as a failure, not dereferenced */
+		ucs4 = SDL_iconv_string("UCS-4", "UTF-8", buffer, SDL_strlen(buffer)+1);
+		if ( !ucs4 ) {
+			++errors;
+			continue;
+		}
+		len = (widelen(ucs4)+1)*4;
+		for ( i = 0; i < SDL_arraysize(formats); ++i ) {
+			if ( (SDL_strncasecmp(formats[i], "UTF16", 5) == 0 ||
+			      SDL_strncasecmp(formats[i], "UTF-16", 6) == 0) &&
+			      !testutf16(ucs4) ) {
+				continue;
+			}
+			test[0] = SDL_iconv_string(formats[i], "UCS-4", ucs4, len);
+			test[1] = test[0] ? SDL_iconv_string("UCS-4", formats[i], test[0], len) : NULL;
+			if ( !test[1] || SDL_memcmp(test[1], ucs4, len) != 0 ) {
+				fprintf(stderr, "FAIL: %s\n", formats[i]);
+				++errors;
+			}
+			SDL_free(test[0]);
+			SDL_free(test[1]);
+		}
+		test[0] = SDL_iconv_string("UTF-8", "UCS-4", ucs4, len);
+		SDL_free(ucs4);
+		if ( test[0] ) {
+			fputs(test[0], stdout);
+			SDL_free(test[0]);
+		}
+	}
+	fclose(file);
+	return (errors ? errors + 1 : 0);
+}
Binary file test/utf8.txt has changed