It turns out that UCS2 and UCS4 are defined as big-endian encodings
author Sam Lantinga <slouken@libsdl.org>
Sun, 28 Oct 2012 13:03:45 -0700
changeset 6610 4032f8efdfe7
parent 6609 747e2ac35db6
child 6611 5c8b5b03ad8a
It turns out that UCS2 and UCS4 are defined as big-endian encodings
include/SDL_stdinc.h
src/stdlib/SDL_iconv.c
--- a/include/SDL_stdinc.h	Sat Oct 27 02:54:10 2012 -0700
+++ b/include/SDL_stdinc.h	Sun Oct 28 13:03:45 2012 -0700
@@ -748,8 +748,8 @@
                                                const char *inbuf,
                                                size_t inbytesleft);
 #define SDL_iconv_utf8_locale(S)	SDL_iconv_string("", "UTF-8", S, SDL_strlen(S)+1)
-#define SDL_iconv_utf8_ucs2(S)		(Uint16 *)SDL_iconv_string("UCS-2", "UTF-8", S, SDL_strlen(S)+1)
-#define SDL_iconv_utf8_ucs4(S)		(Uint32 *)SDL_iconv_string("UCS-4", "UTF-8", S, SDL_strlen(S)+1)
+#define SDL_iconv_utf8_ucs2(S)		(Uint16 *)SDL_iconv_string("UCS-2-INTERNAL", "UTF-8", S, SDL_strlen(S)+1)
+#define SDL_iconv_utf8_ucs4(S)		(Uint32 *)SDL_iconv_string("UCS-4-INTERNAL", "UTF-8", S, SDL_strlen(S)+1)
 
 /* Ends C function definitions when using C++ */
 #ifdef __cplusplus
--- a/src/stdlib/SDL_iconv.c	Sat Oct 27 02:54:10 2012 -0700
+++ b/src/stdlib/SDL_iconv.c	Sun Oct 28 13:03:45 2012 -0700
@@ -87,15 +87,21 @@
     ENCODING_UTF32,             /* Needs byte order marker */
     ENCODING_UTF32BE,
     ENCODING_UTF32LE,
-    ENCODING_UCS2,              /* Native byte order assumed */
-    ENCODING_UCS4,              /* Native byte order assumed */
+    ENCODING_UCS2BE,
+    ENCODING_UCS2LE,
+    ENCODING_UCS4BE,
+    ENCODING_UCS4LE,
 };
 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
 #define ENCODING_UTF16NATIVE	ENCODING_UTF16BE
 #define ENCODING_UTF32NATIVE	ENCODING_UTF32BE
+#define ENCODING_UCS2NATIVE     ENCODING_UCS2BE
+#define ENCODING_UCS4NATIVE     ENCODING_UCS4BE
 #else
 #define ENCODING_UTF16NATIVE	ENCODING_UTF16LE
 #define ENCODING_UTF32NATIVE	ENCODING_UTF32LE
+#define ENCODING_UCS2NATIVE     ENCODING_UCS2LE
+#define ENCODING_UCS4NATIVE     ENCODING_UCS4LE
 #endif
 
 struct _SDL_iconv_t
@@ -128,10 +134,16 @@
     { "UTF-32BE", ENCODING_UTF32BE },
     { "UTF32LE", ENCODING_UTF32LE },
     { "UTF-32LE", ENCODING_UTF32LE },
-    { "UCS2", ENCODING_UCS2 },
-    { "UCS-2", ENCODING_UCS2 },
-    { "UCS4", ENCODING_UCS4 },
-    { "UCS-4", ENCODING_UCS4 },
+    { "UCS2", ENCODING_UCS2BE },
+    { "UCS-2", ENCODING_UCS2BE },
+    { "UCS-2LE", ENCODING_UCS2LE },
+    { "UCS-2BE", ENCODING_UCS2BE },
+    { "UCS-2-INTERNAL", ENCODING_UCS2NATIVE },
+    { "UCS4", ENCODING_UCS4BE },
+    { "UCS-4", ENCODING_UCS4BE },
+    { "UCS-4LE", ENCODING_UCS4LE },
+    { "UCS-4BE", ENCODING_UCS4BE },
+    { "UCS-4-INTERNAL", ENCODING_UCS4NATIVE },
 /* *INDENT-ON* */
 };
 
@@ -518,6 +530,29 @@
                       (Uint32) (W2 & 0x3FF)) + 0x10000;
             }
             break;
+        case ENCODING_UCS2LE:
+            {
+                Uint8 *p = (Uint8 *) src;
+                if (srclen < 2) {
+                    return SDL_ICONV_EINVAL;
+                }
+                ch = ((Uint32) p[1] << 8) | (Uint32) p[0];
+                src += 2;
+                srclen -= 2;
+            }
+            break;
+        case ENCODING_UCS2BE:
+            {
+                Uint8 *p = (Uint8 *) src;
+                if (srclen < 2) {
+                    return SDL_ICONV_EINVAL;
+                }
+                ch = ((Uint32) p[0] << 8) | (Uint32) p[1];
+                src += 2;
+                srclen -= 2;
+            }
+            break;
+        case ENCODING_UCS4BE:
         case ENCODING_UTF32BE:
             {
                 Uint8 *p = (Uint8 *) src;
@@ -531,6 +566,7 @@
                 srclen -= 4;
             }
             break;
+        case ENCODING_UCS4LE:
         case ENCODING_UTF32LE:
             {
                 Uint8 *p = (Uint8 *) src;
@@ -544,28 +580,6 @@
                 srclen -= 4;
             }
             break;
-        case ENCODING_UCS2:
-            {
-                Uint16 *p = (Uint16 *) src;
-                if (srclen < 2) {
-                    return SDL_ICONV_EINVAL;
-                }
-                ch = *p;
-                src += 2;
-                srclen -= 2;
-            }
-            break;
-        case ENCODING_UCS4:
-            {
-                Uint32 *p = (Uint32 *) src;
-                if (srclen < 4) {
-                    return SDL_ICONV_EINVAL;
-                }
-                ch = *p;
-                src += 4;
-                srclen -= 4;
-            }
-            break;
         }
 
         /* Encode a character */
@@ -728,12 +742,46 @@
                 }
             }
             break;
-        case ENCODING_UTF32BE:
+        case ENCODING_UCS2BE:
+            {
+                Uint8 *p = (Uint8 *) dst;
+                if (ch > 0xFFFF) {
+                    ch = UNKNOWN_UNICODE;
+                }
+                if (dstlen < 2) {
+                    return SDL_ICONV_E2BIG;
+                }
+                p[0] = (Uint8) (ch >> 8);
+                p[1] = (Uint8) ch;
+                dst += 2;
+                dstlen -= 2;
+            }
+            break;
+        case ENCODING_UCS2LE:
             {
                 Uint8 *p = (Uint8 *) dst;
-                if (ch > 0x10FFFF) {
+                if (ch > 0xFFFF) {
                     ch = UNKNOWN_UNICODE;
                 }
+                if (dstlen < 2) {
+                    return SDL_ICONV_E2BIG;
+                }
+                p[1] = (Uint8) (ch >> 8);
+                p[0] = (Uint8) ch;
+                dst += 2;
+                dstlen -= 2;
+            }
+            break;
+        case ENCODING_UTF32BE:
+            if (ch > 0x10FFFF) {
+                ch = UNKNOWN_UNICODE;
+            }
+        case ENCODING_UCS4BE:
+            if (ch > 0x7FFFFFFF) {
+                ch = UNKNOWN_UNICODE;
+            }
+            {
+                Uint8 *p = (Uint8 *) dst;
                 if (dstlen < 4) {
                     return SDL_ICONV_E2BIG;
                 }
@@ -746,11 +794,15 @@
             }
             break;
         case ENCODING_UTF32LE:
+            if (ch > 0x10FFFF) {
+                ch = UNKNOWN_UNICODE;
+            }
+        case ENCODING_UCS4LE:
+            if (ch > 0x7FFFFFFF) {
+                ch = UNKNOWN_UNICODE;
+            }
             {
                 Uint8 *p = (Uint8 *) dst;
-                if (ch > 0x10FFFF) {
-                    ch = UNKNOWN_UNICODE;
-                }
                 if (dstlen < 4) {
                     return SDL_ICONV_E2BIG;
                 }
@@ -762,34 +814,6 @@
                 dstlen -= 4;
             }
             break;
-        case ENCODING_UCS2:
-            {
-                Uint16 *p = (Uint16 *) dst;
-                if (ch > 0xFFFF) {
-                    ch = UNKNOWN_UNICODE;
-                }
-                if (dstlen < 2) {
-                    return SDL_ICONV_E2BIG;
-                }
-                *p = (Uint16) ch;
-                dst += 2;
-                dstlen -= 2;
-            }
-            break;
-        case ENCODING_UCS4:
-            {
-                Uint32 *p = (Uint32 *) dst;
-                if (ch > 0x7FFFFFFF) {
-                    ch = UNKNOWN_UNICODE;
-                }
-                if (dstlen < 4) {
-                    return SDL_ICONV_E2BIG;
-                }
-                *p = ch;
-                dst += 4;
-                dstlen -= 4;
-            }
-            break;
         }
 
         /* Update state */