Make sure utf8codepoint() advances the pointer even on bogus input.
author: Ryan C. Gordon <icculus@icculus.org>
Wed, 01 Jun 2011 03:10:08 -0400
changeset 1163 e46c11bb2780
parent 1162 49950eab0ef8
child 1167 9d84f8652839
Make sure utf8codepoint() advances the pointer even on bogus input. This patch was from Francois Gouget, who found this bug in MojoSetup's copy of this function.
src/physfs_unicode.c
--- a/src/physfs_unicode.c	Wed Jun 01 03:08:53 2011 -0400
+++ b/src/physfs_unicode.c	Wed Jun 01 03:10:08 2011 -0400
@@ -61,12 +61,13 @@
 
     else if (octet < 224)  /* two octets */
     {
+        (*_str)++;  /* advance at least one byte in case of an error */
         octet -= (128+64);
         octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
         if ((octet2 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
             return UNICODE_BOGUS_CHAR_VALUE;
 
-        *_str += 2;  /* skip to next possible start of codepoint. */
+        *_str += 1;  /* skip to next possible start of codepoint. */
         retval = ((octet << 6) | (octet2 - 128));
         if ((retval >= 0x80) && (retval <= 0x7FF))
             return retval;
@@ -74,6 +75,7 @@
 
     else if (octet < 240)  /* three octets */
     {
+        (*_str)++;  /* advance at least one byte in case of an error */
         octet -= (128+64+32);
         octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
         if ((octet2 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
@@ -83,7 +85,7 @@
         if ((octet3 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
             return UNICODE_BOGUS_CHAR_VALUE;
 
-        *_str += 3;  /* skip to next possible start of codepoint. */
+        *_str += 2;  /* skip to next possible start of codepoint. */
         retval = ( ((octet << 12)) | ((octet2-128) << 6) | ((octet3-128)) );
 
         /* There are seven "UTF-16 surrogates" that are illegal in UTF-8. */
@@ -106,6 +108,7 @@
 
     else if (octet < 248)  /* four octets */
     {
+        (*_str)++;  /* advance at least one byte in case of an error */
         octet -= (128+64+32+16);
         octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
         if ((octet2 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
@@ -119,7 +122,7 @@
         if ((octet4 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
             return UNICODE_BOGUS_CHAR_VALUE;
 
-        *_str += 4;  /* skip to next possible start of codepoint. */
+        *_str += 3;  /* skip to next possible start of codepoint. */
         retval = ( ((octet << 18)) | ((octet2 - 128) << 12) |
                    ((octet3 - 128) << 6) | ((octet4 - 128)) );
         if ((retval >= 0x10000) && (retval <= 0x10FFFF))
@@ -134,6 +137,7 @@
 
     else if (octet < 252)  /* five octets */
     {
+        (*_str)++;  /* advance at least one byte in case of an error */
         octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
         if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
             return UNICODE_BOGUS_CHAR_VALUE;
@@ -150,12 +154,13 @@
         if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
             return UNICODE_BOGUS_CHAR_VALUE;
 
-        *_str += 5;  /* skip to next possible start of codepoint. */
+        *_str += 4;  /* skip to next possible start of codepoint. */
         return UNICODE_BOGUS_CHAR_VALUE;
     } /* else if */
 
     else  /* six octets */
     {
+        (*_str)++;  /* advance at least one byte in case of an error */
         octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
         if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
             return UNICODE_BOGUS_CHAR_VALUE;