Make sure utf8codepoint() always advances the pointer by at least one byte, even on bogus input.
This patch is from Francois Gouget, who found this bug in MojoSetup's copy
of this function.
--- a/src/physfs_unicode.c Wed Jun 01 03:08:53 2011 -0400
+++ b/src/physfs_unicode.c Wed Jun 01 03:10:08 2011 -0400
@@ -61,12 +61,13 @@
else if (octet < 224) /* two octets */
{
+ (*_str)++; /* advance at least one byte in case of an error */
octet -= (128+64);
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;
- *_str += 2; /* skip to next possible start of codepoint. */
+ *_str += 1; /* skip to next possible start of codepoint. */
retval = ((octet << 6) | (octet2 - 128));
if ((retval >= 0x80) && (retval <= 0x7FF))
return retval;
@@ -74,6 +75,7 @@
else if (octet < 240) /* three octets */
{
+ (*_str)++; /* advance at least one byte in case of an error */
octet -= (128+64+32);
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
@@ -83,7 +85,7 @@
if ((octet3 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;
- *_str += 3; /* skip to next possible start of codepoint. */
+ *_str += 2; /* skip to next possible start of codepoint. */
retval = ( ((octet << 12)) | ((octet2-128) << 6) | ((octet3-128)) );
/* There are seven "UTF-16 surrogates" that are illegal in UTF-8. */
@@ -106,6 +108,7 @@
else if (octet < 248) /* four octets */
{
+ (*_str)++; /* advance at least one byte in case of an error */
octet -= (128+64+32+16);
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
@@ -119,7 +122,7 @@
if ((octet4 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;
- *_str += 4; /* skip to next possible start of codepoint. */
+ *_str += 3; /* skip to next possible start of codepoint. */
retval = ( ((octet << 18)) | ((octet2 - 128) << 12) |
((octet3 - 128) << 6) | ((octet4 - 128)) );
if ((retval >= 0x10000) && (retval <= 0x10FFFF))
@@ -134,6 +137,7 @@
else if (octet < 252) /* five octets */
{
+ (*_str)++; /* advance at least one byte in case of an error */
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;
@@ -150,12 +154,13 @@
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;
- *_str += 5; /* skip to next possible start of codepoint. */
+ *_str += 4; /* skip to next possible start of codepoint. */
return UNICODE_BOGUS_CHAR_VALUE;
} /* else if */
else /* six octets */
{
+ (*_str)++; /* advance at least one byte in case of an error */
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;