Skip to content

Commit

Permalink
Make sure utf8codepoint() advances the pointer even on bogus input.
Browse files Browse the repository at this point in the history
This patch was from Francois Gouget, who found this bug in MojoSetup's copy
 of this function.
  • Loading branch information
icculus committed Jun 1, 2011
1 parent c6ca60e commit 6a1aca5
Showing 1 changed file with 9 additions and 4 deletions.
13 changes: 9 additions & 4 deletions src/physfs_unicode.c
Expand Up @@ -61,19 +61,21 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)

else if (octet < 224) /* two octets */
{
+ (*_str)++; /* advance at least one byte in case of an error */
octet -= (128+64);
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;

- *_str += 2; /* skip to next possible start of codepoint. */
+ *_str += 1; /* skip to next possible start of codepoint. */
retval = ((octet << 6) | (octet2 - 128));
if ((retval >= 0x80) && (retval <= 0x7FF))
return retval;
} /* else if */

else if (octet < 240) /* three octets */
{
+ (*_str)++; /* advance at least one byte in case of an error */
octet -= (128+64+32);
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
Expand All @@ -83,7 +85,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
if ((octet3 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;

- *_str += 3; /* skip to next possible start of codepoint. */
+ *_str += 2; /* skip to next possible start of codepoint. */
retval = ( ((octet << 12)) | ((octet2-128) << 6) | ((octet3-128)) );

/* There are seven "UTF-16 surrogates" that are illegal in UTF-8. */
Expand All @@ -106,6 +108,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)

else if (octet < 248) /* four octets */
{
+ (*_str)++; /* advance at least one byte in case of an error */
octet -= (128+64+32+16);
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
Expand All @@ -119,7 +122,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
if ((octet4 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;

- *_str += 4; /* skip to next possible start of codepoint. */
+ *_str += 3; /* skip to next possible start of codepoint. */
retval = ( ((octet << 18)) | ((octet2 - 128) << 12) |
((octet3 - 128) << 6) | ((octet4 - 128)) );
if ((retval >= 0x10000) && (retval <= 0x10FFFF))
Expand All @@ -134,6 +137,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)

else if (octet < 252) /* five octets */
{
+ (*_str)++; /* advance at least one byte in case of an error */
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;
Expand All @@ -150,12 +154,13 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;

- *_str += 5; /* skip to next possible start of codepoint. */
+ *_str += 4; /* skip to next possible start of codepoint. */
return UNICODE_BOGUS_CHAR_VALUE;
} /* else if */

else /* six octets */
{
+ (*_str)++; /* advance at least one byte in case of an error */
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;
Expand Down

0 comments on commit 6a1aca5

Please sign in to comment.