Skip to content

Commit

Permalink
Backport from dev branch: utf8codepoint() should always advance pointer.
Browse files: browse the repository at this point in the history
  • Loading branch information
icculus committed Jun 1, 2011
1 parent 76ffb5d commit 4043d09
Showing 1 changed file with 10 additions and 5 deletions.
15 changes: 10 additions & 5 deletions physfs_unicode.c
Expand Up @@ -63,19 +63,21 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)

else if (octet < 224) /* two octets */
{
+ (*_str)++; /* advance at least one byte in case of an error */
octet -= (128+64);
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;

- *_str += 2; /* skip to next possible start of codepoint. */
+ *_str += 1; /* skip to next possible start of codepoint. */
retval = ((octet << 6) | (octet2 - 128));
if ((retval >= 0x80) && (retval <= 0x7FF))
return retval;
} /* else if */

else if (octet < 240) /* three octets */
{
+ (*_str)++; // advance at least one byte in case of an error
octet -= (128+64+32);
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
Expand All @@ -85,7 +87,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
if ((octet3 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;

- *_str += 3; /* skip to next possible start of codepoint. */
+ *_str += 2; /* skip to next possible start of codepoint. */
retval = ( ((octet << 12)) | ((octet2-128) << 6) | ((octet3-128)) );

/* There are seven "UTF-16 surrogates" that are illegal in UTF-8. */
Expand All @@ -108,6 +110,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)

else if (octet < 248) /* four octets */
{
+ (*_str)++; // advance at least one byte in case of an error
octet -= (128+64+32+16);
octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet2 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
Expand All @@ -121,7 +124,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
if ((octet4 & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;

- *_str += 4; /* skip to next possible start of codepoint. */
+ *_str += 3; /* skip to next possible start of codepoint. */
retval = ( ((octet << 18)) | ((octet2 - 128) << 12) |
((octet3 - 128) << 6) | ((octet4 - 128)) );
if ((retval >= 0x10000) && (retval <= 0x10FFFF))
Expand All @@ -136,6 +139,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)

else if (octet < 252) /* five octets */
{
+ (*_str)++; // advance at least one byte in case of an error
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;
Expand All @@ -152,12 +156,13 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;

- *_str += 5; /* skip to next possible start of codepoint. */
+ *_str += 4; /* skip to next possible start of codepoint. */
return UNICODE_BOGUS_CHAR_VALUE;
} /* else if */

else /* six octets */
{
+ (*_str)++; // advance at least one byte in case of an error
octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;
Expand All @@ -178,7 +183,7 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
if ((octet & (128+64)) != 128) /* Format isn't 10xxxxxx? */
return UNICODE_BOGUS_CHAR_VALUE;

- *_str += 6; /* skip to next possible start of codepoint. */
+ *_str += 5; /* skip to next possible start of codepoint. */
return UNICODE_BOGUS_CHAR_VALUE;
} /* else if */

Expand Down

0 comments on commit 4043d09

Please sign in to comment.