physfs_unicode.c
branchstable-2.0
changeset 1164 18a21ee32131
parent 1095 69b884a15de1
equal deleted inserted replaced
1160:cf466b29bf6a 1164:18a21ee32131
    61         return UNICODE_BOGUS_CHAR_VALUE;
    61         return UNICODE_BOGUS_CHAR_VALUE;
    62     } /* else if */
    62     } /* else if */
    63 
    63 
    64     else if (octet < 224)  /* two octets */
    64     else if (octet < 224)  /* two octets */
    65     {
    65     {
       
    66         (*_str)++;  /* advance at least one byte in case of an error */
    66         octet -= (128+64);
    67         octet -= (128+64);
    67         octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
    68         octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
    68         if ((octet2 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
    69         if ((octet2 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
    69             return UNICODE_BOGUS_CHAR_VALUE;
    70             return UNICODE_BOGUS_CHAR_VALUE;
    70 
    71 
    71         *_str += 2;  /* skip to next possible start of codepoint. */
    72         *_str += 1;  /* skip to next possible start of codepoint. */
    72         retval = ((octet << 6) | (octet2 - 128));
    73         retval = ((octet << 6) | (octet2 - 128));
    73         if ((retval >= 0x80) && (retval <= 0x7FF))
    74         if ((retval >= 0x80) && (retval <= 0x7FF))
    74             return retval;
    75             return retval;
    75     } /* else if */
    76     } /* else if */
    76 
    77 
    77     else if (octet < 240)  /* three octets */
    78     else if (octet < 240)  /* three octets */
    78     {
    79     {
       
    80         (*_str)++;  // advance at least one byte in case of an error
    79         octet -= (128+64+32);
    81         octet -= (128+64+32);
    80         octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
    82         octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
    81         if ((octet2 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
    83         if ((octet2 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
    82             return UNICODE_BOGUS_CHAR_VALUE;
    84             return UNICODE_BOGUS_CHAR_VALUE;
    83 
    85 
    84         octet3 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
    86         octet3 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
    85         if ((octet3 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
    87         if ((octet3 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
    86             return UNICODE_BOGUS_CHAR_VALUE;
    88             return UNICODE_BOGUS_CHAR_VALUE;
    87 
    89 
    88         *_str += 3;  /* skip to next possible start of codepoint. */
    90         *_str += 2;  /* skip to next possible start of codepoint. */
    89         retval = ( ((octet << 12)) | ((octet2-128) << 6) | ((octet3-128)) );
    91         retval = ( ((octet << 12)) | ((octet2-128) << 6) | ((octet3-128)) );
    90 
    92 
    91         /* There are seven "UTF-16 surrogates" that are illegal in UTF-8. */
    93         /* There are seven "UTF-16 surrogates" that are illegal in UTF-8. */
    92         switch (retval)
    94         switch (retval)
    93         {
    95         {
   106             return retval;
   108             return retval;
   107     } /* else if */
   109     } /* else if */
   108 
   110 
   109     else if (octet < 248)  /* four octets */
   111     else if (octet < 248)  /* four octets */
   110     {
   112     {
       
   113         (*_str)++;  // advance at least one byte in case of an error
   111         octet -= (128+64+32+16);
   114         octet -= (128+64+32+16);
   112         octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
   115         octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
   113         if ((octet2 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
   116         if ((octet2 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
   114             return UNICODE_BOGUS_CHAR_VALUE;
   117             return UNICODE_BOGUS_CHAR_VALUE;
   115 
   118 
   119 
   122 
   120         octet4 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
   123         octet4 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
   121         if ((octet4 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
   124         if ((octet4 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
   122             return UNICODE_BOGUS_CHAR_VALUE;
   125             return UNICODE_BOGUS_CHAR_VALUE;
   123 
   126 
   124         *_str += 4;  /* skip to next possible start of codepoint. */
   127         *_str += 3;  /* skip to next possible start of codepoint. */
   125         retval = ( ((octet << 18)) | ((octet2 - 128) << 12) |
   128         retval = ( ((octet << 18)) | ((octet2 - 128) << 12) |
   126                    ((octet3 - 128) << 6) | ((octet4 - 128)) );
   129                    ((octet3 - 128) << 6) | ((octet4 - 128)) );
   127         if ((retval >= 0x10000) && (retval <= 0x10FFFF))
   130         if ((retval >= 0x10000) && (retval <= 0x10FFFF))
   128             return retval;
   131             return retval;
   129     } /* else if */
   132     } /* else if */
   134      *  ahead the right number of bytes and don't overflow the buffer.
   137      *  ahead the right number of bytes and don't overflow the buffer.
   135      */
   138      */
   136 
   139 
   137     else if (octet < 252)  /* five octets */
   140     else if (octet < 252)  /* five octets */
   138     {
   141     {
       
   142         (*_str)++;  // advance at least one byte in case of an error
       
   143         octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
       
   144         if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
       
   145             return UNICODE_BOGUS_CHAR_VALUE;
       
   146 
       
   147         octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
       
   148         if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
       
   149             return UNICODE_BOGUS_CHAR_VALUE;
       
   150 
       
   151         octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
       
   152         if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
       
   153             return UNICODE_BOGUS_CHAR_VALUE;
       
   154 
       
   155         octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
       
   156         if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
       
   157             return UNICODE_BOGUS_CHAR_VALUE;
       
   158 
       
   159         *_str += 4;  /* skip to next possible start of codepoint. */
       
   160         return UNICODE_BOGUS_CHAR_VALUE;
       
   161     } /* else if */
       
   162 
       
   163     else  /* six octets */
       
   164     {
       
   165         (*_str)++;  // advance at least one byte in case of an error
       
   166         octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
       
   167         if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
       
   168             return UNICODE_BOGUS_CHAR_VALUE;
       
   169 
   139         octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
   170         octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
   140         if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
   171         if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
   141             return UNICODE_BOGUS_CHAR_VALUE;
   172             return UNICODE_BOGUS_CHAR_VALUE;
   142 
   173 
   143         octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
   174         octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
   151         octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
   182         octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
   152         if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
   183         if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
   153             return UNICODE_BOGUS_CHAR_VALUE;
   184             return UNICODE_BOGUS_CHAR_VALUE;
   154 
   185 
   155         *_str += 5;  /* skip to next possible start of codepoint. */
   186         *_str += 5;  /* skip to next possible start of codepoint. */
   156         return UNICODE_BOGUS_CHAR_VALUE;
       
   157     } /* else if */
       
   158 
       
   159     else  /* six octets */
       
   160     {
       
   161         octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
       
   162         if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
       
   163             return UNICODE_BOGUS_CHAR_VALUE;
       
   164 
       
   165         octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
       
   166         if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
       
   167             return UNICODE_BOGUS_CHAR_VALUE;
       
   168 
       
   169         octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
       
   170         if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
       
   171             return UNICODE_BOGUS_CHAR_VALUE;
       
   172 
       
   173         octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
       
   174         if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
       
   175             return UNICODE_BOGUS_CHAR_VALUE;
       
   176 
       
   177         octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
       
   178         if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
       
   179             return UNICODE_BOGUS_CHAR_VALUE;
       
   180 
       
   181         *_str += 6;  /* skip to next possible start of codepoint. */
       
   182         return UNICODE_BOGUS_CHAR_VALUE;
   187         return UNICODE_BOGUS_CHAR_VALUE;
   183     } /* else if */
   188     } /* else if */
   184 
   189 
   185     return UNICODE_BOGUS_CHAR_VALUE;
   190     return UNICODE_BOGUS_CHAR_VALUE;
   186 } /* utf8codepoint */
   191 } /* utf8codepoint */