physfs.h
changeset 785 1ea0f6549eb2
parent 779 7acc8a8abdf6
child 789 ede0553c4232
equal deleted inserted replaced
784:fb4a01af9ff7 785:1ea0f6549eb2
   145  *   - .HOG (Descent I/II HOG file archives)
   145  *   - .HOG (Descent I/II HOG file archives)
   146  *   - .MVL (Descent II movielib archives)
   146  *   - .MVL (Descent II movielib archives)
   147  *   - .WAD (DOOM engine archives)
   147  *   - .WAD (DOOM engine archives)
   148  *   - .MIX (Older Westwood games archives)
   148  *   - .MIX (Older Westwood games archives)
   149  *
   149  *
       
   150  *
       
   151  * String policy for PhysicsFS 2.0 and later:
       
   152  *
       
   153  * PhysicsFS 1.0 dealt with null-terminated ASCII strings. All high ASCII
       
   154  *  chars resulted in undefined behaviour, and there was no Unicode support.
       
   155  *
       
   156  * All strings passed through PhysicsFS are in null-terminated UTF-8 format.
       
   157  *  This means that if all you care about is English (ASCII characters <= 127)
       
   158  *  then you just use regular C strings. If you care about Unicode (and you
       
   159  *  should!) then you need to figure out what your platform wants, needs, and
       
   160  *  offers. If you are on Windows and build with Unicode support, your TCHAR
       
   161  *  strings are two bytes per character (this is called "UCS-2 encoding"). You
       
   162  *  should convert them to UTF-8 before handing them to PhysicsFS with
       
   163  *  PHYSFS_utf8fromucs2(). If you're using Unix or Mac OS X, your wchar_t
       
   164  *  strings are four bytes per character ("UCS-4 encoding"). Use
       
   165  *  PHYSFS_utf8fromucs4(). Mac OS X can give you UTF-8 directly from a CFString,
       
   166  *  and many Unixes generally give you C strings in UTF-8 format everywhere.
       
   167  *  If you have a single-byte high ASCII charset, like so-many European
       
   168  *  "codepages" you may be out of luck. We'll convert from "Latin1" to UTF-8
       
   169  *  only, and never back to Latin1. If you're above ASCII 127, all bets are
       
   170  *  off: move to Unicode or use your platform's facilities. Passing a C string
       
   171  *  with high-ASCII data that isn't UTF-8 encoded will NOT do what you expect!
       
   172  *
       
   173  * Naturally, there's also PHYSFS_utf8toucs2() and PHYSFS_utf8toucs4() to get
       
   174  *  data back into a format you like. Behind the scenes, PhysicsFS will use
       
   175  *  Unicode where possible: the UTF-8 strings on Windows will be converted
       
   176  *  and used with the multibyte Windows APIs, for example.
       
   177  *
       
   178  * PhysicsFS offers basic encoding conversion support, but not a whole string
       
   179  *  library. Get your stuff into whatever format you can work with.
       
   180  *
       
   181  *
       
   182  * Other stuff:
       
   183  *
   150  * Please see the file LICENSE in the source's root directory for licensing
   184  * Please see the file LICENSE in the source's root directory for licensing
   151  *  and redistribution rights.
   185  *  and redistribution rights.
   152  *
   186  *
   153  * Please see the file CREDITS in the source's root directory for a more or
   187  * Please see the file CREDITS in the source's root directory for a more or
   154  *  less complete list of who's responsible for this.
   188  *  less complete list of who's responsible for this.
  1987 
  2021 
  1988 __EXPORT__ void PHYSFS_enumerateFilesCallback(const char *dir,
  2022 __EXPORT__ void PHYSFS_enumerateFilesCallback(const char *dir,
  1989                                               PHYSFS_EnumFilesCallback c,
  2023                                               PHYSFS_EnumFilesCallback c,
  1990                                               void *d);
  2024                                               void *d);
  1991 
  2025 
       
  2026 /**
       
  2027  * \fn void PHYSFS_utf8fromucs4(const PHYSFS_uint32 *src, char *dst, PHYSFS_uint64 len)
       
  2028  * \brief Convert a UCS-4 string to a UTF-8 string.
       
  2029  *
       
  2030  * UCS-4 strings are 32-bits per character: \c wchar_t on Unix.
       
  2031  *
       
  2032  * To ensure that the destination buffer is large enough for the conversion,
       
  2033  *  please allocate a buffer that is the same size as the source buffer. UTF-8
       
  2034  *  never uses more than 32-bits per character, so while it may shrink a UCS-4
       
  2035  *  string, it will never expand it.
       
  2036  *
       
  2037  * Strings that don't fit in the destination buffer will be truncated, but
       
  2038  *  will always be null-terminated and never have an incomplete UTF-8
       
  2039  *  sequence at the end.
       
  2040  *
       
  2041  *   \param src Null-terminated source string in UCS-4 format.
       
  2042  *   \param dst Buffer to store converted UTF-8 string.
       
  2043  *   \param len Size, in bytes, of destination buffer.
       
  2044  */
       
  2045 __EXPORT__ void PHYSFS_utf8fromucs4(const PHYSFS_uint32 *src, char *dst,
       
  2046                                     PHYSFS_uint64 len);
       
  2047 
       
  2048 /**
       
  2049  * \fn void PHYSFS_utf8toucs4(const char *src, PHYSFS_uint32 *dst, PHYSFS_uint64 len)
       
  2050  * \brief Convert a UTF-8 string to a UCS-4 string.
       
  2051  *
       
  2052  * UCS-4 strings are 32-bits per character: \c wchar_t on Unix.
       
  2053  *
       
  2054  * To ensure that the destination buffer is large enough for the conversion,
       
  2055  *  please allocate a buffer that is four times the size of the source buffer.
       
  2056  *  UTF-8 uses from one to four bytes per character, but UCS-4 always uses
       
  2057  *  four, so an entirely low-ASCII string will quadruple in size!
       
  2058  *
       
  2059  * Strings that don't fit in the destination buffer will be truncated, but
       
  2060  *  will always be null-terminated and never have an incomplete UCS-4
       
  2061  *  sequence at the end.
       
  2062  *
       
  2063  *   \param src Null-terminated source string in UTF-8 format.
       
  2064  *   \param dst Buffer to store converted UCS-4 string.
       
  2065  *   \param len Size, in bytes, of destination buffer.
       
  2066  */
       
  2067 __EXPORT__ void PHYSFS_utf8toucs4(const char *src, PHYSFS_uint32 *dst,
       
  2068                                   PHYSFS_uint64 len);
       
  2069 
       
  2070 /**
       
  2071  * \fn void PHYSFS_utf8fromucs2(const PHYSFS_uint16 *src, char *dst, PHYSFS_uint64 len)
       
  2072  * \brief Convert a UCS-2 string to a UTF-8 string.
       
  2073  *
       
  2074  * UCS-2 strings are 16-bits per character: \c TCHAR on Windows, when building
       
  2075  *  with Unicode support.
       
  2076  *
       
  2077  * To ensure that the destination buffer is large enough for the conversion,
       
  2078  *  please allocate a buffer that is double the size of the source buffer.
       
  2079  *  UTF-8 never uses more than 32-bits per character, so while it may shrink
       
  2080  *  a UCS-2 string, it may also expand it.
       
  2081  *
       
  2082  * Strings that don't fit in the destination buffer will be truncated, but
       
  2083  *  will always be null-terminated and never have an incomplete UTF-8
       
  2084  *  sequence at the end.
       
  2085  *
       
  2086  * Please note that UCS-2 is not UTF-16; we do not support the "surrogate"
       
  2087  *  values at this time.
       
  2088  *
       
  2089  *   \param src Null-terminated source string in UCS-2 format.
       
  2090  *   \param dst Buffer to store converted UTF-8 string.
       
  2091  *   \param len Size, in bytes, of destination buffer.
       
  2092  */
       
  2093 __EXPORT__ void PHYSFS_utf8fromucs2(const PHYSFS_uint16 *src, char *dst,
       
  2094                                     PHYSFS_uint64 len);
       
  2095 
       
  2096 /**
       
  2097  * \fn void PHYSFS_utf8toucs2(const char *src, PHYSFS_uint16 *dst, PHYSFS_uint64 len)
       
  2098  * \brief Convert a UTF-8 string to a UCS-2 string.
       
  2099  *
       
  2100  * UCS-2 strings are 16-bits per character: \c TCHAR on Windows, when building
       
  2101  *  with Unicode support.
       
  2102  *
       
  2103  * To ensure that the destination buffer is large enough for the conversion,
       
  2104  *  please allocate a buffer that is double the size of the source buffer.
       
  2105  *  UTF-8 uses from one to four bytes per character, but UCS-2 always uses
       
  2106  *  two, so an entirely low-ASCII string will double in size!
       
  2107  *
       
  2108  * Strings that don't fit in the destination buffer will be truncated, but
       
  2109  *  will always be null-terminated and never have an incomplete UCS-2
       
  2110  *  sequence at the end.
       
  2111  *
       
  2112  * Please note that UCS-2 is not UTF-16; we do not support the "surrogate"
       
  2113  *  values at this time.
       
  2114  *
       
  2115  *   \param src Null-terminated source string in UTF-8 format.
       
  2116  *   \param dst Buffer to store converted UCS-2 string.
       
  2117  *   \param len Size, in bytes, of destination buffer.
       
  2118  */
       
  2119 __EXPORT__ void PHYSFS_utf8toucs2(const char *src, PHYSFS_uint16 *dst,
       
  2120                                   PHYSFS_uint64 len);
       
  2121 
       
  2122 /**
       
  2123  * \fn void PHYSFS_utf8fromlatin1(const char *src, char *dst, PHYSFS_uint64 len)
       
  2124  * \brief Convert a Latin1 string to a UTF-8 string.
       
  2125  *
       
  2126  * Latin1 strings are 8-bits per character: a popular "high ASCII"
       
  2127  *  encoding.
       
  2128  *
       
  2129  * To ensure that the destination buffer is large enough for the conversion,
       
  2130  *  please allocate a buffer that is double the size of the source buffer.
       
  2131  *  UTF-8 expands latin1 codepoints over 127 from 1 to 2 bytes, so the string
       
  2132  *  may grow in some cases.
       
  2133  *
       
  2134  * Strings that don't fit in the destination buffer will be truncated, but
       
  2135  *  will always be null-terminated and never have an incomplete UTF-8
       
  2136  *  sequence at the end.
       
  2137  *
       
  2138  * Please note that we do not supply a UTF-8 to Latin1 converter, since Latin1
       
  2139  *  can't express most Unicode codepoints. It's a legacy encoding; you should
       
  2140  *  be converting away from it at all times.
       
  2141  *
       
  2142  *   \param src Null-terminated source string in Latin1 format.
       
  2143  *   \param dst Buffer to store converted UTF-8 string.
       
  2144  *   \param len Size, in bytes, of destination buffer.
       
  2145  */
       
  2146 __EXPORT__ void PHYSFS_utf8fromlatin1(const char *src, char *dst,
       
  2147                                   PHYSFS_uint64 len);
  1992 
  2148 
  1993 /* Everything above this line is part of the PhysicsFS 2.0 API. */
  2149 /* Everything above this line is part of the PhysicsFS 2.0 API. */
  1994 
  2150 
  1995 
  2151 
  1996 #ifdef __cplusplus
  2152 #ifdef __cplusplus