Skip to content

Commit

Permalink
Bunch of work on Unicode...added case-folding stricmp, removed
Browse files Browse the repository at this point in the history
platform-specific stricmp implementations, changed appropriate calls to an
ASCII-only stricmp that ignores locale. Fixed case on UTF-8 API entry points.
  • Loading branch information
icculus committed Mar 15, 2007
1 parent 52cb5ba commit 607d39a
Show file tree
Hide file tree
Showing 18 changed files with 3,355 additions and 199 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.txt
Expand Up @@ -2,6 +2,10 @@
* CHANGELOG.
*/

03152007 - Bunch of work on Unicode...added case-folding stricmp, removed
platform-specific stricmp implementations, changed appropriate
calls to an ASCII-only stricmp that ignores locale. Fixed case on
UTF-8 API entry points.
03142007 - Dropped classic Mac OS support. It's just too hard to find a working
Mac OS 9 install and reasonable development tools, so it's not
worth it. If you still target OS 8 or 9, please use PhysicsFS 1.0.
Expand Down
1 change: 0 additions & 1 deletion TODO.txt
Expand Up @@ -9,7 +9,6 @@ UNICODE:
systems, otherwise try to use a sane codepage.
- OS/2: Codepages. No full Unicode in the filesystem, but we can probably make
a conversion effort.
- Everywhere: Move stricmp() out of platform layer and make it grok UTF-8.


Stuff:
Expand Down
4 changes: 2 additions & 2 deletions archivers/hog.c
Expand Up @@ -229,7 +229,7 @@ static int HOG_isArchive(const char *filename, int forWriting)
/*
 * Comparison callback over an array of HOGentry records.
 *
 *  _a  : opaque pointer that is cast to the HOGentry array.
 *  one : index of the first entry to compare.
 *  two : index of the second entry to compare.
 *
 * Returns the result of the string comparison of the two entries' names.
 *
 * NOTE(review): this is a diff rendering with the +/- markers stripped, so
 *  both the pre-change and the post-change return statements appear below;
 *  in the committed file only the __PHYSFS_stricmpASCII() call remains
 *  (the commit message describes it as an ASCII-only, locale-ignoring
 *  stricmp).
 */
static int hog_entry_cmp(void *_a, PHYSFS_uint32 one, PHYSFS_uint32 two)
{
HOGentry *a = (HOGentry *) _a;
/* removed by this commit: platform-layer comparison */
return(__PHYSFS_platformStricmp(a[one].name, a[two].name));
/* added by this commit: ASCII-only comparison */
return(__PHYSFS_stricmpASCII(a[one].name, a[two].name));
} /* hog_entry_cmp */


Expand Down Expand Up @@ -367,7 +367,7 @@ static HOGentry *hog_find_entry(HOGinfo *info, const char *name)
while (lo <= hi)
{
middle = lo + ((hi - lo) / 2);
rc = __PHYSFS_platformStricmp(name, a[middle].name);
rc = __PHYSFS_stricmpASCII(name, a[middle].name);
if (rc == 0) /* found it! */
return(&a[middle]);
else if (rc > 0)
Expand Down
2 changes: 1 addition & 1 deletion archivers/mvl.c
Expand Up @@ -324,7 +324,7 @@ static MVLentry *mvl_find_entry(MVLinfo *info, const char *name)
while (lo <= hi)
{
middle = lo + ((hi - lo) / 2);
rc = __PHYSFS_platformStricmp(name, a[middle].name);
rc = __PHYSFS_stricmpASCII(name, a[middle].name);
if (rc == 0) /* found it! */
return(&a[middle]);
else if (rc > 0)
Expand Down
4 changes: 2 additions & 2 deletions archivers/qpak.c
Expand Up @@ -40,8 +40,8 @@
#include "physfs_internal.h"

#if 1 /* Make this case insensitive? */
#define QPAK_strcmp(x, y) __PHYSFS_platformStricmp(x, y)
#define QPAK_strncmp(x, y, z) __PHYSFS_platformStrnicmp(x, y, z)
#define QPAK_strcmp(x, y) __PHYSFS_stricmpASCII(x, y)
#define QPAK_strncmp(x, y, z) __PHYSFS_strnicmpASCII(x, y, z)
#else
#define QPAK_strcmp(x, y) strcmp(x, y)
#define QPAK_strncmp(x, y, z) strncmp(x, y, z)
Expand Down
1,064 changes: 1,064 additions & 0 deletions extras/casefolding.txt

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions extras/ignorecase.c
Expand Up @@ -28,6 +28,7 @@
*/

/* I'm not screwing around with stricmp vs. strcasecmp... */
/* !!! FIXME: this will NOT work with UTF-8 strings in physfs2.0 */
static int caseInsensitiveStringCompare(const char *x, const char *y)
{
int ux, uy;
Expand Down
85 changes: 85 additions & 0 deletions extras/makecasefoldhashtable.pl
@@ -0,0 +1,85 @@
#!/usr/bin/perl -w

use warnings;
use strict;

# Reads "casefolding.txt" from the current directory and writes, to stdout,
#  C source for a 256-bucket hash table of case-folding mappings: one
#  "case_fold_NNN" array per non-empty bucket, followed by the
#  "case_fold_hash" table that indexes them.

# The fixed banner and include guard always come first in the output.
print <<__EOF__;
/*
* This file is part of PhysicsFS (http://icculus.org/physfs/)
*
* This data generated by physfs/extras/makecasefoldhashtable.pl ...
* Do not manually edit this file!
*
* Please see the file LICENSE.txt in the source's root directory.
*/
#ifndef __PHYSICSFS_INTERNAL__
#error Do not include this header from your applications.
#endif
__EOF__

my $BUCKETS = 256;

# Name of the C array holding one bucket's rows: "case_fold_" plus the
#  bucket index zero-padded to three digits (e.g. case_fold_007).
sub bucket_symbol {
    my ($index) = @_;
    return sprintf('case_fold_%03d', $index);
}

# One string of C initializer rows per bucket; '' means the bucket is empty.
my @buckets = ('') x $BUCKETS;

open(my $table, '<', 'casefolding.txt')
    or die("failed to open casefolding.txt: $!\n");

while (my $line = <$table>) {
    chomp($line);

    # Drop any trailing comment, then surrounding whitespace.
    $line =~ s/\#.*\Z//;
    $line =~ s/\A\s+//;
    $line =~ s/\s+\Z//;

    # Expected shape: "<hex code>; <one-char status>; <mapping>;"
    next unless $line =~ /\A([a-fA-F0-9]+)\;\s*(.)\;\s*(.+)\;/;
    my ($code, $status, $mapping) = ($1, $2, $3);

    # Bucket index: low byte of the code point XORed with the next byte up.
    my $codepoint = hex($code);
    my $bucket = (($codepoint ^ ($codepoint >> 8)) & 0xFF);

    # Only status 'C' and 'F' rows are emitted; everything else is ignored.
    next unless (($status eq 'C') or ($status eq 'F'));

    # A row carries up to three folded code points; unused slots stay 0000.
    my @folded = ('0000') x 3;
    foreach my $slot (@folded) {
        $slot = $1 if $mapping =~ s/\A([a-fA-F0-9]+)(\s*|\Z)//;
    }

    # Anything left over means the mapping had more than three code points.
    die("mapping space too small for '$code'\n") if ($mapping ne '');

    $buckets[$bucket] .=
        " { " . join(', ', map { "0x$_" } $code, @folded) . " },\n";
}
close($table);

# First pass: one static array per non-empty bucket.
foreach my $index (0 .. $BUCKETS - 1) {
    # The last row of an initializer list takes no trailing comma.
    $buckets[$index] =~ s/,\n\Z//;
    my $rows = $buckets[$index];
    next if $rows eq '';
    my $sym = bucket_symbol($index);
    print("static const CaseFoldMapping ${sym}[] = {\n$rows\n};\n\n");
}

print("\nstatic const CaseFoldHashBucket case_fold_hash[256] = {\n");

# Second pass: the table itself, pointing each bucket at its array (or NULL).
foreach my $index (0 .. $BUCKETS - 1) {
    if ($buckets[$index] eq '') {
        print(" { 0, NULL },\n");
        next;
    }
    my $sym = bucket_symbol($index);
    print(" { __PHYSFS_ARRAYLEN($sym), $sym },\n");
}
print("};\n\n");

exit 0;

# end of makecasefoldhashtable.pl ...

6 changes: 3 additions & 3 deletions physfs.c
Expand Up @@ -426,14 +426,14 @@ static DirHandle *openDirectory(const char *d, int forWriting)
/* Look for archivers with matching file extensions first... */
for (i = archivers; (*i != NULL) && (retval == NULL); i++)
{
if (__PHYSFS_platformStricmp(ext, (*i)->info->extension) == 0)
if (__PHYSFS_stricmpASCII(ext, (*i)->info->extension) == 0)
retval = tryOpenDir(*i, d, forWriting);
} /* for */

/* failing an exact file extension match, try all the others... */
for (i = archivers; (*i != NULL) && (retval == NULL); i++)
{
if (__PHYSFS_platformStricmp(ext, (*i)->info->extension) != 0)
if (__PHYSFS_stricmpASCII(ext, (*i)->info->extension) != 0)
retval = tryOpenDir(*i, d, forWriting);
} /* for */
} /* if */
Expand Down Expand Up @@ -1133,7 +1133,7 @@ int PHYSFS_setSaneConfig(const char *organization, const char *appName,
if ((l > extlen) && ((*i)[l - extlen - 1] == '.'))
{
ext = (*i) + (l - extlen);
if (__PHYSFS_platformStricmp(ext, archiveExt) == 0)
if (__PHYSFS_stricmpASCII(ext, archiveExt) == 0)
{
const char *d = PHYSFS_getRealDir(*i);
size_t allocsize = strlen(d) + strlen(dirsep) + l + 1;
Expand Down
12 changes: 6 additions & 6 deletions physfs.h
Expand Up @@ -2061,7 +2061,7 @@ __EXPORT__ void PHYSFS_enumerateFilesCallback(const char *dir,
* \param dst Buffer to store converted UTF-8 string.
* \param len Size, in bytes, of destination buffer.
*/
__EXPORT__ void PHYSFS_utf8fromucs4(const PHYSFS_uint32 *src, char *dst,
__EXPORT__ void PHYSFS_utf8FromUcs4(const PHYSFS_uint32 *src, char *dst,
PHYSFS_uint64 len);

/**
Expand All @@ -2083,7 +2083,7 @@ __EXPORT__ void PHYSFS_utf8fromucs4(const PHYSFS_uint32 *src, char *dst,
* \param dst Buffer to store converted UCS-4 string.
* \param len Size, in bytes, of destination buffer.
*/
__EXPORT__ void PHYSFS_utf8toucs4(const char *src, PHYSFS_uint32 *dst,
__EXPORT__ void PHYSFS_utf8ToUcs4(const char *src, PHYSFS_uint32 *dst,
PHYSFS_uint64 len);

/**
Expand All @@ -2109,7 +2109,7 @@ __EXPORT__ void PHYSFS_utf8toucs4(const char *src, PHYSFS_uint32 *dst,
* \param dst Buffer to store converted UTF-8 string.
* \param len Size, in bytes, of destination buffer.
*/
__EXPORT__ void PHYSFS_utf8fromucs2(const PHYSFS_uint16 *src, char *dst,
__EXPORT__ void PHYSFS_utf8FromUcs2(const PHYSFS_uint16 *src, char *dst,
PHYSFS_uint64 len);

/**
Expand All @@ -2135,11 +2135,11 @@ __EXPORT__ void PHYSFS_utf8fromucs2(const PHYSFS_uint16 *src, char *dst,
* \param dst Buffer to store converted UCS-2 string.
* \param len Size, in bytes, of destination buffer.
*/
__EXPORT__ void PHYSFS_utf8toucs2(const char *src, PHYSFS_uint16 *dst,
__EXPORT__ void PHYSFS_utf8ToUcs2(const char *src, PHYSFS_uint16 *dst,
PHYSFS_uint64 len);

/**
* \fn void PHYSFS_utf8fromlatin1(const char *src, char *dst, PHYSFS_uint64 len)
* \fn void PHYSFS_utf8FromLatin1(const char *src, char *dst, PHYSFS_uint64 len)
* \brief Convert a Latin1 string to a UTF-8 string.
*
* Latin1 strings are 8-bits per character: a popular "high ASCII"
Expand All @@ -2162,7 +2162,7 @@ __EXPORT__ void PHYSFS_utf8toucs2(const char *src, PHYSFS_uint16 *dst,
* \param dst Buffer to store converted UTF-8 string.
* \param len Size, in bytes, of destination buffer.
*/
__EXPORT__ void PHYSFS_utf8fromlatin1(const char *src, char *dst,
__EXPORT__ void PHYSFS_utf8FromLatin1(const char *src, char *dst,
PHYSFS_uint64 len);

/* Everything above this line is part of the PhysicsFS 2.0 API. */
Expand Down

0 comments on commit 607d39a

Please sign in to comment.