extras/makecasefoldhashtable.pl
author Ryan C. Gordon <icculus@icculus.org>
Mon, 19 Mar 2007 04:30:10 +0000
changeset 833 b260f190aa59
parent 828 ee871d51510d
child 1373 527ef3c6a2d6
permissions -rwxr-xr-x
Whoops, switched two strings in CMakeLists.txt
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
828
ee871d51510d Bunch of work on Unicode...added case-folding stricmp, removed
Ryan C. Gordon <icculus@icculus.org>
parents:
diff changeset
     1
#!/usr/bin/perl -w
ee871d51510d Bunch of work on Unicode...added case-folding stricmp, removed
Ryan C. Gordon <icculus@icculus.org>
parents:
diff changeset
     2
ee871d51510d Bunch of work on Unicode...added case-folding stricmp, removed
Ryan C. Gordon <icculus@icculus.org>
parents:
diff changeset
     3
use warnings;
ee871d51510d Bunch of work on Unicode...added case-folding stricmp, removed
Ryan C. Gordon <icculus@icculus.org>
parents:
diff changeset
     4
use strict;
ee871d51510d Bunch of work on Unicode...added case-folding stricmp, removed
Ryan C. Gordon <icculus@icculus.org>
parents:
diff changeset
     5
ee871d51510d Bunch of work on Unicode...added case-folding stricmp, removed
Ryan C. Gordon <icculus@icculus.org>
parents:
diff changeset
     6
# Emit the fixed C prologue of the generated header: the banner comment and
# the preprocessor check that raises a compile error when
# __PHYSICSFS_INTERNAL__ is not defined.
# The terminator is single-quoted so the C text is copied out verbatim; none
# of it is meant to be interpolated by Perl.
print <<'__EOF__';
/*
 * This file is part of PhysicsFS (http://icculus.org/physfs/)
 *
 * This data generated by physfs/extras/makecasefoldhashtable.pl ...
 * Do not manually edit this file!
 *
 * Please see the file LICENSE.txt in the source's root directory.
 */

#ifndef __PHYSICSFS_INTERNAL__
#error Do not include this header from your applications.
#endif

__EOF__
ee871d51510d Bunch of work on Unicode...added case-folding stricmp, removed
Ryan C. Gordon <icculus@icculus.org>
parents:
diff changeset
    21
ee871d51510d Bunch of work on Unicode...added case-folding stricmp, removed
Ryan C. Gordon <icculus@icculus.org>
parents:
diff changeset
    22
ee871d51510d Bunch of work on Unicode...added case-folding stricmp, removed
Ryan C. Gordon <icculus@icculus.org>
parents:
diff changeset
    23
# One string per hash bucket (256 buckets), each starting out empty.  The
# parsing loop appends C initializer lines to the bucket a code point hashes
# to.
my @foldPairs = ('') x 256;
ee871d51510d Bunch of work on Unicode...added case-folding stricmp, removed
Ryan C. Gordon <icculus@icculus.org>
parents:
diff changeset
    28
ee871d51510d Bunch of work on Unicode...added case-folding stricmp, removed
Ryan C. Gordon <icculus@icculus.org>
parents:
diff changeset
    29
# Read casefolding.txt and sort every usable mapping into one of the 256
# hash buckets in @foldPairs.
#
# After comments and surrounding whitespace are stripped, a data line must
# look like "<code>; <status>; <mapping>;", where <code> is a hex code point,
# <status> is a single character and <mapping> is one or more hex code points
# separated by whitespace.  Only entries whose status is 'C' or 'F' are kept.
#
# Dies if the file cannot be opened or closed cleanly, or if a kept entry
# maps to more than three code points.
open(my $fh, '<', 'casefolding.txt')
    or die("failed to open casefolding.txt: $!\n");
while (my $line = <$fh>) {
    chomp($line);

    # strip comments from textfile...
    $line =~ s/\#.*\Z//;

    # strip whitespace...
    $line =~ s/\A\s+//;
    $line =~ s/\s+\Z//;

    # Anything that is not a data line (blank lines, etc.) is skipped.
    next if $line !~ /\A([a-fA-F0-9]+)\;\s*(.)\;\s*(.+)\;/;
    my ($code, $status, $mapping) = ($1, $2, $3);
    next unless (($status eq 'C') || ($status eq 'F'));

    # Bucket index: XOR the second-lowest byte of the code point into the
    # lowest byte, giving a value in 0..255.
    my $codepoint = hex($code);
    my $bucket = (($codepoint ^ ($codepoint >> 8)) & 0xFF);
    #print("// code '$code'   status '$status'   mapping '$mapping'\n");
    #print("// codepoint '$codepoint'  bucket '$bucket'\n");

    # Peel up to three hex code points off the front of the mapping; slots
    # that are not used keep the '0000' filler.
    my @folded = ('0000') x 3;
    for my $slot (0 .. $#folded) {
        $folded[$slot] = $1 if $mapping =~ s/\A([a-fA-F0-9]+)(\s*|\Z)//;
    }

    # Anything left over means the entry needs more than three slots.
    die("mapping space too small for '$code'\n") if ($mapping ne '');

    $foldPairs[$bucket] .= "    { 0x$code, 0x$folded[0], 0x$folded[1], 0x$folded[2] },\n";
}
# A failed close on a read handle can signal a read error, which would mean
# the table is silently truncated; treat it as fatal.
close($fh) or die("failed to close casefolding.txt: $!\n");
ee871d51510d Bunch of work on Unicode...added case-folding stricmp, removed
Ryan C. Gordon <icculus@icculus.org>
parents:
diff changeset
    56
ee871d51510d Bunch of work on Unicode...added case-folding stricmp, removed
Ryan C. Gordon <icculus@icculus.org>
parents:
diff changeset
    57
# Emit one static C array per non-empty bucket, named case_fold_NNN where NNN
# is the bucket index zero-padded to three digits.
for my $bucket (0 .. 255) {
    # Drop the trailing ",\n" so the final initializer has no dangling comma.
    $foldPairs[$bucket] =~ s/,\n\Z//;

    my $entries = $foldPairs[$bucket];
    next if $entries eq '';

    my $sym = sprintf('case_fold_%03d', $bucket);
    print("static const CaseFoldMapping ${sym}[] = {\n$entries\n};\n\n");
}
ee871d51510d Bunch of work on Unicode...added case-folding stricmp, removed
Ryan C. Gordon <icculus@icculus.org>
parents:
diff changeset
    66
ee871d51510d Bunch of work on Unicode...added case-folding stricmp, removed
Ryan C. Gordon <icculus@icculus.org>
parents:
diff changeset
    67
# Emit the 256-entry bucket table.  A non-empty bucket is written as
# { __PHYSFS_ARRAYLEN(case_fold_NNN), case_fold_NNN } (presumably the array's
# element count followed by the array itself); an empty bucket is written as
# { 0, NULL }.
print("\nstatic const CaseFoldHashBucket case_fold_hash[256] = {\n");

for my $bucket (0 .. 255) {
    if ($foldPairs[$bucket] ne '') {
        # Same zero-padded symbol name the per-bucket arrays were given.
        my $sym = sprintf('case_fold_%03d', $bucket);
        print("    { __PHYSFS_ARRAYLEN($sym), $sym },\n");
    } else {
        print("    { 0, NULL },\n");
    }
}
print("};\n\n");

exit 0;
ee871d51510d Bunch of work on Unicode...added case-folding stricmp, removed
Ryan C. Gordon <icculus@icculus.org>
parents:
diff changeset
    83
ee871d51510d Bunch of work on Unicode...added case-folding stricmp, removed
Ryan C. Gordon <icculus@icculus.org>
parents:
diff changeset
    84
# end of makecasefoldhashtable.pl ...
ee871d51510d Bunch of work on Unicode...added case-folding stricmp, removed
Ryan C. Gordon <icculus@icculus.org>
parents:
diff changeset
    85