author | Sam Lantinga <slouken@libsdl.org> |
Mon, 13 Mar 2006 01:16:16 +0000 | |
changeset 1502 | d403a39389da |
parent 1501 | 73dc5d39bbf8 |
child 1516 | 4d241ea8a1cd |
permissions | -rw-r--r-- |
1501
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
1 |
|
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
2 |
#include <stdio.h> |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
3 |
|
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
4 |
#include "SDL.h" |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
5 |
|
1502
d403a39389da
UTF-32 by definition can't handle characters higher than 0x10FFFF
Sam Lantinga <slouken@libsdl.org>
parents:
1501
diff
changeset
|
6 |
static SDL_bool testutf1632(char *data) |
1501
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
7 |
{ |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
8 |
Uint32 *p = (Uint32 *)data; |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
9 |
while(*p) { |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
10 |
if ( *p > 0x10FFFF ) { |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
11 |
return SDL_FALSE; |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
12 |
} |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
13 |
++p; |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
14 |
} |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
15 |
return SDL_TRUE; |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
16 |
} |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
17 |
|
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
18 |
static size_t widelen(char *data) |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
19 |
{ |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
20 |
size_t len = 0; |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
21 |
Uint32 *p = (Uint32 *)data; |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
22 |
while(*p++) { |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
23 |
++len; |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
24 |
} |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
25 |
return len; |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
26 |
} |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
27 |
|
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
28 |
int main(int argc, char *argv[]) |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
29 |
{ |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
30 |
const char * formats[] = { |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
31 |
"UTF8", |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
32 |
"UTF-8", |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
33 |
"UTF16BE", |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
34 |
"UTF-16BE", |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
35 |
"UTF16LE", |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
36 |
"UTF-16LE", |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
37 |
"UTF32BE", |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
38 |
"UTF-32BE", |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
39 |
"UTF32LE", |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
40 |
"UTF-32LE", |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
41 |
"UCS4", |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
42 |
"UCS-4", |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
43 |
}; |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
44 |
char buffer[BUFSIZ]; |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
45 |
char *ucs4; |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
46 |
char *test[2]; |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
47 |
int i, j, index = 0; |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
48 |
FILE *file; |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
49 |
int errors = 0; |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
50 |
|
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
51 |
if ( !argv[1] ) { |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
52 |
argv[1] = "utf8.txt"; |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
53 |
} |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
54 |
file = fopen(argv[1], "rb"); |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
55 |
if ( !file ) { |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
56 |
fprintf(stderr, "Unable to open %s\n", argv[1]); |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
57 |
return (1); |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
58 |
} |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
59 |
|
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
60 |
while ( fgets(buffer, sizeof(buffer), file) ) { |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
61 |
/* Convert to UCS-4 */ |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
62 |
ucs4 = SDL_iconv_string("UCS-4", "UTF-8", buffer, SDL_strlen(buffer)+1); |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
63 |
size_t len = (widelen(ucs4)+1)*4; |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
64 |
for ( i = 0; i < SDL_arraysize(formats); ++i ) { |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
65 |
if ( (SDL_strncasecmp(formats[i], "UTF16", 5) == 0 || |
1502
d403a39389da
UTF-32 by definition can't handle characters higher than 0x10FFFF
Sam Lantinga <slouken@libsdl.org>
parents:
1501
diff
changeset
|
66 |
SDL_strncasecmp(formats[i], "UTF-16", 6) == 0 || |
d403a39389da
UTF-32 by definition can't handle characters higher than 0x10FFFF
Sam Lantinga <slouken@libsdl.org>
parents:
1501
diff
changeset
|
67 |
SDL_strncasecmp(formats[i], "UTF32", 5) == 0 || |
d403a39389da
UTF-32 by definition can't handle characters higher than 0x10FFFF
Sam Lantinga <slouken@libsdl.org>
parents:
1501
diff
changeset
|
68 |
SDL_strncasecmp(formats[i], "UTF-32", 6) == 0) && |
d403a39389da
UTF-32 by definition can't handle characters higher than 0x10FFFF
Sam Lantinga <slouken@libsdl.org>
parents:
1501
diff
changeset
|
69 |
!testutf1632(ucs4) ) { |
1501
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
70 |
continue; |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
71 |
} |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
72 |
test[0] = SDL_iconv_string(formats[i], "UCS-4", ucs4, len); |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
73 |
test[1] = SDL_iconv_string("UCS-4", formats[i], test[0], len); |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
74 |
if ( SDL_memcmp(test[1], ucs4, len) != 0 ) { |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
75 |
fprintf(stderr, "FAIL: %s\n", formats[i]); |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
76 |
++errors; |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
77 |
} |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
78 |
SDL_free(test[0]); |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
79 |
SDL_free(test[1]); |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
80 |
} |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
81 |
test[0] = SDL_iconv_string("UTF-8", "UCS-4", ucs4, len); |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
82 |
SDL_free(ucs4); |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
83 |
fputs(test[0], stdout); |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
84 |
SDL_free(test[0]); |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
85 |
} |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
86 |
return (errors ? errors + 1 : 0); |
73dc5d39bbf8
Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability
Sam Lantinga <slouken@libsdl.org>
parents:
diff
changeset
|
87 |
} |