From 21b2fd39f2f010737c2cbfe4f103755fdbd52e5e Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Sun, 20 Aug 2017 02:02:08 -0400 Subject: [PATCH] Made PHYSFS_caseFold() a public API. --- src/physfs.h | 30 ++++++++++++++++++++++++++++++ src/physfs_unicode.c | 9 ++++----- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/physfs.h b/src/physfs.h index a9135de6..d78c74de 100644 --- a/src/physfs.h +++ b/src/physfs.h @@ -2531,6 +2531,36 @@ PHYSFS_DECL void PHYSFS_utf8FromLatin1(const char *src, char *dst, /* Everything above this line is part of the PhysicsFS 2.0 API. */ +/** + * \fn int PHYSFS_caseFold(const PHYSFS_uint32 from, PHYSFS_uint32 *to) + * \brief "Fold" a Unicode codepoint to a lowercase equivalent. + * + * (This is for limited, hardcore use. If you don't immediately see a need + * for it, you can probably ignore this forever.) + * + * This will convert a Unicode codepoint into its lowercase equivalent. + * Bogus codepoints and codepoints without a lowercase equivalent will + * be returned unconverted. + * + * Note that you might get multiple codepoints in return! The German Eszett, + * for example, will fold down to two lowercase Latin 's' codepoints. The + * theory is that if you fold two strings, one with an Eszett and one with + * "SS" down, they will match. + * + * \warning Anyone who is a student of Unicode knows about the "Turkish I" + * problem. This API does not handle it. Assume this one letter + * in all of Unicode will definitely fold sort of incorrectly. If + * you don't know what this is about, you can probably ignore this + * problem for most of the planet, but perfection is impossible. + * + * \param from The codepoint to fold. + * \param to Buffer to store the folded codepoint values into. This should + * point to space for at least 3 PHYSFS_uint32 slots. + * \return The number of codepoints the folding produced. Between 1 and 3. 
+ */ +PHYSFS_DECL int PHYSFS_caseFold(const PHYSFS_uint32 from, PHYSFS_uint32 *to); + + /** * \fn int PHYSFS_utf8stricmp(const char *str1, const char *str2) * \brief Case-insensitive compare of two UTF-8 strings. diff --git a/src/physfs_unicode.c b/src/physfs_unicode.c index f8ffec7c..0e006022 100644 --- a/src/physfs_unicode.c +++ b/src/physfs_unicode.c @@ -430,8 +430,7 @@ void PHYSFS_utf8FromUtf16(const PHYSFS_uint16 *src, char *dst, PHYSFS_uint64 len } /* PHYSFS_utf8FromUtf16 */ -/* (to) should point to at least 3 PHYSFS_uint32 slots. */ -static int locate_casefold_mapping(const PHYSFS_uint32 from, PHYSFS_uint32 *to) +int PHYSFS_caseFold(const PHYSFS_uint32 from, PHYSFS_uint32 *to) { int i; @@ -515,7 +514,7 @@ static int locate_casefold_mapping(const PHYSFS_uint32 from, PHYSFS_uint32 *to) /* Not found...there's no remapping for this codepoint. */ *to = from; return 1; -} /* locate_casefold_mapping */ +} /* PHYSFS_caseFold */ #define UTFSTRICMP(bits) \ @@ -526,14 +525,14 @@ static int locate_casefold_mapping(const PHYSFS_uint32 from, PHYSFS_uint32 *to) if (head1 != tail1) { \ cp1 = folded1[tail1++]; \ } else { \ - head1 = locate_casefold_mapping(utf##bits##codepoint(&str1), folded1); \ + head1 = PHYSFS_caseFold(utf##bits##codepoint(&str1), folded1); \ cp1 = folded1[0]; \ tail1 = 1; \ } \ if (head2 != tail2) { \ cp2 = folded2[tail2++]; \ } else { \ - head2 = locate_casefold_mapping(utf##bits##codepoint(&str2), folded2); \ + head2 = PHYSFS_caseFold(utf##bits##codepoint(&str2), folded2); \ cp2 = folded2[0]; \ tail2 = 1; \ } \