From 21b2fd39f2f010737c2cbfe4f103755fdbd52e5e Mon Sep 17 00:00:00 2001
From: "Ryan C. Gordon" <icculus@icculus.org>
Date: Sun, 20 Aug 2017 02:02:08 -0400
Subject: [PATCH] Made PHYSFS_caseFold() a public API.

---
 src/physfs.h         | 30 ++++++++++++++++++++++++++++++
 src/physfs_unicode.c |  9 ++++-----
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/src/physfs.h b/src/physfs.h
index a9135de6..d78c74de 100644
--- a/src/physfs.h
+++ b/src/physfs.h
@@ -2531,6 +2531,36 @@ PHYSFS_DECL void PHYSFS_utf8FromLatin1(const char *src, char *dst,
 
 /* Everything above this line is part of the PhysicsFS 2.0 API. */
 
+/**
+ * \fn int PHYSFS_caseFold(const PHYSFS_uint32 from, PHYSFS_uint32 *to)
+ * \brief "Fold" a Unicode codepoint to a lowercase equivalent.
+ *
+ * (This is for limited, hardcore use. If you don't immediately see a need
+ *  for it, you can probably ignore this forever.)
+ *
+ * This will convert a Unicode codepoint into its lowercase equivalent.
+ *  Bogus codepoints and codepoints without a lowercase equivalent will
+ *  be returned unconverted.
+ *
+ * Note that you might get multiple codepoints in return! The German Eszett,
+ *  for example, will fold down to two lowercase Latin 's' codepoints. The
+ *  theory is that if you fold down two strings, one with an Eszett and one
+ *  with "SS", they will match.
+ *
+ * \warning Anyone who is a student of Unicode knows about the "Turkish I"
+ *          problem. This API does not handle it. Assume this one letter
+ *          in all of Unicode will definitely fold sort of incorrectly. If
+ *          you don't know what this is about, you can probably ignore this
+ *          problem for most of the planet, but perfection is impossible.
+ *
+ *   \param from The codepoint to fold.
+ *   \param to Buffer to store the folded codepoint values into. This should
+ *             point to space for at least 3 PHYSFS_uint32 slots.
+ *  \return The number of codepoints the folding produced (between 1 and 3).
+ */
+PHYSFS_DECL int PHYSFS_caseFold(const PHYSFS_uint32 from, PHYSFS_uint32 *to);
+
+
 /**
  * \fn int PHYSFS_utf8stricmp(const char *str1, const char *str2)
  * \brief Case-insensitive compare of two UTF-8 strings.
diff --git a/src/physfs_unicode.c b/src/physfs_unicode.c
index f8ffec7c..0e006022 100644
--- a/src/physfs_unicode.c
+++ b/src/physfs_unicode.c
@@ -430,8 +430,7 @@ void PHYSFS_utf8FromUtf16(const PHYSFS_uint16 *src, char *dst, PHYSFS_uint64 len
 } /* PHYSFS_utf8FromUtf16 */
 
 
-/* (to) should point to at least 3 PHYSFS_uint32 slots. */
-static int locate_casefold_mapping(const PHYSFS_uint32 from, PHYSFS_uint32 *to)
+int PHYSFS_caseFold(const PHYSFS_uint32 from, PHYSFS_uint32 *to)
 {
     int i;
 
@@ -515,7 +514,7 @@ static int locate_casefold_mapping(const PHYSFS_uint32 from, PHYSFS_uint32 *to)
     /* Not found...there's no remapping for this codepoint. */
     *to = from;
     return 1;
-} /* locate_casefold_mapping */
+} /* PHYSFS_caseFold */
 
 
 #define UTFSTRICMP(bits) \
@@ -526,14 +525,14 @@ static int locate_casefold_mapping(const PHYSFS_uint32 from, PHYSFS_uint32 *to)
         if (head1 != tail1) { \
             cp1 = folded1[tail1++]; \
         } else { \
-            head1 = locate_casefold_mapping(utf##bits##codepoint(&str1), folded1); \
+            head1 = PHYSFS_caseFold(utf##bits##codepoint(&str1), folded1); \
             cp1 = folded1[0]; \
             tail1 = 1; \
         } \
         if (head2 != tail2) { \
             cp2 = folded2[tail2++]; \
         } else { \
-            head2 = locate_casefold_mapping(utf##bits##codepoint(&str2), folded2); \
+            head2 = PHYSFS_caseFold(utf##bits##codepoint(&str2), folded2); \
             cp2 = folded2[0]; \
             tail2 = 1; \
         } \