Real Unicode support for X11. Based on updated version of this patch:
author: Ryan C. Gordon <icculus@icculus.org>
Mon, 21 Nov 2005 00:16:34 +0000
changeset 1178 9867f3d86e44
parent 1177 e967ab22e6fd
child 1179 abb4267e7028
Real Unicode support for X11. Based on updated version of this patch: http://lists.arabeyes.org/archives/developer/2004/June/msg00160.html --ryan.
BUGS
src/video/x11/SDL_x11events.c
src/video/x11/SDL_x11events_c.h
src/video/x11/SDL_x11sym.h
src/video/x11/SDL_x11video.c
src/video/x11/SDL_x11video.h
--- a/BUGS	Sun Nov 20 23:59:26 2005 +0000
+++ b/BUGS	Mon Nov 21 00:16:34 2005 +0000
@@ -15,6 +15,7 @@
 	It requires handling of keyboard mapping events and using the XIM
 	input translation extension.  I will implement it as requested.
 	Latin-1 keyboard input works fine.
+	(UPDATE 04/06/2004: this bug is now fixed)
 
 	The keyboard modifiers are not set to the correct state on startup.
 
@@ -100,7 +101,8 @@
 	It requires handling of keyboard mapping events and using the XIM
 	input translation extension.  I will implement it as requested.
 	Latin-1 keyboard input works fine.
-
+	(UPDATE 04/06/2004: this bug is now fixed but needs testing)
+	
 	The keyboard modifiers are not set to the correct state on startup.
 
 Solaris:
@@ -110,7 +112,8 @@
 	It requires handling of keyboard mapping events and using the XIM
 	input translation extension.  I will implement it as requested.
 	Latin-1 keyboard input works fine.
-
+	(UPDATE 04/06/2004: this bug is now fixed but needs testing)
+	
 	The keyboard modifiers are not set to the correct state on startup.
 
 IRIX:
@@ -122,7 +125,8 @@
 	It requires handling of keyboard mapping events and using the XIM
 	input translation extension.  I will implement it as requested.
 	Latin-1 keyboard input works fine.
-
+	(UPDATE 04/06/2004: this bug is now fixed but needs testing)
+	
 	The keyboard modifiers are not set to the correct state on startup.
 
 EPOC:
@@ -148,7 +152,8 @@
 	It requires handling of keyboard mapping events and using the XIM
 	input translation extension.  I will implement it as requested.
 	Latin-1 keyboard input works fine.
-
+	(UPDATE 04/06/2004: this bug is now fixed but needs testing)
+	
 	The keyboard modifiers are not set to the correct state on startup.
 
 OSF/Tru64:  -= NOT YET SUPPORTED =-
@@ -160,7 +165,8 @@
 	It requires handling of keyboard mapping events and using the XIM
 	input translation extension.  I will implement it as requested.
 	Latin-1 keyboard input works fine.
-
+	(UPDATE 04/06/2004: this bug is now fixed but needs testing)
+	
 	The keyboard modifiers are not set to the correct state on startup.
 
 AIX:  -= NOT YET SUPPORTED =-
@@ -176,7 +182,8 @@
 	It requires handling of keyboard mapping events and using the XIM
 	input translation extension.  I will implement it as requested.
 	Latin-1 keyboard input works fine.
-
+	(UPDATE 04/06/2004: this bug is now fixed but needs testing)
+	
 	The keyboard modifiers are not set to the correct state on startup.
 
 	The AIX port was done by Carsten.Griwodz@KOM.tu-darmstadt.de
--- a/src/video/x11/SDL_x11events.c	Sun Nov 20 23:59:26 2005 +0000
+++ b/src/video/x11/SDL_x11events.c	Mon Nov 21 00:16:34 2005 +0000
@@ -62,7 +62,7 @@
 /* The translation tables from an X11 keysym to a SDL keysym */
 static SDLKey ODD_keymap[256];
 static SDLKey MISC_keymap[256];
-SDL_keysym *X11_TranslateKey(Display *display, XKeyEvent *xkey, KeyCode kc,
+SDL_keysym *X11_TranslateKey(Display *display, XIC ic, XKeyEvent *xkey, KeyCode kc,
 			     SDL_keysym *keysym);
 
 /* Check to see if this is a repeated key.
@@ -241,7 +241,7 @@
 #ifdef DEBUG_XEVENTS
 printf("KeymapNotify!\n");
 #endif
-		X11_SetKeyboardState(SDL_Display, xevent.xkeymap.key_vector);
+		X11_SetKeyboardState(SDL_Display, SDL_IC,  xevent.xkeymap.key_vector);
 	    }
 	    break;
 
@@ -293,7 +293,7 @@
 printf("KeyPress (X11 keycode = 0x%X)\n", xevent.xkey.keycode);
 #endif
 		posted = SDL_PrivateKeyboard(SDL_PRESSED,
-				X11_TranslateKey(SDL_Display, &xevent.xkey,
+				X11_TranslateKey(SDL_Display, SDL_IC, &xevent.xkey,
 						 xevent.xkey.keycode,
 						 &keysym));
 	    }
@@ -309,7 +309,7 @@
 		/* Check to see if this is a repeated key */
 		if ( ! X11_KeyRepeat(SDL_Display, &xevent) ) {
 			posted = SDL_PrivateKeyboard(SDL_RELEASED, 
-				X11_TranslateKey(SDL_Display, &xevent.xkey,
+				X11_TranslateKey(SDL_Display, SDL_IC,  &xevent.xkey,
 						 xevent.xkey.keycode,
 						 &keysym));
 		}
@@ -612,7 +612,128 @@
 	MISC_keymap[XK_Hyper_R&0xFF] = SDLK_MENU;   /* Windows "Menu" key */
 }
 
-SDL_keysym *X11_TranslateKey(Display *display, XKeyEvent *xkey, KeyCode kc,
+#ifdef X_HAVE_UTF8_STRING
+/*
+ * Decode the UTF-8 sequence (1 to 6 octets) starting at utf8 into its UCS4 value.
+ * Returns 0 if the lead octet is not a valid UTF-8 lead octet or if any continuing
+ * octet is not of the form 10xxxxxx. Overlong encodings are not rejected.
+ */
+Uint32 Utf8ToUcs4(const char * utf8)
+{
+	Uint32 c;
+	int i = 1;
+	int noOctets = 0;
+	int firstOctetMask = 0;
+	unsigned char firstOctet = utf8[0];
+	if (firstOctet < 0x80) {
+		/*
+		  Characters in the range:
+		    00000000 to 01111111 (ASCII Range)
+		  are stored in one octet:
+		    0xxxxxxx (The same as its ASCII representation)
+		  The least 7 significant bits of the first octet are the whole
+		  UCS4 representation.
+		*/
+		noOctets = 1;
+		firstOctetMask = 0x7F;  /* 0(1111111) - The most significant bit is ignored */
+	} else if ((firstOctet & 0xE0) /* get the most 3 significant bits by AND'ing with 11100000 */
+	              == 0xC0 ) {  /* see if those 3 bits are 110. If so, the char is in this range */
+		/*
+		  Characters in the range:
+		    00000000 10000000 to 00000111 11111111
+		  are stored in two octets:
+		    110xxxxx 10xxxxxx
+		  The least 5 significant bits of the first octet are the most 5 significant nonzero bits
+		  of the UCS4 representation.
+		*/
+		noOctets = 2;
+		firstOctetMask = 0x1F;  /* 000(11111) - The most 3 significant bits are ignored */
+	} else if ((firstOctet & 0xF0) /* get the most 4 significant bits by AND'ing with 11110000 */
+	              == 0xE0) {  /* see if those 4 bits are 1110. If so, the char is in this range */
+		/*
+		  Characters in the range:
+		    00001000 00000000 to 11111111 11111111
+		  are stored in three octets:
+		    1110xxxx 10xxxxxx 10xxxxxx
+		  The least 4 significant bits of the first octet are the most 4 significant nonzero bits
+		  of the UCS4 representation.
+		*/
+		noOctets = 3;
+		firstOctetMask = 0x0F; /* 0000(1111) - The most 4 significant bits are ignored */
+	} else if ((firstOctet & 0xF8) /* get the most 5 significant bits by AND'ing with 11111000 */
+	              == 0xF0) {  /* see if those 5 bits are 11110. If so, the char is in this range */
+		/*
+		  Characters in the range:
+		    00000001 00000000 00000000 to 00011111 11111111 11111111
+		  are stored in four octets:
+		    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+		  The least 3 significant bits of the first octet are the most 3 significant nonzero bits
+		  of the UCS4 representation.
+		*/
+		noOctets = 4;
+		firstOctetMask = 0x07; /* 00000(111) - The most 5 significant bits are ignored */
+	} else if ((firstOctet & 0xFC) /* get the most 6 significant bits by AND'ing with 11111100 */
+	              == 0xF8) { /* see if those 6 bits are 111110. If so, the char is in this range */
+		/*
+		  Characters in the range:
+		    00000000 00100000 00000000 00000000 to
+		    00000011 11111111 11111111 11111111
+		  are stored in five octets:
+		    111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+		  The least 2 significant bits of the first octet are the most 2 significant nonzero bits
+		  of the UCS4 representation.
+		*/
+		noOctets = 5;
+		firstOctetMask = 0x03; /* 000000(11) - The most 6 significant bits are ignored */
+	} else if ((firstOctet & 0xFE) /* get the most 7 significant bits by AND'ing with 11111110 */
+	              == 0xFC) { /* see if those 7 bits are 1111110. If so, the char is in this range */
+		/*
+		  Characters in the range:
+		    00000100 00000000 00000000 00000000 to
+		    01111111 11111111 11111111 11111111
+		  are stored in six octets:
+		    1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+		  The least significant bit of the first octet is the most significant nonzero bit
+		  of the UCS4 representation.
+		*/
+		noOctets = 6;
+		firstOctetMask = 0x01; /* 0000000(1) - The most 7 significant bits are ignored */
+	} else
+		return 0;  /* The given chunk is not a valid UTF-8 encoded Unicode character */
+	
+	/*
+	  AND'ing firstOctet with its mask trims the bits that identify the number of
+	  continuing octets (if any) and leaves only the free bits (the x's); these are
+	  the most significant bits of the UCS4 representation. Sample:
+	  1-octet:    0xxxxxxx  &  01111111 = 0xxxxxxx
+	  2-octets:  110xxxxx  &  00011111 = 000xxxxx
+	*/
+	c = firstOctet & firstOctetMask;
+	
+	/* Now, start filling c with the bits from the continuing octets from utf8. */
+	for (i = 1; i < noOctets; i++) {
+		/* A valid continuing octet is of the form 10xxxxxx */
+		if ((utf8[i] & 0xC0) /* get the most 2 significant bits by AND'ing with 11000000 */
+		    != 0x80) /* see if those 2 bits are 10. If not, this is a malformed sequence. */
+			/* The given chunk is malformed, or is a partial sequence (e.g. cut off
+			   by the end of the string) that only begins a valid character */
+			return 0;
+		
+		/* Make room for the next 6-bits */
+		c <<= 6;
+		
+		/*
+		  Take only the least 6 significant bits of the current octet (utf8[i]), by AND'ing
+		  it with 00111111, and OR them into the room just created in c.
+		*/
+		c |= utf8[i] & 0x3F;
+	}
+	return c;
+}
+#endif
+
+
+SDL_keysym *X11_TranslateKey(Display *display, XIC ic, XKeyEvent *xkey, KeyCode kc,
 			     SDL_keysym *keysym)
 {
 	KeySym xsym;
@@ -695,8 +816,7 @@
 	keysym->unicode = 0;
 	if ( SDL_TranslateUNICODE && xkey ) {
 		static XComposeStatus state;
-		/* Until we handle the IM protocol, use XLookupString() */
-		unsigned char keybuf[32];
+
 
 #define BROKEN_XFREE86_INTERNATIONAL_KBD
 /* This appears to be a magical flag that is used with AltGr on
@@ -711,15 +831,31 @@
 		}
 #endif
 		/* Look up the translated value for the key event */
-		if ( pXLookupString(xkey, (char *)keybuf, sizeof(keybuf),
-							NULL, &state) ) {
-			/*
-			 * FIXME,: XLookupString() may yield more than one
-			 * character, so we need a mechanism to allow for
-			 * this (perhaps generate null keypress events with
-			 * a unicode value)
-			 */
-			keysym->unicode = keybuf[0];
+
+		/* if there is no connection with the IM server, use the regular method */
+		if (ic == NULL) {
+			unsigned char keybuf[32];
+
+			if ( pXLookupString(xkey, (char *)keybuf, sizeof(keybuf),
+								NULL, &state) ) {
+				/*
+				* FIXME,: XLookupString() may yield more than one
+				* character, so we need a mechanism to allow for
+				* this (perhaps generate null keypress events with
+				* a unicode value)
+				*/
+				keysym->unicode = keybuf[0];
+			}
+		} else {  /* else, use the IM protocol */
+			#ifdef X_HAVE_UTF8_STRING
+			/* A UTF-8 character can be at most 6 bytes; zeroed so an unfilled buffer (e.g. XBufferOverflow) decodes to 0 */
+			unsigned char keybuf[6] = { 0 };
+			pXSetICFocus(ic);
+			if ( pXutf8LookupString(ic, (XKeyPressedEvent *)xkey, (char *)keybuf, sizeof(keybuf),
+			                                    NULL, (Status *)&state) )
+				keysym->unicode = Utf8ToUcs4((const char *)keybuf);
+			pXUnsetICFocus(ic);
+			#endif
 		}
 	}
 	return(keysym);
@@ -832,12 +968,13 @@
 	return(unicode);
 }
 
+
 /*
  * Called when focus is regained, to read the keyboard state and generate
  * synthetic keypress/release events.
  * key_vec is a bit vector of keycodes (256 bits)
  */
-void X11_SetKeyboardState(Display *display, const char *key_vec)
+void X11_SetKeyboardState(Display *display, XIC ic, const char *key_vec)
 {
 	char keys_return[32];
 	int i;
@@ -886,7 +1023,7 @@
 			if(key_vec[i] & (1 << j)) {
 				SDL_keysym sk;
 				KeyCode kc = i << 3 | j;
-				X11_TranslateKey(display, NULL, kc, &sk);
+				X11_TranslateKey(display, ic, NULL, kc, &sk);
 				new_kstate[sk.sym] = SDL_PRESSED;
 				xcode[sk.sym] = kc;
 			}
--- a/src/video/x11/SDL_x11events_c.h	Sun Nov 20 23:59:26 2005 +0000
+++ b/src/video/x11/SDL_x11events_c.h	Mon Nov 21 00:16:34 2005 +0000
@@ -30,5 +30,5 @@
 /* Functions to be exported */
 extern void X11_InitOSKeymap(_THIS);
 extern void X11_PumpEvents(_THIS);
-extern void X11_SetKeyboardState(Display *display, const char *key_vec);
+extern void X11_SetKeyboardState(Display *display, XIC ic, const char *key_vec);
 
--- a/src/video/x11/SDL_x11sym.h	Sun Nov 20 23:59:26 2005 +0000
+++ b/src/video/x11/SDL_x11sym.h	Mon Nov 21 00:16:34 2005 +0000
@@ -114,6 +114,13 @@
 SDL_X11_SYM(int,XextRemoveDisplay,(XExtensionInfo*,Display*))
 #ifdef X_HAVE_UTF8_STRING
 SDL_X11_SYM(int,Xutf8TextListToTextProperty,(Display*,char**,int,XICCEncodingStyle,XTextProperty*))
+SDL_X11_SYM(int,Xutf8LookupString,(XIC,XKeyPressedEvent*,char*,int,KeySym*,Status*))
+SDL_X11_SYM(XIC,XCreateIC,(XIM, ...))
+SDL_X11_SYM(void,XDestroyIC,(XIC))
+SDL_X11_SYM(void,XSetICFocus,(XIC))
+SDL_X11_SYM(void,XUnsetICFocus,(XIC))
+SDL_X11_SYM(XIM,XOpenIM,(Display*,struct _XrmHashBucketRec*,char*,char*))
+SDL_X11_SYM(Status,XCloseIM,(XIM))
 #endif
 SDL_X11_SYM(void,_XEatData,(Display*,unsigned long))
 SDL_X11_SYM(void,_XFlush,(Display*))
--- a/src/video/x11/SDL_x11video.c	Sun Nov 20 23:59:26 2005 +0000
+++ b/src/video/x11/SDL_x11video.c	Mon Nov 21 00:16:34 2005 +0000
@@ -349,6 +349,7 @@
 		 FocusChangeMask | KeyPressMask | KeyReleaseMask
 		 | PropertyChangeMask | StructureNotifyMask | KeymapStateMask);
 
+    char * savedclassname = 0;
     /* Set the class hints so we can get an icon (AfterStep) */
     {
 	XClassHint *classhints;
@@ -358,6 +359,7 @@
             if ( ! classname ) {
                 classname = "SDL_App";
             }
+	    savedclassname = strdup(classname);
 	    classhints->res_name = classname;
 	    classhints->res_class = classname;
 	    pXSetClassHint(SDL_Display, WMwindow, classhints);
@@ -365,6 +367,33 @@
 	}
     }
 
+    /* Setup the communication with the IM server */
+    SDL_IM = NULL;
+    SDL_IC = NULL;
+
+    #ifdef X_HAVE_UTF8_STRING
+    SDL_IM = pXOpenIM(SDL_Display, NULL, savedclassname, savedclassname);
+    if (SDL_IM == NULL) {
+	SDL_SetError("no input method could be opened");
+    } else {
+	SDL_IC = pXCreateIC(SDL_IM,
+			XNClientWindow, WMwindow,
+			XNFocusWindow, WMwindow,
+			XNInputStyle, XIMPreeditNothing  | XIMStatusNothing,
+			XNResourceName, savedclassname,
+			XNResourceClass, savedclassname,
+			NULL);
+	if (SDL_IC == NULL) {
+		SDL_SetError("no input context could be created");
+		pXCloseIM(SDL_IM);
+		SDL_IM = NULL;
+	}
+    }
+    #endif
+
+    free(savedclassname);
+
+
     /* Allow the window to be deleted by the window manager */
     WM_DELETE_WINDOW = pXInternAtom(SDL_Display, "WM_DELETE_WINDOW", False);
     pXSetWMProtocols(SDL_Display, WMwindow, &WM_DELETE_WINDOW, 1);
@@ -808,7 +837,6 @@
 					| ButtonPressMask | ButtonReleaseMask
 					| PointerMotionMask | ExposureMask ));
 	}
-
 	/* Create the graphics context here, once we have a window */
 	if ( flags & SDL_OPENGL ) {
 		if ( X11_GL_CreateContext(this) < 0 ) {
@@ -854,7 +882,7 @@
 	}
 
 	/* Update the internal keyboard state */
-	X11_SetKeyboardState(SDL_Display, NULL);
+	X11_SetKeyboardState(SDL_Display, SDL_IC, NULL);
 
 	/* When the window is first mapped, ignore non-modifier keys */
 	{
@@ -892,6 +920,7 @@
 			screen->flags &= ~SDL_FULLSCREEN;
 		}
 	}
+	
 	return(0);
 }
 
@@ -1231,6 +1260,18 @@
 		/* Flush any delayed updates */
 		pXSync(GFX_Display, False);
 
+		/* Close the connection with the IM server: destroy the IC first, then close the IM it was created from */
+		#ifdef X_HAVE_UTF8_STRING
+		if (SDL_IC != NULL) {
+			pXDestroyIC(SDL_IC);
+			SDL_IC = NULL;
+		}
+		if (SDL_IM != NULL) {
+			pXCloseIM(SDL_IM);
+			SDL_IM = NULL;
+		}
+		#endif
+
 		/* Start shutting down the windows */
 		X11_DestroyImage(this, this->screen);
 		X11_DestroyWindow(this, this->screen);
--- a/src/video/x11/SDL_x11video.h	Sun Nov 20 23:59:26 2005 +0000
+++ b/src/video/x11/SDL_x11video.h	Mon Nov 21 00:16:34 2005 +0000
@@ -62,6 +62,8 @@
     Window SDL_Window;		/* Shared by both displays (no X security?) */
     Atom WM_DELETE_WINDOW;	/* "close-window" protocol atom */
     WMcursor *BlankCursor;	/* The invisible cursor */
+    XIM X11_IM;		/* Used to communicate with the input method (IM) server */
+    XIC X11_IC;		/* Used for retaining the state, properties, and semantics of communication with the input method (IM) server */
 
     char *SDL_windowid;		/* Flag: true if we have been passed a window */
 
@@ -147,15 +149,15 @@
 #define SDL_Display		(this->hidden->X11_Display)
 #define GFX_Display		(this->hidden->GFX_Display)
 #define SDL_Screen		DefaultScreen(this->hidden->X11_Display)
-
 #define SDL_Visual		(this->hidden->vis)
-
 #define SDL_Root		RootWindow(SDL_Display, SDL_Screen)
 #define WMwindow		(this->hidden->WMwindow)
 #define FSwindow		(this->hidden->FSwindow)
 #define SDL_Window		(this->hidden->SDL_Window)
 #define WM_DELETE_WINDOW	(this->hidden->WM_DELETE_WINDOW)
 #define SDL_BlankCursor		(this->hidden->BlankCursor)
+#define SDL_IM		(this->hidden->X11_IM)
+#define SDL_IC		(this->hidden->X11_IC)
 #define SDL_windowid		(this->hidden->SDL_windowid)
 #define using_dga		(this->hidden->using_dga)
 #define use_mitshm		(this->hidden->use_mitshm)
@@ -186,7 +188,6 @@
 #define gamma_saved		(this->hidden->gamma_saved)
 #define gamma_changed		(this->hidden->gamma_changed)
 #define SDL_iconcolors		(this->hidden->iconcolors)
-
 /* Some versions of XFree86 have bugs - detect if this is one of them */
 #define BUGGY_XFREE86(condition, buggy_version) \
 ((strcmp(ServerVendor(SDL_Display), "The XFree86 Project, Inc") == 0) && \