src/video/SDL_yuv_sw.c
changeset 0 74212992fb08
child 9 a1c15fa4abb9
equal deleted inserted replaced
-1:000000000000 0:74212992fb08
       
     1 /*
       
     2     SDL - Simple DirectMedia Layer
       
     3     Copyright (C) 1997, 1998, 1999, 2000, 2001  Sam Lantinga
       
     4 
       
     5     This library is free software; you can redistribute it and/or
       
     6     modify it under the terms of the GNU Library General Public
       
     7     License as published by the Free Software Foundation; either
       
     8     version 2 of the License, or (at your option) any later version.
       
     9 
       
    10     This library is distributed in the hope that it will be useful,
       
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       
    13     Library General Public License for more details.
       
    14 
       
    15     You should have received a copy of the GNU Library General Public
       
    16     License along with this library; if not, write to the Free
       
    17     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
       
    18 
       
    19     Sam Lantinga
       
    20     slouken@devolution.com
       
    21 */
       
    22 
       
    23 #ifdef SAVE_RCSID
       
    24 static char rcsid =
       
    25  "@(#) $Id$";
       
    26 #endif
       
    27 
       
    28 /* This is the software implementation of the YUV video overlay support */
       
    29 
       
    30 /* This code was derived from code carrying the following copyright notices:
       
    31 
       
    32  * Copyright (c) 1995 The Regents of the University of California.
       
    33  * All rights reserved.
       
    34  * 
       
    35  * Permission to use, copy, modify, and distribute this software and its
       
    36  * documentation for any purpose, without fee, and without written agreement is
       
    37  * hereby granted, provided that the above copyright notice and the following
       
    38  * two paragraphs appear in all copies of this software.
       
    39  * 
       
    40  * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
       
    41  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
       
    42  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
       
    43  * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       
    44  * 
       
    45  * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
       
    46  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
       
    47  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
       
    48  * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
       
    49  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
       
    50 
       
    51  * Copyright (c) 1995 Erik Corry
       
    52  * All rights reserved.
       
    53  * 
       
    54  * Permission to use, copy, modify, and distribute this software and its
       
    55  * documentation for any purpose, without fee, and without written agreement is
       
    56  * hereby granted, provided that the above copyright notice and the following
       
    57  * two paragraphs appear in all copies of this software.
       
    58  * 
       
    59  * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
       
    60  * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
       
    61  * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
       
    62  * OF THE POSSIBILITY OF SUCH DAMAGE.
       
    63  * 
       
    64  * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
       
    65  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
       
    66  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
       
    67  * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
       
    68  * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
       
    69 
       
    70  * Portions of this software Copyright (c) 1995 Brown University.
       
    71  * All rights reserved.
       
    72  * 
       
    73  * Permission to use, copy, modify, and distribute this software and its
       
    74  * documentation for any purpose, without fee, and without written agreement
       
    75  * is hereby granted, provided that the above copyright notice and the
       
    76  * following two paragraphs appear in all copies of this software.
       
    77  * 
       
    78  * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
       
    79  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
       
    80  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
       
    81  * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       
    82  * 
       
    83  * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
       
    84  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
       
    85  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
       
    86  * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
       
    87  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
       
    88  */
       
    89 
       
    90 #include <stdlib.h>
       
    91 #include <string.h>
       
    92 
       
    93 #include "SDL_error.h"
       
    94 #include "SDL_video.h"
       
    95 #include "SDL_stretch_c.h"
       
    96 #include "SDL_yuvfuncs.h"
       
    97 #include "SDL_yuv_sw_c.h"
       
    98 
       
    99 /* Function to check the CPU flags */
       
   100 #define MMX_CPU		0x800000
       
   101 #ifdef USE_ASMBLIT
       
   102 #define CPU_Flags()	Hermes_X86_CPU()
       
   103 #else
       
   104 #define CPU_Flags()	0L
       
   105 #endif
       
   106 
       
   107 #ifdef USE_ASMBLIT
       
   108 #define X86_ASSEMBLER
       
   109 #define HermesConverterInterface	void
       
   110 #define HermesClearInterface		void
       
   111 #define STACKCALL
       
   112 typedef Uint32 int32;
       
   113 
       
   114 #include "HeadX86.h"
       
   115 #endif
       
   116 
       
   117 /* The functions used to manipulate software video overlays */
       
   118 static struct private_yuvhwfuncs sw_yuvfuncs = {
       
   119 	SDL_LockYUV_SW,
       
   120 	SDL_UnlockYUV_SW,
       
   121 	SDL_DisplayYUV_SW,
       
   122 	SDL_FreeYUV_SW
       
   123 };
       
   124 
       
   125 /* RGB conversion lookup tables */
       
   126 struct private_yuvhwdata {
       
   127 	SDL_Surface *stretch;
       
   128 	SDL_Surface *display;
       
   129 	Uint8 *pixels;
       
   130 	int *colortab;
       
   131 	Uint32 *rgb_2_pix;
       
   132 	void (*Display1X)(int *colortab, Uint32 *rgb_2_pix,
       
   133                           unsigned char *lum, unsigned char *cr,
       
   134                           unsigned char *cb, unsigned char *out,
       
   135                           int rows, int cols, int mod );
       
   136 	void (*Display2X)(int *colortab, Uint32 *rgb_2_pix,
       
   137 	                  unsigned char *lum, unsigned char *cr,
       
   138                           unsigned char *cb, unsigned char *out,
       
   139                           int rows, int cols, int mod );
       
   140 
       
   141 	/* These are just so we don't have to allocate them separately */
       
   142 	Uint16 pitches[3];
       
   143 	Uint8 *planes[3];
       
   144 };
       
   145 
       
   146 
       
   147 /* The colorspace conversion functions */
       
   148 
       
   149 extern void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
       
   150                                      unsigned char *lum, unsigned char *cr,
       
   151                                      unsigned char *cb, unsigned char *out,
       
   152                                      int rows, int cols, int mod );
       
   153 extern void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
       
   154                                      unsigned char *lum, unsigned char *cr,
       
   155                                      unsigned char *cb, unsigned char *out,
       
   156                                      int rows, int cols, int mod );
       
   157 
       
   158 static void Color16DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
       
   159                                     unsigned char *lum, unsigned char *cr,
       
   160                                     unsigned char *cb, unsigned char *out,
       
   161                                     int rows, int cols, int mod )
       
   162 {
       
   163     unsigned short* row1;
       
   164     unsigned short* row2;
       
   165     unsigned char* lum2;
       
   166     int x, y;
       
   167     int cr_r;
       
   168     int crb_g;
       
   169     int cb_b;
       
   170     int cols_2 = cols / 2;
       
   171 
       
   172     row1 = (unsigned short*) out;
       
   173     row2 = row1 + cols + mod;
       
   174     lum2 = lum + cols;
       
   175 
       
   176     mod += cols + mod;
       
   177 
       
   178     y = rows / 2;
       
   179     while( y-- )
       
   180     {
       
   181         x = cols_2;
       
   182         while( x-- )
       
   183         {
       
   184             register int L;
       
   185 
       
   186             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   187             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   188                                + colortab[ *cb + 2*256 ];
       
   189             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   190             ++cr; ++cb;
       
   191 
       
   192             L = *lum++;
       
   193             *row1++ = (rgb_2_pix[ L + cr_r ] |
       
   194                        rgb_2_pix[ L + crb_g ] |
       
   195                        rgb_2_pix[ L + cb_b ]);
       
   196 
       
   197             L = *lum++;
       
   198             *row1++ = (rgb_2_pix[ L + cr_r ] |
       
   199                        rgb_2_pix[ L + crb_g ] |
       
   200                        rgb_2_pix[ L + cb_b ]);
       
   201 
       
   202 
       
   203             /* Now, do second row.  */
       
   204 
       
   205             L = *lum2++;
       
   206             *row2++ = (rgb_2_pix[ L + cr_r ] |
       
   207                        rgb_2_pix[ L + crb_g ] |
       
   208                        rgb_2_pix[ L + cb_b ]);
       
   209 
       
   210             L = *lum2++;
       
   211             *row2++ = (rgb_2_pix[ L + cr_r ] |
       
   212                        rgb_2_pix[ L + crb_g ] |
       
   213                        rgb_2_pix[ L + cb_b ]);
       
   214         }
       
   215 
       
   216         /*
       
   217          * These values are at the start of the next line, (due
       
   218          * to the ++'s above),but they need to be at the start
       
   219          * of the line after that.
       
   220          */
       
   221         lum  += cols;
       
   222         lum2 += cols;
       
   223         row1 += mod;
       
   224         row2 += mod;
       
   225     }
       
   226 }
       
   227 
       
   228 static void Color24DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
       
   229                                     unsigned char *lum, unsigned char *cr,
       
   230                                     unsigned char *cb, unsigned char *out,
       
   231                                     int rows, int cols, int mod )
       
   232 {
       
   233     unsigned int value;
       
   234     unsigned char* row1;
       
   235     unsigned char* row2;
       
   236     unsigned char* lum2;
       
   237     int x, y;
       
   238     int cr_r;
       
   239     int crb_g;
       
   240     int cb_b;
       
   241     int cols_2 = cols / 2;
       
   242 
       
   243     row1 = out;
       
   244     row2 = row1 + cols*3 + mod*3;
       
   245     lum2 = lum + cols;
       
   246 
       
   247     mod += cols + mod;
       
   248     mod *= 3;
       
   249 
       
   250     y = rows / 2;
       
   251     while( y-- )
       
   252     {
       
   253         x = cols_2;
       
   254         while( x-- )
       
   255         {
       
   256             register int L;
       
   257 
       
   258             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   259             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   260                                + colortab[ *cb + 2*256 ];
       
   261             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   262             ++cr; ++cb;
       
   263 
       
   264             L = *lum++;
       
   265             value = (rgb_2_pix[ L + cr_r ] |
       
   266                      rgb_2_pix[ L + crb_g ] |
       
   267                      rgb_2_pix[ L + cb_b ]);
       
   268             *row1++ = (value      ) & 0xFF;
       
   269             *row1++ = (value >>  8) & 0xFF;
       
   270             *row1++ = (value >> 16) & 0xFF;
       
   271 
       
   272             L = *lum++;
       
   273             value = (rgb_2_pix[ L + cr_r ] |
       
   274                      rgb_2_pix[ L + crb_g ] |
       
   275                      rgb_2_pix[ L + cb_b ]);
       
   276             *row1++ = (value      ) & 0xFF;
       
   277             *row1++ = (value >>  8) & 0xFF;
       
   278             *row1++ = (value >> 16) & 0xFF;
       
   279 
       
   280 
       
   281             /* Now, do second row.  */
       
   282 
       
   283             L = *lum2++;
       
   284             value = (rgb_2_pix[ L + cr_r ] |
       
   285                      rgb_2_pix[ L + crb_g ] |
       
   286                      rgb_2_pix[ L + cb_b ]);
       
   287             *row2++ = (value      ) & 0xFF;
       
   288             *row2++ = (value >>  8) & 0xFF;
       
   289             *row2++ = (value >> 16) & 0xFF;
       
   290 
       
   291             L = *lum2++;
       
   292             value = (rgb_2_pix[ L + cr_r ] |
       
   293                      rgb_2_pix[ L + crb_g ] |
       
   294                      rgb_2_pix[ L + cb_b ]);
       
   295             *row2++ = (value      ) & 0xFF;
       
   296             *row2++ = (value >>  8) & 0xFF;
       
   297             *row2++ = (value >> 16) & 0xFF;
       
   298         }
       
   299 
       
   300         /*
       
   301          * These values are at the start of the next line, (due
       
   302          * to the ++'s above),but they need to be at the start
       
   303          * of the line after that.
       
   304          */
       
   305         lum  += cols;
       
   306         lum2 += cols;
       
   307         row1 += mod;
       
   308         row2 += mod;
       
   309     }
       
   310 }
       
   311 
       
   312 static void Color32DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
       
   313                                     unsigned char *lum, unsigned char *cr,
       
   314                                     unsigned char *cb, unsigned char *out,
       
   315                                     int rows, int cols, int mod )
       
   316 {
       
   317     unsigned int* row1;
       
   318     unsigned int* row2;
       
   319     unsigned char* lum2;
       
   320     int x, y;
       
   321     int cr_r;
       
   322     int crb_g;
       
   323     int cb_b;
       
   324     int cols_2 = cols / 2;
       
   325 
       
   326     row1 = (unsigned int*) out;
       
   327     row2 = row1 + cols + mod;
       
   328     lum2 = lum + cols;
       
   329 
       
   330     mod += cols + mod;
       
   331 
       
   332     y = rows / 2;
       
   333     while( y-- )
       
   334     {
       
   335         x = cols_2;
       
   336         while( x-- )
       
   337         {
       
   338             register int L;
       
   339 
       
   340             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   341             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   342                                + colortab[ *cb + 2*256 ];
       
   343             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   344             ++cr; ++cb;
       
   345 
       
   346             L = *lum++;
       
   347             *row1++ = (rgb_2_pix[ L + cr_r ] |
       
   348                        rgb_2_pix[ L + crb_g ] |
       
   349                        rgb_2_pix[ L + cb_b ]);
       
   350 
       
   351             L = *lum++;
       
   352             *row1++ = (rgb_2_pix[ L + cr_r ] |
       
   353                        rgb_2_pix[ L + crb_g ] |
       
   354                        rgb_2_pix[ L + cb_b ]);
       
   355 
       
   356 
       
   357             /* Now, do second row.  */
       
   358 
       
   359             L = *lum2++;
       
   360             *row2++ = (rgb_2_pix[ L + cr_r ] |
       
   361                        rgb_2_pix[ L + crb_g ] |
       
   362                        rgb_2_pix[ L + cb_b ]);
       
   363 
       
   364             L = *lum2++;
       
   365             *row2++ = (rgb_2_pix[ L + cr_r ] |
       
   366                        rgb_2_pix[ L + crb_g ] |
       
   367                        rgb_2_pix[ L + cb_b ]);
       
   368         }
       
   369 
       
   370         /*
       
   371          * These values are at the start of the next line, (due
       
   372          * to the ++'s above),but they need to be at the start
       
   373          * of the line after that.
       
   374          */
       
   375         lum  += cols;
       
   376         lum2 += cols;
       
   377         row1 += mod;
       
   378         row2 += mod;
       
   379     }
       
   380 }
       
   381 
       
   382 /*
       
   383  * In this function I make use of a nasty trick. The tables have the lower
       
   384  * 16 bits replicated in the upper 16. This means I can write ints and get
       
   385  * the horisontal doubling for free (almost).
       
   386  */
       
   387 static void Color16DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
       
   388                                     unsigned char *lum, unsigned char *cr,
       
   389                                     unsigned char *cb, unsigned char *out,
       
   390                                     int rows, int cols, int mod )
       
   391 {
       
   392     unsigned int* row1 = (unsigned int*) out;
       
   393     const int next_row = cols+(mod/2);
       
   394     unsigned int* row2 = row1 + 2*next_row;
       
   395     unsigned char* lum2;
       
   396     int x, y;
       
   397     int cr_r;
       
   398     int crb_g;
       
   399     int cb_b;
       
   400     int cols_2 = cols / 2;
       
   401 
       
   402     lum2 = lum + cols;
       
   403 
       
   404     mod = (next_row * 3) + (mod/2);
       
   405 
       
   406     y = rows / 2;
       
   407     while( y-- )
       
   408     {
       
   409         x = cols_2;
       
   410         while( x-- )
       
   411         {
       
   412             register int L;
       
   413 
       
   414             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   415             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   416                                + colortab[ *cb + 2*256 ];
       
   417             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   418             ++cr; ++cb;
       
   419 
       
   420             L = *lum++;
       
   421             row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
       
   422                                         rgb_2_pix[ L + crb_g ] |
       
   423                                         rgb_2_pix[ L + cb_b ]);
       
   424             row1++;
       
   425 
       
   426             L = *lum++;
       
   427             row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
       
   428                                         rgb_2_pix[ L + crb_g ] |
       
   429                                         rgb_2_pix[ L + cb_b ]);
       
   430             row1++;
       
   431 
       
   432 
       
   433             /* Now, do second row. */
       
   434 
       
   435             L = *lum2++;
       
   436             row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
       
   437                                         rgb_2_pix[ L + crb_g ] |
       
   438                                         rgb_2_pix[ L + cb_b ]);
       
   439             row2++;
       
   440 
       
   441             L = *lum2++;
       
   442             row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
       
   443                                         rgb_2_pix[ L + crb_g ] |
       
   444                                         rgb_2_pix[ L + cb_b ]);
       
   445             row2++;
       
   446         }
       
   447 
       
   448         /*
       
   449          * These values are at the start of the next line, (due
       
   450          * to the ++'s above),but they need to be at the start
       
   451          * of the line after that.
       
   452          */
       
   453         lum  += cols;
       
   454         lum2 += cols;
       
   455         row1 += mod;
       
   456         row2 += mod;
       
   457     }
       
   458 }
       
   459 
       
   460 static void Color24DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
       
   461                                     unsigned char *lum, unsigned char *cr,
       
   462                                     unsigned char *cb, unsigned char *out,
       
   463                                     int rows, int cols, int mod )
       
   464 {
       
   465     unsigned int value;
       
   466     unsigned char* row1 = out;
       
   467     const int next_row = (cols*2 + mod) * 3;
       
   468     unsigned char* row2 = row1 + 2*next_row;
       
   469     unsigned char* lum2;
       
   470     int x, y;
       
   471     int cr_r;
       
   472     int crb_g;
       
   473     int cb_b;
       
   474     int cols_2 = cols / 2;
       
   475 
       
   476     lum2 = lum + cols;
       
   477 
       
   478     mod = next_row*3 + mod*3;
       
   479 
       
   480     y = rows / 2;
       
   481     while( y-- )
       
   482     {
       
   483         x = cols_2;
       
   484         while( x-- )
       
   485         {
       
   486             register int L;
       
   487 
       
   488             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   489             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   490                                + colortab[ *cb + 2*256 ];
       
   491             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   492             ++cr; ++cb;
       
   493 
       
   494             L = *lum++;
       
   495             value = (rgb_2_pix[ L + cr_r ] |
       
   496                      rgb_2_pix[ L + crb_g ] |
       
   497                      rgb_2_pix[ L + cb_b ]);
       
   498             row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
       
   499                      (value      ) & 0xFF;
       
   500             row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
       
   501                      (value >>  8) & 0xFF;
       
   502             row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
       
   503                      (value >> 16) & 0xFF;
       
   504             row1 += 2*3;
       
   505 
       
   506             L = *lum++;
       
   507             value = (rgb_2_pix[ L + cr_r ] |
       
   508                      rgb_2_pix[ L + crb_g ] |
       
   509                      rgb_2_pix[ L + cb_b ]);
       
   510             row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
       
   511                      (value      ) & 0xFF;
       
   512             row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
       
   513                      (value >>  8) & 0xFF;
       
   514             row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
       
   515                      (value >> 16) & 0xFF;
       
   516             row1 += 2*3;
       
   517 
       
   518 
       
   519             /* Now, do second row. */
       
   520 
       
   521             L = *lum2++;
       
   522             value = (rgb_2_pix[ L + cr_r ] |
       
   523                      rgb_2_pix[ L + crb_g ] |
       
   524                      rgb_2_pix[ L + cb_b ]);
       
   525             row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
       
   526                      (value      ) & 0xFF;
       
   527             row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
       
   528                      (value >>  8) & 0xFF;
       
   529             row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
       
   530                      (value >> 16) & 0xFF;
       
   531             row2 += 2*3;
       
   532 
       
   533             L = *lum2++;
       
   534             value = (rgb_2_pix[ L + cr_r ] |
       
   535                      rgb_2_pix[ L + crb_g ] |
       
   536                      rgb_2_pix[ L + cb_b ]);
       
   537             row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
       
   538                      (value      ) & 0xFF;
       
   539             row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
       
   540                      (value >>  8) & 0xFF;
       
   541             row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
       
   542                      (value >> 16) & 0xFF;
       
   543             row2 += 2*3;
       
   544         }
       
   545 
       
   546         /*
       
   547          * These values are at the start of the next line, (due
       
   548          * to the ++'s above),but they need to be at the start
       
   549          * of the line after that.
       
   550          */
       
   551         lum  += cols;
       
   552         lum2 += cols;
       
   553         row1 += mod;
       
   554         row2 += mod;
       
   555     }
       
   556 }
       
   557 
       
   558 static void Color32DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
       
   559                                     unsigned char *lum, unsigned char *cr,
       
   560                                     unsigned char *cb, unsigned char *out,
       
   561                                     int rows, int cols, int mod )
       
   562 {
       
   563     unsigned int* row1 = (unsigned int*) out;
       
   564     const int next_row = cols*2+mod;
       
   565     unsigned int* row2 = row1 + 2*next_row;
       
   566     unsigned char* lum2;
       
   567     int x, y;
       
   568     int cr_r;
       
   569     int crb_g;
       
   570     int cb_b;
       
   571     int cols_2 = cols / 2;
       
   572 
       
   573     lum2 = lum + cols;
       
   574 
       
   575     mod = (next_row * 3) + mod;
       
   576 
       
   577     y = rows / 2;
       
   578     while( y-- )
       
   579     {
       
   580         x = cols_2;
       
   581         while( x-- )
       
   582         {
       
   583             register int L;
       
   584 
       
   585             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   586             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   587                                + colortab[ *cb + 2*256 ];
       
   588             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   589             ++cr; ++cb;
       
   590 
       
   591             L = *lum++;
       
   592             row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
       
   593                                        (rgb_2_pix[ L + cr_r ] |
       
   594                                         rgb_2_pix[ L + crb_g ] |
       
   595                                         rgb_2_pix[ L + cb_b ]);
       
   596             row1 += 2;
       
   597 
       
   598             L = *lum++;
       
   599             row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
       
   600                                        (rgb_2_pix[ L + cr_r ] |
       
   601                                         rgb_2_pix[ L + crb_g ] |
       
   602                                         rgb_2_pix[ L + cb_b ]);
       
   603             row1 += 2;
       
   604 
       
   605 
       
   606             /* Now, do second row. */
       
   607 
       
   608             L = *lum2++;
       
   609             row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
       
   610                                        (rgb_2_pix[ L + cr_r ] |
       
   611                                         rgb_2_pix[ L + crb_g ] |
       
   612                                         rgb_2_pix[ L + cb_b ]);
       
   613             row2 += 2;
       
   614 
       
   615             L = *lum2++;
       
   616             row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
       
   617                                        (rgb_2_pix[ L + cr_r ] |
       
   618                                         rgb_2_pix[ L + crb_g ] |
       
   619                                         rgb_2_pix[ L + cb_b ]);
       
   620             row2 += 2;
       
   621         }
       
   622 
       
   623         /*
       
   624          * These values are at the start of the next line, (due
       
   625          * to the ++'s above),but they need to be at the start
       
   626          * of the line after that.
       
   627          */
       
   628         lum  += cols;
       
   629         lum2 += cols;
       
   630         row1 += mod;
       
   631         row2 += mod;
       
   632     }
       
   633 }
       
   634 
       
   635 static void Color16DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
       
   636                                     unsigned char *lum, unsigned char *cr,
       
   637                                     unsigned char *cb, unsigned char *out,
       
   638                                     int rows, int cols, int mod )
       
   639 {
       
   640     unsigned short* row;
       
   641     int x, y;
       
   642     int cr_r;
       
   643     int crb_g;
       
   644     int cb_b;
       
   645     int cols_2 = cols / 2;
       
   646 
       
   647     row = (unsigned short*) out;
       
   648 
       
   649     y = rows;
       
   650     while( y-- )
       
   651     {
       
   652         x = cols_2;
       
   653         while( x-- )
       
   654         {
       
   655             register int L;
       
   656 
       
   657             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   658             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   659                                + colortab[ *cb + 2*256 ];
       
   660             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   661             cr += 4; cb += 4;
       
   662 
       
   663             L = *lum; lum += 2;
       
   664             *row++ = (rgb_2_pix[ L + cr_r ] |
       
   665                        rgb_2_pix[ L + crb_g ] |
       
   666                        rgb_2_pix[ L + cb_b ]);
       
   667 
       
   668             L = *lum; lum += 2;
       
   669             *row++ = (rgb_2_pix[ L + cr_r ] |
       
   670                        rgb_2_pix[ L + crb_g ] |
       
   671                        rgb_2_pix[ L + cb_b ]);
       
   672 
       
   673         }
       
   674 
       
   675         row += mod;
       
   676     }
       
   677 }
       
   678 
       
   679 static void Color24DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
       
   680                                     unsigned char *lum, unsigned char *cr,
       
   681                                     unsigned char *cb, unsigned char *out,
       
   682                                     int rows, int cols, int mod )
       
   683 {
       
   684     unsigned int value;
       
   685     unsigned char* row;
       
   686     int x, y;
       
   687     int cr_r;
       
   688     int crb_g;
       
   689     int cb_b;
       
   690     int cols_2 = cols / 2;
       
   691 
       
   692     row = (unsigned char*) out;
       
   693     mod *= 3;
       
   694     y = rows;
       
   695     while( y-- )
       
   696     {
       
   697         x = cols_2;
       
   698         while( x-- )
       
   699         {
       
   700             register int L;
       
   701 
       
   702             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   703             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   704                                + colortab[ *cb + 2*256 ];
       
   705             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   706             cr += 4; cb += 4;
       
   707 
       
   708             L = *lum; lum += 2;
       
   709             value = (rgb_2_pix[ L + cr_r ] |
       
   710                      rgb_2_pix[ L + crb_g ] |
       
   711                      rgb_2_pix[ L + cb_b ]);
       
   712             *row++ = (value      ) & 0xFF;
       
   713             *row++ = (value >>  8) & 0xFF;
       
   714             *row++ = (value >> 16) & 0xFF;
       
   715 
       
   716             L = *lum; lum += 2;
       
   717             value = (rgb_2_pix[ L + cr_r ] |
       
   718                      rgb_2_pix[ L + crb_g ] |
       
   719                      rgb_2_pix[ L + cb_b ]);
       
   720             *row++ = (value      ) & 0xFF;
       
   721             *row++ = (value >>  8) & 0xFF;
       
   722             *row++ = (value >> 16) & 0xFF;
       
   723 
       
   724         }
       
   725         row += mod;
       
   726     }
       
   727 }
       
   728 
       
   729 static void Color32DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
       
   730                                     unsigned char *lum, unsigned char *cr,
       
   731                                     unsigned char *cb, unsigned char *out,
       
   732                                     int rows, int cols, int mod )
       
   733 {
       
   734     unsigned int* row;
       
   735     int x, y;
       
   736     int cr_r;
       
   737     int crb_g;
       
   738     int cb_b;
       
   739     int cols_2 = cols / 2;
       
   740 
       
   741     row = (unsigned int*) out;
       
   742     y = rows;
       
   743     while( y-- )
       
   744     {
       
   745         x = cols_2;
       
   746         while( x-- )
       
   747         {
       
   748             register int L;
       
   749 
       
   750             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   751             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   752                                + colortab[ *cb + 2*256 ];
       
   753             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   754             cr += 4; cb += 4;
       
   755 
       
   756             L = *lum; lum += 2;
       
   757             *row++ = (rgb_2_pix[ L + cr_r ] |
       
   758                        rgb_2_pix[ L + crb_g ] |
       
   759                        rgb_2_pix[ L + cb_b ]);
       
   760 
       
   761             L = *lum; lum += 2;
       
   762             *row++ = (rgb_2_pix[ L + cr_r ] |
       
   763                        rgb_2_pix[ L + crb_g ] |
       
   764                        rgb_2_pix[ L + cb_b ]);
       
   765 
       
   766 
       
   767         }
       
   768         row += mod;
       
   769     }
       
   770 }
       
   771 
       
   772 /*
       
   773  * In this function I make use of a nasty trick. The tables have the lower
       
   774  * 16 bits replicated in the upper 16. This means I can write ints and get
       
   775  * the horisontal doubling for free (almost).
       
   776  */
       
   777 static void Color16DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
       
   778                                     unsigned char *lum, unsigned char *cr,
       
   779                                     unsigned char *cb, unsigned char *out,
       
   780                                     int rows, int cols, int mod )
       
   781 {
       
   782     unsigned int* row = (unsigned int*) out;
       
   783     const int next_row = cols+(mod/2);
       
   784     int x, y;
       
   785     int cr_r;
       
   786     int crb_g;
       
   787     int cb_b;
       
   788     int cols_2 = cols / 2;
       
   789 
       
   790     y = rows;
       
   791     while( y-- )
       
   792     {
       
   793         x = cols_2;
       
   794         while( x-- )
       
   795         {
       
   796             register int L;
       
   797 
       
   798             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   799             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   800                                + colortab[ *cb + 2*256 ];
       
   801             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   802             cr += 4; cb += 4;
       
   803 
       
   804             L = *lum; lum += 2;
       
   805             row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
       
   806                                         rgb_2_pix[ L + crb_g ] |
       
   807                                         rgb_2_pix[ L + cb_b ]);
       
   808             row++;
       
   809 
       
   810             L = *lum; lum += 2;
       
   811             row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
       
   812                                         rgb_2_pix[ L + crb_g ] |
       
   813                                         rgb_2_pix[ L + cb_b ]);
       
   814             row++;
       
   815 
       
   816         }
       
   817         row += next_row;
       
   818     }
       
   819 }
       
   820 
       
   821 static void Color24DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
       
   822                                     unsigned char *lum, unsigned char *cr,
       
   823                                     unsigned char *cb, unsigned char *out,
       
   824                                     int rows, int cols, int mod )
       
   825 {
       
   826     unsigned int value;
       
   827     unsigned char* row = out;
       
   828     const int next_row = (cols*2 + mod) * 3;
       
   829     int x, y;
       
   830     int cr_r;
       
   831     int crb_g;
       
   832     int cb_b;
       
   833     int cols_2 = cols / 2;
       
   834     y = rows;
       
   835     while( y-- )
       
   836     {
       
   837         x = cols_2;
       
   838         while( x-- )
       
   839         {
       
   840             register int L;
       
   841 
       
   842             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   843             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   844                                + colortab[ *cb + 2*256 ];
       
   845             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   846             cr += 4; cb += 4;
       
   847 
       
   848             L = *lum; lum += 2;
       
   849             value = (rgb_2_pix[ L + cr_r ] |
       
   850                      rgb_2_pix[ L + crb_g ] |
       
   851                      rgb_2_pix[ L + cb_b ]);
       
   852             row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
       
   853                      (value      ) & 0xFF;
       
   854             row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
       
   855                      (value >>  8) & 0xFF;
       
   856             row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
       
   857                      (value >> 16) & 0xFF;
       
   858             row += 2*3;
       
   859 
       
   860             L = *lum; lum += 2;
       
   861             value = (rgb_2_pix[ L + cr_r ] |
       
   862                      rgb_2_pix[ L + crb_g ] |
       
   863                      rgb_2_pix[ L + cb_b ]);
       
   864             row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
       
   865                      (value      ) & 0xFF;
       
   866             row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
       
   867                      (value >>  8) & 0xFF;
       
   868             row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
       
   869                      (value >> 16) & 0xFF;
       
   870             row += 2*3;
       
   871 
       
   872         }
       
   873         row += next_row;
       
   874     }
       
   875 }
       
   876 
       
   877 static void Color32DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
       
   878                                     unsigned char *lum, unsigned char *cr,
       
   879                                     unsigned char *cb, unsigned char *out,
       
   880                                     int rows, int cols, int mod )
       
   881 {
       
   882     unsigned int* row = (unsigned int*) out;
       
   883     const int next_row = cols*2+mod;
       
   884     int x, y;
       
   885     int cr_r;
       
   886     int crb_g;
       
   887     int cb_b;
       
   888     int cols_2 = cols / 2;
       
   889     mod+=mod;
       
   890     y = rows;
       
   891     while( y-- )
       
   892     {
       
   893         x = cols_2;
       
   894         while( x-- )
       
   895         {
       
   896             register int L;
       
   897 
       
   898             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   899             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   900                                + colortab[ *cb + 2*256 ];
       
   901             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   902             cr += 4; cb += 4;
       
   903 
       
   904             L = *lum; lum += 2;
       
   905             row[0] = row[1] = row[next_row] = row[next_row+1] =
       
   906                                        (rgb_2_pix[ L + cr_r ] |
       
   907                                         rgb_2_pix[ L + crb_g ] |
       
   908                                         rgb_2_pix[ L + cb_b ]);
       
   909             row += 2;
       
   910 
       
   911             L = *lum; lum += 2;
       
   912             row[0] = row[1] = row[next_row] = row[next_row+1] =
       
   913                                        (rgb_2_pix[ L + cr_r ] |
       
   914                                         rgb_2_pix[ L + crb_g ] |
       
   915                                         rgb_2_pix[ L + cb_b ]);
       
   916             row += 2;
       
   917 
       
   918 
       
   919         }
       
   920 
       
   921         row += next_row;
       
   922     }
       
   923 }
       
   924 
       
   925 /*
       
   926  * How many 1 bits are there in the Uint32.
       
   927  * Low performance, do not call often.
       
   928  */
       
   929 static int number_of_bits_set( Uint32 a )
       
   930 {
       
   931     if(!a) return 0;
       
   932     if(a & 1) return 1 + number_of_bits_set(a >> 1);
       
   933     return(number_of_bits_set(a >> 1));
       
   934 }
       
   935 
       
   936 /*
       
   937  * How many 0 bits are there at least significant end of Uint32.
       
   938  * Low performance, do not call often.
       
   939  */
       
   940 static int free_bits_at_bottom( Uint32 a )
       
   941 {
       
   942       /* assume char is 8 bits */
       
   943     if(!a) return sizeof(Uint32) * 8;
       
   944     if(((Sint32)a) & 1l) return 0;
       
   945     return 1 + free_bits_at_bottom ( a >> 1);
       
   946 }
       
   947 
       
   948 
       
   949 SDL_Overlay *SDL_CreateYUV_SW(_THIS, int width, int height, Uint32 format, SDL_Surface *display)
       
   950 {
       
   951 	SDL_Overlay *overlay;
       
   952 	struct private_yuvhwdata *swdata;
       
   953 	int *Cr_r_tab;
       
   954 	int *Cr_g_tab;
       
   955 	int *Cb_g_tab;
       
   956 	int *Cb_b_tab;
       
   957 	Uint32 *r_2_pix_alloc;
       
   958 	Uint32 *g_2_pix_alloc;
       
   959 	Uint32 *b_2_pix_alloc;
       
   960 	int i, cpu_mmx;
       
   961 	int CR, CB;
       
   962 	Uint32 Rmask, Gmask, Bmask;
       
   963 
       
   964 	/* Only RGB packed pixel conversion supported */
       
   965 	if ( (display->format->BytesPerPixel != 2) &&
       
   966 	     (display->format->BytesPerPixel != 3) &&
       
   967 	     (display->format->BytesPerPixel != 4) ) {
       
   968 		SDL_SetError("Can't use YUV data on non 16/24/32 bit surfaces");
       
   969 		return(NULL);
       
   970 	}
       
   971 
       
   972 	/* Verify that we support the format */
       
   973 	switch (format) {
       
   974 	    case SDL_YV12_OVERLAY:
       
   975 	    case SDL_IYUV_OVERLAY:
       
   976 	    case SDL_YUY2_OVERLAY:
       
   977 	    case SDL_UYVY_OVERLAY:
       
   978 	    case SDL_YVYU_OVERLAY:
       
   979 		break;
       
   980 	    default:
       
   981 		SDL_SetError("Unsupported YUV format");
       
   982 		return(NULL);
       
   983 	}
       
   984 
       
   985 	/* Create the overlay structure */
       
   986 	overlay = (SDL_Overlay *)malloc(sizeof *overlay);
       
   987 	if ( overlay == NULL ) {
       
   988 		SDL_OutOfMemory();
       
   989 		return(NULL);
       
   990 	}
       
   991 	memset(overlay, 0, (sizeof *overlay));
       
   992 
       
   993 	/* Fill in the basic members */
       
   994 	overlay->format = format;
       
   995 	overlay->w = width;
       
   996 	overlay->h = height;
       
   997 
       
   998 	/* Set up the YUV surface function structure */
       
   999 	overlay->hwfuncs = &sw_yuvfuncs;
       
  1000 
       
  1001 	/* Create the pixel data and lookup tables */
       
  1002 	swdata = (struct private_yuvhwdata *)malloc(sizeof *swdata);
       
  1003 	overlay->hwdata = swdata;
       
  1004 	if ( swdata == NULL ) {
       
  1005 		SDL_OutOfMemory();
       
  1006 		SDL_FreeYUVOverlay(overlay);
       
  1007 		return(NULL);
       
  1008 	}
       
  1009 	swdata->stretch = NULL;
       
  1010 	swdata->display = display;
       
  1011 	swdata->pixels = (Uint8 *) malloc(width*height*2);
       
  1012 	swdata->colortab = (int *)malloc(4*256*sizeof(int));
       
  1013 	Cr_r_tab = &swdata->colortab[0*256];
       
  1014 	Cr_g_tab = &swdata->colortab[1*256];
       
  1015 	Cb_g_tab = &swdata->colortab[2*256];
       
  1016 	Cb_b_tab = &swdata->colortab[3*256];
       
  1017 	swdata->rgb_2_pix = (Uint32 *)malloc(3*768*sizeof(Uint32));
       
  1018 	r_2_pix_alloc = &swdata->rgb_2_pix[0*768];
       
  1019 	g_2_pix_alloc = &swdata->rgb_2_pix[1*768];
       
  1020 	b_2_pix_alloc = &swdata->rgb_2_pix[2*768];
       
  1021 	if ( ! swdata->pixels || ! swdata->colortab || ! swdata->rgb_2_pix ) {
       
  1022 		SDL_OutOfMemory();
       
  1023 		SDL_FreeYUVOverlay(overlay);
       
  1024 		return(NULL);
       
  1025 	}
       
  1026 
       
  1027 	/* Generate the tables for the display surface */
       
  1028 	for (i=0; i<256; i++) {
       
  1029 		/* Gamma correction (luminescence table) and chroma correction
       
  1030 		   would be done here.  See the Berkeley mpeg_play sources.
       
  1031 		*/
       
  1032 		CB = CR = (i-128);
       
  1033 		Cr_r_tab[i] = (int) ( (0.419/0.299) * CR);
       
  1034 		Cr_g_tab[i] = (int) (-(0.299/0.419) * CR);
       
  1035 		Cb_g_tab[i] = (int) (-(0.114/0.331) * CB); 
       
  1036 		Cb_b_tab[i] = (int) ( (0.587/0.331) * CB);
       
  1037 	}
       
  1038 
       
  1039 	/* 
       
  1040 	 * Set up entries 0-255 in rgb-to-pixel value tables.
       
  1041 	 */
       
  1042 	Rmask = display->format->Rmask;
       
  1043 	Gmask = display->format->Gmask;
       
  1044 	Bmask = display->format->Bmask;
       
  1045 	for ( i=0; i<256; ++i ) {
       
  1046 		r_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Rmask));
       
  1047 		r_2_pix_alloc[i+256] <<= free_bits_at_bottom(Rmask);
       
  1048 		g_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Gmask));
       
  1049 		g_2_pix_alloc[i+256] <<= free_bits_at_bottom(Gmask);
       
  1050 		b_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Bmask));
       
  1051 		b_2_pix_alloc[i+256] <<= free_bits_at_bottom(Bmask);
       
  1052 	}
       
  1053 
       
  1054 	/*
       
  1055 	 * If we have 16-bit output depth, then we double the value
       
  1056 	 * in the top word. This means that we can write out both
       
  1057 	 * pixels in the pixel doubling mode with one op. It is 
       
  1058 	 * harmless in the normal case as storing a 32-bit value
       
  1059 	 * through a short pointer will lose the top bits anyway.
       
  1060 	 */
       
  1061 	if( display->format->BytesPerPixel == 2 ) {
       
  1062 		for ( i=0; i<256; ++i ) {
       
  1063 			r_2_pix_alloc[i+256] |= (r_2_pix_alloc[i+256]) << 16;
       
  1064 			g_2_pix_alloc[i+256] |= (g_2_pix_alloc[i+256]) << 16;
       
  1065 			b_2_pix_alloc[i+256] |= (b_2_pix_alloc[i+256]) << 16;
       
  1066 		}
       
  1067 	}
       
  1068 
       
  1069 	/*
       
  1070 	 * Spread out the values we have to the rest of the array so that
       
  1071 	 * we do not need to check for overflow.
       
  1072 	 */
       
  1073 	for ( i=0; i<256; ++i ) {
       
  1074 		r_2_pix_alloc[i] = r_2_pix_alloc[256];
       
  1075 		r_2_pix_alloc[i+512] = r_2_pix_alloc[511];
       
  1076 		g_2_pix_alloc[i] = g_2_pix_alloc[256];
       
  1077 		g_2_pix_alloc[i+512] = g_2_pix_alloc[511];
       
  1078 		b_2_pix_alloc[i] = b_2_pix_alloc[256];
       
  1079 		b_2_pix_alloc[i+512] = b_2_pix_alloc[511];
       
  1080 	}
       
  1081 
       
  1082 	/* You have chosen wisely... */
       
  1083 	switch (format) {
       
  1084 	    case SDL_YV12_OVERLAY:
       
  1085 	    case SDL_IYUV_OVERLAY:
       
  1086 		cpu_mmx = CPU_Flags() & MMX_CPU;
       
  1087 		if ( display->format->BytesPerPixel == 2 ) {
       
  1088 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
       
  1089 			/* inline assembly functions */
       
  1090 			if ( cpu_mmx && (Rmask == 0xF800) &&
       
  1091 			                (Gmask == 0x07E0) &&
       
  1092 				        (Bmask == 0x001F) &&
       
  1093 			                (width & 15) == 0) {
       
  1094 /*printf("Using MMX 16-bit 565 dither\n");*/
       
  1095 				swdata->Display1X = Color565DitherYV12MMX1X;
       
  1096 			} else {
       
  1097 /*printf("Using C 16-bit dither\n");*/
       
  1098 				swdata->Display1X = Color16DitherYV12Mod1X;
       
  1099 			}
       
  1100 #else
       
  1101 			swdata->Display1X = Color16DitherYV12Mod1X;
       
  1102 #endif
       
  1103 			swdata->Display2X = Color16DitherYV12Mod2X;
       
  1104 		}
       
  1105 		if ( display->format->BytesPerPixel == 3 ) {
       
  1106 			swdata->Display1X = Color24DitherYV12Mod1X;
       
  1107 			swdata->Display2X = Color24DitherYV12Mod2X;
       
  1108 		}
       
  1109 		if ( display->format->BytesPerPixel == 4 ) {
       
  1110 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
       
  1111 			/* inline assembly functions */
       
  1112 			if ( cpu_mmx && (Rmask == 0x00FF0000) &&
       
  1113 			                (Gmask == 0x0000FF00) &&
       
  1114 				        (Bmask == 0x000000FF) && 
       
  1115 			                (width & 15) == 0) {
       
  1116 /*printf("Using MMX 32-bit dither\n");*/
       
  1117 				swdata->Display1X = ColorRGBDitherYV12MMX1X;
       
  1118 			} else {
       
  1119 /*printf("Using C 32-bit dither\n");*/
       
  1120 				swdata->Display1X = Color32DitherYV12Mod1X;
       
  1121 			}
       
  1122 #else
       
  1123 			swdata->Display1X = Color32DitherYV12Mod1X;
       
  1124 #endif
       
  1125 			swdata->Display2X = Color32DitherYV12Mod2X;
       
  1126 		}
       
  1127 		break;
       
  1128 	    case SDL_YUY2_OVERLAY:
       
  1129 	    case SDL_UYVY_OVERLAY:
       
  1130 	    case SDL_YVYU_OVERLAY:
       
  1131 		if ( display->format->BytesPerPixel == 2 ) {
       
  1132 			swdata->Display1X = Color16DitherYUY2Mod1X;
       
  1133 			swdata->Display2X = Color16DitherYUY2Mod2X;
       
  1134 		}
       
  1135 		if ( display->format->BytesPerPixel == 3 ) {
       
  1136 			swdata->Display1X = Color24DitherYUY2Mod1X;
       
  1137 			swdata->Display2X = Color24DitherYUY2Mod2X;
       
  1138 		}
       
  1139 		if ( display->format->BytesPerPixel == 4 ) {
       
  1140 			swdata->Display1X = Color32DitherYUY2Mod1X;
       
  1141 			swdata->Display2X = Color32DitherYUY2Mod2X;
       
  1142 		}
       
  1143 		break;
       
  1144 	    default:
       
  1145 		/* We should never get here (caught above) */
       
  1146 		break;
       
  1147 	}
       
  1148 
       
  1149 	/* Find the pitch and offset values for the overlay */
       
  1150 	overlay->pitches = swdata->pitches;
       
  1151 	overlay->pixels = swdata->planes;
       
  1152 	switch (format) {
       
  1153 	    case SDL_YV12_OVERLAY:
       
  1154 	    case SDL_IYUV_OVERLAY:
       
  1155 		overlay->pitches[0] = overlay->w;
       
  1156 		overlay->pitches[1] = overlay->pitches[0] / 2;
       
  1157 		overlay->pitches[2] = overlay->pitches[0] / 2;
       
  1158 	        overlay->pixels[0] = swdata->pixels;
       
  1159 	        overlay->pixels[1] = overlay->pixels[0] +
       
  1160 		                     overlay->pitches[0] * overlay->h;
       
  1161 	        overlay->pixels[2] = overlay->pixels[1] +
       
  1162 		                     overlay->pitches[1] * overlay->h / 2;
       
  1163 		overlay->planes = 3;
       
  1164 		break;
       
  1165 	    case SDL_YUY2_OVERLAY:
       
  1166 	    case SDL_UYVY_OVERLAY:
       
  1167 	    case SDL_YVYU_OVERLAY:
       
  1168 		overlay->pitches[0] = overlay->w*2;
       
  1169 	        overlay->pixels[0] = swdata->pixels;
       
  1170 		overlay->planes = 1;
       
  1171 		break;
       
  1172 	    default:
       
  1173 		/* We should never get here (caught above) */
       
  1174 		break;
       
  1175 	}
       
  1176 
       
  1177 	/* We're all done.. */
       
  1178 	return(overlay);
       
  1179 }
       
  1180 
       
  1181 int SDL_LockYUV_SW(_THIS, SDL_Overlay *overlay)
       
  1182 {
       
  1183 	return(0);
       
  1184 }
       
  1185 
       
  1186 void SDL_UnlockYUV_SW(_THIS, SDL_Overlay *overlay)
       
  1187 {
       
  1188 	return;
       
  1189 }
       
  1190 
       
  1191 int SDL_DisplayYUV_SW(_THIS, SDL_Overlay *overlay, SDL_Rect *dstrect)
       
  1192 {
       
  1193 	struct private_yuvhwdata *swdata;
       
  1194 	SDL_Surface *stretch;
       
  1195 	SDL_Surface *display;
       
  1196 	int scale_2x;
       
  1197 	Uint8 *lum, *Cr, *Cb;
       
  1198 	Uint8 *dst;
       
  1199 	int mod;
       
  1200 
       
  1201 	swdata = overlay->hwdata;
       
  1202 	scale_2x = 0;
       
  1203 	stretch = 0;
       
  1204 	if ( (overlay->w != dstrect->w) || (overlay->h != dstrect->h) ) {
       
  1205 		if ( (dstrect->w == 2*overlay->w) &&
       
  1206 		     (dstrect->h == 2*overlay->h) ) {
       
  1207 			scale_2x = 1;
       
  1208 		} else {
       
  1209 			if ( ! swdata->stretch ) {
       
  1210 				display = swdata->display;
       
  1211 				swdata->stretch = SDL_CreateRGBSurface(
       
  1212 					SDL_SWSURFACE,
       
  1213 					overlay->w, overlay->h,
       
  1214 					display->format->BitsPerPixel,
       
  1215 					display->format->Rmask,
       
  1216 					display->format->Gmask,
       
  1217 					display->format->Bmask, 0);
       
  1218 				if ( ! swdata->stretch ) {
       
  1219 					return(-1);
       
  1220 				}
       
  1221 			}
       
  1222 			stretch = swdata->stretch;
       
  1223 		}
       
  1224 	}
       
  1225 
       
  1226 	if ( stretch ) {
       
  1227 		display = stretch;
       
  1228 	} else {
       
  1229 		display = swdata->display;
       
  1230 	}
       
  1231 	switch (overlay->format) {
       
  1232 	    case SDL_YV12_OVERLAY:
       
  1233 		lum = overlay->pixels[0];
       
  1234 		Cr =  overlay->pixels[1];
       
  1235 		Cb =  overlay->pixels[2];
       
  1236 		break;
       
  1237 	    case SDL_IYUV_OVERLAY:
       
  1238 		lum = overlay->pixels[0];
       
  1239 		Cr =  overlay->pixels[2];
       
  1240 		Cb =  overlay->pixels[1];
       
  1241 		break;
       
  1242 	    case SDL_YUY2_OVERLAY:
       
  1243 		lum = overlay->pixels[0];
       
  1244 		Cr = lum + 3;
       
  1245 		Cb = lum + 1;
       
  1246 		break;
       
  1247 	    case SDL_UYVY_OVERLAY:
       
  1248 		lum = overlay->pixels[0]+1;
       
  1249 		Cr = lum + 1;
       
  1250 		Cb = lum - 1;
       
  1251 		break;
       
  1252 	    case SDL_YVYU_OVERLAY:
       
  1253 		lum = overlay->pixels[0];
       
  1254 		Cr = lum + 1;
       
  1255 		Cb = lum + 3;
       
  1256 		break;
       
  1257 	    default:
       
  1258 		SDL_SetError("Unsupported YUV format in blit (??)");
       
  1259 		return(-1);
       
  1260 	}
       
  1261 	if ( SDL_MUSTLOCK(display) ) {
       
  1262         	if ( SDL_LockSurface(display) < 0 ) {
       
  1263 			return(-1);
       
  1264 		}
       
  1265 	}
       
  1266 	if ( stretch ) {
       
  1267 		dst = (Uint8 *)stretch->pixels;
       
  1268 	} else {
       
  1269 		dst = (Uint8 *)display->pixels
       
  1270 			+ dstrect->x * display->format->BytesPerPixel
       
  1271 			+ dstrect->y * display->pitch;
       
  1272 	}
       
  1273 	mod = (display->pitch / display->format->BytesPerPixel);
       
  1274 
       
  1275 	if ( scale_2x ) {
       
  1276 		mod -= (overlay->w * 2);
       
  1277 		swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
       
  1278 		                  lum, Cr, Cb, dst, overlay->h, overlay->w,mod);
       
  1279 	} else {
       
  1280 		mod -= overlay->w;
       
  1281 		swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
       
  1282 		                  lum, Cr, Cb, dst, overlay->h, overlay->w,mod);
       
  1283 	}
       
  1284 	if ( SDL_MUSTLOCK(display) ) {
       
  1285 		SDL_UnlockSurface(display);
       
  1286 	}
       
  1287 	if ( stretch ) {
       
  1288 		display = swdata->display;
       
  1289 		SDL_SoftStretch(stretch, NULL, display, dstrect);
       
  1290 	}
       
  1291 	SDL_UpdateRects(display, 1, dstrect);
       
  1292 
       
  1293 	return(0);
       
  1294 }
       
  1295 
       
  1296 void SDL_FreeYUV_SW(_THIS, SDL_Overlay *overlay)
       
  1297 {
       
  1298 	struct private_yuvhwdata *swdata;
       
  1299 
       
  1300 	swdata = overlay->hwdata;
       
  1301 	if ( swdata ) {
       
  1302 		if ( swdata->pixels ) {
       
  1303 			free(swdata->pixels);
       
  1304 		}
       
  1305 		if ( swdata->colortab ) {
       
  1306 			free(swdata->colortab);
       
  1307 		}
       
  1308 		if ( swdata->rgb_2_pix ) {
       
  1309 			free(swdata->rgb_2_pix);
       
  1310 		}
       
  1311 		free(swdata);
       
  1312 	}
       
  1313 }