Made it possible to create a texture of any format, even if not supported by the renderer.
This lets me reduce the set of formats each renderer supports to the optimal set, for a nice speed boost.
--- a/VisualC/SDL/SDL_VS2008.vcproj Wed Feb 02 22:55:12 2011 -0800
+++ b/VisualC/SDL/SDL_VS2008.vcproj Thu Feb 03 00:19:40 2011 -0800
@@ -607,7 +607,7 @@
>
</File>
<File
- RelativePath="..\..\src\video\mmx.h"
+ RelativePath="..\..\src\render\mmx.h"
>
</File>
<File
@@ -1251,15 +1251,15 @@
>
</File>
<File
- RelativePath="..\..\src\video\SDL_yuv_mmx.c"
+ RelativePath="..\..\src\render\SDL_yuv_mmx.c"
>
</File>
<File
- RelativePath="..\..\src\video\SDL_yuv_sw.c"
+ RelativePath="..\..\src\render\SDL_yuv_sw.c"
>
</File>
<File
- RelativePath="..\..\src\video\SDL_yuv_sw_c.h"
+ RelativePath="..\..\src\render\SDL_yuv_sw_c.h"
>
</File>
<File
--- a/VisualC/SDL/SDL_VS2010.vcxproj Wed Feb 02 22:55:12 2011 -0800
+++ b/VisualC/SDL/SDL_VS2010.vcxproj Thu Feb 03 00:19:40 2011 -0800
@@ -282,8 +282,9 @@
<ClInclude Include="..\..\src\events\SDL_touch_c.h" />
<ClInclude Include="..\..\src\libm\math.h" />
<ClInclude Include="..\..\src\libm\math_private.h" />
+ <ClInclude Include="..\..\src\render\mmx.h" />
<ClInclude Include="..\..\src\render\SDL_sysrender.h" />
- <ClInclude Include="..\..\src\video\mmx.h" />
+ <ClInclude Include="..\..\src\render\SDL_yuv_sw_c.h" />
<ClInclude Include="..\..\src\video\SDL_alphamult.h" />
<ClInclude Include="..\..\src\audio\SDL_audio_c.h" />
<ClInclude Include="..\..\src\audio\SDL_audiodev_c.h" />
@@ -339,7 +340,6 @@
<ClInclude Include="..\..\src\video\windows\SDL_windowsvideo.h" />
<ClInclude Include="..\..\src\video\windows\SDL_windowswindow.h" />
<ClInclude Include="..\..\src\events\SDL_windowevents_c.h" />
- <ClInclude Include="..\..\src\video\SDL_yuv_sw_c.h" />
<ClInclude Include="..\..\src\video\windows\wmmsg.h" />
</ItemGroup>
<ItemGroup>
@@ -365,6 +365,8 @@
<ClCompile Include="..\..\src\render\direct3d\SDL_d3drender.c" />
<ClCompile Include="..\..\src\render\opengl\SDL_renderer_gl.c" />
<ClCompile Include="..\..\src\render\SDL_render.c" />
+ <ClCompile Include="..\..\src\render\SDL_yuv_mmx.c" />
+ <ClCompile Include="..\..\src\render\SDL_yuv_sw.c" />
<ClCompile Include="..\..\src\render\software\SDL_renderer_sw.c" />
<ClCompile Include="..\..\src\SDL.c" />
<ClCompile Include="..\..\src\video\SDL_alphamult.c" />
@@ -452,8 +454,6 @@
<ClCompile Include="..\..\src\video\windows\SDL_windowsvideo.c" />
<ClCompile Include="..\..\src\video\windows\SDL_windowswindow.c" />
<ClCompile Include="..\..\src\events\SDL_windowevents.c" />
- <ClCompile Include="..\..\src\video\SDL_yuv_mmx.c" />
- <ClCompile Include="..\..\src\video\SDL_yuv_sw.c" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
--- a/Xcode-iPhoneOS/SDL/SDLiPhoneOS.xcodeproj/project.pbxproj Wed Feb 02 22:55:12 2011 -0800
+++ b/Xcode-iPhoneOS/SDL/SDLiPhoneOS.xcodeproj/project.pbxproj Thu Feb 03 00:19:40 2011 -0800
@@ -73,6 +73,10 @@
043DD77010FD8A0000DED673 /* SDL_alphamult.h in Headers */ = {isa = PBXBuildFile; fileRef = 043DD76C10FD8A0000DED673 /* SDL_alphamult.h */; };
043DD77110FD8A0000DED673 /* SDL_blendfillrect.c in Sources */ = {isa = PBXBuildFile; fileRef = 043DD76D10FD8A0000DED673 /* SDL_blendfillrect.c */; };
043DD77210FD8A0000DED673 /* SDL_drawrect.c in Sources */ = {isa = PBXBuildFile; fileRef = 043DD76E10FD8A0000DED673 /* SDL_drawrect.c */; };
+ 04409BA612FA989600FB9AA8 /* mmx.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409BA212FA989600FB9AA8 /* mmx.h */; };
+ 04409BA712FA989600FB9AA8 /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409BA312FA989600FB9AA8 /* SDL_yuv_mmx.c */; };
+ 04409BA812FA989600FB9AA8 /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409BA412FA989600FB9AA8 /* SDL_yuv_sw_c.h */; };
+ 04409BA912FA989600FB9AA8 /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409BA512FA989600FB9AA8 /* SDL_yuv_sw.c */; };
04461DEE0EA76BA3006C462D /* SDL_haptic.h in Headers */ = {isa = PBXBuildFile; fileRef = 04461DED0EA76BA3006C462D /* SDL_haptic.h */; settings = {ATTRIBUTES = (Public, ); }; };
044E5FB511E6069F0076F181 /* SDL_clipboard.h in Headers */ = {isa = PBXBuildFile; fileRef = 044E5FB311E6069F0076F181 /* SDL_clipboard.h */; settings = {ATTRIBUTES = (Public, ); }; };
044E5FB611E6069F0076F181 /* SDL_input.h in Headers */ = {isa = PBXBuildFile; fileRef = 044E5FB411E6069F0076F181 /* SDL_input.h */; settings = {ATTRIBUTES = (Public, ); }; };
@@ -223,9 +227,6 @@
FDA684660DF2374E00F98A1A /* SDL_surface.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA683190DF2374E00F98A1A /* SDL_surface.c */; };
FDA684670DF2374E00F98A1A /* SDL_sysvideo.h in Headers */ = {isa = PBXBuildFile; fileRef = FDA6831A0DF2374E00F98A1A /* SDL_sysvideo.h */; };
FDA684680DF2374E00F98A1A /* SDL_video.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA6831B0DF2374E00F98A1A /* SDL_video.c */; };
- FDA684690DF2374E00F98A1A /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA6831C0DF2374E00F98A1A /* SDL_yuv_mmx.c */; };
- FDA6846A0DF2374E00F98A1A /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA6831D0DF2374E00F98A1A /* SDL_yuv_sw.c */; };
- FDA6846B0DF2374E00F98A1A /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = FDA6831E0DF2374E00F98A1A /* SDL_yuv_sw_c.h */; };
FDA685FB0DF244C800F98A1A /* SDL_nullevents.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA685F50DF244C800F98A1A /* SDL_nullevents.c */; };
FDA685FC0DF244C800F98A1A /* SDL_nullevents_c.h in Headers */ = {isa = PBXBuildFile; fileRef = FDA685F60DF244C800F98A1A /* SDL_nullevents_c.h */; };
FDA685FF0DF244C800F98A1A /* SDL_nullvideo.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA685F90DF244C800F98A1A /* SDL_nullvideo.c */; };
@@ -328,6 +329,10 @@
043DD76C10FD8A0000DED673 /* SDL_alphamult.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_alphamult.h; sourceTree = "<group>"; };
043DD76D10FD8A0000DED673 /* SDL_blendfillrect.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_blendfillrect.c; sourceTree = "<group>"; };
043DD76E10FD8A0000DED673 /* SDL_drawrect.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_drawrect.c; sourceTree = "<group>"; };
+ 04409BA212FA989600FB9AA8 /* mmx.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mmx.h; sourceTree = "<group>"; };
+ 04409BA312FA989600FB9AA8 /* SDL_yuv_mmx.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_mmx.c; sourceTree = "<group>"; };
+ 04409BA412FA989600FB9AA8 /* SDL_yuv_sw_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_yuv_sw_c.h; sourceTree = "<group>"; };
+ 04409BA512FA989600FB9AA8 /* SDL_yuv_sw.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_sw.c; sourceTree = "<group>"; };
04461DED0EA76BA3006C462D /* SDL_haptic.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_haptic.h; path = ../../include/SDL_haptic.h; sourceTree = SOURCE_ROOT; };
044E5FB311E6069F0076F181 /* SDL_clipboard.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_clipboard.h; path = ../../include/SDL_clipboard.h; sourceTree = SOURCE_ROOT; };
044E5FB411E6069F0076F181 /* SDL_input.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_input.h; path = ../../include/SDL_input.h; sourceTree = SOURCE_ROOT; };
@@ -505,9 +510,6 @@
FDA683190DF2374E00F98A1A /* SDL_surface.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_surface.c; sourceTree = "<group>"; };
FDA6831A0DF2374E00F98A1A /* SDL_sysvideo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_sysvideo.h; sourceTree = "<group>"; };
FDA6831B0DF2374E00F98A1A /* SDL_video.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_video.c; sourceTree = "<group>"; };
- FDA6831C0DF2374E00F98A1A /* SDL_yuv_mmx.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_mmx.c; sourceTree = "<group>"; };
- FDA6831D0DF2374E00F98A1A /* SDL_yuv_sw.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_sw.c; sourceTree = "<group>"; };
- FDA6831E0DF2374E00F98A1A /* SDL_yuv_sw_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_yuv_sw_c.h; sourceTree = "<group>"; };
FDA685F50DF244C800F98A1A /* SDL_nullevents.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_nullevents.c; sourceTree = "<group>"; };
FDA685F60DF244C800F98A1A /* SDL_nullevents_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_nullevents_c.h; sourceTree = "<group>"; };
FDA685F90DF244C800F98A1A /* SDL_nullvideo.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_nullvideo.c; sourceTree = "<group>"; };
@@ -659,9 +661,13 @@
isa = PBXGroup;
children = (
041B2CE812FA0F680087D585 /* opengles */,
+ 041B2CEC12FA0F680087D585 /* software */,
+ 04409BA212FA989600FB9AA8 /* mmx.h */,
041B2CEA12FA0F680087D585 /* SDL_render.c */,
041B2CEB12FA0F680087D585 /* SDL_sysrender.h */,
- 041B2CEC12FA0F680087D585 /* software */,
+ 04409BA312FA989600FB9AA8 /* SDL_yuv_mmx.c */,
+ 04409BA412FA989600FB9AA8 /* SDL_yuv_sw_c.h */,
+ 04409BA512FA989600FB9AA8 /* SDL_yuv_sw.c */,
);
name = render;
path = ../../src/render;
@@ -1113,9 +1119,6 @@
FDA683190DF2374E00F98A1A /* SDL_surface.c */,
FDA6831A0DF2374E00F98A1A /* SDL_sysvideo.h */,
FDA6831B0DF2374E00F98A1A /* SDL_video.c */,
- FDA6831C0DF2374E00F98A1A /* SDL_yuv_mmx.c */,
- FDA6831D0DF2374E00F98A1A /* SDL_yuv_sw.c */,
- FDA6831E0DF2374E00F98A1A /* SDL_yuv_sw_c.h */,
);
name = video;
path = ../../src/video;
@@ -1179,7 +1182,6 @@
FDA6845D0DF2374E00F98A1A /* SDL_pixels_c.h in Headers */,
FDA684630DF2374E00F98A1A /* SDL_RLEaccel_c.h in Headers */,
FDA684670DF2374E00F98A1A /* SDL_sysvideo.h in Headers */,
- FDA6846B0DF2374E00F98A1A /* SDL_yuv_sw_c.h in Headers */,
FDA685FC0DF244C800F98A1A /* SDL_nullevents_c.h in Headers */,
FDA686000DF244C800F98A1A /* SDL_nullvideo.h in Headers */,
FD5F9D300E0E08B3008E885B /* SDL_joystick_c.h in Headers */,
@@ -1220,6 +1222,8 @@
04FFAB9812E23BDC00BA343D /* SDL_shape.h in Headers */,
041B2CD912FA0E9E0087D585 /* SDL_render.h in Headers */,
041B2CF212FA0F680087D585 /* SDL_sysrender.h in Headers */,
+ 04409BA612FA989600FB9AA8 /* mmx.h in Headers */,
+ 04409BA812FA989600FB9AA8 /* SDL_yuv_sw_c.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -1427,8 +1431,6 @@
FDA684640DF2374E00F98A1A /* SDL_stretch.c in Sources */,
FDA684660DF2374E00F98A1A /* SDL_surface.c in Sources */,
FDA684680DF2374E00F98A1A /* SDL_video.c in Sources */,
- FDA684690DF2374E00F98A1A /* SDL_yuv_mmx.c in Sources */,
- FDA6846A0DF2374E00F98A1A /* SDL_yuv_sw.c in Sources */,
FDA685FB0DF244C800F98A1A /* SDL_nullevents.c in Sources */,
FDA685FF0DF244C800F98A1A /* SDL_nullvideo.c in Sources */,
FD5F9D2F0E0E08B3008E885B /* SDL_joystick.c in Sources */,
@@ -1469,6 +1471,8 @@
041B2CF012FA0F680087D585 /* SDL_renderer_gles.c in Sources */,
041B2CF112FA0F680087D585 /* SDL_render.c in Sources */,
041B2CF312FA0F680087D585 /* SDL_renderer_sw.c in Sources */,
+ 04409BA712FA989600FB9AA8 /* SDL_yuv_mmx.c in Sources */,
+ 04409BA912FA989600FB9AA8 /* SDL_yuv_sw.c in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
--- a/Xcode/SDL/SDL.xcodeproj/project.pbxproj Wed Feb 02 22:55:12 2011 -0800
+++ b/Xcode/SDL/SDL.xcodeproj/project.pbxproj Thu Feb 03 00:19:40 2011 -0800
@@ -131,6 +131,14 @@
041B2CAB12FA0D680087D585 /* SDL_render.c in Sources */ = {isa = PBXBuildFile; fileRef = 041B2C9E12FA0D680087D585 /* SDL_render.c */; };
041B2CAC12FA0D680087D585 /* SDL_sysrender.h in Headers */ = {isa = PBXBuildFile; fileRef = 041B2C9F12FA0D680087D585 /* SDL_sysrender.h */; };
041B2CAD12FA0D680087D585 /* SDL_renderer_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 041B2CA112FA0D680087D585 /* SDL_renderer_sw.c */; };
+ 04409B9112FA97ED00FB9AA8 /* mmx.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409B8D12FA97ED00FB9AA8 /* mmx.h */; };
+ 04409B9212FA97ED00FB9AA8 /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409B8E12FA97ED00FB9AA8 /* SDL_yuv_mmx.c */; };
+ 04409B9312FA97ED00FB9AA8 /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409B8F12FA97ED00FB9AA8 /* SDL_yuv_sw_c.h */; };
+ 04409B9412FA97ED00FB9AA8 /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409B9012FA97ED00FB9AA8 /* SDL_yuv_sw.c */; };
+ 04409B9512FA97ED00FB9AA8 /* mmx.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409B8D12FA97ED00FB9AA8 /* mmx.h */; };
+ 04409B9612FA97ED00FB9AA8 /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409B8E12FA97ED00FB9AA8 /* SDL_yuv_mmx.c */; };
+ 04409B9712FA97ED00FB9AA8 /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409B8F12FA97ED00FB9AA8 /* SDL_yuv_sw_c.h */; };
+ 04409B9812FA97ED00FB9AA8 /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409B9012FA97ED00FB9AA8 /* SDL_yuv_sw.c */; };
044E5F8511E6051C0076F181 /* SDL_clipboard.h in Headers */ = {isa = PBXBuildFile; fileRef = 044E5F8411E6051C0076F181 /* SDL_clipboard.h */; settings = {ATTRIBUTES = (Public, ); }; };
044E5F8611E6051C0076F181 /* SDL_clipboard.h in Headers */ = {isa = PBXBuildFile; fileRef = 044E5F8411E6051C0076F181 /* SDL_clipboard.h */; };
0469A10B12EE4BF100B846D6 /* SDL_blendmode.h in Headers */ = {isa = PBXBuildFile; fileRef = 0469A10912EE4BF100B846D6 /* SDL_blendmode.h */; settings = {ATTRIBUTES = (Public, ); }; };
@@ -244,7 +252,6 @@
04BD011812E6671800899322 /* SDL_nullevents_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFEE912E6671800899322 /* SDL_nullevents_c.h */; };
04BD011B12E6671800899322 /* SDL_nullvideo.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFEEC12E6671800899322 /* SDL_nullvideo.c */; };
04BD011C12E6671800899322 /* SDL_nullvideo.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFEED12E6671800899322 /* SDL_nullvideo.h */; };
- 04BD013212E6671800899322 /* mmx.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF0412E6671800899322 /* mmx.h */; };
04BD016F12E6671800899322 /* SDL_alphamult.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF4812E6671800899322 /* SDL_alphamult.c */; };
04BD017012E6671800899322 /* SDL_alphamult.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF4912E6671800899322 /* SDL_alphamult.h */; };
04BD017112E6671800899322 /* SDL_blendfillrect.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF4A12E6671800899322 /* SDL_blendfillrect.c */; };
@@ -285,9 +292,6 @@
04BD019B12E6671800899322 /* SDL_surface.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7412E6671800899322 /* SDL_surface.c */; };
04BD019C12E6671800899322 /* SDL_sysvideo.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF7512E6671800899322 /* SDL_sysvideo.h */; };
04BD019D12E6671800899322 /* SDL_video.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7612E6671800899322 /* SDL_video.c */; };
- 04BD019E12E6671800899322 /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7712E6671800899322 /* SDL_yuv_mmx.c */; };
- 04BD019F12E6671800899322 /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7812E6671800899322 /* SDL_yuv_sw.c */; };
- 04BD01A012E6671800899322 /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF7912E6671800899322 /* SDL_yuv_sw_c.h */; };
04BD01DB12E6671800899322 /* imKStoUCS.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFFB812E6671800899322 /* imKStoUCS.c */; };
04BD01DC12E6671800899322 /* imKStoUCS.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFFB912E6671800899322 /* imKStoUCS.h */; };
04BD01DD12E6671800899322 /* SDL_x11clipboard.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFFBA12E6671800899322 /* SDL_x11clipboard.c */; };
@@ -457,7 +461,6 @@
04BD033212E6671800899322 /* SDL_nullevents_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFEE912E6671800899322 /* SDL_nullevents_c.h */; };
04BD033512E6671800899322 /* SDL_nullvideo.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFEEC12E6671800899322 /* SDL_nullvideo.c */; };
04BD033612E6671800899322 /* SDL_nullvideo.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFEED12E6671800899322 /* SDL_nullvideo.h */; };
- 04BD034C12E6671800899322 /* mmx.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF0412E6671800899322 /* mmx.h */; };
04BD038912E6671800899322 /* SDL_alphamult.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF4812E6671800899322 /* SDL_alphamult.c */; };
04BD038A12E6671800899322 /* SDL_alphamult.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF4912E6671800899322 /* SDL_alphamult.h */; };
04BD038B12E6671800899322 /* SDL_blendfillrect.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF4A12E6671800899322 /* SDL_blendfillrect.c */; };
@@ -498,9 +501,6 @@
04BD03B512E6671800899322 /* SDL_surface.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7412E6671800899322 /* SDL_surface.c */; };
04BD03B612E6671800899322 /* SDL_sysvideo.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF7512E6671800899322 /* SDL_sysvideo.h */; };
04BD03B712E6671800899322 /* SDL_video.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7612E6671800899322 /* SDL_video.c */; };
- 04BD03B812E6671800899322 /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7712E6671800899322 /* SDL_yuv_mmx.c */; };
- 04BD03B912E6671800899322 /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7812E6671800899322 /* SDL_yuv_sw.c */; };
- 04BD03BA12E6671800899322 /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF7912E6671800899322 /* SDL_yuv_sw_c.h */; };
04BD03F312E6671800899322 /* imKStoUCS.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFFB812E6671800899322 /* imKStoUCS.c */; };
04BD03F412E6671800899322 /* imKStoUCS.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFFB912E6671800899322 /* imKStoUCS.h */; };
04BD03F512E6671800899322 /* SDL_x11clipboard.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFFBA12E6671800899322 /* SDL_x11clipboard.c */; };
@@ -701,6 +701,10 @@
041B2C9E12FA0D680087D585 /* SDL_render.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_render.c; sourceTree = "<group>"; };
041B2C9F12FA0D680087D585 /* SDL_sysrender.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_sysrender.h; sourceTree = "<group>"; };
041B2CA112FA0D680087D585 /* SDL_renderer_sw.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_renderer_sw.c; sourceTree = "<group>"; };
+ 04409B8D12FA97ED00FB9AA8 /* mmx.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mmx.h; sourceTree = "<group>"; };
+ 04409B8E12FA97ED00FB9AA8 /* SDL_yuv_mmx.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_mmx.c; sourceTree = "<group>"; };
+ 04409B8F12FA97ED00FB9AA8 /* SDL_yuv_sw_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_yuv_sw_c.h; sourceTree = "<group>"; };
+ 04409B9012FA97ED00FB9AA8 /* SDL_yuv_sw.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_sw.c; sourceTree = "<group>"; };
044E5F8411E6051C0076F181 /* SDL_clipboard.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_clipboard.h; path = ../../include/SDL_clipboard.h; sourceTree = SOURCE_ROOT; };
0469A10912EE4BF100B846D6 /* SDL_blendmode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_blendmode.h; path = ../../include/SDL_blendmode.h; sourceTree = SOURCE_ROOT; };
04BDFD7412E6671700899322 /* SDL_atomic.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_atomic.c; sourceTree = "<group>"; };
@@ -814,7 +818,6 @@
04BDFEE912E6671800899322 /* SDL_nullevents_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_nullevents_c.h; sourceTree = "<group>"; };
04BDFEEC12E6671800899322 /* SDL_nullvideo.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_nullvideo.c; sourceTree = "<group>"; };
04BDFEED12E6671800899322 /* SDL_nullvideo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_nullvideo.h; sourceTree = "<group>"; };
- 04BDFF0412E6671800899322 /* mmx.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mmx.h; sourceTree = "<group>"; };
04BDFF4812E6671800899322 /* SDL_alphamult.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_alphamult.c; sourceTree = "<group>"; };
04BDFF4912E6671800899322 /* SDL_alphamult.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_alphamult.h; sourceTree = "<group>"; };
04BDFF4A12E6671800899322 /* SDL_blendfillrect.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_blendfillrect.c; sourceTree = "<group>"; };
@@ -855,9 +858,6 @@
04BDFF7412E6671800899322 /* SDL_surface.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_surface.c; sourceTree = "<group>"; };
04BDFF7512E6671800899322 /* SDL_sysvideo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_sysvideo.h; sourceTree = "<group>"; };
04BDFF7612E6671800899322 /* SDL_video.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_video.c; sourceTree = "<group>"; };
- 04BDFF7712E6671800899322 /* SDL_yuv_mmx.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_mmx.c; sourceTree = "<group>"; };
- 04BDFF7812E6671800899322 /* SDL_yuv_sw.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_sw.c; sourceTree = "<group>"; };
- 04BDFF7912E6671800899322 /* SDL_yuv_sw_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_yuv_sw_c.h; sourceTree = "<group>"; };
04BDFFB812E6671800899322 /* imKStoUCS.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = imKStoUCS.c; sourceTree = "<group>"; };
04BDFFB912E6671800899322 /* imKStoUCS.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = imKStoUCS.h; sourceTree = "<group>"; };
04BDFFBA12E6671800899322 /* SDL_x11clipboard.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_x11clipboard.c; sourceTree = "<group>"; };
@@ -1188,8 +1188,12 @@
children = (
041B2C9A12FA0D680087D585 /* opengl */,
041B2CA012FA0D680087D585 /* software */,
+ 04409B8D12FA97ED00FB9AA8 /* mmx.h */,
041B2C9E12FA0D680087D585 /* SDL_render.c */,
041B2C9F12FA0D680087D585 /* SDL_sysrender.h */,
+ 04409B8E12FA97ED00FB9AA8 /* SDL_yuv_mmx.c */,
+ 04409B8F12FA97ED00FB9AA8 /* SDL_yuv_sw_c.h */,
+ 04409B9012FA97ED00FB9AA8 /* SDL_yuv_sw.c */,
);
name = render;
path = ../../src/render;
@@ -1483,7 +1487,6 @@
04BDFEE712E6671800899322 /* dummy */,
04BDFFB712E6671800899322 /* x11 */,
04BDFFD712E6671800899322 /* Xext */,
- 04BDFF0412E6671800899322 /* mmx.h */,
04BDFF4812E6671800899322 /* SDL_alphamult.c */,
04BDFF4912E6671800899322 /* SDL_alphamult.h */,
04BDFF4A12E6671800899322 /* SDL_blendfillrect.c */,
@@ -1524,9 +1527,6 @@
04BDFF7412E6671800899322 /* SDL_surface.c */,
04BDFF7512E6671800899322 /* SDL_sysvideo.h */,
04BDFF7612E6671800899322 /* SDL_video.c */,
- 04BDFF7712E6671800899322 /* SDL_yuv_mmx.c */,
- 04BDFF7812E6671800899322 /* SDL_yuv_sw.c */,
- 04BDFF7912E6671800899322 /* SDL_yuv_sw_c.h */,
);
name = video;
path = ../../src/video;
@@ -1893,7 +1893,6 @@
04BD010312E6671800899322 /* SDL_cocoawindow.h in Headers */,
04BD011812E6671800899322 /* SDL_nullevents_c.h in Headers */,
04BD011C12E6671800899322 /* SDL_nullvideo.h in Headers */,
- 04BD013212E6671800899322 /* mmx.h in Headers */,
04BD017012E6671800899322 /* SDL_alphamult.h in Headers */,
04BD017612E6671800899322 /* SDL_blit.h in Headers */,
04BD017B12E6671800899322 /* SDL_blit_auto.h in Headers */,
@@ -1907,7 +1906,6 @@
04BD019712E6671800899322 /* SDL_RLEaccel_c.h in Headers */,
04BD019912E6671800899322 /* SDL_shape_internals.h in Headers */,
04BD019C12E6671800899322 /* SDL_sysvideo.h in Headers */,
- 04BD01A012E6671800899322 /* SDL_yuv_sw_c.h in Headers */,
04BD01DC12E6671800899322 /* imKStoUCS.h in Headers */,
04BD01DE12E6671800899322 /* SDL_x11clipboard.h in Headers */,
04BD01E012E6671800899322 /* SDL_x11dyn.h in Headers */,
@@ -1942,6 +1940,8 @@
0469A10B12EE4BF100B846D6 /* SDL_blendmode.h in Headers */,
041B2C9512FA0D2A0087D585 /* SDL_render.h in Headers */,
041B2CA612FA0D680087D585 /* SDL_sysrender.h in Headers */,
+ 04409B9112FA97ED00FB9AA8 /* mmx.h in Headers */,
+ 04409B9312FA97ED00FB9AA8 /* SDL_yuv_sw_c.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -2016,7 +2016,6 @@
04BD031D12E6671800899322 /* SDL_cocoawindow.h in Headers */,
04BD033212E6671800899322 /* SDL_nullevents_c.h in Headers */,
04BD033612E6671800899322 /* SDL_nullvideo.h in Headers */,
- 04BD034C12E6671800899322 /* mmx.h in Headers */,
04BD038A12E6671800899322 /* SDL_alphamult.h in Headers */,
04BD039012E6671800899322 /* SDL_blit.h in Headers */,
04BD039512E6671800899322 /* SDL_blit_auto.h in Headers */,
@@ -2030,7 +2029,6 @@
04BD03B112E6671800899322 /* SDL_RLEaccel_c.h in Headers */,
04BD03B312E6671800899322 /* SDL_shape_internals.h in Headers */,
04BD03B612E6671800899322 /* SDL_sysvideo.h in Headers */,
- 04BD03BA12E6671800899322 /* SDL_yuv_sw_c.h in Headers */,
04BD03F412E6671800899322 /* imKStoUCS.h in Headers */,
04BD03F612E6671800899322 /* SDL_x11clipboard.h in Headers */,
04BD03F812E6671800899322 /* SDL_x11dyn.h in Headers */,
@@ -2065,6 +2063,8 @@
0469A10D12EE4BF100B846D6 /* SDL_blendmode.h in Headers */,
041B2C9612FA0D2A0087D585 /* SDL_render.h in Headers */,
041B2CAC12FA0D680087D585 /* SDL_sysrender.h in Headers */,
+ 04409B9512FA97ED00FB9AA8 /* mmx.h in Headers */,
+ 04409B9712FA97ED00FB9AA8 /* SDL_yuv_sw_c.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -2412,8 +2412,6 @@
04BD019A12E6671800899322 /* SDL_stretch.c in Sources */,
04BD019B12E6671800899322 /* SDL_surface.c in Sources */,
04BD019D12E6671800899322 /* SDL_video.c in Sources */,
- 04BD019E12E6671800899322 /* SDL_yuv_mmx.c in Sources */,
- 04BD019F12E6671800899322 /* SDL_yuv_sw.c in Sources */,
04BD01DB12E6671800899322 /* imKStoUCS.c in Sources */,
04BD01DD12E6671800899322 /* SDL_x11clipboard.c in Sources */,
04BD01DF12E6671800899322 /* SDL_x11dyn.c in Sources */,
@@ -2443,6 +2441,8 @@
041B2CA312FA0D680087D585 /* SDL_renderer_gl.c in Sources */,
041B2CA512FA0D680087D585 /* SDL_render.c in Sources */,
041B2CA712FA0D680087D585 /* SDL_renderer_sw.c in Sources */,
+ 04409B9212FA97ED00FB9AA8 /* SDL_yuv_mmx.c in Sources */,
+ 04409B9412FA97ED00FB9AA8 /* SDL_yuv_sw.c in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -2539,8 +2539,6 @@
04BD03B412E6671800899322 /* SDL_stretch.c in Sources */,
04BD03B512E6671800899322 /* SDL_surface.c in Sources */,
04BD03B712E6671800899322 /* SDL_video.c in Sources */,
- 04BD03B812E6671800899322 /* SDL_yuv_mmx.c in Sources */,
- 04BD03B912E6671800899322 /* SDL_yuv_sw.c in Sources */,
04BD03F312E6671800899322 /* imKStoUCS.c in Sources */,
04BD03F512E6671800899322 /* SDL_x11clipboard.c in Sources */,
04BD03F712E6671800899322 /* SDL_x11dyn.c in Sources */,
@@ -2570,6 +2568,8 @@
041B2CA912FA0D680087D585 /* SDL_renderer_gl.c in Sources */,
041B2CAB12FA0D680087D585 /* SDL_render.c in Sources */,
041B2CAD12FA0D680087D585 /* SDL_renderer_sw.c in Sources */,
+ 04409B9612FA97ED00FB9AA8 /* SDL_yuv_mmx.c in Sources */,
+ 04409B9812FA97ED00FB9AA8 /* SDL_yuv_sw.c in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
--- a/include/SDL_pixels.h Wed Feb 02 22:55:12 2011 -0800
+++ b/include/SDL_pixels.h Thu Feb 03 00:19:40 2011 -0800
@@ -122,18 +122,26 @@
#define SDL_PIXELORDER(X) (((X) >> 20) & 0x0F)
#define SDL_PIXELLAYOUT(X) (((X) >> 16) & 0x0F)
#define SDL_BITSPERPIXEL(X) (((X) >> 8) & 0xFF)
-#define SDL_BYTESPERPIXEL(X) (((X) >> 0) & 0xFF)
+#define SDL_BYTESPERPIXEL(X) \
+ (SDL_ISPIXELFORMAT_FOURCC(X) ? \
+ ((((X) == SDL_PIXELFORMAT_YV12) || \
+ ((X) == SDL_PIXELFORMAT_IYUV) || \
+ ((X) == SDL_PIXELFORMAT_YUY2) || \
+ ((X) == SDL_PIXELFORMAT_UYVY) || \
+ ((X) == SDL_PIXELFORMAT_YVYU)) ? 2 : 1) : (((X) >> 0) & 0xFF))
#define SDL_ISPIXELFORMAT_INDEXED(format) \
- ((SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX1) || \
- (SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX4) || \
- (SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX8))
+ (!SDL_ISPIXELFORMAT_FOURCC(format) && \
+ ((SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX1) || \
+ (SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX4) || \
+ (SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX8)))
#define SDL_ISPIXELFORMAT_ALPHA(format) \
- ((SDL_PIXELORDER(format) == SDL_PACKEDORDER_ARGB) || \
- (SDL_PIXELORDER(format) == SDL_PACKEDORDER_RGBA) || \
- (SDL_PIXELORDER(format) == SDL_PACKEDORDER_ABGR) || \
- (SDL_PIXELORDER(format) == SDL_PACKEDORDER_BGRA))
+ (!SDL_ISPIXELFORMAT_FOURCC(format) && \
+ ((SDL_PIXELORDER(format) == SDL_PACKEDORDER_ARGB) || \
+ (SDL_PIXELORDER(format) == SDL_PACKEDORDER_RGBA) || \
+ (SDL_PIXELORDER(format) == SDL_PACKEDORDER_ABGR) || \
+ (SDL_PIXELORDER(format) == SDL_PACKEDORDER_BGRA)))
#define SDL_ISPIXELFORMAT_FOURCC(format) \
((format) && !((format) & 0x80000000))
--- a/include/SDL_rect.h Wed Feb 02 22:55:12 2011 -0800
+++ b/include/SDL_rect.h Thu Feb 03 00:19:40 2011 -0800
@@ -70,25 +70,6 @@
} SDL_Rect;
/**
- * \brief A structure used to track dirty rectangles
- *
- * \sa SDL_AddDirtyRect
- * \sa SDL_ClearDirtyRects
- * \sa SDL_FreeDirtyRects
- */
-typedef struct SDL_DirtyRect
-{
- SDL_Rect rect;
- struct SDL_DirtyRect *next;
-} SDL_DirtyRect;
-
-typedef struct SDL_DirtyRectList
-{
- SDL_DirtyRect *list;
- SDL_DirtyRect *free;
-} SDL_DirtyRectList;
-
-/**
* \brief Returns true if the rectangle has no area.
*/
#define SDL_RectEmpty(X) (((X)->w <= 0) || ((X)->h <= 0))
@@ -143,22 +124,6 @@
int *Y1, int *X2,
int *Y2);
-/**
- * \brief Add a rectangle to a dirty rectangle list
- */
-extern DECLSPEC void SDLCALL SDL_AddDirtyRect(SDL_DirtyRectList * list, const SDL_Rect * rect);
-
-/**
- * \brief Remove all rectangles associated with a dirty rectangle list
- */
-extern DECLSPEC void SDLCALL SDL_ClearDirtyRects(SDL_DirtyRectList * list);
-
-/**
- * \brief Free memory associated with a dirty rectangle list
- */
-extern DECLSPEC void SDLCALL SDL_FreeDirtyRects(SDL_DirtyRectList * list);
-
-
/* Ends C function definitions when using C++ */
#ifdef __cplusplus
/* *INDENT-OFF* */
--- a/include/SDL_render.h Wed Feb 02 22:55:12 2011 -0800
+++ b/include/SDL_render.h Thu Feb 03 00:19:40 2011 -0800
@@ -61,7 +61,7 @@
const char *name; /**< The name of the renderer */
Uint32 flags; /**< Supported ::SDL_RendererFlags */
Uint32 num_texture_formats; /**< The number of available texture formats */
- Uint32 texture_formats[50]; /**< The available texture formats */
+ Uint32 texture_formats[16]; /**< The available texture formats */
int max_texture_width; /**< The maximimum texture width */
int max_texture_height; /**< The maximimum texture height */
} SDL_RendererInfo;
@@ -204,22 +204,6 @@
int *w, int *h);
/**
- * \brief Query the pixels of a texture, if the texture does not need to be
- * locked for pixel access.
- *
- * \param texture A texture to be queried, which was created with
- * ::SDL_TEXTUREACCESS_STREAMING.
- * \param pixels A pointer filled with a pointer to the pixels for the
- * texture.
- * \param pitch A pointer filled in with the pitch of the pixel data.
- *
- * \return 0 on success, or -1 if the texture is not valid, or must be locked
- * for pixel access.
- */
-extern DECLSPEC int SDLCALL SDL_QueryTexturePixels(SDL_Texture * texture,
- void **pixels, int *pitch);
-
-/**
* \brief Set an additional color value used in render copy operations.
*
* \param texture The texture to update.
@@ -299,7 +283,7 @@
/**
* \brief Get the blend mode used for texture copy operations.
*
- * \param texture The texture to query.
+ * \param texture The texture to query.
* \param blendMode A pointer filled in with the current blend mode.
*
* \return 0 on success, or -1 if the texture is not valid.
@@ -312,7 +296,7 @@
/**
* \brief Update the given texture rectangle with new pixel data.
*
- * \param texture The texture to update
+ * \param texture The texture to update
* \param rect A pointer to the rectangle of pixels to update, or NULL to
* update the entire texture.
* \param pixels The raw pixel data.
@@ -329,49 +313,28 @@
/**
* \brief Lock a portion of the texture for pixel access.
*
- * \param texture The texture to lock for access, which was created with
+ * \param texture The texture to lock for access, which was created with
* ::SDL_TEXTUREACCESS_STREAMING.
* \param rect A pointer to the rectangle to lock for access. If the rect
* is NULL, the entire texture will be locked.
- * \param markDirty If this is nonzero, the locked area will be marked dirty
- * when the texture is unlocked.
* \param pixels This is filled in with a pointer to the locked pixels,
* appropriately offset by the locked area.
* \param pitch This is filled in with the pitch of the locked pixels.
*
- * \return 0 on success, or -1 if the texture is not valid or was created with
- * ::SDL_TEXTUREACCESS_STATIC.
+ * \return 0 on success, or -1 if the texture is not valid or was not created with ::SDL_TEXTUREACCESS_STREAMING.
*
- * \sa SDL_DirtyTexture()
* \sa SDL_UnlockTexture()
*/
extern DECLSPEC int SDLCALL SDL_LockTexture(SDL_Texture * texture,
const SDL_Rect * rect,
- int markDirty, void **pixels,
- int *pitch);
-
-/**
- * \brief Unlock a texture, uploading the changes to renderer memory, if needed.
- *
- * \sa SDL_LockTexture()
- * \sa SDL_DirtyTexture()
- */
-extern DECLSPEC void SDLCALL SDL_UnlockTexture(SDL_Texture * texture);
+ void **pixels, int *pitch);
/**
- * \brief Mark the specified rectangles of the texture as dirty.
- *
- * \param texture The texture to mark dirty, which was created with
- * ::SDL_TEXTUREACCESS_STREAMING.
- * \param numrects The number of rectangles pointed to by rects.
- * \param rects The pointer to an array of dirty rectangles.
+ * \brief Unlock a texture, uploading the changes to video memory, if needed.
*
* \sa SDL_LockTexture()
- * \sa SDL_UnlockTexture()
*/
-extern DECLSPEC void SDLCALL SDL_DirtyTexture(SDL_Texture * texture,
- int numrects,
- const SDL_Rect * rects);
+extern DECLSPEC void SDLCALL SDL_UnlockTexture(SDL_Texture * texture);
/**
* \brief Set the color used for drawing operations (Fill and Line).
--- a/src/SDL_compat.c Wed Feb 02 22:55:12 2011 -0800
+++ b/src/SDL_compat.c Thu Feb 03 00:19:40 2011 -0800
@@ -28,7 +28,6 @@
#include "video/SDL_sysvideo.h"
#include "video/SDL_pixels_c.h"
-#include "video/SDL_yuv_sw_c.h"
static SDL_Window *SDL_VideoWindow = NULL;
static SDL_Renderer *SDL_VideoRenderer = NULL;
@@ -344,13 +343,10 @@
static SDL_Surface *
CreateVideoSurface(SDL_Texture * texture)
{
- SDL_Surface *surface;
Uint32 format;
int w, h;
int bpp;
Uint32 Rmask, Gmask, Bmask, Amask;
- void *pixels;
- int pitch;
if (SDL_QueryTexture(texture, &format, NULL, &w, &h) < 0) {
return NULL;
@@ -362,15 +358,7 @@
return NULL;
}
- if (SDL_QueryTexturePixels(texture, &pixels, &pitch) == 0) {
- surface =
- SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask, Gmask,
- Bmask, Amask);
- } else {
- surface =
- SDL_CreateRGBSurface(0, w, h, bpp, Rmask, Gmask, Bmask, Amask);
- }
- return surface;
+ return SDL_CreateRGBSurface(0, w, h, bpp, Rmask, Gmask, Bmask, Amask);
}
static void
@@ -412,8 +400,6 @@
int w, h;
Uint32 format;
int access;
- void *pixels;
- int pitch;
/* We can't resize something we don't have... */
if (!SDL_VideoWindow) {
@@ -454,15 +440,10 @@
SDL_VideoSurface->w = width;
SDL_VideoSurface->h = height;
- if (SDL_QueryTexturePixels(SDL_VideoTexture, &pixels, &pitch) == 0) {
- SDL_VideoSurface->pixels = pixels;
- SDL_VideoSurface->pitch = pitch;
- } else {
- SDL_CalculatePitch(SDL_VideoSurface);
- SDL_VideoSurface->pixels =
- SDL_realloc(SDL_VideoSurface->pixels,
- SDL_VideoSurface->h * SDL_VideoSurface->pitch);
- }
+ SDL_CalculatePitch(SDL_VideoSurface);
+ SDL_VideoSurface->pixels =
+ SDL_realloc(SDL_VideoSurface->pixels,
+ SDL_VideoSurface->h * SDL_VideoSurface->pitch);
SDL_SetClipRect(SDL_VideoSurface, NULL);
SDL_InvalidateMap(SDL_VideoSurface->map);
@@ -830,20 +811,15 @@
screen = SDL_VideoSurface;
}
if (screen == SDL_VideoSurface) {
- if (screen->flags & SDL_PREALLOC) {
- /* The surface memory is maintained by the renderer */
- SDL_DirtyTexture(SDL_VideoTexture, numrects, rects);
- } else {
- /* The surface memory needs to be copied to texture */
- int pitch = screen->pitch;
- int psize = screen->format->BytesPerPixel;
- for (i = 0; i < numrects; ++i) {
- const SDL_Rect *rect = &rects[i];
- void *pixels =
- (Uint8 *) screen->pixels + rect->y * pitch +
- rect->x * psize;
- SDL_UpdateTexture(SDL_VideoTexture, rect, pixels, pitch);
- }
+ /* The surface memory needs to be copied to texture */
+ int pitch = screen->pitch;
+ int psize = screen->format->BytesPerPixel;
+ for (i = 0; i < numrects; ++i) {
+ const SDL_Rect *rect = &rects[i];
+ void *pixels =
+ (Uint8 *) screen->pixels + rect->y * pitch +
+ rect->x * psize;
+ SDL_UpdateTexture(SDL_VideoTexture, rect, pixels, pitch);
}
rect.x = 0;
rect.y = 0;
@@ -1459,8 +1435,6 @@
Uint16 pitches[3];
Uint8 *planes[3];
- SDL_SW_YUVTexture *sw;
-
SDL_Texture *texture;
Uint32 texture_format;
};
@@ -1545,24 +1519,6 @@
overlay->hwdata->texture =
SDL_CreateTexture(SDL_VideoRenderer, texture_format,
SDL_TEXTUREACCESS_STREAMING, w, h);
- if (overlay->hwdata->texture) {
- overlay->hwdata->sw = NULL;
- } else {
- SDL_DisplayMode current_mode;
-
- overlay->hwdata->sw = SDL_SW_CreateYUVTexture(texture_format, w, h);
- if (!overlay->hwdata->sw) {
- SDL_FreeYUVOverlay(overlay);
- return NULL;
- }
-
- /* Create a supported RGB format texture for display */
- SDL_GetCurrentDisplayMode(&current_mode);
- texture_format = current_mode.format;
- overlay->hwdata->texture =
- SDL_CreateTexture(SDL_VideoRenderer, texture_format,
- SDL_TEXTUREACCESS_STREAMING, w, h);
- }
if (!overlay->hwdata->texture) {
SDL_FreeYUVOverlay(overlay);
return NULL;
@@ -1582,17 +1538,8 @@
SDL_SetError("Passed a NULL overlay");
return -1;
}
- if (overlay->hwdata->sw) {
- if (SDL_SW_QueryYUVTexturePixels(overlay->hwdata->sw, &pixels, &pitch)
- < 0) {
- return -1;
- }
- } else {
- if (SDL_LockTexture
- (overlay->hwdata->texture, NULL, 1, &pixels, &pitch)
- < 0) {
- return -1;
- }
+ if (SDL_LockTexture(overlay->hwdata->texture, NULL, &pixels, &pitch) < 0) {
+ return -1;
}
overlay->pixels[0] = (Uint8 *) pixels;
overlay->pitches[0] = pitch;
@@ -1620,25 +1567,7 @@
if (!overlay) {
return;
}
- if (overlay->hwdata->sw) {
- void *pixels;
- int pitch;
- if (SDL_LockTexture
- (overlay->hwdata->texture, NULL, 1, &pixels, &pitch) == 0) {
- SDL_Rect srcrect;
-
- srcrect.x = 0;
- srcrect.y = 0;
- srcrect.w = overlay->w;
- srcrect.h = overlay->h;
- SDL_SW_CopyYUVToRGB(overlay->hwdata->sw, &srcrect,
- overlay->hwdata->texture_format,
- overlay->w, overlay->h, pixels, pitch);
- SDL_UnlockTexture(overlay->hwdata->texture);
- }
- } else {
- SDL_UnlockTexture(overlay->hwdata->texture);
- }
+ SDL_UnlockTexture(overlay->hwdata->texture);
}
int
--- a/src/render/SDL_render.c Wed Feb 02 22:55:12 2011 -0800
+++ b/src/render/SDL_render.c Thu Feb 03 00:19:40 2011 -0800
@@ -152,6 +152,34 @@
return 0;
}
+/* Tell whether 'format' appears in the renderer's list of native texture formats. */
+static SDL_bool
+IsSupportedFormat(SDL_Renderer * renderer, Uint32 format)
+{
+    const Uint32 *candidate = renderer->info.texture_formats;
+    Uint32 remaining = renderer->info.num_texture_formats;
+
+    /* Walk the advertised formats until one matches or the list runs out */
+    while (remaining-- > 0) {
+        if (*candidate++ == format) {
+            return SDL_TRUE;
+        }
+    }
+    return SDL_FALSE;
+}
+
+/*
+ * Choose the renderer-native format that will back a texture whose requested
+ * format the renderer cannot create directly.
+ *
+ * The backing texture is filled through SDL_ConvertPixels() or
+ * SDL_SW_CopyYUVToRGB(), so a FOURCC (YUV) format is never a useful answer:
+ * such candidates are skipped.  Preference order:
+ *   1. the first non-FOURCC format with the same alpha-ness as 'format',
+ *   2. the first non-FOURCC format of any kind,
+ *   3. the renderer's first listed format (previous fallback behaviour).
+ */
+static Uint32
+GetClosestSupportedFormat(SDL_Renderer * renderer, Uint32 format)
+{
+    Uint32 i;
+    SDL_bool hasAlpha = SDL_ISPIXELFORMAT_ALPHA(format);
+
+    /* We just want to match the first RGB format that has the same channels */
+    for (i = 0; i < renderer->info.num_texture_formats; ++i) {
+        Uint32 candidate = renderer->info.texture_formats[i];
+
+        if (!SDL_ISPIXELFORMAT_FOURCC(candidate) &&
+            SDL_ISPIXELFORMAT_ALPHA(candidate) == hasAlpha) {
+            return candidate;
+        }
+    }
+
+    /* No alpha match: any RGB format is still better than a YUV one */
+    for (i = 0; i < renderer->info.num_texture_formats; ++i) {
+        Uint32 candidate = renderer->info.texture_formats[i];
+
+        if (!SDL_ISPIXELFORMAT_FOURCC(candidate)) {
+            return candidate;
+        }
+    }
+    return renderer->info.texture_formats[0];
+}
+
SDL_Texture *
SDL_CreateTexture(SDL_Renderer * renderer, Uint32 format, int access, int w, int h)
{
@@ -159,14 +187,18 @@
CHECK_RENDERER_MAGIC(renderer, NULL);
+ if (SDL_ISPIXELFORMAT_INDEXED(format)) {
+ SDL_SetError("Palettized textures are not supported");
+ return NULL;
+ }
if (w <= 0 || h <= 0) {
SDL_SetError("Texture dimensions can't be 0");
- return 0;
+ return NULL;
}
texture = (SDL_Texture *) SDL_calloc(1, sizeof(*texture));
if (!texture) {
SDL_OutOfMemory();
- return 0;
+ return NULL;
}
texture->magic = &texture_magic;
texture->format = format;
@@ -184,9 +216,35 @@
}
renderer->textures = texture;
- if (renderer->CreateTexture(renderer, texture) < 0) {
- SDL_DestroyTexture(texture);
- return 0;
+ if (IsSupportedFormat(renderer, format)) {
+ if (renderer->CreateTexture(renderer, texture) < 0) {
+ SDL_DestroyTexture(texture);
+ return 0;
+ }
+ } else {
+ texture->native = SDL_CreateTexture(renderer,
+ GetClosestSupportedFormat(renderer, format),
+ access, w, h);
+ if (!texture->native) {
+ SDL_DestroyTexture(texture);
+ return NULL;
+ }
+
+ if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
+ texture->yuv = SDL_SW_CreateYUVTexture(format, w, h);
+ if (!texture->yuv) {
+ SDL_DestroyTexture(texture);
+ return NULL;
+ }
+ } else if (access == SDL_TEXTUREACCESS_STREAMING) {
+ /* The pitch is 4 byte aligned */
+ texture->pitch = (((w * SDL_BYTESPERPIXEL(format)) + 3) & ~3);
+ texture->pixels = SDL_malloc(texture->pitch * h);
+ if (!texture->pixels) {
+ SDL_DestroyTexture(texture);
+ return NULL;
+ }
+ }
}
return texture;
}
@@ -501,21 +559,6 @@
}
int
-SDL_QueryTexturePixels(SDL_Texture * texture, void **pixels, int *pitch)
-{
- SDL_Renderer *renderer;
-
- CHECK_TEXTURE_MAGIC(texture, -1);
-
- renderer = texture->renderer;
- if (!renderer->QueryTexturePixels) {
- SDL_Unsupported();
- return -1;
- }
- return renderer->QueryTexturePixels(renderer, texture, pixels, pitch);
-}
-
-int
SDL_SetTextureColorMod(SDL_Texture * texture, Uint8 r, Uint8 g, Uint8 b)
{
SDL_Renderer *renderer;
@@ -531,7 +574,9 @@
texture->r = r;
texture->g = g;
texture->b = b;
- if (renderer->SetTextureColorMod) {
+ if (texture->native) {
+ return SDL_SetTextureColorMod(texture->native, r, g, b);
+ } else if (renderer->SetTextureColorMod) {
return renderer->SetTextureColorMod(renderer, texture);
} else {
return 0;
@@ -573,7 +618,9 @@
texture->modMode &= ~SDL_TEXTUREMODULATE_ALPHA;
}
texture->a = alpha;
- if (renderer->SetTextureAlphaMod) {
+ if (texture->native) {
+ return SDL_SetTextureAlphaMod(texture->native, alpha);
+ } else if (renderer->SetTextureAlphaMod) {
return renderer->SetTextureAlphaMod(renderer, texture);
} else {
return 0;
@@ -600,7 +647,9 @@
renderer = texture->renderer;
texture->blendMode = blendMode;
- if (renderer->SetTextureBlendMode) {
+ if (texture->native) {
+ /* Forward to the backing texture; passing 'texture' itself would recurse without end */
+ return SDL_SetTextureBlendMode(texture->native, blendMode);
+ } else if (renderer->SetTextureBlendMode) {
return renderer->SetTextureBlendMode(renderer, texture);
} else {
return 0;
@@ -618,6 +667,91 @@
return 0;
}
+/*
+ * Update a YUV texture that is emulated on top of a renderer-native texture.
+ *
+ * The caller's data is first stored in the software YUV copy (texture->yuv).
+ * The whole texture (not just 'rect') is then converted with
+ * SDL_SW_CopyYUVToRGB() and written into texture->native, either by locking
+ * it (streaming textures) or through a temporary buffer (static textures).
+ *
+ * Returns 0 on success, or -1 if storing the YUV data, locking the native
+ * texture, allocating the temporary buffer or uploading it fails.
+ */
+static int
+SDL_UpdateTextureYUV(SDL_Texture * texture, const SDL_Rect * rect,
+                     const void *pixels, int pitch)
+{
+    SDL_Texture *native = texture->native;
+    SDL_Rect full_rect;
+    int status = 0;
+
+    if (SDL_SW_UpdateYUVTexture(texture->yuv, rect, pixels, pitch) < 0) {
+        return -1;
+    }
+
+    /* From here on the caller's rect is ignored: the full surface is refreshed */
+    full_rect.x = 0;
+    full_rect.y = 0;
+    full_rect.w = texture->w;
+    full_rect.h = texture->h;
+    rect = &full_rect;
+
+    if (texture->access == SDL_TEXTUREACCESS_STREAMING) {
+        /* We can lock the texture and copy to it */
+        void *native_pixels;
+        int native_pitch;
+
+        if (SDL_LockTexture(native, rect, &native_pixels, &native_pitch) < 0) {
+            return -1;
+        }
+        SDL_SW_CopyYUVToRGB(texture->yuv, rect, native->format,
+                            rect->w, rect->h, native_pixels, native_pitch);
+        SDL_UnlockTexture(native);
+    } else {
+        /* Use a temporary buffer for updating */
+        void *temp_pixels;
+        int temp_pitch;
+
+        /* Rows of the temporary buffer are padded to a multiple of 4 bytes */
+        temp_pitch = (((rect->w * SDL_BYTESPERPIXEL(native->format)) + 3) & ~3);
+        temp_pixels = SDL_malloc(rect->h * temp_pitch);
+        if (!temp_pixels) {
+            SDL_OutOfMemory();
+            return -1;
+        }
+        SDL_SW_CopyYUVToRGB(texture->yuv, rect, native->format,
+                            rect->w, rect->h, temp_pixels, temp_pitch);
+        /* Report upload failures instead of silently claiming success */
+        status = SDL_UpdateTexture(native, rect, temp_pixels, temp_pitch);
+        SDL_free(temp_pixels);
+    }
+    return (status < 0) ? -1 : 0;
+}
+
+/*
+ * Update an RGB texture whose format the renderer does not support natively.
+ *
+ * The caller's pixels (in texture->format) are converted with
+ * SDL_ConvertPixels() into the format of texture->native, either directly
+ * into the locked native texture (streaming textures) or through a temporary
+ * buffer that is then uploaded (static textures).
+ *
+ * 'rect' must be non-NULL; SDL_UpdateTexture() substitutes the full texture
+ * rectangle before calling this helper.
+ *
+ * Returns 0 on success, or -1 if locking, allocation, conversion or the
+ * upload fails.
+ */
+static int
+SDL_UpdateTextureNative(SDL_Texture * texture, const SDL_Rect * rect,
+                        const void *pixels, int pitch)
+{
+    SDL_Texture *native = texture->native;
+    int status;
+
+    if (texture->access == SDL_TEXTUREACCESS_STREAMING) {
+        /* We can lock the texture and copy to it */
+        void *native_pixels;
+        int native_pitch;
+
+        if (SDL_LockTexture(native, rect, &native_pixels, &native_pitch) < 0) {
+            return -1;
+        }
+        status = SDL_ConvertPixels(rect->w, rect->h,
+                                   texture->format, pixels, pitch,
+                                   native->format, native_pixels, native_pitch);
+        /* Always release the lock, even when the conversion failed */
+        SDL_UnlockTexture(native);
+    } else {
+        /* Use a temporary buffer for updating */
+        void *temp_pixels;
+        int temp_pitch;
+
+        /* Rows of the temporary buffer are padded to a multiple of 4 bytes */
+        temp_pitch = (((rect->w * SDL_BYTESPERPIXEL(native->format)) + 3) & ~3);
+        temp_pixels = SDL_malloc(rect->h * temp_pitch);
+        if (!temp_pixels) {
+            SDL_OutOfMemory();
+            return -1;
+        }
+        status = SDL_ConvertPixels(rect->w, rect->h,
+                                   texture->format, pixels, pitch,
+                                   native->format, temp_pixels, temp_pitch);
+        if (status == 0) {
+            /* Only upload data that was converted successfully */
+            status = SDL_UpdateTexture(native, rect, temp_pixels, temp_pitch);
+        }
+        SDL_free(temp_pixels);
+    }
+    return (status < 0) ? -1 : 0;
+}
+
int
SDL_UpdateTexture(SDL_Texture * texture, const SDL_Rect * rect,
const void *pixels, int pitch)
@@ -627,11 +761,6 @@
CHECK_TEXTURE_MAGIC(texture, -1);
- renderer = texture->renderer;
- if (!renderer->UpdateTexture) {
- SDL_Unsupported();
- return -1;
- }
if (!rect) {
full_rect.x = 0;
full_rect.y = 0;
@@ -639,11 +768,38 @@
full_rect.h = texture->h;
rect = &full_rect;
}
- return renderer->UpdateTexture(renderer, texture, rect, pixels, pitch);
+
+ if (texture->yuv) {
+ return SDL_UpdateTextureYUV(texture, rect, pixels, pitch);
+ } else if (texture->native) {
+ return SDL_UpdateTextureNative(texture, rect, pixels, pitch);
+ } else {
+ renderer = texture->renderer;
+ return renderer->UpdateTexture(renderer, texture, rect, pixels, pitch);
+ }
+}
+
+/*
+ * Lock a software-emulated YUV texture: the request is handed to
+ * SDL_SW_LockYUVTexture() on texture->yuv and its result is returned as-is.
+ */
+static int
+SDL_LockTextureYUV(SDL_Texture * texture, const SDL_Rect * rect,
+ void **pixels, int *pitch)
+{
+ return SDL_SW_LockYUVTexture(texture->yuv, rect, pixels, pitch);
+}
+
+/*
+ * Lock a format-converted texture: no renderer call is made here, the caller
+ * is simply given a pointer into the shadow buffer (texture->pixels) at the
+ * top-left corner of 'rect', together with the shadow buffer's pitch.  The
+ * rectangle is stored in texture->locked_rect.  Always returns 0.
+ */
+static int
+SDL_LockTextureNative(SDL_Texture * texture, const SDL_Rect * rect,
+                      void **pixels, int *pitch)
+{
+    Uint8 *row_start;
+
+    /* Keep a copy of the locked region on the texture */
+    texture->locked_rect = *rect;
+
+    row_start = (Uint8 *) texture->pixels + rect->y * texture->pitch;
+    *pixels = (void *) (row_start +
+                        rect->x * SDL_BYTESPERPIXEL(texture->format));
+    *pitch = texture->pitch;
+    return 0;
}
int
-SDL_LockTexture(SDL_Texture * texture, const SDL_Rect * rect, int markDirty,
+SDL_LockTexture(SDL_Texture * texture, const SDL_Rect * rect,
void **pixels, int *pitch)
{
SDL_Renderer *renderer;
@@ -655,11 +811,7 @@
SDL_SetError("SDL_LockTexture(): texture must be streaming");
return -1;
}
- renderer = texture->renderer;
- if (!renderer->LockTexture) {
- SDL_Unsupported();
- return -1;
- }
+
if (!rect) {
full_rect.x = 0;
full_rect.y = 0;
@@ -667,8 +819,57 @@
full_rect.h = texture->h;
rect = &full_rect;
}
- return renderer->LockTexture(renderer, texture, rect, markDirty, pixels,
- pitch);
+
+ if (texture->yuv) {
+ return SDL_LockTextureYUV(texture, rect, pixels, pitch);
+ } else if (texture->native) {
+ return SDL_LockTextureNative(texture, rect, pixels, pitch);
+ } else {
+ renderer = texture->renderer;
+ return renderer->LockTexture(renderer, texture, rect, pixels, pitch);
+ }
+}
+
+/*
+ * Unlock a software-emulated YUV texture: convert the complete YUV surface
+ * to the native texture's format, writing straight into the locked native
+ * texture.  If the native texture cannot be locked nothing is uploaded.
+ */
+static void
+SDL_UnlockTextureYUV(SDL_Texture * texture)
+{
+    SDL_Texture *backing = texture->native;
+    SDL_Rect whole;
+    void *dst;
+    int dst_pitch;
+
+    /* The conversion always covers the entire texture */
+    whole.x = 0;
+    whole.y = 0;
+    whole.w = texture->w;
+    whole.h = texture->h;
+
+    if (SDL_LockTexture(backing, &whole, &dst, &dst_pitch) >= 0) {
+        SDL_SW_CopyYUVToRGB(texture->yuv, &whole, backing->format,
+                            whole.w, whole.h, dst, dst_pitch);
+        SDL_UnlockTexture(backing);
+    }
+}
+
+/*
+ * Unlock a format-converted texture: the region recorded in
+ * texture->locked_rect is converted from the shadow buffer (texture->pixels,
+ * in texture->format) into the locked native texture.  If the native texture
+ * cannot be locked nothing is uploaded.
+ *
+ * File-local helper, so it is declared static like the other
+ * SDL_*TextureYUV / SDL_*TextureNative helpers in this file.
+ */
+static void
+SDL_UnlockTextureNative(SDL_Texture * texture)
+{
+    SDL_Texture *native = texture->native;
+    void *native_pixels;
+    int native_pitch;
+    const SDL_Rect *rect = &texture->locked_rect;
+    /* Top-left corner of the locked region inside the shadow buffer */
+    const void* pixels = (void *) ((Uint8 *) texture->pixels +
+                        rect->y * texture->pitch +
+                        rect->x * SDL_BYTESPERPIXEL(texture->format));
+    int pitch = texture->pitch;
+
+    if (SDL_LockTexture(native, rect, &native_pixels, &native_pitch) < 0) {
+        return;
+    }
+    SDL_ConvertPixels(rect->w, rect->h,
+                      texture->format, pixels, pitch,
+                      native->format, native_pixels, native_pitch);
+    SDL_UnlockTexture(native);
}
void
@@ -681,29 +882,14 @@
if (texture->access != SDL_TEXTUREACCESS_STREAMING) {
return;
}
- renderer = texture->renderer;
- if (!renderer->UnlockTexture) {
- return;
+ if (texture->yuv) {
+ SDL_UnlockTextureYUV(texture);
+ } else if (texture->native) {
+ SDL_UnlockTextureNative(texture);
+ } else {
+ renderer = texture->renderer;
+ renderer->UnlockTexture(renderer, texture);
}
- renderer->UnlockTexture(renderer, texture);
-}
-
-void
-SDL_DirtyTexture(SDL_Texture * texture, int numrects,
- const SDL_Rect * rects)
-{
- SDL_Renderer *renderer;
-
- CHECK_TEXTURE_MAGIC(texture, );
-
- if (texture->access != SDL_TEXTUREACCESS_STREAMING) {
- return;
- }
- renderer = texture->renderer;
- if (!renderer->DirtyTexture) {
- return;
- }
- renderer->DirtyTexture(renderer, texture, numrects, rects);
}
int
@@ -979,6 +1165,10 @@
}
}
+ if (texture->native) {
+ texture = texture->native;
+ }
+
return renderer->RenderCopy(renderer, texture, &real_srcrect,
&real_dstrect);
}
@@ -1087,6 +1277,16 @@
renderer->textures = texture->next;
}
+ if (texture->native) {
+ SDL_DestroyTexture(texture->native);
+ }
+ if (texture->yuv) {
+ SDL_SW_DestroyYUVTexture(texture->yuv);
+ }
+ if (texture->pixels) {
+ SDL_free(texture->pixels);
+ }
+
renderer->DestroyTexture(renderer, texture);
SDL_free(texture);
}
--- a/src/render/SDL_sysrender.h Wed Feb 02 22:55:12 2011 -0800
+++ b/src/render/SDL_sysrender.h Thu Feb 03 00:19:40 2011 -0800
@@ -26,6 +26,7 @@
#include "SDL_render.h"
#include "SDL_events.h"
+#include "SDL_yuv_sw_c.h"
/* The SDL 2D rendering system */
@@ -45,6 +46,13 @@
SDL_Renderer *renderer;
+ /* Support for formats not supported directly by the renderer */
+ SDL_Texture *native;
+ SDL_SW_YUVTexture *yuv;
+ void *pixels;
+ int pitch;
+ SDL_Rect locked_rect;
+
void *driverdata; /**< Driver specific texture representation */
SDL_Texture *prev;
@@ -58,8 +66,6 @@
void (*WindowEvent) (SDL_Renderer * renderer, const SDL_WindowEvent *event);
int (*CreateTexture) (SDL_Renderer * renderer, SDL_Texture * texture);
- int (*QueryTexturePixels) (SDL_Renderer * renderer, SDL_Texture * texture,
- void **pixels, int *pitch);
int (*SetTextureColorMod) (SDL_Renderer * renderer,
SDL_Texture * texture);
int (*SetTextureAlphaMod) (SDL_Renderer * renderer,
@@ -70,11 +76,8 @@
const SDL_Rect * rect, const void *pixels,
int pitch);
int (*LockTexture) (SDL_Renderer * renderer, SDL_Texture * texture,
- const SDL_Rect * rect, int markDirty, void **pixels,
- int *pitch);
+ const SDL_Rect * rect, void **pixels, int *pitch);
void (*UnlockTexture) (SDL_Renderer * renderer, SDL_Texture * texture);
- void (*DirtyTexture) (SDL_Renderer * renderer, SDL_Texture * texture,
- int numrects, const SDL_Rect * rects);
int (*RenderClear) (SDL_Renderer * renderer);
int (*RenderDrawPoints) (SDL_Renderer * renderer, const SDL_Point * points,
int count);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/render/SDL_yuv_mmx.c Thu Feb 03 00:19:40 2011 -0800
@@ -0,0 +1,432 @@
+/*
+ SDL - Simple DirectMedia Layer
+ Copyright (C) 1997-2010 Sam Lantinga
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+ Sam Lantinga
+ slouken@libsdl.org
+*/
+#include "SDL_config.h"
+
+#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
+
+#include "SDL_stdinc.h"
+
+#include "mmx.h"
+
+/* *INDENT-OFF* */
+
+/* 128 in every 16-bit lane; subtracted from the unpacked chroma samples (operand %9 below) */
+static mmx_t MMX_0080w = { .ud = {0x00800080, 0x00800080} };
+/* Byte masks selecting the low / high byte of every 16-bit lane (even / odd luminance pixels) */
+static mmx_t MMX_00FFw = { .ud = {0x00ff00ff, 0x00ff00ff} };
+static mmx_t MMX_FF00w = { .ud = {0xff00ff00, 0xff00ff00} };
+
+/* Luminance multiplier (74), used by the 16-bit routine only; products are shifted right by 6 */
+static mmx_t MMX_Ycoeff = { .uw = {0x004a, 0x004a, 0x004a, 0x004a} };
+
+/* Chroma multipliers for the 32-bit routine: 114, 89, -22, -46; products are shifted right by 6 */
+static mmx_t MMX_UbluRGB = { .uw = {0x0072, 0x0072, 0x0072, 0x0072} };
+static mmx_t MMX_VredRGB = { .uw = {0x0059, 0x0059, 0x0059, 0x0059} };
+static mmx_t MMX_UgrnRGB = { .uw = {0xffea, 0xffea, 0xffea, 0xffea} };
+static mmx_t MMX_VgrnRGB = { .uw = {0xffd2, 0xffd2, 0xffd2, 0xffd2} };
+
+/* Chroma multipliers for the 16-bit routine: 129, 102, -24, -51; results are shifted right by 6 */
+static mmx_t MMX_Ublu5x5 = { .uw = {0x0081, 0x0081, 0x0081, 0x0081} };
+static mmx_t MMX_Vred5x5 = { .uw = {0x0066, 0x0066, 0x0066, 0x0066} };
+static mmx_t MMX_Ugrn565 = { .uw = {0xffe8, 0xffe8, 0xffe8, 0xffe8} };
+static mmx_t MMX_Vgrn565 = { .uw = {0xffcd, 0xffcd, 0xffcd, 0xffcd} };
+
+/* Bit masks for the top 5 bits (red; also applied to blue before it is shifted down by 11) and the 6 green bits of a 565 pixel */
+static mmx_t MMX_red565 = { .uw = {0xf800, 0xf800, 0xf800, 0xf800} };
+static mmx_t MMX_grn565 = { .uw = {0x07e0, 0x07e0, 0x07e0, 0x07e0} };
+
+/**
+ This MMX assembler is my first assembler/MMX program ever.
+ Thus it may be buggy.
+ Send patches to:
+ mvogt@rhrk.uni-kl.de
+
+ After it worked fine I have "obfuscated" the code a bit to have
+ more parallelism in the MMX units. This means I moved
+ initialisation around and delayed other instructions.
+ Performance measurement did not show that this brought any advantage
+ but in theory it _should_ be faster this way.
+
+ The overall performance gain over the C based dither was 30%-40%.
+ The MMX routine calculates 256bit=8RGB values in each cycle
+ (4 for row1 & 4 for row2)
+
+ The red/green/blue.. coefficients are taken from the mpeg_play
+ player. They look nice, but I don't know if you can have
+ better values, to avoid integer rounding errors.
+
+
+ IMPORTANT:
+ ==========
+
+ It is a requirement that cr/cb/lum are 8 byte aligned and
+ that out is 16 byte aligned, or you will/may get segfaults
+
+*/
+
+/*
+ * Convert planar Y/Cr/Cb data to 32-bit pixels (0x00RRGGBB per the lane
+ * comments below) using MMX, two luminance rows at a time.
+ *
+ * colortab, rgb_2_pix - not referenced by this routine.
+ * lum  - luminance plane; each pass reads 4 bytes from the current row and
+ *        from the row 'cols' bytes further on.
+ * cr, cb - chroma planes; each pass uses 2 samples of each and advances both
+ *        pointers by 2.
+ * out  - destination; 4 pixels (16 bytes) are stored to each of row1/row2
+ *        per pass.
+ * rows, cols - luminance dimensions; the loop stops once lum reaches
+ *        lum + cols*rows.
+ * mod  - extra Uint32 elements per output row beyond cols (row2 starts at
+ *        out + cols + mod).
+ *
+ * Asm operands: %0=cr %1=cb %2=lum %3=row1 %4=cols %5=row2 %6=x %7=y
+ * %8=mod %9=MMX_0080w %10=MMX_VgrnRGB %11=MMX_VredRGB %12=MMX_FF00w
+ * %13=MMX_00FFw %14=MMX_UgrnRGB %15=MMX_UbluRGB.
+ *
+ * NOTE(review): operands %1, %2, %3, %5 and %6 are declared as inputs but
+ * are written by the asm, and no outputs or clobbers ("memory", "cc", MMX
+ * registers) are declared.  The asm also moves %esp with pushl/popl while
+ * "m" operands are in use; if the compiler addresses one of them relative
+ * to %esp the access would be skewed - confirm against the GCC extended-asm
+ * rules before touching this code.
+ */
+void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
+ unsigned char *lum, unsigned char *cr,
+ unsigned char *cb, unsigned char *out,
+ int rows, int cols, int mod )
+{
+ Uint32 *row1;
+ Uint32 *row2;
+
+ unsigned char* y = lum +cols*rows; // Pointer to the end
+ int x = 0;
+ row1 = (Uint32 *)out; // 32 bit target
+ row2 = (Uint32 *)out+cols+mod; // start of second row
+ mod = (mod+cols+mod)*4; // bytes added to row1/row2 after each row pair (the loop itself already advanced them by cols*4)
+
+ __asm__ __volatile__ (
+ // tap dance to workaround the inability to use %%ebx at will...
+ // move one thing to the stack...
+ "pushl $0\n" // save a slot on the stack.
+ "pushl %%ebx\n" // save %%ebx.
+ "movl %0, %%ebx\n" // put the thing (the cr pointer) in ebx.
+ "movl %%ebx,4(%%esp)\n" // put the thing in the stack slot.
+ "popl %%ebx\n" // get back %%ebx (the PIC register).
+
+ ".align 8\n"
+ "1:\n"
+
+ // create Cr (result in mm1)
+ "pushl %%ebx\n"
+ "movl 4(%%esp),%%ebx\n" // ebx = cr pointer kept in the stack slot
+ "movd (%%ebx),%%mm1\n" // 0 0 0 0 v3 v2 v1 v0
+ "popl %%ebx\n"
+ "pxor %%mm7,%%mm7\n" // 00 00 00 00 00 00 00 00
+ "movd (%2), %%mm2\n" // 0 0 0 0 l3 l2 l1 l0
+ "punpcklbw %%mm7,%%mm1\n" // 0 v3 0 v2 00 v1 00 v0
+ "punpckldq %%mm1,%%mm1\n" // 00 v1 00 v0 00 v1 00 v0
+ "psubw %9,%%mm1\n" // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0
+
+ // create Cr_g (result in mm0)
+ "movq %%mm1,%%mm0\n" // r1 r1 r0 r0 r1 r1 r0 r0
+ "pmullw %10,%%mm0\n" // red*-46dec=0.7136*64
+ "pmullw %11,%%mm1\n" // red*89dec=1.4013*64
+ "psraw $6, %%mm0\n" // red=red/64
+ "psraw $6, %%mm1\n" // red=red/64
+
+ // create L1 L2 (result in mm2,mm4)
+ // L2=lum+cols
+ "movq (%2,%4),%%mm3\n" // 0 0 0 0 L3 L2 L1 L0
+ "punpckldq %%mm3,%%mm2\n" // L3 L2 L1 L0 l3 l2 l1 l0
+ "movq %%mm2,%%mm4\n" // L3 L2 L1 L0 l3 l2 l1 l0
+ "pand %12,%%mm2\n" // L3 0 L1 0 l3 0 l1 0
+ "pand %13,%%mm4\n" // 0 L2 0 L0 0 l2 0 l0
+ "psrlw $8,%%mm2\n" // 0 L3 0 L1 0 l3 0 l1
+
+ // create R (result in mm6)
+ "movq %%mm2,%%mm5\n" // 0 L3 0 L1 0 l3 0 l1
+ "movq %%mm4,%%mm6\n" // 0 L2 0 L0 0 l2 0 l0
+ "paddsw %%mm1, %%mm5\n" // lum1+red:x R3 x R1 x r3 x r1
+ "paddsw %%mm1, %%mm6\n" // lum1+red:x R2 x R0 x r2 x r0
+ "packuswb %%mm5,%%mm5\n" // R3 R1 r3 r1 R3 R1 r3 r1
+ "packuswb %%mm6,%%mm6\n" // R2 R0 r2 r0 R2 R0 r2 r0
+ "pxor %%mm7,%%mm7\n" // 00 00 00 00 00 00 00 00
+ "punpcklbw %%mm5,%%mm6\n" // R3 R2 R1 R0 r3 r2 r1 r0
+
+ // create Cb (result in mm1)
+ "movd (%1), %%mm1\n" // 0 0 0 0 u3 u2 u1 u0
+ "punpcklbw %%mm7,%%mm1\n" // 0 u3 0 u2 00 u1 00 u0
+ "punpckldq %%mm1,%%mm1\n" // 00 u1 00 u0 00 u1 00 u0
+ "psubw %9,%%mm1\n" // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0
+
+ // create Cb_g (result in mm5)
+ "movq %%mm1,%%mm5\n" // u1 u1 u0 u0 u1 u1 u0 u0
+ "pmullw %14,%%mm5\n" // blue*-22dec (MMX_UgrnRGB = 0xffea)
+ "pmullw %15,%%mm1\n" // blue*114dec=1.78125*64
+ "psraw $6, %%mm5\n" // Cb_g=Cb_g/64
+ "psraw $6, %%mm1\n" // blue=blue/64
+
+ // create G (result in mm7)
+ "movq %%mm2,%%mm3\n" // 0 L3 0 L1 0 l3 0 l1
+ "movq %%mm4,%%mm7\n" // 0 L2 0 L0 0 l2 0 l0
+ "paddsw %%mm5, %%mm3\n" // lum1+Cb_g:x G3t x G1t x g3t x g1t
+ "paddsw %%mm5, %%mm7\n" // lum1+Cb_g:x G2t x G0t x g2t x g0t
+ "paddsw %%mm0, %%mm3\n" // lum1+Cr_g:x G3 x G1 x g3 x g1
+ "paddsw %%mm0, %%mm7\n" // lum1+Cr_g:x G2 x G0 x g2 x g0
+ "packuswb %%mm3,%%mm3\n" // G3 G1 g3 g1 G3 G1 g3 g1
+ "packuswb %%mm7,%%mm7\n" // G2 G0 g2 g0 G2 G0 g2 g0
+ "punpcklbw %%mm3,%%mm7\n" // G3 G2 G1 G0 g3 g2 g1 g0
+
+ // create B (result in mm5)
+ "movq %%mm2,%%mm3\n" // 0 L3 0 L1 0 l3 0 l1
+ "movq %%mm4,%%mm5\n" // 0 L2 0 L0 0 l2 0 l0
+ "paddsw %%mm1, %%mm3\n" // lum1+blue:x B3 x B1 x b3 x b1
+ "paddsw %%mm1, %%mm5\n" // lum1+blue:x B2 x B0 x b2 x b0
+ "packuswb %%mm3,%%mm3\n" // B3 B1 b3 b1 B3 B1 b3 b1
+ "packuswb %%mm5,%%mm5\n" // B2 B0 b2 b0 B2 B0 b2 b0
+ "punpcklbw %%mm3,%%mm5\n" // B3 B2 B1 B0 b3 b2 b1 b0
+
+ // fill destination row1 (needed are mm6=Rr,mm7=Gg,mm5=Bb)
+
+ "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0
+ "pxor %%mm4,%%mm4\n" // 0 0 0 0 0 0 0 0
+ "movq %%mm6,%%mm1\n" // R3 R2 R1 R0 r3 r2 r1 r0
+ "movq %%mm5,%%mm3\n" // B3 B2 B1 B0 b3 b2 b1 b0
+
+ // process lower lum
+ "punpcklbw %%mm4,%%mm1\n" // 0 r3 0 r2 0 r1 0 r0
+ "punpcklbw %%mm4,%%mm3\n" // 0 b3 0 b2 0 b1 0 b0
+ "movq %%mm1,%%mm2\n" // 0 r3 0 r2 0 r1 0 r0
+ "movq %%mm3,%%mm0\n" // 0 b3 0 b2 0 b1 0 b0
+ "punpcklwd %%mm1,%%mm3\n" // 0 r1 0 b1 0 r0 0 b0
+ "punpckhwd %%mm2,%%mm0\n" // 0 r3 0 b3 0 r2 0 b2
+
+ "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0
+ "movq %%mm7,%%mm1\n" // G3 G2 G1 G0 g3 g2 g1 g0
+ "punpcklbw %%mm1,%%mm2\n" // g3 0 g2 0 g1 0 g0 0
+ "punpcklwd %%mm4,%%mm2\n" // 0 0 g1 0 0 0 g0 0
+ "por %%mm3, %%mm2\n" // 0 r1 g1 b1 0 r0 g0 b0
+ "movq %%mm2,(%3)\n" // wrote out ! row1
+
+ "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0
+ "punpcklbw %%mm1,%%mm4\n" // g3 0 g2 0 g1 0 g0 0
+ "punpckhwd %%mm2,%%mm4\n" // 0 0 g3 0 0 0 g2 0
+ "por %%mm0, %%mm4\n" // 0 r3 g3 b3 0 r2 g2 b2
+ "movq %%mm4,8(%3)\n" // wrote out ! row1
+
+ // fill destination row2 (needed are mm6=Rr,mm7=Gg,mm5=Bb)
+ // this can be done "destructive"
+ "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0
+ "punpckhbw %%mm2,%%mm6\n" // 0 R3 0 R2 0 R1 0 R0
+ "punpckhbw %%mm1,%%mm5\n" // G3 B3 G2 B2 G1 B1 G0 B0
+ "movq %%mm5,%%mm1\n" // G3 B3 G2 B2 G1 B1 G0 B0
+ "punpcklwd %%mm6,%%mm1\n" // 0 R1 G1 B1 0 R0 G0 B0
+ "movq %%mm1,(%5)\n" // wrote out ! row2
+ "punpckhwd %%mm6,%%mm5\n" // 0 R3 G3 B3 0 R2 G2 B2
+ "movq %%mm5,8(%5)\n" // wrote out ! row2
+
+ "addl $4,%2\n" // lum+4
+ "leal 16(%3),%3\n" // row1+16
+ "leal 16(%5),%5\n" // row2+16
+ "addl $2,(%%esp)\n" // cr+2 (the cr pointer lives in the stack slot)
+ "addl $2,%1\n" // cb+2
+
+ "addl $4,%6\n" // x+4
+ "cmpl %4,%6\n" // loop while x < cols
+
+ "jl 1b\n"
+ "addl %4,%2\n" // lum += cols
+ "addl %8,%3\n" // row1+= mod
+ "addl %8,%5\n" // row2+= mod
+ "movl $0,%6\n" // x=0
+ "cmpl %7,%2\n" // loop while lum < y (end of the luminance plane)
+ "jl 1b\n"
+
+ "addl $4,%%esp\n" // get rid of the stack slot we reserved.
+ "emms\n" // reset MMX registers.
+ :
+ : "m" (cr), "r"(cb),"r"(lum),
+ "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
+ "m"(MMX_0080w),"m"(MMX_VgrnRGB),"m"(MMX_VredRGB),
+ "m"(MMX_FF00w),"m"(MMX_00FFw),"m"(MMX_UgrnRGB),
+ "m"(MMX_UbluRGB)
+ );
+}
+
+/*
+ * Convert planar Y/Cr/Cb data to 16-bit 565 pixels using MMX, two
+ * luminance rows at a time.
+ *
+ * colortab, rgb_2_pix - not referenced by this routine.
+ * lum  - luminance plane; each pass reads 8 bytes from the current row and
+ *        from the row 'cols' bytes further on.
+ * cr, cb - chroma planes; each pass uses 4 samples of each and advances both
+ *        pointers by 4.
+ * out  - destination; 8 pixels (16 bytes) are stored to each of row1/row2
+ *        per pass.
+ * rows, cols - luminance dimensions; the loop stops once lum reaches
+ *        lum + cols*rows.
+ * mod  - extra Uint16 elements per output row beyond cols (row2 starts at
+ *        out + cols + mod).
+ *
+ * Asm operands: %0=cr %1=cb %2=lum %3=row1 %4=cols %5=row2 %6=x %7=y
+ * %8=mod %9=MMX_0080w %10=MMX_Ugrn565 %11=MMX_Ublu5x5 %12=MMX_00FFw
+ * %13=MMX_Vgrn565 %14=MMX_Vred5x5 %15=MMX_Ycoeff %16=MMX_red565
+ * %17=MMX_grn565.
+ *
+ * NOTE(review): operands %1, %2, %3, %5 and %6 are declared as inputs but
+ * are written by the asm, and no outputs or clobbers ("memory", "cc", MMX
+ * registers) are declared.  The asm also moves %esp with pushl/popl while
+ * "m" operands are in use; if the compiler addresses one of them relative
+ * to %esp the access would be skewed - confirm against the GCC extended-asm
+ * rules before touching this code.
+ */
+void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
+ unsigned char *lum, unsigned char *cr,
+ unsigned char *cb, unsigned char *out,
+ int rows, int cols, int mod )
+{
+ Uint16 *row1;
+ Uint16 *row2;
+
+ unsigned char* y = lum +cols*rows; /* Pointer to the end */
+ int x = 0;
+ row1 = (Uint16 *)out; /* 16 bit target */
+ row2 = (Uint16 *)out+cols+mod; /* start of second row */
+ mod = (mod+cols+mod)*2; /* bytes added to row1/row2 after each row pair (the loop itself already advanced them by cols*2) */
+
+ __asm__ __volatile__(
+ // tap dance to workaround the inability to use %%ebx at will...
+ // move one thing to the stack...
+ "pushl $0\n" // save a slot on the stack.
+ "pushl %%ebx\n" // save %%ebx.
+ "movl %0, %%ebx\n" // put the thing (the cr pointer) in ebx.
+ "movl %%ebx, 4(%%esp)\n" // put the thing in the stack slot.
+ "popl %%ebx\n" // get back %%ebx (the PIC register).
+
+ ".align 8\n"
+ "1:\n"
+
+ "movd (%1), %%mm0\n" // 4 Cb 0 0 0 0 u3 u2 u1 u0
+ "pxor %%mm7, %%mm7\n"
+ "pushl %%ebx\n"
+ "movl 4(%%esp), %%ebx\n" // ebx = cr pointer kept in the stack slot
+ "movd (%%ebx), %%mm1\n" // 4 Cr 0 0 0 0 v3 v2 v1 v0
+ "popl %%ebx\n"
+
+ "punpcklbw %%mm7, %%mm0\n" // 4 W cb 0 u3 0 u2 0 u1 0 u0
+ "punpcklbw %%mm7, %%mm1\n" // 4 W cr 0 v3 0 v2 0 v1 0 v0
+ "psubw %9, %%mm0\n" // Cb - 128
+ "psubw %9, %%mm1\n" // Cr - 128
+ "movq %%mm0, %%mm2\n" // Cb 0 u3 0 u2 0 u1 0 u0
+ "movq %%mm1, %%mm3\n" // Cr
+ "pmullw %10, %%mm2\n" // Cb2green 0 R3 0 R2 0 R1 0 R0
+ "movq (%2), %%mm6\n" // L1 l7 L6 L5 L4 L3 L2 L1 L0
+ "pmullw %11, %%mm0\n" // Cb2blue
+ "pand %12, %%mm6\n" // L1 00 L6 00 L4 00 L2 00 L0
+ "pmullw %13, %%mm3\n" // Cr2green
+ "movq (%2), %%mm7\n" // L2
+ "pmullw %14, %%mm1\n" // Cr2red
+ "psrlw $8, %%mm7\n" // L2 00 L7 00 L5 00 L3 00 L1
+ "pmullw %15, %%mm6\n" // lum1
+ "paddw %%mm3, %%mm2\n" // Cb2green + Cr2green == green
+ "pmullw %15, %%mm7\n" // lum2
+
+ "movq %%mm6, %%mm4\n" // lum1
+ "paddw %%mm0, %%mm6\n" // lum1 +blue 00 B6 00 B4 00 B2 00 B0
+ "movq %%mm4, %%mm5\n" // lum1
+ "paddw %%mm1, %%mm4\n" // lum1 +red 00 R6 00 R4 00 R2 00 R0
+ "paddw %%mm2, %%mm5\n" // lum1 +green 00 G6 00 G4 00 G2 00 G0
+ "psraw $6, %%mm4\n" // R1 0 .. 64
+ "movq %%mm7, %%mm3\n" // lum2 00 L7 00 L5 00 L3 00 L1
+ "psraw $6, %%mm5\n" // G1 - .. +
+ "paddw %%mm0, %%mm7\n" // Lum2 +blue 00 B7 00 B5 00 B3 00 B1
+ "psraw $6, %%mm6\n" // B1 0 .. 64
+ "packuswb %%mm4, %%mm4\n" // R1 R1
+ "packuswb %%mm5, %%mm5\n" // G1 G1
+ "packuswb %%mm6, %%mm6\n" // B1 B1
+ "punpcklbw %%mm4, %%mm4\n"
+ "punpcklbw %%mm5, %%mm5\n"
+
+ "pand %16, %%mm4\n" // keep the top 5 bits: RED 1
+ "psllw $3, %%mm5\n" // GREEN 1
+ "punpcklbw %%mm6, %%mm6\n"
+ "pand %17, %%mm5\n"
+ "pand %16, %%mm6\n"
+ "por %%mm5, %%mm4\n" //
+ "psrlw $11, %%mm6\n" // BLUE 1
+ "movq %%mm3, %%mm5\n" // lum2
+ "paddw %%mm1, %%mm3\n" // lum2 +red 00 R7 00 R5 00 R3 00 R1
+ "paddw %%mm2, %%mm5\n" // lum2 +green 00 G7 00 G5 00 G3 00 G1
+ "psraw $6, %%mm3\n" // R2
+ "por %%mm6, %%mm4\n" // MM4
+ "psraw $6, %%mm5\n" // G2
+ "movq (%2, %4), %%mm6\n" // L3 load lum2
+ "psraw $6, %%mm7\n"
+ "packuswb %%mm3, %%mm3\n"
+ "packuswb %%mm5, %%mm5\n"
+ "packuswb %%mm7, %%mm7\n"
+ "pand %12, %%mm6\n" // L3
+ "punpcklbw %%mm3, %%mm3\n"
+ "punpcklbw %%mm5, %%mm5\n"
+ "pmullw %15, %%mm6\n" // lum3
+ "punpcklbw %%mm7, %%mm7\n"
+ "psllw $3, %%mm5\n" // GREEN 2
+ "pand %16, %%mm7\n"
+ "pand %16, %%mm3\n"
+ "psrlw $11, %%mm7\n" // BLUE 2
+ "pand %17, %%mm5\n"
+ "por %%mm7, %%mm3\n"
+ "movq (%2,%4), %%mm7\n" // L4 load lum2
+ "por %%mm5, %%mm3\n" //
+ "psrlw $8, %%mm7\n" // L4
+ "movq %%mm4, %%mm5\n"
+ "punpcklwd %%mm3, %%mm4\n"
+ "pmullw %15, %%mm7\n" // lum4
+ "punpckhwd %%mm3, %%mm5\n"
+
+ "movq %%mm4, (%3)\n" // write row1
+ "movq %%mm5, 8(%3)\n" // write row1
+
+ "movq %%mm6, %%mm4\n" // Lum3
+ "paddw %%mm0, %%mm6\n" // Lum3 +blue
+
+ "movq %%mm4, %%mm5\n" // Lum3
+ "paddw %%mm1, %%mm4\n" // Lum3 +red
+ "paddw %%mm2, %%mm5\n" // Lum3 +green
+ "psraw $6, %%mm4\n"
+ "movq %%mm7, %%mm3\n" // Lum4
+ "psraw $6, %%mm5\n"
+ "paddw %%mm0, %%mm7\n" // Lum4 +blue
+ "psraw $6, %%mm6\n" // Lum3 +blue
+ "movq %%mm3, %%mm0\n" // Lum4
+ "packuswb %%mm4, %%mm4\n"
+ "paddw %%mm1, %%mm3\n" // Lum4 +red
+ "packuswb %%mm5, %%mm5\n"
+ "paddw %%mm2, %%mm0\n" // Lum4 +green
+ "packuswb %%mm6, %%mm6\n"
+ "punpcklbw %%mm4, %%mm4\n"
+ "punpcklbw %%mm5, %%mm5\n"
+ "punpcklbw %%mm6, %%mm6\n"
+ "psllw $3, %%mm5\n" // GREEN 3
+ "pand %16, %%mm4\n"
+ "psraw $6, %%mm3\n" // psr 6
+ "psraw $6, %%mm0\n"
+ "pand %16, %%mm6\n" // BLUE
+ "pand %17, %%mm5\n"
+ "psrlw $11, %%mm6\n" // BLUE 3
+ "por %%mm5, %%mm4\n"
+ "psraw $6, %%mm7\n"
+ "por %%mm6, %%mm4\n"
+ "packuswb %%mm3, %%mm3\n"
+ "packuswb %%mm0, %%mm0\n"
+ "packuswb %%mm7, %%mm7\n"
+ "punpcklbw %%mm3, %%mm3\n"
+ "punpcklbw %%mm0, %%mm0\n"
+ "punpcklbw %%mm7, %%mm7\n"
+ "pand %16, %%mm3\n"
+ "pand %16, %%mm7\n" // BLUE
+ "psllw $3, %%mm0\n" // GREEN 4
+ "psrlw $11, %%mm7\n"
+ "pand %17, %%mm0\n"
+ "por %%mm7, %%mm3\n"
+ "por %%mm0, %%mm3\n"
+
+ "movq %%mm4, %%mm5\n"
+
+ "punpcklwd %%mm3, %%mm4\n"
+ "punpckhwd %%mm3, %%mm5\n"
+
+ "movq %%mm4, (%5)\n" // write row2
+ "movq %%mm5, 8(%5)\n" // write row2
+
+ "addl $8, %6\n" // x+8
+ "addl $8, %2\n" // lum+8
+ "addl $4, (%%esp)\n" // cr+4 (the cr pointer lives in the stack slot)
+ "addl $4, %1\n" // cb+4
+ "cmpl %4, %6\n" // flags for the jl below; leal does not alter them
+ "leal 16(%3), %3\n" // row1+16
+ "leal 16(%5),%5\n" // row2+16
+
+ "jl 1b\n"
+ "addl %4, %2\n" // lum += cols
+ "addl %8, %3\n" // row1+= mod
+ "addl %8, %5\n" // row2+= mod
+ "movl $0, %6\n" // x=0
+ "cmpl %7, %2\n" // loop while lum < y (end of the luminance plane)
+ "jl 1b\n"
+ "addl $4, %%esp\n" // get rid of the stack slot we reserved.
+ "emms\n"
+ :
+ : "m" (cr), "r"(cb),"r"(lum),
+ "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
+ "m"(MMX_0080w),"m"(MMX_Ugrn565),"m"(MMX_Ublu5x5),
+ "m"(MMX_00FFw),"m"(MMX_Vgrn565),"m"(MMX_Vred5x5),
+ "m"(MMX_Ycoeff),"m"(MMX_red565),"m"(MMX_grn565)
+ );
+}
+
+/* *INDENT-ON* */
+
+#endif /* GCC3 i386 inline assembly */
+
+/* vi: set ts=4 sw=4 expandtab: */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/render/SDL_yuv_sw.c Thu Feb 03 00:19:40 2011 -0800
@@ -0,0 +1,1322 @@
+/*
+ SDL - Simple DirectMedia Layer
+ Copyright (C) 1997-2010 Sam Lantinga
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+ Sam Lantinga
+ slouken@libsdl.org
+*/
+#include "SDL_config.h"
+
+/* This is the software implementation of the YUV texture support */
+
+/* This code was derived from code carrying the following copyright notices:
+
+ * Copyright (c) 1995 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for any purpose, without fee, and without written agreement is
+ * hereby granted, provided that the above copyright notice and the following
+ * two paragraphs appear in all copies of this software.
+ *
+ * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
+ * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
+ * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+
+ * Copyright (c) 1995 Erik Corry
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for any purpose, without fee, and without written agreement is
+ * hereby granted, provided that the above copyright notice and the following
+ * two paragraphs appear in all copies of this software.
+ *
+ * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
+ * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
+ * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
+ * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
+ * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+
+ * Portions of this software Copyright (c) 1995 Brown University.
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for any purpose, without fee, and without written agreement
+ * is hereby granted, provided that the above copyright notice and the
+ * following two paragraphs appear in all copies of this software.
+ *
+ * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
+ * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
+ * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
+ * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
+ * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#include "SDL_video.h"
+#include "SDL_cpuinfo.h"
+#include "SDL_yuv_sw_c.h"
+
+
+/* The colorspace conversion functions */
+
+#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
+/*
+ * Externally defined 1:1 converters with the same signature as the C
+ * converters below. They are only declared for optimized GCC 3+ i386 builds
+ * with SDL_ASSEMBLY_ROUTINES enabled, and SDL_SW_SetupYUVDisplay() only
+ * selects them when SDL_HasMMX() reports MMX support.
+ * NOTE(review): presumably implemented as inline assembly in
+ * SDL_yuv_mmx.c -- confirm.
+ */
+extern void Color565DitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
+ unsigned char *lum, unsigned char *cr,
+ unsigned char *cb, unsigned char *out,
+ int rows, int cols, int mod);
+extern void ColorRGBDitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
+ unsigned char *lum, unsigned char *cr,
+ unsigned char *cb, unsigned char *out,
+ int rows, int cols, int mod);
+#endif
+
+static void
+Color16DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
+ unsigned char *lum, unsigned char *cr,
+ unsigned char *cb, unsigned char *out,
+ int rows, int cols, int mod)
+{
+ unsigned short *row1;
+ unsigned short *row2;
+ unsigned char *lum2;
+ int x, y;
+ int cr_r;
+ int crb_g;
+ int cb_b;
+ int cols_2 = cols / 2;
+
+ row1 = (unsigned short *) out;
+ row2 = row1 + cols + mod;
+ lum2 = lum + cols;
+
+ mod += cols + mod;
+
+ y = rows / 2;
+ while (y--) {
+ x = cols_2;
+ while (x--) {
+ register int L;
+
+ cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+ crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+ + colortab[*cb + 2 * 256];
+ cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+ ++cr;
+ ++cb;
+
+ L = *lum++;
+ *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] |
+ rgb_2_pix[L + cb_b]);
+
+ L = *lum++;
+ *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] |
+ rgb_2_pix[L + cb_b]);
+
+
+ /* Now, do second row. */
+
+ L = *lum2++;
+ *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] |
+ rgb_2_pix[L + cb_b]);
+
+ L = *lum2++;
+ *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] |
+ rgb_2_pix[L + cb_b]);
+ }
+
+ /*
+ * These values are at the start of the next line, (due
+ * to the ++'s above),but they need to be at the start
+ * of the line after that.
+ */
+ lum += cols;
+ lum2 += cols;
+ row1 += mod;
+ row2 += mod;
+ }
+}
+
+static void
+Color24DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
+ unsigned char *lum, unsigned char *cr,
+ unsigned char *cb, unsigned char *out,
+ int rows, int cols, int mod)
+{
+ unsigned int value;
+ unsigned char *row1;
+ unsigned char *row2;
+ unsigned char *lum2;
+ int x, y;
+ int cr_r;
+ int crb_g;
+ int cb_b;
+ int cols_2 = cols / 2;
+
+ row1 = out;
+ row2 = row1 + cols * 3 + mod * 3;
+ lum2 = lum + cols;
+
+ mod += cols + mod;
+ mod *= 3;
+
+ y = rows / 2;
+ while (y--) {
+ x = cols_2;
+ while (x--) {
+ register int L;
+
+ cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+ crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+ + colortab[*cb + 2 * 256];
+ cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+ ++cr;
+ ++cb;
+
+ L = *lum++;
+ value = (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+ *row1++ = (value) & 0xFF;
+ *row1++ = (value >> 8) & 0xFF;
+ *row1++ = (value >> 16) & 0xFF;
+
+ L = *lum++;
+ value = (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+ *row1++ = (value) & 0xFF;
+ *row1++ = (value >> 8) & 0xFF;
+ *row1++ = (value >> 16) & 0xFF;
+
+
+ /* Now, do second row. */
+
+ L = *lum2++;
+ value = (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+ *row2++ = (value) & 0xFF;
+ *row2++ = (value >> 8) & 0xFF;
+ *row2++ = (value >> 16) & 0xFF;
+
+ L = *lum2++;
+ value = (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+ *row2++ = (value) & 0xFF;
+ *row2++ = (value >> 8) & 0xFF;
+ *row2++ = (value >> 16) & 0xFF;
+ }
+
+ /*
+ * These values are at the start of the next line, (due
+ * to the ++'s above),but they need to be at the start
+ * of the line after that.
+ */
+ lum += cols;
+ lum2 += cols;
+ row1 += mod;
+ row2 += mod;
+ }
+}
+
+static void
+Color32DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
+ unsigned char *lum, unsigned char *cr,
+ unsigned char *cb, unsigned char *out,
+ int rows, int cols, int mod)
+{
+ unsigned int *row1;
+ unsigned int *row2;
+ unsigned char *lum2;
+ int x, y;
+ int cr_r;
+ int crb_g;
+ int cb_b;
+ int cols_2 = cols / 2;
+
+ row1 = (unsigned int *) out;
+ row2 = row1 + cols + mod;
+ lum2 = lum + cols;
+
+ mod += cols + mod;
+
+ y = rows / 2;
+ while (y--) {
+ x = cols_2;
+ while (x--) {
+ register int L;
+
+ cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+ crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+ + colortab[*cb + 2 * 256];
+ cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+ ++cr;
+ ++cb;
+
+ L = *lum++;
+ *row1++ = (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+
+ L = *lum++;
+ *row1++ = (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+
+
+ /* Now, do second row. */
+
+ L = *lum2++;
+ *row2++ = (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+
+ L = *lum2++;
+ *row2++ = (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+ }
+
+ /*
+ * These values are at the start of the next line, (due
+ * to the ++'s above),but they need to be at the start
+ * of the line after that.
+ */
+ lum += cols;
+ lum2 += cols;
+ row1 += mod;
+ row2 += mod;
+ }
+}
+
+/*
+ * In this function I make use of a nasty trick. The tables have the lower
+ * 16 bits replicated in the upper 16. This means I can write ints and get
+ * the horisontal doubling for free (almost).
+ */
+static void
+Color16DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
+ unsigned char *lum, unsigned char *cr,
+ unsigned char *cb, unsigned char *out,
+ int rows, int cols, int mod)
+{
+ unsigned int *row1 = (unsigned int *) out;
+ const int next_row = cols + (mod / 2);
+ unsigned int *row2 = row1 + 2 * next_row;
+ unsigned char *lum2;
+ int x, y;
+ int cr_r;
+ int crb_g;
+ int cb_b;
+ int cols_2 = cols / 2;
+
+ lum2 = lum + cols;
+
+ mod = (next_row * 3) + (mod / 2);
+
+ y = rows / 2;
+ while (y--) {
+ x = cols_2;
+ while (x--) {
+ register int L;
+
+ cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+ crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+ + colortab[*cb + 2 * 256];
+ cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+ ++cr;
+ ++cb;
+
+ L = *lum++;
+ row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] |
+ rgb_2_pix[L + cb_b]);
+ row1++;
+
+ L = *lum++;
+ row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] |
+ rgb_2_pix[L + cb_b]);
+ row1++;
+
+
+ /* Now, do second row. */
+
+ L = *lum2++;
+ row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] |
+ rgb_2_pix[L + cb_b]);
+ row2++;
+
+ L = *lum2++;
+ row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] |
+ rgb_2_pix[L + cb_b]);
+ row2++;
+ }
+
+ /*
+ * These values are at the start of the next line, (due
+ * to the ++'s above),but they need to be at the start
+ * of the line after that.
+ */
+ lum += cols;
+ lum2 += cols;
+ row1 += mod;
+ row2 += mod;
+ }
+}
+
+static void
+Color24DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
+ unsigned char *lum, unsigned char *cr,
+ unsigned char *cb, unsigned char *out,
+ int rows, int cols, int mod)
+{
+ unsigned int value;
+ unsigned char *row1 = out;
+ const int next_row = (cols * 2 + mod) * 3;
+ unsigned char *row2 = row1 + 2 * next_row;
+ unsigned char *lum2;
+ int x, y;
+ int cr_r;
+ int crb_g;
+ int cb_b;
+ int cols_2 = cols / 2;
+
+ lum2 = lum + cols;
+
+ mod = next_row * 3 + mod * 3;
+
+ y = rows / 2;
+ while (y--) {
+ x = cols_2;
+ while (x--) {
+ register int L;
+
+ cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+ crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+ + colortab[*cb + 2 * 256];
+ cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+ ++cr;
+ ++cb;
+
+ L = *lum++;
+ value = (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+ row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
+ row1[next_row + 3 + 0] = (value) & 0xFF;
+ row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
+ row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
+ row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
+ row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
+ row1 += 2 * 3;
+
+ L = *lum++;
+ value = (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+ row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
+ row1[next_row + 3 + 0] = (value) & 0xFF;
+ row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
+ row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
+ row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
+ row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
+ row1 += 2 * 3;
+
+
+ /* Now, do second row. */
+
+ L = *lum2++;
+ value = (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+ row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
+ row2[next_row + 3 + 0] = (value) & 0xFF;
+ row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
+ row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
+ row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
+ row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
+ row2 += 2 * 3;
+
+ L = *lum2++;
+ value = (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+ row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
+ row2[next_row + 3 + 0] = (value) & 0xFF;
+ row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
+ row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
+ row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
+ row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
+ row2 += 2 * 3;
+ }
+
+ /*
+ * These values are at the start of the next line, (due
+ * to the ++'s above),but they need to be at the start
+ * of the line after that.
+ */
+ lum += cols;
+ lum2 += cols;
+ row1 += mod;
+ row2 += mod;
+ }
+}
+
+static void
+Color32DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
+ unsigned char *lum, unsigned char *cr,
+ unsigned char *cb, unsigned char *out,
+ int rows, int cols, int mod)
+{
+ unsigned int *row1 = (unsigned int *) out;
+ const int next_row = cols * 2 + mod;
+ unsigned int *row2 = row1 + 2 * next_row;
+ unsigned char *lum2;
+ int x, y;
+ int cr_r;
+ int crb_g;
+ int cb_b;
+ int cols_2 = cols / 2;
+
+ lum2 = lum + cols;
+
+ mod = (next_row * 3) + mod;
+
+ y = rows / 2;
+ while (y--) {
+ x = cols_2;
+ while (x--) {
+ register int L;
+
+ cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+ crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+ + colortab[*cb + 2 * 256];
+ cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+ ++cr;
+ ++cb;
+
+ L = *lum++;
+ row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
+ (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+ row1 += 2;
+
+ L = *lum++;
+ row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
+ (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+ row1 += 2;
+
+
+ /* Now, do second row. */
+
+ L = *lum2++;
+ row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
+ (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+ row2 += 2;
+
+ L = *lum2++;
+ row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
+ (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+ row2 += 2;
+ }
+
+ /*
+ * These values are at the start of the next line, (due
+ * to the ++'s above),but they need to be at the start
+ * of the line after that.
+ */
+ lum += cols;
+ lum2 += cols;
+ row1 += mod;
+ row2 += mod;
+ }
+}
+
+static void
+Color16DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
+ unsigned char *lum, unsigned char *cr,
+ unsigned char *cb, unsigned char *out,
+ int rows, int cols, int mod)
+{
+ unsigned short *row;
+ int x, y;
+ int cr_r;
+ int crb_g;
+ int cb_b;
+ int cols_2 = cols / 2;
+
+ row = (unsigned short *) out;
+
+ y = rows;
+ while (y--) {
+ x = cols_2;
+ while (x--) {
+ register int L;
+
+ cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+ crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+ + colortab[*cb + 2 * 256];
+ cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+ cr += 4;
+ cb += 4;
+
+ L = *lum;
+ lum += 2;
+ *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] |
+ rgb_2_pix[L + cb_b]);
+
+ L = *lum;
+ lum += 2;
+ *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] |
+ rgb_2_pix[L + cb_b]);
+
+ }
+
+ row += mod;
+ }
+}
+
+static void
+Color24DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
+ unsigned char *lum, unsigned char *cr,
+ unsigned char *cb, unsigned char *out,
+ int rows, int cols, int mod)
+{
+ unsigned int value;
+ unsigned char *row;
+ int x, y;
+ int cr_r;
+ int crb_g;
+ int cb_b;
+ int cols_2 = cols / 2;
+
+ row = (unsigned char *) out;
+ mod *= 3;
+ y = rows;
+ while (y--) {
+ x = cols_2;
+ while (x--) {
+ register int L;
+
+ cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+ crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+ + colortab[*cb + 2 * 256];
+ cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+ cr += 4;
+ cb += 4;
+
+ L = *lum;
+ lum += 2;
+ value = (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+ *row++ = (value) & 0xFF;
+ *row++ = (value >> 8) & 0xFF;
+ *row++ = (value >> 16) & 0xFF;
+
+ L = *lum;
+ lum += 2;
+ value = (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+ *row++ = (value) & 0xFF;
+ *row++ = (value >> 8) & 0xFF;
+ *row++ = (value >> 16) & 0xFF;
+
+ }
+ row += mod;
+ }
+}
+
+static void
+Color32DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
+ unsigned char *lum, unsigned char *cr,
+ unsigned char *cb, unsigned char *out,
+ int rows, int cols, int mod)
+{
+ unsigned int *row;
+ int x, y;
+ int cr_r;
+ int crb_g;
+ int cb_b;
+ int cols_2 = cols / 2;
+
+ row = (unsigned int *) out;
+ y = rows;
+ while (y--) {
+ x = cols_2;
+ while (x--) {
+ register int L;
+
+ cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+ crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+ + colortab[*cb + 2 * 256];
+ cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+ cr += 4;
+ cb += 4;
+
+ L = *lum;
+ lum += 2;
+ *row++ = (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+
+ L = *lum;
+ lum += 2;
+ *row++ = (rgb_2_pix[L + cr_r] |
+ rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+
+
+ }
+ row += mod;
+ }
+}
+
+/*
+ * Convert a packed YUV image to 16-bit pixels, doubling the image in both
+ * directions.
+ *
+ * The horizontal doubling relies on the lookup tables carrying each 16-bit
+ * pixel value in both halves of the 32-bit entry: storing one unsigned int
+ * therefore writes two identical neighbouring pixels. The vertical doubling
+ * is done by storing every word a second time one output row further down.
+ *
+ * colortab   four consecutive 256-entry chroma lookup tables
+ * rgb_2_pix  three consecutive 768-entry component-to-pixel tables
+ * lum/cr/cb  positions of the first Y, Cr and Cb bytes in the packed data
+ * out        destination buffer, written as unsigned ints
+ * rows/cols  source size; cols/2 pixel pairs are converted per row
+ * mod        padding after each destination row, in 16-bit pixels
+ */
+static void
+Color16DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
+                       unsigned char *lum, unsigned char *cr,
+                       unsigned char *cb, unsigned char *out,
+                       int rows, int cols, int mod)
+{
+    unsigned int *row = (unsigned int *) out;
+    const int next_row = cols + (mod / 2);      /* one output row, in words */
+    int x, y;
+    int cr_r;
+    int crb_g;
+    int cb_b;
+    int cols_2 = cols / 2;
+
+    y = rows;
+    while (y--) {
+        x = cols_2;
+        while (x--) {
+            register int L;
+
+            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+                + colortab[*cb + 2 * 256];
+            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+            cr += 4;
+            cb += 4;
+
+            L = *lum;
+            lum += 2;
+            row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
+                                      rgb_2_pix[L + crb_g] |
+                                      rgb_2_pix[L + cb_b]);
+            row++;
+
+            L = *lum;
+            lum += 2;
+            row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
+                                      rgb_2_pix[L + crb_g] |
+                                      rgb_2_pix[L + cb_b]);
+            row++;
+
+        }
+        /*
+         * The cursor is at the end of the pixels of the first of the two
+         * output rows just written. Finish that row (its padding), then
+         * skip the whole duplicated row. Leaving out the padding term
+         * misplaces every following row whenever mod is non-zero.
+         */
+        row += next_row + (mod / 2);
+    }
+}
+
+/*
+ * Convert a packed YUV image to 24-bit pixels, doubling the image in both
+ * directions: every source pixel becomes a 2x2 square of identical 3-byte
+ * pixels (lowest byte of the looked-up value first).
+ *
+ * colortab   four consecutive 256-entry chroma lookup tables
+ * rgb_2_pix  three consecutive 768-entry component-to-pixel tables
+ * lum/cr/cb  positions of the first Y, Cr and Cb bytes in the packed data
+ * out        destination buffer, written byte by byte
+ * rows/cols  source size; cols/2 pixel pairs are converted per row
+ * mod        padding after each destination row, in pixels
+ */
+static void
+Color24DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
+                       unsigned char *lum, unsigned char *cr,
+                       unsigned char *cb, unsigned char *out,
+                       int rows, int cols, int mod)
+{
+    unsigned int value;
+    unsigned char *row = out;
+    const int next_row = (cols * 2 + mod) * 3;  /* one output row, in bytes */
+    int x, y;
+    int cr_r;
+    int crb_g;
+    int cb_b;
+    int cols_2 = cols / 2;
+
+    y = rows;
+    while (y--) {
+        x = cols_2;
+        while (x--) {
+            register int L;
+
+            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+                + colortab[*cb + 2 * 256];
+            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+            cr += 4;
+            cb += 4;
+
+            L = *lum;
+            lum += 2;
+            value = (rgb_2_pix[L + cr_r] |
+                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+            row[0 + 0] = row[3 + 0] = row[next_row + 0] =
+                row[next_row + 3 + 0] = (value) & 0xFF;
+            row[0 + 1] = row[3 + 1] = row[next_row + 1] =
+                row[next_row + 3 + 1] = (value >> 8) & 0xFF;
+            row[0 + 2] = row[3 + 2] = row[next_row + 2] =
+                row[next_row + 3 + 2] = (value >> 16) & 0xFF;
+            row += 2 * 3;
+
+            L = *lum;
+            lum += 2;
+            value = (rgb_2_pix[L + cr_r] |
+                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+            row[0 + 0] = row[3 + 0] = row[next_row + 0] =
+                row[next_row + 3 + 0] = (value) & 0xFF;
+            row[0 + 1] = row[3 + 1] = row[next_row + 1] =
+                row[next_row + 3 + 1] = (value >> 8) & 0xFF;
+            row[0 + 2] = row[3 + 2] = row[next_row + 2] =
+                row[next_row + 3 + 2] = (value >> 16) & 0xFF;
+            row += 2 * 3;
+
+        }
+        /*
+         * The cursor is at the end of the pixels of the first of the two
+         * output rows just written. Finish that row (mod pixels of
+         * padding, three bytes each), then skip the whole duplicated row.
+         * Leaving out the padding term misplaces every following row
+         * whenever mod is non-zero.
+         */
+        row += next_row + mod * 3;
+    }
+}
+
+/*
+ * Convert a packed YUV image to 32-bit pixels, doubling the image in both
+ * directions: every source pixel becomes a 2x2 square of identical pixels.
+ *
+ * colortab   four consecutive 256-entry chroma lookup tables
+ * rgb_2_pix  three consecutive 768-entry component-to-pixel tables
+ * lum/cr/cb  positions of the first Y, Cr and Cb bytes in the packed data
+ * out        destination buffer, written as unsigned ints
+ * rows/cols  source size; cols/2 pixel pairs are converted per row
+ * mod        padding after each destination row, in pixels
+ */
+static void
+Color32DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
+                       unsigned char *lum, unsigned char *cr,
+                       unsigned char *cb, unsigned char *out,
+                       int rows, int cols, int mod)
+{
+    unsigned int *row = (unsigned int *) out;
+    const int next_row = cols * 2 + mod;        /* one output row, in pixels */
+    int x, y;
+    int cr_r;
+    int crb_g;
+    int cb_b;
+    int cols_2 = cols / 2;
+
+    y = rows;
+    while (y--) {
+        x = cols_2;
+        while (x--) {
+            register int L;
+
+            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+                + colortab[*cb + 2 * 256];
+            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+            cr += 4;
+            cb += 4;
+
+            L = *lum;
+            lum += 2;
+            row[0] = row[1] = row[next_row] = row[next_row + 1] =
+                (rgb_2_pix[L + cr_r] |
+                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+            row += 2;
+
+            L = *lum;
+            lum += 2;
+            row[0] = row[1] = row[next_row] = row[next_row + 1] =
+                (rgb_2_pix[L + cr_r] |
+                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+            row += 2;
+
+        }
+
+        /*
+         * The cursor is at the end of the pixels of the first of the two
+         * output rows just written. Finish that row (its padding), then
+         * skip the whole duplicated row. Leaving out the padding term
+         * misplaces every following row whenever mod is non-zero.
+         */
+        row += next_row + mod;
+    }
+}
+
+/*
+ * Count the bits that are set to 1 in a Uint32.
+ * Examines one bit per step; intended for occasional setup work only.
+ */
+static int
+number_of_bits_set(Uint32 a)
+{
+    int count = 0;
+
+    for (; a; a >>= 1) {
+        count += (int) (a & 1);
+    }
+    return count;
+}
+
+/*
+ * Count the consecutive 0 bits at the least significant end of a Uint32.
+ * Returns the full bit width of Uint32 when the value is zero (this
+ * assumes 8-bit chars). Examines one bit per step; intended for occasional
+ * setup work only.
+ */
+static int
+free_bits_at_bottom(Uint32 a)
+{
+    int zeros = 0;
+
+    if (!a) {
+        return sizeof(Uint32) * 8;
+    }
+    while (!(a & 1)) {
+        a >>= 1;
+        ++zeros;
+    }
+    return zeros;
+}
+
+/*
+ * Prepare swdata for conversion into the RGB format target_format.
+ *
+ * Rebuilds the three 768-entry component-to-pixel tables in
+ * swdata->rgb_2_pix from the channel masks of target_format, selects the
+ * Display1X/Display2X converters matching the texture's YUV format and the
+ * target's bytes per pixel, and frees any cached display surface.
+ *
+ * Returns 0 on success, or -1 (with the SDL error set) when the target
+ * format has no mask representation or fewer than 15 bits per pixel.
+ */
+static int
+SDL_SW_SetupYUVDisplay(SDL_SW_YUVTexture * swdata, Uint32 target_format)
+{
+ Uint32 *r_2_pix_alloc;
+ Uint32 *g_2_pix_alloc;
+ Uint32 *b_2_pix_alloc;
+ int i;
+ int bpp;
+ Uint32 Rmask, Gmask, Bmask, Amask;
+
+ if (!SDL_PixelFormatEnumToMasks
+ (target_format, &bpp, &Rmask, &Gmask, &Bmask, &Amask) || bpp < 15) {
+ SDL_SetError("Unsupported YUV destination format");
+ return -1;
+ }
+
+ /* Each colour component owns one 768-entry slice of rgb_2_pix */
+ swdata->target_format = target_format;
+ r_2_pix_alloc = &swdata->rgb_2_pix[0 * 768];
+ g_2_pix_alloc = &swdata->rgb_2_pix[1 * 768];
+ b_2_pix_alloc = &swdata->rgb_2_pix[2 * 768];
+
+ /*
+ * Set up entries 0-255 in rgb-to-pixel value tables.
+ * Each 8-bit component is reduced to the width of its mask, shifted to
+ * the mask position, and combined with the full alpha mask.
+ * NOTE(review): if a channel mask has more than 8 bits set, the shift
+ * count (8 - number_of_bits_set(mask)) is negative, which is undefined
+ * behaviour -- confirm such formats cannot reach this point.
+ */
+ for (i = 0; i < 256; ++i) {
+ r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Rmask));
+ r_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Rmask);
+ r_2_pix_alloc[i + 256] |= Amask;
+ g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Gmask));
+ g_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Gmask);
+ g_2_pix_alloc[i + 256] |= Amask;
+ b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Bmask));
+ b_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Bmask);
+ b_2_pix_alloc[i + 256] |= Amask;
+ }
+
+ /*
+ * If we have 16-bit output depth, then we double the value
+ * in the top word. This means that we can write out both
+ * pixels in the pixel doubling mode with one op. It is
+ * harmless in the normal case as storing a 32-bit value
+ * through a short pointer will lose the top bits anyway.
+ */
+ if (SDL_BYTESPERPIXEL(target_format) == 2) {
+ for (i = 0; i < 256; ++i) {
+ r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16;
+ g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16;
+ b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16;
+ }
+ }
+
+ /*
+ * Spread out the values we have to the rest of the array so that
+ * we do not need to check for overflow.
+ * Entries 0-255 repeat the value for component 0 and entries 512-767
+ * repeat the value for component 255, so out-of-range sums clamp.
+ */
+ for (i = 0; i < 256; ++i) {
+ r_2_pix_alloc[i] = r_2_pix_alloc[256];
+ r_2_pix_alloc[i + 512] = r_2_pix_alloc[511];
+ g_2_pix_alloc[i] = g_2_pix_alloc[256];
+ g_2_pix_alloc[i + 512] = g_2_pix_alloc[511];
+ b_2_pix_alloc[i] = b_2_pix_alloc[256];
+ b_2_pix_alloc[i + 512] = b_2_pix_alloc[511];
+ }
+
+ /* You have chosen wisely... */
+ /* Pick the converters for this source layout and target pixel size */
+ switch (swdata->format) {
+ case SDL_PIXELFORMAT_YV12:
+ case SDL_PIXELFORMAT_IYUV:
+ if (SDL_BYTESPERPIXEL(target_format) == 2) {
+#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
+ /* inline assembly functions */
+ /* The MMX path requires RGB565 masks and a width that is a
+ multiple of 16 */
+ if (SDL_HasMMX() && (Rmask == 0xF800) &&
+ (Gmask == 0x07E0) && (Bmask == 0x001F)
+ && (swdata->w & 15) == 0) {
+/*printf("Using MMX 16-bit 565 dither\n");*/
+ swdata->Display1X = Color565DitherYV12MMX1X;
+ } else {
+/*printf("Using C 16-bit dither\n");*/
+ swdata->Display1X = Color16DitherYV12Mod1X;
+ }
+#else
+ swdata->Display1X = Color16DitherYV12Mod1X;
+#endif
+ swdata->Display2X = Color16DitherYV12Mod2X;
+ }
+ if (SDL_BYTESPERPIXEL(target_format) == 3) {
+ swdata->Display1X = Color24DitherYV12Mod1X;
+ swdata->Display2X = Color24DitherYV12Mod2X;
+ }
+ if (SDL_BYTESPERPIXEL(target_format) == 4) {
+#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
+ /* inline assembly functions */
+ /* The MMX path requires 0x00RRGGBB masks and a width that is a
+ multiple of 16 */
+ if (SDL_HasMMX() && (Rmask == 0x00FF0000) &&
+ (Gmask == 0x0000FF00) &&
+ (Bmask == 0x000000FF) && (swdata->w & 15) == 0) {
+/*printf("Using MMX 32-bit dither\n");*/
+ swdata->Display1X = ColorRGBDitherYV12MMX1X;
+ } else {
+/*printf("Using C 32-bit dither\n");*/
+ swdata->Display1X = Color32DitherYV12Mod1X;
+ }
+#else
+ swdata->Display1X = Color32DitherYV12Mod1X;
+#endif
+ swdata->Display2X = Color32DitherYV12Mod2X;
+ }
+ break;
+ case SDL_PIXELFORMAT_YUY2:
+ case SDL_PIXELFORMAT_UYVY:
+ case SDL_PIXELFORMAT_YVYU:
+ if (SDL_BYTESPERPIXEL(target_format) == 2) {
+ swdata->Display1X = Color16DitherYUY2Mod1X;
+ swdata->Display2X = Color16DitherYUY2Mod2X;
+ }
+ if (SDL_BYTESPERPIXEL(target_format) == 3) {
+ swdata->Display1X = Color24DitherYUY2Mod1X;
+ swdata->Display2X = Color24DitherYUY2Mod2X;
+ }
+ if (SDL_BYTESPERPIXEL(target_format) == 4) {
+ swdata->Display1X = Color32DitherYUY2Mod1X;
+ swdata->Display2X = Color32DitherYUY2Mod2X;
+ }
+ break;
+ default:
+ /* We should never get here (caught above) */
+ break;
+ }
+
+ /* Drop the cached display surface so the next stretched copy creates
+ one for the new target format */
+ if (swdata->display) {
+ SDL_FreeSurface(swdata->display);
+ swdata->display = NULL;
+ }
+ return 0;
+}
+
+/*
+ * Allocate a software YUV texture of the given format and size.
+ *
+ * format  one of SDL_PIXELFORMAT_YV12, _IYUV, _YUY2, _UYVY or _YVYU
+ * w, h    texture size in pixels
+ *
+ * Allocates the pixel storage (w * h * 2 bytes), builds the chroma lookup
+ * tables and records the plane pointers and pitches for the format.
+ *
+ * Returns the new texture, or NULL with the SDL error set when the format
+ * is not supported or memory runs out. Release the result with
+ * SDL_SW_DestroyYUVTexture().
+ */
+SDL_SW_YUVTexture *
+SDL_SW_CreateYUVTexture(Uint32 format, int w, int h)
+{
+    SDL_SW_YUVTexture *swdata;
+    int *Cr_r_tab;
+    int *Cr_g_tab;
+    int *Cb_g_tab;
+    int *Cb_b_tab;
+    int i;
+    int CR, CB;
+
+    /* Reject unsupported formats before allocating anything, so that the
+       error path has nothing to release. */
+    switch (format) {
+    case SDL_PIXELFORMAT_YV12:
+    case SDL_PIXELFORMAT_IYUV:
+    case SDL_PIXELFORMAT_YUY2:
+    case SDL_PIXELFORMAT_UYVY:
+    case SDL_PIXELFORMAT_YVYU:
+        break;
+    default:
+        SDL_SetError("Unsupported YUV format");
+        return NULL;
+    }
+
+    swdata = (SDL_SW_YUVTexture *) SDL_calloc(1, sizeof(*swdata));
+    if (!swdata) {
+        SDL_OutOfMemory();
+        return NULL;
+    }
+
+    swdata->format = format;
+    swdata->target_format = SDL_PIXELFORMAT_UNKNOWN;
+    swdata->w = w;
+    swdata->h = h;
+    swdata->pixels = (Uint8 *) SDL_malloc(w * h * 2);
+    swdata->colortab = (int *) SDL_malloc(4 * 256 * sizeof(int));
+    swdata->rgb_2_pix = (Uint32 *) SDL_malloc(3 * 768 * sizeof(Uint32));
+    if (!swdata->pixels || !swdata->colortab || !swdata->rgb_2_pix) {
+        SDL_OutOfMemory();
+        SDL_SW_DestroyYUVTexture(swdata);
+        return NULL;
+    }
+
+    /* Generate the tables for the display surface */
+    Cr_r_tab = &swdata->colortab[0 * 256];
+    Cr_g_tab = &swdata->colortab[1 * 256];
+    Cb_g_tab = &swdata->colortab[2 * 256];
+    Cb_b_tab = &swdata->colortab[3 * 256];
+    for (i = 0; i < 256; i++) {
+        /* Gamma correction (luminance table) and chroma correction
+           would be done here.  See the Berkeley mpeg_play sources.
+         */
+        CB = CR = (i - 128);
+        Cr_r_tab[i] = (int) ((0.419 / 0.299) * CR);
+        Cr_g_tab[i] = (int) (-(0.299 / 0.419) * CR);
+        Cb_g_tab[i] = (int) (-(0.114 / 0.331) * CB);
+        Cb_b_tab[i] = (int) ((0.587 / 0.331) * CB);
+    }
+
+    /* Find the pitch and offset values for the overlay */
+    switch (format) {
+    case SDL_PIXELFORMAT_YV12:
+    case SDL_PIXELFORMAT_IYUV:
+        /* Full-size first plane followed by two half-pitch planes */
+        swdata->pitches[0] = w;
+        swdata->pitches[1] = swdata->pitches[0] / 2;
+        swdata->pitches[2] = swdata->pitches[0] / 2;
+        swdata->planes[0] = swdata->pixels;
+        swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h;
+        swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * h / 2;
+        break;
+    case SDL_PIXELFORMAT_YUY2:
+    case SDL_PIXELFORMAT_UYVY:
+    case SDL_PIXELFORMAT_YVYU:
+        /* Single interleaved plane, two bytes per pixel */
+        swdata->pitches[0] = w * 2;
+        swdata->planes[0] = swdata->pixels;
+        break;
+    default:
+        /* We should never get here (caught above) */
+        break;
+    }
+
+    /* We're all done.. */
+    return (swdata);
+}
+
+/*
+ * Report where the texture's pixel data starts and its pitch.
+ *
+ * Stores the address of the first plane in *pixels and that plane's pitch
+ * in *pitch. Always returns 0.
+ */
+int
+SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
+                             int *pitch)
+{
+    Uint8 *first_plane = swdata->planes[0];
+    const int first_pitch = swdata->pitches[0];
+
+    *pixels = first_plane;
+    *pitch = first_pitch;
+    return 0;
+}
+
+/*
+ * Copy new pixel data into the texture.
+ *
+ * rect    area to update, or NULL for the whole texture. Planar formats
+ *         (YV12, IYUV) only accept the whole texture.
+ * pixels  source data
+ * pitch   bytes between source rows (used for the packed formats only)
+ *
+ * Returns 0 on success or -1 with the SDL error set.
+ */
+int
+SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
+                        const void *pixels, int pitch)
+{
+    switch (swdata->format) {
+    case SDL_PIXELFORMAT_YV12:
+    case SDL_PIXELFORMAT_IYUV:
+        if (rect
+            && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
+                || rect->h != swdata->h)) {
+            SDL_SetError
+                ("YV12 and IYUV textures only support full surface updates");
+            return -1;
+        }
+        /*
+         * Copy exactly the three planes as laid out at creation time:
+         * everything up to the start of the last plane, plus that plane.
+         * The backing store is w * h * 2 bytes, but the caller's image
+         * only holds the planes, so copying the full allocation would
+         * read past the end of the source buffer.
+         */
+        SDL_memcpy(swdata->pixels, pixels,
+                   (size_t) (swdata->planes[2] - swdata->planes[0]) +
+                   (size_t) (swdata->pitches[2] * swdata->h / 2));
+        break;
+    case SDL_PIXELFORMAT_YUY2:
+    case SDL_PIXELFORMAT_UYVY:
+    case SDL_PIXELFORMAT_YVYU:
+        {
+            SDL_Rect full;
+            Uint8 *src, *dst;
+            int row;
+            size_t length;
+
+            /* A NULL rectangle selects the whole texture */
+            if (!rect) {
+                full.x = 0;
+                full.y = 0;
+                full.w = swdata->w;
+                full.h = swdata->h;
+                rect = &full;
+            }
+
+            src = (Uint8 *) pixels;
+            dst =
+                swdata->planes[0] + rect->y * swdata->pitches[0] +
+                rect->x * 2;
+            length = rect->w * 2;       /* two bytes per packed pixel */
+            for (row = 0; row < rect->h; ++row) {
+                SDL_memcpy(dst, src, length);
+                src += pitch;
+                dst += swdata->pitches[0];
+            }
+        }
+        break;
+    }
+    return 0;
+}
+
+/*
+ * Give direct access to the texture's pixel storage.
+ *
+ * rect    area to lock, or NULL for the whole texture. Planar formats
+ *         (YV12, IYUV) only accept the whole texture.
+ * pixels  receives the address of the first byte of the locked area
+ * pitch   receives the pitch of the first plane
+ *
+ * Returns 0 on success or -1 with the SDL error set.
+ */
+int
+SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
+                      void **pixels, int *pitch)
+{
+    int x = 0;
+    int y = 0;
+
+    switch (swdata->format) {
+    case SDL_PIXELFORMAT_YV12:
+    case SDL_PIXELFORMAT_IYUV:
+        if (rect
+            && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
+                || rect->h != swdata->h)) {
+            SDL_SetError
+                ("YV12 and IYUV textures only support full surface locks");
+            return -1;
+        }
+        break;
+    }
+
+    /* A NULL rectangle locks from the top-left corner; the check above
+       already treats NULL as legal, so it must not be dereferenced. */
+    if (rect) {
+        x = rect->x;
+        y = rect->y;
+    }
+
+    *pixels = swdata->planes[0] + y * swdata->pitches[0] + x * 2;
+    *pitch = swdata->pitches[0];
+    return 0;
+}
+
+/*
+ * Counterpart of SDL_SW_LockYUVTexture(). Locking hands out a pointer into
+ * the texture's own storage, so there is nothing to do on unlock.
+ */
+void
+SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata)
+{
+    (void) swdata;              /* intentionally unused */
+}
+
+/*
+ * Convert the texture to RGB and write it to a caller-supplied buffer.
+ *
+ * srcrect        part of the texture to convert (dereferenced
+ *                unconditionally, so it must not be NULL)
+ * target_format  RGB pixel format of the destination
+ * w, h           destination size in pixels
+ * pixels, pitch  destination buffer and its row pitch in bytes
+ *
+ * When the whole texture is copied at its own size or at exactly twice its
+ * size, the converters write straight into the destination. In every other
+ * case the texture is converted into an internal surface of the texture's
+ * size and then stretched into the destination with SDL_SoftStretch().
+ *
+ * Returns 0 on success or -1 on failure.
+ */
+int
+SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
+ Uint32 target_format, int w, int h, void *pixels,
+ int pitch)
+{
+ int stretch;
+ int scale_2x;
+ Uint8 *lum, *Cr, *Cb;
+ int mod;
+
+ /* Make sure we're set up to display in the desired format */
+ if (target_format != swdata->target_format) {
+ if (SDL_SW_SetupYUVDisplay(swdata, target_format) < 0) {
+ return -1;
+ }
+ }
+
+ stretch = 0;
+ scale_2x = 0;
+ if (srcrect->x || srcrect->y || srcrect->w < swdata->w
+ || srcrect->h < swdata->h) {
+ /* The source rectangle has been clipped.
+ Using a scratch surface is easier than adding clipped
+ source support to all the blitters, plus that would
+ slow them down in the general unclipped case.
+ */
+ stretch = 1;
+ } else if ((srcrect->w != w) || (srcrect->h != h)) {
+ if ((w == 2 * srcrect->w) && (h == 2 * srcrect->h)) {
+ scale_2x = 1;
+ } else {
+ stretch = 1;
+ }
+ }
+ if (stretch) {
+ int bpp;
+ Uint32 Rmask, Gmask, Bmask, Amask;
+
+ /* swdata->display wraps the caller's buffer; reuse the cached surface
+ by repointing it, or create it on first use */
+ if (swdata->display) {
+ swdata->display->w = w;
+ swdata->display->h = h;
+ swdata->display->pixels = pixels;
+ swdata->display->pitch = pitch;
+ } else {
+ /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
+ SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
+ &Bmask, &Amask);
+ swdata->display =
+ SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask,
+ Gmask, Bmask, Amask);
+ if (!swdata->display) {
+ return (-1);
+ }
+ }
+ /* swdata->stretch is the texture-sized scratch surface the
+ converters write into before stretching */
+ if (!swdata->stretch) {
+ /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
+ SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
+ &Bmask, &Amask);
+ swdata->stretch =
+ SDL_CreateRGBSurface(0, swdata->w, swdata->h, bpp, Rmask,
+ Gmask, Bmask, Amask);
+ if (!swdata->stretch) {
+ return (-1);
+ }
+ }
+ /* From here on the conversion targets the scratch surface, not the
+ caller's buffer */
+ pixels = swdata->stretch->pixels;
+ pitch = swdata->stretch->pitch;
+ }
+ /* Locate the first luma and chroma bytes for this source layout: whole
+ planes for the planar formats, byte offsets inside the interleaved
+ data for the packed ones */
+ switch (swdata->format) {
+ case SDL_PIXELFORMAT_YV12:
+ lum = swdata->planes[0];
+ Cr = swdata->planes[1];
+ Cb = swdata->planes[2];
+ break;
+ case SDL_PIXELFORMAT_IYUV:
+ lum = swdata->planes[0];
+ Cr = swdata->planes[2];
+ Cb = swdata->planes[1];
+ break;
+ case SDL_PIXELFORMAT_YUY2:
+ lum = swdata->planes[0];
+ Cr = lum + 3;
+ Cb = lum + 1;
+ break;
+ case SDL_PIXELFORMAT_UYVY:
+ lum = swdata->planes[0] + 1;
+ Cr = lum + 1;
+ Cb = lum - 1;
+ break;
+ case SDL_PIXELFORMAT_YVYU:
+ lum = swdata->planes[0];
+ Cr = lum + 1;
+ Cb = lum + 3;
+ break;
+ default:
+ SDL_SetError("Unsupported YUV format in copy");
+ return (-1);
+ }
+ /* Destination row length in pixels; subtracting the pixels actually
+ written below leaves the per-row padding the converters expect */
+ mod = (pitch / SDL_BYTESPERPIXEL(target_format));
+
+ if (scale_2x) {
+ mod -= (swdata->w * 2);
+ swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
+ lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
+ } else {
+ mod -= swdata->w;
+ swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
+ lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
+ }
+ if (stretch) {
+ /* NOTE(review): the result of SDL_SoftStretch() is ignored, so a
+ failed stretch still reports success -- confirm this is intended */
+ SDL_Rect rect = *srcrect;
+ SDL_SoftStretch(swdata->stretch, &rect, swdata->display, NULL);
+ }
+ return 0;
+}
+
+/*
+ * Release a software YUV texture together with its pixel storage, lookup
+ * tables and helper surfaces. Passing NULL is allowed and does nothing;
+ * members that were never allocated are skipped.
+ */
+void
+SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata)
+{
+    if (!swdata) {
+        return;
+    }
+
+    /* Raw buffers */
+    if (swdata->pixels != NULL) {
+        SDL_free(swdata->pixels);
+    }
+    if (swdata->colortab != NULL) {
+        SDL_free(swdata->colortab);
+    }
+    if (swdata->rgb_2_pix != NULL) {
+        SDL_free(swdata->rgb_2_pix);
+    }
+
+    /* Helper surfaces used by stretched copies */
+    if (swdata->stretch != NULL) {
+        SDL_FreeSurface(swdata->stretch);
+    }
+    if (swdata->display != NULL) {
+        SDL_FreeSurface(swdata->display);
+    }
+
+    SDL_free(swdata);
+}
+
+/* vi: set ts=4 sw=4 expandtab: */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/render/SDL_yuv_sw_c.h Thu Feb 03 00:19:40 2011 -0800
@@ -0,0 +1,69 @@
+/*
+ SDL - Simple DirectMedia Layer
+ Copyright (C) 1997-2010 Sam Lantinga
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+ Sam Lantinga
+ slouken@libsdl.org
+*/
+#include "SDL_config.h"
+
+#include "SDL_video.h"
+
+/* This is the software implementation of the YUV texture support */
+
+/*
+ * Per-texture state of the software YUV implementation.  The record and
+ * the buffers/surfaces it points at are released together by
+ * SDL_SW_DestroyYUVTexture().
+ */
+struct SDL_SW_YUVTexture
+{
+ /* Source YUV layout.  SDL_SW_CopyYUVToRGB() handles YV12, IYUV, YUY2,
+ UYVY and YVYU and fails with "Unsupported YUV format in copy" for
+ anything else. */
+ Uint32 format;
+ /* NOTE(review): presumably the RGB format the conversion tables were
+ built for -- confirm against SDL_yuv_sw.c, not visible from here. */
+ Uint32 target_format;
+ /* Texture size; passed to the converters as cols (w) and rows (h) */
+ int w, h;
+ /* Heap buffer freed with SDL_free() on destroy
+ (presumably the backing store for planes[] -- TODO confirm) */
+ Uint8 *pixels;
+ /* Tables handed unchanged to Display1X/Display2X; freed on destroy */
+ int *colortab;
+ Uint32 *rgb_2_pix;
+ /* Converter used by SDL_SW_CopyYUVToRGB() when scale_2x is not set;
+ called with mod = (pitch / bytes per pixel) - w */
+ void (*Display1X) (int *colortab, Uint32 * rgb_2_pix,
+ unsigned char *lum, unsigned char *cr,
+ unsigned char *cb, unsigned char *out,
+ int rows, int cols, int mod);
+ /* Converter used when scale_2x is set;
+ called with mod = (pitch / bytes per pixel) - 2 * w */
+ void (*Display2X) (int *colortab, Uint32 * rgb_2_pix,
+ unsigned char *lum, unsigned char *cr,
+ unsigned char *cb, unsigned char *out,
+ int rows, int cols, int mod);
+
+ /* These are just so we don't have to allocate them separately.
+ planes[0] is the luminance plane (and the only plane read for the
+ packed YUY2/UYVY/YVYU formats); for YV12 planes[1] is Cr and
+ planes[2] is Cb, for IYUV the two are swapped. */
+ Uint16 pitches[3];
+ Uint8 *planes[3];
+
+ /* Temporary surfaces for the stretch-copy path: "stretch" is a w x h
+ surface in the target format, created inside SDL_SW_CopyYUVToRGB(),
+ that receives the converted pixels; SDL_SoftStretch() then scales
+ it onto "display".  Both are freed with SDL_FreeSurface(). */
+ SDL_Surface *stretch;
+ SDL_Surface *display;
+};
+
+typedef struct SDL_SW_YUVTexture SDL_SW_YUVTexture;
+
+SDL_SW_YUVTexture *SDL_SW_CreateYUVTexture(Uint32 format, int w, int h);
+int SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
+ int *pitch);
+int SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
+ const void *pixels, int pitch);
+int SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
+ void **pixels, int *pitch);
+void SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata);
+int SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
+ Uint32 target_format, int w, int h, void *pixels,
+ int pitch);
+void SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata);
+
+/* vi: set ts=4 sw=4 expandtab: */
--- a/src/render/direct3d/SDL_d3drender.c Wed Feb 02 22:55:12 2011 -0800
+++ b/src/render/direct3d/SDL_d3drender.c Thu Feb 03 00:19:40 2011 -0800
@@ -28,7 +28,6 @@
#include "SDL_loadso.h"
#include "SDL_syswm.h"
#include "../SDL_sysrender.h"
-#include "../../video/SDL_yuv_sw_c.h"
#if SDL_VIDEO_RENDER_D3D
#define D3D_DEBUG_INFO
@@ -89,7 +88,8 @@
/* Direct3D renderer implementation */
-#if 1 /* This takes more memory but you won't lose your texture data */
+#if 1
+/* This takes more memory but you won't lose your texture data */
#define D3DPOOL_SDL D3DPOOL_MANAGED
#define SDL_MEMORY_POOL_MANAGED
#else
@@ -99,18 +99,12 @@
static SDL_Renderer *D3D_CreateRenderer(SDL_Window * window, Uint32 flags);
static int D3D_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture);
-static int D3D_QueryTexturePixels(SDL_Renderer * renderer,
- SDL_Texture * texture, void **pixels,
- int *pitch);
static int D3D_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
const SDL_Rect * rect, const void *pixels,
int pitch);
static int D3D_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
- const SDL_Rect * rect, int markDirty,
- void **pixels, int *pitch);
+ const SDL_Rect * rect, void **pixels, int *pitch);
static void D3D_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture);
-static void D3D_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture,
- int numrects, const SDL_Rect * rects);
static int D3D_RenderDrawPoints(SDL_Renderer * renderer,
const SDL_Point * points, int count);
static int D3D_RenderDrawLines(SDL_Renderer * renderer,
@@ -134,8 +128,8 @@
{
"d3d",
(SDL_RENDERER_PRESENTVSYNC | SDL_RENDERER_ACCELERATED),
- 0,
- {0},
+ 1,
+ {SDL_PIXELFORMAT_ARGB8888},
0,
0}
};
@@ -152,7 +146,6 @@
typedef struct
{
- SDL_SW_YUVTexture *yuv;
Uint32 format;
IDirect3DTexture9 *texture;
} D3D_TextureData;
@@ -248,113 +241,30 @@
PixelFormatToD3DFMT(Uint32 format)
{
switch (format) {
- case SDL_PIXELFORMAT_INDEX8:
- return D3DFMT_P8;
- case SDL_PIXELFORMAT_RGB332:
- return D3DFMT_R3G3B2;
- case SDL_PIXELFORMAT_RGB444:
- return D3DFMT_X4R4G4B4;
- case SDL_PIXELFORMAT_RGB555:
- return D3DFMT_X1R5G5B5;
- case SDL_PIXELFORMAT_ARGB4444:
- return D3DFMT_A4R4G4B4;
- case SDL_PIXELFORMAT_ARGB1555:
- return D3DFMT_A1R5G5B5;
case SDL_PIXELFORMAT_RGB565:
return D3DFMT_R5G6B5;
case SDL_PIXELFORMAT_RGB888:
return D3DFMT_X8R8G8B8;
case SDL_PIXELFORMAT_ARGB8888:
return D3DFMT_A8R8G8B8;
- case SDL_PIXELFORMAT_ARGB2101010:
- return D3DFMT_A2R10G10B10;
- case SDL_PIXELFORMAT_YV12:
- return MAKEFOURCC('Y','V','1','2');
- case SDL_PIXELFORMAT_IYUV:
- return MAKEFOURCC('I','4','2','0');
- case SDL_PIXELFORMAT_UYVY:
- return D3DFMT_UYVY;
- case SDL_PIXELFORMAT_YUY2:
- return D3DFMT_YUY2;
default:
return D3DFMT_UNKNOWN;
}
}
-static SDL_bool
-D3D_IsTextureFormatAvailable(IDirect3D9 * d3d, UINT adapter,
- D3DFORMAT display_format,
- D3DFORMAT texture_format)
+static Uint32
+D3DFMTToPixelFormat(D3DFORMAT format)
{
- HRESULT result;
-
- result = IDirect3D9_CheckDeviceFormat(d3d, adapter,
- D3DDEVTYPE_HAL,
- display_format,
- 0,
- D3DRTYPE_TEXTURE,
- texture_format);
- return FAILED(result) ? SDL_FALSE : SDL_TRUE;
-}
-
-static void
-UpdateYUVTextureData(SDL_Texture * texture)
-{
- D3D_TextureData *data = (D3D_TextureData *) texture->driverdata;
- SDL_Rect rect;
- RECT d3drect;
- D3DLOCKED_RECT locked;
- HRESULT result;
-
- d3drect.left = 0;
- d3drect.right = texture->w;
- d3drect.top = 0;
- d3drect.bottom = texture->h;
-
- result =
- IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect, 0);
- if (FAILED(result)) {
- return;
+ switch (format) {
+ case D3DFMT_R5G6B5:
+ return SDL_PIXELFORMAT_RGB565;
+ case D3DFMT_X8R8G8B8:
+ return SDL_PIXELFORMAT_RGB888;
+ case D3DFMT_A8R8G8B8:
+ return SDL_PIXELFORMAT_ARGB8888;
+ default:
+ return SDL_PIXELFORMAT_UNKNOWN;
}
-
- rect.x = 0;
- rect.y = 0;
- rect.w = texture->w;
- rect.h = texture->h;
- SDL_SW_CopyYUVToRGB(data->yuv, &rect, data->format, texture->w,
- texture->h, locked.pBits, locked.Pitch);
-
- IDirect3DTexture9_UnlockRect(data->texture, 0);
-}
-
-static void
-D3D_AddTextureFormats(D3D_RenderData *data, SDL_RendererInfo *info)
-{
- int i;
- int formats[] = {
- SDL_PIXELFORMAT_RGB332,
- SDL_PIXELFORMAT_RGB444,
- SDL_PIXELFORMAT_RGB555,
- SDL_PIXELFORMAT_ARGB4444,
- SDL_PIXELFORMAT_ARGB1555,
- SDL_PIXELFORMAT_RGB565,
- SDL_PIXELFORMAT_RGB888,
- SDL_PIXELFORMAT_ARGB8888,
- SDL_PIXELFORMAT_ARGB2101010,
- };
-
- info->num_texture_formats = 0;
- for (i = 0; i < SDL_arraysize(formats); ++i) {
- if (D3D_IsTextureFormatAvailable
- (data->d3d, data->adapter, data->pparams.BackBufferFormat, PixelFormatToD3DFMT(formats[i]))) {
- info->texture_formats[info->num_texture_formats++] = formats[i];
- }
- }
- info->texture_formats[info->num_texture_formats++] = SDL_PIXELFORMAT_YV12;
- info->texture_formats[info->num_texture_formats++] = SDL_PIXELFORMAT_IYUV;
- info->texture_formats[info->num_texture_formats++] = SDL_PIXELFORMAT_YUY2;
- info->texture_formats[info->num_texture_formats++] = SDL_PIXELFORMAT_UYVY;
- info->texture_formats[info->num_texture_formats++] = SDL_PIXELFORMAT_YVYU;
}
SDL_Renderer *
@@ -367,6 +277,9 @@
D3DPRESENT_PARAMETERS pparams;
IDirect3DSwapChain9 *chain;
D3DCAPS9 caps;
+ Uint32 window_flags;
+ int w, h;
+ SDL_DisplayMode fullscreen_mode;
renderer = (SDL_Renderer *) SDL_calloc(1, sizeof(*renderer));
if (!renderer) {
@@ -404,11 +317,9 @@
}
renderer->CreateTexture = D3D_CreateTexture;
- renderer->QueryTexturePixels = D3D_QueryTexturePixels;
renderer->UpdateTexture = D3D_UpdateTexture;
renderer->LockTexture = D3D_LockTexture;
renderer->UnlockTexture = D3D_UnlockTexture;
- renderer->DirtyTexture = D3D_DirtyTexture;
renderer->RenderDrawPoints = D3D_RenderDrawPoints;
renderer->RenderDrawLines = D3D_RenderDrawLines;
renderer->RenderFillRects = D3D_RenderFillRects;
@@ -427,23 +338,27 @@
SDL_VERSION(&windowinfo.version);
SDL_GetWindowWMInfo(window, &windowinfo);
+ window_flags = SDL_GetWindowFlags(window);
+ SDL_GetWindowSize(window, &w, &h);
+ SDL_GetWindowDisplayMode(window, &fullscreen_mode);
+
SDL_zero(pparams);
pparams.hDeviceWindow = windowinfo.info.win.window;
- pparams.BackBufferWidth = window->w;
- pparams.BackBufferHeight = window->h;
- if (window->flags & SDL_WINDOW_FULLSCREEN) {
+ pparams.BackBufferWidth = w;
+ pparams.BackBufferHeight = h;
+ if (window_flags & SDL_WINDOW_FULLSCREEN) {
pparams.BackBufferFormat =
- PixelFormatToD3DFMT(window->fullscreen_mode.format);
+ PixelFormatToD3DFMT(fullscreen_mode.format);
} else {
pparams.BackBufferFormat = D3DFMT_UNKNOWN;
}
pparams.BackBufferCount = 1;
pparams.SwapEffect = D3DSWAPEFFECT_DISCARD;
- if (window->flags & SDL_WINDOW_FULLSCREEN) {
+ if (window_flags & SDL_WINDOW_FULLSCREEN) {
pparams.Windowed = FALSE;
pparams.FullScreen_RefreshRateInHz =
- window->fullscreen_mode.refresh_rate;
+ fullscreen_mode.refresh_rate;
} else {
pparams.Windowed = TRUE;
pparams.FullScreen_RefreshRateInHz = 0;
@@ -494,8 +409,6 @@
}
data->pparams = pparams;
- D3D_AddTextureFormats(data, &renderer->info);
-
IDirect3DDevice9_GetDeviceCaps(data->device, &caps);
renderer->info.max_texture_width = caps.MaxTextureWidth;
renderer->info.max_texture_height = caps.MaxTextureHeight;
@@ -594,22 +507,7 @@
texture->driverdata = data;
- if (SDL_ISPIXELFORMAT_FOURCC(texture->format) &&
- (texture->format != SDL_PIXELFORMAT_YUY2 ||
- !D3D_IsTextureFormatAvailable(renderdata->d3d, renderdata->adapter,
- display_format, PixelFormatToD3DFMT(texture->format)))
- && (texture->format != SDL_PIXELFORMAT_YVYU
- || !D3D_IsTextureFormatAvailable(renderdata->d3d, renderdata->adapter,
- display_format, PixelFormatToD3DFMT(texture->format)))) {
- data->yuv =
- SDL_SW_CreateYUVTexture(texture->format, texture->w, texture->h);
- if (!data->yuv) {
- return -1;
- }
- data->format = SDL_GetWindowPixelFormat(window);
- } else {
- data->format = texture->format;
- }
+ data->format = texture->format;
result =
IDirect3DDevice9_CreateTexture(renderdata->device, texture->w,
@@ -625,153 +523,118 @@
}
static int
-D3D_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture,
- void **pixels, int *pitch)
-{
- D3D_TextureData *data = (D3D_TextureData *) texture->driverdata;
-
- if (data->yuv) {
- return SDL_SW_QueryYUVTexturePixels(data->yuv, pixels, pitch);
- } else {
- /* D3D textures don't have their pixels hanging out */
- return -1;
- }
-}
-
-static int
D3D_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
const SDL_Rect * rect, const void *pixels, int pitch)
{
D3D_TextureData *data = (D3D_TextureData *) texture->driverdata;
D3D_RenderData *renderdata = (D3D_RenderData *) renderer->driverdata;
- if (data->yuv) {
- if (SDL_SW_UpdateYUVTexture(data->yuv, rect, pixels, pitch) < 0) {
- return -1;
- }
- UpdateYUVTextureData(texture);
- return 0;
- } else {
#ifdef SDL_MEMORY_POOL_DEFAULT
- IDirect3DTexture9 *temp;
- RECT d3drect;
- D3DLOCKED_RECT locked;
- const Uint8 *src;
- Uint8 *dst;
- int row, length;
- HRESULT result;
+ IDirect3DTexture9 *temp;
+ RECT d3drect;
+ D3DLOCKED_RECT locked;
+ const Uint8 *src;
+ Uint8 *dst;
+ int row, length;
+ HRESULT result;
- result =
- IDirect3DDevice9_CreateTexture(renderdata->device, texture->w,
- texture->h, 1, 0,
- PixelFormatToD3DFMT(texture->
- format),
- D3DPOOL_SYSTEMMEM, &temp, NULL);
- if (FAILED(result)) {
- D3D_SetError("CreateTexture()", result);
- return -1;
- }
+ result =
+ IDirect3DDevice9_CreateTexture(renderdata->device, texture->w,
+ texture->h, 1, 0,
+ PixelFormatToD3DFMT(texture-> format),
+ D3DPOOL_SYSTEMMEM, &temp, NULL);
+ if (FAILED(result)) {
+ D3D_SetError("CreateTexture()", result);
+ return -1;
+ }
- d3drect.left = rect->x;
- d3drect.right = rect->x + rect->w;
- d3drect.top = rect->y;
- d3drect.bottom = rect->y + rect->h;
+ d3drect.left = rect->x;
+ d3drect.right = rect->x + rect->w;
+ d3drect.top = rect->y;
+ d3drect.bottom = rect->y + rect->h;
- result = IDirect3DTexture9_LockRect(temp, 0, &locked, &d3drect, 0);
- if (FAILED(result)) {
- IDirect3DTexture9_Release(temp);
- D3D_SetError("LockRect()", result);
- return -1;
- }
+ result = IDirect3DTexture9_LockRect(temp, 0, &locked, &d3drect, 0);
+ if (FAILED(result)) {
+ IDirect3DTexture9_Release(temp);
+ D3D_SetError("LockRect()", result);
+ return -1;
+ }
- src = pixels;
- dst = locked.pBits;
- length = rect->w * SDL_BYTESPERPIXEL(texture->format);
- for (row = 0; row < rect->h; ++row) {
- SDL_memcpy(dst, src, length);
- src += pitch;
- dst += locked.Pitch;
- }
- IDirect3DTexture9_UnlockRect(temp, 0);
+ src = pixels;
+ dst = locked.pBits;
+ length = rect->w * SDL_BYTESPERPIXEL(texture->format);
+ for (row = 0; row < rect->h; ++row) {
+ SDL_memcpy(dst, src, length);
+ src += pitch;
+ dst += locked.Pitch;
+ }
+ IDirect3DTexture9_UnlockRect(temp, 0);
- result =
- IDirect3DDevice9_UpdateTexture(renderdata->device,
- (IDirect3DBaseTexture9 *) temp,
- (IDirect3DBaseTexture9 *)
- data->texture);
- IDirect3DTexture9_Release(temp);
- if (FAILED(result)) {
- D3D_SetError("UpdateTexture()", result);
- return -1;
- }
+ result =
+ IDirect3DDevice9_UpdateTexture(renderdata->device,
+ (IDirect3DBaseTexture9 *) temp,
+ (IDirect3DBaseTexture9 *)
+ data->texture);
+ IDirect3DTexture9_Release(temp);
+ if (FAILED(result)) {
+ D3D_SetError("UpdateTexture()", result);
+ return -1;
+ }
#else
- RECT d3drect;
- D3DLOCKED_RECT locked;
- const Uint8 *src;
- Uint8 *dst;
- int row, length;
- HRESULT result;
+ RECT d3drect;
+ D3DLOCKED_RECT locked;
+ const Uint8 *src;
+ Uint8 *dst;
+ int row, length;
+ HRESULT result;
- d3drect.left = rect->x;
- d3drect.right = rect->x + rect->w;
- d3drect.top = rect->y;
- d3drect.bottom = rect->y + rect->h;
+ d3drect.left = rect->x;
+ d3drect.right = rect->x + rect->w;
+ d3drect.top = rect->y;
+ d3drect.bottom = rect->y + rect->h;
- result =
- IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect,
- 0);
- if (FAILED(result)) {
- D3D_SetError("LockRect()", result);
- return -1;
- }
+ result = IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect, 0);
+ if (FAILED(result)) {
+ D3D_SetError("LockRect()", result);
+ return -1;
+ }
- src = pixels;
- dst = locked.pBits;
- length = rect->w * SDL_BYTESPERPIXEL(texture->format);
- for (row = 0; row < rect->h; ++row) {
- SDL_memcpy(dst, src, length);
- src += pitch;
- dst += locked.Pitch;
- }
- IDirect3DTexture9_UnlockRect(data->texture, 0);
+ src = pixels;
+ dst = locked.pBits;
+ length = rect->w * SDL_BYTESPERPIXEL(texture->format);
+ for (row = 0; row < rect->h; ++row) {
+ SDL_memcpy(dst, src, length);
+ src += pitch;
+ dst += locked.Pitch;
+ }
+ IDirect3DTexture9_UnlockRect(data->texture, 0);
#endif // SDL_MEMORY_POOL_DEFAULT
- return 0;
- }
+ return 0;
}
static int
D3D_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
- const SDL_Rect * rect, int markDirty, void **pixels,
- int *pitch)
+ const SDL_Rect * rect, void **pixels, int *pitch)
{
D3D_TextureData *data = (D3D_TextureData *) texture->driverdata;
+ RECT d3drect;
+ D3DLOCKED_RECT locked;
+ HRESULT result;
- if (data->yuv) {
- return SDL_SW_LockYUVTexture(data->yuv, rect, markDirty, pixels,
- pitch);
- } else {
- RECT d3drect;
- D3DLOCKED_RECT locked;
- HRESULT result;
+ d3drect.left = rect->x;
+ d3drect.right = rect->x + rect->w;
+ d3drect.top = rect->y;
+ d3drect.bottom = rect->y + rect->h;
- d3drect.left = rect->x;
- d3drect.right = rect->x + rect->w;
- d3drect.top = rect->y;
- d3drect.bottom = rect->y + rect->h;
-
- result =
- IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect,
- markDirty ? 0 :
- D3DLOCK_NO_DIRTY_UPDATE);
- if (FAILED(result)) {
- D3D_SetError("LockRect()", result);
- return -1;
- }
- *pixels = locked.pBits;
- *pitch = locked.Pitch;
- return 0;
+ result = IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect, 0);
+ if (FAILED(result)) {
+ D3D_SetError("LockRect()", result);
+ return -1;
}
+ *pixels = locked.pBits;
+ *pitch = locked.Pitch;
+ return 0;
}
static void
@@ -779,32 +642,7 @@
{
D3D_TextureData *data = (D3D_TextureData *) texture->driverdata;
- if (data->yuv) {
- SDL_SW_UnlockYUVTexture(data->yuv);
- UpdateYUVTextureData(texture);
- } else {
- IDirect3DTexture9_UnlockRect(data->texture, 0);
- }
-}
-
-static void
-D3D_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture, int numrects,
- const SDL_Rect * rects)
-{
- D3D_TextureData *data = (D3D_TextureData *) texture->driverdata;
- RECT d3drect;
- int i;
-
- for (i = 0; i < numrects; ++i) {
- const SDL_Rect *rect = &rects[i];
-
- d3drect.left = rect->x;
- d3drect.right = rect->x + rect->w;
- d3drect.top = rect->y;
- d3drect.bottom = rect->y + rect->h;
-
- IDirect3DTexture9_AddDirtyRect(data->texture, &d3drect);
- }
+ IDirect3DTexture9_UnlockRect(data->texture, 0);
}
static void
@@ -1123,8 +961,6 @@
Uint32 format, void * pixels, int pitch)
{
D3D_RenderData *data = (D3D_RenderData *) renderer->driverdata;
- SDL_Window *window = renderer->window;
- SDL_VideoDisplay *display = window->display;
D3DSURFACE_DESC desc;
LPDIRECT3DSURFACE9 backBuffer;
LPDIRECT3DSURFACE9 surface;
@@ -1174,7 +1010,7 @@
}
SDL_ConvertPixels(rect->w, rect->h,
- display->current_mode.format, locked.pBits, locked.Pitch,
+ D3DFMTToPixelFormat(desc.Format), locked.pBits, locked.Pitch,
format, pixels, pitch);
IDirect3DSurface9_UnlockRect(surface);
@@ -1227,9 +1063,6 @@
if (!data) {
return;
}
- if (data->yuv) {
- SDL_SW_DestroyYUVTexture(data->yuv);
- }
if (data->texture) {
IDirect3DTexture9_Release(data->texture);
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/render/mmx.h Thu Feb 03 00:19:40 2011 -0800
@@ -0,0 +1,642 @@
+/* mmx.h
+
+ MultiMedia eXtensions GCC interface library for IA32.
+
+ To use this library, simply include this header file
+ and compile with GCC. You MUST have inlining enabled
+ in order for mmx_ok() to work; this can be done by
+ simply using -O on the GCC command line.
+
+ Compiling with -DMMX_TRACE will cause detailed trace
+ output to be printed (with printf, i.e. to stdout) for each
+ mmx operation. This adds lots of code, and obviously slows
+ execution to a crawl, but can be very useful for debugging.
+
+ THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
+ EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
+ LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ AND FITNESS FOR ANY PARTICULAR PURPOSE.
+
+ 1997-99 by H. Dietz and R. Fisher
+
+ Notes:
+ It appears that the latest gas has the pand problem fixed, therefore
+ I'll undefine BROKEN_PAND by default.
+*/
+
+#ifndef _MMX_H
+#define _MMX_H
+
+
+/* Warning: at this writing, the version of GAS packaged
+ with most Linux distributions does not handle the
+ parallel AND operation mnemonic correctly. If the
+ symbol BROKEN_PAND is defined, a slower alternative
+ coding will be used. If execution of mmxtest results
+ in an illegal instruction fault, define this symbol.
+*/
+#undef BROKEN_PAND
+
+
+/* The type of a value that fits in an MMX register: one 64-bit
+ quantity viewed as 1x64, 2x32, 4x16 or 8x8 bit fields
+ (note that long long constant values MUST be suffixed
+ by LL and unsigned long long values by ULL, lest
+ they be truncated by the compiler)
+*/
+typedef union
+{
+ long long q; /* Quadword (64-bit) value */
+ unsigned long long uq; /* Unsigned Quadword */
+ int d[2]; /* 2 Doubleword (32-bit) values */
+ unsigned int ud[2]; /* 2 Unsigned Doubleword */
+ short w[4]; /* 4 Word (16-bit) values */
+ unsigned short uw[4]; /* 4 Unsigned Word */
+ char b[8]; /* 8 Byte (8-bit) values */
+ unsigned char ub[8]; /* 8 Unsigned Byte */
+ float s[2]; /* 2 Single-precision (32-bit) values */
+} __attribute__ ((aligned(8))) mmx_t; /* On an 8-byte (64-bit) boundary */
+
+
+#if 0
+/* Function to test if multimedia instructions are supported...
+*/
+inline extern int
+mm_support(void)
+{
+ /* Returns 1 if MMX instructions are supported,
+ 3 if Cyrix MMX and Extended MMX instructions are supported
+ 5 if AMD MMX and 3DNow! instructions are supported
+ 0 if hardware does not support any of these
+ */
+ register int rval = 0;
+
+ __asm__ __volatile__(
+ /* See if CPUID instruction is supported ... */
+ /* ... Get copies of EFLAGS into eax and ecx */
+ "pushf\n\t"
+ "popl %%eax\n\t" "movl %%eax, %%ecx\n\t"
+ /* ... Toggle the ID bit in one copy and store */
+ /* to the EFLAGS reg */
+ "xorl $0x200000, %%eax\n\t"
+ "push %%eax\n\t" "popf\n\t"
+ /* ... Get the (hopefully modified) EFLAGS */
+ "pushf\n\t" "popl %%eax\n\t"
+ /* ... Compare and test result */
+ "xorl %%eax, %%ecx\n\t" "testl $0x200000, %%ecx\n\t" "jz NotSupported1\n\t" /* CPUID not supported */
+ /* Get standard CPUID information, and
+ go to a specific vendor section */
+ "movl $0, %%eax\n\t" "cpuid\n\t"
+ /* Check for Intel */
+ "cmpl $0x756e6547, %%ebx\n\t"
+ "jne TryAMD\n\t"
+ "cmpl $0x49656e69, %%edx\n\t"
+ "jne TryAMD\n\t"
+ "cmpl $0x6c65746e, %%ecx\n"
+ "jne TryAMD\n\t" "jmp Intel\n\t"
+ /* Check for AMD */
+ "\nTryAMD:\n\t"
+ "cmpl $0x68747541, %%ebx\n\t"
+ "jne TryCyrix\n\t"
+ "cmpl $0x69746e65, %%edx\n\t"
+ "jne TryCyrix\n\t"
+ "cmpl $0x444d4163, %%ecx\n"
+ "jne TryCyrix\n\t" "jmp AMD\n\t"
+ /* Check for Cyrix */
+ "\nTryCyrix:\n\t"
+ "cmpl $0x69727943, %%ebx\n\t"
+ "jne NotSupported2\n\t"
+ "cmpl $0x736e4978, %%edx\n\t"
+ "jne NotSupported3\n\t"
+ "cmpl $0x64616574, %%ecx\n\t"
+ "jne NotSupported4\n\t"
+ /* Drop through to Cyrix... */
+ /* Cyrix Section */
+ /* See if extended CPUID level 80000001 is supported */
+ /* The value of CPUID/80000001 for the 6x86MX is undefined
+ according to the Cyrix CPU Detection Guide (Preliminary
+ Rev. 1.01 table 1), so we'll check the value of eax for
+ CPUID/0 to see if standard CPUID level 2 is supported.
+ According to the table, the only CPU which supports level
+ 2 is also the only one which supports extended CPUID levels.
+ */
+ "cmpl $0x2, %%eax\n\t" "jne MMXtest\n\t" /* Use standard CPUID instead */
+ /* Extended CPUID supported (in theory), so get extended
+ features */
+ "movl $0x80000001, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%eax\n\t" /* Test for MMX */
+ "jz NotSupported5\n\t" /* MMX not supported */
+ "testl $0x01000000, %%eax\n\t" /* Test for Ext'd MMX */
+ "jnz EMMXSupported\n\t" "movl $1, %0:\n\n\t" /* MMX Supported */
+ "jmp Return\n\n" "EMMXSupported:\n\t" "movl $3, %0:\n\n\t" /* EMMX and MMX Supported */
+ "jmp Return\n\t"
+ /* AMD Section */
+ "AMD:\n\t"
+ /* See if extended CPUID is supported */
+ "movl $0x80000000, %%eax\n\t" "cpuid\n\t" "cmpl $0x80000000, %%eax\n\t" "jl MMXtest\n\t" /* Use standard CPUID instead */
+ /* Extended CPUID supported, so get extended features */
+ "movl $0x80000001, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%edx\n\t" /* Test for MMX */
+ "jz NotSupported6\n\t" /* MMX not supported */
+ "testl $0x80000000, %%edx\n\t" /* Test for 3DNow! */
+ "jnz ThreeDNowSupported\n\t" "movl $1, %0:\n\n\t" /* MMX Supported */
+ "jmp Return\n\n" "ThreeDNowSupported:\n\t" "movl $5, %0:\n\n\t" /* 3DNow! and MMX Supported */
+ "jmp Return\n\t"
+ /* Intel Section */
+ "Intel:\n\t"
+ /* Check for MMX */
+ "MMXtest:\n\t" "movl $1, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%edx\n\t" /* Test for MMX */
+ "jz NotSupported7\n\t" /* MMX Not supported */
+ "movl $1, %0:\n\n\t" /* MMX Supported */
+ "jmp Return\n\t"
+ /* Nothing supported */
+ "\nNotSupported1:\n\t" "#movl $101, %0:\n\n\t" "\nNotSupported2:\n\t" "#movl $102, %0:\n\n\t" "\nNotSupported3:\n\t" "#movl $103, %0:\n\n\t" "\nNotSupported4:\n\t" "#movl $104, %0:\n\n\t" "\nNotSupported5:\n\t" "#movl $105, %0:\n\n\t" "\nNotSupported6:\n\t" "#movl $106, %0:\n\n\t" "\nNotSupported7:\n\t" "#movl $107, %0:\n\n\t" "movl $0, %0:\n\n\t" "Return:\n\t":"=a"(rval): /* no input */
+ :"eax", "ebx", "ecx", "edx");
+
+ /* Return */
+ return (rval);
+}
+
+/* Function to test if mmx instructions are supported...
+*/
+inline extern int
+mmx_ok(void)
+{
+ /* Returns 1 if MMX instructions are supported, 0 otherwise */
+ return (mm_support() & 0x1);
+}
+#endif
+
+/* Helper functions for the instruction macros that follow...
+ (note that memory-to-register, m2r, instructions are nearly
+ as efficient as register-to-register, r2r, instructions;
+ however, memory-to-memory instructions are really simulated
+ as a convenience, and are only 1/3 as efficient)
+*/
+#ifdef MMX_TRACE
+
+/* Include the stuff for printing a trace (via printf, i.e. to stdout)...
+*/
+
+#define mmx_i2r(op, imm, reg) \
+ { \
+ mmx_t mmx_trace; \
+ mmx_trace.uq = (imm); \
+ printf(#op "_i2r(" #imm "=0x%08x%08x, ", \
+ mmx_trace.d[1], mmx_trace.d[0]); \
+ __asm__ __volatile__ ("movq %%" #reg ", %0" \
+ : "=X" (mmx_trace) \
+ : /* nothing */ ); \
+ printf(#reg "=0x%08x%08x) => ", \
+ mmx_trace.d[1], mmx_trace.d[0]); \
+ __asm__ __volatile__ (#op " %0, %%" #reg \
+ : /* nothing */ \
+ : "X" (imm)); \
+ __asm__ __volatile__ ("movq %%" #reg ", %0" \
+ : "=X" (mmx_trace) \
+ : /* nothing */ ); \
+ printf(#reg "=0x%08x%08x\n", \
+ mmx_trace.d[1], mmx_trace.d[0]); \
+ }
+
+#define mmx_m2r(op, mem, reg) \
+ { \
+ mmx_t mmx_trace; \
+ mmx_trace = (mem); \
+ printf(#op "_m2r(" #mem "=0x%08x%08x, ", \
+ mmx_trace.d[1], mmx_trace.d[0]); \
+ __asm__ __volatile__ ("movq %%" #reg ", %0" \
+ : "=X" (mmx_trace) \
+ : /* nothing */ ); \
+ printf(#reg "=0x%08x%08x) => ", \
+ mmx_trace.d[1], mmx_trace.d[0]); \
+ __asm__ __volatile__ (#op " %0, %%" #reg \
+ : /* nothing */ \
+ : "X" (mem)); \
+ __asm__ __volatile__ ("movq %%" #reg ", %0" \
+ : "=X" (mmx_trace) \
+ : /* nothing */ ); \
+ printf(#reg "=0x%08x%08x\n", \
+ mmx_trace.d[1], mmx_trace.d[0]); \
+ }
+
+#define mmx_r2m(op, reg, mem) \
+ { \
+ mmx_t mmx_trace; \
+ __asm__ __volatile__ ("movq %%" #reg ", %0" \
+ : "=X" (mmx_trace) \
+ : /* nothing */ ); \
+ printf(#op "_r2m(" #reg "=0x%08x%08x, ", \
+ mmx_trace.d[1], mmx_trace.d[0]); \
+ mmx_trace = (mem); \
+ printf(#mem "=0x%08x%08x) => ", \
+ mmx_trace.d[1], mmx_trace.d[0]); \
+ __asm__ __volatile__ (#op " %%" #reg ", %0" \
+ : "=X" (mem) \
+ : /* nothing */ ); \
+ mmx_trace = (mem); \
+ printf(#mem "=0x%08x%08x\n", \
+ mmx_trace.d[1], mmx_trace.d[0]); \
+ }
+
+#define mmx_r2r(op, regs, regd) \
+ { \
+ mmx_t mmx_trace; \
+ __asm__ __volatile__ ("movq %%" #regs ", %0" \
+ : "=X" (mmx_trace) \
+ : /* nothing */ ); \
+ printf(#op "_r2r(" #regs "=0x%08x%08x, ", \
+ mmx_trace.d[1], mmx_trace.d[0]); \
+ __asm__ __volatile__ ("movq %%" #regd ", %0" \
+ : "=X" (mmx_trace) \
+ : /* nothing */ ); \
+ printf(#regd "=0x%08x%08x) => ", \
+ mmx_trace.d[1], mmx_trace.d[0]); \
+ __asm__ __volatile__ (#op " %" #regs ", %" #regd); \
+ __asm__ __volatile__ ("movq %%" #regd ", %0" \
+ : "=X" (mmx_trace) \
+ : /* nothing */ ); \
+ printf(#regd "=0x%08x%08x\n", \
+ mmx_trace.d[1], mmx_trace.d[0]); \
+ }
+
+#define mmx_m2m(op, mems, memd) \
+ { \
+ mmx_t mmx_trace; \
+ mmx_trace = (mems); \
+ printf(#op "_m2m(" #mems "=0x%08x%08x, ", \
+ mmx_trace.d[1], mmx_trace.d[0]); \
+ mmx_trace = (memd); \
+ printf(#memd "=0x%08x%08x) => ", \
+ mmx_trace.d[1], mmx_trace.d[0]); \
+ __asm__ __volatile__ ("movq %0, %%mm0\n\t" \
+ #op " %1, %%mm0\n\t" \
+ "movq %%mm0, %0" \
+ : "=X" (memd) \
+ : "X" (mems)); \
+ mmx_trace = (memd); \
+ printf(#memd "=0x%08x%08x\n", \
+ mmx_trace.d[1], mmx_trace.d[0]); \
+ }
+
+#else
+
+/* These macros are a lot simpler without the tracing...
+
+   Each helper emits one MMX instruction `op` through GCC inline
+   assembly (AT&T operand order: source first, destination last):
+
+     mmx_i2r(op, imm, reg)    op  imm,  reg
+     mmx_m2r(op, mem, reg)    op  mem,  reg    (mem is a memory input)
+     mmx_r2m(op, reg, mem)    op  reg,  mem    (mem is a memory output)
+     mmx_r2r(op, regs, regd)  op  regs, regd   (no C operands at all)
+     mmx_m2m(op, mems, memd)  memd = memd `op` mems, simulated by
+                              loading memd into mm0, applying op with
+                              mems and storing mm0 back into memd
+
+   `reg`, `regs` and `regd` are bare MMX register names (mm0..mm7)
+   that are pasted into the instruction text.
+*/
+
+#define mmx_i2r(op, imm, reg) \
+        __asm__ __volatile__ (#op " %0, %%" #reg \
+                              : /* nothing */ \
+                              : "X" (imm) )
+
+#define mmx_m2r(op, mem, reg) \
+        __asm__ __volatile__ (#op " %0, %%" #reg \
+                              : /* nothing */ \
+                              : "m" (mem))
+
+#define mmx_r2m(op, reg, mem) \
+        __asm__ __volatile__ (#op " %%" #reg ", %0" \
+                              : "=m" (mem) \
+                              : /* nothing */ )
+
+/* No operands are declared, so this is a basic asm statement and the
+   register names take a single '%'. */
+#define mmx_r2r(op, regs, regd) \
+        __asm__ __volatile__ (#op " %" #regs ", %" #regd)
+
+/* memd is read by the first movq and rewritten by the last one, so it
+   is declared as a read-write memory operand ("+m"); a write-only
+   output would let the compiler treat its previous contents as dead.
+   mems is only read and must live in memory for the instruction.
+   Note that mm0 is used as scratch without being listed as a clobber,
+   so callers must not keep a live value in mm0 across this macro. */
+#define mmx_m2m(op, mems, memd) \
+        __asm__ __volatile__ ("movq %0, %%mm0\n\t" \
+                              #op " %1, %%mm0\n\t" \
+                              "movq %%mm0, %0" \
+                              : "+m" (memd) \
+                              : "m" (mems))
+
+#endif
+
+
+/* 1x64 MOVe Quadword
+ (this is both a load and a store...
+ in fact, it is the only way to store)
+*/
+#define movq_m2r(var, reg) mmx_m2r(movq, var, reg)
+#define movq_r2m(reg, var) mmx_r2m(movq, reg, var)
+#define movq_r2r(regs, regd) mmx_r2r(movq, regs, regd)
+#define movq(vars, vard) \
+ __asm__ __volatile__ ("movq %1, %%mm0\n\t" \
+ "movq %%mm0, %0" \
+ : "=X" (vard) \
+ : "X" (vars))
+
+
+/* 1x32 MOVe Doubleword
+ (like movq, this is both load and store...
+ but is most useful for moving things between
+ mmx registers and ordinary registers)
+*/
+#define movd_m2r(var, reg) mmx_m2r(movd, var, reg)
+#define movd_r2m(reg, var) mmx_r2m(movd, reg, var)
+#define movd_r2r(regs, regd) mmx_r2r(movd, regs, regd)
+#define movd(vars, vard) \
+ __asm__ __volatile__ ("movd %1, %%mm0\n\t" \
+ "movd %%mm0, %0" \
+ : "=X" (vard) \
+ : "X" (vars))
+
+
+/* 2x32, 4x16, and 8x8 Parallel ADDs
+*/
+#define paddd_m2r(var, reg) mmx_m2r(paddd, var, reg)
+#define paddd_r2r(regs, regd) mmx_r2r(paddd, regs, regd)
+#define paddd(vars, vard) mmx_m2m(paddd, vars, vard)
+
+#define paddw_m2r(var, reg) mmx_m2r(paddw, var, reg)
+#define paddw_r2r(regs, regd) mmx_r2r(paddw, regs, regd)
+#define paddw(vars, vard) mmx_m2m(paddw, vars, vard)
+
+#define paddb_m2r(var, reg) mmx_m2r(paddb, var, reg)
+#define paddb_r2r(regs, regd) mmx_r2r(paddb, regs, regd)
+#define paddb(vars, vard) mmx_m2m(paddb, vars, vard)
+
+
+/* 4x16 and 8x8 Parallel ADDs using Saturation arithmetic
+*/
+#define paddsw_m2r(var, reg) mmx_m2r(paddsw, var, reg)
+#define paddsw_r2r(regs, regd) mmx_r2r(paddsw, regs, regd)
+#define paddsw(vars, vard) mmx_m2m(paddsw, vars, vard)
+
+#define paddsb_m2r(var, reg) mmx_m2r(paddsb, var, reg)
+#define paddsb_r2r(regs, regd) mmx_r2r(paddsb, regs, regd)
+#define paddsb(vars, vard) mmx_m2m(paddsb, vars, vard)
+
+
+/* 4x16 and 8x8 Parallel ADDs using Unsigned Saturation arithmetic
+*/
+#define paddusw_m2r(var, reg) mmx_m2r(paddusw, var, reg)
+#define paddusw_r2r(regs, regd) mmx_r2r(paddusw, regs, regd)
+#define paddusw(vars, vard) mmx_m2m(paddusw, vars, vard)
+
+#define paddusb_m2r(var, reg) mmx_m2r(paddusb, var, reg)
+#define paddusb_r2r(regs, regd) mmx_r2r(paddusb, regs, regd)
+#define paddusb(vars, vard) mmx_m2m(paddusb, vars, vard)
+
+
+/* 2x32, 4x16, and 8x8 Parallel SUBs
+*/
+#define psubd_m2r(var, reg) mmx_m2r(psubd, var, reg)
+#define psubd_r2r(regs, regd) mmx_r2r(psubd, regs, regd)
+#define psubd(vars, vard) mmx_m2m(psubd, vars, vard)
+
+#define psubw_m2r(var, reg) mmx_m2r(psubw, var, reg)
+#define psubw_r2r(regs, regd) mmx_r2r(psubw, regs, regd)
+#define psubw(vars, vard) mmx_m2m(psubw, vars, vard)
+
+#define psubb_m2r(var, reg) mmx_m2r(psubb, var, reg)
+#define psubb_r2r(regs, regd) mmx_r2r(psubb, regs, regd)
+#define psubb(vars, vard) mmx_m2m(psubb, vars, vard)
+
+
+/* 4x16 and 8x8 Parallel SUBs using Saturation arithmetic
+*/
+#define psubsw_m2r(var, reg) mmx_m2r(psubsw, var, reg)
+#define psubsw_r2r(regs, regd) mmx_r2r(psubsw, regs, regd)
+#define psubsw(vars, vard) mmx_m2m(psubsw, vars, vard)
+
+#define psubsb_m2r(var, reg) mmx_m2r(psubsb, var, reg)
+#define psubsb_r2r(regs, regd) mmx_r2r(psubsb, regs, regd)
+#define psubsb(vars, vard) mmx_m2m(psubsb, vars, vard)
+
+
+/* 4x16 and 8x8 Parallel SUBs using Unsigned Saturation arithmetic
+*/
+#define psubusw_m2r(var, reg) mmx_m2r(psubusw, var, reg)
+#define psubusw_r2r(regs, regd) mmx_r2r(psubusw, regs, regd)
+#define psubusw(vars, vard) mmx_m2m(psubusw, vars, vard)
+
+#define psubusb_m2r(var, reg) mmx_m2r(psubusb, var, reg)
+#define psubusb_r2r(regs, regd) mmx_r2r(psubusb, regs, regd)
+#define psubusb(vars, vard) mmx_m2m(psubusb, vars, vard)
+
+
+/* 4x16 Parallel MULs giving Low 4x16 portions of results
+*/
+#define pmullw_m2r(var, reg) mmx_m2r(pmullw, var, reg)
+#define pmullw_r2r(regs, regd) mmx_r2r(pmullw, regs, regd)
+#define pmullw(vars, vard) mmx_m2m(pmullw, vars, vard)
+
+
+/* 4x16 Parallel MULs giving High 4x16 portions of results
+*/
+#define pmulhw_m2r(var, reg) mmx_m2r(pmulhw, var, reg)
+#define pmulhw_r2r(regs, regd) mmx_r2r(pmulhw, regs, regd)
+#define pmulhw(vars, vard) mmx_m2m(pmulhw, vars, vard)
+
+
+/* 4x16->2x32 Parallel Mul-ADD
+ (multiplies the four signed 16-bit field pairs into full 32-bit
+ products, then adds adjacent products to make the final 2x32 result)
+*/
+#define pmaddwd_m2r(var, reg) mmx_m2r(pmaddwd, var, reg)
+#define pmaddwd_r2r(regs, regd) mmx_r2r(pmaddwd, regs, regd)
+#define pmaddwd(vars, vard) mmx_m2m(pmaddwd, vars, vard)
+
+
+/* 1x64 bitwise AND
+*/
+#ifdef BROKEN_PAND
+#define pand_m2r(var, reg) \
+ { \
+ mmx_m2r(pandn, (mmx_t) -1LL, reg); \
+ mmx_m2r(pandn, var, reg); \
+ }
+#define pand_r2r(regs, regd) \
+ { /* pand emulated with two pandn ops (BROKEN_PAND workaround) */ \
+ mmx_m2r(pandn, (mmx_t) -1LL, regd); /* regd = ~regd */ \
+ mmx_r2r(pandn, regs, regd); /* regd = ~regd & regs; ';' as in pand_m2r */ \
+ }
+#define pand(vars, vard) \
+ { \
+ movq_m2r(vard, mm0); \
+ mmx_m2r(pandn, (mmx_t) -1LL, mm0); \
+ mmx_m2r(pandn, vars, mm0); \
+ movq_r2m(mm0, vard); \
+ }
+#else
+#define pand_m2r(var, reg) mmx_m2r(pand, var, reg)
+#define pand_r2r(regs, regd) mmx_r2r(pand, regs, regd)
+#define pand(vars, vard) mmx_m2m(pand, vars, vard)
+#endif
+
+
+/* 1x64 bitwise AND with Not the destination
+*/
+#define pandn_m2r(var, reg) mmx_m2r(pandn, var, reg)
+#define pandn_r2r(regs, regd) mmx_r2r(pandn, regs, regd)
+#define pandn(vars, vard) mmx_m2m(pandn, vars, vard)
+
+
+/* 1x64 bitwise OR
+*/
+#define por_m2r(var, reg) mmx_m2r(por, var, reg)
+#define por_r2r(regs, regd) mmx_r2r(por, regs, regd)
+#define por(vars, vard) mmx_m2m(por, vars, vard)
+
+
+/* 1x64 bitwise eXclusive OR
+*/
+#define pxor_m2r(var, reg) mmx_m2r(pxor, var, reg)
+#define pxor_r2r(regs, regd) mmx_r2r(pxor, regs, regd)
+#define pxor(vars, vard) mmx_m2m(pxor, vars, vard)
+
+
+/* 2x32, 4x16, and 8x8 Parallel CoMPare for EQuality
+ (resulting fields are either 0 or -1)
+*/
+#define pcmpeqd_m2r(var, reg) mmx_m2r(pcmpeqd, var, reg)
+#define pcmpeqd_r2r(regs, regd) mmx_r2r(pcmpeqd, regs, regd)
+#define pcmpeqd(vars, vard) mmx_m2m(pcmpeqd, vars, vard)
+
+#define pcmpeqw_m2r(var, reg) mmx_m2r(pcmpeqw, var, reg)
+#define pcmpeqw_r2r(regs, regd) mmx_r2r(pcmpeqw, regs, regd)
+#define pcmpeqw(vars, vard) mmx_m2m(pcmpeqw, vars, vard)
+
+#define pcmpeqb_m2r(var, reg) mmx_m2r(pcmpeqb, var, reg)
+#define pcmpeqb_r2r(regs, regd) mmx_r2r(pcmpeqb, regs, regd)
+#define pcmpeqb(vars, vard) mmx_m2m(pcmpeqb, vars, vard)
+
+
+/* 2x32, 4x16, and 8x8 Parallel CoMPare for Greater Than
+ (resulting fields are either 0 or -1)
+*/
+#define pcmpgtd_m2r(var, reg) mmx_m2r(pcmpgtd, var, reg)
+#define pcmpgtd_r2r(regs, regd) mmx_r2r(pcmpgtd, regs, regd)
+#define pcmpgtd(vars, vard) mmx_m2m(pcmpgtd, vars, vard)
+
+#define pcmpgtw_m2r(var, reg) mmx_m2r(pcmpgtw, var, reg)
+#define pcmpgtw_r2r(regs, regd) mmx_r2r(pcmpgtw, regs, regd)
+#define pcmpgtw(vars, vard) mmx_m2m(pcmpgtw, vars, vard)
+
+#define pcmpgtb_m2r(var, reg) mmx_m2r(pcmpgtb, var, reg)
+#define pcmpgtb_r2r(regs, regd) mmx_r2r(pcmpgtb, regs, regd)
+#define pcmpgtb(vars, vard) mmx_m2m(pcmpgtb, vars, vard)
+
+
+/* 1x64, 2x32, and 4x16 Parallel Shift Left Logical
+*/
+#define psllq_i2r(imm, reg) mmx_i2r(psllq, imm, reg)
+#define psllq_m2r(var, reg) mmx_m2r(psllq, var, reg)
+#define psllq_r2r(regs, regd) mmx_r2r(psllq, regs, regd)
+#define psllq(vars, vard) mmx_m2m(psllq, vars, vard)
+
+#define pslld_i2r(imm, reg) mmx_i2r(pslld, imm, reg)
+#define pslld_m2r(var, reg) mmx_m2r(pslld, var, reg)
+#define pslld_r2r(regs, regd) mmx_r2r(pslld, regs, regd)
+#define pslld(vars, vard) mmx_m2m(pslld, vars, vard)
+
+#define psllw_i2r(imm, reg) mmx_i2r(psllw, imm, reg)
+#define psllw_m2r(var, reg) mmx_m2r(psllw, var, reg)
+#define psllw_r2r(regs, regd) mmx_r2r(psllw, regs, regd)
+#define psllw(vars, vard) mmx_m2m(psllw, vars, vard)
+
+
+/* 1x64, 2x32, and 4x16 Parallel Shift Right Logical
+*/
+#define psrlq_i2r(imm, reg) mmx_i2r(psrlq, imm, reg)
+#define psrlq_m2r(var, reg) mmx_m2r(psrlq, var, reg)
+#define psrlq_r2r(regs, regd) mmx_r2r(psrlq, regs, regd)
+#define psrlq(vars, vard) mmx_m2m(psrlq, vars, vard)
+
+#define psrld_i2r(imm, reg) mmx_i2r(psrld, imm, reg)
+#define psrld_m2r(var, reg) mmx_m2r(psrld, var, reg)
+#define psrld_r2r(regs, regd) mmx_r2r(psrld, regs, regd)
+#define psrld(vars, vard) mmx_m2m(psrld, vars, vard)
+
+#define psrlw_i2r(imm, reg) mmx_i2r(psrlw, imm, reg)
+#define psrlw_m2r(var, reg) mmx_m2r(psrlw, var, reg)
+#define psrlw_r2r(regs, regd) mmx_r2r(psrlw, regs, regd)
+#define psrlw(vars, vard) mmx_m2m(psrlw, vars, vard)
+
+
+/* 2x32 and 4x16 Parallel Shift Right Arithmetic
+*/
+#define psrad_i2r(imm, reg) mmx_i2r(psrad, imm, reg)
+#define psrad_m2r(var, reg) mmx_m2r(psrad, var, reg)
+#define psrad_r2r(regs, regd) mmx_r2r(psrad, regs, regd)
+#define psrad(vars, vard) mmx_m2m(psrad, vars, vard)
+
+#define psraw_i2r(imm, reg) mmx_i2r(psraw, imm, reg)
+#define psraw_m2r(var, reg) mmx_m2r(psraw, var, reg)
+#define psraw_r2r(regs, regd) mmx_r2r(psraw, regs, regd)
+#define psraw(vars, vard) mmx_m2m(psraw, vars, vard)
+
+
+/* 2x32->4x16 and 4x16->8x8 PACK and Signed Saturate
+ (dest fields are packed into the low half of dest, source fields into the high half)
+*/
+#define packssdw_m2r(var, reg) mmx_m2r(packssdw, var, reg)
+#define packssdw_r2r(regs, regd) mmx_r2r(packssdw, regs, regd)
+#define packssdw(vars, vard) mmx_m2m(packssdw, vars, vard)
+
+#define packsswb_m2r(var, reg) mmx_m2r(packsswb, var, reg)
+#define packsswb_r2r(regs, regd) mmx_r2r(packsswb, regs, regd)
+#define packsswb(vars, vard) mmx_m2m(packsswb, vars, vard)
+
+
+/* 4x16->8x8 PACK and Unsigned Saturate
+ (dest fields are packed into the low half of dest, source fields into the high half)
+*/
+#define packuswb_m2r(var, reg) mmx_m2r(packuswb, var, reg)
+#define packuswb_r2r(regs, regd) mmx_r2r(packuswb, regs, regd)
+#define packuswb(vars, vard) mmx_m2m(packuswb, vars, vard)
+
+
+/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK Low
+ (interleaves low half of dest with low half of source
+ as padding in each result field)
+*/
+#define punpckldq_m2r(var, reg) mmx_m2r(punpckldq, var, reg)
+#define punpckldq_r2r(regs, regd) mmx_r2r(punpckldq, regs, regd)
+#define punpckldq(vars, vard) mmx_m2m(punpckldq, vars, vard)
+
+#define punpcklwd_m2r(var, reg) mmx_m2r(punpcklwd, var, reg)
+#define punpcklwd_r2r(regs, regd) mmx_r2r(punpcklwd, regs, regd)
+#define punpcklwd(vars, vard) mmx_m2m(punpcklwd, vars, vard)
+
+#define punpcklbw_m2r(var, reg) mmx_m2r(punpcklbw, var, reg)
+#define punpcklbw_r2r(regs, regd) mmx_r2r(punpcklbw, regs, regd)
+#define punpcklbw(vars, vard) mmx_m2m(punpcklbw, vars, vard)
+
+
+/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK High
+ (interleaves high half of dest with high half of source
+ as padding in each result field)
+*/
+#define punpckhdq_m2r(var, reg) mmx_m2r(punpckhdq, var, reg)
+#define punpckhdq_r2r(regs, regd) mmx_r2r(punpckhdq, regs, regd)
+#define punpckhdq(vars, vard) mmx_m2m(punpckhdq, vars, vard)
+
+#define punpckhwd_m2r(var, reg) mmx_m2r(punpckhwd, var, reg)
+#define punpckhwd_r2r(regs, regd) mmx_r2r(punpckhwd, regs, regd)
+#define punpckhwd(vars, vard) mmx_m2m(punpckhwd, vars, vard)
+
+#define punpckhbw_m2r(var, reg) mmx_m2r(punpckhbw, var, reg)
+#define punpckhbw_r2r(regs, regd) mmx_r2r(punpckhbw, regs, regd)
+#define punpckhbw(vars, vard) mmx_m2m(punpckhbw, vars, vard)
+
+
+/* Empty MMx State
+ (used to clean-up when going from mmx to float use
+ of the registers that are shared by both; note that
+ there is no float-to-mmx operation needed, because
+ only the float tag word info is corruptible)
+*/
+#ifdef MMX_TRACE
+
+#define emms() \
+ { \
+ printf("emms()\n"); \
+ __asm__ __volatile__ ("emms"); \
+ }
+
+#else
+
+#define emms() __asm__ __volatile__ ("emms")
+
+#endif
+
+#endif
+/* vi: set ts=4 sw=4 expandtab: */
--- a/src/render/opengl/SDL_renderer_gl.c Wed Feb 02 22:55:12 2011 -0800
+++ b/src/render/opengl/SDL_renderer_gl.c Thu Feb 03 00:19:40 2011 -0800
@@ -37,27 +37,6 @@
http://developer.apple.com/documentation/GraphicsImaging/Conceptual/OpenGL-MacProgGuide/opengl_texturedata/chapter_10_section_2.html
*/
-/* !!! FIXME: this should go in a higher level than the GL renderer. */
-static __inline__ int
-bytes_per_pixel(const Uint32 format)
-{
- if (!SDL_ISPIXELFORMAT_FOURCC(format)) {
- return SDL_BYTESPERPIXEL(format);
- }
-
- /* FOURCC format */
- switch (format) {
- case SDL_PIXELFORMAT_YV12:
- case SDL_PIXELFORMAT_IYUV:
- case SDL_PIXELFORMAT_YUY2:
- case SDL_PIXELFORMAT_UYVY:
- case SDL_PIXELFORMAT_YVYU:
- return 2;
- default:
- return 1; /* shouldn't ever hit this. */
- }
-}
-
/* Used to re-create the window with OpenGL capability */
extern int SDL_RecreateWindow(SDL_Window * window, Uint32 flags);
@@ -67,18 +46,12 @@
static void GL_WindowEvent(SDL_Renderer * renderer,
const SDL_WindowEvent *event);
static int GL_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture);
-static int GL_QueryTexturePixels(SDL_Renderer * renderer,
- SDL_Texture * texture, void **pixels,
- int *pitch);
static int GL_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
const SDL_Rect * rect, const void *pixels,
int pitch);
static int GL_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
- const SDL_Rect * rect, int markDirty, void **pixels,
- int *pitch);
+ const SDL_Rect * rect, void **pixels, int *pitch);
static void GL_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture);
-static void GL_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture,
- int numrects, const SDL_Rect * rects);
static int GL_RenderClear(SDL_Renderer * renderer);
static int GL_RenderDrawPoints(SDL_Renderer * renderer,
const SDL_Point * points, int count);
@@ -102,21 +75,8 @@
{
"opengl",
(SDL_RENDERER_PRESENTVSYNC | SDL_RENDERER_ACCELERATED),
- 13,
- {
- SDL_PIXELFORMAT_RGB332,
- SDL_PIXELFORMAT_RGB444,
- SDL_PIXELFORMAT_RGB555,
- SDL_PIXELFORMAT_ARGB4444,
- SDL_PIXELFORMAT_ARGB1555,
- SDL_PIXELFORMAT_RGB565,
- SDL_PIXELFORMAT_RGB24,
- SDL_PIXELFORMAT_BGR24,
- SDL_PIXELFORMAT_RGB888,
- SDL_PIXELFORMAT_BGR888,
- SDL_PIXELFORMAT_ARGB8888,
- SDL_PIXELFORMAT_ABGR8888,
- SDL_PIXELFORMAT_ARGB2101010},
+ 1,
+ {SDL_PIXELFORMAT_ARGB8888},
0,
0}
};
@@ -126,10 +86,6 @@
SDL_GLContext context;
SDL_bool updateSize;
SDL_bool GL_ARB_texture_rectangle_supported;
- SDL_bool GL_EXT_paletted_texture_supported;
- SDL_bool GL_APPLE_ycbcr_422_supported;
- SDL_bool GL_MESA_ycbcr_texture_supported;
- SDL_bool GL_ARB_fragment_program_supported;
int blendMode;
/* OpenGL functions */
@@ -139,33 +95,18 @@
void (*glTextureRangeAPPLE) (GLenum target, GLsizei length,
const GLvoid * pointer);
-
- PFNGLGETPROGRAMIVARBPROC glGetProgramivARB;
- PFNGLGETPROGRAMSTRINGARBPROC glGetProgramStringARB;
- PFNGLPROGRAMLOCALPARAMETER4FVARBPROC glProgramLocalParameter4fvARB;
- PFNGLDELETEPROGRAMSARBPROC glDeleteProgramsARB;
- PFNGLGENPROGRAMSARBPROC glGenProgramsARB;
- PFNGLBINDPROGRAMARBPROC glBindProgramARB;
- PFNGLPROGRAMSTRINGARBPROC glProgramStringARB;
-
- /* (optional) fragment programs */
- GLuint fragment_program_UYVY;
} GL_RenderData;
typedef struct
{
GLuint texture;
- GLuint shader;
GLenum type;
GLfloat texw;
GLfloat texh;
GLenum format;
GLenum formattype;
- Uint8 *palette;
void *pixels;
int pitch;
- SDL_DirtyRectList dirty;
- int HACK_RYAN_FIXME;
} GL_TextureData;
@@ -257,11 +198,9 @@
renderer->WindowEvent = GL_WindowEvent;
renderer->CreateTexture = GL_CreateTexture;
- renderer->QueryTexturePixels = GL_QueryTexturePixels;
renderer->UpdateTexture = GL_UpdateTexture;
renderer->LockTexture = GL_LockTexture;
renderer->UnlockTexture = GL_UnlockTexture;
- renderer->DirtyTexture = GL_DirtyTexture;
renderer->RenderClear = GL_RenderClear;
renderer->RenderDrawPoints = GL_RenderDrawPoints;
renderer->RenderDrawLines = GL_RenderDrawLines;
@@ -317,40 +256,12 @@
|| SDL_GL_ExtensionSupported("GL_EXT_texture_rectangle")) {
data->GL_ARB_texture_rectangle_supported = SDL_TRUE;
}
- if (SDL_GL_ExtensionSupported("GL_APPLE_ycbcr_422")) {
- data->GL_APPLE_ycbcr_422_supported = SDL_TRUE;
- }
- if (SDL_GL_ExtensionSupported("GL_MESA_ycbcr_texture")) {
- data->GL_MESA_ycbcr_texture_supported = SDL_TRUE;
- }
if (SDL_GL_ExtensionSupported("GL_APPLE_texture_range")) {
data->glTextureRangeAPPLE =
(void (*)(GLenum, GLsizei, const GLvoid *))
SDL_GL_GetProcAddress("glTextureRangeAPPLE");
}
- /* we might use fragment programs for YUV data, etc. */
- if (SDL_GL_ExtensionSupported("GL_ARB_fragment_program")) {
- /* !!! FIXME: this doesn't check for errors. */
- /* !!! FIXME: this should really reuse the glfuncs.h stuff. */
- data->glGetProgramivARB = (PFNGLGETPROGRAMIVARBPROC)
- SDL_GL_GetProcAddress("glGetProgramivARB");
- data->glGetProgramStringARB = (PFNGLGETPROGRAMSTRINGARBPROC)
- SDL_GL_GetProcAddress("glGetProgramStringARB");
- data->glProgramLocalParameter4fvARB =
- (PFNGLPROGRAMLOCALPARAMETER4FVARBPROC)
- SDL_GL_GetProcAddress("glProgramLocalParameter4fvARB");
- data->glDeleteProgramsARB = (PFNGLDELETEPROGRAMSARBPROC)
- SDL_GL_GetProcAddress("glDeleteProgramsARB");
- data->glGenProgramsARB = (PFNGLGENPROGRAMSARBPROC)
- SDL_GL_GetProcAddress("glGenProgramsARB");
- data->glBindProgramARB = (PFNGLBINDPROGRAMARBPROC)
- SDL_GL_GetProcAddress("glBindProgramARB");
- data->glProgramStringARB = (PFNGLPROGRAMSTRINGARBPROC)
- SDL_GL_GetProcAddress("glProgramStringARB");
- data->GL_ARB_fragment_program_supported = SDL_TRUE;
- }
-
/* Set up parameters for rendering */
data->blendMode = -1;
data->glDisable(GL_DEPTH_TEST);
@@ -419,240 +330,16 @@
return value;
}
-
-//#define DEBUG_PROGRAM_COMPILE 1
-
-static void
-set_shader_error(GL_RenderData * data, const char *prefix)
-{
- GLint pos = 0;
- const GLubyte *errstr;
- data->glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos);
- errstr = data->glGetString(GL_PROGRAM_ERROR_STRING_ARB);
- SDL_SetError("%s: shader compile error at position %d: %s",
- prefix, (int) pos, (const char *) errstr);
-}
-
-static GLuint
-compile_shader(GL_RenderData * data, GLenum shader_type, const char *_code)
-{
- const int have_texture_rects = data->GL_ARB_texture_rectangle_supported;
- const char *replacement = have_texture_rects ? "RECT" : "2D";
- const size_t replacementlen = SDL_strlen(replacement);
- const char *token = "%TEXTURETARGET%";
- const size_t tokenlen = SDL_strlen(token);
- char *code = NULL;
- char *ptr = NULL;
- GLuint program = 0;
-
- /*
- * The TEX instruction needs a different target depending on what we use.
- * To handle this, we use "%TEXTURETARGET%" and replace the string before
- * compiling the shader.
- */
- code = SDL_strdup(_code);
- if (code == NULL)
- return 0;
-
- for (ptr = SDL_strstr(code, token); ptr; ptr = SDL_strstr(ptr + 1, token)) {
- SDL_memcpy(ptr, replacement, replacementlen);
- SDL_memmove(ptr + replacementlen, ptr + tokenlen,
- SDL_strlen(ptr + tokenlen) + 1);
- }
-
-#if DEBUG_PROGRAM_COMPILE
- printf("compiling shader:\n%s\n\n", code);
-#endif
-
- data->glGetError(); /* flush any existing error state. */
- data->glGenProgramsARB(1, &program);
- data->glBindProgramARB(shader_type, program);
- data->glProgramStringARB(shader_type, GL_PROGRAM_FORMAT_ASCII_ARB,
- (GLsizei)SDL_strlen(code), code);
-
- SDL_free(code);
-
- if (data->glGetError() == GL_INVALID_OPERATION) {
-#if DEBUG_PROGRAM_COMPILE
- GLint pos = 0;
- const GLubyte *errstr;
- data->glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos);
- errstr = data->glGetString(GL_PROGRAM_ERROR_STRING_ARB);
- printf("program compile error at position %d: %s\n\n",
- (int) pos, (const char *) errstr);
-#endif
- data->glBindProgramARB(shader_type, 0);
- data->glDeleteProgramsARB(1, &program);
- return 0;
- }
-
- return program;
-}
-
-
-/*
- * Fragment program that renders from UYVY textures.
- * The UYVY to RGB equasion is:
- * R = 1.164(Y-16) + 1.596(Cr-128)
- * G = 1.164(Y-16) - 0.813(Cr-128) - 0.391(Cb-128)
- * B = 1.164(Y-16) + 2.018(Cb-128)
- * Byte layout is Cb, Y1, Cr, Y2, stored in the R, G, B, A channels.
- * 4 bytes == 2 pixels: Y1/Cb/Cr, Y2/Cb/Cr
- *
- * !!! FIXME: this ignores blendmodes, etc.
- * !!! FIXME: this could be more efficient...use a dot product for green, etc.
- */
-static const char *fragment_program_UYVY_source_code = "!!ARBfp1.0\n"
- /* outputs... */
- "OUTPUT outcolor = result.color;\n"
- /* scratch registers... */
- "TEMP uyvy;\n" "TEMP luminance;\n" "TEMP work;\n"
- /* Halve the coordinates to grab the correct 32 bits for the fragment. */
- "MUL work, fragment.texcoord, { 0.5, 1.0, 1.0, 1.0 };\n"
- /* Sample the YUV texture. Cb, Y1, Cr, Y2, are stored in x, y, z, w. */
- "TEX uyvy, work, texture[0], %TEXTURETARGET%;\n"
- /* Do subtractions (128/255, 16/255, 128/255, 16/255) */
- "SUB uyvy, uyvy, { 0.501960784313726, 0.06274509803922, 0.501960784313726, 0.06274509803922 };\n"
- /* Choose the luminance component by texcoord. */
- /* !!! FIXME: laziness wins out for now... just average Y1 and Y2. */
- "ADD luminance, uyvy.yyyy, uyvy.wwww;\n"
- "MUL luminance, luminance, { 0.5, 0.5, 0.5, 0.5 };\n"
- /* Multiply luminance by its magic value. */
- "MUL luminance, luminance, { 1.164, 1.164, 1.164, 1.164 };\n"
- /* uyvy.xyzw becomes Cr/Cr/Cb/Cb, with multiplications. */
- "MUL uyvy, uyvy.zzxx, { 1.596, -0.813, 2.018, -0.391 };\n"
- /* Add luminance to Cr and Cb, store to RGB channels. */
- "ADD work.rgb, luminance, uyvy;\n"
- /* Do final addition for Green channel. (!!! FIXME: this should be a DPH?) */
- "ADD work.g, work.g, uyvy.w;\n"
- /* Make sure alpha channel is fully opaque. (!!! FIXME: blend modes!) */
- "MOV work.a, { 1.0 };\n"
- /* Store out the final fragment color... */
- "MOV outcolor, work;\n"
- /* ...and we're done! */
- "END\n";
-
static __inline__ SDL_bool
convert_format(GL_RenderData *renderdata, Uint32 pixel_format,
GLint* internalFormat, GLenum* format, GLenum* type)
{
switch (pixel_format) {
- case SDL_PIXELFORMAT_RGB332:
- *internalFormat = GL_R3_G3_B2;
- *format = GL_RGB;
- *type = GL_UNSIGNED_BYTE_3_3_2;
- break;
- case SDL_PIXELFORMAT_RGB444:
- *internalFormat = GL_RGB4;
- *format = GL_RGB;
- *type = GL_UNSIGNED_SHORT_4_4_4_4;
- break;
- case SDL_PIXELFORMAT_RGB555:
- *internalFormat = GL_RGB5;
- *format = GL_RGB;
- *type = GL_UNSIGNED_SHORT_5_5_5_1;
- break;
- case SDL_PIXELFORMAT_ARGB4444:
- *internalFormat = GL_RGBA4;
- *format = GL_BGRA;
- *type = GL_UNSIGNED_SHORT_4_4_4_4_REV;
- break;
- case SDL_PIXELFORMAT_ARGB1555:
- *internalFormat = GL_RGB5_A1;
- *format = GL_BGRA;
- *type = GL_UNSIGNED_SHORT_1_5_5_5_REV;
- break;
- case SDL_PIXELFORMAT_RGB565:
- *internalFormat = GL_RGB8;
- *format = GL_RGB;
- *type = GL_UNSIGNED_SHORT_5_6_5;
- break;
- case SDL_PIXELFORMAT_RGB24:
- *internalFormat = GL_RGB8;
- *format = GL_RGB;
- *type = GL_UNSIGNED_BYTE;
- break;
case SDL_PIXELFORMAT_RGB888:
- *internalFormat = GL_RGB8;
- *format = GL_BGRA;
- *type = GL_UNSIGNED_BYTE;
- break;
- case SDL_PIXELFORMAT_BGR24:
- *internalFormat = GL_RGB8;
- *format = GL_BGR;
- *type = GL_UNSIGNED_BYTE;
- break;
- case SDL_PIXELFORMAT_BGR888:
- *internalFormat = GL_RGB8;
- *format = GL_RGBA;
- *type = GL_UNSIGNED_BYTE;
- break;
case SDL_PIXELFORMAT_ARGB8888:
-#ifdef __MACOSX__
- *internalFormat = GL_RGBA;
- *format = GL_BGRA;
- *type = GL_UNSIGNED_INT_8_8_8_8_REV;
-#else
*internalFormat = GL_RGBA8;
*format = GL_BGRA;
- *type = GL_UNSIGNED_BYTE;
-#endif
- break;
- case SDL_PIXELFORMAT_ABGR8888:
- *internalFormat = GL_RGBA8;
- *format = GL_RGBA;
- *type = GL_UNSIGNED_BYTE;
- break;
- case SDL_PIXELFORMAT_ARGB2101010:
- *internalFormat = GL_RGB10_A2;
- *format = GL_BGRA;
- *type = GL_UNSIGNED_INT_2_10_10_10_REV;
- break;
- case SDL_PIXELFORMAT_UYVY:
- if (renderdata->GL_APPLE_ycbcr_422_supported) {
- *internalFormat = GL_RGB;
- *format = GL_YCBCR_422_APPLE;
-#if SDL_BYTEORDER == SDL_LIL_ENDIAN
- *type = GL_UNSIGNED_SHORT_8_8_APPLE;
-#else
- *type = GL_UNSIGNED_SHORT_8_8_REV_APPLE;
-#endif
- } else if (renderdata->GL_MESA_ycbcr_texture_supported) {
- *internalFormat = GL_YCBCR_MESA;
- *format = GL_YCBCR_MESA;
-#if SDL_BYTEORDER == SDL_LIL_ENDIAN
- *type = GL_UNSIGNED_SHORT_8_8_MESA;
-#else
- *type = GL_UNSIGNED_SHORT_8_8_REV_MESA;
-#endif
- } else if (renderdata->GL_ARB_fragment_program_supported) {
- *internalFormat = GL_RGBA;
- *format = GL_RGBA;
- *type = GL_UNSIGNED_BYTE;
- } else {
- return SDL_FALSE;
- }
- break;
- case SDL_PIXELFORMAT_YUY2:
- if (renderdata->GL_APPLE_ycbcr_422_supported) {
- *internalFormat = GL_RGB;
- *format = GL_YCBCR_422_APPLE;
-#if SDL_BYTEORDER == SDL_LIL_ENDIAN
- *type = GL_UNSIGNED_SHORT_8_8_REV_APPLE;
-#else
- *type = GL_UNSIGNED_SHORT_8_8_APPLE;
-#endif
- } else if (renderdata->GL_MESA_ycbcr_texture_supported) {
- *internalFormat = GL_YCBCR_MESA;
- *format = GL_YCBCR_MESA;
-#if SDL_BYTEORDER == SDL_LIL_ENDIAN
- *type = GL_UNSIGNED_SHORT_8_8_REV_MESA;
-#else
- *type = GL_UNSIGNED_SHORT_8_8_MESA;
-#endif
- } else {
- return SDL_FALSE;
- }
+ *type = GL_UNSIGNED_INT_8_8_8_8_REV;
break;
default:
return SDL_FALSE;
@@ -668,7 +355,6 @@
GLint internalFormat;
GLenum format, type;
int texture_w, texture_h;
- GLuint shader = 0;
GLenum result;
GL_ActivateRenderer(renderer);
@@ -679,21 +365,6 @@
SDL_GetPixelFormatName(texture->format));
return -1;
}
- if (texture->format == SDL_PIXELFORMAT_UYVY &&
- !renderdata->GL_APPLE_ycbcr_422_supported &&
- !renderdata->GL_MESA_ycbcr_texture_supported &&
- renderdata->GL_ARB_fragment_program_supported) {
- if (renderdata->fragment_program_UYVY == 0) {
- renderdata->fragment_program_UYVY =
- compile_shader(renderdata, GL_FRAGMENT_PROGRAM_ARB,
- fragment_program_UYVY_source_code);
- if (renderdata->fragment_program_UYVY == 0) {
- set_shader_error(renderdata, "UYVY");
- return -1;
- }
- }
- shader = renderdata->fragment_program_UYVY;
- }
data = (GL_TextureData *) SDL_calloc(1, sizeof(*data));
if (!data) {
@@ -701,10 +372,8 @@
return -1;
}
- data->shader = shader;
-
if (texture->access == SDL_TEXTUREACCESS_STREAMING) {
- data->pitch = texture->w * bytes_per_pixel(texture->format);
+ data->pitch = texture->w * SDL_BYTESPERPIXEL(texture->format);
data->pixels = SDL_malloc(texture->h * data->pitch);
if (!data->pixels) {
SDL_OutOfMemory();
@@ -731,17 +400,6 @@
data->texh = (GLfloat) texture->h / texture_h;
}
- /* YUV formats use RGBA but are really two bytes per pixel */
- if (internalFormat == GL_RGBA && bytes_per_pixel(texture->format) < 4) {
- texture_w /= 2;
- if (data->type == GL_TEXTURE_2D) {
- data->texw *= 2.0f;
- }
- data->HACK_RYAN_FIXME = 2;
- } else {
- data->HACK_RYAN_FIXME = 1;
- }
-
data->format = format;
data->formattype = type;
renderdata->glEnable(data->type);
@@ -771,22 +429,13 @@
renderdata->glTexParameteri(data->type, GL_TEXTURE_STORAGE_HINT_APPLE,
GL_STORAGE_CACHED_APPLE);
}
-/* This causes a crash in testoverlay for some reason. Apple bug? */
-#if 0
if (texture->access == SDL_TEXTUREACCESS_STREAMING
&& texture->format == SDL_PIXELFORMAT_ARGB8888) {
- /*
- if (renderdata->glTextureRangeAPPLE) {
- renderdata->glTextureRangeAPPLE(data->type,
- texture->h * data->pitch,
- data->pixels);
- }
- */
renderdata->glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
renderdata->glTexImage2D(data->type, 0, internalFormat, texture_w,
texture_h, 0, format, type, data->pixels);
- } else
-#endif
+ }
+ else
#endif
{
renderdata->glTexImage2D(data->type, 0, internalFormat, texture_w,
@@ -801,26 +450,13 @@
return 0;
}
-static int
-GL_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture,
- void **pixels, int *pitch)
-{
- GL_TextureData *data = (GL_TextureData *) texture->driverdata;
-
- *pixels = data->pixels;
- *pitch = data->pitch;
- return 0;
-}
-
static void
SetupTextureUpdate(GL_RenderData * renderdata, SDL_Texture * texture,
int pitch)
{
renderdata->glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
renderdata->glPixelStorei(GL_UNPACK_ROW_LENGTH,
- (pitch / bytes_per_pixel(texture->format)) /
- ((GL_TextureData *) texture->driverdata)->
- HACK_RYAN_FIXME);
+ (pitch / SDL_BYTESPERPIXEL(texture->format)));
}
static int
@@ -851,18 +487,13 @@
static int
GL_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
- const SDL_Rect * rect, int markDirty, void **pixels,
- int *pitch)
+ const SDL_Rect * rect, void **pixels, int *pitch)
{
GL_TextureData *data = (GL_TextureData *) texture->driverdata;
- if (markDirty) {
- SDL_AddDirtyRect(&data->dirty, rect);
- }
-
*pixels =
(void *) ((Uint8 *) data->pixels + rect->y * data->pitch +
- rect->x * bytes_per_pixel(texture->format));
+ rect->x * SDL_BYTESPERPIXEL(texture->format));
*pitch = data->pitch;
return 0;
}
@@ -870,18 +501,17 @@
static void
GL_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture)
{
-}
+ GL_RenderData *renderdata = (GL_RenderData *) renderer->driverdata;
+ GL_TextureData *data = (GL_TextureData *) texture->driverdata;
+
+ GL_ActivateRenderer(renderer);
-static void
-GL_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture, int numrects,
- const SDL_Rect * rects)
-{
- GL_TextureData *data = (GL_TextureData *) texture->driverdata;
- int i;
-
- for (i = 0; i < numrects; ++i) {
- SDL_AddDirtyRect(&data->dirty, &rects[i]);
- }
+ SetupTextureUpdate(renderdata, texture, data->pitch);
+ renderdata->glEnable(data->type);
+ renderdata->glBindTexture(data->type, data->texture);
+ renderdata->glTexSubImage2D(data->type, 0, 0, 0, texture->w, texture->h,
+ data->format, data->formattype, data->pixels);
+ renderdata->glDisable(data->type);
}
static void
@@ -1056,28 +686,6 @@
GL_ActivateRenderer(renderer);
- if (texturedata->dirty.list) {
- SDL_DirtyRect *dirty;
- void *pixels;
- int bpp = bytes_per_pixel(texture->format);
- int pitch = texturedata->pitch;
-
- SetupTextureUpdate(data, texture, pitch);
- data->glEnable(texturedata->type);
- data->glBindTexture(texturedata->type, texturedata->texture);
- for (dirty = texturedata->dirty.list; dirty; dirty = dirty->next) {
- SDL_Rect *rect = &dirty->rect;
- pixels =
- (void *) ((Uint8 *) texturedata->pixels + rect->y * pitch +
- rect->x * bpp);
- data->glTexSubImage2D(texturedata->type, 0, rect->x, rect->y,
- rect->w / texturedata->HACK_RYAN_FIXME,
- rect->h, texturedata->format,
- texturedata->formattype, pixels);
- }
- SDL_ClearDirtyRects(&texturedata->dirty);
- }
-
minx = dstrect->x;
miny = dstrect->y;
maxx = dstrect->x + dstrect->w;
@@ -1106,12 +714,6 @@
GL_SetBlendMode(data, texture->blendMode);
- /* Set up the shader for the copy, if any */
- if (texturedata->shader) {
- data->glEnable(GL_FRAGMENT_PROGRAM_ARB);
- data->glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, texturedata->shader);
- }
-
data->glBegin(GL_TRIANGLE_STRIP);
data->glTexCoord2f(minu, minv);
data->glVertex2f((GLfloat) minx, (GLfloat) miny);
@@ -1123,10 +725,6 @@
data->glVertex2f((GLfloat) maxx, (GLfloat) maxy);
data->glEnd();
- if (texturedata->shader) {
- data->glDisable(GL_FRAGMENT_PROGRAM_ARB);
- }
-
data->glDisable(texturedata->type);
return 0;
@@ -1155,13 +753,13 @@
data->glPixelStorei(GL_PACK_ALIGNMENT, 1);
data->glPixelStorei(GL_PACK_ROW_LENGTH,
- (pitch / bytes_per_pixel(pixel_format)));
+ (pitch / SDL_BYTESPERPIXEL(pixel_format)));
data->glReadPixels(rect->x, (h-rect->y)-rect->h, rect->w, rect->h,
format, type, pixels);
/* Flip the rows to be top-down */
- length = rect->w * bytes_per_pixel(pixel_format);
+ length = rect->w * SDL_BYTESPERPIXEL(pixel_format);
src = (Uint8*)pixels + (rect->h-1)*pitch;
dst = (Uint8*)pixels;
tmp = SDL_stack_alloc(Uint8, length);
@@ -1201,7 +799,7 @@
data->glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
data->glPixelStorei(GL_UNPACK_ROW_LENGTH,
- (pitch / bytes_per_pixel(pixel_format)));
+ (pitch / SDL_BYTESPERPIXEL(pixel_format)));
/* Flip the rows to be bottom-up */
length = rect->h * rect->w * pitch;
@@ -1244,13 +842,9 @@
if (data->texture) {
renderdata->glDeleteTextures(1, &data->texture);
}
- if (data->palette) {
- SDL_free(data->palette);
- }
if (data->pixels) {
SDL_free(data->pixels);
}
- SDL_FreeDirtyRects(&data->dirty);
SDL_free(data);
texture->driverdata = NULL;
}
@@ -1262,16 +856,6 @@
if (data) {
if (data->context) {
- if (data->GL_ARB_fragment_program_supported) {
- data->glDisable(GL_FRAGMENT_PROGRAM_ARB);
- data->glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0);
- if (data->fragment_program_UYVY &&
- data->fragment_program_UYVY != ~0) {
- data->glDeleteProgramsARB(1,
- &data->fragment_program_UYVY);
- }
- }
-
/* SDL_GL_MakeCurrent(0, NULL); *//* doesn't do anything */
SDL_GL_DeleteContext(data->context);
}
--- a/src/render/opengles/SDL_renderer_gles.c Wed Feb 02 22:55:12 2011 -0800
+++ b/src/render/opengles/SDL_renderer_gles.c Thu Feb 03 00:19:40 2011 -0800
@@ -49,19 +49,13 @@
static void GLES_WindowEvent(SDL_Renderer * renderer,
const SDL_WindowEvent *event);
static int GLES_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture);
-static int GLES_QueryTexturePixels(SDL_Renderer * renderer,
- SDL_Texture * texture, void **pixels,
- int *pitch);
static int GLES_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
const SDL_Rect * rect, const void *pixels,
int pitch);
static int GLES_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
- const SDL_Rect * rect, int markDirty,
- void **pixels, int *pitch);
+ const SDL_Rect * rect, void **pixels, int *pitch);
static void GLES_UnlockTexture(SDL_Renderer * renderer,
SDL_Texture * texture);
-static void GLES_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture,
- int numrects, const SDL_Rect * rects);
static int GLES_RenderDrawPoints(SDL_Renderer * renderer,
const SDL_Point * points, int count);
static int GLES_RenderDrawLines(SDL_Renderer * renderer,
@@ -82,15 +76,8 @@
{
"opengl_es",
(SDL_RENDERER_PRESENTVSYNC | SDL_RENDERER_ACCELERATED),
- 6,
- {
- /* OpenGL ES 1.x supported formats list */
- SDL_PIXELFORMAT_RGBA4444,
- SDL_PIXELFORMAT_RGBA5551,
- SDL_PIXELFORMAT_RGB565,
- SDL_PIXELFORMAT_RGB24,
- SDL_PIXELFORMAT_BGR888,
- SDL_PIXELFORMAT_ABGR8888},
+ 1,
+ {SDL_PIXELFORMAT_ABGR8888},
0,
0}
};
@@ -125,7 +112,6 @@
GLenum formattype;
void *pixels;
int pitch;
- SDL_DirtyRectList dirty;
} GLES_TextureData;
static void
@@ -205,11 +191,9 @@
renderer->WindowEvent = GLES_WindowEvent;
renderer->CreateTexture = GLES_CreateTexture;
- renderer->QueryTexturePixels = GLES_QueryTexturePixels;
renderer->UpdateTexture = GLES_UpdateTexture;
renderer->LockTexture = GLES_LockTexture;
renderer->UnlockTexture = GLES_UnlockTexture;
- renderer->DirtyTexture = GLES_DirtyTexture;
renderer->RenderDrawPoints = GLES_RenderDrawPoints;
renderer->RenderDrawLines = GLES_RenderDrawLines;
renderer->RenderFillRects = GLES_RenderFillRects;
@@ -343,32 +327,11 @@
GLES_ActivateRenderer(renderer);
switch (texture->format) {
- case SDL_PIXELFORMAT_RGB24:
- internalFormat = GL_RGB;
- format = GL_RGB;
- type = GL_UNSIGNED_BYTE;
- break;
- case SDL_PIXELFORMAT_BGR888:
case SDL_PIXELFORMAT_ABGR8888:
internalFormat = GL_RGBA;
format = GL_RGBA;
type = GL_UNSIGNED_BYTE;
break;
- case SDL_PIXELFORMAT_RGB565:
- internalFormat = GL_RGB;
- format = GL_RGB;
- type = GL_UNSIGNED_SHORT_5_6_5;
- break;
- case SDL_PIXELFORMAT_RGBA5551:
- internalFormat = GL_RGBA;
- format = GL_RGBA;
- type = GL_UNSIGNED_SHORT_5_5_5_1;
- break;
- case SDL_PIXELFORMAT_RGBA4444:
- internalFormat = GL_RGBA;
- format = GL_RGBA;
- type = GL_UNSIGNED_SHORT_4_4_4_4;
- break;
default:
SDL_SetError("Texture format %s not supported by OpenGL ES",
SDL_GetPixelFormatName(texture->format));
@@ -428,23 +391,10 @@
return 0;
}
-static int
-GLES_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture,
- void **pixels, int *pitch)
-{
- GLES_TextureData *data = (GLES_TextureData *) texture->driverdata;
-
- *pixels = data->pixels;
- *pitch = data->pitch;
- return 0;
-}
-
static void
SetupTextureUpdate(GLES_RenderData * renderdata, SDL_Texture * texture,
int pitch)
{
- GLES_TextureData *data = (GLES_TextureData *) texture->driverdata;
- renderdata->glBindTexture(data->type, data->texture);
renderdata->glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
}
@@ -463,8 +413,9 @@
GLES_ActivateRenderer(renderer);
renderdata->glGetError();
+ SetupTextureUpdate(renderdata, texture, pitch);
renderdata->glEnable(data->type);
- SetupTextureUpdate(renderdata, texture, pitch);
+ renderdata->glBindTexture(data->type, data->texture);
if( rect->w * bpp == pitch ) {
temp_buffer = (void *)pixels; /* No need to reformat */
@@ -498,15 +449,10 @@
static int
GLES_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
- const SDL_Rect * rect, int markDirty, void **pixels,
- int *pitch)
+ const SDL_Rect * rect, void **pixels, int *pitch)
{
GLES_TextureData *data = (GLES_TextureData *) texture->driverdata;
- if (markDirty) {
- SDL_AddDirtyRect(&data->dirty, rect);
- }
-
*pixels =
(void *) ((Uint8 *) data->pixels + rect->y * data->pitch +
rect->x * SDL_BYTESPERPIXEL(texture->format));
@@ -517,18 +463,18 @@
static void
GLES_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture)
{
-}
+ GLES_RenderData *renderdata = (GLES_RenderData *) renderer->driverdata;
+ GLES_TextureData *data = (GLES_TextureData *) texture->driverdata;
+
+ GLES_ActivateRenderer(renderer);
-static void
-GLES_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture,
- int numrects, const SDL_Rect * rects)
-{
- GLES_TextureData *data = (GLES_TextureData *) texture->driverdata;
- int i;
-
- for (i = 0; i < numrects; ++i) {
- SDL_AddDirtyRect(&data->dirty, &rects[i]);
- }
+ SetupTextureUpdate(renderdata, texture, data->pitch);
+ renderdata->glEnable(data->type);
+ renderdata->glBindTexture(data->type, data->texture);
+ renderdata->glTexSubImage2D(data->type, 0, 0, 0, texture->w,
+ texture->h, data->format, data->formattype,
+ data->pixels);
+ renderdata->glDisable(data->type);
}
static void
@@ -676,49 +622,6 @@
data->glEnable(GL_TEXTURE_2D);
- if (texturedata->dirty.list) {
- SDL_DirtyRect *dirty;
- void *pixels;
- int bpp = SDL_BYTESPERPIXEL(texture->format);
- int pitch = texturedata->pitch;
-
- SetupTextureUpdate(data, texture, pitch);
-
- data->glBindTexture(texturedata->type, texturedata->texture);
- for (dirty = texturedata->dirty.list; dirty; dirty = dirty->next) {
- SDL_Rect *rect = &dirty->rect;
- pixels =
- (void *) ((Uint8 *) texturedata->pixels + rect->y * pitch +
- rect->x * bpp);
- /* There is no GL_UNPACK_ROW_LENGTH in OpenGLES
- we must do this reformatting ourselves(!)
-
- maybe it'd be a good idea to keep a temp buffer around
- for this purpose rather than allocating it each time
- */
- if( rect->x == 0 && rect->w * bpp == pitch ) {
- temp_buffer = pixels; /* Updating whole texture, no need to reformat */
- } else {
- temp_buffer = SDL_malloc(rect->w * rect->h * bpp);
- temp_ptr = temp_buffer;
- for (i = 0; i < rect->h; i++) {
- SDL_memcpy(temp_ptr, pixels, rect->w * bpp);
- temp_ptr += rect->w * bpp;
- pixels += pitch;
- }
- }
-
- data->glTexSubImage2D(texturedata->type, 0, rect->x, rect->y,
- rect->w, rect->h, texturedata->format,
- texturedata->formattype, temp_buffer);
-
- if( temp_buffer != pixels ) {
- SDL_free(temp_buffer);
- }
- }
- SDL_ClearDirtyRects(&texturedata->dirty);
- }
-
data->glBindTexture(texturedata->type, texturedata->texture);
if (texture->modMode) {
@@ -818,7 +721,6 @@
if (data->pixels) {
SDL_free(data->pixels);
}
- SDL_FreeDirtyRects(&data->dirty);
SDL_free(data);
texture->driverdata = NULL;
}
--- a/src/render/software/SDL_renderer_sw.c Wed Feb 02 22:55:12 2011 -0800
+++ b/src/render/software/SDL_renderer_sw.c Thu Feb 03 00:19:40 2011 -0800
@@ -23,7 +23,6 @@
#include "../SDL_sysrender.h"
#include "../../video/SDL_pixels_c.h"
-#include "../../video/SDL_yuv_sw_c.h"
/* SDL surface based renderer implementation */
@@ -32,9 +31,6 @@
static void SW_WindowEvent(SDL_Renderer * renderer,
const SDL_WindowEvent *event);
static int SW_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture);
-static int SW_QueryTexturePixels(SDL_Renderer * renderer,
- SDL_Texture * texture, void **pixels,
- int *pitch);
static int SW_SetTextureColorMod(SDL_Renderer * renderer,
SDL_Texture * texture);
static int SW_SetTextureAlphaMod(SDL_Renderer * renderer,
@@ -45,8 +41,7 @@
const SDL_Rect * rect, const void *pixels,
int pitch);
static int SW_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
- const SDL_Rect * rect, int markDirty, void **pixels,
- int *pitch);
+ const SDL_Rect * rect, void **pixels, int *pitch);
static void SW_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture);
static int SW_RenderDrawPoints(SDL_Renderer * renderer,
const SDL_Point * points, int count);
@@ -70,7 +65,7 @@
{
"software",
(SDL_RENDERER_PRESENTVSYNC),
- 13,
+ 8,
{
SDL_PIXELFORMAT_RGB555,
SDL_PIXELFORMAT_RGB565,
@@ -79,12 +74,8 @@
SDL_PIXELFORMAT_ARGB8888,
SDL_PIXELFORMAT_RGBA8888,
SDL_PIXELFORMAT_ABGR8888,
- SDL_PIXELFORMAT_BGRA8888,
- SDL_PIXELFORMAT_YV12,
- SDL_PIXELFORMAT_IYUV,
- SDL_PIXELFORMAT_YUY2,
- SDL_PIXELFORMAT_UYVY,
- SDL_PIXELFORMAT_YVYU},
+ SDL_PIXELFORMAT_BGRA8888
+ },
0,
0}
};
@@ -96,7 +87,6 @@
SDL_Texture *texture;
SDL_Surface surface;
SDL_Renderer *renderer;
- SDL_DirtyRectList dirty;
} SW_RenderData;
static SDL_Texture *
@@ -136,6 +126,7 @@
SDL_Renderer *renderer;
SW_RenderData *data;
int i;
+ int w, h;
Uint32 format;
int bpp;
Uint32 Rmask, Gmask, Bmask, Amask;
@@ -163,7 +154,6 @@
}
renderer->WindowEvent = SW_WindowEvent;
renderer->CreateTexture = SW_CreateTexture;
- renderer->QueryTexturePixels = SW_QueryTexturePixels;
renderer->SetTextureColorMod = SW_SetTextureColorMod;
renderer->SetTextureAlphaMod = SW_SetTextureAlphaMod;
renderer->SetTextureBlendMode = SW_SetTextureBlendMode;
@@ -217,8 +207,8 @@
}
/* Create the textures we'll use for display */
- data->texture =
- CreateTexture(data->renderer, data->format, window->w, window->h);
+ SDL_GetWindowSize(window, &w, &h);
+ data->texture = CreateTexture(data->renderer, data->format, w, h);
if (!data->texture) {
SW_DestroyRenderer(renderer);
return NULL;
@@ -243,11 +233,12 @@
if (data->updateSize) {
/* Recreate the textures for the new window size */
+ int w, h;
if (data->texture) {
DestroyTexture(data->renderer, data->texture);
}
- data->texture = CreateTexture(data->renderer, data->format,
- window->w, window->h);
+ SDL_GetWindowSize(window, &w, &h);
+ data->texture = CreateTexture(data->renderer, data->format, w, h);
if (data->texture) {
data->updateSize = SDL_FALSE;
}
@@ -268,30 +259,25 @@
static int
SW_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
{
- if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
- texture->driverdata =
- SDL_SW_CreateYUVTexture(texture->format, texture->w, texture->h);
- } else {
- int bpp;
- Uint32 Rmask, Gmask, Bmask, Amask;
+ int bpp;
+ Uint32 Rmask, Gmask, Bmask, Amask;
+
+ if (!SDL_PixelFormatEnumToMasks
+ (texture->format, &bpp, &Rmask, &Gmask, &Bmask, &Amask)) {
+ SDL_SetError("Unknown texture format");
+ return -1;
+ }
- if (!SDL_PixelFormatEnumToMasks
- (texture->format, &bpp, &Rmask, &Gmask, &Bmask, &Amask)) {
- SDL_SetError("Unknown texture format");
- return -1;
- }
+ texture->driverdata =
+ SDL_CreateRGBSurface(0, texture->w, texture->h, bpp, Rmask, Gmask,
+ Bmask, Amask);
+ SDL_SetSurfaceColorMod(texture->driverdata, texture->r, texture->g,
+ texture->b);
+ SDL_SetSurfaceAlphaMod(texture->driverdata, texture->a);
+ SDL_SetSurfaceBlendMode(texture->driverdata, texture->blendMode);
- texture->driverdata =
- SDL_CreateRGBSurface(0, texture->w, texture->h, bpp, Rmask, Gmask,
- Bmask, Amask);
- SDL_SetSurfaceColorMod(texture->driverdata, texture->r, texture->g,
- texture->b);
- SDL_SetSurfaceAlphaMod(texture->driverdata, texture->a);
- SDL_SetSurfaceBlendMode(texture->driverdata, texture->blendMode);
-
- if (texture->access == SDL_TEXTUREACCESS_STATIC) {
- SDL_SetSurfaceRLE(texture->driverdata, 1);
- }
+ if (texture->access == SDL_TEXTUREACCESS_STATIC) {
+ SDL_SetSurfaceRLE(texture->driverdata, 1);
}
if (!texture->driverdata) {
@@ -301,23 +287,6 @@
}
static int
-SW_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture,
- void **pixels, int *pitch)
-{
- if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
- return SDL_SW_QueryYUVTexturePixels((SDL_SW_YUVTexture *)
- texture->driverdata, pixels,
- pitch);
- } else {
- SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
-
- *pixels = surface->pixels;
- *pitch = surface->pitch;
- return 0;
- }
-}
-
-static int
SW_SetTextureColorMod(SDL_Renderer * renderer, SDL_Texture * texture)
{
SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
@@ -343,56 +312,40 @@
SW_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
const SDL_Rect * rect, const void *pixels, int pitch)
{
- if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
- return SDL_SW_UpdateYUVTexture((SDL_SW_YUVTexture *)
- texture->driverdata, rect, pixels,
- pitch);
- } else {
- SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
- Uint8 *src, *dst;
- int row;
- size_t length;
+ SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
+ Uint8 *src, *dst;
+ int row;
+ size_t length;
- src = (Uint8 *) pixels;
- dst =
- (Uint8 *) surface->pixels + rect->y * surface->pitch +
- rect->x * surface->format->BytesPerPixel;
- length = rect->w * surface->format->BytesPerPixel;
- for (row = 0; row < rect->h; ++row) {
- SDL_memcpy(dst, src, length);
- src += pitch;
- dst += surface->pitch;
- }
- return 0;
+ src = (Uint8 *) pixels;
+ dst = (Uint8 *) surface->pixels +
+ rect->y * surface->pitch +
+ rect->x * surface->format->BytesPerPixel;
+ length = rect->w * surface->format->BytesPerPixel;
+ for (row = 0; row < rect->h; ++row) {
+ SDL_memcpy(dst, src, length);
+ src += pitch;
+ dst += surface->pitch;
}
+ return 0;
}
static int
SW_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
- const SDL_Rect * rect, int markDirty, void **pixels,
- int *pitch)
+ const SDL_Rect * rect, void **pixels, int *pitch)
{
- if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
- return SDL_SW_LockYUVTexture((SDL_SW_YUVTexture *)
- texture->driverdata, rect, markDirty,
- pixels, pitch);
- } else {
- SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
+ SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
- *pixels =
- (void *) ((Uint8 *) surface->pixels + rect->y * surface->pitch +
- rect->x * surface->format->BytesPerPixel);
- *pitch = surface->pitch;
- return 0;
- }
+ *pixels =
+ (void *) ((Uint8 *) surface->pixels + rect->y * surface->pitch +
+ rect->x * surface->format->BytesPerPixel);
+ *pitch = surface->pitch;
+ return 0;
}
static void
SW_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture)
{
- if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
- SDL_SW_UnlockYUVTexture((SDL_SW_YUVTexture *) texture->driverdata);
- }
}
static int
@@ -420,7 +373,7 @@
return 0;
}
- if (data->renderer->LockTexture(data->renderer, texture, &rect, 1,
+ if (data->renderer->LockTexture(data->renderer, texture, &rect,
&data->surface.pixels,
&data->surface.pitch) < 0) {
return -1;
@@ -484,7 +437,7 @@
return 0;
}
- if (data->renderer->LockTexture(data->renderer, texture, &rect, 1,
+ if (data->renderer->LockTexture(data->renderer, texture, &rect,
&data->surface.pixels,
&data->surface.pitch) < 0) {
return -1;
@@ -558,7 +511,7 @@
continue;
}
- if (data->renderer->LockTexture(data->renderer, texture, &rect, 1,
+ if (data->renderer->LockTexture(data->renderer, texture, &rect,
&data->surface.pixels,
&data->surface.pitch) < 0) {
return -1;
@@ -586,38 +539,31 @@
const SDL_Rect * srcrect, const SDL_Rect * dstrect)
{
SW_RenderData *data = (SW_RenderData *) renderer->driverdata;
+ SDL_Surface *surface;
+ SDL_Rect real_srcrect;
+ SDL_Rect real_dstrect;
int status;
if (!SW_ActivateRenderer(renderer)) {
return -1;
}
- if (data->renderer->LockTexture(data->renderer, data->texture,
- dstrect, 1, &data->surface.pixels,
+ if (data->renderer->LockTexture(data->renderer, data->texture, dstrect,
+ &data->surface.pixels,
&data->surface.pitch) < 0) {
return -1;
}
- if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
- status =
- SDL_SW_CopyYUVToRGB((SDL_SW_YUVTexture *) texture->driverdata,
- srcrect, data->format, dstrect->w, dstrect->h,
- data->surface.pixels, data->surface.pitch);
- } else {
- SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
- SDL_Rect real_srcrect = *srcrect;
- SDL_Rect real_dstrect;
+ surface = (SDL_Surface *) texture->driverdata;
+ real_srcrect = *srcrect;
- data->surface.w = dstrect->w;
- data->surface.h = dstrect->h;
- data->surface.clip_rect.w = dstrect->w;
- data->surface.clip_rect.h = dstrect->h;
- real_dstrect = data->surface.clip_rect;
+ data->surface.w = dstrect->w;
+ data->surface.h = dstrect->h;
+ data->surface.clip_rect.w = dstrect->w;
+ data->surface.clip_rect.h = dstrect->h;
+ real_dstrect = data->surface.clip_rect;
- status =
- SDL_LowerBlit(surface, &real_srcrect, &data->surface,
- &real_dstrect);
- }
+ status = SDL_LowerBlit(surface, &real_srcrect, &data->surface, &real_dstrect);
data->renderer->UnlockTexture(data->renderer, data->texture);
return status;
}
@@ -632,8 +578,8 @@
return -1;
}
- if (data->renderer->LockTexture(data->renderer, data->texture,
- rect, 0, &data->surface.pixels,
+ if (data->renderer->LockTexture(data->renderer, data->texture, rect,
+ &data->surface.pixels,
&data->surface.pitch) < 0) {
return -1;
}
@@ -656,8 +602,8 @@
return -1;
}
- if (data->renderer->LockTexture(data->renderer, data->texture,
- rect, 1, &data->surface.pixels,
+ if (data->renderer->LockTexture(data->renderer, data->texture, rect,
+ &data->surface.pixels,
&data->surface.pitch) < 0) {
return -1;
}
@@ -692,13 +638,9 @@
static void
SW_DestroyTexture(SDL_Renderer * renderer, SDL_Texture * texture)
{
- if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
- SDL_SW_DestroyYUVTexture((SDL_SW_YUVTexture *) texture->driverdata);
- } else {
- SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
+ SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
- SDL_FreeSurface(surface);
- }
+ SDL_FreeSurface(surface);
}
static void
@@ -717,7 +659,6 @@
if (data->renderer) {
data->renderer->DestroyRenderer(data->renderer);
}
- SDL_FreeDirtyRects(&data->dirty);
SDL_free(data);
}
SDL_free(renderer);
--- a/src/video/SDL_leaks.h Wed Feb 02 22:55:12 2011 -0800
+++ b/src/video/SDL_leaks.h Thu Feb 03 00:19:40 2011 -0800
@@ -29,4 +29,5 @@
#ifdef CHECK_LEAKS
extern int surfaces_allocated;
#endif
+
/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/SDL_rect.c Wed Feb 02 22:55:12 2011 -0800
+++ b/src/video/SDL_rect.c Thu Feb 03 00:19:40 2011 -0800
@@ -339,66 +339,4 @@
return SDL_TRUE;
}
-void
-SDL_AddDirtyRect(SDL_DirtyRectList * list, const SDL_Rect * rect)
-{
- SDL_DirtyRect *dirty;
-
- /* FIXME: At what point is this optimization too expensive? */
- for (dirty = list->list; dirty; dirty = dirty->next) {
- if (SDL_HasIntersection(&dirty->rect, rect)) {
- SDL_UnionRect(&dirty->rect, rect, &dirty->rect);
- return;
- }
- }
-
- if (list->free) {
- dirty = list->free;
- list->free = dirty->next;
- } else {
- dirty = (SDL_DirtyRect *) SDL_malloc(sizeof(*dirty));
- if (!dirty) {
- return;
- }
- }
- dirty->rect = *rect;
- dirty->next = list->list;
- list->list = dirty;
-}
-
-void
-SDL_ClearDirtyRects(SDL_DirtyRectList * list)
-{
- SDL_DirtyRect *prev, *curr;
-
- /* Skip to the end of the free list */
- prev = NULL;
- for (curr = list->free; curr; curr = curr->next) {
- prev = curr;
- }
-
- /* Add the list entries to the end */
- if (prev) {
- prev->next = list->list;
- } else {
- list->free = list->list;
- }
- list->list = NULL;
-}
-
-void
-SDL_FreeDirtyRects(SDL_DirtyRectList * list)
-{
- while (list->list) {
- SDL_DirtyRect *elem = list->list;
- list->list = elem->next;
- SDL_free(elem);
- }
- while (list->free) {
- SDL_DirtyRect *elem = list->free;
- list->free = elem->next;
- SDL_free(elem);
- }
-}
-
/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/SDL_yuv_mmx.c Wed Feb 02 22:55:12 2011 -0800
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,432 +0,0 @@
-/*
- SDL - Simple DirectMedia Layer
- Copyright (C) 1997-2010 Sam Lantinga
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
- Sam Lantinga
- slouken@libsdl.org
-*/
-#include "SDL_config.h"
-
-#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
-
-#include "SDL_stdinc.h"
-
-#include "mmx.h"
-
-/* *INDENT-OFF* */
-
-static mmx_t MMX_0080w = { .ud = {0x00800080, 0x00800080} };
-static mmx_t MMX_00FFw = { .ud = {0x00ff00ff, 0x00ff00ff} };
-static mmx_t MMX_FF00w = { .ud = {0xff00ff00, 0xff00ff00} };
-
-static mmx_t MMX_Ycoeff = { .uw = {0x004a, 0x004a, 0x004a, 0x004a} };
-
-static mmx_t MMX_UbluRGB = { .uw = {0x0072, 0x0072, 0x0072, 0x0072} };
-static mmx_t MMX_VredRGB = { .uw = {0x0059, 0x0059, 0x0059, 0x0059} };
-static mmx_t MMX_UgrnRGB = { .uw = {0xffea, 0xffea, 0xffea, 0xffea} };
-static mmx_t MMX_VgrnRGB = { .uw = {0xffd2, 0xffd2, 0xffd2, 0xffd2} };
-
-static mmx_t MMX_Ublu5x5 = { .uw = {0x0081, 0x0081, 0x0081, 0x0081} };
-static mmx_t MMX_Vred5x5 = { .uw = {0x0066, 0x0066, 0x0066, 0x0066} };
-static mmx_t MMX_Ugrn565 = { .uw = {0xffe8, 0xffe8, 0xffe8, 0xffe8} };
-static mmx_t MMX_Vgrn565 = { .uw = {0xffcd, 0xffcd, 0xffcd, 0xffcd} };
-
-static mmx_t MMX_red565 = { .uw = {0xf800, 0xf800, 0xf800, 0xf800} };
-static mmx_t MMX_grn565 = { .uw = {0x07e0, 0x07e0, 0x07e0, 0x07e0} };
-
-/**
- This MMX assembler is my first assembler/MMX program ever.
- Thus it may be buggy.
- Send patches to:
- mvogt@rhrk.uni-kl.de
-
- After it worked fine I have "obfuscated" the code a bit to have
- more parallelism in the MMX units. This means I moved
- initialisation around and delayed other instructions.
- Performance measurement did not show that this brought any advantage
- but in theory it _should_ be faster this way.
-
- The overall performance gain over the C based dither was 30%-40%.
- The MMX routine calculates 256bit=8RGB values in each cycle
- (4 for row1 & 4 for row2)
-
- The red/green/blue.. coefficients are taken from the mpeg_play
- player. They look nice, but I don't know if you can have
- better values, to avoid integer rounding errors.
-
-
- IMPORTANT:
- ==========
-
- It is a requirement that cr/cb/lum are 8-byte aligned and
- that out is 16-byte aligned, or you will/may get segfaults
-
-*/
-
-void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
- unsigned char *lum, unsigned char *cr,
- unsigned char *cb, unsigned char *out,
- int rows, int cols, int mod )
-{
- Uint32 *row1;
- Uint32 *row2;
-
- unsigned char* y = lum +cols*rows; // Pointer to the end
- int x = 0;
- row1 = (Uint32 *)out; // 32 bit target
- row2 = (Uint32 *)out+cols+mod; // start of second row
- mod = (mod+cols+mod)*4; // increment for row1 in byte
-
- __asm__ __volatile__ (
- // tap dance to workaround the inability to use %%ebx at will...
- // move one thing to the stack...
- "pushl $0\n" // save a slot on the stack.
- "pushl %%ebx\n" // save %%ebx.
- "movl %0, %%ebx\n" // put the thing in ebx.
- "movl %%ebx,4(%%esp)\n" // put the thing in the stack slot.
- "popl %%ebx\n" // get back %%ebx (the PIC register).
-
- ".align 8\n"
- "1:\n"
-
- // create Cr (result in mm1)
- "pushl %%ebx\n"
- "movl 4(%%esp),%%ebx\n"
- "movd (%%ebx),%%mm1\n" // 0 0 0 0 v3 v2 v1 v0
- "popl %%ebx\n"
- "pxor %%mm7,%%mm7\n" // 00 00 00 00 00 00 00 00
- "movd (%2), %%mm2\n" // 0 0 0 0 l3 l2 l1 l0
- "punpcklbw %%mm7,%%mm1\n" // 0 v3 0 v2 00 v1 00 v0
- "punpckldq %%mm1,%%mm1\n" // 00 v1 00 v0 00 v1 00 v0
- "psubw %9,%%mm1\n" // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0
-
- // create Cr_g (result in mm0)
- "movq %%mm1,%%mm0\n" // r1 r1 r0 r0 r1 r1 r0 r0
- "pmullw %10,%%mm0\n" // red*-46dec=0.7136*64
- "pmullw %11,%%mm1\n" // red*89dec=1.4013*64
- "psraw $6, %%mm0\n" // red=red/64
- "psraw $6, %%mm1\n" // red=red/64
-
- // create L1 L2 (result in mm2,mm4)
- // L2=lum+cols
- "movq (%2,%4),%%mm3\n" // 0 0 0 0 L3 L2 L1 L0
- "punpckldq %%mm3,%%mm2\n" // L3 L2 L1 L0 l3 l2 l1 l0
- "movq %%mm2,%%mm4\n" // L3 L2 L1 L0 l3 l2 l1 l0
- "pand %12,%%mm2\n" // L3 0 L1 0 l3 0 l1 0
- "pand %13,%%mm4\n" // 0 L2 0 L0 0 l2 0 l0
- "psrlw $8,%%mm2\n" // 0 L3 0 L1 0 l3 0 l1
-
- // create R (result in mm6)
- "movq %%mm2,%%mm5\n" // 0 L3 0 L1 0 l3 0 l1
- "movq %%mm4,%%mm6\n" // 0 L2 0 L0 0 l2 0 l0
- "paddsw %%mm1, %%mm5\n" // lum1+red:x R3 x R1 x r3 x r1
- "paddsw %%mm1, %%mm6\n" // lum1+red:x R2 x R0 x r2 x r0
- "packuswb %%mm5,%%mm5\n" // R3 R1 r3 r1 R3 R1 r3 r1
- "packuswb %%mm6,%%mm6\n" // R2 R0 r2 r0 R2 R0 r2 r0
- "pxor %%mm7,%%mm7\n" // 00 00 00 00 00 00 00 00
- "punpcklbw %%mm5,%%mm6\n" // R3 R2 R1 R0 r3 r2 r1 r0
-
- // create Cb (result in mm1)
- "movd (%1), %%mm1\n" // 0 0 0 0 u3 u2 u1 u0
- "punpcklbw %%mm7,%%mm1\n" // 0 u3 0 u2 00 u1 00 u0
- "punpckldq %%mm1,%%mm1\n" // 00 u1 00 u0 00 u1 00 u0
- "psubw %9,%%mm1\n" // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0
-
- // create Cb_g (result in mm5)
- "movq %%mm1,%%mm5\n" // u1 u1 u0 u0 u1 u1 u0 u0
- "pmullw %14,%%mm5\n" // blue*-109dec=1.7129*64
- "pmullw %15,%%mm1\n" // blue*114dec=1.78125*64
- "psraw $6, %%mm5\n" // blue=red/64
- "psraw $6, %%mm1\n" // blue=blue/64
-
- // create G (result in mm7)
- "movq %%mm2,%%mm3\n" // 0 L3 0 L1 0 l3 0 l1
- "movq %%mm4,%%mm7\n" // 0 L2 0 L0 0 l2 0 l1
- "paddsw %%mm5, %%mm3\n" // lum1+Cb_g:x G3t x G1t x g3t x g1t
- "paddsw %%mm5, %%mm7\n" // lum1+Cb_g:x G2t x G0t x g2t x g0t
- "paddsw %%mm0, %%mm3\n" // lum1+Cr_g:x G3 x G1 x g3 x g1
- "paddsw %%mm0, %%mm7\n" // lum1+blue:x G2 x G0 x g2 x g0
- "packuswb %%mm3,%%mm3\n" // G3 G1 g3 g1 G3 G1 g3 g1
- "packuswb %%mm7,%%mm7\n" // G2 G0 g2 g0 G2 G0 g2 g0
- "punpcklbw %%mm3,%%mm7\n" // G3 G2 G1 G0 g3 g2 g1 g0
-
- // create B (result in mm5)
- "movq %%mm2,%%mm3\n" // 0 L3 0 L1 0 l3 0 l1
- "movq %%mm4,%%mm5\n" // 0 L2 0 L0 0 l2 0 l1
- "paddsw %%mm1, %%mm3\n" // lum1+blue:x B3 x B1 x b3 x b1
- "paddsw %%mm1, %%mm5\n" // lum1+blue:x B2 x B0 x b2 x b0
- "packuswb %%mm3,%%mm3\n" // B3 B1 b3 b1 B3 B1 b3 b1
- "packuswb %%mm5,%%mm5\n" // B2 B0 b2 b0 B2 B0 b2 b0
- "punpcklbw %%mm3,%%mm5\n" // B3 B2 B1 B0 b3 b2 b1 b0
-
- // fill destination row1 (needed are mm6=Rr,mm7=Gg,mm5=Bb)
-
- "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0
- "pxor %%mm4,%%mm4\n" // 0 0 0 0 0 0 0 0
- "movq %%mm6,%%mm1\n" // R3 R2 R1 R0 r3 r2 r1 r0
- "movq %%mm5,%%mm3\n" // B3 B2 B1 B0 b3 b2 b1 b0
-
- // process lower lum
- "punpcklbw %%mm4,%%mm1\n" // 0 r3 0 r2 0 r1 0 r0
- "punpcklbw %%mm4,%%mm3\n" // 0 b3 0 b2 0 b1 0 b0
- "movq %%mm1,%%mm2\n" // 0 r3 0 r2 0 r1 0 r0
- "movq %%mm3,%%mm0\n" // 0 b3 0 b2 0 b1 0 b0
- "punpcklwd %%mm1,%%mm3\n" // 0 r1 0 b1 0 r0 0 b0
- "punpckhwd %%mm2,%%mm0\n" // 0 r3 0 b3 0 r2 0 b2
-
- "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0
- "movq %%mm7,%%mm1\n" // G3 G2 G1 G0 g3 g2 g1 g0
- "punpcklbw %%mm1,%%mm2\n" // g3 0 g2 0 g1 0 g0 0
- "punpcklwd %%mm4,%%mm2\n" // 0 0 g1 0 0 0 g0 0
- "por %%mm3, %%mm2\n" // 0 r1 g1 b1 0 r0 g0 b0
- "movq %%mm2,(%3)\n" // wrote out ! row1
-
- "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0
- "punpcklbw %%mm1,%%mm4\n" // g3 0 g2 0 g1 0 g0 0
- "punpckhwd %%mm2,%%mm4\n" // 0 0 g3 0 0 0 g2 0
- "por %%mm0, %%mm4\n" // 0 r3 g3 b3 0 r2 g2 b2
- "movq %%mm4,8(%3)\n" // wrote out ! row1
-
- // fill destination row2 (needed are mm6=Rr,mm7=Gg,mm5=Bb)
- // this can be done "destructive"
- "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0
- "punpckhbw %%mm2,%%mm6\n" // 0 R3 0 R2 0 R1 0 R0
- "punpckhbw %%mm1,%%mm5\n" // G3 B3 G2 B2 G1 B1 G0 B0
- "movq %%mm5,%%mm1\n" // G3 B3 G2 B2 G1 B1 G0 B0
- "punpcklwd %%mm6,%%mm1\n" // 0 R1 G1 B1 0 R0 G0 B0
- "movq %%mm1,(%5)\n" // wrote out ! row2
- "punpckhwd %%mm6,%%mm5\n" // 0 R3 G3 B3 0 R2 G2 B2
- "movq %%mm5,8(%5)\n" // wrote out ! row2
-
- "addl $4,%2\n" // lum+4
- "leal 16(%3),%3\n" // row1+16
- "leal 16(%5),%5\n" // row2+16
- "addl $2,(%%esp)\n" // cr+2
- "addl $2,%1\n" // cb+2
-
- "addl $4,%6\n" // x+4
- "cmpl %4,%6\n"
-
- "jl 1b\n"
- "addl %4,%2\n" // lum += cols
- "addl %8,%3\n" // row1+= mod
- "addl %8,%5\n" // row2+= mod
- "movl $0,%6\n" // x=0
- "cmpl %7,%2\n"
- "jl 1b\n"
-
- "addl $4,%%esp\n" // get rid of the stack slot we reserved.
- "emms\n" // reset MMX registers.
- :
- : "m" (cr), "r"(cb),"r"(lum),
- "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
- "m"(MMX_0080w),"m"(MMX_VgrnRGB),"m"(MMX_VredRGB),
- "m"(MMX_FF00w),"m"(MMX_00FFw),"m"(MMX_UgrnRGB),
- "m"(MMX_UbluRGB)
- );
-}
-
-void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
- unsigned char *lum, unsigned char *cr,
- unsigned char *cb, unsigned char *out,
- int rows, int cols, int mod )
-{
- Uint16 *row1;
- Uint16 *row2;
-
- unsigned char* y = lum +cols*rows; /* Pointer to the end */
- int x = 0;
- row1 = (Uint16 *)out; /* 16 bit target */
- row2 = (Uint16 *)out+cols+mod; /* start of second row */
- mod = (mod+cols+mod)*2; /* increment for row1 in byte */
-
- __asm__ __volatile__(
- // tap dance to workaround the inability to use %%ebx at will...
- // move one thing to the stack...
- "pushl $0\n" // save a slot on the stack.
- "pushl %%ebx\n" // save %%ebx.
- "movl %0, %%ebx\n" // put the thing in ebx.
- "movl %%ebx, 4(%%esp)\n" // put the thing in the stack slot.
- "popl %%ebx\n" // get back %%ebx (the PIC register).
-
- ".align 8\n"
- "1:\n"
-
- "movd (%1), %%mm0\n" // 4 Cb 0 0 0 0 u3 u2 u1 u0
- "pxor %%mm7, %%mm7\n"
- "pushl %%ebx\n"
- "movl 4(%%esp), %%ebx\n"
- "movd (%%ebx), %%mm1\n" // 4 Cr 0 0 0 0 v3 v2 v1 v0
- "popl %%ebx\n"
-
- "punpcklbw %%mm7, %%mm0\n" // 4 W cb 0 u3 0 u2 0 u1 0 u0
- "punpcklbw %%mm7, %%mm1\n" // 4 W cr 0 v3 0 v2 0 v1 0 v0
- "psubw %9, %%mm0\n"
- "psubw %9, %%mm1\n"
- "movq %%mm0, %%mm2\n" // Cb 0 u3 0 u2 0 u1 0 u0
- "movq %%mm1, %%mm3\n" // Cr
- "pmullw %10, %%mm2\n" // Cb2green 0 R3 0 R2 0 R1 0 R0
- "movq (%2), %%mm6\n" // L1 l7 L6 L5 L4 L3 L2 L1 L0
- "pmullw %11, %%mm0\n" // Cb2blue
- "pand %12, %%mm6\n" // L1 00 L6 00 L4 00 L2 00 L0
- "pmullw %13, %%mm3\n" // Cr2green
- "movq (%2), %%mm7\n" // L2
- "pmullw %14, %%mm1\n" // Cr2red
- "psrlw $8, %%mm7\n" // L2 00 L7 00 L5 00 L3 00 L1
- "pmullw %15, %%mm6\n" // lum1
- "paddw %%mm3, %%mm2\n" // Cb2green + Cr2green == green
- "pmullw %15, %%mm7\n" // lum2
-
- "movq %%mm6, %%mm4\n" // lum1
- "paddw %%mm0, %%mm6\n" // lum1 +blue 00 B6 00 B4 00 B2 00 B0
- "movq %%mm4, %%mm5\n" // lum1
- "paddw %%mm1, %%mm4\n" // lum1 +red 00 R6 00 R4 00 R2 00 R0
- "paddw %%mm2, %%mm5\n" // lum1 +green 00 G6 00 G4 00 G2 00 G0
- "psraw $6, %%mm4\n" // R1 0 .. 64
- "movq %%mm7, %%mm3\n" // lum2 00 L7 00 L5 00 L3 00 L1
- "psraw $6, %%mm5\n" // G1 - .. +
- "paddw %%mm0, %%mm7\n" // Lum2 +blue 00 B7 00 B5 00 B3 00 B1
- "psraw $6, %%mm6\n" // B1 0 .. 64
- "packuswb %%mm4, %%mm4\n" // R1 R1
- "packuswb %%mm5, %%mm5\n" // G1 G1
- "packuswb %%mm6, %%mm6\n" // B1 B1
- "punpcklbw %%mm4, %%mm4\n"
- "punpcklbw %%mm5, %%mm5\n"
-
- "pand %16, %%mm4\n"
- "psllw $3, %%mm5\n" // GREEN 1
- "punpcklbw %%mm6, %%mm6\n"
- "pand %17, %%mm5\n"
- "pand %16, %%mm6\n"
- "por %%mm5, %%mm4\n" //
- "psrlw $11, %%mm6\n" // BLUE 1
- "movq %%mm3, %%mm5\n" // lum2
- "paddw %%mm1, %%mm3\n" // lum2 +red 00 R7 00 R5 00 R3 00 R1
- "paddw %%mm2, %%mm5\n" // lum2 +green 00 G7 00 G5 00 G3 00 G1
- "psraw $6, %%mm3\n" // R2
- "por %%mm6, %%mm4\n" // MM4
- "psraw $6, %%mm5\n" // G2
- "movq (%2, %4), %%mm6\n" // L3 load lum2
- "psraw $6, %%mm7\n"
- "packuswb %%mm3, %%mm3\n"
- "packuswb %%mm5, %%mm5\n"
- "packuswb %%mm7, %%mm7\n"
- "pand %12, %%mm6\n" // L3
- "punpcklbw %%mm3, %%mm3\n"
- "punpcklbw %%mm5, %%mm5\n"
- "pmullw %15, %%mm6\n" // lum3
- "punpcklbw %%mm7, %%mm7\n"
- "psllw $3, %%mm5\n" // GREEN 2
- "pand %16, %%mm7\n"
- "pand %16, %%mm3\n"
- "psrlw $11, %%mm7\n" // BLUE 2
- "pand %17, %%mm5\n"
- "por %%mm7, %%mm3\n"
- "movq (%2,%4), %%mm7\n" // L4 load lum2
- "por %%mm5, %%mm3\n" //
- "psrlw $8, %%mm7\n" // L4
- "movq %%mm4, %%mm5\n"
- "punpcklwd %%mm3, %%mm4\n"
- "pmullw %15, %%mm7\n" // lum4
- "punpckhwd %%mm3, %%mm5\n"
-
- "movq %%mm4, (%3)\n" // write row1
- "movq %%mm5, 8(%3)\n" // write row1
-
- "movq %%mm6, %%mm4\n" // Lum3
- "paddw %%mm0, %%mm6\n" // Lum3 +blue
-
- "movq %%mm4, %%mm5\n" // Lum3
- "paddw %%mm1, %%mm4\n" // Lum3 +red
- "paddw %%mm2, %%mm5\n" // Lum3 +green
- "psraw $6, %%mm4\n"
- "movq %%mm7, %%mm3\n" // Lum4
- "psraw $6, %%mm5\n"
- "paddw %%mm0, %%mm7\n" // Lum4 +blue
- "psraw $6, %%mm6\n" // Lum3 +blue
- "movq %%mm3, %%mm0\n" // Lum4
- "packuswb %%mm4, %%mm4\n"
- "paddw %%mm1, %%mm3\n" // Lum4 +red
- "packuswb %%mm5, %%mm5\n"
- "paddw %%mm2, %%mm0\n" // Lum4 +green
- "packuswb %%mm6, %%mm6\n"
- "punpcklbw %%mm4, %%mm4\n"
- "punpcklbw %%mm5, %%mm5\n"
- "punpcklbw %%mm6, %%mm6\n"
- "psllw $3, %%mm5\n" // GREEN 3
- "pand %16, %%mm4\n"
- "psraw $6, %%mm3\n" // psr 6
- "psraw $6, %%mm0\n"
- "pand %16, %%mm6\n" // BLUE
- "pand %17, %%mm5\n"
- "psrlw $11, %%mm6\n" // BLUE 3
- "por %%mm5, %%mm4\n"
- "psraw $6, %%mm7\n"
- "por %%mm6, %%mm4\n"
- "packuswb %%mm3, %%mm3\n"
- "packuswb %%mm0, %%mm0\n"
- "packuswb %%mm7, %%mm7\n"
- "punpcklbw %%mm3, %%mm3\n"
- "punpcklbw %%mm0, %%mm0\n"
- "punpcklbw %%mm7, %%mm7\n"
- "pand %16, %%mm3\n"
- "pand %16, %%mm7\n" // BLUE
- "psllw $3, %%mm0\n" // GREEN 4
- "psrlw $11, %%mm7\n"
- "pand %17, %%mm0\n"
- "por %%mm7, %%mm3\n"
- "por %%mm0, %%mm3\n"
-
- "movq %%mm4, %%mm5\n"
-
- "punpcklwd %%mm3, %%mm4\n"
- "punpckhwd %%mm3, %%mm5\n"
-
- "movq %%mm4, (%5)\n"
- "movq %%mm5, 8(%5)\n"
-
- "addl $8, %6\n"
- "addl $8, %2\n"
- "addl $4, (%%esp)\n"
- "addl $4, %1\n"
- "cmpl %4, %6\n"
- "leal 16(%3), %3\n"
- "leal 16(%5),%5\n" // row2+16
-
- "jl 1b\n"
- "addl %4, %2\n" // lum += cols
- "addl %8, %3\n" // row1+= mod
- "addl %8, %5\n" // row2+= mod
- "movl $0, %6\n" // x=0
- "cmpl %7, %2\n"
- "jl 1b\n"
- "addl $4, %%esp\n" // get rid of the stack slot we reserved.
- "emms\n"
- :
- : "m" (cr), "r"(cb),"r"(lum),
- "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
- "m"(MMX_0080w),"m"(MMX_Ugrn565),"m"(MMX_Ublu5x5),
- "m"(MMX_00FFw),"m"(MMX_Vgrn565),"m"(MMX_Vred5x5),
- "m"(MMX_Ycoeff),"m"(MMX_red565),"m"(MMX_grn565)
- );
-}
-
-/* *INDENT-ON* */
-
-#endif /* GCC3 i386 inline assembly */
-
-/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/SDL_yuv_sw.c Wed Feb 02 22:55:12 2011 -0800
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1322 +0,0 @@
-/*
- SDL - Simple DirectMedia Layer
- Copyright (C) 1997-2010 Sam Lantinga
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
- Sam Lantinga
- slouken@libsdl.org
-*/
-#include "SDL_config.h"
-
-/* This is the software implementation of the YUV texture support */
-
-/* This code was derived from code carrying the following copyright notices:
-
- * Copyright (c) 1995 The Regents of the University of California.
- * All rights reserved.
- *
- * Permission to use, copy, modify, and distribute this software and its
- * documentation for any purpose, without fee, and without written agreement is
- * hereby granted, provided that the above copyright notice and the following
- * two paragraphs appear in all copies of this software.
- *
- * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
- * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
- * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
- * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
- * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
- * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
-
- * Copyright (c) 1995 Erik Corry
- * All rights reserved.
- *
- * Permission to use, copy, modify, and distribute this software and its
- * documentation for any purpose, without fee, and without written agreement is
- * hereby granted, provided that the above copyright notice and the following
- * two paragraphs appear in all copies of this software.
- *
- * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
- * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
- * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
- * OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
- * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
- * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
-
- * Portions of this software Copyright (c) 1995 Brown University.
- * All rights reserved.
- *
- * Permission to use, copy, modify, and distribute this software and its
- * documentation for any purpose, without fee, and without written agreement
- * is hereby granted, provided that the above copyright notice and the
- * following two paragraphs appear in all copies of this software.
- *
- * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
- * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
- * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
- * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
- * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
- * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
- */
-
-#include "SDL_video.h"
-#include "SDL_cpuinfo.h"
-#include "SDL_yuv_sw_c.h"
-
-
-/* The colorspace conversion functions */
-
-#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
-extern void Color565DitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
- unsigned char *lum, unsigned char *cr,
- unsigned char *cb, unsigned char *out,
- int rows, int cols, int mod);
-extern void ColorRGBDitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
- unsigned char *lum, unsigned char *cr,
- unsigned char *cb, unsigned char *out,
- int rows, int cols, int mod);
-#endif
-
-static void
-Color16DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
- unsigned char *lum, unsigned char *cr,
- unsigned char *cb, unsigned char *out,
- int rows, int cols, int mod)
-{
- unsigned short *row1;
- unsigned short *row2;
- unsigned char *lum2;
- int x, y;
- int cr_r;
- int crb_g;
- int cb_b;
- int cols_2 = cols / 2;
-
- row1 = (unsigned short *) out;
- row2 = row1 + cols + mod;
- lum2 = lum + cols;
-
- mod += cols + mod;
-
- y = rows / 2;
- while (y--) {
- x = cols_2;
- while (x--) {
- register int L;
-
- cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
- crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
- + colortab[*cb + 2 * 256];
- cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
- ++cr;
- ++cb;
-
- L = *lum++;
- *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] |
- rgb_2_pix[L + cb_b]);
-
- L = *lum++;
- *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] |
- rgb_2_pix[L + cb_b]);
-
-
- /* Now, do second row. */
-
- L = *lum2++;
- *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] |
- rgb_2_pix[L + cb_b]);
-
- L = *lum2++;
- *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] |
- rgb_2_pix[L + cb_b]);
- }
-
- /*
- * These values are at the start of the next line, (due
- * to the ++'s above),but they need to be at the start
- * of the line after that.
- */
- lum += cols;
- lum2 += cols;
- row1 += mod;
- row2 += mod;
- }
-}
-
-static void
-Color24DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
- unsigned char *lum, unsigned char *cr,
- unsigned char *cb, unsigned char *out,
- int rows, int cols, int mod)
-{
- unsigned int value;
- unsigned char *row1;
- unsigned char *row2;
- unsigned char *lum2;
- int x, y;
- int cr_r;
- int crb_g;
- int cb_b;
- int cols_2 = cols / 2;
-
- row1 = out;
- row2 = row1 + cols * 3 + mod * 3;
- lum2 = lum + cols;
-
- mod += cols + mod;
- mod *= 3;
-
- y = rows / 2;
- while (y--) {
- x = cols_2;
- while (x--) {
- register int L;
-
- cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
- crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
- + colortab[*cb + 2 * 256];
- cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
- ++cr;
- ++cb;
-
- L = *lum++;
- value = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
- *row1++ = (value) & 0xFF;
- *row1++ = (value >> 8) & 0xFF;
- *row1++ = (value >> 16) & 0xFF;
-
- L = *lum++;
- value = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
- *row1++ = (value) & 0xFF;
- *row1++ = (value >> 8) & 0xFF;
- *row1++ = (value >> 16) & 0xFF;
-
-
- /* Now, do second row. */
-
- L = *lum2++;
- value = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
- *row2++ = (value) & 0xFF;
- *row2++ = (value >> 8) & 0xFF;
- *row2++ = (value >> 16) & 0xFF;
-
- L = *lum2++;
- value = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
- *row2++ = (value) & 0xFF;
- *row2++ = (value >> 8) & 0xFF;
- *row2++ = (value >> 16) & 0xFF;
- }
-
- /*
- * These values are at the start of the next line, (due
- * to the ++'s above),but they need to be at the start
- * of the line after that.
- */
- lum += cols;
- lum2 += cols;
- row1 += mod;
- row2 += mod;
- }
-}
-
-static void
-Color32DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
- unsigned char *lum, unsigned char *cr,
- unsigned char *cb, unsigned char *out,
- int rows, int cols, int mod)
-{
- unsigned int *row1;
- unsigned int *row2;
- unsigned char *lum2;
- int x, y;
- int cr_r;
- int crb_g;
- int cb_b;
- int cols_2 = cols / 2;
-
- row1 = (unsigned int *) out;
- row2 = row1 + cols + mod;
- lum2 = lum + cols;
-
- mod += cols + mod;
-
- y = rows / 2;
- while (y--) {
- x = cols_2;
- while (x--) {
- register int L;
-
- cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
- crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
- + colortab[*cb + 2 * 256];
- cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
- ++cr;
- ++cb;
-
- L = *lum++;
- *row1++ = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-
- L = *lum++;
- *row1++ = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-
-
- /* Now, do second row. */
-
- L = *lum2++;
- *row2++ = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-
- L = *lum2++;
- *row2++ = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
- }
-
- /*
- * These values are at the start of the next line, (due
- * to the ++'s above),but they need to be at the start
- * of the line after that.
- */
- lum += cols;
- lum2 += cols;
- row1 += mod;
- row2 += mod;
- }
-}
-
-/*
- * In this function I make use of a nasty trick. The tables have the lower
- * 16 bits replicated in the upper 16. This means I can write ints and get
- * the horisontal doubling for free (almost).
- */
-static void
-Color16DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
- unsigned char *lum, unsigned char *cr,
- unsigned char *cb, unsigned char *out,
- int rows, int cols, int mod)
-{
- unsigned int *row1 = (unsigned int *) out;
- const int next_row = cols + (mod / 2);
- unsigned int *row2 = row1 + 2 * next_row;
- unsigned char *lum2;
- int x, y;
- int cr_r;
- int crb_g;
- int cb_b;
- int cols_2 = cols / 2;
-
- lum2 = lum + cols;
-
- mod = (next_row * 3) + (mod / 2);
-
- y = rows / 2;
- while (y--) {
- x = cols_2;
- while (x--) {
- register int L;
-
- cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
- crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
- + colortab[*cb + 2 * 256];
- cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
- ++cr;
- ++cb;
-
- L = *lum++;
- row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] |
- rgb_2_pix[L + cb_b]);
- row1++;
-
- L = *lum++;
- row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] |
- rgb_2_pix[L + cb_b]);
- row1++;
-
-
- /* Now, do second row. */
-
- L = *lum2++;
- row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] |
- rgb_2_pix[L + cb_b]);
- row2++;
-
- L = *lum2++;
- row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] |
- rgb_2_pix[L + cb_b]);
- row2++;
- }
-
- /*
- * These values are at the start of the next line, (due
- * to the ++'s above),but they need to be at the start
- * of the line after that.
- */
- lum += cols;
- lum2 += cols;
- row1 += mod;
- row2 += mod;
- }
-}
-
-static void
-Color24DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
- unsigned char *lum, unsigned char *cr,
- unsigned char *cb, unsigned char *out,
- int rows, int cols, int mod)
-{
- unsigned int value;
- unsigned char *row1 = out;
- const int next_row = (cols * 2 + mod) * 3;
- unsigned char *row2 = row1 + 2 * next_row;
- unsigned char *lum2;
- int x, y;
- int cr_r;
- int crb_g;
- int cb_b;
- int cols_2 = cols / 2;
-
- lum2 = lum + cols;
-
- mod = next_row * 3 + mod * 3;
-
- y = rows / 2;
- while (y--) {
- x = cols_2;
- while (x--) {
- register int L;
-
- cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
- crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
- + colortab[*cb + 2 * 256];
- cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
- ++cr;
- ++cb;
-
- L = *lum++;
- value = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
- row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
- row1[next_row + 3 + 0] = (value) & 0xFF;
- row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
- row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
- row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
- row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
- row1 += 2 * 3;
-
- L = *lum++;
- value = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
- row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
- row1[next_row + 3 + 0] = (value) & 0xFF;
- row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
- row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
- row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
- row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
- row1 += 2 * 3;
-
-
- /* Now, do second row. */
-
- L = *lum2++;
- value = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
- row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
- row2[next_row + 3 + 0] = (value) & 0xFF;
- row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
- row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
- row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
- row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
- row2 += 2 * 3;
-
- L = *lum2++;
- value = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
- row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
- row2[next_row + 3 + 0] = (value) & 0xFF;
- row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
- row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
- row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
- row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
- row2 += 2 * 3;
- }
-
- /*
- * These values are at the start of the next line, (due
- * to the ++'s above),but they need to be at the start
- * of the line after that.
- */
- lum += cols;
- lum2 += cols;
- row1 += mod;
- row2 += mod;
- }
-}
-
-static void
-Color32DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
- unsigned char *lum, unsigned char *cr,
- unsigned char *cb, unsigned char *out,
- int rows, int cols, int mod)
-{
- unsigned int *row1 = (unsigned int *) out;
- const int next_row = cols * 2 + mod;
- unsigned int *row2 = row1 + 2 * next_row;
- unsigned char *lum2;
- int x, y;
- int cr_r;
- int crb_g;
- int cb_b;
- int cols_2 = cols / 2;
-
- lum2 = lum + cols;
-
- mod = (next_row * 3) + mod;
-
- y = rows / 2;
- while (y--) {
- x = cols_2;
- while (x--) {
- register int L;
-
- cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
- crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
- + colortab[*cb + 2 * 256];
- cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
- ++cr;
- ++cb;
-
- L = *lum++;
- row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
- (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
- row1 += 2;
-
- L = *lum++;
- row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
- (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
- row1 += 2;
-
-
- /* Now, do second row. */
-
- L = *lum2++;
- row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
- (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
- row2 += 2;
-
- L = *lum2++;
- row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
- (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
- row2 += 2;
- }
-
- /*
- * These values are at the start of the next line, (due
- * to the ++'s above),but they need to be at the start
- * of the line after that.
- */
- lum += cols;
- lum2 += cols;
- row1 += mod;
- row2 += mod;
- }
-}
-
-static void
-Color16DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
- unsigned char *lum, unsigned char *cr,
- unsigned char *cb, unsigned char *out,
- int rows, int cols, int mod)
-{
- unsigned short *row;
- int x, y;
- int cr_r;
- int crb_g;
- int cb_b;
- int cols_2 = cols / 2;
-
- row = (unsigned short *) out;
-
- y = rows;
- while (y--) {
- x = cols_2;
- while (x--) {
- register int L;
-
- cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
- crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
- + colortab[*cb + 2 * 256];
- cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
- cr += 4;
- cb += 4;
-
- L = *lum;
- lum += 2;
- *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] |
- rgb_2_pix[L + cb_b]);
-
- L = *lum;
- lum += 2;
- *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] |
- rgb_2_pix[L + cb_b]);
-
- }
-
- row += mod;
- }
-}
-
-static void
-Color24DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
- unsigned char *lum, unsigned char *cr,
- unsigned char *cb, unsigned char *out,
- int rows, int cols, int mod)
-{
- unsigned int value;
- unsigned char *row;
- int x, y;
- int cr_r;
- int crb_g;
- int cb_b;
- int cols_2 = cols / 2;
-
- row = (unsigned char *) out;
- mod *= 3;
- y = rows;
- while (y--) {
- x = cols_2;
- while (x--) {
- register int L;
-
- cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
- crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
- + colortab[*cb + 2 * 256];
- cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
- cr += 4;
- cb += 4;
-
- L = *lum;
- lum += 2;
- value = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
- *row++ = (value) & 0xFF;
- *row++ = (value >> 8) & 0xFF;
- *row++ = (value >> 16) & 0xFF;
-
- L = *lum;
- lum += 2;
- value = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
- *row++ = (value) & 0xFF;
- *row++ = (value >> 8) & 0xFF;
- *row++ = (value >> 16) & 0xFF;
-
- }
- row += mod;
- }
-}
-
-static void
-Color32DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
- unsigned char *lum, unsigned char *cr,
- unsigned char *cb, unsigned char *out,
- int rows, int cols, int mod)
-{
- unsigned int *row;
- int x, y;
- int cr_r;
- int crb_g;
- int cb_b;
- int cols_2 = cols / 2;
-
- row = (unsigned int *) out;
- y = rows;
- while (y--) {
- x = cols_2;
- while (x--) {
- register int L;
-
- cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
- crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
- + colortab[*cb + 2 * 256];
- cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
- cr += 4;
- cb += 4;
-
- L = *lum;
- lum += 2;
- *row++ = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-
- L = *lum;
- lum += 2;
- *row++ = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-
-
- }
- row += mod;
- }
-}
-
-/*
- * In this function I make use of a nasty trick. The tables have the lower
- * 16 bits replicated in the upper 16. This means I can write ints and get
- * the horisontal doubling for free (almost).
- */
-static void
-Color16DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
- unsigned char *lum, unsigned char *cr,
- unsigned char *cb, unsigned char *out,
- int rows, int cols, int mod)
-{
- unsigned int *row = (unsigned int *) out;
- const int next_row = cols + (mod / 2);
- int x, y;
- int cr_r;
- int crb_g;
- int cb_b;
- int cols_2 = cols / 2;
-
- y = rows;
- while (y--) {
- x = cols_2;
- while (x--) {
- register int L;
-
- cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
- crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
- + colortab[*cb + 2 * 256];
- cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
- cr += 4;
- cb += 4;
-
- L = *lum;
- lum += 2;
- row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] |
- rgb_2_pix[L + cb_b]);
- row++;
-
- L = *lum;
- lum += 2;
- row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] |
- rgb_2_pix[L + cb_b]);
- row++;
-
- }
- row += next_row;
- }
-}
-
-static void
-Color24DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
- unsigned char *lum, unsigned char *cr,
- unsigned char *cb, unsigned char *out,
- int rows, int cols, int mod)
-{
- unsigned int value;
- unsigned char *row = out;
- const int next_row = (cols * 2 + mod) * 3;
- int x, y;
- int cr_r;
- int crb_g;
- int cb_b;
- int cols_2 = cols / 2;
- y = rows;
- while (y--) {
- x = cols_2;
- while (x--) {
- register int L;
-
- cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
- crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
- + colortab[*cb + 2 * 256];
- cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
- cr += 4;
- cb += 4;
-
- L = *lum;
- lum += 2;
- value = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
- row[0 + 0] = row[3 + 0] = row[next_row + 0] =
- row[next_row + 3 + 0] = (value) & 0xFF;
- row[0 + 1] = row[3 + 1] = row[next_row + 1] =
- row[next_row + 3 + 1] = (value >> 8) & 0xFF;
- row[0 + 2] = row[3 + 2] = row[next_row + 2] =
- row[next_row + 3 + 2] = (value >> 16) & 0xFF;
- row += 2 * 3;
-
- L = *lum;
- lum += 2;
- value = (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
- row[0 + 0] = row[3 + 0] = row[next_row + 0] =
- row[next_row + 3 + 0] = (value) & 0xFF;
- row[0 + 1] = row[3 + 1] = row[next_row + 1] =
- row[next_row + 3 + 1] = (value >> 8) & 0xFF;
- row[0 + 2] = row[3 + 2] = row[next_row + 2] =
- row[next_row + 3 + 2] = (value >> 16) & 0xFF;
- row += 2 * 3;
-
- }
- row += next_row;
- }
-}
-
-static void
-Color32DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
- unsigned char *lum, unsigned char *cr,
- unsigned char *cb, unsigned char *out,
- int rows, int cols, int mod)
-{
- unsigned int *row = (unsigned int *) out;
- const int next_row = cols * 2 + mod;
- int x, y;
- int cr_r;
- int crb_g;
- int cb_b;
- int cols_2 = cols / 2;
- mod += mod;
- y = rows;
- while (y--) {
- x = cols_2;
- while (x--) {
- register int L;
-
- cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
- crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
- + colortab[*cb + 2 * 256];
- cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
- cr += 4;
- cb += 4;
-
- L = *lum;
- lum += 2;
- row[0] = row[1] = row[next_row] = row[next_row + 1] =
- (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
- row += 2;
-
- L = *lum;
- lum += 2;
- row[0] = row[1] = row[next_row] = row[next_row + 1] =
- (rgb_2_pix[L + cr_r] |
- rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
- row += 2;
-
-
- }
-
- row += next_row;
- }
-}
-
-/*
- * How many 1 bits are there in the Uint32.
- * Low performance, do not call often.
- */
-static int
-number_of_bits_set(Uint32 a)
-{
- if (!a)
- return 0;
- if (a & 1)
- return 1 + number_of_bits_set(a >> 1);
- return (number_of_bits_set(a >> 1));
-}
-
-/*
- * How many 0 bits are there at least significant end of Uint32.
- * Low performance, do not call often.
- */
-static int
-free_bits_at_bottom(Uint32 a)
-{
- /* assume char is 8 bits */
- if (!a)
- return sizeof(Uint32) * 8;
- if (((Sint32) a) & 1l)
- return 0;
- return 1 + free_bits_at_bottom(a >> 1);
-}
-
-static int
-SDL_SW_SetupYUVDisplay(SDL_SW_YUVTexture * swdata, Uint32 target_format)
-{
- Uint32 *r_2_pix_alloc;
- Uint32 *g_2_pix_alloc;
- Uint32 *b_2_pix_alloc;
- int i;
- int bpp;
- Uint32 Rmask, Gmask, Bmask, Amask;
-
- if (!SDL_PixelFormatEnumToMasks
- (target_format, &bpp, &Rmask, &Gmask, &Bmask, &Amask) || bpp < 15) {
- SDL_SetError("Unsupported YUV destination format");
- return -1;
- }
-
- swdata->target_format = target_format;
- r_2_pix_alloc = &swdata->rgb_2_pix[0 * 768];
- g_2_pix_alloc = &swdata->rgb_2_pix[1 * 768];
- b_2_pix_alloc = &swdata->rgb_2_pix[2 * 768];
-
- /*
- * Set up entries 0-255 in rgb-to-pixel value tables.
- */
- for (i = 0; i < 256; ++i) {
- r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Rmask));
- r_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Rmask);
- r_2_pix_alloc[i + 256] |= Amask;
- g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Gmask));
- g_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Gmask);
- g_2_pix_alloc[i + 256] |= Amask;
- b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Bmask));
- b_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Bmask);
- b_2_pix_alloc[i + 256] |= Amask;
- }
-
- /*
- * If we have 16-bit output depth, then we double the value
- * in the top word. This means that we can write out both
- * pixels in the pixel doubling mode with one op. It is
- * harmless in the normal case as storing a 32-bit value
- * through a short pointer will lose the top bits anyway.
- */
- if (SDL_BYTESPERPIXEL(target_format) == 2) {
- for (i = 0; i < 256; ++i) {
- r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16;
- g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16;
- b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16;
- }
- }
-
- /*
- * Spread out the values we have to the rest of the array so that
- * we do not need to check for overflow.
- */
- for (i = 0; i < 256; ++i) {
- r_2_pix_alloc[i] = r_2_pix_alloc[256];
- r_2_pix_alloc[i + 512] = r_2_pix_alloc[511];
- g_2_pix_alloc[i] = g_2_pix_alloc[256];
- g_2_pix_alloc[i + 512] = g_2_pix_alloc[511];
- b_2_pix_alloc[i] = b_2_pix_alloc[256];
- b_2_pix_alloc[i + 512] = b_2_pix_alloc[511];
- }
-
- /* You have chosen wisely... */
- switch (swdata->format) {
- case SDL_PIXELFORMAT_YV12:
- case SDL_PIXELFORMAT_IYUV:
- if (SDL_BYTESPERPIXEL(target_format) == 2) {
-#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
- /* inline assembly functions */
- if (SDL_HasMMX() && (Rmask == 0xF800) &&
- (Gmask == 0x07E0) && (Bmask == 0x001F)
- && (swdata->w & 15) == 0) {
-/*printf("Using MMX 16-bit 565 dither\n");*/
- swdata->Display1X = Color565DitherYV12MMX1X;
- } else {
-/*printf("Using C 16-bit dither\n");*/
- swdata->Display1X = Color16DitherYV12Mod1X;
- }
-#else
- swdata->Display1X = Color16DitherYV12Mod1X;
-#endif
- swdata->Display2X = Color16DitherYV12Mod2X;
- }
- if (SDL_BYTESPERPIXEL(target_format) == 3) {
- swdata->Display1X = Color24DitherYV12Mod1X;
- swdata->Display2X = Color24DitherYV12Mod2X;
- }
- if (SDL_BYTESPERPIXEL(target_format) == 4) {
-#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
- /* inline assembly functions */
- if (SDL_HasMMX() && (Rmask == 0x00FF0000) &&
- (Gmask == 0x0000FF00) &&
- (Bmask == 0x000000FF) && (swdata->w & 15) == 0) {
-/*printf("Using MMX 32-bit dither\n");*/
- swdata->Display1X = ColorRGBDitherYV12MMX1X;
- } else {
-/*printf("Using C 32-bit dither\n");*/
- swdata->Display1X = Color32DitherYV12Mod1X;
- }
-#else
- swdata->Display1X = Color32DitherYV12Mod1X;
-#endif
- swdata->Display2X = Color32DitherYV12Mod2X;
- }
- break;
- case SDL_PIXELFORMAT_YUY2:
- case SDL_PIXELFORMAT_UYVY:
- case SDL_PIXELFORMAT_YVYU:
- if (SDL_BYTESPERPIXEL(target_format) == 2) {
- swdata->Display1X = Color16DitherYUY2Mod1X;
- swdata->Display2X = Color16DitherYUY2Mod2X;
- }
- if (SDL_BYTESPERPIXEL(target_format) == 3) {
- swdata->Display1X = Color24DitherYUY2Mod1X;
- swdata->Display2X = Color24DitherYUY2Mod2X;
- }
- if (SDL_BYTESPERPIXEL(target_format) == 4) {
- swdata->Display1X = Color32DitherYUY2Mod1X;
- swdata->Display2X = Color32DitherYUY2Mod2X;
- }
- break;
- default:
- /* We should never get here (caught above) */
- break;
- }
-
- if (swdata->display) {
- SDL_FreeSurface(swdata->display);
- swdata->display = NULL;
- }
- return 0;
-}
-
-SDL_SW_YUVTexture *
-SDL_SW_CreateYUVTexture(Uint32 format, int w, int h)
-{
- SDL_SW_YUVTexture *swdata;
- int *Cr_r_tab;
- int *Cr_g_tab;
- int *Cb_g_tab;
- int *Cb_b_tab;
- int i;
- int CR, CB;
-
- swdata = (SDL_SW_YUVTexture *) SDL_calloc(1, sizeof(*swdata));
- if (!swdata) {
- SDL_OutOfMemory();
- return NULL;
- }
-
- switch (format) {
- case SDL_PIXELFORMAT_YV12:
- case SDL_PIXELFORMAT_IYUV:
- case SDL_PIXELFORMAT_YUY2:
- case SDL_PIXELFORMAT_UYVY:
- case SDL_PIXELFORMAT_YVYU:
- break;
- default:
- SDL_SetError("Unsupported YUV format");
- return NULL;
- }
-
- swdata->format = format;
- swdata->target_format = SDL_PIXELFORMAT_UNKNOWN;
- swdata->w = w;
- swdata->h = h;
- swdata->pixels = (Uint8 *) SDL_malloc(w * h * 2);
- swdata->colortab = (int *) SDL_malloc(4 * 256 * sizeof(int));
- swdata->rgb_2_pix = (Uint32 *) SDL_malloc(3 * 768 * sizeof(Uint32));
- if (!swdata->pixels || !swdata->colortab || !swdata->rgb_2_pix) {
- SDL_OutOfMemory();
- SDL_SW_DestroyYUVTexture(swdata);
- return NULL;
- }
-
- /* Generate the tables for the display surface */
- Cr_r_tab = &swdata->colortab[0 * 256];
- Cr_g_tab = &swdata->colortab[1 * 256];
- Cb_g_tab = &swdata->colortab[2 * 256];
- Cb_b_tab = &swdata->colortab[3 * 256];
- for (i = 0; i < 256; i++) {
- /* Gamma correction (luminescence table) and chroma correction
- would be done here. See the Berkeley mpeg_play sources.
- */
- CB = CR = (i - 128);
- Cr_r_tab[i] = (int) ((0.419 / 0.299) * CR);
- Cr_g_tab[i] = (int) (-(0.299 / 0.419) * CR);
- Cb_g_tab[i] = (int) (-(0.114 / 0.331) * CB);
- Cb_b_tab[i] = (int) ((0.587 / 0.331) * CB);
- }
-
- /* Find the pitch and offset values for the overlay */
- switch (format) {
- case SDL_PIXELFORMAT_YV12:
- case SDL_PIXELFORMAT_IYUV:
- swdata->pitches[0] = w;
- swdata->pitches[1] = swdata->pitches[0] / 2;
- swdata->pitches[2] = swdata->pitches[0] / 2;
- swdata->planes[0] = swdata->pixels;
- swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h;
- swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * h / 2;
- break;
- case SDL_PIXELFORMAT_YUY2:
- case SDL_PIXELFORMAT_UYVY:
- case SDL_PIXELFORMAT_YVYU:
- swdata->pitches[0] = w * 2;
- swdata->planes[0] = swdata->pixels;
- break;
- default:
- /* We should never get here (caught above) */
- break;
- }
-
- /* We're all done.. */
- return (swdata);
-}
-
-int
-SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
- int *pitch)
-{
- *pixels = swdata->planes[0];
- *pitch = swdata->pitches[0];
- return 0;
-}
-
-int
-SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
- const void *pixels, int pitch)
-{
- switch (swdata->format) {
- case SDL_PIXELFORMAT_YV12:
- case SDL_PIXELFORMAT_IYUV:
- if (rect
- && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
- || rect->h != swdata->h)) {
- SDL_SetError
- ("YV12 and IYUV textures only support full surface updates");
- return -1;
- }
- SDL_memcpy(swdata->pixels, pixels, swdata->h * swdata->w * 2);
- break;
- case SDL_PIXELFORMAT_YUY2:
- case SDL_PIXELFORMAT_UYVY:
- case SDL_PIXELFORMAT_YVYU:
- {
- Uint8 *src, *dst;
- int row;
- size_t length;
-
- src = (Uint8 *) pixels;
- dst =
- swdata->planes[0] + rect->y * swdata->pitches[0] +
- rect->x * 2;
- length = rect->w * 2;
- for (row = 0; row < rect->h; ++row) {
- SDL_memcpy(dst, src, length);
- src += pitch;
- dst += swdata->pitches[0];
- }
- }
- break;
- }
- return 0;
-}
-
-int
-SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
- int markDirty, void **pixels, int *pitch)
-{
- switch (swdata->format) {
- case SDL_PIXELFORMAT_YV12:
- case SDL_PIXELFORMAT_IYUV:
- if (rect
- && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
- || rect->h != swdata->h)) {
- SDL_SetError
- ("YV12 and IYUV textures only support full surface locks");
- return -1;
- }
- break;
- }
-
- *pixels = swdata->planes[0] + rect->y * swdata->pitches[0] + rect->x * 2;
- *pitch = swdata->pitches[0];
- return 0;
-}
-
-void
-SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata)
-{
-}
-
-int
-SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
- Uint32 target_format, int w, int h, void *pixels,
- int pitch)
-{
- int stretch;
- int scale_2x;
- Uint8 *lum, *Cr, *Cb;
- int mod;
-
- /* Make sure we're set up to display in the desired format */
- if (target_format != swdata->target_format) {
- if (SDL_SW_SetupYUVDisplay(swdata, target_format) < 0) {
- return -1;
- }
- }
-
- stretch = 0;
- scale_2x = 0;
- if (srcrect->x || srcrect->y || srcrect->w < swdata->w
- || srcrect->h < swdata->h) {
- /* The source rectangle has been clipped.
- Using a scratch surface is easier than adding clipped
- source support to all the blitters, plus that would
- slow them down in the general unclipped case.
- */
- stretch = 1;
- } else if ((srcrect->w != w) || (srcrect->h != h)) {
- if ((w == 2 * srcrect->w) && (h == 2 * srcrect->h)) {
- scale_2x = 1;
- } else {
- stretch = 1;
- }
- }
- if (stretch) {
- int bpp;
- Uint32 Rmask, Gmask, Bmask, Amask;
-
- if (swdata->display) {
- swdata->display->w = w;
- swdata->display->h = h;
- swdata->display->pixels = pixels;
- swdata->display->pitch = pitch;
- } else {
- /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
- SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
- &Bmask, &Amask);
- swdata->display =
- SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask,
- Gmask, Bmask, Amask);
- if (!swdata->display) {
- return (-1);
- }
- }
- if (!swdata->stretch) {
- /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
- SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
- &Bmask, &Amask);
- swdata->stretch =
- SDL_CreateRGBSurface(0, swdata->w, swdata->h, bpp, Rmask,
- Gmask, Bmask, Amask);
- if (!swdata->stretch) {
- return (-1);
- }
- }
- pixels = swdata->stretch->pixels;
- pitch = swdata->stretch->pitch;
- }
- switch (swdata->format) {
- case SDL_PIXELFORMAT_YV12:
- lum = swdata->planes[0];
- Cr = swdata->planes[1];
- Cb = swdata->planes[2];
- break;
- case SDL_PIXELFORMAT_IYUV:
- lum = swdata->planes[0];
- Cr = swdata->planes[2];
- Cb = swdata->planes[1];
- break;
- case SDL_PIXELFORMAT_YUY2:
- lum = swdata->planes[0];
- Cr = lum + 3;
- Cb = lum + 1;
- break;
- case SDL_PIXELFORMAT_UYVY:
- lum = swdata->planes[0] + 1;
- Cr = lum + 1;
- Cb = lum - 1;
- break;
- case SDL_PIXELFORMAT_YVYU:
- lum = swdata->planes[0];
- Cr = lum + 1;
- Cb = lum + 3;
- break;
- default:
- SDL_SetError("Unsupported YUV format in copy");
- return (-1);
- }
- mod = (pitch / SDL_BYTESPERPIXEL(target_format));
-
- if (scale_2x) {
- mod -= (swdata->w * 2);
- swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
- lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
- } else {
- mod -= swdata->w;
- swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
- lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
- }
- if (stretch) {
- SDL_Rect rect = *srcrect;
- SDL_SoftStretch(swdata->stretch, &rect, swdata->display, NULL);
- }
- return 0;
-}
-
-void
-SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata)
-{
- if (swdata) {
- if (swdata->pixels) {
- SDL_free(swdata->pixels);
- }
- if (swdata->colortab) {
- SDL_free(swdata->colortab);
- }
- if (swdata->rgb_2_pix) {
- SDL_free(swdata->rgb_2_pix);
- }
- if (swdata->stretch) {
- SDL_FreeSurface(swdata->stretch);
- }
- if (swdata->display) {
- SDL_FreeSurface(swdata->display);
- }
- SDL_free(swdata);
- }
-}
-
-/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/SDL_yuv_sw_c.h Wed Feb 02 22:55:12 2011 -0800
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,70 +0,0 @@
-/*
- SDL - Simple DirectMedia Layer
- Copyright (C) 1997-2010 Sam Lantinga
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
- Sam Lantinga
- slouken@libsdl.org
-*/
-#include "SDL_config.h"
-
-#include "SDL_video.h"
-#include "SDL_sysvideo.h"
-
-/* This is the software implementation of the YUV texture support */
-
-struct SDL_SW_YUVTexture
-{
- Uint32 format;
- Uint32 target_format;
- int w, h;
- Uint8 *pixels;
- int *colortab;
- Uint32 *rgb_2_pix;
- void (*Display1X) (int *colortab, Uint32 * rgb_2_pix,
- unsigned char *lum, unsigned char *cr,
- unsigned char *cb, unsigned char *out,
- int rows, int cols, int mod);
- void (*Display2X) (int *colortab, Uint32 * rgb_2_pix,
- unsigned char *lum, unsigned char *cr,
- unsigned char *cb, unsigned char *out,
- int rows, int cols, int mod);
-
- /* These are just so we don't have to allocate them separately */
- Uint16 pitches[3];
- Uint8 *planes[3];
-
- /* This is a temporary surface in case we have to stretch copy */
- SDL_Surface *stretch;
- SDL_Surface *display;
-};
-
-typedef struct SDL_SW_YUVTexture SDL_SW_YUVTexture;
-
-SDL_SW_YUVTexture *SDL_SW_CreateYUVTexture(Uint32 format, int w, int h);
-int SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
- int *pitch);
-int SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
- const void *pixels, int pitch);
-int SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
- int markDirty, void **pixels, int *pitch);
-void SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata);
-int SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
- Uint32 target_format, int w, int h, void *pixels,
- int pitch);
-void SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata);
-
-/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/mmx.h Wed Feb 02 22:55:12 2011 -0800
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,642 +0,0 @@
-/* mmx.h
-
- MultiMedia eXtensions GCC interface library for IA32.
-
- To use this library, simply include this header file
- and compile with GCC. You MUST have inlining enabled
- in order for mmx_ok() to work; this can be done by
- simply using -O on the GCC command line.
-
- Compiling with -DMMX_TRACE will cause detailed trace
- output to be sent to stderr for each mmx operation.
- This adds lots of code, and obviously slows execution to
- a crawl, but can be very useful for debugging.
-
- THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
- EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
- LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- AND FITNESS FOR ANY PARTICULAR PURPOSE.
-
- 1997-99 by H. Dietz and R. Fisher
-
- Notes:
- It appears that the latest gas has the pand problem fixed, therefore
- I'll undefine BROKEN_PAND by default.
-*/
-
-#ifndef _MMX_H
-#define _MMX_H
-
-
-/* Warning: at this writing, the version of GAS packaged
- with most Linux distributions does not handle the
- parallel AND operation mnemonic correctly. If the
- symbol BROKEN_PAND is defined, a slower alternative
- coding will be used. If execution of mmxtest results
- in an illegal instruction fault, define this symbol.
-*/
-#undef BROKEN_PAND
-
-
-/* The type of an value that fits in an MMX register
- (note that long long constant values MUST be suffixed
- by LL and unsigned long long values by ULL, lest
- they be truncated by the compiler)
-*/
-typedef union
-{
- long long q; /* Quadword (64-bit) value */
- unsigned long long uq; /* Unsigned Quadword */
- int d[2]; /* 2 Doubleword (32-bit) values */
- unsigned int ud[2]; /* 2 Unsigned Doubleword */
- short w[4]; /* 4 Word (16-bit) values */
- unsigned short uw[4]; /* 4 Unsigned Word */
- char b[8]; /* 8 Byte (8-bit) values */
- unsigned char ub[8]; /* 8 Unsigned Byte */
- float s[2]; /* Single-precision (32-bit) value */
-} __attribute__ ((aligned(8))) mmx_t; /* On an 8-byte (64-bit) boundary */
-
-
-#if 0
-/* Function to test if multimedia instructions are supported...
-*/
-inline extern int
-mm_support(void)
-{
- /* Returns 1 if MMX instructions are supported,
- 3 if Cyrix MMX and Extended MMX instructions are supported
- 5 if AMD MMX and 3DNow! instructions are supported
- 0 if hardware does not support any of these
- */
- register int rval = 0;
-
- __asm__ __volatile__(
- /* See if CPUID instruction is supported ... */
- /* ... Get copies of EFLAGS into eax and ecx */
- "pushf\n\t"
- "popl %%eax\n\t" "movl %%eax, %%ecx\n\t"
- /* ... Toggle the ID bit in one copy and store */
- /* to the EFLAGS reg */
- "xorl $0x200000, %%eax\n\t"
- "push %%eax\n\t" "popf\n\t"
- /* ... Get the (hopefully modified) EFLAGS */
- "pushf\n\t" "popl %%eax\n\t"
- /* ... Compare and test result */
- "xorl %%eax, %%ecx\n\t" "testl $0x200000, %%ecx\n\t" "jz NotSupported1\n\t" /* CPUID not supported */
- /* Get standard CPUID information, and
- go to a specific vendor section */
- "movl $0, %%eax\n\t" "cpuid\n\t"
- /* Check for Intel */
- "cmpl $0x756e6547, %%ebx\n\t"
- "jne TryAMD\n\t"
- "cmpl $0x49656e69, %%edx\n\t"
- "jne TryAMD\n\t"
- "cmpl $0x6c65746e, %%ecx\n"
- "jne TryAMD\n\t" "jmp Intel\n\t"
- /* Check for AMD */
- "\nTryAMD:\n\t"
- "cmpl $0x68747541, %%ebx\n\t"
- "jne TryCyrix\n\t"
- "cmpl $0x69746e65, %%edx\n\t"
- "jne TryCyrix\n\t"
- "cmpl $0x444d4163, %%ecx\n"
- "jne TryCyrix\n\t" "jmp AMD\n\t"
- /* Check for Cyrix */
- "\nTryCyrix:\n\t"
- "cmpl $0x69727943, %%ebx\n\t"
- "jne NotSupported2\n\t"
- "cmpl $0x736e4978, %%edx\n\t"
- "jne NotSupported3\n\t"
- "cmpl $0x64616574, %%ecx\n\t"
- "jne NotSupported4\n\t"
- /* Drop through to Cyrix... */
- /* Cyrix Section */
- /* See if extended CPUID level 80000001 is supported */
- /* The value of CPUID/80000001 for the 6x86MX is undefined
- according to the Cyrix CPU Detection Guide (Preliminary
- Rev. 1.01 table 1), so we'll check the value of eax for
- CPUID/0 to see if standard CPUID level 2 is supported.
- According to the table, the only CPU which supports level
- 2 is also the only one which supports extended CPUID levels.
- */
- "cmpl $0x2, %%eax\n\t" "jne MMXtest\n\t" /* Use standard CPUID instead */
- /* Extended CPUID supported (in theory), so get extended
- features */
- "movl $0x80000001, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%eax\n\t" /* Test for MMX */
- "jz NotSupported5\n\t" /* MMX not supported */
- "testl $0x01000000, %%eax\n\t" /* Test for Ext'd MMX */
- "jnz EMMXSupported\n\t" "movl $1, %0:\n\n\t" /* MMX Supported */
- "jmp Return\n\n" "EMMXSupported:\n\t" "movl $3, %0:\n\n\t" /* EMMX and MMX Supported */
- "jmp Return\n\t"
- /* AMD Section */
- "AMD:\n\t"
- /* See if extended CPUID is supported */
- "movl $0x80000000, %%eax\n\t" "cpuid\n\t" "cmpl $0x80000000, %%eax\n\t" "jl MMXtest\n\t" /* Use standard CPUID instead */
- /* Extended CPUID supported, so get extended features */
- "movl $0x80000001, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%edx\n\t" /* Test for MMX */
- "jz NotSupported6\n\t" /* MMX not supported */
- "testl $0x80000000, %%edx\n\t" /* Test for 3DNow! */
- "jnz ThreeDNowSupported\n\t" "movl $1, %0:\n\n\t" /* MMX Supported */
- "jmp Return\n\n" "ThreeDNowSupported:\n\t" "movl $5, %0:\n\n\t" /* 3DNow! and MMX Supported */
- "jmp Return\n\t"
- /* Intel Section */
- "Intel:\n\t"
- /* Check for MMX */
- "MMXtest:\n\t" "movl $1, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%edx\n\t" /* Test for MMX */
- "jz NotSupported7\n\t" /* MMX Not supported */
- "movl $1, %0:\n\n\t" /* MMX Supported */
- "jmp Return\n\t"
- /* Nothing supported */
- "\nNotSupported1:\n\t" "#movl $101, %0:\n\n\t" "\nNotSupported2:\n\t" "#movl $102, %0:\n\n\t" "\nNotSupported3:\n\t" "#movl $103, %0:\n\n\t" "\nNotSupported4:\n\t" "#movl $104, %0:\n\n\t" "\nNotSupported5:\n\t" "#movl $105, %0:\n\n\t" "\nNotSupported6:\n\t" "#movl $106, %0:\n\n\t" "\nNotSupported7:\n\t" "#movl $107, %0:\n\n\t" "movl $0, %0:\n\n\t" "Return:\n\t":"=a"(rval): /* no input */
- :"eax", "ebx", "ecx", "edx");
-
- /* Return */
- return (rval);
-}
-
-/* Function to test if mmx instructions are supported...
-*/
-inline extern int
-mmx_ok(void)
-{
- /* Returns 1 if MMX instructions are supported, 0 otherwise */
- return (mm_support() & 0x1);
-}
-#endif
-
-/* Helper functions for the instruction macros that follow...
- (note that memory-to-register, m2r, instructions are nearly
- as efficient as register-to-register, r2r, instructions;
- however, memory-to-memory instructions are really simulated
- as a convenience, and are only 1/3 as efficient)
-*/
-#ifdef MMX_TRACE
-
-/* Include the stuff for printing a trace to stderr...
-*/
-
-#define mmx_i2r(op, imm, reg) \
- { \
- mmx_t mmx_trace; \
- mmx_trace.uq = (imm); \
- printf(#op "_i2r(" #imm "=0x%08x%08x, ", \
- mmx_trace.d[1], mmx_trace.d[0]); \
- __asm__ __volatile__ ("movq %%" #reg ", %0" \
- : "=X" (mmx_trace) \
- : /* nothing */ ); \
- printf(#reg "=0x%08x%08x) => ", \
- mmx_trace.d[1], mmx_trace.d[0]); \
- __asm__ __volatile__ (#op " %0, %%" #reg \
- : /* nothing */ \
- : "X" (imm)); \
- __asm__ __volatile__ ("movq %%" #reg ", %0" \
- : "=X" (mmx_trace) \
- : /* nothing */ ); \
- printf(#reg "=0x%08x%08x\n", \
- mmx_trace.d[1], mmx_trace.d[0]); \
- }
-
-#define mmx_m2r(op, mem, reg) \
- { \
- mmx_t mmx_trace; \
- mmx_trace = (mem); \
- printf(#op "_m2r(" #mem "=0x%08x%08x, ", \
- mmx_trace.d[1], mmx_trace.d[0]); \
- __asm__ __volatile__ ("movq %%" #reg ", %0" \
- : "=X" (mmx_trace) \
- : /* nothing */ ); \
- printf(#reg "=0x%08x%08x) => ", \
- mmx_trace.d[1], mmx_trace.d[0]); \
- __asm__ __volatile__ (#op " %0, %%" #reg \
- : /* nothing */ \
- : "X" (mem)); \
- __asm__ __volatile__ ("movq %%" #reg ", %0" \
- : "=X" (mmx_trace) \
- : /* nothing */ ); \
- printf(#reg "=0x%08x%08x\n", \
- mmx_trace.d[1], mmx_trace.d[0]); \
- }
-
-#define mmx_r2m(op, reg, mem) \
- { \
- mmx_t mmx_trace; \
- __asm__ __volatile__ ("movq %%" #reg ", %0" \
- : "=X" (mmx_trace) \
- : /* nothing */ ); \
- printf(#op "_r2m(" #reg "=0x%08x%08x, ", \
- mmx_trace.d[1], mmx_trace.d[0]); \
- mmx_trace = (mem); \
- printf(#mem "=0x%08x%08x) => ", \
- mmx_trace.d[1], mmx_trace.d[0]); \
- __asm__ __volatile__ (#op " %%" #reg ", %0" \
- : "=X" (mem) \
- : /* nothing */ ); \
- mmx_trace = (mem); \
- printf(#mem "=0x%08x%08x\n", \
- mmx_trace.d[1], mmx_trace.d[0]); \
- }
-
-#define mmx_r2r(op, regs, regd) \
- { \
- mmx_t mmx_trace; \
- __asm__ __volatile__ ("movq %%" #regs ", %0" \
- : "=X" (mmx_trace) \
- : /* nothing */ ); \
- printf(#op "_r2r(" #regs "=0x%08x%08x, ", \
- mmx_trace.d[1], mmx_trace.d[0]); \
- __asm__ __volatile__ ("movq %%" #regd ", %0" \
- : "=X" (mmx_trace) \
- : /* nothing */ ); \
- printf(#regd "=0x%08x%08x) => ", \
- mmx_trace.d[1], mmx_trace.d[0]); \
- __asm__ __volatile__ (#op " %" #regs ", %" #regd); \
- __asm__ __volatile__ ("movq %%" #regd ", %0" \
- : "=X" (mmx_trace) \
- : /* nothing */ ); \
- printf(#regd "=0x%08x%08x\n", \
- mmx_trace.d[1], mmx_trace.d[0]); \
- }
-
-#define mmx_m2m(op, mems, memd) \
- { \
- mmx_t mmx_trace; \
- mmx_trace = (mems); \
- printf(#op "_m2m(" #mems "=0x%08x%08x, ", \
- mmx_trace.d[1], mmx_trace.d[0]); \
- mmx_trace = (memd); \
- printf(#memd "=0x%08x%08x) => ", \
- mmx_trace.d[1], mmx_trace.d[0]); \
- __asm__ __volatile__ ("movq %0, %%mm0\n\t" \
- #op " %1, %%mm0\n\t" \
- "movq %%mm0, %0" \
- : "=X" (memd) \
- : "X" (mems)); \
- mmx_trace = (memd); \
- printf(#memd "=0x%08x%08x\n", \
- mmx_trace.d[1], mmx_trace.d[0]); \
- }
-
-#else
-
-/* These macros are a lot simpler without the tracing...
-*/
-
-#define mmx_i2r(op, imm, reg) \
- __asm__ __volatile__ (#op " %0, %%" #reg \
- : /* nothing */ \
- : "X" (imm) )
-
-#define mmx_m2r(op, mem, reg) \
- __asm__ __volatile__ (#op " %0, %%" #reg \
- : /* nothing */ \
- : "m" (mem))
-
-#define mmx_r2m(op, reg, mem) \
- __asm__ __volatile__ (#op " %%" #reg ", %0" \
- : "=m" (mem) \
- : /* nothing */ )
-
-#define mmx_r2r(op, regs, regd) \
- __asm__ __volatile__ (#op " %" #regs ", %" #regd)
-
-#define mmx_m2m(op, mems, memd) \
- __asm__ __volatile__ ("movq %0, %%mm0\n\t" \
- #op " %1, %%mm0\n\t" \
- "movq %%mm0, %0" \
- : "=X" (memd) \
- : "X" (mems))
-
-#endif
-
-
-/* 1x64 MOVe Quadword
- (this is both a load and a store...
- in fact, it is the only way to store)
-*/
-#define movq_m2r(var, reg) mmx_m2r(movq, var, reg)
-#define movq_r2m(reg, var) mmx_r2m(movq, reg, var)
-#define movq_r2r(regs, regd) mmx_r2r(movq, regs, regd)
-#define movq(vars, vard) \
- __asm__ __volatile__ ("movq %1, %%mm0\n\t" \
- "movq %%mm0, %0" \
- : "=X" (vard) \
- : "X" (vars))
-
-
-/* 1x32 MOVe Doubleword
- (like movq, this is both load and store...
- but is most useful for moving things between
- mmx registers and ordinary registers)
-*/
-#define movd_m2r(var, reg) mmx_m2r(movd, var, reg)
-#define movd_r2m(reg, var) mmx_r2m(movd, reg, var)
-#define movd_r2r(regs, regd) mmx_r2r(movd, regs, regd)
-#define movd(vars, vard) \
- __asm__ __volatile__ ("movd %1, %%mm0\n\t" \
- "movd %%mm0, %0" \
- : "=X" (vard) \
- : "X" (vars))
-
-
-/* 2x32, 4x16, and 8x8 Parallel ADDs
-*/
-#define paddd_m2r(var, reg) mmx_m2r(paddd, var, reg)
-#define paddd_r2r(regs, regd) mmx_r2r(paddd, regs, regd)
-#define paddd(vars, vard) mmx_m2m(paddd, vars, vard)
-
-#define paddw_m2r(var, reg) mmx_m2r(paddw, var, reg)
-#define paddw_r2r(regs, regd) mmx_r2r(paddw, regs, regd)
-#define paddw(vars, vard) mmx_m2m(paddw, vars, vard)
-
-#define paddb_m2r(var, reg) mmx_m2r(paddb, var, reg)
-#define paddb_r2r(regs, regd) mmx_r2r(paddb, regs, regd)
-#define paddb(vars, vard) mmx_m2m(paddb, vars, vard)
-
-
-/* 4x16 and 8x8 Parallel ADDs using Saturation arithmetic
-*/
-#define paddsw_m2r(var, reg) mmx_m2r(paddsw, var, reg)
-#define paddsw_r2r(regs, regd) mmx_r2r(paddsw, regs, regd)
-#define paddsw(vars, vard) mmx_m2m(paddsw, vars, vard)
-
-#define paddsb_m2r(var, reg) mmx_m2r(paddsb, var, reg)
-#define paddsb_r2r(regs, regd) mmx_r2r(paddsb, regs, regd)
-#define paddsb(vars, vard) mmx_m2m(paddsb, vars, vard)
-
-
-/* 4x16 and 8x8 Parallel ADDs using Unsigned Saturation arithmetic
-*/
-#define paddusw_m2r(var, reg) mmx_m2r(paddusw, var, reg)
-#define paddusw_r2r(regs, regd) mmx_r2r(paddusw, regs, regd)
-#define paddusw(vars, vard) mmx_m2m(paddusw, vars, vard)
-
-#define paddusb_m2r(var, reg) mmx_m2r(paddusb, var, reg)
-#define paddusb_r2r(regs, regd) mmx_r2r(paddusb, regs, regd)
-#define paddusb(vars, vard) mmx_m2m(paddusb, vars, vard)
-
-
-/* 2x32, 4x16, and 8x8 Parallel SUBs
-*/
-#define psubd_m2r(var, reg) mmx_m2r(psubd, var, reg)
-#define psubd_r2r(regs, regd) mmx_r2r(psubd, regs, regd)
-#define psubd(vars, vard) mmx_m2m(psubd, vars, vard)
-
-#define psubw_m2r(var, reg) mmx_m2r(psubw, var, reg)
-#define psubw_r2r(regs, regd) mmx_r2r(psubw, regs, regd)
-#define psubw(vars, vard) mmx_m2m(psubw, vars, vard)
-
-#define psubb_m2r(var, reg) mmx_m2r(psubb, var, reg)
-#define psubb_r2r(regs, regd) mmx_r2r(psubb, regs, regd)
-#define psubb(vars, vard) mmx_m2m(psubb, vars, vard)
-
-
-/* 4x16 and 8x8 Parallel SUBs using Saturation arithmetic
-*/
-#define psubsw_m2r(var, reg) mmx_m2r(psubsw, var, reg)
-#define psubsw_r2r(regs, regd) mmx_r2r(psubsw, regs, regd)
-#define psubsw(vars, vard) mmx_m2m(psubsw, vars, vard)
-
-#define psubsb_m2r(var, reg) mmx_m2r(psubsb, var, reg)
-#define psubsb_r2r(regs, regd) mmx_r2r(psubsb, regs, regd)
-#define psubsb(vars, vard) mmx_m2m(psubsb, vars, vard)
-
-
-/* 4x16 and 8x8 Parallel SUBs using Unsigned Saturation arithmetic
-*/
-#define psubusw_m2r(var, reg) mmx_m2r(psubusw, var, reg)
-#define psubusw_r2r(regs, regd) mmx_r2r(psubusw, regs, regd)
-#define psubusw(vars, vard) mmx_m2m(psubusw, vars, vard)
-
-#define psubusb_m2r(var, reg) mmx_m2r(psubusb, var, reg)
-#define psubusb_r2r(regs, regd) mmx_r2r(psubusb, regs, regd)
-#define psubusb(vars, vard) mmx_m2m(psubusb, vars, vard)
-
-
-/* 4x16 Parallel MULs giving Low 4x16 portions of results
-*/
-#define pmullw_m2r(var, reg) mmx_m2r(pmullw, var, reg)
-#define pmullw_r2r(regs, regd) mmx_r2r(pmullw, regs, regd)
-#define pmullw(vars, vard) mmx_m2m(pmullw, vars, vard)
-
-
-/* 4x16 Parallel MULs giving High 4x16 portions of results
-*/
-#define pmulhw_m2r(var, reg) mmx_m2r(pmulhw, var, reg)
-#define pmulhw_r2r(regs, regd) mmx_r2r(pmulhw, regs, regd)
-#define pmulhw(vars, vard) mmx_m2m(pmulhw, vars, vard)
-
-
-/* 4x16->2x32 Parallel Mul-ADD
- (muls like pmullw, then adds adjacent 16-bit fields
- in the multiply result to make the final 2x32 result)
-*/
-#define pmaddwd_m2r(var, reg) mmx_m2r(pmaddwd, var, reg)
-#define pmaddwd_r2r(regs, regd) mmx_r2r(pmaddwd, regs, regd)
-#define pmaddwd(vars, vard) mmx_m2m(pmaddwd, vars, vard)
-
-
-/* 1x64 bitwise AND
-*/
-#ifdef BROKEN_PAND
-#define pand_m2r(var, reg) \
- { \
- mmx_m2r(pandn, (mmx_t) -1LL, reg); \
- mmx_m2r(pandn, var, reg); \
- }
-#define pand_r2r(regs, regd) \
- { \
- mmx_m2r(pandn, (mmx_t) -1LL, regd); \
- mmx_r2r(pandn, regs, regd) \
- }
-#define pand(vars, vard) \
- { \
- movq_m2r(vard, mm0); \
- mmx_m2r(pandn, (mmx_t) -1LL, mm0); \
- mmx_m2r(pandn, vars, mm0); \
- movq_r2m(mm0, vard); \
- }
-#else
-#define pand_m2r(var, reg) mmx_m2r(pand, var, reg)
-#define pand_r2r(regs, regd) mmx_r2r(pand, regs, regd)
-#define pand(vars, vard) mmx_m2m(pand, vars, vard)
-#endif
-
-
-/* 1x64 bitwise AND with Not the destination
-*/
-#define pandn_m2r(var, reg) mmx_m2r(pandn, var, reg)
-#define pandn_r2r(regs, regd) mmx_r2r(pandn, regs, regd)
-#define pandn(vars, vard) mmx_m2m(pandn, vars, vard)
-
-
-/* 1x64 bitwise OR
-*/
-#define por_m2r(var, reg) mmx_m2r(por, var, reg)
-#define por_r2r(regs, regd) mmx_r2r(por, regs, regd)
-#define por(vars, vard) mmx_m2m(por, vars, vard)
-
-
-/* 1x64 bitwise eXclusive OR
-*/
-#define pxor_m2r(var, reg) mmx_m2r(pxor, var, reg)
-#define pxor_r2r(regs, regd) mmx_r2r(pxor, regs, regd)
-#define pxor(vars, vard) mmx_m2m(pxor, vars, vard)
-
-
-/* 2x32, 4x16, and 8x8 Parallel CoMPare for EQuality
- (resulting fields are either 0 or -1)
-*/
-#define pcmpeqd_m2r(var, reg) mmx_m2r(pcmpeqd, var, reg)
-#define pcmpeqd_r2r(regs, regd) mmx_r2r(pcmpeqd, regs, regd)
-#define pcmpeqd(vars, vard) mmx_m2m(pcmpeqd, vars, vard)
-
-#define pcmpeqw_m2r(var, reg) mmx_m2r(pcmpeqw, var, reg)
-#define pcmpeqw_r2r(regs, regd) mmx_r2r(pcmpeqw, regs, regd)
-#define pcmpeqw(vars, vard) mmx_m2m(pcmpeqw, vars, vard)
-
-#define pcmpeqb_m2r(var, reg) mmx_m2r(pcmpeqb, var, reg)
-#define pcmpeqb_r2r(regs, regd) mmx_r2r(pcmpeqb, regs, regd)
-#define pcmpeqb(vars, vard) mmx_m2m(pcmpeqb, vars, vard)
-
-
-/* 2x32, 4x16, and 8x8 Parallel CoMPare for Greater Than
- (resulting fields are either 0 or -1)
-*/
-#define pcmpgtd_m2r(var, reg) mmx_m2r(pcmpgtd, var, reg)
-#define pcmpgtd_r2r(regs, regd) mmx_r2r(pcmpgtd, regs, regd)
-#define pcmpgtd(vars, vard) mmx_m2m(pcmpgtd, vars, vard)
-
-#define pcmpgtw_m2r(var, reg) mmx_m2r(pcmpgtw, var, reg)
-#define pcmpgtw_r2r(regs, regd) mmx_r2r(pcmpgtw, regs, regd)
-#define pcmpgtw(vars, vard) mmx_m2m(pcmpgtw, vars, vard)
-
-#define pcmpgtb_m2r(var, reg) mmx_m2r(pcmpgtb, var, reg)
-#define pcmpgtb_r2r(regs, regd) mmx_r2r(pcmpgtb, regs, regd)
-#define pcmpgtb(vars, vard) mmx_m2m(pcmpgtb, vars, vard)
-
-
-/* 1x64, 2x32, and 4x16 Parallel Shift Left Logical
-*/
-#define psllq_i2r(imm, reg) mmx_i2r(psllq, imm, reg)
-#define psllq_m2r(var, reg) mmx_m2r(psllq, var, reg)
-#define psllq_r2r(regs, regd) mmx_r2r(psllq, regs, regd)
-#define psllq(vars, vard) mmx_m2m(psllq, vars, vard)
-
-#define pslld_i2r(imm, reg) mmx_i2r(pslld, imm, reg)
-#define pslld_m2r(var, reg) mmx_m2r(pslld, var, reg)
-#define pslld_r2r(regs, regd) mmx_r2r(pslld, regs, regd)
-#define pslld(vars, vard) mmx_m2m(pslld, vars, vard)
-
-#define psllw_i2r(imm, reg) mmx_i2r(psllw, imm, reg)
-#define psllw_m2r(var, reg) mmx_m2r(psllw, var, reg)
-#define psllw_r2r(regs, regd) mmx_r2r(psllw, regs, regd)
-#define psllw(vars, vard) mmx_m2m(psllw, vars, vard)
-
-
-/* 1x64, 2x32, and 4x16 Parallel Shift Right Logical
-*/
-#define psrlq_i2r(imm, reg) mmx_i2r(psrlq, imm, reg)
-#define psrlq_m2r(var, reg) mmx_m2r(psrlq, var, reg)
-#define psrlq_r2r(regs, regd) mmx_r2r(psrlq, regs, regd)
-#define psrlq(vars, vard) mmx_m2m(psrlq, vars, vard)
-
-#define psrld_i2r(imm, reg) mmx_i2r(psrld, imm, reg)
-#define psrld_m2r(var, reg) mmx_m2r(psrld, var, reg)
-#define psrld_r2r(regs, regd) mmx_r2r(psrld, regs, regd)
-#define psrld(vars, vard) mmx_m2m(psrld, vars, vard)
-
-#define psrlw_i2r(imm, reg) mmx_i2r(psrlw, imm, reg)
-#define psrlw_m2r(var, reg) mmx_m2r(psrlw, var, reg)
-#define psrlw_r2r(regs, regd) mmx_r2r(psrlw, regs, regd)
-#define psrlw(vars, vard) mmx_m2m(psrlw, vars, vard)
-
-
-/* 2x32 and 4x16 Parallel Shift Right Arithmetic
-*/
-#define psrad_i2r(imm, reg) mmx_i2r(psrad, imm, reg)
-#define psrad_m2r(var, reg) mmx_m2r(psrad, var, reg)
-#define psrad_r2r(regs, regd) mmx_r2r(psrad, regs, regd)
-#define psrad(vars, vard) mmx_m2m(psrad, vars, vard)
-
-#define psraw_i2r(imm, reg) mmx_i2r(psraw, imm, reg)
-#define psraw_m2r(var, reg) mmx_m2r(psraw, var, reg)
-#define psraw_r2r(regs, regd) mmx_r2r(psraw, regs, regd)
-#define psraw(vars, vard) mmx_m2m(psraw, vars, vard)
-
-
-/* 2x32->4x16 and 4x16->8x8 PACK and Signed Saturate
- (packs source and dest fields into dest in that order)
-*/
-#define packssdw_m2r(var, reg) mmx_m2r(packssdw, var, reg)
-#define packssdw_r2r(regs, regd) mmx_r2r(packssdw, regs, regd)
-#define packssdw(vars, vard) mmx_m2m(packssdw, vars, vard)
-
-#define packsswb_m2r(var, reg) mmx_m2r(packsswb, var, reg)
-#define packsswb_r2r(regs, regd) mmx_r2r(packsswb, regs, regd)
-#define packsswb(vars, vard) mmx_m2m(packsswb, vars, vard)
-
-
-/* 4x16->8x8 PACK and Unsigned Saturate
- (packs source and dest fields into dest in that order)
-*/
-#define packuswb_m2r(var, reg) mmx_m2r(packuswb, var, reg)
-#define packuswb_r2r(regs, regd) mmx_r2r(packuswb, regs, regd)
-#define packuswb(vars, vard) mmx_m2m(packuswb, vars, vard)
-
-
-/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK Low
- (interleaves low half of dest with low half of source
- as padding in each result field)
-*/
-#define punpckldq_m2r(var, reg) mmx_m2r(punpckldq, var, reg)
-#define punpckldq_r2r(regs, regd) mmx_r2r(punpckldq, regs, regd)
-#define punpckldq(vars, vard) mmx_m2m(punpckldq, vars, vard)
-
-#define punpcklwd_m2r(var, reg) mmx_m2r(punpcklwd, var, reg)
-#define punpcklwd_r2r(regs, regd) mmx_r2r(punpcklwd, regs, regd)
-#define punpcklwd(vars, vard) mmx_m2m(punpcklwd, vars, vard)
-
-#define punpcklbw_m2r(var, reg) mmx_m2r(punpcklbw, var, reg)
-#define punpcklbw_r2r(regs, regd) mmx_r2r(punpcklbw, regs, regd)
-#define punpcklbw(vars, vard) mmx_m2m(punpcklbw, vars, vard)
-
-
-/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK High
- (interleaves high half of dest with high half of source
- as padding in each result field)
-*/
-#define punpckhdq_m2r(var, reg) mmx_m2r(punpckhdq, var, reg)
-#define punpckhdq_r2r(regs, regd) mmx_r2r(punpckhdq, regs, regd)
-#define punpckhdq(vars, vard) mmx_m2m(punpckhdq, vars, vard)
-
-#define punpckhwd_m2r(var, reg) mmx_m2r(punpckhwd, var, reg)
-#define punpckhwd_r2r(regs, regd) mmx_r2r(punpckhwd, regs, regd)
-#define punpckhwd(vars, vard) mmx_m2m(punpckhwd, vars, vard)
-
-#define punpckhbw_m2r(var, reg) mmx_m2r(punpckhbw, var, reg)
-#define punpckhbw_r2r(regs, regd) mmx_r2r(punpckhbw, regs, regd)
-#define punpckhbw(vars, vard) mmx_m2m(punpckhbw, vars, vard)
-
-
-/* Empty MMx State
- (used to clean-up when going from mmx to float use
- of the registers that are shared by both; note that
- there is no float-to-mmx operation needed, because
- only the float tag word info is corruptible)
-*/
-#ifdef MMX_TRACE
-
-#define emms() \
- { \
- printf("emms()\n"); \
- __asm__ __volatile__ ("emms"); \
- }
-
-#else
-
-#define emms() __asm__ __volatile__ ("emms")
-
-#endif
-
-#endif
-/* vi: set ts=4 sw=4 expandtab: */